From cccee15a9195db78f7d3955971a3914624250863 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Fri, 15 Sep 2023 02:59:56 -0400 Subject: [PATCH] Add hasQueryHs (#6702) --- Code/GraphMol/AddHs.cpp | 71 +++++++++++++++++-- Code/GraphMol/MolOps.h | 12 ++++ Code/GraphMol/Wrap/MolOps.cpp | 21 ++++++ Code/GraphMol/Wrap/rough_test.py | 12 +++- Code/GraphMol/molopstest.cpp | 33 ++++++++- Code/JavaWrappers/MolOps.i | 2 + .../src-test/org/RDKit/MolQueryTests.java | 13 ++++ 7 files changed, 155 insertions(+), 9 deletions(-) diff --git a/Code/GraphMol/AddHs.cpp b/Code/GraphMol/AddHs.cpp index d92716134..b1151b5f5 100644 --- a/Code/GraphMol/AddHs.cpp +++ b/Code/GraphMol/AddHs.cpp @@ -1063,7 +1063,13 @@ ROMol *removeAllHs(const ROMol &mol, bool sanitize) { } namespace { -bool isQueryH(const Atom *atom) { +enum class HydrogenType { + NotAHydrogen, + UnMergableQueryHydrogen, + QueryHydrogen +}; + +HydrogenType isQueryH(const Atom *atom) { PRECONDITION(atom, "bogus atom"); if (atom->getAtomicNum() == 1) { // the simple case: the atom is flagged as being an H and @@ -1071,18 +1077,18 @@ bool isQueryH(const Atom *atom) { if (!atom->hasQuery() || (!atom->getQuery()->getNegation() && atom->getQuery()->getDescription() == "AtomAtomicNum")) { - return true; + return HydrogenType::QueryHydrogen; } } if (!(atom->getDegree() <= 1)) { // bonded and unbonded H atoms will continue rest will be returned - return false; + return HydrogenType::NotAHydrogen; } if (atom->hasQuery() && atom->getQuery()->getNegation()) { // we will not merge negated queries - return false; + return HydrogenType::NotAHydrogen; } bool hasHQuery = false, hasOr = false; @@ -1119,10 +1125,10 @@ bool isQueryH(const Atom *atom) { "in ORs is not supported. This query will not " "be merged" << std::endl; - return false; + return HydrogenType::UnMergableQueryHydrogen; } } - return hasHQuery; + return hasHQuery ? HydrogenType::QueryHydrogen : HydrogenType::NotAHydrogen; } } // namespace @@ -1145,7 +1151,7 @@ void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly, bool mergeIsotopes) { boost::dynamic_bitset<> hatoms(mol.getNumAtoms()); for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { - hatoms[i] = isQueryH(mol.getAtomWithIdx(i)); + hatoms[i] = isQueryH(mol.getAtomWithIdx(i)) == HydrogenType::QueryHydrogen; } unsigned int currIdx = 0, stopIdx = mol.getNumAtoms(); while (currIdx < stopIdx) { @@ -1263,5 +1269,56 @@ bool needsHs(const ROMol &mol) { return false; } +std::pair hasQueryHs(const ROMol &mol) { + bool queryHs = false; + // We don't care about announcing ORs or other items during isQueryH + RDLog::LogStateSetter blocker; + + for (const auto atom : mol.atoms()) { + switch (isQueryH(atom)) { + case HydrogenType::UnMergableQueryHydrogen: + return std::make_pair(true, true); + case HydrogenType::QueryHydrogen: + queryHs = true; + break; + default: // HydrogenType::NotAHydrogen: + break; + } + if (atom->hasQuery()) { + if (atom->getQuery()->getDescription() == "RecursiveStructure") { + auto *rsq = dynamic_cast(atom->getQuery()); + CHECK_INVARIANT(rsq, "could not convert recursive structure query"); + auto res = hasQueryHs(*rsq->getQueryMol()); + if(res.second) { // unmergableH implies queryH + return res; + } + queryHs |= res.first; + } + + // FIX: shouldn't be repeating this code here -- yet again! + std::list childStack( + atom->getQuery()->beginChildren(), atom->getQuery()->endChildren()); + while (!childStack.empty()) { + QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front(); + childStack.pop_front(); + if (qry->getDescription() == "RecursiveStructure") { + auto *rsq = dynamic_cast(qry.get()); + CHECK_INVARIANT(rsq, "could not convert recursive structure query"); + auto res = hasQueryHs(*rsq->getQueryMol()); + if(res.second) { + return res; + } + queryHs |= res.first; + } else { + childStack.insert(childStack.end(), qry->beginChildren(), + qry->endChildren()); + } + } + } + } // end of recursion loop + + return std::make_pair(queryHs, false); +} + } // namespace MolOps } // namespace RDKit diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index 53bef0ba9..4f71cb134 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -309,6 +309,18 @@ RDKIT_GRAPHMOL_EXPORT void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly = false, bool mergeIsotopes = false); +//! returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs) +/*! + This is really intended to be used with molecules that contain QueryAtoms + such as when checking smarts patterns for explicit hydrogens + + + \param mol the molecule to check for query Hs from + \return std::pair if pair.first is true if the molecule has query hydrogens, if pair.second + is true, the queryHs cannot be removed my mergeQueryHs +*/ +RDKIT_GRAPHMOL_EXPORT std::pair hasQueryHs(const ROMol &mol); + typedef enum { ADJUST_IGNORENONE = 0x0, ADJUST_IGNORECHAINS = 0x1, diff --git a/Code/GraphMol/Wrap/MolOps.cpp b/Code/GraphMol/Wrap/MolOps.cpp index 7fc1e54a6..c9fbd1f36 100644 --- a/Code/GraphMol/Wrap/MolOps.cpp +++ b/Code/GraphMol/Wrap/MolOps.cpp @@ -964,6 +964,14 @@ ROMol *molzipHelper(python::object &pmols, const MolzipParams &p) { return molzip(*mols, p).release(); } +python::tuple hasQueryHsHelper(const ROMol &m) { + python::list res; + auto hashs = MolOps::hasQueryHs(m); + res.append(hashs.first); + res.append(hashs.second); + return python::tuple(res); +} + // we can really only set some of these types from C++ which means // we need a helper function for testing that we can read them // correctly. @@ -1333,6 +1341,19 @@ struct molops_wrapper { "merges hydrogens into their neighboring atoms as queries", python::return_value_policy()); + docString = + "Check to see if the molecule has query Hs, this is normally used on query molecules\n\ +such as thos returned from MolFromSmarts\n\ +Example: \n\ + (hasQueryHs, hasUnmergableQueryHs) = HasQueryHs(mol)\n\ +\n\ +if hasUnmergableQueryHs, these query hs cannot be removed by calling\n\ +MergeQueryHs"; + python::def("HasQueryHs", hasQueryHsHelper, + python::arg("mol"), + docString.c_str()); + + // ------------------------------------------------------------------------ docString = "Removes atoms matching a substructure query from a molecule\n\ diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 793088049..75fe350d9 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -7347,6 +7347,17 @@ CAS<~> self.assertEqual(sgs[1].GetGroupType(), Chem.StereoGroupType.STEREO_OR) self.assertEqual(len(sgs[1].GetAtoms()), 1) + def testHasQueryHs(self): + for sma, hasQHs in [ + ("[#1]", (True, False)), + ("[#1,N]", (True, True)), + ("[$(C-[H])]", (True, False)), + ("[$([C,#1])]", (True, True)), + ("[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]", + (True, False))]: + pat = Chem.MolFromSmarts(sma) + self.assertEqual(Chem.HasQueryHs(pat), hasQHs) + def testMrvHandling(self): fn1 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','MarvinParse','test_data','aspirin.mrv') mol = Chem.MolFromMrvFile(fn1) @@ -7369,7 +7380,6 @@ CAS<~> self.assertEqual(mol.GetNumAtoms(),13) - if __name__ == '__main__': if "RDTESTCASE" in os.environ: suite = unittest.TestSuite() diff --git a/Code/GraphMol/molopstest.cpp b/Code/GraphMol/molopstest.cpp index cd268cc58..bcb768e4c 100644 --- a/Code/GraphMol/molopstest.cpp +++ b/Code/GraphMol/molopstest.cpp @@ -8437,6 +8437,37 @@ void testGithub5099() { TEST_ASSERT(m->getNumAtoms() == 5); } +void testHasQueryHs() { + BOOST_LOG(rdInfoLog) + << "-----------------------\n Testing hasQueryHs " + << std::endl; + const auto has_no_query_hs = std::make_pair(false, false); + const auto has_only_query_hs = std::make_pair(true, false); + const auto has_unmergeable_hs = std::make_pair(true, true); + + auto m0 = "CCCC"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m0) == has_no_query_hs); + + auto m = "[#1]"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m) == has_only_query_hs); + + auto m2 = "[#1,N]"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*m2) == has_unmergeable_hs); + + //remove the negation + auto recursive = "[$(C-[H])]"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive) == has_only_query_hs); + + auto recursive_or = "[$([C,#1])]"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*recursive_or) == has_unmergeable_hs); + + // from rd_filters for something bigger + auto keto_def_heterocycle = "[$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naaaa1),$(c([C;!R;!$(C-[N,O,S]);!$(C-[H])](=O))1naa[n,s,o]1)]"_smarts; + TEST_ASSERT(RDKit::MolOps::hasQueryHs(*keto_def_heterocycle) == has_only_query_hs); + + BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; + +} int main() { RDLog::InitLogs(); // boost::logging::enable_logs("rdApp.debug"); @@ -8553,6 +8584,6 @@ int main() { testSetTerminalAtomCoords(); testGet3DDistanceMatrix(); testGithub5099(); - + testHasQueryHs(); return 0; } diff --git a/Code/JavaWrappers/MolOps.i b/Code/JavaWrappers/MolOps.i index 843c40c1f..1edb13365 100644 --- a/Code/JavaWrappers/MolOps.i +++ b/Code/JavaWrappers/MolOps.i @@ -29,6 +29,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +%include "std_pair.i" %{ #include @@ -46,6 +47,7 @@ %ignore RDKit::MolOps::detectChemistryProblems; %include %ignore RDKit::MolOps::sanitizeMol(RWMol &,unsigned int &,unsigned int &); +%template(BoolPair) std::pair; %inline %{ int sanitizeMol(RDKit::RWMol &mol,int sanitizeOps){ diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/MolQueryTests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/MolQueryTests.java index 1180f14e7..d6a1d940a 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/MolQueryTests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/MolQueryTests.java @@ -63,6 +63,19 @@ public class MolQueryTests extends GraphMolTest { assertTrue(m1.hasSubstructMatch(aqmol)); assertFalse(m2.hasSubstructMatch(aqmol)); } + @Test public void testHasQueryHs() { + ROMol m2; + m2 = RWMol.MolFromSmarts("[#1]",0,false); + BoolPair res = RDKFuncs.hasQueryHs(m2); + assertTrue(res.getFirst()); + assertFalse(res.getSecond()); + + m2 = RWMol.MolFromSmarts("[#1,C]",0,false); + res = RDKFuncs.hasQueryHs(m2); + assertTrue(res.getFirst()); + assertTrue(res.getSecond()); + } + public static void main(String args[]) { org.junit.runner.JUnitCore.main("org.RDKit.BasicMoleculeTests"); }