diff --git a/Code/GraphMol/ChemReactions/Reaction.cpp b/Code/GraphMol/ChemReactions/Reaction.cpp index c7f22d724..d9d275c94 100644 --- a/Code/GraphMol/ChemReactions/Reaction.cpp +++ b/Code/GraphMol/ChemReactions/Reaction.cpp @@ -317,7 +317,7 @@ bool ChemicalReaction::validate(unsigned int &numWarnings, bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { - throw ChemicalReactionException("initMatchers() must be called first"); + throw ChemicalReactionException("initReactantMatchers() must be called first"); } which = 0; for (MOL_SPTR_VECT::const_iterator iter = rxn.beginReactantTemplates(); @@ -338,7 +338,7 @@ bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { - throw ChemicalReactionException("initMatchers() must be called first"); + throw ChemicalReactionException("initReactantMatchers() must be called first"); } which = 0; for (MOL_SPTR_VECT::const_iterator iter = rxn.beginProductTemplates(); @@ -359,7 +359,7 @@ bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { - throw ChemicalReactionException("initMatchers() must be called first"); + throw ChemicalReactionException("initReactantMatchers() must be called first"); } which = 0; for (MOL_SPTR_VECT::const_iterator iter = rxn.beginAgentTemplates(); @@ -398,7 +398,7 @@ void addRecursiveQueriesToReaction( std::vector > > *reactantLabels) { if (!rxn.isInitialized()) { - throw ChemicalReactionException("initMatchers() must be called first"); + throw ChemicalReactionException("initReactantMatchers() must be called first"); } if (reactantLabels != NULL) { @@ -576,7 +576,7 @@ bool getMappedAtoms(T &rIt, std::map &mappedAtoms) { VECT_INT_VECT getReactingAtoms(const 
ChemicalReaction &rxn, bool mappedAtomsOnly) { if (!rxn.isInitialized()) { - throw ChemicalReactionException("initMatchers() must be called first"); + throw ChemicalReactionException("initReactantMatchers() must be called first"); } VECT_INT_VECT res; res.resize(rxn.getNumReactantTemplates()); diff --git a/Code/GraphMol/ChemReactions/SanitizeRxn.cpp b/Code/GraphMol/ChemReactions/SanitizeRxn.cpp index 86c35176f..c5a82d2a7 100644 --- a/Code/GraphMol/ChemReactions/SanitizeRxn.cpp +++ b/Code/GraphMol/ChemReactions/SanitizeRxn.cpp @@ -255,7 +255,6 @@ void fixRGroups(ChemicalReaction &rxn) { // if we have query atoms without rlabels, make proper rlabels if possible // ensure that every rlabel in the reactant has one in the product - void fixAtomMaps(ChemicalReaction &rxn) { int max_atom_map = getMaxProp( rxn, @@ -324,15 +323,67 @@ void fixReactantTemplateAromaticity(ChemicalReaction &rxn) { } void fixHs(ChemicalReaction &rxn) { + { + // if mapped Hydrogens in reactants are mapped to heavy atoms + // keep mappings, in all other cases remove them. 
+ // this allows us to merge query hydrogens atoms + + std::map mappedToNonHeavyProductAtom; + + for(MOL_SPTR_VECT::iterator it = rxn.beginProductTemplates(); + it != rxn.endProductTemplates(); + ++it) { + int atomMap = 0; + for (ROMol::AtomIterator atIt = (*it)->beginAtoms(); + atIt != (*it)->endAtoms(); + ++atIt) { + Atom *atom = (*atIt); + if (atom->getAtomicNum() != 1) { // hydrogen + if (atom->getPropIfPresent(common_properties::molAtomMapNumber, atomMap)) { + if(atomMap) { + mappedToNonHeavyProductAtom[atomMap] = true; + } + } + } + } + } + + for(MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates(); + it != rxn.endReactantTemplates(); + ++it) { + int atomMap = 0; + for (ROMol::AtomIterator atIt = (*it)->beginAtoms(); + atIt != (*it)->endAtoms(); + ++atIt) { + Atom *atom = (*atIt); + if (atom->getAtomicNum() == 1) { // hydrogen + if (atom->getPropIfPresent(common_properties::molAtomMapNumber, atomMap)) { + if(atomMap) { + if(mappedToNonHeavyProductAtom.find(atomMap) == + mappedToNonHeavyProductAtom.end()) { + atom->clearProp(common_properties::molAtomMapNumber); + } else { + BOOST_LOG(rdWarningLog) << + "Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)" + << std::endl; + } + } + } + } + } + } + } + const bool mergeUnmappedOnly = true; for(MOL_SPTR_VECT::iterator it = rxn.beginReactantTemplates(); it != rxn.endReactantTemplates(); ++it) { RWMol * rw = dynamic_cast(it->get()); - if (rw) + if (rw) { MolOps::mergeQueryHs(*rw, mergeUnmappedOnly); + } else - PRECONDITION(rw, "Oops, not really a RWMol?"); + PRECONDITION(rw, "Oops, not really an RWMol?"); } } diff --git a/Code/GraphMol/ChemReactions/SanitizeRxn.h b/Code/GraphMol/ChemReactions/SanitizeRxn.h index 9893e4505..a79259cd7 100644 --- a/Code/GraphMol/ChemReactions/SanitizeRxn.h +++ b/Code/GraphMol/ChemReactions/SanitizeRxn.h @@ -82,7 +82,22 @@ inline const MolOps::AdjustQueryParameters DefaultRxnAdjustParams() { } // Default adjustment parameters for ChemDraw style matching 
of reagents +// -- deprecated - renamed MatchOnlyAtRgroupsAdjustParams +// -- this doesn't match sciquest style searching inline const MolOps::AdjustQueryParameters ChemDrawRxnAdjustParams() { + BOOST_LOG(rdWarningLog) << + " deprecated -- please use MatchOnlyAtRgroupsAdjustParams instead" << std::endl; + MolOps::AdjustQueryParameters params; + params.adjustDegree = true; + params.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES; + params.adjustRingCount = false; + params.adjustRingCountFlags = MolOps::ADJUST_IGNORENONE; + params.makeDummiesQueries = false; + params.aromatizeIfPossible = true; + return params; +} + +inline const MolOps::AdjustQueryParameters MatchOnlyAtRgroupsAdjustParams() { MolOps::AdjustQueryParameters params; params.adjustDegree = true; params.adjustDegreeFlags = MolOps::ADJUST_IGNOREDUMMIES; diff --git a/Code/GraphMol/ChemReactions/Wrap/rdChemReactions.cpp b/Code/GraphMol/ChemReactions/Wrap/rdChemReactions.cpp index 9d9f08f54..32fd99d2a 100644 --- a/Code/GraphMol/ChemReactions/Wrap/rdChemReactions.cpp +++ b/Code/GraphMol/ChemReactions/Wrap/rdChemReactions.cpp @@ -816,7 +816,10 @@ Sample Usage:\n\ "Returns the default adjustment parameters for reactant templates"); python::def("GetChemDrawRxnAdjustParams", RDKit::RxnOps::ChemDrawRxnAdjustParams, - "Returns the chemdraw style adjustment parameters for reactant templates"); + "(deprecated, see MatchOnlyAtRgroupsAdjustParams)\n\tReturns the chemdraw style adjustment parameters for reactant templates"); + + python::def("MatchOnlyAtRgroupsAdjustParams", RDKit::RxnOps::MatchOnlyAtRgroupsAdjustParams, + "Only match at the specified rgroup locations in the reactant templates"); std::string docstring = "feed me"; python::def( diff --git a/Code/GraphMol/ChemReactions/testReaction.cpp b/Code/GraphMol/ChemReactions/testReaction.cpp index a616ea162..1956a9646 100644 --- a/Code/GraphMol/ChemReactions/testReaction.cpp +++ b/Code/GraphMol/ChemReactions/testReaction.cpp @@ -43,6 +43,7 @@ #include #include 
#include +#include #include #include #include @@ -5985,6 +5986,410 @@ void test64Github1266() { BOOST_LOG(rdInfoLog) << "Done" << std::endl; } +void test65SanitizeUnmappedHs() { + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (unmapped Hs) " + << std::endl; + + const std::string unmappedHs = \ + "$RXN\n" \ + "\n" \ + " Marvin 031701170941\n" \ + "\n" \ + " 1 1\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171709412D \n" \ + "\n" \ + " 16 16 0 0 0 0 999 V2000\n" \ + " -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 5 1 1 0 0 0 0\n" \ + " 1 6 1 0 0 0 0\n" \ + " 1 12 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 2 15 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 3 10 1 0 0 0 0\n" \ + " 3 14 1 0 0 0 0\n" \ + " 4 9 1 0 0 0 0\n" \ + " 4 13 1 0 0 0 0\n" \ + " 4 16 1 0 0 0 0\n" \ + " 5 7 1 0 0 0 0\n" \ + " 5 11 1 0 0 0 0\n" \ + " 13 5 1 0 0 0 0\n" \ + " 13 8 1 0 0 0 0\n" \ + "M END\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171709412D \n" \ + "\n" \ + " 6 6 0 0 0 0 999 V2000\n" \ + " 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 
0\n" \ + " 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 6 1 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 4 5 1 0 0 0 0\n" \ + " 5 6 1 0 0 0 0\n" \ + "M END"; + + ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs); + TEST_ASSERT(rxn); + rxn->initReactantMatchers(); + MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2; + std::vector prods; + + reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1"))); + hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get()))); + + reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl"))); + hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get()))); + + // test with and without AddHs + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 0); + prods = rxn->runReactants(hreacts1); + TEST_ASSERT(prods.size() == 768); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 0); + + prods = rxn->runReactants(hreacts2); + TEST_ASSERT(prods.size() == 128); + + // Test after sanitization (way fewer matches than with AddHs..) 
+ RxnOps::sanitizeRxn(*rxn); + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 12); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 4); + + + delete rxn; + + BOOST_LOG(rdInfoLog) << "Done" << std::endl; + +} + + + +void test66SanitizeMappedHs() { + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react but not prod) " + << std::endl; + + // H's are mapped in reactant but do not exist in product, + // they can be merged + const std::string unmappedHs = \ + "$RXN\n" \ + "\n" \ + " Marvin 031701170941\n" \ + "\n" \ + " 1 1\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171709412D \n" \ + "\n" \ + " 16 16 0 0 0 0 999 V2000\n" \ + " -2.5620 0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " -2.4235 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " -3.0587 -0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " -3.8322 -0.5265 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " -3.3355 0.8133 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " -1.7581 0.7120 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \ + " -3.0942 1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \ + " -4.5124 0.9090 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \ + " -4.3663 -1.1553 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \ + " -3.3000 -1.6022 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \ + " -3.8242 1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \ + " -2.2307 1.2821 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \ + " -3.9706 0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " -2.5700 -1.4780 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \ + " -1.6500 -0.5736 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \ + " -4.6278 -0.3082 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 5 1 1 0 0 0 0\n" \ + " 1 6 1 0 0 0 0\n" \ + " 1 12 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 2 15 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 3 10 1 0 0 0 0\n" \ + " 3 14 1 0 0 0 0\n" \ + " 4 9 1 0 0 0 0\n" \ + " 4 13 1 0 0 0 0\n" \ + " 4 16 1 0 0 0 0\n" \ + " 5 7 1 0 0 0 0\n" \ + " 5 
11 1 0 0 0 0\n" \ + " 13 5 1 0 0 0 0\n" \ + " 13 8 1 0 0 0 0\n" \ + "M END\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171709412D \n" \ + "\n" \ + " 6 6 0 0 0 0 999 V2000\n" \ + " 3.8966 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " 4.6111 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " 4.6111 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " 3.8966 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " 3.1821 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " 3.1821 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 6 1 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 4 5 1 0 0 0 0\n" \ + " 5 6 1 0 0 0 0\n" \ + "M END"; + + ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs); + TEST_ASSERT(rxn); + rxn->initReactantMatchers(); + MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2; + std::vector prods; + + reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1"))); + hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get()))); + + reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl"))); + hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get()))); + + // test with and without AddHs + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 0); + prods = rxn->runReactants(hreacts1); + TEST_ASSERT(prods.size() == 768); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 0); + + prods = rxn->runReactants(hreacts2); + TEST_ASSERT(prods.size() == 128); + + // Test after sanitization (way fewer matches than with AddHs..) 
+ RxnOps::sanitizeRxn(*rxn); + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 12); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 4); + + delete rxn; + + BOOST_LOG(rdInfoLog) << "Done" << std::endl; + +} + +void test67SanitizeMappedHsInReactantAndProd() { + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "Tests sanitize reaction (mapped hs in react and prod) " + << std::endl; + + // H's are mapped in reactant and in prod + // they can be merged + const std::string unmappedHs = \ + "$RXN\n" \ + "\n" \ + " Marvin 031701171002\n" \ + "\n" \ + " 1 1\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171710022D \n" \ + "\n" \ + " 16 16 0 0 0 0 999 V2000\n" \ + " -3.1881 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " -2.4736 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " -2.4736 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " -3.1881 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " -3.9025 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " -2.8178 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \ + " -4.3559 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \ + " -4.6170 -0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \ + " -3.5583 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \ + " -2.0203 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \ + " -4.7262 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 12 0 0\n" \ + " -3.5583 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \ + " -3.9025 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " -1.6500 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \ + " -1.7591 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \ + " -2.8178 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 5 1 1 0 0 0 0\n" \ + " 1 6 1 0 0 0 0\n" \ + " 1 12 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 2 15 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 3 10 1 0 0 0 0\n" \ + " 3 14 1 0 0 0 0\n" \ + " 4 9 1 0 0 0 0\n" \ + " 4 13 1 0 0 0 0\n" \ + " 4 16 1 0 0 0 0\n" \ + " 5 7 1 0 0 0 0\n" \ + " 5 11 1 0 0 0 
0\n" \ + " 13 5 1 0 0 0 0\n" \ + " 13 8 1 0 0 0 0\n" \ + "M END\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171710022D \n" \ + "\n" \ + " 17 17 0 0 0 0 999 V2000\n" \ + " 4.1309 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " 4.8454 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " 4.8454 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " 4.1309 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 4 0 0\n" \ + " 3.4165 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0\n" \ + " 3.4165 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 6 0 0\n" \ + " 4.5012 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 13 0 0\n" \ + " 3.7607 1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 7 0 0\n" \ + " 5.6690 0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 15 0 0\n" \ + " 5.2987 1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 17 0 0\n" \ + " 5.2987 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 14 0 0\n" \ + " 3.7607 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 16 0 0\n" \ + " 2.9631 -1.1018 0.0000 H 0 0 0 0 0 0 0 0 0 9 0 0\n" \ + " 2.5929 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 18 0 0\n" \ + " 2.7020 0.8250 0.0000 H 0 0 0 0 0 0 0 0 0 8 0 0\n" \ + " 4.5012 -1.5622 0.0000 H 0 0 0 0 0 0 0 0 0 10 0 0\n" \ + " 5.6690 -0.4605 0.0000 H 0 0 0 0 0 0 0 0 0 11 0 0\n" \ + " 1 2 1 0 0 0 0\n" \ + " 6 1 1 0 0 0 0\n" \ + " 1 7 1 0 0 0 0\n" \ + " 1 8 1 0 0 0 0\n" \ + " 2 9 1 0 0 0 0\n" \ + " 2 10 1 0 0 0 0\n" \ + " 2 3 1 0 0 0 0\n" \ + " 3 4 1 0 0 0 0\n" \ + " 3 11 1 0 0 0 0\n" \ + " 3 17 1 0 0 0 0\n" \ + " 4 5 1 0 0 0 0\n" \ + " 4 12 1 0 0 0 0\n" \ + " 4 16 1 0 0 0 0\n" \ + " 5 6 1 0 0 0 0\n" \ + " 5 14 1 0 0 0 0\n" \ + " 5 13 1 0 0 0 0\n" \ + " 6 15 1 0 0 0 0\n" \ + "M END\n"; + + ChemicalReaction *rxn = RxnBlockToChemicalReaction(unmappedHs); + TEST_ASSERT(rxn); + rxn->initReactantMatchers(); + MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2; + std::vector prods; + + reacts1.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1"))); + hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get()))); + + reacts2.push_back(ROMOL_SPTR(SmilesToMol("C1CCCCC1Cl"))); + hreacts2.push_back(ROMOL_SPTR(MolOps::addHs(*reacts2[0].get()))); + 
+ // test with and without AddHs + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 0); + prods = rxn->runReactants(hreacts1); + TEST_ASSERT(prods.size() == 768); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 0); + + prods = rxn->runReactants(hreacts2); + TEST_ASSERT(prods.size() == 128); + + // Test after sanitization (way fewer matches than with AddHs..) + RxnOps::sanitizeRxn(*rxn); + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 12); + + prods = rxn->runReactants(reacts2); + TEST_ASSERT(prods.size() == 4); + + + delete rxn; + + BOOST_LOG(rdInfoLog) << "Done" << std::endl; + +} +void test68MappedHToHeavy() { + const std::string rxnblock = \ + "$RXN\n" \ + "\n" \ + " Marvin 031701171005\n" \ + "\n" \ + " 1 1\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171710052D \n" \ + "\n" \ + " 3 2 0 0 0 0 999 V2000\n" \ + " -1.2721 -0.0116 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " -1.9866 -0.4241 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " -1.2721 0.8134 0.0000 H 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " 2 1 1 0 0 0 0\n" \ + " 1 3 1 0 0 0 0\n" \ + "M END\n" \ + "$MOL\n" \ + "\n" \ + " Mrv1583 03171710052D \n" \ + "\n" \ + " 3 2 0 0 0 0 999 V2000\n" \ + " 2.3886 -0.0563 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0\n" \ + " 1.6741 -0.4688 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n" \ + " 2.3886 0.7688 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n" \ + " 2 1 1 0 0 0 0\n" \ + " 1 3 1 0 0 0 0\n" \ + "M END\n"; + + ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxnblock); + TEST_ASSERT(rxn); + rxn->initReactantMatchers(); + MOL_SPTR_VECT reacts1, hreacts1, reacts2, hreacts2; + std::vector prods; + + reacts1.push_back(ROMOL_SPTR(SmilesToMol("CC"))); + hreacts1.push_back(ROMOL_SPTR(MolOps::addHs(*reacts1[0].get()))); + + // test with and without AddHs + prods = rxn->runReactants(reacts1); + TEST_ASSERT(prods.size() == 0); + + prods = rxn->runReactants(hreacts1); + TEST_ASSERT(prods.size() == 6); + + std::stringstream sstrm; + rdWarningLog->SetTee(sstrm); + 
RxnOps::sanitizeRxn(*rxn); + std::string s = sstrm.str(); + std::cerr << s << std::endl; + TEST_ASSERT(s.find("Reaction has explicit hydrogens, reactants will need explicit hydrogens (addHs)") != std::string::npos); + rdWarningLog->ClearTee(); + + delete rxn; +} + int main() { RDLog::InitLogs(); @@ -6062,6 +6467,11 @@ int main() { test43Github243(); test64Github1266(); + test65SanitizeUnmappedHs(); + test66SanitizeMappedHs(); + test67SanitizeMappedHsInReactantAndProd(); + test68MappedHToHeavy(); + BOOST_LOG(rdInfoLog) << "*******************************************************\n"; return (0); diff --git a/ReleaseNotes.md b/ReleaseNotes.md index b501c5607..93d6fecfe 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -345,7 +345,11 @@ Vianello, Maciek Wojcikowski ## Deprecated code (to be removed in a future release): - rdkit.VLib python module - + - The SanitizeRxn parameter set "ChemDrawRxnAdjustParams" has been renamed to + "MatchOnlyAtRgroupsAdjustParams". These settings did not reflect + how integrations with SciQuest or the Perkin Elmer ELN behaved and + were confusing to users (especially since they were not explicit). + ## Removed code: ## Contrib updates: