diff --git a/Code/GraphMol/ChemReactions/ReactionRunner.cpp b/Code/GraphMol/ChemReactions/ReactionRunner.cpp index 2946d14e0..63f507156 100644 --- a/Code/GraphMol/ChemReactions/ReactionRunner.cpp +++ b/Code/GraphMol/ChemReactions/ReactionRunner.cpp @@ -129,13 +129,12 @@ bool getReactantMatches(const MOL_SPTR_VECT &reactants, bool recurseOverReactantCombinations( const VectVectMatchVectType &matchesByReactant, VectVectMatchVectType &matchesPerProduct, unsigned int level, - VectMatchVectType combination, - unsigned int maxProducts) { + VectMatchVectType combination, unsigned int maxProducts) { unsigned int nReactants = matchesByReactant.size(); URANGE_CHECK(level, nReactants); PRECONDITION(combination.size() == nReactants, "bad combination size"); - if(maxProducts && matchesPerProduct.size() >= maxProducts) { + if (maxProducts && matchesPerProduct.size() >= maxProducts) { return false; } @@ -146,15 +145,15 @@ bool recurseOverReactantCombinations( prod[level] = *reactIt; if (level == nReactants - 1) { // this is the bottom of the recursion: - if(maxProducts && matchesPerProduct.size() >= maxProducts) { + if (maxProducts && matchesPerProduct.size() >= maxProducts) { keepGoing = false; break; } matchesPerProduct.push_back(prod); } else { - keepGoing = recurseOverReactantCombinations(matchesByReactant, matchesPerProduct, - level + 1, prod, maxProducts); + keepGoing = recurseOverReactantCombinations( + matchesByReactant, matchesPerProduct, level + 1, prod, maxProducts); } } return keepGoing; @@ -184,16 +183,15 @@ void updateImplicitAtomProperties(Atom *prodAtom, const Atom *reactAtom) { void generateReactantCombinations( const VectVectMatchVectType &matchesByReactant, - VectVectMatchVectType &matchesPerProduct, - unsigned int maxProducts) { + VectVectMatchVectType &matchesPerProduct, unsigned int maxProducts) { matchesPerProduct.clear(); VectMatchVectType tmp; tmp.clear(); tmp.resize(matchesByReactant.size()); - if (!recurseOverReactantCombinations(matchesByReactant, matchesPerProduct, 0, tmp, maxProducts)) { - BOOST_LOG(rdWarningLog) - << "Maximum product count hit " << maxProducts << ", stopping reaction early...\n"; - + if (!recurseOverReactantCombinations(matchesByReactant, matchesPerProduct, 0, + tmp, maxProducts)) { + BOOST_LOG(rdWarningLog) << "Maximum product count hit " << maxProducts + << ", stopping reaction early...\n"; } } // end of generateReactantCombinations() @@ -397,13 +395,13 @@ void checkProductChirality(Atom::ChiralType reactantChirality, switch (flagVal) { case 0: + // reaction doesn't have anything to say about the chirality // FIX: should we clear the chirality or leave it alone? for now we leave // it alone - // productAtom->setChiralTag(Atom::ChiralType::CHI_UNSPECIFIED); productAtom->setChiralTag(reactantChirality); break; case 1: - // inversion + // reaction inverts chirality if (reactantChirality != Atom::CHI_TETRAHEDRAL_CW && reactantChirality != Atom::CHI_TETRAHEDRAL_CCW) { BOOST_LOG(rdWarningLog) @@ -414,14 +412,17 @@ void checkProductChirality(Atom::ChiralType reactantChirality, } break; case 2: + // reaction retains chirality: // retention: just set to the reactant productAtom->setChiralTag(reactantChirality); break; case 3: + // reaction destroys chirality: // remove stereo productAtom->setChiralTag(Atom::CHI_UNSPECIFIED); break; case 4: + // reaction creates chirality. // set stereo, so leave it the way it was in the product template break; default: @@ -466,8 +467,7 @@ void setReactantAtomPropertiesToProduct(Atom *productAtom, // One might be tempted to copy over the reactant atom's chirality into the // product atom if chirality is not specified on the product. This would be a // very bad idea because the order of bonds will almost certainly change on - // the - // atom and the chirality is referenced to bond order. + // the atom and the chirality is referenced to bond order. // --------- --------- --------- --------- --------- --------- // While we're here, set the stereochemistry @@ -669,8 +669,7 @@ void checkAndCorrectChiralityOfMatchingAtomsInProduct( continue; } // we can only do something sensible here if we have the same number of - // bonds - // in the reactants and the products: + // bonds in the reactants and the products: if (reactantAtom.getDegree() != productAtom->getDegree()) { continue; } @@ -732,13 +731,14 @@ void checkAndCorrectChiralityOfMatchingAtomsInProduct( } } +// Check the chirality of atoms not directly involved in the reaction void checkAndCorrectChiralityOfProduct( const std::vector &chiralAtomsToCheck, RWMOL_SPTR product, ReactantProductAtomMapping *mapping) { for (auto reactantAtom : chiralAtomsToCheck) { CHECK_INVARIANT(reactantAtom->getChiralTag() != Atom::CHI_UNSPECIFIED, "missing atom chirality."); - unsigned int reactAtomDegree = + const auto reactAtomDegree = reactantAtom->getOwningMol().getAtomDegree(reactantAtom); for (unsigned i = 0; i < mapping->reactProdAtomMap[reactantAtom->getIdx()].size(); i++) { @@ -753,12 +753,9 @@ void checkAndCorrectChiralityOfProduct( // If the number of bonds to the atom has changed in the course of the // reaction we're lost, so remove chirality. // A word of explanation here: the atoms in the chiralAtomsToCheck set - // are - // not explicitly mapped atoms of the reaction, so we really have no - // idea what - // to do with this case. At the moment I'm not even really sure how - // this - // could happen, but better safe than sorry. + // are not explicitly mapped atoms of the reaction, so we really have + // no idea what to do with this case. At the moment I'm not even really + // sure how this could happen, but better safe than sorry. productAtom->setChiralTag(Atom::CHI_UNSPECIFIED); } else if (reactantAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || reactantAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) { @@ -794,6 +791,51 @@ void checkAndCorrectChiralityOfProduct( } // end of loop over chiralAtomsToCheck } +/// +// Copy enhanced stereo groups from one reactant to the product +// stereo groups are copied if any atoms are in the product with +// the stereochemical information from the reactant preserved. +void copyEnhancedStereoGroups(const ROMol &reactant, RWMOL_SPTR product, + const ReactantProductAtomMapping &mapping) { + std::vector new_stereo_groups; + for (const auto &sg : reactant.getStereoGroups()) { + std::vector atoms; + for (auto &&reactantAtom : sg.getAtoms()) { + auto productAtoms = mapping.reactProdAtomMap.find(reactantAtom->getIdx()); + if (productAtoms == mapping.reactProdAtomMap.end()) { + continue; + } + + for (auto &&productAtomIdx : productAtoms->second) { + auto productAtom = product->getAtomWithIdx(productAtomIdx); + + // If chirality destroyed by the reaction, skip the atom + if (productAtom->getChiralTag() == Atom::CHI_UNSPECIFIED) { + continue; + } + // If chirality defined explicitly by the reaction, skip the atom + int flagVal = 0; + productAtom->getPropIfPresent(common_properties::molInversionFlag, + flagVal); + if (flagVal == 4) { + continue; + } + atoms.push_back(productAtom); + } + } + if (!atoms.empty()) { + new_stereo_groups.emplace_back(sg.getGroupType(), std::move(atoms)); + } + } + + if (!new_stereo_groups.empty()) { + auto &existing_sg = product->getStereoGroups(); + new_stereo_groups.insert(new_stereo_groups.end(), existing_sg.begin(), + existing_sg.end()); + product->setStereoGroups(std::move(new_stereo_groups)); + } +} + void generateProductConformers(Conformer *productConf, const ROMol &reactant, ReactantProductAtomMapping *mapping) { if (!reactant.getNumConformers()) { @@ -880,10 +922,14 @@ void addReactantAtomsAndBonds(const ChemicalReaction &rxn, RWMOL_SPTR product, // ---------- ---------- ---------- ---------- ---------- ---------- // now we need to loop over atoms from the reactants that were chiral but not // directly involved in the reaction in order to make sure their chirality - // hasn't - // been disturbed + // hasn't been disturbed checkAndCorrectChiralityOfProduct(chiralAtomsToCheck, product, mapping); + // ---------- ---------- ---------- ---------- ---------- ---------- + // Copy enhanced StereoGroup data from reactant to product if it is + // still valid. Uses ChiralTag checks above. + copyEnhancedStereoGroups(*reactant, product, *mapping); + // ---------- ---------- ---------- ---------- ---------- ---------- // finally we may need to set the coordinates in the product conformer: if (productConf) { @@ -902,7 +948,7 @@ MOL_SPTR_VECT generateOneProductSet( // if any of the reactants have a conformer, we'll go ahead and // generate conformers for the products: bool doConfs = false; - BOOST_FOREACH (ROMOL_SPTR reactant, reactants) { + for (auto &reactant : reactants) { if (reactant->getNumConformers()) { doConfs = true; break; @@ -975,9 +1021,8 @@ std::vector run_Reactants(const ChemicalReaction &rxn, // we now have matches for each reactant, so we can start creating products: // start by doing the combinatorics on the matches: VectVectMatchVectType reactantMatchesPerProduct; - ReactionRunnerUtils::generateReactantCombinations(matchesByReactant, - reactantMatchesPerProduct, - maxProducts); + ReactionRunnerUtils::generateReactantCombinations( + matchesByReactant, reactantMatchesPerProduct, maxProducts); productMols.resize(reactantMatchesPerProduct.size()); for (unsigned int productId = 0; productId != productMols.size(); diff --git a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py index 13afaea7c..0692e566c 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py +++ b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py @@ -717,5 +717,183 @@ M END self.assertEqual(len(prods), 1) +def _getProductCXSMILES(product): + """ + Clear properties. + + Mapping properties show up in CXSMILES make validation less readable. + """ + for a in product.GetAtoms(): + for k in a.GetPropsAsDict(): + a.ClearProp(k) + return Chem.MolToCXSmiles(product) + + +def _reactAndSummarize(rxn_smarts, *smiles): + """ + Run a reaction and combine the products in a single string. + + Makes errors readable ish + """ + rxn = rdChemReactions.ReactionFromSmarts(rxn_smarts) + mols = [Chem.MolFromSmiles(s) for s in smiles] + products = [] + for prods in rxn.RunReactants(mols): + products.append(' + '.join(map(_getProductCXSMILES, prods))) + products = ' OR '.join(products) + return products + + +class StereoGroupTests(unittest.TestCase): + + def test_reaction_preserves_stereo(self): + """ + StereoGroup atoms are in the reaction, but the reaction doesn't affect + the chirality at the stereo centers + -> preserve stereo group + """ + reaction = '[C@:1]>>[C@:1]' + reactants = ['F[C@H](Cl)Br |o1:1|', 'F[C@@H](Cl)Br |&1:1|', 'FC(Cl)Br'] + for reactant in reactants: + products = _reactAndSummarize(reaction, reactant) + self.assertEqual(products, reactant) + + def test_reaction_ignores_stereo(self): + """ + StereoGroup atoms are in the reaction, but the reaction doesn't specify the + chirality at the stereo centers + -> preserve stereo group + """ + reaction = '[C:1]>>[C:1]' + reactants = ['F[C@H](Cl)Br |o1:1|', 'F[C@@H](Cl)Br |&1:1|', 'FC(Cl)Br'] + for reactant in reactants: + products = _reactAndSummarize(reaction, reactant) + self.assertEqual(products, reactant) + + def test_reaction_inverts_stereo(self): + """ + StereoGroup atoms are in the reaction, and the reaction inverts the specified + chirality at the stereo centers. + -> preserve stereo group + """ + reaction = '[C@:1]>>[C@@:1]' + + products = _reactAndSummarize(reaction, 'F[C@H](Cl)Br |o1:1|') + self.assertEqual(products, 'F[C@@H](Cl)Br |o1:1|') + products = _reactAndSummarize(reaction, 'F[C@@H](Cl)Br |&1:1|') + self.assertEqual(products, 'F[C@H](Cl)Br |&1:1|') + products = _reactAndSummarize(reaction, 'FC(Cl)Br') + self.assertEqual(products, 'FC(Cl)Br') + + def test_reaction_destroys_stereo(self): + """ + StereoGroup atoms are in the reaction, and the reaction destroys the specified + chirality at the stereo centers + -> invalidate stereo center, preserve the rest of the stereo group. + """ + reaction = '[C@:1]>>[C:1]' + products = _reactAndSummarize(reaction, 'F[C@H](Cl)Br |o1:1|') + self.assertEqual(products, 'FC(Cl)Br') + products = _reactAndSummarize(reaction, 'F[C@@H](Cl)Br |&1:1|') + self.assertEqual(products, 'FC(Cl)Br') + products = _reactAndSummarize(reaction, 'FC(Cl)Br') + self.assertEqual(products, 'FC(Cl)Br') + + reaction = '[C@:1]F>>[C:1]F' + # Reaction destroys stereo (but preserves unaffected group + products = _reactAndSummarize(reaction, + 'F[C@H](Cl)[C@@H](Cl)Br |o1:1,&2:3|') + self.assertEqual(products, 'FC(Cl)[C@@H](Cl)Br |&1:3|') + # Reaction destroys stereo (but preserves the rest of the group + products = _reactAndSummarize(reaction, 'F[C@H](Cl)[C@@H](Cl)Br |&1:1,3|') + self.assertEqual(products, 'FC(Cl)[C@@H](Cl)Br |&1:3|') + + def test_reaction_defines_stereo(self): + """ + StereoGroup atoms are in the reaction, and the reaction creates the specified + chirality at the stereo centers + -> remove the stereo center from + -> invalidate stereo group + """ + products = _reactAndSummarize('[C:1]>>[C@@:1]', 'F[C@H](Cl)Br |o1:1|') + self.assertEqual(products, 'F[C@@H](Cl)Br') + products = _reactAndSummarize('[C:1]>>[C@@:1]', 'F[C@@H](Cl)Br |&1:1|') + self.assertEqual(products, 'F[C@@H](Cl)Br') + products = _reactAndSummarize('[C:1]>>[C@@:1]', 'FC(Cl)Br') + self.assertEqual(products, 'F[C@@H](Cl)Br') + + # Remove group with defined stereo + products = _reactAndSummarize('[C:1]F>>[C@@:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:1,&2:3|') + self.assertEqual(products, 'F[C@@H](Cl)[C@@H](Cl)Br |&1:3|') + + # Remove atoms with defined stereo from group + products = _reactAndSummarize('[C:1]F>>[C@@:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:1,3|') + self.assertEqual(products, 'F[C@@H](Cl)[C@@H](Cl)Br |o1:3|') + + def test_stereogroup_is_spectator_to_reaction(self): + """ + StereoGroup atoms are not in the reaction + -> stereo group is unaffected + """ + # 5a. Reaction preserves unrelated stereo + products = _reactAndSummarize('[C@:1]F>>[C@:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + self.assertEqual(products, 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + # 5b. Reaction ignores unrelated stereo' + products = _reactAndSummarize('[C:1]F>>[C:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + self.assertEqual(products, 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + # 5c. Reaction inverts unrelated stereo' + products = _reactAndSummarize('[C@:1]F>>[C@@:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + self.assertEqual(products, 'F[C@@H](Cl)[C@@H](Cl)Br |o1:3|') + # 5d. Reaction destroys unrelated stereo' 1:3| + products = _reactAndSummarize('[C@:1]F>>[C:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + self.assertEqual(products, 'FC(Cl)[C@@H](Cl)Br |o1:3|') + # 5e. Reaction assigns unrelated stereo' + products = _reactAndSummarize('[C:1]F>>[C@@:1]F', + 'F[C@H](Cl)[C@@H](Cl)Br |o1:3|') + self.assertEqual(products, 'F[C@@H](Cl)[C@@H](Cl)Br |o1:3|') + + def test_reaction_splits_stereogroup(self): + """ + StereoGroup atoms are split into two products by the reaction + -> Should the group be invalidated or trimmed? + """ + products = _reactAndSummarize('[C:1]OO[C:2]>>[C:2]O.O[C:1]', + 'F[C@H](Cl)OO[C@@H](Cl)Br |o1:1,5|') + # Two product sets, each with two mols: + self.assertEqual(products.count('|o1:1|'), 4) + + def test_reaction_copies_stereogroup(self): + """ + If multiple copies of an atom in StereoGroup show up in the product, they + should all be part of the same product StereoGroup. + """ + # Stereogroup atoms are in the reaction with multiple copies in the product + products = _reactAndSummarize('[O:1].[C:2]=O>>[O:1][C:2][O:1]', + 'Cl[C@@H](Br)C[C@H](Br)CCO |&1:1,4|', + 'CC(=O)C') + # stereogroup manually checked, product SMILES assumed correct. + self.assertEqual( + products, + 'CC(C)(OCC[C@@H](Br)C[C@@H](Cl)Br)OCC[C@@H](Br)C[C@@H](Cl)Br |&1:6,9,15,18|' + ) + + # Stereogroup atoms are not in the reaction, but have multiple copies in the + # product. + products = _reactAndSummarize('[O:1].[C:2]=O>>[O:1][C:2][O:1]', + 'Cl[C@@H](Br)C[C@H](Br)CCO |&1:1,4|', + 'CC(=O)C') + # stereogroup manually checked, product SMILES assumed correct. + self.assertEqual( + products, + 'CC(C)(OCC[C@@H](Br)C[C@@H](Cl)Br)OCC[C@@H](Br)C[C@@H](Cl)Br |&1:6,9,15,18|' + ) + + if __name__ == '__main__': unittest.main(verbosity=True) diff --git a/Code/GraphMol/ChemReactions/catch_tests.cpp b/Code/GraphMol/ChemReactions/catch_tests.cpp index 10db40155..d2de6575b 100644 --- a/Code/GraphMol/ChemReactions/catch_tests.cpp +++ b/Code/GraphMol/ChemReactions/catch_tests.cpp @@ -22,6 +22,7 @@ #include using namespace RDKit; +using std::unique_ptr; TEST_CASE("Github #1632", "[Reaction,PDB,bug]") { SECTION("basics") { @@ -30,7 +31,7 @@ TEST_CASE("Github #1632", "[Reaction,PDB,bug]") { std::unique_ptr mol(SequenceToMol("K", sanitize, flavor)); REQUIRE(mol); REQUIRE(mol->getAtomWithIdx(0)->getMonomerInfo()); - auto res = static_cast( + auto res = static_cast( mol->getAtomWithIdx(0)->getMonomerInfo()); CHECK(res->getResidueNumber() == 1); std::unique_ptr rxn(RxnSmartsToChemicalReaction( @@ -45,9 +46,95 @@ TEST_CASE("Github #1632", "[Reaction,PDB,bug]") { auto p = prods[0][0]; CHECK(p->getNumAtoms() == mol->getNumAtoms() + 1); REQUIRE(p->getAtomWithIdx(0)->getMonomerInfo()); - auto pres = static_cast( + auto pres = static_cast( p->getAtomWithIdx(0)->getMonomerInfo()); CHECK(pres->getResidueNumber() == 1); REQUIRE(!p->getAtomWithIdx(4)->getMonomerInfo()); } } + +static void clearAtomMappingProps(ROMol& mol) { + for (auto&& a : mol.atoms()) { + a->clear(); + } +} + +TEST_CASE("Github #2366 Enhanced Stereo", "[Reaction,StereoGroup,bug]") { + SECTION("Reaction Preserves Stereo") { + ROMOL_SPTR mol("F[C@H](Cl)Br |o1:1|"_smiles); + REQUIRE(mol); + unique_ptr rxn( + RxnSmartsToChemicalReaction("[C@:1]>>[C@:1]")); + REQUIRE(rxn); + + MOL_SPTR_VECT reactants = {mol}; + + rxn->initReactantMatchers(); + auto prods = rxn->runReactants(reactants); + REQUIRE(prods.size() == 1); + REQUIRE(prods[0].size() == 1); + auto p = prods[0][0]; + + clearAtomMappingProps(*p); + CHECK(MolToCXSmiles(*p) == "F[C@H](Cl)Br |o1:1|"); + } + SECTION("Reaction destroys one center in StereoGroup") { + ROMOL_SPTR mol("F[C@H](Cl)[C@@H](Cl)Br |&1:1,3|"_smiles); + REQUIRE(mol); + unique_ptr rxn( + RxnSmartsToChemicalReaction("[C@:1]F>>[C:1]F")); + REQUIRE(rxn); + + MOL_SPTR_VECT reactants = {mol}; + + rxn->initReactantMatchers(); + auto prods = rxn->runReactants(reactants); + REQUIRE(prods.size() == 1); + REQUIRE(prods[0].size() == 1); + auto p = prods[0][0]; + + clearAtomMappingProps(*p); + CHECK(MolToCXSmiles(*p) == "FC(Cl)[C@@H](Cl)Br |&1:3|"); + } + SECTION("Reaction splits StereoGroup") { + ROMOL_SPTR mol("F[C@H](Cl)[C@@H](Cl)Br |&1:1,3|"_smiles); + REQUIRE(mol); + unique_ptr rxn(RxnSmartsToChemicalReaction( + "[F:1][C@:2][C@:3][Cl:4]>>[F:1][C@:2]O.O[C@:3][Cl:4]")); + REQUIRE(rxn); + + MOL_SPTR_VECT reactants = {mol}; + + rxn->initReactantMatchers(); + auto prods = rxn->runReactants(reactants); + REQUIRE(prods.size() == 1); + REQUIRE(prods[0].size() == 2); + auto p0 = prods[0][0]; + auto p1 = prods[0][1]; + + clearAtomMappingProps(*p0); + clearAtomMappingProps(*p1); + CHECK(MolToCXSmiles(*p0) == "O[C@@H](F)Cl |&1:1|"); + CHECK(MolToCXSmiles(*p1) == "O[C@@H](Cl)Br |&1:1|"); + } + SECTION("Reaction combines StereoGroups") { + ROMOL_SPTR mol1("F[C@H](Cl)O |&1:1|"_smiles); + REQUIRE(mol1); + ROMOL_SPTR mol2("Cl[C@H](Br)O |&1:1|"_smiles); + REQUIRE(mol2); + unique_ptr rxn(RxnSmartsToChemicalReaction( + "[F:1][C@:2]O.O[C@:3][Cl:4]>>[F:1][C@:2][C@:3][Cl:4]")); + REQUIRE(rxn); + + MOL_SPTR_VECT reactants = {mol1, mol2}; + + rxn->initReactantMatchers(); + auto prods = rxn->runReactants(reactants); + REQUIRE(prods.size() == 1); + REQUIRE(prods[0].size() == 1); + auto p0 = prods[0][0]; + + clearAtomMappingProps(*p0); + CHECK(MolToCXSmiles(*p0) == "F[C@@H](Cl)[C@H](Cl)Br |&1:1,&2:3|"); + } +} diff --git a/Docs/Book/RDKit_Book.rst b/Docs/Book/RDKit_Book.rst index 3dc0e39b2..d1b094d3f 100644 --- a/Docs/Book/RDKit_Book.rst +++ b/Docs/Book/RDKit_Book.rst @@ -419,7 +419,8 @@ defition is handled. A consistent example, esterification of secondary alcohols, is used throughout [#chiralRxn]_. If no chiral information is present in the reaction definition, the -stereochemistry of the reactants is preserved: +stereochemistry of the reactants is preserved, as is membership in +enhanced stereo groups: .. doctest:: @@ -1223,21 +1224,37 @@ and the set of atoms that make it up. Use cases ========= -The initial target is to not lose data on an ``V3k mol -> RDKit -> V3k mol`` round trip. Manipulation, -depiction, and searching is a secondary goal. +The initial target is to not lose data on an ``V3k mol -> RDKit -> V3k mol`` round trip. Manipulation, depiction, and searching are future goals. -It is currently possible to enumerate the elements of a ``StereoGroup`` using the function py:func:`rdkit.Chem.EnumerateStereoisomers.EumerateStereoisomers`. +It is possible to enumerate the elements of a ``StereoGroup`` using the function :py:func:`rdkit.Chem.EnumerateStereoisomers.EumerateStereoisomers`, which also +preserves membership in the original ``StereoGroup``. .. doctest :: - >>> m = Chem.MolFromSmiles('C[C@H](F)C[C@H](O)Cl |&1:1|') - >>> m.GetStereoGroups()[0].GetGroupType() + >>> m = Chem.MolFromSmiles('C[C@H](F)C[C@H](O)Cl |&1:1|') + >>> m.GetStereoGroups()[0].GetGroupType() rdkit.Chem.rdchem.StereoGroupType.STEREO_AND - >>> [x.GetIdx() for x in m.GetStereoGroups()[0].GetAtoms()] + >>> [x.GetIdx() for x in m.GetStereoGroups()[0].GetAtoms()] [1] - >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers - >>> [Chem.MolToSmiles(x) for x in EnumerateStereoisomers(m)] - ['C[C@@H](F)C[C@H](O)Cl', 'C[C@H](F)C[C@H](O)Cl'] + >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers + >>> [Chem.MolToCXSmiles(x) for x in EnumerateStereoisomers(m)] + ['C[C@@H](F)C[C@H](O)Cl |&1:1|', 'C[C@H](F)C[C@H](O)Cl |&1:1|'] + +Reactions also preserve ``StereoGroup``s. Product atoms are included in the ``StereoGroup`` as long as the reaction doesn't create or destroy chirality at the atom. + +.. doctest :: + + >>> def clearAllAtomProps(mol): + ... """So that atom mapping isn't shown""" + ... for atom in mol.GetAtoms(): + ... for key in atom.GetPropsAsDict(): + ... atom.ClearProp(key) + ... + >>> rxn = AllChem.ReactionFromSmarts('[C:1]F >> [C:1]Br') + >>> ps=rxn.RunReactants([m]) + >>> clearAllAtomProps(ps[0][0]) + >>> Chem.MolToCXSmiles(ps[0][0]) + 'C[C@H](Br)C[C@H](O)Cl |&1:1|' .. rubric:: Footnotes diff --git a/Docs/Code/EnhancedStereo.md b/Docs/Code/EnhancedStereo.md index 18a97ac7f..6ad3e9de0 100644 --- a/Docs/Code/EnhancedStereo.md +++ b/Docs/Code/EnhancedStereo.md @@ -32,17 +32,24 @@ depiction, and searching is a secondary goal. Stored as a vector of `StereoGroup` objects. -A `StereoGroup` contains an enum with the type as well as pointers to the atoms involved. We will need to adjust this when atoms are removed or replaced. `StereoGroup`s are not exposed to Python, as the implementation is still tentative. +A `StereoGroup` contains an enum with the type as well as pointers to the atoms involved. We will need to adjust this when atoms are removed or replaced. ## Enumeration -The existing stereoisomer enumeration code needs to be updated to handle enhanced stereo groups correctly. This is the key piece for canonicalization and substructure searching. +The Python-only function `rdkit.Chem.EnumerateStereoisomers.EnumerateStereoisomers` can be used to enumerate +all stereoisomers represented by the StereoGroups. It is possible to enumerate _only_ the Stereo Groups, +ignoring other unspecified centers. -We probably need to add an option to allow enumeration only of stereo groups (to ignore unspecified centers). +## Reactions + +The reaction runner is aware of stereo groups associated with a Mol. Atoms in a Stereo Group are copied to +all products in which they appear, unless the reaction created or destroyed stereochemistry at that atom. +If an atom from a reactant StereoGroup appears multiple times in the product, all copies of that atom are +put in the same product StereoGroup. ## Searching -This will be handled by searching in MolBundle objects produced by the enumeration code. +This will be handled by searching in MolBundle objects produced by the enumeration code. Substructure searching and canonicalization do not yet support stereo groups. ## Depiction diff --git a/enhanced_stereo_in_reactions_demo.py b/enhanced_stereo_in_reactions_demo.py new file mode 100644 index 000000000..d82bc62ac --- /dev/null +++ b/enhanced_stereo_in_reactions_demo.py @@ -0,0 +1,83 @@ +from rdkit import Chem +from rdkit.Chem import AllChem + +def get_product_cxsmiles(product): + # Clear properties. Mapping properties show up in CXSMILES + for a in product.GetAtoms(): + for k in a.GetPropsAsDict(): + a.ClearProp(k) + return Chem.MolToCXSmiles(product) + +def print_rxn(rxn_smarts, smiles, comment): + print(f"\n{comment}:\n {rxn_smarts}") + print(' ', ' + '.join(smiles)) + rxn = AllChem.ReactionFromSmarts(rxn_smarts) + mols = [Chem.MolFromSmiles(s) for s in smiles] + for prods in rxn.RunReactants(mols): + print(' >>') + print(' ', ' + '.join(map(get_product_cxsmiles, prods))) + + +# StereoGroup atoms are in the reaction, but the reaction doesn't affect +# the chirality at the stereo centers +# -> preserve stereo group +print_rxn('[C@:1]>>[C@:1]', ['F[C@H](Cl)Br |o1:1|'], '0a. Reaction preserves stereo') +print_rxn('[C@:1]>>[C@:1]', ['F[C@@H](Cl)Br |&1:1|'], '0b. Reaction preserves stereo') +print_rxn('[C@:1]>>[C@:1]', ['FC(Cl)Br'], '0c. Reaction preserves stereo') + +# StereoGroup atoms are in the reaction, but the reaction doesn't specify the +# chirality at the stereo centers +# -> preserve stereo group +print_rxn('[C:1]>>[C:1]', ['F[C@H](Cl)Br |a:1|'], '1a. Reaction ignores stereo') +print_rxn('[C:1]>>[C:1]', ['F[C@@H](Cl)Br |&1:1|'], '1b. Reaction ignores stereo') +print_rxn('[C:1]>>[C:1]', ['FC(Cl)Br'], '1c. Reaction ignores stereo') + +# StereoGroup atoms are in the reaction, and the reaction inverts the specified +# chirality at the stereo centers +# -> preserve stereo group +print_rxn('[C@:1]>>[C@@:1]', ['F[C@H](Cl)Br |o1:1|'], '2a. Reaction inverts stereo') +print_rxn('[C@:1]>>[C@@:1]', ['F[C@@H](Cl)Br |&1:1|'], '2b. Reaction inverts stereo') +print_rxn('[C@:1]>>[C@@:1]', ['FC(Cl)Br'], '2c. Reaction inverts stereo') + +# StereoGroup atoms are in the reaction, and the reaction destroys the specified +# chirality at the stereo centers +# -> invalidate stereo group +print_rxn('[C@:1]>>[C:1]', ['F[C@H](Cl)Br |o1:1|'], '3a. Reaction destroys stereo') +print_rxn('[C@:1]>>[C:1]', ['F[C@@H](Cl)Br |&1:1|'], '3b. Reaction destroys stereo') +print_rxn('[C@:1]>>[C:1]', ['FC(Cl)Br'], '3c. Reaction destroys stereo') +print_rxn('[C@:1]F>>[C:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:1,&2:3|'], '3d. Reaction destroys stereo (but preserves unaffected group)' ) +#### -> Should the group be invalidated or trimmed? +print_rxn('[C@:1]F>>[C:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |&1:1,3|'], '3e. Reaction destroys stereo' ) + +# StereoGroup atoms are in the reaction, and the reaction creates the specified +# chirality at the stereo centers +# -> invalidate stereo group +print_rxn('[C:1]>>[C@@:1]', ['F[C@H](Cl)Br |o1:1|'], '4a. Reaction creates stereo') +print_rxn('[C:1]>>[C@@:1]', ['F[C@@H](Cl)Br |&1:1|'], '4b. Reaction creates stereo') +print_rxn('[C:1]>>[C@@:1]', ['FC(Cl)Br'], '4c. Reaction creates stereo') +print_rxn('[C:1]F>>[C@@:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:1,&2:3|'], '4d. Reaction creates stereo (preserve unaffected group)' ) +#### -> Should the group be invalidated or trimmed? +print_rxn('[C:1]F>>[C@@:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:1,3|'], '4e. Reaction creates stereo' ) + + +# StereoGroup atoms are not in the reaction +# -> preserve stereo group +print_rxn('[C@:1]F>>[C@:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:3|'], '5a. Reaction preserves unrelated stereo' ) +print_rxn('[C:1]F>>[C:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:3|'], '5b. Reaction ignores unrelated stereo' ) +print_rxn('[C@:1]F>>[C@@:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:3|'], '5c. Reaction inverts unrelated stereo' ) +print_rxn('[C@:1]F>>[C:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:3|'], '5d. Reaction destroys unrelated stereo' ) +print_rxn('[C:1]F>>[C@@:1]F', ['F[C@H](Cl)[C@@H](Cl)Br |o1:3|'], '5e. Reaction creates unrelated stereo' ) + +# StereoGroup atoms are split into two products by the reaction +#### -> Should the group be invalidated or trimmed? +print_rxn('[C:1]OO[C:2]>>[C:2]O.O[C:1]', ['F[C@H](Cl)OO[C@@H](Cl)Br |o1:1,5|'], '6e. Reaction splits StereoGroup atoms into two Mols' ) + +# Stereogroup atoms are in the reaction with multiple copies in the product +# -> All copies should be in the same product stereo group +print_rxn('[O:1].[C:2]=O>>[O:1][C:2][O:1]', ['Cl[C@@H](Br)C[C@H](Br)CCO |&1:1,4|', 'CC(=O)C'], '7. Add two copies') + +# Stereogroup atoms are not in the reaction, but have multiple copies in the +# product. +# -> All copies should be in the same product stereo group +print_rxn('[O:1].[C:2]=O>>[O:1][C:2][O:1]', ['Cl[C@@H](Br)C[C@H](Br)CCO |&1:1,4|', 'CC(=O)C'], '8. Add two copies') +