diff --git a/Code/GraphMol/Substruct/SubstructMatch.cpp b/Code/GraphMol/Substruct/SubstructMatch.cpp index 17911b0da..4dfb94e6a 100644 --- a/Code/GraphMol/Substruct/SubstructMatch.cpp +++ b/Code/GraphMol/Substruct/SubstructMatch.cpp @@ -33,6 +33,8 @@ #include "vf2.hpp" +using boost::make_iterator_range; + namespace RDKit { namespace detail { @@ -42,6 +44,74 @@ bool hasChiralLabel(const Atom *at) { return at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW; } + +bool enhancedStereoIsOK(const ROMol& mol, const ROMol& query, + std::unordered_map& q_to_mol, + const std::unordered_map& molStereoGroups, + const std::unordered_map& matches) +{ + std::unordered_map molAtomsToQueryGroups; + + // If the query has stereo groups: + // * OR only matches AND or OR (not absolute) + // * AND only matches AND + for (auto&& sg: query.getStereoGroups()) { + if (sg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE) { + continue; + } + // StereoGroup const* matched_mol_group = nullptr; + const bool is_and = sg.getGroupType() == StereoGroupType::STEREO_AND; + for (auto&& a: sg.getAtoms()) { + auto mol_group = molStereoGroups.find(q_to_mol[a->getIdx()]); + if (mol_group == molStereoGroups.end()) { + // group matching absolute. not ok. + return false; + } else if (is_and && mol_group->second->getGroupType() != StereoGroupType::STEREO_AND) { + // AND matching OR. not ok. 
+ return false; + } + + molAtomsToQueryGroups[q_to_mol[a->getIdx()]] = &sg; + } + } + + // If the mol has stereo groups: + // * All atoms must either be the same or opposite, you can't mix + // * Only one stereogroup must cover all matched atoms in the mol stereo group + for (auto&& sg: mol.getStereoGroups()) { + if (sg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE) { + continue; + } + bool doesMatch; + bool seen = false; + StereoGroup const* QGroup = nullptr; + + for (auto&& a: sg.getAtoms()) { + auto thisDoesMatch = matches.find(a->getIdx()); + if (thisDoesMatch == matches.end()) { + // not matched + continue; + } + + auto pos = molAtomsToQueryGroups.find(a->getIdx()); + auto thisQGroup = pos == molAtomsToQueryGroups.end() ? nullptr : pos->second; + if (!seen) { + doesMatch = thisDoesMatch->second; + QGroup = thisQGroup; + seen = true; + } else if (doesMatch != thisDoesMatch->second) { + // diastereomer. not ok. + return false; + } else if (thisQGroup != QGroup) { + // mix of groups in query. not ok. 
+ return false; + } + } + } + + return true; +} + } // namespace typedef std::map SUBQUERY_MAP; @@ -76,14 +146,26 @@ class MolMatchFinalCheckFunctor { public: MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps) - : d_query(query), d_mol(mol), d_params(ps){}; - bool operator()(const boost::detail::node_id c1[], - const boost::detail::node_id c2[]) const { + : d_query(query), d_mol(mol), d_params(ps) { + if (d_params.useEnhancedStereo) { + for (const auto& sg: d_mol.getStereoGroups()) { + if (sg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE) { + continue; + } + for (const auto a: sg.getAtoms()) { + d_molStereoGroups[a->getIdx()] = &sg; + } + } + } + } + + bool operator()(const boost::detail::node_id q_c[], + const boost::detail::node_id m_c[]) const { if (d_params.extraFinalCheck) { // EFF: we can no-doubt do better than this - std::vector aids(d_query.getNumAtoms()); + std::vector aids(m_c, m_c + d_query.getNumAtoms()); for (unsigned int i = 0; i < d_query.getNumAtoms(); ++i) { - aids[i] = c2[i]; + aids[i] = m_c[i]; } if (!d_params.extraFinalCheck(d_mol, aids)) { return false; @@ -93,16 +175,18 @@ class MolMatchFinalCheckFunctor { return true; } + std::unordered_map matches; + // check chiral atoms: for (unsigned int i = 0; i < d_query.getNumAtoms(); ++i) { - const Atom *qAt = d_query.getAtomWithIdx(c1[i]); + const Atom *qAt = d_query.getAtomWithIdx(q_c[i]); // With less than 3 neighbors we can't establish CW/CCW parity, // so query will be a match if it has any kind of chirality. 
if (qAt->getDegree() < 3 || !hasChiralLabel(qAt)) { continue; } - const Atom *mAt = d_mol.getAtomWithIdx(c2[i]); + const Atom *mAt = d_mol.getAtomWithIdx(m_c[i]); if (!hasChiralLabel(mAt)) { return false; } @@ -110,11 +194,12 @@ class MolMatchFinalCheckFunctor { return false; } + INT_LIST qOrder; INT_LIST mOrder; for (unsigned int j = 0; j < d_query.getNumAtoms(); ++j) { - const Bond *qB = d_query.getBondBetweenAtoms(c1[i], c1[j]); - const Bond *mB = d_mol.getBondBetweenAtoms(c2[i], c2[j]); + const Bond *qB = d_query.getBondBetweenAtoms(q_c[i], q_c[j]); + const Bond *mB = d_mol.getBondBetweenAtoms(m_c[i], m_c[j]); if (qB && mB) { mOrder.push_back(mB->getIdx()); qOrder.push_back(qB->getIdx()); @@ -131,32 +216,46 @@ class MolMatchFinalCheckFunctor { mOrder.insert(mOrder.end(), unmatchedNeighbors, -1); INT_LIST moOrder; - ROMol::OEDGE_ITER dbeg, dend; - boost::tie(dbeg, dend) = d_mol.getAtomBonds(mAt); - while (dbeg != dend) { - int dbidx = d_mol[*dbeg]->getIdx(); + for (const auto &bond : make_iterator_range(d_mol.getAtomBonds(mAt))) { + int dbidx = d_mol[bond]->getIdx(); if (std::find(mOrder.begin(), mOrder.end(), dbidx) != mOrder.end()) { moOrder.push_back(dbidx); } else { moOrder.push_back(-1); } - ++dbeg; } int mPermCount = static_cast(countSwapsToInterconvert(moOrder, mOrder)); - bool requireMatch = qPermCount % 2 == mPermCount % 2; - bool labelsMatch = qAt->getChiralTag() == mAt->getChiralTag(); + const bool requireMatch = qPermCount % 2 == mPermCount % 2; + const bool labelsMatch = qAt->getChiralTag() == mAt->getChiralTag(); + const bool matchOK = requireMatch == labelsMatch; - if (requireMatch != labelsMatch) { + // if this is not part of a stereogroup and doesn't match, return false + auto msg = d_molStereoGroups.find(m_c[i]); + if (msg == d_molStereoGroups.end()) { + if (!matchOK) { + return false; + } + } else { + matches[m_c[i]] = matchOK; + } + } + + std::unordered_map q_to_mol; + for (unsigned int j = 0; j < d_query.getNumAtoms(); ++j) { + 
q_to_mol[q_c[j]] = m_c[j]; + } + + if (d_params.useEnhancedStereo) { + if (!enhancedStereoIsOK(d_mol, d_query, q_to_mol, d_molStereoGroups, matches)) { return false; } } // now check double bonds - for (unsigned int i = 0; i < d_query.getNumBonds(); ++i) { - const Bond *qBnd = d_query.getBondWithIdx(i); + for (const auto& qBnd: d_query.bonds()) { if (qBnd->getBondType() != Bond::DOUBLE || qBnd->getStereo() <= Bond::STEREOANY) { continue; @@ -167,12 +266,8 @@ class MolMatchFinalCheckFunctor { continue; } - std::map qMap; - for (unsigned int j = 0; j < d_query.getNumAtoms(); ++j) { - qMap[c1[j]] = j; - } const Bond *mBnd = d_mol.getBondBetweenAtoms( - c2[qMap[qBnd->getBeginAtomIdx()]], c2[qMap[qBnd->getEndAtomIdx()]]); + q_to_mol[qBnd->getBeginAtomIdx()], q_to_mol[qBnd->getEndAtomIdx()]); CHECK_INVARIANT(mBnd, "Matching bond not found"); if (mBnd->getBondType() != Bond::DOUBLE || qBnd->getStereo() <= Bond::STEREOANY) { @@ -185,20 +280,20 @@ class MolMatchFinalCheckFunctor { unsigned int end1Matches = 0; unsigned int end2Matches = 0; - if (c2[qMap[qBnd->getBeginAtomIdx()]] == mBnd->getBeginAtomIdx()) { + if (q_to_mol[qBnd->getBeginAtomIdx()] == mBnd->getBeginAtomIdx()) { // query Begin == mol Begin - if (c2[qMap[qBnd->getStereoAtoms()[0]]] == mBnd->getStereoAtoms()[0]) { + if (q_to_mol[qBnd->getStereoAtoms()[0]] == mBnd->getStereoAtoms()[0]) { end1Matches = 1; } - if (c2[qMap[qBnd->getStereoAtoms()[1]]] == mBnd->getStereoAtoms()[1]) { + if (q_to_mol[qBnd->getStereoAtoms()[1]] == mBnd->getStereoAtoms()[1]) { end2Matches = 1; } } else { // query End == mol Begin - if (c2[qMap[qBnd->getStereoAtoms()[0]]] == mBnd->getStereoAtoms()[1]) { + if (q_to_mol[qBnd->getStereoAtoms()[0]] == mBnd->getStereoAtoms()[1]) { end1Matches = 1; } - if (c2[qMap[qBnd->getStereoAtoms()[1]]] == mBnd->getStereoAtoms()[0]) { + if (q_to_mol[qBnd->getStereoAtoms()[1]] == mBnd->getStereoAtoms()[0]) { end2Matches = 1; } } @@ -224,6 +319,7 @@ class MolMatchFinalCheckFunctor { const ROMol &d_query; 
const ROMol &d_mol; const SubstructMatchParameters &d_params; + std::unordered_map d_molStereoGroups; }; class AtomLabelFunctor { @@ -415,7 +511,7 @@ std::vector SubstructMatch( if (nt == 1) { detail::ResSubstructMatchHelper_(args, &matches, 0, resMolSupplier.length()); - } + } #ifdef RDK_THREADSAFE_SSS else { std::vector> tg; diff --git a/Code/GraphMol/Substruct/SubstructMatch.h b/Code/GraphMol/Substruct/SubstructMatch.h index 7314c7854..140c30af3 100644 --- a/Code/GraphMol/Substruct/SubstructMatch.h +++ b/Code/GraphMol/Substruct/SubstructMatch.h @@ -29,6 +29,8 @@ typedef std::vector> MatchVectType; struct RDKIT_SUBSTRUCTMATCH_EXPORT SubstructMatchParameters { bool useChirality = false; //!< Use chirality in determining whether or not //!< atoms/bonds match + bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in + //!< determining whether atoms/bonds match bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds //!< match each other bool useQueryQueryMatches = false; //!< Consider query-query matches, not diff --git a/Code/GraphMol/Substruct/catch_tests.cpp b/Code/GraphMol/Substruct/catch_tests.cpp index b04b9cbfa..2558cac9c 100644 --- a/Code/GraphMol/Substruct/catch_tests.cpp +++ b/Code/GraphMol/Substruct/catch_tests.cpp @@ -7,6 +7,8 @@ // which is included in the file license.txt, found at the root // of the RDKit source tree. 
// +// Tests of substructure searching +// #define CATCH_CONFIG_MAIN #include "catch.hpp" @@ -15,11 +17,49 @@ #include #include +#include #include using namespace RDKit; typedef std::tuple matchCase; +class _IsSubstructOf : public Catch::MatcherBase { + ROMol const *m_mol; + SubstructMatchParameters m_ps; + + public: + _IsSubstructOf(const ROMol &m) : m_mol(&m) {} + + _IsSubstructOf(const ROMol &m, SubstructMatchParameters ps) + : m_mol(&m), m_ps(ps) {} + + virtual bool match(const ROMol &query) const override { + return !SubstructMatch(*m_mol, query, m_ps).empty(); + } + + virtual std::string describe() const override { + std::ostringstream ss; + ss << "is not a substructure of " << MolToCXSmiles(*m_mol); + return ss.str(); + } +}; + +static _IsSubstructOf IsSubstructOf(const ROMol &m, + const SubstructMatchParameters &ps) { + return _IsSubstructOf(m, ps); +} + +static _IsSubstructOf IsSubstructOf(const ROMol &m) { + return _IsSubstructOf(m); +} + +namespace Catch { +// ""_smiles returns an RWMol. 
+template <> struct StringMaker { + static std::string convert(RDKit::RWMol const &m) { return MolToCXSmiles(m); } +}; +} + TEST_CASE("substructure parameters", "[substruct]") { SECTION("chirality") { auto mol1 = "CCC[C@@H]1CN(CCC)CCN1"_smiles; @@ -33,7 +73,7 @@ TEST_CASE("substructure parameters", "[substruct]") { CHECK(SubstructMatch(*mol1, *mol1, ps).size() == 1); ps.useChirality = true; - CHECK(SubstructMatch(*mol1, *mol2, ps).size() == 0); + CHECK_THAT(*mol2, !IsSubstructOf(*mol1, ps)); CHECK(SubstructMatch(*mol1, *mol1, ps).size() == 1); } SECTION("conjugated matching aromaticity 1") { @@ -55,8 +95,8 @@ TEST_CASE("substructure parameters", "[substruct]") { RWMol mol2(*mol1); MolOps::Kekulize(mol2); SubstructMatchParameters ps; - CHECK(SubstructMatch(*mol1, mol2, ps).size() == 0); - CHECK(SubstructMatch(mol2, *mol1, ps).size() == 0); + CHECK_THAT(mol2, !IsSubstructOf(*mol1)); + CHECK_THAT(*mol1, !IsSubstructOf(mol2)); ps.aromaticMatchesConjugated = true; CHECK(SubstructMatch(*mol1, mol2, ps).size() == 1); @@ -127,4 +167,99 @@ TEST_CASE("providing a final match function", "[substruct]") { ps.extraFinalCheck = &bigger; CHECK(SubstructMatch(*mol1, *mol2, ps).size() == 1); } -} \ No newline at end of file +} + +TEST_CASE("Enhanced stereochemistry", "[substruct][StereoGroup]") { + // Chirality specifications. + // 1. An achiral molecule: CC(O)C(CC)F means unknown/all stereoisomers + // 2. A chiral molecule: C[C@H](O)[C@H](CC)F means 1 stereoisomer + // 3. A chiral molecule with an AND specifier: C[C@H](O)[C@H](CC)F |&1:1,3| + // means both stereoisomers + // 4. 
A chiral molecule with an OR specifier: C[C@H](O)[C@H](CC)F |o1:1,3| + // means one of the two stereoisomers + auto mol_achiral = "CC(O)C(CC)F"_smiles; + auto mol_chiral = "C[C@H](O)[C@H](CC)F"_smiles; + auto mol_and = "C[C@H](O)[C@H](CC)F |&1:1,3|"_smiles; + auto mol_or = "C[C@H](O)[C@H](CC)F |o1:1,3|"_smiles; + auto mol_absolute = "C[C@H](O)[C@H](CC)F |a:1,3|"_smiles; + auto diastereomer = "C[C@H](O)[C@@H](CC)F"_smiles; + + SubstructMatchParameters ps; + ps.useChirality = true; + ps.useEnhancedStereo = true; + + SECTION("achiral search matches anything") { + CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_chiral, ps)); + CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_achiral, IsSubstructOf(*mol_absolute, ps)); + CHECK_THAT(*mol_achiral, IsSubstructOf(*diastereomer, ps)); + } + SECTION("chiral molecule is a substructure of AND or OR") { + CHECK_THAT(*mol_chiral, !IsSubstructOf(*mol_achiral, ps)); + CHECK_THAT(*mol_chiral, IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*mol_chiral, IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_chiral, !IsSubstructOf(*diastereomer, ps)); + CHECK_THAT(*mol_absolute, !IsSubstructOf(*mol_achiral, ps)); + CHECK_THAT(*mol_absolute, IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*mol_absolute, IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_absolute, !IsSubstructOf(*diastereomer, ps)); + } + SECTION("AND query only matches AND") { + // because it means BOTH, and only AND includes both. 
+ CHECK_THAT(*mol_and, !IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_and, IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*mol_and, !IsSubstructOf(*mol_absolute, ps)); + CHECK_THAT(*mol_and, !IsSubstructOf(*mol_chiral, ps)); + CHECK_THAT(*mol_and, !IsSubstructOf(*mol_achiral, ps)); + } + SECTION("An OR query matches AND and OR") { + // because AND is both, so it's a superset of the molecules described in + // the OR + CHECK_THAT(*mol_or, !IsSubstructOf(*mol_chiral, ps)); + CHECK_THAT(*mol_or, !IsSubstructOf(*mol_absolute, ps)); + CHECK_THAT(*mol_or, !IsSubstructOf(*diastereomer, ps)); + CHECK_THAT(*mol_or, IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_or, IsSubstructOf(*mol_and, ps)); + } + SECTION("AND and OR match their enantiomer") { + // This is, like, the point of And/Or + auto enantiomer = "C[C@@H](O)[C@@H](CC)F"_smiles; + CHECK_THAT(*enantiomer, IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*enantiomer, IsSubstructOf(*mol_or, ps)); + } + SECTION("But not some arbitrary diastereomer") { + CHECK_THAT(*diastereomer, !IsSubstructOf(*mol_and, ps)); + CHECK_THAT(*diastereomer, !IsSubstructOf(*mol_or, ps)); + } + SECTION("Mixed stereo groups include single stereo groups") { + auto mol_mixed_or = "C[C@H](O)[C@H](CC)F |o1:1,o2:3|"_smiles; + CHECK_THAT(*mol_mixed_or, !IsSubstructOf(*mol_or, ps)); + // OR refers to two of the 4 molecules that mol_mixed_or + CHECK_THAT(*mol_or, IsSubstructOf(*mol_mixed_or, ps)); + + auto mol_mixed_or2 = "C[C@H](O)[C@@H](CC)F |o1:1,o2:3|"_smiles; + CHECK_THAT(*mol_mixed_or2, !IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_or, IsSubstructOf(*mol_mixed_or2, ps)); + + // I'm not sure about these ones, but they should be symmetric: + auto mol_mixed_or_and_abs = "C[C@H](O)[C@H](CC)F |o1:1|"_smiles; + CHECK_THAT(*mol_mixed_or_and_abs, !IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_or, !IsSubstructOf(*mol_mixed_or_and_abs, ps)); + + auto mol_mixed_or_and_abs2 = "C[C@@H](O)[C@H](CC)F |o1:1|"_smiles; + CHECK_THAT(*mol_mixed_or_and_abs2, 
!IsSubstructOf(*mol_or, ps)); + CHECK_THAT(*mol_or, !IsSubstructOf(*mol_mixed_or_and_abs2, ps)); + } + SECTION("It's OK to match part of a stereo group, though") { + auto mol_and_long = "F[C@@H](O)C[C@@H](CC)F |&1:1,3|"_smiles; + auto mol_and_partial = "F[C@@H](O)C |&1:1|"_smiles; + auto mol_or_long = "F[C@@H](O)C[C@@H](CC)F |o1:1,3|"_smiles; + auto mol_or_partial = "F[C@@H](O)C |o1:1|"_smiles; + + CHECK_THAT(*mol_and_partial, IsSubstructOf(*mol_and_long, ps)); + CHECK_THAT(*mol_or_partial, IsSubstructOf(*mol_or_long, ps)); + CHECK_THAT(*mol_or_partial, IsSubstructOf(*mol_and_long, ps)); + CHECK_THAT(*mol_and_partial, !IsSubstructOf(*mol_or_long, ps)); + } +} diff --git a/Code/GraphMol/Wrap/Mol.cpp b/Code/GraphMol/Wrap/Mol.cpp index 19626154c..fde7a41da 100644 --- a/Code/GraphMol/Wrap/Mol.cpp +++ b/Code/GraphMol/Wrap/Mol.cpp @@ -290,6 +290,11 @@ struct mol_wrapper { .def_readwrite( "useChirality", &RDKit::SubstructMatchParameters::useChirality, "Use chirality in determining whether or not atoms/bonds match") + .def_readwrite( + "useEnhancedStereo", + &RDKit::SubstructMatchParameters::useEnhancedStereo, + "take enhanced stereochemistry into account while doing the match. " + "This only has an effect if useChirality is also True.") .def_readwrite( "aromaticMatchesConjugated", &RDKit::SubstructMatchParameters::aromaticMatchesConjugated, diff --git a/Docs/Book/RDKit_Book.rst b/Docs/Book/RDKit_Book.rst index 42aa5d5b1..8f8df2366 100644 --- a/Docs/Book/RDKit_Book.rst +++ b/Docs/Book/RDKit_Book.rst @@ -1220,9 +1220,10 @@ Support for Enhanced Stereochemistry Overview ======== -We are going to follow, at least for the initial implementation, the enhanced stereo representation -used in V3k mol files: groups of atoms with specified stereochemistry with an ``ABS``, ``AND``, or ``OR`` -marker indicating what is known. The general idea is that ``AND`` indicates mixtures and ``OR`` indicates unknown single substances. 
+Enhanced stereochemistry is used to indicate that a molecule represents more than one possible stereoisomer. +``AND`` indicates that a molecule is a mixture of molecules. ``OR`` indicates unknown single substances, +and ``ABS`` indicates a single substance. This follows the convention used in V3k mol files: groups of +atoms with specified stereochemistry with an ``ABS``, ``AND``, or ``OR`` marker indicating what is known. Here are some illustrations of what the various combinations mean: @@ -1301,7 +1302,7 @@ and the set of atoms that make it up. Use cases ========= -The initial target is to not lose data on an ``V3k mol -> RDKit -> V3k mol`` round trip. Manipulation, depiction, and searching are future goals. +The initial target is to not lose data on a ``V3k mol -> RDKit -> V3k mol`` round trip. Manipulation and depiction are future goals. It is possible to enumerate the elements of a ``StereoGroup`` using the function :py:func:`rdkit.Chem.EnumerateStereoisomers.EumerateStereoisomers`, which also preserves membership in the original ``StereoGroup``. @@ -1333,6 +1334,84 @@ Reactions also preserve ``StereoGroup``s. Product atoms are included in the ``St >>> Chem.MolToCXSmiles(ps[0][0]) 'C[C@H](Br)C[C@H](O)Cl |&1:1|' +.. |EnhancedSSS_A| image:: ./images/EnhancedStereoSSS_molA.png + :scale: 75% + :align: middle +.. |EnhancedSSS_B| image:: ./images/EnhancedStereoSSS_molB.png + :scale: 75% + :align: middle +.. |EnhancedSSS_C| image:: ./images/EnhancedStereoSSS_molC.png + :scale: 75% + :align: middle +.. |EnhancedSSS_D| image:: ./images/EnhancedStereoSSS_molD.png + :scale: 75% + :align: middle +.. |EnhancedSSS_E| image:: ./images/EnhancedStereoSSS_molE.png + :scale: 75% + :align: middle +.. |EnhancedSSS_F| image:: ./images/EnhancedStereoSSS_molF.png + :scale: 75% + :align: middle +.. 
|EnhancedSSS_G| image:: ./images/EnhancedStereoSSS_molG.png + :scale: 75% + :align: middle + + +Enhanced Stereochemistry and substructure search +================================================ + +Enhanced Stereochemistry may optionally be honored in substructure searches. The following table captures whether or not a substructure query +(in the rows) matches a particular molecule (in the columns). + ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| | |EnhancedSSS_A| | |EnhancedSSS_B| | |EnhancedSSS_C| | |EnhancedSSS_D| | |EnhancedSSS_E| | |EnhancedSSS_F| | |EnhancedSSS_G| | +| | | | | | | OR | AND | ++=================+=================+=================+=================+=================+=================+=================+=================+ +| |EnhancedSSS_A| | Y | Y | Y | Y | Y | Y | Y | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_B| | N | Y | N | N | Y | Y | Y | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_C| | N | N | Y | N | N | Y | Y | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_D| | N | N | N | Y | N | N | N | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_E| | N | Y | N | N | N | Y | Y | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_F| | N | N | N | N | N | Y | Y | +| OR | | | | | | | | 
++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +| |EnhancedSSS_G| | N | N | N | N | N | N | Y | +| AND | | | | | | | | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ + +Substructure search using molecules with enhanced stereochemistry follows these rules (where substructure < superstructure): + +* achiral < everything, because an achiral query means ignore chirality in the match +* chiral < AND, because AND includes both the chiral molecule and another one +* chiral < OR, because OR includes either the chiral molecule or another one +* OR < AND, because AND includes both molecules that OR could actually mean. +* one group of two atoms < two groups of one atom, because the latter is 4 different +  stereoisomers, and the former only two of the four. + +Some concrete examples of this: + +.. doctest :: + >>> ps = Chem.SubstructMatchParameters() + >>> ps.useChirality = True + >>> ps.useEnhancedStereo = True + >>> m_ABS = Chem.MolFromSmiles('CC[C@H](F)[C@H](C)O') + >>> m_AND = Chem.MolFromSmiles('CC[C@H](F)[C@H](C)O |&1:2,4|') + >>> m_OR = Chem.MolFromSmiles('CC[C@H](F)[C@H](C)O |o1:2,4|') + >>> m_AND.HasSubstructMatch(m_ABS,ps) + True + >>> m_OR.HasSubstructMatch(m_ABS,ps) + True + >>> m_AND.HasSubstructMatch(m_OR,ps) + True + >>> m_OR.HasSubstructMatch(m_AND,ps) + False + + Additional Information About the Fingerprints ********************************************* diff --git a/Docs/Book/images/EnhancedStereoSSS_molA.png b/Docs/Book/images/EnhancedStereoSSS_molA.png new file mode 100644 index 000000000..47dd36894 Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molA.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molB.png b/Docs/Book/images/EnhancedStereoSSS_molB.png new file mode 100644 index 000000000..f17b30468 Binary files /dev/null and 
b/Docs/Book/images/EnhancedStereoSSS_molB.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molC.png b/Docs/Book/images/EnhancedStereoSSS_molC.png new file mode 100644 index 000000000..0d2d021fc Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molC.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molD.png b/Docs/Book/images/EnhancedStereoSSS_molD.png new file mode 100644 index 000000000..4c2cb96d7 Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molD.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molE.png b/Docs/Book/images/EnhancedStereoSSS_molE.png new file mode 100644 index 000000000..5bad00df9 Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molE.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molF.png b/Docs/Book/images/EnhancedStereoSSS_molF.png new file mode 100644 index 000000000..2a7b4812d Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molF.png differ diff --git a/Docs/Book/images/EnhancedStereoSSS_molG.png b/Docs/Book/images/EnhancedStereoSSS_molG.png new file mode 100644 index 000000000..1aa828aad Binary files /dev/null and b/Docs/Book/images/EnhancedStereoSSS_molG.png differ