// // Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc. // and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "RGroupCore.h" #include "GraphMol/SmilesParse/SmilesWrite.h" #include "GraphMol/ChemTransforms/ChemTransforms.h" #include "GraphMol/Substruct/SubstructUtils.h" #include "GraphMol/TautomerQuery/TautomerQuery.h" namespace RDKit { namespace { // From answer 12 in // https://stackoverflow.com/questions/5279051/how-can-i-create-cartesian-product-of-vector-of-vectors // by anumi // modified to exclude duplicates static std::vector> cartesianProduct( const std::vector> &v, bool allowDuplicates) { std::vector> s = {{}}; for (const auto &u : v) { std::vector> r; for (const auto &x : s) { for (const auto y : u) { // check for duplicates if (allowDuplicates || std::find(x.begin(), x.end(), y) == x.end()) { r.push_back(x); r.back().push_back(y); } } } if (r.empty()) { // unable to extend return r; } s = std::move(r); } return s; } } // namespace // move this to constructor if the create new core path can be removed from // RGroupDecomposition::add void RCore::init() { findIndicesWithRLabel(); countUserRGroups(); buildMatchingMol(); } void RCore::findIndicesWithRLabel() { // Find all the core atoms that have user // label and set their indices to 1 in core_atoms_with_user_labels core_atoms_with_user_labels.resize(core->getNumAtoms()); for (const auto atom : core->atoms()) { int label; if (atom->getPropIfPresent(RLABEL, label) && label > 0) { core_atoms_with_user_labels.set(atom->getIdx()); } } } RWMOL_SPTR RCore::extractCoreFromMolMatch( const ROMol &mol, const MatchVectType &match, const RGroupDecompositionParameters ¶ms) const { auto extractedCore = boost::make_shared(mol); boost::dynamic_bitset<> atomIndicesToKeep(mol.getNumAtoms()); std::vector newBonds; std::map dummyAtomMap; std::map molAtomMap; for (const auto &pair : match) { const auto queryAtom = core->getAtomWithIdx(pair.first); const auto targetAtom = extractedCore->getAtomWithIdx(pair.second); if (int rLabel; queryAtom->getPropIfPresent(RLABEL, rLabel)) { targetAtom->setProp(RLABEL, rLabel); } if (int rLabelType; queryAtom->getPropIfPresent(RLABEL_TYPE, rLabelType)) { targetAtom->setProp(RLABEL_TYPE, rLabelType); } if (queryAtom->getAtomicNum() == 0 && queryAtom->hasProp(RLABEL) && queryAtom->getDegree() == 1) { continue; } else { atomIndicesToKeep.set(pair.second); molAtomMap[mol.getAtomWithIdx(pair.second)] = targetAtom; int neighborNumber = -1; #ifdef VERBOSE std::cerr << "Atom Chirality In " << targetAtom->getChiralTag() << std::endl; #endif bool isChiral = targetAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW || targetAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW; if (isChiral && !params.substructmatchParams.useChirality) { // if we're not doing chiral matching don't copy chirality to the // extracted core. targetAtom->setChiralTag(Atom::CHI_UNSPECIFIED); isChiral = false; } // collect neighbors in vector, so we can add atoms while looping std::vector targetNeighborAtoms; for (auto targetNeighborAtom : extractedCore->atomNeighbors(targetAtom)) { targetNeighborAtoms.push_back(targetNeighborAtom); } // explicit hydrogens to keep in extracted core to preserve chiralty std::vector hydrogensToAdd; for (const auto targetNeighborAtom : targetNeighborAtoms) { ++neighborNumber; auto targetNeighborIndex = targetNeighborAtom->getIdx(); auto queryNeighborMapping = std::find_if( match.begin(), match.end(), [this, targetNeighborIndex, pair](const auto &p) { return p.second == static_cast(targetNeighborIndex) && core->getBondBetweenAtoms(pair.first, p.first); }); if (queryNeighborMapping == match.end()) { if (targetNeighborAtom->getAtomicNum() == 1) { // Hydrogen needed to define chirality is present in target but // not mapped to core. Copy it to the extracted core hydrogensToAdd.push_back(static_cast(targetNeighborIndex)); molAtomMap[mol.getAtomWithIdx(targetNeighborIndex)] = targetNeighborAtom; } else if (isChiral) { // There is a heavy sidechain in the decomp that is connected to // the core by an unknown bond (onlyMatchAtRGroups = False and // allowMultipleRGroupsOnUnlabelled = False). // As there is no explicit target bond chriality is not preserved. // In some cases we could handle chirality, but that has not been // implemented (if there is only one free bond on the query that // would be easy- or we could arbitrarily assign bonds). isChiral = false; targetAtom->setChiralTag(Atom::CHI_UNSPECIFIED); } continue; } const auto queryNeighbor = core->getAtomWithIdx((*queryNeighborMapping).first); if (queryNeighbor->getAtomicNum() == 0 && queryNeighbor->hasProp(RLABEL) && queryNeighbor->getDegree() == 1) { auto newDummy = new Atom(*queryNeighbor); dummyAtomMap[newDummy] = static_cast(targetNeighborIndex); newDummy->clearComputedProps(); const auto newDummyIdx = extractedCore->addAtom(newDummy, false, true); if (newDummyIdx >= atomIndicesToKeep.size()) { atomIndicesToKeep.resize(newDummyIdx + 1); } atomIndicesToKeep.set(newDummyIdx); auto connectingBond = extractedCore ->getBondBetweenAtoms(pair.second, targetNeighborIndex) ->copy(); if (connectingBond->getStereo() > Bond::BondStereo::STEREOANY) { // stereo double bonds connectingBond->setStereo(Bond::BondStereo::STEREOANY); } if (connectingBond->getBeginAtomIdx() == targetNeighborIndex) { connectingBond->setBeginAtomIdx(newDummyIdx); } else { connectingBond->setEndAtomIdx(newDummyIdx); } newBonds.push_back(connectingBond); // Check to see if we are breaking a stereo bond definition, by // removing one of the stereo atoms. If so, set to the new atom for (auto bond : extractedCore->atomBonds(targetAtom)) { if (bond->getIdx() == connectingBond->getIdx()) { continue; } if (bond->getStereo() > Bond::STEREOANY) { auto &stereoAtoms = bond->getStereoAtoms(); for (int &stereoAtom : stereoAtoms) { if (stereoAtom == static_cast(targetNeighborIndex)) { stereoAtom = static_cast(newDummyIdx); } } } } // Chirality parity stuff see RDKit::replaceCore in // Code/GraphMol/ChemTransforms/ChemTransforms.cpp if (isChiral) { bool switchIt = false; switch (extractedCore->getAtomDegree(targetAtom)) { case 4: if (!(neighborNumber % 2)) { switchIt = true; } break; case 3: if (neighborNumber == 1) { switchIt = true; } break; } // because this neighbor will be moved at the end of the neighbor // list, we need to decrement the neighbor number in case additional // dummy atoms are to be connected to the targetAtom --neighborNumber; if (switchIt) { targetAtom->invertChirality(); } } } } for (const auto index : hydrogensToAdd) { atomIndicesToKeep.set(index); } #ifdef VERBOSE std::cerr << "Atom Chirality Out " << targetAtom->getChiralTag() << std::endl; #endif } } for (const auto newBond : newBonds) { extractedCore->addBond(newBond, true); } // Now delete atom's that are not in the core. extractedCore->beginBatchEdit(); boost::dynamic_bitset<> removedAtoms(extractedCore->getNumAtoms()); for (const auto atom : extractedCore->atoms()) { if (!atomIndicesToKeep.test(atom->getIdx())) { extractedCore->removeAtom(atom); removedAtoms.set(atom->getIdx()); } } extractedCore->commitBatchEdit(); // Copy molecule coordinates to extracted core details::updateSubMolConfs(mol, *extractedCore, removedAtoms); for (auto citer = mol.beginConformers(); citer != mol.endConformers(); ++citer) { Conformer &newConf = extractedCore->getConformer(static_cast((*citer)->getId())); for (const auto &[fst, snd] : dummyAtomMap) { newConf.setAtomPos(fst->getIdx(), (*citer)->getAtomPos(snd)); } } // If the molecule has no coordinates and the core does, copy those over if (!mol.getNumConformers() && core->getNumConformers()) { ROMol molCopy(mol); for (auto citer = core->beginConformers(); citer != core->endConformers(); ++citer) { const auto newConf = new Conformer(mol.getNumAtoms()); newConf->setId((*citer)->getId()); newConf->set3D((*citer)->is3D()); for (const auto &[fst, snd] : match) { newConf->setAtomPos(snd, (*citer)->getAtomPos(fst)); } molCopy.addConformer(newConf); } details::updateSubMolConfs(molCopy, *extractedCore, removedAtoms); molCopy.clearConformers(); for (const auto atom : extractedCore->atoms()) { if (isUserRLabel(*atom)) { int rLabel = atom->getProp(RLABEL); for (const auto coreAtom : core->atoms()) { if (int l; coreAtom->getPropIfPresent(RLABEL, l) && l == rLabel) { int i = 0; for (auto citer = core->beginConformers(); citer != core->endConformers(); ++citer, ++i) { extractedCore->getConformer(i).setAtomPos( atom->getIdx(), (*citer)->getAtomPos(coreAtom->getIdx())); } break; } } } } } // Copy over any stereo groups that lie in the extracted core details::copyStereoGroups(molAtomMap, mol, *extractedCore); extractedCore->clearComputedProps(true); extractedCore->updatePropertyCache(false); #ifdef VERBOSE std::cerr << "Extracted core smiles " << MolToSmiles(*extractedCore) << std::endl; std::cerr << "Extracted core smarts " << MolToSmarts(*extractedCore) << std::endl; #endif try { unsigned int failed; MolOps::sanitizeMol(*extractedCore, failed, MolOps::SANITIZE_SYMMRINGS | MolOps::SANITIZE_CLEANUP); } catch (const MolSanitizeException &) { } return extractedCore; } // Return a copy of core where dummy atoms are replaced by // the respective matching atom in mol, while other atoms have // their aromatic flag and formal charge copied from // the respective matching atom in mol ROMOL_SPTR RCore::replaceCoreAtomsWithMolMatches( const ROMol &mol, const MatchVectType &match) const { auto coreReplacedAtoms = boost::make_shared(*core); for (const auto &p : match) { auto atom = coreReplacedAtoms->getAtomWithIdx(p.first); if (isAtomWithMultipleNeighborsOrNotDummyRGroupAttachment(*atom)) { auto molAtom = mol.getAtomWithIdx(p.second); replaceCoreAtom(*coreReplacedAtoms, *atom, *molAtom); } } std::map matchLookup(match.cbegin(), match.cend()); for (auto bond : coreReplacedAtoms->bonds()) { if (bond->hasQuery()) { const auto molBond = mol.getBondBetweenAtoms(matchLookup[bond->getBeginAtomIdx()], matchLookup[bond->getEndAtomIdx()]); if (molBond == nullptr) { // this can happen if we have a user-defined R group that is not // matched in the query CHECK_INVARIANT(bond->getBeginAtom()->getAtomicNum() == 0 || bond->getEndAtom()->getAtomicNum() == 0, "Failed to find core bond in molecule"); } else { Bond newBond(molBond->getBondType()); newBond.setIsAromatic(molBond->getIsAromatic()); coreReplacedAtoms->replaceBond(bond->getIdx(), &newBond, true); } } } #ifdef VERBOSE std::cerr << "Original core smarts " << MolToSmarts(*core) << std::endl; std::cerr << "Dummy replaced core smarts " << MolToSmarts(*coreReplacedAtoms) << std::endl; #endif if (mol.getNumConformers() > 0) { // if the input structure has coordinates copy them to the core if (!coreReplacedAtoms->getNumConformers()) { coreReplacedAtoms->addConformer( new Conformer(coreReplacedAtoms->getNumAtoms())); } auto &replacedConformer = coreReplacedAtoms->getConformer(); const auto &molConformer = mol.getConformer(); for (const auto &p : match) { auto molPoint = molConformer.getAtomPos(p.second); replacedConformer.setAtomPos(p.first, molPoint); } } else { // otherwise, delete all core coordinates from the replaced core coreReplacedAtoms->clearConformers(); } return coreReplacedAtoms; } void RCore::replaceCoreAtom(RWMol &mol, Atom &atom, const Atom &other) const { auto atomicNumber = other.getAtomicNum(); auto targetAtom = &atom; bool wasDummy = (atom.getAtomicNum() == 0); if (wasDummy || atom.hasQuery()) { if (atom.hasQuery()) { Atom newAtom(atomicNumber); auto atomIdx = atom.getIdx(); mol.replaceAtom(atomIdx, &newAtom, false, true); targetAtom = mol.getAtomWithIdx(atomIdx); } else { atom.setAtomicNum(atomicNumber); } } targetAtom->setIsAromatic(other.getIsAromatic()); targetAtom->setFormalCharge(other.getFormalCharge()); if (wasDummy) { targetAtom->setNoImplicit(true); unsigned int numHs = 0; const auto &otherMol = other.getOwningMol(); for (const auto &nbri : boost::make_iterator_range(otherMol.getAtomNeighbors(&other))) { const auto nbrAtom = otherMol[nbri]; if (nbrAtom->getAtomicNum() == 1) { ++numHs; } } targetAtom->setNumExplicitHs(numHs + other.getTotalNumHs()); targetAtom->updatePropertyCache(false); } } // matching the core to the target molecule is a two step process // First match to a reduced representation (the core minus terminal user // R-groups). Next, match the R-groups. We do this as the core may not be a // substructure match for the molecule if a single molecule atom matches 2 // terminal user defined RGroup attachments (see // https://github.com/rdkit/rdkit/pull/4002) buildMatchingMol() creates the // reduced representation from the core and matchTerminalUserRGroups() adds in // the terminal R Groups to the match // Builds a matching molecule which is the core with terminal user R groups // removed Also creates the data structures used in matching the R groups void RCore::buildMatchingMol() { matchingMol = boost::make_shared(*core); terminalRGroupAtomToNeighbor.clear(); matchingMol->beginBatchEdit(); for (auto atom : matchingMol->atoms()) { // keep track of the original core index in the matching molecule atom atom->setProp(RLABEL_CORE_INDEX, atom->getIdx()); // TODO for unlabelled core attachments if the heavy neighbor is not dummy // then keep one attachment if (atom->getAtomicNum() == 0 && atom->getDegree() == 1 && isDummyRGroupAttachment(*atom)) { // if we are a query in disguise, don't strip if (atom->hasQuery()) { auto q = atom->getQuery()->getDescription(); // If we are anything but AtomNull or AtomAtomicNum we are not // fully a dummy atom if (q != "AtomNull" && q != "AtomAtomicNum") { unsigned int type = 0xFFFFFFFF; // If we are were set as an RLABEL of type Isotope we probably // have a query of named "AtomIsotope" // we DO strip these unless they have an additional query if (q != "AtomIsotope" || !atom->getPropIfPresent(RLABEL_TYPE, type) || type != RGroupLabels::IsotopeLabels) { continue; } } } // remove terminal user R groups and map the index of the core neighbor // atom to the index of the removed terminal R group const int neighborIdx = *matchingMol->getAtomNeighbors(atom).first; terminalRGroupAtomToNeighbor.emplace(atom->getIdx(), neighborIdx); matchingMol->removeAtom(atom); } } matchingMol->commitBatchEdit(); } // Given a matching molecule substructure match to a target molecule, return // core matches with terminal user R groups matched std::vector RCore::matchTerminalUserRGroups( const RWMol &target, MatchVectType match, const SubstructMatchParameters &sssParams) const { // Transform match indexed by matching molecule atoms to a map // indexed by core atoms std::transform(match.begin(), match.end(), match.begin(), [this](const std::pair &mapping) { auto queryIdx = this->matchingIndexToCoreIndex(mapping.first); std::pair newMapping(queryIdx, mapping.second); return newMapping; }); std::map matchMap(match.cbegin(), match.cend()); std::vector allMappings; if (terminalRGroupAtomToNeighbor.empty()) { allMappings.push_back(std::move(match)); return allMappings; } // build a dynamic_bitset of target atoms currently mapped boost::dynamic_bitset<> mappedTargetIdx(target.getNumAtoms()); for (const auto &pair : match) { mappedTargetIdx.set(pair.second); } // Dummy atoms/r group attachments that cannot be mapped to target atoms std::vector missingDummies; // A map of terminal dummies/R group attachment points to a list of possible // target atoms that the R group can map to std::map> availableMappingsForDummyMap; boost::dynamic_bitset<> symmetricHydrogens(target.getNumAtoms()); // keep a count of target atoms and the number of connections to the core std::map targetAtomBondsToCoreCounts; for (const auto coreAtom : core->atoms()) { if (coreAtom->getAtomicNum() == 1) { continue; } if (isTerminalRGroupWithUserLabel(coreAtom->getIdx())) { continue; } std::vector dummyIndexes; for (auto neighbor : core->atomNeighbors(coreAtom)) { if (isTerminalRGroupWithUserLabel(neighbor->getIdx())) { dummyIndexes.push_back(neighbor->getIdx()); } } if (dummyIndexes.empty()) { continue; } // Sort dummies based on user RLABEL (ascending) then unlabeled // (descending as they are negative) std::sort(dummyIndexes.begin(), dummyIndexes.end(), [this](int a, int b) { auto dummy = core->getAtomWithIdx(a); auto otherDummy = core->getAtomWithIdx(b); auto l1 = dummy->getProp(RLABEL); auto l2 = otherDummy->getProp(RLABEL); return l1 > 0 && l2 > 0 ? l1 < l2 : l1 > l2; }); // Find what target atoms will bond to these dummies std::vector> neighborDummyLists; for (auto dummyIndex : dummyIndexes) { const int neighborIdx = terminalRGroupAtomToNeighbor.at(dummyIndex); const auto coreBond = core->getBondBetweenAtoms(dummyIndex, neighborIdx); // find the atom in the target mapped to the neighbor in the core const int targetIdx = matchMap[neighborIdx]; const auto targetAtom = target.getAtomWithIdx(targetIdx); std::vector available; // now look for neighbors of that target atom that are not mapped to a // core atom- the dummy atom can potentially be mapped to each of those for (const auto &nbrIdx : boost::make_iterator_range(target.getAtomNeighbors(targetAtom))) { if (!mappedTargetIdx.test(nbrIdx)) { const auto targetBond = target.getBondBetweenAtoms(targetIdx, nbrIdx); // check for bond compatibility if (bondCompat(coreBond, targetBond, sssParams)) { available.push_back(nbrIdx); ++targetAtomBondsToCoreCounts[nbrIdx]; } } } if (available.size() > 1) { bool allHydrogens = std::all_of( available.begin(), available.end(), [&target](const int idx) { return target.getAtomWithIdx(idx)->getAtomicNum() == 1; }); if (allHydrogens) { // If all neighbors are hydrogens we don't need to iterate through // them- just assign the first free hydrogen. Could extend to cover // symmetric groups in general auto hydrogen = std::find_if(available.begin(), available.end(), [&symmetricHydrogens](const int idx) { return !symmetricHydrogens.test(idx); }); int singleHydrogen = hydrogen == available.end() ? *available.begin() : *hydrogen; available = std::vector{singleHydrogen}; symmetricHydrogens.set(singleHydrogen); } } neighborDummyLists.push_back(available); } // Now search through all the dummies and see if we need to exclude any as // it may not be possible to assign target atoms to all R groups. If that // is the case exclude R Groups with higher labels // use negative numbers to indicate a group is excluded std::vector> neighborListsWithUnmapped( neighborDummyLists.size()); int start = 0; std::transform(neighborDummyLists.begin(), neighborDummyLists.end(), neighborListsWithUnmapped.begin(), [&start](std::vector v) { v.push_back(--start); return v; }); // could optimize this product as depth search to return as soon as we // have a permutation of all positive numbers auto cp = cartesianProduct(neighborListsWithUnmapped, false); // now sort bool foundAll = false; std::sort(cp.begin(), cp.end(), [&foundAll](const std::vector a, std::vector b) { auto isNegative = [](int v) -> bool { return v < 0; }; // firstly to minimize number of unmapped dummies const int numUnmappedA = std::count_if(a.begin(), a.end(), isNegative); const int numUnmappedB = std::count_if(b.begin(), b.end(), isNegative); if (numUnmappedA < numUnmappedB) { return true; } if (numUnmappedB == 0 || numUnmappedA == 0) { foundAll = true; } if (!foundAll && numUnmappedA == numUnmappedB && numUnmappedA) { // in a tie prefer the permutation that excludes the r group // with the lowest label // don't need to sort these if we've found an all mapped // combination for (size_t i = 0; i < a.size(); ++i) { const int v1 = a[i]; const int v2 = b[i]; if ((v1 > 0 && v2 > 0) || (v1 < 0 && v2 < 0)) { continue; } if (v1 > 0 && v2 < 0) { return true; } if (v1 < 0 && v2 > 0) { break; } } } return false; }); auto best = cp[0]; for (size_t i = 0; i < dummyIndexes.size(); i++) { auto dummyIdx = dummyIndexes[i]; if (best[i] < 0) { // We can't map this dummy to a target atom. That is OK if it is an // unlabeled core attachment atom or a user R label connected to a // query or wildcard atom const auto dummy = core->getAtomWithIdx(dummyIdx); if (dummy->hasProp(UNLABELED_CORE_ATTACHMENT)) { missingDummies.push_back(dummyIdx); } else if (isUserRLabel(*dummy) && (coreAtom->getAtomicNum() == 0 || coreAtom->hasQuery())) { // https://github.com/rdkit/rdkit/issues/4505 missingDummies.push_back(dummyIdx); } else { return allMappings; } } else { availableMappingsForDummyMap[dummyIdx] = neighborDummyLists[i]; } } } if (availableMappingsForDummyMap.empty()) { allMappings.push_back(std::move(match)); return allMappings; } std::vector dummiesWithMapping; std::vector> availableMappingsForDummy; for (const auto &mapping : availableMappingsForDummyMap) { dummiesWithMapping.push_back(mapping.first); availableMappingsForDummy.push_back(mapping.second); } // enumerate over all available atoms using a cartesian product. // only allow duplicates if a target atom can be bonded to more than one query // atom const bool allowDuplicates = std::find_if(targetAtomBondsToCoreCounts.begin(), targetAtomBondsToCoreCounts.end(), [](const std::pair &p) { return p.second > 1; }) != targetAtomBondsToCoreCounts.end(); const auto allAvailableMappings = cartesianProduct(availableMappingsForDummy, allowDuplicates); if (allAvailableMappings.empty()) { allMappings.push_back(std::move(match)); return allMappings; } // the size of the final mapping size_t size = allAvailableMappings[0].size() + match.size(); // these indices are needed for the whole molecule match check functor std::unique_ptr checkCore; std::map coreToCheck; const std::string indexProp("__core_index__"); bool hasMissing = !missingDummies.empty(); if (hasMissing) { // if there are dummies that we can't map these need to be removed from the // query before atom-by-atom matching. Create a copy of the query for that // and use properties to map atoms back to the core for (auto atom : core->atoms()) { atom->setProp(indexProp, atom->getIdx()); } checkCore = std::make_unique(*core); std::sort(missingDummies.begin(), missingDummies.end(), std::greater()); for (int index : missingDummies) { auto [nbrIdx, endNbrs] = checkCore->getAtomNeighbors(checkCore->getAtomWithIdx(index)); auto neighborAtom = checkCore->getAtomWithIdx(*nbrIdx); checkCore->removeAtom(index); neighborAtom->updatePropertyCache(false); } size_t index = 0U; for (const auto atom : checkCore->atoms()) { auto coreIndex = atom->getProp(indexProp); coreToCheck[coreIndex] = index++; } for (auto atom : core->atoms()) { atom->clearProp(indexProp); } } auto queryIndices = new std::uint32_t[size]; auto targetIndices = new std::uint32_t[size]; for (size_t position = 0; position < match.size(); position++) { const auto &pair = match[position]; auto queryIndex = hasMissing ? coreToCheck[pair.first] : pair.first; queryIndices[position] = queryIndex; targetIndices[position] = pair.second; } auto queryMatchingMol = hasMissing ? checkCore.get() : core.get(); MolMatchFinalCheckFunctor molMatchFunctor(*queryMatchingMol, target, sssParams); boost::dynamic_bitset<> targetBondsPresent(target.getNumBonds()); // Filter all available mappings removing those that violate chirality or have // duplicate bonds for (const auto &dummyMapping : allAvailableMappings) { CHECK_INVARIANT(match.size() + dummyMapping.size() == size, "Size error in dummy mapping"); auto duplicateBonds = false; targetBondsPresent.reset(); for (size_t i = 0; i < dummyMapping.size(); i++) { size_t position = match.size() + i; auto queryIndex = hasMissing ? coreToCheck[dummiesWithMapping[i]] : dummiesWithMapping[i]; queryIndices[position] = queryIndex; targetIndices[position] = dummyMapping[i]; if (allowDuplicates) { const int neighborIdx = terminalRGroupAtomToNeighbor.at(dummiesWithMapping[i]); const int targetNeighborIdx = matchMap[neighborIdx]; const auto targetBond = target.getBondBetweenAtoms(dummyMapping[i], targetNeighborIdx); CHECK_INVARIANT(targetBond != nullptr, "Matching target bond not found"); const auto targetBondIdx = targetBond->getIdx(); // check for duplicates if (targetBondsPresent[targetBondIdx]) { duplicateBonds = true; break; } targetBondsPresent[targetBondIdx] = 1; } } // use MolMatchFinalCheckFunctor to check this match works with chirality if (!duplicateBonds && molMatchFunctor(queryIndices, targetIndices)) { MatchVectType matchWithDummy(match); for (size_t i = 0; i < dummyMapping.size(); i++) { matchWithDummy.emplace_back(dummiesWithMapping[i], dummyMapping[i]); } allMappings.push_back(std::move(matchWithDummy)); } } delete[] queryIndices; delete[] targetIndices; return allMappings; } // This function checks the bond environment is valid when onlyMatchAtRGroups // is set. When this function is called targetRGroupIdx is the index of an atom // in the target that is mapped to an R group. Validates that all core bonds to // the R group are present - in certain circumstances there may be two // core bonds to a target R group and when onlyMatchAtRGroups is set // both bonds should be present (#4002). bool RCore::checkAllBondsToRGroupPresent( const ROMol &mol, const int targetRGroupIdx, const std::vector> &targetToCoreIndices) const { const auto targetRGroupAtom = mol.getAtomWithIdx(targetRGroupIdx); std::set coreNeighborIndices; for (const auto &nbri : boost::make_iterator_range(mol.getAtomNeighbors(targetRGroupAtom))) { const auto &nbr = mol[nbri]; // could a neighbor to an r group attachment match another r group // attachment? I don't think so. if (nbr->getAtomicNum() >= 1) { const auto &coreAtomIndices = targetToCoreIndices.at(nbri); if (coreAtomIndices.empty()) { continue; } CHECK_INVARIANT( coreAtomIndices.size() == 1, "Target atom neighboring R group should have exactly one match in core"); auto coreAtomIdx = coreAtomIndices.front(); const auto coreAtom = core->getAtomWithIdx(coreAtomIdx); // don't need to match a non terminal user R group // if (!(coreAtom->getDegree() > 1 && isUserRLabel(*coreAtom))) { if (!(coreAtom->getAtomicNum() == 0 && isUserRLabel(*coreAtom))) { coreNeighborIndices.insert(coreAtomIdx); } } } CHECK_INVARIANT( coreNeighborIndices.size() >= 1, "Unable to find target atom(s) matching core for attachment point"); if (coreNeighborIndices.size() == 1) { // currently this routine is only called when we know the attachment to // one core atom exists. return true; } // at this point we know the target atom is connected to two or more core // atoms. Now check we have core R groups for each. // There should be a map entry for a different R group for each of // the core atoms. const auto &coreRGroupIndices = targetToCoreIndices.at(targetRGroupIdx); if (coreRGroupIndices.size() != coreNeighborIndices.size()) { return false; } // check that there is a bond to an attachment point for every neighbor std::set bondIndices; for (const auto coreNeighborIdx : coreNeighborIndices) { for (const auto coreRGroupIdx : coreRGroupIndices) { const auto bond = core->getBondBetweenAtoms(coreNeighborIdx, coreRGroupIdx); if (bond) { bondIndices.insert(bond->getIdx()); } } } return bondIndices.size() == coreNeighborIndices.size(); } // Convert a matching molecule index to a core index int RCore::matchingIndexToCoreIndex(int matchingIndex) const { auto atom = matchingMol->getAtomWithIdx(matchingIndex); CHECK_INVARIANT(atom->hasProp(RLABEL_CORE_INDEX), "Matched atom missing core index"); return atom->getProp(RLABEL_CORE_INDEX); } // Create tautomer query for the matching mol on demand and cache for // performance. If the tautomer query cannot be created (because we can't // kekulize the query) then nullptr will be returned and we revert to // non-tautomer match std::shared_ptr RCore::getMatchingTautomerQuery() { if (!checkedForTautomerQuery) { try { // Enumerate tautomers from a sanitized copy of the matching molecule RWMol copy(*matchingMol); // If the core has had rgroup labels removed when creating the matching // mol then we need to update properties. Should a full sanitization be // done? MolOps::sanitizeMol(*copy); copy.updatePropertyCache(false); std::shared_ptr tautomerQuery( TautomerQuery::fromMol(copy)); matchingTautomerQuery = tautomerQuery; } catch (const MolSanitizeException &) { matchingTautomerQuery = nullptr; } checkedForTautomerQuery = true; } return matchingTautomerQuery; } } // namespace RDKit