// // Copyright (c) 2007-2017, Novartis Institutes for BioMedical Research Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Novartis Institutes for BioMedical Research Inc. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written // permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #include #include #include #include #include #include #include #include #include #include #include #include #include "GraphMol/ChemReactions/ReactionRunner.h" namespace RDKit { std::vector ChemicalReaction::runReactants( const MOL_SPTR_VECT reactants, unsigned int maxProducts) const { return run_Reactants(*this, reactants, maxProducts); } std::vector ChemicalReaction::runReactant( const ROMOL_SPTR reactant, unsigned int reactionTemplateIdx) const { return run_Reactant(*this, reactant, reactionTemplateIdx); } ChemicalReaction::ChemicalReaction(const std::string &pickle) { ReactionPickler::reactionFromPickle(pickle, this); } void ChemicalReaction::initReactantMatchers() { unsigned int nWarnings, nErrors; if (!this->validate(nWarnings, nErrors)) { BOOST_LOG(rdErrorLog) << "initialization failed\n"; this->df_needsInit = true; } else { this->df_needsInit = false; } } bool ChemicalReaction::validate(unsigned int &numWarnings, unsigned int &numErrors, bool silent) const { bool res = true; numWarnings = 0; numErrors = 0; if (!this->getNumReactantTemplates()) { if (!silent) { BOOST_LOG(rdErrorLog) << "reaction has no reactants\n"; } numErrors++; res = false; } if (!this->getNumProductTemplates()) { if (!silent) { BOOST_LOG(rdErrorLog) << "reaction has no products\n"; } numErrors++; res = false; } std::vector mapNumbersSeen; std::map reactingAtoms; unsigned int molIdx = 0; for (auto molIter = this->beginReactantTemplates(); molIter != this->endReactantTemplates(); ++molIter) { bool thisMolMapped = false; for (ROMol::AtomIterator atomIt = (*molIter)->beginAtoms(); atomIt != (*molIter)->endAtoms(); ++atomIt) { int mapNum; if ((*atomIt)->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { thisMolMapped = true; if (std::find(mapNumbersSeen.begin(), mapNumbersSeen.end(), mapNum) != mapNumbersSeen.end()) { if (!silent) { BOOST_LOG(rdErrorLog) << "reactant atom-mapping number " << mapNum << " found multiple times.\n"; } numErrors++; res = false; } else { mapNumbersSeen.push_back(mapNum); reactingAtoms[mapNum] = *atomIt; } } } if (!thisMolMapped) { if (!silent) { BOOST_LOG(rdWarningLog) << "reactant " << molIdx << " has no mapped atoms.\n"; } numWarnings++; } molIdx++; } std::vector productNumbersSeen; molIdx = 0; for (auto molIter = this->beginProductTemplates(); molIter != this->endProductTemplates(); ++molIter) { // clear out some possible cached properties to prevent // misleading warnings for (ROMol::AtomIterator atomIt = (*molIter)->beginAtoms(); atomIt != (*molIter)->endAtoms(); ++atomIt) { if ((*atomIt)->hasProp(common_properties::_QueryFormalCharge)) (*atomIt)->clearProp(common_properties::_QueryFormalCharge); if ((*atomIt)->hasProp(common_properties::_QueryHCount)) (*atomIt)->clearProp(common_properties::_QueryHCount); if ((*atomIt)->hasProp(common_properties::_QueryMass)) (*atomIt)->clearProp(common_properties::_QueryMass); if ((*atomIt)->hasProp(common_properties::_QueryIsotope)) (*atomIt)->clearProp(common_properties::_QueryIsotope); } bool thisMolMapped = false; for (ROMol::AtomIterator atomIt = (*molIter)->beginAtoms(); atomIt != (*molIter)->endAtoms(); ++atomIt) { int mapNum; if ((*atomIt)->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { thisMolMapped = true; bool seenAlready = std::find(productNumbersSeen.begin(), productNumbersSeen.end(), mapNum) != productNumbersSeen.end(); if (seenAlready) { if (!silent) { BOOST_LOG(rdWarningLog) << "product atom-mapping number " << mapNum << " found multiple times.\n"; } numWarnings++; // ------------ // Always check to see if the atoms connectivity changes independent // if it is mapped multiple times // ------------ const Atom *rAtom = reactingAtoms[mapNum]; CHECK_INVARIANT(rAtom, "missing atom"); if (rAtom->getDegree() != (*atomIt)->getDegree()) { (*atomIt)->setProp(common_properties::_ReactionDegreeChanged, 1); } } else { productNumbersSeen.push_back(mapNum); } auto ivIt = std::find(mapNumbersSeen.begin(), mapNumbersSeen.end(), mapNum); if (ivIt == mapNumbersSeen.end()) { if (!seenAlready) { if (!silent) { BOOST_LOG(rdWarningLog) << "product atom-mapping number " << mapNum << " not found in reactants.\n"; } numWarnings++; } } else { mapNumbersSeen.erase(ivIt); // ------------ // The atom is mapped, check to see if its connectivity changes // ------------ const Atom *rAtom = reactingAtoms[mapNum]; CHECK_INVARIANT(rAtom, "missing atom"); if (rAtom->getDegree() != (*atomIt)->getDegree()) { (*atomIt)->setProp(common_properties::_ReactionDegreeChanged, 1); } } } // ------------ // Deal with queries // ------------ if ((*atomIt)->hasQuery()) { std::list queries; queries.push_back((*atomIt)->getQuery()); while (!queries.empty()) { const Atom::QUERYATOM_QUERY *query = queries.front(); queries.pop_front(); for (auto qIter = query->beginChildren(); qIter != query->endChildren(); ++qIter) { queries.push_back((*qIter).get()); } if (query->getDescription() == "AtomFormalCharge") { int qval; if ((*atomIt)->getPropIfPresent( common_properties::_QueryFormalCharge, qval) && qval != static_cast(query)->getVal()) { if (!silent) { BOOST_LOG(rdWarningLog) << "atom " << (*atomIt)->getIdx() << " in product " << molIdx << " has multiple charge specifications.\n"; } numWarnings++; } else { (*atomIt)->setProp( common_properties::_QueryFormalCharge, static_cast(query)->getVal()); } } else if (query->getDescription() == "AtomHCount") { int qval; if ((*atomIt)->getPropIfPresent(common_properties::_QueryHCount, qval) && qval != static_cast(query)->getVal()) { if (!silent) { BOOST_LOG(rdWarningLog) << "atom " << (*atomIt)->getIdx() << " in product " << molIdx << " has multiple H count specifications.\n"; } numWarnings++; } else { (*atomIt)->setProp( common_properties::_QueryHCount, static_cast(query)->getVal()); } } else if (query->getDescription() == "AtomMass") { int qval; if ((*atomIt)->getPropIfPresent(common_properties::_QueryMass, qval) && qval != static_cast(query)->getVal()) { if (!silent) { BOOST_LOG(rdWarningLog) << "atom " << (*atomIt)->getIdx() << " in product " << molIdx << " has multiple mass specifications.\n"; } numWarnings++; } else { (*atomIt)->setProp( common_properties::_QueryMass, static_cast(query)->getVal() / massIntegerConversionFactor); } } else if (query->getDescription() == "AtomIsotope") { int qval; if ((*atomIt)->getPropIfPresent(common_properties::_QueryIsotope, qval) && qval != static_cast(query)->getVal()) { if (!silent) { BOOST_LOG(rdWarningLog) << "atom " << (*atomIt)->getIdx() << " in product " << molIdx << " has multiple isotope specifications.\n"; } numWarnings++; } else { (*atomIt)->setProp( common_properties::_QueryIsotope, static_cast(query)->getVal()); } } } } } if (!thisMolMapped) { if (!silent) { BOOST_LOG(rdWarningLog) << "product " << molIdx << " has no mapped atoms.\n"; } numWarnings++; } molIdx++; } if (!mapNumbersSeen.empty()) { if (!silent) { std::ostringstream ostr; ostr << "mapped atoms in the reactants were not mapped in the products.\n"; ostr << " unmapped numbers are: "; for (std::vector::const_iterator ivIt = mapNumbersSeen.begin(); ivIt != mapNumbersSeen.end(); ++ivIt) { ostr << *ivIt << " "; } ostr << "\n"; BOOST_LOG(rdWarningLog) << ostr.str(); } numWarnings++; } return res; } bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { throw ChemicalReactionException( "initReactantMatchers() must be called first"); } which = 0; for (auto iter = rxn.beginReactantTemplates(); iter != rxn.endReactantTemplates(); ++iter, ++which) { MatchVectType tvect; if (SubstructMatch(mol, **iter, tvect)) { return true; } } return false; } bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol) { unsigned int ignore; return isMoleculeReactantOfReaction(rxn, mol, ignore); } bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { throw ChemicalReactionException( "initReactantMatchers() must be called first"); } which = 0; for (auto iter = rxn.beginProductTemplates(); iter != rxn.endProductTemplates(); ++iter, ++which) { MatchVectType tvect; if (SubstructMatch(mol, **iter, tvect)) { return true; } } return false; } bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol) { unsigned int ignore; return isMoleculeProductOfReaction(rxn, mol, ignore); } bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which) { if (!rxn.isInitialized()) { throw ChemicalReactionException( "initReactantMatchers() must be called first"); } which = 0; for (auto iter = rxn.beginAgentTemplates(); iter != rxn.endAgentTemplates(); ++iter, ++which) { if (iter->get()->getNumHeavyAtoms() != mol.getNumHeavyAtoms()) { continue; } if (iter->get()->getNumBonds() != mol.getNumBonds()) { continue; } // not possible, update property cache not possible for const molecules // if(iter->get()->getRingInfo()->numRings() != // mol.getRingInfo()->numRings()){ // return false; // } if (RDKit::Descriptors::calcAMW(*iter->get()) != RDKit::Descriptors::calcAMW(mol)) { continue; } MatchVectType tvect; if (SubstructMatch(mol, **iter, tvect)) { return true; } } return false; } bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol) { unsigned int ignore; return isMoleculeAgentOfReaction(rxn, mol, ignore); } void addRecursiveQueriesToReaction( ChemicalReaction &rxn, const std::map &queries, const std::string &propName, std::vector > > *reactantLabels) { if (!rxn.isInitialized()) { throw ChemicalReactionException( "initReactantMatchers() must be called first"); } if (reactantLabels != nullptr) { (*reactantLabels).resize(0); } for (MOL_SPTR_VECT::const_iterator rIt = rxn.beginReactantTemplates(); rIt != rxn.endReactantTemplates(); ++rIt) { if (reactantLabels != nullptr) { std::vector > labels; addRecursiveQueries(**rIt, queries, propName, &labels); (*reactantLabels).push_back(labels); } else { addRecursiveQueries(**rIt, queries, propName); } } } namespace { // recursively looks for atomic number queries anywhere in this set of children // or its children int numComplexQueries( Queries::Query::CHILD_VECT_CI childIt, Queries::Query::CHILD_VECT_CI endChildren) { int res = 0; while (childIt != endChildren) { std::string descr = (*childIt)->getDescription(); if (descr == "AtomAtomicNum" || descr == "AtomNull") { ++res; } else { res += numComplexQueries((*childIt)->beginChildren(), (*childIt)->endChildren()); } ++childIt; } return res; } #if 0 // FIX: this is adapted from Fingerprints.cpp and we really should have code // like this centralized bool isComplexQuery(const Atom &a) { if (!a.hasQuery()) return false; // negated things are always complex: if (a.getQuery()->getNegation()) return true; std::string descr = a.getQuery()->getDescription(); if (descr == "AtomAtomicNum") return false; if (descr == "AtomOr" || descr == "AtomXor") return true; if (descr == "AtomAnd") { auto childIt = a.getQuery()->beginChildren(); int ncq = numComplexQueries(childIt, a.getQuery()->endChildren()); if (ncq == 1) { return false; } } return true; } #endif bool isChangedAtom(const Atom &rAtom, const Atom &pAtom, int mapNum, const std::map &mappedProductAtoms) { PRECONDITION(mappedProductAtoms.find(mapNum) != mappedProductAtoms.end(), "atom not mapped in products"); if (rAtom.getAtomicNum() != pAtom.getAtomicNum() && pAtom.getAtomicNum() > 0) { // the atomic number changed and the product wasn't a dummy return true; } else if (rAtom.getDegree() != pAtom.getDegree()) { // the degree changed return true; } else if (pAtom.getAtomicNum() > 0 && isComplexQuery(&rAtom)) { // more than a simple query return true; } // now check bond layout: std::map reactantBonds; ROMol::ADJ_ITER nbrIdx, endNbrs; boost::tie(nbrIdx, endNbrs) = rAtom.getOwningMol().getAtomNeighbors(&rAtom); while (nbrIdx != endNbrs) { const Atom *nbr = rAtom.getOwningMol()[*nbrIdx]; int mapNum; if (nbr->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { reactantBonds[mapNum] = rAtom.getOwningMol().getBondBetweenAtoms( rAtom.getIdx(), nbr->getIdx()); } else { // if we have an un-mapped neighbor, we are automatically a reacting atom: return true; } ++nbrIdx; } boost::tie(nbrIdx, endNbrs) = pAtom.getOwningMol().getAtomNeighbors(&pAtom); while (nbrIdx != endNbrs) { const Atom * nbr = pAtom.getOwningMol()[*nbrIdx]; int mapNum; if (nbr->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { // if we don't have a bond to a similarly mapped atom in the reactant, // we're done: if (reactantBonds.find(mapNum) == reactantBonds.end()) { return true; } const Bond *rBond = reactantBonds[mapNum]; const Bond *pBond = pAtom.getOwningMol().getBondBetweenAtoms( pAtom.getIdx(), nbr->getIdx()); // bond comparison logic: if (rBond->hasQuery()) { if (!pBond->hasQuery()) { // reactant query, product not query: always a change return true; } else { if (pBond->getQuery()->getDescription() == "BondNull") { // null queries are trump, they match everything } else if (rBond->getBondType() == Bond::SINGLE && pBond->getBondType() == Bond::SINGLE && ((rBond->getQuery()->getDescription() == "BondOr" && pBond->getQuery()->getDescription() == "BondOr") || (rBond->getQuery()->getDescription() == "SingleOrAromaticBond" && pBond->getQuery()->getDescription() == "SingleOrAromaticBond")) ) { // The SMARTS parser tags unspecified bonds as single, but then adds // a query so that they match single or double. // these cases match } else { if (rBond->getBondType() == pBond->getBondType() && rBond->getQuery()->getDescription() == "BondOrder" && pBond->getQuery()->getDescription() == "BondOrder" && static_cast(rBond->getQuery())->getVal() == static_cast(pBond->getQuery()) ->getVal()) { // bond order queries with equal orders also match } else { // anything else does not match return true; } } } } else if (pBond->hasQuery()) { // reactant not query, product query // if product is anything other than the null query // it's a change: if (pBond->getQuery()->getDescription() != "BondNull") { return true; } } else { // neither has a query, just compare the types if (rBond->getBondType() != pBond->getBondType()) { return true; } } } ++nbrIdx; } // haven't found anything to say that we are changed, so we must // not be return false; } template bool getMappedAtoms(T &rIt, std::map &mappedAtoms) { ROMol::ATOM_ITER_PAIR atItP = rIt->getVertices(); while (atItP.first != atItP.second) { const Atom *oAtom = (*rIt)[*(atItP.first++)]; // we only worry about mapped atoms: int mapNum; if (oAtom->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { mappedAtoms[mapNum] = oAtom; } else { // unmapped atom, return it return false; } } return true; } } // end of anonymous namespace VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly) { if (!rxn.isInitialized()) { throw ChemicalReactionException( "initReactantMatchers() must be called first"); } VECT_INT_VECT res; res.resize(rxn.getNumReactantTemplates()); // find mapped atoms in the products : std::map mappedProductAtoms; for (auto rIt = rxn.beginProductTemplates(); rIt != rxn.endProductTemplates(); ++rIt) { getMappedAtoms(*rIt, mappedProductAtoms); } // now loop over mapped atoms in the reactants, keeping track of // which reactant they are associated with, and check for changes. auto resIt = res.begin(); for (auto rIt = rxn.beginReactantTemplates(); rIt != rxn.endReactantTemplates(); ++rIt, ++resIt) { ROMol::ATOM_ITER_PAIR atItP = (*rIt)->getVertices(); while (atItP.first != atItP.second) { const Atom *oAtom = (**rIt)[*(atItP.first++)]; // unmapped atoms are definitely changing: int mapNum; if (!oAtom->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { if (!mappedAtomsOnly) { resIt->push_back(oAtom->getIdx()); } } else { // but mapped ones require more careful consideration // if this is found in a reactant: if (mappedProductAtoms.find(mapNum) != mappedProductAtoms.end()) { if (isChangedAtom(*oAtom, *(mappedProductAtoms[mapNum]), mapNum, mappedProductAtoms)) { resIt->push_back(oAtom->getIdx()); } } } } } return res; } void ChemicalReaction::removeUnmappedReactantTemplates( double thresholdUnmappedAtoms, bool moveToAgentTemplates, MOL_SPTR_VECT *targetVector) { MOL_SPTR_VECT res_reactantTemplates; for (auto iter = beginReactantTemplates(); iter != endReactantTemplates(); ++iter) { if (isReactionTemplateMoleculeAgent(*iter->get(), thresholdUnmappedAtoms)) { if (moveToAgentTemplates) { m_agentTemplates.push_back(*iter); } if (targetVector) { targetVector->push_back(*iter); } } else { res_reactantTemplates.push_back(*iter); } } m_reactantTemplates.clear(); m_reactantTemplates.insert(m_reactantTemplates.begin(), res_reactantTemplates.begin(), res_reactantTemplates.end()); res_reactantTemplates.clear(); } void ChemicalReaction::removeUnmappedProductTemplates( double thresholdUnmappedAtoms, bool moveToAgentTemplates, MOL_SPTR_VECT *targetVector) { MOL_SPTR_VECT res_productTemplates; for (auto iter = beginProductTemplates(); iter != endProductTemplates(); ++iter) { if (isReactionTemplateMoleculeAgent(*iter->get(), thresholdUnmappedAtoms)) { if (moveToAgentTemplates) { m_agentTemplates.push_back(*iter); } if (targetVector) { targetVector->push_back(*iter); } } else { res_productTemplates.push_back(*iter); } } m_productTemplates.clear(); m_productTemplates.insert(m_productTemplates.begin(), res_productTemplates.begin(), res_productTemplates.end()); res_productTemplates.clear(); } void ChemicalReaction::removeAgentTemplates(MOL_SPTR_VECT *targetVector) { if (targetVector) { for (auto iter = beginAgentTemplates(); iter != endAgentTemplates(); ++iter) { targetVector->push_back(*iter); } } m_agentTemplates.clear(); } } // end of RDKit namespace