// // Copyright (C) 2001-2015 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include "SubstructMatch.h" #include "SubstructUtils.h" #include #include #ifdef RDK_THREADSAFE_SSS #include #endif #include "ullmann.hpp" #include "vf2.hpp" namespace RDKit{ namespace detail { typedef std::map SUBQUERY_MAP; void MatchSubqueries(const ROMol &mol,QueryAtom::QUERYATOM_QUERY *q,bool useChirality, SUBQUERY_MAP &subqueryMap,bool useQueryQueryMatches, std::vector &locked ); bool matchCompare(const std::pair &a, const std::pair &b); bool matchVectCompare(const MatchVectType &a, const MatchVectType &b); bool isToBeAddedToVector(std::vector &matches, const MatchVectType &m); typedef std::list > ssPairType; class MolMatchFinalCheckFunctor { public: MolMatchFinalCheckFunctor(const ROMol &query,const ROMol &mol, bool useChirality) : d_query(query), d_mol(mol), df_useChirality(useChirality) {}; bool operator()(const boost::detail::node_id c1[], const boost::detail::node_id c2[]) const { //std::cerr<<" check! "<getChiralTag()!=Atom::CHI_TETRAHEDRAL_CW && qAt->getChiralTag()!=Atom::CHI_TETRAHEDRAL_CCW) continue; const Atom *mAt=d_mol.getAtomWithIdx(c2[i]); if(mAt->getChiralTag()!=Atom::CHI_TETRAHEDRAL_CW && mAt->getChiralTag()!=Atom::CHI_TETRAHEDRAL_CCW) return false; if(qAt->getDegree()!=mAt->getDegree()) return false; INT_LIST qOrder; for(unsigned int j=0;jgetIdx()); if(qOrder.size()==qAt->getDegree()) break; } } int qPermCount=qAt->getPerturbationOrder(qOrder); INT_LIST mOrder; for(unsigned int j=0;jgetIdx()); if(mOrder.size()==mAt->getDegree()) break; } } int mPermCount=mAt->getPerturbationOrder(mOrder); if((qPermCount%2 == mPermCount%2 && qAt->getChiralTag()!=mAt->getChiralTag()) || (qPermCount%2 != mPermCount%2 && qAt->getChiralTag()==mAt->getChiralTag())) return false; } // now check double bonds for(unsigned int i=0;igetBondType()!=Bond::DOUBLE || (qBnd->getStereo()!=Bond::STEREOZ && qBnd->getStereo()!=Bond::STEREOE)) continue; // don't think this can actually happen, but check to be sure: if(qBnd->getStereoAtoms().size()!=2) continue; std::map qMap; for(unsigned int j=0;jgetBeginAtomIdx()]], c2[qMap[qBnd->getEndAtomIdx()]]); CHECK_INVARIANT(mBnd,"Matching bond not found"); if(mBnd->getBondType()!=Bond::DOUBLE || (mBnd->getStereo()!=Bond::STEREOZ && mBnd->getStereo()!=Bond::STEREOE)) continue; // don't think this can actually happen, but check to be sure: if(mBnd->getStereoAtoms().size()!=2) continue; unsigned int end1Matches=0; unsigned int end2Matches=0; if(c2[qMap[qBnd->getBeginAtomIdx()]]==mBnd->getBeginAtomIdx()){ // query Begin == mol Begin if(c2[qMap[qBnd->getStereoAtoms()[0]]]==mBnd->getStereoAtoms()[0]) end1Matches=1; if(c2[qMap[qBnd->getStereoAtoms()[1]]]==mBnd->getStereoAtoms()[1]) end2Matches=1; } else { // query End == mol Begin if(c2[qMap[qBnd->getStereoAtoms()[0]]]==mBnd->getStereoAtoms()[1]) end1Matches=1; if(c2[qMap[qBnd->getStereoAtoms()[1]]]==mBnd->getStereoAtoms()[0]) end2Matches=1; } //std::cerr<<" bnd: "<getIdx()<<":"<getStereo()<<" - "<getIdx()<<":"<getStereo()<<" -- "<getStereo()==qBnd->getStereo() && (end1Matches+end2Matches)==1) return false; if(mBnd->getStereo()!=qBnd->getStereo() && (end1Matches+end2Matches)!=1) return false; } return true; } private: const ROMol &d_query; const ROMol &d_mol; bool df_useChirality; }; class AtomLabelFunctor{ public: AtomLabelFunctor(const ROMol &query,const ROMol &mol, bool useChirality, bool useQueryQueryMatches) : d_query(query), d_mol(mol), df_useChirality(useChirality), df_useQueryQueryMatches(useQueryQueryMatches) {}; bool operator()(unsigned int i,unsigned int j) const{ bool res=false; if(df_useChirality){ const Atom *qAt=d_query.getAtomWithIdx(i); if(qAt->getChiralTag()==Atom::CHI_TETRAHEDRAL_CW || qAt->getChiralTag()==Atom::CHI_TETRAHEDRAL_CCW) { const Atom *mAt=d_mol.getAtomWithIdx(j); if(mAt->getChiralTag()!=Atom::CHI_TETRAHEDRAL_CW && mAt->getChiralTag()!=Atom::CHI_TETRAHEDRAL_CCW) return false; } } res=atomCompat(d_query[i],d_mol[j],df_useQueryQueryMatches); return res; } private: const ROMol &d_query; const ROMol &d_mol; bool df_useChirality; bool df_useQueryQueryMatches; }; class BondLabelFunctor{ public: BondLabelFunctor(const ROMol &query,const ROMol &mol,bool useChirality, bool useQueryQueryMatches) : d_query(query), d_mol(mol),df_useChirality(useChirality), df_useQueryQueryMatches(useQueryQueryMatches) {}; bool operator()(MolGraph::edge_descriptor i,MolGraph::edge_descriptor j) const{ if(df_useChirality){ const BOND_SPTR qBnd=d_query[i]; if(qBnd->getBondType()==Bond::DOUBLE && (qBnd->getStereo()==Bond::STEREOZ || qBnd->getStereo()==Bond::STEREOE)){ const BOND_SPTR mBnd=d_mol[j]; if(mBnd->getBondType()==Bond::DOUBLE && !(mBnd->getStereo()==Bond::STEREOZ || mBnd->getStereo()==Bond::STEREOE)) return false; } } bool res=bondCompat(d_query[i],d_mol[j]); return res; } private: const ROMol &d_query; const ROMol &d_mol; bool df_useChirality; bool df_useQueryQueryMatches; }; } // ---------------------------------------------- // // find one match // bool SubstructMatch(const ROMol &mol,const ROMol &query,MatchVectType &matchVect, bool recursionPossible,bool useChirality,bool useQueryQueryMatches) { //std::cerr<<"begin match"< locked; #ifdef RDK_THREADSAFE_SSS locked.reserve(query.getNumAtoms()); #endif if(recursionPossible){ ROMol::ConstAtomIterator atIt; detail::SUBQUERY_MAP subqueryMap; for(atIt=query.beginAtoms();atIt!=query.endAtoms();atIt++){ if((*atIt)->getQuery()){ detail::MatchSubqueries(mol,(*atIt)->getQuery(),useChirality,subqueryMap, useQueryQueryMatches, locked); } } } //std::cerr<<"main matching"<first]=std::pair(iter->first,iter->second); } } #ifdef RDK_THREADSAFE_SSS if(recursionPossible) { BOOST_FOREACH(RecursiveStructureQuery *v, locked) v->d_mutex.unlock(); } #endif return res; } // ---------------------------------------------- // // find one match in ResonanceMolSupplier object // bool SubstructMatch(const ResonanceMolSupplier &resMolSupplier, const ROMol &query, MatchVectType &matchVect, bool recursionPossible, bool useChirality, bool useQueryQueryMatches) { bool match = false; for (unsigned int i = 0; !match && (i < resMolSupplier.length()); ++i) { ROMol *mol = resMolSupplier[i]; match = SubstructMatch(*mol, query, matchVect, recursionPossible, useChirality, useQueryQueryMatches); delete mol; } return match; } // ---------------------------------------------- // // find all matches // // NOTE: this blows out the contents of matches // unsigned int SubstructMatch(const ROMol &mol,const ROMol &query, std::vector< MatchVectType > &matches, bool uniquify,bool recursionPossible, bool useChirality,bool useQueryQueryMatches, unsigned int maxMatches ){ std::vector locked; #ifdef RDK_THREADSAFE_SSS locked.reserve(query.getNumAtoms()); #endif if(recursionPossible){ detail::SUBQUERY_MAP subqueryMap; ROMol::ConstAtomIterator atIt; for(atIt=query.beginAtoms();atIt!=query.endAtoms();atIt++){ if((*atIt)->getQuery()){ //std::cerr<<"recurse from atom "<<(*atIt)->getIdx()<getQuery(),useChirality,subqueryMap, useQueryQueryMatches, locked); } } } matches.clear(); matches.resize(0); detail::AtomLabelFunctor atomLabeler(query,mol,useChirality,useQueryQueryMatches); detail::BondLabelFunctor bondLabeler(query,mol,useChirality,useQueryQueryMatches); detail::MolMatchFinalCheckFunctor matchChecker(query,mol,useChirality); std::list pms; #if 0 bool found=boost::ullmann_all(query.getTopology(),mol.getTopology(), atomLabeler,bondLabeler,pms); #else bool found=boost::vf2_all(query.getTopology(),mol.getTopology(), atomLabeler,bondLabeler,matchChecker,pms,maxMatches); #endif unsigned int res=0; if(found){ unsigned int nQueryAtoms=query.getNumAtoms(); matches.reserve(pms.size()); for(std::list::const_iterator iter1=pms.begin(); iter1!=pms.end();++iter1){ MatchVectType matchVect; matchVect.resize(nQueryAtoms); for(detail::ssPairType::const_iterator iter2=iter1->begin(); iter2!=iter1->end();++iter2){ matchVect[iter2->first]=std::pair(iter2->first,iter2->second); } matches.push_back(matchVect); } if(uniquify){ removeDuplicates(matches,mol.getNumAtoms()); } res = matches.size(); } #ifdef RDK_THREADSAFE_SSS if(recursionPossible) { BOOST_FOREACH(RecursiveStructureQuery *v, locked) v->d_mutex.unlock(); } #endif return res; } // ---------------------------------------------- // // find all matches in a ResonanceMolSupplier object // // NOTE: this blows out the contents of matches // unsigned int SubstructMatch(const ResonanceMolSupplier &resMolSupplier, const ROMol &query, std::vector< MatchVectType > &matches, bool uniquify, bool recursionPossible, bool useChirality, bool useQueryQueryMatches, unsigned int maxMatches) { unsigned int nMatches = 0; for (unsigned int i = 0; (matches.size() < maxMatches) && (i < resMolSupplier.length()); ++i) { ROMol *mol = resMolSupplier[i]; std::vector matchesTmp; unsigned int nMatchesTmp = SubstructMatch(*mol, query, matchesTmp, uniquify, recursionPossible, useChirality, useQueryQueryMatches, maxMatches); for (std::vector::iterator it = matchesTmp.begin(); (matches.size() < maxMatches) && (it != matchesTmp.end()); ++it) { if ((std::find(matches.begin(), matches.end(), *it) == matches.end()) && (!uniquify || detail::isToBeAddedToVector(matches, *it))) matches.push_back(*it); } delete mol; } std::sort(matches.begin(), matches.end(), detail::matchVectCompare); return matches.size(); } namespace detail { unsigned int RecursiveMatcher(const ROMol &mol,const ROMol &query, std::vector< int > &matches,bool useChirality, SUBQUERY_MAP &subqueryMap,bool useQueryQueryMatches, std::vector &locked) { ROMol::ConstAtomIterator atIt; for(atIt=query.beginAtoms();atIt!=query.endAtoms();atIt++){ if((*atIt)->getQuery()){ MatchSubqueries(mol,(*atIt)->getQuery(),useChirality,subqueryMap, useQueryQueryMatches, locked); } } detail::AtomLabelFunctor atomLabeler(query,mol,useChirality,useQueryQueryMatches); detail::BondLabelFunctor bondLabeler(query,mol,useChirality,useQueryQueryMatches); detail::MolMatchFinalCheckFunctor matchChecker(query,mol,useChirality); matches.clear(); matches.resize(0); std::list pms; #if 0 bool found=boost::ullmann_all(query.getTopology(),mol.getTopology(), atomLabeler,bondLabeler,pms); #else bool found=boost::vf2_all(query.getTopology(),mol.getTopology(), atomLabeler,bondLabeler,matchChecker,pms); #endif unsigned int res=0; if(found){ matches.reserve(pms.size()); for(std::list::const_iterator iter1=pms.begin(); iter1!=pms.end();++iter1){ if(!query.hasProp(common_properties::_queryRootAtom)){ matches.push_back(iter1->begin()->second); } else { int rootIdx; query.getProp(common_properties::_queryRootAtom,rootIdx); bool found=false; for(detail::ssPairType::const_iterator pairIter=iter1->begin(); pairIter!=iter1->end();++pairIter){ if(pairIter->first==static_cast(rootIdx)){ matches.push_back(pairIter->second); found=true; break; } } if(!found){ BOOST_LOG(rdErrorLog)<<"no match found for queryRootAtom"< &locked){ PRECONDITION(query,"bad query"); //std::cout << "*-*-* MS: " << (int)query << std::endl; //std::cout << "\t\t" << typeid(*query).name() << std::endl; if(query->getDescription()=="RecursiveStructure"){ RecursiveStructureQuery *rsq=(RecursiveStructureQuery *)query; #ifdef RDK_THREADSAFE_SSS rsq->d_mutex.lock(); locked.push_back(rsq); #endif rsq->clear(); bool matchDone=false; if(rsq->getSerialNumber() && subqueryMap.find(rsq->getSerialNumber()) != subqueryMap.end()){ // we've matched an equivalent serial number before, just // copy in the matches: matchDone=true; const RecursiveStructureQuery *orsq= (const RecursiveStructureQuery *)subqueryMap[rsq->getSerialNumber()]; for(RecursiveStructureQuery::CONTAINER_TYPE::const_iterator setIter=orsq->beginSet(); setIter!=orsq->endSet();++setIter){ rsq->insert(*setIter); } //std::cerr<<" copying results for query serial number: "<getSerialNumber()<getQueryMol(); // in case we are reusing this query, clear its contents now. if(queryMol){ std::vector< int > matchStarts; unsigned int res = RecursiveMatcher(mol,*queryMol,matchStarts,useChirality, subqueryMap,useQueryQueryMatches, locked); if(res){ for(std::vector::iterator i=matchStarts.begin(); i!=matchStarts.end(); i++){ rsq->insert(*i); } } } if(rsq->getSerialNumber()){ subqueryMap[rsq->getSerialNumber()]=query; //std::cerr<<" storing results for query serial number: "<getSerialNumber()<::CHILD_VECT_CI childIt; //std::cout << query << " " << query->endChildren()-query->beginChildren() << std::endl; for(childIt=query->beginChildren();childIt!=query->endChildren();childIt++){ MatchSubqueries(mol,childIt->get(),useChirality,subqueryMap,useQueryQueryMatches, locked); } //std::cout << "<<- back " << (int)query << std::endl; } bool matchCompare(const std::pair &a, const std::pair &b) { return (a.second < b.second); } bool matchVectCompare(const MatchVectType &a, const MatchVectType &b) { for (unsigned int i = 0; i < std::min(a.size(), b.size()); ++i) { if (a[i].second != b[i].second) return (a[i].second < b[i].second); } return (a.size() < b.size()); } bool isToBeAddedToVector(std::vector &matches, const MatchVectType &m) { bool isToBeAdded = true; MatchVectType mCopy = m; std::sort(mCopy.begin(), mCopy.end(), matchCompare); for (std::vector::iterator it = matches.end(); isToBeAdded && it-- != matches.begin();) { isToBeAdded = (it->size() != mCopy.size()); if (!isToBeAdded) { MatchVectType matchCopy = *it; std::sort(matchCopy.begin(), matchCopy.end(), matchCompare); for (unsigned int i = 0; !isToBeAdded && (i < matchCopy.size()); ++i) isToBeAdded = (mCopy[i].second != matchCopy[i].second); if (!isToBeAdded) { for (unsigned int i = 0; !isToBeAdded && (i < m.size()); ++i) isToBeAdded = (m[i].second < (*it)[i].second); if (isToBeAdded) { matches.erase(it); break; } } } } return isToBeAdded; } } // end of namespace detail }