// $Id$ // // Copyright (C) 2013 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include "Fingerprints.h" #include #include #include #include #include #include #include #include #include #include #include #include //#define VERBOSE_FINGERPRINTING 1 //#define REPORT_FP_STATS 1 #ifdef REPORT_FP_STATS #include #endif namespace RDKit{ const char *pqs[]={ "[*]~[*]", "[*]~[*]~[*]", "[R]~1~[R]~[R]~1", //"[*]~[*]~[*]~[*]", "[*]~[*](~[*])~[*]", //"[*]~[R]~1[R]~[R]~1", "[R]~1[R]~[R]~[R]~1", //"[*]~[*]~[*]~[*]~[*]", "[*]~[*]~[*](~[*])~[*]", //"[*]~[R]~1[R]~[R]~1~[*]", "[R]~1~[R]~[R]~[R]~[R]~1", "[R]~1~[R]~[R]~[R]~[R]~[R]~1", "[R2]~[R1]~[R2]", "[R2]~[R1]~[R1]~[R2]", "[*]!@[R]~[R]!@[*]", "[*]!@[R]~[R]~[R]!@[*]", #if 0 "[*]~[*](~[*])(~[*])~[*]", "[*]~[*]~[*]~[*]~[*]~[*]", "[*]~[*]~[*]~[*](~[*])~[*]", "[*]~[*]~[*](~[*])~[*]~[*]", "[*]~[*]~[*](~[*])(~[*])~[*]", "[*]~[*](~[*])~[*](~[*])~[*]", "[*]~[R]~1[R]~[R]~1(~[*])~[*]", "[*]~[R]~1[R](~[*])~[R]~1[*]", "[*]~[R]~1[R]~[R](~[*])~[R]~1", "[*]~[R]~1[R]~[R]~[R]~1[*]", "[*]~[R]~1[R]~[R]~[R]~[R]~1", "[*]~[R]~1(~[*])~[R]~[R]~[R]~1", "[*]~[*]~[*]~[*]~[*]~[*]~[*]", "[*]~[*]~[*]~[*]~[*](~[*])~[*]", "[*]~[*]~[*]~[*](~[*])~[*]~[*]", "[*]~[*]~[*]~[*](~[*])(~[*])~[*]", "[*]~[*]~[*](~[*])~[*](~[*])~[*]", "[*]~[*](~[*])~[*]~[*](~[*])~[*]", "[*]~[*](~[*])~[*](~[*])(~[*])~[*]", #endif ""}; namespace detail { void getAtomNumbers(const Atom *a,std::vector &atomNums){ atomNums.clear(); if( !a->hasQuery()){ atomNums.push_back(a->getAtomicNum()); return; } // negated things are always complex: if( a->getQuery()->getNegation()) return; std::string descr=a->getQuery()->getDescription(); if(descr=="AtomAtomicNum"){ atomNums.push_back(static_cast(a->getQuery())->getVal()); } else if(descr=="AtomXor"){ return; } else if(descr=="AtomAnd"){ Queries::Query::CHILD_VECT_CI childIt=a->getQuery()->beginChildren(); if( (*childIt)->getDescription()=="AtomAtomicNum" && ((*(childIt+1))->getDescription()=="AtomIsAliphatic" || (*(childIt+1))->getDescription()=="AtomIsAromatic") && (childIt+2)==a->getQuery()->endChildren()){ atomNums.push_back(static_cast((*childIt).get())->getVal()); return; } } else if(descr=="AtomOr"){ Queries::Query::CHILD_VECT_CI childIt=a->getQuery()->beginChildren(); while(childIt !=a->getQuery()->endChildren()){ if( (*childIt)->getDescription()=="AtomAtomicNum" ){ atomNums.push_back(static_cast((*childIt).get())->getVal()); } else if((*childIt)->getDescription()=="AtomAnd"){ Queries::Query::CHILD_VECT_CI childIt2=(*childIt)->beginChildren(); if( (*childIt2)->getDescription()=="AtomAtomicNum" && ((*(childIt2+1))->getDescription()=="AtomIsAliphatic" || (*(childIt2+1))->getDescription()=="AtomIsAromatic") && (childIt2+2)==(*childIt)->endChildren()){ atomNums.push_back(static_cast((*childIt2).get())->getVal()); } else { atomNums.clear(); return; } } else { atomNums.clear(); return; } ++childIt; } } return; } } // caller owns the result, it must be deleted ExplicitBitVect *PatternFingerprintMol(const ROMol &mol, unsigned int fpSize, std::vector *atomCounts, ExplicitBitVect *setOnlyBits){ PRECONDITION(fpSize!=0,"fpSize==0"); PRECONDITION(!atomCounts || atomCounts->size()>=mol.getNumAtoms(),"bad atomCounts size"); PRECONDITION(!setOnlyBits || setOnlyBits->getNumBits()==fpSize,"bad setOnlyBits size"); static std::vector patts; // FIX: need a mutex here to be threadsafe if(patts.size()==0){ unsigned int idx=0; while(1){ std::string pq=pqs[idx]; if(pq=="") break; idx++; RWMol *tm; try { tm = SmartsToMol(pq); }catch (...) { tm=NULL; } if(!tm) continue; patts.push_back(ROMOL_SPTR(static_cast(tm))); } } if(!mol.getRingInfo()->isInitialized()){ MolOps::findSSSR(mol); } boost::dynamic_bitset<> isQueryAtom(mol.getNumAtoms()),isQueryBond(mol.getNumBonds()); ROMol::VERTEX_ITER firstA,lastA; boost::tie(firstA,lastA) = mol.getVertices(); while(firstA!=lastA){ const Atom *at=mol[*firstA].get(); if(Fingerprints::detail::isComplexQuery(at)) isQueryAtom.set(at->getIdx()); ++firstA; } ROMol::EDGE_ITER firstB,lastB; boost::tie(firstB,lastB) = mol.getEdges(); while(firstB!=lastB){ const Bond *bond = mol[*firstB].get(); if( Fingerprints::detail::isComplexQuery(bond) ){ isQueryBond.set(bond->getIdx()); } ++firstB; } ExplicitBitVect *res = new ExplicitBitVect(fpSize); unsigned int pIdx=0; BOOST_FOREACH(ROMOL_SPTR patt,patts){ ++pIdx; std::vector matches; // uniquify matches? // time for 10K molecules w/ uniquify: 5.24s // time for 10K molecules w/o uniquify: 4.87s SubstructMatch(mol,*(patt.get()),matches,false); boost::uint32_t mIdx=pIdx+patt->getNumAtoms()+patt->getNumBonds(); BOOST_FOREACH(MatchVectType &mv,matches){ #ifdef VERBOSE_FINGERPRINTING std::cerr<<"\nPatt: "<setBit(mIdx%fpSize); bool isQuery=false; boost::uint32_t bitId=pIdx; std::vector amap(mv.size(),0); BOOST_FOREACH(MatchVectType::value_type &p,mv){ #ifdef VERBOSE_FINGERPRINTING std::cerr<getAtomicNum()); amap[p.first]=p.second; } if(isQuery) continue; ROMol::EDGE_ITER firstB,lastB; boost::tie(firstB,lastB) = patt->getEdges(); while(firstB!=lastB){ BOND_SPTR pbond = (*patt.get())[*firstB]; ++firstB; if(isQueryBond[pbond->getIdx()]){ isQuery=true; #ifdef VERBOSE_FINGERPRINTING std::cerr<<"bond query: "<getIdx(); #endif break; } const Bond *mbond=mol.getBondBetweenAtoms(amap[pbond->getBeginAtomIdx()], amap[pbond->getEndAtomIdx()]); gboost::hash_combine(bitId,(boost::uint32_t)mbond->getBondType()); } if(!isQuery){ #ifdef VERBOSE_FINGERPRINTING std::cerr<<" set: "<setBit(bitId%fpSize); } } } return res; } }