#include #include #include #include "../QueryAtom.h" #include "../QueryBond.h" #include "../SmilesParse/SmilesWrite.h" #include "../SmilesParse/SmartsWrite.h" //#include "../SmilesParse/SmilesParse.h" //#include "../Substruct/SubstructMatch.h" #include "SubstructMatchCustom.h" #include "MaximumCommonSubgraph.h" #ifdef VERBOSE_STATISTICS_ON ExecStatistics stat; #endif namespace RDKit { namespace FMCS { bool ConsoleOutputEnabled = false; }} namespace RDKit { namespace FMCS { struct LabelDefinition { unsigned ItemIndex; // item with this label value unsigned Value; LabelDefinition() : ItemIndex(-1), Value(-1) {} LabelDefinition(unsigned i, unsigned value) : ItemIndex(i), Value(value) {} }; MaximumCommonSubgraph::MaximumCommonSubgraph(const MCSParameters* params) { Parameters = ( 0 != params ? *params : MCSParameters()); if (Parameters.ProgressCallback == MCSProgressCallbackTimeout) Parameters.ProgressCallbackUserData = &To; To = time(0); } static bool molPtr_NumBondLess (const ROMol* l, const ROMol* r) // need for sorting the source molecules by size { return l->getNumBonds() < r->getNumBonds(); } void MaximumCommonSubgraph::init() { QueryMolecule = Molecules.front(); void* userData = Parameters.CompareFunctionsUserData; if(Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly) { #ifdef FAST_SUBSTRUCT_CACHE RingMatchTables.init(QueryMolecule); Parameters.CompareFunctionsUserData = &RingMatchTables; #endif } #ifdef FAST_SUBSTRUCT_CACHE #ifdef PRECOMPUTED_TABLES_MATCH // fill out RingMatchTables for check cache Hash collision by checking match a part of Query to Query if(!userData // predefined functor - compute RingMatchTable for all targets && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly)) RingMatchTables.computeRingMatchTable(QueryMolecule, QueryMolecule, Parameters); // fill out match tables size_t nq = QueryMolecule->getNumAtoms(); QueryAtomMatchTable.resize(nq, nq); for(size_t aj = 0; aj < nq; aj++) for(size_t ai = 0; ai < nq; ai++) QueryAtomMatchTable.set(ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); nq = QueryMolecule->getNumBonds(); QueryBondMatchTable.resize(nq, nq); for(size_t aj = 0; aj < nq; aj++) for(size_t ai = 0; ai < nq; ai++) QueryBondMatchTable.set(ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *QueryMolecule, aj, Parameters.CompareFunctionsUserData)); #endif // PRECOMPUTED_TABLES_MATCH // Compute label values based on current functor and parameters for code Morgan correct computation. unsigned currentLabelValue = 1; std::vector labels; nq = QueryMolecule->getNumAtoms(); QueryAtomLabels.resize(nq); for(size_t ai = 0; ai < nq; ai++) { if(MCSAtomCompareAny == Parameters.AtomTyper) // predefined functor without atom compare parameters QueryAtomLabels[ai] = 1; else { const Atom* atom = QueryMolecule->getAtomWithIdx(ai); if(MCSAtomCompareElements == Parameters.AtomTyper) // predefined functor without atom compare parameters QueryAtomLabels[ai] = atom->getAtomicNum()|(Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence()>>8):0); else if(MCSAtomCompareIsotopes == Parameters.AtomTyper) // predefined functor without atom compare parameters QueryAtomLabels[ai] = atom->getAtomicNum()|(atom->getIsotope()>>8)|(Parameters.AtomCompareParameters.MatchValences ? (atom->getTotalValence()>>16):0); else // custom user defined functor { QueryAtomLabels[ai] = -1; for(size_t i = 0; i < labels.size(); i++) if(Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, labels[i].ItemIndex, *QueryMolecule, ai, userData)) // equal itoms { QueryAtomLabels[ai] = labels[i].Value; break; } if(-1 == QueryAtomLabels[ai]) // not found -> create new label { QueryAtomLabels[ai] = ++currentLabelValue; labels.push_back(LabelDefinition(ai, currentLabelValue)); } } } } labels.clear(); currentLabelValue = 1; nq = QueryMolecule->getNumBonds(); QueryBondLabels.resize(nq); for(size_t aj = 0; aj < nq; aj++) { const Bond* bond = QueryMolecule->getBondWithIdx(aj); unsigned ring = 0; if(Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly) { ring = RingMatchTables.isQueryBondInRing(aj) ? 0 : 1; // is bond in ring } if(MCSBondCompareAny == Parameters.BondTyper) // predefined functor without atom compare parameters QueryBondLabels[aj] = 1 | (ring>>8); else if(MCSBondCompareOrderExact == Parameters.BondTyper) // predefined functor without compare parameters QueryBondLabels[aj] = (bond->getBondType() + 1) | (ring>>8); else if(MCSBondCompareOrder == Parameters.BondTyper) // predefined functor, ignore Aromatization { unsigned order = bond->getBondType(); if(Bond::AROMATIC == order || Bond::ONEANDAHALF == order) // ignore Aromatization order = Bond::SINGLE; else if(Bond::TWOANDAHALF == order) order = Bond::DOUBLE; else if(Bond::THREEANDAHALF == order) order = Bond::TRIPLE; else if(Bond::FOURANDAHALF == order) order = Bond::QUADRUPLE; else if(Bond::FIVEANDAHALF == order) order = Bond::QUINTUPLE; QueryBondLabels[aj] = (order + 1) | (ring>>8); } else // custom user defined functor { QueryBondLabels[aj] = -1; for(size_t i = 0; i < labels.size(); i++) if(Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, labels[i].ItemIndex, *QueryMolecule, aj, userData)) // equal bonds + ring ... { QueryBondLabels[aj] = labels[i].Value; break; } if(-1 == QueryAtomLabels[aj]) // not found -> create new label { QueryBondLabels[aj] = ++currentLabelValue; labels.push_back( LabelDefinition(aj, currentLabelValue)); } } } #endif Targets.resize(Molecules.size()-1); size_t i=0; for(std::vector::iterator it = Molecules.begin()+1; it != Molecules.end(); it++, i++) { Targets[i].Molecule = *it; // build Target Topology ADD ATOMs size_t j=0; // current item for(ROMol::ConstAtomIterator a = Targets[i].Molecule->beginAtoms(); a != Targets[i].Molecule->endAtoms(); a++, j++) { Targets[i].Topology.addAtom((*a)->getIdx()); #ifdef FAST_INCREMENTAL_MATCH Targets[i].AtomAdjacency.resize((*it)->getNumAtoms()); ROMol::OEDGE_ITER beg,end; for(boost::tie(beg,end) = (*it)->getAtomBonds(*a); beg!=end; beg++) // all bonds from the atom { AtomAdjacency aa; aa.Bond = (*(*it))[*beg].get(); aa.BondIdx = aa.Bond->getIdx(); aa.ConnectedAtomIdx = aa.Bond->getEndAtomIdx(); if((*a)->getIdx() == aa.ConnectedAtomIdx) aa.ConnectedAtomIdx = aa.Bond->getBeginAtomIdx(); Targets[i].AtomAdjacency[j].push_back(aa); } #endif // FAST_INCREMENTAL_MATCH } // build Target Topology ADD BONDs for(ROMol::ConstBondIterator b = Targets[i].Molecule->beginBonds(); b != Targets[i].Molecule->endBonds(); b++) { const Bond* bond = *b; unsigned ii = bond->getBeginAtomIdx(); unsigned jj = bond->getEndAtomIdx(); Targets[i].Topology.addBond((*b)->getIdx(), ii, jj); } // fill out RingMatchTables if(!userData // predefined functor - compute RingMatchTable for all targets && (Parameters.BondCompareParameters.CompleteRingsOnly || Parameters.BondCompareParameters.RingMatchesRingOnly)) { #ifdef FAST_SUBSTRUCT_CACHE RingMatchTables.addTargetBondRingsIndeces(Targets[i].Molecule); RingMatchTables.computeRingMatchTable(QueryMolecule, Targets[i].Molecule, Parameters); #endif } #ifdef PRECOMPUTED_TABLES_MATCH // fill out match tables size_t nq = QueryMolecule->getNumAtoms(); size_t nt = (*it)->getNumAtoms(); Targets[i].AtomMatchTable.resize(nq, nt); for(size_t aj = 0; aj < nt; aj++) for(size_t ai = 0; ai < nq; ai++) Targets[i].AtomMatchTable.set(ai, aj, Parameters.AtomTyper(Parameters.AtomCompareParameters, *QueryMolecule, ai, *Targets[i].Molecule, aj, Parameters.CompareFunctionsUserData)); nq = QueryMolecule->getNumBonds(); nt = (*it)->getNumBonds(); Targets[i].BondMatchTable.resize(nq, nt); for(size_t aj = 0; aj < nt; aj++) for(size_t ai = 0; ai < nq; ai++) Targets[i].BondMatchTable.set(ai, aj, Parameters.BondTyper(Parameters.BondCompareParameters, *QueryMolecule, ai, *Targets[i].Molecule, aj, Parameters.CompareFunctionsUserData)); #endif } Parameters.CompareFunctionsUserData = userData; // restore } void MaximumCommonSubgraph::makeInitialSeeds() { // build a set of initial seeds as "all" single bonds from query molecule std::vector excludedBonds(QueryMolecule->getNumBonds()); for(size_t i = 0; i < excludedBonds.size(); i++) excludedBonds[i] = false; Seeds.clear(); //R1 additional performance OPTIMISATION //if(Parameters.BondCompareParameters.CompleteRingsOnly) // disable all mismatched rings, and do not generate initial seeds from such disabled bonds // for( rings .....) for(i......) // if(mismatched) excludedBonds[i.......] = true; for(RWMol::ConstBondIterator bi = QueryMolecule->beginBonds(); bi != QueryMolecule->endBonds(); bi++) { //R1 additional performance OPTIMISATION //if(excludedBonds[(*bi)->getIdx()]) // continue; Seed seed; #ifdef FAST_INCREMENTAL_MATCH seed.MatchResult.resize(Targets.size()); #endif #ifdef VERBOSE_STATISTICS_ON ++stat.Seed; ++stat.InitialSeed; #endif seed.addAtom((*bi)->getBeginAtom()); seed.addAtom((*bi)->getEndAtom()); seed.ExcludedBonds = excludedBonds; // all bonds from first to current seed.addBond (*bi); excludedBonds[(*bi)->getIdx()] = true; seed.computeRemainingSize(*QueryMolecule); if( ! checkIfMatchAndAppend(seed)) { #ifdef VERBOSE_STATISTICS_ON ++stat.MismatchedInitialSeed; #endif // optionally remove all such bonds from all targets TOPOLOGY where it exists. //.......... // disable (mark as already processed) mismatched bond in all seeds for(SeedSet::iterator si = Seeds.begin(); si != Seeds.end(); si++) si->ExcludedBonds[(*bi)->getIdx()] = true; } } Seeds.MaxBonds = 1; Seeds.MaxAtoms = 2; } void MaximumCommonSubgraph::growSeeds(MolFragment& mcsIdx, MCSResult& res) { unsigned steps = 99999; // steps from last progress callback call. call it immediately in the begining // Find MCS -- SDF Seed growing OPTIMISATION (it works in 3 times faster) while(!Seeds.empty()) { ++steps; stat.TotalSteps++; SeedSet::iterator si = Seeds.begin(); si->grow(*this, *QueryMolecule); const Seed& fs = Seeds.front(); // bigger substructure found if((!Parameters.MaximizeBonds && (fs.getNumAtoms() > res.NumAtoms || (fs.getNumAtoms() == res.NumAtoms && fs.getNumBonds() > res.NumBonds))) ||( Parameters.MaximizeBonds && (fs.getNumBonds() > res.NumBonds || (fs.getNumBonds() == res.NumBonds && fs.getNumAtoms() > res.NumAtoms))) ) { stat.MCSFoundStep = stat.TotalSteps; res.NumAtoms = fs.getNumAtoms(); res.NumBonds = fs.getNumBonds(); mcsIdx.Atoms = fs.MoleculeFragment.Atoms; mcsIdx.Bonds = fs.MoleculeFragment.Bonds; mcsIdx.AtomsIdx = fs.MoleculeFragment.AtomsIdx; mcsIdx.BondsIdx = fs.MoleculeFragment.BondsIdx; //TMP DEBUG /* std::cout<<"MCS atoms=("; for(unsigned seedAtomIdx = 0; seedAtomIdx < mcs.getNumAtoms(); seedAtomIdx++) { const Atom* atom = mcs.MoleculeFragment.Atoms[seedAtomIdx]; std::cout<getIdx()<<", "; } std::cout<<") new atoms=["; for(unsigned seedAtomIdx = mcs.LastAddedAtomsBeginIdx; seedAtomIdx < mcs.getNumAtoms(); seedAtomIdx++) { const Atom* atom = mcs.MoleculeFragment.Atoms[seedAtomIdx]; std::cout<getIdx()<<", "; } std::cout<<"] bonds=("; for(int i=0; iMoleculeFragment.BondsIdx[0]) { std::cout<<"\n" <<" -----------------"<<(void*)&*si <<"Remaining "<RemainingBonds<< ", " << si->RemainingAtoms <<": LastAddedAtomsBeginIdx = "<LastAddedAtomsBeginIdx<<", LastAddedBondsBeginIdx = "<LastAddedBondsBeginIdx<<"\n"; for(size_t i = 0; i < si->MoleculeFragment.Bonds.size(); i++) std::cout << i << " "<MoleculeFragment.Bonds[i]->getIdx()<<" : " <<" "<MoleculeFragment.Bonds[i]->getBeginAtom()->getIdx() <<" "<MoleculeFragment.Bonds[i]->getEndAtom()->getIdx() <<"\n"; } */ /* #ifdef xxxWIN32 // TEMP DEBUG !!!!!!!!!!!!! if(30 == res.NumAtoms) { std::cout<<"-------------\n"; for(int i=0; igetBondWithIdx(mcs.MoleculeFragment.BondsIdx[i])->getBeginAtomIdx() <<" , "<< QueryMolecule->getBondWithIdx(mcs.MoleculeFragment.BondsIdx[i])->getEndAtomIdx()<<")\n"; std::cout<<"-------------\n"; for(int i=0; i<30; i++) { const Atom* a = QueryMolecule->getAtomWithIdx(mcs.MoleculeFragment.AtomsIdx[i]); std::cout<getAtomicNum()<<"\n"; if(a->getAtomicNum()==7) // N { ROMol::OEDGE_ITER beg,end; for(boost::tie(beg,end) = QueryMolecule->getAtomBonds(a); beg!=end; beg++) { const Bond* bond = &*((*QueryMolecule)[*beg]); std::cout<<" bond="<getIdx()<<" =("<< bond->getBeginAtomIdx() <<" , "<< bond->getEndAtomIdx()<<")\n"; } } } std::cout<<"-------------\n"; } #endif */ } if(-1 == si->GrowingStage) //finished Seeds.erase(si); if(Parameters.ProgressCallback && (steps > 777))// || res.NumAtoms > Stat.NumAtoms)) { steps = 0; Stat.NumAtoms = res.NumAtoms; Stat.NumBonds = res.NumBonds; #ifdef VERBOSE_STATISTICS_ON Stat.SeedProcessed = stat.Seed; #endif if(!Parameters.ProgressCallback(Stat, Parameters, Parameters.ProgressCallbackUserData)) { res.Canceled = true; break; } } } } struct AtomMatch // for each seed atom (matched) { unsigned QueryAtomIdx; unsigned TargetAtomIdx; AtomMatch() : QueryAtomIdx(-1), TargetAtomIdx(-1) {} }; typedef std::vector AtomMatchSet; std::string MaximumCommonSubgraph::generateSMARTS(const MolFragment& mcsIdx) { // match the result MCS with all targets to check if it is exact match or template Seed seed; // result MCS seed.ExcludedBonds.resize(QueryMolecule->getNumBonds()); for(size_t i = 0; i < seed.ExcludedBonds.size(); i++) seed.ExcludedBonds[i] = false; std::vector atomMatchResult(Targets.size()); std::vector atomIdxMap(QueryMolecule->getNumAtoms()); std::vector > bondMatchSet (mcsIdx.Bonds.size()); //key is unique BondType std::vector > atomMatchSet (mcsIdx.Atoms.size()); //key is unique atomic number //isotope, mass, charge, Hs std::vector > atomMatchSet (mcsIdx.Atoms.size()); //key is unique atomic number for(std::vector::const_iterator atom = mcsIdx.Atoms.begin(); atom != mcsIdx.Atoms.end(); atom++) { atomIdxMap[(*atom)->getIdx()] = seed.getNumAtoms(); seed.addAtom((*atom)); } for(std::vector::const_iterator bond = mcsIdx.Bonds.begin(); bond != mcsIdx.Bonds.end(); bond++) seed.addBond((*bond)); unsigned itarget = 0; for(std::vector::const_iterator tag = Targets.begin(); tag != Targets.end(); tag++, itarget++) { match_V_t match; // THERE IS NO Bonds match INFO !!!! bool target_matched = #ifdef PRECOMPUTED_TABLES_MATCH SubstructMatchCustomTable(tag->Topology, seed.Topology, tag->AtomMatchTable, tag->BondMatchTable, &match); #else //noticable slowly: SubstructMatchCustom( tag->Topology.GraphTopology , *tag->Molecule , seed.GraphTopology , *QueryMolecule , Parameters.AtomTyper, Parameters.BondTyper , Parameters.AtomCompareParameters, Parameters.BondCompareParameters, Parameters.CompareFunctionsUserData ); #endif atomMatchResult[itarget].resize(seed.getNumAtoms()); for(match_V_t::const_iterator mit = match.begin(); target_matched && mit != match.end(); mit++) { unsigned ai = mit->first; // SeedAtomIdx atomMatchResult[itarget][ai].QueryAtomIdx = seed.Topology[mit->first]; atomMatchResult[itarget][ai].TargetAtomIdx = tag->Topology[mit->second]; const Atom* ta = tag->Molecule->getAtomWithIdx(tag->Topology[mit->second]); if(ta && ta->getAtomicNum() != seed.MoleculeFragment.Atoms[ai]->getAtomicNum()) atomMatchSet[ai][ta->getAtomicNum()] = ta; // add } // AND BUILD BOND MATCH INFO unsigned bi=0; for(std::vector::const_iterator bond = mcsIdx.Bonds.begin(); target_matched && bond != mcsIdx.Bonds.end(); bond++, bi++) { unsigned i = atomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned j = atomIdxMap[(*bond)->getEndAtomIdx()]; unsigned ti= atomMatchResult[itarget][i].TargetAtomIdx; unsigned tj= atomMatchResult[itarget][j].TargetAtomIdx; const Bond* tBond = tag->Molecule->getBondBetweenAtoms(ti, tj); if(tBond && (*bond)->getBondType() != tBond->getBondType()) bondMatchSet[bi] [tBond->getBondType()] = tBond; // add } } // Generate result's SMARTS RWMol mol; // create molecule from MCS for MolToSmarts() unsigned ai = 0; // SeedAtomIdx for(std::vector::const_iterator atom = mcsIdx.Atoms.begin(); atom != mcsIdx.Atoms.end(); atom++, ai++) { QueryAtom* a = new QueryAtom(*(*atom)); // generate [#6] instead of C or c ! //for all atomMatchSet[ai] items add atom query to template for(std::map::const_iterator am = atomMatchSet[ai].begin(); am != atomMatchSet[ai].end(); am++) { ATOM_OR_QUERY* a2 = new ATOM_OR_QUERY(); a2->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumQuery((*atom)->getAtomicNum()))); a2->addChild(QueryAtom::QUERYATOM_QUERY::CHILD_TYPE(makeAtomNumQuery(am->second->getAtomicNum()))); a2->setDescription("AtomOr"); //ATOM_EQUALS_QUERY *makeAtomIsotopeQuery(int what) //.... a->setQuery(a2); } mol.addAtom(a, true); } unsigned bi = 0; // Seed Idx for(std::vector::const_iterator bond = mcsIdx.Bonds.begin(); bond != mcsIdx.Bonds.end(); bond++, bi++) { unsigned i = atomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned j = atomIdxMap[(*bond)->getEndAtomIdx()]; QueryBond* b = new QueryBond(*(*bond)); // add OR template if need for(std::map::const_iterator bm = bondMatchSet[bi].begin(); bm != bondMatchSet[bi].end(); bm++) { BOND_OR_QUERY* b2 = new BOND_OR_QUERY(); b2->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery((*bond)->getBondType()))); b2->addChild(QueryBond::QUERYBOND_QUERY::CHILD_TYPE(makeBondOrderEqualsQuery(bm->second->getBondType()))); b2->setDescription("BondOr"); b->setQuery(b2); } b->setBeginAtomIdx(i); b->setEndAtomIdx(j); mol.addBond(b, true); } return MolToSmarts(mol, true); } MCSResult MaximumCommonSubgraph::find(const std::vector& src_mols) { clear(); MCSResult res; if(src_mols.size() < 2) throw std::runtime_error("FMCS. Invalid argument. mols.size() must be at least 2"); if (Parameters.Threshold > 1.0) throw std::runtime_error("FMCS. Invalid argument. Parameter Threshold must be 1.0 or less."); ThresholdCount = (unsigned) ceil((src_mols.size()-1) * Parameters.Threshold); // minimal required number of matched targets if (ThresholdCount < 1) // at least one target ThresholdCount = 1; if (ThresholdCount > src_mols.size()-1) // max all targets ThresholdCount = src_mols.size()-1; // Selecting CompleteRingsOnly option also enables --ring-matches-ring-only. ring--ring and chain bonds only match chain bonds. if(Parameters.BondCompareParameters.CompleteRingsOnly) Parameters.BondCompareParameters.RingMatchesRingOnly = true; for(std::vector::const_iterator it = src_mols.begin(); it != src_mols.end(); it++) { Molecules.push_back((*it).get()); if(!Molecules.back()->getRingInfo()->isInitialized()) Molecules.back()->getRingInfo()->initialize(); // but do not fill out !!! } // sort source set of molecules by their 'size' and assume the smallest molecule as a query std::stable_sort(Molecules.begin(), Molecules.end(), molPtr_NumBondLess); init(); makeInitialSeeds(); /* // TEMP DEBUG !!!!!!!!!!!!! { std::cout<<"---Bonds----------\n"; for(int i=0; igetNumBonds(); i++) std::cout<getBondWithIdx(i)->getBeginAtomIdx() <<" , "<< QueryMolecule->getBondWithIdx(i)->getEndAtomIdx()<<")\n"; std::cout<<"---Atoms----------\n"; for(int i=0; igetNumAtoms(); i++) { const Atom* a = QueryMolecule->getAtomWithIdx(i); std::cout<getAtomBonds(a); beg!=end; beg++) { const Bond* bond = &*((*QueryMolecule)[*beg]); std::cout<getIdx()<<"=("<< bond->getBeginAtomIdx()<<", "<getEndAtomIdx()<<") "; } std::cout<<"\n"; } std::cout<<"-------------\n"; } // !!!!!!!!!!!!!!!!!!!!!! */ MolFragment mcsIdx; // current MCS growSeeds(mcsIdx, res); res.SmartsString = generateSMARTS(mcsIdx); /* // TMP DEBUG std::cout<<"---MCS Bonds------\n"; for(int i=0; igetBondWithIdx(mcsIdx.BondsIdx[i])->getBeginAtomIdx() <<" , "<< QueryMolecule->getBondWithIdx(mcsIdx.BondsIdx[i])->getEndAtomIdx()<<")\n"; // TMP DEBUG //--------------------------- std::cout<<"Query "<< MolToSmiles(*QueryMolecule) <<"\n"; std::cout<<"MCS Smiles "<< MolFragmentToSmiles(*QueryMolecule, *(const std::vector*) &mcsIdx.AtomsIdx, (const std::vector*) &mcsIdx.BondsIdx) <<"\n"; // unsigned */ /* unsigned itarget = 0; for(std::vector::const_iterator tag = Targets.begin(); tag != Targets.end(); tag++, itarget++) { MatchVectType match; bool target_matched = SubstructMatch(*tag->Molecule, *QueryMolecule, match); std::cout<<"Target "<< itarget+1 << (target_matched ? " matched" : " MISMATCHED") <<"\n"; } */ //--------------------------- #ifdef VERBOSE_STATISTICS_ON if(ConsoleOutputEnabled) { std::cout << "STATISTICS:\n"; std::cout << "Total Growing Steps = " << stat.TotalSteps<<", MCS found on "<::const_iterator bi = seed.MoleculeFragment.Bonds.begin(); bi != seed.MoleculeFragment.Bonds.end(); bi++) TRACE(0) << (*bi)->getIdx() << " "; TRACE(0) << "\n"; #endif #ifdef VERBOSE_STATISTICS_ON ++stat.SeedCheck; #endif bool foundInCache = false; bool foundInDupCache = false; #ifdef DUP_SUBSTRUCT_CACHE if(DuplicateCache.find(seed.DupCacheKey, foundInCache)) { // duplicate found. skip match() but store both seeds, because they will grow by different paths !!! #ifdef VERBOSE_STATISTICS_ON stat.DupCacheFound++; stat.DupCacheFoundMatch += foundInCache ? 1 : 0; #endif if(!foundInCache) // mismatched !!! return false; } foundInDupCache = foundInCache; #endif #ifdef FAST_SUBSTRUCT_CACHE SubstructureCache::HashKey cacheKey; SubstructureCache::TIndexEntry* cacheEntry = 0; bool cacheEntryIsValid = false; if(!foundInCache) { #ifdef VERBOSE_STATISTICS_ON ++stat.FindHashInCache; #endif cacheEntry = HashCache.find(seed, QueryAtomLabels, QueryBondLabels, cacheKey); cacheEntryIsValid = true; if(cacheEntry) // possibly found. check for hash collision { #ifdef VERBOSE_STATISTICS_ON ++stat.HashKeyFoundInCache; #endif // check hash collisions (time +3%): for(SubstructureCache::TIndexEntry::const_iterator g = cacheEntry->begin(); !foundInCache && g != cacheEntry->end(); g++) { if(g->m_vertices.size() != seed.getNumAtoms() || g->m_edges.size() != seed.getNumBonds()) continue; #ifdef VERBOSE_STATISTICS_ON ++stat.ExactMatchCall; #endif // EXACT MATCH #ifdef PRECOMPUTED_TABLES_MATCH foundInCache = SubstructMatchCustomTable((*g), seed.Topology, QueryAtomMatchTable, QueryBondMatchTable); #else //.................. #endif #ifdef VERBOSE_STATISTICS_ON if(foundInCache) ++stat.ExactMatchCallTrue; #endif } } } #endif bool found = foundInCache; if(!found) { found = match(seed); } if(found) // Store new generated seed, if found in cache or in all(- threshold) targets { Seed& new_seed = Seeds.add(seed); #ifdef DUP_SUBSTRUCT_CACHE if(!foundInDupCache && seed.getNumBonds() >= 3) // only seed with a ring can be duplicated - do not store very small seed in cache DuplicateCache.add(seed.DupCacheKey, true); #endif #ifdef FAST_SUBSTRUCT_CACHE if(!foundInCache) HashCache.add(seed, cacheKey, cacheEntry); #endif } else { #ifdef DUP_SUBSTRUCT_CACHE if(seed.getNumBonds() > 3) DuplicateCache.add(seed.DupCacheKey, false); //opt. cache mismatched duplicates too #endif } return found; // new matched seed has been actualy added } bool MaximumCommonSubgraph::match(Seed& seed) { unsigned max_miss = Targets.size() - ThresholdCount; unsigned missing = 0; unsigned passed = 0; unsigned itarget = 0; for(std::vector::const_iterator tag = Targets.begin(); tag != Targets.end(); tag++, itarget++) { #ifdef VERBOSE_STATISTICS_ON ++stat.MatchCall; #endif bool target_matched = false; #ifdef FAST_INCREMENTAL_MATCH if(!seed.MatchResult[itarget].empty()) target_matched = matchIncrementalFast(seed, itarget); #endif if(!target_matched) // slow full match { match_V_t match; // THERE IS NO Bonds match INFO !!!! target_matched = #ifdef PRECOMPUTED_TABLES_MATCH SubstructMatchCustomTable(tag->Topology, seed.Topology, tag->AtomMatchTable, tag->BondMatchTable, &match); #else //noticable slowly: SubstructMatchCustom( tag->Topology , *tag->Molecule , seed.GraphTopology , *QueryMolecule , Parameters.AtomTyper, Parameters.BondTyper , Parameters.AtomCompareParameters, Parameters.BondCompareParameters, Parameters.CompareFunctionsUserData ); #endif #ifdef FAST_INCREMENTAL_MATCH // save current match info seed.MatchResult[itarget].clear();//resize(target_matched ? match.size() : 0); for(match_V_t::const_iterator mit = match.begin(); target_matched && mit != match.end(); mit++) { BondMatch m; m.SeedAtomIdx = mit->first; m.QueryAtomIdx = seed.GraphTopology[mit->first]; m.TargetAtomIdx = mit->second; //==target.Topology[it->second]); seed.MatchResult[itarget].push_back(m); // add for each matched bond } //AND RESTORE BOND MATCH INFO !!!!!!!!!!!!!!!!!!!!! #endif } if(target_matched) { if(++passed >= ThresholdCount) // it's enought break; } else // mismatched { if(++missing > max_miss) break; } } if(missing <= max_miss) { #ifdef VERBOSE_STATISTICS_ON ++stat.MatchCallTrue; #endif return true; } return false; } #ifdef FAST_INCREMENTAL_MATCH // call it by target, if fail perform full match check bool MaximumCommonSubgraph::matchIncrementalFast(Seed& seed, unsigned itarget) { // use and update results of previous match stored in the seed #ifdef VERBOSE_STATISTICS_ON ++stat.FastMatchCall; #endif Target& t = Targets[itarget]; BondMatchSet& match = seed.MatchResult[itarget]; size_t previousLen = match.size(); for(unsigned newBondSeedIdx = seed.LastAddedBondsBeginIdx; newBondSeedIdx < seed.getNumBonds(); newBondSeedIdx++) { bool found=false; const Bond* bond = seed.MoleculeFragment.Bonds[newBondSeedIdx]; unsigned newBondQueryIdx = seed.MoleculeFragment.BondsIdx[newBondSeedIdx]; unsigned i = seed.MoleculeFragment.SeedAtomIdxMap[bond->getBeginAtomIdx()]; if(i >= seed.LastAddedAtomsBeginIdx) // old atom in the seed i = seed.MoleculeFragment.SeedAtomIdxMap[bond->getEndAtomIdx()]; unsigned newBondSourceAtomSeedIdx = i; // seed's index of atom from which new bond was added const BondMatch& ma = seed.MatchResult[itarget][i]; unsigned newBondSourceAtomTargetIdx = ma.TargetAtomIdx; // corresponding atom in the target unsigned newBondSourceAtomQueryIdx = ma.QueryAtomIdx; //const Atom* newBondSourceAtomTarget = t.Molecule->getAtomWithIdx(newBondSourceAtomTargetIdx); // check all unvisited bonds from newBondSourceAtomTargetIdx const AtomAdjacencyList& taAdj = t.AtomAdjacency[newBondSourceAtomTargetIdx]; for(size_t itaAdji = 0; itaAdji < taAdj.size(); itaAdji++) { unsigned tbi = taAdj[itaAdji].BondIdx; if(t.BondMatchTable.at(newBondQueryIdx, tbi)) { /* // and check ending atom of the bond too //.......... bool visited = false; //match.find //MULTIPLE various bonds for one atom !!! for(size_t im = 0; !visited && im < match.size(); im++) if(seed.MatchResult[itarget][im]. == ) visited = true; if(visited) // visited == targer bons already presents in the match of this seed continue; // append to match result seed.MatchResult[itarget][] found = true; break; */ } } } /*/----------------------- //Seed: unsigned LastAddedBondBeginIdx; //history for each tag seedBond, tagBond, both (seedAtom, tagAtom) of Graph q = seed.GraphTopology; for(unsigned ib = seed.LastAddedBondBeginIdx; ib < seed.getNumBonds(); ib++) // all added bonds ib endAtom // of ib unsigned tagBeginAtomIdx = ...; // already verified. matched with seed. for() all outgoing bonds except already matched to seed check match of bond to ib and endAtom to ia if ia is not already verified (ring back to seed) // for(unsigned srcAtomIdx = seed.LastAddedAtomsBeginIdx; srcAtomIdx < seed.getNumAtoms(); srcAtomIdx++) // all added atoms //-----------------------*/ if(false // match_ok ) { #ifdef VERBOSE_STATISTICS_ON ++stat.FastMatchCallTrue; #endif return true; } match.resize(previousLen); // clean up - remove new items only return false; } #endif // FAST_INCREMENTAL_MATCH }} // namespace RDKit