// // Copyright (C) 2014 Novartis Institutes for BioMedical Research // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "MaximumCommonSubgraph.h" #include "Composition2N.h" #include "Seed.h" #include "DebugTrace.h" #include "../SmilesParse/SmilesWrite.h" namespace RDKit { namespace FMCS { unsigned Seed::addAtom(const Atom* atom) { unsigned i = MoleculeFragment.AtomsIdx.size(); unsigned aqi = atom->getIdx(); MoleculeFragment.Atoms.push_back(atom); MoleculeFragment.AtomsIdx.push_back(aqi); MoleculeFragment.SeedAtomIdxMap[aqi] = i; Topology.addAtom(aqi); #ifdef DUP_SUBSTRUCT_CACHE DupCacheKey.addAtom(aqi); #endif return i; } unsigned Seed::addBond(const Bond* bond) { unsigned b = bond->getIdx(); if(ExcludedBonds[b]) throw -1; //never, check the implementation ExcludedBonds[b] = true; MoleculeFragment.BondsIdx.push_back(b); MoleculeFragment.Bonds.push_back(bond); // remap idx to seed's indeces: unsigned i = MoleculeFragment.SeedAtomIdxMap[bond->getBeginAtomIdx()]; unsigned j = MoleculeFragment.SeedAtomIdxMap[bond->getEndAtomIdx()]; Topology.addBond(b, i, j); #ifdef DUP_SUBSTRUCT_CACHE DupCacheKey.addBond(b); #endif return getNumBonds(); } void Seed::fillNewBonds(const ROMol& qmol) { std::vector excludedBonds = ExcludedBonds; for(unsigned srcAtomIdx = LastAddedAtomsBeginIdx; srcAtomIdx < getNumAtoms(); srcAtomIdx++) { // all atoms added on previous growing only const Atom* atom = MoleculeFragment.Atoms[srcAtomIdx]; ROMol::OEDGE_ITER beg,end; for(boost::tie(beg,end) = qmol.getAtomBonds(atom); beg!=end; beg++) { // all bonds from MoleculeFragment.Atoms[srcAtomIdx] const Bond* bond = &*(qmol[*beg]); if( ! excludedBonds[bond->getIdx()]) { // already in the seed or in the NewBonds list from another atom in a RING excludedBonds[bond->getIdx()] = true; unsigned ai = (atom == bond->getBeginAtom()) ? bond->getEndAtomIdx() : bond->getBeginAtomIdx(); const Atom* end_atom = qmol.getAtomWithIdx(ai); unsigned end_atom_idx = NotSet; for(unsigned i=0; i < getNumAtoms(); i++) if(end_atom == MoleculeFragment.Atoms[i]) { // already exists in this seed end_atom_idx = i; break; } NewBonds.push_back(NewBond(srcAtomIdx, bond->getIdx(), ai, end_atom_idx, NotSet==end_atom_idx ? end_atom:0)); } } } } void Seed::grow(MaximumCommonSubgraph& mcs) const { const ROMol& qmol = mcs.getQueryMolecule(); std::map newAtomsMap; // map new added atoms to their seed's indeces if(!canGrowBiggerThan(mcs.getMaxNumberBonds(), mcs.getMaxNumberAtoms()) ) { // prune() parent GrowingStage = NotSet; //finished #ifdef VERBOSE_STATISTICS_ON ++mcs.VerboseStatistics.RemainingSizeRejected; #endif return; } if(0==GrowingStage) { // 0. Fill out list of all directly connected outgoing bonds ((Seed*)this)->fillNewBonds(qmol); // non const method, multistage growing optimisation if(NewBonds.empty()) { GrowingStage = NotSet; // finished return; } // 1. Check and add the biggest child seed with all outgoing bonds added: // Add all bonds at first (build the biggest child seed). All new atoms are already in the seed Seed seed; seed.createFromParent(this); for(std::vector::const_iterator nbi = NewBonds.begin(); nbi != NewBonds.end(); nbi++) { unsigned aIdx = nbi->EndAtomIdx; if(NotSet == aIdx) { // new atom std::map::const_iterator nai = newAtomsMap.find(nbi->NewAtomIdx); // check RING if(newAtomsMap.end() == nai) { const Atom* end_atom = nbi->NewAtom; aIdx = seed.addAtom(end_atom); newAtomsMap[nbi->NewAtomIdx] = aIdx; // store new possible ring end point } else aIdx = nai->second; } const Bond* src_bond = qmol.getBondWithIdx(nbi->BondIdx); seed.addBond(src_bond); } #ifdef VERBOSE_STATISTICS_ON { ++mcs.VerboseStatistics.Seed; } #endif seed.RemainingBonds = RemainingBonds - NewBonds.size(); // Added ALL !!! seed.RemainingAtoms = RemainingAtoms - newAtomsMap.size(); // new atoms added to seed // prune() Best Sizes if( ! seed.canGrowBiggerThan(mcs.getMaxNumberBonds(), mcs.getMaxNumberAtoms()) ) { GrowingStage = NotSet; #ifdef VERBOSE_STATISTICS_ON ++mcs.VerboseStatistics.RemainingSizeRejected; #endif return; // the biggest possible subrgaph from this seed is too small for future growing. So, skip ALL children ! } seed.MatchResult = MatchResult; bool allMatched = mcs.checkIfMatchAndAppend(seed); // this seed + all extern bonds is a part of MCS GrowingStage = 1; if(allMatched && NewBonds.size() > 1) return; // grow deep first. postpone next growing steps } // 2. Check and add all 2^N-1-1 other possible seeds: if(1 == NewBonds.size()) { GrowingStage = NotSet; return; // everything has been done } // OPTIMISATION: // check each single bond first: if (this seed + single bond) does not exist in MCS, exclude this new bond from growing this seed. unsigned numErasedNewBonds = 0; for(std::vector::iterator nbi = NewBonds.begin(); nbi != NewBonds.end(); nbi++) { #ifdef VERBOSE_STATISTICS_ON { ++mcs.VerboseStatistics.Seed; } #endif Seed seed; seed.createFromParent(this); newAtomsMap.clear(); unsigned aIdx = nbi->EndAtomIdx; // existed in this parent seed (ring) or -1 if(NotSet == aIdx) { // new atom const Atom* end_atom = nbi->NewAtom; aIdx = seed.addAtom(end_atom); } const Bond* src_bond = qmol.getBondWithIdx(nbi->BondIdx); seed.addBond(src_bond); seed.computeRemainingSize(qmol); if(seed.canGrowBiggerThan(mcs.getMaxNumberBonds(), mcs.getMaxNumberAtoms()) ) { // prune() if(!MatchResult.empty()) seed.MatchResult = MatchResult; if( ! mcs.checkIfMatchAndAppend(seed)) { nbi->BondIdx = NotSet; // exclude this new bond from growing this seed - decrease 2^^N-1 to 2^^k-1, k 0) { std::vector dirtyNewBonds; dirtyNewBonds.reserve(NewBonds.size()); dirtyNewBonds.swap(NewBonds); for(std::vector::const_iterator nbi = dirtyNewBonds.begin(); nbi != dirtyNewBonds.end(); nbi++) if(NotSet != nbi->BondIdx) NewBonds.push_back(*nbi); } // add all other from 2^k-1 possible seeds, where k=newBonds.size(): if(NewBonds.size() > 1) { // if just one new bond, such seed has been already created if(sizeof(unsigned long long)*8 < NewBonds.size()) throw std::runtime_error("Max number of new external bonds of a seed more than 64"); BitSet maxCompositionValue; Composition2N::compute2N(NewBonds.size(), maxCompositionValue); maxCompositionValue -= 1; // 2^N-1 Composition2N composition(maxCompositionValue, maxCompositionValue); #ifdef EXCLUDE_WRONG_COMPOSITION std::vector failedCombinations; BitSet failedCombinationsMask=0uLL; #endif while(composition.generateNext()) { if(composition.is2Power()) // exclude already processed single external bond combinations continue; if(0==numErasedNewBonds && composition.getBitSet() == maxCompositionValue) continue; // exclude already processed all external bonds combination 2N-1 #ifdef EXCLUDE_WRONG_COMPOSITION // OPTIMISATION. reduce amount of generated seeds and match calls // 2120 instead of 2208 match calls on small test. 43 wrongComp-s, 83 rejected if(failedCombinationsMask & composition.getBitSet()) { // possibly exists in the list bool compositionWrong = false; for(std::vector::const_iterator failed = failedCombinations.begin(); failed != failedCombinations.end() ; failed++) if(*failed == (*failed & composition.getBitSet())) { // combination includes failed combination compositionWrong = true; break; } if(compositionWrong) { #ifdef VERBOSE_STATISTICS_ON ++mcs.VerboseStatistics.WrongCompositionRejected; #endif continue; } } #endif #ifdef VERBOSE_STATISTICS_ON { ++mcs.VerboseStatistics.Seed; } #endif Seed seed; seed.createFromParent(this); newAtomsMap.clear(); for(unsigned i=0; iEndAtomIdx; // existed in this parent seed (ring) or -1 if(NotSet == aIdx) { // new atom std::map::const_iterator nai = newAtomsMap.find(nbi->NewAtomIdx); // check RING if(newAtomsMap.end() == nai) { const Atom* end_atom = nbi->NewAtom;//qmol.getAtomWithIdx(nbi->NewAtomIdx); aIdx = seed.addAtom(end_atom); newAtomsMap[nbi->NewAtomIdx] = aIdx; // store new possible ring end point } else aIdx = nai->second; } const Bond* src_bond = qmol.getBondWithIdx(nbi->BondIdx); seed.addBond(src_bond); } seed.computeRemainingSize(qmol); if( ! seed.canGrowBiggerThan(mcs.getMaxNumberBonds(), mcs.getMaxNumberAtoms()) ) { // prune(). // seed too small #ifdef VERBOSE_STATISTICS_ON ++mcs.VerboseStatistics.RemainingSizeRejected; #endif } else { seed.MatchResult = MatchResult; bool found = mcs.checkIfMatchAndAppend(seed); if(!found) { #ifdef EXCLUDE_WRONG_COMPOSITION // if seed does not matched it is possible to exclude this mismatched combination for performance improvement failedCombinations.push_back(composition.getBitSet()); failedCombinationsMask &= composition.getBitSet(); #ifdef VERBOSE_STATISTICS_ON ++mcs.VerboseStatistics.WrongCompositionDetected; #endif #endif } } } } GrowingStage = NotSet; //finished } void Seed::computeRemainingSize(const ROMol& qmol) { RemainingBonds = RemainingAtoms = 0; std::vector end_atom_stack; std::vector visitedBonds = ExcludedBonds; std::vector visitedAtoms(qmol.getNumAtoms()); for(size_t i = 0; i < visitedAtoms.size(); i++) visitedAtoms[i] = false; for(std::vector::const_iterator it = MoleculeFragment.AtomsIdx.begin(); it != MoleculeFragment.AtomsIdx.end(); it++) visitedAtoms[*it] = true; // SDF all paths // 1. direct neighbours for(unsigned seedAtomIdx = LastAddedAtomsBeginIdx; seedAtomIdx < getNumAtoms(); seedAtomIdx++) { // just now added new border vertices (candidates for future growing) const Atom* atom = MoleculeFragment.Atoms[seedAtomIdx]; ROMol::OEDGE_ITER beg,end; for(boost::tie(beg,end) = qmol.getAtomBonds(atom); beg!=end; beg++) { // all bonds from MoleculeFragment.Atoms[srcAtomIdx] const Bond& bond = *(qmol[*beg]); if( ! visitedBonds[bond.getIdx()]) { ++RemainingBonds; visitedBonds[bond.getIdx()] = true; unsigned end_atom_idx = (MoleculeFragment.AtomsIdx[seedAtomIdx] == bond.getBeginAtomIdx()) ? bond.getEndAtomIdx() : bond.getBeginAtomIdx(); if( ! visitedAtoms[end_atom_idx]) { // check RING/CYCLE ++RemainingAtoms; visitedAtoms[end_atom_idx] = true; end_atom_stack.push_back(end_atom_idx); } } } } // 2. go deep while(!end_atom_stack.empty()) { unsigned ai = end_atom_stack.back(); end_atom_stack.pop_back(); const Atom* atom = qmol.getAtomWithIdx(ai); ROMol::OEDGE_ITER beg,end; for(boost::tie(beg,end) = qmol.getAtomBonds(atom); beg!=end; beg++) { // all bonds from end_atom const Bond& bond = *(qmol[*beg]); if( ! visitedBonds[bond.getIdx()]) { ++RemainingBonds; visitedBonds[bond.getIdx()] = true; unsigned end_atom_idx = (ai == bond.getBeginAtomIdx()) ? bond.getEndAtomIdx() : bond.getBeginAtomIdx(); if( ! visitedAtoms[end_atom_idx]) { // check RING/CYCLE ++RemainingAtoms; visitedAtoms[end_atom_idx] = true; end_atom_stack.push_back(end_atom_idx); } } } } } } }