// $Id$
//
//  Copyright (C) 2003-2009 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
// NOTE(review): this copy of the file looks like it went through a filter
// that dropped every angle-bracketed span: include targets, template
// arguments, cast target types and, in a few places, everything between a
// '<' and the next '>'. The surviving code is kept exactly as found below;
// each damaged spot is flagged. Restore those spots from a pristine copy
// before attempting to build.
//
#include "SubgraphUtils.h"
#include "Subgraphs.h"
// NOTE(review): the targets of the following nine #include directives are
// missing from this copy.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace RDKit {
  namespace Subgraphs {

    // Convenience overload: builds the sub-molecule for \c path without
    // exposing the atom-index mapping. A throw-away INT_MAP_INT is created
    // locally and everything is delegated to the four-argument overload,
    // whose result is returned unchanged.
    ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path,
                        bool useQuery) {
      INT_MAP_INT aIdxMap;
      return pathToSubmol(mol, path, useQuery, aIdxMap);
    }

    // Builds a new molecule containing copies of the bonds listed in
    // \c path together with the atoms those bonds touch.
    //
    //  mol        - molecule the path refers to
    //  path       - each entry is used as a bond index into \c mol
    //  useQuery   - when true the copies are made as QueryBond/QueryAtom
    //               objects; otherwise Bond::copy()/Atom::copy() are used
    //  atomIdxMap - output: cleared on entry, then filled with
    //               (atom index in \c mol) -> (index returned by addAtom on
    //               the new molecule) for every atom that was copied
    //
    // Returns a pointer to an RWMol allocated here with \c new.
    // NOTE(review): nothing in this function releases that allocation, so
    // presumably the caller is expected to delete it - confirm.
    ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path,
                        bool useQuery,
                        INT_MAP_INT &atomIdxMap) {
      RWMol *subMol=new RWMol();
      PATH_TYPE::const_iterator pathIter;
      // (leftover, disabled local declaration; its template arguments are
      //  missing in this copy)
      //std::map atomIdxMap;
      atomIdxMap.clear();

      if (useQuery) {
        // have to do this in two different blocks because of issues with variable scopes.
        for(pathIter=path.begin(); pathIter!=path.end(); ++pathIter){
          QueryBond *bond;
          bond = new QueryBond(*(mol.getBondWithIdx(*pathIter)));

          // these start out as indices into mol and are replaced below by
          // the corresponding indices in subMol
          int begIdx=bond->getBeginAtomIdx();
          int endIdx=bond->getEndAtomIdx();

          // copy the begin atom the first time it is encountered
          if(atomIdxMap.find(begIdx)==atomIdxMap.end()){
            QueryAtom *atom = new QueryAtom(*(mol.getAtomWithIdx(begIdx)));
            // NOTE(review): the false/true flags are presumably
            // "don't update the atom label" / "take ownership" - confirm
            // against the RWMol::addAtom declaration.
            int newAtomIdx=subMol->addAtom(atom,false,true);
            atomIdxMap[begIdx] = newAtomIdx;
          }
          begIdx = atomIdxMap.find(begIdx)->second;

          // same treatment for the end atom
          if(atomIdxMap.find(endIdx)==atomIdxMap.end()){
            QueryAtom *atom = new QueryAtom(*(mol.getAtomWithIdx(endIdx)));
            int newAtomIdx=subMol->addAtom(atom,false,true);
            atomIdxMap[endIdx] = newAtomIdx;
          }
          endIdx = atomIdxMap.find(endIdx)->second;

          // re-home the copied bond: point it at the new molecule and at
          // the new atom indices before adding it
          bond->setOwningMol(subMol);
          bond->setBeginAtomIdx(begIdx);
          bond->setEndAtomIdx(endIdx);
          // NOTE(review): second argument is presumably "take ownership" -
          // confirm against the RWMol::addBond declaration.
          subMol->addBond(bond,true);
        }
      } else {
        // identical procedure to the branch above, but using plain
        // Bond/Atom copies obtained from copy()
        for(pathIter=path.begin(); pathIter!=path.end(); ++pathIter){
          Bond *bond;
          bond=mol.getBondWithIdx(*pathIter)->copy();

          int begIdx=bond->getBeginAtomIdx();
          int endIdx=bond->getEndAtomIdx();

          if(atomIdxMap.find(begIdx)==atomIdxMap.end()){
            Atom *atom = mol.getAtomWithIdx(begIdx)->copy();
            int newAtomIdx=subMol->addAtom(atom,false,true);
            atomIdxMap[begIdx] = newAtomIdx;
          }
          begIdx = atomIdxMap.find(begIdx)->second;

          if(atomIdxMap.find(endIdx)==atomIdxMap.end()){
            Atom *atom = mol.getAtomWithIdx(endIdx)->copy();
            int newAtomIdx=subMol->addAtom(atom,false,true);
            atomIdxMap[endIdx] = newAtomIdx;
          }
          endIdx = atomIdxMap.find(endIdx)->second;

          bond->setOwningMol(subMol);
          bond->setBeginAtomIdx(begIdx);
          bond->setEndAtomIdx(endIdx);
          subMol->addBond(bond,true);
        }
      }
      return subMol;
    }

    // Collects the indices of all bonds in \c mol that connect two of the
    // atoms listed in \c atomIds.
    //
    // Every unordered pair (i<j) of entries in \c atomIds is looked up with
    // getBondBetweenAtoms(); whenever that returns a non-null bond its index
    // is appended to the result, so the result follows the pair enumeration
    // order. With zero or one atom id the result is empty.
    PATH_TYPE bondListFromAtomList(const ROMol &mol, const PATH_TYPE &atomIds) {
      PATH_TYPE bids;
      unsigned int natms = atomIds.size();
      if (natms <= 1) {
        return bids; //FIX: should probably throw an exception
      }
      for (unsigned int i = 0; i < natms; i++) {
        for (unsigned int j = i+1; j < natms; j++) {
          const Bond *bnd = mol.getBondBetweenAtoms(atomIds[i], atomIds[j]);
          if (bnd) {
            int bid = bnd->getIdx();
            bids.push_back(bid);
          }
        }
      }
      return bids;
    }

    using boost::uint32_t;
    using boost::int32_t;

    // Computes a DiscrimTuple for the set of bonds in \c path.
    //
    //  mol         - molecule the path refers to
    //  path        - each entry is used as a bond index into \c mol
    //  useBO       - when true, the bond type is hashed into the neighbor
    //                invariants exchanged during the iterations below
    //  extraInvars - optional; when non-null it must hold one entry per atom
    //                of \c mol (checked with CHECK_INVARIANT) and the entry
    //                for each path atom is hashed into that atom's invariant
    //
    // NOTE(review): several pieces of this function are missing from this
    // copy (container element types, two loop headers, cast target types and
    // everything after the final sorting loop header, including the code that
    // fills and returns \c res). Only what is still visible is described.
    DiscrimTuple calcPathDiscriminators(const ROMol &mol, const PATH_TYPE &path,
                                        bool useBO,
                                        // NOTE(review): element type of this vector is missing
                                        std::vector *extraInvars) {
      if(extraInvars) CHECK_INVARIANT(extraInvars->size()==mol.getNumAtoms(),"bad extra invars");
      DiscrimTuple res;

      // Start by collecting the atoms in the path and their degrees
      // NOTE(review): the element types of the three vectors below are
      // missing in this copy.
      std::vector atomsUsed(mol.getNumAtoms(),-1); // map from atom index->path index
      std::vector atoms; // to contain the atoms in the path
      std::vector pathDegrees; // degrees of each atom *in the path*
      for(PATH_TYPE::const_iterator pathIter=path.begin(); pathIter!=path.end(); ++pathIter){
        const Bond *bond=mol.getBondWithIdx(*pathIter);
        // first sighting of an atom registers it with degree 1; later
        // sightings just bump its degree
        if(atomsUsed[bond->getBeginAtomIdx()]<0){
          atomsUsed[bond->getBeginAtomIdx()]=atoms.size();
          atoms.push_back(bond->getBeginAtom());
          pathDegrees.push_back(1);
        } else {
          pathDegrees[atomsUsed[bond->getBeginAtomIdx()]]+=1;
        }
        if(atomsUsed[bond->getEndAtomIdx()]<0){
          atomsUsed[bond->getEndAtomIdx()]=atoms.size();
          atoms.push_back(bond->getEndAtom());
          pathDegrees.push_back(1);
        } else {
          pathDegrees[atomsUsed[bond->getEndAtomIdx()]]+=1;
        }
      }

      // Calculate the atomic invariants
      unsigned int nAtoms=atoms.size();
      // NOTE(review): element type missing.
      std::vector invars(nAtoms);
      // NOTE(review): the text between "i" and "getAtomicNum()" is missing.
      // Presumably it held the rest of the loop header over the path atoms
      // and the declarations of "atom" and "invar" (with the invariant
      // seeded from the atomic number) - confirm against a pristine copy.
      for(unsigned int i=0;igetAtomicNum();
        // fold the in-path degree and the formal charge into the invariant
        gboost::hash_combine(invar,pathDegrees[i]);
        gboost::hash_combine(invar,atom->getFormalCharge());
        // difference between the atom's mass and the periodic-table atomic
        // weight for its element
        // NOTE(review): the static_cast target type is missing.
        int deltaMass = static_cast(atom->getMass() -
                                    PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum()));
        gboost::hash_combine(invar,deltaMass);
        // aromatic atoms get an extra constant mixed in
        if(atom->getIsAromatic()){
          gboost::hash_combine(invar,1);
        }
        // caller-supplied invariants are indexed by the atom's index in mol
        if(extraInvars){
          gboost::hash_combine(invar,(*extraInvars)[atom->getIdx()]);
        }
        invars[i] = invar;
      }

      // now do the Morgan iterations:
      // the most number of cycles we need for the atoms on the edges
      // to feel each other is pathSize/2
      // EFF: it may be worth revisiting this at some point to see
      // if the iteration count can be even smaller (and if it
      // makes a difference in runtime)
      unsigned int nCycles=path.size()/2+1;
      // NOTE(review): the template argument of the hasher is missing.
      gboost::hash > vectHasher;
      // NOTE(review): the rest of this loop header and the start of the
      // declaration of locInvars are missing. locInvars is indexed per path
      // atom and push_back is called on its elements below.
      for(unsigned int cycle=0;cycle > locInvars(nAtoms);
        for(PATH_TYPE::const_iterator pathIter=path.begin(); pathIter!=path.end(); ++pathIter){
          const Bond *bond=mol.getBondWithIdx(*pathIter);
          // current invariants of the two atoms joined by this bond
          uint32_t v1=invars[atomsUsed[bond->getBeginAtomIdx()]];
          uint32_t v2=invars[atomsUsed[bond->getEndAtomIdx()]];
          if(useBO){
            // NOTE(review): the static_cast target types are missing.
            gboost::hash_combine(v1,static_cast(bond->getBondType()));
            gboost::hash_combine(v2,static_cast(bond->getBondType()));
          }
          // each atom receives the (possibly bond-type-adjusted) value of
          // its neighbor across this bond
          locInvars[atomsUsed[bond->getBeginAtomIdx()]].push_back(v2);
          locInvars[atomsUsed[bond->getEndAtomIdx()]].push_back(v1);
        }
        // we need to sort by the neighbor invariants to be order
        // independent:
        // NOTE(review): everything from the middle of the next loop header up
        // to the declaration of discrimsSeen is missing from this copy. That
        // gap covers the remainder of calcPathDiscriminators (the sorting
        // loop body, whatever fills and returns res) and the header of the
        // function that follows. The visible remainder of that function reads
        // mol, allPaths, useBO and res; presumably it is the path
        // uniquifying routine declared in the accompanying header - confirm.
        for(unsigned int i=0;i discrimsSeen;

        // Visible body of the following function: walk allPaths in order,
        // compute each path's discriminators and keep the path only if an
        // equal discriminator tuple has not been seen before (linear search
        // through discrimsSeen), so the first path with a given tuple wins.
        for(PATH_LIST::const_iterator path=allPaths.begin();
            path!=allPaths.end();++path){
          DiscrimTuple discrims = calcPathDiscriminators(mol,*path,useBO);
          if(std::find(discrimsSeen.begin(),discrimsSeen.end(),discrims)==discrimsSeen.end()){
            discrimsSeen.push_back(discrims);
            res.push_back(*path);
          }
        }
        return res;
    }
  } // end of namespace Subgraphs
} // end of namespace RDKit