// $Id$ // // Copyright (C) 2003-2013 Greg Landrum and Rational Discovery LLC // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "SubgraphUtils.h" #include "Subgraphs.h" #include #include #include #include #include #include #include #include #include namespace RDKit { namespace Subgraphs { ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path, bool useQuery) { INT_MAP_INT aIdxMap; return pathToSubmol(mol, path, useQuery, aIdxMap); } ROMol *pathToSubmol(const ROMol &mol, const PATH_TYPE &path, bool useQuery, INT_MAP_INT &atomIdxMap) { SubsetOptions options; options.copyAsQuery = useQuery; options.copyCoordinates = true; options.method = SubsetMethod::BONDS; std::vector upath{path.begin(), path.end()}; SubsetInfo subsetInfo; auto res = copyMolSubset(mol, upath, subsetInfo, options); atomIdxMap.clear(); for (auto mapping : subsetInfo.atomMapping) { atomIdxMap[mapping.first] = mapping.second; } return res.release(); } PATH_TYPE bondListFromAtomList(const ROMol &mol, const PATH_TYPE &atomIds) { PATH_TYPE bids; unsigned int natms = atomIds.size(); if (natms <= 1) { return bids; // FIX: should probably throw an exception } for (unsigned int i = 0; i < natms; i++) { for (unsigned int j = i + 1; j < natms; j++) { const Bond *bnd = mol.getBondBetweenAtoms(atomIds[i], atomIds[j]); if (bnd) { int bid = bnd->getIdx(); bids.push_back(bid); } } } return bids; } using std::int32_t; using std::uint32_t; DiscrimTuple calcPathDiscriminators(const ROMol &mol, const PATH_TYPE &path, bool useBO, std::vector *extraInvars) { if (extraInvars) { CHECK_INVARIANT(extraInvars->size() == mol.getNumAtoms(), "bad extra invars"); } // Start by collecting the atoms in the path and their degrees std::vector atomsUsed(mol.getNumAtoms(), -1); // map from atom index->path index std::vector atoms; // to contain the atoms in the path std::vector pathDegrees; // degrees of each atom *in the path* for (int pathIter : path) { const Bond *bond = mol.getBondWithIdx(pathIter); if (atomsUsed[bond->getBeginAtomIdx()] < 0) { atomsUsed[bond->getBeginAtomIdx()] = static_cast(atoms.size()); atoms.push_back(bond->getBeginAtom()); pathDegrees.push_back(1); } else { pathDegrees[atomsUsed[bond->getBeginAtomIdx()]] += 1; } if (atomsUsed[bond->getEndAtomIdx()] < 0) { atomsUsed[bond->getEndAtomIdx()] = static_cast(atoms.size()); atoms.push_back(bond->getEndAtom()); pathDegrees.push_back(1); } else { pathDegrees[atomsUsed[bond->getEndAtomIdx()]] += 1; } } // Calculate the atomic invariants unsigned int nAtoms = atoms.size(); std::vector invars(nAtoms); for (unsigned int i = 0; i < nAtoms; ++i) { const Atom *atom = atoms[i]; uint32_t invar = atom->getAtomicNum(); gboost::hash_combine(invar, pathDegrees[i]); gboost::hash_combine(invar, atom->getFormalCharge()); int deltaMass = static_cast( atom->getMass() - PeriodicTable::getTable()->getAtomicWeight(atom->getAtomicNum())); gboost::hash_combine(invar, deltaMass); if (atom->getIsAromatic()) { gboost::hash_combine(invar, 1); } if (extraInvars) { gboost::hash_combine(invar, (*extraInvars)[atom->getIdx()]); } invars[i] = invar; } // now do the Morgan iterations: // the most number of cycles we need for the atoms on the edges // to feel each other is pathSize/2 // EFF: it may be worth revisiting this at some point to see // if the iteration count can be even smaller (and if it // makes a difference in runtime) unsigned int nCycles = path.size() / 2 + 1; gboost::hash> vectHasher; for (unsigned int cycle = 0; cycle < nCycles; ++cycle) { // let each atom feel it's neighbors: std::vector> locInvars(nAtoms); for (int pathIter : path) { const Bond *bond = mol.getBondWithIdx(pathIter); uint32_t v1 = invars[atomsUsed[bond->getBeginAtomIdx()]]; uint32_t v2 = invars[atomsUsed[bond->getEndAtomIdx()]]; if (useBO) { gboost::hash_combine(v1, static_cast(bond->getBondType())); gboost::hash_combine(v2, static_cast(bond->getBondType())); } locInvars[atomsUsed[bond->getBeginAtomIdx()]].push_back(v2); locInvars[atomsUsed[bond->getEndAtomIdx()]].push_back(v1); } // we need to sort by the neighbor invariants to be order // independent: for (unsigned int i = 0; i < nAtoms; ++i) { std::sort(locInvars[i].begin(), locInvars[i].end()); invars[i] = vectHasher(locInvars[i]); } } // again, a sort for order independence: std::sort(invars.begin(), invars.end()); uint32_t pathInvar = vectHasher(invars); // also include the path size (bond count) and number of atoms // in the discriminator return std::make_tuple(pathInvar, path.size(), nAtoms); } // // This is intended for use on either subgraphs or paths. // The entries in PATH_LIST should refer to bonds though (not // atoms) // PATH_LIST uniquifyPaths(const ROMol &mol, const PATH_LIST &allPaths, bool useBO) { PATH_LIST res; std::vector discrimsSeen; for (const auto &allPath : allPaths) { DiscrimTuple discrims = calcPathDiscriminators(mol, allPath, useBO); if (std::find(discrimsSeen.begin(), discrimsSeen.end(), discrims) == discrimsSeen.end()) { discrimsSeen.push_back(discrims); res.push_back(allPath); } } return res; } } // end of namespace Subgraphs } // end of namespace RDKit