mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
# Conflicts: # .travis.yml # Code/GraphMol/FileParsers/MolFileParser.cpp # Code/GraphMol/FileParsers/MolFileStereochem.cpp # Code/GraphMol/ForceFieldHelpers/UFF/testUFFHelpers.cpp # Code/GraphMol/MolAlign/testMolAlign.cpp # Code/GraphMol/MolDraw2D/MolDraw2D.cpp # Code/GraphMol/MolDraw2D/Wrap/rdMolDraw2D.cpp # Code/GraphMol/QueryOps.cpp # Code/GraphMol/ROMol.cpp # Code/GraphMol/SmilesParse/test.cpp # Code/GraphMol/Trajectory/Trajectory.cpp # Code/GraphMol/Wrap/Atom.cpp # Code/GraphMol/Wrap/Bond.cpp # Code/GraphMol/new_canon.cpp # Code/RDGeneral/testDict.cpp # Code/SimDivPickers/Wrap/MaxMinPicker.cpp
1291 lines
46 KiB
C++
1291 lines
46 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2003-2013 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "RDKitBase.h"
|
|
#include <GraphMol/Rings.h>
|
|
#include <RDGeneral/RDLog.h>
|
|
#include <RDGeneral/Exceptions.h>
|
|
|
|
#include <RDGeneral/utils.h>
|
|
#include <vector>
|
|
#include <set>
|
|
#include <algorithm>
|
|
#include <boost/dynamic_bitset.hpp>
|
|
#include <boost/cstdint.hpp>
|
|
#include <RDGeneral/hash/hash.hpp>
|
|
|
|
typedef std::set<boost::uint32_t> RINGINVAR_SET;
|
|
typedef RINGINVAR_SET::const_iterator RINGINVAR_SET_CI;
|
|
typedef std::vector<boost::uint32_t> RINGINVAR_VECT;
|
|
|
|
namespace RingUtils {
|
|
const size_t MAX_BFSQ_SIZE = 200000; // arbitrary huge value
|
|
|
|
using namespace RDKit;
|
|
|
|
boost::uint32_t computeRingInvariant(INT_VECT ring, unsigned int nAtoms) {
|
|
RDUNUSED_PARAM(nAtoms);
|
|
std::sort(ring.begin(), ring.end());
|
|
boost::uint32_t res = gboost::hash_range(ring.begin(), ring.end());
|
|
return res;
|
|
}
|
|
|
|
void convertToBonds(const VECT_INT_VECT &res, VECT_INT_VECT &brings,
|
|
const ROMol &mol) {
|
|
for (const auto &ring : res) {
|
|
unsigned int rsiz = rdcast<unsigned int>(ring.size());
|
|
INT_VECT bring(rsiz);
|
|
for (unsigned int i = 0; i < (rsiz - 1); i++) {
|
|
const Bond *bnd = mol.getBondBetweenAtoms(ring[i], ring[i + 1]);
|
|
if (!bnd) throw ValueErrorException("expected bond not found");
|
|
bring[i] = bnd->getIdx();
|
|
}
|
|
// bond from last to first atom
|
|
const Bond *bnd = mol.getBondBetweenAtoms(ring[rsiz - 1], ring[0]);
|
|
if (!bnd) throw ValueErrorException("expected bond not found");
|
|
|
|
bring[rsiz - 1] = bnd->getIdx();
|
|
brings.push_back(bring);
|
|
}
|
|
}
|
|
|
|
} // end of namespace RingUtils
|
|
|
|
namespace FindRings {
|
|
using namespace RDKit;
|
|
int smallestRingsBfs(const ROMol &mol, int root, VECT_INT_VECT &rings,
|
|
boost::dynamic_bitset<> &activeBonds,
|
|
INT_VECT *forbidden = nullptr);
|
|
void trimBonds(unsigned int cand, const ROMol &tMol, INT_SET &changed,
|
|
INT_VECT &atomDegrees, boost::dynamic_bitset<> &activeBonds);
|
|
void storeRingInfo(const ROMol &mol, const INT_VECT &ring) {
|
|
INT_VECT bondIndices;
|
|
INT_VECT_CI lastRai;
|
|
for (auto rai = ring.begin(); rai != ring.end(); rai++) {
|
|
if (rai != ring.begin()) {
|
|
const Bond *bnd = mol.getBondBetweenAtoms(*rai, *lastRai);
|
|
if (!bnd) throw ValueErrorException("expected bond not found");
|
|
bondIndices.push_back(bnd->getIdx());
|
|
}
|
|
lastRai = rai;
|
|
}
|
|
const Bond *bnd = mol.getBondBetweenAtoms(*lastRai, *(ring.begin()));
|
|
if (!bnd) throw ValueErrorException("expected bond not found");
|
|
bondIndices.push_back(bnd->getIdx());
|
|
mol.getRingInfo()->addRing(ring, bondIndices);
|
|
}
|
|
|
|
void storeRingsInfo(const ROMol &mol, const VECT_INT_VECT &rings) {
|
|
for (const auto &ring : rings) {
|
|
storeRingInfo(mol, ring);
|
|
}
|
|
}
|
|
|
|
void markUselessD2s(unsigned int root, const ROMol &tMol,
|
|
boost::dynamic_bitset<> &forb, const INT_VECT &atomDegrees,
|
|
const boost::dynamic_bitset<> &activeBonds) {
|
|
// recursive function to mark any degree 2 nodes that are already represnted
|
|
// by root for the purpose of finding smallest rings.
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = tMol.getAtomBonds(tMol.getAtomWithIdx(root));
|
|
while (beg != end) {
|
|
BOND_SPTR bond = tMol[*beg];
|
|
++beg;
|
|
if (!activeBonds[bond->getIdx()]) continue;
|
|
unsigned int oIdx = bond->getOtherAtomIdx(root);
|
|
if (!forb[oIdx] && atomDegrees[oIdx] == 2) {
|
|
forb[oIdx] = 1;
|
|
markUselessD2s(oIdx, tMol, forb, atomDegrees, activeBonds);
|
|
}
|
|
}
|
|
}
|
|
|
|
void pickD2Nodes(const ROMol &tMol, INT_VECT &d2nodes, const INT_VECT &currFrag,
|
|
const INT_VECT &atomDegrees,
|
|
const boost::dynamic_bitset<> &activeBonds) {
|
|
d2nodes.resize(0);
|
|
|
|
// forb contains all d2 nodes, not just the ones we want to keep
|
|
boost::dynamic_bitset<> forb(tMol.getNumAtoms());
|
|
while (1) {
|
|
int root = -1;
|
|
for (int axci : currFrag) {
|
|
if (atomDegrees[axci] == 2 && !forb[axci]) {
|
|
root = axci;
|
|
d2nodes.push_back(axci);
|
|
forb[axci] = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (root == -1) {
|
|
break;
|
|
} else {
|
|
markUselessD2s(root, tMol, forb, atomDegrees, activeBonds);
|
|
}
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
typedef std::map<double, INT_VECT> DOUBLE_INT_VECT_MAP;
|
|
typedef DOUBLE_INT_VECT_MAP::iterator DOUBLE_INT_VECT_MAP_I;
|
|
typedef DOUBLE_INT_VECT_MAP::const_iterator DOUBLE_INT_VECT_MAP_CI;
|
|
#else
|
|
typedef std::map<boost::uint32_t, INT_VECT> RINGINVAR_INT_VECT_MAP;
|
|
typedef RINGINVAR_INT_VECT_MAP::iterator RINGINVAR_INT_VECT_MAP_I;
|
|
typedef RINGINVAR_INT_VECT_MAP::const_iterator RINGINVAR_INT_VECT_MAP_CI;
|
|
#endif
|
|
|
|
void findSSSRforDupCands(const ROMol &mol, VECT_INT_VECT &res,
|
|
RINGINVAR_SET &invars, const INT_INT_VECT_MAP dupMap,
|
|
const RINGINVAR_INT_VECT_MAP &dupD2Cands,
|
|
INT_VECT &atomDegrees,
|
|
boost::dynamic_bitset<> activeBonds) {
|
|
for (const auto &dupD2Cand : dupD2Cands) {
|
|
const INT_VECT &dupCands = dupD2Cand.second;
|
|
if (dupCands.size() > 1) {
|
|
// we have duplicate candidates.
|
|
VECT_INT_VECT nrings;
|
|
unsigned int minSiz = static_cast<unsigned int>(MAX_INT);
|
|
for (int dupCand : dupCands) {
|
|
// now break bonds for all the d2 nodes for that give the same rings as
|
|
// with (*dupi) and recompute smallest ring with (*dupi)
|
|
INT_VECT atomDegreesCopy = atomDegrees;
|
|
boost::dynamic_bitset<> activeBondsCopy = activeBonds;
|
|
INT_SET changed;
|
|
auto dmci = dupMap.find(dupCand);
|
|
for (int dni : dmci->second) {
|
|
trimBonds(dni, mol, changed, atomDegreesCopy, activeBondsCopy);
|
|
}
|
|
|
|
// now find the smallest ring/s around (*dupi)
|
|
VECT_INT_VECT srings;
|
|
smallestRingsBfs(mol, dupCand, srings, activeBondsCopy);
|
|
for (VECT_INT_VECT_CI sri = srings.begin(); sri != srings.end();
|
|
++sri) {
|
|
if (sri->size() < minSiz) {
|
|
minSiz = rdcast<unsigned int>(sri->size());
|
|
}
|
|
nrings.push_back((*sri));
|
|
}
|
|
}
|
|
|
|
for (VECT_INT_VECT_CI nri = nrings.begin(); nri != nrings.end(); ++nri) {
|
|
if (nri->size() == minSiz) {
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(*nri, mol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back((*nri));
|
|
invars.insert(invr);
|
|
}
|
|
}
|
|
} // end of loop over new rings found
|
|
} // end if (dupCand.size() > 1)
|
|
} // end of loop over all set of duplicate candidates
|
|
}
|
|
|
|
struct compRingSize : public std::binary_function<INT_VECT, INT_VECT, bool> {
|
|
bool operator()(const INT_VECT &v1, const INT_VECT &v2) const {
|
|
return v1.size() < v2.size();
|
|
}
|
|
};
|
|
|
|
void removeExtraRings(VECT_INT_VECT &res, unsigned int nexpt,
|
|
const ROMol &mol) {
|
|
RDUNUSED_PARAM(nexpt);
|
|
// sort on size
|
|
std::sort(res.begin(), res.end(), compRingSize());
|
|
|
|
#if 0
|
|
std::cerr<<"\n\nSORTED\n";
|
|
for(VECT_INT_VECT::const_iterator iter=res.begin();
|
|
iter!=res.end();++iter){
|
|
std::cerr<<iter-res.begin()<<": ";
|
|
std::copy(iter->begin(),iter->end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
}
|
|
#endif
|
|
|
|
// change the rings from atom IDs to bondIds
|
|
VECT_INT_VECT brings;
|
|
RingUtils::convertToBonds(res, brings, mol);
|
|
std::vector<boost::dynamic_bitset<>> bitBrings;
|
|
bitBrings.reserve(brings.size());
|
|
for (VECT_INT_VECT_CI vivi = brings.begin(); vivi != brings.end(); ++vivi) {
|
|
boost::dynamic_bitset<> lring(mol.getNumBonds());
|
|
for (int ivi : *vivi) {
|
|
lring.set(ivi);
|
|
}
|
|
bitBrings.push_back(lring);
|
|
}
|
|
|
|
boost::dynamic_bitset<> availRings(res.size());
|
|
availRings.set();
|
|
boost::dynamic_bitset<> keepRings(res.size());
|
|
boost::dynamic_bitset<> munion(mol.getNumBonds());
|
|
|
|
for (unsigned int i = 0; i < res.size(); ++i) {
|
|
// skip this ring if we've already seen all of its bonds
|
|
if (bitBrings[i].is_subset_of(munion)) availRings.set(i, 0);
|
|
if (!availRings[i]) continue;
|
|
|
|
munion |= bitBrings[i];
|
|
keepRings.set(i);
|
|
|
|
// from this ring we consider all others that are still available and the
|
|
// same size
|
|
boost::dynamic_bitset<> consider(res.size());
|
|
for (unsigned int j = i + 1; j < res.size(); ++j) {
|
|
// std::cerr<<" "<<j<<" "<<brings[j].size()<<" -
|
|
// "<<brings[i].size()<<" >"<<availRings[j]<<std::endl;
|
|
if (availRings[j] && (brings[j].size() == brings[i].size())) {
|
|
consider.set(j);
|
|
}
|
|
}
|
|
|
|
// std::cerr<<">>> "<<i<<" "<<consider.count()<<std::endl;
|
|
while (consider.count()) {
|
|
unsigned int bestJ = i + 1;
|
|
int bestOverlap = -1;
|
|
// loop over the available other rings in consideration and pick the one
|
|
// that has the most overlapping bonds with what we've done so far.
|
|
// this is the fix to github #526
|
|
for (unsigned int j = i + 1;
|
|
j < res.size() && bitBrings[j].count() == bitBrings[i].count();
|
|
++j) {
|
|
if (!consider[j] || !availRings[j]) continue;
|
|
int overlap = rdcast<int>((bitBrings[j] & munion).count());
|
|
if (overlap > bestOverlap) {
|
|
bestOverlap = overlap;
|
|
bestJ = j;
|
|
}
|
|
}
|
|
consider.set(bestJ, 0);
|
|
if (bitBrings[bestJ].is_subset_of(munion)) {
|
|
availRings.set(bestJ, 0);
|
|
} else {
|
|
keepRings.set(bestJ);
|
|
availRings.set(bestJ, 0);
|
|
munion |= bitBrings[bestJ];
|
|
}
|
|
}
|
|
}
|
|
// remove the extra rings from res and store them on the molecule in case we
|
|
// wish
|
|
// symmetrize the SSSRs later
|
|
VECT_INT_VECT extras;
|
|
VECT_INT_VECT temp = res;
|
|
res.resize(0);
|
|
for (unsigned int i = 0; i < temp.size(); i++) {
|
|
if (keepRings[i]) {
|
|
res.push_back(temp[i]);
|
|
} else {
|
|
extras.push_back(temp[i]);
|
|
}
|
|
}
|
|
|
|
mol.setProp(common_properties::extraRings, extras, true);
|
|
}
|
|
|
|
void findRingsD2nodes(const ROMol &tMol, VECT_INT_VECT &res,
|
|
RINGINVAR_SET &invars, const INT_VECT &d2nodes,
|
|
INT_VECT &atomDegrees,
|
|
boost::dynamic_bitset<> &activeBonds,
|
|
boost::dynamic_bitset<> &ringBonds,
|
|
boost::dynamic_bitset<> &ringAtoms) {
|
|
// place to record any duplicate rings discovered from the current d2 nodes
|
|
RINGINVAR_INT_VECT_MAP dupD2Cands;
|
|
int cand;
|
|
INT_VECT_CI d2i;
|
|
|
|
INT_INT_VECT_MAP dupMap;
|
|
// here is an example of molecule where the this scheme of finding other node
|
|
// that
|
|
// result in duplicates is necessary : C12=CON=C1C(C4)CC3CC2CC4C3
|
|
// It would help to draw this molecule, and number the atoms but here is what
|
|
// happen
|
|
// - there are 6 d2 node - 1, 6, 7, 9, 11, 13
|
|
// - both 6 and 7 find the same ring (5,6,12,13,8,7) but we do not find the 7
|
|
// membered ring
|
|
// (5,7,8,9,10,0,4)
|
|
// - similarly 9 and 11 find a duplicate ring (9,10,11,12,13)
|
|
// - when we move to 13 both the above duplicate rings are found
|
|
// - so we will keep track for each d2 all the other node that resulted in
|
|
// duplicate rings
|
|
// - the bonds to these nodes will be broken and we attempt to find a new
|
|
// ring, for e.g. by breaking
|
|
// bonds to 7 and 13, we will find a 7 membered ring with 6 (this is done
|
|
// in findSSSRforDupCands)
|
|
std::map<int, RINGINVAR_VECT> nodeInvars;
|
|
std::map<int, RINGINVAR_VECT>::const_iterator nici;
|
|
DOUBLE_VECT_CI ici;
|
|
for (d2i = d2nodes.begin(); d2i != d2nodes.end(); ++d2i) {
|
|
cand = (*d2i);
|
|
// std::cerr<<" smallest rings bfs: "<<cand<<std::endl;
|
|
VECT_INT_VECT srings;
|
|
// we have to find all non duplicate possible smallest rings for each node
|
|
smallestRingsBfs(tMol, cand, srings, activeBonds);
|
|
for (VECT_INT_VECT_CI sri = srings.begin(); sri != srings.end(); ++sri) {
|
|
const INT_VECT &nring = (*sri);
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
for (unsigned int i = 0; i < nring.size() - 1; ++i) {
|
|
unsigned int bIdx =
|
|
tMol.getBondBetweenAtoms(nring[i], nring[i + 1])->getIdx();
|
|
ringBonds.set(bIdx);
|
|
ringAtoms.set(nring[i]);
|
|
}
|
|
ringBonds.set(
|
|
tMol.getBondBetweenAtoms(nring[0], nring[nring.size() - 1])
|
|
->getIdx());
|
|
ringAtoms.set(nring[nring.size() - 1]);
|
|
#if 0
|
|
std::cerr<<" res: "<<invr<<" | ";
|
|
std::copy(nring.begin(),nring.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
#endif
|
|
}
|
|
|
|
nodeInvars[cand].push_back(invr);
|
|
// check if this ring is duplicate with something else
|
|
for (nici = nodeInvars.begin(); nici != nodeInvars.end(); nici++) {
|
|
if (nici->first != cand) {
|
|
if (std::find(nici->second.begin(), nici->second.end(), invr) !=
|
|
nici->second.end()) {
|
|
// ok we discovered this ring via another node before
|
|
// add that node as duplicate to this node and and vice versa
|
|
dupMap[cand].push_back(nici->first);
|
|
dupMap[nici->first].push_back(cand);
|
|
}
|
|
}
|
|
}
|
|
dupD2Cands[invr].push_back(cand);
|
|
}
|
|
|
|
// We don't want to trim the bonds connecting cand here - this can disrupt
|
|
// a second small ring. Here is an example SC(C3C1CC(C3)CC(C2S)(O)C1)2S
|
|
// by trimming the bond connecting to atom #4 , we loose the smallest ring
|
|
// that
|
|
// contains atom #7. Issue 134
|
|
// MolOps::trimBonds(cand, tMol, changed);
|
|
}
|
|
|
|
// now deal with any d2 nodes that resulted in duplicate rings before trimming
|
|
// their bonds.
|
|
// it is possible that one of these nodes is involved a different small ring,
|
|
// that is not found
|
|
// because the first nodes has not be trimmed. Here is an example molecule:
|
|
// CC1=CC=C(C=C1)S(=O)(=O)O[CH]2[CH]3CO[CH](O3)[CH]4OC(C)(C)O[CH]24
|
|
findSSSRforDupCands(tMol, res, invars, dupMap, dupD2Cands, atomDegrees,
|
|
activeBonds);
|
|
}
|
|
|
|
void findRingsD3Node(const ROMol &tMol, VECT_INT_VECT &res,
|
|
RINGINVAR_SET &invars, int cand, INT_VECT &atomDegrees,
|
|
boost::dynamic_bitset<> activeBonds) {
|
|
RDUNUSED_PARAM(atomDegrees);
|
|
// this is brutal - we have no degree 2 nodes - find the first possible degree
|
|
// 3 node
|
|
int nsmall;
|
|
|
|
// We've got a degree three node. The goal of what follows is to find the
|
|
// three rings in which it's involved, push those onto our results, and
|
|
// then remove the node from consideration. This will create a bunch of
|
|
// degree
|
|
// 2 nodes, which we can then chew off the next time around the loop.
|
|
|
|
// this part is a bit different from the Figueras algorithm
|
|
// here we try to find all the rings the rings that have a potential for
|
|
// contributing to
|
|
// SSSR - i.e. we try to find 3 rings for this node.
|
|
// - each bond (that contributes to the degree 3 ) is allowed to participate
|
|
// in exactly
|
|
// two of these rings.
|
|
// - also any rings that are included in already found rings are ignored
|
|
|
|
// ASSUME: every connection from a degree three node at this point is a
|
|
// ring bond
|
|
// REVIEW: Is this valid?
|
|
|
|
// first find all smallest possible rings
|
|
VECT_INT_VECT srings;
|
|
nsmall = smallestRingsBfs(tMol, cand, srings, activeBonds);
|
|
|
|
for (VECT_INT_VECT_CI sri = srings.begin(); sri != srings.end(); ++sri) {
|
|
const INT_VECT &nring = (*sri);
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
}
|
|
}
|
|
|
|
// if already found >3 rings we are done with this degree 3 node
|
|
// if we found less than 3 we have to find other potential ring/s
|
|
if (nsmall < 3) {
|
|
int n1 = -1, n2 = -1, n3 = -1;
|
|
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = tMol.getAtomBonds(tMol.getAtomWithIdx(cand));
|
|
while (beg != end && !activeBonds[tMol[*beg]->getIdx()]) ++beg;
|
|
CHECK_INVARIANT(beg != end, "neighbor not found");
|
|
n1 = tMol[*beg]->getOtherAtomIdx(cand);
|
|
|
|
++beg;
|
|
while (beg != end && !activeBonds[tMol[*beg]->getIdx()]) ++beg;
|
|
CHECK_INVARIANT(beg != end, "neighbor not found");
|
|
n2 = tMol[*beg]->getOtherAtomIdx(cand);
|
|
|
|
++beg;
|
|
while (beg != end && !activeBonds[tMol[*beg]->getIdx()]) ++beg;
|
|
CHECK_INVARIANT(beg != end, "neighbor not found");
|
|
n3 = tMol[*beg]->getOtherAtomIdx(cand);
|
|
|
|
if (nsmall == 2) {
|
|
// we found two rings find the third one
|
|
// first find the neighbor that is common to the two ring we found so far
|
|
int f;
|
|
|
|
if ((std::find(srings[0].begin(), srings[0].end(), n1) !=
|
|
srings[0].end()) &&
|
|
(std::find(srings[1].begin(), srings[1].end(), n1) !=
|
|
srings[1].end())) {
|
|
f = n1;
|
|
} else if ((std::find(srings[0].begin(), srings[0].end(), n2) !=
|
|
srings[0].end()) &&
|
|
(std::find(srings[1].begin(), srings[1].end(), n2) !=
|
|
srings[1].end())) {
|
|
f = n2;
|
|
} else if ((std::find(srings[0].begin(), srings[0].end(), n3) !=
|
|
srings[0].end()) &&
|
|
(std::find(srings[1].begin(), srings[1].end(), n3) !=
|
|
srings[1].end())) {
|
|
f = n3;
|
|
}
|
|
|
|
// now find the smallest possible ring that does not contain f
|
|
VECT_INT_VECT trings;
|
|
INT_VECT forb;
|
|
forb.push_back(f);
|
|
smallestRingsBfs(tMol, cand, trings, activeBonds, &forb);
|
|
for (VECT_INT_VECT_CI sri = trings.begin(); sri != trings.end(); ++sri) {
|
|
const INT_VECT &nring = (*sri);
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
}
|
|
}
|
|
} // doing degree 3 node - end of 2 smallest rings found for cand
|
|
if (nsmall == 1) {
|
|
// we found 1 ring - we need to find two more that involve the 3rd
|
|
// neighbor
|
|
int f1, f2;
|
|
// Which of our three neighbors are in the small ring?
|
|
// these are f1 and f2
|
|
if (std::find(srings[0].begin(), srings[0].end(), n1) ==
|
|
srings[0].end()) {
|
|
f1 = n2, f2 = n3;
|
|
} else if (std::find(srings[0].begin(), srings[0].end(), n2) ==
|
|
srings[0].end()) {
|
|
f1 = n1;
|
|
f2 = n3;
|
|
} else if (std::find(srings[0].begin(), srings[0].end(), n3) ==
|
|
srings[0].end()) {
|
|
f1 = n1;
|
|
f2 = n2;
|
|
}
|
|
|
|
// now find two rings that include cand, one of these rings should include
|
|
// f1
|
|
// and the other should include f2
|
|
|
|
// first ring with f1 and no f2
|
|
VECT_INT_VECT trings;
|
|
INT_VECT forb;
|
|
forb.push_back(f2);
|
|
smallestRingsBfs(tMol, cand, trings, activeBonds, &forb);
|
|
for (VECT_INT_VECT_CI sri = trings.begin(); sri != trings.end(); ++sri) {
|
|
const INT_VECT &nring = (*sri);
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
}
|
|
}
|
|
|
|
// next the ring with f2 and no f1
|
|
trings.clear();
|
|
forb.clear();
|
|
forb.push_back(f1);
|
|
smallestRingsBfs(tMol, cand, trings, activeBonds, &forb);
|
|
for (VECT_INT_VECT_CI sri = trings.begin(); sri != trings.end(); ++sri) {
|
|
const INT_VECT &nring = (*sri);
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
}
|
|
}
|
|
} // doing node of degree 3 - end of found only 1 smallest ring
|
|
} // end of found less than 3 smallest ring for the degree 3 node
|
|
}
|
|
|
|
int greatestComFac(long curfac, long nfac) {
|
|
long small;
|
|
long large;
|
|
long rem;
|
|
|
|
// Determine which of the numbers is the larger, and which is the smaller
|
|
large = (curfac > nfac) ? curfac : nfac;
|
|
small = (curfac < nfac) ? curfac : nfac;
|
|
|
|
// Keep looping until no remainder, as this means it is a factor of both
|
|
while (small != 0) {
|
|
// Set the larger var to the smaller, and set the smaller to the remainder
|
|
// of (large / small)
|
|
rem = (large % small);
|
|
large = small;
|
|
small = rem;
|
|
}
|
|
|
|
// By here nLarge will hold the largest common factor, so just return it
|
|
return large;
|
|
}
|
|
|
|
/******************************************************************************
|
|
* SUMMARY:
|
|
* remove the bond in the molecule that connect to the spcified atom
|
|
*
|
|
* ARGUMENTS:
|
|
* cand - the node(atom) of interest
|
|
* tMol - molecule of interest
|
|
* changed - list of the atoms that are effected the bond removal
|
|
* this may be accumulated over multiple calls to trimBonds
|
|
* it basically forms a list of atom that need to be searched for
|
|
* the next round of pruning
|
|
*
|
|
******************************************************************************/
|
|
void trimBonds(unsigned int cand, const ROMol &tMol, INT_SET &changed,
|
|
INT_VECT &atomDegrees, boost::dynamic_bitset<> &activeBonds) {
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = tMol.getAtomBonds(tMol.getAtomWithIdx(cand));
|
|
while (beg != end) {
|
|
BOND_SPTR bond = tMol[*beg];
|
|
++beg;
|
|
if (!activeBonds[bond->getIdx()]) continue;
|
|
unsigned int oIdx = bond->getOtherAtomIdx(cand);
|
|
if (atomDegrees[oIdx] <= 2) changed.insert(oIdx);
|
|
activeBonds[bond->getIdx()] = 0;
|
|
atomDegrees[oIdx] -= 1;
|
|
atomDegrees[cand] -= 1;
|
|
}
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* SUMMARY:
|
|
* this again is a modified version of the BFS algorihtm in Figueras paper to
|
|
*find
|
|
* the smallest ring with a specified root atom.
|
|
* JCICS, Vol. 30, No. 5, 1996, 986-991
|
|
* The follwing are changes from the original algorithm
|
|
* - find all smallest rings around a node not just one
|
|
* - once can provided a list of node IDs that should not be include in the
|
|
*discovered rings
|
|
*
|
|
* ARGUMENTS:
|
|
* mol - molecule of interest
|
|
* root - Atom ID of the node of interest
|
|
* rings - list of rings into which the results are entered
|
|
* forbidden - list of atoms ID that should be avoided
|
|
*
|
|
* RETURNS:
|
|
* number of smallest rings found
|
|
***********************************************************************************/
|
|
int smallestRingsBfs(const ROMol &mol, int root, VECT_INT_VECT &rings,
|
|
boost::dynamic_bitset<> &activeBonds,
|
|
INT_VECT *forbidden) {
|
|
// this function finds the smallest ring with the given root atom.
|
|
// if multiple smallest rings are found all of them are return
|
|
// if any atoms are specified in the forbidden list, those atoms are avoided.
|
|
|
|
// FIX: this should be number of atoms in the fragment (if it's required at
|
|
// all, see below)
|
|
const int WHITE = 0, GRAY = 1, BLACK = 2;
|
|
INT_VECT done(mol.getNumAtoms(), WHITE);
|
|
|
|
if (forbidden) {
|
|
for (INT_VECT_CI dci = forbidden->begin(); dci != forbidden->end(); dci++) {
|
|
done[*dci] = BLACK;
|
|
}
|
|
}
|
|
|
|
// it would be "nicer" to use a map for this, but that ends up being too slow:
|
|
VECT_INT_VECT atPaths(mol.getNumAtoms());
|
|
INT_VECT rpath(1, root);
|
|
atPaths[root] = rpath;
|
|
|
|
std::deque<int> bfsq;
|
|
bfsq.push_back(root);
|
|
int curr = -1;
|
|
unsigned int curSize = UINT_MAX;
|
|
while (bfsq.size() > 0) {
|
|
if (bfsq.size() >= RingUtils::MAX_BFSQ_SIZE) {
|
|
std::string msg =
|
|
"Maximum BFS search size exceeded.\nThis is likely due to a highly "
|
|
"symmetric fused ring system.";
|
|
BOOST_LOG(rdErrorLog) << msg << std::endl;
|
|
throw ValueErrorException(msg);
|
|
}
|
|
|
|
curr = bfsq.front();
|
|
bfsq.pop_front();
|
|
|
|
done[curr] = BLACK;
|
|
|
|
INT_VECT &cpath = atPaths[curr];
|
|
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = mol.getAtomBonds(mol.getAtomWithIdx(curr));
|
|
while (beg != end) {
|
|
BOND_SPTR bond = mol[*beg];
|
|
++beg;
|
|
if (!activeBonds[bond->getIdx()]) continue;
|
|
int nbrIdx = bond->getOtherAtomIdx(curr);
|
|
if ((std::find(cpath.begin(), cpath.end(), nbrIdx) == cpath.end()) &&
|
|
done[nbrIdx] != BLACK) {
|
|
// i.e. we are not at a node that is making up the current path
|
|
// and we are not a node that has been completely explored before
|
|
// (it has been a curr node before)
|
|
|
|
// FIX: can we avoid this find by coloring atoms gray when they go into
|
|
// the queue and just looking up the colors?
|
|
if (done[nbrIdx] == WHITE) {
|
|
// we have never been to this node before through via any path
|
|
atPaths[nbrIdx] = cpath;
|
|
atPaths[nbrIdx].push_back(nbrIdx);
|
|
done[nbrIdx] = GRAY;
|
|
bfsq.push_back(nbrIdx);
|
|
} // end of found a untouched node
|
|
else {
|
|
// we have been here via a different path
|
|
// there is a potential for ring closure here
|
|
INT_VECT npath = atPaths[nbrIdx];
|
|
// make sure that the intersections of cpath and npath give exactl one
|
|
// element and that should be the root element for correct ring
|
|
// closure
|
|
int id = -1;
|
|
unsigned int com = 0;
|
|
for (INT_VECT_CI ci = cpath.begin(); ci != cpath.end(); ++ci) {
|
|
if (std::find(npath.begin(), npath.end(), (*ci)) != npath.end()) {
|
|
com++;
|
|
id = (*ci);
|
|
if (id != root) break;
|
|
}
|
|
} // end of found stuff in common with neighbor
|
|
|
|
if (id == root) { // we found a ring
|
|
// make the ring
|
|
INT_VECT ring = cpath;
|
|
|
|
// remove the root node and attach the other half of the ring from
|
|
// npath
|
|
// reverse this piece so that the ring is traversed correctly
|
|
|
|
// FIX: we're probably assured that root is the first node, so we
|
|
// can
|
|
// just pop it from the front
|
|
npath.erase(std::remove(npath.begin(), npath.end(), root));
|
|
|
|
#ifndef WIN32
|
|
ring.insert(ring.end(), npath.rbegin(), npath.rend());
|
|
#else // I <heart> MSVC++ v6
|
|
std::reverse(npath.begin(), npath.end());
|
|
ring.insert(ring.end(), npath.begin(), npath.end());
|
|
#endif
|
|
if (ring.size() <= curSize) {
|
|
curSize = rdcast<unsigned int>(ring.size());
|
|
rings.push_back(ring);
|
|
} else {
|
|
// we are done with the smallest rings
|
|
return rdcast<unsigned int>(rings.size());
|
|
}
|
|
} // end of found a ring
|
|
} // end of we have seen this neighbor before
|
|
} // end of nbrIdx not part of current path and not a done atom
|
|
} // end of loop over neighbors of current atom
|
|
} // moving to the next node
|
|
return rdcast<unsigned int>(
|
|
rings.size()); // if we are here we should have found everything around
|
|
// the node
|
|
}
|
|
|
|
bool _atomSearchBFS(const ROMol &tMol, unsigned int startAtomIdx,
|
|
unsigned int endAtomIdx, boost::dynamic_bitset<> &ringAtoms,
|
|
INT_VECT &res, RINGINVAR_SET &invars) {
|
|
res.clear();
|
|
std::deque<INT_VECT> bfsq;
|
|
|
|
INT_VECT tv;
|
|
tv.push_back(startAtomIdx);
|
|
bfsq.push_back(tv);
|
|
while (!bfsq.empty()) {
|
|
if (bfsq.size() >= RingUtils::MAX_BFSQ_SIZE) {
|
|
std::string msg =
|
|
"Maximum BFS search size exceeded.\nThis is likely due to a highly "
|
|
"symmetric fused ring system.";
|
|
BOOST_LOG(rdErrorLog) << msg << std::endl;
|
|
throw ValueErrorException(msg);
|
|
}
|
|
tv = bfsq.front();
|
|
bfsq.pop_front();
|
|
|
|
unsigned int currAtomIdx = tv.back();
|
|
ROMol::ADJ_ITER nbrIdx, endNbrs;
|
|
boost::tie(nbrIdx, endNbrs) =
|
|
tMol.getAtomNeighbors(tMol.getAtomWithIdx(currAtomIdx));
|
|
while (nbrIdx != endNbrs) {
|
|
if (*nbrIdx == endAtomIdx) {
|
|
if (currAtomIdx != startAtomIdx) {
|
|
INT_VECT nv(tv);
|
|
|
|
nv.push_back(rdcast<unsigned int>(*nbrIdx));
|
|
// make sure the ring we just found isn't already in our set
|
|
// of rings (this was an extension of sf.net issue 249)
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nv, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
// we're done!
|
|
res.resize(nv.size());
|
|
std::copy(nv.begin(), nv.end(), res.begin());
|
|
return true;
|
|
}
|
|
} else {
|
|
// ignore this one
|
|
}
|
|
} else if (ringAtoms[*nbrIdx] &&
|
|
std::find(tv.begin(), tv.end(), *nbrIdx) == tv.end()) {
|
|
//} else if(ringAtoms[*nbrIdx]){
|
|
INT_VECT nv(tv);
|
|
nv.push_back(rdcast<unsigned int>(*nbrIdx));
|
|
|
|
bfsq.push_back(nv);
|
|
}
|
|
++nbrIdx;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool findRingConnectingAtoms(const ROMol &tMol, const Bond *bond,
|
|
VECT_INT_VECT &res, RINGINVAR_SET &invars,
|
|
boost::dynamic_bitset<> &ringBonds,
|
|
boost::dynamic_bitset<> &ringAtoms) {
|
|
PRECONDITION(bond, "bad bond");
|
|
PRECONDITION(!ringBonds[bond->getIdx()], "not a ring bond");
|
|
PRECONDITION(ringAtoms[bond->getBeginAtomIdx()], "not a ring atom");
|
|
PRECONDITION(ringAtoms[bond->getEndAtomIdx()], "not a ring atom");
|
|
|
|
INT_VECT nring;
|
|
if (_atomSearchBFS(tMol, bond->getBeginAtomIdx(), bond->getEndAtomIdx(),
|
|
ringAtoms, nring, invars)) {
|
|
boost::uint32_t invr =
|
|
RingUtils::computeRingInvariant(nring, tMol.getNumAtoms());
|
|
if (invars.find(invr) == invars.end()) {
|
|
res.push_back(nring);
|
|
invars.insert(invr);
|
|
#if 0
|
|
std::cerr<<" local: "<<invr<<" | ";
|
|
std::copy(nring.begin(),nring.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
#endif
|
|
for (unsigned int i = 0; i < nring.size() - 1; ++i) {
|
|
unsigned int bIdx =
|
|
tMol.getBondBetweenAtoms(nring[i], nring[i + 1])->getIdx();
|
|
ringBonds.set(bIdx);
|
|
ringAtoms.set(nring[i]);
|
|
}
|
|
ringBonds.set(tMol.getBondBetweenAtoms(nring[0], nring[nring.size() - 1])
|
|
->getIdx());
|
|
ringAtoms.set(nring[nring.size() - 1]);
|
|
}
|
|
} else {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // end of FindRings namespace
|
|
|
|
namespace RDKit {
|
|
namespace MolOps {
|
|
int findSSSR(const ROMol &mol, VECT_INT_VECT *res) {
|
|
if (!res) {
|
|
VECT_INT_VECT rings;
|
|
return findSSSR(mol, rings);
|
|
} else {
|
|
return findSSSR(mol, (*res));
|
|
}
|
|
}
|
|
|
|
int findSSSR(const ROMol &mol, VECT_INT_VECT &res) {
|
|
res.resize(0);
|
|
// check if SSSR's are already on the molecule
|
|
if (mol.getRingInfo()->isInitialized()) {
|
|
res = mol.getRingInfo()->atomRings();
|
|
return rdcast<int>(res.size());
|
|
} else {
|
|
mol.getRingInfo()->initialize();
|
|
}
|
|
|
|
RINGINVAR_SET invars;
|
|
|
|
unsigned int nats = mol.getNumAtoms();
|
|
boost::dynamic_bitset<> activeAtoms(nats);
|
|
activeAtoms.set();
|
|
int nbnds = mol.getNumBonds();
|
|
boost::dynamic_bitset<> activeBonds(nbnds);
|
|
activeBonds.set();
|
|
|
|
// Zero-order bonds are not candidates for rings
|
|
ROMol::EDGE_ITER firstB, lastB;
|
|
boost::tie(firstB, lastB) = mol.getEdges();
|
|
while (firstB != lastB) {
|
|
BOND_SPTR bond = mol[*firstB];
|
|
if (bond->getBondType() == Bond::ZERO) activeBonds[bond->getIdx()] = 0;
|
|
++firstB;
|
|
}
|
|
|
|
boost::dynamic_bitset<> ringBonds(nbnds);
|
|
boost::dynamic_bitset<> ringAtoms(nats);
|
|
|
|
INT_VECT atomDegrees(nats);
|
|
INT_VECT atomDegreesWithZeroOrderBonds(nats);
|
|
for (unsigned int i = 0; i < nats; ++i) {
|
|
const Atom *atom = mol.getAtomWithIdx(i);
|
|
int deg = atom->getDegree();
|
|
atomDegrees[i] = deg;
|
|
atomDegreesWithZeroOrderBonds[i] = deg;
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = mol.getAtomBonds(atom);
|
|
while (beg != end) {
|
|
BOND_SPTR bond = mol[*beg];
|
|
if (bond->getBondType() == Bond::ZERO) atomDegrees[i]--;
|
|
++beg;
|
|
}
|
|
}
|
|
|
|
// find the number of fragments in the molecule - we will loop over them
|
|
VECT_INT_VECT frags;
|
|
INT_VECT curFrag;
|
|
unsigned int nfrags = getMolFrags(mol, frags);
|
|
for (unsigned int fi = 0; fi < nfrags;
|
|
fi++) { // loop over the fragments in a molecule
|
|
VECT_INT_VECT fragRes;
|
|
curFrag = frags[fi];
|
|
|
|
if (curFrag.size() < 3) continue;
|
|
|
|
// the following is the list of atoms that are useful in the next round of
|
|
// trimming
|
|
// basically atoms that become degree 0 or 1 because of bond removals
|
|
// initialized with atoms of degrees 0 and 1
|
|
INT_SET changed;
|
|
int bndcnt_with_zero_order_bonds = 0;
|
|
unsigned int nbnds = 0;
|
|
for (INT_VECT_CI aidi = curFrag.begin(); aidi != curFrag.end(); aidi++) {
|
|
int atom_idx = *aidi;
|
|
bndcnt_with_zero_order_bonds += atomDegreesWithZeroOrderBonds[atom_idx];
|
|
|
|
int deg = atomDegrees[atom_idx];
|
|
|
|
nbnds += deg;
|
|
if (deg < 2) {
|
|
changed.insert((*aidi));
|
|
}
|
|
}
|
|
|
|
// check to see if this fragment can even have a possible ring
|
|
CHECK_INVARIANT(bndcnt_with_zero_order_bonds % 2 == 0,
|
|
"fragment graph has a dangling degree");
|
|
bndcnt_with_zero_order_bonds = bndcnt_with_zero_order_bonds / 2;
|
|
int num_possible_rings = bndcnt_with_zero_order_bonds - curFrag.size() + 1;
|
|
if (num_possible_rings < 1) continue;
|
|
|
|
CHECK_INVARIANT(nbnds % 2 == 0,
|
|
"fragment graph problem when including zero-order bonds");
|
|
nbnds = nbnds / 2;
|
|
|
|
boost::dynamic_bitset<> doneAts(nats);
|
|
unsigned int nAtomsDone = 0;
|
|
while (nAtomsDone < curFrag.size()) {
|
|
// std::cerr<<" ndone: "<<nAtomsDone<<std::endl;
|
|
// std::cerr<<" activeBonds: "<<activeBonds<<std::endl;
|
|
// std::cerr<<" done: ";
|
|
// trim all bonds that connect to degree 0 and 1 atoms
|
|
while (changed.size() > 0) {
|
|
int cand = *(changed.begin());
|
|
changed.erase(changed.begin());
|
|
if (!doneAts[cand]) {
|
|
// std::cerr<<cand<<" ";
|
|
doneAts.set(cand);
|
|
++nAtomsDone;
|
|
FindRings::trimBonds(cand, mol, changed, atomDegrees, activeBonds);
|
|
}
|
|
}
|
|
// std::cerr<<std::endl;
|
|
// std::cerr<<"activeBonds2: "<<activeBonds<<std::endl;
|
|
|
|
// all atoms left in the fragment should atleast have a degree >= 2
|
|
// collect all the degree two nodes;
|
|
INT_VECT d2nodes;
|
|
FindRings::pickD2Nodes(mol, d2nodes, curFrag, atomDegrees, activeBonds);
|
|
#if 0
|
|
std::cerr<<"d2nodes: ";
|
|
std::copy(d2nodes.begin(),d2nodes.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
#endif
|
|
if (d2nodes.size() > 0) { // deal with the current degree two nodes
|
|
// place to record any duplicate rings discovered from the current d2
|
|
// nodes
|
|
FindRings::findRingsD2nodes(mol, fragRes, invars, d2nodes, atomDegrees,
|
|
activeBonds, ringBonds, ringAtoms);
|
|
#if 0
|
|
std::cerr<<" d2nodes post: ";
|
|
std::copy(d2nodes.begin(),d2nodes.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
std::cerr<<" ring bonds: "<<ringBonds<<std::endl;
|
|
#endif
|
|
INT_VECT_CI d2i;
|
|
// trim after we have dealt with all the current d2 nodes,
|
|
for (d2i = d2nodes.begin(); d2i != d2nodes.end(); d2i++) {
|
|
doneAts.set(*d2i);
|
|
++nAtomsDone;
|
|
FindRings::trimBonds((*d2i), mol, changed, atomDegrees, activeBonds);
|
|
}
|
|
} // end of degree two nodes
|
|
else if (nAtomsDone <
|
|
curFrag.size()) { // now deal with higher degree nodes
|
|
// this is brutal - we have no degree 2 nodes - find the first possible
|
|
// degree 3 node
|
|
int cand = -1;
|
|
for (INT_VECT_CI aidi = curFrag.begin(); aidi != curFrag.end();
|
|
aidi++) {
|
|
unsigned int deg = atomDegrees[*aidi];
|
|
if (deg == 3) {
|
|
cand = (*aidi);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if we did not find a degree 3 node we are done
|
|
// REVIEW:
|
|
if (cand == -1) {
|
|
break;
|
|
}
|
|
FindRings::findRingsD3Node(mol, fragRes, invars, cand, atomDegrees,
|
|
activeBonds);
|
|
doneAts.set(cand);
|
|
++nAtomsDone;
|
|
FindRings::trimBonds(cand, mol, changed, atomDegrees, activeBonds);
|
|
} // done with degree 3 node
|
|
} // done finding rings in this fragement
|
|
|
|
#if 0
|
|
std::cerr<<"\n\nFOUND:\n";
|
|
for(VECT_INT_VECT::const_iterator iter=fragRes.begin();
|
|
iter!=fragRes.end();++iter){
|
|
std::cerr<<iter-fragRes.begin()<<": ";
|
|
std::copy(iter->begin(),iter->end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
}
|
|
#endif
|
|
// calculate the cyclomatic number for the fragment:
|
|
int nexpt = rdcast<int>((nbnds - curFrag.size() + 1));
|
|
int ssiz = rdcast<int>(fragRes.size());
|
|
|
|
// first check that we got at least the number of expected rings
|
|
// std::cerr<<"EXPT: "<<ssiz<<" "<<nexpt<<std::endl;
|
|
if (ssiz < nexpt) {
|
|
// Issue 3514824: in certain highly fused ring systems, the algorithm
|
|
// above would miss rings.
|
|
// for this fix to apply we have to have at least one non-ring bond
|
|
// that terminates in ring atoms. Find those bonds:
|
|
std::vector<const Bond *> possibleBonds;
|
|
for (unsigned int i = 0; i < nbnds; ++i) {
|
|
if (!ringBonds[i]) {
|
|
const Bond *bnd = mol.getBondWithIdx(i);
|
|
if (ringAtoms[bnd->getBeginAtomIdx()] &&
|
|
ringAtoms[bnd->getEndAtomIdx()]) {
|
|
possibleBonds.push_back(bnd);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
boost::dynamic_bitset<> deadBonds(mol.getNumBonds());
|
|
while (possibleBonds.size()) {
|
|
bool ringFound = FindRings::findRingConnectingAtoms(
|
|
mol, possibleBonds[0], fragRes, invars, ringBonds, ringAtoms);
|
|
if (!ringFound) deadBonds.set(possibleBonds[0]->getIdx(), 1);
|
|
possibleBonds.clear();
|
|
// check if we need to repeat the process:
|
|
for (unsigned int i = 0; i < nbnds; ++i) {
|
|
if (!ringBonds[i]) {
|
|
const Bond *bnd = mol.getBondWithIdx(i);
|
|
if (!deadBonds[bnd->getIdx()] &&
|
|
ringAtoms[bnd->getBeginAtomIdx()] &&
|
|
ringAtoms[bnd->getEndAtomIdx()]) {
|
|
possibleBonds.push_back(bnd);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ssiz = rdcast<int>(fragRes.size());
|
|
if (ssiz < nexpt) {
|
|
throw ValueErrorException("could not find number of expected rings.");
|
|
}
|
|
}
|
|
// if we have more than expected we need to do some cleanup
|
|
// otherwise do som clean up work
|
|
// std::cerr<<" check: "<<ssiz<<" "<<nexpt<<std::endl;
|
|
if (ssiz > nexpt) {
|
|
FindRings::removeExtraRings(fragRes, nexpt, mol);
|
|
}
|
|
|
|
#if 0
|
|
std::cerr<<"\n\nKEEPING:\n";
|
|
for(VECT_INT_VECT::const_iterator iter=fragRes.begin();
|
|
iter!=fragRes.end();++iter){
|
|
std::copy(iter->begin(),iter->end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
}
|
|
#endif
|
|
|
|
res.reserve(res.size() + fragRes.size());
|
|
for (VECT_INT_VECT::const_iterator iter = fragRes.begin();
|
|
iter != fragRes.end(); ++iter) {
|
|
res.push_back(*iter);
|
|
}
|
|
} // done with all fragments
|
|
|
|
FindRings::storeRingsInfo(mol, res);
|
|
|
|
// update the ring memberships of atoms and bonds in the molecule:
|
|
// store the SSSR rings on the the molecule as a property
|
|
// we will ignore any existing SSSRs ont eh molecule - simply overwrite
|
|
return rdcast<int>(res.size());
|
|
}
|
|
|
|
int symmetrizeSSSR(ROMol &mol) {
|
|
VECT_INT_VECT tmp;
|
|
return symmetrizeSSSR(mol, tmp);
|
|
};
|
|
|
|
int symmetrizeSSSR(ROMol &mol, VECT_INT_VECT &res) {
|
|
res.clear();
|
|
res.resize(0);
|
|
unsigned int nsssr;
|
|
VECT_INT_VECT sssrs;
|
|
|
|
// FIX: need to set flag here the symmetrization has been done in order to
|
|
// avoid
|
|
// repeating this work
|
|
if (!mol.getRingInfo()->isInitialized()) {
|
|
nsssr = findSSSR(mol, sssrs);
|
|
} else {
|
|
sssrs = mol.getRingInfo()->atomRings();
|
|
nsssr = rdcast<unsigned int>(sssrs.size());
|
|
}
|
|
|
|
VECT_INT_VECT_CI srci;
|
|
INT_VECT copr;
|
|
for (srci = sssrs.begin(); srci != sssrs.end(); srci++) {
|
|
copr = (*srci);
|
|
res.push_back(copr);
|
|
}
|
|
|
|
// now check if there are any extra rings on the molecule
|
|
if (!mol.hasProp(common_properties::extraRings)) {
|
|
// no extra rings nothign to be done
|
|
return rdcast<int>(res.size());
|
|
}
|
|
const VECT_INT_VECT &extras =
|
|
mol.getProp<VECT_INT_VECT>(common_properties::extraRings);
|
|
|
|
// std::cerr<<" extras "<<extras.size()<<std::endl;
|
|
// convert the rings to bond ids
|
|
VECT_INT_VECT bsrs, bextra;
|
|
RingUtils::convertToBonds(sssrs, bsrs, mol);
|
|
RingUtils::convertToBonds(extras, bextra, mol);
|
|
INT_VECT munion, nunion, symids;
|
|
Union(bsrs, munion);
|
|
INT_VECT sr, exr;
|
|
INT_VECT_CI eri;
|
|
unsigned int eid, srid, ssiz;
|
|
unsigned int next = rdcast<unsigned int>(bextra.size());
|
|
// now the trick is the following
|
|
// we will replace each ring of size ssiz from the SSSR with
|
|
// one of the same size rings in the extras. Compute the union of of the new
|
|
// set
|
|
// if all the union elements of the new set if same as munion we found a
|
|
// symmetric ring
|
|
for (srid = 0; srid < nsssr; srid++) {
|
|
sr = bsrs[srid];
|
|
#if 0
|
|
std::cerr<<" consider: "<<srid<<std::endl;
|
|
std::copy(sssrs[srid].begin(),sssrs[srid].end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<" | ";
|
|
std::copy(sr.begin(),sr.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
std::cerr<<"------"<<std::endl;
|
|
#endif
|
|
ssiz = rdcast<unsigned int>(sr.size());
|
|
INT_VECT exrid;
|
|
exrid.push_back(srid);
|
|
Union(bsrs, nunion, &exrid);
|
|
for (eid = 0; eid < next; eid++) {
|
|
// if we already added this ring continue
|
|
// FIX: if the ring has already been added,it probably shouldn't be
|
|
// in the list at all? Is this perhaps the most efficient way?
|
|
if (std::find(symids.begin(), symids.end(), static_cast<int>(eid)) !=
|
|
symids.end()) {
|
|
continue;
|
|
}
|
|
exr = bextra[eid];
|
|
#if 0
|
|
std::cerr<<" "<<eid<<": ";
|
|
std::copy(extras[eid].begin(),extras[eid].end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<" | ";
|
|
std::copy(exr.begin(),exr.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
#endif
|
|
if (ssiz == exr.size()) {
|
|
// std::cerr<<" possible"<<std::endl;
|
|
INT_VECT eunion;
|
|
Union(nunion, exr, eunion);
|
|
#if 0
|
|
std::cerr<<" munion: ";
|
|
std::copy(munion.begin(),munion.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
std::cerr<<" eunion: ";
|
|
std::copy(eunion.begin(),eunion.end(),std::ostream_iterator<int>(std::cerr," "));
|
|
std::cerr<<std::endl;
|
|
#endif
|
|
// now check if the eunion is same as the original union from the SSSRs
|
|
if (eunion.size() == munion.size()) {
|
|
// we found a symmetric ring
|
|
symids.push_back(eid);
|
|
// std::cerr<<" keep!"<<std::endl;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// add the symmetric rings
|
|
for (eri = symids.begin(); eri != symids.end(); eri++) {
|
|
exr = extras[*eri];
|
|
res.push_back(exr);
|
|
FindRings::storeRingInfo(mol, exr);
|
|
}
|
|
if (mol.hasProp(common_properties::extraRings)) {
|
|
mol.clearProp(common_properties::extraRings);
|
|
}
|
|
return rdcast<int>(res.size());
|
|
}
|
|
|
|
namespace {
|
|
void _DFS(const ROMol &mol, const Atom *atom, INT_VECT &atomColors,
|
|
std::vector<const Atom *> &traversalOrder, VECT_INT_VECT &res,
|
|
const Atom *fromAtom = nullptr) {
|
|
// std::cerr<<" dfs: "<<atom->getIdx()<<" from
|
|
// "<<(fromAtom?fromAtom->getIdx():-1)<<std::endl;
|
|
PRECONDITION(atom, "bad atom");
|
|
PRECONDITION(atomColors[atom->getIdx()] == 0, "bad color");
|
|
atomColors[atom->getIdx()] = 1;
|
|
traversalOrder.push_back(atom);
|
|
|
|
ROMol::ADJ_ITER nbrIter, endNbrs;
|
|
boost::tie(nbrIter, endNbrs) = mol.getAtomNeighbors(atom);
|
|
while (nbrIter != endNbrs) {
|
|
const Atom *nbr = mol[*nbrIter].get();
|
|
unsigned int nbrIdx = nbr->getIdx();
|
|
// std::cerr<<" "<<atom->getIdx()<<" consider: "<<nbrIdx<<"
|
|
// "<<atomColors[nbrIdx]<<std::endl;
|
|
if (atomColors[nbrIdx] == 0) {
|
|
if (nbr->getDegree() < 2) {
|
|
atomColors[nbr->getIdx()] = 2;
|
|
} else {
|
|
_DFS(mol, nbr, atomColors, traversalOrder, res, atom);
|
|
}
|
|
} else if (atomColors[nbrIdx] == 1) {
|
|
if (fromAtom && nbrIdx != fromAtom->getIdx()) {
|
|
INT_VECT cycle;
|
|
auto lastElem =
|
|
std::find(traversalOrder.rbegin(), traversalOrder.rend(), atom);
|
|
for (auto rIt = lastElem; // traversalOrder.rbegin();
|
|
rIt != traversalOrder.rend() && (*rIt)->getIdx() != nbrIdx;
|
|
++rIt) {
|
|
cycle.push_back((*rIt)->getIdx());
|
|
}
|
|
cycle.push_back(nbrIdx);
|
|
res.push_back(cycle);
|
|
// std::cerr<<" cycle from "<<atom->getIdx()<<" :";
|
|
// std::copy(cycle.begin(),cycle.end(),std::ostream_iterator<int>(std::cerr,"
|
|
// "));
|
|
// std::cerr<<std::endl;
|
|
}
|
|
}
|
|
++nbrIter;
|
|
}
|
|
atomColors[atom->getIdx()] = 2;
|
|
traversalOrder.pop_back();
|
|
// std::cerr<<" done "<<atom->getIdx()<<std::endl;
|
|
}
|
|
} // end of anonymous namespace
|
|
void fastFindRings(const ROMol &mol) {
|
|
// std::cerr<<"ffr"<<std::endl;
|
|
VECT_INT_VECT res;
|
|
res.resize(0);
|
|
// check if SSSR's are already on the molecule
|
|
if (mol.getRingInfo()->isInitialized()) {
|
|
return;
|
|
} else {
|
|
mol.getRingInfo()->initialize();
|
|
}
|
|
|
|
unsigned int nats = mol.getNumAtoms();
|
|
|
|
INT_VECT atomColors(nats, 0);
|
|
|
|
for (unsigned int i = 0; i < nats; ++i) {
|
|
if (atomColors[i]) continue;
|
|
if (mol.getAtomWithIdx(i)->getDegree() < 2) {
|
|
atomColors[i] = 2;
|
|
continue;
|
|
}
|
|
std::vector<const Atom *> traversalOrder;
|
|
_DFS(mol, mol.getAtomWithIdx(i), atomColors, traversalOrder, res);
|
|
}
|
|
|
|
FindRings::storeRingsInfo(mol, res);
|
|
}
|
|
|
|
} // end of MolOps namespace
|
|
|
|
} // end of RDKit namespace
|