mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
- Switched from dynamic to static allocation for an instance of `MCSParameters` - Switched to using `auto` where possible - Added a few `CHECK_INVARIANT` where appropriate before dereferencing pointers - Moved some inline comments to the previous line to improve readability - Added a early check for `CompleteRingsOnly` in `checkBondRingMatch()` to improve computational efficiency - Removed `RingMatchTableSet` entirely as 1) it is unnecessary since its functionality is already provided by `RingInfo` 2) it abused the `userData` pointer. This allows cleaning up and simplifying the code, particularly the Python wrappers which had a significant amount of added complexity to support it - Removed all the code that was deprecated several releases ago - Reimplemented ringFusionCheck() from scratch to address several bug reports; also switched from std::set to boost::dynamic_bitset for better efficiency - Replaced boost::tie with boost::make_iterator_range - Modernized `for` loops where possible - Removed entirely the QueryRings structure as its functionality is already available in RingInfo - Removed entirely the _DFS() function since the same algorithm can be implemented in a simpler and more efficient way using RingInfo (from 2m28.441s to 2m9.859s for the same task) - Replaced std::vector<bool> with boost::dynamic_bitset - Replaced C-style casts with C++ casts - Replaced some size_t with unsigned int - Refactored checkIfRingsAreClosed() such that checkNoLoneRingAtoms() is not needed anymore - Added a test for slow runtimes with CompleteRingsOnly - Setting Timeout to 0 means no timeout, as it should be - Removed unused `steps` variable from `MaximumCommonSubgraph::growSeeds` - Storing both Atom and Bond pointers and their indices on Seed and MCS data structures is time-consuming and a potential source of incons istencies; storing pointers is sufficient - Promoted `MaximumCommonSubgraph::match` from `private` to `public` - `NewBonds` was declared `mutable`, but `Seed::fillNewBonds()` was incorrectly declared as `non-const`, which caused the need for an ugly (and unnecessary) `const_cast`. I have now removed the `const_cast` and correctly declared functions that alter `NewBonds` as `const`, since `NewBonds` is explicitly `mut able` - Removed some useless random scoping that was peppering the MCS code - Removed a significant amount of duplicate code from the Python wrappers by inheriting from a base `PyMCSWrapper` class - Fixed #6082 - Fixed #5510 - Fixed #5457 - Fixed #5440 - Fixed #5411 - Fixed #3965 - Fixed #6578 Co-authored-by: ptosco <paolo.tosco@novartis.com>
313 lines
9.8 KiB
C++
313 lines
9.8 KiB
C++
//
|
|
// Copyright (C) 2004-2019 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "RingInfo.h"
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <algorithm>
|
|
|
|
namespace RDKit {
|
|
RingInfo::INT_VECT RingInfo::atomRingSizes(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_atomMembers.size()) {
|
|
INT_VECT res(d_atomMembers[idx].size());
|
|
std::transform(d_atomMembers[idx].begin(), d_atomMembers[idx].end(),
|
|
res.begin(),
|
|
[this](int ri) { return d_atomRings.at(ri).size(); });
|
|
return res;
|
|
}
|
|
return INT_VECT();
|
|
}
|
|
bool RingInfo::isAtomInRingOfSize(unsigned int idx, unsigned int size) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_atomMembers.size()) {
|
|
return std::find_if(d_atomMembers[idx].begin(), d_atomMembers[idx].end(),
|
|
[this, size](int ri) {
|
|
return d_atomRings.at(ri).size() == size;
|
|
}) != d_atomMembers[idx].end();
|
|
}
|
|
return false;
|
|
}
|
|
unsigned int RingInfo::minAtomRingSize(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_atomMembers.size() && !d_atomMembers[idx].empty()) {
|
|
auto ri = *std::min_element(
|
|
d_atomMembers[idx].begin(), d_atomMembers[idx].end(),
|
|
[this](int ri1, int ri2) {
|
|
return d_atomRings.at(ri1).size() < d_atomRings.at(ri2).size();
|
|
});
|
|
return d_atomRings.at(ri).size();
|
|
}
|
|
return 0;
|
|
}
|
|
unsigned int RingInfo::numAtomRings(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_atomMembers.size()) {
|
|
return rdcast<unsigned int>(d_atomMembers[idx].size());
|
|
}
|
|
return 0;
|
|
}
|
|
const RingInfo::INT_VECT &RingInfo::atomMembers(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
static const INT_VECT emptyVect;
|
|
if (idx < d_atomMembers.size()) {
|
|
return d_atomMembers[idx];
|
|
}
|
|
return emptyVect;
|
|
}
|
|
bool RingInfo::areAtomsInSameRingOfSize(unsigned int idx1, unsigned int idx2,
|
|
unsigned int size) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx1 >= d_atomMembers.size() || idx2 >= d_atomMembers.size()) {
|
|
return false;
|
|
}
|
|
auto it1 = d_atomMembers[idx1].begin();
|
|
auto it2 = d_atomMembers[idx2].begin();
|
|
while (it1 != d_atomMembers[idx1].end() && it2 != d_atomMembers[idx2].end()) {
|
|
if (*it1 < *it2) {
|
|
++it1;
|
|
} else if (*it1 > *it2) {
|
|
++it2;
|
|
} else if (!size || d_atomRings.at(*it1).size() == size) {
|
|
return true;
|
|
} else {
|
|
++it1;
|
|
++it2;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
RingInfo::INT_VECT RingInfo::bondRingSizes(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_bondMembers.size()) {
|
|
INT_VECT res(d_bondMembers[idx].size());
|
|
std::transform(d_bondMembers[idx].begin(), d_bondMembers[idx].end(),
|
|
res.begin(),
|
|
[this](int ri) { return d_bondRings.at(ri).size(); });
|
|
return res;
|
|
}
|
|
return INT_VECT();
|
|
}
|
|
bool RingInfo::isBondInRingOfSize(unsigned int idx, unsigned int size) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_bondMembers.size()) {
|
|
return std::find_if(d_bondMembers[idx].begin(), d_bondMembers[idx].end(),
|
|
[this, size](int ri) {
|
|
return d_bondRings.at(ri).size() == size;
|
|
}) != d_bondMembers[idx].end();
|
|
}
|
|
return false;
|
|
}
|
|
unsigned int RingInfo::minBondRingSize(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_bondMembers.size() && d_bondMembers[idx].size()) {
|
|
return d_bondRings
|
|
.at(*std::min_element(
|
|
d_bondMembers[idx].begin(), d_bondMembers[idx].end(),
|
|
[this](int ri1, int ri2) {
|
|
return d_bondRings.at(ri1).size() < d_bondRings.at(ri2).size();
|
|
}))
|
|
.size();
|
|
}
|
|
return 0;
|
|
}
|
|
unsigned int RingInfo::numBondRings(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx < d_bondMembers.size()) {
|
|
return rdcast<unsigned int>(d_bondMembers[idx].size());
|
|
}
|
|
return 0;
|
|
}
|
|
const RingInfo::INT_VECT &RingInfo::bondMembers(unsigned int idx) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
static const INT_VECT emptyVect;
|
|
if (idx < d_bondMembers.size()) {
|
|
return d_bondMembers[idx];
|
|
}
|
|
return emptyVect;
|
|
}
|
|
bool RingInfo::areBondsInSameRingOfSize(unsigned int idx1, unsigned int idx2,
|
|
unsigned int size) const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
|
|
if (idx1 >= d_bondMembers.size() || idx2 >= d_bondMembers.size()) {
|
|
return false;
|
|
}
|
|
auto it1 = d_bondMembers[idx1].begin();
|
|
auto it2 = d_bondMembers[idx2].begin();
|
|
while (it1 != d_bondMembers[idx1].end() && it2 != d_bondMembers[idx2].end()) {
|
|
if (*it1 < *it2) {
|
|
++it1;
|
|
} else if (*it1 > *it2) {
|
|
++it2;
|
|
} else if (!size || d_bondRings.at(*it1).size() == size) {
|
|
return true;
|
|
} else {
|
|
++it1;
|
|
++it2;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
unsigned int RingInfo::numRings() const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
PRECONDITION(d_atomRings.size() == d_bondRings.size(), "length mismatch");
|
|
return rdcast<unsigned int>(d_atomRings.size());
|
|
}
|
|
|
|
unsigned int RingInfo::addRing(const INT_VECT &atomIndices,
|
|
const INT_VECT &bondIndices) {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
PRECONDITION(atomIndices.size() == bondIndices.size(), "length mismatch");
|
|
for (const auto &i : atomIndices) {
|
|
if (i >= static_cast<int>(d_atomMembers.size())) {
|
|
d_atomMembers.resize(i + 1);
|
|
}
|
|
d_atomMembers[i].push_back(d_atomRings.size());
|
|
}
|
|
for (const auto &i : bondIndices) {
|
|
if (i >= static_cast<int>(d_bondMembers.size())) {
|
|
d_bondMembers.resize(i + 1);
|
|
}
|
|
d_bondMembers[i].push_back(d_bondRings.size());
|
|
}
|
|
d_atomRings.push_back(atomIndices);
|
|
d_bondRings.push_back(bondIndices);
|
|
POSTCONDITION(d_atomRings.size() == d_bondRings.size(), "length mismatch");
|
|
return rdcast<unsigned int>(d_atomRings.size());
|
|
}
|
|
|
|
bool RingInfo::isRingFused(unsigned int ringIdx) {
|
|
PRECONDITION(ringIdx < d_bondRings.size(), "ringIdx out of bounds");
|
|
if (d_fusedRings.empty()) {
|
|
initFusedRings();
|
|
}
|
|
return d_fusedRings.at(ringIdx).any();
|
|
}
|
|
|
|
bool RingInfo::areRingsFused(unsigned int ring1Idx, unsigned int ring2Idx) {
|
|
PRECONDITION(ring1Idx < d_bondRings.size(), "ring1Idx out of bounds");
|
|
PRECONDITION(ring2Idx < d_bondRings.size(), "ring2Idx out of bounds");
|
|
if (d_fusedRings.empty()) {
|
|
initFusedRings();
|
|
}
|
|
return d_fusedRings.at(ring1Idx).test(ring2Idx);
|
|
}
|
|
|
|
unsigned int RingInfo::numFusedBonds(unsigned int ringIdx) {
|
|
PRECONDITION(ringIdx < d_bondRings.size(), "ringIdx out of bounds");
|
|
if (d_numFusedBonds.empty()) {
|
|
d_numFusedBonds.resize(d_bondRings.size(), 0);
|
|
for (unsigned int ri = 0; ri < d_bondRings.size(); ++ri) {
|
|
d_numFusedBonds[ri] += std::count_if(
|
|
d_bondRings[ri].begin(), d_bondRings[ri].end(),
|
|
[this](unsigned int bi) { return numBondRings(bi) > 1; });
|
|
}
|
|
}
|
|
return d_numFusedBonds[ringIdx];
|
|
}
|
|
|
|
unsigned int RingInfo::numFusedRingNeighbors(unsigned int ringIdx) {
|
|
PRECONDITION(ringIdx < d_fusedRings.size(), "ringIdx out of bounds");
|
|
return d_fusedRings[ringIdx].count();
|
|
}
|
|
|
|
std::vector<unsigned int> RingInfo::fusedRingNeighbors(unsigned int ringIdx) {
|
|
PRECONDITION(ringIdx < d_bondRings.size(), "ringIdx out of bounds");
|
|
std::vector<unsigned int> res;
|
|
res.reserve(d_fusedRings[ringIdx].count());
|
|
for (unsigned int i = 0; i < d_fusedRings[ringIdx].size(); ++i) {
|
|
if (d_fusedRings[ringIdx].test(i)) {
|
|
res.push_back(i);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
void RingInfo::initFusedRings() {
|
|
if (d_bondRings.empty()) {
|
|
return;
|
|
}
|
|
d_fusedRings.resize(d_bondRings.size());
|
|
for (auto &fusedRing : d_fusedRings) {
|
|
fusedRing.resize(d_bondRings.size());
|
|
}
|
|
for (const auto &ringIndices : d_bondMembers) {
|
|
if (ringIndices.size() <= 1) {
|
|
continue;
|
|
}
|
|
for (unsigned int i = 0; i < ringIndices.size() - 1; ++i) {
|
|
unsigned int ringIdx1 = ringIndices[i];
|
|
for (unsigned int j = i + 1; j < ringIndices.size(); ++j) {
|
|
unsigned int ringIdx2 = ringIndices[j];
|
|
d_fusedRings[ringIdx1].set(ringIdx2);
|
|
d_fusedRings[ringIdx2].set(ringIdx1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef RDK_USE_URF
|
|
unsigned int RingInfo::numRingFamilies() const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
return d_atomRingFamilies.size();
|
|
};
|
|
|
|
unsigned int RingInfo::numRelevantCycles() const {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
return rdcast<unsigned int>(RDL_getNofRC(dp_urfData.get()));
|
|
};
|
|
|
|
unsigned int RingInfo::addRingFamily(const INT_VECT &atomIndices,
|
|
const INT_VECT &bondIndices) {
|
|
PRECONDITION(df_init, "RingInfo not initialized");
|
|
d_atomRingFamilies.push_back(atomIndices);
|
|
d_bondRingFamilies.push_back(bondIndices);
|
|
POSTCONDITION(d_atomRingFamilies.size() == d_bondRingFamilies.size(),
|
|
"length mismatch");
|
|
|
|
return rdcast<unsigned int>(d_atomRingFamilies.size());
|
|
}
|
|
#endif
|
|
|
|
void RingInfo::initialize() {
|
|
PRECONDITION(!df_init, "already initialized");
|
|
df_init = true;
|
|
};
|
|
void RingInfo::reset() {
|
|
if (!df_init) {
|
|
return;
|
|
}
|
|
df_init = false;
|
|
d_atomMembers.clear();
|
|
d_bondMembers.clear();
|
|
d_atomRings.clear();
|
|
d_bondRings.clear();
|
|
#ifdef RDK_USE_URF
|
|
d_atomRingFamilies.clear();
|
|
d_bondRingFamilies.clear();
|
|
#endif
|
|
}
|
|
void RingInfo::preallocate(unsigned int numAtoms, unsigned int numBonds) {
|
|
d_atomMembers.resize(numAtoms);
|
|
d_bondMembers.resize(numBonds);
|
|
}
|
|
} // namespace RDKit
|