mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* CIP labeller: Don't calculate auxiliary descriptors unnecessarily The first 3 rules (the constitutional rules) are pretty easy to understand. After rule 3, we need to calculate auxiliary stereo descriptors to break ties. However, we _were actually_ calculating auxiliary stereodescriptors for all centers! We should only need to calculate auxiliary stereocenters for sites that are needed to break ties. This cost time - it also caused errors if the auxiliary descriptors needed a graph expansion, because bonds in the digraph might be pointed in the wrong direction. Example case PDB ID 4AXM Before this commit, errored with "Could not calculate parity! Carrier mismatch" after 14s. After this commit, completes successfully in 0.036s. Labelled centers all match (for the centers that had labels in the failure case). Includes a test that I can imagine breaking with this optimization. The reference labels are from before this change * Ensure all "arms" of stereo bonds and atropisomer bonds are expanded For tetrahedral centers, ranking using the constitutional rules always expands as far as is needed (but no further). For SP2bond and atropisomers, if the first side is not resolvable, the second side is never visited. If the constitutional rules don't resolve a side, we need to label the auxiliary centers. It's important to label all auxiliary centers that _will_ be visited, so we need to know what centers will be visited. This commit updates the label() call in SP2 and Atropisomer bonds to always attempt to label both sides if using the constitutional rule set. The constitutional rules are cheap, and if they fail, we always go on to the full rule set. It is not a savings to skip the search on the second side if we're going to keep going anyway! Includes a test that reproduces Ricardo's example. This has no measurable effect on performance relative to the original solution * If any parts of the center have been seen, label it. I couldn't make an example hit this, but Ric is totally theoretically right * Greg's ranges suggestion #2 Co-authored-by: Greg Landrum <greg.landrum@gmail.com> * any_of for container search Co-authored-by: Greg Landrum <greg.landrum@gmail.com> --------- Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
208 lines
5.6 KiB
C++
208 lines
5.6 KiB
C++
//
|
|
//
|
|
// Copyright (C) 2020 Schrödinger, LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include <list>
|
|
#include <sstream>
|
|
|
|
#include "Digraph.h"
|
|
#include "CIPMol.h"
|
|
#include "Node.h"
|
|
#include "Edge.h"
|
|
|
|
namespace RDKit {
|
|
namespace CIPLabeler {
|
|
|
|
namespace {
|
|
|
|
/**
|
|
* Upper limit on the size of the digraph, stops out of memory error with a
|
|
* more graceful failure. 0=Infinite
|
|
*/
|
|
const int MAX_NODE_COUNT = 100000;
|
|
|
|
/**
|
|
* Used for debugging only, 0=Infinite
|
|
*/
|
|
const int MAX_NODE_DIST = 0;
|
|
} // namespace
|
|
|
|
Node &Digraph::addNode(std::vector<char> &&visit, Atom *atom,
|
|
boost::rational<int> &&frac, int dist, int flags) {
|
|
d_nodes.emplace_back(this, std::move(visit), atom, std::move(frac), dist,
|
|
flags);
|
|
return d_nodes.back();
|
|
}
|
|
|
|
bool Digraph::seenAtom(Atom *atom) const {
|
|
return std::ranges::any_of(
|
|
d_nodes, [&](const auto &n) { return n.getAtom() == atom; });
|
|
}
|
|
|
|
void Digraph::addEdge(Node *beg, Bond *bond, Node *end) {
|
|
d_edges.emplace_back(beg, end, bond);
|
|
auto &e = d_edges.back();
|
|
beg->add(&e);
|
|
end->add(&e);
|
|
}
|
|
|
|
Digraph::Digraph(const CIPMol &mol, Atom *atom, bool atropisomerMode)
|
|
: d_mol{mol} {
|
|
PRECONDITION(atom, "cannot init digraph on a nullptr")
|
|
|
|
auto visit = std::vector<char>(d_mol.getNumAtoms());
|
|
visit[atom->getIdx()] = 1;
|
|
|
|
auto dist = 1;
|
|
auto flags = 0x0;
|
|
auto atomic_num = atom->getAtomicNum();
|
|
|
|
dp_root = &addNode(std::move(visit), atom, atomic_num, dist, flags);
|
|
dp_origin = dp_root;
|
|
d_atropisomerMode = atropisomerMode;
|
|
}
|
|
|
|
const CIPMol &Digraph::getMol() const { return d_mol; };
|
|
|
|
Node *Digraph::getOriginalRoot() const { return dp_origin; };
|
|
|
|
Node *Digraph::getCurrentRoot() const { return dp_root; }
|
|
|
|
int Digraph::getNumNodes() const { return d_nodes.size(); }
|
|
|
|
std::vector<Node *> Digraph::getNodes(Atom *atom) const {
|
|
std::vector<Node *> result;
|
|
std::vector<Node *> queue = {getCurrentRoot()};
|
|
|
|
for (size_t i = 0; i < queue.size(); ++i) {
|
|
auto node = queue[i];
|
|
if (atom == node->getAtom()) {
|
|
result.push_back(node);
|
|
}
|
|
for (const auto &e : node->getEdges()) {
|
|
if (!e->isBeg(node)) {
|
|
continue;
|
|
}
|
|
queue.push_back(e->getEnd());
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Access the reference atom for Rule 6 (if one is set).
|
|
*/
|
|
Atom *Digraph::getRule6Ref() const { return dp_rule6Ref; }
|
|
|
|
/**
|
|
* Used exclusively for Rule 6, we set one atom as the reference.
|
|
* @param ref reference atom
|
|
*/
|
|
void Digraph::setRule6Ref(Atom *ref) { dp_rule6Ref = ref; }
|
|
|
|
/**
|
|
* Sets the root node of this digraph by flipping the directions
|
|
* of edges as required.
|
|
*
|
|
* @param newroot the new root
|
|
*/
|
|
void Digraph::changeRoot(Node *newroot) {
|
|
std::vector<Edge *> toflip;
|
|
auto queue = std::list<Node *>({newroot});
|
|
for (const auto &node : queue) {
|
|
for (const auto &e : node->getEdges()) {
|
|
if (e->isEnd(node)) {
|
|
toflip.push_back(e);
|
|
queue.push_back(e->getBeg());
|
|
}
|
|
}
|
|
}
|
|
for (auto &e : toflip) {
|
|
e->flip();
|
|
}
|
|
dp_root = newroot;
|
|
}
|
|
|
|
void Digraph::expand(Node *beg) {
|
|
const auto &atom = beg->getAtom();
|
|
const auto &edges = beg->getEdges();
|
|
const auto &prev =
|
|
edges.size() > 0 && !edges[0]->isBeg(beg) ? edges[0]->getBond() : nullptr;
|
|
|
|
if (MAX_NODE_DIST > 0 && beg->getDistance() > MAX_NODE_DIST) {
|
|
return;
|
|
}
|
|
if (MAX_NODE_COUNT > 0 && d_nodes.size() >= MAX_NODE_COUNT) {
|
|
std::stringstream errmsg;
|
|
errmsg << "Digraph generation failed: more than " << MAX_NODE_COUNT
|
|
<< "nodes found.";
|
|
throw TooManyNodesException(errmsg.str());
|
|
}
|
|
|
|
// create 'explicit' nodes
|
|
for (const auto &bond : d_mol.getBonds(atom)) {
|
|
const auto &nbr = bond->getOtherAtom(atom);
|
|
const int nbrIdx = nbr->getIdx();
|
|
const int bord = d_mol.getBondOrder(bond);
|
|
const int virtual_nodes = bord - 1;
|
|
|
|
if (!beg->isVisited(nbrIdx)) {
|
|
auto end = beg->newChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
|
|
// duplicate nodes for bond orders (except for root atoms...)
|
|
// for example >S=O
|
|
if (dp_origin != beg || d_atropisomerMode) {
|
|
if (atom->getFormalCharge() < 0 &&
|
|
d_mol.getFractionalAtomicNum(atom).denominator() > 1) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
} else {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
}
|
|
} else if (bond == prev) { // bond order expansion (backwards)
|
|
if (dp_origin->getAtom() != nbr || d_atropisomerMode) {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
auto end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
} else { // ring closures
|
|
auto end = beg->newRingDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
|
|
if (atom->getFormalCharge() < 0 &&
|
|
d_mol.getFractionalAtomicNum(atom).denominator() > 1) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
} else {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create implicit hydrogen nodes
|
|
const int hcnt = atom->getTotalNumHs();
|
|
for (int i = 0; i < hcnt; ++i) {
|
|
auto end = beg->newImplicitHydrogenChild();
|
|
addEdge(beg, nullptr, end);
|
|
}
|
|
}
|
|
|
|
} // namespace CIPLabeler
|
|
} // namespace RDKit
|