mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* add port of centres
* Several changes:
- Added a test based on RDKit issue 2984
(default RDKit fails it, this gets it right)
- Use bond directions for bond stereo (label is no longer required)
- Fix bugs in rules 4b and 5new
- Fix some mem errors
- clang-formatted
- some other minor cleanups
* Several changes and some improvements:
- Added LGPL license, as well as a mention in the doc.
- Fix/update/add some comments
- Fix typo/bug in Mancude calculation
- Fix bug in rules 4b, 5New
- Fix Sp2 Bond dir reference
- Re clang-format
- other minor changes suggested by Dan
* Another bunch of changes:
- require integer-order bonds; kekulize when required
- fix fraction comparison
- rename sq Cis/Trans e/z
- replace queues with vectors
- update copyright notices
- revert LGPL changes
- fix Asymmetric typo
* move to separate lib/mod, add python validation test
* Moving away from the original implementation:
- Rename to CIPLabeler
- Remove the abstraction layer
- Remove some stats stuff
- Push some CIPMol functions down to Node
- Use RDKit's isotope info
* Another bundle of changes. The most relevant ones:
- fix parity translation
- use cis trans as bond reference -- breaks #2984 test
- kill a lot of unused code
- use lists for queues
- store nodes and edges in digraph
- add prefixes to class data member names
- update changeRoot() test
- use fastFindRings() for mancude rings
- update docs
- add references to the scientific paper
- Document the Mancude functions
- Fix Mancude atom types and their comments
- remove mol data member from SequenceRule
- replace Fraction with boost::rational
- update comments, docstrings and the doc
* fix building the test
* Changes here include:
- adding bitset overload for the labeling function
- python wrap of the overload
- handling trigonal pyramids with implicit H
- setting bond labels sets stereo atoms, cis/trans
- nix LEFT/RIGHT/TOGETHER/OPPOSITE constants
- don't use GLOB in cmake
- a decent amount of refactoring
* Minor edits to new_CIP_labeling (#6)
* Some changes for clarity
Added some documentation and changed some variable names to match
my understanding. Also a ran clang-tidy to ensure that all blocks
were brace-enclosed.
* Return a reference instead of a copy for performance
This is called many times and showed up after some light
profiling. This change bumped throughput by about 20%
* move out of Graphmol
* move .hpp headers to .h
* update documentation; add label set of atoms test
* Address comments:
- Added references to centres to CIPLabeler.h and Python Wrap.
- Update validation test to skip sanitization.
- Document mancude fractional atomic number calculation.
- Use unittest assertions in python test.
- Update mancude docstrings to 'resonance' instad of 'tautomers'.
- Rename prioritise() to prioritize().
- Add postcondition to check carriers size in Tetrahedral.cpp.
- Use getNeighbors() in Tetrahedral.cpp.
- Move findStereoAtoms to Chirality namespace.
- Move code back into GraphMol.
- Fix typos and reformat doc.
* More comments:
- Mention why we use boost's unordered map rather than the std one.
- Fix include in Python wrapper.
* Addressed second batch of comments:
- fix the bug in rule 4b
- fix docstring for rule 2
- move atomic mass calculation from rule 2 to node
- addressed some build warnings
- simplify sp2bond::label(comp)
- add start/end atoms to Sp2Bond constructor
- update system/local includes
Co-authored-by: Dan N <dan.nealschneider@schrodinger.com>
204 lines
5.3 KiB
C++
204 lines
5.3 KiB
C++
//
|
|
//
|
|
// Copyright (C) 2020 Schrödinger, LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include <list>
|
|
#include <sstream>
|
|
|
|
#include "Digraph.h"
|
|
#include "CIPMol.h"
|
|
#include "Node.h"
|
|
#include "Edge.h"
|
|
|
|
namespace RDKit {
|
|
namespace CIPLabeler {
|
|
|
|
namespace {
|
|
|
|
/**
|
|
* Upper limit on the size of the digraph, stops out of memory error with a
|
|
* more graceful failure. 0=Infinite
|
|
*/
|
|
const int MAX_NODE_COUNT = 100000;
|
|
|
|
/**
|
|
* Used for debugging only, 0=Infinite
|
|
*/
|
|
const int MAX_NODE_DIST = 0;
|
|
} // namespace
|
|
|
|
Node &Digraph::addNode(std::vector<char> &&visit, Atom *atom,
|
|
boost::rational<int> &&frac, int dist, int flags) {
|
|
|
|
d_nodes.emplace_back(this, std::move(visit), atom, std::move(frac), dist,
|
|
flags);
|
|
return d_nodes.back();
|
|
}
|
|
|
|
void Digraph::addEdge(Node *beg, Bond *bond, Node *end) {
|
|
d_edges.emplace_back(beg, end, bond);
|
|
auto &e = d_edges.back();
|
|
beg->add(&e);
|
|
end->add(&e);
|
|
}
|
|
|
|
Digraph::Digraph(const CIPMol &mol, Atom *atom) : d_mol{mol} {
|
|
PRECONDITION(atom, "cannot init digraph on a nullptr")
|
|
|
|
auto visit = std::vector<char>(d_mol.getNumAtoms());
|
|
visit[atom->getIdx()] = 1;
|
|
|
|
auto dist = 1;
|
|
auto flags = 0x0;
|
|
auto atomic_num = atom->getAtomicNum();
|
|
|
|
dp_root = &addNode(std::move(visit), atom, atomic_num, dist, flags);
|
|
dp_origin = dp_root;
|
|
}
|
|
|
|
const CIPMol &Digraph::getMol() const { return d_mol; };
|
|
|
|
Node *Digraph::getOriginalRoot() const { return dp_origin; };
|
|
|
|
Node *Digraph::getCurrentRoot() const { return dp_root; }
|
|
|
|
int Digraph::getNumNodes() const { return d_nodes.size(); }
|
|
|
|
std::vector<Node *> Digraph::getNodes(Atom *atom) const {
|
|
std::vector<Node *> result;
|
|
auto queue = std::list<Node *>({getCurrentRoot()});
|
|
|
|
for (const auto &node : queue) {
|
|
|
|
if (atom == node->getAtom()) {
|
|
result.push_back(node);
|
|
}
|
|
for (const auto &e : node->getEdges()) {
|
|
if (!e->isBeg(node)) {
|
|
continue;
|
|
}
|
|
queue.push_back(e->getEnd());
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Access the reference atom for Rule 6 (if one is set).
|
|
*/
|
|
Atom *Digraph::getRule6Ref() const { return dp_rule6Ref; }
|
|
|
|
/**
|
|
* Used exclusively for Rule 6, we set one atom as the reference.
|
|
* @param ref reference atom
|
|
*/
|
|
void Digraph::setRule6Ref(Atom *ref) { dp_rule6Ref = ref; }
|
|
|
|
/**
|
|
* Sets the root node of this digraph by flipping the directions
|
|
* of edges as required.
|
|
*
|
|
* @param newroot the new root
|
|
*/
|
|
void Digraph::changeRoot(Node *newroot) {
|
|
|
|
std::vector<Edge *> toflip;
|
|
auto queue = std::list<Node *>({newroot});
|
|
for (const auto &node : queue) {
|
|
|
|
for (const auto &e : node->getEdges()) {
|
|
if (e->isEnd(node)) {
|
|
toflip.push_back(e);
|
|
queue.push_back(e->getBeg());
|
|
}
|
|
}
|
|
}
|
|
for (auto &e : toflip) {
|
|
e->flip();
|
|
}
|
|
dp_root = newroot;
|
|
}
|
|
|
|
void Digraph::expand(Node *beg) {
|
|
const auto &atom = beg->getAtom();
|
|
const auto &edges = beg->getEdges();
|
|
const auto &prev =
|
|
edges.size() > 0 && !edges[0]->isBeg(beg) ? edges[0]->getBond() : nullptr;
|
|
|
|
if (MAX_NODE_DIST > 0 && beg->getDistance() > MAX_NODE_DIST) {
|
|
return;
|
|
}
|
|
if (MAX_NODE_COUNT > 0 && d_nodes.size() >= MAX_NODE_COUNT) {
|
|
std::stringstream errmsg;
|
|
errmsg << "Digraph generation failed: more than " << MAX_NODE_COUNT
|
|
<< "nodes found.";
|
|
throw TooManyNodesException(errmsg.str());
|
|
}
|
|
|
|
// create 'explicit' nodes
|
|
for (const auto &bond : d_mol.getBonds(atom)) {
|
|
const auto &nbr = bond->getOtherAtom(atom);
|
|
const int nbrIdx = nbr->getIdx();
|
|
const int bord = d_mol.getBondOrder(bond);
|
|
const int virtual_nodes = bord - 1;
|
|
|
|
if (!beg->isVisited(nbrIdx)) {
|
|
auto end = beg->newChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
|
|
// duplicate nodes for bond orders (except for root atoms...)
|
|
// for example >S=O
|
|
if (dp_origin != beg) {
|
|
if (atom->getFormalCharge() < 0 &&
|
|
d_mol.getFractionalAtomicNum(atom).denominator() > 1) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
} else {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
}
|
|
} else if (bond == prev) { // bond order expansion (backwards)
|
|
if (dp_origin->getAtom() != nbr) {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
auto end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
} else { // ring closures
|
|
auto end = beg->newRingDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
|
|
if (atom->getFormalCharge() < 0 &&
|
|
d_mol.getFractionalAtomicNum(atom).denominator() > 1) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
} else {
|
|
for (int i = 0; i < virtual_nodes; ++i) {
|
|
end = beg->newBondDuplicateChild(nbrIdx, nbr);
|
|
addEdge(beg, bond, end);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create implicit hydrogen nodes
|
|
const int hcnt = atom->getTotalNumHs();
|
|
for (int i = 0; i < hcnt; ++i) {
|
|
auto end = beg->newImplicitHydrogenChild();
|
|
addEdge(beg, nullptr, end);
|
|
}
|
|
}
|
|
|
|
} // namespace CIPLabeler
|
|
} // namespace RDKit
|