Files
rdkit/Code/GraphMol/CIPLabeler/Node.cpp
Ric d54e77e375 Add new CIP labelling algorithm (#3234)
* add port of centres

* Several changes:
    - Added a test based on RDKit issue 2984
        (default RDKit fails it, this gets it right)
    - Use bond directions for bond stereo (label is no longer required)
    - Fix bugs in rules 4b and 5new
    - Fix some mem errors
    - clang-formatted
    - some other minor cleanups

* Several changes and some improvements:
    - Added LGPL license, as well as a mention in the doc.
    - Fix/update/add some comments
    - Fix typo/bug in Mancude calculation
    - Fix bug in rules 4b, 5New
    - Fix Sp2 Bond dir reference
    - Re clang-format
    - other minor changes suggested by Dan

* Another bunch of changes:
  - require integer-order bonds; kekulize when required
  - fix fraction comparison
  - rename sq Cis/Trans e/z
  - replace queues with vectors
  - update copyright notices
  - revert LGPL changes
  - fix Asymmetric typo

* move to separate lib/mod, add python validation test

* Moving away from the original implementation:
    - Rename to CIPLabeler
    - Remove the abstraction layer
    - Remove some stats stuff
    - Push some CIPMol functions down to Node
    - Use RDKit's isotope info

* Another bundle of changes. The most relevant ones:
    - fix parity translation
    - use cis trans as bond reference -- breaks #2984 test
    - kill a lot of unused code
    - use lists for queues
    - store nodes and edges in digraph
    - add prefixes to class data member names
    - update changeRoot() test
    - use fastFindRings() for mancude rings
    - update docs
    - add references to the scientific paper
    - Document the Mancude functions
    - Fix Mancude atom types and their comments
    - remove mol data member from SequenceRule
    - replace Fraction with boost::rational
    - update comments, docstrings and the doc

* fix building the test

* Changes here include:
    - adding bitset overload for the labeling function
    - python wrap of the overload
    - handling trigonal pyramids with implicit H
    - setting bond labels sets stereo atoms, cis/trans
    - nix LEFT/RIGHT/TOGETHER/OPPOSITE constants
    - don't use GLOB in cmake
    - a decent amount of refactoring

* Minor edits to new_CIP_labeling (#6)

* Some changes for clarity

Added some documentation and changed some variable names to match
my understanding. I also ran clang-tidy to ensure that all blocks
were brace-enclosed.

* Return a reference instead of a copy for performance

This is called many times and showed up after some light
profiling. This change bumped throughput by about 20%

* move out of Graphmol

* move .hpp headers to .h

* update documentation; add label set of atoms test

* Address comments:
    - Added references to centres to CIPLabeler.h and Python Wrap.
    - Update validation test to skip sanitization.
    - Document mancude fractional atomic number calculation.
    - Use unittest assertions in python test.
    - Update mancude docstrings to 'resonance' instead of 'tautomers'.
    - Rename prioritise() to prioritize().
    - Add postcondition to check carriers size in Tetrahedral.cpp.
    - Use getNeighbors() in Tetrahedral.cpp.
    - Move findStereoAtoms to Chirality namespace.
    - Move code back into GraphMol.
    - Fix typos and reformat doc.

* More comments:
    - Mention why we use boost's unordered map rather than the std one.
    - Fix include in Python wrapper.

* Addressed second batch of comments:
    - fix the bug in rule 4b
    - fix docstring for rule 2
    - move atomic mass calculation from rule 2 to node
    - addressed some build warnings
    - simplify sp2bond::label(comp)
    - add start/end atoms to Sp2Bond constructor
    - update system/local includes

Co-authored-by: Dan N <dan.nealschneider@schrodinger.com>
2020-07-07 20:34:33 +02:00

155 lines
4.1 KiB
C++

//
//
// Copyright (C) 2020 Schrödinger, LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <vector>
#include "Digraph.h"
#include "Edge.h"
#include "Node.h"
#include "CIPMol.h"
namespace RDKit {
namespace CIPLabeler {
// Creates a child node that starts with an empty visit map and registers it
// in the owning digraph.
//
//   idx   - index into this node's visit map; it is only read when `flags`
//           has one of the DUPLICATE bits set
//   atom  - atom the new node refers to (may be null)
//   flags - flags handed over to the new node
//
// Returns a pointer to the node created by the digraph.
Node *Node::newTerminalChild(int idx, Atom *atom, int flags) const {
  std::vector<char> child_visit;

  // A duplicate takes the distance stored in the visit map at idx; any
  // other child sits one step further away than this node.
  int child_dist;
  if (flags & DUPLICATE) {
    child_dist = d_visit[idx];
  } else {
    child_dist = d_dist + 1;
  }

  if (flags & BOND_DUPLICATE) {
    // Note that the fractional atomic number is queried for this node's own
    // atom, not for `atom`. It is only used when it is a true fraction.
    auto fractional_num = dp_g->getMol().getFractionalAtomicNum(dp_atom);
    if (fractional_num.denominator() > 1) {
      return &dp_g->addNode(std::move(child_visit), atom,
                            std::move(fractional_num), child_dist, flags);
    }
  }

  // Without an atom the node is given atomic number 1.
  auto whole_num = atom ? atom->getAtomicNum() : 1;
  return &dp_g->addNode(std::move(child_visit), atom, whole_num, child_dist,
                        flags);
}
// Builds a node owned by digraph `g`.
//
//   g     - digraph the node belongs to
//   visit - visit map, moved into the node
//   atom  - atom the node refers to (may be null)
//   frac  - atomic number as a rational, moved into the node
//   dist  - distance stored on the node
//   flags - initial flags
//
// Besides storing its arguments, the constructor caches the atomic mass and
// marks the node as EXPANDED when it is a duplicate or has an empty visit map.
Node::Node(Digraph *g, std::vector<char> &&visit, Atom *atom,
           boost::rational<int> &&frac, int dist, int flags)
    : dp_g{g},
      dp_atom{atom},
      d_dist{dist},
      d_atomic_num{std::move(frac)},
      d_flags{flags},
      d_visit{std::move(visit)} {
  const bool duplicate = (d_flags & DUPLICATE) != 0;

  if (duplicate) {
    d_edges.reserve(4);
    // Duplicate nodes carry no mass.
    d_atomic_mass = 0.;
  } else {
    const auto &periodic_table = RDKit::PeriodicTable::getTable();
    auto element = getAtomicNum();
    auto mass_number = getMassNum();
    // A mass number of 0 means "no isotope given": use the element's atomic
    // weight, otherwise the mass of that specific isotope.
    d_atomic_mass =
        (mass_number == 0u)
            ? periodic_table->getAtomicWeight(element)
            : periodic_table->getMassForIsotope(element, mass_number);
  }

  if (duplicate || d_visit.empty()) {
    d_flags |= EXPANDED;
  }
}
// Returns the digraph this node was created with.
Digraph *Node::getDigraph() const {
  return dp_g;
}
// Returns the atom stored on this node; may be null.
Atom *Node::getAtom() const {
  return dp_atom;
}
// Returns the distance value stored on this node at construction.
int Node::getDistance() const {
  return d_dist;
}
// Returns a copy of the (possibly fractional) atomic number of this node.
boost::rational<int> Node::getAtomicNumFraction() const {
  return d_atomic_num;
}
// Returns the integer atomic number of the node's atom.
// A node without an atom reports atomic number 1.
int Node::getAtomicNum() const {
  if (dp_atom == nullptr) {
    return 1;
  }
  return dp_atom->getAtomicNum();
}
// Returns the isotope of the node's atom, or 0 when the node has no atom or
// is a duplicate.
unsigned Node::getMassNum() const {
  const bool has_own_isotope = dp_atom != nullptr && !isDuplicate();
  if (has_own_isotope) {
    return dp_atom->getIsotope();
  }
  return 0u;
}
// Returns the atomic mass cached on this node.
double Node::getAtomicMass() const {
  return d_atomic_mass;
}
// Returns the auxiliary descriptor currently stored on this node.
Descriptor Node::getAux() const {
  return d_aux;
}
// Returns true when at least one bit of `mask` is set in the node's flags.
bool Node::isSet(int mask) const {
  return (d_flags & mask) != 0;
}
// Returns true when any of the DUPLICATE bits is set in the node's flags.
bool Node::isDuplicate() const {
  return (d_flags & DUPLICATE) != 0;
}
// Returns true when the node has an empty visit map, or when it has already
// been expanded and holds exactly one edge.
bool Node::isTerminal() const {
  if (d_visit.empty()) {
    return true;
  }
  if (!isExpanded()) {
    return false;
  }
  return d_edges.size() == 1;
}
// Returns true when the EXPANDED bit is set in the node's flags.
bool Node::isExpanded() const {
  return (d_flags & EXPANDED) != 0;
}
// Returns true when the visit map holds a non-zero entry at `idx`.
// No bounds checking is done on `idx`.
bool Node::isVisited(int idx) const {
  const auto mark = d_visit[idx];
  return mark != 0;
}
// Creates a regular (flag-less) child node one step further away than this
// node and registers it in the owning digraph.
//
//   idx  - position in the visit map that gets stamped with the child's
//          distance
//   atom - atom the child refers to (may be null, giving atomic number 1)
//
// Returns a pointer to the node created by the digraph.
Node *Node::newChild(int idx, Atom *atom) const {
  const auto child_dist = d_dist + 1;

  // The child gets its own copy of the visit map, extended by the entry for
  // idx.
  auto child_visit = d_visit;
  child_visit[idx] = static_cast<char>(child_dist);

  auto whole_num = atom ? atom->getAtomicNum() : 1;
  return &dp_g->addNode(std::move(child_visit), atom, whole_num, child_dist,
                        0);
}
// Creates a terminal child flagged as BOND_DUPLICATE for `atom`; `idx` is
// forwarded unchanged to newTerminalChild().
Node *Node::newBondDuplicateChild(int idx, Atom *atom) const {
  const int child_flags = BOND_DUPLICATE;
  return newTerminalChild(idx, atom, child_flags);
}
// Creates a terminal child flagged as RING_DUPLICATE for `atom`; `idx` is
// forwarded unchanged to newTerminalChild().
Node *Node::newRingDuplicateChild(int idx, Atom *atom) const {
  const int child_flags = RING_DUPLICATE;
  return newTerminalChild(idx, atom, child_flags);
}
// Creates a terminal child flagged as IMPL_HYDROGEN. The child has no atom,
// and -1 is passed as the visit-map index.
Node *Node::newImplicitHydrogenChild() const {
  const int no_index = -1;
  Atom *no_atom = nullptr;
  return newTerminalChild(no_index, no_atom, IMPL_HYDROGEN);
}
// Appends edge `e` to this node's edge list.
void Node::add(Edge *e) {
  d_edges.push_back(e);
}
// Stores `desc` as this node's auxiliary descriptor.
void Node::setAux(Descriptor desc) {
  d_aux = desc;
}
const std::vector<Edge *> &Node::getEdges() const {
if (!isExpanded()) {
auto non_const_this = const_cast<Node *>(this);
non_const_this->d_flags |= EXPANDED;
dp_g->expand(non_const_this);
}
return d_edges;
}
// Returns the edges of this node that touch atom `end`.
//
// An edge is reported when the atom of either its begin or its end node is
// `end`. Edges whose end node is a duplicate are always skipped. Calling this
// may expand the node, since it goes through getEdges().
std::vector<Edge *> Node::getEdges(Atom *end) const {
  std::vector<Edge *> res;
  for (auto &edge : getEdges()) {
    if (edge->getEnd()->isDuplicate()) {
      continue;
    }
    if (end == edge->getBeg()->getAtom() || end == edge->getEnd()->getAtom()) {
      res.push_back(edge);
    }
  }
  return res;
}
std::vector<Edge *> Node::getNonTerminalOutEdges() const {
std::vector<Edge *> edges;
for (auto &edge : getEdges()) {
if (edge->isBeg(this) && !edge->getEnd()->isTerminal()) {
edges.push_back(edge);
}
}
return edges;
}
} // namespace CIPLabeler
} // namespace RDKit