Files
rdkit/Code/GraphMol/CIPLabeler/rules/SequenceRule.cpp
Ric d54e77e375 Add new CIP labelling algorithm (#3234)
* add port of centres

* Several changes:
    - Added a test based on RDKit issue 2984
        (default RDKit fails it, this gets it right)
    - Use bond directions for bond stereo (label is no longer required)
    - Fix bugs in rules 4b and 5new
    - Fix some mem errors
    - clang-formatted
    - some other minor cleanups

* Several changes and some improvements:
    - Added LGPL license, as well as a mention in the doc.
    - Fix/update/add some comments
    - Fix typo/bug in Mancude calculation
    - Fix bug in rules 4b, 5New
    - Fix Sp2 Bond dir reference
    - Re clang-format
    - other minor changes suggested by Dan

* Another bunch of changes:
  - require integer-order bonds; kekulize when required
  - fix fraction comparison
  - rename sq Cis/Trans e/z
  - replace queues with vectors
  - update copyright notices
  - revert LGPL changes
  - fix Asymmetric typo

* move to separate lib/mod, add python validation test

* Moving away from the original implementation:
    - Rename to CIPLabeler
    - Remove the abstraction layer
    - Remove some stats stuff
    - Push some CIPMol functions down to Node
    - Use RDKit's isotope info

* Another bundle of changes. The most relevant ones:
    - fix parity translation
    - use cis trans as bond reference -- breaks #2984 test
    - kill a lot of unused code
    - use lists for queues
    - store nodes and edges in digraph
    - add prefixes to class data member names
    - update changeRoot() test
    - use fastFindRings() for mancude rings
    - update docs
    - add references to the scientific paper
    - Document the Mancude functions
    - Fix Mancude atom types and their comments
    - remove mol data member from SequenceRule
    - replace Fraction with boost::rational
    - update comments, docstrings and the doc

* fix building the test

* Changes here include:
    - adding bitset overload for the labeling function
    - python wrap of the overload
    - handling trigonal pyramids with implicit H
    - setting bond labels sets stereo atoms, cis/trans
    - nix LEFT/RIGHT/TOGETHER/OPPOSITE constants
    - don't use GLOB in cmake
    - a decent amount of refactoring

* Minor edits to new_CIP_labeling (#6)

* Some changes for clarity

Added some documentation and changed some variable names to match
my understanding. Also a ran clang-tidy to ensure that all blocks
were brace-enclosed.

* Return a reference instead of a copy for performance

This is called many times and showed up after some light
profiling. This change bumped throughput by about 20%

* move out of Graphmol

* move .hpp headers to .h

* update documentation; add label set of atoms test

* Address comments:
    - Added references to centres to CIPLabeler.h and Python Wrap.
    - Update validation test to skip sanitization.
    - Document mancude fractional atomic number calculation.
    - Use unittest assertions in python test.
    - Update mancude docstrings to 'resonance' instad of 'tautomers'.
    - Rename prioritise() to prioritize().
    - Add postcondition to check carriers size in Tetrahedral.cpp.
    - Use getNeighbors() in Tetrahedral.cpp.
    - Move findStereoAtoms to Chirality namespace.
    - Move code back into GraphMol.
    - Fix typos and reformat doc.

* More comments:
    - Mention why we use boost's unordered map rather than the std one.
    - Fix include in Python wrapper.

* Addressed second batch of comments:
    - fix the bug in rule 4b
    - fix docstring for rule 2
    - move atomic mass calculation from rule 2 to node
    - addressed some build warnings
    - simplify sp2bond::label(comp)
    - add start/end atoms to Sp2Bond constructor
    - update system/local includes

Co-authored-by: Dan N <dan.nealschneider@schrodinger.com>
2020-07-07 20:34:33 +02:00

169 lines
4.2 KiB
C++

//
//
// Copyright (C) 2020 Schrödinger, LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "SequenceRule.h"
#include "../CIPMol.h"
namespace RDKit {
namespace CIPLabeler {
SequenceRule::SequenceRule() = default;
SequenceRule::~SequenceRule() = default;
Descriptor SequenceRule::getBondLabel(const Edge *edge) const {
Bond *bond = edge->getBond();
if (bond == nullptr) {
return Descriptor::NONE;
}
Descriptor label = edge->getAux();
if (label != Descriptor::NONE) {
return label;
}
return label;
}
int SequenceRule::getComparision(const Edge *a, const Edge *b) const {
return getComparision(a, b, true);
}
int SequenceRule::getComparision(const Edge *a, const Edge *b,
bool deep) const {
return deep ? recursiveCompare(a, b) : compare(a, b);
}
const Sort *SequenceRule::getSorter() const {
if (dp_sorter == nullptr) {
const_cast<SequenceRule *>(this)->setSorter(new Sort(this));
}
return dp_sorter.get();
}
int SequenceRule::recursiveCompare(const Edge *a, const Edge *b) const {
// pseudo atoms (atomic no. 0) match all
if (a->getEnd()->getAtomicNum() == 0 || b->getEnd()->getAtomicNum() == 0) {
return 0;
}
int cmp = compare(a, b);
if (cmp != 0) {
return cmp;
}
auto aQueue = std::vector<const Edge *>({a});
auto bQueue = std::vector<const Edge *>({b});
for (auto pos = 0u; pos < aQueue.size() && pos < bQueue.size(); ++pos) {
a = aQueue[pos];
b = bQueue[pos];
auto as = a->getEnd()->getEdges();
auto bs = b->getEnd()->getEdges();
// shallow sort first of all
sort(a->getEnd(), as, false);
sort(b->getEnd(), bs, false);
int sizediff = three_way_comparison(static_cast<int>(as.size()),
static_cast<int>(bs.size()));
{
auto aIt = as.begin();
auto bIt = bs.begin();
for (; aIt != as.end() && bIt != bs.end(); ++aIt, ++bIt) {
Node *aNode = a->getEnd();
Node *bNode = b->getEnd();
Edge *aEdge = *aIt;
Edge *bEdge = *bIt;
if (areUpEdges(aNode, bNode, aEdge, bEdge)) {
continue;
}
// pseudo atoms (atomic no. 0) match all
if (aEdge->getEnd()->getAtomicNum() == 0 ||
bEdge->getEnd()->getAtomicNum() == 0) {
return 0;
}
cmp = compare(aEdge, bEdge);
if (cmp != 0) {
return cmp;
}
}
}
if (sizediff != 0) {
return sizediff;
}
sort(a->getEnd(), as);
sort(b->getEnd(), bs);
{
auto aIt = as.begin();
auto bIt = bs.begin();
for (; aIt != as.end() && bIt != bs.end(); ++aIt, ++bIt) {
Node *aNode = a->getEnd();
Node *bNode = b->getEnd();
Edge *aEdge = *aIt;
Edge *bEdge = *bIt;
if (areUpEdges(aNode, bNode, aEdge, bEdge)) {
continue;
}
// pseudo atoms (atomic no. 0) match all
if (aEdge->getEnd()->getAtomicNum() == 0 ||
bEdge->getEnd()->getAtomicNum() == 0) {
return 0;
}
cmp = compare(aEdge, bEdge);
if (cmp != 0) {
return cmp;
}
aQueue.push_back(aEdge);
bQueue.push_back(bEdge);
}
}
}
return 0;
}
void SequenceRule::setSorter(const Sort *sorter) { dp_sorter.reset(sorter); }
Priority SequenceRule::sort(const Node *node, std::vector<Edge *> &edges,
bool deep) const {
return getSorter()->prioritize(node, edges, deep);
}
Priority SequenceRule::sort(const Node *node,
std::vector<Edge *> &edges) const {
return sort(node, edges, true);
}
bool SequenceRule::areUpEdges(Node *aNode, Node *bNode, Edge *aEdge,
Edge *bEdge) const {
// step over 'up' edges
if (aEdge->isEnd(aNode)) {
// if b is 'down' something's not right!
if (!bEdge->isEnd(bNode)) {
throw std::runtime_error("Something unexpected!");
}
return true;
}
return false;
}
} // namespace CIPLabeler
} // namespace RDKit