Files
rdkit/Code/GraphMol/CIPLabeler/CIPLabeler.cpp
Ric d54e77e375 Add new CIP labelling algorithm (#3234)
* add port of centres

* Several changes:
    - Added a test based on RDKit issue 2984
        (default RDKit fails it, this gets it right)
    - Use bond directions for bond stereo (label is no longer required)
    - Fix bugs in rules 4b and 5new
    - Fix some mem errors
    - clang-formatted
    - some other minor cleanups

* Several changes and some improvements:
    - Added LGPL license, as well as a mention in the doc.
    - Fix/update/add some comments
    - Fix typo/bug in Mancude calculation
    - Fix bug in rules 4b, 5New
    - Fix Sp2 Bond dir reference
    - Re clang-format
    - other minor changes suggested by Dan

* Another bunch of changes:
  - require integer-order bonds; kekulize when required
  - fix fraction comparison
  - rename sq Cis/Trans e/z
  - replace queues with vectors
  - update copyright notices
  - revert LGPL changes
  - fix Asymmetric typo

* move to separate lib/mod, add python validation test

* Moving away from the original implementation:
    - Rename to CIPLabeler
    - Remove the abstraction layer
    - Remove some stats stuff
    - Push some CIPMol functions down to Node
    - Use RDKit's isotope info

* Another bundle of changes. The most relevant ones:
    - fix parity translation
    - use cis trans as bond reference -- breaks #2984 test
    - kill a lot of unused code
    - use lists for queues
    - store nodes and edges in digraph
    - add prefixes to class data member names
    - update changeRoot() test
    - use fastFindRings() for mancude rings
    - update docs
    - add references to the scientific paper
    - Document the Mancude functions
    - Fix Mancude atom types and their comments
    - remove mol data member from SequenceRule
    - replace Fraction with boost::rational
    - update comments, docstrings and the doc

* fix building the test

* Changes here include:
    - adding bitset overload for the labeling function
    - python wrap of the overload
    - handling trigonal pyramids with implicit H
    - setting bond labels sets stereo atoms, cis/trans
    - nix LEFT/RIGHT/TOGETHER/OPPOSITE constants
    - don't use GLOB in cmake
    - a decent amount of refactoring

* Minor edits to new_CIP_labeling (#6)

* Some changes for clarity

Added some documentation and changed some variable names to match
my understanding. Also a ran clang-tidy to ensure that all blocks
were brace-enclosed.

* Return a reference instead of a copy for performance

This is called many times and showed up after some light
profiling. This change bumped throughput by about 20%

* move out of Graphmol

* move .hpp headers to .h

* update documentation; add label set of atoms test

* Address comments:
    - Added references to centres to CIPLabeler.h and Python Wrap.
    - Update validation test to skip sanitization.
    - Document mancude fractional atomic number calculation.
    - Use unittest assertions in python test.
    - Update mancude docstrings to 'resonance' instad of 'tautomers'.
    - Rename prioritise() to prioritize().
    - Add postcondition to check carriers size in Tetrahedral.cpp.
    - Use getNeighbors() in Tetrahedral.cpp.
    - Move findStereoAtoms to Chirality namespace.
    - Move code back into GraphMol.
    - Fix typos and reformat doc.

* More comments:
    - Mention why we use boost's unordered map rather than the std one.
    - Fix include in Python wrapper.

* Addressed second batch of comments:
    - fix the bug in rule 4b
    - fix docstring for rule 2
    - move atomic mass calculation from rule 2 to node
    - addressed some build warnings
    - simplify sp2bond::label(comp)
    - add start/end atoms to Sp2Bond constructor
    - update system/local includes

Co-authored-by: Dan N <dan.nealschneider@schrodinger.com>
2020-07-07 20:34:33 +02:00

186 lines
5.0 KiB
C++

//
//
// Copyright (C) 2020 Schrödinger, LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <algorithm>
#include <memory>
#include <GraphMol/RDKitBase.h>
#include "CIPLabeler.h"
#include "CIPMol.h"
#include "configs/Sp2Bond.h"
#include "configs/Tetrahedral.h"
#include "rules/Rules.h"
#include "rules/Rule1a.h"
#include "rules/Rule1b.h"
#include "rules/Rule2.h"
#include "rules/Rule3.h"
#include "rules/Rule4a.h"
#include "rules/Rule4b.h"
#include "rules/Rule4c.h"
#include "rules/Rule5New.h"
#include "rules/Rule6.h"
namespace RDKit {
namespace CIPLabeler {
namespace {
// constitutional rules
const Rules constitutional_rules({new Rule1a, new Rule1b, new Rule2});
// all rules (require aux calc)
const Rules all_rules({new Rule1a, new Rule1b, new Rule2, new Rule3, new Rule4a,
new Rule4b, new Rule4c, new Rule5New, new Rule6});
std::vector<std::unique_ptr<Configuration>>
findConfigs(CIPMol &mol, const boost::dynamic_bitset<> &atoms,
const boost::dynamic_bitset<> &bonds) {
std::vector<std::unique_ptr<Configuration>> configs;
for (auto index = atoms.find_first(); index != boost::dynamic_bitset<>::npos;
index = atoms.find_next(index)) {
auto atom = mol.getAtom(index);
auto chiraltag = atom->getChiralTag();
if (chiraltag == Atom::CHI_TETRAHEDRAL_CW ||
chiraltag == Atom::CHI_TETRAHEDRAL_CCW) {
std::unique_ptr<Tetrahedral> cfg{new Tetrahedral(mol, atom)};
configs.push_back(std::move(cfg));
}
}
for (auto index = bonds.find_first(); index != boost::dynamic_bitset<>::npos;
index = bonds.find_next(index)) {
auto bond = mol.getBond(index);
auto bond_cfg = Bond::STEREONONE;
switch (bond->getStereo()) {
case Bond::STEREOE:
case Bond::STEREOTRANS:
bond_cfg = Bond::STEREOTRANS;
break;
case Bond::STEREOZ:
case Bond::STEREOCIS:
bond_cfg = Bond::STEREOCIS;
break;
default:
continue;
}
std::unique_ptr<Sp2Bond> cfg(new Sp2Bond(mol, bond, bond->getBeginAtom(),
bond->getEndAtom(), bond_cfg));
configs.push_back(std::move(cfg));
}
return configs;
}
bool labelAux(std::vector<std::unique_ptr<Configuration>> &configs,
const Rules &rules,
const std::unique_ptr<Configuration> &center) {
using Node_Cfg_Pair = std::pair<Node *, Configuration *>;
std::vector<Node_Cfg_Pair> aux;
auto &digraph = center->getDigraph();
for (const auto &config : configs) {
if (config == center) {
continue;
}
// FIXME: specific to each descriptor
const auto &foci = config->getFoci();
for (const auto &node : digraph.getNodes(foci[0])) {
if (node->isDuplicate()) {
continue;
}
auto low = node;
if (foci.size() == 2) {
for (const auto &edge : node->getEdges(foci[1])) {
const auto &other_node = edge->getOther(node);
if (other_node->getDistance() < node->getDistance())
low = other_node;
}
}
if (!low->isDuplicate()) {
aux.emplace_back(low, config.get());
}
}
}
auto farthest = [](const Node_Cfg_Pair &a, const Node_Cfg_Pair &b) {
return a.first->getDistance() > b.first->getDistance();
};
std::sort(aux.begin(), aux.end(), farthest);
// Using a boost::unordered_map because it is more performant
// than the STL version.
boost::unordered_map<Node *, Descriptor> queue;
int prev = std::numeric_limits<int>::max();
for (const auto &e : aux) {
const auto &node = e.first;
if (node->getDistance() < prev) {
for (const auto &e2 : queue) {
e2.first->setAux(e2.second);
}
queue.clear();
prev = node->getDistance();
}
const auto &config = e.second;
auto label = config->label(node, digraph, rules);
queue.emplace(node, label);
}
for (const auto &e : queue) {
e.first->setAux(e.second);
}
return true;
}
void label(std::vector<std::unique_ptr<Configuration>> &configs) {
for (const auto &conf : configs) {
auto desc = conf->label(constitutional_rules);
if (desc != Descriptor::UNKNOWN) {
conf->setPrimaryLabel(desc);
} else {
if (labelAux(configs, all_rules, conf)) {
desc = conf->label(all_rules);
if (desc != Descriptor::UNKNOWN) {
conf->setPrimaryLabel(desc);
}
}
}
}
}
} // namespace
void assignCIPLabels(ROMol &mol, const boost::dynamic_bitset<> &atoms,
const boost::dynamic_bitset<> &bonds) {
CIPMol cipmol{mol};
auto configs = findConfigs(cipmol, atoms, bonds);
label(configs);
}
void assignCIPLabels(ROMol &mol) {
boost::dynamic_bitset<> atoms(mol.getNumAtoms());
boost::dynamic_bitset<> bonds(mol.getNumBonds());
atoms.set();
bonds.set();
assignCIPLabels(mol, atoms, bonds);
}
} // namespace CIPLabeler
} // namespace RDKit