mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* add port of centres
* Several changes:
- Added a test based on RDKit issue 2984
(default RDKit fails it, this gets it right)
- Use bond directions for bond stereo (label is no longer required)
- Fix bugs in rules 4b and 5new
- Fix some mem errors
- clang-formatted
- some other minor cleanups
* Several changes and some improvements:
- Added LGPL license, as well as a mention in the doc.
- Fix/update/add some comments
- Fix typo/bug in Mancude calculation
- Fix bug in rules 4b, 5New
- Fix Sp2 Bond dir reference
- Re clang-format
- other minor changes suggested by Dan
* Another bunch of changes:
- require integer-order bonds; kekulize when required
- fix fraction comparison
- rename sq Cis/Trans e/z
- replace queues with vectors
- update copyright notices
- revert LGPL changes
- fix Asymmetric typo
* move to separate lib/mod, add python validation test
* Moving away from the original implementation:
- Rename to CIPLabeler
- Remove the abstraction layer
- Remove some stats stuff
- Push some CIPMol functions down to Node
- Use RDKit's isotope info
* Another bundle of changes. The most relevant ones:
- fix parity translation
- use cis trans as bond reference -- breaks #2984 test
- kill a lot of unused code
- use lists for queues
- store nodes and edges in digraph
- add prefixes to class data member names
- update changeRoot() test
- use fastFindRings() for mancude rings
- update docs
- add references to the scientific paper
- Document the Mancude functions
- Fix Mancude atom types and their comments
- remove mol data member from SequenceRule
- replace Fraction with boost::rational
- update comments, docstrings and the doc
* fix building the test
* Changes here include:
- adding bitset overload for the labeling function
- python wrap of the overload
- handling trigonal pyramids with implicit H
- setting bond labels sets stereo atoms, cis/trans
- nix LEFT/RIGHT/TOGETHER/OPPOSITE constants
- don't use GLOB in cmake
- a decent amount of refactoring
* Minor edits to new_CIP_labeling (#6)
* Some changes for clarity
Added some documentation and changed some variable names to match
my understanding. Also a ran clang-tidy to ensure that all blocks
were brace-enclosed.
* Return a reference instead of a copy for performance
This is called many times and showed up after some light
profiling. This change bumped throughput by about 20%
* move out of Graphmol
* move .hpp headers to .h
* update documentation; add label set of atoms test
* Address comments:
- Added references to centres to CIPLabeler.h and Python Wrap.
- Update validation test to skip sanitization.
- Document mancude fractional atomic number calculation.
- Use unittest assertions in python test.
- Update mancude docstrings to 'resonance' instad of 'tautomers'.
- Rename prioritise() to prioritize().
- Add postcondition to check carriers size in Tetrahedral.cpp.
- Use getNeighbors() in Tetrahedral.cpp.
- Move findStereoAtoms to Chirality namespace.
- Move code back into GraphMol.
- Fix typos and reformat doc.
* More comments:
- Mention why we use boost's unordered map rather than the std one.
- Fix include in Python wrapper.
* Addressed second batch of comments:
- fix the bug in rule 4b
- fix docstring for rule 2
- move atomic mass calculation from rule 2 to node
- addressed some build warnings
- simplify sp2bond::label(comp)
- add start/end atoms to Sp2Bond constructor
- update system/local includes
Co-authored-by: Dan N <dan.nealschneider@schrodinger.com>
186 lines
5.0 KiB
C++
186 lines
5.0 KiB
C++
//
|
|
//
|
|
// Copyright (C) 2020 Schrödinger, LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <algorithm>
|
|
#include <memory>
|
|
|
|
#include <GraphMol/RDKitBase.h>
|
|
|
|
#include "CIPLabeler.h"
|
|
#include "CIPMol.h"
|
|
#include "configs/Sp2Bond.h"
|
|
#include "configs/Tetrahedral.h"
|
|
|
|
#include "rules/Rules.h"
|
|
#include "rules/Rule1a.h"
|
|
#include "rules/Rule1b.h"
|
|
#include "rules/Rule2.h"
|
|
#include "rules/Rule3.h"
|
|
#include "rules/Rule4a.h"
|
|
#include "rules/Rule4b.h"
|
|
#include "rules/Rule4c.h"
|
|
#include "rules/Rule5New.h"
|
|
#include "rules/Rule6.h"
|
|
|
|
namespace RDKit {
|
|
namespace CIPLabeler {
|
|
|
|
namespace {
|
|
|
|
// constitutional rules
|
|
const Rules constitutional_rules({new Rule1a, new Rule1b, new Rule2});
|
|
|
|
// all rules (require aux calc)
|
|
const Rules all_rules({new Rule1a, new Rule1b, new Rule2, new Rule3, new Rule4a,
|
|
new Rule4b, new Rule4c, new Rule5New, new Rule6});
|
|
|
|
std::vector<std::unique_ptr<Configuration>>
|
|
findConfigs(CIPMol &mol, const boost::dynamic_bitset<> &atoms,
|
|
const boost::dynamic_bitset<> &bonds) {
|
|
std::vector<std::unique_ptr<Configuration>> configs;
|
|
|
|
for (auto index = atoms.find_first(); index != boost::dynamic_bitset<>::npos;
|
|
index = atoms.find_next(index)) {
|
|
auto atom = mol.getAtom(index);
|
|
auto chiraltag = atom->getChiralTag();
|
|
if (chiraltag == Atom::CHI_TETRAHEDRAL_CW ||
|
|
chiraltag == Atom::CHI_TETRAHEDRAL_CCW) {
|
|
|
|
std::unique_ptr<Tetrahedral> cfg{new Tetrahedral(mol, atom)};
|
|
configs.push_back(std::move(cfg));
|
|
}
|
|
}
|
|
|
|
for (auto index = bonds.find_first(); index != boost::dynamic_bitset<>::npos;
|
|
index = bonds.find_next(index)) {
|
|
auto bond = mol.getBond(index);
|
|
|
|
auto bond_cfg = Bond::STEREONONE;
|
|
switch (bond->getStereo()) {
|
|
case Bond::STEREOE:
|
|
case Bond::STEREOTRANS:
|
|
bond_cfg = Bond::STEREOTRANS;
|
|
break;
|
|
case Bond::STEREOZ:
|
|
case Bond::STEREOCIS:
|
|
bond_cfg = Bond::STEREOCIS;
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
std::unique_ptr<Sp2Bond> cfg(new Sp2Bond(mol, bond, bond->getBeginAtom(),
|
|
bond->getEndAtom(), bond_cfg));
|
|
configs.push_back(std::move(cfg));
|
|
}
|
|
|
|
return configs;
|
|
}
|
|
|
|
bool labelAux(std::vector<std::unique_ptr<Configuration>> &configs,
|
|
const Rules &rules,
|
|
const std::unique_ptr<Configuration> ¢er) {
|
|
using Node_Cfg_Pair = std::pair<Node *, Configuration *>;
|
|
std::vector<Node_Cfg_Pair> aux;
|
|
|
|
auto &digraph = center->getDigraph();
|
|
for (const auto &config : configs) {
|
|
if (config == center) {
|
|
continue;
|
|
}
|
|
// FIXME: specific to each descriptor
|
|
const auto &foci = config->getFoci();
|
|
for (const auto &node : digraph.getNodes(foci[0])) {
|
|
if (node->isDuplicate()) {
|
|
continue;
|
|
}
|
|
auto low = node;
|
|
if (foci.size() == 2) {
|
|
for (const auto &edge : node->getEdges(foci[1])) {
|
|
const auto &other_node = edge->getOther(node);
|
|
if (other_node->getDistance() < node->getDistance())
|
|
low = other_node;
|
|
}
|
|
}
|
|
if (!low->isDuplicate()) {
|
|
aux.emplace_back(low, config.get());
|
|
}
|
|
}
|
|
}
|
|
|
|
auto farthest = [](const Node_Cfg_Pair &a, const Node_Cfg_Pair &b) {
|
|
return a.first->getDistance() > b.first->getDistance();
|
|
};
|
|
std::sort(aux.begin(), aux.end(), farthest);
|
|
|
|
// Using a boost::unordered_map because it is more performant
|
|
// than the STL version.
|
|
boost::unordered_map<Node *, Descriptor> queue;
|
|
int prev = std::numeric_limits<int>::max();
|
|
for (const auto &e : aux) {
|
|
const auto &node = e.first;
|
|
|
|
if (node->getDistance() < prev) {
|
|
for (const auto &e2 : queue) {
|
|
e2.first->setAux(e2.second);
|
|
}
|
|
queue.clear();
|
|
prev = node->getDistance();
|
|
}
|
|
const auto &config = e.second;
|
|
auto label = config->label(node, digraph, rules);
|
|
queue.emplace(node, label);
|
|
}
|
|
|
|
for (const auto &e : queue) {
|
|
e.first->setAux(e.second);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void label(std::vector<std::unique_ptr<Configuration>> &configs) {
|
|
|
|
for (const auto &conf : configs) {
|
|
auto desc = conf->label(constitutional_rules);
|
|
if (desc != Descriptor::UNKNOWN) {
|
|
conf->setPrimaryLabel(desc);
|
|
} else {
|
|
if (labelAux(configs, all_rules, conf)) {
|
|
desc = conf->label(all_rules);
|
|
|
|
if (desc != Descriptor::UNKNOWN) {
|
|
conf->setPrimaryLabel(desc);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
void assignCIPLabels(ROMol &mol, const boost::dynamic_bitset<> &atoms,
|
|
const boost::dynamic_bitset<> &bonds) {
|
|
CIPMol cipmol{mol};
|
|
auto configs = findConfigs(cipmol, atoms, bonds);
|
|
label(configs);
|
|
}
|
|
|
|
void assignCIPLabels(ROMol &mol) {
|
|
boost::dynamic_bitset<> atoms(mol.getNumAtoms());
|
|
boost::dynamic_bitset<> bonds(mol.getNumBonds());
|
|
atoms.set();
|
|
bonds.set();
|
|
assignCIPLabels(mol, atoms, bonds);
|
|
}
|
|
|
|
} // namespace CIPLabeler
|
|
} // namespace RDKit
|