mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* add new test (it fails, of course) * isAtomPotentialTetrahedralCenter() there and tested tests cases for molecular stereo written (but failing, of course) create new_chirality.cpp, we will probably want to change this at some point new StereoInfo structure * more infrastructure - isBondPotentialStereoBond() - two getStereoInfo() functions - associated unit tests * backup * oops * backup * switch to always using four atoms for bonds * backup * add new test (it fails, of course) * isAtomPotentialTetrahedralCenter() there and tested tests cases for molecular stereo written (but failing, of course) create new_chirality.cpp, we will probably want to change this at some point new StereoInfo structure * more infrastructure - isBondPotentialStereoBond() - two getStereoInfo() functions - associated unit tests * backup * oops * backup * switch to always using four atoms for bonds * backup * this now actually works * doc update * add a test to demo that ring stereo is not working * more testing * add a fun CIP test * add review note * debugging * remove extraneous debugging turn off tests for ring-double bond stereo * disable the ring-stereo fix... this breaks a few tests, but we will recover * works, needs cleanup, chirality code needs re-testing * nothing works * Fixes #3322 * Python and C++ tests now pass * clang-format * first pass at python wrappers * improve doctest * basic optimization... stop with the copying * rename * all tests passing again * optimization * fix the sort in the tests * looks like this might fix the windows-dll build problems * update tests * the fun never ends * comment cleanup * handle deliberately unspecified atoms/bonds * add cleanIt option * add flagPossible * add option to use the new code to the SMILES parser * additional testing * additional testing * a bit of additional testing never hurts * changes in response to review * fixes a bug with potential parastereo not being cleared other changes in response to review * update docs
470 lines
14 KiB
C++
470 lines
14 KiB
C++
//
|
|
// Copyright (C) 2020 Schrödinger, LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
|
// this in one cpp file
|
|
|
|
#include <bitset>
|
|
#include <list>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "catch.hpp"
|
|
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/SmilesParse/SmilesParse.h>
|
|
|
|
#include "CIPLabeler.h"
|
|
#include "Digraph.h"
|
|
#include "rules/Pairlist.h"
|
|
#include "rules/Rule1a.h"
|
|
#include "rules/Rule2.h"
|
|
|
|
#include "CIPMol.h"
|
|
|
|
using namespace RDKit;
|
|
using namespace RDKit::CIPLabeler;
|
|
|
|
std::string toBinaryString(int value) {
|
|
return std::bitset<32>(value).to_string();
|
|
}
|
|
|
|
TEST_CASE("Descriptor lists", "[accurateCIP]") {
|
|
auto descriptors = PairList();
|
|
|
|
SECTION("IgnoreConstructionNulls") {
|
|
CHECK(!descriptors.add(Descriptor::NONE));
|
|
CHECK(!descriptors.add(Descriptor::UNKNOWN));
|
|
CHECK(!descriptors.add(Descriptor::ns));
|
|
}
|
|
SECTION("IgnoreConstructionPseudo") {
|
|
CHECK(!descriptors.add(Descriptor::r));
|
|
CHECK(!descriptors.add(Descriptor::s));
|
|
|
|
CHECK(descriptors.add(Descriptor::R));
|
|
CHECK(descriptors.add(Descriptor::S));
|
|
}
|
|
SECTION("Pairing") {
|
|
REQUIRE(descriptors.getPairing() == 0);
|
|
|
|
CHECK("00000000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("00000000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01000000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01100000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// unlike
|
|
descriptors.add(Descriptor::S);
|
|
CHECK("01100000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01101000000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01101100000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01101110000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// unlike
|
|
descriptors.add(Descriptor::S);
|
|
CHECK("01101110000000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
|
|
// like
|
|
descriptors.add(Descriptor::R);
|
|
CHECK("01101110100000000000000000000000" ==
|
|
toBinaryString(descriptors.getPairing()));
|
|
}
|
|
|
|
SECTION("pairRM") {
|
|
PairList list1 = PairList();
|
|
PairList list2 = PairList();
|
|
list1.add(Descriptor::R);
|
|
list1.add(Descriptor::M);
|
|
list1.add(Descriptor::R);
|
|
list1.add(Descriptor::S);
|
|
list2.add(Descriptor::R);
|
|
list2.add(Descriptor::P);
|
|
list2.add(Descriptor::S);
|
|
list2.add(Descriptor::M);
|
|
CHECK(list1.toString() == "R:llu");
|
|
CHECK(list2.toString() == "R:uul");
|
|
}
|
|
}
|
|
|
|
void check_incoming_edge_count(Node *root) {
|
|
auto queue = std::list<Node *>({root});
|
|
|
|
for (const auto &node : queue) {
|
|
int incoming_edges = 0;
|
|
for (const auto &e : node->getEdges()) {
|
|
if (!e->isBeg(node)) {
|
|
REQUIRE(e->isEnd(node));
|
|
++incoming_edges;
|
|
} else if (!e->getEnd()->isTerminal()) {
|
|
queue.push_back(e->getEnd());
|
|
}
|
|
}
|
|
|
|
// Only the current root should have no incoming nodes.
|
|
// All other nodes should have exactly one incoming edge.
|
|
if (node == root) {
|
|
CHECK(incoming_edges == 0);
|
|
} else {
|
|
CHECK(incoming_edges == 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
void expandAll(Digraph &g) {
|
|
auto queue = std::list<Node *>({g.getOriginalRoot()});
|
|
|
|
for (const auto &node : queue) {
|
|
for (const auto &e : node->getEdges()) {
|
|
if (!e->isBeg(node)) {
|
|
continue;
|
|
}
|
|
if (!e->getEnd()->isTerminal()) {
|
|
queue.push_back(e->getEnd());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_CASE("Digraph", "[accurateCIP]") {
|
|
auto mol =
|
|
R"(CC1(OC2=C(C=3NC[C@@]4(C3C=C2)C([C@@H]5C[C@@]67C(N([C@]5(C4)CN6CC[C@@]7(C)O)C)=O)(C)C)OC=C1)C)"_smiles; // VS040
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
|
|
auto initial_root_idx = 1u;
|
|
auto initial_root_atom = cipmol.getAtom(initial_root_idx);
|
|
|
|
Digraph g(cipmol, initial_root_atom);
|
|
expandAll(g);
|
|
REQUIRE(g.getNumNodes() == 3819);
|
|
|
|
auto current_root = g.getCurrentRoot();
|
|
REQUIRE(current_root->getAtom()->getIdx() == initial_root_idx);
|
|
|
|
check_incoming_edge_count(current_root);
|
|
|
|
auto new_root_idx = 24u;
|
|
auto new_root_atom = cipmol.getAtom(new_root_idx);
|
|
auto new_root_nodes = g.getNodes(new_root_atom);
|
|
CHECK(new_root_nodes.size() == 104);
|
|
|
|
Node *new_root_node = *new_root_nodes.begin();
|
|
for (const auto &node : new_root_nodes) {
|
|
if (!node->isDuplicate() &&
|
|
node->getDistance() > new_root_node->getDistance()) {
|
|
new_root_node = node;
|
|
}
|
|
}
|
|
REQUIRE(new_root_node->getDistance() == 25);
|
|
|
|
g.changeRoot(new_root_node);
|
|
REQUIRE(g.getNumNodes() == 3819);
|
|
|
|
current_root = g.getCurrentRoot();
|
|
REQUIRE(current_root->getAtom()->getIdx() == new_root_idx);
|
|
|
|
check_incoming_edge_count(current_root);
|
|
}
|
|
|
|
TEST_CASE("Rule1a", "[accurateCIP]") {
|
|
SECTION("Compare equal") {
|
|
auto mol = "COC"_smiles;
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
Digraph g(cipmol, cipmol.getAtom(1));
|
|
auto origin = g.getOriginalRoot();
|
|
|
|
auto frac = origin->getAtomicNumFraction();
|
|
REQUIRE(frac.numerator() == 8);
|
|
REQUIRE(frac.denominator() == 1);
|
|
|
|
Rule1a rule;
|
|
|
|
auto edges = origin->getEdges();
|
|
REQUIRE(edges.size() == 2);
|
|
REQUIRE(edges[0] != edges[1]);
|
|
|
|
CHECK(0 == rule.compare(edges[0], edges[1]));
|
|
|
|
CHECK(!rule.getSorter()->prioritize(origin, edges).isUnique());
|
|
}
|
|
|
|
SECTION("Compare different") {
|
|
auto mol = "CON"_smiles;
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
Digraph g(cipmol, cipmol.getAtom(1));
|
|
auto origin = g.getOriginalRoot();
|
|
|
|
auto frac = origin->getAtomicNumFraction();
|
|
REQUIRE(frac.numerator() == 8);
|
|
REQUIRE(frac.denominator() == 1);
|
|
|
|
Rule1a rule;
|
|
|
|
auto edges = origin->getEdges();
|
|
REQUIRE(edges.size() == 2);
|
|
REQUIRE(edges[0] != edges[1]);
|
|
|
|
auto frac0 = edges[0]->getEnd()->getAtomicNumFraction();
|
|
REQUIRE(frac0.numerator() == 6);
|
|
REQUIRE(frac0.denominator() == 1);
|
|
|
|
auto frac1 = edges[1]->getEnd()->getAtomicNumFraction();
|
|
REQUIRE(frac1.numerator() == 7);
|
|
REQUIRE(frac1.denominator() == 1);
|
|
|
|
CHECK(rule.compare(edges[0], edges[1]) < 0);
|
|
CHECK(rule.compare(edges[1], edges[0]) > 0);
|
|
|
|
CHECK(rule.getSorter()->prioritize(origin, edges).isUnique());
|
|
}
|
|
}
|
|
|
|
TEST_CASE("Rule2", "[accurateCIP]") {
|
|
SECTION("Compare equal") {
|
|
auto mol = "COC"_smiles;
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
Digraph g(cipmol, cipmol.getAtom(1));
|
|
auto origin = g.getOriginalRoot();
|
|
|
|
auto frac = origin->getAtomicNumFraction();
|
|
REQUIRE(frac.numerator() == 8);
|
|
REQUIRE(frac.denominator() == 1);
|
|
|
|
Rule2 rule;
|
|
|
|
auto edges = origin->getEdges();
|
|
REQUIRE(edges.size() == 2);
|
|
REQUIRE(edges[0] != edges[1]);
|
|
|
|
CHECK(0 == rule.compare(edges[0], edges[1]));
|
|
|
|
CHECK(!rule.getSorter()->prioritize(origin, edges).isUnique());
|
|
}
|
|
|
|
SECTION("Compare different: Zero Mass Num") {
|
|
auto mol = "CO[13C]"_smiles;
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
Digraph g(cipmol, cipmol.getAtom(1));
|
|
auto origin = g.getOriginalRoot();
|
|
|
|
auto frac = origin->getAtomicNumFraction();
|
|
REQUIRE(frac.numerator() == 8);
|
|
REQUIRE(frac.denominator() == 1);
|
|
|
|
Rule2 rule;
|
|
|
|
auto edges = origin->getEdges();
|
|
REQUIRE(edges.size() == 2);
|
|
REQUIRE(edges[0] != edges[1]);
|
|
|
|
REQUIRE(edges[0]->getEnd()->getMassNum() == 0);
|
|
REQUIRE(edges[1]->getEnd()->getMassNum() == 13);
|
|
|
|
CHECK(rule.compare(edges[0], edges[1]) < 0);
|
|
CHECK(rule.compare(edges[1], edges[0]) > 0);
|
|
|
|
CHECK(rule.getSorter()->prioritize(origin, edges).isUnique());
|
|
}
|
|
|
|
SECTION("Compare different: Non-Zero Mass Num") {
|
|
auto mol = "[13C]O[14C]"_smiles;
|
|
CIPLabeler::CIPMol cipmol(*mol);
|
|
Digraph g(cipmol, cipmol.getAtom(1));
|
|
auto origin = g.getOriginalRoot();
|
|
|
|
auto frac = origin->getAtomicNumFraction();
|
|
REQUIRE(frac.numerator() == 8);
|
|
REQUIRE(frac.denominator() == 1);
|
|
|
|
Rule2 rule;
|
|
|
|
auto edges = origin->getEdges();
|
|
REQUIRE(edges.size() == 2);
|
|
REQUIRE(edges[0] != edges[1]);
|
|
|
|
REQUIRE(edges[0]->getEnd()->getMassNum() == 13);
|
|
REQUIRE(edges[1]->getEnd()->getMassNum() == 14);
|
|
|
|
CHECK(rule.compare(edges[0], edges[1]) < 0);
|
|
CHECK(rule.compare(edges[1], edges[0]) > 0);
|
|
|
|
CHECK(rule.getSorter()->prioritize(origin, edges).isUnique());
|
|
}
|
|
}
|
|
|
|
TEST_CASE("Tetrahedral assignment", "[accurateCIP]") {
|
|
auto mol = "Br[C@H](Cl)F"_smiles;
|
|
REQUIRE(mol->getNumAtoms() == 4);
|
|
|
|
auto chiral_atom = mol->getAtomWithIdx(1);
|
|
chiral_atom->clearProp(common_properties::_CIPCode);
|
|
REQUIRE(chiral_atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
|
|
|
|
CIPLabeler::assignCIPLabels(*mol);
|
|
|
|
std::string chirality;
|
|
CHECK(chiral_atom->getPropIfPresent(common_properties::_CIPCode, chirality) ==
|
|
true);
|
|
CHECK(chirality == "S");
|
|
}
|
|
|
|
TEST_CASE("Double bond stereo assignment", "[accurateCIP]") {
|
|
auto mol = R"(CC\C(\C(\C)=N\O)=N\O)"_smiles; // VS013
|
|
REQUIRE(mol->getNumAtoms() == 9);
|
|
|
|
auto bond_1 = mol->getBondWithIdx(4);
|
|
auto bond_2 = mol->getBondWithIdx(6);
|
|
REQUIRE(bond_1->getBondType() == Bond::DOUBLE);
|
|
REQUIRE(bond_2->getBondType() == Bond::DOUBLE);
|
|
REQUIRE(bond_1->getStereo() == Bond::STEREOE);
|
|
REQUIRE(bond_2->getStereo() == Bond::STEREOZ);
|
|
|
|
CIPLabeler::assignCIPLabels(*mol);
|
|
|
|
std::string chirality;
|
|
CHECK(bond_1->getPropIfPresent(common_properties::_CIPCode, chirality) ==
|
|
true);
|
|
CHECK(chirality == "E");
|
|
|
|
CHECK(bond_2->getPropIfPresent(common_properties::_CIPCode, chirality) ==
|
|
true);
|
|
CHECK(chirality == "Z");
|
|
}
|
|
|
|
TEST_CASE("phosphine and arsine chirality", "[accurateCIP]") {
|
|
const std::vector<std::pair<std::string, std::string>> mols{
|
|
{"C[P@](C1CCCC1)C1=CC=CC=C1", "R"},
|
|
{"C[As@@](C1CCCC1)C1=CC=CC=C1", "S"},
|
|
{"C[P@H]C1CCCCC1", "R"},
|
|
{"C[P@@H]C1CCCCC1", "S"}};
|
|
|
|
for (const auto &ref : mols) {
|
|
std::unique_ptr<RWMol> mol{SmilesToMol(ref.first)};
|
|
REQUIRE(mol);
|
|
CIPLabeler::assignCIPLabels(*mol);
|
|
|
|
std::string chirality;
|
|
CHECK(mol->getAtomWithIdx(1)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == ref.second);
|
|
}
|
|
}
|
|
|
|
TEST_CASE("assign specific atoms and bonds", "[accurateCIP]") {
|
|
SECTION("Assign atoms") {
|
|
auto mol = "C[C@H](Cl)CC[C@H](Cl)C"_smiles;
|
|
REQUIRE(mol);
|
|
|
|
auto atom1 = mol->getAtomWithIdx(1);
|
|
auto atom5 = mol->getAtomWithIdx(5);
|
|
|
|
REQUIRE(atom1->hasProp(common_properties::_CIPCode) == true);
|
|
REQUIRE(atom5->hasProp(common_properties::_CIPCode) == true);
|
|
|
|
atom1->clearProp(common_properties::_CIPCode);
|
|
atom5->clearProp(common_properties::_CIPCode);
|
|
|
|
boost::dynamic_bitset<> atoms(mol->getNumAtoms());
|
|
boost::dynamic_bitset<> bonds;
|
|
atoms.set(1);
|
|
CIPLabeler::assignCIPLabels(*mol, atoms, bonds);
|
|
|
|
std::string chirality;
|
|
CHECK(atom1->getPropIfPresent(common_properties::_CIPCode, chirality) ==
|
|
true);
|
|
CHECK(chirality == "S");
|
|
CHECK(atom5->hasProp(common_properties::_CIPCode) == false);
|
|
}
|
|
SECTION("Assign bonds") {
|
|
auto mol = R"(C\C=C\C=C/C)"_smiles;
|
|
REQUIRE(mol);
|
|
|
|
auto bond1 = mol->getBondWithIdx(1);
|
|
auto bond3 = mol->getBondWithIdx(3);
|
|
|
|
REQUIRE(bond1->getBondType() == Bond::DOUBLE);
|
|
REQUIRE(bond3->getBondType() == Bond::DOUBLE);
|
|
|
|
REQUIRE(bond1->hasProp(common_properties::_CIPCode) == false);
|
|
REQUIRE(bond3->hasProp(common_properties::_CIPCode) == false);
|
|
|
|
boost::dynamic_bitset<> atoms;
|
|
boost::dynamic_bitset<> bonds(mol->getNumBonds());
|
|
bonds.set(3);
|
|
CIPLabeler::assignCIPLabels(*mol, atoms, bonds);
|
|
|
|
std::string stereo;
|
|
CHECK(bond1->hasProp(common_properties::_CIPCode) == false);
|
|
CHECK(bond3->getPropIfPresent(common_properties::_CIPCode, stereo) == true);
|
|
CHECK(stereo == "Z");
|
|
}
|
|
}
|
|
|
|
TEST_CASE("para-stereochemistry", "[accurateCIP]") {
|
|
SECTION("example 1") {
|
|
// slightly simplified complex example from Salome Rieder
|
|
auto mol = "C\\C=C/[C@@H](\\C=C\\O)[C@H](C)[C@H](\\C=C/C)\\C=C\\O"_smiles;
|
|
REQUIRE(mol);
|
|
CIPLabeler::assignCIPLabels(*mol);
|
|
|
|
std::string chirality;
|
|
CHECK(mol->getAtomWithIdx(3)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "R");
|
|
CHECK(mol->getAtomWithIdx(7)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "r");
|
|
CHECK(mol->getAtomWithIdx(9)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "S");
|
|
}
|
|
SECTION("example 2") {
|
|
// lovely complex example from Salome Rieder
|
|
auto mol = "C\\C=C/[C@@H](\\C=C\\C)[C@H](C)[C@H](\\C=C/C)\\C=C\\C"_smiles;
|
|
REQUIRE(mol);
|
|
CIPLabeler::assignCIPLabels(*mol);
|
|
|
|
std::string chirality;
|
|
CHECK(mol->getAtomWithIdx(3)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "R");
|
|
CHECK(mol->getAtomWithIdx(7)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "r");
|
|
CHECK(mol->getAtomWithIdx(9)->getPropIfPresent(common_properties::_CIPCode,
|
|
chirality) == true);
|
|
CHECK(chirality == "S");
|
|
}
|
|
}
|