mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* CIP labeller: Don't calculate auxiliary descriptors unnecessarily The first 3 rules (the constitutional rules) are pretty easy to understand. After rule 3, we need to calculate auxiliary stereo descriptors to break ties. However, we _were actually_ calculating auxiliary stereodescriptors for all centers! We should only need to calculate auxiliary stereocenters for sites that are needed to break ties. This cost time - it also caused errors if the auxiliary descriptors needed a graph expansion, because bonds in the digraph might be pointed in the wrong direction. Example case PDB ID 4AXM Before this commit, errored with "Could not calculate parity! Carrier mismatch" after 14s. After this commit, completes successfully in 0.036s. Labelled centers all match (for the centers that had labels in the failure case). Includes a test that I can imagine breaking with this optimization. The reference labels are from before this change * Ensure all "arms" of stereo bonds and atropisomer bonds are expanded For tetrahedral centers, ranking using the constitutional rules always expands as far as is needed (but no further). For SP2bond and atropisomers, if the first side is not resolvable, the second side is never visited. If the constitutional rules don't resolve a side, we need to label the auxiliary centers. It's important to label all auxiliary centers that _will_ be visited, so we need to know what centers will be visited. This commit updates the label() call in SP2 and Atropisomer bonds to always attempt to label both sides if using the constitutional rule set. The constitutional rules are cheap, and if they fail, we always go on to the full rule set. It is not a savings to skip the search on the second side if we're going to keep going anyway! Includes a test that reproduces Ricardo's example. This has no measurable effect on performance relative to the original solution * If any parts of the center have been seen, label it. I couldn't make an example hit this, but Ric is totally theoretically right * Greg's ranges suggestion #2 Co-authored-by: Greg Landrum <greg.landrum@gmail.com> * any_of for container search Co-authored-by: Greg Landrum <greg.landrum@gmail.com> --------- Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
180 lines
5.3 KiB
C++
180 lines
5.3 KiB
C++
//
|
|
//
|
|
// Copyright (C) 2020 Schrödinger, LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <GraphMol/Chirality.h>
|
|
#include <RDGeneral/types.h>
|
|
|
|
#include "Sp2Bond.h"
|
|
#include "../Sort.h"
|
|
#include "../rules/Rules.h"
|
|
|
|
namespace RDKit {
|
|
namespace CIPLabeler {
|
|
|
|
Sp2Bond::Sp2Bond(const CIPMol &mol, Bond *bond, Atom *startAtom, Atom *endAtom,
|
|
Bond::BondStereo cfg)
|
|
: Configuration(mol, {startAtom, endAtom}), dp_bond{bond}, d_cfg{cfg} {
|
|
CHECK_INVARIANT(startAtom && endAtom, "bad foci")
|
|
CHECK_INVARIANT(d_cfg == Bond::STEREOTRANS || d_cfg == Bond::STEREOCIS,
|
|
"bad config")
|
|
|
|
auto stereo_atoms = Chirality::findStereoAtoms(bond);
|
|
CHECK_INVARIANT(stereo_atoms.size() == 2, "incorrect number of stereo atoms")
|
|
|
|
std::vector<Atom *> anchors{
|
|
{mol.getAtom(stereo_atoms[0]), mol.getAtom(stereo_atoms[1])}};
|
|
|
|
setCarriers(std::move(anchors));
|
|
}
|
|
|
|
void Sp2Bond::setPrimaryLabel(Descriptor desc) {
|
|
switch (desc) {
|
|
case Descriptor::seqTrans:
|
|
case Descriptor::E:
|
|
case Descriptor::seqCis:
|
|
case Descriptor::Z: {
|
|
auto carriers = getCarriers();
|
|
dp_bond->setStereoAtoms(carriers[0]->getIdx(), carriers[1]->getIdx());
|
|
dp_bond->setStereo(d_cfg);
|
|
dp_bond->setProp(common_properties::_CIPCode, to_string(desc));
|
|
dp_bond->setProp(common_properties::_CIPNeighborOrder, d_ranked_anchors,
|
|
true);
|
|
return;
|
|
}
|
|
case Descriptor::R:
|
|
case Descriptor::S:
|
|
case Descriptor::r:
|
|
case Descriptor::s:
|
|
case Descriptor::M:
|
|
case Descriptor::P:
|
|
case Descriptor::m:
|
|
case Descriptor::p:
|
|
case Descriptor::SP_4:
|
|
case Descriptor::TBPY_5:
|
|
case Descriptor::OC_6:
|
|
throw std::runtime_error(
|
|
"Received a Descriptor that is not supported for double bonds");
|
|
default:
|
|
throw std::runtime_error("Received an invalid Bond Descriptor");
|
|
}
|
|
}
|
|
|
|
bool Sp2Bond::hasPrimaryLabel() const {
|
|
return dp_bond->hasProp(common_properties::_CIPCode);
|
|
}
|
|
|
|
void Sp2Bond::resetPrimaryLabel() const {
|
|
dp_bond->clearProp(common_properties::_CIPCode);
|
|
}
|
|
|
|
Descriptor Sp2Bond::label(const Rules &comp) {
|
|
auto &digraph = getDigraph();
|
|
auto root1 = digraph.getOriginalRoot();
|
|
if (digraph.getCurrentRoot() != root1) {
|
|
digraph.changeRoot(root1);
|
|
}
|
|
|
|
return label(root1, digraph, comp);
|
|
}
|
|
|
|
Descriptor Sp2Bond::label(Node *root1, Digraph &digraph, const Rules &comp) {
|
|
const auto &focus1 = getFoci()[0];
|
|
const auto &focus2 = getFoci()[1];
|
|
|
|
const bool is_constitutional = comp.getNumSubRules() == 3;
|
|
|
|
d_ranked_anchors.clear();
|
|
|
|
const auto &internal = findInternalEdge(root1->getEdges(), focus1, focus2);
|
|
if (internal == nullptr) {
|
|
return Descriptor::UNKNOWN;
|
|
}
|
|
const auto &root2 = internal->getOther(root1);
|
|
|
|
auto edges1 = root1->getEdges();
|
|
auto edges2 = root2->getEdges();
|
|
removeInternalEdges(edges1, focus1, focus2);
|
|
removeInternalEdges(edges2, focus1, focus2);
|
|
|
|
auto carriers = std::vector<Atom *>(getCarriers());
|
|
auto config = d_cfg;
|
|
|
|
if (root1->getAtom() == focus2) {
|
|
std::swap(carriers[0], carriers[1]);
|
|
}
|
|
|
|
digraph.changeRoot(root1);
|
|
const auto &priority1 = comp.sort(root1, edges1);
|
|
if (!priority1.isUnique() && !is_constitutional) {
|
|
return Descriptor::UNKNOWN;
|
|
}
|
|
// swap
|
|
if (edges1.size() > 1 && carriers[0] != edges1[0]->getEnd()->getAtom()) {
|
|
if (config == Bond::STEREOCIS) {
|
|
config = Bond::STEREOTRANS;
|
|
} else {
|
|
config = Bond::STEREOCIS;
|
|
}
|
|
}
|
|
digraph.changeRoot(root2);
|
|
const auto &priority2 = comp.sort(root2, edges2);
|
|
if (!priority2.isUnique() || !priority1.isUnique()) {
|
|
return Descriptor::UNKNOWN;
|
|
}
|
|
// swap
|
|
if (edges2.size() > 1 && carriers[1] != edges2[0]->getEnd()->getAtom()) {
|
|
if (config == Bond::STEREOCIS) {
|
|
config = Bond::STEREOTRANS;
|
|
} else {
|
|
config = Bond::STEREOCIS;
|
|
}
|
|
}
|
|
|
|
{
|
|
// At this point, edges1 and edges2 are sorted by priority starting from
|
|
// this node. Record that now! - they may be resorted after processing
|
|
// other nodes.
|
|
|
|
// As weird as it seems, these may actually be implicit Hs: Rule 2
|
|
// in the paper on which this code is based states that,
|
|
// in CIP ranks, H > 1H, so implicit H actually has a higher
|
|
// priority than 1H (!!!). getAtomIdx() returns Atom::NOATOM
|
|
// if that is the case.
|
|
auto carrier1_idx = edges1[0]->getEnd()->getAtomIdx();
|
|
auto carrier2_idx = edges2[0]->getEnd()->getAtomIdx();
|
|
|
|
// Make sure the stereo atoms are in the right order
|
|
if (edges1[0]->getBeg()->getAtom() == focus1) {
|
|
d_ranked_anchors.assign({carrier1_idx, carrier2_idx});
|
|
} else if (edges2[0]->getBeg()->getAtom() == focus1) {
|
|
d_ranked_anchors.assign({carrier2_idx, carrier1_idx});
|
|
}
|
|
}
|
|
|
|
if (config == Bond::STEREOCIS) {
|
|
if (priority1.isPseudoAsymetric() != priority2.isPseudoAsymetric()) {
|
|
return Descriptor::seqCis;
|
|
} else {
|
|
return Descriptor::Z;
|
|
}
|
|
} else if (config == Bond::STEREOTRANS) {
|
|
if (priority1.isPseudoAsymetric() != priority2.isPseudoAsymetric()) {
|
|
return Descriptor::seqTrans;
|
|
} else {
|
|
return Descriptor::E;
|
|
}
|
|
}
|
|
|
|
return Descriptor::UNKNOWN;
|
|
}
|
|
|
|
} // namespace CIPLabeler
|
|
} // namespace RDKit
|