Fix for issue #8965 (#8968)

* add a test

* change stereo bond canonicalization

* update canonicalization watch test with fixed cases

* make canonicalization test stricter (compare CIP codes)

* add reverse symmetry condition

* rewrite double bond canonicalization code

* update tests

* fix multiline comment

* update java tests

* update python test

* nix switchBondDir (unused)

* fix and rename flipBondDir

* refactor comment

* fix shadowed var name, casting

* fix neighbor sorting

* make seen_bonds a vector

* abstract setDirectionFromNeighboringBond

* handle both sides of the bond have directions

* move getNeighboringStereoBond

* check seen_bonds after popping connectedBondsQ

* use references for arguments

* add release note

* add example required by Dan

* add example requested by Dan
This commit is contained in:
Ricardo Rodriguez
2026-02-26 02:58:36 -05:00
committed by GitHub
parent 872b054d5c
commit d5aa90e18f
11 changed files with 474 additions and 349 deletions

View File

@@ -17,10 +17,82 @@
#include <RDGeneral/Exceptions.h>
#include <RDGeneral/hash/hash.hpp>
#include <RDGeneral/utils.h>
#include <ranges>
#include <queue>
#include <algorithm>
namespace RDKit {
namespace Canon {
namespace {
static constexpr Bond::BondDir flipStereoBondDir(Bond::BondDir bondDir) {
switch (bondDir) {
case Bond::ENDUPRIGHT:
return Bond::ENDDOWNRIGHT;
case Bond::ENDDOWNRIGHT:
return Bond::ENDUPRIGHT;
default:
return bondDir;
}
}
void setDirectionFromNeighboringBond(Bond &sourceBond, bool isSourceBondFlipped,
Bond &targetBond,
bool isTargetBondFlipped) {
auto dir = sourceBond.getBondDir();
// By default, both bonds on the same side of the double bond
// should have opposite directions, but this can change if
// one (and only one) of the bonds is flipped (if both were
// flipped, the flips in the direction would cancel out).
if (isSourceBondFlipped == isTargetBondFlipped) {
dir = flipStereoBondDir(dir);
}
targetBond.setBondDir(dir);
}
Bond::BondDir getReferenceDirection(const Bond &dblBond, const Atom &refAtom,
const Atom &targetAtom,
const Bond &refControllingBond,
bool refIsFlipped, const Bond &targetBond,
bool targetIsFlipped) {
Bond::BondDir dir = Bond::NONE;
if (dblBond.getStereo() == Bond::STEREOE ||
dblBond.getStereo() == Bond::STEREOTRANS) {
dir = refControllingBond.getBondDir();
} else if (dblBond.getStereo() == Bond::STEREOZ ||
dblBond.getStereo() == Bond::STEREOCIS) {
dir = flipStereoBondDir(refControllingBond.getBondDir());
}
CHECK_INVARIANT(dir != Bond::NONE, "stereo not set");
// If we're not looking at the bonds used to determine the
// stereochemistry, we need to flip the setting on the other bond:
const INT_VECT &stereoAtoms = dblBond.getStereoAtoms();
if (refAtom.getDegree() == 3 &&
std::ranges::find(stereoAtoms,
static_cast<int>(refControllingBond.getOtherAtomIdx(
refAtom.getIdx()))) == stereoAtoms.end()) {
dir = flipStereoBondDir(dir);
}
if (targetAtom.getDegree() == 3 &&
std::ranges::find(stereoAtoms,
static_cast<int>(targetBond.getOtherAtomIdx(
targetAtom.getIdx()))) == stereoAtoms.end()) {
dir = flipStereoBondDir(dir);
}
// XOR: flips will cancel out if we do both
if (refIsFlipped != targetIsFlipped) {
dir = flipStereoBondDir(dir);
}
return dir;
}
} // namespace
namespace details {
bool isUnsaturated(const Atom *atom, const ROMol &mol) {
for (const auto &bndItr :
@@ -86,84 +158,14 @@ auto _possibleCompare = [](const PossibleType &arg1, const PossibleType &arg2) {
return (std::get<0>(arg1) < std::get<0>(arg2));
};
void switchBondDir(Bond *bond) {
PRECONDITION(bond, "bad bond");
PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic() ||
isDative(*bond),
"bad bond type");
switch (bond->getBondDir()) {
case Bond::ENDUPRIGHT:
bond->setBondDir(Bond::ENDDOWNRIGHT);
break;
case Bond::ENDDOWNRIGHT:
bond->setBondDir(Bond::ENDUPRIGHT);
break;
default:
break;
}
}
namespace {
bool isClosingRingBond(Bond *bond) {
if (bond == nullptr) {
return false;
}
auto beginIdx = bond->getBeginAtomIdx();
auto endIdx = bond->getEndAtomIdx();
return beginIdx > endIdx && beginIdx - endIdx > 1 &&
bond->hasProp(common_properties::_TraversalRingClosureBond);
}
Bond::BondDir flipBondDir(Bond::BondDir bondDir) {
return (bondDir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT;
}
std::pair<Bond::BondDir, bool> getReferenceDirection(
const Bond *dblBond, const Atom *refAtom, const Atom *targetAtom,
const Bond *refControllingBond, const Bond *targetBond) {
Bond::BondDir dir = Bond::NONE;
if (dblBond->getStereo() == Bond::STEREOE ||
dblBond->getStereo() == Bond::STEREOTRANS) {
dir = refControllingBond->getBondDir();
} else if (dblBond->getStereo() == Bond::STEREOZ ||
dblBond->getStereo() == Bond::STEREOCIS) {
dir = flipBondDir(refControllingBond->getBondDir());
}
CHECK_INVARIANT(dir != Bond::NONE, "stereo not set");
// If we're not looking at the bonds used to determine the
// stereochemistry, we need to flip the setting on the other bond:
const INT_VECT &stereoAtoms = dblBond->getStereoAtoms();
auto isFlipped = false;
if (refAtom->getDegree() == 3 &&
std::ranges::find(stereoAtoms,
static_cast<int>(refControllingBond->getOtherAtomIdx(
refAtom->getIdx()))) == stereoAtoms.end()) {
isFlipped = true;
dir = flipBondDir(dir);
}
if (targetAtom->getDegree() == 3 &&
std::ranges::find(stereoAtoms,
static_cast<int>(targetBond->getOtherAtomIdx(
targetAtom->getIdx()))) == stereoAtoms.end()) {
isFlipped = true;
dir = flipBondDir(dir);
}
return std::make_pair(dir, isFlipped);
}
} // namespace
// FIX: this may only be of interest from the SmilesWriter, should we
// move it there?
//
//
void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
const UINT_VECT &atomVisitOrders,
UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
const MolStack &molStack) {
UINT_VECT &bondDirCounts,
UINT_VECT &atomDirCounts) {
PRECONDITION(dblBond, "bad bond");
PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order");
PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo");
@@ -233,6 +235,110 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
return;
}
// We interpret double bonds like this (this is a TRANS bond,
// a CIS one would be similar, but both anchors would be either
// above or below the double bond):
//
// anchor1
// |
// atom1 === atom2
// |
// anchor2
//
// When parsing a SMILES, we expect anchor1 to come before atom1,
// and atom2 before anchor2. But that is not always the case.
// Inverting the order has effects on the bond directions, so
// we define the variables below to help us find the right direction
// for the bond directions. These are interpreted as follows:
//
// - firstFromAtom1 and secondFromAtom1 are considered "flipped" if
// the anchor would come after the double bond start atom1, e.g.
// in the SMILES C(\N)(/O)=C(/N)\O they are both flipped. Note
// that, with this definition secondFromAtom1 is usually "flipped".
//
bool isFirstFromAtom1Flipped = [&]() {
auto anchorIdx = firstFromAtom1->getOtherAtom(atom1)->getIdx();
return (atomVisitOrders[atom1->getIdx()] < atomVisitOrders[anchorIdx]) !=
firstFromAtom1->hasProp(
common_properties::_TraversalRingClosureBond);
}();
bool isSecondFromAtom1Flipped = [&]() {
if (secondFromAtom1 == nullptr) {
return false;
}
auto anchorIdx = secondFromAtom1->getOtherAtom(atom1)->getIdx();
return (atomVisitOrders[atom1->getIdx()] < atomVisitOrders[anchorIdx]) !=
secondFromAtom1->hasProp(
common_properties::_TraversalRingClosureBond);
}();
// - firstFromAtom2 and secondFromAtom2 are considered "flipped" if
// the anchor atom comes before the double bond end atom2 (I think
// this always requires rings to be involved). An example of firstFromAtom2
// being flipped would be the C atom in "C\N=2" in C=c1s/c2n(c1=O)CCCCC\N=2
bool isFirstFromAtom2Flipped = [&]() {
auto anchorIdx = firstFromAtom2->getOtherAtom(atom2)->getIdx();
return (atomVisitOrders[anchorIdx] < atomVisitOrders[atom2->getIdx()]) !=
firstFromAtom2->hasProp(
common_properties::_TraversalRingClosureBond);
}();
bool isSecondFromAtom2Flipped = [&]() {
if (secondFromAtom2 == nullptr) {
return false;
}
auto anchorIdx = secondFromAtom2->getOtherAtom(atom2)->getIdx();
return (atomVisitOrders[anchorIdx] < atomVisitOrders[atom2->getIdx()]) !=
secondFromAtom2->hasProp(
common_properties::_TraversalRingClosureBond);
}();
// Both directions are already set. Update accounting
// and check if both directions on each side are set.
// We hit this in cases with cycles like CO/C1=C/C=C\C=C/C=N\1.
if (dir1Set && dir2Set) {
// these are guaranteed to exist
bondDirCounts[firstFromAtom1->getIdx()] += 1;
bondDirCounts[firstFromAtom2->getIdx()] += 1;
atomDirCounts[atom1->getIdx()] += 1;
atomDirCounts[atom2->getIdx()] += 1;
// To do: check that the existing directions are consistent.
if (secondFromAtom1) {
if (!bondDirCounts[firstFromAtom1->getIdx()]) {
setDirectionFromNeighboringBond(
*secondFromAtom1, isSecondFromAtom1Flipped, *firstFromAtom1,
isFirstFromAtom1Flipped);
} else if (!bondDirCounts[secondFromAtom1->getIdx()]) {
setDirectionFromNeighboringBond(
*firstFromAtom1, isFirstFromAtom1Flipped, *secondFromAtom1,
isSecondFromAtom1Flipped);
}
bondDirCounts[secondFromAtom1->getIdx()] += 1;
atomDirCounts[atom1->getIdx()] += 1;
}
if (secondFromAtom2) {
if (!bondDirCounts[firstFromAtom2->getIdx()]) {
setDirectionFromNeighboringBond(
*secondFromAtom2, isSecondFromAtom2Flipped, *firstFromAtom2,
isFirstFromAtom2Flipped);
} else if (!bondDirCounts[secondFromAtom2->getIdx()]) {
setDirectionFromNeighboringBond(
*firstFromAtom2, isFirstFromAtom2Flipped, *secondFromAtom2,
isSecondFromAtom2Flipped);
}
bondDirCounts[secondFromAtom2->getIdx()] += 1;
atomDirCounts[atom2->getIdx()] += 1;
}
return;
}
bool setFromBond1 = true;
Bond *atom1ControllingBond = firstFromAtom1;
Bond *atom2ControllingBond = firstFromAtom2;
@@ -270,22 +376,10 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
CHECK_INVARIANT(secondFromAtom1, "inconsistent state");
CHECK_INVARIANT(bondDirCounts[secondFromAtom1->getIdx()] > 0,
"inconsistent state");
// It must be the second bond setting the direction.
// This happens when the bond dir is set in a branch:
// v- this double bond
// CC(/C=P/N)=N/O
// ^- the second bond sets the direction
// or when the first bond is a ring closure from an
// earlier traversed atom:
// v- this double bond
// NC1=NOC/C1=N\O
// ^- this closure ends up being the first bond,
// and it does not set the direction.
//
// This addresses parts of Issue 185 and sf.net Issue 1842174
//
auto atom1Dir = secondFromAtom1->getBondDir();
firstFromAtom1->setBondDir(atom1Dir);
setDirectionFromNeighboringBond(*secondFromAtom1,
isSecondFromAtom1Flipped, *firstFromAtom1,
isFirstFromAtom1Flipped);
// acknowledge that secondFromAtom1 is relevant for this bond,
// and prevent removeRedundantBondDirSpecs from removing this
@@ -310,22 +404,10 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
CHECK_INVARIANT(secondFromAtom2, "inconsistent state");
CHECK_INVARIANT(bondDirCounts[secondFromAtom2->getIdx()] > 0,
"inconsistent state");
// It must be the second bond setting the direction.
// This happens when the bond dir is set in a branch:
// v- this double bond
// CC(/C=P/N)=N/O
// ^- the second bond sets the direction
// or when the first bond is a ring closure from an
// earlier traversed atom:
// v- this double bond
// NC1=NOC/C1=N\O
// ^- this closure ends up being the first bond,
// and it does not set the direction.
//
// This addresses parts of Issue 185 and sf.net Issue 1842174
//
auto atom2Dir = secondFromAtom2->getBondDir();
firstFromAtom2->setBondDir(atom2Dir);
setDirectionFromNeighboringBond(*secondFromAtom2,
isSecondFromAtom2Flipped, *firstFromAtom2,
isFirstFromAtom2Flipped);
// acknowledge that secondFromAtom2 is relevant for this bond,
// and prevent removeRedundantBondDirSpecs from removing this
@@ -336,25 +418,29 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
atomDirCounts[atom2->getIdx()] += 2;
atom2ControllingBond = secondFromAtom2;
}
// CHECK_INVARIANT(0,"ring stereochemistry not handled");
} // end of the ring stereochemistry if
// now set the directionality on the other side:
if (setFromBond1) {
auto [atom2Dir, isFlipped] = getReferenceDirection(
dblBond, atom1, atom2, atom1ControllingBond, firstFromAtom2);
auto isControllingAtomFlipped =
(atom1ControllingBond == firstFromAtom1 ? isFirstFromAtom1Flipped
: isSecondFromAtom1Flipped);
if (!isFlipped && isClosingRingBond(dblBond)) {
atom2Dir = flipBondDir(atom2Dir);
}
auto atom2Dir = getReferenceDirection(
*dblBond, *atom1, *atom2, *atom1ControllingBond,
isControllingAtomFlipped, *firstFromAtom2, isFirstFromAtom2Flipped);
firstFromAtom2->setBondDir(atom2Dir);
bondDirCounts[firstFromAtom2->getIdx()] += 1;
atomDirCounts[atom2->getIdx()] += 1;
} else {
auto [atom1Dir, isFlipped] = getReferenceDirection(
dblBond, atom2, atom1, atom2ControllingBond, firstFromAtom1);
auto isControllingAtomFlipped =
(atom2ControllingBond == firstFromAtom2 ? isFirstFromAtom2Flipped
: isSecondFromAtom2Flipped);
auto atom1Dir = getReferenceDirection(
*dblBond, *atom2, *atom1, *atom2ControllingBond,
isControllingAtomFlipped, *firstFromAtom1, isFirstFromAtom1Flipped);
firstFromAtom1->setBondDir(atom1Dir);
@@ -369,49 +455,9 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
///
if (atom1->getDegree() == 3 && secondFromAtom1) {
if (!bondDirCounts[secondFromAtom1->getIdx()]) {
// This bond (the second bond from the starting atom of the double bond)
// is a special case. It's going to appear in a branch in the smiles:
// X\C(\Y)=C/Z
// ^
// |- here
// so it actually needs to go down with the *same* direction as the
// bond that's already been set (because "pulling the bond out of the
// branch" reverses its direction).
// A quick example. This SMILES:
// F/C(\Cl)=C/F
// is *wrong*. This is the correct form:
// F/C(/Cl)=C/F
// So, since we want this bond to have the opposite direction to the
// other one, we put it in with the same direction.
// This was Issue 183
// UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1
// branches off a cycle, and secondFromAtom1 shows up at the end of the
// cycle). This was Github Issue #2023, see it for an example.
auto checkBondsInSameBranch = [&molStack, &bondVisitOrders](
Bond *dblBond, Bond *dirBond) {
auto start = bondVisitOrders[dblBond->getIdx()];
auto end = bondVisitOrders[dirBond->getIdx()];
if (start > end) {
std::swap(start, end);
}
unsigned int branchLevel = 0;
for (auto i = start + 1; i != end; ++i) {
const auto &item = molStack[i];
if (item.type == MOL_STACK_BRANCH_OPEN) {
++branchLevel;
} else if (item.type == MOL_STACK_BRANCH_CLOSE) {
--branchLevel;
}
}
return branchLevel == 0;
};
if (checkBondsInSameBranch(dblBond, secondFromAtom1)) {
auto otherDir = flipBondDir(firstFromAtom1->getBondDir());
secondFromAtom1->setBondDir(otherDir);
} else {
secondFromAtom1->setBondDir(firstFromAtom1->getBondDir());
}
setDirectionFromNeighboringBond(*firstFromAtom1, isFirstFromAtom1Flipped,
*secondFromAtom1,
isSecondFromAtom1Flipped);
}
bondDirCounts[secondFromAtom1->getIdx()] += 1;
atomDirCounts[atom1->getIdx()] += 1;
@@ -419,96 +465,136 @@ void canonicalizeDoubleBond(Bond *dblBond, const UINT_VECT &bondVisitOrders,
if (atom2->getDegree() == 3 && secondFromAtom2) {
if (!bondDirCounts[secondFromAtom2->getIdx()]) {
// Here we set the bond direction to be opposite the other one (since
// both come after the atom connected to the double bond).
Bond::BondDir otherDir;
if (!secondFromAtom2->hasProp(
common_properties::_TraversalRingClosureBond)) {
otherDir = flipBondDir(firstFromAtom2->getBondDir());
} else {
// another one those irritating little reversal things due to
// ring closures
otherDir = firstFromAtom2->getBondDir();
}
secondFromAtom2->setBondDir(otherDir);
setDirectionFromNeighboringBond(*firstFromAtom2, isFirstFromAtom2Flipped,
*secondFromAtom2,
isSecondFromAtom2Flipped);
}
bondDirCounts[secondFromAtom2->getIdx()] += 1;
atomDirCounts[atom2->getIdx()] += 1;
// std::cerr<<" other: "<<secondFromAtom2->getIdx()<<"
// "<<otherDir<<std::endl;
}
}
if (setFromBond1) {
// This is an odd case... The bonds off the beginning atom are
// after the start atom in the traversal stack. These need to
// have their directions reversed. An example SMILES (unlikely
// to actually traverse this way is:
// C(=C/O)/F or C(/F)=C/O
// That bond is Z, without the reversal, this would come out:
// C(=C/O)\F or C(\F)=C/O
// which is E.
//
// In the case of three-coordinate atoms, we don't need to flip
// the second bond because the Issue 183 fix (above) already got
// that one.
//
// This was Issue 191 and continued into sf.net issue 1842174
if (bondVisitOrders[atom1ControllingBond->getIdx()] >
atomVisitOrders[atom1->getIdx()]) {
if (bondDirCounts[atom1ControllingBond->getIdx()] == 1) {
if (!atom1ControllingBond->hasProp(
common_properties::_TraversalRingClosureBond)) {
// std::cerr<<" switcheroo 1"<<std::endl;
switchBondDir(atom1ControllingBond);
void canonicalizeDoubleBonds(ROMol &mol, const UINT_VECT &bondVisitOrders,
const UINT_VECT &atomVisitOrders,
UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
const MolStack &molStack) {
// start by removing the current directions on single bonds
// around double bonds. At the same time, we build a prioritized
// queue to decide the order in which we will canonicalize bonds.
// We want to start with bonds with the most neighboring stereo
// bonds, and in case of ties, start with the bond that has
// the lowest position in the molStack
auto getNeighboringStereoBond = [&mol](const Atom *dblBndAtom,
const Bond *nbrBnd) -> Bond * {
auto otherAtom = nbrBnd->getOtherAtom(dblBndAtom);
for (const auto bond : mol.atomBonds(otherAtom)) {
if (bond != nbrBnd && bond->getBondType() == Bond::DOUBLE &&
bond->getStereo() > Bond::STEREOANY) {
return bond;
}
}
return nullptr;
};
std::greater<const unsigned int &> molStackComparer;
std::less<const unsigned int &> numStereoNbrsComparer;
std::unordered_map<const Bond *, std::vector<Bond *>> stereoBondNbrs;
auto compareBondPriority = [&stereoBondNbrs, &bondVisitOrders,
&molStackComparer, &numStereoNbrsComparer](
const Bond *aBnd, const Bond *bBnd) {
const auto aNumStereoNbrs = stereoBondNbrs[aBnd].size();
const auto bNumStereoNbrs = stereoBondNbrs[bBnd].size();
if (aNumStereoNbrs == bNumStereoNbrs) {
return molStackComparer(bondVisitOrders[aBnd->getIdx()],
bondVisitOrders[bBnd->getIdx()]);
}
return numStereoNbrsComparer(aNumStereoNbrs, bNumStereoNbrs);
};
std::priority_queue<Bond *, std::vector<Bond *>,
decltype(compareBondPriority)>
q{compareBondPriority};
for (auto &msI : molStack) {
if (msI.type != MOL_STACK_BOND) {
// not a bond, skip it
continue;
}
auto bond = msI.obj.bond;
Bond::BondDir dir = bond->getBondDir();
if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
bond->setBondDir(Bond::NONE);
}
if (bond->getBondType() != Bond::DOUBLE ||
bond->getStereo() <= Bond::STEREOANY ||
bond->getStereoAtoms().size() < 2) {
// not a bond that can have stereo or that needs canonicalization
bond->setStereo(Bond::STEREONONE);
continue;
}
auto &currentNbrs = stereoBondNbrs[bond];
for (const auto *dblBondAtom : {bond->getBeginAtom(), bond->getEndAtom()}) {
for (const auto *nbrBond : mol.atomBonds(dblBondAtom)) {
if (!canHaveDirection(*nbrBond)) {
continue;
}
} else if (bondDirCounts[firstFromAtom2->getIdx()] == 1) {
// the controlling bond at atom1 is being set by someone else, flip the
// direction
// on the atom2 bond instead:
// std::cerr<<" switcheroo 2"<<std::endl;
switchBondDir(firstFromAtom2);
if (secondFromAtom2 && bondDirCounts[secondFromAtom2->getIdx()] >= 1) {
switchBondDir(secondFromAtom2);
auto nbrDblBnd = getNeighboringStereoBond(dblBondAtom, nbrBond);
if (nbrDblBnd != nullptr) {
currentNbrs.push_back(nbrDblBnd);
}
}
}
std::ranges::sort(currentNbrs, [&molStackComparer, &bondVisitOrders](
const Bond *aBnd, const Bond *bBnd) {
// Reversing the bonds is intentional: molStackComparer
// is a std::greater comparer (priority queue returns
// the highest element), but here we want to sort in
// increasing order, so we want a std::less comparer,
// which can be achieved by reversing the std::greater
// because we can have no ties here
return molStackComparer(bondVisitOrders[bBnd->getIdx()],
bondVisitOrders[aBnd->getIdx()]);
});
q.emplace(bond);
}
// something to watch out for here. For this molecule and traversal order:
// 0 1 2 3 4 5 6 7 8 <- atom numbers
// C/C=C/C(/N=C/C)=C/C
// ^ ^
// |--|-- these two bonds must match in direction or the SMILES
// is inconsistent (according to Daylight, Marvin does ok with
// it)
// That means that the direction of the bond from atom 3->4 needs to be set
// when the bond from 2->3 is set.
// Issue2023: But only if 3->4 doesn't have a direction yet?
//
// I believe we only need to worry about this for the bonds from atom2.
const Atom *atom3 = firstFromAtom2->getOtherAtom(atom2);
if (atom3->getDegree() == 3) {
Bond *otherAtom3Bond = nullptr;
bool dblBondPresent = false;
for (auto tbond : mol.atomBonds(atom3)) {
if (tbond->getBondType() == Bond::DOUBLE &&
tbond->getStereo() > Bond::STEREOANY) {
dblBondPresent = true;
} else if (tbond->getBondType() == Bond::SINGLE &&
tbond != firstFromAtom2) {
otherAtom3Bond = tbond;
}
// Now that we have bonds in the order we want to handle them,
// do the canonicalization
std::vector<bool> seen_bonds(mol.getNumBonds());
while (!q.empty()) {
const auto bond = q.top();
q.pop();
if (seen_bonds[bond->getIdx()]) {
continue;
}
if (dblBondPresent && otherAtom3Bond &&
otherAtom3Bond->getBondDir() == Bond::NONE) {
// std::cerr<<"set!"<<std::endl;
auto dir = firstFromAtom2->getBondDir();
if (isClosingRingBond(otherAtom3Bond)) {
dir = flipBondDir(dir);
std::queue<Bond *> connectedBondsQ;
connectedBondsQ.push(bond);
while (!connectedBondsQ.empty()) {
const auto currentBond = connectedBondsQ.front();
connectedBondsQ.pop();
if (seen_bonds[currentBond->getIdx()]) {
continue;
}
Canon::canonicalizeDoubleBond(currentBond, bondVisitOrders,
atomVisitOrders, bondDirCounts,
atomDirCounts);
seen_bonds[currentBond->getIdx()] = true;
for (auto nbrStereoBnd : stereoBondNbrs[currentBond]) {
if (!seen_bonds[nbrStereoBnd->getIdx()]) {
connectedBondsQ.push(nbrStereoBnd);
}
}
otherAtom3Bond->setBondDir(dir);
bondDirCounts[otherAtom3Bond->getIdx()] += 1;
atomDirCounts[atom3->getIdx()] += 1;
}
}
}
@@ -883,10 +969,8 @@ void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom,
refBond->setBondDir(Bond::NONE);
atomDirCounts[refBond->getBeginAtomIdx()] -= 1;
atomDirCounts[refBond->getEndAtomIdx()] -= 1;
// std::cerr<<"rb:"<<refBond->getIdx()<<" ";
}
}
// std::cerr<<std::endl;
}
void removeRedundantBondDirSpecs(ROMol &mol, MolStack &molStack,
@@ -1104,22 +1188,12 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
++pos;
}
// traverse the stack and canonicalize double bonds and atoms with (ring)
// stereochemistry
for (auto &msI : molStack) {
if (msI.type == MOL_STACK_BOND &&
msI.obj.bond->getBondType() == Bond::DOUBLE &&
msI.obj.bond->getStereo() > Bond::STEREOANY) {
if (msI.obj.bond->getStereoAtoms().size() >= 2) {
Canon::canonicalizeDoubleBond(msI.obj.bond, bondVisitOrders,
atomVisitOrders, bondDirCounts,
atomDirCounts, molStack);
} else {
// bad stereo spec:
msI.obj.bond->setStereo(Bond::STEREONONE);
}
}
if (doIsomericSmiles) {
canonicalizeDoubleBonds(mol, bondVisitOrders, atomVisitOrders, bondDirCounts,
atomDirCounts, molStack);
// traverse the stack and canonicalize atoms with (ring) stereochemistry
if (doIsomericSmiles) {
for (auto &msI : molStack) {
if (msI.type == MOL_STACK_ATOM &&
msI.obj.atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
!msI.obj.atom->hasProp(common_properties::_brokenChirality)) {

View File

@@ -730,7 +730,7 @@ void testGithub8569() {
});
std::vector<std::pair<std::string, std::string>> expected = {
{"null", "C/C=C/C=C/[*:1].C[*:1]"},
{"C(=C/[*:2])\\[*:1]", "C/C=C/[*:1].C[*:2]"},
{"C(=C\\[*:2])/[*:1]", "C/C=C/[*:1].C[*:2]"},
{"C(/C=C/[*:2])=C\\[*:1]", "C[*:1].C[*:2]"},
{"null", "C/C=C/[*:1].C/C=C/[*:1]"},
};

View File

@@ -1636,6 +1636,7 @@ TEST_CASE("Github #4582: double bonds and ring closures") {
const auto useLegacy = GENERATE(true, false);
CAPTURE(useLegacy);
UseLegacyStereoPerceptionFixture fxn(useLegacy);
auto mol = R"CTAB(CHEMBL409450
RDKit 2D
@@ -1708,11 +1709,6 @@ M END)CTAB"_ctab;
SmilesWriteParams ps;
ps.doRandom = true;
for (auto i = 0u; i < 100; ++i) {
if (i == 13 || i == 25 || i == 38 || i == 50) {
// we know these fail; we hope to address them
// together with issue #8965
continue;
}
INFO("i = " + std::to_string(i));
getRandomGenerator(i + 1)();
auto rsmiles = MolToSmiles(*mol, ps);
@@ -1769,7 +1765,7 @@ M END)CTAB"_ctab;
auto mol = R"SMI(C1=CC/C=C2C3=C/CC=CC=CC\3C\2C=C1)SMI"_smiles;
REQUIRE(mol);
auto smi = MolToSmiles(*mol);
CHECK(smi == R"SMI(C1=CC/C=C2C3=C\CC=CC=CC/3C\2C=C1)SMI");
CHECK(smi == R"SMI(C1=CC/C=C2\C3=C\CC=CC=CC3C2C=C1)SMI");
}
SECTION("CHEMBL3623347") {
auto mol = R"CTAB(CHEMBL3623347

View File

@@ -1342,7 +1342,7 @@ TEST_CASE("Testing Issue 185: Cis/Trans incorrect on writing branches") {
REQUIRE(mol->getBondWithIdx(1)->getBondType() == Bond::DOUBLE);
REQUIRE(mol->getBondWithIdx(1)->getStereo() == Bond::STEREOZ);
refSmi = MolToSmiles(*mol, 1, 0, 0);
REQUIRE(refSmi == "C(\\C)=N\\O");
CHECK(refSmi == "C(/C)=N/O");
delete mol;
// make sure we can round-trip:
mol = SmilesToMol(refSmi);
@@ -1367,11 +1367,11 @@ TEST_CASE("Testing Issue 185: Cis/Trans incorrect on writing branches") {
for (RWMol::BondIterator bondIt = mol->beginBonds();
bondIt != mol->endBonds(); bondIt++) {
if ((*bondIt)->getBondType() == Bond::DOUBLE) {
REQUIRE((*bondIt)->getStereo() == Bond::STEREOE);
CHECK((*bondIt)->getStereo() == Bond::STEREOE);
}
}
smi = MolToSmiles(*mol, 1);
REQUIRE(refSmi == smi);
CHECK(refSmi == smi);
// now repeat that experiment, but this time root the SMILES so that
// we go in a "sensible" order:
@@ -1380,47 +1380,45 @@ TEST_CASE("Testing Issue 185: Cis/Trans incorrect on writing branches") {
mol = SmilesToMol(smi);
REQUIRE(mol);
refSmi = MolToSmiles(*mol, true, false, 6);
REQUIRE(refSmi == "N/P=C/C(C)=N/O");
CHECK(refSmi == "N/P=C/C(C)=N/O");
delete mol;
mol = SmilesToMol(refSmi);
REQUIRE(mol);
for (RWMol::BondIterator bondIt = mol->beginBonds();
bondIt != mol->endBonds(); bondIt++) {
if ((*bondIt)->getBondType() == Bond::DOUBLE) {
REQUIRE((*bondIt)->getStereo() == Bond::STEREOE);
CHECK((*bondIt)->getStereo() == Bond::STEREOE);
}
}
delete mol;
}
TEST_CASE("Testing Issue 191: Bad bond directions in a branch") {
ROMol *mol;
std::string smi, refSmi;
int numE = 0;
smi = "C2=NNC(N=C2)=N\\N=C\\c1ccccc1";
mol = SmilesToMol(smi);
// Only 1 of the double bonds has stereo defined!
constexpr const char *smi = R"SMI(C2=NNC(N=C2)=N\N=C\c1ccccc1)SMI";
constexpr const char *refSmi = R"SMI(C(=N\N=c1nccn[nH]1)/c1ccccc1)SMI";
ROMol *mol = SmilesToMol(smi);
REQUIRE(mol);
REQUIRE(mol->getBondWithIdx(7)->getBondType() == Bond::DOUBLE);
REQUIRE(mol->getBondWithIdx(7)->getStereo() == Bond::STEREOE);
refSmi = MolToSmiles(*mol, 1);
auto tmpSmi = MolToSmiles(*mol, 1);
CHECK(tmpSmi == refSmi);
delete mol;
mol = SmilesToMol(refSmi);
mol = SmilesToMol(tmpSmi);
REQUIRE(mol);
numE = 0;
for (RWMol::BondIterator bondIt = mol->beginBonds();
bondIt != mol->endBonds(); bondIt++) {
if ((*bondIt)->getBondType() == Bond::DOUBLE) {
REQUIRE((*bondIt)->getStereo() != Bond::STEREOZ);
if ((*bondIt)->getStereo() == Bond::STEREOE) {
numE++;
int numE = 0;
for (auto bond : mol->bonds()) {
if (bond->getBondType() == Bond::DOUBLE) {
CHECK(bond->getStereo() != Bond::STEREOZ);
if (bond->getStereo() == Bond::STEREOE) {
++numE;
}
}
}
REQUIRE(numE == 1);
smi = MolToSmiles(*mol, 1);
// std::cout << "ref: " << refSmi << " -> " << smi << std::endl;
REQUIRE(refSmi == smi);
CHECK(numE == 1);
tmpSmi = MolToSmiles(*mol, 1);
CHECK(tmpSmi == refSmi);
delete mol;
}
@@ -1676,7 +1674,7 @@ TEST_CASE("Testing SF.net bug 1842174: bad bond dirs in branches") {
CHECK(smi == "F/C=N/Cl");
smi = MolToSmiles(*mol, true, false, 1);
CHECK(smi == "C(\\F)=N/Cl");
CHECK(smi == R"SMI(C(/F)=N\Cl)SMI");
delete mol;
smi = "C(\\C=C\\F)=C(/Cl)Br";
@@ -2070,7 +2068,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(4)->getStereo() == Bond::STEREOZ);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == "C1=C\\COCCCCC/1");
CHECK(smiles == "C1=C\\COCCCCC/1");
delete m;
}
{
@@ -2081,7 +2079,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(4)->getStereo() == Bond::STEREOE);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == "C1=C/COCCCCC/1");
CHECK(smiles == "C1=C/COCCCCC/1");
delete m;
}
@@ -2091,10 +2089,10 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
m = SmilesToMol(smiles);
REQUIRE(m);
smiles = MolToSmiles(*m, true, false, -1, false);
REQUIRE(smiles == "C1CC/C=C/C=C/CCC1");
CHECK(smiles == "C1CC/C=C/C=C/CCC1");
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == "C1=C/CCCCCC/C=C/1");
CHECK(smiles == "C1=C/CCCCCC/C=C/1");
delete m;
}
@@ -2105,7 +2103,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == "C1=C\\CCCCCC/C=C/1");
CHECK(smiles == "C1=C\\CCCCCC/C=C/1");
delete m;
}
@@ -2117,7 +2115,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(4)->getStereo() == Bond::STEREOE);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == "C1=C/CCCOC/C=C/1");
CHECK(smiles == "C1=C/CCCOC/C=C/1");
delete m;
}
@@ -2127,8 +2125,8 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
std::string smiles = "C1=C/OCC/C=C\\CC\\1";
m = SmilesToMol(smiles);
REQUIRE(m);
REQUIRE(m->getBondWithIdx(0)->getStereo() == Bond::STEREOZ);
REQUIRE(m->getBondWithIdx(5)->getStereo() == Bond::STEREOZ);
CHECK(m->getBondWithIdx(0)->getStereo() == Bond::STEREOZ);
CHECK(m->getBondWithIdx(5)->getStereo() == Bond::STEREOZ);
delete m;
}
@@ -2139,10 +2137,10 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m);
smiles = MolToSmiles(*m, true, false, 7, false);
REQUIRE(smiles == "C1=C/NCCCCC/1");
CHECK(smiles == "C1=C/NCCCCC/1");
smiles = MolToSmiles(*m, true, false, 0, false);
REQUIRE(smiles == "C1CCCCN/C=C/1");
CHECK(smiles == "C1CCCCN/C=C/1");
delete m;
}
@@ -2158,7 +2156,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(14)->getStereo() == Bond::STEREOE);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == R"(CCC[N+]1=C/c2ccccc2OC(=O)/C=C\1O)");
CHECK(smiles == R"(CCC[N+]1=C/c2ccccc2OC(=O)/C=C\1O)");
delete m;
@@ -2170,7 +2168,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(14)->getStereo() == Bond::STEREOE);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == R"(CCC[N+]1=C/c2ccccc2OC(=O)/C=C\1O)");
CHECK(smiles == R"(CCC[N+]1=C/c2ccccc2OC(=O)/C=C\1O)");
delete m;
}
@@ -2188,7 +2186,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(8)->getStereo() == Bond::STEREOZ);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == R"(COC1=C/C=C\C=C/C=N\1)");
CHECK(smiles == R"(COC1=C/C=C\C=C/C=N\1)");
delete m;
@@ -2202,7 +2200,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
REQUIRE(m->getBondWithIdx(8)->getStereo() == Bond::STEREOZ);
smiles = MolToSmiles(*m, true);
REQUIRE(smiles == R"(COC1=C/C=C\C=C/C=N\1)");
CHECK(smiles == R"(COC1=C/C=C\C=C/C=N\1)");
delete m;
}
@@ -2228,7 +2226,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
m2 = SmilesToMol(nsmiles);
REQUIRE(m2);
std::string ncsmiles = MolToSmiles(*m2, true);
REQUIRE(ncsmiles == csmiles);
CHECK(ncsmiles == csmiles);
delete m2;
}
delete m;
@@ -2251,7 +2249,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
m2 = SmilesToMol(nsmiles);
REQUIRE(m2);
std::string ncsmiles = MolToSmiles(*m2, true);
REQUIRE(ncsmiles == csmiles);
CHECK(ncsmiles == csmiles);
delete m2;
}
delete m;
@@ -2274,7 +2272,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
m2 = SmilesToMol(nsmiles);
REQUIRE(m2);
std::string ncsmiles = MolToSmiles(*m2, true);
REQUIRE(ncsmiles == csmiles);
CHECK(ncsmiles == csmiles);
delete m2;
}
delete m;
@@ -2297,7 +2295,7 @@ TEST_CASE("Issue 3139534: stereochemistry in larger rings") {
m2 = SmilesToMol(nsmiles);
REQUIRE(m2);
std::string ncsmiles = MolToSmiles(*m2, true);
REQUIRE(ncsmiles == csmiles);
CHECK(ncsmiles == csmiles);
delete m2;
}
delete m;
@@ -2313,7 +2311,7 @@ TEST_CASE("test adding atom-map information") {
// changed: smiles does not need to be canonical
smiles = MolToSmiles(*m, true, false, -1, false);
REQUIRE(smiles == "[*:1]CCC([C:200])C");
CHECK(smiles == "[*:1]CCC([C:200])C");
delete m;
}
@@ -3764,13 +3762,13 @@ TEST_CASE(
auto mol = "C=c1s/c2n(c1=O)CCCCCCC\\N=2"_smiles;
REQUIRE(mol);
auto smi = MolToSmiles(*mol);
REQUIRE(smi == "C=c1s/c2n(c1=O)CCCCCCC\\N=2");
CHECK(smi == "C=c1s/c2n(c1=O)CCCCCCC\\N=2");
}
{
auto mol = R"SMI(C1=C\C/C=C2C3=C/C/C=C\C=C/C\3C\2\C=C/1)SMI"_smiles;
REQUIRE(mol);
auto smi = MolToSmiles(*mol);
REQUIRE(smi == R"SMI(C1=C\C/C=C2C3=C\C/C=C\C=C/C/3C\2\C=C/1)SMI");
CHECK(smi == R"SMI(C1=C\C/C=C2\C3=C\C/C=C\C=C/C3C2\C=C/1)SMI");
}
}

View File

@@ -1042,9 +1042,10 @@ M END
// This should not throw an invariant violation
auto smiles = MolToSmiles(*m);
CHECK(
smiles ==
R"SMI(CC[C@@]1(C)/C2=C(C)/C3=[N]4->[CoH2+]56[N]2[C@H]([C@@H]1C)[C@]1(C)[N]->5=C(/C(C)=C2[N]->6=C(/C=C4/C(C)(C)[C@@H]3C)[C@@H](C)C\2(C)C)[C@@H](C)C1(C)C)SMI");
CHECK(smiles ==
(R"SMI(CC[C@@]1(C)/C2=C(\C)/C3=[N]4->[CoH2+]56[N]2[C@H]([C@@H]1C))SMI"
R"SMI([C@]1(C)[N]->5=C(/C(C)=C2[N]->6=C(/C=C4/C(C)(C)[C@@H]3C))SMI"
R"SMI([C@@H](C)C\2(C)C)[C@@H](C)C1(C)C)SMI"));
}
TEST_CASE("chiral presence and ranking") {
@@ -1168,51 +1169,51 @@ TEST_CASE("Canonicalization issues watch (see GitHub Issue #8775)") {
// first reported.
const static std::initializer_list<std::tuple<std::string, bool, bool>> samples = {
{R"smi(C/C=C\C=C(/C=C\C)C(/C=C\C)=C/C)smi", false, false}, // #8759
{R"smi(C/C=C\C=C(/C=C\C)C(/C=C\C)=C/C)smi", true, true}, // #8759
{R"smi(C1=C\CCCCCC/C=C/C=C/1)smi", true, true}, // #8759
{R"smi(O=C=NC1=CC2C3=C(C=C1)C2=C(N=C=O)C=C3)smi", false, false}, // #8721
{R"smi(O=C(c1ccccc1C(=O)N1C(=O)c2ccccc2C1=O)N1C(=O)c2ccccc2C1=O)smi",
false, false}, // #8721
{R"smi(O=[N+]([O-])c1cc/c2c(c1)=C(c1ccccc1)/N=c1\\ccc([N+](=O)[O-])cc1=C(c1ccccc1)/N=2)smi",
false, true}, // #8721
{R"smi(C=Cc1c(C)/c2[n-]c1=C=c1[n-]/c(c(CC)c1C)=C\\c1[n-]c3c(c1C)C(=O)[C@H](C(=O)OC)/C3=C1/[NH+]=C(/C=2)[C@@H](C)[C@@H]1CCC(=O)OC/C=C(\\C)CCC[C@H](C)CCC[C@H](C)CCCC(C)C.[Mg+2])smi",
false, true}, // #8721
{R"smi(O=[N+]([O-])c1cc/c2c(c1)=C(c1ccccc1)/N=c1\ccc([N+](=O)[O-])cc1=C(c1ccccc1)/N=2)smi",
true, true}, // #8721
{R"smi(C=Cc1c(C)/c2[n-]c1=C=c1[n-]/c(c(CC)c1C)=C\c1[n-]c3c(c1C)C(=O)[C@H](C(=O)OC)/C3=C1/[NH+]=C(/C=2)[C@@H](C)[C@@H]1CCC(=O)OC/C=C(\C)CCC[C@H](C)CCC[C@H](C)CCCC(C)C.[Mg+2])smi",
true, true}, // #8721
{R"smi(CC1=C(/C=C2\C(C)=C3/C(C)=C(/C=C4\C(C)=C5/C(CCC(=O)O)=C(C(C)=C5N4)C=C6\C(CCC(=O)O)=C(C)C(=C6N2)C=C1)N3)C=C(\C=C)C)smi",
false, false}, // #8089
{R"smi(COC1=N\C2=CC(=O)c3c(c(O)c(C)c4c3C(=O)C(C)(O/C=C/C(OC)C(C)C(OC(C)=O)C(C)C(O)C(C)C(O)C(C)/C=C\C=C/1C)O4)C2=O)smi",
false, false}, // #8089
{R"smi(CC1C2c3cc4/c5c6c7c8c9c%10c6c4c4c3C3=C6c%11c%12c%13c%14c%15c(c9c9c%16c8c(c8/c(c%17c%18c%19c(c(c%11c%11c%19c%19c%17c8c%16c(c%149)c%19c%13%11)C62)C1C%181C[N+](C)(C)C1)=C\C\C=5)C7)C%10C4C31C[N+](C)(C)CC%12%151)smi",
false, true}, // #8089
true, true}, // #8089
{R"smi(CC1=C\[C@H](C)C[C@@]2(C)CC[C@@H](O2)[C@@]23CC[C@@](C)(C[C@@H](O2)[C@H]2O[C@](C)(CC2=O)[C@@H](O)[C@@H]2CC[C@@]4(CCC[C@H](O4)[C@@H](C)C(=O)O[C@@H]4C[C@@H]([C@@]5(O)OCC[C@@H](C)[C@H]5O)O[C@@H]4/C=C/1)O2)O3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC1=C/[C@H]2O[C@@H](C/C=C/C=C/C(=O)O[C@@H]3C[C@@H](/C=C/C/C=C/1)O[C@@H](C/C=C\CCO)[C@]3(C)CO)C[C@H](O)[C@H]2C)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC(=O)OCC1=C\CC/C(C)=C/CC[C@@]2(C)CC[C@@](C(C)C)(/C=C/1)O2)smi",
false, false}, // #8089
{R"smi(CC1=C\C/C=C(\C)CC[C@H]2C(C)(C)[C@@H](\C=C/1)CC[C@]2(C)O)smi",
false, true}, // #8089
true, true}, // #8089
{R"smi(CC1=C\C/C=C(\C)CC[C@H]2C(C)(C)[C@@H](\C=C/1)CC[C@]2(C)O)smi", true,
true}, // #8089
{R"smi(CC(=O)OCC1=C/[C@@H]2OC(=O)[C@H](C)[C@@]2(O)[C@@H](OC(C)=O)[C@H]2[C@]3(CC[C@H](OC(C)=O)[C@]2(C)[C@@H](OC(=O)COC(=O)CC(C)C)\C=C/1)CO3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC(=O)OCC1=C/[C@@H]2OC(=O)[C@H](C)[C@@]2(O)[C@@H](OC(C)=O)[C@H]2[C@]3(CC[C@H](OC(C)=O)[C@]2(C)[C@@H](OC(C)=O)\C=C/1)CO3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(C=Cc1c(C)/c2[nH]/c1=C\C1=N/C(=C\C3=C(C)C4C(=O)N(Cc5cccc(C#Cc6cccc(Nc7ncnc8cc(OCCOC)c(OCCOC)cc78)c6)c5)C(=O)/C(=C5/N=C(/C=2)[C@@H](C)[C@@H]5CCC(=O)OC)C4N3)C(CC)=C1C)smi",
false, true}, // #8089
true, true}, // #8089
{R"smi(CC1=C\C[C@H](O)/C=C/C(C)=C/[C@@H](NC(=O)[C@H](C)O)[C@]2(C)C(=O)O[C@H](C[C@H](O)/C=C/1)[C@@H](C)C2=O)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC1=C/[C@H]2O[C@@H](C/C=C/C=C/C(=O)O[C@@H]3C[C@@H](/C=C/C/C=C/1)O[C@@H](C/C=C\C[C@@H](O)C(=O)O)[C@]3(C)CO)C[C@H](O)[C@H]2C)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(c1ccc2/c3[nH]/c(c2c1)=N\c1ccc(cc1)-c1nc2cc(ccc2o1)/N=c1/[nH]/c(c2ccccc12)=N/c1ccc2nc(oc2c1)-c1ccc(cc1)/N=3)smi",
false, false}, // #8089
true, false}, // #8089
{R"smi(c1ccc2/c3[nH]/c(c2c1)=N\c1ccc(cc1)-c1nc2ccc(cc2o1)/N=c1/[nH]/c(c2ccccc12)=N/c1ccc2nc(oc2c1)-c1ccc(cc1)/N=3)smi",
false, false}, // #8089
true, false}, // #8089
{R"smi(CC1=C/[C@@H](C)C[C@]2(C)CC[C@H](O2)[C@]23CC[C@](C)(C[C@H](O2)[C@@H]2O[C@@](C)(CC2=O)[C@@H](O)[C@H]2CC[C@@]4(CCC[C@@H](O4)[C@H](C)C(=O)O[C@H]4C[C@H]([C@]5(O)OCC[C@H](C)[C@@H]5O)O[C@H]4\C=C/1)O2)O3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC1=C/[C@@H](C)C[C@]2(C)CC[C@H](O2)[C@]23CC[C@](C(=O)O)(C[C@H](O2)[C@@H]2O[C@@](C)(CC2=O)[C@@H](O)[C@H]2CC[C@@]4(CCC[C@@H](O4)[C@H](C)C(=O)O[C@H]4C[C@H]([C@]5(O)OCC[C@H](C)[C@@H]5O)O[C@H]4\C=C/1)O2)O3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(CC1=C/[C@@H](C)C[C@]2(C)CC[C@H](O2)[C@]23CC[C@](CO)(C[C@H](O2)[C@@H]2O[C@@](C)(CC2=O)[C@@H](O)[C@H]2CC[C@@]4(CCC[C@@H](O4)[C@H](C)C(=O)O[C@H]4C[C@H]([C@]5(O)OCC[C@H](C)[C@@H]5O)O[C@H]4\C=C/1)O2)O3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(COC(=O)C1=C/c2cc3c(cc2-c2c(cc(OC)c(OC)c2OC)\C=C/1C(=O)OC)OCO3)smi",
false, false}, // #8089
true, true}, // #8089
{R"smi(N#C[P@@H]/C=C(/P=O)[P@@H]C#N)smi", true, true}, // #8089
{R"smi(C1=C\C/C=C(\C)CC[C@H]2C(C)(C)[C@@H](\C=C/1)CC[C@]2(C)O)smi", true,
true}, // #8089
@@ -1339,6 +1340,8 @@ TEST_CASE("Canonicalization issues watch (see GitHub Issue #8775)") {
{R"smi(O[C@H]1[C@H]2C[C@H](C2)[C@]12CC2)smi", false, true}, // #7759
{R"smi(N[C@]1(C(=O)O)[C@@H]2C[C@H]3C[C@@H](C2)C[C@H]1C3)smi", false,
true}, // #8862
{R"smi(C/C=C1/C(=C\Cl)/C(=C\F)/C(=C\N)/C/1=C\O)smi", false,
false}, // #8965
};
auto count_features = [](RWMol m) {

View File

@@ -9,6 +9,8 @@
//
#include <cstdlib>
#include <optional>
#include <ranges>
#include <catch2/catch_all.hpp>
@@ -6491,4 +6493,64 @@ M END
auto smimb = MolToSmiles(*mol2);
CHECK(smi == smimb);
}
}
}
TEST_CASE(
"GitHub Issue #8965: Stereo bond inversion in SMILES Writer canonicalization") {
const auto legacy_stereo = GENERATE(true, false);
CAPTURE(legacy_stereo);
UseLegacyStereoPerceptionFixture stereo_mode(legacy_stereo);
// These are all (non-canonical) iterations of mol in the issue
constexpr std::array<std::string_view, 4> samples = {
R"smi(C/C=C\C=C(/C=C\C)C(/C=C\C)=C/C)smi", // the reported SMILES
R"smi(C/C=C(\C=C/C)C(/C=C\C)=C/C=C\C)smi",
R"smi(C/C=C\C(=C/C)C(/C=C\C)=C/C=C\C)smi",
R"smi(C/C=C\C(=C/C=C\C)C(/C=C\C)=C/C)smi",
};
// These mol are small and simple enough for CIP calculation to not
// be too expensive.
// This is not exhaustive (we should check which bond is what),
// but it should be enough for this test.
auto get_bond_stereo_labels = [](auto &m) {
CIPLabeler::assignCIPLabels(m);
std::vector<std::string> bond_labels;
for (auto b : m.bonds()) {
std::string label;
if (b->getPropIfPresent(common_properties::_CIPCode, label)) {
bond_labels.push_back(label);
}
}
std::ranges::sort(bond_labels);
return bond_labels;
};
std::optional<std::vector<std::string>> labels_reference;
std::set<std::string> canonicalized_smiles;
for (auto smiles : samples) {
auto m =
v2::SmilesParse::MolFromSmiles(static_cast<const std::string>(smiles));
REQUIRE(m);
auto canonical_smiles = MolToSmiles(*m);
canonicalized_smiles.insert(canonical_smiles);
// Do this after getting the canonical SMILES, so that
// it doesn't have any chances to influence the canonicalization
if (!labels_reference.has_value()) {
labels_reference = get_bond_stereo_labels(*m);
} else {
CHECK(*labels_reference == get_bond_stereo_labels(*m));
}
}
// All samples should canonicalize to the same SMILES
REQUIRE(canonicalized_smiles.size() == 1);
// ... and the canonical SMILES should preserve stereo labels
auto m = v2::SmilesParse::MolFromSmiles(*canonicalized_smiles.begin());
REQUIRE(m);
CHECK(*labels_reference == get_bond_stereo_labels(*m));
}

View File

@@ -2371,7 +2371,6 @@ namespace details {
bool atomHasFourthValence(const Atom *atom);
bool hasSingleHQuery(const Atom::QUERYATOM_QUERY *q);
} // namespace details
void switchBondDir(Bond *bond);
} // namespace Canon
} // namespace RDKit
TEST_CASE("canon details") {
@@ -2394,17 +2393,6 @@ TEST_CASE("canon details") {
}
}
}
TEST_CASE("switchBondDir") {
auto m = "C/C=C/C"_smiles;
REQUIRE(m);
auto bond = m->getBondWithIdx(0);
CHECK(bond->getBondDir() == Bond::BondDir::ENDUPRIGHT);
Canon::switchBondDir(bond);
CHECK(bond->getBondDir() == Bond::BondDir::ENDDOWNRIGHT);
bond->setBondDir(Bond::BondDir::UNKNOWN);
Canon::switchBondDir(bond);
CHECK(bond->getBondDir() == Bond::BondDir::UNKNOWN);
}
#endif
TEST_CASE("allow 5 valent N/P/As to kekulize", "[kekulization]") {

View File

@@ -1570,12 +1570,11 @@ TEST_CASE("Testing Issue 183") {
REQUIRE(m2->getBondWithIdx(10)->getStereo() == Bond::STEREOZ);
refSmi = MolToSmiles(*m2, 1);
BOOST_LOG(rdInfoLog) << "ref: " << refSmi << std::endl;
CHECK(refSmi == R"SMI(C/C(F)=C(\C)C(=C(/C)Cl)/C(F)=C(/C)F)SMI");
m = SmilesToMol(refSmi);
REQUIRE(m);
smi = MolToSmiles(*m, 1);
BOOST_LOG(rdInfoLog) << "smi: " << smi << std::endl;
REQUIRE(refSmi == smi);
CHECK(refSmi == smi);
int nEs = 0, nZs = 0, nDbl = 0;
for (RWMol::BondIterator bondIt = m->beginBonds(); bondIt != m->endBonds();
@@ -3430,7 +3429,8 @@ TEST_CASE("Testing sf.net issue 2316677 : canonicalization error") {
REQUIRE(m);
std::string smi = MolToSmiles(*m, true);
std::cerr << "smi: " << smi << std::endl;
REQUIRE(smi == "Cc1ccc(S(=O)(=O)/N=C2\\CC(=N\\C(C)(C)C)/C2=N\\C(C)(C)C)cc1");
REQUIRE(smi ==
R"SMI(Cc1ccc(S(=O)(=O)\N=C2C/C(=N\C(C)(C)C)C/2=N\C(C)(C)C)cc1)SMI");
delete m;
}

View File

@@ -934,7 +934,7 @@ public class SmilesDetailsTests extends GraphMolTest {
assertEquals(Bond.BondStereo.STEREOZ, mol.getBondWithIdx(1).getStereo());
refSmi = mol.MolToSmiles(true, false, 0); // (1,0,0);
assertEquals("C(\\C)=N\\O", refSmi);
assertEquals("C(/C)=N/O", refSmi);
// make sure we can round-trip:
mol = RWMol.MolFromSmiles(refSmi);
@@ -1148,7 +1148,7 @@ public class SmilesDetailsTests extends GraphMolTest {
smi = mol.MolToSmiles(true, false, 1);
assertEquals("C(\\F)=N/Cl", smi);
assertEquals("C(/F)=N\\Cl", smi);
smi = "C(\\C=C\\F)=C(/Cl)Br";
mol = RWMol.MolFromSmiles(smi);