Fix STEREOANY (wavy bond) loss during InChI roundtrip (#9315)

When converting molecules with wavy bonds (Bond::STEREOANY on double
bonds) through InChI and back, the stereo information was silently
dropped. This affected any workflow using InChI roundtrips for
canonicalization (e.g. with the -SUU flag).

Two bugs in External/INCHI-API/inchi.cpp:

Reverse path (InchiToMol): The stereo0D processing loop skipped
INCHI_PARITY_UNDEFINED entries before they could reach the double bond
handler. The handler already had an else clause that correctly sets
Bond::STEREOANY, but it was never reached. Fix: only skip
INCHI_PARITY_NONE at the top level, and add a guard in the Tetrahedral
case to prevent UNDEFINED/UNKNOWN from incorrectly setting chirality.

Forward path (MolToInchi): STEREOANY double bonds were only handled by
collapsing the coordinates — InChI then produced no stereo annotation
under -SUU. Fix: also emit a stereo0D entry with INCHI_PARITY_UNKNOWN
parity so InChI's -SUU output correctly carries the "stereo unknown"
designation. StereoAtoms may be cleared for STEREOANY, so we locate
the two outer neighbors by iterating bonds.

New test testStereoAnyRoundtrip in External/INCHI-API/test.cpp covers
9 representative cases (Schiff base, oxime, cinnamic acid, chalcone,
crotonaldehyde, tamoxifen-like, retinal-like, plus two molecules with
a chiral center adjacent to the wavy bond).

Counts in rdkit/Chem/UnitTestInchi.py shift by 1 (689 same, 492
reasonable) because the new STEREOANY emission produces a more
accurate roundtrip for one entry in the test inventory.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Rody Arantes
2026-06-02 08:41:00 -04:00
committed by GitHub
parent b04a861ae7
commit 5d9892575c
3 changed files with 126 additions and 9 deletions

View File

@@ -1439,8 +1439,7 @@ RWMol *InchiToMol(const std::string &inchi, ExtraInchiReturnValues &rv,
Chirality::assignAtomCIPRanks(*m, ranks);
for (unsigned int i = 0; i < numStereo0D; i++) {
inchi_Stereo0D *stereo0DPtr = inchiOutput.stereo0D + i;
if (stereo0DPtr->parity == INCHI_PARITY_NONE ||
stereo0DPtr->parity == INCHI_PARITY_UNDEFINED) {
if (stereo0DPtr->parity == INCHI_PARITY_NONE) {
continue;
}
switch (stereo0DPtr->type) {
@@ -1585,6 +1584,10 @@ RWMol *InchiToMol(const std::string &inchi, ExtraInchiReturnValues &rv,
break;
}
case INCHI_StereoType_Tetrahedral: {
if (stereo0DPtr->parity == INCHI_PARITY_UNDEFINED ||
stereo0DPtr->parity == INCHI_PARITY_UNKNOWN) {
break;
}
unsigned int c =
indexToAtomIndexMapping[stereo0DPtr->central_atom];
Atom *atom = m->getAtomWithIdx(c);
@@ -2034,14 +2037,44 @@ std::string MolToInchi(const ROMol &mol, ExtraInchiReturnValues &rv,
stereo0D.type = INCHI_StereoType_DoubleBond;
stereo0DEntries.push_back(stereo0D);
} else if (bond->getStereo() == Bond::STEREOANY) {
// have to treat STEREOANY separately because RDKit will clear out
// StereoAtoms information.
// Here we just change the coordinates of the two end atoms - to bring
// them really close - so that InChI will not try to infer stereobond
// info from coordinates.
// Collapse coordinates so InChI cannot infer stereo from geometry,
// and send a proper stereo0D with UNKNOWN parity so that -SUU
// produces the correct unknown annotation. StereoAtoms may be
// cleared for STEREOANY, so we find neighbors by iterating bonds.
inchiAtoms[atomIndex1].x = inchiAtoms[atomIndex2].x;
inchiAtoms[atomIndex1].y = inchiAtoms[atomIndex2].y;
inchiAtoms[atomIndex1].z = inchiAtoms[atomIndex2].z;
int leftNbr = -1;
int rightNbr = -1;
for (const auto &nbond : m->atomBonds(m->getAtomWithIdx(atomIndex1))) {
auto other = nbond->getOtherAtomIdx(atomIndex1);
if (other != static_cast<unsigned int>(atomIndex2)) {
leftNbr = other;
break;
}
}
for (const auto &nbond : m->atomBonds(m->getAtomWithIdx(atomIndex2))) {
auto other = nbond->getOtherAtomIdx(atomIndex2);
if (other != static_cast<unsigned int>(atomIndex1)) {
rightNbr = other;
break;
}
}
if (leftNbr >= 0 && rightNbr >= 0) {
inchi_Stereo0D stereo0D;
stereo0D.parity = INCHI_PARITY_UNKNOWN;
stereo0D.neighbor[0] = leftNbr;
stereo0D.neighbor[1] = atomIndex1;
stereo0D.neighbor[2] = atomIndex2;
stereo0D.neighbor[3] = rightNbr;
if (!m->getBondBetweenAtoms(stereo0D.neighbor[0],
stereo0D.neighbor[1])) {
std::swap(stereo0D.neighbor[0], stereo0D.neighbor[3]);
}
stereo0D.central_atom = NO_ATOM;
stereo0D.type = INCHI_StereoType_DoubleBond;
stereo0DEntries.push_back(stereo0D);
}
}
// number of bonds

View File

@@ -959,6 +959,89 @@ void testGithub8239() {
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
namespace {
void checkStereoAnyRoundtrip(const char *smiles, const char *desc) {
BOOST_LOG(rdInfoLog) << " " << desc << ": " << smiles << std::endl;
auto m = SmilesToMol(smiles);
TEST_ASSERT(m);
// verify STEREOANY is present on input
bool foundStereoAny = false;
for (const auto bond : m->bonds()) {
if (bond->getStereo() == Bond::STEREOANY) {
foundStereoAny = true;
break;
}
}
TEST_ASSERT(foundStereoAny);
// convert to InChI with -SUU (include unknown/undefined stereo) and back
ExtraInchiReturnValues tmp;
auto inchi = MolToInchi(*m, tmp, "-SUU");
TEST_ASSERT(!inchi.empty());
ExtraInchiReturnValues tmp2;
std::unique_ptr<ROMol> m2(InchiToMol(inchi, tmp2));
TEST_ASSERT(m2);
// verify STEREOANY survives the roundtrip
bool foundStereoAny2 = false;
for (const auto bond : m2->bonds()) {
if (bond->getStereo() == Bond::STEREOANY) {
foundStereoAny2 = true;
TEST_ASSERT(bond->getStereoAtoms().size() == 2);
break;
}
}
TEST_ASSERT(foundStereoAny2);
delete m;
}
} // namespace
// Drives checkStereoAnyRoundtrip over a fixed table of molecules that each
// carry a wavy-bond (|w:...|) annotation in their CXSMILES.
void testStereoAnyRoundtrip() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog)
      << "testing STEREOANY (wavy bond) InChI roundtrip" << std::endl;

  struct WavyBondCase {
    const char *smiles;
    const char *label;
  };

  // The cases are run in the order listed.
  const WavyBondCase wavyBondCases[] = {
      // Schiff base with wavy C=N (original bug report molecule)
      {"CSC1=NSC(CC=NC2=CC=CC=C2)=C1C#N |w:8.8|", "Schiff base C=N"},
      // Benzaldoxime: wavy C=N (common pharma motif)
      {"O/N=C/c1ccccc1 |w:1.1|", "benzaldoxime C=N"},
      // Cinnamic acid: wavy C=C in conjugated system
      {"OC(=O)/C=C/c1ccccc1 |w:3.3|", "cinnamic acid C=C"},
      // Chalcone: wavy C=C between two aryl groups
      {"O=C(/C=C/c1ccccc1)c1ccccc1 |w:2.2|", "chalcone C=C"},
      // Crotonaldehyde: simple wavy C=C
      {"C/C=C/C=O |w:1.1|", "crotonaldehyde C=C"},
      // Tamoxifen-like: wavy C=C in drug-like molecule
      {"CC/C(=C(/c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1 |w:2.2|",
       "tamoxifen-like C=C"},
      // Retinal-like: wavy C=C in polyene chain
      {"CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=O)C(C)(C)CCC1 |w:3.3|",
       "retinal-like C=C"},
      // Chiral center adjacent to wavy C=C
      {"O=C(O)[C@@H](CC=Cc1ccccc1)N |w:4.4|", "chiral + wavy C=C"},
      // Chiral center adjacent to wavy C=N oxime
      {"C[C@H](O)/C(=N/O)c1ccccc1 |w:3.3|", "chiral + wavy oxime C=N"},
  };

  for (const auto &entry : wavyBondCases) {
    checkStereoAnyRoundtrip(entry.smiles, entry.label);
  }

  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
//
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
@@ -985,4 +1068,5 @@ int main() {
testGithub5311();
testGithub8123();
testGithub8239();
testStereoAnyRoundtrip();
}