mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Fix STEREOANY (wavy bond) loss during InChI roundtrip (#9315)
When converting molecules with wavy bonds (Bond::STEREOANY on double bonds) through InChI and back, the stereo information was silently dropped. This affected any workflow using InChI roundtrips for canonicalization (e.g. with -SUU flag). Two bugs in External/INCHI-API/inchi.cpp: Reverse path (InchiToMol): The stereo0D processing loop skipped INCHI_PARITY_UNDEFINED entries before they could reach the double bond handler. The handler already had an else clause that correctly sets Bond::STEREOANY, but it was never reached. Fix: only skip INCHI_PARITY_NONE at the top level, and add a guard in the Tetrahedral case to prevent UNDEFINED/UNKNOWN from incorrectly setting chirality. Forward path (MolToInchi): STEREOANY double bonds were only handled by collapsing the coordinates — InChI then produced no stereo annotation under -SUU. Fix: also emit a stereo0D entry with INCHI_PARITY_UNKNOWN parity so InChI's -SUU output correctly carries the "stereo unknown" designation. StereoAtoms may be cleared for STEREOANY, so we locate the two outer neighbors by iterating bonds. New test testStereoAnyRoundtrip in External/INCHI-API/test.cpp covers 9 representative cases (Schiff base, oxime, cinnamic acid, chalcone, crotonaldehyde, tamoxifen-like, retinal-like, plus two molecules with a chiral center adjacent to the wavy bond). Counts in rdkit/Chem/UnitTestInchi.py shift by 1 (689 same, 492 reasonable) because the new STEREOANY emission produces a more accurate roundtrip for one entry in the test inventory. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
47
External/INCHI-API/inchi.cpp
vendored
47
External/INCHI-API/inchi.cpp
vendored
@@ -1439,8 +1439,7 @@ RWMol *InchiToMol(const std::string &inchi, ExtraInchiReturnValues &rv,
|
||||
Chirality::assignAtomCIPRanks(*m, ranks);
|
||||
for (unsigned int i = 0; i < numStereo0D; i++) {
|
||||
inchi_Stereo0D *stereo0DPtr = inchiOutput.stereo0D + i;
|
||||
if (stereo0DPtr->parity == INCHI_PARITY_NONE ||
|
||||
stereo0DPtr->parity == INCHI_PARITY_UNDEFINED) {
|
||||
if (stereo0DPtr->parity == INCHI_PARITY_NONE) {
|
||||
continue;
|
||||
}
|
||||
switch (stereo0DPtr->type) {
|
||||
@@ -1585,6 +1584,10 @@ RWMol *InchiToMol(const std::string &inchi, ExtraInchiReturnValues &rv,
|
||||
break;
|
||||
}
|
||||
case INCHI_StereoType_Tetrahedral: {
|
||||
if (stereo0DPtr->parity == INCHI_PARITY_UNDEFINED ||
|
||||
stereo0DPtr->parity == INCHI_PARITY_UNKNOWN) {
|
||||
break;
|
||||
}
|
||||
unsigned int c =
|
||||
indexToAtomIndexMapping[stereo0DPtr->central_atom];
|
||||
Atom *atom = m->getAtomWithIdx(c);
|
||||
@@ -2034,14 +2037,44 @@ std::string MolToInchi(const ROMol &mol, ExtraInchiReturnValues &rv,
|
||||
stereo0D.type = INCHI_StereoType_DoubleBond;
|
||||
stereo0DEntries.push_back(stereo0D);
|
||||
} else if (bond->getStereo() == Bond::STEREOANY) {
|
||||
// have to treat STEREOANY separately because RDKit will clear out
|
||||
// StereoAtoms information.
|
||||
// Here we just change the coordinates of the two end atoms - to bring
|
||||
// them really close - so that InChI will not try to infer stereobond
|
||||
// info from coordinates.
|
||||
// Collapse coordinates so InChI cannot infer stereo from geometry,
|
||||
// and send a proper stereo0D with UNKNOWN parity so that -SUU
|
||||
// produces the correct unknown annotation. StereoAtoms may be
|
||||
// cleared for STEREOANY, so we find neighbors by iterating bonds.
|
||||
inchiAtoms[atomIndex1].x = inchiAtoms[atomIndex2].x;
|
||||
inchiAtoms[atomIndex1].y = inchiAtoms[atomIndex2].y;
|
||||
inchiAtoms[atomIndex1].z = inchiAtoms[atomIndex2].z;
|
||||
int leftNbr = -1;
|
||||
int rightNbr = -1;
|
||||
for (const auto &nbond : m->atomBonds(m->getAtomWithIdx(atomIndex1))) {
|
||||
auto other = nbond->getOtherAtomIdx(atomIndex1);
|
||||
if (other != static_cast<unsigned int>(atomIndex2)) {
|
||||
leftNbr = other;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (const auto &nbond : m->atomBonds(m->getAtomWithIdx(atomIndex2))) {
|
||||
auto other = nbond->getOtherAtomIdx(atomIndex2);
|
||||
if (other != static_cast<unsigned int>(atomIndex1)) {
|
||||
rightNbr = other;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (leftNbr >= 0 && rightNbr >= 0) {
|
||||
inchi_Stereo0D stereo0D;
|
||||
stereo0D.parity = INCHI_PARITY_UNKNOWN;
|
||||
stereo0D.neighbor[0] = leftNbr;
|
||||
stereo0D.neighbor[1] = atomIndex1;
|
||||
stereo0D.neighbor[2] = atomIndex2;
|
||||
stereo0D.neighbor[3] = rightNbr;
|
||||
if (!m->getBondBetweenAtoms(stereo0D.neighbor[0],
|
||||
stereo0D.neighbor[1])) {
|
||||
std::swap(stereo0D.neighbor[0], stereo0D.neighbor[3]);
|
||||
}
|
||||
stereo0D.central_atom = NO_ATOM;
|
||||
stereo0D.type = INCHI_StereoType_DoubleBond;
|
||||
stereo0DEntries.push_back(stereo0D);
|
||||
}
|
||||
}
|
||||
|
||||
// number of bonds
|
||||
|
||||
84
External/INCHI-API/test.cpp
vendored
84
External/INCHI-API/test.cpp
vendored
@@ -959,6 +959,89 @@ void testGithub8239() {
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
namespace {
|
||||
void checkStereoAnyRoundtrip(const char *smiles, const char *desc) {
|
||||
BOOST_LOG(rdInfoLog) << " " << desc << ": " << smiles << std::endl;
|
||||
auto m = SmilesToMol(smiles);
|
||||
TEST_ASSERT(m);
|
||||
|
||||
// verify STEREOANY is present on input
|
||||
bool foundStereoAny = false;
|
||||
for (const auto bond : m->bonds()) {
|
||||
if (bond->getStereo() == Bond::STEREOANY) {
|
||||
foundStereoAny = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
TEST_ASSERT(foundStereoAny);
|
||||
|
||||
// convert to InChI with -SUU (include unknown/undefined stereo) and back
|
||||
ExtraInchiReturnValues tmp;
|
||||
auto inchi = MolToInchi(*m, tmp, "-SUU");
|
||||
TEST_ASSERT(!inchi.empty());
|
||||
|
||||
ExtraInchiReturnValues tmp2;
|
||||
std::unique_ptr<ROMol> m2(InchiToMol(inchi, tmp2));
|
||||
TEST_ASSERT(m2);
|
||||
|
||||
// verify STEREOANY survives the roundtrip
|
||||
bool foundStereoAny2 = false;
|
||||
for (const auto bond : m2->bonds()) {
|
||||
if (bond->getStereo() == Bond::STEREOANY) {
|
||||
foundStereoAny2 = true;
|
||||
TEST_ASSERT(bond->getStereoAtoms().size() == 2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
TEST_ASSERT(foundStereoAny2);
|
||||
delete m;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Runs the STEREOANY InChI round-trip check over a fixed list of molecules
// that each carry a wavy double bond.
void testStereoAnyRoundtrip() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog)
      << "testing STEREOANY (wavy bond) InChI roundtrip" << std::endl;

  // One entry per molecule: the input SMILES and the label used for logging.
  struct WavyCase {
    const char *smiles;
    const char *label;
  };
  const WavyCase wavyCases[] = {
      // Schiff base with wavy C=N (original bug report molecule)
      {"CSC1=NSC(CC=NC2=CC=CC=C2)=C1C#N |w:8.8|", "Schiff base C=N"},
      // Benzaldoxime: wavy C=N (common pharma motif)
      {"O/N=C/c1ccccc1 |w:1.1|", "benzaldoxime C=N"},
      // Cinnamic acid: wavy C=C in conjugated system
      {"OC(=O)/C=C/c1ccccc1 |w:3.3|", "cinnamic acid C=C"},
      // Chalcone: wavy C=C between two aryl groups
      {"O=C(/C=C/c1ccccc1)c1ccccc1 |w:2.2|", "chalcone C=C"},
      // Crotonaldehyde: simple wavy C=C
      {"C/C=C/C=O |w:1.1|", "crotonaldehyde C=C"},
      // Tamoxifen-like: wavy C=C in drug-like molecule
      {"CC/C(=C(/c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1 |w:2.2|",
       "tamoxifen-like C=C"},
      // Retinal-like: wavy C=C in polyene chain
      {"CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=O)C(C)(C)CCC1 |w:3.3|",
       "retinal-like C=C"},
      // Chiral center adjacent to wavy C=C
      {"O=C(O)[C@@H](CC=Cc1ccccc1)N |w:4.4|", "chiral + wavy C=C"},
      // Chiral center adjacent to wavy C=N oxime
      {"C[C@H](O)/C(=N/O)c1ccccc1 |w:3.3|", "chiral + wavy oxime C=N"},
  };

  // Cases run in declaration order, matching the original call sequence.
  for (const auto &wavyCase : wavyCases) {
    checkStereoAnyRoundtrip(wavyCase.smiles, wavyCase.label);
  }

  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
|
||||
|
||||
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
//
|
||||
//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
|
||||
@@ -985,4 +1068,5 @@ int main() {
|
||||
testGithub5311();
|
||||
testGithub8123();
|
||||
testGithub8239();
|
||||
testStereoAnyRoundtrip();
|
||||
}
|
||||
|
||||
@@ -255,9 +255,9 @@ class TestCase(unittest.TestCase):
|
||||
same += 1
|
||||
fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}"
|
||||
print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
|
||||
self.assertEqual(same, 688)
|
||||
self.assertEqual(same, 689)
|
||||
self.assertEqual(diff, 0)
|
||||
self.assertEqual(reasonable, 493)
|
||||
self.assertEqual(reasonable, 492)
|
||||
|
||||
def test2InchiOptions(self):
|
||||
m = MolFromSmiles("CC=C(N)C")
|
||||
|
||||
Reference in New Issue
Block a user