atropisomer bond atoms are checked explicitly for SP2 hybridization (#8974)

* atropisomer bond atoms are checked explicitly for SP2 hybridization

Previously, we only checked that totalDegree was 2 or 3,
but this causes false positives for something like a chiral sulfoxide,
since the S is tetrahedral (sp3) but has only 3 substituents.
The hybridization code also relies on totalDegree,
but it additionally takes conjugation into account, so we make sure conjugation is set first.
So while this is more expensive per molecule, it is closer to the intent.

Closes #8973

* assert we got no warnings on sulfinamides

* cleanup

* bring back degree based check, quick pass so that we don't run expensive calc on all mols with wedge bonds

* bring conformer back
This commit is contained in:
Yakov Pechersky
2025-12-29 12:04:40 -05:00
committed by Greg Landrum
parent 374d3a8143
commit edbb86d1b3
5 changed files with 155 additions and 4 deletions

View File

@@ -538,6 +538,13 @@ void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) {
}
}
if (bondsToTry.empty()) {
return;
}
// First, do a simple check with TotalDegree to see if any bonds might be
// candidates before doing the expensive hybridization calculation.
bool anyBondPassesDegreeCheck = false;
for (auto bondToTry : bondsToTry) {
if (bondToTry->getBeginAtom()->needsUpdatePropertyCache()) {
bondToTry->getBeginAtom()->updatePropertyCache(false);
@@ -545,12 +552,48 @@ void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) {
if (bondToTry->getEndAtom()->needsUpdatePropertyCache()) {
bondToTry->getEndAtom()->updatePropertyCache(false);
}
if (bondToTry->getBondType() == Bond::SINGLE &&
bondToTry->getStereo() != Bond::BondStereo::STEREOANY &&
bondToTry->getBeginAtom()->getTotalDegree() >= 2 &&
bondToTry->getBeginAtom()->getTotalDegree() <= 3 &&
bondToTry->getEndAtom()->getTotalDegree() >= 2 &&
bondToTry->getEndAtom()->getTotalDegree() <= 3) {
anyBondPassesDegreeCheck = true;
break;
}
}
if (!anyBondPassesDegreeCheck) {
return;
}
// Defer the cache update on the whole mol until we know we have bonds to try.
// We need to update the whole mol, not just the incident atoms,
// because we need to calculate hybridization, which is non-local.
bool needsUpdate =
mol.needsUpdatePropertyCache() ||
std::any_of(mol.atoms().begin(), mol.atoms().end(), [](const auto atom) {
return atom->getAtomicNum() != 0 &&
atom->getHybridization() == Atom::HybridizationType::UNSPECIFIED;
});
if (needsUpdate) {
mol.updatePropertyCache(false);
MolOps::setConjugation(mol);
MolOps::setHybridization(mol);
}
for (auto bondToTry : bondsToTry) {
if (bondToTry->getBondType() != Bond::SINGLE ||
bondToTry->getStereo() == Bond::BondStereo::STEREOANY ||
bondToTry->getBeginAtom()->getTotalDegree() < 2 ||
bondToTry->getEndAtom()->getTotalDegree() < 2 ||
bondToTry->getBeginAtom()->getTotalDegree() > 3 ||
bondToTry->getEndAtom()->getTotalDegree() > 3) {
// Previously, we only checked that totalDegree was 2 or 3,
// but this causes false positives for something like a chiral sulfoxide,
// since the S is tetrahedral (sp3) but has only 3 substituents.
// The hybridization code also relies on totalDegree, but it additionally
// takes conjugation into account, so we make sure conjugation is set first.
// So while this is more expensive per molecule, it is closer to the intent.
bondToTry->getBeginAtom()->getHybridization() != Atom::SP2 ||
bondToTry->getEndAtom()->getHybridization() != Atom::SP2) {
continue;
}

View File

@@ -27,6 +27,8 @@
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <filesystem>
using namespace RDKit;
@@ -567,6 +569,30 @@ void testLookForAtropisomersInSDdfFiles(std::string fileName,
TEST_ASSERT(notFoundCount == expectedMisses);
}
void testSulfinamideExamplesHaveNoAtropisomers() {
const std::vector<std::string> controlFiles = {
"sulfinamide-double-bond-O-R.mol",
"sulfinamide-single-bond-O-R.mol",
"sulfinamide-single-bond-O-S.mol",
};
std::string rdbase = getenv("RDBASE");
std::stringstream warningCapture;
rdWarningLog->SetTee(warningCapture);
for (const auto &file : controlFiles) {
auto fName = rdbase +
"/Code/GraphMol/FileParsers/test_data/atropisomers/" +
file;
BOOST_LOG(rdInfoLog) << "Validating absence of atropisomers in " << file
<< std::endl;
auto mol = std::unique_ptr<RWMol>(MolFileToMol(fName, true, false, false));
TEST_ASSERT(mol);
Atropisomers::detectAtropisomerChirality(*mol, &mol->getConformer());
TEST_ASSERT(!Atropisomers::doesMolHaveAtropisomers(*mol));
}
rdWarningLog->ClearTee();
TEST_ASSERT(warningCapture.str().empty());
}
int main(int argc, char *argv[]) {
(void)argc;
(void)argv;
@@ -589,6 +615,7 @@ int main(int argc, char *argv[]) {
molAtropTest.RunTests();
testLookForAtropisomersInSDdfFiles("TestMultInSDF.sdf", 1, 4);
testSulfinamideExamplesHaveNoAtropisomers();
return 0;
}

View File

@@ -0,0 +1,27 @@
sulfinamide double-bond-O-R control
ChemDraw11262512372D
0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 7 0 0 1
M V30 BEGIN ATOM
M V30 1 C -1.071690 0.825057 0.000000 0
M V30 2 C -0.357230 0.412538 0.000000 0
M V30 3 N -0.357230 -0.412500 0.000000 0
M V30 4 S 0.357229 -0.825019 0.000000 0
M V30 5 O 0.357229 -1.650057 0.000000 0
M V30 6 C 1.071690 -0.412500 0.000000 0
M V30 7 C 0.357247 0.825029 0.000000 0
M V30 8 C -1.071668 1.650057 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3 CFG=1
M V30 3 1 3 4
M V30 4 2 4 5
M V30 5 1 4 6 CFG=1
M V30 6 1 2 7
M V30 7 1 1 8
M V30 END BOND
M V30 END CTAB
M END

View File

@@ -0,0 +1,27 @@
sulfinamide single-bond-O-R control
ChemDraw11262512372D
0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 7 0 0 1
M V30 BEGIN ATOM
M V30 1 C -1.071690 0.825057 0.000000 0
M V30 2 C -0.357230 0.412538 0.000000 0
M V30 3 N -0.357230 -0.412500 0.000000 0
M V30 4 S 0.357229 -0.825019 0.000000 0 CHG=1
M V30 5 O 0.357229 -1.650057 0.000000 0 CHG=-1
M V30 6 C 1.071690 -0.412500 0.000000 0
M V30 7 C 0.357247 0.825029 0.000000 0
M V30 8 C -1.071668 1.650057 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3 CFG=1
M V30 3 1 3 4
M V30 4 1 4 5 CFG=1
M V30 5 1 4 6
M V30 6 1 2 7
M V30 7 1 1 8
M V30 END BOND
M V30 END CTAB
M END

View File

@@ -0,0 +1,27 @@
sulfinamide single-bond-O-S control
ChemDraw11262512382D
0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 7 0 0 1
M V30 BEGIN ATOM
M V30 1 C -1.071690 0.825057 0.000000 0
M V30 2 C -0.357230 0.412538 0.000000 0
M V30 3 N -0.357230 -0.412500 0.000000 0
M V30 4 S 0.357229 -0.825019 0.000000 0 CHG=1
M V30 5 O 0.357229 -1.650057 0.000000 0 CHG=-1
M V30 6 C 1.071690 -0.412500 0.000000 0
M V30 7 C 0.357247 0.825029 0.000000 0
M V30 8 C -1.071668 1.650057 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3 CFG=1
M V30 3 1 3 4
M V30 4 1 4 5 CFG=3
M V30 5 1 4 6
M V30 6 1 2 7
M V30 7 1 1 8
M V30 END BOND
M V30 END CTAB
M END