mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
atropisomer bond atoms are checked explicitly for SP2 hybridization (#8974)
* Atropisomer bond atoms are checked explicitly for SP2 hybridization. Before, we checked only for totalDegree = 2 or 3, but this causes false positives for something like a chiral sulfoxide, since the S is tetrahedral (sp3) but has only 3 substituents. The hybridization code also relies on totalDegree, but it additionally takes conjugation into account (and we make sure conjugation is set first), so while this is more expensive per molecule, it is closer to the intent. Closes #8973
* assert we got no warnings on sulfinamides
* cleanup
* bring back the degree-based check as a quick first pass, so that we don't run the expensive calculation on all mols with wedge bonds
* bring conformer back
This commit is contained in:
committed by
Greg Landrum
parent
374d3a8143
commit
edbb86d1b3
@@ -538,6 +538,13 @@ void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) {
|
||||
}
|
||||
}
|
||||
|
||||
if (bondsToTry.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// First, do a simple check with TotalDegree to see if any bonds might be
|
||||
// candidates before doing the expensive hybridization calculation.
|
||||
bool anyBondPassesDegreeCheck = false;
|
||||
for (auto bondToTry : bondsToTry) {
|
||||
if (bondToTry->getBeginAtom()->needsUpdatePropertyCache()) {
|
||||
bondToTry->getBeginAtom()->updatePropertyCache(false);
|
||||
@@ -545,12 +552,48 @@ void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) {
|
||||
if (bondToTry->getEndAtom()->needsUpdatePropertyCache()) {
|
||||
bondToTry->getEndAtom()->updatePropertyCache(false);
|
||||
}
|
||||
|
||||
if (bondToTry->getBondType() == Bond::SINGLE &&
|
||||
bondToTry->getStereo() != Bond::BondStereo::STEREOANY &&
|
||||
bondToTry->getBeginAtom()->getTotalDegree() >= 2 &&
|
||||
bondToTry->getBeginAtom()->getTotalDegree() <= 3 &&
|
||||
bondToTry->getEndAtom()->getTotalDegree() >= 2 &&
|
||||
bondToTry->getEndAtom()->getTotalDegree() <= 3) {
|
||||
anyBondPassesDegreeCheck = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!anyBondPassesDegreeCheck) {
|
||||
return;
|
||||
}
|
||||
|
||||
// defer cache update on the whole mol unless we actually have bonds to try
|
||||
// we need to do an update on the whole mol and not just incident atoms
|
||||
// because we need to calculate hybridization, which is non-local
|
||||
bool needsUpdate =
|
||||
mol.needsUpdatePropertyCache() ||
|
||||
std::any_of(mol.atoms().begin(), mol.atoms().end(), [](const auto atom) {
|
||||
return atom->getAtomicNum() != 0 &&
|
||||
atom->getHybridization() == Atom::HybridizationType::UNSPECIFIED;
|
||||
});
|
||||
if (needsUpdate) {
|
||||
mol.updatePropertyCache(false);
|
||||
MolOps::setConjugation(mol);
|
||||
MolOps::setHybridization(mol);
|
||||
}
|
||||
|
||||
for (auto bondToTry : bondsToTry) {
|
||||
if (bondToTry->getBondType() != Bond::SINGLE ||
|
||||
bondToTry->getStereo() == Bond::BondStereo::STEREOANY ||
|
||||
bondToTry->getBeginAtom()->getTotalDegree() < 2 ||
|
||||
bondToTry->getEndAtom()->getTotalDegree() < 2 ||
|
||||
bondToTry->getBeginAtom()->getTotalDegree() > 3 ||
|
||||
bondToTry->getEndAtom()->getTotalDegree() > 3) {
|
||||
// before, we checked only on totalDegree = 2 or 3,
|
||||
// but this causes false positives for something like a chiral sulfoxide
|
||||
// since the S is tetrahedral (sp3) but has only 3 substituents.
|
||||
// the hybridization code relies on totalDegree,
|
||||
// but it also takes conjugation into account (which we make sure is set),
|
||||
// so while this is more expensive per molecule, it is closer to intent
|
||||
bondToTry->getBeginAtom()->getHybridization() != Atom::SP2 ||
|
||||
bondToTry->getEndAtom()->getHybridization() != Atom::SP2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <filesystem>
|
||||
using namespace RDKit;
|
||||
@@ -567,6 +569,30 @@ void testLookForAtropisomersInSDdfFiles(std::string fileName,
|
||||
TEST_ASSERT(notFoundCount == expectedMisses);
|
||||
}
|
||||
|
||||
void testSulfinamideExamplesHaveNoAtropisomers() {
|
||||
const std::vector<std::string> controlFiles = {
|
||||
"sulfinamide-double-bond-O-R.mol",
|
||||
"sulfinamide-single-bond-O-R.mol",
|
||||
"sulfinamide-single-bond-O-S.mol",
|
||||
};
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
std::stringstream warningCapture;
|
||||
rdWarningLog->SetTee(warningCapture);
|
||||
for (const auto &file : controlFiles) {
|
||||
auto fName = rdbase +
|
||||
"/Code/GraphMol/FileParsers/test_data/atropisomers/" +
|
||||
file;
|
||||
BOOST_LOG(rdInfoLog) << "Validating absence of atropisomers in " << file
|
||||
<< std::endl;
|
||||
auto mol = std::unique_ptr<RWMol>(MolFileToMol(fName, true, false, false));
|
||||
TEST_ASSERT(mol);
|
||||
Atropisomers::detectAtropisomerChirality(*mol, &mol->getConformer());
|
||||
TEST_ASSERT(!Atropisomers::doesMolHaveAtropisomers(*mol));
|
||||
}
|
||||
rdWarningLog->ClearTee();
|
||||
TEST_ASSERT(warningCapture.str().empty());
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
@@ -589,6 +615,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
molAtropTest.RunTests();
|
||||
testLookForAtropisomersInSDdfFiles("TestMultInSDF.sdf", 1, 4);
|
||||
testSulfinamideExamplesHaveNoAtropisomers();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
sulfinamide double-bond-O-R control
|
||||
ChemDraw11262512372D
|
||||
|
||||
0 0 0 0 0 0 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 8 7 0 0 1
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -1.071690 0.825057 0.000000 0
|
||||
M V30 2 C -0.357230 0.412538 0.000000 0
|
||||
M V30 3 N -0.357230 -0.412500 0.000000 0
|
||||
M V30 4 S 0.357229 -0.825019 0.000000 0
|
||||
M V30 5 O 0.357229 -1.650057 0.000000 0
|
||||
M V30 6 C 1.071690 -0.412500 0.000000 0
|
||||
M V30 7 C 0.357247 0.825029 0.000000 0
|
||||
M V30 8 C -1.071668 1.650057 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 2 3 CFG=1
|
||||
M V30 3 1 3 4
|
||||
M V30 4 2 4 5
|
||||
M V30 5 1 4 6 CFG=1
|
||||
M V30 6 1 2 7
|
||||
M V30 7 1 1 8
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
@@ -0,0 +1,27 @@
|
||||
sulfinamide single-bond-O-R control
|
||||
ChemDraw11262512372D
|
||||
|
||||
0 0 0 0 0 0 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 8 7 0 0 1
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -1.071690 0.825057 0.000000 0
|
||||
M V30 2 C -0.357230 0.412538 0.000000 0
|
||||
M V30 3 N -0.357230 -0.412500 0.000000 0
|
||||
M V30 4 S 0.357229 -0.825019 0.000000 0 CHG=1
|
||||
M V30 5 O 0.357229 -1.650057 0.000000 0 CHG=-1
|
||||
M V30 6 C 1.071690 -0.412500 0.000000 0
|
||||
M V30 7 C 0.357247 0.825029 0.000000 0
|
||||
M V30 8 C -1.071668 1.650057 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 2 3 CFG=1
|
||||
M V30 3 1 3 4
|
||||
M V30 4 1 4 5 CFG=1
|
||||
M V30 5 1 4 6
|
||||
M V30 6 1 2 7
|
||||
M V30 7 1 1 8
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
@@ -0,0 +1,27 @@
|
||||
sulfinamide single-bond-O-S control
|
||||
ChemDraw11262512382D
|
||||
|
||||
0 0 0 0 0 0 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 8 7 0 0 1
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -1.071690 0.825057 0.000000 0
|
||||
M V30 2 C -0.357230 0.412538 0.000000 0
|
||||
M V30 3 N -0.357230 -0.412500 0.000000 0
|
||||
M V30 4 S 0.357229 -0.825019 0.000000 0 CHG=1
|
||||
M V30 5 O 0.357229 -1.650057 0.000000 0 CHG=-1
|
||||
M V30 6 C 1.071690 -0.412500 0.000000 0
|
||||
M V30 7 C 0.357247 0.825029 0.000000 0
|
||||
M V30 8 C -1.071668 1.650057 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 2 3 CFG=1
|
||||
M V30 3 1 3 4
|
||||
M V30 4 1 4 5 CFG=3
|
||||
M V30 5 1 4 6
|
||||
M V30 6 1 2 7
|
||||
M V30 7 1 1 8
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
Reference in New Issue
Block a user