diff --git a/Code/GraphMol/Atropisomers.cpp b/Code/GraphMol/Atropisomers.cpp index 18b9e1658..ad2c357eb 100644 --- a/Code/GraphMol/Atropisomers.cpp +++ b/Code/GraphMol/Atropisomers.cpp @@ -108,6 +108,46 @@ bool getBondFrameOfReference(const Bond *bond, const Conformer *conf, return true; } +Bond::BondDir getBondDirForAtropisomerNoConf(Bond::BondStereo bondStereo, + unsigned int whichEnd, + unsigned int whichBond) { + // the convention is that in the absence of coords, the coordiates are choosen + // with the lowest numbered atom of the atrop bond down, and the other atom + // straight up. + // On each end, the lowest numbered connecting atom is on the left + // + // a b + // \ / + // c + // | + // d + // / \ aaa + // e f + // + // where c > d + // a < b + // e < f + + PRECONDITION(whichEnd <= 1, "whichEnd must be 0 or 1"); + PRECONDITION(whichBond <= 1, "whichBond must be 0 or 1"); + PRECONDITION(bondStereo == Bond::BondStereo::STEREOATROPCW || + bondStereo == Bond::BondStereo::STEREOATROPCCW, + "bondStereo must be BondAtropisomerCW or BondAtropisomerCCW"); + + int flips = 0; + if (bondStereo == Bond::BondStereo::STEREOATROPCW) { + ++flips; + } + if (whichBond == 1) { + ++flips; + } + if (whichEnd == 1) { + ++flips; + } + + return flips % 2 ? Bond::BEGINDASH : Bond::BEGINWEDGE; +} + Bond::BondDir getBondDirForAtropisomer2d(RDGeom::Point3D bondVecs[2], Bond::BondStereo bondStereo, unsigned int whichEnd, @@ -129,12 +169,13 @@ Bond::BondDir getBondDirForAtropisomer2d(RDGeom::Point3D bondVecs[2], ++flips; } if (bondVecs[1 - whichEnd].y < 0) { - ++flips; // if the OTHER end if negative for the low index bond vec, it + ++flips; // if the OTHER end is negative for the low index bond vec, it // is a flip } return flips % 2 ? Bond::BEGINWEDGE : Bond::BEGINDASH; } + Bond::BondDir getBondDirForAtropisomer3d(Bond *whichBond, const Conformer *conf) { // for 3D we mark it as wedge or hash depending on the z-value of the bond @@ -204,6 +245,44 @@ bool getAtropIsomerEndVect(const AtropAtomAndBondVec &atomAndBondVec, return true; } +std::pair getBondDir( + const Bond *bond, const AtropAtomAndBondVec &atomAndBondVec) { + // get the wedge dir for this end of the bond + // if the first bond 1 has a bondDir, use it + // if the second bond has a bond dir use the opposite of if + // if both bonds have a dir, make sure they are different + + auto bond1Dir = atomAndBondVec.second[0]->getBondDir(); + if (bond1Dir != Bond::BEGINWEDGE && bond1Dir != Bond::BEGINDASH) { + bond1Dir = Bond::NONE; // we dont care if it any thing else + } + auto bond2Dir = atomAndBondVec.second.size() == 2 + ? atomAndBondVec.second[1]->getBondDir() + : Bond::NONE; + if (bond2Dir != Bond::BEGINWEDGE && bond2Dir != Bond::BEGINDASH) { + bond2Dir = Bond::NONE; + } + + // if both are set to a direction, they must NOT be the same - one + // must be a dash and the other a hash + + if (bond1Dir != Bond::NONE && bond2Dir != Bond::NONE && + bond1Dir == bond2Dir) { + BOOST_LOG(rdWarningLog) + << "The bonds on one end of an atropisomer are both UP or both DOWN - atoms are: " + << bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() << std::endl; + return {false, Bond::BondDir::NONE}; + } + + if (bond1Dir == Bond::BEGINWEDGE || bond2Dir == Bond::BEGINDASH) { + return {true, Bond::BondDir::BEGINWEDGE}; + } + if (bond1Dir == Bond::BEGINDASH || bond2Dir == Bond::BEGINWEDGE) { + return {true, Bond::BondDir::BEGINDASH}; + } + return {true, Bond::BondDir::NONE}; +} + bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol, const Conformer *conf) { // the approach is this: @@ -217,11 +296,11 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol, // atrop bond. for each end of the main bond, we find a vector to reprent // the neighbor atom with the smallest index as its projection onto the // x=0 plane. - // (In 2d, this projection is on the y-AXIS for the end that does NOT have a - // wedge/hash bond, and on the z axis - out of the plane - for the end that - // does have a wedge/hash). The chirality is recorded as the direction we - // rotate from, atom 1's projection to atom2's proejection - either - // clockwise or counter clockwise + // (In 2d, this projection is on the y-AXIS for the end that does NOT have + // a wedge/hash bond, and on the z axis - out of the plane - for the end + // that does have a wedge/hash). The chirality is recorded as the + // direction we rotate from, atom 1's projection to atom2's proejection - + // either clockwise or counter clockwise PRECONDITION(bond, "bad bond"); @@ -241,6 +320,52 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol, } } + // the convention is that in the absence of coords, the coordiates are choosen + // with the lowest numbered atom of the atrop bond down, and the other atom + // straight up. + // On each end, the lowest numbered connecting atom is on the left + // + // a b + // \ / + // c + // | + // d + // / \ aaa + // e f + // + // where c > d + // a < b + // e < f + + if (conf == nullptr) { + std::pair bond1DirResult; + bond1DirResult = getBondDir(bond, atomAndBondVecs[0]); + if (!bond1DirResult.first) { + return false; + } + std::pair bond2DirResult; + bond2DirResult = getBondDir(bond, atomAndBondVecs[1]); + if (!bond2DirResult.first) { + return false; + } + if (bond1DirResult.second == bond2DirResult.second) { + BOOST_LOG(rdWarningLog) + << "inconsistent bond wedging for an atropisomer. Atoms are: " + << bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() + << std::endl; + return false; + } + if (bond1DirResult.second == Bond::BEGINWEDGE || + bond2DirResult.second == Bond::BEGINDASH) { + bond->setStereo(Bond::BondStereo::STEREOATROPCCW); + } else if (bond1DirResult.second == Bond::BEGINDASH || + bond2DirResult.second == Bond::BEGINWEDGE) { + bond->setStereo(Bond::BondStereo::STEREOATROPCW); + } + + return true; + } + // create a frame of reference that has its X-axis along the atrop bond RDGeom::Point3D xAxis, yAxis, zAxis; @@ -265,27 +390,10 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol, // if the second bond has a bond dir use the opposite of if // if both bonds have a dir, make sure they are different - auto bond1Dir = atomAndBondVecs[bondAtomIndex].second[0]->getBondDir(); - if (bond1Dir != Bond::BEGINWEDGE && bond1Dir != Bond::BEGINDASH) { - bond1Dir = Bond::NONE; // we dont care if it any thing else - } - auto bond2Dir = - atomAndBondVecs[bondAtomIndex].second.size() == 2 - ? atomAndBondVecs[bondAtomIndex].second[1]->getBondDir() - : Bond::NONE; - if (bond2Dir != Bond::BEGINWEDGE && bond2Dir != Bond::BEGINDASH) { - bond2Dir = Bond::NONE; - } + std::pair bondDirResult; - // if both are set to a direction, they must NOT be the same - one - // must be a dash and the other a hash - - if (bond1Dir != Bond::NONE && bond2Dir != Bond::NONE && - bond1Dir == bond2Dir) { - BOOST_LOG(rdWarningLog) - << "The bonds on one end of an atropisomer are both UP or both DOWN - atoms are: " - << bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() - << std::endl; + bondDirResult = getBondDir(bond, atomAndBondVecs[bondAtomIndex]); + if (!bondDirResult.first) { return false; } @@ -294,10 +402,10 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol, return false; } - if (bond1Dir == Bond::BEGINWEDGE || bond2Dir == Bond::BEGINDASH) { + if (bondDirResult.second == Bond::BEGINWEDGE) { bondVecs[bondAtomIndex].y *= 0.707; bondVecs[bondAtomIndex].z = fabs(bondVecs[bondAtomIndex].y); - } else if (bond1Dir == Bond::BEGINDASH || bond2Dir == Bond::BEGINWEDGE) { + } else if (bondDirResult.second == Bond::BEGINDASH) { bondVecs[bondAtomIndex].y *= 0.707; bondVecs[bondAtomIndex].z = -fabs(bondVecs[bondAtomIndex].y); } @@ -410,8 +518,7 @@ void cleanupAtropisomerStereoGroups(ROMol &mol) { } void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) { - PRECONDITION(conf, "no conformer"); - PRECONDITION(&(conf->getOwningMol()) == &mol, + PRECONDITION(conf == nullptr || &(conf->getOwningMol()) == &mol, "conformer does not belong to molecule"); std::set bondsToTry; @@ -492,6 +599,169 @@ void getAllAtomIdsForStereoGroup( } } +bool WedgeBondFromAtropisomerOneBondNoConf( + Bond *bond, const ROMol &mol, + std::map> + &wedgeBonds) { + PRECONDITION(bond, "no bond"); + + AtropAtomAndBondVec atomAndBondVecs[2]; + if (!getAtropisomerAtomsAndBonds(bond, atomAndBondVecs, mol)) { + return false; // not an atropisomer + } + + // make sure we do not have wiggle bonds + + for (auto atomAndBondVec : atomAndBondVecs) { + for (auto endBond : atomAndBondVec.second) { + if (endBond->getBondDir() == Bond::UNKNOWN) { + return false; // not an atropisomer) + } + } + } + + // first see if any candidate bond is already set to a wedge or hash + // if so, we will use that bond as a wedge or hash + + std::vector useBondsAtEnd[2]; + bool foundBondDir = false; + + for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) { + for (unsigned int whichBond = 0; + whichBond < atomAndBondVecs[whichEnd].second.size(); ++whichBond) { + auto bondDir = atomAndBondVecs[whichEnd].second[whichBond]->getBondDir(); + + // see if it is a wedge or hash and its origin is the atom in the + // main bond + + if ((bondDir == Bond::BEGINWEDGE || bondDir == Bond::BEGINDASH) && + atomAndBondVecs[whichEnd].second[whichBond]->getBeginAtom() == + atomAndBondVecs[whichEnd].first && + canHaveDirection(*bond)) { + useBondsAtEnd[whichEnd].push_back(whichBond); + foundBondDir = true; + } + } + } + + if (foundBondDir) { + for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) { + for (unsigned int whichBondIndex = 0; + whichBondIndex < useBondsAtEnd[whichEnd].size(); ++whichBondIndex) { + atomAndBondVecs[whichEnd] + .second[useBondsAtEnd[whichEnd][whichBondIndex]] + ->setBondDir(getBondDirForAtropisomerNoConf( + bond->getStereo(), whichEnd, + useBondsAtEnd[whichEnd][whichBondIndex])); + } + } + + return true; + } + + // did not find a good bond dir - pick one to use + // we would like to have one that is not in a ring, and will be a wedge + + const RingInfo *ri = bond->getOwningMol().getRingInfo(); + + int bestBondEnd = -1, bestBondNumber = -1; + bool bestBondIsSingle = false; + unsigned int bestRingCount = INT_MAX; + Bond::BondDir bestBondDir = Bond::BondDir::NONE; + for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) { + for (unsigned int whichBond = 0; + whichBond < atomAndBondVecs[whichEnd].second.size(); ++whichBond) { + auto bondToTry = atomAndBondVecs[whichEnd].second[whichBond]; + + if (!canHaveDirection(*bondToTry) || + wedgeBonds.find(bondToTry->getIdx()) != wedgeBonds.end()) { + continue; // must be a single OR aromatic bond and not already + // spoken for by a chiral center + } + + if (bondToTry->getBondDir() != Bond::BondDir::NONE) { + if (bondToTry->getBeginAtom()->getIdx() == + atomAndBondVecs[whichEnd].first->getIdx()) { + BOOST_LOG(rdWarningLog) + << "Wedge or hash bond found on atropisomer where not expected - atoms are: " + << bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() + << std::endl; + return false; + } else { + continue; // wedge or hash bond affecting the OTHER atom + // = perhaps a chiral center + } + } + auto ringCount = ri->numBondRings(bondToTry->getIdx()); + if (ringCount > bestRingCount) { + continue; + } + + else if (ringCount < bestRingCount) { + bestBondEnd = whichEnd; + bestBondNumber = whichBond; + bestRingCount = ringCount; + bestBondIsSingle = (bondToTry->getBondType() == Bond::BondType::SINGLE); + bestBondDir = getBondDirForAtropisomerNoConf(bond->getStereo(), + whichEnd, whichBond); + } else if (bestBondIsSingle && + bondToTry->getBondType() != Bond::BondType::SINGLE) { + continue; + + } else if (!bestBondIsSingle && + bondToTry->getBondType() == Bond::BondType::SINGLE) { + bestBondEnd = whichEnd; + bestBondNumber = whichBond; + bestRingCount = ringCount; + bestBondIsSingle = true; + bestBondDir = getBondDirForAtropisomerNoConf(bond->getStereo(), + whichEnd, whichBond); + + } else { + auto bondDir = getBondDirForAtropisomerNoConf(bond->getStereo(), + whichEnd, whichBond); + if (bestBondDir == Bond::BondDir::NONE || + (bestBondDir == Bond::BondDir::BEGINDASH && + bondDir == Bond::BondDir::BEGINWEDGE)) { + bestBondEnd = whichEnd; + bestBondNumber = whichBond; + bestRingCount = ringCount; + bestBondIsSingle = + (bondToTry->getBondType() == Bond::BondType::SINGLE); + bestBondDir = bondDir; + } + } + } + } + + if (bestBondEnd >= 0) // we found a good one + { + // make sure the atoms on the bond are in the right order for the + // wedge/hash the atom on the end of the main bond must be listed + // first for the wedge/has bond + + auto bestBond = atomAndBondVecs[bestBondEnd].second[bestBondNumber]; + if (bestBond->getBeginAtom() != atomAndBondVecs[bestBondEnd].first) { + bestBond->setEndAtom(bestBond->getBeginAtom()); + bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first); + } + + bestBond->setBondDir(bestBondDir); + + auto newWedgeInfo = std::unique_ptr( + new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(), + bestBondDir)); + wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo); + } else { + BOOST_LOG(rdWarningLog) + << "Failed to find a good bond to set as UP or DOWN for an atropisomer - atoms are: " + << bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() << std::endl; + return false; + } + + return true; +} + bool WedgeBondFromAtropisomerOneBond2d( Bond *bond, const ROMol &mol, const Conformer *conf, std::map> @@ -605,8 +875,8 @@ bool WedgeBondFromAtropisomerOneBond2d( if (!canHaveDirection(*bondToTry) || wedgeBonds.find(bondToTry->getIdx()) != wedgeBonds.end()) { - continue; // must be a single OR aromatic bond and not already spoken - // for by a chiral center + continue; // must be a single OR aromatic bond and not already + // spoken for by a chiral center } if (bondToTry->getBondDir() != Bond::BondDir::NONE) { @@ -680,11 +950,13 @@ bool WedgeBondFromAtropisomerOneBond2d( bestBond->setEndAtom(bestBond->getBeginAtom()); bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first); } - // bonds[bestBondEnd][bestBondNumber]->setBondDir(bestBondDir); + bestBond->setBondDir(bestBondDir); + auto newWedgeInfo = std::unique_ptr( new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(), bestBondDir)); wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo); + } else { BOOST_LOG(rdWarningLog) << "Failed to find a good bond to set as UP or DOWN for an atropisomer - atoms are: " @@ -738,8 +1010,9 @@ bool WedgeBondFromAtropisomerOneBond3d( } } - // the following may seem redundant, since we just found the useBonds based - // on their bond dir PRESENCE, but this endures that the values are correct. + // the following may seem redundant, since we just found the useBonds + // based on their bond dir PRESENCE, but this endures that the values are + // correct. if (useBonds.size() > 0) { for (auto useBond : useBonds) { @@ -843,10 +1116,11 @@ bool WedgeBondFromAtropisomerOneBond3d( bestBond->setEndAtom(bestBond->getBeginAtom()); bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first); } - // bestBond->setBondDir(bestBondDir); + bestBond->setBondDir(bestBondDir); auto newWedgeInfo = std::unique_ptr( new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(), bestBondDir)); + wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo); } else { BOOST_LOG(rdWarningLog) @@ -862,8 +1136,7 @@ void wedgeBondsFromAtropisomers( const ROMol &mol, const Conformer *conf, std::map> &wedgeBonds) { - PRECONDITION(conf, "no conformer"); - PRECONDITION(&(conf->getOwningMol()) == &mol, + PRECONDITION(conf == nullptr || &(conf->getOwningMol()) == &mol, "conformer does not belong to molecule"); for (auto bond : mol.bonds()) { auto bondStereo = bond->getStereo(); @@ -878,10 +1151,14 @@ void wedgeBondsFromAtropisomers( continue; } - if (conf->is3D()) { - WedgeBondFromAtropisomerOneBond3d(bond, mol, conf, wedgeBonds); - } else { - WedgeBondFromAtropisomerOneBond2d(bond, mol, conf, wedgeBonds); + if (conf) { + if (conf->is3D()) { + WedgeBondFromAtropisomerOneBond3d(bond, mol, conf, wedgeBonds); + } else { + WedgeBondFromAtropisomerOneBond2d(bond, mol, conf, wedgeBonds); + } + } else { // no conformer + WedgeBondFromAtropisomerOneBondNoConf(bond, mol, wedgeBonds); } } } diff --git a/Code/GraphMol/CIPLabeler/catch_tests.cpp b/Code/GraphMol/CIPLabeler/catch_tests.cpp index ea0526515..eb7c68bbd 100644 --- a/Code/GraphMol/CIPLabeler/catch_tests.cpp +++ b/Code/GraphMol/CIPLabeler/catch_tests.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include "CIPLabeler.h" #include "Digraph.h" @@ -840,3 +841,99 @@ TEST_CASE("upsertTest", "[basic]") { CHECK(smilesOut != ""); } } + +TEST_CASE("atropisomers", "[basic]") { + SECTION("atropisomers1") { + UseLegacyStereoPerceptionFixture useLegacy(false); + + std::string rdbase = getenv("RDBASE"); + + std::vector files = { + "BMS-986142_atrop5.sdf", "BMS-986142_atrop1.sdf", + "BMS-986142_atrop2.sdf", "JDQ443_atrop1.sdf", + "JDQ443_atrop2.sdf", "Mrtx1719_atrop1.sdf", + "Mrtx1719_atrop2.sdf", "RP-6306_atrop1.sdf", + "RP-6306_atrop2.sdf", "Sotorasib_atrop1.sdf", + "Sotorasib_atrop2.sdf", "ZM374979_atrop1.sdf", + "ZM374979_atrop2.sdf"}; + + for (auto file : files) { + auto fName = + rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + file; + + auto molsdf = MolFileToMol(fName, true, true, true); + REQUIRE(molsdf); + CIPLabeler::assignCIPLabels(*molsdf, 100000); + + std::map, std::string> CIPVals; + for (auto bond : molsdf->bonds()) { + auto a1 = bond->getBeginAtomIdx(); + auto a2 = bond->getEndAtomIdx(); + if (a1 > a2) { + std::swap(a1, a2); + } + + if (bond->hasProp(common_properties::_CIPCode)) { + CIPVals[std::make_pair(a1, a2)] = + bond->getProp(common_properties::_CIPCode); + } else { + CIPVals[std::make_pair(a1, a2)] = "N/A"; + } + } + + molsdf->clearConformers(); + + SmilesWriteParams wp; + wp.canonical = false; + wp.doKekule = false; + unsigned int flags = + RDKit::SmilesWrite::CXSmilesFields::CX_MOLFILE_VALUES | + RDKit::SmilesWrite::CXSmilesFields::CX_ATOM_PROPS | + RDKit::SmilesWrite::CXSmilesFields::CX_BOND_CFG | + RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER; + SmilesParserParams pp; + pp.allowCXSMILES = true; + pp.sanitize = true; + + auto smi = MolToCXSmiles(*molsdf, wp, flags); + auto newMol = SmilesToMol(smi, pp); + CIPLabeler::assignCIPLabels(*newMol, 100000); + + std::map, std::string> newCIPVals; + for (auto bond : newMol->bonds()) { + auto a1 = bond->getBeginAtomIdx(); + auto a2 = bond->getEndAtomIdx(); + if (a1 > a2) { + std::swap(a1, a2); + } + if (bond->hasProp(common_properties::_CIPCode)) { + newCIPVals[std::make_pair(a1, a2)] = + bond->getProp(common_properties::_CIPCode); + } else { + newCIPVals[std::make_pair(a1, a2)] = "N/A"; + } + } + + RDKit::SubstructMatchParameters params; + + auto match = RDKit::SubstructMatch(*molsdf, *newMol, params); + + for (auto thisBond : newMol->bonds()) { + unsigned int a1 = thisBond->getBeginAtomIdx(); + unsigned int a2 = thisBond->getEndAtomIdx(); + if (a1 > a2) { + std::swap(a1, a2); + } + + unsigned int a1match = match[0][a1].second; + unsigned int a2match = match[0][a2].second; + + if (a1match > a2match) { + std::swap(a1match, a2match); + } + CHECK(CIPVals[std::make_pair(a1match, a2match)] == + newCIPVals[std::make_pair(a1, a2)]); + } + } + } +} \ No newline at end of file diff --git a/Code/GraphMol/Chirality.h b/Code/GraphMol/Chirality.h index 52a732e64..b1b4fbbda 100644 --- a/Code/GraphMol/Chirality.h +++ b/Code/GraphMol/Chirality.h @@ -301,8 +301,12 @@ RDKIT_GRAPHMOL_EXPORT void setStereoForBond(ROMol &mol, Bond *bond, /// returns a map from bond idx -> controlling atom idx RDKIT_GRAPHMOL_EXPORT std::map> pickBondsToWedge( - const ROMol &mol, const BondWedgingParameters *params = nullptr, - const Conformer *conf = nullptr); + const ROMol &mol, const BondWedgingParameters *params = nullptr); + +RDKIT_GRAPHMOL_EXPORT +std::map> pickBondsToWedge( + const ROMol &mol, const BondWedgingParameters *params, + const Conformer *conf); RDKIT_GRAPHMOL_EXPORT void wedgeMolBonds( ROMol &mol, const Conformer *conf = nullptr, diff --git a/Code/GraphMol/FileParsers/CDXMLParser.cpp b/Code/GraphMol/FileParsers/CDXMLParser.cpp index 5c8579d4c..74903a9cc 100644 --- a/Code/GraphMol/FileParsers/CDXMLParser.cpp +++ b/Code/GraphMol/FileParsers/CDXMLParser.cpp @@ -613,6 +613,8 @@ std::vector> MolsFromCDXMLDataStream( Atropisomers::detectAtropisomerChirality( *res, &res->getConformer(confidx)); + } else { // no Conformer + Atropisomers::detectAtropisomerChirality(*res, nullptr); } // now that atom stereochem has been perceived, the wedging diff --git a/Code/GraphMol/FileParsers/MolFileStereochem.cpp b/Code/GraphMol/FileParsers/MolFileStereochem.cpp index f667e3059..c22a2371c 100644 --- a/Code/GraphMol/FileParsers/MolFileStereochem.cpp +++ b/Code/GraphMol/FileParsers/MolFileStereochem.cpp @@ -228,8 +228,8 @@ void invertMolBlockWedgingInfo(ROMol &mol) { } void markUnspecifiedStereoAsUnknown(ROMol &mol, int confId) { - auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol); const auto conf = mol.getConformer(confId); + auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol, nullptr, &conf); for (auto b : mol.bonds()) { if (b->getBondType() == Bond::DOUBLE) { int dirCode; diff --git a/Code/GraphMol/FileParsers/file_parsers_catch.cpp b/Code/GraphMol/FileParsers/file_parsers_catch.cpp index ce69361ff..0f106842a 100644 --- a/Code/GraphMol/FileParsers/file_parsers_catch.cpp +++ b/Code/GraphMol/FileParsers/file_parsers_catch.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +// #include #include #include "RDGeneral/test.h" @@ -1618,7 +1618,6 @@ H 0.635000 0.635000 0.635000 mol->addConformer(conf); const std::string xyzblock = MolToXYZBlock(*mol, 0, 15); - std::cout << xyzblock << std::endl; std::string xyzblock_expected = R"XYZ(7 CHEMBL506259 O 0.402012650000000 -0.132994360000000 1.000000170000000 @@ -5074,7 +5073,8 @@ M END Chirality::reapplyMolBlockWedging(*m); CHECK(m->getBondWithIdx(2)->getBondDir() == Bond::BondDir::NONE); } - SECTION("Reapply the original wedging, regardless the bond type of wedged bonds") { + SECTION( + "Reapply the original wedging, regardless the bond type of wedged bonds") { auto m = R"CTAB( Mrv2311 04232413302D @@ -6041,10 +6041,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") { auto bondBlockStart = mae.find("m_bond[6]"); REQUIRE(bondBlockStart != std::string::npos); - std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart); - std::string_view atomBlock(&mae[atomBlockStart], - bondBlockStart - atomBlockStart); - std::string_view bondBlock(&mae[bondBlockStart]); + std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart); + std::string atomBlock(&mae[atomBlockStart], + bondBlockStart - atomBlockStart); + std::string bondBlock(&mae[bondBlockStart]); // Check mol properties CHECK(ctBlock.find("s_m_title") != std::string::npos); @@ -6173,10 +6173,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") { auto bondBlockStart = mae.find("m_bond[6]"); REQUIRE(bondBlockStart != std::string::npos); - std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart); - std::string_view atomBlock(&mae[atomBlockStart], - bondBlockStart - atomBlockStart); - std::string_view bondBlock(&mae[bondBlockStart]); + std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart); + std::string atomBlock(&mae[atomBlockStart], + bondBlockStart - atomBlockStart); + std::string bondBlock(&mae[bondBlockStart]); // Check mol properties CHECK(ctBlock.find("s_m_title") != std::string::npos); @@ -6258,7 +6258,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") { // Maeparser does not parse bond properties, so don't check them. } SECTION("Check reverse roundtrip") { - constexpr std::string_view maeBlock = R"MAE(f_m_ct { + std::string maeBlock = R"MAE(f_m_ct { s_m_title ::: "" @@ -6348,7 +6348,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") { auto ctBlockStart = mae.find("f_m_ct"); REQUIRE(ctBlockStart != std::string::npos); - std::string_view ctBlock(&mae[ctBlockStart]); + std::string ctBlock(&mae[ctBlockStart]); CHECK(ctBlock == MaeWriter::getText(*mol)); } @@ -6616,8 +6616,8 @@ TEST_CASE( auto bondBlockStart = mae.find("m_bond[2]"); REQUIRE(bondBlockStart != std::string::npos); - const std::string_view atomBlock(&mae[atomBlockStart], - bondBlockStart - atomBlockStart); + const std::string atomBlock(&mae[atomBlockStart], + bondBlockStart - atomBlockStart); CHECK(atomBlock.find(" -2 ") != std::string::npos); } diff --git a/Code/GraphMol/MarvinParse/MarvinParser.cpp b/Code/GraphMol/MarvinParse/MarvinParser.cpp index f502091b8..e53bc620b 100644 --- a/Code/GraphMol/MarvinParse/MarvinParser.cpp +++ b/Code/GraphMol/MarvinParse/MarvinParser.cpp @@ -611,9 +611,12 @@ class MarvinCMLReader { MolOps::assignChiralTypesFrom3D(*mol, conf3d->getId(), true); } - if (conf || conf3d) { - Atropisomers::detectAtropisomerChirality( - *mol, conf != nullptr ? conf : conf3d); + if (conf) { + Atropisomers::detectAtropisomerChirality(*mol, conf); + } else if (conf3d) { + Atropisomers::detectAtropisomerChirality(*mol, conf3d); + } else { + Atropisomers::detectAtropisomerChirality(*mol, nullptr); } ClearSingleBondDirFlags(*mol); diff --git a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp index a1279f752..a1292ca66 100644 --- a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp +++ b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp @@ -1097,7 +1097,9 @@ bool parse_wedged_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol, if (atom->getIdx() != bond->getEndAtomIdx()) { BOOST_LOG(rdWarningLog) << "atom " << atomIdx << " is not associated with bond " - << bondIdx << " in w block" << std::endl; + << bondIdx << "(" << bond->getBeginAtomIdx() + startAtomIdx << "-" + << bond->getEndAtomIdx() + startAtomIdx << ")" + << " in w block" << std::endl; return false; } auto eidx = bond->getBeginAtomIdx(); @@ -2038,23 +2040,96 @@ std::string get_bond_config_block( bd = Bond::BondDir::NONE; } - if (atropisomerOnly) { - // on of the bonds on the beging atom of this bond must be an atropisomer + if (atropisomerOnly && bd == Bond::BondDir::NONE) { + continue; + } - if (bd == Bond::BondDir::NONE) { - continue; - } - bool foundAtropisomer = false; + // see if this one is an atropisomer - const Atom *firstAtom = bond->getBeginAtom(); + bool isAnAtropisomer = false; + + const Atom *firstAtom = bond->getBeginAtom(); + if (bd == Bond::BondDir::BEGINDASH || bd == Bond::BondDir::BEGINWEDGE) { for (auto bondNbr : mol.atomBonds(firstAtom)) { + if (bondNbr->getIdx() == bond->getIdx()) { + continue; // a bond is not its own neighbor + } if (bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCW || bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCCW) { - foundAtropisomer = true; + isAnAtropisomer = true; + + // if it is for an atropisomer and there are no coords, check to see + // if the wedge needs to be flipped based on the smiles reordering + if (!coordsIncluded && isAnAtropisomer) { + Atropisomers::AtropAtomAndBondVec atomAndBondVecs[2]; + if (!Atropisomers::getAtropisomerAtomsAndBonds( + bondNbr, atomAndBondVecs, mol)) { + throw ValueErrorException("Internal error - should not occur"); + // should not happend + } else { + unsigned int swaps = 0; + + unsigned int firstReorderedIdx = + std::find(atomOrder.begin(), atomOrder.end(), + bondNbr->getBeginAtom()->getIdx()) - + atomOrder.begin(); + unsigned int secondReorderedIdx = + std::find(atomOrder.begin(), atomOrder.end(), + bondNbr->getEndAtom()->getIdx()) - + atomOrder.begin(); + if (firstReorderedIdx > secondReorderedIdx) { + ++swaps; + } + + for (unsigned int bondAtomIndex = 0; bondAtomIndex < 2; + ++bondAtomIndex) { + if (atomAndBondVecs[bondAtomIndex].first == firstAtom) + continue; // swapped atoms on the side where the wedge bond + // is does NOT change the wedge bond + if (atomAndBondVecs[bondAtomIndex].second.size() == 2) { + unsigned int firstOtherAtomIdx = + atomAndBondVecs[bondAtomIndex] + .second[0] + ->getOtherAtom(atomAndBondVecs[bondAtomIndex].first) + ->getIdx(); + unsigned int secondOtherAtomIdx = + atomAndBondVecs[bondAtomIndex] + .second[1] + ->getOtherAtom(atomAndBondVecs[bondAtomIndex].first) + ->getIdx(); + + unsigned int firstReorderedAtomIdx = + std::find(atomOrder.begin(), atomOrder.end(), + firstOtherAtomIdx) - + atomOrder.begin(); + unsigned int secondReorderedAtomIdx = + std::find(atomOrder.begin(), atomOrder.end(), + secondOtherAtomIdx) - + atomOrder.begin(); + + if (firstReorderedAtomIdx > secondReorderedAtomIdx) { + ++swaps; + } + } + } + if (swaps % 2) { + bd = (bd == Bond::BondDir::BEGINWEDGE) + ? Bond::BondDir::BEGINDASH + : Bond::BondDir::BEGINWEDGE; + } + } + } + break; } } - if (!foundAtropisomer) { + } + + if (atropisomerOnly) { + // one of the bonds on the beginning atom of this bond must be an + // atropisomer + + if (!isAnAtropisomer) { continue; } } else { // atropisomeronly is FALSE - check for a wedging caused by @@ -2109,8 +2184,9 @@ std::string get_bond_config_block( std::string wType = ""; if (bd == Bond::BondDir::UNKNOWN) { wType = "w"; - } else if (coordsIncluded) { + } else if (coordsIncluded || isAnAtropisomer) { // we only do wedgeUp and wedgeDown if coordinates are being output + // or its an atropisomer if (bd == Bond::BondDir::BEGINWEDGE) { wType = "wU"; } else if (bd == Bond::BondDir::BEGINDASH) { @@ -2354,12 +2430,13 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) { // do the CX_BOND_ATROPISOMER only if CX_BOND_CFG s not done. CX_BOND_CFG // includes the atropisomer wedging else if (flags & SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER) { - if (conf) { - Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds); - } - bool includeCoords = flags & SmilesWrite::CXSmilesFields::CX_COORDS && mol.getNumConformers(); + if (includeCoords) { + Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds); + } else { + Atropisomers::wedgeBondsFromAtropisomers(mol, nullptr, wedgeBonds); + } const auto cfgblock = get_bond_config_block( mol, atomOrder, bondOrder, includeCoords, wedgeBonds, true); appendToCXExtension(cfgblock, res); diff --git a/Code/GraphMol/SmilesParse/SmilesParse.cpp b/Code/GraphMol/SmilesParse/SmilesParse.cpp index 5bb1bfff0..6415ca8d8 100644 --- a/Code/GraphMol/SmilesParse/SmilesParse.cpp +++ b/Code/GraphMol/SmilesParse/SmilesParse.cpp @@ -458,8 +458,13 @@ std::unique_ptr MolFromSmiles(const std::string &smiles, res->updatePropertyCache(false); MolOps::assignChiralTypesFrom3D(*res, conf3d->getId(), true); } - if (conf || conf3d) { - Atropisomers::detectAtropisomerChirality(*res, conf ? conf : conf3d); + + if (conf) { + Atropisomers::detectAtropisomerChirality(*res, conf); + } else if (conf3d) { + Atropisomers::detectAtropisomerChirality(*res, conf3d); + } else { + Atropisomers::detectAtropisomerChirality(*res, nullptr); } if (res && (params.sanitize || params.removeHs)) { diff --git a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp index 4a4d3bad2..061e759ce 100644 --- a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp +++ b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp @@ -790,7 +790,8 @@ void testOneAtropisomers(const SmilesTest *smilesTest) { MolToCXSmiles(*smilesMol, ps, flags, RestoreBondDirOptionClear); generateNewExpectedFilesIfSoSpecified(fName + ".NEW3.cxsmi", smilesOut); - CHECK(getExpectedValue(expectedFileName) == smilesOut); + auto expected = getExpectedValue(expectedFileName); + CHECK(expected == smilesOut); } smilesMol = @@ -826,7 +827,7 @@ void testOneAtropisomers(const SmilesTest *smilesTest) { } catch (const RDKit::KekulizeException &e) { outMolStr = ""; } catch (...) { - throw; // re-trhow the error if not a kekule error + throw; // re-throw the error if not a kekule error } if (outMolStr == "") { RDKit::Chirality::reapplyMolBlockWedging(*smilesMol); @@ -1023,6 +1024,7 @@ void testOne3dChiral(const SmilesTest *smilesTest) { TEST_CASE("testAtropisomersInCXSmiles") { { std::list smiTests{ + SmilesTest("ShortAtropisomerNoCoords.cxsmi", true, 14, 15), SmilesTest("ShortAtropisomer.cxsmi", true, 14, 15), SmilesTest("ShortAtropisomerArom.cxsmi", true, 14, 15), SmilesTest("AtropManyChirals.cxsmi", true, 20, 20), @@ -1344,15 +1346,11 @@ TEST_CASE("SMILES CANONICALIZATION") { } molCount++; - try { - if (molBlock.length() > 25) { - std::unique_ptr mol(MolBlockToMol(molBlock)); - std::string smiles = MolToCXSmiles(*mol); + if (molBlock.length() > 25) { + std::unique_ptr mol(MolBlockToMol(molBlock)); + std::string smiles = MolToCXSmiles(*mol); - testSmilesCanonicalization(smiles); - } - } catch (...) { - std::cout << "failed on mol " << molCount << std::endl; + testSmilesCanonicalization(smiles); } } } diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi new file mode 100644 index 000000000..692935f52 --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi @@ -0,0 +1 @@ +CC1=C(N2C=CC=C2C)C(C)CCC1 |wU:2.9| diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.cxsmi b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.cxsmi new file mode 100644 index 000000000..51e75f74b --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.cxsmi @@ -0,0 +1 @@ +CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9| \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.mrv b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.mrv new file mode 100644 index 000000000..a73ca06fe --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.mrv @@ -0,0 +1,2 @@ + +W \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.sdf b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.sdf new file mode 100644 index 000000000..6b6b03c12 --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected.sdf @@ -0,0 +1,41 @@ + + RDKit 2D + + 0 0 0 0 0 0 0 0 0 0999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 14 15 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 3.000000 0.000000 0.000000 0 +M V30 2 C 1.500000 0.000000 0.000000 0 +M V30 3 C 0.750000 -1.299038 0.000000 0 +M V30 4 N 1.500000 -2.598076 0.000000 0 +M V30 5 C 2.991783 -2.754869 0.000000 0 +M V30 6 C 3.303650 -4.222090 0.000000 0 +M V30 7 C 2.004612 -4.972090 0.000000 0 +M V30 8 C 0.889895 -3.968394 0.000000 0 +M V30 9 C -0.577326 -4.280262 0.000000 0 +M V30 10 C -0.750000 -1.299038 0.000000 0 +M V30 11 C -1.500000 -2.598076 0.000000 0 +M V30 12 C -1.500000 0.000000 0.000000 0 +M V30 13 C -0.750000 1.299038 0.000000 0 +M V30 14 C 0.750000 1.299038 0.000000 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 2 2 3 +M V30 3 1 3 4 +M V30 4 1 4 5 +M V30 5 2 5 6 +M V30 6 1 6 7 +M V30 7 2 7 8 +M V30 8 1 8 9 +M V30 9 1 3 10 CFG=1 +M V30 10 1 10 11 +M V30 11 1 10 12 +M V30 12 1 12 13 +M V30 13 1 13 14 +M V30 14 1 14 2 +M V30 15 1 8 4 +M V30 END BOND +M V30 END CTAB +M END diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected2.cxsmi b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected2.cxsmi new file mode 100644 index 000000000..51e75f74b --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected2.cxsmi @@ -0,0 +1 @@ +CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9| \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected3.cxsmi b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected3.cxsmi new file mode 100644 index 000000000..51e75f74b --- /dev/null +++ b/Code/GraphMol/SmilesParse/test_data/ShortAtropisomerNoCoords.cxsmi.expected3.cxsmi @@ -0,0 +1 @@ +CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9| \ No newline at end of file diff --git a/Code/GraphMol/WedgeBonds.cpp b/Code/GraphMol/WedgeBonds.cpp index 49be35361..6b8f96b45 100644 --- a/Code/GraphMol/WedgeBonds.cpp +++ b/Code/GraphMol/WedgeBonds.cpp @@ -341,6 +341,16 @@ int pickBondToWedge( // returns map of bondIdx -> bond begin atom for those bonds that // need wedging. + +std::map> pickBondsToWedge( + const ROMol &mol, const BondWedgingParameters *params) { + const Conformer *conf = nullptr; + if (mol.getNumConformers()) { + conf = &mol.getConformer(); + } + return pickBondsToWedge(mol, params, conf); +} + std::map> pickBondsToWedge( const ROMol &mol, const BondWedgingParameters *params, const Conformer *conf) { @@ -388,15 +398,7 @@ std::map> pickBondsToWedge( wedgeInfo[bnd1] = std::move(wi); } } - - if (conf == nullptr) { - if (mol.getNumConformers()) { - conf = &mol.getConformer(); - } - } - if (conf) { - RDKit::Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeInfo); - } + RDKit::Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeInfo); return wedgeInfo; } @@ -472,7 +474,7 @@ void wedgeMolBonds(ROMol &mol, const Conformer *conf, MolOps::findSSSR(mol); } - auto wedgeBonds = Chirality::pickBondsToWedge(mol, params); + auto wedgeBonds = Chirality::pickBondsToWedge(mol, params, conf); // loop over the bonds we need to wedge: for (const auto &[wbi, wedgeInfo] : wedgeBonds) { diff --git a/Docs/Book/RDKit_Book.rst b/Docs/Book/RDKit_Book.rst index ade970ee0..e98da4f09 100644 --- a/Docs/Book/RDKit_Book.rst +++ b/Docs/Book/RDKit_Book.rst @@ -2234,6 +2234,46 @@ Note: the RDKit software makes no attempt to determine if the bond is actually rotationally constrained. If the bond meets the requirements above, it is marked as an atropisomer. +Internal Representation of Atropisomers +======================================= + +To help with the rest of the explanation, we include the bond indices in the molecule drawing: + +.. testsetup:: + + 'CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7,(18.31,-9.57,;17.45,-9.08,;17.44,-8.08,;16.58,-7.58,;15.71,-8.08,;15.71,-9.08,;14.84,-9.58,;16.58,-9.58,;16.58,-10.58,;17.38,-11.17,;18.34,-10.87,;17.08,-12.12,;16.07,-12.12,;15.77,-11.17,;14.81,-10.87,)|' + +.. image:: images/atrop_representation1.png + +Here's part of the Debug output for that molecule:: + + Atoms: + ... + 1 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1 + ... + 5 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1 + ... + 7 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1 + 8 7 N chg: 0 deg: 3 exp: 3 imp: 0 hyb: SP2 arom?: 1 + 9 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1 + ... + 13 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1 + ... + Bonds: + ... + 6 5->7 order: a conj?: 1 aromatic?: 1 + 7 7->8 order: 1 stereo: CCW bonds: (14 6 8 15) conj?: 1 + 8 8->9 order: a conj?: 1 aromatic?: 1 + ... + 14 7->1 order: a dir: wedge conj?: 1 aromatic?: 1 + 15 13->8 order: a conj?: 1 aromatic?: 1 + +This tells us that bond 7 is atropisomeric and that when looking down the bond +(from atom C7 to atom N8), the rotation direction between bonds 14 and 8 (these +are the bonds to the lowest numbered atoms on each of the atropisomeric bond) is +counterclockwise (CCW). + + Formats supporting atropisomers =============================== @@ -2280,6 +2320,54 @@ an atropisomer bond, the actual stereochemistry of the bond is determined by the .. image:: images/atrop_example5.png +Interpreting atropisomers without coordinates +============================================= + +Just as it is possible to interpret atomic and double bond stereochemistry from +SMILES without providing atomic coordinates, the RDKit adopts (starting with the +2024.03.2 release) a convention for interpreting bond wedge information in +CXSMILES that do not have coordinates provided. + +In order to understand how this representation works, a quick digression is necessary to explain the difference in the way bonds are numbered in CXSMILES and the RDKit. +In the RDKit ring closure bonds are added last; they are the highest numbered bonds in the molecule. In CXSMILES, the ring closure bonds are added immediately upon closing the ring. +Here's an illustration of this for the SMILES ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br``; the RDKit bond numbering (what we saw above) is shown first and the CXSMILES numbering is shown second. + +.. testsetup:: + + 'CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7|' + +.. image:: images/atrop_representation2.png + +To describe this atropisomer in CXSMILES without using coordinates, we adopt the convention that the atropisomeric bond's stereochemistry is ``CCW`` when the bond to the lowest-numbered neighbor of the start atom +(in this case the neighbor connected via bond 7) is wedged and write the CXSMILES for this molecule as ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7||``. The stereo values for all possible combinations are shown here: + + ++-------+----+-------+-------+--------+------------+ +| | | bond | | | | +| from | to | index | type | stereo | | ++=======+====+=======+=======+========+============+ +| 7 | 1 | 7 | wedge | CCW | ``wU7.7`` | ++-------+----+-------+-------+--------+------------+ +| 7 | 5 | 6 | wedge | CW | ``wU7.6`` | ++-------+----+-------+-------+--------+------------+ +| 8 | 9 | 9 | wedge | CW | ``wU8.9`` | ++-------+----+-------+-------+--------+------------+ +| 8 | 13 | 14 | wedge | CCW | ``wU8.14`` | ++-------+----+-------+-------+--------+------------+ +| 7 | 1 | 7 | dash | CW | ``wD7.7`` | ++-------+----+-------+-------+--------+------------+ +| 7 | 5 | 6 | dash | CCW | ``wD7.6`` | ++-------+----+-------+-------+--------+------------+ +| 8 | 9 | 9 | dash | CCW | ``wD8.9`` | ++-------+----+-------+-------+--------+------------+ +| 8 | 13 | 14 | dash | CW | ``wD8.14`` | ++-------+----+-------+-------+--------+------------+ + + +It's also possible to wedge multiple neighbor bonds around an atropisomeric bond, but the wedging must be consistent based on the table above; +so the CXSMILES ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wD:7.7,wU:8.9|`` is valid, but ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wD:7.7,wU:8.14|`` is not. + + Validation of Atropisomers ========================== @@ -2294,7 +2382,6 @@ Atropisomers are supported in the canonicalization algorithm of RDKit. They are substructure searching and similarity searching at this time. - Query Features in Molecule Drawings *********************************** diff --git a/Docs/Book/images/atrop_representation1.png b/Docs/Book/images/atrop_representation1.png new file mode 100644 index 000000000..28214554a Binary files /dev/null and b/Docs/Book/images/atrop_representation1.png differ diff --git a/Docs/Book/images/atrop_representation2.png b/Docs/Book/images/atrop_representation2.png new file mode 100644 index 000000000..a1c7cd55c Binary files /dev/null and b/Docs/Book/images/atrop_representation2.png differ