mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
No coords atropisomers - fix smiles output of atrop wedges after reordering (#7418)
* removed string_view in favor of string for catch test * add parsing and generation of atropisomers when coords not present * changed string_view to string in catch test * more docs * reformulation of the docs * make an error message a little bit more useful * small optimization clang-format * add `BondWedgingParameters` to new function * changes for CIP test errors * Updated internal doc to match what it does * changes per PR review * removed cout statements in tests --------- Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
@@ -108,6 +108,46 @@ bool getBondFrameOfReference(const Bond *bond, const Conformer *conf,
|
||||
return true;
|
||||
}
|
||||
|
||||
Bond::BondDir getBondDirForAtropisomerNoConf(Bond::BondStereo bondStereo,
|
||||
unsigned int whichEnd,
|
||||
unsigned int whichBond) {
|
||||
// the convention is that in the absence of coords, the coordiates are choosen
|
||||
// with the lowest numbered atom of the atrop bond down, and the other atom
|
||||
// straight up.
|
||||
// On each end, the lowest numbered connecting atom is on the left
|
||||
//
|
||||
// a b
|
||||
// \ /
|
||||
// c
|
||||
// |
|
||||
// d
|
||||
// / \ aaa
|
||||
// e f
|
||||
//
|
||||
// where c > d
|
||||
// a < b
|
||||
// e < f
|
||||
|
||||
PRECONDITION(whichEnd <= 1, "whichEnd must be 0 or 1");
|
||||
PRECONDITION(whichBond <= 1, "whichBond must be 0 or 1");
|
||||
PRECONDITION(bondStereo == Bond::BondStereo::STEREOATROPCW ||
|
||||
bondStereo == Bond::BondStereo::STEREOATROPCCW,
|
||||
"bondStereo must be BondAtropisomerCW or BondAtropisomerCCW");
|
||||
|
||||
int flips = 0;
|
||||
if (bondStereo == Bond::BondStereo::STEREOATROPCW) {
|
||||
++flips;
|
||||
}
|
||||
if (whichBond == 1) {
|
||||
++flips;
|
||||
}
|
||||
if (whichEnd == 1) {
|
||||
++flips;
|
||||
}
|
||||
|
||||
return flips % 2 ? Bond::BEGINDASH : Bond::BEGINWEDGE;
|
||||
}
|
||||
|
||||
Bond::BondDir getBondDirForAtropisomer2d(RDGeom::Point3D bondVecs[2],
|
||||
Bond::BondStereo bondStereo,
|
||||
unsigned int whichEnd,
|
||||
@@ -129,12 +169,13 @@ Bond::BondDir getBondDirForAtropisomer2d(RDGeom::Point3D bondVecs[2],
|
||||
++flips;
|
||||
}
|
||||
if (bondVecs[1 - whichEnd].y < 0) {
|
||||
++flips; // if the OTHER end if negative for the low index bond vec, it
|
||||
++flips; // if the OTHER end is negative for the low index bond vec, it
|
||||
// is a flip
|
||||
}
|
||||
|
||||
return flips % 2 ? Bond::BEGINWEDGE : Bond::BEGINDASH;
|
||||
}
|
||||
|
||||
Bond::BondDir getBondDirForAtropisomer3d(Bond *whichBond,
|
||||
const Conformer *conf) {
|
||||
// for 3D we mark it as wedge or hash depending on the z-value of the bond
|
||||
@@ -204,6 +245,44 @@ bool getAtropIsomerEndVect(const AtropAtomAndBondVec &atomAndBondVec,
|
||||
return true;
|
||||
}
|
||||
|
||||
std::pair<bool, Bond::BondDir> getBondDir(
|
||||
const Bond *bond, const AtropAtomAndBondVec &atomAndBondVec) {
|
||||
// get the wedge dir for this end of the bond
|
||||
// if the first bond 1 has a bondDir, use it
|
||||
// if the second bond has a bond dir use the opposite of if
|
||||
// if both bonds have a dir, make sure they are different
|
||||
|
||||
auto bond1Dir = atomAndBondVec.second[0]->getBondDir();
|
||||
if (bond1Dir != Bond::BEGINWEDGE && bond1Dir != Bond::BEGINDASH) {
|
||||
bond1Dir = Bond::NONE; // we dont care if it any thing else
|
||||
}
|
||||
auto bond2Dir = atomAndBondVec.second.size() == 2
|
||||
? atomAndBondVec.second[1]->getBondDir()
|
||||
: Bond::NONE;
|
||||
if (bond2Dir != Bond::BEGINWEDGE && bond2Dir != Bond::BEGINDASH) {
|
||||
bond2Dir = Bond::NONE;
|
||||
}
|
||||
|
||||
// if both are set to a direction, they must NOT be the same - one
|
||||
// must be a dash and the other a hash
|
||||
|
||||
if (bond1Dir != Bond::NONE && bond2Dir != Bond::NONE &&
|
||||
bond1Dir == bond2Dir) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "The bonds on one end of an atropisomer are both UP or both DOWN - atoms are: "
|
||||
<< bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() << std::endl;
|
||||
return {false, Bond::BondDir::NONE};
|
||||
}
|
||||
|
||||
if (bond1Dir == Bond::BEGINWEDGE || bond2Dir == Bond::BEGINDASH) {
|
||||
return {true, Bond::BondDir::BEGINWEDGE};
|
||||
}
|
||||
if (bond1Dir == Bond::BEGINDASH || bond2Dir == Bond::BEGINWEDGE) {
|
||||
return {true, Bond::BondDir::BEGINDASH};
|
||||
}
|
||||
return {true, Bond::BondDir::NONE};
|
||||
}
|
||||
|
||||
bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol,
|
||||
const Conformer *conf) {
|
||||
// the approach is this:
|
||||
@@ -217,11 +296,11 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol,
|
||||
// atrop bond. for each end of the main bond, we find a vector to reprent
|
||||
// the neighbor atom with the smallest index as its projection onto the
|
||||
// x=0 plane.
|
||||
// (In 2d, this projection is on the y-AXIS for the end that does NOT have a
|
||||
// wedge/hash bond, and on the z axis - out of the plane - for the end that
|
||||
// does have a wedge/hash). The chirality is recorded as the direction we
|
||||
// rotate from, atom 1's projection to atom2's proejection - either
|
||||
// clockwise or counter clockwise
|
||||
// (In 2d, this projection is on the y-AXIS for the end that does NOT have
|
||||
// a wedge/hash bond, and on the z axis - out of the plane - for the end
|
||||
// that does have a wedge/hash). The chirality is recorded as the
|
||||
// direction we rotate from, atom 1's projection to atom2's proejection -
|
||||
// either clockwise or counter clockwise
|
||||
|
||||
PRECONDITION(bond, "bad bond");
|
||||
|
||||
@@ -241,6 +320,52 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol,
|
||||
}
|
||||
}
|
||||
|
||||
// the convention is that in the absence of coords, the coordiates are choosen
|
||||
// with the lowest numbered atom of the atrop bond down, and the other atom
|
||||
// straight up.
|
||||
// On each end, the lowest numbered connecting atom is on the left
|
||||
//
|
||||
// a b
|
||||
// \ /
|
||||
// c
|
||||
// |
|
||||
// d
|
||||
// / \ aaa
|
||||
// e f
|
||||
//
|
||||
// where c > d
|
||||
// a < b
|
||||
// e < f
|
||||
|
||||
if (conf == nullptr) {
|
||||
std::pair<bool, Bond::BondDir> bond1DirResult;
|
||||
bond1DirResult = getBondDir(bond, atomAndBondVecs[0]);
|
||||
if (!bond1DirResult.first) {
|
||||
return false;
|
||||
}
|
||||
std::pair<bool, Bond::BondDir> bond2DirResult;
|
||||
bond2DirResult = getBondDir(bond, atomAndBondVecs[1]);
|
||||
if (!bond2DirResult.first) {
|
||||
return false;
|
||||
}
|
||||
if (bond1DirResult.second == bond2DirResult.second) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "inconsistent bond wedging for an atropisomer. Atoms are: "
|
||||
<< bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx()
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
if (bond1DirResult.second == Bond::BEGINWEDGE ||
|
||||
bond2DirResult.second == Bond::BEGINDASH) {
|
||||
bond->setStereo(Bond::BondStereo::STEREOATROPCCW);
|
||||
} else if (bond1DirResult.second == Bond::BEGINDASH ||
|
||||
bond2DirResult.second == Bond::BEGINWEDGE) {
|
||||
bond->setStereo(Bond::BondStereo::STEREOATROPCW);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// create a frame of reference that has its X-axis along the atrop bond
|
||||
|
||||
RDGeom::Point3D xAxis, yAxis, zAxis;
|
||||
@@ -265,27 +390,10 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol,
|
||||
// if the second bond has a bond dir use the opposite of if
|
||||
// if both bonds have a dir, make sure they are different
|
||||
|
||||
auto bond1Dir = atomAndBondVecs[bondAtomIndex].second[0]->getBondDir();
|
||||
if (bond1Dir != Bond::BEGINWEDGE && bond1Dir != Bond::BEGINDASH) {
|
||||
bond1Dir = Bond::NONE; // we dont care if it any thing else
|
||||
}
|
||||
auto bond2Dir =
|
||||
atomAndBondVecs[bondAtomIndex].second.size() == 2
|
||||
? atomAndBondVecs[bondAtomIndex].second[1]->getBondDir()
|
||||
: Bond::NONE;
|
||||
if (bond2Dir != Bond::BEGINWEDGE && bond2Dir != Bond::BEGINDASH) {
|
||||
bond2Dir = Bond::NONE;
|
||||
}
|
||||
std::pair<bool, Bond::BondDir> bondDirResult;
|
||||
|
||||
// if both are set to a direction, they must NOT be the same - one
|
||||
// must be a dash and the other a hash
|
||||
|
||||
if (bond1Dir != Bond::NONE && bond2Dir != Bond::NONE &&
|
||||
bond1Dir == bond2Dir) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "The bonds on one end of an atropisomer are both UP or both DOWN - atoms are: "
|
||||
<< bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx()
|
||||
<< std::endl;
|
||||
bondDirResult = getBondDir(bond, atomAndBondVecs[bondAtomIndex]);
|
||||
if (!bondDirResult.first) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -294,10 +402,10 @@ bool DetectAtropisomerChiralityOneBond(Bond *bond, ROMol &mol,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bond1Dir == Bond::BEGINWEDGE || bond2Dir == Bond::BEGINDASH) {
|
||||
if (bondDirResult.second == Bond::BEGINWEDGE) {
|
||||
bondVecs[bondAtomIndex].y *= 0.707;
|
||||
bondVecs[bondAtomIndex].z = fabs(bondVecs[bondAtomIndex].y);
|
||||
} else if (bond1Dir == Bond::BEGINDASH || bond2Dir == Bond::BEGINWEDGE) {
|
||||
} else if (bondDirResult.second == Bond::BEGINDASH) {
|
||||
bondVecs[bondAtomIndex].y *= 0.707;
|
||||
bondVecs[bondAtomIndex].z = -fabs(bondVecs[bondAtomIndex].y);
|
||||
}
|
||||
@@ -410,8 +518,7 @@ void cleanupAtropisomerStereoGroups(ROMol &mol) {
|
||||
}
|
||||
|
||||
void detectAtropisomerChirality(ROMol &mol, const Conformer *conf) {
|
||||
PRECONDITION(conf, "no conformer");
|
||||
PRECONDITION(&(conf->getOwningMol()) == &mol,
|
||||
PRECONDITION(conf == nullptr || &(conf->getOwningMol()) == &mol,
|
||||
"conformer does not belong to molecule");
|
||||
|
||||
std::set<Bond *> bondsToTry;
|
||||
@@ -492,6 +599,169 @@ void getAllAtomIdsForStereoGroup(
|
||||
}
|
||||
}
|
||||
|
||||
bool WedgeBondFromAtropisomerOneBondNoConf(
|
||||
Bond *bond, const ROMol &mol,
|
||||
std::map<int, std::unique_ptr<RDKit::Chirality::WedgeInfoBase>>
|
||||
&wedgeBonds) {
|
||||
PRECONDITION(bond, "no bond");
|
||||
|
||||
AtropAtomAndBondVec atomAndBondVecs[2];
|
||||
if (!getAtropisomerAtomsAndBonds(bond, atomAndBondVecs, mol)) {
|
||||
return false; // not an atropisomer
|
||||
}
|
||||
|
||||
// make sure we do not have wiggle bonds
|
||||
|
||||
for (auto atomAndBondVec : atomAndBondVecs) {
|
||||
for (auto endBond : atomAndBondVec.second) {
|
||||
if (endBond->getBondDir() == Bond::UNKNOWN) {
|
||||
return false; // not an atropisomer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// first see if any candidate bond is already set to a wedge or hash
|
||||
// if so, we will use that bond as a wedge or hash
|
||||
|
||||
std::vector<int> useBondsAtEnd[2];
|
||||
bool foundBondDir = false;
|
||||
|
||||
for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) {
|
||||
for (unsigned int whichBond = 0;
|
||||
whichBond < atomAndBondVecs[whichEnd].second.size(); ++whichBond) {
|
||||
auto bondDir = atomAndBondVecs[whichEnd].second[whichBond]->getBondDir();
|
||||
|
||||
// see if it is a wedge or hash and its origin is the atom in the
|
||||
// main bond
|
||||
|
||||
if ((bondDir == Bond::BEGINWEDGE || bondDir == Bond::BEGINDASH) &&
|
||||
atomAndBondVecs[whichEnd].second[whichBond]->getBeginAtom() ==
|
||||
atomAndBondVecs[whichEnd].first &&
|
||||
canHaveDirection(*bond)) {
|
||||
useBondsAtEnd[whichEnd].push_back(whichBond);
|
||||
foundBondDir = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (foundBondDir) {
|
||||
for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) {
|
||||
for (unsigned int whichBondIndex = 0;
|
||||
whichBondIndex < useBondsAtEnd[whichEnd].size(); ++whichBondIndex) {
|
||||
atomAndBondVecs[whichEnd]
|
||||
.second[useBondsAtEnd[whichEnd][whichBondIndex]]
|
||||
->setBondDir(getBondDirForAtropisomerNoConf(
|
||||
bond->getStereo(), whichEnd,
|
||||
useBondsAtEnd[whichEnd][whichBondIndex]));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// did not find a good bond dir - pick one to use
|
||||
// we would like to have one that is not in a ring, and will be a wedge
|
||||
|
||||
const RingInfo *ri = bond->getOwningMol().getRingInfo();
|
||||
|
||||
int bestBondEnd = -1, bestBondNumber = -1;
|
||||
bool bestBondIsSingle = false;
|
||||
unsigned int bestRingCount = INT_MAX;
|
||||
Bond::BondDir bestBondDir = Bond::BondDir::NONE;
|
||||
for (unsigned int whichEnd = 0; whichEnd < 2; ++whichEnd) {
|
||||
for (unsigned int whichBond = 0;
|
||||
whichBond < atomAndBondVecs[whichEnd].second.size(); ++whichBond) {
|
||||
auto bondToTry = atomAndBondVecs[whichEnd].second[whichBond];
|
||||
|
||||
if (!canHaveDirection(*bondToTry) ||
|
||||
wedgeBonds.find(bondToTry->getIdx()) != wedgeBonds.end()) {
|
||||
continue; // must be a single OR aromatic bond and not already
|
||||
// spoken for by a chiral center
|
||||
}
|
||||
|
||||
if (bondToTry->getBondDir() != Bond::BondDir::NONE) {
|
||||
if (bondToTry->getBeginAtom()->getIdx() ==
|
||||
atomAndBondVecs[whichEnd].first->getIdx()) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "Wedge or hash bond found on atropisomer where not expected - atoms are: "
|
||||
<< bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx()
|
||||
<< std::endl;
|
||||
return false;
|
||||
} else {
|
||||
continue; // wedge or hash bond affecting the OTHER atom
|
||||
// = perhaps a chiral center
|
||||
}
|
||||
}
|
||||
auto ringCount = ri->numBondRings(bondToTry->getIdx());
|
||||
if (ringCount > bestRingCount) {
|
||||
continue;
|
||||
}
|
||||
|
||||
else if (ringCount < bestRingCount) {
|
||||
bestBondEnd = whichEnd;
|
||||
bestBondNumber = whichBond;
|
||||
bestRingCount = ringCount;
|
||||
bestBondIsSingle = (bondToTry->getBondType() == Bond::BondType::SINGLE);
|
||||
bestBondDir = getBondDirForAtropisomerNoConf(bond->getStereo(),
|
||||
whichEnd, whichBond);
|
||||
} else if (bestBondIsSingle &&
|
||||
bondToTry->getBondType() != Bond::BondType::SINGLE) {
|
||||
continue;
|
||||
|
||||
} else if (!bestBondIsSingle &&
|
||||
bondToTry->getBondType() == Bond::BondType::SINGLE) {
|
||||
bestBondEnd = whichEnd;
|
||||
bestBondNumber = whichBond;
|
||||
bestRingCount = ringCount;
|
||||
bestBondIsSingle = true;
|
||||
bestBondDir = getBondDirForAtropisomerNoConf(bond->getStereo(),
|
||||
whichEnd, whichBond);
|
||||
|
||||
} else {
|
||||
auto bondDir = getBondDirForAtropisomerNoConf(bond->getStereo(),
|
||||
whichEnd, whichBond);
|
||||
if (bestBondDir == Bond::BondDir::NONE ||
|
||||
(bestBondDir == Bond::BondDir::BEGINDASH &&
|
||||
bondDir == Bond::BondDir::BEGINWEDGE)) {
|
||||
bestBondEnd = whichEnd;
|
||||
bestBondNumber = whichBond;
|
||||
bestRingCount = ringCount;
|
||||
bestBondIsSingle =
|
||||
(bondToTry->getBondType() == Bond::BondType::SINGLE);
|
||||
bestBondDir = bondDir;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bestBondEnd >= 0) // we found a good one
|
||||
{
|
||||
// make sure the atoms on the bond are in the right order for the
|
||||
// wedge/hash the atom on the end of the main bond must be listed
|
||||
// first for the wedge/has bond
|
||||
|
||||
auto bestBond = atomAndBondVecs[bestBondEnd].second[bestBondNumber];
|
||||
if (bestBond->getBeginAtom() != atomAndBondVecs[bestBondEnd].first) {
|
||||
bestBond->setEndAtom(bestBond->getBeginAtom());
|
||||
bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first);
|
||||
}
|
||||
|
||||
bestBond->setBondDir(bestBondDir);
|
||||
|
||||
auto newWedgeInfo = std::unique_ptr<RDKit::Chirality::WedgeInfoBase>(
|
||||
new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(),
|
||||
bestBondDir));
|
||||
wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo);
|
||||
} else {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "Failed to find a good bond to set as UP or DOWN for an atropisomer - atoms are: "
|
||||
<< bond->getBeginAtomIdx() << " " << bond->getEndAtomIdx() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WedgeBondFromAtropisomerOneBond2d(
|
||||
Bond *bond, const ROMol &mol, const Conformer *conf,
|
||||
std::map<int, std::unique_ptr<RDKit::Chirality::WedgeInfoBase>>
|
||||
@@ -605,8 +875,8 @@ bool WedgeBondFromAtropisomerOneBond2d(
|
||||
|
||||
if (!canHaveDirection(*bondToTry) ||
|
||||
wedgeBonds.find(bondToTry->getIdx()) != wedgeBonds.end()) {
|
||||
continue; // must be a single OR aromatic bond and not already spoken
|
||||
// for by a chiral center
|
||||
continue; // must be a single OR aromatic bond and not already
|
||||
// spoken for by a chiral center
|
||||
}
|
||||
|
||||
if (bondToTry->getBondDir() != Bond::BondDir::NONE) {
|
||||
@@ -680,11 +950,13 @@ bool WedgeBondFromAtropisomerOneBond2d(
|
||||
bestBond->setEndAtom(bestBond->getBeginAtom());
|
||||
bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first);
|
||||
}
|
||||
// bonds[bestBondEnd][bestBondNumber]->setBondDir(bestBondDir);
|
||||
bestBond->setBondDir(bestBondDir);
|
||||
|
||||
auto newWedgeInfo = std::unique_ptr<RDKit::Chirality::WedgeInfoBase>(
|
||||
new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(),
|
||||
bestBondDir));
|
||||
wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo);
|
||||
|
||||
} else {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "Failed to find a good bond to set as UP or DOWN for an atropisomer - atoms are: "
|
||||
@@ -738,8 +1010,9 @@ bool WedgeBondFromAtropisomerOneBond3d(
|
||||
}
|
||||
}
|
||||
|
||||
// the following may seem redundant, since we just found the useBonds based
|
||||
// on their bond dir PRESENCE, but this endures that the values are correct.
|
||||
// the following may seem redundant, since we just found the useBonds
|
||||
// based on their bond dir PRESENCE, but this endures that the values are
|
||||
// correct.
|
||||
|
||||
if (useBonds.size() > 0) {
|
||||
for (auto useBond : useBonds) {
|
||||
@@ -843,10 +1116,11 @@ bool WedgeBondFromAtropisomerOneBond3d(
|
||||
bestBond->setEndAtom(bestBond->getBeginAtom());
|
||||
bestBond->setBeginAtom(atomAndBondVecs[bestBondEnd].first);
|
||||
}
|
||||
// bestBond->setBondDir(bestBondDir);
|
||||
bestBond->setBondDir(bestBondDir);
|
||||
auto newWedgeInfo = std::unique_ptr<RDKit::Chirality::WedgeInfoBase>(
|
||||
new RDKit::Chirality::WedgeInfoAtropisomer(bond->getIdx(),
|
||||
bestBondDir));
|
||||
|
||||
wedgeBonds[bestBond->getIdx()] = std::move(newWedgeInfo);
|
||||
} else {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
@@ -862,8 +1136,7 @@ void wedgeBondsFromAtropisomers(
|
||||
const ROMol &mol, const Conformer *conf,
|
||||
std::map<int, std::unique_ptr<RDKit::Chirality::WedgeInfoBase>>
|
||||
&wedgeBonds) {
|
||||
PRECONDITION(conf, "no conformer");
|
||||
PRECONDITION(&(conf->getOwningMol()) == &mol,
|
||||
PRECONDITION(conf == nullptr || &(conf->getOwningMol()) == &mol,
|
||||
"conformer does not belong to molecule");
|
||||
for (auto bond : mol.bonds()) {
|
||||
auto bondStereo = bond->getStereo();
|
||||
@@ -878,10 +1151,14 @@ void wedgeBondsFromAtropisomers(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (conf->is3D()) {
|
||||
WedgeBondFromAtropisomerOneBond3d(bond, mol, conf, wedgeBonds);
|
||||
} else {
|
||||
WedgeBondFromAtropisomerOneBond2d(bond, mol, conf, wedgeBonds);
|
||||
if (conf) {
|
||||
if (conf->is3D()) {
|
||||
WedgeBondFromAtropisomerOneBond3d(bond, mol, conf, wedgeBonds);
|
||||
} else {
|
||||
WedgeBondFromAtropisomerOneBond2d(bond, mol, conf, wedgeBonds);
|
||||
}
|
||||
} else { // no conformer
|
||||
WedgeBondFromAtropisomerOneBondNoConf(bond, mol, wedgeBonds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <GraphMol/FileParsers/FileParsers.h>
|
||||
#include <GraphMol/MarvinParse/MarvinParser.h>
|
||||
#include <GraphMol/test_fixtures.h>
|
||||
#include <GraphMol/Substruct/SubstructMatch.h>
|
||||
|
||||
#include "CIPLabeler.h"
|
||||
#include "Digraph.h"
|
||||
@@ -840,3 +841,99 @@ TEST_CASE("upsertTest", "[basic]") {
|
||||
CHECK(smilesOut != "");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("atropisomers", "[basic]") {
|
||||
SECTION("atropisomers1") {
|
||||
UseLegacyStereoPerceptionFixture useLegacy(false);
|
||||
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
|
||||
std::vector<std::string> files = {
|
||||
"BMS-986142_atrop5.sdf", "BMS-986142_atrop1.sdf",
|
||||
"BMS-986142_atrop2.sdf", "JDQ443_atrop1.sdf",
|
||||
"JDQ443_atrop2.sdf", "Mrtx1719_atrop1.sdf",
|
||||
"Mrtx1719_atrop2.sdf", "RP-6306_atrop1.sdf",
|
||||
"RP-6306_atrop2.sdf", "Sotorasib_atrop1.sdf",
|
||||
"Sotorasib_atrop2.sdf", "ZM374979_atrop1.sdf",
|
||||
"ZM374979_atrop2.sdf"};
|
||||
|
||||
for (auto file : files) {
|
||||
auto fName =
|
||||
rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + file;
|
||||
|
||||
auto molsdf = MolFileToMol(fName, true, true, true);
|
||||
REQUIRE(molsdf);
|
||||
CIPLabeler::assignCIPLabels(*molsdf, 100000);
|
||||
|
||||
std::map<std::pair<unsigned int, unsigned int>, std::string> CIPVals;
|
||||
for (auto bond : molsdf->bonds()) {
|
||||
auto a1 = bond->getBeginAtomIdx();
|
||||
auto a2 = bond->getEndAtomIdx();
|
||||
if (a1 > a2) {
|
||||
std::swap(a1, a2);
|
||||
}
|
||||
|
||||
if (bond->hasProp(common_properties::_CIPCode)) {
|
||||
CIPVals[std::make_pair(a1, a2)] =
|
||||
bond->getProp<std::string>(common_properties::_CIPCode);
|
||||
} else {
|
||||
CIPVals[std::make_pair(a1, a2)] = "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
molsdf->clearConformers();
|
||||
|
||||
SmilesWriteParams wp;
|
||||
wp.canonical = false;
|
||||
wp.doKekule = false;
|
||||
unsigned int flags =
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_MOLFILE_VALUES |
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_ATOM_PROPS |
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_CFG |
|
||||
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER;
|
||||
SmilesParserParams pp;
|
||||
pp.allowCXSMILES = true;
|
||||
pp.sanitize = true;
|
||||
|
||||
auto smi = MolToCXSmiles(*molsdf, wp, flags);
|
||||
auto newMol = SmilesToMol(smi, pp);
|
||||
CIPLabeler::assignCIPLabels(*newMol, 100000);
|
||||
|
||||
std::map<std::pair<unsigned int, unsigned int>, std::string> newCIPVals;
|
||||
for (auto bond : newMol->bonds()) {
|
||||
auto a1 = bond->getBeginAtomIdx();
|
||||
auto a2 = bond->getEndAtomIdx();
|
||||
if (a1 > a2) {
|
||||
std::swap(a1, a2);
|
||||
}
|
||||
if (bond->hasProp(common_properties::_CIPCode)) {
|
||||
newCIPVals[std::make_pair(a1, a2)] =
|
||||
bond->getProp<std::string>(common_properties::_CIPCode);
|
||||
} else {
|
||||
newCIPVals[std::make_pair(a1, a2)] = "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
RDKit::SubstructMatchParameters params;
|
||||
|
||||
auto match = RDKit::SubstructMatch(*molsdf, *newMol, params);
|
||||
|
||||
for (auto thisBond : newMol->bonds()) {
|
||||
unsigned int a1 = thisBond->getBeginAtomIdx();
|
||||
unsigned int a2 = thisBond->getEndAtomIdx();
|
||||
if (a1 > a2) {
|
||||
std::swap(a1, a2);
|
||||
}
|
||||
|
||||
unsigned int a1match = match[0][a1].second;
|
||||
unsigned int a2match = match[0][a2].second;
|
||||
|
||||
if (a1match > a2match) {
|
||||
std::swap(a1match, a2match);
|
||||
}
|
||||
CHECK(CIPVals[std::make_pair(a1match, a2match)] ==
|
||||
newCIPVals[std::make_pair(a1, a2)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -301,8 +301,12 @@ RDKIT_GRAPHMOL_EXPORT void setStereoForBond(ROMol &mol, Bond *bond,
|
||||
/// returns a map from bond idx -> controlling atom idx
|
||||
RDKIT_GRAPHMOL_EXPORT
|
||||
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
|
||||
const ROMol &mol, const BondWedgingParameters *params = nullptr,
|
||||
const Conformer *conf = nullptr);
|
||||
const ROMol &mol, const BondWedgingParameters *params = nullptr);
|
||||
|
||||
RDKIT_GRAPHMOL_EXPORT
|
||||
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
|
||||
const ROMol &mol, const BondWedgingParameters *params,
|
||||
const Conformer *conf);
|
||||
|
||||
RDKIT_GRAPHMOL_EXPORT void wedgeMolBonds(
|
||||
ROMol &mol, const Conformer *conf = nullptr,
|
||||
|
||||
@@ -613,6 +613,8 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(
|
||||
|
||||
Atropisomers::detectAtropisomerChirality(
|
||||
*res, &res->getConformer(confidx));
|
||||
} else { // no Conformer
|
||||
Atropisomers::detectAtropisomerChirality(*res, nullptr);
|
||||
}
|
||||
|
||||
// now that atom stereochem has been perceived, the wedging
|
||||
|
||||
@@ -228,8 +228,8 @@ void invertMolBlockWedgingInfo(ROMol &mol) {
|
||||
}
|
||||
|
||||
void markUnspecifiedStereoAsUnknown(ROMol &mol, int confId) {
|
||||
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol);
|
||||
const auto conf = mol.getConformer(confId);
|
||||
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol, nullptr, &conf);
|
||||
for (auto b : mol.bonds()) {
|
||||
if (b->getBondType() == Bond::DOUBLE) {
|
||||
int dirCode;
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
// #include <string_view>
|
||||
#include <streambuf>
|
||||
|
||||
#include "RDGeneral/test.h"
|
||||
@@ -1618,7 +1618,6 @@ H 0.635000 0.635000 0.635000
|
||||
mol->addConformer(conf);
|
||||
|
||||
const std::string xyzblock = MolToXYZBlock(*mol, 0, 15);
|
||||
std::cout << xyzblock << std::endl;
|
||||
std::string xyzblock_expected = R"XYZ(7
|
||||
CHEMBL506259
|
||||
O 0.402012650000000 -0.132994360000000 1.000000170000000
|
||||
@@ -5074,7 +5073,8 @@ M END
|
||||
Chirality::reapplyMolBlockWedging(*m);
|
||||
CHECK(m->getBondWithIdx(2)->getBondDir() == Bond::BondDir::NONE);
|
||||
}
|
||||
SECTION("Reapply the original wedging, regardless the bond type of wedged bonds") {
|
||||
SECTION(
|
||||
"Reapply the original wedging, regardless the bond type of wedged bonds") {
|
||||
auto m = R"CTAB(
|
||||
Mrv2311 04232413302D
|
||||
|
||||
@@ -6041,10 +6041,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
|
||||
auto bondBlockStart = mae.find("m_bond[6]");
|
||||
REQUIRE(bondBlockStart != std::string::npos);
|
||||
|
||||
std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
|
||||
std::string_view atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
std::string_view bondBlock(&mae[bondBlockStart]);
|
||||
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
|
||||
std::string atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
std::string bondBlock(&mae[bondBlockStart]);
|
||||
|
||||
// Check mol properties
|
||||
CHECK(ctBlock.find("s_m_title") != std::string::npos);
|
||||
@@ -6173,10 +6173,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
|
||||
auto bondBlockStart = mae.find("m_bond[6]");
|
||||
REQUIRE(bondBlockStart != std::string::npos);
|
||||
|
||||
std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
|
||||
std::string_view atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
std::string_view bondBlock(&mae[bondBlockStart]);
|
||||
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
|
||||
std::string atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
std::string bondBlock(&mae[bondBlockStart]);
|
||||
|
||||
// Check mol properties
|
||||
CHECK(ctBlock.find("s_m_title") != std::string::npos);
|
||||
@@ -6258,7 +6258,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
|
||||
// Maeparser does not parse bond properties, so don't check them.
|
||||
}
|
||||
SECTION("Check reverse roundtrip") {
|
||||
constexpr std::string_view maeBlock = R"MAE(f_m_ct {
|
||||
std::string maeBlock = R"MAE(f_m_ct {
|
||||
s_m_title
|
||||
:::
|
||||
""
|
||||
@@ -6348,7 +6348,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
|
||||
auto ctBlockStart = mae.find("f_m_ct");
|
||||
REQUIRE(ctBlockStart != std::string::npos);
|
||||
|
||||
std::string_view ctBlock(&mae[ctBlockStart]);
|
||||
std::string ctBlock(&mae[ctBlockStart]);
|
||||
|
||||
CHECK(ctBlock == MaeWriter::getText(*mol));
|
||||
}
|
||||
@@ -6616,8 +6616,8 @@ TEST_CASE(
|
||||
auto bondBlockStart = mae.find("m_bond[2]");
|
||||
REQUIRE(bondBlockStart != std::string::npos);
|
||||
|
||||
const std::string_view atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
const std::string atomBlock(&mae[atomBlockStart],
|
||||
bondBlockStart - atomBlockStart);
|
||||
|
||||
CHECK(atomBlock.find(" -2 ") != std::string::npos);
|
||||
}
|
||||
|
||||
@@ -611,9 +611,12 @@ class MarvinCMLReader {
|
||||
MolOps::assignChiralTypesFrom3D(*mol, conf3d->getId(), true);
|
||||
}
|
||||
|
||||
if (conf || conf3d) {
|
||||
Atropisomers::detectAtropisomerChirality(
|
||||
*mol, conf != nullptr ? conf : conf3d);
|
||||
if (conf) {
|
||||
Atropisomers::detectAtropisomerChirality(*mol, conf);
|
||||
} else if (conf3d) {
|
||||
Atropisomers::detectAtropisomerChirality(*mol, conf3d);
|
||||
} else {
|
||||
Atropisomers::detectAtropisomerChirality(*mol, nullptr);
|
||||
}
|
||||
|
||||
ClearSingleBondDirFlags(*mol);
|
||||
|
||||
@@ -1097,7 +1097,9 @@ bool parse_wedged_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
if (atom->getIdx() != bond->getEndAtomIdx()) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "atom " << atomIdx << " is not associated with bond "
|
||||
<< bondIdx << " in w block" << std::endl;
|
||||
<< bondIdx << "(" << bond->getBeginAtomIdx() + startAtomIdx << "-"
|
||||
<< bond->getEndAtomIdx() + startAtomIdx << ")"
|
||||
<< " in w block" << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto eidx = bond->getBeginAtomIdx();
|
||||
@@ -2038,23 +2040,96 @@ std::string get_bond_config_block(
|
||||
bd = Bond::BondDir::NONE;
|
||||
}
|
||||
|
||||
if (atropisomerOnly) {
|
||||
// on of the bonds on the beging atom of this bond must be an atropisomer
|
||||
if (atropisomerOnly && bd == Bond::BondDir::NONE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bd == Bond::BondDir::NONE) {
|
||||
continue;
|
||||
}
|
||||
bool foundAtropisomer = false;
|
||||
// see if this one is an atropisomer
|
||||
|
||||
const Atom *firstAtom = bond->getBeginAtom();
|
||||
bool isAnAtropisomer = false;
|
||||
|
||||
const Atom *firstAtom = bond->getBeginAtom();
|
||||
if (bd == Bond::BondDir::BEGINDASH || bd == Bond::BondDir::BEGINWEDGE) {
|
||||
for (auto bondNbr : mol.atomBonds(firstAtom)) {
|
||||
if (bondNbr->getIdx() == bond->getIdx()) {
|
||||
continue; // a bond is not its own neighbor
|
||||
}
|
||||
if (bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCW ||
|
||||
bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCCW) {
|
||||
foundAtropisomer = true;
|
||||
isAnAtropisomer = true;
|
||||
|
||||
// if it is for an atropisomer and there are no coords, check to see
|
||||
// if the wedge needs to be flipped based on the smiles reordering
|
||||
if (!coordsIncluded && isAnAtropisomer) {
|
||||
Atropisomers::AtropAtomAndBondVec atomAndBondVecs[2];
|
||||
if (!Atropisomers::getAtropisomerAtomsAndBonds(
|
||||
bondNbr, atomAndBondVecs, mol)) {
|
||||
throw ValueErrorException("Internal error - should not occur");
|
||||
// should not happend
|
||||
} else {
|
||||
unsigned int swaps = 0;
|
||||
|
||||
unsigned int firstReorderedIdx =
|
||||
std::find(atomOrder.begin(), atomOrder.end(),
|
||||
bondNbr->getBeginAtom()->getIdx()) -
|
||||
atomOrder.begin();
|
||||
unsigned int secondReorderedIdx =
|
||||
std::find(atomOrder.begin(), atomOrder.end(),
|
||||
bondNbr->getEndAtom()->getIdx()) -
|
||||
atomOrder.begin();
|
||||
if (firstReorderedIdx > secondReorderedIdx) {
|
||||
++swaps;
|
||||
}
|
||||
|
||||
for (unsigned int bondAtomIndex = 0; bondAtomIndex < 2;
|
||||
++bondAtomIndex) {
|
||||
if (atomAndBondVecs[bondAtomIndex].first == firstAtom)
|
||||
continue; // swapped atoms on the side where the wedge bond
|
||||
// is does NOT change the wedge bond
|
||||
if (atomAndBondVecs[bondAtomIndex].second.size() == 2) {
|
||||
unsigned int firstOtherAtomIdx =
|
||||
atomAndBondVecs[bondAtomIndex]
|
||||
.second[0]
|
||||
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
|
||||
->getIdx();
|
||||
unsigned int secondOtherAtomIdx =
|
||||
atomAndBondVecs[bondAtomIndex]
|
||||
.second[1]
|
||||
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
|
||||
->getIdx();
|
||||
|
||||
unsigned int firstReorderedAtomIdx =
|
||||
std::find(atomOrder.begin(), atomOrder.end(),
|
||||
firstOtherAtomIdx) -
|
||||
atomOrder.begin();
|
||||
unsigned int secondReorderedAtomIdx =
|
||||
std::find(atomOrder.begin(), atomOrder.end(),
|
||||
secondOtherAtomIdx) -
|
||||
atomOrder.begin();
|
||||
|
||||
if (firstReorderedAtomIdx > secondReorderedAtomIdx) {
|
||||
++swaps;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (swaps % 2) {
|
||||
bd = (bd == Bond::BondDir::BEGINWEDGE)
|
||||
? Bond::BondDir::BEGINDASH
|
||||
: Bond::BondDir::BEGINWEDGE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!foundAtropisomer) {
|
||||
}
|
||||
|
||||
if (atropisomerOnly) {
|
||||
// one of the bonds on the beginning atom of this bond must be an
|
||||
// atropisomer
|
||||
|
||||
if (!isAnAtropisomer) {
|
||||
continue;
|
||||
}
|
||||
} else { // atropisomeronly is FALSE - check for a wedging caused by
|
||||
@@ -2109,8 +2184,9 @@ std::string get_bond_config_block(
|
||||
std::string wType = "";
|
||||
if (bd == Bond::BondDir::UNKNOWN) {
|
||||
wType = "w";
|
||||
} else if (coordsIncluded) {
|
||||
} else if (coordsIncluded || isAnAtropisomer) {
|
||||
// we only do wedgeUp and wedgeDown if coordinates are being output
|
||||
// or its an atropisomer
|
||||
if (bd == Bond::BondDir::BEGINWEDGE) {
|
||||
wType = "wU";
|
||||
} else if (bd == Bond::BondDir::BEGINDASH) {
|
||||
@@ -2354,12 +2430,13 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
|
||||
// do the CX_BOND_ATROPISOMER only if CX_BOND_CFG s not done. CX_BOND_CFG
|
||||
// includes the atropisomer wedging
|
||||
else if (flags & SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER) {
|
||||
if (conf) {
|
||||
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
|
||||
}
|
||||
|
||||
bool includeCoords = flags & SmilesWrite::CXSmilesFields::CX_COORDS &&
|
||||
mol.getNumConformers();
|
||||
if (includeCoords) {
|
||||
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
|
||||
} else {
|
||||
Atropisomers::wedgeBondsFromAtropisomers(mol, nullptr, wedgeBonds);
|
||||
}
|
||||
const auto cfgblock = get_bond_config_block(
|
||||
mol, atomOrder, bondOrder, includeCoords, wedgeBonds, true);
|
||||
appendToCXExtension(cfgblock, res);
|
||||
|
||||
@@ -458,8 +458,13 @@ std::unique_ptr<RWMol> MolFromSmiles(const std::string &smiles,
|
||||
res->updatePropertyCache(false);
|
||||
MolOps::assignChiralTypesFrom3D(*res, conf3d->getId(), true);
|
||||
}
|
||||
if (conf || conf3d) {
|
||||
Atropisomers::detectAtropisomerChirality(*res, conf ? conf : conf3d);
|
||||
|
||||
if (conf) {
|
||||
Atropisomers::detectAtropisomerChirality(*res, conf);
|
||||
} else if (conf3d) {
|
||||
Atropisomers::detectAtropisomerChirality(*res, conf3d);
|
||||
} else {
|
||||
Atropisomers::detectAtropisomerChirality(*res, nullptr);
|
||||
}
|
||||
|
||||
if (res && (params.sanitize || params.removeHs)) {
|
||||
|
||||
@@ -790,7 +790,8 @@ void testOneAtropisomers(const SmilesTest *smilesTest) {
|
||||
MolToCXSmiles(*smilesMol, ps, flags, RestoreBondDirOptionClear);
|
||||
|
||||
generateNewExpectedFilesIfSoSpecified(fName + ".NEW3.cxsmi", smilesOut);
|
||||
CHECK(getExpectedValue(expectedFileName) == smilesOut);
|
||||
auto expected = getExpectedValue(expectedFileName);
|
||||
CHECK(expected == smilesOut);
|
||||
}
|
||||
|
||||
smilesMol =
|
||||
@@ -826,7 +827,7 @@ void testOneAtropisomers(const SmilesTest *smilesTest) {
|
||||
} catch (const RDKit::KekulizeException &e) {
|
||||
outMolStr = "";
|
||||
} catch (...) {
|
||||
throw; // re-trhow the error if not a kekule error
|
||||
throw; // re-throw the error if not a kekule error
|
||||
}
|
||||
if (outMolStr == "") {
|
||||
RDKit::Chirality::reapplyMolBlockWedging(*smilesMol);
|
||||
@@ -1023,6 +1024,7 @@ void testOne3dChiral(const SmilesTest *smilesTest) {
|
||||
TEST_CASE("testAtropisomersInCXSmiles") {
|
||||
{
|
||||
std::list<SmilesTest> smiTests{
|
||||
SmilesTest("ShortAtropisomerNoCoords.cxsmi", true, 14, 15),
|
||||
SmilesTest("ShortAtropisomer.cxsmi", true, 14, 15),
|
||||
SmilesTest("ShortAtropisomerArom.cxsmi", true, 14, 15),
|
||||
SmilesTest("AtropManyChirals.cxsmi", true, 20, 20),
|
||||
@@ -1344,15 +1346,11 @@ TEST_CASE("SMILES CANONICALIZATION") {
|
||||
}
|
||||
molCount++;
|
||||
|
||||
try {
|
||||
if (molBlock.length() > 25) {
|
||||
std::unique_ptr<RWMol> mol(MolBlockToMol(molBlock));
|
||||
std::string smiles = MolToCXSmiles(*mol);
|
||||
if (molBlock.length() > 25) {
|
||||
std::unique_ptr<RWMol> mol(MolBlockToMol(molBlock));
|
||||
std::string smiles = MolToCXSmiles(*mol);
|
||||
|
||||
testSmilesCanonicalization(smiles);
|
||||
}
|
||||
} catch (...) {
|
||||
std::cout << "failed on mol " << molCount << std::endl;
|
||||
testSmilesCanonicalization(smiles);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
CC1=C(N2C=CC=C2C)C(C)CCC1 |wU:2.9|
|
||||
@@ -0,0 +1 @@
|
||||
CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9|
|
||||
@@ -0,0 +1,2 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<cml xmlns="http://www.chemaxon.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.chemaxon.com http://www.chemaxon.com/marvin/schema/mrvSchema_20_20_0.xsd"><MDocument><MChemicalStruct><molecule molID="m1"><atomArray><atom id="a1" elementType="C" x2="3" y2="0.00000"/><atom id="a2" elementType="C" x2="1.5" y2="0.00000"/><atom id="a3" elementType="C" x2="0.75" y2="-1.29904"/><atom id="a4" elementType="N" x2="1.5" y2="-2.59808"/><atom id="a5" elementType="C" x2="2.99178" y2="-2.75487"/><atom id="a6" elementType="C" x2="3.30365" y2="-4.22209"/><atom id="a7" elementType="C" x2="2.00461" y2="-4.97209"/><atom id="a8" elementType="C" x2="0.889895" y2="-3.96839"/><atom id="a9" elementType="C" x2="-0.577326" y2="-4.28026"/><atom id="a10" elementType="C" x2="-0.75" y2="-1.29904"/><atom id="a11" elementType="C" x2="-1.5" y2="-2.59808"/><atom id="a12" elementType="C" x2="-1.5" y2="0.00000"/><atom id="a13" elementType="C" x2="-0.75" y2="1.29904"/><atom id="a14" elementType="C" x2="0.75" y2="1.29904"/></atomArray><bondArray><bond id="b1" atomRefs2="a1 a2" order="1"/><bond id="b2" atomRefs2="a2 a3" order="2"/><bond id="b3" atomRefs2="a3 a4" order="1"/><bond id="b4" atomRefs2="a4 a5" order="1"/><bond id="b5" atomRefs2="a5 a6" order="2"/><bond id="b6" atomRefs2="a6 a7" order="1"/><bond id="b7" atomRefs2="a7 a8" order="2"/><bond id="b8" atomRefs2="a8 a9" order="1"/><bond id="b9" atomRefs2="a3 a10" order="1"><bondStereo>W</bondStereo></bond><bond id="b10" atomRefs2="a10 a11" order="1"/><bond id="b11" atomRefs2="a10 a12" order="1"/><bond id="b12" atomRefs2="a12 a13" order="1"/><bond id="b13" atomRefs2="a13 a14" order="1"/><bond id="b14" atomRefs2="a14 a2" order="1"/><bond id="b15" atomRefs2="a8 a4" order="1"/></bondArray></molecule></MChemicalStruct></MDocument></cml>
|
||||
@@ -0,0 +1,41 @@
|
||||
|
||||
RDKit 2D
|
||||
|
||||
0 0 0 0 0 0 0 0 0 0999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 14 15 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C 3.000000 0.000000 0.000000 0
|
||||
M V30 2 C 1.500000 0.000000 0.000000 0
|
||||
M V30 3 C 0.750000 -1.299038 0.000000 0
|
||||
M V30 4 N 1.500000 -2.598076 0.000000 0
|
||||
M V30 5 C 2.991783 -2.754869 0.000000 0
|
||||
M V30 6 C 3.303650 -4.222090 0.000000 0
|
||||
M V30 7 C 2.004612 -4.972090 0.000000 0
|
||||
M V30 8 C 0.889895 -3.968394 0.000000 0
|
||||
M V30 9 C -0.577326 -4.280262 0.000000 0
|
||||
M V30 10 C -0.750000 -1.299038 0.000000 0
|
||||
M V30 11 C -1.500000 -2.598076 0.000000 0
|
||||
M V30 12 C -1.500000 0.000000 0.000000 0
|
||||
M V30 13 C -0.750000 1.299038 0.000000 0
|
||||
M V30 14 C 0.750000 1.299038 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 2 2 3
|
||||
M V30 3 1 3 4
|
||||
M V30 4 1 4 5
|
||||
M V30 5 2 5 6
|
||||
M V30 6 1 6 7
|
||||
M V30 7 2 7 8
|
||||
M V30 8 1 8 9
|
||||
M V30 9 1 3 10 CFG=1
|
||||
M V30 10 1 10 11
|
||||
M V30 11 1 10 12
|
||||
M V30 12 1 12 13
|
||||
M V30 13 1 13 14
|
||||
M V30 14 1 14 2
|
||||
M V30 15 1 8 4
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END
|
||||
@@ -0,0 +1 @@
|
||||
CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9|
|
||||
@@ -0,0 +1 @@
|
||||
CC1=C(n2cccc2C)C(C)CCC1 |wU:2.9|
|
||||
@@ -341,6 +341,16 @@ int pickBondToWedge(
|
||||
|
||||
// returns map of bondIdx -> bond begin atom for those bonds that
|
||||
// need wedging.
|
||||
|
||||
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
|
||||
const ROMol &mol, const BondWedgingParameters *params) {
|
||||
const Conformer *conf = nullptr;
|
||||
if (mol.getNumConformers()) {
|
||||
conf = &mol.getConformer();
|
||||
}
|
||||
return pickBondsToWedge(mol, params, conf);
|
||||
}
|
||||
|
||||
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
|
||||
const ROMol &mol, const BondWedgingParameters *params,
|
||||
const Conformer *conf) {
|
||||
@@ -388,15 +398,7 @@ std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
|
||||
wedgeInfo[bnd1] = std::move(wi);
|
||||
}
|
||||
}
|
||||
|
||||
if (conf == nullptr) {
|
||||
if (mol.getNumConformers()) {
|
||||
conf = &mol.getConformer();
|
||||
}
|
||||
}
|
||||
if (conf) {
|
||||
RDKit::Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeInfo);
|
||||
}
|
||||
RDKit::Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeInfo);
|
||||
|
||||
return wedgeInfo;
|
||||
}
|
||||
@@ -472,7 +474,7 @@ void wedgeMolBonds(ROMol &mol, const Conformer *conf,
|
||||
MolOps::findSSSR(mol);
|
||||
}
|
||||
|
||||
auto wedgeBonds = Chirality::pickBondsToWedge(mol, params);
|
||||
auto wedgeBonds = Chirality::pickBondsToWedge(mol, params, conf);
|
||||
|
||||
// loop over the bonds we need to wedge:
|
||||
for (const auto &[wbi, wedgeInfo] : wedgeBonds) {
|
||||
|
||||
@@ -2234,6 +2234,46 @@ Note: the RDKit software makes no attempt to determine if the bond is actually
|
||||
rotationally constrained. If the bond meets the requirements above, it is
|
||||
marked as an atropisomer.
|
||||
|
||||
Internal Representation of Atropisomers
|
||||
=======================================
|
||||
|
||||
To help with the rest of the explanation, we include the bond indices in the molecule drawing:
|
||||
|
||||
.. testsetup::
|
||||
|
||||
'CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7,(18.31,-9.57,;17.45,-9.08,;17.44,-8.08,;16.58,-7.58,;15.71,-8.08,;15.71,-9.08,;14.84,-9.58,;16.58,-9.58,;16.58,-10.58,;17.38,-11.17,;18.34,-10.87,;17.08,-12.12,;16.07,-12.12,;15.77,-11.17,;14.81,-10.87,)|'
|
||||
|
||||
.. image:: images/atrop_representation1.png
|
||||
|
||||
Here's part of the Debug output for that molecule::
|
||||
|
||||
Atoms:
|
||||
...
|
||||
1 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1
|
||||
...
|
||||
5 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1
|
||||
...
|
||||
7 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1
|
||||
8 7 N chg: 0 deg: 3 exp: 3 imp: 0 hyb: SP2 arom?: 1
|
||||
9 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1
|
||||
...
|
||||
13 6 C chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2 arom?: 1
|
||||
...
|
||||
Bonds:
|
||||
...
|
||||
6 5->7 order: a conj?: 1 aromatic?: 1
|
||||
7 7->8 order: 1 stereo: CCW bonds: (14 6 8 15) conj?: 1
|
||||
8 8->9 order: a conj?: 1 aromatic?: 1
|
||||
...
|
||||
14 7->1 order: a dir: wedge conj?: 1 aromatic?: 1
|
||||
15 13->8 order: a conj?: 1 aromatic?: 1
|
||||
|
||||
This tells us that bond 7 is atropisomeric and that when looking down the bond
|
||||
(from atom C7 to atom N8), the rotation direction between bonds 14 and 8 (these
|
||||
are the bonds to the lowest numbered atoms on each of the atropisomeric bond) is
|
||||
counterclockwise (CCW).
|
||||
|
||||
|
||||
Formats supporting atropisomers
|
||||
===============================
|
||||
|
||||
@@ -2280,6 +2320,54 @@ an atropisomer bond, the actual stereochemistry of the bond is determined by the
|
||||
.. image:: images/atrop_example5.png
|
||||
|
||||
|
||||
Interpreting atropisomers without coordinates
|
||||
=============================================
|
||||
|
||||
Just as it is possible to interpret atomic and double bond stereochemistry from
|
||||
SMILES without providing atomic coordinates, the RDKit adopts (starting with the
|
||||
2024.03.2 release) a convention for interpreting bond wedge information in
|
||||
CXSMILES that do not have coordinates provided.
|
||||
|
||||
In order to understand how this representation works, a quick digression is necessary to explain the difference in the way bonds are numbered in CXSMILES and the RDKit.
|
||||
In the RDKit ring closure bonds are added last; they are the highest numbered bonds in the molecule. In CXSMILES, the ring closure bonds are added immediately upon closing the ring.
|
||||
Here's an illustration of this for the SMILES ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br``; the RDKit bond numbering (what we saw above) is shown first and the CXSMILES numbering is shown second.
|
||||
|
||||
.. testsetup::
|
||||
|
||||
'CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7|'
|
||||
|
||||
.. image:: images/atrop_representation2.png
|
||||
|
||||
To describe this atropisomer in CXSMILES without using coordinates, we adopt the convention that the atropisomeric bond's stereochemistry is ``CCW`` when the bond to the lowest-numbered neighbor of the start atom
|
||||
(in this case the neighbor connected via bond 7) is wedged and write the CXSMILES for this molecule as ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wU:7.7||``. The stereo values for all possible combinations are shown here:
|
||||
|
||||
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| | | bond | | | |
|
||||
| from | to | index | type | stereo | |
|
||||
+=======+====+=======+=======+========+============+
|
||||
| 7 | 1 | 7 | wedge | CCW | ``wU7.7`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 7 | 5 | 6 | wedge | CW | ``wU7.6`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 8 | 9 | 9 | wedge | CW | ``wU8.9`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 8 | 13 | 14 | wedge | CCW | ``wU8.14`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 7 | 1 | 7 | dash | CW | ``wD7.7`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 7 | 5 | 6 | dash | CCW | ``wD7.6`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 8 | 9 | 9 | dash | CCW | ``wD8.9`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
| 8 | 13 | 14 | dash | CW | ``wD8.14`` |
|
||||
+-------+----+-------+-------+--------+------------+
|
||||
|
||||
|
||||
It's also possible to wedge multiple neighbor bonds around an atropisomeric bond, but the wedging must be consistent based on the table above;
|
||||
so the CXSMILES ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wD:7.7,wU:8.9|`` is valid, but ``CC1=CC=CC(I)=C1N1C(C)=CC=C1Br |wD:7.7,wU:8.14|`` is not.
|
||||
|
||||
|
||||
Validation of Atropisomers
|
||||
==========================
|
||||
|
||||
@@ -2294,7 +2382,6 @@ Atropisomers are supported in the canonicalization algorithm of RDKit. They are
|
||||
substructure searching and similarity searching at this time.
|
||||
|
||||
|
||||
|
||||
Query Features in Molecule Drawings
|
||||
***********************************
|
||||
|
||||
|
||||
BIN
Docs/Book/images/atrop_representation1.png
Normal file
BIN
Docs/Book/images/atrop_representation1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
BIN
Docs/Book/images/atrop_representation2.png
Normal file
BIN
Docs/Book/images/atrop_representation2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 27 KiB |
Reference in New Issue
Block a user