// // Copyright (C) 2004-2024 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "Chirality.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // #define VERBOSE_CANON 1 namespace RDKit { namespace { bool shouldDetectDoubleBondStereo(const Bond *bond) { const RingInfo *ri = bond->getOwningMol().getRingInfo(); return (!ri->numBondRings(bond->getIdx()) || ri->minBondRingSize(bond->getIdx()) >= Chirality::minRingSizeForDoubleBondStereo); } bool getValFromEnvironment(const char *var, bool defVal) { auto evar = std::getenv(var); if (evar != nullptr) { if (!strcmp(evar, "0")) { return false; } else { return true; } } return defVal; } bool is_regular_h(const Atom &atom) { return atom.getAtomicNum() == 1 && atom.getIsotope() == 0; } Bond::BondDir getOppositeBondDir(Bond::BondDir dir) { PRECONDITION(dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT, "bad bond direction"); switch (dir) { case Bond::ENDDOWNRIGHT: return Bond::ENDUPRIGHT; case Bond::ENDUPRIGHT: return Bond::ENDDOWNRIGHT; default: return Bond::NONE; } } void setBondDirRelativeToAtom(Bond *bond, Atom *atom, Bond::BondDir dir, bool reverse, boost::dynamic_bitset<> &) { PRECONDITION(bond, "bad bond"); PRECONDITION(atom, "bad atom"); PRECONDITION(dir == Bond::ENDUPRIGHT || dir == Bond::ENDDOWNRIGHT, "bad dir"); PRECONDITION(atom == bond->getBeginAtom() || atom == bond->getEndAtom(), "atom doesn't belong to bond"); if (bond->getBeginAtom() != atom) { reverse = !reverse; } if (reverse) { dir = (dir == Bond::ENDUPRIGHT ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT); } // to ensure maximum compatibility, even when a bond has unknown stereo (set // explicitly and recorded in _UnknownStereo property), I will still let a // direction to be computed. You must check the _UnknownStereo property to // make sure whether this bond is explicitly set to have no direction info. // This makes sense because the direction info are all derived from // coordinates, the _UnknownStereo property is like extra metadata to be // used with the direction info. bond->setBondDir(dir); } bool isLinearArrangement(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2) { double lsq = v1.lengthSq() * v2.lengthSq(); // treat zero length vectors as linear if (lsq < 1.0e-6) { return true; } double dotProd = v1.dotProduct(v2); double cos178 = -0.999388; // == cos(M_PI-0.035), corresponds to a tolerance of 2 degrees return dotProd < cos178 * sqrt(lsq); } void controllingBondFromAtom(const ROMol &mol, const boost::dynamic_bitset<> &needsDir, const std::vector &singleBondCounts, const Bond *dblBond, const Atom *atom, Bond *&bond, Bond *&obond, bool &squiggleBondSeen, bool &doubleBondSeen) { bond = nullptr; obond = nullptr; for (const auto tBond : mol.atomBonds(atom)) { if (tBond == dblBond) { continue; } if ((tBond->getBondType() == Bond::SINGLE || tBond->getBondType() == Bond::AROMATIC) && (tBond->getBondDir() == Bond::BondDir::NONE || tBond->getBondDir() == Bond::BondDir::ENDDOWNRIGHT || tBond->getBondDir() == Bond::BondDir::ENDUPRIGHT)) { // prefer bonds that already have their directionality set // or that are adjacent to more double bonds: if (!bond) { bond = tBond; } else if (needsDir[tBond->getIdx()]) { if (singleBondCounts[tBond->getIdx()] > singleBondCounts[bond->getIdx()]) { obond = bond; bond = tBond; } else { obond = tBond; } } else { obond = bond; bond = tBond; } } else if (tBond->getBondType() == Bond::DOUBLE) { doubleBondSeen = true; } int explicit_unknown_stereo; if ((tBond->getBondType() == Bond::SINGLE || tBond->getBondType() == Bond::AROMATIC) && (tBond->getBondDir() == Bond::UNKNOWN || ((tBond->getPropIfPresent(common_properties::_UnknownStereo, explicit_unknown_stereo) && explicit_unknown_stereo)))) { squiggleBondSeen = true; break; } } } void updateDoubleBondNeighbors(ROMol &mol, Bond *dblBond, const Conformer *conf, boost::dynamic_bitset<> &needsDir, std::vector &singleBondCounts, const VECT_INT_VECT &singleBondNbrs) { // we want to deal only with double bonds: PRECONDITION(dblBond, "bad bond"); PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "not a double bond"); if (!needsDir[dblBond->getIdx()]) { return; } needsDir.set(dblBond->getIdx(), 0); std::vector followupBonds; Bond *bond1 = nullptr, *obond1 = nullptr; bool squiggleBondSeen = false; bool doubleBondSeen = false; controllingBondFromAtom(mol, needsDir, singleBondCounts, dblBond, dblBond->getBeginAtom(), bond1, obond1, squiggleBondSeen, doubleBondSeen); // Don't do any direction setting if we've seen a squiggle bond, but do mark // the double bond as a crossed bond and return if (squiggleBondSeen) { Chirality::detail::setStereoForBond(mol, dblBond, Bond::STEREOANY); return; } if (!bond1) { return; } Bond *bond2 = nullptr, *obond2 = nullptr; controllingBondFromAtom(mol, needsDir, singleBondCounts, dblBond, dblBond->getEndAtom(), bond2, obond2, squiggleBondSeen, doubleBondSeen); // Don't do any direction setting if we've seen a squiggle bond, but do mark // the double bond as a crossed bond and return if (squiggleBondSeen) { Chirality::detail::setStereoForBond(mol, dblBond, Bond::STEREOANY); return; } if (!bond2) { return; } CHECK_INVARIANT(bond1 && bond2, "no bonds found"); bool sameTorsionDir = false; if (conf) { RDGeom::Point3D beginP = conf->getAtomPos(dblBond->getBeginAtomIdx()); RDGeom::Point3D endP = conf->getAtomPos(dblBond->getEndAtomIdx()); RDGeom::Point3D bond1P = conf->getAtomPos(bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx())); RDGeom::Point3D bond2P = conf->getAtomPos(bond2->getOtherAtomIdx(dblBond->getEndAtomIdx())); // check for a linear arrangement of atoms on either end: bool linear = false; RDGeom::Point3D p1; RDGeom::Point3D p2; p1 = bond1P - beginP; p2 = endP - beginP; if (isLinearArrangement(p1, p2)) { if (!obond1) { linear = true; } else { // one of the bonds was linear; what about the other one? Bond *tBond = bond1; bond1 = obond1; obond1 = tBond; bond1P = conf->getAtomPos( bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx())); p1 = bond1P - beginP; if (isLinearArrangement(p1, p2)) { linear = true; } } } if (!linear) { p1 = bond2P - endP; p2 = beginP - endP; if (isLinearArrangement(p1, p2)) { if (!obond2) { linear = true; } else { Bond *tBond = bond2; bond2 = obond2; obond2 = tBond; bond2P = conf->getAtomPos( bond2->getOtherAtomIdx(dblBond->getEndAtomIdx())); p1 = bond2P - beginP; if (isLinearArrangement(p1, p2)) { linear = true; } } } } if (linear) { Chirality::detail::setStereoForBond(mol, dblBond, Bond::STEREOANY); return; } double ang = RDGeom::computeDihedralAngle(bond1P, beginP, endP, bond2P); sameTorsionDir = ang >= M_PI / 2; // std::cerr << " angle: " << ang << " sameTorsionDir: " << sameTorsionDir // << "\n"; } else { if (dblBond->getStereo() == Bond::STEREOCIS || dblBond->getStereo() == Bond::STEREOZ) { sameTorsionDir = false; } else if (dblBond->getStereo() == Bond::STEREOTRANS || dblBond->getStereo() == Bond::STEREOE) { sameTorsionDir = true; } else { return; } // if bond1 or bond2 are not to the stereo-controlling atoms, flip // our expections of the torsion dir int bond1AtomIdx = bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx()); if (bond1AtomIdx != dblBond->getStereoAtoms()[0] && bond1AtomIdx != dblBond->getStereoAtoms()[1]) { sameTorsionDir = !sameTorsionDir; } int bond2AtomIdx = bond2->getOtherAtomIdx(dblBond->getEndAtomIdx()); if (bond2AtomIdx != dblBond->getStereoAtoms()[0] && bond2AtomIdx != dblBond->getStereoAtoms()[1]) { sameTorsionDir = !sameTorsionDir; } } /* Time for some clarificatory text, because this gets really confusing really fast. The dihedral angle analysis above is based on viewing things with an atom order as follows: 1 \ 2 = 3 \ 4 so dihedrals > 90 correspond to sameDir=true however, the stereochemistry representation is based on something more like this: 2 \ 1 = 3 \ 4 (i.e. we consider the direction-setting single bonds to be starting at the double-bonded atom) */ bool reverseBondDir = sameTorsionDir; Atom *atom1 = dblBond->getBeginAtom(), *atom2 = dblBond->getEndAtom(); if (needsDir[bond1->getIdx()]) { for (auto bidx : singleBondNbrs[bond1->getIdx()]) { // std::cerr << " neighbor from: " << bond1->getIdx() << " " << bidx // << ": " << needsDir[bidx] << std::endl; if (needsDir[bidx]) { followupBonds.push_back(mol.getBondWithIdx(bidx)); } } } if (needsDir[bond2->getIdx()]) { for (auto bidx : singleBondNbrs[bond2->getIdx()]) { // std::cerr << " neighbor from: " << bond2->getIdx() << " " << bidx // << ": " << needsDir[bidx] << std::endl; if (needsDir[bidx]) { followupBonds.push_back(mol.getBondWithIdx(bidx)); } } } if (!needsDir[bond1->getIdx()]) { if (!needsDir[bond2->getIdx()]) { // check that we agree } else { if (bond1->getBeginAtom() != atom1) { reverseBondDir = !reverseBondDir; } setBondDirRelativeToAtom(bond2, atom2, bond1->getBondDir(), reverseBondDir, needsDir); } } else if (!needsDir[bond2->getIdx()]) { if (bond2->getBeginAtom() != atom2) { reverseBondDir = !reverseBondDir; } setBondDirRelativeToAtom(bond1, atom1, bond2->getBondDir(), reverseBondDir, needsDir); } else { setBondDirRelativeToAtom(bond1, atom1, Bond::ENDDOWNRIGHT, false, needsDir); setBondDirRelativeToAtom(bond2, atom2, Bond::ENDDOWNRIGHT, reverseBondDir, needsDir); } needsDir[bond1->getIdx()] = 0; needsDir[bond2->getIdx()] = 0; if (obond1 && needsDir[obond1->getIdx()]) { setBondDirRelativeToAtom(obond1, atom1, bond1->getBondDir(), bond1->getBeginAtom() == atom1, needsDir); needsDir[obond1->getIdx()] = 0; } if (obond2 && needsDir[obond2->getIdx()]) { setBondDirRelativeToAtom(obond2, atom2, bond2->getBondDir(), bond2->getBeginAtom() == atom2, needsDir); needsDir[obond2->getIdx()] = 0; } for (Bond *oDblBond : followupBonds) { updateDoubleBondNeighbors(mol, oDblBond, conf, needsDir, singleBondCounts, singleBondNbrs); } } bool isBondCandidateForStereo(const Bond *bond) { PRECONDITION(bond, "no bond"); return bond->getBondType() == Bond::DOUBLE && bond->getStereo() != Bond::STEREOANY && bond->getBondDir() != Bond::EITHERDOUBLE && bond->getBeginAtom()->getDegree() > 1u && bond->getEndAtom()->getDegree() > 1u && shouldDetectDoubleBondStereo(bond); } const Atom *findHighestCIPNeighbor(const Atom *atom, const Atom *skipAtom) { PRECONDITION(atom, "bad atom"); unsigned bestCipRank = 0; const Atom *bestCipRankedAtom = nullptr; const auto &mol = atom->getOwningMol(); for (const auto neighbor : mol.atomNeighbors(atom)) { if (neighbor == skipAtom) { continue; } unsigned cip = 0; if (!neighbor->getPropIfPresent(common_properties::_CIPRank, cip)) { // If at least one of the atoms doesn't have a CIP rank, the highest rank // does not make sense, so return a nullptr. return nullptr; } else if (cip > bestCipRank || bestCipRankedAtom == nullptr) { bestCipRank = cip; bestCipRankedAtom = neighbor; } else if (cip == bestCipRank) { // This also doesn't make sense if there is a tie (if that's possible). // We still keep the best CIP rank in case something better comes around // (also not sure if that's possible). BOOST_LOG(rdWarningLog) << "Warning: duplicate CIP ranks found in findHighestCIPNeighbor()" << std::endl; bestCipRankedAtom = nullptr; } } return bestCipRankedAtom; } } // namespace namespace Chirality { std::optional atomChiralTypeFromBondDirPseudo3D( const ROMol &mol, const Bond *bond, const Conformer *conf) { PRECONDITION(bond, "no bond"); PRECONDITION(conf, "no conformer"); auto bondDir = bond->getBondDir(); PRECONDITION(bondDir == Bond::BEGINWEDGE || bondDir == Bond::BEGINDASH, "bad bond direction"); constexpr double coordZeroTol = 1e-4; constexpr double zeroTol = 1e-3; constexpr double tShapeTol = 0.00031; // used to recognize T-shaped arrangements // corresponds to an angle between the two vectors of just under 178 degrees // degree constexpr double pseudo3DOffset = 0.1; // z-displacement for wedged bonds constexpr double volumeTolerance = 0.00174; // used to recognize zero chiral volume // This is what we get for a T-shaped arrangement with just over 178 degrees // NOTE that according to the CT file spec, wedging assigns chirality // to the atom at the point of the wedge, (atom 1 in the bond). const auto atom = bond->getBeginAtom(); PRECONDITION(atom, "no atom"); // we can't do anything with atoms that have more than 4 neighbors: if (atom->getDegree() > 4) { return Atom::CHI_UNSPECIFIED; } const auto bondAtom = bond->getEndAtom(); Atom::ChiralType res = Atom::CHI_UNSPECIFIED; auto centerLoc = conf->getAtomPos(atom->getIdx()); centerLoc.z = 0.0; auto refPt = conf->getAtomPos(bondAtom->getIdx()); // Github #7305: in some odd cases, we get conformers with // weird scalings. In these, we need to scale the 3d offset // or it might be irrelevant or dominate over the coordinates. auto refLength = (centerLoc - refPt).length(); refPt.z = bondDir == Bond::BondDir::BEGINWEDGE ? pseudo3DOffset : -pseudo3DOffset; if (refLength) { refPt.z *= refLength; } //---------------------------------------------------------- // // collect indices and bond vectors of neighbors and track whether or // not there's an H neighbor and if all bonds are single // // at the end of this process bond 0 is the input wedged bond // //---------------------------------------------------------- INT_VECT neighborBondIndices; unsigned int refIdx = mol.getNumBonds() + 1; std::vector bondVects; bool allSingle = true; unsigned int nbrIdx = 0; for (const auto nbrBond : mol.atomBonds(atom)) { const auto oAtom = nbrBond->getOtherAtom(atom); auto tmpPt = conf->getAtomPos(oAtom->getIdx()); if (nbrBond == bond) { refIdx = nbrIdx; tmpPt = refPt; } else { // theoretically we could confirm that this is a single bond, // but it's not impossible that at some point in the future we // could allow wedged multiple bonds for things like atropisomers if (nbrBond->getBeginAtomIdx() == atom->getIdx() && (nbrBond->getBondDir() == Bond::BondDir::BEGINWEDGE || nbrBond->getBondDir() == Bond::BondDir::BEGINDASH)) { // scale the 3d offset based on the reference bond here too tmpPt.z = nbrBond->getBondDir() == Bond::BondDir::BEGINWEDGE ? pseudo3DOffset : -pseudo3DOffset; if (refLength) { tmpPt.z *= refLength; } } else { tmpPt.z = 0; } // check for overly short bonds. Note that we're doing this check *after* // adjusting the z coordinate. // We want to allow atoms to overlap in x-y space if they are connected // via a wedged bond. if ((centerLoc - tmpPt).lengthSq() < zeroTol) { BOOST_LOG(rdWarningLog) << "Warning: ambiguous stereochemistry - zero-length (or near zero-length) bond - at atom " << atom->getIdx() << " ignored." << std::endl; return std::nullopt; } } ++nbrIdx; if (nbrBond->getBondType() != Bond::SINGLE) { allSingle = false; } bondVects.push_back(centerLoc.directionVector(tmpPt)); neighborBondIndices.push_back(nbrBond->getIdx()); } CHECK_INVARIANT(refIdx < mol.getNumBonds(), "could not find reference bond in neighbors"); auto nNbrs = bondVects.size(); //---------------------------------------------------------- // // Return now if there aren't at least 3 bonds to the atom. // (we can implicitly add a single H to 3 coordinate atoms). // //---------------------------------------------------------- if (nNbrs < 3 || nNbrs > 4) { return std::nullopt; } //---------------------------------------------------------- // Check for neighbor atoms which overlap //---------------------------------------------------------- for (auto i = 0u; i < nNbrs; ++i) { for (auto j = 0u; j < i; ++j) { if ((bondVects[i] - bondVects[j]).lengthSq() < zeroTol) { BOOST_LOG(rdWarningLog) << "Warning: ambiguous stereochemistry - overlapping neighbors - at atom " << atom->getIdx() << " ignored" << std::endl; return std::nullopt; } } } //---------------------------------------------------------- // // Continue if there are all single bonds or if we're considering // 4-coordinate P or S // //---------------------------------------------------------- if (allSingle || atom->getAtomicNum() == 15 || atom->getAtomicNum() == 16) { double vol; unsigned int order[4] = {0, 1, 2, 3}; double prefactor = 1; if (refIdx != 0) { // bring the wedged bond to the front so that we always consider it std::swap(order[0], order[refIdx]); prefactor *= -1; } // check for the case that bonds 1 and 2 are co-linear but 1 and 0 are // not: if (nNbrs > 3 && bondVects[order[1]].crossProduct(bondVects[order[2]]).lengthSq() < 10 * zeroTol && bondVects[order[1]].crossProduct(bondVects[order[0]]).lengthSq() > 10 * zeroTol) { bondVects[order[1]].z = bondVects[order[0]].z * -1; // that bondVect is no longer normalized, but this hopefully won't break // anything } //---------------------------------------------------------- // // order the bonds so that the rotation order is: // 0 - 1 - 2 for three coordinate // or // 0 - 1 - 2 - 3 for four coordinate // // this makes the rest of the code a lot simpler // //---------------------------------------------------------- // checks to see if the vectors 1 and 2 need to have their order // relative to vector 0 swapped. // we don't actually pass the vectors in, but use their cross products // and dot products to vector 0 to figure out if they need to be swapped #if defined(__clang__) // Clang apparently doesn't need to capture the constexpr zeroTol, and complains // about it being specified, but MSVC does need it, and removing it will break // the build #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-lambda-capture" #endif auto needsSwap = [&zeroTol](const RDGeom::Point3D &cp01, const RDGeom::Point3D &cp02, double dp01, double dp02) -> bool { if (fabs(dp01) - 1 > -zeroTol) { if (cp02.z < 0) { return true; } return false; } if (fabs(dp02) - 1 > -zeroTol) { if (cp01.z < 0) { return true; } } if ((cp01.z * cp02.z) < -zeroTol) { if (cp01.z < cp02.z) { return true; } return false; } if (dp01 * dp02 < -zeroTol) { if (dp01 < dp02) { return true; } return false; } return fabs(dp01) > fabs(dp02); }; #if defined(__clang__) #pragma GCC diagnostic pop #endif if (nNbrs == 3) { // this case is simple, we either need to swap vectors 1 and 2 or we // don't: auto cp01 = bondVects[order[0]].crossProduct(bondVects[order[1]]); auto cp02 = bondVects[order[0]].crossProduct(bondVects[order[2]]); auto dp01 = bondVects[order[0]].dotProduct(bondVects[order[1]]); auto dp02 = bondVects[order[0]].dotProduct(bondVects[order[2]]); if (needsSwap(cp01, cp02, dp01, dp02)) { std::swap(order[1], order[2]); prefactor *= -1; } } else if (nNbrs > 3) { // here there are more permutations. Rather than hand-coding all of them // we'll just sort bonds 1, 2, and 3 based on their cross- and dot- // products to bond 0 std::vector> orderedBonds(3); for (auto i = 1u; i < 4; ++i) { auto cp0i = bondVects[order[0]].crossProduct(bondVects[order[i]]); auto sgn = cp0i.z < -zeroTol ? -1 : 1; auto dp0i = bondVects[order[0]].dotProduct(bondVects[order[i]]); orderedBonds[i - 1] = std::make_tuple(sgn, sgn * dp0i, order[i]); } std::sort(orderedBonds.rbegin(), orderedBonds.rend()); // update the order array and figure out whether or not we've done a // cyclic permutation auto nChanged = 0; for (auto i = 1u; i < 4; ++i) { auto ni = std::get<2>(orderedBonds[i - 1]); if (order[i] != ni) { order[i] = ni; ++nChanged; } } if (nChanged == 2) { // this is always an acyclic permutation prefactor *= -1; } } // std::cerr<<"ORDER "<getIdx() << " ignored." << std::endl; return std::nullopt; } } } } // three-coordinate special cases where chirality cannot be determined // // Case 1: // this one is never allowed with different directions for the bonds to 1 // and 2 // 0 2 // \ / // C // * // 1 // This is ST-1.2.10 in the IUPAC guidelines // // Case 2: all bonds are wedged in the same direction if (nNbrs == 3) { bool conflict = false; if (bondVects[order[1]].z * bondVects[order[0]].z < -coordZeroTol && fabs(bondVects[order[2]].z) < coordZeroTol) { conflict = bondVects[order[2]].crossProduct(bondVects[order[0]]).z * bondVects[order[2]].crossProduct(bondVects[order[1]]).z < -1e-4; } else if (bondVects[order[2]].z * bondVects[order[0]].z < -coordZeroTol && fabs(bondVects[order[1]].z) < coordZeroTol) { conflict = bondVects[order[1]].crossProduct(bondVects[order[0]]).z * bondVects[order[1]].crossProduct(bondVects[order[2]]).z < -coordZeroTol; } if (conflict) { BOOST_LOG(rdWarningLog) << "Warning: conflicting stereochemistry - bond wedging contradiction - at atom " << atom->getIdx() << " ignored" << std::endl; return std::nullopt; } } // for the purposes of the cross products we ignore any pseudo-3D // coordinates auto bv1 = bondVects[order[1]]; bv1.z = 0; auto bv2 = bondVects[order[2]]; bv2.z = 0; auto crossp1 = bv1.crossProduct(bv2); // catch linear arrangements if (nNbrs == 3) { if (crossp1.lengthSq() < tShapeTol) { // in a linear relationship with three neighbors we assume that the // two perpendicular bonds are wedged in the other direction from the // one that was provided. // that's this situation: // // 0 // | <- wedged up // 1--C--2 // // here we assume that bonds C-1 and C-2 are wedged down // // ST-1.2.12 of the IUPAC guidelines says that this form is wrong since // it's for a "T-shaped" configuration instead of a tetrahedron, but it // shows up fairly frequently, particularly with fused ring systems bv1.z = -bondVects[order[0]].z; bv2.z = -bondVects[order[0]].z; crossp1 = bv1.crossProduct(bv2); } } else if (crossp1.lengthSq() < 10 * zeroTol) { // if the other bond is flat: if (fabs(bondVects[order[3]].z) < coordZeroTol) { // By construction this is a neighboring bond, so make it the opposite // wedging from us. bondVects[order[3]].z = -1 * bondVects[order[0]].z; // that bondVect is no longer normalized, but this hopefully won't break // anything } } vol = crossp1.dotProduct(bondVects[order[0]]); if (nNbrs == 4) { const auto dotp1 = bondVects[order[1]].dotProduct(bondVects[order[2]]); // for the purposes of the cross products we ignore any pseudo-3D // coordinates auto bv3 = bondVects[order[3]]; bv3.z = 0; const auto crossp2 = bv1.crossProduct(bv3); const auto dotp2 = bondVects[order[1]].dotProduct(bondVects[order[3]]); auto vol2 = crossp2.dotProduct(bondVects[order[0]]); // detect the case where there's no chiral volume for the default // evaluation if (fabs(vol) < zeroTol) { // and check the other evaluation: if (fabs(vol2) < zeroTol) { BOOST_LOG(rdWarningLog) << "Warning: ambiguous stereochemistry - no chiral volume - at atom " << atom->getIdx() << " ignored" << std::endl; return std::nullopt; } vol = vol2; prefactor *= -1; } else if (vol * vol2 > 0 && fabs(vol2) > volumeTolerance && dotp1 < dotp2) { // both volumes give the same answer, but in the second case the cross // product is between two bonds with a better dot product vol = vol2; prefactor *= -1; } else if (fabs(vol) < volumeTolerance && fabs(vol2) > volumeTolerance) { // if the first volume is too small, but the second isn't, take the // second if (vol * vol2 < 0) { prefactor *= -1; } vol = vol2; } } vol *= prefactor; // std::cerr << " final " << vol << std::endl; // at this point we can assign our atomic stereo based on the sign of the // chiral volume if (vol > volumeTolerance) { res = Atom::ChiralType::CHI_TETRAHEDRAL_CCW; } else if (vol < -volumeTolerance) { res = Atom::ChiralType::CHI_TETRAHEDRAL_CW; } else { BOOST_LOG(rdWarningLog) << "Warning: ambiguous stereochemistry - zero final chiral volume - at atom " << atom->getIdx() << " ignored" << std::endl; return std::nullopt; } } return res; } #ifdef _WIN32 int setenv(const char *name, const char *value, int) { return _putenv_s(name, value); } #endif void setAllowNontetrahedralChirality(bool val) { if (val) { setenv(nonTetrahedralStereoEnvVar, "1", 1); } else { setenv(nonTetrahedralStereoEnvVar, "0", 1); } } bool getAllowNontetrahedralChirality() { return getValFromEnvironment(nonTetrahedralStereoEnvVar, nonTetrahedralStereoDefaultVal); } void setUseLegacyStereoPerception(bool val) { if (val) { setenv(useLegacyStereoEnvVar, "1", 1); } else { setenv(useLegacyStereoEnvVar, "0", 1); } } bool getUseLegacyStereoPerception() { return getValFromEnvironment(useLegacyStereoEnvVar, useLegacyStereoDefaultVal); } namespace detail { bool bondAffectsAtomChirality(const Bond *bond, const Atom *atom) { // FIX consider how to handle organometallics PRECONDITION(bond, "bad bond pointer"); PRECONDITION(atom, "bad atom pointer"); if (bond->getBondType() == Bond::BondType::UNSPECIFIED || bond->getBondType() == Bond::BondType::ZERO || (bond->getBondType() == Bond::BondType::DATIVE && bond->getBeginAtomIdx() == atom->getIdx())) { return false; } return true; } unsigned int getAtomNonzeroDegree(const Atom *atom) { PRECONDITION(atom, "bad pointer"); PRECONDITION(atom->hasOwningMol(), "no owning molecule"); unsigned int res = 0; for (auto bond : atom->getOwningMol().atomBonds(atom)) { if (!bondAffectsAtomChirality(bond, atom)) { continue; } ++res; } return res; } bool has_protium_neighbor(const ROMol &mol, const Atom *atom) { for (const auto nbr : mol.atomNeighbors(atom)) { if (is_regular_h(*nbr)) { return true; } } return false; } void setStereoForBond(ROMol &mol, Bond *bond, Bond::BondStereo stereo, bool useCXSmilesOrdering) { // NOTE: moved from parse_doublebond_stereo CXSmilesOps // IF useCXSmilesOrdering is true, the cis/trans/unknown marker will be // assigned relative to the lowest-numbered neighbor of each double bond atom. // Otherwise it uses the lowest-numbered neighbor on the lower-numbered atom // of the double bond and the highest-numbered neighbor on the higher-numbered // atom auto begAtom = bond->getBeginAtom(); auto endAtom = bond->getEndAtom(); if (begAtom->getIdx() > endAtom->getIdx()) { std::swap(begAtom, endAtom); } if (begAtom->getDegree() > 1 && endAtom->getDegree() > 1) { unsigned int begControl = mol.getNumAtoms(); for (auto nbr : mol.atomNeighbors(begAtom)) { if (nbr == endAtom) { continue; } begControl = std::min(nbr->getIdx(), begControl); } unsigned int endControl = useCXSmilesOrdering ? mol.getNumAtoms() : 0; for (auto nbr : mol.atomNeighbors(endAtom)) { if (nbr == begAtom) { continue; } endControl = useCXSmilesOrdering ? std::min(nbr->getIdx(), endControl) : std::max(nbr->getIdx(), endControl); } if (begAtom != bond->getBeginAtom()) { std::swap(begControl, endControl); } bond->setStereoAtoms(begControl, endControl); bond->setStereo(stereo); mol.setProp("_needsDetectBondStereo", 1); } } } // namespace detail typedef std::pair INT_PAIR; typedef std::vector INT_PAIR_VECT; typedef std::vector::iterator INT_PAIR_VECT_I; typedef std::vector::const_iterator INT_PAIR_VECT_CI; typedef INT_VECT CIP_ENTRY; typedef std::vector CIP_ENTRY_VECT; template void debugVect(const std::vector arg) { typename std::vector::const_iterator viIt; std::stringstream outS; for (viIt = arg.begin(); viIt != arg.end(); viIt++) { outS << *viIt << " "; } BOOST_LOG(rdDebugLog) << outS.str() << std::endl; } // -------------------------------------------------- // // Calculates chiral invariants for the atoms of a molecule // These are based on Labute's proposal in: // "An Efficient Algorithm for the Determination of Topological // RS Chirality" Journal of the CCG (1996) // // -------------------------------------------------- void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) { PRECONDITION(res.size() >= mol.getNumAtoms(), "res vect too small"); int atsSoFar = 0; // // NOTE: // If you make modifications to this, keep in mind that it is // essential that the initial comparison of ranks behave properly. // So, though it seems like it would makes sense to include // information about the number of Hs (or charge, etc) in the CIP // invariants, this will result in bad rankings. For example, in // this molecule: OC[C@H](C)O, including the number of Hs would // cause the methyl group (atom 3) to be ranked higher than the CH2 // connected to O (atom 1). This is totally wrong. // // We also don't include any pre-existing stereochemistry information. // Though R and S assignments do factor in to the priorities of atoms, // we're starting here from scratch and we'll let the R and S stuff // be taken into account during the iterations. // for (const auto atom : mol.atoms()) { const unsigned short nMassBits = 10; const unsigned short maxMass = 1 << nMassBits; unsigned long invariant = 0; int num = atom->getAtomicNum() % 128; // get an int with the deviation in the mass from the default: int mass = 0; if (atom->getIsotope()) { mass = atom->getIsotope() - PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum()); if (mass >= 0) { mass += 1; } } mass += maxMass / 2; if (mass < 0) { mass = 0; } else { mass = mass % maxMass; } invariant = num; // 7 bits here invariant = (invariant << nMassBits) | mass; int mapnum = -1; atom->getPropIfPresent(common_properties::molAtomMapNumber, mapnum); mapnum = (mapnum + 1) % 1024; // increment to allow map numbers of zero // (though that would be stupid) invariant = (invariant << 10) | mapnum; res[atsSoFar++] = invariant; } } //! Lightweight sortable wrapper that references a CIP entry and keeps track of //! the current rank. struct SortableCIPReference { SortableCIPReference(CIP_ENTRY *cipRef, const int atomIdx) : cip(cipRef), atomIdx(atomIdx) { CHECK_INVARIANT(cip != nullptr, "null CIP entry"); } SortableCIPReference(SortableCIPReference &&other) noexcept { cip = other.cip; atomIdx = other.atomIdx; other.cip = nullptr; currRank = other.currRank; } SortableCIPReference &operator=(SortableCIPReference &&other) noexcept { if (this == &other) { return *this; } cip = other.cip; atomIdx = other.atomIdx; other.cip = nullptr; currRank = other.currRank; return *this; } bool operator==(const SortableCIPReference &rhs) const { PRECONDITION(cip != nullptr, "null CIP entry"); PRECONDITION(rhs.cip != nullptr, "null CIP entry"); return *cip == *rhs.cip; } bool operator<(const SortableCIPReference &rhs) const { PRECONDITION(cip != nullptr, "null CIP entry"); PRECONDITION(rhs.cip != nullptr, "null CIP entry"); return *cip < *rhs.cip; } CIP_ENTRY *cip = nullptr; int atomIdx = -1; int currRank = -1; }; //! Iterate over sorted entries, track tied regions and assign ranks. //! \param sortedEntries CIP entries //! \param res Pairs of start, end index of tied atoms //! \param numIndependentEntries The number of unique ranks. void findSegmentsToResort(std::vector &sortedEntries, std::vector> &res, unsigned int &numIndependentEntries) { res.clear(); numIndependentEntries = rdcast(sortedEntries.size()); SortableCIPReference *current = &sortedEntries.front(); int runningRank = 0; current->currRank = runningRank; bool inEqualSection = false; for (size_t i = 1; i < sortedEntries.size(); i++) { SortableCIPReference &entry = sortedEntries[i]; if (*current == entry) { entry.currRank = runningRank; numIndependentEntries--; // Case where we need to open a section if (!inEqualSection) { inEqualSection = true; auto &[firstIndex, _] = res.emplace_back(); // Go back to the first in this section, we only catch at first + 1 firstIndex = i - 1; } else { // Case where we are already in a section, nullop } } else { // Case where we're closing an open section. runningRank++; entry.currRank = runningRank; current = &entry; if (inEqualSection) { auto &[_, finalIndex] = res.back(); finalIndex = i; inEqualSection = false; } } } // Handle currently open. if (inEqualSection) { auto &[_, finalIndex] = res.back(); finalIndex = sortedEntries.size() - 1; } } struct PrecomputedBondFeatures { //! Pairs of {atom index, counts}, strided by 8 for each atom. std::vector> countsAndNeighborIndices; //! Number of neighbors per atom. std::vector numNeighbors; }; constexpr int kMaxBonds = 16; //! Lookup neighbor indices and compute counts for each atom. PrecomputedBondFeatures computeBondFeatures(const ROMol &mol) { PrecomputedBondFeatures features; const unsigned int numAtoms = mol.getNumAtoms(); features.countsAndNeighborIndices.resize(numAtoms * kMaxBonds); features.numNeighbors.resize(numAtoms, 0); for (size_t atomIdx = 0; atomIdx < numAtoms; atomIdx++) { int indexOffset = atomIdx * kMaxBonds; for (const auto bond : mol.atomBonds(mol[atomIdx])) { const unsigned int nbrIdx = bond->getOtherAtomIdx(atomIdx); features.numNeighbors[nbrIdx]++; auto &[count, neighborIndex] = features.countsAndNeighborIndices.at(indexOffset); neighborIndex = nbrIdx; // put the neighbor in 2N times where N is the bond order as a double. // this is to treat aromatic linkages on fair footing. i.e. at least in // the first iteration --c(:c):c and --C(=C)-C should look the same. // this was part of issue 3009911 // a special case for chiral phosphorus compounds // (this was leading to incorrect assignment of R/S labels ): bool isChiralPhosphorusSpecialCase = false; if (bond->getBondType() == Bond::DOUBLE) { const Atom *nbr = mol[nbrIdx]; if (nbr->getAtomicNum() == 15) { unsigned int nbrDeg = nbr->getDegree(); isChiralPhosphorusSpecialCase = nbrDeg == 3 || nbrDeg == 4; } }; // general justification of this is: // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions: // Multiple-Bond Unsaturation and Aromaticity". It contains several // conventions of which convention (b) is the one applying here: // "(b) Contributions by d orbitals to bonds of quadriligant atoms are // neglected." // FIX: this applies to more than just P if (isChiralPhosphorusSpecialCase) { count += 1; } else { count += getTwiceBondType(*bond); } ++indexOffset; } } return features; } void recomputeRanks(const std::vector &sortedEntries, std::vector &ranks) { for (size_t rank = 0; rank < ranks.size(); ++rank) { const auto &cipEntry = sortedEntries[rank]; ranks[cipEntry.atomIdx] = cipEntry.currRank; } } void iterateCIPRanks(const ROMol &mol, const DOUBLE_VECT &invars, UINT_VECT &ranks, bool seedWithInvars) { PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size"); unsigned int numAtoms = mol.getNumAtoms(); CIP_ENTRY_VECT cipEntries(numAtoms); for (auto &vec : cipEntries) { vec.reserve(16); } std::vector sortableEntries; sortableEntries.reserve(numAtoms); for (size_t i = 0; i < cipEntries.size(); i++) { sortableEntries.emplace_back(&cipEntries[i], i); } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "invariants:" << std::endl; for (unsigned int i = 0; i < numAtoms; i++) { BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl; } #endif for (unsigned int i = 0; i < numAtoms; i++) { cipEntries[i].push_back(static_cast(invars[i])); } unsigned int numRanks; std::sort(sortableEntries.begin(), sortableEntries.end()); std::vector> needsSorting; findSegmentsToResort(sortableEntries, needsSorting, numRanks); recomputeRanks(sortableEntries, ranks); #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl; for (unsigned int i = 0; i < numAtoms; ++i) { BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl; } #endif // Start each atom's rank vector with its atomic number: // Note: in general one should avoid the temptation to // use invariants here, those lead to incorrect answers for (unsigned int i = 0; i < numAtoms; i++) { if (seedWithInvars) { cipEntries[i][0] = static_cast(invars[i]); } else { cipEntries[i][0] = mol[i]->getAtomicNum(); cipEntries[i].push_back(static_cast(ranks[i])); } } // Based on above seeding, the rank will be set at index 1 or 2. const int cipRankIndex = seedWithInvars ? 1 : 2; // Loop until either: // 1) all classes are uniquified // 2) the number of ranks doesn't change from one iteration to // the next // 3) we've gone through maxIts times // maxIts is calculated by dividing the number of atoms // by 2. That's a pessimal version of the // maximum number of steps required for two atoms to // "feel" each other (each influences one additional // neighbor shell per iteration). unsigned int maxIts = numAtoms / 2 + 1; unsigned int numIts = 0; int lastNumRanks = -1; PrecomputedBondFeatures bondFeatures = computeBondFeatures(mol); while (!needsSorting.empty() && numIts < maxIts && (lastNumRanks < 0 || static_cast(lastNumRanks) < numRanks)) { // ---------------------------------------------------- // // for each atom, get a sorted list of its neighbors' ranks: // for (unsigned int index = 0; index < numAtoms; ++index) { const unsigned int indexOffset = kMaxBonds * index; const int numNeighbors = bondFeatures.numNeighbors[index]; auto *sortBegin = &bondFeatures.countsAndNeighborIndices[indexOffset]; auto *sortEnd = sortBegin + numNeighbors + 1; // For each of our neighbors' ranks weighted by bond type, copy it N times // to our cipEntry in reverse rank order, where N is the weight. if (numNeighbors > 1) { // compare vs 1 for performance. std::sort(sortBegin, sortEnd, [&ranks](const std::pair &countAndIdx1, const std::pair &countAndIdx2) { return ranks[countAndIdx1.second] > ranks[countAndIdx2.second]; }); } auto &cipEntry = cipEntries[index]; for (auto *iter = sortBegin; iter != sortEnd; ++iter) { const auto &[count, idx] = *iter; cipEntry.insert(cipEntry.end(), count, ranks[idx] + 1); } // add a zero for each coordinated H as long as we're not a query atom if (!mol[index]->hasQuery()) { cipEntry.insert(cipEntry.end(), mol[index]->getTotalNumHs(), 0); } } // ---------------------------------------------------- // // sort the new ranks and update the list of active indices: // lastNumRanks = numRanks; // Loop through previously tied atom sections and re-sort. for (const auto &[firstIdx, lastIdx] : needsSorting) { std::sort(sortableEntries.begin() + firstIdx, sortableEntries.begin() + lastIdx + 1); } findSegmentsToResort(sortableEntries, needsSorting, numRanks); // Map out of order rankings back to the absolute rankings vector. recomputeRanks(sortableEntries, ranks); // now truncate each vector and stick the rank at the end if (static_cast(lastNumRanks) != numRanks) { for (unsigned int i = 0; i < numAtoms; ++i) { cipEntries[i].resize(cipRankIndex + 1); cipEntries[i][cipRankIndex] = ranks[i]; } } ++numIts; #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl; for (unsigned int i = 0; i < numAtoms; i++) { BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > "; debugVect(cipEntries[i]); } #endif } } // Figure out the CIP ranks for the atoms of a molecule void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) { PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()), "bad ranks size"); if (!ranks.size()) { ranks.resize(mol.getNumAtoms()); } unsigned int numAtoms = mol.getNumAtoms(); #ifndef USE_NEW_STEREOCHEMISTRY // get the initial invariants: DOUBLE_VECT invars(numAtoms, 0); buildCIPInvariants(mol, invars); iterateCIPRanks(mol, invars, ranks, false); #else Canon::chiralRankMolAtoms(mol, ranks); #endif // copy the ranks onto the atoms: for (unsigned int i = 0; i < numAtoms; ++i) { mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1); } } // construct a vector with pairs for // neighbors of a given atom. This list will only be // non-empty if at least one of the bonds has its direction // set. void findAtomNeighborDirHelper(const ROMol &mol, const Atom *atom, const Bond *refBond, UINT_VECT &ranks, INT_PAIR_VECT &neighbors, bool &hasExplicitUnknownStereo) { PRECONDITION(atom, "bad atom"); PRECONDITION(refBond, "bad bond"); bool seenDir = false; for (const auto bond : mol.atomBonds(atom)) { // check whether this bond is explicitly set to have unknown stereo if (!hasExplicitUnknownStereo) { int explicit_unknown_stereo; if (bond->getBondDir() == Bond::UNKNOWN // there's a squiggle bond || (bond->getPropIfPresent(common_properties::_UnknownStereo, explicit_unknown_stereo) && explicit_unknown_stereo)) { hasExplicitUnknownStereo = true; } } Bond::BondDir dir = bond->getBondDir(); if (bond->getIdx() != refBond->getIdx()) { if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) { seenDir = true; // If we're considering the bond "backwards", (i.e. from end // to beginning, reverse the effective direction: if (atom != bond->getBeginAtom()) { if (dir == Bond::ENDDOWNRIGHT) { dir = Bond::ENDUPRIGHT; } else { dir = Bond::ENDDOWNRIGHT; } } } Atom *nbrAtom = bond->getOtherAtom(atom); neighbors.push_back(std::make_pair(nbrAtom->getIdx(), dir)); } } if (!seenDir) { neighbors.clear(); } else { if (neighbors.size() == 2 && ranks[neighbors[0].first] == ranks[neighbors[1].first]) { // the two substituents are identical, no stereochemistry here: neighbors.clear(); } else { // it's possible that direction was set only one of the bonds, set the // other // bond's direction to be reversed: if (neighbors[0].second != Bond::ENDDOWNRIGHT && neighbors[0].second != Bond::ENDUPRIGHT) { CHECK_INVARIANT(neighbors.size() > 1, "too few neighbors"); neighbors[0].second = neighbors[1].second == Bond::ENDDOWNRIGHT ? Bond::ENDUPRIGHT : Bond::ENDDOWNRIGHT; } else if (neighbors.size() > 1 && neighbors[1].second != Bond::ENDDOWNRIGHT && neighbors[1].second != Bond::ENDUPRIGHT) { neighbors[1].second = neighbors[0].second == Bond::ENDDOWNRIGHT ? Bond::ENDUPRIGHT : Bond::ENDDOWNRIGHT; } } } } // find the neighbors for an atoms that are not connected by single bond that is // not refBond // if checkDir is true only neighbor atoms with bonds marked with a direction // will be returned void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom, const Bond *refBond, UINT_VECT &neighbors, bool checkDir = false, bool includeAromatic = false) { PRECONDITION(atom, "bad atom"); PRECONDITION(refBond, "bad bond"); neighbors.clear(); for (const auto bond : mol.atomBonds(atom)) { if (bond == refBond) { continue; } Bond::BondDir dir = bond->getBondDir(); if (bond->getBondType() == Bond::SINGLE || (includeAromatic && bond->getBondType() == Bond::AROMATIC)) { if (checkDir) { if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) { continue; } } Atom *nbrAtom = bond->getOtherAtom(atom); neighbors.push_back(nbrAtom->getIdx()); } } } // conditions for an atom to be a candidate for ring stereochem: // 1) two non-ring neighbors that have different ranks (the ring neighbors // will have the same rank) // 2) one non-ring neighbor and two ring neighbors (the ring neighbors will // have the same rank) // 3) four ring neighbors with three different ranks // 4) three ring neighbors with two different ranks // example for this last one: C[C@H]1CC2CCCC3CCCC(C1)[C@@H]23 // Note that N atoms are only candidates if they are in a 3-ring bool atomIsCandidateForRingStereochem( const ROMol &mol, const Atom *atom, const std::vector &atomRanks) { PRECONDITION(atom, "bad atom"); bool res = false; if (!atom->getPropIfPresent(common_properties::_ringStereochemCand, res)) { const RingInfo *ringInfo = mol.getRingInfo(); if (ringInfo->isInitialized() && ringInfo->numAtomRings(atom->getIdx())) { // three-coordinate N additional requirements: // in a ring of size 3 (from InChI) // OR // a bridgehead (RDKit extension) if (atom->getAtomicNum() == 7 && atom->getTotalDegree() == 3 && !ringInfo->isAtomInRingOfSize(atom->getIdx(), 3) && !queryIsAtomBridgehead(atom)) { return false; } std::vector nonRingNbrs; std::vector ringNbrs; std::set ringNbrRanks; for (const auto bond : mol.atomBonds(atom)) { if (!ringInfo->numBondRings(bond->getIdx())) { nonRingNbrs.push_back(bond->getOtherAtom(atom)); } else { const Atom *nbr = bond->getOtherAtom(atom); ringNbrs.push_back(nbr); ringNbrRanks.insert(atomRanks[nbr->getIdx()]); } } // std::cerr << "!!!! " << atom->getIdx() << " " << ringNbrRanks.size() << // " " // << ringNbrs.size() << " " << nonRingNbrs.size() << std::endl; switch (nonRingNbrs.size()) { case 2: // the ranks of the non ring neighbors must be different AND // the ranks of the ring neighbors must be the same (see issue #8956) res = atomRanks[nonRingNbrs[0]->getIdx()] != atomRanks[nonRingNbrs[1]->getIdx()]; res &= (ringNbrs.size() != ringNbrRanks.size()); break; case 1: if (ringNbrs.size() > ringNbrRanks.size()) { res = true; } break; case 0: if (ringNbrs.size() == 4 && ringNbrRanks.size() == 3) { res = true; } else if (ringNbrs.size() == 3 && ringNbrRanks.size() == 2) { res = true; } else { res = false; } break; default: res = false; } } // std::cerr<<" candidate? "<setProp(common_properties::_ringStereochemCand, res, 1); } return res; } // finds all possible chiral special cases. // at the moment this is just candidates for ring stereochemistry void findChiralAtomSpecialCases(ROMol &mol, boost::dynamic_bitset<> &possibleSpecialCases, const std::vector &atomRanks) { PRECONDITION(possibleSpecialCases.size() >= mol.getNumAtoms(), "bit vector too small"); possibleSpecialCases.reset(); if (!mol.getRingInfo()->isSymmSssr()) { VECT_INT_VECT sssrs; MolOps::symmetrizeSSSR(mol, sssrs); } boost::dynamic_bitset<> atomsSeen(mol.getNumAtoms()); boost::dynamic_bitset<> atomsUsed(mol.getNumAtoms()); boost::dynamic_bitset<> bondsSeen(mol.getNumBonds()); for (const auto atom : mol.atoms()) { if (atomsSeen[atom->getIdx()]) { continue; } if (atom->getChiralTag() == Atom::CHI_UNSPECIFIED || atom->hasProp(common_properties::_CIPCode) || !mol.getRingInfo()->numAtomRings(atom->getIdx()) || !atomIsCandidateForRingStereochem(mol, atom, atomRanks)) { continue; } // do a BFS from this ring atom along ring bonds and find other // stereochemistry candidates. std::list nextAtoms; // start with finding viable neighbors for (const auto bond : mol.atomBonds(atom)) { unsigned int bidx = bond->getIdx(); if (!bondsSeen[bidx]) { bondsSeen.set(bidx); if (mol.getRingInfo()->numBondRings(bidx)) { const Atom *oatom = bond->getOtherAtom(atom); if (!atomsSeen[oatom->getIdx()]) { nextAtoms.push_back(oatom); atomsUsed.set(oatom->getIdx()); } } } } INT_VECT ringStereoAtoms(0); if (!nextAtoms.empty()) { atom->getPropIfPresent(common_properties::_ringStereoAtoms, ringStereoAtoms); } while (!nextAtoms.empty()) { const Atom *ratom = nextAtoms.front(); nextAtoms.pop_front(); atomsSeen.set(ratom->getIdx()); if (ratom->getChiralTag() != Atom::CHI_UNSPECIFIED && !ratom->hasProp(common_properties::_CIPCode) && atomIsCandidateForRingStereochem(mol, ratom, atomRanks)) { int same = (ratom->getChiralTag() == atom->getChiralTag()) ? 1 : -1; ringStereoAtoms.push_back(same * (ratom->getIdx() + 1)); INT_VECT oringatoms(0); ratom->getPropIfPresent(common_properties::_ringStereoAtoms, oringatoms); oringatoms.push_back(same * (atom->getIdx() + 1)); ratom->setProp(common_properties::_ringStereoAtoms, oringatoms, true); possibleSpecialCases.set(ratom->getIdx()); possibleSpecialCases.set(atom->getIdx()); } // now push this atom's neighbors for (const auto bond : mol.atomBonds(ratom)) { unsigned int bidx = bond->getIdx(); if (!bondsSeen[bidx]) { bondsSeen.set(bidx); if (mol.getRingInfo()->numBondRings(bidx)) { const Atom *oatom = bond->getOtherAtom(ratom); if (!atomsSeen[oatom->getIdx()] && !atomsUsed[oatom->getIdx()]) { nextAtoms.push_back(oatom); atomsUsed.set(oatom->getIdx()); } } } } } // end of BFS if (ringStereoAtoms.size() != 0) { atom->setProp(common_properties::_ringStereoAtoms, ringStereoAtoms, true); // because we're only going to hit each ring atom once, the first atom we // encounter in a ring is going to end up with all the other atoms set as // stereoAtoms, but each of them will only have the first atom present. We // need to fix that. because the traverse from the first atom only // followed ring bonds, these things are all by definition in one ring // system. (Q: is this true if there's a spiro center in there?) INT_VECT same(mol.getNumAtoms(), 0); for (auto ringAtomEntry : ringStereoAtoms) { int ringAtomIdx = ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1; same[ringAtomIdx] = ringAtomEntry; } for (INT_VECT_CI rae = ringStereoAtoms.begin(); rae != ringStereoAtoms.end(); ++rae) { int ringAtomEntry = *rae; int ringAtomIdx = ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1; INT_VECT lringatoms(0); mol.getAtomWithIdx(ringAtomIdx) ->getPropIfPresent(common_properties::_ringStereoAtoms, lringatoms); CHECK_INVARIANT(lringatoms.size() > 0, "no other ring atoms found."); for (auto orae = rae + 1; orae != ringStereoAtoms.end(); ++orae) { int oringAtomEntry = *orae; int oringAtomIdx = oringAtomEntry < 0 ? -oringAtomEntry - 1 : oringAtomEntry - 1; int theseDifferent = (ringAtomEntry < 0) ^ (oringAtomEntry < 0); lringatoms.push_back(theseDifferent ? -(oringAtomIdx + 1) : (oringAtomIdx + 1)); INT_VECT olringatoms(0); mol.getAtomWithIdx(oringAtomIdx) ->getPropIfPresent(common_properties::_ringStereoAtoms, olringatoms); CHECK_INVARIANT(olringatoms.size() > 0, "no other ring atoms found."); olringatoms.push_back(theseDifferent ? -(ringAtomIdx + 1) : (ringAtomIdx + 1)); mol.getAtomWithIdx(oringAtomIdx) ->setProp(common_properties::_ringStereoAtoms, olringatoms); } mol.getAtomWithIdx(ringAtomIdx) ->setProp(common_properties::_ringStereoAtoms, lringatoms); } } else { possibleSpecialCases.reset(atom->getIdx()); } atomsSeen.set(atom->getIdx()); } } std::pair isAtomPotentialChiralCenter( const Atom *atom, const ROMol &mol, const UINT_VECT &ranks, Chirality::INT_PAIR_VECT &nbrs) { // loop over all neighbors and form a decorated list of their // ranks: bool legalCenter = true; bool hasDupes = false; auto nzDegree = Chirality::detail::getAtomNonzeroDegree(atom); auto tnzDegree = nzDegree + atom->getTotalNumHs(); if (tnzDegree > 4) { // we only know tetrahedral chirality legalCenter = false; } else { // cases we can exclude immediately without having to look at neighbors // ranks: if (tnzDegree < 3) { legalCenter = false; } else if (nzDegree < 3 && (atom->getAtomicNum() != 15 && atom->getAtomicNum() != 33)) { // less than three neighbors is never stereogenic // unless it is a phosphine/arsine with implicit H (this is from InChI) legalCenter = false; } else if (nzDegree == 3) { if (atom->getTotalNumHs() == 1) { // three-coordinate with more than one H is never stereogenic if (detail::has_protium_neighbor(mol, atom)) { legalCenter = false; } } else { // assume something that's really three coordinate isn't potentially // chiral, then look for exceptions legalCenter = false; if (atom->getAtomicNum() == 7) { // three-coordinate N additional requirements: // in a ring of size 3 (from InChI) // OR // is a bridgehead atom (RDKit extension) // Also: cannot be SP2 hybridized or have a conjugated bond // (this was Github #7434) if (atom->getHybridization() == Atom::HybridizationType::SP3 && !MolOps::atomHasConjugatedBond(atom) && (mol.getRingInfo()->isAtomInRingOfSize(atom->getIdx(), 3) || queryIsAtomBridgehead(atom))) { legalCenter = true; } } else if (atom->getAtomicNum() == 15 || atom->getAtomicNum() == 33) { // three-coordinate phosphines and arsines // are always treated as stereogenic even with H atom neighbors. // (this is from InChI) legalCenter = true; } else if (atom->getAtomicNum() == 16 || atom->getAtomicNum() == 34) { if (atom->getValence(Atom::ValenceType::EXPLICIT) == 4 || (atom->getValence(Atom::ValenceType::EXPLICIT) == 3 && atom->getFormalCharge() == 1)) { // we also accept sulfur or selenium with either a positive charge // or a double bond: legalCenter = true; } } } } if (legalCenter && !ranks.empty()) { boost::dynamic_bitset<> codesSeen(mol.getNumAtoms()); for (const auto bond : mol.atomBonds(atom)) { unsigned int otherIdx = bond->getOtherAtom(atom)->getIdx(); nbrs.push_back(std::make_pair(ranks[otherIdx], bond->getIdx())); if (!Chirality::detail::bondAffectsAtomChirality(bond, atom)) { continue; } CHECK_INVARIANT(ranks[otherIdx] < mol.getNumAtoms(), "CIP rank higher than the number of atoms."); // watch for neighbors with duplicate ranks, which would mean // that we cannot be chiral: if (codesSeen[ranks[otherIdx]]) { // we've already seen this code, it's a dupe hasDupes = true; break; } codesSeen[ranks[otherIdx]] = 1; } } } return std::make_pair(legalCenter, hasDupes); } // returns a pair: // 1) are there unassigned stereoatoms // 2) did we assign any? std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, bool flagPossibleStereoCenters) { PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()), "bad rank vector size"); bool atomChanged = false; unsigned int unassignedAtoms = 0; // ------------------ // now loop over each atom and, if it's marked as chiral, // figure out the appropriate CIP label: for (auto atom : mol.atoms()) { Atom::ChiralType tag = atom->getChiralTag(); // only worry about this atom if it has a marked chirality // we understand: if (flagPossibleStereoCenters || (tag != Atom::CHI_UNSPECIFIED && tag != Atom::CHI_OTHER)) { if (atom->hasProp(common_properties::_CIPCode)) { continue; } if (!ranks.size()) { // if we need to, get the "CIP" ranking of each atom: assignAtomCIPRanks(mol, ranks); } Chirality::INT_PAIR_VECT nbrs; // note that hasDupes is only evaluated if legalCenter==true auto [legalCenter, hasDupes] = isAtomPotentialChiralCenter(atom, mol, ranks, nbrs); if (legalCenter) { ++unassignedAtoms; } if (legalCenter && !hasDupes && flagPossibleStereoCenters) { atom->setProp(common_properties::_ChiralityPossible, 1); } if (legalCenter && !hasDupes && tag != Atom::CHI_UNSPECIFIED && tag != Atom::CHI_OTHER) { // stereochem is possible and we have no duplicate neighbors, assign // a CIP code: atomChanged = true; --unassignedAtoms; // sort the list of neighbors by their CIP ranks: std::sort(nbrs.begin(), nbrs.end(), Rankers::pairLess); // collect the list of neighbor indices: std::list nbrIndices; for (Chirality::INT_PAIR_VECT_CI nbrIt = nbrs.begin(); nbrIt != nbrs.end(); ++nbrIt) { nbrIndices.push_back((*nbrIt).second); } // ask the atom how many swaps we have to make: int nSwaps = atom->getPerturbationOrder(nbrIndices); // if the atom has 3 neighbors and a hydrogen, add a swap: if (nbrIndices.size() == 3 && atom->getTotalNumHs() == 1) { ++nSwaps; } // if that number is odd, we'll change our chirality: if (nSwaps % 2) { if (tag == Atom::CHI_TETRAHEDRAL_CCW) { tag = Atom::CHI_TETRAHEDRAL_CW; } else { tag = Atom::CHI_TETRAHEDRAL_CCW; } } // now assign the CIP code: std::string cipCode; if (tag == Atom::CHI_TETRAHEDRAL_CCW) { cipCode = "S"; } else { cipCode = "R"; } atom->setProp(common_properties::_CIPCode, cipCode); } } } return std::make_pair((unassignedAtoms > 0), atomChanged); } // returns a pair: // 1) are there unassigned stereo bonds? // 2) did we assign any? std::pair assignBondStereoCodes(ROMol &mol, UINT_VECT &ranks) { PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()), "bad rank vector size"); bool assignedABond = false; unsigned int unassignedBonds = 0; boost::dynamic_bitset<> bondsToClear(mol.getNumBonds()); // find the double bonds: for (auto dblBond : mol.bonds()) { if (dblBond->getBondType() == Bond::BondType::DOUBLE) { if (dblBond->getStereo() != Bond::BondStereo::STEREONONE) { continue; } if (!ranks.size()) { assignAtomCIPRanks(mol, ranks); } dblBond->getStereoAtoms().clear(); // at the moment we are ignoring stereochem on ring bonds with less than // 8 members. if (shouldDetectDoubleBondStereo(dblBond)) { const Atom *begAtom = dblBond->getBeginAtom(); const Atom *endAtom = dblBond->getEndAtom(); // we're only going to handle 2 or three coordinate atoms: if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) && (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) { ++unassignedBonds; // look around each atom and see if it has at least one bond with // direction marked: // the pairs here are: atomIdx,bonddir Chirality::INT_PAIR_VECT begAtomNeighbors, endAtomNeighbors; bool hasExplicitUnknownStereo = false; int bgn_stereo = false, end_stereo = false; if ((dblBond->getBeginAtom()->getPropIfPresent( common_properties::_UnknownStereo, bgn_stereo) && bgn_stereo) || (dblBond->getEndAtom()->getPropIfPresent( common_properties::_UnknownStereo, end_stereo) && end_stereo)) { hasExplicitUnknownStereo = true; } Chirality::findAtomNeighborDirHelper(mol, begAtom, dblBond, ranks, begAtomNeighbors, hasExplicitUnknownStereo); Chirality::findAtomNeighborDirHelper(mol, endAtom, dblBond, ranks, endAtomNeighbors, hasExplicitUnknownStereo); if (begAtomNeighbors.size() && endAtomNeighbors.size()) { // Each atom has at least one neighboring bond with marked // directionality. Find the highest-ranked directionality // on each side: int begDir, endDir, endNbrAid, begNbrAid; if (begAtomNeighbors.size() == 1 || ranks[begAtomNeighbors[0].first] > ranks[begAtomNeighbors[1].first]) { begDir = begAtomNeighbors[0].second; begNbrAid = begAtomNeighbors[0].first; } else { begDir = begAtomNeighbors[1].second; begNbrAid = begAtomNeighbors[1].first; } if (endAtomNeighbors.size() == 1 || ranks[endAtomNeighbors[0].first] > ranks[endAtomNeighbors[1].first]) { endDir = endAtomNeighbors[0].second; endNbrAid = endAtomNeighbors[0].first; } else { endDir = endAtomNeighbors[1].second; endNbrAid = endAtomNeighbors[1].first; } bool conflictingBegin = (begAtomNeighbors.size() == 2 && begAtomNeighbors[0].second == begAtomNeighbors[1].second); bool conflictingEnd = (endAtomNeighbors.size() == 2 && endAtomNeighbors[0].second == endAtomNeighbors[1].second); if (conflictingBegin || conflictingEnd) { dblBond->setStereo(Bond::STEREONONE); BOOST_LOG(rdWarningLog) << "Conflicting single bond directions " "around double bond at index " << dblBond->getIdx() << "." << std::endl; BOOST_LOG(rdWarningLog) << " BondStereo set to STEREONONE and " "single bond directions set to NONE." << std::endl; assignedABond = true; if (conflictingBegin) { bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[0].first, begAtom->getIdx()) ->getIdx()] = 1; bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[1].first, begAtom->getIdx()) ->getIdx()] = 1; } if (conflictingEnd) { bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[0].first, endAtom->getIdx()) ->getIdx()] = 1; bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[1].first, endAtom->getIdx()) ->getIdx()] = 1; } } else { dblBond->getStereoAtoms().push_back(begNbrAid); dblBond->getStereoAtoms().push_back(endNbrAid); if (hasExplicitUnknownStereo) { dblBond->setStereo(Bond::STEREOANY); assignedABond = true; } else if (begDir == endDir) { // In findAtomNeighborDirHelper, we've set up the // bond directions here so that they correspond to // having both single bonds START at the double bond. // This means that if the single bonds point in the same // direction, the bond is cis, "Z" dblBond->setStereo(Bond::STEREOZ); assignedABond = true; } else { dblBond->setStereo(Bond::STEREOE); assignedABond = true; } } --unassignedBonds; } } } } } for (unsigned int i = 0; i < mol.getNumBonds(); ++i) { if (bondsToClear[i]) { mol.getBondWithIdx(i)->setBondDir(Bond::NONE); } } return std::make_pair(unassignedBonds > 0, assignedABond); } void assignLegacyCIPLabels(ROMol &mol, bool flagPossibleStereoCenters) { std::vector atomRanks; assignAtomChiralCodes(mol, atomRanks, flagPossibleStereoCenters); // reset any already-specfied double bonds: for (auto bond : mol.bonds()) { if (bond->getBondType() == Bond::BondType::DOUBLE && bond->getStereo() > Bond::BondStereo::STEREOANY) { bond->setStereo(Bond::BondStereo::STEREONONE); } } assignBondStereoCodes(mol, atomRanks); } void assignBondCisTrans(ROMol &mol, const StereoInfo &sinfo) { if (sinfo.type != StereoType::Bond_Double || sinfo.specified != StereoSpecified::Unspecified || sinfo.controllingAtoms.size() != 4 || ((sinfo.controllingAtoms[0] == Atom::NOATOM && sinfo.controllingAtoms[1] == Atom::NOATOM) || (sinfo.controllingAtoms[2] == Atom::NOATOM && sinfo.controllingAtoms[3] == Atom::NOATOM))) { return; } auto dblBond = mol.getBondWithIdx(sinfo.centeredOn); bool begFirstNeighbor = true; auto begBond = mol.getBondBetweenAtoms(dblBond->getBeginAtomIdx(), sinfo.controllingAtoms[0]); CHECK_INVARIANT(begBond, "no initial bond found"); auto begDir = begBond->getBondDir(); if (begDir != Bond::BondDir::ENDDOWNRIGHT && begDir != Bond::BondDir::ENDUPRIGHT) { begFirstNeighbor = false; if (sinfo.controllingAtoms[1] != Atom::NOATOM) { begBond = mol.getBondBetweenAtoms(dblBond->getBeginAtomIdx(), sinfo.controllingAtoms[1]); CHECK_INVARIANT(begBond, "no initial bond found"); begDir = begBond->getBondDir(); } } // no direction found at beginning if (begDir != Bond::BondDir::ENDDOWNRIGHT && begDir != Bond::BondDir::ENDUPRIGHT) { return; } if (begBond->getBeginAtomIdx() != dblBond->getBeginAtomIdx()) { begDir = begDir == Bond::BondDir::ENDDOWNRIGHT ? Bond::BondDir::ENDUPRIGHT : Bond::BondDir::ENDDOWNRIGHT; } bool endFirstNeighbor = true; auto endBond = mol.getBondBetweenAtoms(dblBond->getEndAtomIdx(), sinfo.controllingAtoms[2]); CHECK_INVARIANT(endBond, "no final bond found"); auto endDir = endBond->getBondDir(); if (endDir != Bond::BondDir::ENDDOWNRIGHT && endDir != Bond::BondDir::ENDUPRIGHT) { endFirstNeighbor = false; if (sinfo.controllingAtoms[3] != Atom::NOATOM) { endBond = mol.getBondBetweenAtoms(dblBond->getEndAtomIdx(), sinfo.controllingAtoms[3]); CHECK_INVARIANT(endBond, "no final bond found"); endDir = endBond->getBondDir(); } } // no direction found at end if (endDir != Bond::BondDir::ENDDOWNRIGHT && endDir != Bond::BondDir::ENDUPRIGHT) { return; } if (endBond->getBeginAtomIdx() != dblBond->getEndAtomIdx()) { endDir = endDir == Bond::BondDir::ENDDOWNRIGHT ? Bond::BondDir::ENDUPRIGHT : Bond::BondDir::ENDDOWNRIGHT; } // we've set up the bond directions here so that they correspond to having // both single bonds START at the double bond. This means that if the single // bonds point in the same direction, the bond is cis bool sameDir = begDir == endDir; // if either the direction bond at the beginning or the direction bond at the // end wasn't to the first neighbor on that side (but not both), then we need // to swap if (begFirstNeighbor ^ endFirstNeighbor) { sameDir = !sameDir; } dblBond->setStereoAtoms(sinfo.controllingAtoms[0], sinfo.controllingAtoms[2]); if (sameDir) { dblBond->setStereo(Bond::BondStereo::STEREOCIS); } else { dblBond->setStereo(Bond::BondStereo::STEREOTRANS); } } // reassign atom ranks by supplementing the current ranks // with information about known chirality void rerankAtoms(const ROMol &mol, UINT_VECT &ranks) { PRECONDITION(ranks.size() == mol.getNumAtoms(), "bad rank vector size"); unsigned int factor = 100; while (factor < mol.getNumAtoms()) { factor *= 10; } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "rerank PRE: " << std::endl; for (int i = 0; i < mol.getNumAtoms(); i++) { BOOST_LOG(rdDebugLog) << " " << i << ": " << ranks[i] << std::endl; } #endif DOUBLE_VECT invars(mol.getNumAtoms()); // and now supplement them: for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { invars[i] = ranks[i] * factor; const Atom *atom = mol.getAtomWithIdx(i); // Priority order: R > S > nothing std::string cipCode; if (atom->getPropIfPresent(common_properties::_CIPCode, cipCode)) { if (cipCode == "S") { invars[i] += 10; } else if (cipCode == "R") { invars[i] += 20; } } for (const auto oBond : mol.atomBonds(atom)) { if (oBond->getBondType() == Bond::DOUBLE) { if (oBond->getStereo() == Bond::STEREOE) { invars[i] += 1; } else if (oBond->getStereo() == Bond::STEREOZ) { invars[i] += 2; } } } } iterateCIPRanks(mol, invars, ranks, true); // copy the ranks onto the atoms: for (unsigned int i = 0; i < mol.getNumAtoms(); i++) { mol.getAtomWithIdx(i)->setProp(common_properties::_CIPRank, ranks[i]); } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << " post: " << std::endl; for (int i = 0; i < mol.getNumAtoms(); i++) { BOOST_LOG(rdDebugLog) << " " << i << ": " << ranks[i] << std::endl; } #endif } bool hasStereoBondDir(const Bond *bond) { PRECONDITION(bond, "no bond"); return bond->getBondDir() == Bond::BondDir::ENDDOWNRIGHT || bond->getBondDir() == Bond::BondDir::ENDUPRIGHT; } const Bond *getNeighboringDirectedBond(const ROMol &mol, const Atom *atom) { PRECONDITION(atom, "no atom"); for (const auto &bondIdx : boost::make_iterator_range(mol.getAtomBonds(atom))) { const Bond *bond = mol[bondIdx]; if (bond->getBondType() != Bond::BondType::DOUBLE && hasStereoBondDir(bond)) { return bond; } } return nullptr; } Bond::BondStereo translateEZLabelToCisTrans(Bond::BondStereo label) { switch (label) { case Bond::STEREOE: return Bond::STEREOTRANS; case Bond::STEREOZ: return Bond::STEREOCIS; default: return label; } } INT_VECT findStereoAtoms(const Bond *bond) { PRECONDITION(bond, "bad bond"); PRECONDITION(bond->hasOwningMol(), "no mol"); PRECONDITION(bond->getBondType() == Bond::DOUBLE, "not double bond"); PRECONDITION(bond->getStereo() > Bond::BondStereo::STEREOANY, "no defined stereo"); if (!bond->getStereoAtoms().empty()) { return bond->getStereoAtoms(); } if (bond->getStereo() == Bond::BondStereo::STEREOE || bond->getStereo() == Bond::BondStereo::STEREOZ) { const Atom *startStereoAtom = findHighestCIPNeighbor(bond->getBeginAtom(), bond->getEndAtom()); const Atom *endStereoAtom = findHighestCIPNeighbor(bond->getEndAtom(), bond->getBeginAtom()); if (startStereoAtom == nullptr || endStereoAtom == nullptr) { return {}; } int startStereoAtomIdx = static_cast(startStereoAtom->getIdx()); int endStereoAtomIdx = static_cast(endStereoAtom->getIdx()); return {startStereoAtomIdx, endStereoAtomIdx}; } else { BOOST_LOG(rdWarningLog) << "Unable to assign stereo atoms for bond " << bond->getIdx() << std::endl; return {}; } } void cleanupStereoGroups(ROMol &mol) { std::vector newsgs; for (auto sg : mol.getStereoGroups()) { std::vector okatoms; std::vector okbonds; bool keep = true; for (const auto atom : sg.getAtoms()) { if (atom->getChiralTag() == Atom::ChiralType::CHI_UNSPECIFIED) { keep = false; } else { okatoms.push_back(atom); } } for (const auto bond : sg.getBonds()) { if (bond->getStereo() != Bond::BondStereo::STEREOATROPCCW && bond->getStereo() != Bond::BondStereo::STEREOATROPCW) { keep = false; } else { okbonds.push_back(bond); } } if (keep) { newsgs.push_back(sg); } else if (!okatoms.empty()) { newsgs.emplace_back(sg.getGroupType(), std::move(okatoms), std::move(okbonds), sg.getReadId()); } } mol.setStereoGroups(std::move(newsgs)); } // **************************************************************************** std::ostream &operator<<(std::ostream &oss, const StereoType &s) { switch (s) { case StereoType::Unspecified: oss << "Unspecified"; break; case StereoType::Atom_Tetrahedral: oss << "Atom_Tetrahedral"; break; case StereoType::Atom_SquarePlanar: oss << "Atom_SquarePlanar"; break; case StereoType::Atom_TrigonalBipyramidal: oss << "Atom_TrigonalBipyramidal"; break; case StereoType::Atom_Octahedral: oss << "Atom_Octahedral"; break; case StereoType::Bond_Double: oss << "Bond_Double"; break; case StereoType::Bond_Cumulene_Even: oss << "Bond_Cumulene_Even"; break; case StereoType::Bond_Atropisomer: oss << "Bond_Atropisomer"; break; } return oss; } // **************************************************************************** std::ostream &operator<<(std::ostream &oss, const StereoSpecified &s) { switch (s) { case StereoSpecified::Unspecified: oss << "Unspecified"; break; case StereoSpecified::Specified: oss << "Specified"; break; case StereoSpecified::Unknown: oss << "Unknown"; break; } return oss; } /* We're going to do this iteratively: 1) assign atom stereochemistry 2) assign bond stereochemistry 3) if there are still unresolved atoms or bonds repeat the above steps as necessary */ void legacyStereoPerception(ROMol &mol, bool cleanIt, bool flagPossibleStereoCenters) { mol.clearProp("_needsDetectBondStereo"); // later we're going to need ring information, get it now if we don't // have it already: // NOTE, if called from the SMART code, the ring info will be DUMMY, and // contains no information if (!mol.getRingInfo()->isFindFastOrBetter()) { MolOps::fastFindRings(mol); } // as part of the preparation, we'll loop over the atoms and // bonds to see if anything has stereochemistry // indicated. There's no point in doing the work here if there // are neither stereocenters nor bonds that we need to consider. // The exception to this is when flagPossibleStereoCenters is // true; then we always need to do the work bool hasStereoAtoms = false; // flagPossibleStereoCenters; bool hasPotentialStereoAtoms = false; for (auto atom : mol.atoms()) { if (cleanIt) { if (atom->hasProp(common_properties::_CIPCode)) { atom->clearProp(common_properties::_CIPCode); } if (atom->hasProp(common_properties::_ChiralityPossible)) { atom->clearProp(common_properties::_ChiralityPossible); } } if (!hasStereoAtoms && atom->getChiralTag() != Atom::CHI_UNSPECIFIED && atom->getChiralTag() != Atom::CHI_OTHER) { hasStereoAtoms = true; } else if (!hasPotentialStereoAtoms) { UINT_VECT ranks; Chirality::INT_PAIR_VECT nbrs; hasPotentialStereoAtoms = isAtomPotentialChiralCenter(atom, mol, ranks, nbrs).first; } } bool hasStereoBonds = false; bool hasPotentialStereoBonds = false; for (auto bond : mol.bonds()) { if (cleanIt) { bond->clearProp(common_properties::_CIPCode); // enforce no stereo on small rings if ((bond->getBondType() == Bond::DOUBLE || bond->getBondType() == Bond::AROMATIC) && !shouldDetectDoubleBondStereo(bond)) { if (bond->getBondDir() == Bond::EITHERDOUBLE) { bond->setBondDir(Bond::NONE); } if (bond->getStereo() != Bond::STEREONONE) { bond->setStereo(Bond::STEREONONE); bond->getStereoAtoms().clear(); } continue; } else if (bond->getBondType() == Bond::DOUBLE) { if (bond->getBondDir() == Bond::EITHERDOUBLE) { bond->setStereo(Bond::STEREOANY); bond->getStereoAtoms().clear(); bond->setBondDir(Bond::NONE); } else if (bond->getStereo() != Bond::STEREOANY) { bond->setStereo(Bond::STEREONONE); bond->getStereoAtoms().clear(); } } } if (!hasStereoBonds && bond->getBondType() == Bond::DOUBLE) { bool isSpecified = false; for (auto nbond : mol.atomBonds(bond->getBeginAtom())) { if (nbond->getBondDir() == Bond::ENDDOWNRIGHT || nbond->getBondDir() == Bond::ENDUPRIGHT) { hasStereoBonds = true; isSpecified = true; break; } } if (!hasStereoBonds) { for (auto nbond : mol.atomBonds(bond->getEndAtom())) { if (nbond->getBondDir() == Bond::ENDDOWNRIGHT || nbond->getBondDir() == Bond::ENDUPRIGHT) { hasStereoBonds = true; isSpecified = true; break; } } } if (!hasPotentialStereoBonds && !isSpecified && shouldDetectDoubleBondStereo(bond)) { hasPotentialStereoBonds = true; } } if (!cleanIt && hasStereoBonds && hasPotentialStereoBonds) { break; // no reason to keep iterating if we've already // determined there are stereo bonds to consider } } UINT_VECT atomRanks; bool keepGoing = hasStereoAtoms | hasStereoBonds; if (!keepGoing) { keepGoing = flagPossibleStereoCenters && (hasPotentialStereoAtoms || hasPotentialStereoBonds); } bool changedStereoAtoms, changedStereoBonds; while (keepGoing) { if (hasStereoAtoms || hasPotentialStereoAtoms) { // only do the CIP assignment if there's a chance of a stereo center std::tie(hasStereoAtoms, changedStereoAtoms) = Chirality::assignAtomChiralCodes(mol, atomRanks, flagPossibleStereoCenters); } else { changedStereoAtoms = false; } if (hasStereoBonds || hasPotentialStereoBonds) { // only do the CIP assignment if there's a chance of a stereo bond std::tie(hasStereoBonds, changedStereoBonds) = Chirality::assignBondStereoCodes(mol, atomRanks); } else { changedStereoBonds = false; } keepGoing = (hasStereoAtoms || hasStereoBonds) && (changedStereoAtoms || changedStereoBonds); if (keepGoing) { // update the atom ranks based on the new information we have: Chirality::rerankAtoms(mol, atomRanks); } } if (cleanIt) { for (auto atom : mol.atoms()) { if (atom->hasProp(common_properties::_ringStereochemCand)) { atom->clearProp(common_properties::_ringStereochemCand); } if (atom->hasProp(common_properties::_ringStereoAtoms)) { atom->clearProp(common_properties::_ringStereoAtoms); } } boost::dynamic_bitset<> possibleSpecialCases(mol.getNumAtoms()); Chirality::findChiralAtomSpecialCases(mol, possibleSpecialCases, atomRanks); for (auto atom : mol.atoms()) { if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED && !Chirality::hasNonTetrahedralStereo(atom) && !atom->hasProp(common_properties::_CIPCode) && (!possibleSpecialCases[atom->getIdx()] || !atom->hasProp(common_properties::_ringStereoAtoms))) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); // If the atom has an explicit hydrogen and no charge, that H // was probably put there solely because of the chirality. // So we'll go ahead and remove it. // This was Issue 194 if (atom->getNumExplicitHs() == 1 && atom->getFormalCharge() == 0 && !atom->getIsAromatic()) { atom->setNumExplicitHs(0); atom->setNoImplicit(false); atom->calcExplicitValence(false); atom->calcImplicitValence(false); } } } bool foundAtropisomer = false; for (auto bond : mol.bonds()) { // wedged bonds to atoms that have no stereochem // should be removed. (github issue 87) if ((bond->getBondDir() == Bond::BEGINWEDGE || bond->getBondDir() == Bond::BEGINDASH) && bond->getBeginAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED && bond->getEndAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED) { // see if there is an atropisomer bond connected to this bond bool atomHasAtropisomer = false; for (auto nbond : mol.atomBonds(bond->getBeginAtom())) { if (nbond->getStereo() == Bond::STEREOATROPCCW || nbond->getStereo() == Bond::STEREOATROPCW) { atomHasAtropisomer = true; foundAtropisomer = true; break; } } if (!atomHasAtropisomer) { bond->setBondDir(Bond::NONE); } } // if we have either a double bond that involves a degree one atom and // that is either crossed or STEREOANY, we need to clear the stereo and, // possibly, direction. This can happen with imines that just have an // implicit H on the N if (bond->getBondType() == Bond::DOUBLE && (bond->getBondDir() == Bond::EITHERDOUBLE || bond->getStereo() == Bond::STEREOANY) && (bond->getBeginAtom()->getDegree() == 1 || bond->getEndAtom()->getDegree() == 1)) { if (bond->getBondDir() == Bond::EITHERDOUBLE) { bond->setBondDir(Bond::NONE); } bond->setStereo(Bond::STEREONONE); } // check for directionality on single bonds around // double bonds without stereo. This was github #2422 if (bond->getBondType() == Bond::DOUBLE && (bond->getStereo() == Bond::STEREOANY || bond->getStereo() == Bond::STEREONONE)) { std::vector batoms = {bond->getBeginAtom(), bond->getEndAtom()}; for (auto batom : batoms) { for (const auto nbrBndI : mol.atomBonds(batom)) { if (nbrBndI == bond) { continue; } if ((nbrBndI->getBondDir() == Bond::ENDDOWNRIGHT || nbrBndI->getBondDir() == Bond::ENDUPRIGHT) && (nbrBndI->getBondType() == Bond::SINGLE || nbrBndI->getBondType() == Bond::AROMATIC)) { // direction is set, and we know it's not because of our // bond. What about other neighbors? bool okToClear = true; for (const auto nbrBndJ : mol.atomBonds(nbrBndI->getOtherAtom(batom))) { if (nbrBndJ->getBondType() == Bond::DOUBLE && nbrBndJ->getStereo() != Bond::STEREOANY && nbrBndJ->getStereo() != Bond::STEREONONE) { okToClear = false; break; } } if (okToClear) { nbrBndI->setBondDir(Bond::NONE); } } } } } } if (foundAtropisomer || Atropisomers::doesMolHaveAtropisomers(mol)) { Atropisomers::cleanupAtropisomerStereoGroups(mol); } Chirality::cleanupStereoGroups(mol); } } void updateDoubleBondStereo(ROMol &mol, const std::vector &sinfo, bool cleanIt) { boost::dynamic_bitset<> bondsTouched(mol.getNumBonds(), 0); for (const auto &si : sinfo) { if (si.type == Chirality::StereoType::Bond_Double) { auto bond = mol.getBondWithIdx(si.centeredOn); bondsTouched.set(bond->getIdx()); bond->setStereo(Bond::BondStereo::STEREONONE); if (si.specified == Chirality::StereoSpecified::Specified) { TEST_ASSERT(si.controllingAtoms.size() == 4); bond->setStereoAtoms(si.controllingAtoms[0], si.controllingAtoms[2]); switch (si.descriptor) { case Chirality::StereoDescriptor::Bond_Cis: bond->setStereo(Bond::BondStereo::STEREOCIS); break; case Chirality::StereoDescriptor::Bond_Trans: bond->setStereo(Bond::BondStereo::STEREOTRANS); break; default: BOOST_LOG(rdWarningLog) << "unrecognized bond stereo type" << std::endl; } } else if (si.specified == Chirality::StereoSpecified::Unknown) { // in cases like imines without explicit Hs, we have double bonds with // Unknown stereo but with only one neighbor on the N. Catch those and // clear the stereochem if (si.controllingAtoms.size() == 4 && si.controllingAtoms[0] != Atom::NOATOM && si.controllingAtoms[2] != Atom::NOATOM) { bond->setStereoAtoms(si.controllingAtoms[0], si.controllingAtoms[2]); bond->setStereo(Bond::BondStereo::STEREOANY); } else { bond->setStereo(Bond::BondStereo::STEREONONE); } bond->setBondDir(Bond::BondDir::NONE); } else if (si.specified == Chirality::StereoSpecified::Unspecified) { assignBondCisTrans(mol, si); } } } if (cleanIt) { for (auto bond : mol.bonds()) { if (bondsTouched[bond->getIdx()] || bond->getBondType() != Bond::BondType::DOUBLE) { continue; } // we didn't see it above, so it can't have stereo: bond->setStereo(Bond::BondStereo::STEREONONE); bond->setBondDir(Bond::BondDir::NONE); bond->getStereoAtoms().clear(); } } } void stereoPerception(ROMol &mol, bool cleanIt, bool flagPossibleStereoCenters) { if (cleanIt) { for (auto atom : mol.atoms()) { atom->clearProp(common_properties::_CIPCode); atom->clearProp(common_properties::_ChiralityPossible); } for (auto bond : mol.bonds()) { bond->clearProp(common_properties::_CIPCode); if (bond->getBondDir() == Bond::BondDir::EITHERDOUBLE) { bond->setStereo(Bond::BondStereo::STEREOANY); bond->getStereoAtoms().clear(); bond->setBondDir(Bond::BondDir::NONE); } } } // we need cis/trans markers on the double bonds... set those now: MolOps::setBondStereoFromDirections(mol); // do the actual perception auto sinfo = Chirality::findPotentialStereo(mol, cleanIt, flagPossibleStereoCenters); if (flagPossibleStereoCenters) { for (const auto &si : sinfo) { if (si.type == Chirality::StereoType::Atom_Tetrahedral || si.type == Chirality::StereoType::Atom_SquarePlanar || si.type == Chirality::StereoType::Atom_TrigonalBipyramidal || si.type == Chirality::StereoType::Atom_Octahedral) { mol.getAtomWithIdx(si.centeredOn) ->setProp(common_properties::_ChiralityPossible, 1); } } } // populate double bond stereo info: updateDoubleBondStereo(mol, sinfo, cleanIt); if (cleanIt) { Atropisomers::cleanupAtropisomerStereoGroups(mol); Chirality::cleanupStereoGroups(mol); } } bool canBeStereoBond(const Bond *bond) { PRECONDITION(bond, "no bond"); if (bond->getBondType() != Bond::BondType::DOUBLE && bond->getBondType() != Bond::BondType::AROMATIC) { return false; } auto beginAtom = bond->getBeginAtom(); auto endAtom = bond->getEndAtom(); for (const auto atom : {beginAtom, endAtom}) { std::vector nbrRanks; for (auto nbrBond : bond->getOwningMol().atomBonds(atom)) { if (nbrBond == bond) { continue; // a bond is NOT its own neighbor } if (nbrBond->getBondType() == Bond::SINGLE) { // if a neighbor has a wedge or hash bond, do NOT mark it as double // crossed if (nbrBond->getBondDir() == Bond::ENDUPRIGHT || nbrBond->getBondDir() == Bond::ENDDOWNRIGHT) { return false; } // if a neighbor has a wiggle bond, do NOT mark it as crossed (although // it is unknown if (nbrBond->getBondDir() == Bond::BondDir::UNKNOWN && nbrBond->getBeginAtom() == atom) { return false; } // if two neighbors havr the same CIP ranking, this is not stereo const auto otherAtom = nbrBond->getOtherAtom(atom); int rank; if (RDKit::Chirality::getUseLegacyStereoPerception()) { if (!otherAtom->getPropIfPresent(common_properties::_CIPRank, rank)) { rank = -1; } } else { // NOT legacy stereo if (!otherAtom->getPropIfPresent(common_properties::_ChiralAtomRank, rank)) { rank = -1; } } if (rank >= 0) { if (std::find(nbrRanks.begin(), nbrRanks.end(), rank) != nbrRanks.end()) { return false; } else { nbrRanks.push_back(rank); } } } } } return true; } bool shouldBeACrossedBond(const Bond *bond) { PRECONDITION(bond, ""); // double bond stereochemistry - // if the bond isn't specified, then it should go in the mol block // as "any", this was sf.net issue 2963522. // two caveats to this: // 1) if it's a ring bond, we'll only put the "any" // in the mol block if the user specifically asked for it. // Constantly seeing crossed bonds in rings, though maybe // technically correct, is irritating. // 2) if it's a terminal bond (where there's no chance of // stereochemistry anyway), we also skip the any. // this was sf.net issue 3009756 if (bond->getStereo() == Bond::STEREOANY) { // see if any of the neighbors have a wiggle bond - if so, do NOT make this // one a cross bond for (auto nbrBond : bond->getOwningMol().atomBonds(bond->getBeginAtom())) { if (nbrBond->getBondDir() == Bond::UNKNOWN && nbrBond->getBeginAtom()->getIdx() == bond->getBeginAtom()->getIdx()) { return false; } } for (auto nbrBond : bond->getOwningMol().atomBonds(bond->getEndAtom())) { if (nbrBond->getBondDir() == Bond::UNKNOWN && nbrBond->getBeginAtom()->getIdx() == bond->getEndAtom()->getIdx()) { return false; } } return true; // crossed double bond } if (bond->getStereo() != Bond::BondStereo::STEREONONE) { return false; } // if it is in a ring it is not makred as stereo. // If either end is terminal, it is not stereo if (!Chirality::detail::isBondPotentialStereoBond(bond)) { return false; } // we don't know that it's explicitly unspecified (covered above with // the ==STEREOANY check) if (bond->getBondDir() == Bond::EITHERDOUBLE) { return true; // crossed double bond } const auto beginAtom = bond->getBeginAtom(); const auto endAtom = bond->getEndAtom(); if (beginAtom->getDegree() > 1 && endAtom->getDegree() > 1 && (beginAtom->getTotalValence() - beginAtom->getTotalDegree()) == 1 && (endAtom->getTotalValence() - endAtom->getTotalDegree()) == 1) { // we only do this if each atom only has one unsaturation // FIX: this is the fix for github #2649, but we will need to // change it once we start handling allenes properly if (canBeStereoBond(bond)) { return true; // crossed double bond } } return false; // NOT crossed double bond } // only valid for single or aromatic bonds int BondGetDirCode(const Bond::BondDir dir) { int res = 0; switch (dir) { case Bond::NONE: res = 0; break; case Bond::BEGINWEDGE: res = 1; break; case Bond::BEGINDASH: res = 6; break; case Bond::UNKNOWN: res = 4; break; case Bond::BondDir::EITHERDOUBLE: res = 3; break; default: break; } return res; } void GetMolFileBondStereoInfo( const Bond *bond, const std::map> &wedgeBonds, const Conformer *conf, Bond::BondDir &dir, bool &reverse) { PRECONDITION(bond, ""); reverse = false; dir = Bond::NONE; if (canHaveDirection(*bond)) { // single bond stereo chemistry dir = Chirality::detail::determineBondWedgeState(bond, wedgeBonds, conf); // if this bond needs to be wedged it is possible that this // wedging was determined by a chiral atom at the end of the // bond (instead of at the beginning). In this case we need to // reverse the begin and end atoms for the bond when we write // the mol file if ((dir == Bond::BEGINDASH) || (dir == Bond::BEGINWEDGE || dir == Bond::UNKNOWN)) { auto wbi = wedgeBonds.find(bond->getIdx()); if (wbi != wedgeBonds.end() && wbi->second->getType() == Chirality::WedgeInfoType::WedgeInfoTypeChiral && static_cast(wbi->second->getIdx()) != bond->getBeginAtomIdx()) { reverse = true; } } } else if (bond->getBondType() == Bond::DOUBLE) { if (Chirality::shouldBeACrossedBond(bond)) { dir = Bond::BondDir::EITHERDOUBLE; } } } void GetMolFileBondStereoInfo( const Bond *bond, const std::map> &wedgeBonds, const Conformer *conf, int &dirCode, bool &reverse) { Bond::BondDir dir; GetMolFileBondStereoInfo(bond, wedgeBonds, conf, dir, reverse); dirCode = BondGetDirCode(dir); } void removeNonExplicit3DChirality(ROMol &mol) { for (auto atom : mol.atoms()) { if (atom->hasProp(common_properties::_NonExplicit3DChirality)) { atom->clearProp(common_properties::_NonExplicit3DChirality); atom->setChiralTag(Atom::CHI_UNSPECIFIED); } } } void addStereoAnnotations(ROMol &mol, std::string absLabel, std::string orLabel, std::string andLabel, std::string cipLabel, std::string bondLabel) { auto sgs = mol.getStereoGroups(); assignStereoGroupIds(sgs); boost::dynamic_bitset<> doneAts(mol.getNumAtoms()); for (const auto &sg : sgs) { std::string gid = std::to_string(sg.getWriteId()); for (const auto atom : sg.getAtoms()) { if (doneAts[atom->getIdx()]) { BOOST_LOG(rdWarningLog) << "Warning: atom " << atom->getIdx() << " is in more than one stereogroup. Only the " "label from the first group will be used." << std::endl; continue; } std::string cip; atom->getPropIfPresent(common_properties::_CIPCode, cip); std::string lab; switch (sg.getGroupType()) { case StereoGroupType::STEREO_ABSOLUTE: lab = absLabel; doneAts.set(atom->getIdx()); break; case StereoGroupType::STEREO_OR: lab = orLabel; doneAts.set(atom->getIdx()); break; case StereoGroupType::STEREO_AND: lab = andLabel; doneAts.set(atom->getIdx()); break; default: break; } if (!lab.empty()) { boost::algorithm::replace_all(lab, "{id}", gid); if (!cip.empty()) { boost::algorithm::replace_all(lab, "{cip}", cip); } atom->setProp(common_properties::atomNote, lab); } } } if (!cipLabel.empty()) { for (auto atom : mol.atoms()) { std::string cip; if (!doneAts[atom->getIdx()] && atom->getPropIfPresent(common_properties::_CIPCode, cip)) { std::string lab = cipLabel; boost::algorithm::replace_all(lab, "{cip}", cip); atom->setProp(common_properties::atomNote, lab); } } } if (!bondLabel.empty()) { for (auto bond : mol.bonds()) { std::string cip; if (!bond->getPropIfPresent(common_properties::_CIPCode, cip)) { if (bond->getStereo() == Bond::STEREOE) { cip = "E"; } else if (bond->getStereo() == Bond::STEREOZ) { cip = "Z"; } } if (!cip.empty()) { std::string lab = bondLabel; boost::algorithm::replace_all(lab, "{cip}", cip); bond->setProp(common_properties::bondNote, lab); } } } } } // namespace Chirality namespace MolOps { void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, bool flagPossibleStereoCenters) { if (!force && mol.hasProp(common_properties::_StereochemDone)) { return; } if (mol.needsUpdatePropertyCache()) { mol.updatePropertyCache(false); } if (!Chirality::getUseLegacyStereoPerception()) { Chirality::stereoPerception(mol, cleanIt, flagPossibleStereoCenters); } else { Chirality::legacyStereoPerception(mol, cleanIt, flagPossibleStereoCenters); } mol.setProp(common_properties::_StereochemDone, 1, true); } // Find bonds than can be cis/trans in a molecule and mark them as // Bond::STEREOANY. void findPotentialStereoBonds(ROMol &mol, bool cleanIt) { // FIX: The earlier thought was to provide an optional argument to ignore or // consider // double bonds in a ring. But I am removing this optional argument and // ignoring ring bonds // completely for now. This is because finding a potential stereo bond in a // ring involves // more than just checking the CIPranks for the neighbors - SP 05/04/04 // make this function callable multiple times if ((mol.hasProp(common_properties::_BondsPotentialStereo)) && (!cleanIt)) { return; } else { UINT_VECT ranks; ranks.resize(mol.getNumAtoms()); bool cipDone = false; ROMol::BondIterator bondIt; for (bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) { if ((*bondIt)->getBondType() == Bond::DOUBLE && !(mol.getRingInfo()->numBondRings((*bondIt)->getIdx()))) { // we are ignoring ring bonds here - read the FIX above Bond *dblBond = *bondIt; // proceed only if we either want to clean the stereocode on this bond, // if none is set on it yet, or it is STEREOANY and we need to find // stereoatoms if (cleanIt || dblBond->getStereo() == Bond::STEREONONE || (dblBond->getStereo() == Bond::STEREOANY && dblBond->getStereoAtoms().size() != 2)) { dblBond->setStereo(Bond::STEREONONE); const Atom *begAtom = dblBond->getBeginAtom(), *endAtom = dblBond->getEndAtom(); // we're only going to handle 2 or three coordinate atoms: if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) && (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) { // ------------------ // get the CIP ranking of each atom if we need it: if (!cipDone) { if (!begAtom->hasProp(common_properties::_CIPRank)) { Chirality::assignAtomCIPRanks(mol, ranks); } else { // no need to recompute if we don't need to recompute. :-) for (unsigned int ai = 0; ai < mol.getNumAtoms(); ++ai) { ranks[ai] = mol.getAtomWithIdx(ai)->getProp( common_properties::_CIPRank); } } cipDone = true; } // find the neighbors for the begin atom and the endAtom UINT_VECT begAtomNeighbors, endAtomNeighbors; bool checkDir = false; bool includeAromatic = true; Chirality::findAtomNeighborsHelper(mol, begAtom, dblBond, begAtomNeighbors, checkDir, includeAromatic); Chirality::findAtomNeighborsHelper(mol, endAtom, dblBond, endAtomNeighbors, checkDir, includeAromatic); if (begAtomNeighbors.size() > 0 && endAtomNeighbors.size() > 0) { if ((begAtomNeighbors.size() == 2) && (endAtomNeighbors.size() == 2)) { // if both of the atoms have 2 neighbors (other than the one // connected // by the double bond) and .... if ((ranks[begAtomNeighbors[0]] != ranks[begAtomNeighbors[1]]) && (ranks[endAtomNeighbors[0]] != ranks[endAtomNeighbors[1]])) { // the neighbors ranks are different at both the ends, // this bond can be part of a cis/trans system if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) { dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]); } else { dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]); } if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) { dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]); } else { dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]); } } } else if (begAtomNeighbors.size() == 2) { // if the begAtom has two neighbors and .... if (ranks[begAtomNeighbors[0]] != ranks[begAtomNeighbors[1]]) { // their ranks are different if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) { dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]); } else { dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]); } dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]); } } else if (endAtomNeighbors.size() == 2) { // if the endAtom has two neighbors and ... if (ranks[endAtomNeighbors[0]] != ranks[endAtomNeighbors[1]]) { // their ranks are different dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]); if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) { dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]); } else { dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]); } } } else { // end and beg atoms has only one neighbor each, it doesn't // matter what the ranks are: dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]); dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]); } // end of different number of neighbors on beg and end atoms // mark this double bond as a potential stereo bond if (!dblBond->getStereoAtoms().empty()) { dblBond->setStereo(Bond::STEREOANY); } } // end of check that beg and end atoms have at least 1 // neighbor: } // end of 2 and 3 coordinated atoms only } // end of we want it or CIP code is not set } // end of double bond } // end of for loop over all bonds mol.setProp(common_properties::_BondsPotentialStereo, 1, true); } } // removes chirality markers from sp and sp2 hybridized centers: void cleanupChirality(RWMol &mol) { unsigned int degree, perm; bool needCleanupStereoGroups = false; for (auto atom : mol.atoms()) { switch (atom->getChiralTag()) { case Atom::CHI_TETRAHEDRAL_CW: case Atom::CHI_TETRAHEDRAL_CCW: if (atom->getHybridization() != Atom::SP3) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); needCleanupStereoGroups = true; } break; case Atom::CHI_TETRAHEDRAL: if (atom->getHybridization() != Atom::SP3) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); needCleanupStereoGroups = true; } else { perm = 0; atom->getPropIfPresent(common_properties::_chiralPermutation, perm); if (perm > 2) { perm = 0; atom->setProp(common_properties::_chiralPermutation, perm); } } break; case Atom::CHI_SQUAREPLANAR: degree = atom->getTotalDegree(); if (degree < 2 || degree > 4) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); } else { perm = 0; atom->getPropIfPresent(common_properties::_chiralPermutation, perm); if (perm > 3) { perm = 0; atom->setProp(common_properties::_chiralPermutation, perm); } } break; case Atom::CHI_TRIGONALBIPYRAMIDAL: degree = atom->getTotalDegree(); if (degree < 2 || degree > 5) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); } else { perm = 0; atom->getPropIfPresent(common_properties::_chiralPermutation, perm); if (perm > 20) { perm = 0; atom->setProp(common_properties::_chiralPermutation, perm); } } break; case Atom::CHI_OCTAHEDRAL: degree = atom->getTotalDegree(); if (degree < 2 || degree > 6) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); } else { perm = 0; atom->getPropIfPresent(common_properties::_chiralPermutation, perm); if (perm > 30) { perm = 0; atom->setProp(common_properties::_chiralPermutation, perm); } } break; default: /* ??? Handle other types in future. */ break; } } if (needCleanupStereoGroups) { Chirality::cleanupStereoGroups(mol); } } #define VOLTEST(X, Y, Z) (v[X].dotProduct(v[Y].crossProduct(v[Z])) >= 0.0) static unsigned int OctahedralPermFrom3D(unsigned char *pair, const RDGeom::Point3D *v) { switch (pair[0]) { case 2: // a-b switch (pair[2]) { case 4: return VOLTEST(0, 3, 4) ? 28 : 27; case 5: return VOLTEST(0, 2, 3) ? 25 : 30; default: // 0 or 6 return VOLTEST(0, 2, 3) ? 26 : 29; } break; case 3: // a-c switch (pair[1]) { case 4: return VOLTEST(0, 3, 4) ? 22 : 21; case 5: return VOLTEST(0, 1, 3) ? 19 : 24; default: // 0 or 6 return VOLTEST(0, 1, 3) ? 20 : 23; } break; case 4: // a-d switch (pair[1]) { case 3: return VOLTEST(0, 2, 4) ? 13 : 12; case 5: return VOLTEST(0, 1, 2) ? 6 : 18; default: // 0 or 6 return VOLTEST(0, 1, 2) ? 7 : 17; } break; case 5: // a-e switch (pair[1]) { case 3: return VOLTEST(0, 2, 3) ? 11 : 9; case 4: return VOLTEST(0, 1, 2) ? 3 : 16; default: // 0 or 6 return VOLTEST(0, 1, 2) ? 5 : 15; } break; default: // 0 or 6 a-f switch (pair[1]) { case 3: return VOLTEST(0, 2, 3) ? 10 : 8; case 4: return VOLTEST(0, 1, 2) ? 1 : 2; default: // 5 return VOLTEST(0, 1, 2) ? 4 : 14; } } // unreachable return 0; } bool isWigglyBond(const Bond *bond, const Atom *atom) { int hasWigglyBond = 0; if (bond->getBeginAtomIdx() == atom->getIdx() && bond->getBondType() == Bond::BondType::SINGLE && (bond->getBondDir() == Bond::BondDir::UNKNOWN || (bond->getPropIfPresent(common_properties::_UnknownStereo, hasWigglyBond) && hasWigglyBond))) { return true; } return false; } // The tolerance here is pretty high in order to accomodate things coming from // the dgeom code As we get more experience with real-world structures and/or // improve the dgeom code, we can think about lowering this. static bool assignNontetrahedralChiralTypeFrom3D(ROMol &mol, const Conformer &conf, Atom *atom, double tolerance = 0.1) { // FIX: add tests for dative and zero order bonds // Fail fast check for non-tetrahedral elements if (atom->getAtomicNum() < 15) { return false; } // check for wiggly bonds for (const auto bond : mol.atomBonds(atom)) { if (isWigglyBond(bond, atom)) { return false; } } RDGeom::Point3D cen = conf.getAtomPos(atom->getIdx()); RDGeom::Point3D v[6]; unsigned int count = 0; ROMol::ADJ_ITER nbrIdx, endNbrs; boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom); while (nbrIdx != endNbrs) { if (count == 6) { return false; } RDGeom::Point3D p = conf.getAtomPos(*nbrIdx); v[count] = cen.directionVector(p); ++count; ++nbrIdx; } if (count < 3) { return false; } unsigned char pair[6]; memset(pair, 0, 6); unsigned int pairs = 0; for (unsigned int i = 0; i < count; i++) { for (unsigned int j = i + 1; j < count; j++) { if (v[i].dotProduct(v[j]) < -(1 - tolerance)) { if (pair[i] || pair[j]) { return false; } pair[i] = j + 1; pair[j] = i + 1; pairs++; } } } Atom::ChiralType tag; unsigned int perm; bool res = false; switch (pairs) { case 0: break; case 1: switch (count) { case 3: /* T-shape */ atom->setChiralTag(Atom::ChiralType::CHI_SQUAREPLANAR); res = true; if (pair[0] == 0) { perm = 3; // Z } else if (pair[0] == 2) { perm = 2; // 4 } else /* pair[0] == 3 */ { perm = 1; // U } atom->setProp(common_properties::_chiralPermutation, perm); break; case 4: /* See-saw */ if (pair[0] == 2) { // a b if (v[2].angleTo(v[3]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 2, 3) ? 25 : 29; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(0, 2, 3) ? 7 : 8; } } else if (pair[0] == 3) { // a c if (v[1].angleTo(v[3]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 1, 3) ? 19 : 23; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(0, 1, 3) ? 5 : 6; } } else if (pair[0] == 4) { // a d if (v[1].angleTo(v[2]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 1, 2) ? 6 : 17; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(0, 1, 2) ? 3 : 4; } } else if (pair[1] == 3) { // b c if (v[0].angleTo(v[3]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 1, 3) ? 10 : 8; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(1, 0, 3) ? 13 : 14; } } else if (pair[1] == 4) { // b d if (v[0].angleTo(v[2]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 1, 3) ? 1 : 2; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(1, 0, 2) ? 10 : 12; } } else /* pair[2] == 4 */ { // c d if (v[0].angleTo(v[1]) < 100 * M_PI / 180.0) { tag = Atom::ChiralType::CHI_OCTAHEDRAL; perm = VOLTEST(0, 1, 3) ? 4 : 14; } else { tag = Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL; perm = VOLTEST(3, 0, 1) ? 16 : 19; } } atom->setChiralTag(tag); res = true; atom->setProp(common_properties::_chiralPermutation, perm); break; case 5: /* Trigonal bipyramidal */ atom->setChiralTag(Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL); res = true; if (pair[0] == 2) { perm = VOLTEST(0, 2, 3) ? 7 : 8; // a b } else if (pair[0] == 3) { perm = VOLTEST(0, 1, 3) ? 5 : 6; // a c } else if (pair[0] == 4) { perm = VOLTEST(0, 1, 2) ? 3 : 4; // a d } else if (pair[0] == 5) { perm = VOLTEST(0, 1, 2) ? 1 : 2; // a e } else if (pair[1] == 3) { perm = VOLTEST(1, 0, 3) ? 13 : 14; // b c } else if (pair[1] == 4) { perm = VOLTEST(1, 0, 2) ? 10 : 12; // b d } else if (pair[1] == 5) { perm = VOLTEST(1, 0, 2) ? 9 : 11; // b e } else if (pair[2] == 4) { perm = VOLTEST(2, 0, 1) ? 16 : 19; // c d } else if (pair[2] == 5) { perm = VOLTEST(2, 0, 1) ? 15 : 20; // c e } else /* pair[2] == 4 */ { perm = VOLTEST(3, 0, 1) ? 17 : 18; // d e } atom->setProp(common_properties::_chiralPermutation, perm); break; } break; case 2: if (count == 4) { /* Square planar */ atom->setChiralTag(Atom::ChiralType::CHI_SQUAREPLANAR); res = true; if (pair[0] == 2) { perm = 2; // 4 } else if (pair[0] == 3) { perm = 1; // U } else /* pair[1] == 4 */ { perm = 3; // Z } atom->setProp(common_properties::_chiralPermutation, perm); } else if (count == 5) { /* Square pyramidal */ atom->setChiralTag(Atom::ChiralType::CHI_OCTAHEDRAL); res = true; perm = OctahedralPermFrom3D(pair, v); atom->setProp(common_properties::_chiralPermutation, perm); } break; case 3: if (count == 6) { /* Octahedral */ atom->setChiralTag(Atom::ChiralType::CHI_OCTAHEDRAL); res = true; perm = OctahedralPermFrom3D(pair, v); atom->setProp(common_properties::_chiralPermutation, perm); } break; } return res; } void assignChiralTypesFrom3D(ROMol &mol, int confId, bool replaceExistingTags) { const double ZERO_VOLUME_TOL = 0.1; if (!mol.getNumConformers()) { return; } const Conformer &conf = mol.getConformer(confId); if (!conf.is3D()) { return; } // if the molecule already has stereochemistry // perceived, remove the flags that indicate // this... what we're about to do will require // that we go again. if (mol.hasProp(common_properties::_StereochemDone)) { mol.clearProp(common_properties::_StereochemDone); } auto allowNontetrahedralStereo = Chirality::getAllowNontetrahedralChirality(); boost::dynamic_bitset<> explicitAtoms; explicitAtoms.resize(mol.getNumAtoms(), 0); for (auto bond : mol.bonds()) { auto bondDir = bond->getBondDir(); if (bondDir == Bond::BondDir::BEGINWEDGE || bondDir == Bond::BondDir::BEGINDASH) { explicitAtoms[bond->getBeginAtom()->getIdx()] = 1; } } for (auto atom : mol.atoms()) { if (atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED) { explicitAtoms[atom->getIdx()] = 1; } } for (auto atom : mol.atoms()) { // if we aren't replacing existing tags and the atom is already tagged, // punt: if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) { continue; } atom->setChiralTag(Atom::CHI_UNSPECIFIED); // additional reasons to skip the atom: auto nzDegree = Chirality::detail::getAtomNonzeroDegree(atom); auto tnzDegree = nzDegree + atom->getTotalNumHs(); if (nzDegree < 3 || tnzDegree > 6) { // not enough explicit neighbors or too many total neighbors continue; } if (allowNontetrahedralStereo && assignNontetrahedralChiralTypeFrom3D(mol, conf, atom)) { if (explicitAtoms[atom->getIdx()] == 0) { atom->setProp(common_properties::_NonExplicit3DChirality, 1); } continue; } /* We're only doing tetrahedral cases here */ if (tnzDegree > 4) { continue; } int anum = atom->getAtomicNum(); if (anum != 16 && anum != 34 && // S or Se are special // (just using the InChI list for now) tnzDegree != 4 // not enough total neighbors ) { continue; } const auto &p0 = conf.getAtomPos(atom->getIdx()); const RDGeom::Point3D *nbrs[4]; unsigned int nbrIdx = 0; int hasWigglyBond = 0; for (const auto bond : mol.atomBonds(atom)) { hasWigglyBond = isWigglyBond(bond, atom); if (hasWigglyBond) { break; } if (!Chirality::detail::bondAffectsAtomChirality(bond, atom)) { continue; } nbrs[nbrIdx++] = &conf.getAtomPos(bond->getOtherAtomIdx(atom->getIdx())); } if (hasWigglyBond) { continue; } auto v1 = *nbrs[0] - p0; auto v2 = *nbrs[1] - p0; auto v3 = *nbrs[2] - p0; double chiralVol = v1.dotProduct(v2.crossProduct(v3)); bool chiralitySet = false; if (chiralVol < -ZERO_VOLUME_TOL) { atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); chiralitySet = true; } else if (chiralVol > ZERO_VOLUME_TOL) { atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); chiralitySet = true; } else if (nbrIdx == 4) { // The first three neighbors are on the same plane as the chiral atom (or // very close to it). If a 4th neighbor is present, let's see if this one // determines a chiral volume auto v4 = *nbrs[3] - p0; // v4 would be in the opposite direction to v3 chiralVol = -v1.dotProduct(v2.crossProduct(v4)); if (chiralVol < -ZERO_VOLUME_TOL) { atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW); chiralitySet = true; } else if (chiralVol > ZERO_VOLUME_TOL) { atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); chiralitySet = true; } else { atom->setChiralTag(Atom::CHI_UNSPECIFIED); } } else { atom->setChiralTag(Atom::CHI_UNSPECIFIED); } if (chiralitySet && explicitAtoms[atom->getIdx()] == 0) { atom->setProp(common_properties::_NonExplicit3DChirality, 1); } } } void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags) { static const std::vector chiralTypeVect{ Atom::CHI_TETRAHEDRAL_CW, Atom::CHI_TETRAHEDRAL_CCW}; // if the molecule already has stereochemistry // perceived, remove the flags that indicate // this... what we're about to do will require // that we go again. if (mol.hasProp(common_properties::_StereochemDone)) { mol.clearProp(common_properties::_StereochemDone); } // Atom-based parity // Number the atoms surrounding the stereo center with 1, 2, 3, and 4 // in order of increasing atom number (position in the atom block) // (an implicit hydrogen should be considered the highest numbered atom). // View the center from a position such that the bond connecting the // highest-numbered atom (4) projects behind the plane formed by // atoms 1, 2, and 3. // // Parity 1 (CW) Parity 2 (CCW) // 3 1 3 2 // \ / \ / // | | // 2 1 // for (auto atom : mol.atoms()) { // if we aren't replacing existing tags and the atom is already tagged, // punt: if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) { continue; } int parity = 0; atom->getPropIfPresent(common_properties::molParity, parity); if (parity <= 0 || parity > 2 || atom->getDegree() < 3) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); continue; } // if we are here, parity was 1 (CW) or 2 (CCW) // now we set parity 0 to be CW and 1 to be CCW --parity; RDKit::ROMol::OBOND_ITER_PAIR nbrBonds = mol.getAtomBonds(atom); INT_LIST nbrBondIdxList; std::transform( nbrBonds.first, nbrBonds.second, std::back_inserter(nbrBondIdxList), [&mol](const ROMol::edge_descriptor &e) { return mol[e]->getIdx(); }); unsigned int atomIdx = atom->getIdx(); nbrBondIdxList.sort([&mol, atomIdx](const int ai, const int bi) { return (mol.getBondWithIdx(ai)->getOtherAtomIdx(atomIdx) < mol.getBondWithIdx(bi)->getOtherAtomIdx(atomIdx)); }); int nSwaps = atom->getPerturbationOrder(nbrBondIdxList); if (nSwaps % 2) { parity = 1 - parity; } atom->setChiralTag(chiralTypeVect[parity]); if (atom->needsUpdatePropertyCache()) { atom->updatePropertyCache(false); } } } void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf) { // used to store the number of single bonds a given // single bond is adjacent to std::vector singleBondCounts(mol.getNumBonds(), 0); std::vector bondsInPlay; // keeps track of which single bonds are adjacent to each double bond: VECT_INT_VECT dblBondNbrs(mol.getNumBonds()); // keeps track of which double bonds are adjacent to each single bond: VECT_INT_VECT singleBondNbrs(mol.getNumBonds()); // keeps track of which single bonds need a dir set and which double bonds // need to have their neighbors' dirs set boost::dynamic_bitset<> needsDir(mol.getNumBonds()); // find double bonds that should be considered for // stereochemistry // NOTE that we are explicitly excluding double bonds in rings // with this test. if (!mol.getRingInfo()->isSymmSssr()) { RDKit::MolOps::symmetrizeSSSR(mol); } for (auto bond : mol.bonds()) { if (isBondCandidateForStereo(bond)) { bool isCandidate = true; for (const auto bondAtom : {bond->getBeginAtom(), bond->getEndAtom()}) { for (const auto nbrBond : mol.atomBonds(bondAtom)) { if (nbrBond->getBondType() == Bond::SINGLE || nbrBond->getBondType() == Bond::AROMATIC) { singleBondCounts[nbrBond->getIdx()] += 1; auto nbrDir = nbrBond->getBondDir(); int hasUnknownStereo = 0; if (nbrBond->getBeginAtom() == bondAtom && nbrDir == Bond::BondDir::UNKNOWN && nbrBond->getPropIfPresent(common_properties::_UnknownStereo, hasUnknownStereo) && hasUnknownStereo) { // if there's a wiggly bond starting here, then we're not a // candidate for stereo isCandidate = false; } else { needsDir[bond->getIdx()] = 1; if (nbrDir == Bond::BondDir::NONE || nbrDir == Bond::BondDir::ENDDOWNRIGHT || nbrDir == Bond::BondDir::ENDUPRIGHT) { needsDir[nbrBond->getIdx()] = 1; dblBondNbrs[bond->getIdx()].push_back(nbrBond->getIdx()); // the search may seem inefficient, but these vectors are // going to be at most 2 long (with very few exceptions). It's // just not worth using a different data structure if (std::find(singleBondNbrs[nbrBond->getIdx()].begin(), singleBondNbrs[nbrBond->getIdx()].end(), bond->getIdx()) == singleBondNbrs[nbrBond->getIdx()].end()) { singleBondNbrs[nbrBond->getIdx()].push_back(bond->getIdx()); } } } } if (!isCandidate) { break; } } if (!isCandidate) { break; } } if (isCandidate) { bondsInPlay.push_back(bond); } } } if (!bondsInPlay.size()) { return; } // order the double bonds based on the singleBondCounts of their neighbors: std::vector> orderedBondsInPlay; for (auto dblBond : bondsInPlay) { unsigned int countHere = std::accumulate(dblBondNbrs[dblBond->getIdx()].begin(), dblBondNbrs[dblBond->getIdx()].end(), 0); // and favor double bonds that are *not* in rings. The combination of // using the sum above (instead of the max) and this ring-membershipt test // seem to fix sf.net issue 3009836 if (!(mol.getRingInfo()->numBondRings(dblBond->getIdx()))) { countHere *= 10; } orderedBondsInPlay.push_back(std::make_pair(countHere, dblBond)); } std::ranges::sort(orderedBondsInPlay, [](const auto &a, const auto &b) { // sort in decreasing order of priority if (a.first != b.first) { return a.first > b.first; } // in case of ties, use the bond index to decide the order return a.second->getIdx() < b.second->getIdx(); }); // oof, now loop over the double bonds in that order and // update their neighbor directionalities: for (const auto& pairIter : orderedBondsInPlay) { updateDoubleBondNeighbors(mol, pairIter.second, conf, needsDir, singleBondCounts, singleBondNbrs); } } void detectBondStereochemistry(ROMol &mol, int confId) { if (!mol.getNumConformers()) { return; } const Conformer &conf = mol.getConformer(confId); setDoubleBondNeighborDirections(mol, &conf); } void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags) { for (auto bond : mol.bonds()) { if (bond->getBondType() == Bond::SINGLE) { if (bond->getBondDir() == Bond::UNKNOWN) { bond->setProp(common_properties::_UnknownStereo, 1); } if (!onlyWedgeFlags || (bond->getBondDir() != Bond::BondDir::ENDDOWNRIGHT && bond->getBondDir() != Bond::BondDir::ENDUPRIGHT)) { bond->setBondDir(Bond::NONE); } } } } void clearDirFlags(ROMol &mol, bool onlyWedgeTypeBondDirs) { for (auto bond : mol.bonds()) { if (bond->getBondDir() == Bond::UNKNOWN || bond->getBondDir() == Bond::BondDir::EITHERDOUBLE) { bond->setProp(common_properties::_UnknownStereo, 1); } if (onlyWedgeTypeBondDirs == false || (bond->getBondDir() != Bond::BondDir::ENDDOWNRIGHT && bond->getBondDir() != Bond::BondDir::ENDUPRIGHT)) { bond->setBondDir(Bond::NONE); } } } void clearAllBondDirFlags(ROMol &mol) { clearDirFlags(mol, false); } void setBondStereoFromDirections(ROMol &mol) { mol.clearProp("_needsDetectBondStereo"); for (Bond *bond : mol.bonds()) { if (bond->getBondType() == Bond::DOUBLE && bond->getStereo() != Bond::STEREOANY) { const Atom *stereoBondBeginAtom = bond->getBeginAtom(); const Atom *stereoBondEndAtom = bond->getEndAtom(); const Bond *directedBondAtBegin = Chirality::getNeighboringDirectedBond(mol, stereoBondBeginAtom); const Bond *directedBondAtEnd = Chirality::getNeighboringDirectedBond(mol, stereoBondEndAtom); if (directedBondAtBegin != nullptr && directedBondAtEnd != nullptr) { unsigned beginSideStereoAtom = directedBondAtBegin->getOtherAtomIdx(stereoBondBeginAtom->getIdx()); unsigned endSideStereoAtom = directedBondAtEnd->getOtherAtomIdx(stereoBondEndAtom->getIdx()); bond->setStereoAtoms(beginSideStereoAtom, endSideStereoAtom); auto beginSideBondDirection = directedBondAtBegin->getBondDir(); if (directedBondAtBegin->getBeginAtom() == stereoBondBeginAtom) { beginSideBondDirection = getOppositeBondDir(beginSideBondDirection); } auto endSideBondDirection = directedBondAtEnd->getBondDir(); if (directedBondAtEnd->getEndAtom() == stereoBondEndAtom) { endSideBondDirection = getOppositeBondDir(endSideBondDirection); } if (beginSideBondDirection == endSideBondDirection) { bond->setStereo(Bond::STEREOTRANS); } else { bond->setStereo(Bond::STEREOCIS); } } } } } void assignStereochemistryFrom3D(ROMol &mol, int confId, bool replaceExistingTags) { if (!mol.getNumConformers() || !mol.getConformer(confId).is3D()) { return; } if (mol.needsUpdatePropertyCache()) { mol.updatePropertyCache(false); } detectBondStereochemistry(mol, confId); assignChiralTypesFrom3D(mol, confId, replaceExistingTags); bool force = true; bool flagPossibleStereoCenters = true; assignStereochemistry(mol, replaceExistingTags, force, flagPossibleStereoCenters); } void assignChiralTypesFromBondDirs(ROMol &mol, const int confId, const bool replaceExistingTags) { if (!mol.getNumConformers()) { return; } auto conf = mol.getConformer(confId); boost::dynamic_bitset<> atomsSet(mol.getNumAtoms(), 0); for (auto &bond : mol.bonds()) { const Bond::BondDir dir = bond->getBondDir(); Atom *atom = bond->getBeginAtom(); if (dir == Bond::UNKNOWN) { if (atomsSet[atom->getIdx()] || replaceExistingTags) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); atomsSet.set(atom->getIdx()); } } else { // the bond is marked as chiral: if (dir == Bond::BEGINWEDGE || dir == Bond::BEGINDASH) { if (atomsSet[atom->getIdx()] || (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED)) { continue; } if (atom->needsUpdatePropertyCache()) { atom->updatePropertyCache(false); } Atom::ChiralType code = Chirality::atomChiralTypeFromBondDirPseudo3D(mol, bond, &conf) .value_or(Atom::ChiralType::CHI_UNSPECIFIED); if (code != Atom::ChiralType::CHI_UNSPECIFIED) { atomsSet.set(atom->getIdx()); // std::cerr << "atom " << atom->getIdx() << " code " << code // << " from bond " << bond->getIdx() << std::endl; } atom->setChiralTag(code); // within the RD representation, if a three-coordinate atom // is chiral and has an implicit H, that H needs to be made explicit: if (atom->getDegree() == 3 && !atom->getNumExplicitHs() && atom->getNumImplicitHs() == 1) { atom->setNumExplicitHs(1); // recalculated number of implicit Hs: atom->updatePropertyCache(); } } } } } void removeStereochemistry(ROMol &mol) { if (mol.hasProp(common_properties::_StereochemDone)) { mol.clearProp(common_properties::_StereochemDone); } for (auto atom : mol.atoms()) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); if (atom->hasProp(common_properties::_CIPCode)) { atom->clearProp(common_properties::_CIPCode); } if (atom->hasProp(common_properties::_CIPRank)) { atom->clearProp(common_properties::_CIPRank); } } for (auto bond : mol.bonds()) { if (bond->getBondType() == Bond::DOUBLE) { bond->setStereo(Bond::BondStereo::STEREONONE); bond->getStereoAtoms().clear(); bond->setBondDir(Bond::BondDir::NONE); } else if (bond->getBondType() == Bond::SINGLE) { bond->setBondDir(Bond::BondDir::NONE); } } std::vector sgs; static_cast(mol).setStereoGroups(std::move(sgs)); } } // namespace MolOps namespace Chirality { void simplifyEnhancedStereo(ROMol &mol, bool removeAffectedStereoGroups) { auto sgs = mol.getStereoGroups(); if (sgs.size() == 1) { boost::dynamic_bitset<> chiralAts(mol.getNumAtoms()); for (const auto atom : mol.atoms()) { if (atom->getChiralTag() > Atom::ChiralType::CHI_UNSPECIFIED && atom->getChiralTag() < Atom::ChiralType::CHI_OTHER) { chiralAts.set(atom->getIdx(), 1); } } for (const auto atm : sgs[0].getAtoms()) { chiralAts.set(atm->getIdx(), 0); } if (chiralAts.none()) { // all specified chiral centers are accounted for by this StereoGroup. if (sgs[0].getGroupType() == StereoGroupType::STEREO_OR || sgs[0].getGroupType() == StereoGroupType::STEREO_AND) { if (removeAffectedStereoGroups) { std::vector empty; mol.setStereoGroups(std::move(empty)); } std::string label = sgs[0].getGroupType() == StereoGroupType::STEREO_OR ? "OR enantiomer" : "AND enantiomer"; mol.setProp(common_properties::molNote, label); // clear the chiral codes on the atoms in the group for (const auto atm : sgs[0].getAtoms()) { mol.getAtomWithIdx(atm->getIdx()) ->clearProp(common_properties::_CIPCode); } } } } } std::vector> findMesoCenters( const ROMol &mol, bool includeIsotopes, bool includeAtomMaps) { std::vector> res; boost::dynamic_bitset<> specifiedChiralAts(mol.getNumAtoms()); std::vector ringStereoAts(mol.getNumAtoms(), mol.getNumAtoms()); for (const auto atom : mol.atoms()) { atom->clearProp(common_properties::_mesoOtherAtom); if (atom->getChiralTag() > Atom::ChiralType::CHI_UNSPECIFIED) { specifiedChiralAts.set(atom->getIdx(), 1); } int otherIdx = -1; if (atom->getPropIfPresent(common_properties::_ringStereoOtherAtom, otherIdx) && otherIdx >= 0) { ringStereoAts[atom->getIdx()] = static_cast(otherIdx); } } // easy case: no atoms with specified chirality if (specifiedChiralAts.none()) { return res; } // we will compare the atom ranks with chirality and with only chiral presence // (so that we can distinguish centers with chirality specified and those // without) const bool breakTies = false; const bool includeChiralPresence = true; const bool includeStereoGroups = false; const bool useNonStereoRanks = false; bool includeChirality = true; std::vector chiralRanks; Canon::rankMolAtoms(mol, chiralRanks, breakTies, includeChirality, includeIsotopes, includeAtomMaps, includeChiralPresence, includeStereoGroups, useNonStereoRanks); includeChirality = false; std::vector presenceRanks; Canon::rankMolAtoms(mol, presenceRanks, breakTies, includeChirality, includeIsotopes, includeAtomMaps, includeChiralPresence, includeStereoGroups, useNonStereoRanks); for (auto i = 0u; i < mol.getNumAtoms(); ++i) { if (!specifiedChiralAts[i]) { continue; } for (auto j = i + 1; j < mol.getNumAtoms(); ++j) { if (!specifiedChiralAts[j]) { continue; } if (chiralRanks[i] != chiralRanks[j] && presenceRanks[i] == presenceRanks[j]) { res.emplace_back(i, j); } else if (ringStereoAts[i] == j && ringStereoAts[j] == i) { // if both atoms are involved in ring stereo, they can have different // ranks but still be meso centers. The canonical example of this is // N[C@H]1CC[C@@H](O)CC1 std::unordered_set iPresenceRanks; std::unordered_set iChiralRanks; const auto atomi = mol.getAtomWithIdx(i); for (const auto nbr : mol.atomNeighbors(atomi)) { iPresenceRanks.insert(presenceRanks[nbr->getIdx()]); iChiralRanks.insert(chiralRanks[nbr->getIdx()]); } if (iPresenceRanks.size() < atomi->getDegree()) { res.emplace_back(i, j); } } } } for (const auto &[i, j] : res) { mol.getAtomWithIdx(i)->setProp( common_properties::_mesoOtherAtom, j); mol.getAtomWithIdx(j)->setProp( common_properties::_mesoOtherAtom, i); } return res; } } // namespace Chirality } // namespace RDKit