mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
732 lines
28 KiB
C++
732 lines
28 KiB
C++
//
|
|
// Copyright (C) 2001-2022 Greg Landrum and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/RDKitQueries.h>
|
|
#include <GraphMol/Canon.h>
|
|
#include <GraphMol/Chirality.h>
|
|
#include "SmilesParse.h"
|
|
#include "SmilesParseOps.h"
|
|
#include <list>
|
|
#include <algorithm>
|
|
#include <boost/dynamic_bitset.hpp>
|
|
#include <boost/format.hpp>
|
|
#include <RDGeneral/RDLog.h>
|
|
|
|
namespace SmilesParseOps {
|
|
using namespace RDKit;
|
|
|
|
void ClearAtomChemicalProps(RDKit::Atom *atom) {
|
|
TEST_ASSERT(atom);
|
|
atom->setIsotope(0);
|
|
atom->setFormalCharge(0);
|
|
atom->setNumExplicitHs(0);
|
|
}
|
|
|
|
void CheckRingClosureBranchStatus(RDKit::Atom *atom, RDKit::RWMol *mp) {
|
|
// github #786 and #1652: if the ring closure comes after a branch,
|
|
// the stereochem is wrong.
|
|
// This function is called while closing a branch during construction of
|
|
// the molecule from SMILES and corrects for what happens when parsing odd
|
|
// (and arguably wrong) SMILES constructs like:
|
|
// 1) [C@@](F)1(C)CCO1
|
|
// 2) C1CN[C@](O)(N)1
|
|
// 3) [C@](Cl)(F)1CC[C@H](F)CC1
|
|
// In the first two cases the stereochemistry at the chiral atom
|
|
// needs to be reversed. In the third case the stereochemistry should be
|
|
// reversed when the Cl is added, but left alone when the F is added.
|
|
// We recognize these situations using the index of the chiral atom
|
|
// and the degree of that chiral atom at the time the ring closure
|
|
// digit is encountered during parsing.
|
|
// ----------
|
|
// github #1972 adds these examples:
|
|
// 1) [C@@]1(Cl)(F)I.Br1 (ok)
|
|
// 2) [C@@](Cl)1(F)I.Br1 (reverse)
|
|
// 3) [C@@](Cl)(F)1I.Br1 (ok)
|
|
// 4) [C@@](Cl)(F)(I)1.Br1 (reverse)
|
|
PRECONDITION(atom, "bad atom");
|
|
PRECONDITION(mp, "bad mol");
|
|
if (atom->getIdx() != mp->getNumAtoms(true) - 1 &&
|
|
(atom->getDegree() == 1 ||
|
|
(atom->getDegree() == 2 && atom->getIdx() != 0) ||
|
|
(atom->getDegree() == 3 && atom->getIdx() == 0)) &&
|
|
(atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW ||
|
|
atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
// std::cerr << "crcbs: " << atom->getIdx() << std::endl;
|
|
atom->invertChirality();
|
|
}
|
|
}
|
|
|
|
void ReportParseError(const char *message, bool throwIt) {
|
|
PRECONDITION(message, "bad message");
|
|
if (!throwIt) {
|
|
BOOST_LOG(rdErrorLog) << "SMILES Parse Error: " << message << std::endl;
|
|
} else {
|
|
throw SmilesParseException(message);
|
|
}
|
|
}
|
|
|
|
void CleanupAfterParseError(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
// blow out any partial bonds:
|
|
for (auto markI : *mol->getBondBookmarks()) {
|
|
RWMol::BOND_PTR_LIST &bonds = markI.second;
|
|
for (auto &bond : bonds) {
|
|
delete bond;
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
bool couldBeRingClosure(int val) { return val < 100000 && val >= 0; }
|
|
} // namespace
|
|
//
|
|
// set bondOrder to Bond::IONIC to skip the formation of a bond
|
|
// between the fragment and the molecule
|
|
//
|
|
void AddFragToMol(RWMol *mol, RWMol *frag, Bond::BondType bondOrder,
|
|
Bond::BondDir bondDir) {
|
|
PRECONDITION(mol, "no molecule");
|
|
PRECONDITION(frag, "no fragment");
|
|
PRECONDITION(mol->getActiveAtom(), "no active atom");
|
|
Atom *lastAt = mol->getActiveAtom();
|
|
int nOrigAtoms = mol->getNumAtoms();
|
|
int nOrigBonds = mol->getNumBonds();
|
|
|
|
//
|
|
// Add the fragment's atoms and bonds to the molecule:
|
|
//
|
|
mol->insertMol(*frag);
|
|
|
|
//
|
|
// update ring-closure order information on the added atoms:
|
|
//
|
|
for (const auto atom : frag->atoms()) {
|
|
INT_VECT tmpVect;
|
|
if (atom->getPropIfPresent(common_properties::_RingClosures, tmpVect)) {
|
|
for (auto &v : tmpVect) {
|
|
// if the ring closure is not already a bond, don't touch it:
|
|
if (v >= 0) {
|
|
v += nOrigBonds;
|
|
}
|
|
}
|
|
auto newAtom = mol->getAtomWithIdx(nOrigAtoms + atom->getIdx());
|
|
newAtom->setProp(common_properties::_RingClosures, tmpVect);
|
|
}
|
|
}
|
|
|
|
//
|
|
// ses up the bond between the mol and the branch
|
|
//
|
|
if (bondOrder != Bond::IONIC) {
|
|
// FIX: this is not so much with the elegance...
|
|
auto firstAt = mol->getAtomWithIdx(nOrigAtoms);
|
|
int atomIdx1 = firstAt->getIdx();
|
|
int atomIdx2 = lastAt->getIdx();
|
|
if (frag->hasBondBookmark(ci_LEADING_BOND)) {
|
|
// std::cout << "found it" << std::endl;
|
|
const ROMol::BOND_PTR_LIST &leadingBonds =
|
|
frag->getAllBondsWithBookmark(ci_LEADING_BOND);
|
|
for (auto leadingBond : leadingBonds) {
|
|
// we've already got a bond, so just set its local info
|
|
// and then add it to the molecule intact (no sense doing
|
|
// any extra work).
|
|
leadingBond->setOwningMol(mol);
|
|
leadingBond->setEndAtomIdx(leadingBond->getBeginAtomIdx() + nOrigAtoms);
|
|
leadingBond->setBeginAtomIdx(atomIdx2);
|
|
mol->addBond(leadingBond, true);
|
|
}
|
|
mol->clearBondBookmark(ci_LEADING_BOND);
|
|
} else {
|
|
// SMARTS semantics: unspecified bonds can be single or aromatic
|
|
if (bondOrder == Bond::UNSPECIFIED) {
|
|
auto *newB = new QueryBond(Bond::SINGLE);
|
|
newB->setQuery(makeSingleOrAromaticBondQuery());
|
|
newB->setOwningMol(mol);
|
|
newB->setBeginAtomIdx(atomIdx1);
|
|
newB->setEndAtomIdx(atomIdx2);
|
|
newB->setProp(RDKit::common_properties::_unspecifiedOrder, 1);
|
|
mol->addBond(newB);
|
|
delete newB;
|
|
} else {
|
|
Bond::BondType bo = bondOrder;
|
|
if (bo == Bond::DATIVEL) {
|
|
std::swap(atomIdx1, atomIdx2);
|
|
bo = Bond::DATIVE;
|
|
} else if (bo == Bond::DATIVER) {
|
|
bo = Bond::DATIVE;
|
|
}
|
|
int idx = mol->addBond(atomIdx2, atomIdx1, bo) - 1;
|
|
mol->getBondWithIdx(idx)->setBondDir(bondDir);
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// okay, the next thing we have to worry about is the possibility
|
|
// that there might be ring opening/closing in the fragment we just
|
|
// dealt with e.g. for things like C1C(C1) and C1C.C1
|
|
// We deal with this by copying in the bookmarks and partial bonds
|
|
// that exist in the fragment
|
|
//
|
|
for (auto atIt : *frag->getAtomBookmarks()) {
|
|
// don't bother even considering bookmarks outside
|
|
// the range used for cycles
|
|
if (couldBeRingClosure(atIt.first)) {
|
|
for (auto at2 : atIt.second) {
|
|
int newIdx = at2->getIdx() + nOrigAtoms;
|
|
mol->setAtomBookmark(mol->getAtomWithIdx(newIdx), atIt.first);
|
|
while (frag->hasBondBookmark(atIt.first)) {
|
|
Bond *b = frag->getBondWithBookmark(atIt.first);
|
|
int atomIdx1 = b->getBeginAtomIdx() + nOrigAtoms;
|
|
b->setOwningMol(mol);
|
|
b->setBeginAtomIdx(atomIdx1);
|
|
mol->setBondBookmark(b, atIt.first);
|
|
frag->clearBondBookmark(atIt.first, b);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
frag->clearAllAtomBookmarks();
|
|
frag->clearAllBondBookmarks();
|
|
};
|
|
|
|
typedef std::pair<size_t, int> SIZET_PAIR;
|
|
typedef std::pair<int, int> INT_PAIR;
|
|
template <typename T>
|
|
bool operator<(const std::pair<T, T> &p1, const std::pair<T, T> &p2) {
|
|
return p1.first < p2.first;
|
|
}
|
|
|
|
//
|
|
// Helper function to get the SMILES bond ordering around a given atom, this is
|
|
// required to make sure the stereo information is correct as the storage order
|
|
// may not match how it is SMILES due to ring closures and implicit/missing
|
|
// ligands.
|
|
//
|
|
unsigned int GetBondOrdering(INT_LIST &bondOrdering, const RDKit::RWMol *mol,
|
|
const RDKit::Atom *atom) {
|
|
PRECONDITION(mol, "no mol");
|
|
PRECONDITION(atom, "no atom");
|
|
|
|
//
|
|
// The atom is marked as chiral, set the SMILES-order of the
|
|
// atom's bonds. This is easy for non-ring-closure bonds,
|
|
// because the SMILES order is determined solely by the atom
|
|
// indices. Things are trickier for ring-closure bonds, which we
|
|
// need to insert into the list in a particular order
|
|
//
|
|
INT_VECT ringClosures;
|
|
atom->getPropIfPresent(common_properties::_RingClosures, ringClosures);
|
|
|
|
#if 0
|
|
std::cerr << "CLOSURES: ";
|
|
std::copy(ringClosures.begin(), ringClosures.end(),
|
|
std::ostream_iterator<int>(std::cerr, " "));
|
|
std::cerr << std::endl;
|
|
#endif
|
|
std::list<SIZET_PAIR> neighbors;
|
|
// push this atom onto the list of neighbors (we'll use this
|
|
// to find our place later):
|
|
neighbors.emplace_back(atom->getIdx(), -1);
|
|
std::list<size_t> bondOrder;
|
|
for (auto nbrIdx : boost::make_iterator_range(mol->getAtomNeighbors(atom))) {
|
|
const Bond *nbrBond = mol->getBondBetweenAtoms(atom->getIdx(), nbrIdx);
|
|
if (std::find(ringClosures.begin(), ringClosures.end(),
|
|
static_cast<int>(nbrBond->getIdx())) == ringClosures.end()) {
|
|
neighbors.emplace_back(nbrIdx, nbrBond->getIdx());
|
|
}
|
|
}
|
|
// sort the list of non-ring-closure bonds:
|
|
neighbors.sort();
|
|
|
|
// find the location of this atom. it pretty much has to be
|
|
// first in the list, e.g for smiles like [C@](F)(Cl)(Br)I, or
|
|
// second (everything else).
|
|
auto selfPos = neighbors.begin();
|
|
if (selfPos->first != atom->getIdx()) {
|
|
++selfPos;
|
|
}
|
|
CHECK_INVARIANT(selfPos->first == atom->getIdx(), "weird atom ordering");
|
|
|
|
// copy over the bond ids:
|
|
for (auto neighborIt = neighbors.begin(); neighborIt != neighbors.end();
|
|
++neighborIt) {
|
|
if (neighborIt != selfPos) {
|
|
bondOrdering.push_back(rdcast<int>(neighborIt->second));
|
|
} else {
|
|
// we are not going to add the atom itself, but we will push on
|
|
// ring closure bonds at this point (if required):
|
|
bondOrdering.insert(bondOrdering.end(), ringClosures.begin(),
|
|
ringClosures.end());
|
|
}
|
|
}
|
|
|
|
return ringClosures.size();
|
|
}
|
|
|
|
void AdjustAtomChiralityFlags(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (auto atom : mol->atoms()) {
|
|
Atom::ChiralType chiralType = atom->getChiralTag();
|
|
if (chiralType == Atom::CHI_TETRAHEDRAL_CW ||
|
|
chiralType == Atom::CHI_TETRAHEDRAL_CCW) {
|
|
INT_LIST bondOrdering;
|
|
unsigned int numClosures = GetBondOrdering(bondOrdering, mol, atom);
|
|
|
|
// ok, we now have the SMILES ordering of the bonds, figure out the
|
|
// permutation order.
|
|
//
|
|
// This whole thing is necessary because the ring-closure bonds
|
|
// in the SMILES come before the bonds to the other neighbors, but
|
|
// they come after the neighbors in the molecule we build.
|
|
// A crude example:
|
|
// in F[C@](Cl)(Br)I the C-Cl bond is index 1 in both SMILES
|
|
// and as built
|
|
// in F[C@]1(Br)I.Cl1 the C-Cl bond is index 1 in the SMILES
|
|
// and index 3 as built.
|
|
//
|
|
int nSwaps = atom->getPerturbationOrder(bondOrdering);
|
|
// FIX: explain this one:
|
|
// At least part of what's going on here for degree 3 atoms:
|
|
// - The first part: if we're at the beginning of the SMILES and have
|
|
// an explicit H, we need to add a swap.
|
|
// This is to reflect that [C@](Cl)(F)C is equivalent to Cl[C@@](F)C
|
|
// but [C@H](Cl)(F)C is fine as-is (The H-C bond is the one you look
|
|
// down).
|
|
// - The second part is more complicated and deals with situations like
|
|
// F[C@]1CCO1. In this case we otherwise end up looking like we need
|
|
// to invert the chirality, which is bogus. The chirality here needs
|
|
// to remain @ just as it does in F[C@](Cl)CCO1
|
|
// - We have to be careful with the second part to not sweep things like
|
|
// C[S@]2(=O).Cl2 into the same bin (was github #760). We detect
|
|
// those cases by looking for unsaturated atoms
|
|
//
|
|
if (Canon::chiralAtomNeedsTagInversion(
|
|
*mol, atom, atom->hasProp(common_properties::_SmilesStart),
|
|
numClosures)) {
|
|
++nSwaps;
|
|
}
|
|
// std::cerr << "nswaps " << atom->getIdx() << " " << nSwaps
|
|
// << std::endl;
|
|
// std::copy(bondOrdering.begin(), bondOrdering.end(),
|
|
// std::ostream_iterator<int>(std::cerr, ", "));
|
|
// std::cerr << std::endl;
|
|
if (nSwaps % 2) {
|
|
atom->invertChirality();
|
|
}
|
|
} else if (chiralType == Atom::CHI_SQUAREPLANAR ||
|
|
chiralType == Atom::CHI_TRIGONALBIPYRAMIDAL ||
|
|
chiralType == Atom::CHI_OCTAHEDRAL) {
|
|
INT_LIST bonds;
|
|
GetBondOrdering(bonds, mol, atom);
|
|
|
|
unsigned int ref_max = Chirality::getMaxNbors(chiralType);
|
|
|
|
// insert (-1) for hydrogens or missing ligands, where these are placed
|
|
// depends on if it is the first atom or not
|
|
if (bonds.size() < ref_max) {
|
|
if (atom->hasProp(common_properties::_SmilesStart)) {
|
|
bonds.insert(bonds.begin(), ref_max - bonds.size(), -1);
|
|
} else {
|
|
bonds.insert(++bonds.begin(), ref_max - bonds.size(), -1);
|
|
}
|
|
}
|
|
|
|
atom->setProp(common_properties::_chiralPermutation,
|
|
Chirality::getChiralPermutation(atom, bonds, true));
|
|
}
|
|
}
|
|
} // namespace SmilesParseOps
|
|
|
|
Bond::BondType GetUnspecifiedBondType(const RWMol *mol, const Atom *atom1,
|
|
const Atom *atom2) {
|
|
PRECONDITION(mol, "no molecule");
|
|
PRECONDITION(atom1, "no atom1");
|
|
PRECONDITION(atom2, "no atom2");
|
|
Bond::BondType res;
|
|
if (atom1->getIsAromatic() && atom2->getIsAromatic()) {
|
|
res = Bond::AROMATIC;
|
|
} else {
|
|
res = Bond::SINGLE;
|
|
}
|
|
return res;
|
|
}
|
|
void SetUnspecifiedBondTypes(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (auto bond : mol->bonds()) {
|
|
if (bond->hasProp(RDKit::common_properties::_unspecifiedOrder)) {
|
|
bond->setBondType(GetUnspecifiedBondType(mol, bond->getBeginAtom(),
|
|
bond->getEndAtom()));
|
|
if (bond->getBondType() == Bond::AROMATIC) {
|
|
bond->setIsAromatic(true);
|
|
} else {
|
|
bond->setIsAromatic(false);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
void swapBondDirIfNeeded(Bond *bond1, const Bond *bond2) {
|
|
PRECONDITION(bond1, "bad bond1");
|
|
PRECONDITION(bond2, "bad bond2");
|
|
if (bond1->getBondDir() == Bond::NONE && bond2->getBondDir() != Bond::NONE) {
|
|
bond1->setBondDir(bond2->getBondDir());
|
|
if (bond1->getBeginAtom() != bond2->getBeginAtom()) {
|
|
switch (bond1->getBondDir()) {
|
|
case Bond::ENDDOWNRIGHT:
|
|
bond1->setBondDir(Bond::ENDUPRIGHT);
|
|
break;
|
|
case Bond::ENDUPRIGHT:
|
|
bond1->setBondDir(Bond::ENDDOWNRIGHT);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} // namespace
|
|
|
|
static const std::map<int, int> permutationLimits = {
|
|
{RDKit::Atom::ChiralType::CHI_TETRAHEDRAL, 2},
|
|
{RDKit::Atom::ChiralType::CHI_ALLENE, 2},
|
|
{RDKit::Atom::ChiralType::CHI_SQUAREPLANAR, 3},
|
|
{RDKit::Atom::ChiralType::CHI_OCTAHEDRAL, 30},
|
|
{RDKit::Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL, 20}};
|
|
|
|
bool checkChiralPermutation(int chiralTag, int permutation) {
|
|
if (chiralTag > RDKit::Atom::ChiralType::CHI_OTHER &&
|
|
permutationLimits.find(chiralTag) != permutationLimits.end() &&
|
|
(permutation < 0 || permutation > permutationLimits.at(chiralTag))) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void CheckChiralitySpecifications(RDKit::RWMol *mol, bool strict) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (const auto atom : mol->atoms()) {
|
|
int permutation;
|
|
if (atom->getChiralTag() > RDKit::Atom::ChiralType::CHI_OTHER &&
|
|
permutationLimits.find(atom->getChiralTag()) !=
|
|
permutationLimits.end() &&
|
|
atom->getPropIfPresent(common_properties::_chiralPermutation,
|
|
permutation)) {
|
|
if (!checkChiralPermutation(atom->getChiralTag(), permutation)) {
|
|
std::string error =
|
|
(boost::format("Invalid chiral specification on atom %d") %
|
|
atom->getIdx())
|
|
.str();
|
|
BOOST_LOG(rdWarningLog) << error << std::endl;
|
|
if (strict) {
|
|
throw SmilesParseException(error);
|
|
}
|
|
}
|
|
// directly convert @TH1 -> @ and @TH2 -> @@
|
|
if (atom->getChiralTag() == RDKit::Atom::ChiralType::CHI_TETRAHEDRAL) {
|
|
if (permutation == 0 || permutation == 1) {
|
|
atom->setChiralTag(RDKit::Atom::ChiralType::CHI_TETRAHEDRAL_CCW);
|
|
atom->clearProp(common_properties::_chiralPermutation);
|
|
} else if (permutation == 2) {
|
|
atom->setChiralTag(RDKit::Atom::ChiralType::CHI_TETRAHEDRAL_CW);
|
|
atom->clearProp(common_properties::_chiralPermutation);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void CloseMolRings(RWMol *mol, bool toleratePartials) {
|
|
// Here's what we want to do here:
|
|
// loop through the molecule's atom bookmarks
|
|
// for each bookmark:
|
|
// connect pairs of atoms sharing that bookmark
|
|
// left to right (in the order in which they were
|
|
// inserted into the molecule).
|
|
// whilst doing this, we have to be cognizant of the fact that
|
|
// there may well be partial bonds in the molecule which need
|
|
// to be tied in as well. WOO HOO! IT'S A BIG MESS!
|
|
PRECONDITION(mol, "no molecule");
|
|
|
|
auto bookmarkIt = mol->getAtomBookmarks()->begin();
|
|
while (bookmarkIt != mol->getAtomBookmarks()->end()) {
|
|
auto &bookmark = *bookmarkIt;
|
|
// don't bother even considering bookmarks outside
|
|
// the range used for rings
|
|
if (couldBeRingClosure(bookmark.first)) {
|
|
RWMol::ATOM_PTR_LIST bookmarkedAtomsToRemove;
|
|
auto atomIt = bookmark.second.begin();
|
|
auto atomsEnd = bookmark.second.end();
|
|
while (atomIt != atomsEnd) {
|
|
Atom *atom1 = *atomIt;
|
|
++atomIt;
|
|
if (!toleratePartials && atomIt == atomsEnd) {
|
|
ReportParseError("unclosed ring");
|
|
} else if (atomIt != atomsEnd && *atomIt == atom1) {
|
|
// make sure we don't try to connect an atom to itself
|
|
// this was github #1925
|
|
auto fmt =
|
|
boost::format{
|
|
"duplicated ring closure %1% bonds atom %2% to itself"} %
|
|
bookmark.first % atom1->getIdx();
|
|
std::string msg = fmt.str();
|
|
ReportParseError(msg.c_str(), true);
|
|
} else if (mol->getBondBetweenAtoms(atom1->getIdx(),
|
|
(*atomIt)->getIdx()) != nullptr) {
|
|
auto fmt =
|
|
boost::format{
|
|
"ring closure %1% duplicates bond between atom %2% and atom "
|
|
"%3%"} %
|
|
bookmark.first % atom1->getIdx() % (*atomIt)->getIdx();
|
|
std::string msg = fmt.str();
|
|
ReportParseError(msg.c_str(), true);
|
|
} else if (atomIt != atomsEnd) {
|
|
// we actually found an atom, so connect it to the first
|
|
Atom *atom2 = *atomIt;
|
|
++atomIt;
|
|
|
|
int bondIdx = -1;
|
|
// We're guaranteed two partial bonds, one for each time
|
|
// the ring index was used. We give the first specification
|
|
// priority.
|
|
CHECK_INVARIANT(mol->hasBondBookmark(bookmark.first),
|
|
"Missing bond bookmark");
|
|
|
|
// now use the info from the partial bond:
|
|
// The partial bond itself will have a proper order and
|
|
// directionality (with a minor caveat documented below) and will
|
|
// have its beginning atom set already:
|
|
RWMol::BOND_PTR_LIST bonds =
|
|
mol->getAllBondsWithBookmark(bookmark.first);
|
|
auto bondIt = bonds.begin();
|
|
CHECK_INVARIANT(bonds.size() >= 2, "Missing bond");
|
|
|
|
// get pointers to the two bonds:
|
|
Bond *bond1 = *bondIt;
|
|
++bondIt;
|
|
Bond *bond2 = *bondIt;
|
|
|
|
// remove those bonds from the bookmarks:
|
|
mol->clearBondBookmark(bookmark.first, bond1);
|
|
mol->clearBondBookmark(bookmark.first, bond2);
|
|
|
|
// Make sure the bonds have the correct starting atoms:
|
|
CHECK_INVARIANT(bond1->getBeginAtomIdx() == atom1->getIdx(),
|
|
"bad begin atom");
|
|
CHECK_INVARIANT(bond2->getBeginAtomIdx() == atom2->getIdx(),
|
|
"bad begin atom");
|
|
|
|
// we use the _cxsmilesBondIdx value from the second one, if it's
|
|
// there
|
|
if (bond2->hasProp("_cxsmilesBondIdx")) {
|
|
bond1->setProp("_cxsmilesBondIdx",
|
|
bond2->getProp<unsigned int>("_cxsmilesBondIdx"));
|
|
}
|
|
|
|
Bond *matchedBond;
|
|
|
|
// figure out which (if either) bond has a specified type, we'll
|
|
// keep that one. We also need to update the end atom index to
|
|
// match FIX: daylight barfs when you give it multiple specs for the
|
|
// closure
|
|
// bond, we'll just take the first one and ignore others
|
|
// NOTE: we used to do this the other way (take the last
|
|
// specification),
|
|
// but that turned out to be troublesome in odd cases like
|
|
// C1CC11CC1.
|
|
// std::cerr << ">-------------" << std::endl;
|
|
// std::cerr << atom1->getIdx() << "-" << atom2->getIdx() << ": "
|
|
// << bond1->getBondType() << "("
|
|
// << bond1->hasProp(common_properties::_unspecifiedOrder)
|
|
// << "):" << bond1->getBondDir() << " "
|
|
// << bond2->getBondType() << "("
|
|
// << bond2->hasProp(common_properties::_unspecifiedOrder)
|
|
// << "):" << bond2->getBondDir() << std::endl;
|
|
if (!bond1->hasProp(common_properties::_unspecifiedOrder)) {
|
|
matchedBond = bond1;
|
|
if (matchedBond->getBondType() == Bond::DATIVEL) {
|
|
matchedBond->setBeginAtomIdx(atom2->getIdx());
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else if (matchedBond->getBondType() == Bond::DATIVER) {
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else {
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
}
|
|
swapBondDirIfNeeded(bond1, bond2);
|
|
delete bond2;
|
|
} else {
|
|
matchedBond = bond2;
|
|
if (matchedBond->getBondType() == Bond::DATIVEL) {
|
|
matchedBond->setBeginAtomIdx(atom1->getIdx());
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else if (matchedBond->getBondType() == Bond::DATIVER) {
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else {
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
}
|
|
swapBondDirIfNeeded(bond2, bond1);
|
|
delete bond1;
|
|
}
|
|
if (matchedBond->getBondType() == Bond::UNSPECIFIED &&
|
|
!matchedBond->hasQuery()) {
|
|
Bond::BondType bondT = GetUnspecifiedBondType(mol, atom1, atom2);
|
|
matchedBond->setBondType(bondT);
|
|
}
|
|
matchedBond->setOwningMol(mol);
|
|
if (matchedBond->getBondType() == Bond::AROMATIC) {
|
|
matchedBond->setIsAromatic(true);
|
|
}
|
|
|
|
// add the bond:
|
|
bondIdx = mol->addBond(matchedBond, true);
|
|
|
|
// we found a bond, so update the atom's _RingClosures
|
|
// property:
|
|
if (bondIdx > -1) {
|
|
CHECK_INVARIANT(
|
|
atom1->hasProp(common_properties::_RingClosures) &&
|
|
atom2->hasProp(common_properties::_RingClosures),
|
|
"somehow atom doesn't have _RingClosures property.");
|
|
INT_VECT closures;
|
|
atom1->getProp(common_properties::_RingClosures, closures);
|
|
auto closurePos = std::find(closures.begin(), closures.end(),
|
|
-(bookmark.first + 1));
|
|
CHECK_INVARIANT(closurePos != closures.end(),
|
|
"could not find bookmark in atom _RingClosures");
|
|
*closurePos = bondIdx - 1;
|
|
atom1->setProp(common_properties::_RingClosures, closures);
|
|
|
|
atom2->getProp(common_properties::_RingClosures, closures);
|
|
closurePos = std::find(closures.begin(), closures.end(),
|
|
-(bookmark.first + 1));
|
|
CHECK_INVARIANT(closurePos != closures.end(),
|
|
"could not find bookmark in atom _RingClosures");
|
|
*closurePos = bondIdx - 1;
|
|
atom2->setProp(common_properties::_RingClosures, closures);
|
|
}
|
|
bookmarkedAtomsToRemove.push_back(atom1);
|
|
bookmarkedAtomsToRemove.push_back(atom2);
|
|
}
|
|
}
|
|
int mark = bookmark.first;
|
|
++bookmarkIt;
|
|
for (const auto atom : bookmarkedAtomsToRemove) {
|
|
mol->clearAtomBookmark(mark, atom);
|
|
}
|
|
} else {
|
|
++bookmarkIt;
|
|
}
|
|
}
|
|
};
|
|
|
|
void CleanupAfterParsing(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (auto atom : mol->atoms()) {
|
|
atom->clearProp(common_properties::_RingClosures);
|
|
atom->clearProp(common_properties::_SmilesStart);
|
|
std::string label;
|
|
if (atom->getAtomicNum() == 0 &&
|
|
atom->getPropIfPresent(common_properties::atomLabel, label)) {
|
|
// marvinsketch can output higher labels than _AP1 and _AP2, but they
|
|
// aren't part of the MOL file spec so we don't treat them as attachment
|
|
// points
|
|
if (label == "_AP1") {
|
|
atom->setProp(common_properties::_fromAttachPoint, 1);
|
|
} else if (label == "_AP2") {
|
|
atom->setProp(common_properties::_fromAttachPoint, 2);
|
|
}
|
|
}
|
|
}
|
|
for (auto bond : mol->bonds()) {
|
|
bond->clearProp(common_properties::_unspecifiedOrder);
|
|
bond->clearProp("_cxsmilesBondIdx");
|
|
}
|
|
for (auto sg : RDKit::getSubstanceGroups(*mol)) {
|
|
sg.clearProp("_cxsmilesindex");
|
|
}
|
|
if (!Chirality::getAllowNontetrahedralChirality()) {
|
|
bool needWarn = false;
|
|
for (auto atom : mol->atoms()) {
|
|
if (atom->hasProp(common_properties::_chiralPermutation)) {
|
|
needWarn = true;
|
|
atom->clearProp(common_properties::_chiralPermutation);
|
|
}
|
|
if (atom->getChiralTag() > Atom::ChiralType::CHI_OTHER) {
|
|
needWarn = true;
|
|
atom->setChiralTag(Atom::ChiralType::CHI_UNSPECIFIED);
|
|
}
|
|
}
|
|
if (needWarn) {
|
|
BOOST_LOG(rdWarningLog)
|
|
<< "ignoring non-tetrahedral stereo specification since setAllowNontetrahedralChirality() is false."
|
|
<< std::endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
RDKit::QueryBond *getUnspecifiedQueryBond(const RDKit::Atom *a1,
|
|
const RDKit::Atom *a2) {
|
|
PRECONDITION(a1, "bad atom pointer");
|
|
QueryBond *newB;
|
|
if (!a1->getIsAromatic() || (a2 && !a2->getIsAromatic())) {
|
|
newB = new QueryBond(Bond::SINGLE);
|
|
newB->setQuery(makeSingleOrAromaticBondQuery());
|
|
} else {
|
|
newB = new QueryBond(Bond::AROMATIC);
|
|
newB->setQuery(makeSingleOrAromaticBondQuery());
|
|
}
|
|
newB->setProp(RDKit::common_properties::_unspecifiedOrder, 1);
|
|
return newB;
|
|
}
|
|
|
|
namespace detail {
|
|
|
|
void printSyntaxErrorMessage(std::string_view input,
|
|
std::string_view err_message,
|
|
unsigned int bad_token_position,
|
|
std::string_view input_type) {
|
|
// NOTE: If the input is very long, the pointer to the failed location
|
|
// becomes less useful. We should truncate the length of the error message
|
|
// to 41 chars.
|
|
static constexpr unsigned int error_size{41};
|
|
static constexpr unsigned int prefix_size{error_size / 2};
|
|
static auto truncate_input = [=](const auto &input, const unsigned int pos) {
|
|
if ((pos >= prefix_size) && (pos + prefix_size) < input.size()) {
|
|
return input.substr(pos - prefix_size, error_size);
|
|
} else if (pos >= prefix_size) {
|
|
return input.substr(pos - prefix_size);
|
|
} else {
|
|
return input.substr(
|
|
0, std::min(input.size(), static_cast<size_t>(error_size)));
|
|
}
|
|
};
|
|
|
|
size_t num_dashes =
|
|
(bad_token_position >= prefix_size ? prefix_size
|
|
: bad_token_position - 1);
|
|
|
|
BOOST_LOG(rdErrorLog) << input_type << " Parse Error: " << err_message
|
|
<< " while parsing: " << input << std::endl;
|
|
BOOST_LOG(rdErrorLog)
|
|
<< input_type << " Parse Error: check for mistakes around position "
|
|
<< bad_token_position << ":" << std::endl;
|
|
BOOST_LOG(rdErrorLog) << truncate_input(input, bad_token_position - 1)
|
|
<< std::endl;
|
|
BOOST_LOG(rdErrorLog) << std::string(num_dashes, '~') << "^" << std::endl;
|
|
}
|
|
} // namespace detail
|
|
} // end of namespace SmilesParseOps
|