mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
684 lines
26 KiB
C++
684 lines
26 KiB
C++
//
|
|
// Copyright (C) 2001-2016 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include <GraphMol/RDKitBase.h>
|
|
#include <GraphMol/RDKitQueries.h>
|
|
#include "SmilesParse.h"
|
|
#include "SmilesParseOps.h"
|
|
#include <list>
|
|
#include <algorithm>
|
|
#include <boost/dynamic_bitset.hpp>
|
|
#include <boost/foreach.hpp>
|
|
#include <boost/format.hpp>
|
|
#include <RDGeneral/RDLog.h>
|
|
|
|
namespace SmilesParseOps {
|
|
using namespace RDKit;
|
|
|
|
void ClearAtomChemicalProps(RDKit::Atom *atom) {
|
|
TEST_ASSERT(atom);
|
|
atom->setIsotope(0);
|
|
atom->setFormalCharge(0);
|
|
atom->setNumExplicitHs(0);
|
|
}
|
|
|
|
void CheckRingClosureBranchStatus(RDKit::Atom *atom, RDKit::RWMol *mp) {
|
|
// github #786 and #1652: if the ring closure comes after a branch,
|
|
// the stereochem is wrong.
|
|
// This function is called while closing a branch during construction of
|
|
// the molecule from SMILES and corrects for what happens when parsing odd
|
|
// (and arguably wrong) SMILES constructs like:
|
|
// 1) [C@@](F)1(C)CCO1
|
|
// 2) C1CN[C@](O)(N)1
|
|
// 3) [C@](Cl)(F)1CC[C@H](F)CC1
|
|
// In the first two cases the stereochemistry at the chiral atom
|
|
// needs to be reversed. In the third case the stereochemistry should be
|
|
// reversed when the Cl is added, but left alone when the F is added.
|
|
// We recognize these situations using the index of the chiral atom
|
|
// and the degree of that chiral atom at the time the ring closure
|
|
// digit is encountered during parsing.
|
|
// ----------
|
|
// github #1972 adds these examples:
|
|
// 1) [C@@]1(Cl)(F)I.Br1 (ok)
|
|
// 2) [C@@](Cl)1(F)I.Br1 (reverse)
|
|
// 3) [C@@](Cl)(F)1I.Br1 (ok)
|
|
// 4) [C@@](Cl)(F)(I)1.Br1 (reverse)
|
|
if (atom->getIdx() != mp->getNumAtoms(true) - 1 &&
|
|
(atom->getDegree() == 1 ||
|
|
(atom->getDegree() == 2 && atom->getIdx() != 0) ||
|
|
(atom->getDegree() == 3 && atom->getIdx() == 0)) &&
|
|
(atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW ||
|
|
atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW)) {
|
|
// std::cerr << "crcbs: " << atom->getIdx() << std::endl;
|
|
atom->invertChirality();
|
|
}
|
|
}
|
|
|
|
void ReportParseError(const char *message, bool throwIt) {
|
|
if (!throwIt) {
|
|
BOOST_LOG(rdErrorLog) << "SMILES Parse Error: " << message << std::endl;
|
|
} else {
|
|
throw SmilesParseException(message);
|
|
}
|
|
}
|
|
|
|
void CleanupAfterParseError(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
// blow out any partial bonds:
|
|
RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
|
|
auto markI = marks->begin();
|
|
while (markI != marks->end()) {
|
|
RWMol::BOND_PTR_LIST &bonds = markI->second;
|
|
for (auto &bond : bonds) {
|
|
delete bond;
|
|
}
|
|
++markI;
|
|
}
|
|
}
|
|
|
|
//
|
|
// set bondOrder to Bond::IONIC to skip the formation of a bond
|
|
// between the fragment and the molecule
|
|
//
|
|
void AddFragToMol(RWMol *mol, RWMol *frag, Bond::BondType bondOrder,
|
|
Bond::BondDir bondDir, bool closeRings, bool doingQuery) {
|
|
PRECONDITION(mol, "no molecule");
|
|
PRECONDITION(frag, "no fragment");
|
|
PRECONDITION(mol->getActiveAtom(), "no active atom");
|
|
Atom *lastAt = mol->getActiveAtom();
|
|
int nOrigAtoms = mol->getNumAtoms();
|
|
int nOrigBonds = mol->getNumBonds();
|
|
|
|
//
|
|
// close any rings we can in the fragment
|
|
//
|
|
if (closeRings) {
|
|
CloseMolRings(frag, true);
|
|
}
|
|
|
|
//
|
|
// Add the fragment's atoms and bonds to the molecule:
|
|
//
|
|
mol->insertMol(*frag);
|
|
|
|
//
|
|
// update ring-closure order information on the added atoms:
|
|
//
|
|
for (RWMol::AtomIterator atomIt = frag->beginAtoms();
|
|
atomIt != frag->endAtoms(); atomIt++) {
|
|
INT_VECT tmpVect;
|
|
if ((*atomIt)->getPropIfPresent(common_properties::_RingClosures,
|
|
tmpVect)) {
|
|
BOOST_FOREACH (int &v, tmpVect) {
|
|
// if the ring closure is not already a bond, don't touch it:
|
|
if (v >= 0) v += nOrigBonds;
|
|
}
|
|
Atom *newAtom = mol->getAtomWithIdx(nOrigAtoms + (*atomIt)->getIdx());
|
|
newAtom->setProp(common_properties::_RingClosures, tmpVect);
|
|
}
|
|
}
|
|
|
|
//
|
|
// ses up the bond between the mol and the branch
|
|
//
|
|
if (bondOrder != Bond::IONIC) {
|
|
// FIX: this is not so much with the elegance...
|
|
Atom *firstAt = mol->getAtomWithIdx(nOrigAtoms);
|
|
Bond::BondType bo;
|
|
int atomIdx1, atomIdx2;
|
|
atomIdx1 = firstAt->getIdx();
|
|
atomIdx2 = lastAt->getIdx();
|
|
if (frag->hasBondBookmark(ci_LEADING_BOND)) {
|
|
// std::cout << "found it" << std::endl;
|
|
const ROMol::BOND_PTR_LIST &leadingBonds =
|
|
frag->getAllBondsWithBookmark(ci_LEADING_BOND);
|
|
BOOST_FOREACH (Bond *leadingBond, leadingBonds) {
|
|
// we've already got a bond, so just set its local info
|
|
// and then add it to the molecule intact (no sense doing
|
|
// any extra work).
|
|
leadingBond->setOwningMol(mol);
|
|
leadingBond->setEndAtomIdx(leadingBond->getBeginAtomIdx() + nOrigAtoms);
|
|
leadingBond->setBeginAtomIdx(atomIdx2);
|
|
mol->addBond(leadingBond, true);
|
|
}
|
|
mol->clearBondBookmark(ci_LEADING_BOND);
|
|
} else {
|
|
if (!doingQuery) {
|
|
if (bondOrder == Bond::UNSPECIFIED) {
|
|
// no bond order provided, figure it out ourselves
|
|
if (lastAt->getIsAromatic() && firstAt->getIsAromatic()) {
|
|
bo = Bond::AROMATIC;
|
|
} else {
|
|
bo = Bond::SINGLE;
|
|
}
|
|
} else {
|
|
bo = bondOrder;
|
|
}
|
|
if (bo == Bond::DATIVEL) {
|
|
int tmp = atomIdx2;
|
|
atomIdx2 = atomIdx1;
|
|
atomIdx1 = tmp;
|
|
bo = Bond::DATIVE;
|
|
} else if (bo == Bond::DATIVER) {
|
|
bo = Bond::DATIVE;
|
|
}
|
|
int idx = mol->addBond(atomIdx2, atomIdx1, bo) - 1;
|
|
mol->getBondWithIdx(idx)->setBondDir(bondDir);
|
|
} else {
|
|
// semantics are different in SMARTS, unspecified bonds can be single or
|
|
// aromatic:
|
|
if (bondOrder == Bond::UNSPECIFIED) {
|
|
auto *newB = new QueryBond(Bond::SINGLE);
|
|
newB->expandQuery(makeBondOrderEqualsQuery(Bond::AROMATIC),
|
|
Queries::COMPOSITE_OR, true);
|
|
newB->setOwningMol(mol);
|
|
newB->setBeginAtomIdx(atomIdx1);
|
|
newB->setEndAtomIdx(atomIdx2);
|
|
mol->addBond(newB);
|
|
delete newB;
|
|
} else {
|
|
bo = bondOrder;
|
|
if (bo == Bond::DATIVEL) {
|
|
int tmp = atomIdx2;
|
|
atomIdx2 = atomIdx1;
|
|
atomIdx1 = tmp;
|
|
bo = Bond::DATIVE;
|
|
} else if (bo == Bond::DATIVER) {
|
|
bo = Bond::DATIVE;
|
|
}
|
|
int idx = mol->addBond(atomIdx2, atomIdx1, bo) - 1;
|
|
mol->getBondWithIdx(idx)->setBondDir(bondDir);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// okay, the next thing we have to worry about is the possibility
|
|
// that there might be ring opening/closing in the fragment we just
|
|
// dealt with e.g. for things like C1C(C1) and C1C.C1
|
|
// We deal with this by copying in the bookmarks and partial bonds
|
|
// that exist in the fragment
|
|
//
|
|
RWMol::ATOM_BOOKMARK_MAP::iterator atIt;
|
|
for (atIt = frag->getAtomBookmarks()->begin();
|
|
atIt != frag->getAtomBookmarks()->end(); ++atIt) {
|
|
// don't bother even considering bookmarks outside
|
|
// the range used for loops
|
|
if (atIt->first < 100 && atIt->first > 0) {
|
|
RWMol::ATOM_PTR_LIST::iterator otherAt;
|
|
for (otherAt = atIt->second.begin(); otherAt != atIt->second.end();
|
|
otherAt++) {
|
|
Atom *at2 = *otherAt;
|
|
int newIdx = at2->getIdx() + nOrigAtoms;
|
|
mol->setAtomBookmark(mol->getAtomWithIdx(newIdx), atIt->first);
|
|
// frag->clearAtomBookmark(atIt->first,at2);
|
|
while (frag->hasBondBookmark(atIt->first)) {
|
|
Bond *b = frag->getBondWithBookmark(atIt->first);
|
|
int atomIdx1 = b->getBeginAtomIdx() + nOrigAtoms;
|
|
b->setOwningMol(mol);
|
|
b->setBeginAtomIdx(atomIdx1);
|
|
mol->setBondBookmark(b, atIt->first);
|
|
frag->clearBondBookmark(atIt->first, b);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
frag->clearAllAtomBookmarks();
|
|
frag->clearAllBondBookmarks();
|
|
};
|
|
|
|
void _invChiralRingAtomWithHs(Atom *atom) {
|
|
PRECONDITION(atom, "bad atom");
|
|
// we will assume that this function is called on a ring atom with a
|
|
// ring closure bond
|
|
if (atom->getNumExplicitHs() == 1) {
|
|
atom->invertChirality();
|
|
}
|
|
}
|
|
typedef std::pair<size_t, size_t> SIZET_PAIR;
|
|
typedef std::pair<int, int> INT_PAIR;
|
|
template <typename T>
|
|
bool operator<(const std::pair<T, T> &p1, const std::pair<T, T> &p2) {
|
|
return p1.first < p2.first;
|
|
}
|
|
namespace {
|
|
bool isUnsaturated(const Atom *atom, const RWMol *mol) {
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = mol->getAtomBonds(atom);
|
|
while (beg != end) {
|
|
const Bond *bond = (*mol)[*beg];
|
|
++beg;
|
|
if (bond->getBondType() != Bond::SINGLE) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool hasSingleHQuery(const Atom::QUERYATOM_QUERY *q) {
|
|
// list queries are series of nested ors of AtomAtomicNum queries
|
|
PRECONDITION(q, "bad query");
|
|
bool res = false;
|
|
std::string descr = q->getDescription();
|
|
if (descr == "AtomAnd") {
|
|
for (auto cIt = q->beginChildren(); cIt != q->endChildren(); ++cIt) {
|
|
std::string descr = (*cIt)->getDescription();
|
|
if (descr == "AtomHCount") {
|
|
if (!(*cIt)->getNegation() &&
|
|
((ATOM_EQUALS_QUERY *)(*cIt).get())->getVal() == 1) {
|
|
return true;
|
|
}
|
|
return false;
|
|
} else if (descr == "AtomAnd") {
|
|
res = hasSingleHQuery((*cIt).get());
|
|
if (res) return true;
|
|
}
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
bool atomHasExplicitHs(const Atom *atom) {
|
|
if (atom->getNumExplicitHs()) return true;
|
|
if (atom->hasQuery()) {
|
|
// the SMARTS [C@@H] produces an atom with a H query, but we also
|
|
// need to treat this like an explicit H for chirality purposes
|
|
// This was Github #1489
|
|
bool res = hasSingleHQuery(atom->getQuery());
|
|
return res;
|
|
}
|
|
return false;
|
|
}
|
|
} // end of anonymous namespace
|
|
void AdjustAtomChiralityFlags(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (RWMol::AtomIterator atomIt = mol->beginAtoms();
|
|
atomIt != mol->endAtoms(); ++atomIt) {
|
|
Atom::ChiralType chiralType = (*atomIt)->getChiralTag();
|
|
if (chiralType == Atom::CHI_TETRAHEDRAL_CW ||
|
|
chiralType == Atom::CHI_TETRAHEDRAL_CCW) {
|
|
//
|
|
// The atom is marked as chiral, set the SMILES-order of the
|
|
// atom's bonds. This is easy for non-ring-closure bonds,
|
|
// because the SMILES order is determined solely by the atom
|
|
// indices. Things are trickier for ring-closure bonds, which we
|
|
// need to insert into the list in a particular order
|
|
//
|
|
INT_VECT ringClosures;
|
|
(*atomIt)->getPropIfPresent(common_properties::_RingClosures,
|
|
ringClosures);
|
|
|
|
#if 0
|
|
std::cerr << "CLOSURES: ";
|
|
std::copy(ringClosures.begin(), ringClosures.end(),
|
|
std::ostream_iterator<int>(std::cerr, " "));
|
|
std::cerr << std::endl;
|
|
#endif
|
|
std::list<SIZET_PAIR> neighbors;
|
|
// push this atom onto the list of neighbors (we'll use this
|
|
// to find our place later):
|
|
neighbors.push_back(std::make_pair((*atomIt)->getIdx(), -1));
|
|
std::list<size_t> bondOrder;
|
|
RWMol::ADJ_ITER nbrIdx, endNbrs;
|
|
boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(*atomIt);
|
|
while (nbrIdx != endNbrs) {
|
|
Bond *nbrBond = mol->getBondBetweenAtoms((*atomIt)->getIdx(), *nbrIdx);
|
|
if (std::find(ringClosures.begin(), ringClosures.end(),
|
|
static_cast<int>(nbrBond->getIdx())) ==
|
|
ringClosures.end()) {
|
|
neighbors.push_back(std::make_pair(*nbrIdx, nbrBond->getIdx()));
|
|
}
|
|
++nbrIdx;
|
|
}
|
|
// sort the list of non-ring-closure bonds:
|
|
neighbors.sort();
|
|
|
|
// find the location of this atom. it pretty much has to be
|
|
// first in the list, e.g for smiles like [C@](F)(Cl)(Br)I, or
|
|
// second (everything else).
|
|
auto selfPos = neighbors.begin();
|
|
if (selfPos->first != (*atomIt)->getIdx()) {
|
|
++selfPos;
|
|
}
|
|
CHECK_INVARIANT(selfPos->first == (*atomIt)->getIdx(),
|
|
"weird atom ordering");
|
|
|
|
// copy over the bond ids:
|
|
INT_LIST bondOrdering;
|
|
for (auto neighborIt = neighbors.begin(); neighborIt != neighbors.end();
|
|
++neighborIt) {
|
|
if (neighborIt != selfPos) {
|
|
bondOrdering.push_back(rdcast<int>(neighborIt->second));
|
|
} else {
|
|
// we are not going to add the atom itself, but we will push on
|
|
// ring closure bonds at this point (if required):
|
|
BOOST_FOREACH (int closure, ringClosures) {
|
|
bondOrdering.push_back(closure);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ok, we now have the SMILES ordering of the bonds, figure out the
|
|
// permutation order.
|
|
//
|
|
// This whole thing is necessary because the ring-closure bonds
|
|
// in the SMILES come before the bonds to the other neighbors, but
|
|
// they come after the neighbors in the molecule we build.
|
|
// A crude example:
|
|
// in F[C@](Cl)(Br)I the C-Cl bond is index 1 in both SMILES
|
|
// and as built
|
|
// in F[C@]1(Br)I.Cl1 the C-Cl bond is index 1 in the SMILES
|
|
// and index 3 as built.
|
|
//
|
|
int nSwaps = (*atomIt)->getPerturbationOrder(bondOrdering);
|
|
// FIX: explain this one:
|
|
// At least part of what's going on here for degree 3 atoms:
|
|
// - The first part: if we're at the beginning of the SMILES and have
|
|
// an explicit H, we need to add a swap.
|
|
// This is to reflect that [C@](Cl)(F)C is equivalent to Cl[C@@](F)C
|
|
// but [C@H](Cl)(F)C is fine as-is (The H-C bond is the one you look
|
|
// down).
|
|
// - The second part is more complicated and deals with situations like
|
|
// F[C@]1CCO1. In this case we otherwise end up looking like we need
|
|
// to invert the chirality, which is bogus. The chirality here needs
|
|
// to remain @ just as it does in F[C@](Cl)CCO1
|
|
// - We have to be careful with the second part to not sweep things like
|
|
// C[S@]2(=O).Cl2 into the same bin (was github #760). We detect
|
|
// those cases by looking for unsaturated atoms
|
|
//
|
|
if ((*atomIt)->getDegree() == 3 &&
|
|
(((*atomIt)->getNumExplicitHs() &&
|
|
(*atomIt)->hasProp(common_properties::_SmilesStart)) ||
|
|
(!atomHasExplicitHs(*atomIt) && ringClosures.size() == 1 &&
|
|
!isUnsaturated(*atomIt, mol)))) {
|
|
++nSwaps;
|
|
}
|
|
// std::cerr << "nswaps " << (*atomIt)->getIdx() << " " << nSwaps
|
|
// << std::endl;
|
|
// std::copy(bondOrdering.begin(), bondOrdering.end(),
|
|
// std::ostream_iterator<int>(std::cerr, ", "));
|
|
// std::cerr << std::endl;
|
|
if (nSwaps % 2) {
|
|
(*atomIt)->invertChirality();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Bond::BondType GetUnspecifiedBondType(const RWMol *mol, const Atom *atom1,
|
|
const Atom *atom2) {
|
|
PRECONDITION(mol, "no molecule");
|
|
PRECONDITION(atom1, "no atom1");
|
|
PRECONDITION(atom2, "no atom2");
|
|
Bond::BondType res;
|
|
if (atom1->getIsAromatic() && atom2->getIsAromatic()) {
|
|
res = Bond::AROMATIC;
|
|
} else {
|
|
res = Bond::SINGLE;
|
|
}
|
|
return res;
|
|
}
|
|
void SetUnspecifiedBondTypes(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (auto bond : mol->bonds()) {
|
|
if (bond->hasProp(RDKit::common_properties::_unspecifiedOrder)) {
|
|
bond->setBondType(GetUnspecifiedBondType(mol, bond->getBeginAtom(),
|
|
bond->getEndAtom()));
|
|
if (bond->getBondType() == Bond::AROMATIC) {
|
|
bond->setIsAromatic(true);
|
|
} else {
|
|
bond->setIsAromatic(false);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
void swapBondDirIfNeeded(Bond *bond1, const Bond *bond2) {
|
|
if (bond1->getBondDir() == Bond::NONE && bond2->getBondDir() != Bond::NONE) {
|
|
bond1->setBondDir(bond2->getBondDir());
|
|
if (bond1->getBeginAtom() != bond2->getBeginAtom()) {
|
|
switch (bond1->getBondDir()) {
|
|
case Bond::ENDDOWNRIGHT:
|
|
bond1->setBondDir(Bond::ENDUPRIGHT);
|
|
break;
|
|
case Bond::ENDUPRIGHT:
|
|
bond1->setBondDir(Bond::ENDDOWNRIGHT);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} // namespace
|
|
|
|
void CloseMolRings(RWMol *mol, bool toleratePartials) {
|
|
// Here's what we want to do here:
|
|
// loop through the molecule's atom bookmarks
|
|
// for each bookmark:
|
|
// connect pairs of atoms sharing that bookmark
|
|
// left to right (in the order in which they were
|
|
// inserted into the molecule).
|
|
// whilst doing this, we have to be cognizant of the fact that
|
|
// there may well be partial bonds in the molecule which need
|
|
// to be tied in as well. WOO HOO! IT'S A BIG MESS!
|
|
PRECONDITION(mol, "no molecule");
|
|
RWMol::ATOM_BOOKMARK_MAP::iterator bookmarkIt;
|
|
bookmarkIt = mol->getAtomBookmarks()->begin();
|
|
while (bookmarkIt != mol->getAtomBookmarks()->end()) {
|
|
// don't bother even considering bookmarks outside
|
|
// the range used for loops
|
|
if (bookmarkIt->first < 100000 && bookmarkIt->first >= 0) {
|
|
RWMol::ATOM_PTR_LIST::iterator atomIt, atomsEnd;
|
|
RWMol::ATOM_PTR_LIST bookmarkedAtomsToRemove;
|
|
atomIt = bookmarkIt->second.begin();
|
|
atomsEnd = bookmarkIt->second.end();
|
|
while (atomIt != atomsEnd) {
|
|
Atom *atom1 = *atomIt;
|
|
++atomIt;
|
|
if (!toleratePartials && atomIt == atomsEnd) {
|
|
ReportParseError("unclosed ring");
|
|
} else if (atomIt != atomsEnd && *atomIt == atom1) {
|
|
// make sure we don't try to connect an atom to itself
|
|
// this was github #1925
|
|
auto fmt =
|
|
boost::format{
|
|
"duplicated ring closure %1% bonds atom %2% to itself"} %
|
|
bookmarkIt->first % atom1->getIdx();
|
|
std::string msg = fmt.str();
|
|
ReportParseError(msg.c_str(), true);
|
|
} else if (atomIt != atomsEnd) {
|
|
// we actually found an atom, so connect it to the first
|
|
Atom *atom2 = *atomIt;
|
|
++atomIt;
|
|
|
|
int bondIdx = -1;
|
|
// We're guaranteed two partial bonds, one for each time
|
|
// the ring index was used. We give the first specification
|
|
// priority.
|
|
CHECK_INVARIANT(mol->hasBondBookmark(bookmarkIt->first),
|
|
"Missing bond bookmark");
|
|
|
|
// now use the info from the partial bond:
|
|
// The partial bond itself will have a proper order and
|
|
// directionality (with a minor caveat documented below) and will
|
|
// have its beginning atom set already:
|
|
RWMol::BOND_PTR_LIST bonds =
|
|
mol->getAllBondsWithBookmark(bookmarkIt->first);
|
|
auto bondIt = bonds.begin();
|
|
CHECK_INVARIANT(bonds.size() >= 2, "Missing bond");
|
|
|
|
// get pointers to the two bonds:
|
|
Bond *bond1 = *bondIt;
|
|
++bondIt;
|
|
Bond *bond2 = *bondIt;
|
|
|
|
// remove those bonds from the bookmarks:
|
|
mol->clearBondBookmark(bookmarkIt->first, bond1);
|
|
mol->clearBondBookmark(bookmarkIt->first, bond2);
|
|
|
|
// Make sure the bonds have the correct starting atoms:
|
|
CHECK_INVARIANT(bond1->getBeginAtomIdx() == atom1->getIdx(),
|
|
"bad begin atom");
|
|
CHECK_INVARIANT(bond2->getBeginAtomIdx() == atom2->getIdx(),
|
|
"bad begin atom");
|
|
|
|
Bond *matchedBond;
|
|
|
|
// figure out which (if either) bond has a specified type, we'll
|
|
// keep that one. We also need to update the end atom index to
|
|
// match FIX: daylight barfs when you give it multiple specs for the
|
|
// closure
|
|
// bond, we'll just take the first one and ignore others
|
|
// NOTE: we used to do this the other way (take the last
|
|
// specification),
|
|
// but that turned out to be troublesome in odd cases like
|
|
// C1CC11CC1.
|
|
// std::cerr << ">-------------" << std::endl;
|
|
// std::cerr << atom1->getIdx() << "-" << atom2->getIdx() << ": "
|
|
// << bond1->getBondType() << "("
|
|
// << bond1->hasProp(common_properties::_unspecifiedOrder)
|
|
// << "):" << bond1->getBondDir() << " "
|
|
// << bond2->getBondType() << "("
|
|
// << bond2->hasProp(common_properties::_unspecifiedOrder)
|
|
// << "):" << bond2->getBondDir() << std::endl;
|
|
if (!bond1->hasProp(common_properties::_unspecifiedOrder)) {
|
|
matchedBond = bond1;
|
|
if (matchedBond->getBondType() == Bond::DATIVEL) {
|
|
matchedBond->setBeginAtomIdx(atom2->getIdx());
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else if (matchedBond->getBondType() == Bond::DATIVER) {
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else {
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
}
|
|
swapBondDirIfNeeded(bond1, bond2);
|
|
delete bond2;
|
|
} else {
|
|
matchedBond = bond2;
|
|
if (matchedBond->getBondType() == Bond::DATIVEL) {
|
|
matchedBond->setBeginAtomIdx(atom1->getIdx());
|
|
matchedBond->setEndAtomIdx(atom2->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else if (matchedBond->getBondType() == Bond::DATIVER) {
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
matchedBond->setBondType(Bond::DATIVE);
|
|
} else {
|
|
matchedBond->setEndAtomIdx(atom1->getIdx());
|
|
}
|
|
swapBondDirIfNeeded(bond2, bond1);
|
|
delete bond1;
|
|
}
|
|
if (matchedBond->getBondType() == Bond::UNSPECIFIED &&
|
|
!matchedBond->hasQuery()) {
|
|
Bond::BondType bondT = GetUnspecifiedBondType(mol, atom1, atom2);
|
|
matchedBond->setBondType(bondT);
|
|
}
|
|
matchedBond->setOwningMol(mol);
|
|
if (matchedBond->getBondType() == Bond::AROMATIC) {
|
|
matchedBond->setIsAromatic(true);
|
|
}
|
|
|
|
// std::cerr << " " <<
|
|
// matchedBond->getBeginAtomIdx()<<"-"<<matchedBond->getEndAtomIdx()<<":
|
|
// "<<matchedBond->getBondType() << " a:
|
|
// "<<matchedBond->getIsAromatic()<<std::endl; std::cerr <<
|
|
// "<-------------" << std::endl;
|
|
#if 0
|
|
//
|
|
// In cases like this: Cl\C=C1.F/1, we need to
|
|
// reverse the directionality on the added bond
|
|
// (because the bond is added from C -> F, but the
|
|
// directionality is for F->C. We recognize these
|
|
// cases because the matched bond direction isn't
|
|
// the same as the added bond direction (i.e. atom1
|
|
// isn't the begin atom for the matched bond).
|
|
//
|
|
// This was Issue 175
|
|
if(atom1->getIdx()!=matchedBond->getBeginAtomIdx()){
|
|
switch(matchedBond->getBondDir()){
|
|
case Bond::ENDUPRIGHT:
|
|
matchedBond->setBondDir(Bond::ENDDOWNRIGHT);
|
|
break;
|
|
case Bond::ENDDOWNRIGHT:
|
|
matchedBond->setBondDir(Bond::ENDUPRIGHT);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
// add the bond:
|
|
bondIdx = mol->addBond(matchedBond, true);
|
|
|
|
// we found a bond, so update the atom's _RingClosures
|
|
// property:
|
|
if (bondIdx > -1) {
|
|
CHECK_INVARIANT(
|
|
atom1->hasProp(common_properties::_RingClosures) &&
|
|
atom2->hasProp(common_properties::_RingClosures),
|
|
"somehow atom doesn't have _RingClosures property.");
|
|
INT_VECT closures;
|
|
atom1->getProp(common_properties::_RingClosures, closures);
|
|
auto closurePos = std::find(closures.begin(), closures.end(),
|
|
-(bookmarkIt->first + 1));
|
|
CHECK_INVARIANT(closurePos != closures.end(),
|
|
"could not find bookmark in atom _RingClosures");
|
|
*closurePos = bondIdx - 1;
|
|
atom1->setProp(common_properties::_RingClosures, closures);
|
|
|
|
atom2->getProp(common_properties::_RingClosures, closures);
|
|
closurePos = std::find(closures.begin(), closures.end(),
|
|
-(bookmarkIt->first + 1));
|
|
CHECK_INVARIANT(closurePos != closures.end(),
|
|
"could not find bookmark in atom _RingClosures");
|
|
*closurePos = bondIdx - 1;
|
|
atom2->setProp(common_properties::_RingClosures, closures);
|
|
}
|
|
bookmarkedAtomsToRemove.push_back(atom1);
|
|
bookmarkedAtomsToRemove.push_back(atom2);
|
|
}
|
|
}
|
|
//
|
|
// increment the bookmark before calling erase. Otherwise we
|
|
// get a seg fault under MSVC++
|
|
//
|
|
int mark = bookmarkIt->first;
|
|
bookmarkIt++;
|
|
RWMol::ATOM_PTR_LIST::const_iterator aplci;
|
|
BOOST_FOREACH (Atom *atom, bookmarkedAtomsToRemove) {
|
|
mol->clearAtomBookmark(mark, atom);
|
|
}
|
|
} else {
|
|
++bookmarkIt;
|
|
}
|
|
}
|
|
};
|
|
|
|
void CleanupAfterParsing(RWMol *mol) {
|
|
PRECONDITION(mol, "no molecule");
|
|
for (RWMol::AtomIterator atomIt = mol->beginAtoms();
|
|
atomIt != mol->endAtoms(); ++atomIt) {
|
|
if ((*atomIt)->hasProp(common_properties::_RingClosures))
|
|
(*atomIt)->clearProp(common_properties::_RingClosures);
|
|
if ((*atomIt)->hasProp(common_properties::_SmilesStart))
|
|
(*atomIt)->clearProp(common_properties::_SmilesStart);
|
|
}
|
|
for (RWMol::BondIterator bondIt = mol->beginBonds();
|
|
bondIt != mol->endBonds(); ++bondIt) {
|
|
if ((*bondIt)->hasProp(common_properties::_unspecifiedOrder))
|
|
(*bondIt)->clearProp(common_properties::_unspecifiedOrder);
|
|
}
|
|
}
|
|
|
|
} // end of namespace SmilesParseOps
|