Files
rdkit/Code/GraphMol/MolOps.cpp
2025-05-27 16:43:20 +02:00

1342 lines
44 KiB
C++

//
// Copyright (C) 2001-2023 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <GraphMol/GraphMol.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/Atom.h>
#include <GraphMol/AtomIterators.h>
#include <GraphMol/BondIterators.h>
#include <GraphMol/PeriodicTable.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/RDKitQueries.h>
#include <vector>
#include <algorithm>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/graph/connected_components.hpp>
#include <boost/graph/kruskal_min_spanning_tree.hpp>
#include <boost/graph/johnson_all_pairs_shortest.hpp>
#include <boost/version.hpp>
#if BOOST_VERSION >= 104000
#include <boost/property_map/property_map.hpp>
#else
#include <boost/property_map.hpp>
#endif
#include <RDGeneral/BoostEndInclude.h>
#include <boost/config.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/tokenizer.hpp>
#include <Geometry/point.h>
#include <GraphMol/QueryOps.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/new_canon.h>
#include <GraphMol/FileParsers/MolSGroupParsing.h>
const int ci_LOCAL_INF = static_cast<int>(1e8);
namespace RDKit {
namespace MolOps {
namespace {
void nitrogenCleanup(RWMol &mol, Atom *atom) {
// conversions here:
// - neutral 5 coordinate Ns with double bonds to Os to the
// zwitterionic form. e.g.:
// CN(=O)=O -> C[N+](=O)[O-]
// and:
// C1=CC=CN(=O)=C1 -> C1=CC=C[N+]([O-])=C1
// - neutral 5 coordinate Ns with triple bonds to Ns to the
// zwitterionic form. e.g.:
// C-N=N#N -> C-N=[N+]=[N-]
PRECONDITION(atom, "bad atom");
bool aromHolder;
// we only want to do neutrals so that things like this don't get
// munged:
// O=[n+]1occcc1
// this was sf.net issue 1811276
if (atom->getFormalCharge()) {
return;
}
// we need to play this little aromaticity game because the
// explicit valence code modifies its results for aromatic
// atoms.
aromHolder = atom->getIsAromatic();
atom->setIsAromatic(0);
// NOTE that we are calling calcExplicitValence() here, we do
// this because we cannot be sure that it has already been
// called on the atom (cleanUp() gets called pretty early in
// the sanitization process):
if (atom->calcExplicitValence(false) == 5) {
unsigned int aid = atom->getIdx();
for (const auto nbr : mol.atomNeighbors(atom)) {
if ((nbr->getAtomicNum() == 8) && (nbr->getFormalCharge() == 0) &&
(mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
Bond::DOUBLE)) {
// here's the double bonded oxygen
auto b = mol.getBondBetweenAtoms(aid, nbr->getIdx());
b->setBondType(Bond::SINGLE);
atom->setFormalCharge(1);
nbr->setFormalCharge(-1);
break;
} else if ((nbr->getAtomicNum() == 7) && (nbr->getFormalCharge() == 0) &&
(mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
Bond::TRIPLE)) {
// here's the triple bonded nitrogen
auto b = mol.getBondBetweenAtoms(aid, nbr->getIdx());
b->setBondType(Bond::DOUBLE);
atom->setFormalCharge(1);
nbr->setFormalCharge(-1);
break;
}
} // end of loop over the first neigh
} // if this atom is 5 coordinate nitrogen
// force a recalculation of the explicit valence here
atom->setIsAromatic(aromHolder);
atom->calcExplicitValence(false);
}
void phosphorusCleanup(RWMol &mol, Atom *atom) {
// conversions here:
// - neutral 5 coordinate Ps with one double bonds to an Os
// and one to a C or N to the zwitterionic form. e.g.:
// C=P(=O)X -> C=[P+]([O-])X
PRECONDITION(atom, "bad atom");
// we only want to do neutrals
if (atom->getFormalCharge()) {
return;
}
// NOTE that we are calling calcExplicitValence() here, we do
// this because we cannot be sure that it has already been
// called on the atom (cleanUp() gets called pretty early in
// the sanitization process):
if (atom->calcExplicitValence(false) == 5 && atom->getDegree() == 3) {
unsigned int aid = atom->getIdx();
Bond *dbl_to_O = nullptr;
Atom *O_atom = nullptr;
bool hasDoubleToCorN = false;
for (const auto nbr : mol.atomNeighbors(atom)) {
if ((nbr->getAtomicNum() == 8) && (nbr->getFormalCharge() == 0) &&
(mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
Bond::DOUBLE)) {
// here's the double bonded oxygen
dbl_to_O = mol.getBondBetweenAtoms(aid, nbr->getIdx());
O_atom = nbr;
} else if ((nbr->getAtomicNum() == 6 || nbr->getAtomicNum() == 7) &&
(nbr->getDegree() >= 2) &&
(mol.getBondBetweenAtoms(aid, nbr->getIdx())->getBondType() ==
Bond::DOUBLE)) {
hasDoubleToCorN = true;
}
} // end of loop over the first neigh
if (hasDoubleToCorN && dbl_to_O != nullptr) {
TEST_ASSERT(O_atom != nullptr);
O_atom->setFormalCharge(-1);
dbl_to_O->setBondType(Bond::SINGLE);
atom->setFormalCharge(1);
}
}
// force a recalculation of the explicit valence here
atom->calcExplicitValence(false);
}
void halogenCleanup(RWMol &mol, Atom *atom) {
PRECONDITION(atom, "bad atom");
// Conversions done:
// X(=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
// X(=O)(=O)O -> [X+2]([O-])([O-])O
// X(=O)O -> [X+]([O-])O
int ev = atom->calcExplicitValence(false);
if (atom->getFormalCharge() == 0 && (ev == 7 || ev == 5 || ev == 3)) {
bool neighborsAllO = true;
for (const auto nbr : mol.atomNeighbors(atom)) {
if (nbr->getAtomicNum() != 8) {
neighborsAllO = false;
break;
}
}
if (neighborsAllO) {
int formalCharge = 0;
for (auto bond : mol.atomBonds(atom)) {
if (bond->getBondType() == Bond::DOUBLE) {
bond->setBondType(Bond::SINGLE);
auto otherAtom = bond->getOtherAtom(atom);
formalCharge++;
otherAtom->setFormalCharge(-1);
otherAtom->calcExplicitValence(false);
}
}
atom->setFormalCharge(formalCharge);
atom->calcExplicitValence(false);
}
}
}
bool isHypervalentNonMetal(Atom *atom) {
if (QueryOps::isMetal(*atom)) {
return false;
}
atom->updatePropertyCache(false);
int ev = atom->getValence(Atom::ValenceType::EXPLICIT);
// Check the explicit valence of the non-metal against the allowed
// valences of the atom, adjusted by its formal charge. This means that
// N+ is treated the same as C, O+ the same as N. This allows for,
// for example, c1cccc[n+]1-[Fe] to be acceptable and not turned into
// c1cccc[n+]1->[Fe]. After all, c1cccc[n+]1-C is ok. Although this is
// a poor example because c1ccccn1->[Fe] appears to be the normal
// way that pyridine complexes with transition metals. Heme b in
// CHEBI:26355 is an example of when this is required.
int effAtomicNum = atom->getAtomicNum() - atom->getFormalCharge();
if (effAtomicNum <= 0) {
return false;
}
// atom is a non-metal. If its explicit valence is greater than the
// maximum allowed valence then it is hypervalent.
// We have a special case in here for aromatic atoms where the explicit
// valence matches the max allowed and the degree is 4. This is there for
// cases like cyclopentadienyl - metal systems. We need this special case
// because the explicit valence on the C atoms there ends up being 4
const auto &otherValens =
PeriodicTable::getTable()->getValenceList(effAtomicNum);
auto maxV = otherValens.back();
if (maxV > 0 && (ev > maxV || (ev == maxV && atom->getIsAromatic() &&
atom->getTotalDegree() == 4))) {
return true;
}
return false;
}
int numDativeBonds(const Atom *atom) {
int numDatives = 0;
auto &mol = atom->getOwningMol();
for (auto bond : mol.atomBonds(atom)) {
if (bond->getBondType() == Bond::BondType::DATIVE ||
bond->getBondType() == Bond::BondType::DATIVEONE ||
bond->getBondType() == Bond::BondType::DATIVEL ||
bond->getBondType() == Bond::BondType::DATIVER) {
++numDatives;
}
}
return numDatives;
}
// Returns true if the atom shouldn't do dative bonds.
bool noDative(const Atom *a) {
static const std::set<int> noD{1, 2, 9, 10};
return (noD.find(a->getAtomicNum()) != noD.end());
};
void metalBondCleanup(RWMol &mol, Atom *atom,
const std::vector<unsigned int> &ranks) {
PRECONDITION(atom, "bad atom in metalBondCleanup");
// The IUPAC recommendation for ligand->metal coordination bonds is that they
// be single. This upsets the RDKit valence model, as seen in CHEBI:26355,
// heme b. If the valence of a non-metal atom is above the maximum in the
// RDKit model, and there are single bonds from it to metal
// change those bonds to atom->metal dative.
// If the atom is bonded to more than 1 metal atom, choose the one
// with the fewer dative bonds incident on it, with the canonical
// rank of the atoms as a tie-breaker.
if (isHypervalentNonMetal(atom) && !noDative(atom)) {
std::vector<Atom *> metals;
// see if there are any metals bonded to it by a single bond
for (auto bond : mol.atomBonds(atom)) {
if (bond->getBondType() == Bond::BondType::SINGLE &&
QueryOps::isMetal(*bond->getOtherAtom(atom))) {
metals.push_back(bond->getOtherAtom(atom));
}
}
if (!metals.empty()) {
std::sort(metals.begin(), metals.end(),
[&](const Atom *a1, const Atom *a2) -> bool {
int nda1 = numDativeBonds(a1);
int nda2 = numDativeBonds(a2);
if (nda1 == nda2) {
return ranks[a1->getIdx()] > ranks[a2->getIdx()];
} else {
return nda1 < nda2;
}
});
auto bond =
mol.getBondBetweenAtoms(atom->getIdx(), metals.front()->getIdx());
if (bond) {
bond->setBondType(RDKit::Bond::BondType::DATIVE);
bond->setBeginAtom(atom);
bond->setEndAtom(metals.front());
}
}
}
}
} // namespace
void cleanUp(RWMol &mol) {
for (auto atom : mol.atoms()) {
switch (atom->getAtomicNum()) {
case 7:
nitrogenCleanup(mol, atom);
break;
case 15:
phosphorusCleanup(mol, atom);
break;
case 17:
case 35:
case 53:
halogenCleanup(mol, atom);
break;
}
}
}
void cleanUpOrganometallics(RWMol &mol) {
// At present all this does is look for single bonds between
// non-metals and metals where the non-metal exceeds one of
// its normal valence states, and replaces that bond with
// a dative one from the non-metal to the metal.
bool needsFixing = false;
for (const auto atom : mol.atoms()) {
if (isHypervalentNonMetal(atom) && !noDative(atom)) {
// see if there are any metals bonded to it by a single bond
for (auto bond : mol.atomBonds(atom)) {
if (bond->getBondType() == Bond::BondType::SINGLE &&
QueryOps::isMetal(*bond->getOtherAtom(atom))) {
needsFixing = true;
break;
}
}
}
if (needsFixing) {
break;
}
}
if (!needsFixing) {
return;
}
mol.updatePropertyCache(false);
// First see if anything needs doing
std::vector<unsigned int> ranks(mol.getNumAtoms());
RDKit::Canon::rankMolAtoms(mol, ranks);
std::vector<std::pair<int, int>> atom_ranks;
for (size_t i = 0; i < ranks.size(); ++i) {
atom_ranks.push_back(std::make_pair(i, ranks[i]));
}
std::sort(atom_ranks.begin(), atom_ranks.end(),
[](const std::pair<int, int> &p1, std::pair<int, int> &p2) -> bool {
return p1.second < p2.second;
});
for (auto ar : atom_ranks) {
auto atom = mol.getAtomWithIdx(ar.first);
metalBondCleanup(mol, atom, ranks);
}
}
void adjustHs(RWMol &mol) {
//
// Go through and adjust the number of implicit and explicit Hs
// on each atom in the molecule.
//
// Atoms that do not *need* explicit Hs
//
// Assumptions: this is called after the molecule has been
// sanitized, aromaticity has been perceived, and the implicit
// valence of everything has been calculated.
//
for (auto atom : mol.atoms()) {
int origImplicitV = atom->getValence(Atom::ValenceType::IMPLICIT);
atom->calcExplicitValence(false);
int origExplicitV = atom->getNumExplicitHs();
int newImplicitV = atom->calcImplicitValence(false);
//
// Case 1: The disappearing Hydrogen
// Smiles: O=C1NC=CC2=C1C=CC=C2
//
// after perception is done, the N atom has two aromatic
// bonds to it and a single implicit H. When the Smiles is
// written, we get: n1ccc2ccccc2c1=O. Here the nitrogen has
// no implicit Hs (because there are two aromatic bonds to
// it, giving it a valence of 3). Also: this SMILES is bogus
// (un-kekulizable). The correct SMILES would be:
// [nH]1ccc2ccccc2c1=O. So we need to loop through the atoms
// and find those that have lost implicit H; we'll add those
// back as explicit Hs.
//
// <phew> that takes way longer to comment than it does to
// write:
if (newImplicitV < origImplicitV) {
atom->setNumExplicitHs(origExplicitV + (origImplicitV - newImplicitV));
atom->calcExplicitValence(false);
}
}
}
void assignRadicals(RWMol &mol) {
for (auto atom : mol.atoms()) {
// we only automatically assign radicals to atoms that
// don't have implicit Hs:
if (!atom->getNoImplicit() || !atom->getAtomicNum()) {
continue;
}
const auto &valens =
PeriodicTable::getTable()->getValenceList(atom->getAtomicNum());
int chg = atom->getFormalCharge();
int nOuter =
PeriodicTable::getTable()->getNouterElecs(atom->getAtomicNum());
if (valens.size() != 1 || valens[0] != -1) {
double accum = 0.0;
RWMol::OEDGE_ITER beg, end;
boost::tie(beg, end) = mol.getAtomBonds(atom);
while (beg != end) {
accum += mol[*beg]->getValenceContrib(atom);
++beg;
}
accum += atom->getNumExplicitHs();
int totalValence = static_cast<int>(accum + 0.1);
int baseCount = 8;
if (atom->getAtomicNum() == 1 || atom->getAtomicNum() == 2) {
baseCount = 2;
}
// applies to later (more electronegative) elements:
int numRadicals = baseCount - nOuter - totalValence + chg;
if (numRadicals < 0) {
numRadicals = 0;
// can the atom be "hypervalent"? (was github #447)
const INT_VECT &valens =
PeriodicTable::getTable()->getValenceList(atom->getAtomicNum());
if (valens.size() > 1) {
for (auto val : valens) {
if (val - totalValence + chg >= 0) {
numRadicals = val - totalValence + chg;
break;
}
}
}
}
// applies to earlier elements:
int numRadicals2 = nOuter - totalValence - chg;
if (numRadicals2 >= 0) {
numRadicals = std::min(numRadicals, numRadicals2);
}
atom->setNumRadicalElectrons(numRadicals);
} else {
// #7122: if there's a bond to the metal center, then don't assign
// radicals:
if (atom->getDegree() > 0) {
atom->setNumRadicalElectrons(0);
} else {
auto nValence = nOuter - chg;
// if this is an atom where we have no preferred valence info at all,
// e.g. for transition metals, then we shouldn't be guessing. This was
// #3330
if (nValence < 0) {
// this was github #5462
nValence = 0;
BOOST_LOG(rdWarningLog)
<< "Unusual charge on atom " << atom->getIdx()
<< " number of radical electrons set to zero" << std::endl;
}
atom->setNumRadicalElectrons(nValence % 2);
}
}
}
}
MolOps::Hybridizations::Hybridizations(const ROMol &mol) {
d_hybridizations.clear();
// see if the mol already has computed hybridizations:
if (mol.getNumAtoms() == 0) {
return;
}
if ((*mol.atoms().begin())->getHybridization() !=
Atom::HybridizationType::UNSPECIFIED) {
for (auto atom : mol.atoms()) {
d_hybridizations.push_back((int)atom->getHybridization());
}
return;
}
// compute them in a copy of the mol, so as not to change the mol passed in
RWMol molCopy(mol);
unsigned int operationThatFailed;
unsigned int santitizeOps =
MolOps::SANITIZE_SETCONJUGATION | MolOps::SANITIZE_SETHYBRIDIZATION;
MolOps::sanitizeMol(molCopy, operationThatFailed, santitizeOps);
for (auto atom : molCopy.atoms()) {
// determine hybridization and remove chiral atoms that are not sp3
d_hybridizations.push_back((int)atom->getHybridization());
}
return;
}
void cleanupAtropisomers(RWMol &mol) {
auto hybs = MolOps::Hybridizations(mol);
MolOps::cleanupAtropisomers(mol, hybs);
}
namespace {
void checkBond(RWMol &mol, Bond *bond, MolOps::Hybridizations &hybs) {
if (!mol.getRingInfo()->isSssrOrBetter()) {
RDKit::MolOps::findSSSR(mol);
}
const RingInfo *ri = mol.getRingInfo();
if (hybs[bond->getBeginAtomIdx()] != Atom::SP2 ||
hybs[bond->getEndAtomIdx()] != Atom::SP2 ||
// do not clear bonds that part of a macrocycle
// because they can be linking actual atropisomeric portions
(ri->numBondRings(bond->getIdx()) > 0 &&
ri->minBondRingSize(bond->getIdx()) < 8)) {
bond->setStereo(Bond::BondStereo::STEREONONE);
}
}
} // namespace
void cleanupAtropisomers(RWMol &mol, MolOps::Hybridizations &hybs) {
// make sure that ring info is available
// (defensive, current calls have it available)
for (auto bond : mol.bonds()) {
switch (bond->getStereo()) {
case Bond::BondStereo::STEREOATROPCW:
case Bond::BondStereo::STEREOATROPCCW:
checkBond(mol, bond, hybs);
break;
default:
break;
}
}
}
void sanitizeMol(RWMol &mol) {
unsigned int failedOp = 0;
sanitizeMol(mol, failedOp, SANITIZE_ALL);
}
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
unsigned int sanitizeOps) {
// clear out any cached properties
mol.clearComputedProps();
operationThatFailed = SANITIZE_CLEANUP;
if (sanitizeOps & operationThatFailed) {
// clean up things like nitro groups
cleanUp(mol);
}
// fix things like non-metal to metal bonds that should be dative.
operationThatFailed = SANITIZE_CLEANUP_ORGANOMETALLICS;
if (sanitizeOps & operationThatFailed) {
cleanUpOrganometallics(mol);
}
// update computed properties on atoms and bonds:
operationThatFailed = SANITIZE_PROPERTIES;
if (sanitizeOps & operationThatFailed) {
mol.updatePropertyCache(true);
} else {
mol.updatePropertyCache(false);
}
operationThatFailed = SANITIZE_SYMMRINGS;
if (sanitizeOps & operationThatFailed) {
VECT_INT_VECT arings;
MolOps::symmetrizeSSSR(mol, arings);
}
// kekulizations
operationThatFailed = SANITIZE_KEKULIZE;
if (sanitizeOps & operationThatFailed) {
Kekulize(mol);
}
// look for radicals:
// We do this now because we need to know
// that the N in [N]1C=CC=C1 has a radical
// before we move into setAromaticity().
// It's important that this happen post-Kekulization
// because there's no way of telling what to do
// with the same molecule if it's in the form
// [n]1cccc1
operationThatFailed = SANITIZE_FINDRADICALS;
if (sanitizeOps & operationThatFailed) {
assignRadicals(mol);
}
// then do aromaticity perception
operationThatFailed = SANITIZE_SETAROMATICITY;
if (sanitizeOps & operationThatFailed) {
setAromaticity(mol);
}
// set conjugation
operationThatFailed = SANITIZE_SETCONJUGATION;
if (sanitizeOps & operationThatFailed) {
setConjugation(mol);
}
// set hybridization
operationThatFailed = SANITIZE_SETHYBRIDIZATION;
if (sanitizeOps & operationThatFailed) {
setHybridization(mol);
}
// remove bogus chirality specs:
operationThatFailed = SANITIZE_CLEANUPCHIRALITY;
if (sanitizeOps & operationThatFailed) {
cleanupChirality(mol);
}
operationThatFailed = SANITIZE_CLEANUPATROPISOMERS;
if (sanitizeOps & operationThatFailed) {
cleanupAtropisomers(mol);
}
// adjust Hydrogen counts:
operationThatFailed = SANITIZE_ADJUSTHS;
if (sanitizeOps & operationThatFailed) {
adjustHs(mol);
}
// now that everything has been cleaned up, go through and check/update the
// computed valences on atoms and bonds one more time
operationThatFailed = SANITIZE_PROPERTIES;
if (sanitizeOps & operationThatFailed) {
mol.updatePropertyCache(true);
}
operationThatFailed = 0;
}
std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
const ROMol &imol, unsigned int sanitizeOps) {
RWMol mol(imol);
std::vector<std::unique_ptr<MolSanitizeException>> res;
// clear out any cached properties
mol.clearComputedProps();
int operation;
operation = SANITIZE_CLEANUP;
if (sanitizeOps & operation) {
// clean up things like nitro groups
cleanUp(mol);
}
// update computed properties on atoms and bonds:
operation = SANITIZE_PROPERTIES;
if (sanitizeOps & operation) {
for (auto &atom : mol.atoms()) {
try {
bool strict = true;
atom->updatePropertyCache(strict);
} catch (const MolSanitizeException &e) {
res.emplace_back(e.copy());
}
}
} else {
mol.updatePropertyCache(false);
}
// kekulizations
operation = SANITIZE_KEKULIZE;
if (sanitizeOps & operation) {
try {
Kekulize(mol);
} catch (const MolSanitizeException &e) {
res.emplace_back(e.copy());
}
}
return res;
}
namespace {
std::vector<std::unique_ptr<ROMol>> getTheFrags(
const ROMol &mol, bool sanitizeFrags, INT_VECT *frags,
VECT_INT_VECT *fragsMolAtomMapping, bool copyConformers) {
std::unique_ptr<INT_VECT> mappingStorage;
if (!frags) {
mappingStorage.reset(new INT_VECT);
frags = mappingStorage.get();
}
int nFrags = getMolFrags(mol, *frags);
std::vector<std::unique_ptr<RWMol>> res;
if (nFrags == 1) {
res.emplace_back(new RWMol(mol));
if (fragsMolAtomMapping) {
INT_VECT comp;
for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
comp.push_back(idx);
}
(*fragsMolAtomMapping).push_back(comp);
}
} else {
res.reserve(nFrags);
for (int i = 0; i < nFrags; ++i) {
boost::dynamic_bitset<> atomsInFrag(mol.getNumAtoms());
INT_VECT comp;
for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
if ((*frags)[idx] == i) {
comp.push_back(idx);
atomsInFrag.set(idx);
}
}
auto fragmentHasChallengingFeatures =
[&](const INT_VECT &comp,
const boost::dynamic_bitset<> &atomsInFrag) -> bool {
for (auto idx : comp) {
// check for atoms with stereochem:
const auto atom = mol.getAtomWithIdx(idx);
if (atom->getChiralTag() != Atom::ChiralType::CHI_UNSPECIFIED &&
atom->getChiralTag() != Atom::ChiralType::CHI_OTHER) {
return true;
}
for (auto bnd : mol.atomBonds(atom)) {
if (atomsInFrag[bnd->getOtherAtomIdx(idx)]) {
if (bnd->getStereo() != Bond::BondStereo::STEREONONE &&
bnd->getStereo() != Bond::BondStereo::STEREOANY) {
return true;
}
}
}
}
for (auto sgroup : getSubstanceGroups(mol)) {
for (auto aid : sgroup.getAtoms()) {
if (atomsInFrag[aid]) {
return true;
}
}
for (auto aid : sgroup.getParentAtoms()) {
if (atomsInFrag[aid]) {
return true;
}
}
}
for (auto stereoGroup : mol.getStereoGroups()) {
// doesn't seem like this should be necessary, but in case
// we ever need stereogroups where the atoms aren't marked
// with stereo...
for (auto atom : stereoGroup.getAtoms()) {
if (atomsInFrag[atom->getIdx()]) {
return true;
}
}
// same check for stereo groups involving bonds:
for (auto bond : stereoGroup.getBonds()) {
if (atomsInFrag[bond->getBeginAtomIdx()] &&
atomsInFrag[bond->getEndAtomIdx()]) {
return true;
}
}
}
return false;
};
if (comp.size() == 1 ||
(nFrags > 3 && !fragmentHasChallengingFeatures(comp, atomsInFrag))) {
// special case for a small, simple fragments when a bunch of fragments
// are present. The check on the number of fragments is purely
// empirical. This is mainly intended to catch situations like proteins
// where you have a bunch of single-atom fragments (waters); the
// standard approach below ends up being horribly inefficient there
res.emplace_back(new RWMol());
auto &frag = res.back();
std::map<unsigned int, unsigned int> atomIdxMap;
for (auto aid : comp) {
atomIdxMap[aid] =
frag->addAtom(mol.getAtomWithIdx(aid)->copy(), false, true);
}
for (auto bond : mol.bonds()) {
if (atomsInFrag[bond->getBeginAtomIdx()] &&
atomsInFrag[bond->getEndAtomIdx()]) {
auto bondCopy = bond->copy();
bondCopy->setBeginAtomIdx(atomIdxMap[bond->getBeginAtomIdx()]);
bondCopy->setEndAtomIdx(atomIdxMap[bond->getEndAtomIdx()]);
frag->addBond(bondCopy, true);
}
}
if (copyConformers) {
for (auto cit = mol.beginConformers(); cit != mol.endConformers();
++cit) {
auto *conf = new Conformer(frag->getNumAtoms());
conf->setId((*cit)->getId());
conf->set3D((*cit)->is3D());
unsigned int cidx = 0;
for (auto ai : comp) {
conf->setAtomPos(cidx++, (*cit)->getAtomPos(ai));
}
frag->addConformer(conf);
}
}
} else {
res.emplace_back(new RWMol(mol));
auto &frag = res.back();
frag->beginBatchEdit();
for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
if (!atomsInFrag[idx]) {
frag->removeAtom(idx);
}
}
frag->commitBatchEdit();
}
if (fragsMolAtomMapping) {
(*fragsMolAtomMapping).push_back(comp);
}
}
}
if (!copyConformers) {
for (auto &frag : res) {
frag->clearConformers();
}
}
if (sanitizeFrags) {
for (auto &frag : res) {
sanitizeMol(*frag);
}
}
std::vector<std::unique_ptr<ROMol>> finalRes;
for (auto &r : res) {
finalRes.emplace_back(r.get());
r.release();
}
return finalRes;
}
} // namespace
std::vector<ROMOL_SPTR> getMolFrags(const ROMol &mol, bool sanitizeFrags,
INT_VECT *frags,
VECT_INT_VECT *fragsMolAtomMapping,
bool copyConformers) {
auto upFrags = getTheFrags(mol, sanitizeFrags, frags, fragsMolAtomMapping,
copyConformers);
std::vector<boost::shared_ptr<ROMol>> finalRes;
for (auto &r : upFrags) {
finalRes.emplace_back(r.get());
r.release();
}
return finalRes;
}
unsigned int getMolFrags(const ROMol &mol, INT_VECT &mapping) {
unsigned int natms = mol.getNumAtoms();
mapping.resize(natms);
return natms ? boost::connected_components(mol.getTopology(), &mapping[0])
: 0;
};
unsigned int getMolFrags(const ROMol &mol, VECT_INT_VECT &frags) {
frags.clear();
INT_VECT mapping;
getMolFrags(mol, mapping);
INT_INT_VECT_MAP comMap;
for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
int mi = mapping[i];
if (comMap.find(mi) == comMap.end()) {
INT_VECT comp;
comMap[mi] = comp;
}
comMap[mi].push_back(i);
}
for (INT_INT_VECT_MAP_CI mci = comMap.begin(); mci != comMap.end(); mci++) {
frags.push_back((*mci).second);
}
return rdcast<unsigned int>(frags.size());
}
unsigned int getMolFrags(const ROMol &mol,
std::vector<std::unique_ptr<ROMol>> &molFrags,
bool sanitizeFrags, std::vector<int> *frags,
std::vector<std::vector<int>> *fragsMolAtomMapping,
bool copyConformers) {
molFrags = getTheFrags(mol, sanitizeFrags, frags, fragsMolAtomMapping,
copyConformers);
return rdcast<unsigned int>(molFrags.size());
}
namespace {
template <typename T>
std::map<T, std::unique_ptr<ROMol>> getTheFragsWithQuery(
const ROMol &mol, T (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<T> *whiteList, bool negateList) {
std::vector<T> assignments(mol.getNumAtoms());
std::vector<int> ids(mol.getNumAtoms(), -1);
std::map<T, std::unique_ptr<ROMol>> res;
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
T where = query(mol, mol.getAtomWithIdx(i));
if (whiteList) {
bool found = std::find(whiteList->begin(), whiteList->end(), where) !=
whiteList->end();
if (!found && !negateList) {
continue;
} else if (found && negateList) {
continue;
}
}
assignments[i] = where;
if (res.find(where) == res.end()) {
res[where] = std::unique_ptr<ROMol>(new ROMol());
}
auto *frag = static_cast<RWMol *>(res[where].get());
ids[i] = frag->addAtom(mol.getAtomWithIdx(i)->copy(), false, true);
// loop over neighbors and add bonds in the fragment to all atoms
// that are already in the same fragment
ROMol::ADJ_ITER nbrIdx, endNbrs;
boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(mol.getAtomWithIdx(i));
while (nbrIdx != endNbrs) {
if (*nbrIdx < i && assignments[*nbrIdx] == where) {
Bond *nBond = mol.getBondBetweenAtoms(i, *nbrIdx)->copy();
nBond->setOwningMol(static_cast<ROMol *>(frag));
nBond->setBeginAtomIdx(ids[nBond->getBeginAtomIdx()]);
nBond->setEndAtomIdx(ids[nBond->getEndAtomIdx()]);
frag->addBond(nBond, true);
}
++nbrIdx;
}
}
// update conformers
for (auto cit = mol.beginConformers(); cit != mol.endConformers(); ++cit) {
for (auto iter = res.begin(); iter != res.end(); ++iter) {
auto &newM = iter->second;
auto *conf = new Conformer(newM->getNumAtoms());
conf->setId((*cit)->getId());
conf->set3D((*cit)->is3D());
newM->addConformer(conf);
}
for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
if (ids[i] < 0) {
continue;
}
res[assignments[i]]
->getConformer((*cit)->getId())
.setAtomPos(ids[i], (*cit)->getAtomPos(i));
}
}
if (sanitizeFrags) {
for (auto iter = res.begin(); iter != res.end(); ++iter) {
sanitizeMol(*static_cast<RWMol *>(iter->second.get()));
}
}
return res;
}
} // namespace
template <typename T>
std::map<T, boost::shared_ptr<ROMol>> getMolFragsWithQuery(
const ROMol &mol, T (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<T> *whiteList, bool negateList) {
PRECONDITION(query, "no query");
auto rawRes =
getTheFragsWithQuery(mol, query, sanitizeFrags, whiteList, negateList);
std::map<T, boost::shared_ptr<ROMol>> res;
for (auto &it : rawRes) {
res.insert(std::make_pair(it.first, it.second.get()));
it.second.release();
}
return res;
}
template RDKIT_GRAPHMOL_EXPORT std::map<std::string, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
std::string (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<std::string> *,
bool);
template RDKIT_GRAPHMOL_EXPORT std::map<int, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
int (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<int> *, bool);
template RDKIT_GRAPHMOL_EXPORT std::map<unsigned int, boost::shared_ptr<ROMol>>
getMolFragsWithQuery(const ROMol &mol,
unsigned int (*query)(const ROMol &, const Atom *),
bool sanitizeFrags, const std::vector<unsigned int> *,
bool);
template <typename T>
unsigned int getMolFragsWithQuery(const ROMol &mol,
T (*query)(const ROMol &, const Atom *),
std::map<T, std::unique_ptr<ROMol>> &molFrags,
bool sanitizeFrags,
const std::vector<T> *whiteList,
bool negateList) {
PRECONDITION(query, "no query");
molFrags =
getTheFragsWithQuery(mol, query, sanitizeFrags, whiteList, negateList);
return rdcast<unsigned int>(molFrags.size());
}
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
const ROMol &mol, std::string (*query)(const ROMol &, const Atom *),
std::map<std::string, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags,
const std::vector<std::string> *, bool);
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
const ROMol &mol, int (*query)(const ROMol &, const Atom *),
std::map<int, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags,
const std::vector<int> *, bool);
template RDKIT_GRAPHMOL_EXPORT unsigned int getMolFragsWithQuery(
const ROMol &mol, unsigned int (*query)(const ROMol &, const Atom *),
std::map<unsigned int, std::unique_ptr<ROMol>> &molFrags,
bool sanitizeFrags, const std::vector<unsigned int> *, bool);
int getFormalCharge(const ROMol &mol) {
int accum = 0;
for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
atomIt != mol.endAtoms(); ++atomIt) {
accum += (*atomIt)->getFormalCharge();
}
return accum;
};
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop) {
unsigned numPropAtoms = 0;
for (const auto atom : mol.atoms()) {
if (atom->hasProp(prop)) {
++numPropAtoms;
}
}
return numPropAtoms;
}
ROMol *hapticBondsToDative(const ROMol &mol) {
auto *res = new RWMol(mol);
hapticBondsToDative(*res);
return static_cast<ROMol *>(res);
}
void hapticBondsToDative(RWMol &mol) {
std::vector<unsigned int> dummiesToGo;
std::vector<std::pair<unsigned int, unsigned int>> bondsToAdd;
mol.beginBatchEdit();
for (const auto &bond : mol.bonds()) {
if (bond->getBondType() == Bond::BondType::DATIVE) {
auto oats = details::hapticBondEndpoints(bond);
if (oats.empty()) {
continue;
}
Atom *dummy = nullptr;
Atom *metal = nullptr;
if (bond->getBeginAtom()->getAtomicNum() == 0) {
dummy = bond->getBeginAtom();
metal = bond->getEndAtom();
} else if (bond->getEndAtom()->getAtomicNum() == 0) {
metal = bond->getBeginAtom();
dummy = bond->getEndAtom();
}
if (dummy == nullptr) {
continue;
}
for (auto oat : oats) {
auto atom = mol.getAtomWithIdx(oat);
if (atom) {
mol.addBond(atom, metal, Bond::DATIVE);
}
}
mol.removeAtom(dummy);
}
}
mol.commitBatchEdit();
}
ROMol *dativeBondsToHaptic(const ROMol &mol) {
auto *res = new RWMol(mol);
dativeBondsToHaptic(*res);
return static_cast<ROMol *>(res);
}
namespace {
// return sets of contiguous atoms of more than 1 atom that are in
// allAts.
std::vector<std::vector<unsigned int>> contiguousAtoms(
const ROMol &mol, const std::vector<unsigned int> &allAts) {
std::vector<std::vector<unsigned int>> contigAts;
std::vector<char> doneAts(mol.getNumAtoms(), 0);
std::vector<char> inAllAts(mol.getNumAtoms(), 0);
for (auto a : allAts) {
inAllAts[a] = 1;
}
for (size_t i = 0; i < allAts.size(); ++i) {
if (doneAts[allAts[i]]) {
continue;
}
contigAts.push_back(std::vector<unsigned int>());
std::list<const Atom *> toDo{mol.getAtomWithIdx(allAts[i])};
while (!toDo.empty()) {
auto nextAt = toDo.front();
toDo.pop_front();
if (!doneAts[nextAt->getIdx()]) {
doneAts[nextAt->getIdx()] = 1;
contigAts.back().push_back(nextAt->getIdx());
}
for (const auto &nbri :
boost::make_iterator_range(mol.getAtomNeighbors(nextAt))) {
if (inAllAts[nbri] && !doneAts[nbri]) {
toDo.push_back(mol.getAtomWithIdx(nbri));
}
}
}
if (contigAts.back().size() < 2) {
contigAts.pop_back();
}
}
return contigAts;
}
// add to the molecule a dummy atom centred on the
// atoms passed in, with a dative bond from it to the metal atom.
void addHapticBond(RWMol &mol, unsigned int metalIdx,
std::vector<unsigned int> hapticAtoms) {
// So there is a * in the V3000 file as the symbol for the atom.
auto dummyAt = new QueryAtom(0);
dummyAt->setQuery(makeAtomNullQuery());
bool updateLabel = true;
bool takeOwnwership = true;
unsigned int dummyIdx = mol.addAtom(dummyAt, updateLabel, takeOwnwership);
for (auto i = 0u; i < mol.getNumConformers(); ++i) {
auto &conf = mol.getConformer(i);
RDGeom::Point3D dummyPos;
for (auto ha : hapticAtoms) {
auto haPos = conf.getAtomPos(ha);
dummyPos += haPos;
}
dummyPos /= hapticAtoms.size();
conf.setAtomPos(dummyIdx, dummyPos);
}
unsigned int numbonds = mol.addBond(dummyIdx, metalIdx, Bond::DATIVE);
auto bond = mol.getBondWithIdx(numbonds - 1);
// Get the atom numbers for the end points. First number is the
// count, the rest count from 1.
std::ostringstream oss;
oss << "(" << hapticAtoms.size() << " ";
for (auto ha : hapticAtoms) {
oss << ha + 1 << " ";
}
std::string endpts{oss.str()};
if (endpts.back() == ' ') {
endpts = endpts.substr(0, endpts.length() - 1);
}
endpts += ")";
bond->setProp(common_properties::_MolFileBondEndPts, endpts);
bond->setProp<std::string>(common_properties::_MolFileBondAttach, "ALL");
}
} // namespace
void dativeBondsToHaptic(RWMol &mol) {
// First collect all the atoms that have a dative bond to them.
// Assume that the ones of interest will have a metal as their
// end atoms.
std::map<unsigned int, std::vector<unsigned int>> dativeAtoms;
for (const auto &b : mol.bonds()) {
if (b->getBondType() == Bond::DATIVE) {
auto ins = dativeAtoms.find(b->getEndAtomIdx());
if (ins == dativeAtoms.end()) {
dativeAtoms.insert(
std::make_pair(b->getEndAtomIdx(),
std::vector<unsigned int>{b->getBeginAtomIdx()}));
} else {
ins->second.push_back(b->getBeginAtomIdx());
}
}
}
mol.beginBatchEdit();
for (auto &dativeSet : dativeAtoms) {
// Find the sets of contiguous atoms in the dativeAtoms lists. Each one
// will be the EndPts of a haptic bond going to the metal atom that is
// dativeSet.first.
auto contigAtoms = contiguousAtoms(mol, dativeSet.second);
for (const auto &ca : contigAtoms) {
addHapticBond(mol, dativeSet.first, ca);
for (auto cat : ca) {
mol.removeBond(dativeSet.first, cat);
}
}
}
mol.commitBatchEdit();
}
namespace details {
std::vector<int> hapticBondEndpoints(const Bond *bond) {
// This would ideally use ParseV3000Array but I'm buggered if I can get
// the linker to find it. The issue, I think, is that it's in the
// FileParsers library which is built after GraphMol so not available
// to link in. It can't be built first because it needs GraphMol.
// std::vector<unsigned int> oats =
// RDKit::SGroupParsing::ParseV3000Array<unsigned int>(endpts);
// Returns the atom indices i.e. subtracts 1 from the numbers in the prop.
std::vector<int> oats;
std::string endpts;
if (bond->getPropIfPresent(common_properties::_MolFileBondEndPts, endpts)) {
if ('(' == endpts.front() && ')' == endpts.back()) {
endpts = endpts.substr(1, endpts.length() - 2);
boost::char_separator<char> sep(" ");
boost::tokenizer<boost::char_separator<char>> tokens(endpts, sep);
auto beg = tokens.begin();
++beg;
std::transform(beg, tokens.end(), std::back_inserter(oats),
[](const std::string &a) { return std::stod(a) - 1; });
}
}
return oats;
}
} // end of namespace details
namespace details {
unsigned int addExplicitAttachmentPoint(RWMol &mol, unsigned int atomIdx,
unsigned int val, bool addAsQuery,
bool addCoords) {
Atom *newAtom = nullptr;
if (addAsQuery) {
newAtom = new QueryAtom(0);
newAtom->setQuery(RDKit::makeAtomNullQuery());
} else {
newAtom = new Atom(0);
}
newAtom->setProp(common_properties::_fromAttachPoint, val);
bool updateLabel = false;
bool takeOwnership = true;
auto idx = mol.addAtom(newAtom, updateLabel, takeOwnership);
mol.addBond(atomIdx, idx, Bond::SINGLE);
mol.getAtomWithIdx(idx)->updatePropertyCache(false);
if (addCoords) {
setTerminalAtomCoords(mol, idx, atomIdx);
}
return idx;
}
bool isAttachmentPoint(const Atom *atom, bool markedOnly) {
PRECONDITION(atom, "bad atom");
PRECONDITION(atom->hasOwningMol(), "atom not associated with a molecule");
if (atom->getAtomicNum() != 0 || atom->getDegree() != 1) {
return false;
}
if (markedOnly && !atom->hasProp(common_properties::_fromAttachPoint)) {
return false;
}
// we know that the atom is degree 1
const auto bond = *atom->getOwningMol().atomBonds(atom).begin();
if ((bond->getBondType() != Bond::BondType::SINGLE &&
bond->getBondType() != Bond::BondType::UNSPECIFIED) ||
bond->getBondDir() != Bond::BondDir::NONE) {
return false;
}
if (atom->hasQuery()) {
// a * from SMARTS
if (!atom->getQuery()->getNegation() &&
atom->getQuery()->getDescription() == "AtomNull") {
return true;
}
// a * from CXSMILES
if (atom->getQuery()->getNegation() &&
atom->getQuery()->getDescription() == "AtomAtomicNum" &&
static_cast<ATOM_EQUALS_QUERY *>(atom->getQuery())->getVal() == 1) {
return true;
}
return false;
}
return true;
}
} // namespace details
void expandAttachmentPoints(RWMol &mol, bool addAsQueries, bool addCoords) {
for (auto atom : mol.atoms()) {
int value;
if (atom->getPropIfPresent(common_properties::molAttachPoint, value)) {
std::vector<int> tgtVals;
if (value == 1 || value == -1) {
tgtVals.push_back(1);
}
if (value == 2 || value == -1) {
tgtVals.push_back(2);
}
if (tgtVals.empty()) {
BOOST_LOG(rdWarningLog)
<< "Invalid value for molAttachPoint: " << value << " on atom "
<< atom->getIdx() << ". Not expanding this atttachment point."
<< std::endl;
continue;
}
for (auto tval : tgtVals) {
atom->clearProp(common_properties::molAttachPoint);
details::addExplicitAttachmentPoint(mol, atom->getIdx(), tval,
addAsQueries, addCoords);
}
}
}
}
void collapseAttachmentPoints(RWMol &mol, bool markedOnly) {
bool removedAny = false;
std::vector<int> attachLabels(mol.getNumAtoms(), 0);
for (auto atom : mol.atoms()) {
if (details::isAttachmentPoint(atom, markedOnly)) {
int value = 0;
atom->getPropIfPresent(common_properties::_fromAttachPoint, value);
if (markedOnly && (value < 0 || value > 2)) {
BOOST_LOG(rdWarningLog)
<< "Invalid value for _fromAttachPoint: " << value << " on atom "
<< atom->getIdx() << ". Not collapsing this atom" << std::endl;
continue;
}
if (!markedOnly && !value) {
value = 1;
}
auto bond = *mol.atomBonds(atom).begin();
if ((bond->getBondType() != Bond::BondType::SINGLE &&
bond->getBondType() != Bond::BondType::UNSPECIFIED) ||
bond->getBondDir() != Bond::BondDir::NONE) {
continue;
}
auto oAtomIdx = bond->getOtherAtom(atom)->getIdx();
if (attachLabels[oAtomIdx]) {
if (attachLabels[oAtomIdx] != -1) {
value = -1;
} else {
BOOST_LOG(rdWarningLog)
<< "More than two attachment points on atom " << oAtomIdx
<< ". Attachment point " << atom->getIdx()
<< " will not be collapsed." << std::endl;
continue;
}
}
if (!removedAny) {
mol.beginBatchEdit();
removedAny = true;
}
attachLabels[oAtomIdx] = value;
mol.removeAtom(atom);
}
}
// set the attachment point labels
for (auto atom : mol.atoms()) {
if (attachLabels[atom->getIdx()]) {
atom->setProp(common_properties::molAttachPoint,
attachLabels[atom->getIdx()]);
}
}
if (removedAny) {
mol.commitBatchEdit();
}
}
} // end of namespace MolOps
} // end of namespace RDKit