mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
549 lines
17 KiB
C++
549 lines
17 KiB
C++
//
|
|
// Copyright (C) 2022 Sreya Gogineni and other RDKit contributors
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include "DetermineBonds.h"
|
|
#include <GraphMol/RDKitBase.h>
|
|
#ifdef RDK_BUILD_YAEHMOP_SUPPORT
|
|
#include <YAeHMOP/EHTTools.h>
|
|
#endif
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <vector>
|
|
#include <numeric>
|
|
#include <cmath>
|
|
#include <unordered_map>
|
|
|
|
#include <RDGeneral/BoostStartInclude.h>
|
|
#include <boost/graph/adjacency_list.hpp>
|
|
#include <boost/graph/max_cardinality_matching.hpp>
|
|
#include <boost/multiprecision/cpp_int.hpp>
|
|
#include <RDGeneral/BoostEndInclude.h>
|
|
#include <GraphMol/FileParsers/ProximityBonds.h>
|
|
#include <RDGeneral/ControlCHandler.h>
|
|
|
|
typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS>
|
|
Graph;
|
|
using boost::multiprecision::uint1024_t;
|
|
|
|
namespace {
|
|
|
|
// see http://phrogz.net/lazy-cartesian-product
|
|
template <typename T>
|
|
struct LazyCartesianProduct {
|
|
std::vector<std::vector<T>> d_listOfLists;
|
|
std::vector<uint1024_t> d_divs;
|
|
std::vector<uint1024_t> d_mods;
|
|
uint1024_t d_maxSize;
|
|
uint1024_t d_currentPos;
|
|
|
|
explicit LazyCartesianProduct(const std::vector<std::vector<T>> &input)
|
|
: d_listOfLists(input), d_currentPos(0) {
|
|
auto size = d_listOfLists.size();
|
|
d_divs.resize(size);
|
|
d_mods.resize(size);
|
|
d_maxSize = 1;
|
|
|
|
for (int i = size - 1; i >= 0; --i) {
|
|
uint1024_t items(d_listOfLists[i].size());
|
|
d_divs[i] = d_maxSize;
|
|
d_mods[i] = items;
|
|
d_maxSize *= items;
|
|
}
|
|
}
|
|
|
|
std::vector<T> entryAt(uint1024_t pos) const;
|
|
std::vector<T> next() { return entryAt(d_currentPos++); }
|
|
bool atEnd() const { return d_currentPos >= d_maxSize; }
|
|
};
|
|
|
|
template <typename T>
|
|
std::vector<T> LazyCartesianProduct<T>::entryAt(uint1024_t pos) const {
|
|
auto length = d_listOfLists.size();
|
|
std::vector<T> res(length);
|
|
for (auto i = 0u; i < length; ++i) {
|
|
res[i] = d_listOfLists[i][static_cast<size_t>(
|
|
static_cast<uint1024_t>(pos / d_divs[i]) % d_mods[i])];
|
|
}
|
|
return res;
|
|
}
|
|
|
|
std::vector<unsigned int> possibleValences(
|
|
const RDKit::Atom *atom,
|
|
const std::unordered_map<int, std::vector<unsigned int>> &atomicValence) {
|
|
auto atomNum = atom->getAtomicNum() - atom->getFormalCharge();
|
|
|
|
std::vector<unsigned int> avalences;
|
|
auto valences = atomicValence.find(atomNum);
|
|
if (valences != atomicValence.end()) {
|
|
avalences = valences->second;
|
|
} else {
|
|
for (auto v : RDKit::PeriodicTable::getTable()->getValenceList(atomNum)) {
|
|
if (v >= 0) {
|
|
avalences.push_back(v);
|
|
}
|
|
}
|
|
}
|
|
return avalences;
|
|
}
|
|
|
|
LazyCartesianProduct<unsigned int> getValenceCombinations(
|
|
const RDKit::RWMol &mol,
|
|
size_t iterations = 100) {
|
|
auto numAtoms = mol.getNumAtoms();
|
|
const std::unordered_map<int, std::vector<unsigned int>> atomicValence = {
|
|
{1, {1}}, {5, {3, 4}}, {6, {4}}, {7, {3, 4}}, {8, {2, 1, 3}},
|
|
{9, {1}}, {14, {4}}, {15, {5, 3}}, {16, {6, 3, 2, 1}}, {17, {1}},
|
|
{32, {4}}, {35, {1}}, {53, {1}}};
|
|
|
|
std::vector<std::vector<unsigned int>> curPossible(numAtoms);
|
|
std::vector<std::vector<unsigned int>> newPossible(numAtoms);
|
|
|
|
for (size_t i = 0; i < numAtoms; i++) {
|
|
auto valences = possibleValences(mol.getAtomWithIdx(i), atomicValence);
|
|
if (valences.empty()) {
|
|
auto atom = mol.getAtomWithIdx(i);
|
|
std::stringstream ss;
|
|
ss << "Atom " << i << " with atomic number " << atom->getAtomicNum()
|
|
<< " has no valences defined.";
|
|
throw ValueErrorException(ss.str());
|
|
}
|
|
curPossible[i] = valences;
|
|
}
|
|
|
|
while (true) {
|
|
for (size_t i = 0; i < numAtoms; i++) {
|
|
if (curPossible[i].size() == 1) {
|
|
newPossible[i] = curPossible[i];
|
|
continue;
|
|
}
|
|
|
|
auto atom = mol.getAtomWithIdx(i);
|
|
|
|
int availBonds = 0;
|
|
for (auto bond : mol.atomBonds(atom)) {
|
|
auto *neighbor = bond->getOtherAtom(atom);
|
|
auto neighIdx = neighbor->getIdx();
|
|
auto neighDegree = neighbor->getDegree();
|
|
auto maxPos = *std::max_element(curPossible[neighIdx].begin(), curPossible[neighIdx].end());
|
|
availBonds += maxPos - neighDegree;
|
|
}
|
|
|
|
auto numBonds = atom->getDegree();
|
|
for (auto valence : curPossible[i]) {
|
|
if (numBonds <= valence && valence <= numBonds + availBonds) {
|
|
newPossible[i].push_back(valence);
|
|
}
|
|
}
|
|
|
|
if (newPossible[i].empty()) {
|
|
std::stringstream ss;
|
|
ss << "Unable to determine valence of atom " << i << " with atomic number "
|
|
<< atom->getAtomicNum() << " and " << atom->getDegree()
|
|
<< " bonds. Check the input molecules bonding.";
|
|
throw ValueErrorException(ss.str());
|
|
}
|
|
}
|
|
|
|
if (newPossible == curPossible) {
|
|
break;
|
|
} else {
|
|
curPossible = newPossible;
|
|
std::for_each(newPossible.begin(), newPossible.end(),
|
|
[](auto& x) { x.clear(); });
|
|
}
|
|
|
|
if (--iterations == 0) {
|
|
break;
|
|
}
|
|
}
|
|
return LazyCartesianProduct<unsigned int>(curPossible);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace RDKit {
|
|
|
|
#ifdef RDK_BUILD_YAEHMOP_SUPPORT
|
|
void connectivityHueckel(RWMol &mol, int charge) {
|
|
auto numAtoms = mol.getNumAtoms();
|
|
mol.getAtomWithIdx(0)->setFormalCharge(charge);
|
|
EHTTools::EHTResults res;
|
|
bool success = runMol(mol, res);
|
|
RDUNUSED_PARAM(success);
|
|
// as of this writing runMol() always returns true, so we ignore the return
|
|
// value.
|
|
double *mat = res.reducedOverlapPopulationMatrix.get();
|
|
int matInd = 0;
|
|
for (unsigned int i = 0; i < numAtoms; i++) {
|
|
for (unsigned int j = 0; j < i + 1; j++) {
|
|
if (i != j && mat[matInd] >= 0.15) {
|
|
mol.addBond(i, j, Bond::BondType::SINGLE);
|
|
}
|
|
matInd++;
|
|
}
|
|
}
|
|
} // connectivityHueckel()
|
|
#else
|
|
void connectivityHueckel(RWMol &, int) {
|
|
CHECK_INVARIANT(0, "YAeHMOP support not available");
|
|
}
|
|
#endif
|
|
|
|
void connectivityVdW(RWMol &mol, double covFactor) {
|
|
auto numAtoms = mol.getNumAtoms();
|
|
double *distMat = MolOps::get3DDistanceMat(mol);
|
|
|
|
std::vector<double> rcov(numAtoms);
|
|
for (unsigned int i = 0; i < numAtoms; i++) {
|
|
rcov[i] = covFactor * PeriodicTable::getTable()->getRcovalent(
|
|
mol.getAtomWithIdx(i)->getAtomicNum());
|
|
}
|
|
for (unsigned int i = 0; i < numAtoms; i++) {
|
|
for (unsigned int j = i + 1; j < numAtoms; j++) {
|
|
if (distMat[i * numAtoms + j] <= (rcov[i] + rcov[j])) {
|
|
mol.addBond(i, j, Bond::BondType::SINGLE);
|
|
}
|
|
}
|
|
}
|
|
} // connectivityVdW()
|
|
|
|
void determineConnectivity(RWMol &mol, bool useHueckel, int charge,
|
|
double covFactor, bool useVdw) {
|
|
#ifndef RDK_BUILD_YAEHMOP_SUPPORT
|
|
if (useHueckel) {
|
|
throw ValueErrorException(
|
|
"The RDKit was not compiled with YAeHMOP support");
|
|
}
|
|
#endif
|
|
// remove all bonds
|
|
mol.beginBatchEdit();
|
|
for (auto bond : mol.bonds()) {
|
|
mol.removeBond(bond->getBeginAtomIdx(), bond->getEndAtomIdx());
|
|
}
|
|
mol.commitBatchEdit();
|
|
// set all atoms to noImplicit
|
|
for (auto atom : mol.atoms()) {
|
|
atom->setNoImplicit(true);
|
|
}
|
|
if (useHueckel) {
|
|
connectivityHueckel(mol, charge);
|
|
} else if (useVdw) {
|
|
connectivityVdW(mol, covFactor);
|
|
} else {
|
|
ConnectTheDots(&mol, ctdQUICKREMOVE_H_H_CONTACTS);
|
|
}
|
|
} // determineConnectivity()
|
|
|
|
void getUnsaturated(const std::vector<unsigned int> &order,
|
|
const std::vector<unsigned int> &valency,
|
|
std::vector<unsigned int> &unsat) {
|
|
for (unsigned int i = 0; i < order.size(); i++) {
|
|
if (order[i] > valency[i]) {
|
|
unsat.push_back(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
void getUnsaturatedPairs(
|
|
const std::vector<std::vector<unsigned int>> &ordMat,
|
|
const std::vector<unsigned int> &unsat,
|
|
std::vector<std::pair<unsigned int, unsigned int>> &unsatPairs) {
|
|
for (unsigned int i = 0; i < unsat.size(); i++) {
|
|
for (unsigned int j = i + 1; j < unsat.size(); j++) {
|
|
if (ordMat[unsat[i]][unsat[j]]) {
|
|
unsatPairs.push_back(std::make_pair(unsat[i], unsat[j]));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool checkValency(const std::vector<unsigned int> &order,
|
|
const std::vector<unsigned int> &valency) {
|
|
for (unsigned int i = 0; i < valency.size(); i++) {
|
|
if (valency[i] > order[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int getAtomicCharge(int atom, unsigned int valence) {
|
|
if (atom == 1) {
|
|
return 1 - valence;
|
|
} else if (atom == 5) {
|
|
return 3 - valence;
|
|
} else if (atom == 15 && valence == 5) {
|
|
return 0;
|
|
} else if (atom == 16 && valence == 6) {
|
|
return 0;
|
|
} else {
|
|
return PeriodicTable::getTable()->getNouterElecs(atom) - 8 + valence;
|
|
}
|
|
}
|
|
|
|
bool checkCharge(RWMol &mol, const std::vector<unsigned int> &valency,
|
|
int charge) {
|
|
int molCharge = 0;
|
|
for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
|
|
const auto atom = mol.getAtomWithIdx(i);
|
|
int atomCharge = getAtomicCharge(atom->getAtomicNum(), valency[i]);
|
|
molCharge += atomCharge;
|
|
if (atom->getAtomicNum() == 6) {
|
|
if (atom->getDegree() == 2 && valency[i] == 2) {
|
|
molCharge += 1;
|
|
atomCharge = 2;
|
|
} else if (atom->getDegree() == 3 && (molCharge + 1 < charge)) {
|
|
molCharge += 2;
|
|
atomCharge = 1;
|
|
}
|
|
}
|
|
}
|
|
return molCharge == charge;
|
|
}
|
|
|
|
void setAtomicCharges(RWMol &mol, const std::vector<unsigned int> &valency,
|
|
int charge) {
|
|
int molCharge = 0;
|
|
for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
|
|
auto atom = mol.getAtomWithIdx(i);
|
|
int atomCharge = getAtomicCharge(
|
|
atom->getAtomicNum() - atom->getFormalCharge(), valency[i]);
|
|
molCharge += atomCharge;
|
|
if (atom->getAtomicNum() == 6) {
|
|
if (atom->getDegree() == 2 && valency[i] == 2) {
|
|
molCharge += 1;
|
|
atomCharge = 0;
|
|
} else if (atom->getDegree() == 3 && (molCharge + 1 < charge)) {
|
|
molCharge += 2;
|
|
atomCharge = 1;
|
|
}
|
|
}
|
|
if (atomCharge != 0) {
|
|
atom->setFormalCharge(atomCharge);
|
|
}
|
|
}
|
|
}
|
|
|
|
void setAtomicRadicals(RWMol &mol, const std::vector<unsigned int> &valency,
|
|
int charge) {
|
|
for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
|
|
auto atom = mol.getAtomWithIdx(i);
|
|
int atomCharge = getAtomicCharge(atom->getAtomicNum(), valency[i]);
|
|
if (atomCharge != 0) {
|
|
atom->setNumRadicalElectrons(std::abs(charge));
|
|
}
|
|
}
|
|
}
|
|
|
|
bool checkSaturation(const std::vector<unsigned int> &order,
|
|
const std::vector<unsigned int> &valency) {
|
|
for (unsigned int i = 0; i < order.size(); i++) {
|
|
if (order[i] > valency[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void setAtomMap(RWMol &mol) {
|
|
for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
|
|
auto atom = mol.getAtomWithIdx(i);
|
|
atom->setAtomMapNum(i + 1);
|
|
}
|
|
}
|
|
|
|
void setChirality(RWMol &mol) {
|
|
MolOps::sanitizeMol(mol);
|
|
MolOps::setDoubleBondNeighborDirections(mol, &mol.getConformer());
|
|
MolOps::assignStereochemistryFrom3D(mol);
|
|
}
|
|
|
|
void addBondOrdering(RWMol &mol,
|
|
const std::vector<std::vector<unsigned int>> &ordMat,
|
|
const std::vector<unsigned int> &valency,
|
|
bool allowChargedFragments, bool embedChiral,
|
|
bool useAtomMap, int charge) {
|
|
auto numAtoms = mol.getNumAtoms();
|
|
|
|
for (unsigned int i = 0; i < numAtoms; i++) {
|
|
for (unsigned int j = i + 1; j < numAtoms; j++) {
|
|
if (ordMat[i][j] == 0) {
|
|
continue;
|
|
} else if (ordMat[i][j] == 1) {
|
|
mol.getBondBetweenAtoms(i, j)->setBondType(Bond::BondType::SINGLE);
|
|
} else if (ordMat[i][j] == 2) {
|
|
mol.getBondBetweenAtoms(i, j)->setBondType(Bond::BondType::DOUBLE);
|
|
} else if (ordMat[i][j] == 3) {
|
|
mol.getBondBetweenAtoms(i, j)->setBondType(Bond::BondType::TRIPLE);
|
|
} else {
|
|
mol.getBondBetweenAtoms(i, j)->setBondType(Bond::BondType::SINGLE);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (allowChargedFragments) {
|
|
setAtomicCharges(mol, valency, charge);
|
|
} else {
|
|
setAtomicRadicals(mol, valency, charge);
|
|
}
|
|
|
|
if (MolOps::getFormalCharge(mol) != charge) {
|
|
std::stringstream ss;
|
|
ss << "Final molecular charge (" << charge << ") does not match input ("
|
|
<< MolOps::getFormalCharge(mol)
|
|
<< "); could not find valid bond ordering";
|
|
throw ValueErrorException(ss.str());
|
|
}
|
|
|
|
if (useAtomMap) {
|
|
setAtomMap(mol);
|
|
}
|
|
|
|
if (embedChiral) {
|
|
setChirality(mol);
|
|
}
|
|
}
|
|
|
|
void determineBondOrders(RWMol &mol, int charge, bool allowChargedFragments,
|
|
bool embedChiral, bool useAtomMap, size_t iterations) {
|
|
if (iterations == 0) {
|
|
// LazyCartesianProduct allows up to uint1024_t::max iterations,
|
|
// but it's unlikely we'll even get to size_t::max
|
|
iterations = std::numeric_limits<size_t>::max();
|
|
}
|
|
|
|
auto numAtoms = mol.getNumAtoms();
|
|
|
|
std::vector<std::vector<unsigned int>> conMat(
|
|
numAtoms, std::vector<unsigned int>(numAtoms, 0));
|
|
std::vector<unsigned int> origValency(numAtoms, 0);
|
|
|
|
for (const auto bond : mol.bonds()) {
|
|
auto i = bond->getBeginAtomIdx();
|
|
auto j = bond->getEndAtomIdx();
|
|
// conMat is symmetric
|
|
if (i > j) {
|
|
std::swap(i, j);
|
|
}
|
|
conMat[i][j]++;
|
|
origValency[i]++;
|
|
origValency[j]++;
|
|
}
|
|
|
|
std::vector<std::vector<unsigned int>> best(conMat);
|
|
std::vector<unsigned int> bestValency(origValency);
|
|
int bestSum = std::accumulate(origValency.begin(), origValency.end(), 0);
|
|
|
|
auto valenceCombos = getValenceCombinations(mol);
|
|
|
|
bool valencyValid = false;
|
|
bool chargeValid = false;
|
|
bool saturationValid = false;
|
|
|
|
ControlCHandler::reset();
|
|
|
|
while (!valenceCombos.atEnd()) {
|
|
if (--iterations == 0) {
|
|
throw MaxFindBondOrdersItersExceeded();
|
|
}
|
|
if (ControlCHandler::getGotSignal()) {
|
|
BOOST_LOG(rdWarningLog)
|
|
<< "Interrupted, cancelling determine Bond Orders" << std::endl;
|
|
return;
|
|
}
|
|
|
|
auto order = valenceCombos.next();
|
|
|
|
std::vector<unsigned int> unsat;
|
|
getUnsaturated(order, origValency, unsat);
|
|
// checks whether the atomic connectivity is valid for the current set of
|
|
// atomic valences
|
|
if (unsat.empty()) {
|
|
valencyValid = checkValency(order, origValency);
|
|
chargeValid = checkCharge(mol, origValency, charge);
|
|
saturationValid = checkSaturation(order, origValency);
|
|
|
|
if (valencyValid && chargeValid && saturationValid) {
|
|
addBondOrdering(mol, conMat, origValency, allowChargedFragments,
|
|
embedChiral, useAtomMap, charge);
|
|
return;
|
|
} else {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
std::vector<std::vector<unsigned int>> ordMat(conMat);
|
|
std::vector<unsigned int> valency(origValency);
|
|
bool newBonds = false;
|
|
do {
|
|
newBonds = false;
|
|
std::vector<unsigned int> unsat;
|
|
getUnsaturated(order, valency, unsat);
|
|
std::vector<std::pair<unsigned int, unsigned int>> unsatPairs;
|
|
getUnsaturatedPairs(conMat, unsat, unsatPairs);
|
|
|
|
if (!unsatPairs.empty()) {
|
|
Graph graph(unsatPairs.begin(), unsatPairs.end(), numAtoms);
|
|
std::vector<boost::graph_traits<Graph>::vertex_descriptor> mate(
|
|
numAtoms);
|
|
edmonds_maximum_cardinality_matching(graph, &mate[0]);
|
|
|
|
boost::graph_traits<Graph>::vertex_iterator vi, viEnd;
|
|
for (boost::tie(vi, viEnd) = vertices(graph); vi != viEnd; ++vi) {
|
|
if (mate[*vi] != boost::graph_traits<Graph>::null_vertex() &&
|
|
*vi < mate[*vi]) {
|
|
newBonds = true;
|
|
ordMat[*vi][mate[*vi]]++;
|
|
valency[*vi]++;
|
|
valency[mate[*vi]]++;
|
|
}
|
|
}
|
|
}
|
|
|
|
} while (newBonds);
|
|
|
|
valencyValid = checkValency(order, valency);
|
|
chargeValid = checkCharge(mol, valency, charge);
|
|
saturationValid = checkSaturation(order, valency);
|
|
|
|
if (valencyValid && chargeValid) {
|
|
if (saturationValid) {
|
|
addBondOrdering(mol, ordMat, valency, allowChargedFragments,
|
|
embedChiral, useAtomMap, charge);
|
|
return;
|
|
} else {
|
|
int sum = std::accumulate(valency.begin(), valency.end(), 0);
|
|
if (sum > bestSum) {
|
|
best = ordMat;
|
|
bestSum = sum;
|
|
bestValency = valency;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
addBondOrdering(mol, best, bestValency, allowChargedFragments, embedChiral,
|
|
useAtomMap, charge);
|
|
return;
|
|
} // determineBondOrdering()
|
|
|
|
void determineBonds(RWMol &mol, bool useHueckel, int charge, double covFactor,
|
|
bool allowChargedFragments, bool embedChiral,
|
|
bool useAtomMap, bool useVdw, size_t maxIterations) {
|
|
if (mol.getNumAtoms() <= 1) {
|
|
return;
|
|
}
|
|
determineConnectivity(mol, useHueckel, charge, covFactor, useVdw);
|
|
determineBondOrders(mol, charge, allowChargedFragments, embedChiral,
|
|
useAtomMap, maxIterations);
|
|
} // determineBonds()
|
|
|
|
} // namespace RDKit
|