mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* move _embedPoints over to use EmbedArgs structure * move _embedWithExpTorsions over * a few pointer cleanups * step by step * more * wrap the ETKDG paramaters into a struct * fix a typo * additional cleanup * more * simplify the embedargs struct * pass embedParams instead of having them in embedargs * a bit more cleanup * response to code review
993 lines
38 KiB
C++
993 lines
38 KiB
C++
//
|
|
// Copyright (C) 2004-2018 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
// define DEBUG_EMBEDDING 1
|
|
#include "Embedder.h"
|
|
#include <DistGeom/BoundsMatrix.h>
|
|
#include <DistGeom/DistGeomUtils.h>
|
|
#include <DistGeom/TriangleSmooth.h>
|
|
#include <DistGeom/ChiralViolationContrib.h>
|
|
#include "BoundsMatrixBuilder.h"
|
|
#include <ForceField/ForceField.h>
|
|
#include <GraphMol/ROMol.h>
|
|
#include <GraphMol/Atom.h>
|
|
#include <GraphMol/AtomIterators.h>
|
|
#include <GraphMol/RingInfo.h>
|
|
|
|
#include <GraphMol/Conformer.h>
|
|
#include <RDGeneral/types.h>
|
|
#include <RDGeneral/RDLog.h>
|
|
#include <RDGeneral/Exceptions.h>
|
|
|
|
#include <Geometry/Transform3D.h>
|
|
#include <Numerics/Alignment/AlignPoints.h>
|
|
#include <DistGeom/ChiralSet.h>
|
|
#include <GraphMol/MolOps.h>
|
|
#include <GraphMol/ForceFieldHelpers/CrystalFF/TorsionPreferences.h>
|
|
#include <boost/dynamic_bitset.hpp>
|
|
#include <iomanip>
|
|
#include <RDGeneral/RDThreads.h>
|
|
|
|
#ifdef RDK_THREADSAFE_SSS
|
|
#include <future>
|
|
#endif
|
|
|
|
//#define DEBUG_EMBEDDING 1
|
|
|
|
namespace {
|
|
const double ERROR_TOL = 0.00001;
|
|
// these tolerances, all to detect and filter out bogus conformations, are a
|
|
// delicate balance between sensitive enough to detect obviously bad
|
|
// conformations but not so sensitive that a bunch of ok conformations get
|
|
// filtered out, which slows down the whole conformation generation process
|
|
const double MAX_MINIMIZED_E_PER_ATOM = 0.05;
|
|
const double MAX_MINIMIZED_E_CONTRIB = 0.20;
|
|
const double MIN_TETRAHEDRAL_CHIRAL_VOL = 0.50;
|
|
const double TETRAHEDRAL_CENTERINVOLUME_TOL = 0.30;
|
|
} // namespace
|
|
|
|
namespace RDKit {
|
|
namespace DGeomHelpers {
|
|
|
|
//! Parameters corresponding to Sereina Riniker's KDG approach
|
|
const EmbedParameters KDG(0, // maxIterations
|
|
1, // numThreads
|
|
-1, // randomSeed
|
|
true, // clearConfs
|
|
false, // useRandomCoords
|
|
2.0, // boxSizeMult
|
|
true, // randNegEig
|
|
1, // numZeroFail
|
|
NULL, // coordMap
|
|
1e-3, // optimizerForceTol
|
|
false, // ignoreSmoothingFailures
|
|
true, // enforceChirality
|
|
false, // useExpTorsionAnglePrefs
|
|
true, // useBasicKnowledge
|
|
false, // verbose
|
|
5.0, // basinThresh
|
|
-1.0, // pruneRmsThresh
|
|
true, // onlyHeavyAtomsForRMS
|
|
1 // ETversion
|
|
);
|
|
|
|
//! Parameters corresponding to Sereina Riniker's ETDG approach
|
|
const EmbedParameters ETDG(0, // maxIterations
|
|
1, // numThreads
|
|
-1, // randomSeed
|
|
true, // clearConfs
|
|
false, // useRandomCoords
|
|
2.0, // boxSizeMult
|
|
true, // randNegEig
|
|
1, // numZeroFail
|
|
NULL, // coordMap
|
|
1e-3, // optimizerForceTol
|
|
false, // ignoreSmoothingFailures
|
|
false, // enforceChirality
|
|
true, // useExpTorsionAnglePrefs
|
|
false, // useBasicKnowledge
|
|
false, // verbose
|
|
5.0, // basinThresh
|
|
-1.0, // pruneRmsThresh
|
|
true, // onlyHeavyAtomsForRMS
|
|
1 // ETversion
|
|
);
|
|
//! Parameters corresponding to Sereina Riniker's ETKDG approach
|
|
const EmbedParameters ETKDG(0, // maxIterations
|
|
1, // numThreads
|
|
-1, // randomSeed
|
|
true, // clearConfs
|
|
false, // useRandomCoords
|
|
2.0, // boxSizeMult
|
|
true, // randNegEig
|
|
1, // numZeroFail
|
|
NULL, // coordMap
|
|
1e-3, // optimizerForceTol
|
|
false, // ignoreSmoothingFailures
|
|
true, // enforceChirality
|
|
true, // useExpTorsionAnglePrefs
|
|
true, // useBasicKnowledge
|
|
false, // verbose
|
|
5.0, // basinThresh
|
|
-1.0, // pruneRmsThresh
|
|
true, // onlyHeavyAtomsForRMS
|
|
1 // ETversion
|
|
);
|
|
|
|
//! Parameters corresponding to Sereina Riniker's ETKDG approach - version 2
|
|
const EmbedParameters ETKDGv2(0, // maxIterations
|
|
1, // numThreads
|
|
-1, // randomSeed
|
|
true, // clearConfs
|
|
false, // useRandomCoords
|
|
2.0, // boxSizeMult
|
|
true, // randNegEig
|
|
1, // numZeroFail
|
|
NULL, // coordMap
|
|
1e-3, // optimizerForceTol
|
|
false, // ignoreSmoothingFailures
|
|
true, // enforceChirality
|
|
true, // useExpTorsionAnglePrefs
|
|
true, // useBasicKnowledge
|
|
false, // verbose
|
|
5.0, // basinThresh
|
|
-1.0, // pruneRmsThresh
|
|
true, // onlyHeavyAtomsForRMS
|
|
2 // ETversion
|
|
);
|
|
|
|
namespace detail {
|
|
struct EmbedArgs {
|
|
boost::dynamic_bitset<> *confsOk;
|
|
bool fourD;
|
|
INT_VECT *fragMapping;
|
|
std::vector<Conformer *> *confs;
|
|
unsigned int fragIdx;
|
|
DistGeom::BoundsMatPtr mmat;
|
|
DistGeom::VECT_CHIRALSET const *chiralCenters;
|
|
DistGeom::VECT_CHIRALSET const *tetrahedralCarbons;
|
|
ForceFields::CrystalFF::CrystalFFDetails *etkdgDetails;
|
|
};
|
|
} // namespace detail
|
|
|
|
bool _volumeTest(const DistGeom::ChiralSetPtr &chiralSet,
|
|
const RDGeom::PointPtrVect &positions, bool verbose = false) {
|
|
RDGeom::Point3D p0((*positions[chiralSet->d_idx0])[0],
|
|
(*positions[chiralSet->d_idx0])[1],
|
|
(*positions[chiralSet->d_idx0])[2]);
|
|
RDGeom::Point3D p1((*positions[chiralSet->d_idx1])[0],
|
|
(*positions[chiralSet->d_idx1])[1],
|
|
(*positions[chiralSet->d_idx1])[2]);
|
|
RDGeom::Point3D p2((*positions[chiralSet->d_idx2])[0],
|
|
(*positions[chiralSet->d_idx2])[1],
|
|
(*positions[chiralSet->d_idx2])[2]);
|
|
RDGeom::Point3D p3((*positions[chiralSet->d_idx3])[0],
|
|
(*positions[chiralSet->d_idx3])[1],
|
|
(*positions[chiralSet->d_idx3])[2]);
|
|
RDGeom::Point3D p4((*positions[chiralSet->d_idx4])[0],
|
|
(*positions[chiralSet->d_idx4])[1],
|
|
(*positions[chiralSet->d_idx4])[2]);
|
|
|
|
// even if we are minimizing in higher dimension the chiral volume is
|
|
// calculated using only the first 3 dimensions
|
|
RDGeom::Point3D v1 = p0 - p1;
|
|
v1.normalize();
|
|
RDGeom::Point3D v2 = p0 - p2;
|
|
v2.normalize();
|
|
RDGeom::Point3D v3 = p0 - p3;
|
|
v3.normalize();
|
|
RDGeom::Point3D v4 = p0 - p4;
|
|
v4.normalize();
|
|
|
|
RDGeom::Point3D crossp = v1.crossProduct(v2);
|
|
double vol = crossp.dotProduct(v3);
|
|
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
|
|
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
|
|
crossp = v1.crossProduct(v2);
|
|
vol = crossp.dotProduct(v4);
|
|
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
|
|
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
|
|
crossp = v1.crossProduct(v3);
|
|
vol = crossp.dotProduct(v4);
|
|
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
|
|
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
|
|
crossp = v2.crossProduct(v3);
|
|
vol = crossp.dotProduct(v4);
|
|
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
|
|
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool _sameSide(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2,
|
|
const RDGeom::Point3D &v3, const RDGeom::Point3D &v4,
|
|
const RDGeom::Point3D &p0, double tol = 0.1) {
|
|
RDGeom::Point3D normal = (v2 - v1).crossProduct(v3 - v1);
|
|
double d1 = normal.dotProduct(v4 - v1);
|
|
double d2 = normal.dotProduct(p0 - v1);
|
|
// std::cerr << " " << d1 << " - " << d2 << std::endl;
|
|
if (fabs(d1) < tol || fabs(d2) < tol) return false;
|
|
return !((d1 < 0.) ^ (d2 < 0.));
|
|
}
|
|
bool _centerInVolume(unsigned int idx0, unsigned int idx1, unsigned int idx2,
|
|
unsigned int idx3, unsigned int idx4,
|
|
const RDGeom::PointPtrVect &positions, double tol,
|
|
bool verbose = false) {
|
|
RDGeom::Point3D p0((*positions[idx0])[0], (*positions[idx0])[1],
|
|
(*positions[idx0])[2]);
|
|
RDGeom::Point3D p1((*positions[idx1])[0], (*positions[idx1])[1],
|
|
(*positions[idx1])[2]);
|
|
RDGeom::Point3D p2((*positions[idx2])[0], (*positions[idx2])[1],
|
|
(*positions[idx2])[2]);
|
|
RDGeom::Point3D p3((*positions[idx3])[0], (*positions[idx3])[1],
|
|
(*positions[idx3])[2]);
|
|
RDGeom::Point3D p4((*positions[idx4])[0], (*positions[idx4])[1],
|
|
(*positions[idx4])[2]);
|
|
// RDGeom::Point3D centroid = (p1+p2+p3+p4)/4.;
|
|
if (verbose) {
|
|
std::cerr << _sameSide(p1, p2, p3, p4, p0, tol) << " "
|
|
<< _sameSide(p2, p3, p4, p1, p0, tol) << " "
|
|
<< _sameSide(p3, p4, p1, p2, p0, tol) << " "
|
|
<< _sameSide(p4, p1, p2, p3, p0, tol) << std::endl;
|
|
}
|
|
bool res = _sameSide(p1, p2, p3, p4, p0, tol) &&
|
|
_sameSide(p2, p3, p4, p1, p0, tol) &&
|
|
_sameSide(p3, p4, p1, p2, p0, tol) &&
|
|
_sameSide(p4, p1, p2, p3, p0, tol);
|
|
return res;
|
|
}
|
|
|
|
bool _centerInVolume(const DistGeom::ChiralSetPtr &chiralSet,
|
|
const RDGeom::PointPtrVect &positions, double tol = 0.1,
|
|
bool verbose = false) {
|
|
if (chiralSet->d_idx0 ==
|
|
chiralSet->d_idx4) { // this happens for three-coordinate centers
|
|
return true;
|
|
}
|
|
return _centerInVolume(chiralSet->d_idx0, chiralSet->d_idx1,
|
|
chiralSet->d_idx2, chiralSet->d_idx3,
|
|
chiralSet->d_idx4, positions, tol, verbose);
|
|
}
|
|
bool _boundsFulfilled(const std::vector<int> &atoms,
|
|
const DistGeom::BoundsMatrix &mmat,
|
|
const RDGeom::PointPtrVect &positions) {
|
|
// unsigned int N = mmat.numRows();
|
|
// std::cerr << N << " " << atoms.size() << std::endl;
|
|
// loop over all pair of atoms
|
|
for (unsigned int i = 0; i < atoms.size() - 1; ++i) {
|
|
for (unsigned int j = i + 1; j < atoms.size(); ++j) {
|
|
int a1 = atoms[i];
|
|
int a2 = atoms[j];
|
|
RDGeom::Point3D p0((*positions[a1])[0], (*positions[a1])[1],
|
|
(*positions[a1])[2]);
|
|
RDGeom::Point3D p1((*positions[a2])[0], (*positions[a2])[1],
|
|
(*positions[a2])[2]);
|
|
double d2 = (p0 - p1).length(); // distance
|
|
double lb = mmat.getLowerBound(a1, a2);
|
|
double ub = mmat.getUpperBound(a1, a2); // bounds
|
|
if (((d2 < lb) && (fabs(d2 - lb) > 0.1 * ub)) ||
|
|
((d2 > ub) && (fabs(d2 - ub) > 0.1 * ub))) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << a1 << " " << a2 << ":" << d2 << " " << lb << " " << ub
|
|
<< " " << fabs(d2 - lb) << " " << fabs(d2 - ub) << std::endl;
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
namespace EmbeddingOps {
|
|
bool generateInitialCoords(RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams,
|
|
RDNumeric::DoubleSymmMatrix &distMat,
|
|
RDKit::double_source_type *rng) {
|
|
bool gotCoords = false;
|
|
if (!embedParams.useRandomCoords) {
|
|
double largestDistance =
|
|
DistGeom::pickRandomDistMat(*eargs.mmat, distMat, *rng);
|
|
RDUNUSED_PARAM(largestDistance);
|
|
gotCoords = DistGeom::computeInitialCoords(distMat, *positions, *rng,
|
|
embedParams.randNegEig,
|
|
embedParams.numZeroFail);
|
|
} else {
|
|
double boxSize;
|
|
if (embedParams.boxSizeMult > 0) {
|
|
boxSize = 5. * embedParams.boxSizeMult;
|
|
} else {
|
|
boxSize = -1 * embedParams.boxSizeMult;
|
|
}
|
|
gotCoords = DistGeom::computeRandomCoords(*positions, boxSize, *rng);
|
|
}
|
|
return gotCoords;
|
|
}
|
|
bool firstMinimization(RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
bool gotCoords = true;
|
|
std::unique_ptr<ForceFields::ForceField> field(DistGeom::constructForceField(
|
|
*eargs.mmat, *positions, *eargs.chiralCenters, 1.0, 0.1, nullptr,
|
|
embedParams.basinThresh));
|
|
unsigned int nPasses = 0;
|
|
field->initialize();
|
|
if (field->calcEnergy() > ERROR_TOL) {
|
|
int needMore = 1;
|
|
while (needMore) {
|
|
needMore = field->minimize(400, embedParams.optimizerForceTol);
|
|
++nPasses;
|
|
}
|
|
}
|
|
std::vector<double> e_contribs;
|
|
double local_e = field->calcEnergy(&e_contribs);
|
|
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " Energy : " << local_e / positions.size() << " "
|
|
<< *(std::max_element(e_contribs.begin(), e_contribs.end()))
|
|
<< std::endl;
|
|
#endif
|
|
|
|
// check that neither the energy nor any of the contributions to it are
|
|
// too high (this is part of github #971)
|
|
if (local_e / positions->size() >= MAX_MINIMIZED_E_PER_ATOM ||
|
|
(e_contribs.size() &&
|
|
*(std::max_element(e_contribs.begin(), e_contribs.end())) >
|
|
MAX_MINIMIZED_E_CONTRIB)) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " Energy fail: " << local_e / positions->size() << " "
|
|
<< *(std::max_element(e_contribs.begin(), e_contribs.end()))
|
|
<< std::endl;
|
|
#endif
|
|
gotCoords = false;
|
|
}
|
|
return gotCoords;
|
|
}
|
|
|
|
bool checkTetrahedralCenters(const RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
RDUNUSED_PARAM(embedParams);
|
|
// for each of the atoms in the "tetrahedralCarbons" list, make sure
|
|
// that there is a minimum volume around them and that they are inside
|
|
// that volume. (this is part of github #971)
|
|
for (const auto tetSet : *eargs.tetrahedralCarbons) {
|
|
// it could happen that the centroid is outside the volume defined
|
|
// by the other
|
|
// four points. That is also a fail.
|
|
if (!_volumeTest(tetSet, *positions) ||
|
|
!_centerInVolume(tetSet, *positions, TETRAHEDRAL_CENTERINVOLUME_TOL)) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " fail2! (" << tetSet->d_idx0 << ") iter: " << iter
|
|
<< " vol: " << _volumeTest(tetSet, *positions, true)
|
|
<< " center: "
|
|
<< _centerInVolume(tetSet, *positions,
|
|
TETRAHEDRAL_CENTERINVOLUME_TOL, true)
|
|
<< std::endl;
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
bool checkChiralCenters(const RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
RDUNUSED_PARAM(embedParams);
|
|
// check the chiral volume:
|
|
for (const auto chiralSet : *eargs.chiralCenters) {
|
|
double vol = DistGeom::ChiralViolationContrib::calcChiralVolume(
|
|
chiralSet->d_idx1, chiralSet->d_idx2, chiralSet->d_idx3,
|
|
chiralSet->d_idx4, *positions);
|
|
double lb = chiralSet->getLowerVolumeBound();
|
|
double ub = chiralSet->getUpperVolumeBound();
|
|
if ((lb > 0 && vol < lb && (lb - vol) / lb > .2) ||
|
|
(ub < 0 && vol > ub && (vol - ub) / ub > .2)) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " fail! (" << chiralSet->d_idx0 << ") iter: " << iter << " "
|
|
<< vol << " " << lb << "-" << ub << std::endl;
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
bool minimizeFourthDimension(RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
// now redo the minimization if we have a chiral center
|
|
// or have started from random coords. This
|
|
// time removing the chiral constraints and
|
|
// increasing the weight on the fourth dimension
|
|
|
|
std::unique_ptr<ForceFields::ForceField> field2(DistGeom::constructForceField(
|
|
*eargs.mmat, *positions, *eargs.chiralCenters, 0.2, 1.0, nullptr,
|
|
embedParams.basinThresh));
|
|
field2->initialize();
|
|
// std::cerr<<"FIELD2 E: "<<field2->calcEnergy()<<std::endl;
|
|
if (field2->calcEnergy() > ERROR_TOL) {
|
|
int needMore = 1;
|
|
int nPasses2 = 0;
|
|
while (needMore) {
|
|
needMore = field2->minimize(200, embedParams.optimizerForceTol);
|
|
++nPasses2;
|
|
}
|
|
// std::cerr<<" "<<field2->calcEnergy()<<" after npasses2:
|
|
// "<<nPasses2<<std::endl;
|
|
}
|
|
return true;
|
|
}
|
|
// the minimization using experimental torsion angle preferences
|
|
bool minimizeWithExpTorsions(RDGeom::PointPtrVect &positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
PRECONDITION(eargs.etkdgDetails, "bogus etkdgDetails pointer");
|
|
bool planar = true;
|
|
|
|
// convert to 3D positions and create coordMap
|
|
RDGeom::Point3DPtrVect positions3D;
|
|
for (auto &position : positions) {
|
|
positions3D.push_back(
|
|
new RDGeom::Point3D((*position)[0], (*position)[1], (*position)[2]));
|
|
}
|
|
|
|
// create the force field
|
|
std::unique_ptr<ForceFields::ForceField> field;
|
|
if (embedParams.useBasicKnowledge) { // ETKDG or KDG
|
|
field.reset(DistGeom::construct3DForceField(
|
|
*eargs.mmat, positions3D, *eargs.etkdgDetails));
|
|
} else { // plain ETDG
|
|
field.reset(DistGeom::constructPlain3DForceField(
|
|
*eargs.mmat, positions3D, *eargs.etkdgDetails));
|
|
}
|
|
|
|
// minimize!
|
|
field->initialize();
|
|
if (field->calcEnergy() > ERROR_TOL) {
|
|
// while (needMore) {
|
|
field->minimize(300, embedParams.optimizerForceTol);
|
|
// ++nPasses;
|
|
//}
|
|
}
|
|
// std::cout << field->calcEnergy() << std::endl;
|
|
|
|
// check for planarity if ETKDG or KDG
|
|
if (embedParams.useBasicKnowledge) {
|
|
// create a force field with only the impropers
|
|
std::unique_ptr<ForceFields::ForceField> field2(
|
|
DistGeom::construct3DImproperForceField(
|
|
*eargs.mmat, positions3D, eargs.etkdgDetails->improperAtoms,
|
|
eargs.etkdgDetails->atomNums));
|
|
field2->initialize();
|
|
// check if the energy is low enough
|
|
double planarityTolerance = 0.7;
|
|
if (field2->calcEnergy() >
|
|
eargs.etkdgDetails->improperAtoms.size() * planarityTolerance) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " planar fail: " << field2->calcEnergy() << " "
|
|
<< eargs.etkdgDetails->improperAtoms.size() * planarityTolerance
|
|
<< std::endl;
|
|
#endif
|
|
planar = false;
|
|
}
|
|
}
|
|
|
|
// overwrite positions and delete the 3D ones
|
|
for (unsigned int i = 0; i < positions3D.size(); ++i) {
|
|
(*positions[i])[0] = (*positions3D[i])[0];
|
|
(*positions[i])[1] = (*positions3D[i])[1];
|
|
(*positions[i])[2] = (*positions3D[i])[2];
|
|
delete positions3D[i];
|
|
}
|
|
|
|
return planar;
|
|
}
|
|
|
|
bool finalChiralChecks(RDGeom::PointPtrVect *positions,
|
|
const detail::EmbedArgs &eargs,
|
|
const EmbedParameters &embedParams) {
|
|
RDUNUSED_PARAM(embedParams);
|
|
// "distance matrix" chirality test
|
|
std::set<int> atoms;
|
|
for (const auto chiralSet : *eargs.chiralCenters) {
|
|
if (chiralSet->d_idx0 != chiralSet->d_idx4) {
|
|
atoms.insert(chiralSet->d_idx0);
|
|
atoms.insert(chiralSet->d_idx1);
|
|
atoms.insert(chiralSet->d_idx2);
|
|
atoms.insert(chiralSet->d_idx3);
|
|
atoms.insert(chiralSet->d_idx4);
|
|
}
|
|
}
|
|
std::vector<int> atomsToCheck(atoms.begin(), atoms.end());
|
|
if (atomsToCheck.size() > 0) {
|
|
if (!_boundsFulfilled(atomsToCheck, *eargs.mmat, *positions)) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " fail3a! (" << atomsToCheck[0] << ") iter: " << iter
|
|
<< std::endl;
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// "center in volume" chirality test
|
|
for (const auto chiralSet : *eargs.chiralCenters) {
|
|
// it could happen that the centroid is outside the volume defined
|
|
// by the other four points. That is also a fail.
|
|
if (!_centerInVolume(chiralSet, *positions)) {
|
|
#ifdef DEBUG_EMBEDDING
|
|
std::cerr << " fail3b! (" << chiralSet->d_idx0 << ") iter: " << iter
|
|
<< std::endl;
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool embedPoints(RDGeom::PointPtrVect *positions, detail::EmbedArgs eargs,
|
|
EmbedParameters embedParams, int seed) {
|
|
PRECONDITION(positions, "bogus positions");
|
|
if (embedParams.maxIterations == 0) {
|
|
embedParams.maxIterations = 10 * positions->size();
|
|
}
|
|
RDNumeric::DoubleSymmMatrix distMat(positions->size(), 0.0);
|
|
|
|
// The basin threshold just gets us into trouble when we're using
|
|
// random coordinates since it ends up ignoring 1-4 (and higher)
|
|
// interactions. This causes us to get folded-up (and self-penetrating)
|
|
// conformations for large flexible molecules
|
|
if (embedParams.useRandomCoords) embedParams.basinThresh = 1e8;
|
|
|
|
RDKit::double_source_type *rng = nullptr;
|
|
RDKit::rng_type *generator;
|
|
RDKit::uniform_double *distrib;
|
|
CHECK_INVARIANT(seed >= -1,
|
|
"random seed must either be positive, zero, or negative one");
|
|
if (seed > -1) {
|
|
generator = new RDKit::rng_type(42u);
|
|
generator->seed(seed);
|
|
distrib = new RDKit::uniform_double(0.0, 1.0);
|
|
rng = new RDKit::double_source_type(*generator, *distrib);
|
|
} else {
|
|
rng = &RDKit::getDoubleRandomSource();
|
|
}
|
|
|
|
bool gotCoords = false;
|
|
unsigned int iter = 0;
|
|
while ((gotCoords == false) && (iter < embedParams.maxIterations)) {
|
|
++iter;
|
|
gotCoords = EmbeddingOps::generateInitialCoords(positions, eargs,
|
|
embedParams, distMat, rng);
|
|
#ifdef DEBUG_EMBEDDING
|
|
if (!gotCoords) {
|
|
std::cerr << "Initial embedding failed!, Iter: " << iter << std::endl;
|
|
}
|
|
#endif
|
|
if (gotCoords) {
|
|
gotCoords =
|
|
EmbeddingOps::firstMinimization(positions, eargs, embedParams);
|
|
if (gotCoords) {
|
|
gotCoords = EmbeddingOps::checkTetrahedralCenters(positions, eargs,
|
|
embedParams);
|
|
}
|
|
|
|
// Check if any of our chiral centers are badly out of whack.
|
|
if (gotCoords && embedParams.enforceChirality &&
|
|
eargs.chiralCenters->size() > 0) {
|
|
gotCoords =
|
|
EmbeddingOps::checkChiralCenters(positions, eargs, embedParams);
|
|
}
|
|
// redo the minimization if we have a chiral center
|
|
// or have started from random coords.
|
|
if (gotCoords &&
|
|
(eargs.chiralCenters->size() > 0 || embedParams.useRandomCoords)) {
|
|
gotCoords = EmbeddingOps::minimizeFourthDimension(positions, eargs,
|
|
embedParams);
|
|
}
|
|
|
|
// (ET)(K)DG
|
|
if (gotCoords && (embedParams.useExpTorsionAnglePrefs ||
|
|
embedParams.useBasicKnowledge)) {
|
|
gotCoords = EmbeddingOps::minimizeWithExpTorsions(*positions, eargs,
|
|
embedParams);
|
|
}
|
|
// test if chirality is correct
|
|
if (embedParams.enforceChirality && gotCoords &&
|
|
(eargs.chiralCenters->size() > 0)) {
|
|
gotCoords =
|
|
EmbeddingOps::finalChiralChecks(positions, eargs, embedParams);
|
|
}
|
|
} // if(gotCoords)
|
|
} // while
|
|
if (seed > -1 && rng) {
|
|
delete rng;
|
|
delete generator;
|
|
delete distrib;
|
|
}
|
|
return gotCoords;
|
|
}
|
|
|
|
void findChiralSets(const ROMol &mol, DistGeom::VECT_CHIRALSET &chiralCenters,
|
|
DistGeom::VECT_CHIRALSET &tetrahedralCenters,
|
|
const std::map<int, RDGeom::Point3D> *coordMap) {
|
|
for (const auto &atom : mol.atoms()) {
|
|
if (atom->getAtomicNum() != 1) { // skip hydrogens
|
|
Atom::ChiralType chiralType = atom->getChiralTag();
|
|
if ((chiralType == Atom::CHI_TETRAHEDRAL_CW ||
|
|
chiralType == Atom::CHI_TETRAHEDRAL_CCW) ||
|
|
((atom->getAtomicNum() == 6 || atom->getAtomicNum() == 7) &&
|
|
atom->getDegree() == 4)) {
|
|
// make a chiral set from the neighbors
|
|
INT_VECT nbrs;
|
|
nbrs.reserve(4);
|
|
// find the neighbors of this atom and enter them into the
|
|
// nbr list
|
|
ROMol::OEDGE_ITER beg, end;
|
|
boost::tie(beg, end) = mol.getAtomBonds(atom);
|
|
while (beg != end) {
|
|
nbrs.push_back(mol[*beg]->getOtherAtom(atom)->getIdx());
|
|
++beg;
|
|
}
|
|
// if we have less than 4 heavy atoms as neighbors,
|
|
// we need to include the chiral center into the mix
|
|
// we should at least have 3 though
|
|
bool includeSelf = false;
|
|
RDUNUSED_PARAM(includeSelf);
|
|
CHECK_INVARIANT(nbrs.size() >= 3, "Cannot be a chiral center");
|
|
|
|
if (nbrs.size() < 4) {
|
|
nbrs.insert(nbrs.end(), atom->getIdx());
|
|
includeSelf = true;
|
|
}
|
|
|
|
// now create a chiral set and set the upper and lower bound on the
|
|
// volume
|
|
if (chiralType == Atom::CHI_TETRAHEDRAL_CCW) {
|
|
// postive chiral volume
|
|
auto *cset = new DistGeom::ChiralSet(atom->getIdx(), nbrs[0], nbrs[1],
|
|
nbrs[2], nbrs[3], 5.0, 100.0);
|
|
DistGeom::ChiralSetPtr cptr(cset);
|
|
chiralCenters.push_back(cptr);
|
|
} else if (chiralType == Atom::CHI_TETRAHEDRAL_CW) {
|
|
auto *cset = new DistGeom::ChiralSet(atom->getIdx(), nbrs[0], nbrs[1],
|
|
nbrs[2], nbrs[3], -100.0, -5.0);
|
|
DistGeom::ChiralSetPtr cptr(cset);
|
|
chiralCenters.push_back(cptr);
|
|
} else {
|
|
if ((coordMap && coordMap->find(atom->getIdx()) != coordMap->end()) ||
|
|
(mol.getRingInfo()->isInitialized() &&
|
|
(mol.getRingInfo()->numAtomRings(atom->getIdx()) < 2 ||
|
|
mol.getRingInfo()->isAtomInRingOfSize(atom->getIdx(), 3)))) {
|
|
// we only want to these tests for ring atoms that are not part of
|
|
// the coordMap
|
|
// there's no sense doing 3-rings because those are a nightmare
|
|
} else {
|
|
auto *cset = new DistGeom::ChiralSet(
|
|
atom->getIdx(), nbrs[0], nbrs[1], nbrs[2], nbrs[3], 0.0, 0.0);
|
|
DistGeom::ChiralSetPtr cptr(cset);
|
|
tetrahedralCenters.push_back(cptr);
|
|
}
|
|
}
|
|
} // if block -chirality check
|
|
} // if block - heavy atom check
|
|
} // for loop over atoms
|
|
} // end of _findChiralSets
|
|
|
|
void adjustBoundsMatFromCoordMap(
|
|
DistGeom::BoundsMatPtr mmat, unsigned int nAtoms,
|
|
const std::map<int, RDGeom::Point3D> *coordMap) {
|
|
RDUNUSED_PARAM(nAtoms);
|
|
for (auto iIt = coordMap->begin(); iIt != coordMap->end(); ++iIt) {
|
|
unsigned int iIdx = iIt->first;
|
|
const RDGeom::Point3D &iPoint = iIt->second;
|
|
auto jIt = iIt;
|
|
while (++jIt != coordMap->end()) {
|
|
unsigned int jIdx = jIt->first;
|
|
const RDGeom::Point3D &jPoint = jIt->second;
|
|
double dist = (iPoint - jPoint).length();
|
|
mmat->setUpperBound(iIdx, jIdx, dist);
|
|
mmat->setLowerBound(iIdx, jIdx, dist);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool setupInitialBoundsMatrix(
|
|
ROMol *mol, DistGeom::BoundsMatPtr mmat,
|
|
const std::map<int, RDGeom::Point3D> *coordMap,
|
|
const EmbedParameters ¶ms,
|
|
ForceFields::CrystalFF::CrystalFFDetails &etkdgDetails) {
|
|
PRECONDITION(mol, "bad molecule");
|
|
unsigned int nAtoms = mol->getNumAtoms();
|
|
if (params.useExpTorsionAnglePrefs || params.useBasicKnowledge) {
|
|
ForceFields::CrystalFF::getExperimentalTorsions(
|
|
*mol, etkdgDetails, params.useExpTorsionAnglePrefs,
|
|
params.useBasicKnowledge, params.ETversion, params.verbose);
|
|
setTopolBounds(*mol, mmat, etkdgDetails.bonds, etkdgDetails.angles, true,
|
|
false);
|
|
etkdgDetails.atomNums.resize(nAtoms);
|
|
for (unsigned int i = 0; i < nAtoms; ++i) {
|
|
etkdgDetails.atomNums[i] = (*mol).getAtomWithIdx(i)->getAtomicNum();
|
|
}
|
|
} else {
|
|
setTopolBounds(*mol, mmat, true, false);
|
|
}
|
|
double tol = 0.0;
|
|
if (coordMap) {
|
|
adjustBoundsMatFromCoordMap(mmat, nAtoms, coordMap);
|
|
tol = 0.05;
|
|
}
|
|
if (!DistGeom::triangleSmoothBounds(mmat, tol)) {
|
|
// ok this bound matrix failed to triangle smooth - re-compute the
|
|
// bounds matrix without 15 bounds and with VDW scaling
|
|
initBoundsMat(mmat);
|
|
setTopolBounds(*mol, mmat, false, true);
|
|
|
|
if (coordMap) {
|
|
adjustBoundsMatFromCoordMap(mmat, nAtoms, coordMap);
|
|
}
|
|
|
|
// try triangle smoothing again
|
|
if (!DistGeom::triangleSmoothBounds(mmat, tol)) {
|
|
// ok, we're not going to be able to smooth this,
|
|
if (params.ignoreSmoothingFailures) {
|
|
// proceed anyway with the more relaxed bounds matrix
|
|
initBoundsMat(mmat);
|
|
setTopolBounds(*mol, mmat, false, true);
|
|
|
|
if (coordMap) {
|
|
adjustBoundsMatFromCoordMap(mmat, nAtoms, coordMap);
|
|
}
|
|
} else {
|
|
BOOST_LOG(rdWarningLog)
|
|
<< "Could not triangle bounds smooth molecule." << std::endl;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
} // namespace EmbeddingOps
|
|
|
|
void _fillAtomPositions(RDGeom::Point3DConstPtrVect &pts, const Conformer &conf,
|
|
const ROMol &mol, bool onlyHeavyAtomsForRMS) {
|
|
unsigned int na = conf.getNumAtoms();
|
|
pts.clear();
|
|
pts.reserve(na);
|
|
for (const auto &atom : mol.atoms()) {
|
|
// FIX: should we include D and T here?
|
|
if (onlyHeavyAtomsForRMS && atom->getAtomicNum() == 1) {
|
|
continue;
|
|
}
|
|
pts.push_back(&conf.getAtomPos(atom->getIdx()));
|
|
}
|
|
}
|
|
|
|
bool _isConfFarFromRest(const ROMol &mol, const Conformer &conf,
|
|
double threshold, bool onlyHeavyAtomsForRMS) {
|
|
// NOTE: it is tempting to use some triangle inequality to prune
|
|
// conformations here but some basic testing has shown very
|
|
// little advantage and given that the time for pruning fades in
|
|
// comparison to embedding - we will use a simple for loop below
|
|
// over all conformation until we find a match
|
|
RDGeom::Point3DConstPtrVect refPoints, prbPoints;
|
|
_fillAtomPositions(refPoints, conf, mol, onlyHeavyAtomsForRMS);
|
|
|
|
double ssrThres = conf.getNumAtoms() * threshold * threshold;
|
|
for (ROMol::ConstConformerIterator confi = mol.beginConformers();
|
|
confi != mol.endConformers(); ++confi) {
|
|
_fillAtomPositions(prbPoints, *(*confi), mol, onlyHeavyAtomsForRMS);
|
|
RDGeom::Transform3D trans;
|
|
auto ssr = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, trans);
|
|
if (ssr < ssrThres) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
namespace detail {
|
|
|
|
template <class T>
|
|
bool multiplication_overflows_(T a, T b) {
|
|
// a * b > c if and only if a > c / b
|
|
if (a == 0 || b == 0) return false;
|
|
if (a > std::numeric_limits<T>::max() / b) return true;
|
|
return false;
|
|
}
|
|
|
|
void embedHelper_(int threadId, int numThreads, EmbedArgs *eargs,
|
|
const EmbedParameters *params) {
|
|
PRECONDITION(eargs, "bogus eargs");
|
|
PRECONDITION(params, "bogus params");
|
|
unsigned int nAtoms = eargs->mmat->numRows();
|
|
RDGeom::PointPtrVect positions(nAtoms);
|
|
for (unsigned int i = 0; i < nAtoms; ++i) {
|
|
if (eargs->fourD) {
|
|
positions[i] = new RDGeom::PointND(4);
|
|
} else {
|
|
positions[i] = new RDGeom::Point3D();
|
|
}
|
|
}
|
|
for (size_t ci = 0; ci < eargs->confs->size(); ci++) {
|
|
if (rdcast<int>(ci % numThreads) != threadId) continue;
|
|
if (!(*eargs->confsOk)[ci]) {
|
|
// we call this function for each fragment in a molecule,
|
|
// if one of the fragments has already failed, there's no
|
|
// sense in embedding this one
|
|
continue;
|
|
}
|
|
|
|
CHECK_INVARIANT(
|
|
params->randomSeed >= -1,
|
|
"random seed must either be positive, zero, or negative one");
|
|
int new_seed = params->randomSeed;
|
|
if (new_seed > -1) {
|
|
if (!multiplication_overflows_(rdcast<int>(ci + 1), params->randomSeed)) {
|
|
// old method of computing a new seed
|
|
new_seed = (ci + 1) * params->randomSeed;
|
|
} else {
|
|
// If the above simple multiplication will overflow, use a
|
|
// cheap and easy way to hash the conformer index and seed
|
|
// together: for N'ary numerical system, where N is the
|
|
// maximum possible value of the pair of numbers. The
|
|
// following will generate unique integers:
|
|
// hash(a, b) = a + b * N
|
|
size_t big_seed = rdcast<size_t>(params->randomSeed);
|
|
size_t max_val = std::max(ci + 1, big_seed);
|
|
size_t big_num = big_seed + max_val * (ci + 1);
|
|
// only grab the first 31 bits xor'd with the next 31 bits to
|
|
// make sure its positive, careful, the 'ULL' is important
|
|
// here, 0x7fffffff is the 'int' type because of C default
|
|
// number semantics and that we definitely don't want!
|
|
const size_t positive_int_mask = 0x7fffffffULL;
|
|
size_t folded_num = (big_num & positive_int_mask) ^ (big_num >> 31ULL);
|
|
new_seed = rdcast<int>(folded_num & positive_int_mask);
|
|
}
|
|
}
|
|
CHECK_INVARIANT(new_seed >= -1,
|
|
"Something went wrong calculating a new seed");
|
|
|
|
bool gotCoords =
|
|
EmbeddingOps::embedPoints(&positions, *eargs, *params, new_seed);
|
|
|
|
// copy the coordinates into the correct conformer
|
|
if (gotCoords) {
|
|
Conformer *conf = (*eargs->confs)[ci];
|
|
unsigned int fragAtomIdx = 0;
|
|
for (unsigned int i = 0; i < conf->getNumAtoms(); ++i) {
|
|
if (!eargs->fragMapping ||
|
|
(*eargs->fragMapping)[i] == static_cast<int>(eargs->fragIdx)) {
|
|
conf->setAtomPos(i, RDGeom::Point3D((*positions[fragAtomIdx])[0],
|
|
(*positions[fragAtomIdx])[1],
|
|
(*positions[fragAtomIdx])[2]));
|
|
++fragAtomIdx;
|
|
}
|
|
}
|
|
} else {
|
|
(*eargs->confsOk)[ci] = 0;
|
|
}
|
|
}
|
|
for (unsigned int i = 0; i < nAtoms; ++i) {
|
|
delete positions[i];
|
|
}
|
|
}
|
|
} // end of namespace detail
|
|
|
|
void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs,
|
|
const EmbedParameters ¶ms) {
|
|
if (!mol.getNumAtoms()) {
|
|
throw ValueErrorException("molecule has no atoms");
|
|
}
|
|
if (params.ETversion < 1 || params.ETversion > 2) {
|
|
throw ValueErrorException(
|
|
"Only version 1 and 2 of the experimental "
|
|
"torsion-angle preferences (ETversion) supported");
|
|
}
|
|
|
|
// initialize the conformers we're going to be creating:
|
|
if (params.clearConfs) {
|
|
res.clear();
|
|
mol.clearConformers();
|
|
}
|
|
std::vector<Conformer *> confs(numConfs);
|
|
for (unsigned int i = 0; i < numConfs; ++i) {
|
|
confs[i] = new Conformer(mol.getNumAtoms());
|
|
}
|
|
|
|
boost::dynamic_bitset<> confsOk(numConfs);
|
|
confsOk.set();
|
|
|
|
INT_VECT fragMapping;
|
|
std::vector<ROMOL_SPTR> molFrags =
|
|
MolOps::getMolFrags(mol, true, &fragMapping);
|
|
const std::map<int, RDGeom::Point3D> *coordMap = params.coordMap;
|
|
if (molFrags.size() > 1 && coordMap) {
|
|
BOOST_LOG(rdWarningLog)
|
|
<< "Constrained conformer generation (via the coordMap argument) "
|
|
"does not work with molecules that have multiple fragments."
|
|
<< std::endl;
|
|
coordMap = nullptr;
|
|
}
|
|
|
|
// we will generate conformations for each fragment in the molecule
|
|
// separately, so loop over them:
|
|
for (unsigned int fragIdx = 0; fragIdx < molFrags.size(); ++fragIdx) {
|
|
ROMOL_SPTR piece = molFrags[fragIdx];
|
|
unsigned int nAtoms = piece->getNumAtoms();
|
|
|
|
// create and initialize the distance bounds matrix
|
|
DistGeom::BoundsMatPtr mmat(new DistGeom::BoundsMatrix(nAtoms));
|
|
initBoundsMat(mmat);
|
|
|
|
ForceFields::CrystalFF::CrystalFFDetails etkdgDetails;
|
|
if (!EmbeddingOps::setupInitialBoundsMatrix(piece.get(), mmat, coordMap,
|
|
params, etkdgDetails)) {
|
|
// return if we couldn't setup the bounds matrix
|
|
// possible causes include a triangle smoothing failure
|
|
return;
|
|
}
|
|
|
|
// find all the chiral centers in the molecule
|
|
MolOps::assignStereochemistry(*piece);
|
|
DistGeom::VECT_CHIRALSET chiralCenters;
|
|
DistGeom::VECT_CHIRALSET tetrahedralCarbons;
|
|
EmbeddingOps::findChiralSets(*piece, chiralCenters, tetrahedralCarbons,
|
|
coordMap);
|
|
|
|
// if we have any chiral centers or are using random coordinates, we will
|
|
// first embed the molecule in four dimensions, otherwise we will use 3D
|
|
bool fourD = false;
|
|
if (params.useRandomCoords || chiralCenters.size() > 0) {
|
|
fourD = true;
|
|
}
|
|
int numThreads = getNumThreadsToUse(params.numThreads);
|
|
|
|
// do the embedding, using multiple threads if requested
|
|
detail::EmbedArgs eargs = {
|
|
&confsOk, fourD, &fragMapping, &confs, fragIdx,
|
|
mmat, &chiralCenters, &tetrahedralCarbons, &etkdgDetails};
|
|
if (numThreads == 1) {
|
|
detail::embedHelper_(0, 1, &eargs, ¶ms);
|
|
}
|
|
#ifdef RDK_THREADSAFE_SSS
|
|
else {
|
|
std::vector<std::future<void>> tg;
|
|
for (int tid = 0; tid < numThreads; ++tid) {
|
|
tg.emplace_back(std::async(std::launch::async, detail::embedHelper_,
|
|
tid, numThreads, &eargs, ¶ms));
|
|
}
|
|
for (auto &fut : tg) {
|
|
fut.get();
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
for (unsigned int ci = 0; ci < confs.size(); ++ci) {
|
|
Conformer *conf = confs[ci];
|
|
if (confsOk[ci]) {
|
|
// check if we are pruning away conformations and
|
|
// a closeby conformation has already been chosen :
|
|
if (params.pruneRmsThresh > 0.0 &&
|
|
!_isConfFarFromRest(mol, *conf, params.pruneRmsThresh,
|
|
params.onlyHeavyAtomsForRMS)) {
|
|
delete conf;
|
|
} else {
|
|
int confId = (int)mol.addConformer(conf, true);
|
|
res.push_back(confId);
|
|
}
|
|
} else {
|
|
delete conf;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // end of namespace DGeomHelpers
|
|
} // end of namespace RDKit
|