Dev/cleanup bad confs (#973)

* passes all tests, but is still not 100 percent there

* generalize _centerInVolume for possible future use

* better testing of tetrahedral centers
update tests

* only test ring atoms

* handle 4-coordinate N too

* add a volume check; not all tests pass

* turn off debug printing

* rearrange the order of tests.
If this is not done, we get a segfault when the github55 test runs.
The whole thing needs to be run under valgrind to track this down.

* clear up a memory leak

* a bit more documentation
add a constant for one of the threshold values

* get more permissive on the energy tests.
Only do the extra tetrahedral tests for atoms in at least two rings.

* add a DEBUG_EMBEDDING option to make tracking down failures easier

* enable better debugging when the flag is set

* remove a FIX

* remove some debug printing from the tests
This commit is contained in:
Greg Landrum
2016-07-25 20:26:19 +02:00
committed by Brian Kelley
parent 5106eb8d3e
commit 2afeb3b086
6 changed files with 336 additions and 136 deletions

View File

@@ -185,8 +185,9 @@ int ForceField::minimize(unsigned int maxIts, double forceTol,
return minimize(0, NULL, maxIts, forceTol, energyTol);
}
int ForceField::minimize(unsigned int snapshotFreq, RDKit::SnapshotVect *snapshotVect,
unsigned int maxIts, double forceTol, double energyTol) {
int ForceField::minimize(unsigned int snapshotFreq,
RDKit::SnapshotVect *snapshotVect, unsigned int maxIts,
double forceTol, double energyTol) {
PRECONDITION(df_init, "not initialized");
PRECONDITION(static_cast<unsigned int>(d_numPoints) == d_positions.size(),
"size mismatch");
@@ -201,19 +202,23 @@ int ForceField::minimize(unsigned int snapshotFreq, RDKit::SnapshotVect *snapsho
ForceFieldsHelper::calcEnergy eCalc(this);
ForceFieldsHelper::calcGradient gCalc(this);
int res = BFGSOpt::minimize(dim, points, forceTol, numIters, finalForce,
eCalc, gCalc, snapshotFreq, snapshotVect,
energyTol, maxIts);
int res =
BFGSOpt::minimize(dim, points, forceTol, numIters, finalForce, eCalc,
gCalc, snapshotFreq, snapshotVect, energyTol, maxIts);
this->gather(points);
delete[] points;
return res;
}
double ForceField::calcEnergy() const {
double ForceField::calcEnergy(std::vector<double> *contribs) const {
PRECONDITION(df_init, "not initialized");
double res = 0.0;
if (d_contribs.empty()) return res;
if (contribs) {
contribs->clear();
contribs->reserve(d_contribs.size());
}
unsigned int N = d_positions.size();
double *pos = new double[d_dimension * N];
@@ -221,7 +226,9 @@ double ForceField::calcEnergy() const {
// now loop over the contribs
for (ContribPtrVect::const_iterator contrib = d_contribs.begin();
contrib != d_contribs.end(); contrib++) {
res += (*contrib)->getEnergy(pos);
double e = (*contrib)->getEnergy(pos);
res += e;
if (contribs) contribs->push_back(e);
}
delete[] pos;
return res;

View File

@@ -69,7 +69,7 @@ class ForceField {
void initialize();
//! calculates and returns the energy (in kcal/mol) based on existing
//positions in the forcefield
// positions in the forcefield
/*!
\return the current energy
@@ -79,7 +79,7 @@ class ForceField {
double *
the positions need to be converted to double * here
*/
double calcEnergy() const;
double calcEnergy(std::vector<double> *contribs = NULL) const;
// these next two aren't const because they may update our
// distance matrix

View File

@@ -1,6 +1,5 @@
// $Id$
//
// Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
// Copyright (C) 2004-2016 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -8,7 +7,7 @@
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// define DEBUG_EMBEDDING 1
#include "Embedder.h"
#include <DistGeom/BoundsMatrix.h>
#include <DistGeom/DistGeomUtils.h>
@@ -19,6 +18,7 @@
#include <GraphMol/ROMol.h>
#include <GraphMol/Atom.h>
#include <GraphMol/AtomIterators.h>
#include <GraphMol/RingInfo.h>
#include <GraphMol/Conformer.h>
#include <RDGeneral/types.h>
@@ -35,29 +35,22 @@
#include <RDGeneral/RDThreads.h>
#define ERROR_TOL 0.00001
// these tolerances, all to detect and filter out bogus conformations, are a
// delicate balance between sensitive enough to detect obviously bad
// conformations but not so sensitive that a bunch of ok conformations get
// filtered out, which slows down the whole conformation generation process
#define MAX_MINIMIZED_E_PER_ATOM 0.05
#define MAX_MINIMIZED_E_CONTRIB 0.20
#define MIN_TETRAHEDRAL_CHIRAL_VOL 0.50
#define TETRAHEDRAL_CENTERINVOLUME_TOL 0.30
namespace RDKit {
namespace DGeomHelpers {
typedef std::pair<int, int> INT_PAIR;
typedef std::vector<INT_PAIR> INT_PAIR_VECT;
//! tests whether v4 and p0 fall on the same side of the plane through
//! v1, v2 and v3
/*!
  \param v1,v2,v3  the three points spanning the reference plane
  \param v4        first point to classify
  \param p0        second point to classify
  \param tol       a point whose offset from the plane has a magnitude below
                   this value is treated as lying in the plane. The plane
                   normal is not normalized, so this is an unnormalized offset.

  \return true only if neither point is within tol of the plane and both
          offsets have the same sign
*/
bool _sameSide(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2,
               const RDGeom::Point3D &v3, const RDGeom::Point3D &v4,
               const RDGeom::Point3D &p0, double tol = 0.1) {
  RDGeom::Point3D planeNormal = (v2 - v1).crossProduct(v3 - v1);
  double offsetV4 = planeNormal.dotProduct(v4 - v1);
  double offsetP0 = planeNormal.dotProduct(p0 - v1);
  // a point that is (nearly) in the plane cannot be assigned to a side
  bool v4InPlane = fabs(offsetV4) < tol;
  bool p0InPlane = fabs(offsetP0) < tol;
  if (v4InPlane || p0InPlane) {
    return false;
  }
  bool v4Below = offsetV4 < 0.;
  bool p0Below = offsetP0 < 0.;
  return v4Below == p0Below;
}
bool _centerInVolume(const DistGeom::ChiralSetPtr &chiralSet,
const RDGeom::PointPtrVect &positions) {
if (chiralSet->d_idx0 ==
chiralSet->d_idx4) { // this happens for three-coordinate centers
return true;
}
bool _volumeTest(const DistGeom::ChiralSetPtr &chiralSet,
const RDGeom::PointPtrVect &positions, bool verbose = false) {
RDGeom::Point3D p0((*positions[chiralSet->d_idx0])[0],
(*positions[chiralSet->d_idx0])[1],
(*positions[chiralSet->d_idx0])[2]);
@@ -73,15 +66,87 @@ bool _centerInVolume(const DistGeom::ChiralSetPtr &chiralSet,
RDGeom::Point3D p4((*positions[chiralSet->d_idx4])[0],
(*positions[chiralSet->d_idx4])[1],
(*positions[chiralSet->d_idx4])[2]);
// RDGeom::Point3D centroid = (p1+p2+p3+p4)/4.;
bool res = _sameSide(p1, p2, p3, p4, p0) && _sameSide(p2, p3, p4, p1, p0) &&
_sameSide(p3, p4, p1, p2, p0) && _sameSide(p4, p1, p2, p3, p0);
// std::cerr<<"civ:"<<chiralSet->d_idx0<<" "<<chiralSet->d_idx1<<"
// "<<chiralSet->d_idx2<<" "<<chiralSet->d_idx3<<"
// "<<chiralSet->d_idx4<<"->"<<res<<"|"<<std::endl;
// even if we are minimizing in higher dimension the chiral volume is
// calculated using only the first 3 dimensions
RDGeom::Point3D v1 = p0 - p1;
v1.normalize();
RDGeom::Point3D v2 = p0 - p2;
v2.normalize();
RDGeom::Point3D v3 = p0 - p3;
v3.normalize();
RDGeom::Point3D v4 = p0 - p4;
v4.normalize();
RDGeom::Point3D crossp = v1.crossProduct(v2);
double vol = crossp.dotProduct(v3);
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
crossp = v1.crossProduct(v2);
vol = crossp.dotProduct(v4);
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
crossp = v1.crossProduct(v3);
vol = crossp.dotProduct(v4);
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
crossp = v2.crossProduct(v3);
vol = crossp.dotProduct(v4);
if (verbose) std::cerr << " " << fabs(vol) << std::endl;
if (fabs(vol) < MIN_TETRAHEDRAL_CHIRAL_VOL) return false;
return true;
}
//! checks whether two points lie on the same side of a plane
/*!
  The plane is the one passing through v1, v2 and v3; the points being
  compared are v4 and p0.

  \param v1,v2,v3  points defining the plane
  \param v4        reference point
  \param p0        point compared against the reference
  \param tol       minimum magnitude of the signed offset from the plane
                   (computed with the unnormalized plane normal) for a point
                   to count as being clearly on one side

  \return false if either point is closer than tol to the plane or the two
          points are on opposite sides; true otherwise
*/
bool _sameSide(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2,
               const RDGeom::Point3D &v3, const RDGeom::Point3D &v4,
               const RDGeom::Point3D &p0, double tol = 0.1) {
  RDGeom::Point3D planeNormal = (v2 - v1).crossProduct(v3 - v1);
  double refOffset = planeNormal.dotProduct(v4 - v1);
  double probeOffset = planeNormal.dotProduct(p0 - v1);
  // points sitting (almost) in the plane are rejected outright
  if (fabs(refOffset) < tol) return false;
  if (fabs(probeOffset) < tol) return false;
  // same side <=> the two signed offsets agree in sign
  return (refOffset < 0.) == (probeOffset < 0.);
}
//! tests whether the point at idx0 is enclosed by the points at idx1..idx4
/*!
  For each face formed by three of the four outer points, the center (idx0)
  must lie on the same side of that face as the remaining outer point
  (see _sameSide()).

  \param idx0       index of the central point in \c positions
  \param idx1,idx2,idx3,idx4  indices of the four surrounding points
  \param positions  the coordinates; only the first three components of each
                    point are used
  \param tol        tolerance forwarded to _sameSide()
  \param verbose    if true, the four individual face results are written to
                    std::cerr

  \return true if all four face tests pass
*/
bool _centerInVolume(unsigned int idx0, unsigned int idx1, unsigned int idx2,
                     unsigned int idx3, unsigned int idx4,
                     const RDGeom::PointPtrVect &positions, double tol,
                     bool verbose = false) {
  RDGeom::Point3D center((*positions[idx0])[0], (*positions[idx0])[1],
                         (*positions[idx0])[2]);
  RDGeom::Point3D nbr1((*positions[idx1])[0], (*positions[idx1])[1],
                       (*positions[idx1])[2]);
  RDGeom::Point3D nbr2((*positions[idx2])[0], (*positions[idx2])[1],
                       (*positions[idx2])[2]);
  RDGeom::Point3D nbr3((*positions[idx3])[0], (*positions[idx3])[1],
                       (*positions[idx3])[2]);
  RDGeom::Point3D nbr4((*positions[idx4])[0], (*positions[idx4])[1],
                       (*positions[idx4])[2]);

  if (verbose) {
    // evaluate every face so that all four results can be reported
    bool face123 = _sameSide(nbr1, nbr2, nbr3, nbr4, center, tol);
    bool face234 = _sameSide(nbr2, nbr3, nbr4, nbr1, center, tol);
    bool face341 = _sameSide(nbr3, nbr4, nbr1, nbr2, center, tol);
    bool face412 = _sameSide(nbr4, nbr1, nbr2, nbr3, center, tol);
    std::cerr << face123 << " " << face234 << " " << face341 << " " << face412
              << std::endl;
    return face123 && face234 && face341 && face412;
  }

  // quiet path: stop at the first face that fails
  if (!_sameSide(nbr1, nbr2, nbr3, nbr4, center, tol)) return false;
  if (!_sameSide(nbr2, nbr3, nbr4, nbr1, center, tol)) return false;
  if (!_sameSide(nbr3, nbr4, nbr1, nbr2, center, tol)) return false;
  return _sameSide(nbr4, nbr1, nbr2, nbr3, center, tol);
}
//! \overload
/*!
  Convenience form taking the five indices from a chiral set.

  \param chiralSet  supplies d_idx0 (the center) and d_idx1..d_idx4
  \param positions  the coordinates
  \param tol        tolerance forwarded to the index-based overload
  \param verbose    forwarded to the index-based overload

  \return true for three-coordinate centers (flagged by d_idx0 == d_idx4),
          otherwise the result of the index-based test
*/
bool _centerInVolume(const DistGeom::ChiralSetPtr &chiralSet,
                     const RDGeom::PointPtrVect &positions, double tol = 0.1,
                     bool verbose = false) {
  // three-coordinate centers repeat the central index in the last slot;
  // there is no enclosing volume to test for those
  const bool threeCoordinate = (chiralSet->d_idx0 == chiralSet->d_idx4);
  return threeCoordinate ||
         _centerInVolume(chiralSet->d_idx0, chiralSet->d_idx1,
                         chiralSet->d_idx2, chiralSet->d_idx3,
                         chiralSet->d_idx4, positions, tol, verbose);
}
bool _boundsFulfilled(const std::vector<int> &atoms,
const DistGeom::BoundsMatrix &mmat,
const RDGeom::PointPtrVect &positions) {
@@ -188,7 +253,8 @@ bool _embedPoints(
bool useRandomCoords, double boxSizeMult, bool randNegEig,
unsigned int numZeroFail, double optimizerForceTol, double basinThresh,
int seed, unsigned int maxIterations,
const DistGeom::VECT_CHIRALSET *chiralCenters, bool enforceChirality,
const DistGeom::VECT_CHIRALSET *chiralCenters,
const DistGeom::VECT_CHIRALSET *tetrahedralCarbons, bool enforceChirality,
bool useExpTorsionAnglePrefs, bool useBasicKnowledge,
const std::vector<std::pair<int, int> > &bonds,
const std::vector<std::vector<int> > &angles,
@@ -240,12 +306,20 @@ bool _embedPoints(
}
gotCoords = DistGeom::computeRandomCoords(*positions, boxSize, *rng);
}
#ifdef DEBUG_EMBEDDING
if (!gotCoords) {
std::cerr << "Initial embedding failed!, Iter: " << iter << std::endl;
}
#endif
// std::cerr << " ITER: " << iter << " gotCoords: " << gotCoords <<
// std::endl;
if (gotCoords) {
ForceFields::ForceField *field = DistGeom::constructForceField(
*mmat, *positions, *chiralCenters, 1.0, 0.1, 0, basinThresh);
boost::scoped_ptr<ForceFields::ForceField> field(
DistGeom::constructForceField(*mmat, *positions, *chiralCenters, 1.0,
0.1, 0, basinThresh));
unsigned int nPasses = 0;
field->initialize();
// std::cerr<<"FIELD E: "<<field->calcEnergy()<<std::endl;
// std::cerr << "FIELD E: " << field->calcEnergy() << std::endl;
if (field->calcEnergy() > ERROR_TOL) {
int needMore = 1;
while (needMore) {
@@ -253,14 +327,55 @@ bool _embedPoints(
++nPasses;
}
}
delete field;
field = NULL;
// std::cerr<<" "<<field->calcEnergy()<<" after npasses:
// "<<nPasses<<std::endl;
std::vector<double> e_contribs;
double local_e = field->calcEnergy(&e_contribs);
// if (e_contribs.size()) {
// std::cerr << " check: " << local_e / nat << " "
// << *(std::max_element(e_contribs.begin(),
// e_contribs.end()))
// << std::endl;
// }
// check that neither the energy nor any of the contributions to it are
// too high (this is part of github #971)
if (local_e / nat >= MAX_MINIMIZED_E_PER_ATOM ||
(e_contribs.size() &&
*(std::max_element(e_contribs.begin(), e_contribs.end())) >
MAX_MINIMIZED_E_CONTRIB)) {
#ifdef DEBUG_EMBEDDING
std::cerr << " Energy fail: " << local_e / nat << " "
<< *(std::max_element(e_contribs.begin(), e_contribs.end()))
<< std::endl;
#endif
gotCoords = false;
continue;
}
// for each of the atoms in the "tetrahedralCarbons" list, make sure
// that there is a minimum volume around them and that they are inside
// that volume. (this is part of github #971)
BOOST_FOREACH (DistGeom::ChiralSetPtr tetSet, *tetrahedralCarbons) {
// it could happen that the centroid is outside the volume defined
// by the other
// four points. That is also a fail.
if (!_volumeTest(tetSet, *positions) ||
!_centerInVolume(tetSet, *positions,
TETRAHEDRAL_CENTERINVOLUME_TOL)) {
#ifdef DEBUG_EMBEDDING
std::cerr << " fail2! (" << tetSet->d_idx0 << ") iter: " << iter
<< " vol: " << _volumeTest(tetSet, *positions, true)
<< " center: "
<< _centerInVolume(tetSet, *positions,
TETRAHEDRAL_CENTERINVOLUME_TOL, true)
<< std::endl;
#endif
gotCoords = false;
continue;
}
}
// Check if any of our chiral centers are badly out of whack. If so, try
// again
if (enforceChirality && chiralCenters->size() > 0) {
if (gotCoords && enforceChirality && chiralCenters->size() > 0) {
// check the chiral volume:
BOOST_FOREACH (DistGeom::ChiralSetPtr chiralSet, *chiralCenters) {
double vol = DistGeom::ChiralViolationContrib::calcChiralVolume(
@@ -270,8 +385,10 @@ bool _embedPoints(
double ub = chiralSet->getUpperVolumeBound();
if ((lb > 0 && vol < lb && (lb - vol) / lb > .2) ||
(ub < 0 && vol > ub && (vol - ub) / ub > .2)) {
// std::cerr<<" fail! ("<<chiralSet->d_idx0<<") iter: "<<iter<<"
// "<<vol<<" "<<lb<<"-"<<ub<<std::endl;
#ifdef DEBUG_EMBEDDING
std::cerr << " fail! (" << chiralSet->d_idx0 << ") iter: " << iter
<< " " << vol << " " << lb << "-" << ub << std::endl;
#endif
gotCoords = false;
break;
}
@@ -282,8 +399,9 @@ bool _embedPoints(
// time removing the chiral constraints and
// increasing the weight on the fourth dimension
if (gotCoords && (chiralCenters->size() > 0 || useRandomCoords)) {
ForceFields::ForceField *field2 = DistGeom::constructForceField(
*mmat, *positions, *chiralCenters, 0.2, 1.0, 0, basinThresh);
boost::scoped_ptr<ForceFields::ForceField> field2(
DistGeom::constructForceField(*mmat, *positions, *chiralCenters,
0.2, 1.0, 0, basinThresh));
field2->initialize();
// std::cerr<<"FIELD2 E: "<<field2->calcEnergy()<<std::endl;
if (field2->calcEnergy() > ERROR_TOL) {
@@ -296,7 +414,6 @@ bool _embedPoints(
// std::cerr<<" "<<field2->calcEnergy()<<" after npasses2:
// "<<nPasses2<<std::endl;
}
delete field2;
}
// (ET)(K)DG
@@ -306,7 +423,6 @@ bool _embedPoints(
expTorsionAtoms, expTorsionAngles, improperAtoms, atomNums,
useBasicKnowledge);
}
// test if chirality is correct
if (enforceChirality && gotCoords && (chiralCenters->size() > 0)) {
// "distance matrix" chirality test
@@ -334,8 +450,10 @@ bool _embedPoints(
// by the other
// four points. That is also a fail.
if (!_centerInVolume(chiralSet, *positions)) {
// std::cerr<<" fail2! ("<<chiralSet->d_idx0<<") iter:
// "<<iter<<std::endl;
#ifdef DEBUG_EMBEDDING
std::cerr << " fail3! (" << chiralSet->d_idx0
<< ") iter: " << iter << std::endl;
#endif
gotCoords = false;
break;
}
@@ -352,8 +470,9 @@ bool _embedPoints(
return gotCoords;
}
void _findChiralSets(const ROMol &mol,
DistGeom::VECT_CHIRALSET &chiralCenters) {
void _findChiralSets(const ROMol &mol, DistGeom::VECT_CHIRALSET &chiralCenters,
DistGeom::VECT_CHIRALSET &tetrahedralCenters,
const std::map<int, RDGeom::Point3D> *coordMap) {
ROMol::ConstAtomIterator ati;
INT_VECT nbrs;
ROMol::OEDGE_ITER beg, end;
@@ -361,8 +480,10 @@ void _findChiralSets(const ROMol &mol,
for (ati = mol.beginAtoms(); ati != mol.endAtoms(); ati++) {
if ((*ati)->getAtomicNum() != 1) { // skip hydrogens
Atom::ChiralType chiralType = (*ati)->getChiralTag();
if (chiralType == Atom::CHI_TETRAHEDRAL_CW ||
chiralType == Atom::CHI_TETRAHEDRAL_CCW) {
if ((chiralType == Atom::CHI_TETRAHEDRAL_CW ||
chiralType == Atom::CHI_TETRAHEDRAL_CCW) ||
(((*ati)->getAtomicNum() == 6 || (*ati)->getAtomicNum() == 7) &&
(*ati)->getDegree() == 4)) {
// make a chiral set from the neighbors
nbrs.clear();
nbrs.reserve(4);
@@ -393,12 +514,27 @@ void _findChiralSets(const ROMol &mol,
(*ati)->getIdx(), nbrs[0], nbrs[1], nbrs[2], nbrs[3], 5.0, 100.0);
DistGeom::ChiralSetPtr cptr(cset);
chiralCenters.push_back(cptr);
} else {
} else if (chiralType == Atom::CHI_TETRAHEDRAL_CW) {
DistGeom::ChiralSet *cset =
new DistGeom::ChiralSet((*ati)->getIdx(), nbrs[0], nbrs[1],
nbrs[2], nbrs[3], -100.0, -5.0);
DistGeom::ChiralSetPtr cptr(cset);
chiralCenters.push_back(cptr);
} else {
if ((coordMap &&
coordMap->find((*ati)->getIdx()) != coordMap->end()) ||
(mol.getRingInfo()->isInitialized() &&
(mol.getRingInfo()->numAtomRings((*ati)->getIdx()) < 2 ||
mol.getRingInfo()->isAtomInRingOfSize((*ati)->getIdx(), 3)))) {
// we only want to do these tests for atoms that are in at least two
// rings and are not part of the coordMap
// there's no sense doing 3-rings because those are a nightmare
} else {
DistGeom::ChiralSet *cset = new DistGeom::ChiralSet(
(*ati)->getIdx(), nbrs[0], nbrs[1], nbrs[2], nbrs[3], 0.0, 0.0);
DistGeom::ChiralSetPtr cptr(cset);
tetrahedralCenters.push_back(cptr);
}
}
} // if block -chirality check
} // if block - heavy atom check
@@ -521,6 +657,7 @@ typedef struct {
int seed;
unsigned int maxIterations;
DistGeom::VECT_CHIRALSET const *chiralCenters;
DistGeom::VECT_CHIRALSET const *tetrahedralCarbons;
bool enforceChirality;
bool useExpTorsionAnglePrefs;
bool useBasicKnowledge;
@@ -553,9 +690,10 @@ void embedHelper_(int threadId, int numThreads, EmbedArgs *eargs) {
&positions, eargs->mmat, eargs->useRandomCoords, eargs->boxSizeMult,
eargs->randNegEig, eargs->numZeroFail, eargs->optimizerForceTol,
eargs->basinThresh, (ci + 1) * eargs->seed, eargs->maxIterations,
eargs->chiralCenters, eargs->enforceChirality,
eargs->useExpTorsionAnglePrefs, eargs->useBasicKnowledge, *eargs->bonds,
*eargs->angles, *eargs->expTorsionAtoms, *eargs->expTorsionAngles,
eargs->chiralCenters, eargs->tetrahedralCarbons,
eargs->enforceChirality, eargs->useExpTorsionAnglePrefs,
eargs->useBasicKnowledge, *eargs->bonds, *eargs->angles,
*eargs->expTorsionAtoms, *eargs->expTorsionAngles,
*eargs->improperAtoms, *eargs->atomNums);
if (gotCoords) {
@@ -684,8 +822,10 @@ void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs,
#endif
// find all the chiral centers in the molecule
DistGeom::VECT_CHIRALSET chiralCenters;
DistGeom::VECT_CHIRALSET tetrahedralCarbons;
MolOps::assignStereochemistry(*piece);
_findChiralSets(*piece, chiralCenters);
_findChiralSets(*piece, chiralCenters, tetrahedralCarbons, coordMap);
// if we have any chiral centers or are using random coordinates, we will
// first embed the molecule in four dimensions, otherwise we will use 3D
@@ -713,6 +853,7 @@ void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs,
seed,
maxIterations,
&chiralCenters,
&tetrahedralCarbons,
enforceChirality,
useExpTorsionAnglePrefs,
useBasicKnowledge,

View File

@@ -56,10 +56,10 @@ def compareMatrices(bm1, bm2, map, tol=1.0e-5) :
l, m = m,l
if (abs(bm1[l,m] - bm2[i,j]) > tol):
return 0
if (abs(bm1[m,l] - bm2[j,i]) > tol):
return 0
return 1
def compareOrder(smi1, smi2, tol=1.0e-5) :
@@ -92,7 +92,7 @@ class TestCase(unittest.TestCase) :
def _test0Cdk2(self):
fileN = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','DistGeomHelpers',
'test_data','cis_trans_cases.csv')
ofile = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','DistGeomHelpers',
'test_data','embedDistOpti.sdf')
self.assertTrue(compareWithOld(fileN, ofile))
@@ -105,7 +105,7 @@ class TestCase(unittest.TestCase) :
conf = mol.GetConformer()
self.assertTrue(lstEq(conf.GetAtomPosition(0), [0.0, 0.0, 0.0]))
#writer.write(mol)
mol = Chem.MolFromSmiles('CO')
rdDistGeom.EmbedMolecule(mol, 10,1)
conf = mol.GetConformer()
@@ -120,20 +120,20 @@ class TestCase(unittest.TestCase) :
self.assertTrue(lstEq(conf.GetAtomPosition(1), [-0.00604, 0.59337, 0.0]))
self.assertTrue(lstEq(conf.GetAtomPosition(2), [1.22281, -0.29446, 0.0]))
#writer.write(mol)
mol = Chem.MolFromSmiles('O=C=O')
rdDistGeom.EmbedMolecule(mol,10,1)
conf = mol.GetConformer()
#writer.write(mol)
self.assertTrue(lstEq(conf.GetAtomPosition(0), [-1.2180, -0.06088, 0.0]))
self.assertTrue(lstEq(conf.GetAtomPosition(1), [-0.00408, 0.12116, 0.0]))
self.assertTrue(lstEq(conf.GetAtomPosition(2), [1.22207, -0.060276, 0.0]))
mol = Chem.MolFromSmiles('C=C=C=C')
rdDistGeom.EmbedMolecule(mol,10,1)
conf = mol.GetConformer()
#writer.write(mol)
d1 = computeDist(conf.GetAtomPosition(0), conf.GetAtomPosition(1))
@@ -171,7 +171,7 @@ class TestCase(unittest.TestCase) :
#print(['%.2f'%x for x in nenergies])
#print(nenergies)
self.assertTrue(lstEq(energies, nenergies,tol=1e-2))
def test4OrderDependence(self) :
self.assertTrue(compareOrder("CC(C)(C)C(=O)NC(C1)CC(N2C)CCC12",
"CN1C2CCC1CC(NC(=O)C(C)(C)C)C2"))
@@ -181,7 +181,7 @@ class TestCase(unittest.TestCase) :
#issue 232
self.assertTrue(compareOrder("CC(C)(C)C(=O)NC(C1)CC(N2C)CCC12",
"CN1C2CCC1CC(NC(=O)C(C)(C)C)C2"))
def test5Issue285(self):
m = Chem.MolFromSmiles('CNC=O')
cs = rdDistGeom.EmbedMultipleConfs(m,10)
@@ -206,7 +206,7 @@ class TestCase(unittest.TestCase) :
cids = rdDistGeom.EmbedMultipleConfs(mol, 50, maxAttempts=30,
randomSeed=100, pruneRmsThresh=1.5)
nconfs.append(len(cids))
d = [abs(x-y) for x,y in zip(expected,nconfs)]
self.assertTrue(max(d)<=1)
@@ -294,18 +294,19 @@ class TestCase(unittest.TestCase) :
conf.GetAtomPosition(3),
conf.GetAtomPosition(4))
self.assertTrue(abs(vol-tgtVol)<2.)
# let's try a little more complicated system
expectedV1 = -2.0
expectedV2 = -2.9
for i in range(5):
smi = "C1=CC=C(C=C1)[C@H](OC1=C[NH]N=C1)C(=O)[NH]C[C@H](Cl)C1=CC=NC=C1"
mol = Chem.MolFromSmiles(smi)
ci = rdDistGeom.EmbedMolecule(mol, 30, randomSeed=(i+1)*15)
ci = rdDistGeom.EmbedMolecule(mol, randomSeed=(i+1)*15)
self.assertTrue(ci>=0)
ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
ff.Minimize()
conf = mol.GetConformer(ci)
vol1 = computeChiralVol(conf.GetAtomPosition(6),
conf.GetAtomPosition(3),
@@ -326,7 +327,7 @@ class TestCase(unittest.TestCase) :
# forms of the compound
expectedV1 = 2.0 #[-2.30, -2.31, -2.30, 2.30, -1.77]
expectedV2 = 2.8 #[2.90, 2.89, 2.69, -2.90, -2.93]
self.assertTrue(nPos>0)
self.assertTrue(nNeg>0)
for i in range(5):
@@ -335,7 +336,7 @@ class TestCase(unittest.TestCase) :
ci = rdDistGeom.EmbedMolecule(mol, 30, (i+1)*10)
ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
ff.Minimize()
conf = mol.GetConformer(ci)
vol1 = computeChiralVol(conf.GetAtomPosition(6),
conf.GetAtomPosition(3),
@@ -391,7 +392,7 @@ class TestCase(unittest.TestCase) :
nenergies.append(ee)
self.assertTrue(lstEq(energies, nenergies, tol=ENERGY_TOLERANCE))
for cid in cids:
msd = 0.0
for i in range(mol.GetNumAtoms()):
@@ -400,6 +401,6 @@ class TestCase(unittest.TestCase) :
msd /= mol.GetNumAtoms()
self.assertTrue(msd < MSD_TOLERANCE)
if __name__ == '__main__':
unittest.main()

View File

@@ -885,6 +885,7 @@ void testRandomCoords() {
for (tokenizer::iterator token = tokens.begin(); token != tokens.end();
++token) {
std::string smi = *token;
// std::cerr << "SMI: " << smi << std::endl;
ROMol *m = SmilesToMol(smi, 0, 1);
RWMol *m2 = (RWMol *)MolOps::addHs(*m);
delete m;
@@ -1425,8 +1426,10 @@ void testMultiThreadMultiConf() {
TEST_ASSERT(pVect.size() == p2Vect.size());
double msd = 0.0;
for (unsigned int i = 0; i < pVect.size(); ++i) {
const RDGeom::Point3D *p = dynamic_cast<const RDGeom::Point3D *>(pVect[i]);
const RDGeom::Point3D *p2 = dynamic_cast<const RDGeom::Point3D *>(p2Vect[i]);
const RDGeom::Point3D *p =
dynamic_cast<const RDGeom::Point3D *>(pVect[i]);
const RDGeom::Point3D *p2 =
dynamic_cast<const RDGeom::Point3D *>(p2Vect[i]);
TEST_ASSERT(p && p2);
msd += (*p - *p2).lengthSq();
}
@@ -1599,6 +1602,50 @@ void testGithub697() {
}
}
// Regression test for github #971: embeds a bridged bicyclic amine and
// requires the resulting heavy-atom mol block to match a stored reference
// exactly.
void testGithub971() {
{
// sample molecule found by Sereina
std::string smi = "C/C(=C\\c1ccccc1)CN1C2CC[NH2+]CC1CC2";
RWMol *m = SmilesToMol(smi);
TEST_ASSERT(m);
// NOTE(review): the return value is discarded, so this presumably relies on
// an in-place addHs overload for RWMol - confirm
MolOps::addHs(*m);
// NOTE(review): 0xf00d looks like a fixed random seed so that the exact
// coordinate comparison below is reproducible - confirm against the
// EmbedMolecule signature
int cid = DGeomHelpers::EmbedMolecule(*m, 0, 0xf00d);
TEST_ASSERT(cid >= 0);
// the reference below has 19 atoms (no H rows), so strip the hydrogens
// again before writing the mol block
MolOps::removeHs(*m);
// reference mol block, compared verbatim (coordinates included)
std::string expectedMb =
"\n RDKit 3D\n\n 19 21 0 0 0 0 0 0 0 0999 V2000\n "
" 1.1258 -1.3888 0.9306 C 0 0 0 0 0 0 0 0 0 0 0 "
"0\n 1.0779 -0.1065 0.1565 C 0 0 0 0 0 0 0 0 0 0 0 "
" 0\n 2.2519 0.2795 -0.3483 C 0 0 0 0 0 0 0 0 0 0 "
"0 0\n 3.5245 -0.0901 0.2817 C 0 0 0 0 0 0 0 0 0 0 "
" 0 0\n 4.2551 0.9456 0.8411 C 0 0 0 0 0 0 0 0 0 "
"0 0 0\n 5.6137 0.8099 1.0965 C 0 0 0 0 0 0 0 0 0 "
" 0 0 0\n 6.2293 -0.3871 0.7552 C 0 0 0 0 0 0 0 0 "
"0 0 0 0\n 5.4107 -1.4793 0.4938 C 0 0 0 0 0 0 0 0 "
" 0 0 0 0\n 4.2354 -1.1874 -0.1873 C 0 0 0 0 0 0 0 "
"0 0 0 0 0\n -0.0943 0.0572 -0.7601 C 0 0 0 0 0 0 0 "
" 0 0 0 0 0\n -1.3242 0.2903 -0.0524 N 0 0 0 0 0 0 "
"0 0 0 0 0 0\n -2.1468 1.1490 -0.9044 C 0 0 0 0 0 0 "
" 0 0 0 0 0 0\n -3.1678 1.9193 -0.1239 C 0 0 0 0 0 "
"0 0 0 0 0 0 0\n -2.9879 1.6714 1.3508 C 0 0 0 0 0 "
" 0 0 0 0 0 0 0\n -3.3579 0.3480 1.7662 N 0 0 0 0 "
"0 0 0 0 0 0 0 0\n -3.4586 -0.6011 0.7007 C 0 0 0 0 "
" 0 0 0 0 0 0 0 0\n -2.1733 -0.8977 -0.0058 C 0 0 0 "
"0 0 0 0 0 0 0 0 0\n -2.5650 -1.1467 -1.4589 C 0 0 0 "
" 0 0 0 0 0 0 0 0 0\n -2.7401 0.2591 -1.9624 C 0 0 "
"0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 2 0\n 3 4 1 "
"0\n 4 5 2 0\n 5 6 1 0\n 6 7 2 0\n 7 8 1 0\n 8 9 2 "
"0\n 2 10 1 0\n 10 11 1 0\n 11 12 1 0\n 12 13 1 0\n 13 14 1 "
"0\n 14 15 1 0\n 15 16 1 0\n 16 17 1 0\n 17 18 1 0\n 18 19 1 "
"0\n 9 4 1 0\n 17 11 1 0\n 19 12 1 0\nM CHG 1 15 1\nM "
"END\n";
std::string mb = MolToMolBlock(*m);
TEST_ASSERT(mb == expectedMb);
// m is only released when every assertion above has passed
delete m;
}
}
int main() {
RDLog::InitLogs();
@@ -1756,6 +1803,11 @@ int main() {
<< "\t More ChEMBL molecules failing bounds smoothing.\n";
testGithub697();
BOOST_LOG(rdInfoLog) << "\t---------------------------------\n";
BOOST_LOG(rdInfoLog) << "\t ugly conformations can be generated for highly "
"constrained ring systems.\n";
testGithub971();
BOOST_LOG(rdInfoLog)
<< "*******************************************************\n";

View File

@@ -89,25 +89,24 @@ $$$$
7 15 1 0
M END
$$$$
RDKit 3D
15 14 0 0 0 0 0 0 0 0999 V2000
0.5177 -1.5667 1.0114 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0838 -0.5673 0.0062 C 0 0 0 0 0 0 0 0 0 0 0 0
0.8660 0.5210 0.0851 C 0 0 0 0 0 0 0 0 0 0 0 0
0.5855 1.7783 -0.6447 C 0 0 0 0 0 0 0 0 0 0 0 0
0.1773 2.8107 0.4000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.5980 -1.1074 1.9990 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.1861 -2.3819 1.1713 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4363 -1.9566 0.6071 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.3304 -0.7500 -0.9609 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4222 0.7277 1.0088 H 0 0 0 0 0 0 0 0 0 0 0 0
1.5247 2.1027 -1.1110 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.1933 1.7221 -1.3922 H 0 0 0 0 0 0 0 0 0 0 0 0
0.0081 3.8023 -0.0301 H 0 0 0 0 0 0 0 0 0 0 0 0
0.9945 2.8523 1.1529 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.7659 2.4155 0.8539 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.6674 -1.4421 -0.2259 C 0 0 0 0 0 0 0 0 0 0 0 0
0.4715 -0.4781 -0.3874 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4964 -0.4452 0.4746 C 0 0 0 0 0 0 0 0 0 0 0 0
2.8975 -0.5838 0.0702 C 0 0 0 0 0 0 0 0 0 0 0 0
3.7119 0.6750 0.0897 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.5746 -2.1222 -1.0920 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.5373 -2.0629 0.6802 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.6360 -0.9478 -0.1287 H 0 0 0 0 0 0 0 0 0 0 0 0
0.6406 0.0652 -1.3004 H 0 0 0 0 0 0 0 0 0 0 0 0
1.1764 -0.5383 1.5198 H 0 0 0 0 0 0 0 0 0 0 0 0
3.0896 -1.0902 -0.8734 H 0 0 0 0 0 0 0 0 0 0 0 0
3.3497 -1.2397 0.8600 H 0 0 0 0 0 0 0 0 0 0 0 0
3.5758 1.2511 1.0187 H 0 0 0 0 0 0 0 0 0 0 0 0
3.6004 1.2954 -0.8300 H 0 0 0 0 0 0 0 0 0 0 0 0
4.7657 0.2900 0.0338 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 3 2 0
3 4 1 0
@@ -302,45 +301,45 @@ $$$$
M END
$$$$
RDKit 3D
RDKit 3D
18 18 0 0 0 0 0 0 0 0999 V2000
0.9717 -1.3489 -0.0110 C 0 0 0 0 0 0 0 0 0 0 0 0
1.1105 -0.3243 -1.0856 C 0 0 0 0 0 0 0 0 0 0 0 0
0.1251 0.8241 -0.9619 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.0110 1.2209 0.4529 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.0652 0.1661 1.4917 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.2022 -1.2119 0.8908 C 0 0 0 0 0 0 0 0 0 0 0 0
1.8853 -1.2291 0.6154 H 0 0 0 0 0 0 0 0 0 0 0 0
0.9294 -2.3379 -0.4611 H 0 0 0 0 0 0 0 0 0 0 0 0
0.9496 -0.7477 -2.1103 H 0 0 0 0 0 0 0 0 0 0 0 0
2.1470 0.1056 -1.1358 H 0 0 0 0 0 0 0 0 0 0 0 0
0.5728 1.6978 -1.5151 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.7868 0.6472 -1.5309 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.8565 1.9304 0.6348 H 0 0 0 0 0 0 0 0 0 0 0 0
0.9372 1.7847 0.6615 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.8889 0.2833 2.2385 H 0 0 0 0 0 0 0 0 0 0 0 0
0.8485 0.0858 2.1394 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.1758 -1.3657 0.4983 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.0062 -2.0033 1.6600 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
4 5 1 0
5 6 1 0
6 1 1 0
1 7 1 0
1 8 1 0
2 9 1 0
2 10 1 0
3 11 1 0
3 12 1 0
4 13 1 0
4 14 1 0
5 15 1 0
5 16 1 0
6 17 1 0
6 18 1 0
18 18 0 0 0 0 0 0 0 0999 V2000
-1.9985 0.0911 2.3307 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.5239 -0.9063 1.3045 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.9232 -0.2690 0.1063 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.9872 0.5688 -0.5143 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.0572 1.0891 0.4258 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.3589 1.4202 1.7316 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.1554 0.2787 3.0102 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.8695 -0.3130 2.9011 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.4102 -1.4667 0.9576 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.7483 -1.5752 1.7269 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.5844 -1.0656 -0.5741 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.0119 0.2876 0.4129 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.4545 1.4979 -0.7303 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.4714 0.1536 -1.3985 H 0 0 0 0 0 0 0 0 0 0 0 0
-3.8064 0.2989 0.5059 H 0 0 0 0 0 0 0 0 0 0 0 0
-3.4816 2.0335 0.0483 H 0 0 0 0 0 0 0 0 0 0 0 0
-3.1553 1.9256 2.3339 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.5151 2.0800 1.5603 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
4 5 1 0
5 6 1 0
6 1 1 0
1 7 1 0
1 8 1 0
2 9 1 0
2 10 1 0
3 11 1 0
3 12 1 0
4 13 1 0
4 14 1 0
5 15 1 0
5 16 1 0
6 17 1 0
6 18 1 0
M END
$$$$