Add the option to minimize structures with coordgen (#3319)

* backup

* works; needs testing

* add some better tests and do some cleanup

* rescale coordgen output to recover original bond lengths

* patch coordgen so that we can do a windows DLL build

* patch coordgen so that we can do a windows DLL build

* changes in response to review
This commit is contained in:
Greg Landrum
2020-08-20 09:20:40 +02:00
committed by GitHub
parent 28eb951dfd
commit bcb2860523
5 changed files with 300 additions and 67 deletions

View File

@@ -89,8 +89,17 @@ if(RDK_BUILD_COORDGEN_SUPPORT)
execute_process(COMMAND ${CMAKE_COMMAND} -E tar zxf
${CMAKE_CURRENT_SOURCE_DIR}/coordgenlibs-${RELEASE_NO}.tar.gz
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# FIX: this is a workaround until https://github.com/schrodinger/coordgenlibs/pull/76
# is merged and a new coordgen release is done
file(RENAME "coordgenlibs-${RELEASE_NO}" "${COORDGEN_DIR}")
set(fileToPatch "${COORDGEN_DIR}/CoordgenFragmenter.h")
configure_file("${fileToPatch}" "${fileToPatch}.orig" COPYONLY)
file(READ "${fileToPatch}" buffer)
string(REPLACE "class CoordgenFragmenter"
"#include \"CoordgenConfig.hpp\"\nclass EXPORT_COORDGEN CoordgenFragmenter"
buffer "${buffer}")
file(WRITE "${fileToPatch}" "${buffer}")
else()
message("-- Found coordgenlibs source in ${COORDGEN_DIR}")
endif()
@@ -116,7 +125,7 @@ if(RDK_BUILD_COORDGEN_SUPPORT)
rdkit_test(testCoordGen test.cpp
LINK_LIBRARIES
${RDK_COORDGEN_LIBS} Depictor
${RDK_COORDGEN_LIBS} Depictor ChemTransforms
FileParsers SmilesParse SubstructMatch GraphMol
RDGeneral DataStructs RDGeneral RDGeometryLib
${RDKit_THREAD_LIBS})

View File

@@ -16,6 +16,7 @@
#include <cstdlib>
#include "coordgen/sketcherMinimizer.h"
#include "coordgen/CoordgenFragmenter.h"
namespace RDKit {
namespace CoordGen {
@@ -32,6 +33,7 @@ struct CoordGenParams {
SKETCHER_STANDARD_PRECISION; // controls sketch precision
bool dbg_useConstrained = true; // debugging
bool dbg_useFixed = false; // debugging
bool minimizeOnly = false; // don't actually generate full coords
};
static CoordGenParams defaultParams;
@@ -58,6 +60,12 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
templateFileDir += "/Data/";
}
}
if (params->minimizeOnly && !mol.getNumConformers()) {
throw ValueErrorException(
"minimizeOnly set but molecule has no conformers");
}
double scaleFactor = params->coordgenScaling;
sketcherMinimizer minimizer(params->minimizerPrecision);
@@ -70,12 +78,21 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
}
bool hasTemplateMatch = false;
MatchVectType mv;
if (params->templateMol && params->templateMol->getNumConformers() == 1) {
if (!params->minimizeOnly && params->templateMol &&
params->templateMol->getNumConformers() == 1) {
if (SubstructMatch(mol, *(params->templateMol), mv)) {
hasTemplateMatch = true;
}
}
// if we're doing coordinate minimization it makes our life easier to
// start by translating to the origin
RDGeom::Point3D centroid{0.0, 0.0, 0.0};
if (params->minimizeOnly) {
auto conf = mol.getConformer();
centroid = MolTransforms::computeCentroid(conf);
}
std::vector<sketcherMinimizerAtom*> ats(mol.getNumAtoms());
for (auto atit = mol.beginAtoms(); atit != mol.endAtoms(); ++atit) {
auto oatom = *atit;
@@ -83,11 +100,17 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
atom->molecule = min_mol; // seems like this should be in addNewAtom()
atom->atomicNumber = oatom->getAtomicNum();
atom->charge = oatom->getFormalCharge();
if (hasTemplateMatch ||
params->coordMap.find(oatom->getIdx()) != params->coordMap.end()) {
if (params->minimizeOnly) {
auto coords = mol.getConformer().getAtomPos(oatom->getIdx());
coords -= centroid;
atom->coordinates = sketcherMinimizerPointF(coords.x * scaleFactor,
coords.y * scaleFactor);
atom->fixed = false;
atom->constrained = false;
} else if (hasTemplateMatch || params->coordMap.find(oatom->getIdx()) !=
params->coordMap.end()) {
atom->constrained = params->dbg_useConstrained;
atom->fixed = params->dbg_useFixed;
RDGeom::Point2D coords;
if (hasTemplateMatch) {
for (auto& pr : mv) {
if (pr.second == static_cast<int>(oatom->getIdx())) {
@@ -98,7 +121,6 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
break;
}
}
coords = params->coordMap.find(oatom->getIdx())->second;
} else {
const RDGeom::Point2D& coords =
params->coordMap.find(oatom->getIdx())->second;
@@ -109,6 +131,14 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
ats[oatom->getIdx()] = atom;
}
// coordgen has its own ideas about what bond lengths should be
// if we are doing minimizeOnly we want to do a rigid scaling after the
// minimization in order to try and get as close to where we started as
// possible
double singleBondLenAccum = 0.0;
double bondLenAccum = 0.0;
unsigned nSingle = 0;
std::vector<sketcherMinimizerBond*> bnds(mol.getNumBonds());
for (auto bndit = mol.beginBonds(); bndit != mol.endBonds(); ++bndit) {
auto obnd = *bndit;
@@ -118,6 +148,13 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
switch (obnd->getBondType()) {
case Bond::SINGLE:
bnd->bondOrder = 1;
if (params->minimizeOnly) {
++nSingle;
singleBondLenAccum +=
(mol.getConformer().getAtomPos(obnd->getBeginAtomIdx()) -
mol.getConformer().getAtomPos(obnd->getEndAtomIdx()))
.length();
}
break;
case Bond::DOUBLE:
bnd->bondOrder = 2;
@@ -132,6 +169,19 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
BOOST_LOG(rdWarningLog) << "unrecognized bond type";
}
bnds[obnd->getIdx()] = bnd;
if (params->minimizeOnly) {
bondLenAccum += (mol.getConformer().getAtomPos(obnd->getBeginAtomIdx()) -
mol.getConformer().getAtomPos(obnd->getEndAtomIdx()))
.length();
}
}
double avgBondLen = 1.0;
if (params->minimizeOnly) {
if (nSingle) {
avgBondLen = singleBondLenAccum / nSingle;
} else if (mol.getNumBonds()) {
avgBondLen = bondLenAccum / mol.getNumBonds();
}
}
// setup double bond stereo
@@ -156,19 +206,30 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
}
minimizer.initialize(min_mol);
minimizer.runGenerateCoordinates();
if (!params->minimizeOnly) {
minimizer.runGenerateCoordinates();
} else {
CoordgenFragmenter::splitIntoFragments(min_mol);
minimizer.m_minimizer.minimizeMolecule(min_mol);
}
auto conf = new Conformer(mol.getNumAtoms());
for (size_t i = 0; i < mol.getNumAtoms(); ++i) {
conf->setAtomPos(
i, RDGeom::Point3D(ats[i]->coordinates.x() / scaleFactor,
ats[i]->coordinates.y() / scaleFactor, 0.0));
auto coords = RDGeom::Point3D(ats[i]->coordinates.x() / scaleFactor,
ats[i]->coordinates.y() / scaleFactor, 0.0);
if (params->minimizeOnly) {
// readjust the average bond length from 1 (coordgen's target)
// to whatever we started with
coords *= avgBondLen;
coords += centroid;
}
conf->setAtomPos(i, coords);
// std::cerr << atom->coordinates << std::endl;
}
conf->set3D(false);
mol.clearConformers();
auto res = mol.addConformer(conf, true);
if (params->coordMap.empty() && !params->templateMol) {
if (!params->minimizeOnly && params->coordMap.empty() &&
!params->templateMol) {
// center the coordinates
RDGeom::Transform3D tf;
auto centroid = MolTransforms::computeCentroid(*conf);
@@ -179,4 +240,4 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) {
return res;
}
} // end of namespace CoordGen
} // end of namespace RDKit
} // namespace RDKit

View File

@@ -71,7 +71,10 @@ struct coordgen_wrapper {
"directory containing the templates.mae file")
.def_readwrite("minimizerPrecision",
&CoordGen::CoordGenParams::minimizerPrecision,
"controls sketcher precision");
"controls sketcher precision")
.def_readwrite("minimizeOnly", &CoordGen::CoordGenParams::minimizeOnly,
"uses coordgen's force field to cleanup the 2D "
"coordinates of the active conformation");
python::def("SetDefaultTemplateFileDir", SetDefaultTemplateFileDir);
docString =
"Add 2D coordinates.\n"

View File

@@ -22,60 +22,145 @@ def compareConfs(c1,c2,match, tol=1e-2, alignIt=False):
return True
class TestCase(unittest.TestCase) :
def test_basics(self):
mol = Chem.MolFromSmiles('CCOC')
self.assertEqual(mol.GetNumConformers(),0)
rdCoordGen.AddCoords(mol)
self.assertEqual(mol.GetNumConformers(),1)
rdCoordGen.AddCoords(mol)
self.assertEqual(mol.GetNumConformers(),1)
rwmol = Chem.RWMol(mol)
rdCoordGen.AddCoords(rwmol)
self.assertEqual(rwmol.GetNumConformers(),1)
def test_template1(self):
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
template.SetProp("_Name",'template')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
mol.SetProp("_Name","mol")
rdCoordGen.AddCoords(template)
rdCoordGen.AddCoords(mol)
self.assertFalse(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
match = mol.GetSubstructMatch(template)
mapd = dict()
for i,aid in enumerate(match):
p = template.GetConformer().GetAtomPosition(i)
mapd[aid] = Geometry.Point2D(p.x,p.y)
ps = rdCoordGen.CoordGenParams()
ps.SetCoordMap(mapd)
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
def test_basics(self):
mol = Chem.MolFromSmiles('CCOC')
self.assertEqual(mol.GetNumConformers(),0)
rdCoordGen.AddCoords(mol)
self.assertEqual(mol.GetNumConformers(),1)
rdCoordGen.AddCoords(mol)
self.assertEqual(mol.GetNumConformers(),1)
rwmol = Chem.RWMol(mol)
rdCoordGen.AddCoords(rwmol)
self.assertEqual(rwmol.GetNumConformers(),1)
def test_template1(self):
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
template.SetProp("_Name",'template')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
mol.SetProp("_Name","mol")
rdCoordGen.AddCoords(template)
rdCoordGen.AddCoords(mol)
self.assertFalse(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
match = mol.GetSubstructMatch(template)
mapd = dict()
for i,aid in enumerate(match):
p = template.GetConformer().GetAtomPosition(i)
mapd[aid] = Geometry.Point2D(p.x,p.y)
ps = rdCoordGen.CoordGenParams()
ps.SetCoordMap(mapd)
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
def test_template2(self):
# the easier way...
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
template.SetProp("_Name",'template')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
mol.SetProp("_Name","mol")
rdCoordGen.AddCoords(template)
ps = rdCoordGen.CoordGenParams()
ps.SetTemplateMol(template)
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
def test_template2(self):
# the easier way...
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
template.SetProp("_Name",'template')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
mol.SetProp("_Name","mol")
rdCoordGen.AddCoords(template)
ps = rdCoordGen.CoordGenParams()
ps.SetTemplateMol(template)
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
def test_template3(self):
# the easier way, test lifetime...
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
rdCoordGen.AddCoords(template)
template2 = Chem.Mol(template)
ps = rdCoordGen.CoordGenParams()
ps.SetTemplateMol(template2)
template2 = None
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
def testMinimizeOnly(self):
m = Chem.MolFromMolBlock('''
Mrv2014 08052005392D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 14 15 0 0 0
M V30 BEGIN ATOM
M V30 1 C -1.4287 -1.4523 0 0
M V30 2 C -2.9638 -1.5752 0 0
M V30 3 C -3.8377 -0.3072 0 0
M V30 4 C -3.1766 1.0837 0 0
M V30 5 C -1.6416 1.2066 0 0
M V30 6 C -0.7675 -0.0614 0 0
M V30 7 C 0.7675 0.0614 0 0
M V30 8 C 1.6416 -1.2066 0 0
M V30 9 C 0.9804 -2.5975 0 0
M V30 10 N 3.1766 -1.0837 0 0
M V30 11 C 3.8377 0.3072 0 0
M V30 12 C 2.9638 1.5752 0 0
M V30 13 C 1.4287 1.4523 0 0
M V30 14 F -0.5548 -2.7203 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 6 7
M V30 7 2 7 8
M V30 8 1 8 9
M V30 9 1 8 10
M V30 10 2 10 11
M V30 11 1 11 12
M V30 12 2 12 13
M V30 13 1 6 1
M V30 14 1 13 7
M V30 15 1 1 14
M V30 END BOND
M V30 END CTAB
M END
''')
ref = Chem.MolFromMolBlock('''
RDKit 2D
14 15 0 0 0 0 0 0 0 0999 V2000
-1.5379 -1.4859 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.1218 -1.5958 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9595 -0.2554 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.2641 1.1663 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.6778 1.2217 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.7941 -0.0886 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7983 0.0383 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.7524 -1.2209 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.1246 -2.6443 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.3306 -1.0787 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
3.9439 0.3747 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.0337 1.6656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4617 1.4692 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6924 -2.7917 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0
2 3 1 0
3 4 2 0
4 5 1 0
5 6 2 0
6 7 1 0
7 8 2 0
8 9 1 0
8 10 1 0
10 11 2 0
11 12 1 0
12 13 2 0
6 1 1 0
13 7 1 0
1 14 1 0
M END''')
ps = rdCoordGen.CoordGenParams()
ps.minimizeOnly = True
m2 = Chem.Mol(m)
rdCoordGen.AddCoords(m2,ps)
self.assertGreater(rdMolAlign.AlignMol(m, ref), 0.1)
self.assertLess(rdMolAlign.AlignMol(m2, ref), 0.1)
def test_template3(self):
# the easier way, test lifetime...
template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1')
mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1')
rdCoordGen.AddCoords(template)
template2 = Chem.Mol(template)
ps = rdCoordGen.CoordGenParams()
ps.SetTemplateMol(template2)
template2 = None
ps.dbg_useFixed = True
rdCoordGen.AddCoords(mol,ps)
self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template)))
if __name__ == '__main__':

View File

@@ -16,6 +16,7 @@
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/MolAlign/AlignMolecules.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <RDGeneral/RDLog.h>
@@ -487,12 +488,86 @@ void testGithub3131() {
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testCoordgenMinimize() {
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "testing coordgen minimize" << std::endl;
{
auto m1 =
R"CTAB(
Mrv2014 08052005142D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 8 0 0 0
M V30 BEGIN ATOM
M V30 1 C -2.1121 -0.3399 0 0
M V30 2 C -3.3708 0.5474 0 0
M V30 3 C -1.9835 0.9311 0 0
M V30 4 C -0.7248 0.0437 0 0
M V30 5 C 0.7926 0.3064 0 0
M V30 6 O 1.3239 1.7518 0 0
M V30 7 O 0.612 -1.2514 0 0
M V30 8 C 1.3429 -0.2989 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 3
M V30 3 1 3 4
M V30 4 1 4 5
M V30 5 2 5 6
M V30 6 1 5 7
M V30 7 1 7 8
M V30 8 1 4 1
M V30 END BOND
M V30 END CTAB
M END
)CTAB"_ctab;
TEST_ASSERT(m1);
TEST_ASSERT(m1->getNumConformers() == 1);
auto ref = R"CTAB(
RDKit 2D
8 8 0 0 0 0 0 0 0 0999 V2000
-1.5738 -1.1409 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.7946 -0.3071 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.9122 0.8790 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6914 0.0452 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7517 0.2885 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.2665 1.6721 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.6941 -0.8483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.1500 -0.6002 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
4 5 1 0
5 6 2 0
5 7 1 0
7 8 1 0
4 1 1 0
M END
)CTAB"_ctab;
TEST_ASSERT(ref);
TEST_ASSERT(ref->getNumConformers() == 1);
CoordGen::CoordGenParams ps;
ps.minimizeOnly = true;
CoordGen::addCoords(*m1, &ps);
ROMol m2(*m1);
double rmsd = MolAlign::alignMol(m2, *ref);
TEST_ASSERT(rmsd < 0.1);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
int main(int argc, char* argv[]) {
(void)argc;
(void)argv;
RDLog::InitLogs();
#if 1
test2();
test1();
testGithub1929();
testGithub3131();
#endif
testCoordgenMinimize();
}