From bcb28605238f866195f653bafef2811901e645d2 Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Thu, 20 Aug 2020 09:20:40 +0200 Subject: [PATCH] Add the option to minimize structures with coordgen (#3319) * backup * works; needs testing * add some better tests and do some cleanup * rescale coordgen output to recover original bond lengths * patch coordgen so that we can do a windows DLL build * patch coordgen so that we can do a windows DLL build * changes in response to review --- External/CoordGen/CMakeLists.txt | 13 +- External/CoordGen/CoordGen.h | 85 +++++++++-- External/CoordGen/Wrap/rdCoordGen.cpp | 5 +- External/CoordGen/Wrap/testCoordGen.py | 189 ++++++++++++++++++------- External/CoordGen/test.cpp | 75 ++++++++++ 5 files changed, 300 insertions(+), 67 deletions(-) diff --git a/External/CoordGen/CMakeLists.txt b/External/CoordGen/CMakeLists.txt index 5cb459bbd..91c4b1645 100644 --- a/External/CoordGen/CMakeLists.txt +++ b/External/CoordGen/CMakeLists.txt @@ -89,8 +89,17 @@ if(RDK_BUILD_COORDGEN_SUPPORT) execute_process(COMMAND ${CMAKE_COMMAND} -E tar zxf ${CMAKE_CURRENT_SOURCE_DIR}/coordgenlibs-${RELEASE_NO}.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + + # FIX: this is a workaround until https://github.com/schrodinger/coordgenlibs/pull/76 + # is merged and a new coordgen release is done file(RENAME "coordgenlibs-${RELEASE_NO}" "${COORDGEN_DIR}") - + set(fileToPatch "${COORDGEN_DIR}/CoordgenFragmenter.h") + configure_file("${fileToPatch}" "${fileToPatch}.orig" COPYONLY) + file(READ "${fileToPatch}" buffer) + string(REPLACE "class CoordgenFragmenter" + "#include \"CoordgenConfig.hpp\"\nclass EXPORT_COORDGEN CoordgenFragmenter" + buffer "${buffer}") + file(WRITE "${fileToPatch}" "${buffer}") else() message("-- Found coordgenlibs source in ${COORDGEN_DIR}") endif() @@ -116,7 +125,7 @@ if(RDK_BUILD_COORDGEN_SUPPORT) rdkit_test(testCoordGen test.cpp LINK_LIBRARIES - ${RDK_COORDGEN_LIBS} Depictor + ${RDK_COORDGEN_LIBS} Depictor ChemTransforms FileParsers SmilesParse SubstructMatch GraphMol RDGeneral DataStructs RDGeneral RDGeometryLib ${RDKit_THREAD_LIBS}) diff --git a/External/CoordGen/CoordGen.h b/External/CoordGen/CoordGen.h index 94a1ca0eb..b6428bc6e 100644 --- a/External/CoordGen/CoordGen.h +++ b/External/CoordGen/CoordGen.h @@ -16,6 +16,7 @@ #include #include "coordgen/sketcherMinimizer.h" +#include "coordgen/CoordgenFragmenter.h" namespace RDKit { namespace CoordGen { @@ -32,6 +33,7 @@ struct CoordGenParams { SKETCHER_STANDARD_PRECISION; // controls sketch precision bool dbg_useConstrained = true; // debugging bool dbg_useFixed = false; // debugging + bool minimizeOnly = false; // don't actually generate full coords }; static CoordGenParams defaultParams; @@ -58,6 +60,12 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { templateFileDir += "/Data/"; } } + + if (params->minimizeOnly && !mol.getNumConformers()) { + throw ValueErrorException( + "minimizeOnly set but molecule has no conformers"); + } + double scaleFactor = params->coordgenScaling; sketcherMinimizer minimizer(params->minimizerPrecision); @@ -70,12 +78,21 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { } bool hasTemplateMatch = false; MatchVectType mv; - if (params->templateMol && params->templateMol->getNumConformers() == 1) { + if (!params->minimizeOnly && params->templateMol && + params->templateMol->getNumConformers() == 1) { if (SubstructMatch(mol, *(params->templateMol), mv)) { hasTemplateMatch = true; } } + // if we're doing coordinate minimization it makes our life easier to + // start by translating to the origin + RDGeom::Point3D centroid{0.0, 0.0, 0.0}; + if (params->minimizeOnly) { + auto conf = mol.getConformer(); + centroid = MolTransforms::computeCentroid(conf); + } + std::vector ats(mol.getNumAtoms()); for (auto atit = mol.beginAtoms(); atit != mol.endAtoms(); ++atit) { auto oatom = *atit; @@ -83,11 +100,17 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { atom->molecule = min_mol; // seems like this should be in addNewAtom() atom->atomicNumber = oatom->getAtomicNum(); atom->charge = oatom->getFormalCharge(); - if (hasTemplateMatch || - params->coordMap.find(oatom->getIdx()) != params->coordMap.end()) { + if (params->minimizeOnly) { + auto coords = mol.getConformer().getAtomPos(oatom->getIdx()); + coords -= centroid; + atom->coordinates = sketcherMinimizerPointF(coords.x * scaleFactor, + coords.y * scaleFactor); + atom->fixed = false; + atom->constrained = false; + } else if (hasTemplateMatch || params->coordMap.find(oatom->getIdx()) != + params->coordMap.end()) { atom->constrained = params->dbg_useConstrained; atom->fixed = params->dbg_useFixed; - RDGeom::Point2D coords; if (hasTemplateMatch) { for (auto& pr : mv) { if (pr.second == static_cast(oatom->getIdx())) { @@ -98,7 +121,6 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { break; } } - coords = params->coordMap.find(oatom->getIdx())->second; } else { const RDGeom::Point2D& coords = params->coordMap.find(oatom->getIdx())->second; @@ -109,6 +131,14 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { ats[oatom->getIdx()] = atom; } + // coordgen has its own ideas about what bond lengths should be + // if we are doing minimizeOnly we want to do a rigid scaling after the + // minimization in order to try and get as close to where we started as + // possible + double singleBondLenAccum = 0.0; + double bondLenAccum = 0.0; + unsigned nSingle = 0; + std::vector bnds(mol.getNumBonds()); for (auto bndit = mol.beginBonds(); bndit != mol.endBonds(); ++bndit) { auto obnd = *bndit; @@ -118,6 +148,13 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { switch (obnd->getBondType()) { case Bond::SINGLE: bnd->bondOrder = 1; + if (params->minimizeOnly) { + ++nSingle; + singleBondLenAccum += + (mol.getConformer().getAtomPos(obnd->getBeginAtomIdx()) - + mol.getConformer().getAtomPos(obnd->getEndAtomIdx())) + .length(); + } break; case Bond::DOUBLE: bnd->bondOrder = 2; @@ -132,6 +169,19 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { BOOST_LOG(rdWarningLog) << "unrecognized bond type"; } bnds[obnd->getIdx()] = bnd; + if (params->minimizeOnly) { + bondLenAccum += (mol.getConformer().getAtomPos(obnd->getBeginAtomIdx()) - + mol.getConformer().getAtomPos(obnd->getEndAtomIdx())) + .length(); + } + } + double avgBondLen = 1.0; + if (params->minimizeOnly) { + if (nSingle) { + avgBondLen = singleBondLenAccum / nSingle; + } else if (mol.getNumBonds()) { + avgBondLen = bondLenAccum / mol.getNumBonds(); + } } // setup double bond stereo @@ -156,19 +206,30 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { } minimizer.initialize(min_mol); - minimizer.runGenerateCoordinates(); + if (!params->minimizeOnly) { + minimizer.runGenerateCoordinates(); + } else { + CoordgenFragmenter::splitIntoFragments(min_mol); + minimizer.m_minimizer.minimizeMolecule(min_mol); + } auto conf = new Conformer(mol.getNumAtoms()); for (size_t i = 0; i < mol.getNumAtoms(); ++i) { - conf->setAtomPos( - i, RDGeom::Point3D(ats[i]->coordinates.x() / scaleFactor, - ats[i]->coordinates.y() / scaleFactor, 0.0)); + auto coords = RDGeom::Point3D(ats[i]->coordinates.x() / scaleFactor, + ats[i]->coordinates.y() / scaleFactor, 0.0); + if (params->minimizeOnly) { + // readjust the average bond length from 1 (coordgen's target) + // to whatever we started with + coords *= avgBondLen; + coords += centroid; + } + conf->setAtomPos(i, coords); // std::cerr << atom->coordinates << std::endl; } conf->set3D(false); - mol.clearConformers(); auto res = mol.addConformer(conf, true); - if (params->coordMap.empty() && !params->templateMol) { + if (!params->minimizeOnly && params->coordMap.empty() && + !params->templateMol) { // center the coordinates RDGeom::Transform3D tf; auto centroid = MolTransforms::computeCentroid(*conf); @@ -179,4 +240,4 @@ unsigned int addCoords(T& mol, const CoordGenParams* params = nullptr) { return res; } } // end of namespace CoordGen -} // end of namespace RDKit +} // namespace RDKit diff --git a/External/CoordGen/Wrap/rdCoordGen.cpp b/External/CoordGen/Wrap/rdCoordGen.cpp index 0b38787d9..deca2c178 100644 --- a/External/CoordGen/Wrap/rdCoordGen.cpp +++ b/External/CoordGen/Wrap/rdCoordGen.cpp @@ -71,7 +71,10 @@ struct coordgen_wrapper { "directory containing the templates.mae file") .def_readwrite("minimizerPrecision", &CoordGen::CoordGenParams::minimizerPrecision, - "controls sketcher precision"); + "controls sketcher precision") + .def_readwrite("minimizeOnly", &CoordGen::CoordGenParams::minimizeOnly, + "uses coordgen's force field to cleanup the 2D " + "coordinates of the active conformation"); python::def("SetDefaultTemplateFileDir", SetDefaultTemplateFileDir); docString = "Add 2D coordinates.\n" diff --git a/External/CoordGen/Wrap/testCoordGen.py b/External/CoordGen/Wrap/testCoordGen.py index 16122c7c6..de5e0f651 100644 --- a/External/CoordGen/Wrap/testCoordGen.py +++ b/External/CoordGen/Wrap/testCoordGen.py @@ -22,60 +22,145 @@ def compareConfs(c1,c2,match, tol=1e-2, alignIt=False): return True class TestCase(unittest.TestCase) : - def test_basics(self): - mol = Chem.MolFromSmiles('CCOC') - self.assertEqual(mol.GetNumConformers(),0) - rdCoordGen.AddCoords(mol) - self.assertEqual(mol.GetNumConformers(),1) - rdCoordGen.AddCoords(mol) - self.assertEqual(mol.GetNumConformers(),1) - rwmol = Chem.RWMol(mol) - rdCoordGen.AddCoords(rwmol) - self.assertEqual(rwmol.GetNumConformers(),1) - def test_template1(self): - template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') - template.SetProp("_Name",'template') - mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') - mol.SetProp("_Name","mol") - rdCoordGen.AddCoords(template) - rdCoordGen.AddCoords(mol) - self.assertFalse(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) - match = mol.GetSubstructMatch(template) - mapd = dict() - for i,aid in enumerate(match): - p = template.GetConformer().GetAtomPosition(i) - mapd[aid] = Geometry.Point2D(p.x,p.y) - ps = rdCoordGen.CoordGenParams() - ps.SetCoordMap(mapd) - ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + def test_basics(self): + mol = Chem.MolFromSmiles('CCOC') + self.assertEqual(mol.GetNumConformers(),0) + rdCoordGen.AddCoords(mol) + self.assertEqual(mol.GetNumConformers(),1) + rdCoordGen.AddCoords(mol) + self.assertEqual(mol.GetNumConformers(),1) + rwmol = Chem.RWMol(mol) + rdCoordGen.AddCoords(rwmol) + self.assertEqual(rwmol.GetNumConformers(),1) + def test_template1(self): + template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') + template.SetProp("_Name",'template') + mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') + mol.SetProp("_Name","mol") + rdCoordGen.AddCoords(template) + rdCoordGen.AddCoords(mol) + self.assertFalse(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + match = mol.GetSubstructMatch(template) + mapd = dict() + for i,aid in enumerate(match): + p = template.GetConformer().GetAtomPosition(i) + mapd[aid] = Geometry.Point2D(p.x,p.y) + ps = rdCoordGen.CoordGenParams() + ps.SetCoordMap(mapd) + ps.dbg_useFixed = True + rdCoordGen.AddCoords(mol,ps) + self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) - def test_template2(self): - # the easier way... - template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') - template.SetProp("_Name",'template') - mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') - mol.SetProp("_Name","mol") - rdCoordGen.AddCoords(template) - ps = rdCoordGen.CoordGenParams() - ps.SetTemplateMol(template) - ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + def test_template2(self): + # the easier way... + template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') + template.SetProp("_Name",'template') + mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') + mol.SetProp("_Name","mol") + rdCoordGen.AddCoords(template) + ps = rdCoordGen.CoordGenParams() + ps.SetTemplateMol(template) + ps.dbg_useFixed = True + rdCoordGen.AddCoords(mol,ps) + self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + + def test_template3(self): + # the easier way, test lifetime... + template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') + mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') + rdCoordGen.AddCoords(template) + template2 = Chem.Mol(template) + ps = rdCoordGen.CoordGenParams() + ps.SetTemplateMol(template2) + template2 = None + ps.dbg_useFixed = True + rdCoordGen.AddCoords(mol,ps) + self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + + def testMinimizeOnly(self): + m = Chem.MolFromMolBlock(''' + Mrv2014 08052005392D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 14 15 0 0 0 +M V30 BEGIN ATOM +M V30 1 C -1.4287 -1.4523 0 0 +M V30 2 C -2.9638 -1.5752 0 0 +M V30 3 C -3.8377 -0.3072 0 0 +M V30 4 C -3.1766 1.0837 0 0 +M V30 5 C -1.6416 1.2066 0 0 +M V30 6 C -0.7675 -0.0614 0 0 +M V30 7 C 0.7675 0.0614 0 0 +M V30 8 C 1.6416 -1.2066 0 0 +M V30 9 C 0.9804 -2.5975 0 0 +M V30 10 N 3.1766 -1.0837 0 0 +M V30 11 C 3.8377 0.3072 0 0 +M V30 12 C 2.9638 1.5752 0 0 +M V30 13 C 1.4287 1.4523 0 0 +M V30 14 F -0.5548 -2.7203 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 2 1 2 +M V30 2 1 2 3 +M V30 3 2 3 4 +M V30 4 1 4 5 +M V30 5 2 5 6 +M V30 6 1 6 7 +M V30 7 2 7 8 +M V30 8 1 8 9 +M V30 9 1 8 10 +M V30 10 2 10 11 +M V30 11 1 11 12 +M V30 12 2 12 13 +M V30 13 1 6 1 +M V30 14 1 13 7 +M V30 15 1 1 14 +M V30 END BOND +M V30 END CTAB +M END +''') + ref = Chem.MolFromMolBlock(''' + RDKit 2D + + 14 15 0 0 0 0 0 0 0 0999 V2000 + -1.5379 -1.4859 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.1218 -1.5958 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.9595 -0.2554 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2641 1.1663 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6778 1.2217 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7941 -0.0886 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7983 0.0383 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7524 -1.2209 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1246 -2.6443 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.3306 -1.0787 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9439 0.3747 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.0337 1.6656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4617 1.4692 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6924 -2.7917 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 2 0 + 8 9 1 0 + 8 10 1 0 + 10 11 2 0 + 11 12 1 0 + 12 13 2 0 + 6 1 1 0 + 13 7 1 0 + 1 14 1 0 +M END''') + ps = rdCoordGen.CoordGenParams() + ps.minimizeOnly = True + m2 = Chem.Mol(m) + rdCoordGen.AddCoords(m2,ps) + self.assertGreater(rdMolAlign.AlignMol(m, ref), 0.1) + self.assertLess(rdMolAlign.AlignMol(m2, ref), 0.1) - def test_template3(self): - # the easier way, test lifetime... - template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') - mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') - rdCoordGen.AddCoords(template) - template2 = Chem.Mol(template) - ps = rdCoordGen.CoordGenParams() - ps.SetTemplateMol(template2) - template2 = None - ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) if __name__ == '__main__': diff --git a/External/CoordGen/test.cpp b/External/CoordGen/test.cpp index aa1d71367..c3d2c6595 100644 --- a/External/CoordGen/test.cpp +++ b/External/CoordGen/test.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -487,12 +488,86 @@ void testGithub3131() { BOOST_LOG(rdInfoLog) << "done" << std::endl; } +void testCoordgenMinimize() { + BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdInfoLog) << "testing coordgen minimize" << std::endl; + { + auto m1 = + R"CTAB( + Mrv2014 08052005142D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 8 8 0 0 0 +M V30 BEGIN ATOM +M V30 1 C -2.1121 -0.3399 0 0 +M V30 2 C -3.3708 0.5474 0 0 +M V30 3 C -1.9835 0.9311 0 0 +M V30 4 C -0.7248 0.0437 0 0 +M V30 5 C 0.7926 0.3064 0 0 +M V30 6 O 1.3239 1.7518 0 0 +M V30 7 O 0.612 -1.2514 0 0 +M V30 8 C 1.3429 -0.2989 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 2 3 +M V30 3 1 3 4 +M V30 4 1 4 5 +M V30 5 2 5 6 +M V30 6 1 5 7 +M V30 7 1 7 8 +M V30 8 1 4 1 +M V30 END BOND +M V30 END CTAB +M END +)CTAB"_ctab; + TEST_ASSERT(m1); + TEST_ASSERT(m1->getNumConformers() == 1); + auto ref = R"CTAB( + RDKit 2D + + 8 8 0 0 0 0 0 0 0 0999 V2000 + -1.5738 -1.1409 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7946 -0.3071 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9122 0.8790 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6914 0.0452 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7517 0.2885 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2665 1.6721 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6941 -0.8483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1500 -0.6002 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 1 0 + 4 5 1 0 + 5 6 2 0 + 5 7 1 0 + 7 8 1 0 + 4 1 1 0 +M END +)CTAB"_ctab; + TEST_ASSERT(ref); + TEST_ASSERT(ref->getNumConformers() == 1); + + CoordGen::CoordGenParams ps; + ps.minimizeOnly = true; + CoordGen::addCoords(*m1, &ps); + ROMol m2(*m1); + double rmsd = MolAlign::alignMol(m2, *ref); + TEST_ASSERT(rmsd < 0.1); + } + BOOST_LOG(rdInfoLog) << "done" << std::endl; +} + int main(int argc, char* argv[]) { (void)argc; (void)argv; RDLog::InitLogs(); +#if 1 test2(); test1(); testGithub1929(); testGithub3131(); +#endif + testCoordgenMinimize(); }