From bada20dbafa044452473aab913bee313226bc61d Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Sun, 6 Nov 2016 10:17:47 +0100 Subject: [PATCH] Expose new default parameter objects to python (#1150) * Fix a bug in the python DistGeom tests. * test new features * add simple parameter objects for ETDG, ETKDG, and KDG * Update conformation generation documentation to use the new parameter objects. --- .../DistGeomHelpers/Wrap/rdDistGeom.cpp | 18 ++++++ .../DistGeomHelpers/Wrap/testDistGeom.py | 16 ++++- Docs/Book/GettingStartedInPython.rst | 58 +++++++++---------- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/Code/GraphMol/DistGeomHelpers/Wrap/rdDistGeom.cpp b/Code/GraphMol/DistGeomHelpers/Wrap/rdDistGeom.cpp index b7d3d0b62..b201aba1d 100644 --- a/Code/GraphMol/DistGeomHelpers/Wrap/rdDistGeom.cpp +++ b/Code/GraphMol/DistGeomHelpers/Wrap/rdDistGeom.cpp @@ -120,6 +120,15 @@ PyObject *getMolBoundsMatrix(ROMol &mol, bool set15bounds = true, return PyArray_Return(res); } +DGeomHelpers::EmbedParameters *getETKDG() { + return new DGeomHelpers::EmbedParameters(DGeomHelpers::ETKDG); +} +DGeomHelpers::EmbedParameters *getKDG() { + return new DGeomHelpers::EmbedParameters(DGeomHelpers::KDG); +} +DGeomHelpers::EmbedParameters *getETDG() { + return new DGeomHelpers::EmbedParameters(DGeomHelpers::ETDG); +} } BOOST_PYTHON_MODULE(rdDistGeom) { @@ -311,6 +320,15 @@ BOOST_PYTHON_MODULE(rdDistGeom) { docString.c_str()); python::def("EmbedMolecule", RDKit::EmbedMolecule2, (python::arg("mol"), python::arg("params")), docString.c_str()); + python::def("ETKDG", RDKit::getETKDG, + "Returns an EmbedParameters object for the ETKDG method.", + python::return_value_policy()); + python::def("ETDG", RDKit::getETDG, + "Returns an EmbedParameters object for the ETDG method.", + python::return_value_policy()); + python::def("KDG", RDKit::getKDG, + "Returns an EmbedParameters object for the KDG method.", + python::return_value_policy()); docString = "Returns the distance bounds matrix for a molecule\n\ diff --git a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py index 131329cdc..314ab4c6b 100644 --- a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py +++ b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py @@ -400,11 +400,11 @@ class TestCase(unittest.TestCase): def _compareConfs(self, mol, ref, molConfId, refConfId): self.assertEqual(mol.GetNumAtoms(), ref.GetNumAtoms()) molConf = mol.GetConformer(molConfId) - refConf = mol.GetConformer(refConfId) + refConf = ref.GetConformer(refConfId) for i in range(mol.GetNumAtoms()): mp = molConf.GetAtomPosition(i) rp = refConf.GetAtomPosition(i) - self.assertAlmostEqual((mp - rp).Length(), 0.0, 4) + self.assertAlmostEqual((mp - rp).Length(), 0.0, 3) def test9EmbedParams(self): mol = Chem.AddHs(Chem.MolFromSmiles('OCCC')) @@ -424,6 +424,10 @@ class TestCase(unittest.TestCase): params.useExpTorsionAnglePrefs = True self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) self._compareConfs(mol, ref, 0, 0) + params = rdDistGeom.ETDG() + params.randomSeed = 42 + self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) + self._compareConfs(mol, ref, 0, 0) fn = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'DistGeomHelpers', 'test_data', 'simple_torsion.etkdg.mol') @@ -434,6 +438,10 @@ class TestCase(unittest.TestCase): params.useBasicKnowledge = True self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) self._compareConfs(mol, ref, 0, 0) + params = rdDistGeom.ETKDG() + params.randomSeed = 42 + self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) + self._compareConfs(mol, ref, 0, 0) fn = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'DistGeomHelpers', 'test_data', 'simple_torsion.kdg.mol') @@ -443,6 +451,10 @@ class TestCase(unittest.TestCase): params.useBasicKnowledge = True self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) self._compareConfs(mol, ref, 0, 0) + params = rdDistGeom.KDG() + params.randomSeed = 42 + self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0) + self._compareConfs(mol, ref, 0, 0) if __name__ == '__main__': diff --git a/Docs/Book/GettingStartedInPython.rst b/Docs/Book/GettingStartedInPython.rst index 1f3c53667..011912035 100644 --- a/Docs/Book/GettingStartedInPython.rst +++ b/Docs/Book/GettingStartedInPython.rst @@ -227,21 +227,20 @@ cyclobutane M END -Or you can add 3D coordinates by embedding the molecule: +Or you can add 3D coordinates by embedding the molecule (we're using the ETKDG +method here, which is described in more detail below): ->>> AllChem.EmbedMolecule(m2) -0 ->>> AllChem.UFFOptimizeMolecule(m2) +>>> AllChem.EmbedMolecule(m2,AllChem.ETKDG()) 0 >>> print(Chem.MolToMolBlock(m2)) # doctest: +NORMALIZE_WHITESPACE cyclobutane RDKit 3D 4 4 0 0 0 0 0 0 0 0999 V2000 - -0.7883 0.5560 -0.2718 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4153 -0.9091 -0.1911 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.7883 -0.5560 0.6568 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.4153 0.9091 0.5762 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8321 0.5405 -0.1981 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3467 -0.8825 -0.2651 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7190 -0.5613 0.7314 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4599 0.9032 0.5020 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 2 3 1 0 3 4 1 0 @@ -249,14 +248,11 @@ cyclobutane M END -The optimization step isn't necessary, but it substantially improves the quality of the conformation. - -To get good 3D conformations, it's almost always a good idea to add hydrogens to the molecule first: +To get good 3D conformations, it's almost always a good idea to add +hydrogens to the molecule first: >>> m3 = Chem.AddHs(m2) ->>> AllChem.EmbedMolecule(m3) -0 ->>> AllChem.UFFOptimizeMolecule(m3) +>>> AllChem.EmbedMolecule(m3,AllChem.ETKDG()) 0 These can then be removed: @@ -267,10 +263,10 @@ cyclobutane RDKit 3D 4 4 0 0 0 0 0 0 0 0999 V2000 - 0.2851 1.0372 -0.0171 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.0352 -0.2833 0.0743 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.2851 -1.0372 0.0171 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.0352 0.2833 -0.0743 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3497 0.9755 -0.2202 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9814 -0.3380 0.2534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3384 -1.0009 -0.1474 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9992 0.3532 0.1458 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 2 3 1 0 3 4 1 0 @@ -551,7 +547,7 @@ The full process of embedding and optimizing a molecule is easier than all the a 0 >>> m3=Chem.AddHs(m) >>> # use the new method ->>> AllChem.EmbedMolecule(m3, useExpTorsionAnglePrefs=True, useBasicKnowledge=True) +>>> AllChem.EmbedMolecule(m3, AllChem.ETKDG()) 0 The RDKit also has an implementation of the MMFF94 force field available. [#mmff1]_, [#mmff2]_, [#mmff3]_, [#mmff4]_, [#mmffs]_ @@ -588,18 +584,23 @@ to each other and the RMS values calculated. >>> AllChem.AlignMolConformers(m2, RMSlist=rmslist) >>> print(len(rmslist)) 9 ->>> m = Chem.MolFromSmiles('C1CCC1OC') ->>> m3=Chem.AddHs(m) ->>> # run CSD-based method ->>> cids = AllChem.EmbedMultipleConfs(m3, useExpTorsionAnglePrefs=True, useBasicKnowledge=True, numConfs=10) ->>> print(len(cids)) -10 rmslist contains the RMS values between the first conformer and all others. -The RMS between two specific conformers (e.g. 1 and 9) can also be calculated. The flag prealigned lets the user specify if the conformers are already aligned (by default, the function aligns them). +The RMS between two specific conformers (e.g. 1 and 9) can also be calculated. +The flag prealigned lets the user specify if the conformers are already aligned +(by default, the function aligns them). >>> rms = AllChem.GetConformerRMS(m2, 1, 9, prealigned=True) +We can also generate multiple conformers using the new CSD-based method: + +>>> m = Chem.MolFromSmiles('C1CCC1OC') +>>> m3=Chem.AddHs(m) +>>> # run the new CSD-based method +>>> cids = AllChem.EmbedMultipleConfs(m3, 10, AllChem.ETKDG()) +>>> print(len(cids)) +10 + More 3D functionality of the RDKit is described in the Cookbook. @@ -607,9 +608,8 @@ More 3D functionality of the RDKit is described in the Cookbook. The original, default, 2D->3D conversion provided with the RDKit is not intended to be a replacement for a “real” conformational analysis tool; it merely provides quick 3D structures for cases when they are -required. On the other hand, the second method, when a sufficiently -large number of conformers are generated, should be adequate for most -purposes. +required. We believe, however, that the newer ETKDG method[#riniker2]_ should be +adequate for most purposes. Preserving Molecules