From 880a8e5725cf842091c3f273da2b35b13e88fffb Mon Sep 17 00:00:00 2001 From: Ric Date: Fri, 28 Apr 2023 00:53:56 -0400 Subject: [PATCH] Reformat Python code for 2023.03 release (#6294) * run yapf * run isort --------- Co-authored-by: Greg Landrum --- Code/ChemicalFeatures/Wrap/testFeatures.py | 21 +- .../MetricMatrixCalc/Wrap/testMatricCalc.py | 228 +- Code/DataStructs/Wrap/testBV.py | 64 +- .../DataStructs/Wrap/testDiscreteValueVect.py | 406 ++-- Code/DataStructs/Wrap/testFPB.py | 14 +- Code/DataStructs/Wrap/testSparseIntVect.py | 14 +- Code/Demos/RDKit/MPI/rdkpympi.py | 5 +- Code/Demos/boost/EBV_err/setup.py | 31 +- Code/Demos/boost/EBV_err/test.py | 3 +- Code/Demos/boost/cross_mod_err/setup.py | 31 +- Code/Demos/boost/cross_mod_err/test.py | 1 - Code/Demos/boost/cross_module/setup.py | 27 +- Code/Demos/boost/cross_module/test.py | 1 - Code/Demos/boost/numpy/setup.py | 23 +- Code/Demos/boost/numpy/test.py | 3 +- Code/Demos/boost/overloads/setup.py | 23 +- Code/Demos/boost/python_objs/setup.py | 23 +- Code/Demos/boost/smartPtrsAndIters/setup.py | 13 +- Code/Demos/boost/smartPtrsAndIters/test.py | 3 +- Code/DistGeom/Wrap/rough_test.py | 17 +- Code/ForceField/Wrap/testConstraints.py | 14 +- Code/Geometry/Wrap/testGeometry.py | 9 +- .../Abbreviations/Wrap/testAbbreviations.py | 4 +- .../CIPLabeler/Wrap/pyCIPLabelsValidation.py | 1 + .../ChemReactions/Wrap/testEnumerations.py | 375 ++-- .../ChemReactions/Wrap/testReactionWrapper.py | 11 +- .../ChemReactions/Wrap/testSanitize.py | 179 +- Code/GraphMol/Depictor/Wrap/testDepictor.py | 1151 +++++----- Code/GraphMol/Deprotect/Wrap/rough_test.py | 1 + Code/GraphMol/Descriptors/Wrap/test3D.py | 9 +- .../Descriptors/Wrap/testMolDescriptors.py | 11 +- Code/GraphMol/Descriptors/test3D.py | 88 +- Code/GraphMol/Descriptors/test3D_old.py | 165 +- Code/GraphMol/Descriptors/test_data/pmi.py | 23 +- .../DetermineBonds/Wrap/testDetermineBonds.py | 10 +- .../DistGeomHelpers/Wrap/testDistGeom.py | 38 +- Code/GraphMol/FMCS/Wrap/testFMCS.py | 1960 ++++++++--------- Code/GraphMol/FileParsers/mol.py | 1 - .../GraphMol/FilterCatalog/Wrap/rough_test.py | 175 +- Code/GraphMol/FilterCatalog/update_pains.py | 49 +- .../Fingerprints/Wrap/testGenerators.py | 7 +- Code/GraphMol/Fingerprints/Wrap/testMHFP.py | 3 +- .../ForceFieldHelpers/Wrap/testHelpers.py | 59 +- Code/GraphMol/FragCatalog/Wrap/rough_test.py | 8 +- Code/GraphMol/MMPA/Wrap/testMMPA.py | 52 +- Code/GraphMol/MolAlign/Wrap/testMolAlign.py | 28 +- Code/GraphMol/MolCatalog/Wrap/rough_test.py | 9 +- .../Wrap/testChemicalFeatures.py | 23 +- .../MolDraw2D/Qt/Wrap/testMolDraw2DQt.py | 7 +- Code/GraphMol/MolDraw2D/Wrap/testMolDraw2D.py | 13 +- .../GraphMol/MolDraw2D/side_by_side_images.py | 32 +- .../MolDraw2D/test_dir/test_rdkit_draw.py | 9 +- Code/GraphMol/MolDraw2D/update_hash_codes.py | 44 +- .../GraphMol/MolEnumerator/Wrap/rough_test.py | 6 +- Code/GraphMol/MolHash/Wrap/testMolHash.py | 7 +- .../MolInterchange/Wrap/testMolInterchange.py | 53 +- .../MolStandardize/Wrap/testMolStandardize.py | 23 +- .../MolTransforms/Wrap/testMolTransforms.py | 61 +- .../PartialCharges/Wrap/testPartialCharges.py | 218 +- .../RGroupDecomposition/Wrap/test_rgroups.py | 20 +- .../ReducedGraphs/Wrap/testReducedGraphs.py | 11 +- Code/GraphMol/SLNParse/Wrap/testSLN.py | 24 +- .../Wrap/testPickleScaffoldNetwork.py | 4 +- .../Wrap/testScaffoldNetwork.py | 4 +- .../ShapeHelpers/Wrap/testShapeHelpers.py | 147 +- .../GraphMol/StructChecker/Wrap/rough_test.py | 3 +- Code/GraphMol/Substruct/UnitTestSubstruct.py | 75 +- .../SubstructLibrary/Wrap/rough_test.py | 20 +- .../GraphMol/TautomerQuery/Wrap/rough_test.py | 7 +- Code/GraphMol/UnitTestQueryMol.py | 4 +- Code/GraphMol/Wrap/rough_test.py | 18 +- Code/GraphMol/Wrap/testConformer.py | 8 +- .../Wrap/testMultithreadedMolSupplier.py | 263 +-- Code/GraphMol/Wrap/testPropertyLists.py | 133 +- Code/GraphMol/Wrap/testSGroups.py | 6 +- Code/GraphMol/Wrap/testThreads.py | 39 +- Code/GraphMol/Wrap/testTrajectory.py | 13 +- Code/GraphMol/Wrap/test_cdxml.py | 9 +- .../GraphMol/Wrap/test_data/do_smiles.bomb.py | 3 +- Code/GraphMol/Wrap/test_data/do_smiles.py | 2 +- Code/JavaWrappers/make_templates.py | 2 +- Code/JavaWrappers/parse_doxy_html.py | 8 +- Code/ML/InfoTheory/Wrap/testRanker.py | 11 +- Code/MinimalLib/simple.py | 1 + Code/Numerics/Alignment/Wrap/testAlignment.py | 74 +- Code/SimDivPickers/Wrap/testMaxMin.py | 5 +- Code/SimDivPickers/Wrap/testPickers.py | 32 +- Code/cmake/Modules/fixup_coverage.py | 4 +- .../AtomAtomPathSimilarity.py | 70 +- Contrib/CalcLigRMSD/CalcLigRMSD.py | 84 +- Contrib/ChiralPairs/ChiralDescriptors.py | 542 ++--- .../Wrap/testConformerParser.py | 9 +- Contrib/ConformerParser/Wrap/test_list.py | 5 +- Contrib/ConformerParser/test_list.py | 5 +- Contrib/Fastcluster/fastcluster.py | 82 +- Contrib/Fastcluster/testdata/sdf2smi.py | 6 +- Contrib/FreeWilson/freewilson.py | 780 +++---- Contrib/FreeWilson/setup.py | 16 +- Contrib/FreeWilson/test/test_freewilson.py | 11 +- Contrib/Glare/glare.py | 637 +++--- Contrib/IFG/ifg.py | 134 +- Contrib/LEF/AddLabels.py | 19 +- Contrib/LEF/ClusterFps.py | 20 +- Contrib/LEF/CreateFps.py | 33 +- Contrib/LEF/DistancePlot.py | 19 +- Contrib/LEF/DistancePredict.py | 24 +- Contrib/M_Kossner/Frames.py | 21 +- Contrib/MolVS/molvs_cli.py | 125 +- .../assignSubstructureFilters.py | 238 +- Contrib/NP_Score/npscorer.py | 9 +- Contrib/PBF/pbf.py | 5 +- .../RxnRoleAssignment/identifyReactants.py | 769 +++---- Contrib/RxnRoleAssignment/utils.py | 96 +- Contrib/SA_Score/UnitTestSAScore.py | 12 +- Contrib/SA_Score/sascorer.py | 180 +- Contrib/fraggle/atomcontrib.py | 17 +- Contrib/fraggle/cxn_tversky.py | 15 +- Contrib/fraggle/fraggle.py | 6 +- Contrib/fraggle/rdkit_tversky.py | 11 +- Contrib/mmpa/cansmirk.py | 9 +- Contrib/mmpa/create_mmp_db.py | 15 +- Contrib/mmpa/indexing.py | 51 +- Contrib/mmpa/mol_transform.py | 3 +- Contrib/mmpa/rfrag.py | 13 +- Contrib/mmpa/search_mmp_db.py | 18 +- Contrib/mmpa/test_list.py | 5 +- Contrib/mmpa/test_rfrag.py | 10 +- Contrib/pzc/p_con.py | 98 +- Data/DTDs/validate.py | 2 +- Data/Fonts/font_dumper.py | 36 +- Data/Pains/test_data/run_tests.py | 4 +- Data/SmartsLib/tests/bench2.py | 10 +- Docs/Book/conf.py | 7 +- Docs/Book/data/test_multi_colours.py | 97 +- Docs/Book/exts/extapi.py | 1 + Docs/Book_jp/conf.py | 43 +- External/AvalonTools/Wrap/testAvalonTools.py | 26 +- External/AvalonTools/Wrap/test_list.py | 5 +- External/AvalonTools/test_list.py | 5 +- External/CoordGen/Wrap/testCoordGen.py | 58 +- External/FreeSASA/Wrap/testFreeSASA.py | 597 +++-- External/INCHI-API/python/inchi.py | 35 +- External/INCHI-API/python/noinchi.py | 14 +- External/YAeHMOP/Wrap/testEHTTools.py | 4 +- External/pymol/modules/pymol/rpc.py | 40 +- Projects/DbCLI/CreateDb.py | 160 +- Projects/DbCLI/SearchDb.py | 151 +- Projects/DbCLI/TestDbCLI.py | 141 +- Projects/DbCLI/test_list.py | 5 +- Projects/test_list.py | 7 +- Regress/Scripts/chiral_embed.py | 4 +- Regress/Scripts/fingerprint_screenout.py | 90 +- Regress/Scripts/new_timings.py | 300 +-- Regress/Scripts/timings.py | 373 ++-- Scripts/FeatFinderCLI.py | 1 + Scripts/PythonFormat.py | 9 +- Scripts/run_python_tests.py | 23 +- Web/RDExtras/MolDepict.py | 6 +- Web/RDExtras/MolImage.py | 9 +- build_support/pkg_version.py | 15 +- rdkit/Chem/AllChem.py | 16 +- rdkit/Chem/AtomPairs/Pairs.py | 18 +- rdkit/Chem/AtomPairs/Sheridan.py | 10 +- rdkit/Chem/AtomPairs/Torsions.py | 11 +- rdkit/Chem/AtomPairs/UnitTestDescriptors.py | 14 +- rdkit/Chem/AtomPairs/Utils.py | 199 +- rdkit/Chem/AtomPairs/test_list.py | 1 + rdkit/Chem/BRICS.py | 1095 ++++----- rdkit/Chem/BuildFragmentCatalog.py | 860 ++++---- rdkit/Chem/ChemUtils/AlignDepict.py | 9 +- rdkit/Chem/ChemUtils/BulkTester.py | 1 + rdkit/Chem/ChemUtils/DescriptorUtilities.py | 65 +- rdkit/Chem/ChemUtils/SDFToCSV.py | 10 +- rdkit/Chem/ChemUtils/TemplateExpand.py | 14 +- rdkit/Chem/ChemUtils/UnitTestAlignDepict.py | 75 +- rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py | 131 +- rdkit/Chem/ChemUtils/test_list.py | 1 + rdkit/Chem/Crippen.py | 7 +- rdkit/Chem/DSViewer.py | 9 +- rdkit/Chem/Descriptors.py | 46 +- rdkit/Chem/Descriptors3D.py | 1 - rdkit/Chem/Draw/IPythonConsole.py | 9 +- rdkit/Chem/Draw/InteractiveRenderer.py | 47 +- rdkit/Chem/Draw/MolDrawing.py | 54 +- rdkit/Chem/Draw/SimilarityMaps.py | 41 +- rdkit/Chem/Draw/UnitTestDraw.py | 11 +- rdkit/Chem/Draw/UnitTestIPython.py | 9 +- rdkit/Chem/Draw/UnitTestSimilarityMaps.py | 7 +- rdkit/Chem/Draw/__init__.py | 8 +- rdkit/Chem/Draw/aggCanvas.py | 15 +- rdkit/Chem/Draw/cairoCanvas.py | 3 +- rdkit/Chem/Draw/qtCanvas.py | 15 +- rdkit/Chem/Draw/test_list.py | 1 + rdkit/Chem/EState/AtomTypes.py | 3 +- rdkit/Chem/EState/EState.py | 5 +- rdkit/Chem/EState/EState_VSA.py | 2 + rdkit/Chem/EState/Fingerprinter.py | 4 +- rdkit/Chem/EState/UnitTestEState.py | 193 +- rdkit/Chem/EState/UnitTestFingerprints.py | 64 +- rdkit/Chem/EState/UnitTestTypes.py | 9 +- rdkit/Chem/EState/UnitTestVSA.py | 1 - rdkit/Chem/EState/__init__.py | 2 +- rdkit/Chem/EState/test_list.py | 1 + rdkit/Chem/EnumerateHeterocycles.py | 363 ++- rdkit/Chem/EnumerateStereoisomers.py | 4 +- rdkit/Chem/FastSDMolSupplier.py | 3 +- rdkit/Chem/FeatFinderCLI.py | 6 +- rdkit/Chem/FeatMaps/FeatMapParser.py | 8 +- rdkit/Chem/FeatMaps/FeatMapPoint.py | 2 +- rdkit/Chem/FeatMaps/FeatMapUtils.py | 10 +- rdkit/Chem/FeatMaps/FeatMaps.py | 16 +- rdkit/Chem/FeatMaps/UnitTestFeatMap.py | 109 +- rdkit/Chem/FeatMaps/UnitTestFeatMapParser.py | 145 +- rdkit/Chem/FeatMaps/UnitTestFeatMapUtils.py | 2 +- rdkit/Chem/FeatMaps/test_list.py | 1 + rdkit/Chem/Features/FeatDirUtilsRD.py | 41 +- rdkit/Chem/Features/ShowFeats.py | 88 +- rdkit/Chem/Features/UnitTestFeatDirUtilsRD.py | 262 ++- rdkit/Chem/Features/test_list.py | 13 +- rdkit/Chem/Fingerprints/ClusterMols.py | 8 +- rdkit/Chem/Fingerprints/DbFpSupplier.py | 123 +- rdkit/Chem/Fingerprints/FingerprintMols.py | 91 +- rdkit/Chem/Fingerprints/MolSimilarity.py | 17 +- rdkit/Chem/Fingerprints/SimilarityScreener.py | 169 +- .../Chem/Fingerprints/UnitTestDbFpSupplier.py | 1 - .../Chem/Fingerprints/UnitTestFingerprints.py | 15 +- .../Chem/Fingerprints/UnitTestSimScreener.py | 3 +- rdkit/Chem/Fingerprints/test_list.py | 1 + rdkit/Chem/Fraggle/FraggleSim.py | 8 +- rdkit/Chem/Fraggle/UnitTestFraggle.py | 83 +- rdkit/Chem/Fraggle/test_list.py | 5 +- rdkit/Chem/FragmentMatcher.py | 3 +- rdkit/Chem/Fragments.py | 7 +- rdkit/Chem/FunctionalGroups.py | 208 +- rdkit/Chem/GraphDescriptors.py | 54 +- rdkit/Chem/Graphs.py | 43 +- rdkit/Chem/MACCSkeys.py | 7 +- rdkit/Chem/MCS.py | 53 +- rdkit/Chem/MolDb/FingerprintUtils.py | 150 +- rdkit/Chem/MolDb/Loader_orig.py | 12 +- rdkit/Chem/MolDb/Loader_sa.py | 8 +- rdkit/Chem/MolKey/InchiInfo.py | 34 +- rdkit/Chem/MolKey/MolKey.py | 27 +- rdkit/Chem/MolKey/UnitTestMolKey.py | 22 +- rdkit/Chem/MolKey/test_list.py | 1 + .../MolStandardize/UnitTestMolStandardize.py | 29 +- rdkit/Chem/MolStandardize/UnitTestMolVS.py | 341 +-- rdkit/Chem/MolStandardize/__init__.py | 25 +- rdkit/Chem/MolStandardize/charge.py | 422 ++-- rdkit/Chem/MolStandardize/errors.py | 10 +- rdkit/Chem/MolStandardize/fragment.py | 300 ++- rdkit/Chem/MolStandardize/metal.py | 77 +- rdkit/Chem/MolStandardize/normalize.py | 235 +- rdkit/Chem/MolStandardize/resonance.py | 79 +- rdkit/Chem/MolStandardize/standardize.py | 308 ++- rdkit/Chem/MolStandardize/tautomer.py | 469 ++-- rdkit/Chem/MolStandardize/test_list.py | 13 +- .../Chem/MolStandardize/test_standardizer.py | 37 +- rdkit/Chem/MolStandardize/utils.py | 26 +- rdkit/Chem/MolStandardize/validate.py | 117 +- rdkit/Chem/MolStandardize/validations.py | 178 +- rdkit/Chem/MolSurf.py | 28 +- rdkit/Chem/PandasPatcher.py | 41 +- rdkit/Chem/PandasTools.py | 14 +- rdkit/Chem/Pharm2D/Generate.py | 4 +- rdkit/Chem/Pharm2D/LazyGenerator.py | 15 +- rdkit/Chem/Pharm2D/Matcher.py | 3 +- rdkit/Chem/Pharm2D/SigFactory.py | 391 ++-- rdkit/Chem/Pharm2D/UnitTestGobbi.py | 302 +-- rdkit/Chem/Pharm2D/UnitTestMatcher.py | 119 +- rdkit/Chem/Pharm2D/UnitTestSignature.py | 45 +- rdkit/Chem/Pharm2D/UnitTestUtils.py | 4 +- rdkit/Chem/Pharm2D/Utils.py | 481 ++-- rdkit/Chem/Pharm2D/__init__.py | 10 +- rdkit/Chem/Pharm2D/test_list.py | 1 + rdkit/Chem/Pharm3D/EmbedLib.py | 43 +- rdkit/Chem/Pharm3D/ExcludedVolume.py | 6 +- rdkit/Chem/Pharm3D/Pharmacophore.py | 4 +- rdkit/Chem/Pharm3D/UnitTestExcludedVolume.py | 9 +- rdkit/Chem/Pharm3D/UnitTestPharmacophore.py | 20 +- rdkit/Chem/Pharm3D/test_list.py | 1 + rdkit/Chem/PropertyMol.py | 58 +- rdkit/Chem/PyMol.py | 5 +- rdkit/Chem/QED.py | 211 +- rdkit/Chem/Randomize.py | 90 +- rdkit/Chem/Recap.py | 896 ++++---- rdkit/Chem/ReducedGraphs.py | 1 + rdkit/Chem/RegistrationHash.py | 3 +- rdkit/Chem/SATIS.py | 1 - rdkit/Chem/SaltRemover.py | 206 +- rdkit/Chem/Scaffolds/MurckoScaffold.py | 18 +- .../Chem/Scaffolds/UnitTestMurckoScaffold.py | 17 +- rdkit/Chem/Scaffolds/test_list.py | 1 + rdkit/Chem/ShowMols.py | 3 +- rdkit/Chem/SimpleEnum/Enumerator.py | 12 +- rdkit/Chem/SimpleEnum/UnitTestEnumerator.py | 8 +- rdkit/Chem/SimpleEnum/test_list.py | 5 +- rdkit/Chem/Subshape/BuilderUtils.py | 1 - rdkit/Chem/Subshape/SubshapeAligner.py | 21 +- rdkit/Chem/Subshape/SubshapeBuilder.py | 11 +- rdkit/Chem/Subshape/SubshapeObjects.py | 1 + rdkit/Chem/Subshape/UnitTestSubshape.py | 129 +- rdkit/Chem/Subshape/testCombined.py | 13 +- rdkit/Chem/Subshape/test_list.py | 5 +- rdkit/Chem/Suppliers/DbMolSupplier.py | 9 +- rdkit/Chem/Suppliers/UnitTestDbMolSupplier.py | 107 +- rdkit/Chem/Suppliers/UnitTestSDMolSupplier.py | 129 +- .../Suppliers/UnitTestSmilesMolSupplier.py | 140 +- rdkit/Chem/Suppliers/test_list.py | 1 + rdkit/Chem/TemplateAlign.py | 19 +- rdkit/Chem/TorsionFingerprints.py | 19 +- rdkit/Chem/UnitTestCatalog.py | 99 +- rdkit/Chem/UnitTestChem.py | 7 +- rdkit/Chem/UnitTestChemAtom.py | 1 + rdkit/Chem/UnitTestChemBond.py | 1 + rdkit/Chem/UnitTestChemSmarts.py | 6 +- rdkit/Chem/UnitTestChemv2.py | 7 +- rdkit/Chem/UnitTestCrippen.py | 9 +- rdkit/Chem/UnitTestDescriptors.py | 18 +- rdkit/Chem/UnitTestDocTestsChem.py | 45 +- rdkit/Chem/UnitTestEnumerateHeterocycles.py | 502 ++--- rdkit/Chem/UnitTestFeatFinderCLI.py | 106 +- rdkit/Chem/UnitTestFragmentDescriptors.py | 252 ++- rdkit/Chem/UnitTestFunctionalGroups.py | 4 +- rdkit/Chem/UnitTestGraphDescriptors_2.py | 130 +- rdkit/Chem/UnitTestInchi.py | 18 +- rdkit/Chem/UnitTestLipinski.py | 6 +- rdkit/Chem/UnitTestMCS.py | 37 +- rdkit/Chem/UnitTestMol3D.py | 12 +- rdkit/Chem/UnitTestOldBugs.py | 11 +- rdkit/Chem/UnitTestPandasTools.py | 34 +- rdkit/Chem/UnitTestQED.py | 21 +- rdkit/Chem/UnitTestRegistrationHash.py | 4 +- rdkit/Chem/UnitTestSATIS.py | 21 +- rdkit/Chem/UnitTestSaltRemover.py | 5 +- rdkit/Chem/UnitTestSmiles.py | 152 +- rdkit/Chem/UnitTestSuppliers.py | 128 +- rdkit/Chem/UnitTestSurf.py | 10 +- rdkit/Chem/__init__.py | 16 +- rdkit/Chem/fmcs/fmcs | 5 +- rdkit/Chem/fmcs/fmcs.py | 192 +- rdkit/Chem/test_data/BuildCrippenTestSet.py | 6 +- .../test_data/BuildDescrsTestSet.Crippen.py | 19 +- rdkit/Chem/test_data/BuildDescrsTestSet.py | 22 +- rdkit/Chem/test_list.py | 1 + rdkit/DataStructs/BitUtils.py | 2 +- rdkit/DataStructs/LazySignature.py | 2 +- rdkit/DataStructs/UnitTestBitEnsemble.py | 23 +- rdkit/DataStructs/UnitTestDocTests.py | 8 +- rdkit/DataStructs/UnitTestTopNContainer.py | 139 +- rdkit/DataStructs/UnitTestcBitVect.py | 3 +- rdkit/DataStructs/VectCollection.py | 264 +-- rdkit/DataStructs/__init__.py | 22 +- rdkit/DataStructs/test_list.py | 1 + rdkit/Dbase/DbConnection.py | 3 +- rdkit/Dbase/DbInfo.py | 196 +- rdkit/Dbase/DbModule.py | 73 +- rdkit/Dbase/DbReport.py | 27 +- rdkit/Dbase/DbResultSet.py | 1 + rdkit/Dbase/DbUtils.py | 541 +++-- rdkit/Dbase/StorageUtils.py | 5 +- rdkit/Dbase/UnitTestDbUtils.py | 9 +- rdkit/Dbase/UnitTestStorageUtils.py | 2 +- rdkit/Dbase/test_list.py | 1 + rdkit/ML/AnalyzeComposite.py | 12 +- rdkit/ML/BuildComposite.py | 130 +- rdkit/ML/Cluster/Butina.py | 2 + rdkit/ML/Cluster/ClusterVis.py | 22 +- rdkit/ML/Cluster/Clusters.py | 348 +-- rdkit/ML/Cluster/Murtagh.py | 63 +- rdkit/ML/Cluster/Resemblance.py | 5 +- rdkit/ML/Cluster/UnitTestButina.py | 168 +- rdkit/ML/Cluster/UnitTestCluster.py | 239 +- rdkit/ML/Cluster/murtagh_test.py | 2 - rdkit/ML/Cluster/test_list.py | 1 + rdkit/ML/Composite/AdjustComposite.py | 1 - rdkit/ML/Composite/BayesComposite.py | 1 - rdkit/ML/Composite/Composite.py | 8 +- rdkit/ML/Composite/UnitTestCOMServer.py | 18 +- rdkit/ML/Composite/UnitTestComposite.py | 10 +- rdkit/ML/Composite/test_list.py | 5 +- rdkit/ML/CompositeRun.py | 68 +- rdkit/ML/Data/DataUtils.py | 689 +++--- rdkit/ML/Data/FindQuantBounds.py | 4 +- rdkit/ML/Data/MLData.py | 364 ++- rdkit/ML/Data/Quantize.py | 377 ++-- rdkit/ML/Data/SplitData.py | 3 +- rdkit/ML/Data/Stats.py | 102 +- rdkit/ML/Data/UnitTestDoctests.py | 60 +- rdkit/ML/Data/UnitTestMLData.py | 259 ++- rdkit/ML/Data/UnitTestQuantize.py | 505 ++--- rdkit/ML/Data/UnitTestStats.py | 84 +- rdkit/ML/Data/test_data/populate.py | 3 +- rdkit/ML/Data/test_list.py | 1 + rdkit/ML/DecTree/BuildQuantTree.py | 531 ++--- rdkit/ML/DecTree/BuildSigTree.py | 1 - rdkit/ML/DecTree/CrossValidate.py | 4 +- rdkit/ML/DecTree/Forest.py | 2 +- rdkit/ML/DecTree/PruneTree.py | 374 ++-- rdkit/ML/DecTree/SigTree.py | 5 +- rdkit/ML/DecTree/Tree.py | 1 - rdkit/ML/DecTree/UnitTestID3.py | 63 +- rdkit/ML/DecTree/UnitTestPrune.py | 106 +- rdkit/ML/DecTree/UnitTestQuantTree.py | 352 ++- rdkit/ML/DecTree/UnitTestSigTree.py | 251 ++- rdkit/ML/DecTree/UnitTestTree.py | 203 +- rdkit/ML/DecTree/UnitTestTreeUtils.py | 1 - rdkit/ML/DecTree/UnitTestXVal.py | 114 +- rdkit/ML/DecTree/test_list.py | 1 + rdkit/ML/Descriptors/CompoundDescriptors.py | 3 +- rdkit/ML/Descriptors/MoleculeDescriptors.py | 2 +- rdkit/ML/Descriptors/Parser.py | 11 +- rdkit/ML/Descriptors/UnitTestCOMServer.py | 10 +- rdkit/ML/Descriptors/UnitTestDescriptors.py | 61 +- .../ML/Descriptors/UnitTestMolDescriptors.py | 126 +- rdkit/ML/Descriptors/UnitTestParser.py | 9 +- rdkit/ML/Descriptors/test_list.py | 1 + rdkit/ML/EnrichPlot.py | 586 +++-- rdkit/ML/GrowComposite.py | 29 +- rdkit/ML/InfoTheory/UnitTestBitRanker.py | 205 +- rdkit/ML/InfoTheory/UnitTestCorrMatGen.py | 208 +- rdkit/ML/InfoTheory/entropy.py | 2 +- rdkit/ML/InfoTheory/test_list.py | 9 +- rdkit/ML/KNN/CrossValidate.py | 1 - rdkit/ML/KNN/DistFunctions.py | 2 +- rdkit/ML/KNN/UnitTestKNN.py | 7 +- rdkit/ML/KNN/test_list.py | 1 + rdkit/ML/MLUtils/VoteImg.py | 5 +- rdkit/ML/MatOps.py | 2 - rdkit/ML/ModelPackage/PackageUtils.py | 216 +- rdkit/ML/ModelPackage/UnitTestPackage.py | 263 +-- rdkit/ML/ModelPackage/test_list.py | 5 +- rdkit/ML/NaiveBayes/ClassificationModel.py | 405 ++-- rdkit/ML/NaiveBayes/CrossValidate.py | 1 - rdkit/ML/NaiveBayes/test_list.py | 5 +- rdkit/ML/Neural/CrossValidate.py | 5 +- rdkit/ML/Neural/NetNode.py | 1 + rdkit/ML/Neural/Network.py | 5 +- rdkit/ML/Neural/Trainers.py | 2 +- rdkit/ML/Neural/UnitTestOther.py | 1 - rdkit/ML/Neural/UnitTestTrainer.py | 105 +- rdkit/ML/Neural/test_list.py | 7 +- rdkit/ML/SLT/UnitTestRisk.py | 6 +- rdkit/ML/SLT/test_list.py | 5 +- rdkit/ML/Scoring/Scoring.py | 2 + rdkit/ML/Scoring/test_list.py | 5 +- rdkit/ML/ScreenComposite.py | 64 +- rdkit/ML/UnitTestAnalyzeComposite.py | 66 +- rdkit/ML/UnitTestBuildComposite.py | 296 +-- rdkit/ML/UnitTestScreenComposite.py | 596 ++--- rdkit/ML/files.py | 1 - rdkit/ML/test_data/populate.py | 3 +- rdkit/ML/test_list.py | 1 + rdkit/RDConfig.py | 6 +- rdkit/RDLogger.py | 6 +- rdkit/RDRandom.py | 40 +- rdkit/SimDivFilters/SimilarityPickers.py | 3 +- .../UnitTestSimilarityPickers.py | 4 +- rdkit/SimDivFilters/__init__.py | 1 + rdkit/SimDivFilters/test_data/genfps.py | 24 +- rdkit/SimDivFilters/test_list.py | 5 +- rdkit/VLib/Filter.py | 76 +- rdkit/VLib/Node.py | 138 +- rdkit/VLib/NodeLib/DbMolSupply.py | 12 +- rdkit/VLib/NodeLib/DbPickleSupplier.py | 11 +- rdkit/VLib/NodeLib/SDSupply.py | 32 +- rdkit/VLib/NodeLib/SmartsMolFilter.py | 2 +- rdkit/VLib/NodeLib/SmartsRemover.py | 2 +- rdkit/VLib/NodeLib/SmilesDupeFilter.py | 2 +- rdkit/VLib/NodeLib/SmilesOutput.py | 66 +- rdkit/VLib/NodeLib/SmilesSupply.py | 42 +- rdkit/VLib/NodeLib/UnitTestNodeLib.py | 129 +- rdkit/VLib/NodeLib/demo.py | 11 +- rdkit/VLib/NodeLib/test_list.py | 1 + rdkit/VLib/Output.py | 44 +- rdkit/VLib/Supply.py | 48 +- rdkit/VLib/Transform.py | 46 +- rdkit/VLib/UnitTestVLib.py | 221 +- rdkit/VLib/test_list.py | 5 +- rdkit/six.py | 616 +++--- rdkit/sping/PDF/pdfdoc.py | 17 +- rdkit/sping/PDF/pdfgen.py | 37 +- rdkit/sping/PDF/pdfgeom.py | 2 +- rdkit/sping/PDF/pdfmetrics.py | 354 +-- rdkit/sping/PDF/pdfutils.py | 455 ++-- rdkit/sping/PDF/pidPDF.py | 833 ++++--- rdkit/sping/PIL/pidPIL.py | 671 +++--- rdkit/sping/PIL/pilfonts/removemedium.py | 1 - rdkit/sping/PS/latin1MetricsCache.py | 310 +-- rdkit/sping/PS/pidPS.py | 114 +- rdkit/sping/PS/psmetrics.py | 411 ++-- rdkit/sping/Pyart/Fontmapping.py | 60 +- rdkit/sping/Pyart/pidPyart.py | 21 +- rdkit/sping/Qt/pidQt.py | 22 +- rdkit/sping/Qt/pidQt4.py | 20 +- rdkit/sping/ReportLab/pidReportLab.py | 24 +- rdkit/sping/SVG/pidSVG.py | 9 +- rdkit/sping/TK/pidTK.py | 23 +- rdkit/sping/WX/pidWX.py | 4 +- rdkit/sping/WX/pidWxDc.py | 2 + rdkit/sping/colors.py | 106 +- rdkit/sping/examples/formatted-strings.py | 1 - .../examples/tkCanvas-with-scrollbars.py | 7 +- .../examples/tkCanvasPIL-with-scrollbars.py | 11 +- rdkit/sping/pid.py | 608 ++--- rdkit/sping/stringformat.py | 26 +- rdkit/sping/test_list.py | 6 +- rdkit/sping/tests/pidtest.py | 631 +++--- rdkit/sping/tests/pstests.py | 2 +- rdkit/sping/tests/test_list.py | 2 +- rdkit/sping/tests/testallps.py | 5 + rdkit/sping/util/HTMLPiddler.py | 497 ++--- rdkit/sping/utils.py | 12 +- rdkit/test_list.py | 1 + rdkit/utils/UnitTestUtils.py | 32 +- rdkit/utils/cactvs.py | 8 +- rdkit/utils/chemdraw.py | 11 +- rdkit/utils/chemdraw_qax.py | 14 +- rdkit/utils/chemutils.py | 160 +- rdkit/utils/comhack.py | 1 + rdkit/utils/listutils.py | 2 +- rdkit/utils/spiral.py | 4 +- rdkit/utils/test_list.py | 5 +- 523 files changed, 22586 insertions(+), 21694 deletions(-) diff --git a/Code/ChemicalFeatures/Wrap/testFeatures.py b/Code/ChemicalFeatures/Wrap/testFeatures.py index 12fc702ac..2f0fbfe68 100644 --- a/Code/ChemicalFeatures/Wrap/testFeatures.py +++ b/Code/ChemicalFeatures/Wrap/testFeatures.py @@ -5,12 +5,13 @@ # @@ All Rights Reserved @@ # -import os, sys import io -import unittest +import os import pickle -from rdkit import RDConfig -from rdkit import Chem +import sys +import unittest + +from rdkit import Chem, RDConfig from rdkit.Chem import ChemicalFeatures from rdkit.Geometry import rdGeometry as geom @@ -51,8 +52,8 @@ class TestCase(unittest.TestCase): ffeat.SetType("HBondDonor1") self.assertTrue(ffeat.GetType() == "HBondDonor1") - ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, - 3.0)) + ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", + geom.Point3D(1.0, 2.0, 3.0)) self.assertTrue(ffeat.GetId() == -1) self.assertTrue(ffeat.GetFamily() == "HBondDonor") self.assertTrue(ffeat.GetType() == "HBondDonor1") @@ -85,8 +86,8 @@ class TestCase(unittest.TestCase): self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos())) # Check that the old pickled versions have not been broken - inTF = open( - os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/feat.pkl'), 'r') + inTF = open(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/feat.pkl'), + 'r') buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() inF = io.BytesIO(buf) @@ -101,8 +102,8 @@ class TestCase(unittest.TestCase): # uncomment the following to generate (overwrite) new version of pickled # data file #pickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+')) - inTF = open( - os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'), 'r') + inTF = open(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'), + 'r') buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() inF = io.BytesIO(buf) diff --git a/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py b/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py index ea311e6ef..312582a41 100755 --- a/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py +++ b/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py @@ -1,152 +1,158 @@ - -from rdkit import RDConfig -import unittest -from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc -import numpy import random -from rdkit import DataStructs +import unittest + +import numpy + +from rdkit import DataStructs, RDConfig +from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test0DistsArray(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def test0DistsArray(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - # initialize a double array and check if get back the expected distances - desc = numpy.zeros((3, 2), 'd') - desc[1, 0] = 1.0 - desc[2, 0] = 1.0 - desc[2, 1] = 1.0 + # initialize a double array and check if get back the expected distances + desc = numpy.zeros((3, 2), 'd') + desc[1, 0] = 1.0 + desc[2, 0] = 1.0 + desc[2, 1] = 1.0 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat with an float array - desc = numpy.zeros((3, 2), 'f') - desc[1, 0] = 1.0 - desc[2, 0] = 1.0 - desc[2, 1] = 1.0 + # repeat with an float array + desc = numpy.zeros((3, 2), 'f') + desc[1, 0] = 1.0 + desc[2, 0] = 1.0 + desc[2, 1] = 1.0 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # finally with an integer array - desc = numpy.zeros((3, 2), 'i') - desc[1, 0] = 1 - desc[2, 0] = 1 - desc[2, 1] = 1 + # finally with an integer array + desc = numpy.zeros((3, 2), 'i') + desc[1, 0] = 1 + desc[2, 0] = 1 + desc[2, 1] = 1 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def ctest1DistsListArray(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def ctest1DistsListArray(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - desc = [numpy.array([0.0, 0.0], 'd'), numpy.array([1.0, 0.0], 'd'), - numpy.array([1.0, 1.0], 'd')] - dmat = rdmmc.GetEuclideanDistMat(desc) + desc = [ + numpy.array([0.0, 0.0], 'd'), + numpy.array([1.0, 0.0], 'd'), + numpy.array([1.0, 1.0], 'd') + ] + dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat the test with a list of numpy.arrays of floats - desc = [numpy.array([0.0, 0.0], 'f'), numpy.array([1.0, 0.0], 'f'), - numpy.array([1.0, 1.0], 'f')] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # repeat the test with a list of numpy.arrays of floats + desc = [ + numpy.array([0.0, 0.0], 'f'), + numpy.array([1.0, 0.0], 'f'), + numpy.array([1.0, 1.0], 'f') + ] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat the test with a list of numpy.arrays of ints - desc = [numpy.array([0, 0], 'i'), numpy.array([1, 0], 'i'), numpy.array([1, 1], 'i')] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # repeat the test with a list of numpy.arrays of ints + desc = [numpy.array([0, 0], 'i'), numpy.array([1, 0], 'i'), numpy.array([1, 1], 'i')] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def test2DistListList(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def test2DistListList(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - desc = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + desc = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # test with ints - desc = [[0, 0], [1, 0], [1, 1]] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # test with ints + desc = [[0, 0], [1, 0], [1, 1]] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def test3Compare(self): - n = 30 - m = 5 + def test3Compare(self): + n = 30 + m = 5 - dscArr = numpy.zeros((n, m), 'd') - for i in range(n): - for j in range(m): - dscArr[i, j] = random.random() - dmatArr = rdmmc.GetEuclideanDistMat(dscArr) + dscArr = numpy.zeros((n, m), 'd') + for i in range(n): + for j in range(m): + dscArr[i, j] = random.random() + dmatArr = rdmmc.GetEuclideanDistMat(dscArr) - dscLL = [] - for i in range(n): - row = [] - for j in range(m): - row.append(dscArr[i, j]) - dscLL.append(row) - dmatLL = rdmmc.GetEuclideanDistMat(dscLL) + dscLL = [] + for i in range(n): + row = [] + for j in range(m): + row.append(dscArr[i, j]) + dscLL.append(row) + dmatLL = rdmmc.GetEuclideanDistMat(dscLL) - assert numpy.shape(dmatArr) == numpy.shape(dmatLL) + assert numpy.shape(dmatArr) == numpy.shape(dmatLL) - for i in range(n * (n - 1) // 2): - assert feq(dmatArr[i], dmatLL[i]) + for i in range(n * (n - 1) // 2): + assert feq(dmatArr[i], dmatLL[i]) - def test4ebv(self): + def test4ebv(self): - n = 30 - m = 2048 - dm = 800 - lst = [] - for i in range(n): - v = DataStructs.ExplicitBitVect(m) - for j in range(dm): - v.SetBit(random.randrange(0, m)) - lst.append(v) + n = 30 + m = 2048 + dm = 800 + lst = [] + for i in range(n): + v = DataStructs.ExplicitBitVect(m) + for j in range(dm): + v.SetBit(random.randrange(0, m)) + lst.append(v) - dMat = rdmmc.GetTanimotoDistMat(lst) + dMat = rdmmc.GetTanimotoDistMat(lst) - sMat = rdmmc.GetTanimotoSimMat(lst) + sMat = rdmmc.GetTanimotoSimMat(lst) - for i in range(n * (n - 1) // 2): - assert feq(sMat[i] + dMat[i], 1.0) + for i in range(n * (n - 1) // 2): + assert feq(sMat[i] + dMat[i], 1.0) - def test5sbv(self): + def test5sbv(self): - n = 30 - m = 2048 - dm = 800 - lst = [] - for i in range(n): - v = DataStructs.SparseBitVect(m) - for j in range(dm): - v.SetBit(random.randrange(0, m)) - lst.append(v) + n = 30 + m = 2048 + dm = 800 + lst = [] + for i in range(n): + v = DataStructs.SparseBitVect(m) + for j in range(dm): + v.SetBit(random.randrange(0, m)) + lst.append(v) - dMat = rdmmc.GetTanimotoDistMat(lst) + dMat = rdmmc.GetTanimotoDistMat(lst) - sMat = rdmmc.GetTanimotoSimMat(lst) + sMat = rdmmc.GetTanimotoSimMat(lst) - for i in range(n * (n - 1) // 2): - assert feq(sMat[i] + dMat[i], 1.0) + for i in range(n * (n - 1) // 2): + assert feq(sMat[i] + dMat[i], 1.0) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/DataStructs/Wrap/testBV.py b/Code/DataStructs/Wrap/testBV.py index 88c415219..409da5caa 100755 --- a/Code/DataStructs/Wrap/testBV.py +++ b/Code/DataStructs/Wrap/testBV.py @@ -1,10 +1,11 @@ -from rdkit import DataStructs -from rdkit import RDConfig -import unittest import pickle import random +import unittest + import numpy +from rdkit import DataStructs, RDConfig + def feq(a, b, tol=1e-4): return abs(a - b) < tol @@ -109,15 +110,15 @@ class TestCase(unittest.TestCase): self.assertTrue(len(bv) == 4) self.assertTrue(list(bv.GetOnBits()) == [0, 2]) - def _bulkTest(self,bvs): - for metric in 'Tanimoto','Dice','AllBit','OnBit','RogotGoldberg': - bulk = getattr(DataStructs,f'Bulk{metric}Similarity') - single = getattr(DataStructs,f'{metric}Similarity') - sims = bulk(bvs[0],bvs) + def _bulkTest(self, bvs): + for metric in 'Tanimoto', 'Dice', 'AllBit', 'OnBit', 'RogotGoldberg': + bulk = getattr(DataStructs, f'Bulk{metric}Similarity') + single = getattr(DataStructs, f'{metric}Similarity') + sims = bulk(bvs[0], bvs) for i in range(len(bvs)): - sim = single(bvs[0],bvs[i]) - self.assertEqual(sim,sims[i]) - self.assertEqual(sim, single(bvs[0],bvs[i].ToBinary())) + sim = single(bvs[0], bvs[i]) + self.assertEqual(sim, sims[i]) + self.assertEqual(sim, single(bvs[0], bvs[i].ToBinary())) dists = bulk(bvs[0], bvs, returnDistance=True) for i in range(len(bvs)): dist = single(bvs[0], bvs[i], returnDistance=True) @@ -172,7 +173,6 @@ class TestCase(unittest.TestCase): bvs[bvi] = bv self._bulkTest(bvs) - def test7FPS(self): bv = DataStructs.ExplicitBitVect(32) bv.SetBit(0) @@ -206,7 +206,7 @@ class TestCase(unittest.TestCase): def test9ToNumpy(self): import numpy - for typ in (DataStructs.ExplicitBitVect,): + for typ in (DataStructs.ExplicitBitVect, ): bv = typ(32) bv.SetBit(0) bv.SetBit(1) @@ -218,9 +218,8 @@ class TestCase(unittest.TestCase): for i in range(bv.GetNumBits()): self.assertEqual(bv[i], arr[i]) - for typ in (DataStructs.IntSparseIntVect, - DataStructs.LongSparseIntVect, DataStructs.UIntSparseIntVect, - DataStructs.ULongSparseIntVect): + for typ in (DataStructs.IntSparseIntVect, DataStructs.LongSparseIntVect, + DataStructs.UIntSparseIntVect, DataStructs.ULongSparseIntVect): iv = typ(32) iv[0] = 1 iv[1] = 1 @@ -243,31 +242,32 @@ class TestCase(unittest.TestCase): bvs.append(bv) qs = bvs[:10] db = bvs[10:] - for metric in ['Tanimoto','Cosine', 'Kulczynski', 'Dice', 'Sokal', - 'McConnaughey', 'Asymmetric', 'BraunBlanquet', 'Russel', - 'RogotGoldberg']: - bulkSim = getattr(DataStructs,f'Bulk{metric}Similarity') - nbrSim = getattr(DataStructs,f'{metric}SimilarityNeighbors') + for metric in [ + 'Tanimoto', 'Cosine', 'Kulczynski', 'Dice', 'Sokal', 'McConnaughey', 'Asymmetric', + 'BraunBlanquet', 'Russel', 'RogotGoldberg' + ]: + bulkSim = getattr(DataStructs, f'Bulk{metric}Similarity') + nbrSim = getattr(DataStructs, f'{metric}SimilarityNeighbors') tgts = [] for q in qs: - sims = bulkSim(q,db) + sims = bulkSim(q, db) sim, idx = max((sim, -idx) for idx, sim in enumerate(sims)) - tgts.append((-idx,sim)) - nbrs = nbrSim(qs,db) - self.assertEqual(tgts,nbrs) + tgts.append((-idx, sim)) + nbrs = nbrSim(qs, db) + self.assertEqual(tgts, nbrs) def test12ToList(self): nbits = 2048 for cls in [DataStructs.ExplicitBitVect, DataStructs.SparseBitVect]: bv = cls(nbits) - l = [0]*2048 + l = [0] * 2048 # test no bits set l2 = list(bv) l3 = bv.ToList() self.assertEqual(l, l2) self.assertEqual(l, l3) - + for j in range(nbits): x = random.randrange(0, nbits) l[x] = 1 @@ -288,7 +288,7 @@ class TestCase(unittest.TestCase): bv2 = cls(nbits) bv2.FromBase64(bv.ToBase64()) - self.assertEqual(bv,bv2) + self.assertEqual(bv, bv2) def test14NegativeIndices(self): nbits = 2048 @@ -298,13 +298,13 @@ class TestCase(unittest.TestCase): for j in range(nbits): x = random.randrange(0, nbits) bv.SetBit(x) - bv2[-(nbits-x)]=1 + bv2[-(nbits - x)] = 1 - self.assertEqual(bv,bv2) + self.assertEqual(bv, bv2) for j in range(nbits): - self.assertEqual(bv[j],bv[-(nbits-j)]) + self.assertEqual(bv[j], bv[-(nbits - j)]) with self.assertRaises(IndexError): - bv[-(nbits+1)] + bv[-(nbits + 1)] with self.assertRaises(IndexError): bv2[-(nbits + 1)] = 1 diff --git a/Code/DataStructs/Wrap/testDiscreteValueVect.py b/Code/DataStructs/Wrap/testDiscreteValueVect.py index 47cf8011c..6b421d02a 100644 --- a/Code/DataStructs/Wrap/testDiscreteValueVect.py +++ b/Code/DataStructs/Wrap/testDiscreteValueVect.py @@ -4,258 +4,256 @@ # # @@ All Rights Reserved @@ # -import os import io -import sys +import os +import pickle import unittest -from rdkit import RDConfig -#import pickle -import pickle from rdkit import DataStructs as ds +from rdkit import RDConfig class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test1Discrete(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 + def test1Discrete(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 - self.assertTrue(len(v1) == 30) - self.assertTrue(v1.GetTotalVal() == 15) + self.assertTrue(len(v1) == 30) + self.assertTrue(v1.GetTotalVal() == 15) - for i in range(len(v1)): - self.assertTrue(v1[i] == (i + 1) % 2) + for i in range(len(v1)): + self.assertTrue(v1[i] == (i + 1) % 2) - self.assertRaises(ValueError, lambda: v1.__setitem__(5, 2)) + self.assertRaises(ValueError, lambda: v1.__setitem__(5, 2)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(len(v1)): - v1[i] = i % 4 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + for i in range(len(v1)): + v1[i] = i % 4 - self.assertTrue(len(v1) == 30) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 4) + self.assertTrue(len(v1) == 30) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 4) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 6)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 6)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 30) - for i in range(len(v1)): - v1[i] = i % 16 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 30) + for i in range(len(v1)): + v1[i] = i % 16 - self.assertTrue(len(v1) == 30) - self.assertTrue(v1.GetTotalVal() == 211) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 16) + self.assertTrue(len(v1) == 30) + self.assertTrue(v1.GetTotalVal() == 211) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 16) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 16)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 16)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 32) - for i in range(len(v1)): - v1[i] = i % 256 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 32) + for i in range(len(v1)): + v1[i] = i % 256 - self.assertTrue(len(v1) == 32) - self.assertTrue(v1.GetTotalVal() == 496) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 256) + self.assertTrue(len(v1) == 32) + self.assertTrue(v1.GetTotalVal() == 496) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 256) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 256)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 256)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 300) - for i in range(len(v1)): - v1[i] = i % 300 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 300) + for i in range(len(v1)): + v1[i] = i % 300 - self.assertTrue(len(v1) == 300) - self.assertTrue(v1.GetTotalVal() == 44850) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 65536)) + self.assertTrue(len(v1) == 300) + self.assertTrue(v1.GetTotalVal() == 44850) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 65536)) - def test2VectDistances(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - v2[2 * i] = 1 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - for i in range(30): - if (i % 3 == 0): - v2[i] = 1 - else: - v2[i] = 0 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 15) + def test2VectDistances(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + v2[2 * i] = 1 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + for i in range(30): + if (i % 3 == 0): + v2[i] = 1 + else: + v2[i] = 0 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 15) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(30): - v1[i] = i % 4 - v2[i] = (i + 1) % 4 + for i in range(30): + v1[i] = i % 4 + v2[i] = (i + 1) % 4 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 44) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 44) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - for i in range(16): - v1[i] = i % 16 - v2[i] = i % 5 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 90) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + for i in range(16): + v1[i] = i % 16 + v2[i] = i % 5 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 90) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v1[0] = 34 - v1[1] = 167 - v1[2] = 3 - v1[3] = 56 - v1[4] = 128 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v1[0] = 34 + v1[1] = 167 + v1[2] = 3 + v1[3] = 56 + v1[4] = 128 - v2[0] = 14 - v2[1] = 67 - v2[2] = 103 - v2[3] = 6 - v2[4] = 228 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 370) + v2[0] = 14 + v2[1] = 67 + v2[2] = 103 + v2[3] = 6 + v2[4] = 228 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 370) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v1[0] = 2345 - v1[1] = 64578 - v1[2] = 34 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v1[0] = 2345 + v1[1] = 64578 + v1[2] = 34 - v2[0] = 1345 - v2[1] = 54578 - v2[2] = 10034 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 21000) + v2[0] = 1345 + v2[1] = 54578 + v2[2] = 10034 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 21000) - def test3Pickles(self): - #outF = file('dvvs.pkl','wb+') - with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/dvvs.pkl'), - 'r') as inTF: - buf = inTF.read().replace('\r\n', '\n').encode('utf-8') - inTF.close() - with io.BytesIO(buf) as inF: - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - # pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + def test3Pickles(self): + #outF = file('dvvs.pkl','wb+') + with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/dvvs.pkl'), + 'r') as inTF: + buf = inTF.read().replace('\r\n', '\n').encode('utf-8') + inTF.close() + with io.BytesIO(buf) as inF: + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(30): - v1[i] = i % 4 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - # pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + for i in range(30): + v1[i] = i % 4 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - for i in range(16): - v1[i] = i % 16 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - # pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + for i in range(16): + v1[i] = i % 16 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v1[0] = 34 - v1[1] = 167 - v1[2] = 3 - v1[3] = 56 - v1[4] = 128 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - # pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v1[0] = 34 + v1[1] = 167 + v1[2] = 3 + v1[3] = 56 + v1[4] = 128 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v1[0] = 2345 - v1[1] = 64578 - v1[2] = 34 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - # pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v1[0] = 2345 + v1[1] = 64578 + v1[2] = 34 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - def test4DiscreteVectOps(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) - for i in range(4): - v1[2 * i] = 2 - self.assertTrue(v1.GetTotalVal() == 8) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) - for i in range(4): - v2[2 * i + 1] = 2 - v2[2 * i] = 1 - self.assertTrue(v2.GetTotalVal() == 12) + def test4DiscreteVectOps(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) + for i in range(4): + v1[2 * i] = 2 + self.assertTrue(v1.GetTotalVal() == 8) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) + for i in range(4): + v2[2 * i + 1] = 2 + v2[2 * i] = 1 + self.assertTrue(v2.GetTotalVal() == 12) - v3 = v1 | v2 - self.assertTrue(len(v3) == len(v2)) - self.assertTrue(v3.GetTotalVal() == 16) + v3 = v1 | v2 + self.assertTrue(len(v3) == len(v2)) + self.assertTrue(v3.GetTotalVal() == 16) - v3 = v1 & v2 - self.assertTrue(len(v3) == len(v2)) - self.assertTrue(v3.GetTotalVal() == 4) + v3 = v1 & v2 + self.assertTrue(len(v3) == len(v2)) + self.assertTrue(v3.GetTotalVal() == 4) - v4 = v1 + v2 - self.assertTrue(len(v4) == len(v2)) - self.assertTrue(v4.GetTotalVal() == 20) + v4 = v1 + v2 + self.assertTrue(len(v4) == len(v2)) + self.assertTrue(v4.GetTotalVal() == 20) - v4 = v1 - v2 - self.assertTrue(v4.GetTotalVal() == 4) - v4 = v2 - v1 - self.assertTrue(v4.GetTotalVal() == 8) + v4 = v1 - v2 + self.assertTrue(v4.GetTotalVal() == 4) + v4 = v2 - v1 + self.assertTrue(v4.GetTotalVal() == 8) - v4 = v2 - v4 -= v1 - self.assertTrue(v4.GetTotalVal() == 8) - v4 -= v4 - self.assertTrue(v4.GetTotalVal() == 0) + v4 = v2 + v4 -= v1 + self.assertTrue(v4.GetTotalVal() == 8) + v4 -= v4 + self.assertTrue(v4.GetTotalVal() == 0) - def testIterator(self): - """ + def testIterator(self): + """ connected to sf.net issue 1719831: http://sourceforge.net/tracker/index.php?func=detail&aid=1719831&group_id=160139&atid=814650 """ - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - l1 = list(v1) - self.assertTrue(len(l1) == len(v1)) - for i, v in enumerate(v1): - self.assertTrue(l1[i] == v) - self.assertRaises(IndexError, lambda: v1[40]) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + l1 = list(v1) + self.assertTrue(len(l1) == len(v1)) + for i, v in enumerate(v1): + self.assertTrue(l1[i] == v) + self.assertRaises(IndexError, lambda: v1[40]) - def test9ToNumpy(self): - import numpy - bv = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 32) - bv[0] = 1 - bv[1] = 4 - bv[17] = 1 - bv[23] = 8 - bv[31] = 12 - arr = numpy.zeros((3, ), 'i') - ds.ConvertToNumpyArray(bv, arr) - for i in range(len(bv)): - self.assertEqual(bv[i], arr[i]) + def test9ToNumpy(self): + import numpy + bv = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 32) + bv[0] = 1 + bv[1] = 4 + bv[17] = 1 + bv[23] = 8 + bv[31] = 12 + arr = numpy.zeros((3, ), 'i') + ds.ConvertToNumpyArray(bv, arr) + for i in range(len(bv)): + self.assertEqual(bv[i], arr[i]) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/DataStructs/Wrap/testFPB.py b/Code/DataStructs/Wrap/testFPB.py index 54e1dfae3..c28832d34 100644 --- a/Code/DataStructs/Wrap/testFPB.py +++ b/Code/DataStructs/Wrap/testFPB.py @@ -1,6 +1,7 @@ -from rdkit import DataStructs -from rdkit import RDConfig -import unittest, os +import os +import unittest + +from rdkit import DataStructs, RDConfig def feq(a, b, tol=1e-4): @@ -84,9 +85,10 @@ class TestCase(unittest.TestCase): nbrs = fpbr.GetContainingNeighbors(bytes) self.assertEqual(len(nbrs), 9) ids = sorted(fpbr.GetId(x) for x in nbrs) - self.assertEqual(ids, ['ZINC00000562', 'ZINC00000843', 'ZINC00000969', 'ZINC00001484', - 'ZINC00001585', 'ZINC00002094', 'ZINC00004739', 'ZINC00005235', - 'ZINC00006300']) + self.assertEqual(ids, [ + 'ZINC00000562', 'ZINC00000843', 'ZINC00000969', 'ZINC00001484', 'ZINC00001585', + 'ZINC00002094', 'ZINC00004739', 'ZINC00005235', 'ZINC00006300' + ]) def test6MultiFPBReaderTani(self): basen = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs', 'testData') diff --git a/Code/DataStructs/Wrap/testSparseIntVect.py b/Code/DataStructs/Wrap/testSparseIntVect.py index 2d0ede614..3db28b056 100644 --- a/Code/DataStructs/Wrap/testSparseIntVect.py +++ b/Code/DataStructs/Wrap/testSparseIntVect.py @@ -4,13 +4,16 @@ # # @@ All Rights Reserved @@ # -import os, sys import io -import unittest +import os import pickle -from rdkit import RDConfig -from rdkit import DataStructs as ds import random +import sys +import unittest + +from rdkit import DataStructs as ds +from rdkit import RDConfig + def feq(v1, v2, tol=1e-4): return abs(v1 - v2) < tol @@ -213,7 +216,7 @@ class TestCase(unittest.TestCase): self.assertTrue(feq(bulkDs[i], taniDs[i])) def test7ToList(self): - l = [0]*2048 + l = [0] * 2048 nbits = 2048 bv = ds.IntSparseIntVect(nbits) for j in range(nbits): @@ -226,5 +229,6 @@ class TestCase(unittest.TestCase): self.assertEqual(l, l2) self.assertEqual(l, l3) + if __name__ == '__main__': unittest.main() diff --git a/Code/Demos/RDKit/MPI/rdkpympi.py b/Code/Demos/RDKit/MPI/rdkpympi.py index 1f3d1108f..f2c89897d 100644 --- a/Code/Demos/RDKit/MPI/rdkpympi.py +++ b/Code/Demos/RDKit/MPI/rdkpympi.py @@ -9,9 +9,11 @@ # from boost import mpi + from rdkit import Chem from rdkit.Chem import AllChem from rdkit.RDLogger import logger + logger = logger() @@ -48,8 +50,9 @@ def dividetask(data, task, silent=True): if __name__ == '__main__': - from rdkit import RDConfig import os + + from rdkit import RDConfig fName = os.path.join(RDConfig.RDBaseDir, 'Projects', 'DbCLI', 'testData', 'bzr.sdf') if mpi.world.rank == 0: data = [x for x in Chem.SDMolSupplier(fName)][:50] diff --git a/Code/Demos/boost/EBV_err/setup.py b/Code/Demos/boost/EBV_err/setup.py index fbeb47c0a..6f2385c68 100644 --- a/Code/Demos/boost/EBV_err/setup.py +++ b/Code/Demos/boost/EBV_err/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +import RDConfig + save_init_posix = sysconfig._init_posix @@ -47,13 +48,17 @@ libDirs.append(pyLibDir) libraries.append(boostLib) libraries.append("python2.2") -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="crossTest", version="1.0", - ext_modules=[Extension("moduleA", ["wrapA.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs), - Extension("moduleB", ["moduleB.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs), - Extension("moduleC", ["wrapC.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="crossTest", version="1.0", ext_modules=[ + Extension("moduleA", ["wrapA.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs), + Extension("moduleB", ["moduleB.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs), + Extension("moduleC", ["wrapC.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/EBV_err/test.py b/Code/Demos/boost/EBV_err/test.py index 00371b241..75a875caf 100644 --- a/Code/Demos/boost/EBV_err/test.py +++ b/Code/Demos/boost/EBV_err/test.py @@ -1,9 +1,8 @@ - #from DataStructs import cDataStructs -from DataStructs import cDataStructs import moduleA import moduleB import moduleC +from DataStructs import cDataStructs print("*****************************") print("Testing self print for classA") diff --git a/Code/Demos/boost/cross_mod_err/setup.py b/Code/Demos/boost/cross_mod_err/setup.py index 63cd6a8b7..555dd2530 100644 --- a/Code/Demos/boost/cross_mod_err/setup.py +++ b/Code/Demos/boost/cross_mod_err/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +import RDConfig + save_init_posix = sysconfig._init_posix @@ -32,13 +33,17 @@ boostLib = "boost_python" libDirs = [boostLibDir, pyLibDir] libraries = [boostLib, "python2.2"] # have to include g++ here or we get link errors with boost -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="crossTest", version="1.0", - ext_modules=[Extension("moduleA", ["wrapA.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs), - Extension("moduleB", ["moduleB.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs), - Extension("moduleC", ["wrapC.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="crossTest", version="1.0", ext_modules=[ + Extension("moduleA", ["wrapA.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs), + Extension("moduleB", ["moduleB.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs), + Extension("moduleC", ["wrapC.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/cross_mod_err/test.py b/Code/Demos/boost/cross_mod_err/test.py index 7e5687583..6f5748c9d 100644 --- a/Code/Demos/boost/cross_mod_err/test.py +++ b/Code/Demos/boost/cross_mod_err/test.py @@ -1,4 +1,3 @@ - import moduleA import moduleB import moduleC diff --git a/Code/Demos/boost/cross_module/setup.py b/Code/Demos/boost/cross_module/setup.py index 87aa83d07..ee55be76d 100644 --- a/Code/Demos/boost/cross_module/setup.py +++ b/Code/Demos/boost/cross_module/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +import RDConfig + save_init_posix = sysconfig._init_posix @@ -32,11 +33,15 @@ boostLib = "boost_python" libDirs = [boostLibDir, pyLibDir] libraries = [boostLib, "python2.2"] # have to include g++ here or we get link errors with boost -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="moda", version="1.0", - ext_modules=[Extension("moda", ["moda.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs), - Extension("modb", ["modb.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="moda", version="1.0", ext_modules=[ + Extension("moda", ["moda.cpp"], include_dirs=incDirs, library_dirs=libDirs, libraries=libraries, + extra_compile_args=compileArgs), + Extension("modb", ["modb.cpp"], include_dirs=incDirs, library_dirs=libDirs, libraries=libraries, + extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/cross_module/test.py b/Code/Demos/boost/cross_module/test.py index 0813b2d08..82d5e704a 100644 --- a/Code/Demos/boost/cross_module/test.py +++ b/Code/Demos/boost/cross_module/test.py @@ -1,4 +1,3 @@ - import moda import modb diff --git a/Code/Demos/boost/numpy/setup.py b/Code/Demos/boost/numpy/setup.py index 44c964f20..d172940d9 100644 --- a/Code/Demos/boost/numpy/setup.py +++ b/Code/Demos/boost/numpy/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +import RDConfig + save_init_posix = sysconfig._init_posix @@ -32,9 +33,13 @@ boostLib = "boost_python" libDirs = [boostLibDir, pyLibDir] libraries = [boostLib, "python2.2"] # have to include g++ here or we get link errors with boost -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="demo", version="1.0", - ext_modules=[Extension("linalg", ["linalg.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="demo", version="1.0", ext_modules=[ + Extension("linalg", ["linalg.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/numpy/test.py b/Code/Demos/boost/numpy/test.py index bc3ed6379..8813962b6 100644 --- a/Code/Demos/boost/numpy/test.py +++ b/Code/Demos/boost/numpy/test.py @@ -1,6 +1,5 @@ - -from Numeric import * import linalg +from Numeric import * print(linalg.GetFirstElement(array([1, 0, 2], Int))) print(linalg.GetFirstElement(array([1, 0, 2], Float))) diff --git a/Code/Demos/boost/overloads/setup.py b/Code/Demos/boost/overloads/setup.py index a5734ff3a..4d8412eb9 100644 --- a/Code/Demos/boost/overloads/setup.py +++ b/Code/Demos/boost/overloads/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +import RDConfig + save_init_posix = sysconfig._init_posix @@ -32,9 +33,13 @@ boostLib = "boost_python" libDirs = [boostLibDir, pyLibDir] libraries = [boostLib, "python2.2"] # have to include g++ here or we get link errors with boost -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="overloads", version="1.0", ext_modules=[Extension( - "overloads", ["overloads.cpp"], include_dirs=incDirs, library_dirs=libDirs, libraries=libraries, - extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="overloads", version="1.0", ext_modules=[ + Extension("overloads", ["overloads.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/python_objs/setup.py b/Code/Demos/boost/python_objs/setup.py index bd4de86c1..d847d8733 100644 --- a/Code/Demos/boost/python_objs/setup.py +++ b/Code/Demos/boost/python_objs/setup.py @@ -1,11 +1,12 @@ # Run this with: # python setup.py install --install-lib=. -from distutils.core import setup, Extension -from rdkit import RDConfig - # force the use of g++ please from distutils import sysconfig +from distutils.core import Extension, setup + +from rdkit import RDConfig + save_init_posix = sysconfig._init_posix @@ -32,9 +33,13 @@ boostLib = "boost_python" libDirs = [boostLibDir, pyLibDir] libraries = [boostLib, "python2.2"] # have to include g++ here or we get link errors with boost -compileArgs = ['-ftemplate-depth-150', - '-DBOOST_PYTHON_DYNAMIC_LIB', - boostInc, ] -setup(name="python_objs", version="1.0", ext_modules=[Extension( - "python_objs", ["python_objs.cpp"], include_dirs=incDirs, library_dirs=libDirs, - libraries=libraries, extra_compile_args=compileArgs)]) +compileArgs = [ + '-ftemplate-depth-150', + '-DBOOST_PYTHON_DYNAMIC_LIB', + boostInc, +] +setup( + name="python_objs", version="1.0", ext_modules=[ + Extension("python_objs", ["python_objs.cpp"], include_dirs=incDirs, library_dirs=libDirs, + libraries=libraries, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/smartPtrsAndIters/setup.py b/Code/Demos/boost/smartPtrsAndIters/setup.py index 6002c6ea7..b6338caa9 100644 --- a/Code/Demos/boost/smartPtrsAndIters/setup.py +++ b/Code/Demos/boost/smartPtrsAndIters/setup.py @@ -1,8 +1,11 @@ # Run this with: # python setup.py install --install-lib=. -from RDBuild import * -from distutils.core import setup, Extension +from distutils.core import Extension, setup -setup(name="SPtrTestModule", version="1.0", ext_modules=[Extension( - "SPtrTestModule", ["module.cpp"], library_dirs=libDirs, libraries=libraries, - extra_link_args=linkArgs, extra_compile_args=compileArgs)]) +from RDBuild import * + +setup( + name="SPtrTestModule", version="1.0", ext_modules=[ + Extension("SPtrTestModule", ["module.cpp"], library_dirs=libDirs, libraries=libraries, + extra_link_args=linkArgs, extra_compile_args=compileArgs) + ]) diff --git a/Code/Demos/boost/smartPtrsAndIters/test.py b/Code/Demos/boost/smartPtrsAndIters/test.py index 61a10f8b2..5ec28d5e7 100755 --- a/Code/Demos/boost/smartPtrsAndIters/test.py +++ b/Code/Demos/boost/smartPtrsAndIters/test.py @@ -2,9 +2,10 @@ # Distributed under the Boost Software License, Version 1.0. (See # accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt) -import SPtrTestModule as TestModule import unittest +import SPtrTestModule as TestModule + class TestCase(unittest.TestCase): diff --git a/Code/DistGeom/Wrap/rough_test.py b/Code/DistGeom/Wrap/rough_test.py index f33aa3807..be1c214ff 100644 --- a/Code/DistGeom/Wrap/rough_test.py +++ b/Code/DistGeom/Wrap/rough_test.py @@ -11,7 +11,9 @@ Replaced numpy.oldnumeric with numpy methods - Jan 2015, PGedeck """ #pylint: disable=E1101,C0111,R0904 import unittest + import numpy as np + from rdkit import DistanceGeometry as DG @@ -74,8 +76,9 @@ class TestCase(unittest.TestCase): self.assertTrue(feq(d1, 1.0, 2e-3)) d2 = np.dot(v2, v2) self.assertTrue(feq(d2, 1.0, 2e-3)) - arr = np.array([[0.0, 1.0, 1.0, 1.01], [1.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 1.0], - [0.99, 1.0, 1.0, 0.0]], float) + arr = np.array( + [[0.0, 1.0, 1.0, 1.01], [1.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 1.0], [0.99, 1.0, 1.0, 0.0]], + float) self.assertTrue(DG.DoTriangleSmoothing(arr)) coords = DG.EmbedBoundsMatrix(arr) v1 = coords[0] - coords[1] @@ -89,10 +92,12 @@ class TestCase(unittest.TestCase): # this test is currently (rev:4769) passing on windows and # failing on linux. It's kind of dependent on fp precision, so # it's probably ok to ditch it. - arr = np.array([[0.0, 1.0, 1.0, 1.0], - [1.0, 0.0, 1.0, 1.0], - [1.0, 1.0, 0.0, 1.0], - [1.0, 1.0, 1.0, 0.0], ], float) + arr = np.array([ + [0.0, 1.0, 1.0, 1.0], + [1.0, 0.0, 1.0, 1.0], + [1.0, 1.0, 0.0, 1.0], + [1.0, 1.0, 1.0, 0.0], + ], float) self.assertTrue(DG.DoTriangleSmoothing(arr)) coords = DG.EmbedBoundsMatrix(arr, randomSeed=100) v1 = coords[0] - coords[1] diff --git a/Code/ForceField/Wrap/testConstraints.py b/Code/ForceField/Wrap/testConstraints.py index 3c02f6cda..b3282c7cc 100755 --- a/Code/ForceField/Wrap/testConstraints.py +++ b/Code/ForceField/Wrap/testConstraints.py @@ -1,11 +1,11 @@ -from rdkit import RDConfig -import sys, os -from time import sleep -from multiprocessing import Process, Value +import os +import sys import unittest -from rdkit import Chem -from rdkit.Chem import ChemicalForceFields -from rdkit.Chem import rdMolTransforms +from multiprocessing import Process, Value +from time import sleep + +from rdkit import Chem, RDConfig +from rdkit.Chem import ChemicalForceFields, rdMolTransforms class OptSafe: diff --git a/Code/Geometry/Wrap/testGeometry.py b/Code/Geometry/Wrap/testGeometry.py index ee6461837..53d3173df 100644 --- a/Code/Geometry/Wrap/testGeometry.py +++ b/Code/Geometry/Wrap/testGeometry.py @@ -1,12 +1,11 @@ -import os, sys -import unittest import copy import math - +import os import pickle +import sys +import unittest -from rdkit import RDConfig -from rdkit import DataStructs +from rdkit import DataStructs, RDConfig from rdkit.Geometry import rdGeometry as geom diff --git a/Code/GraphMol/Abbreviations/Wrap/testAbbreviations.py b/Code/GraphMol/Abbreviations/Wrap/testAbbreviations.py index 3e9368e57..b3a7553cb 100644 --- a/Code/GraphMol/Abbreviations/Wrap/testAbbreviations.py +++ b/Code/GraphMol/Abbreviations/Wrap/testAbbreviations.py @@ -7,12 +7,12 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. +import unittest + # from rdkit import Chem from rdkit.Chem import rdAbbreviations -import unittest - class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelsValidation.py b/Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelsValidation.py index 766d32797..096b3542e 100644 --- a/Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelsValidation.py +++ b/Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelsValidation.py @@ -1,6 +1,7 @@ # coding: utf-8 import os import sys + from rdkit import Chem EXPECTED_LABELS_OVERRIDES = { diff --git a/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py b/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py index 944817b4e..9fbb15d02 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py +++ b/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py @@ -1,18 +1,18 @@ # Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. # @@ -29,111 +29,111 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # - -import unittest -import os,sys, copy - +import copy +import itertools +import os import pickle +import sys +import time +import unittest -from rdkit import rdBase -from rdkit import Chem -from rdkit.Chem import AllChem,rdChemReactions -from rdkit import Geometry -from rdkit import RDConfig -import itertools, time import numpy as np +from rdkit import Chem, Geometry, RDConfig, rdBase +from rdkit.Chem import AllChem, rdChemReactions + + def log(s): rdBase.LogErrorMsg("== " + s) -class TestCase(unittest.TestCase) : + +class TestCase(unittest.TestCase): + def setUp(self): - self.dataDir = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol','ChemReactions','testData') + self.dataDir = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ChemReactions', 'testData') def testCartesianProduct(self): log("testCartesianProduct") rxn = rdChemReactions.ChemicalReaction() - rgroups = [[Chem.MolFromSmiles("C")]*10, - [Chem.MolFromSmiles("N")]*5, - [Chem.MolFromSmiles("O")]*6] + rgroups = [[Chem.MolFromSmiles("C")] * 10, [Chem.MolFromSmiles("N")] * 5, + [Chem.MolFromSmiles("O")] * 6] cartProd = rdChemReactions.CartesianProductStrategy() cartProd.Initialize(rxn, rgroups) - self.assertEquals(cartProd.GetNumPermutations(), 10*5*6) + self.assertEquals(cartProd.GetNumPermutations(), 10 * 5 * 6) groups = [] count = 0 - print (cartProd.__bool__()) + print(cartProd.__bool__()) while cartProd: groups.append(tuple(cartProd.next())) + + # count += 1 # assert count <= cartProd.GetNumPermutations() - self.assertEquals(len(groups), 10*5*6) + self.assertEquals(len(groups), 10 * 5 * 6) # see if we are equal to the Python implementation - g = list(itertools.product( list(range(10)), list(range(5)), list(range(6)) )) + g = list(itertools.product(list(range(10)), list(range(5)), list(range(6)))) self.assertEquals(set(g), set(groups)) copy.copy(cartProd) - + def testRandomSample(self): log("testRandomSample") - rgroups = [[Chem.MolFromSmiles("C")]*10, - [Chem.MolFromSmiles("N")]*5, - [Chem.MolFromSmiles("O")]*6] + rgroups = [[Chem.MolFromSmiles("C")] * 10, [Chem.MolFromSmiles("N")] * 5, + [Chem.MolFromSmiles("O")] * 6] rxn = rdChemReactions.ChemicalReaction() randProd = rdChemReactions.RandomSampleStrategy() randProd.Initialize(rxn, rgroups) - self.assertEquals(randProd.GetNumPermutations(), 10*5*6) + self.assertEquals(randProd.GetNumPermutations(), 10 * 5 * 6) groups = [] - for i in range(10*5*6): + for i in range(10 * 5 * 6): groups.append(tuple(randProd.next())) - print( len(set(groups)), "out of", 10*5*6 ) + print(len(set(groups)), "out of", 10 * 5 * 6) randProd = rdChemReactions.RandomSampleStrategy() randProd.Initialize(rxn, rgroups) - self.assertEquals(randProd.GetNumPermutations(), 10*5*6) + self.assertEquals(randProd.GetNumPermutations(), 10 * 5 * 6) groups = [] for i in range(10): groups.append(tuple(randProd.next())) for i in range(3): - print( i, len(set([g[i] for g in groups])), "out of", [10,5,6][i] ) + print(i, len(set([g[i] for g in groups])), "out of", [10, 5, 6][i]) copy.copy(randProd) - + def testRandomSampleAllBBs(self): log("testRandomSampleAllBBs") rxn = rdChemReactions.ChemicalReaction() - rgroups = [[Chem.MolFromSmiles("C")]*10, - [Chem.MolFromSmiles("N")]*5, - [Chem.MolFromSmiles("O")]*6] + rgroups = [[Chem.MolFromSmiles("C")] * 10, [Chem.MolFromSmiles("N")] * 5, + [Chem.MolFromSmiles("O")] * 6] randProd = rdChemReactions.RandomSampleAllBBsStrategy() randProd.Initialize(rxn, rgroups) - self.assertEquals(randProd.GetNumPermutations(), 10*5*6) + self.assertEquals(randProd.GetNumPermutations(), 10 * 5 * 6) groups = [] - for i in range(10*5*6): + for i in range(10 * 5 * 6): groups.append(tuple(randProd.next())) - print( len(set(groups)), "out of", 10*5*6 ) + print(len(set(groups)), "out of", 10 * 5 * 6) randProd = rdChemReactions.RandomSampleAllBBsStrategy() randProd.Initialize(rxn, rgroups) - self.assertEquals(randProd.GetNumPermutations(), 10*5*6) + self.assertEquals(randProd.GetNumPermutations(), 10 * 5 * 6) groups = [] for i in range(10): groups.append(tuple(randProd.next())) for i in range(3): - print( i, len(set([g[i] for g in groups])), "out of", [10,5,6][i] ) - self.assertEquals(len(set([g[i] for g in groups])), [10,5,6][i]) + print(i, len(set([g[i] for g in groups])), "out of", [10, 5, 6][i]) + self.assertEquals(len(set([g[i] for g in groups])), [10, 5, 6][i]) copy.copy(randProd) - + def testTimings(self): log("testTimings") rxn = rdChemReactions.ChemicalReaction() - rgroups = [[Chem.MolFromSmiles("C")]*17000, - [Chem.MolFromSmiles("N")]*50000, - [Chem.MolFromSmiles("O")]*4000] + rgroups = [[Chem.MolFromSmiles("C")] * 17000, [Chem.MolFromSmiles("N")] * 50000, + [Chem.MolFromSmiles("O")] * 4000] cartProd = rdChemReactions.CartesianProductStrategy() randProd = rdChemReactions.RandomSampleStrategy() randAllBBs = rdChemReactions.RandomSampleAllBBsStrategy() @@ -143,21 +143,20 @@ class TestCase(unittest.TestCase) : t1 = time.time() r.Skip(num) t2 = time.time() - print("%s Skipped %s in %s seconds"%(r, num, t2-t1)) + print("%s Skipped %s in %s seconds" % (r, num, t2 - t1)) def testEvenPairsSampling(self): rxn = rdChemReactions.ChemicalReaction() - - rgroups = [[Chem.MolFromSmiles("C")]*10, - [Chem.MolFromSmiles("N")]*10, - [Chem.MolFromSmiles("O")]*10] + + rgroups = [[Chem.MolFromSmiles("C")] * 10, [Chem.MolFromSmiles("N")] * 10, + [Chem.MolFromSmiles("O")] * 10] rxn = rdChemReactions.ChemicalReaction() count = 0 pairs01 = {} pairs12 = {} pairs02 = {} - + strategy = rdChemReactions.EvenSamplePairsStrategy() strategy.Initialize(rxn, rgroups) # try 100 samples @@ -194,9 +193,9 @@ class TestCase(unittest.TestCase) : count += 1 # each pair should be used roughly 10 times - self.assertTrue( 9 <= np.median(list(pairs01.values())) <= 11) - self.assertTrue( 9 <= np.median(list(pairs02.values())) <= 11) - self.assertTrue( 9 <= np.median(list(pairs12.values())) <= 11) + self.assertTrue(9 <= np.median(list(pairs01.values())) <= 11) + self.assertTrue(9 <= np.median(list(pairs02.values())) <= 11) + self.assertTrue(9 <= np.median(list(pairs12.values())) <= 11) # now try 500 pairs01 = {} @@ -215,25 +214,24 @@ class TestCase(unittest.TestCase) : pairs02[p01] = pairs02.get(p02, 0) + 1 count += 1 - # each pair should be used roughly 5 times - self.assertTrue( 4 <= np.median(list(pairs01.values())) <= 6) - self.assertTrue( 4 <= np.median(list(pairs02.values())) <= 6) - self.assertTrue( 4 <= np.median(list(pairs12.values())) <= 6) - - + # each pair should be used roughly 5 times + self.assertTrue(4 <= np.median(list(pairs01.values())) <= 6) + self.assertTrue(4 <= np.median(list(pairs02.values())) <= 6) + self.assertTrue(4 <= np.median(list(pairs12.values())) <= 6) + self.assertTrue("PAIRSTAT" in strategy.Stats()) def testEnumerateLibrary(self): log("testEnumerateLibrary") smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - reagents = [ - [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')], - [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), - ] - ] + reagents = [[Chem.MolFromSmiles('C=CCN=C=S'), + Chem.MolFromSmiles('CC=CCN=C=S')], + [ + Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), + ]] enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents) self.assertTrue(enumerator) @@ -246,13 +244,12 @@ class TestCase(unittest.TestCase) : for i in range(len(bbs)): for j in range(len(bbs[i])): self.assertTrue(Chem.MolToSmiles(reagents[i][j]) == Chem.MolToSmiles(bbs[i][j])) - - smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'] + + smiresults = [ + 'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br' + ] results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults] enumerators = [enumerator] @@ -262,25 +259,26 @@ class TestCase(unittest.TestCase) : pickle = enumerator.Serialize() enumerator2 = rdChemReactions.EnumerateLibrary() enumerator2.InitFromString(pickle) - + # make sure old pickles work enumerator3 = rdChemReactions.EnumerateLibrary() - enumerator3.InitFromString(open(os.path.join(self.dataDir, "enumeration.pickle"), 'rb').read()) - + enumerator3.InitFromString( + open(os.path.join(self.dataDir, "enumeration.pickle"), 'rb').read()) + print("==", enumerator.GetEnumerator().Type(), enumerator2.GetEnumerator().Type()) self.assertEquals(enumerator.GetEnumerator().Type(), enumerator2.GetEnumerator().Type()) enumerators.append(enumerator2) enumerators.append(enumerator3) # check for fully sampled and deterministic ordering in final index values - expected_positions = [[0, 0],[1, 0],[0, 1],[1, 1],[0, 2],[1, 2]] - + expected_positions = [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]] + out = [] for en in enumerators: i = 0 positions = [] for i, prods in enumerate(en): - positions.append( list(en.GetPosition()) ) + positions.append(list(en.GetPosition())) for mols in prods: self.assertEquals(len(mols), 1) smi = Chem.MolToSmiles(mols[0]) @@ -293,8 +291,8 @@ class TestCase(unittest.TestCase) : pickle_at_2 = enumerator.Serialize() self.assertEquals(i, 5) self.assertEquals(positions, expected_positions) - - if rdChemReactions.EnumerateLibraryCanSerialize(): + + if rdChemReactions.EnumerateLibraryCanSerialize(): # see if we can restore the enumeration from the middle out3 = [] enumerator3 = rdChemReactions.EnumerateLibrary() @@ -315,28 +313,27 @@ class TestCase(unittest.TestCase) : self.assertEquals(mols[0], results[i]) i += 1 self.assertEquals(i, 6) - + def testRandomEnumerateLibrary(self): log("testRandomEnumerateLibrary") smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - reagents = [ - [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')], - [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), - ] - ] + reagents = [[Chem.MolFromSmiles('C=CCN=C=S'), + Chem.MolFromSmiles('CC=CCN=C=S')], + [ + Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), + ]] enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, rdChemReactions.RandomSampleStrategy()) self.assertTrue(enumerator) - smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'] + smiresults = [ + 'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br' + ] results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults] enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, @@ -348,13 +345,13 @@ class TestCase(unittest.TestCase) : count += 1 if count > 100000: print("Unable to find enumerate set with 100,000 random samples!", file=sys.stderr) - self.assertEquals(res,set(results)) + self.assertEquals(res, set(results)) prod = iteren.next() for mols in prod: smi1 = Chem.MolToSmiles(mols[0]) res.add(smi1) - + if rdChemReactions.EnumerateLibraryCanSerialize(): enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, rdChemReactions.RandomSampleStrategy()) @@ -373,10 +370,10 @@ class TestCase(unittest.TestCase) : prods2 = iteren2.next() self.assertEquals(len(prods1), len(prods2)) for mols1, mols2 in zip(prods1, prods2): - self.assertEquals(len(mols1), 1) - smi1 = Chem.MolToSmiles(mols1[0]) - self.assertEquals(smi1, Chem.MolToSmiles(mols2[0])) - outsmiles.append(smi1) + self.assertEquals(len(mols1), 1) + smi1 = Chem.MolToSmiles(mols1[0]) + self.assertEquals(smi1, Chem.MolToSmiles(mols2[0])) + outsmiles.append(smi1) if i == 1: pickle_at_2 = enumerator.Serialize() @@ -389,24 +386,24 @@ class TestCase(unittest.TestCase) : for i in range(8): prods3 = iteren3.next() for mols3 in prods3: - self.assertEquals(len(mols3), 1) - smi1 = Chem.MolToSmiles(mols3[0]) - self.assertEquals(smi1, Chem.MolToSmiles(mols3[0])) - outsmiles2.append(smi1) + self.assertEquals(len(mols3), 1) + smi1 = Chem.MolToSmiles(mols3[0]) + self.assertEquals(smi1, Chem.MolToSmiles(mols3[0])) + outsmiles2.append(smi1) self.assertEquals(outsmiles2, outsmiles[2:]) - + def testRandomEnumerateAllBBsLibrary(self): log("testRandomEnumerateAllBBsLibrary") smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - reagents = [ - [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')], - [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), - ] - ] + reagents = [[Chem.MolFromSmiles('C=CCN=C=S'), + Chem.MolFromSmiles('CC=CCN=C=S')], + [ + Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), + ]] enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy()) self.assertTrue(enumerator) @@ -415,38 +412,37 @@ class TestCase(unittest.TestCase) : strategy = iter(enumerator) r1 = set() r2 = set() - strategy.next() + strategy.next() groups = strategy.GetPosition() print("**", list(groups), file=sys.stderr) r1.add(groups[0]) r2.add(groups[1]) strategy.next() groups = strategy.GetPosition() - print("**", list(groups),file=sys.stderr) + print("**", list(groups), file=sys.stderr) r1.add(groups[0]) r2.add(groups[1]) - self.assertEquals(r1, set([0,1])) # two bbs at reagent one all sampled at one iteration + self.assertEquals(r1, set([0, 1])) # two bbs at reagent one all sampled at one iteration strategy.next() groups = strategy.GetPosition() - print("**", list(groups),file=sys.stderr) + print("**", list(groups), file=sys.stderr) r1.add(groups[0]) r2.add(groups[1]) - self.assertEquals(r2, set([0,1,2])) # three bbs at reagent one all sampled in three iterations - - smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'] - results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults] + self.assertEquals(r2, set([0, 1, + 2])) # three bbs at reagent one all sampled in three iterations + smiresults = [ + 'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br' + ] + results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults] if rdChemReactions.EnumerateLibraryCanSerialize(): enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, rdChemReactions.RandomSampleAllBBsStrategy()) self.assertTrue(enumerator) - + pickle = enumerator.Serialize() enumerator2 = rdChemReactions.EnumerateLibrary() enumerator2.InitFromString(pickle) @@ -461,10 +457,10 @@ class TestCase(unittest.TestCase) : prods2 = iteren2.next() self.assertEquals(len(prods1), len(prods2)) for mols1, mols2 in zip(prods1, prods2): - self.assertEquals(len(mols1), 1) - smi1 = Chem.MolToSmiles(mols1[0]) - self.assertEquals(smi1, Chem.MolToSmiles(mols2[0])) - outsmiles.append(smi1) + self.assertEquals(len(mols1), 1) + smi1 = Chem.MolToSmiles(mols1[0]) + self.assertEquals(smi1, Chem.MolToSmiles(mols2[0])) + outsmiles.append(smi1) if i == 1: pickle_at_2 = enumerator.Serialize() @@ -479,32 +475,33 @@ class TestCase(unittest.TestCase) : for i in range(8): prods3 = iteren3.next() for mols3 in prods3: - self.assertEquals(len(mols3), 1) - smi1 = Chem.MolToSmiles(mols3[0]) - self.assertEquals(smi1, Chem.MolToSmiles(mols3[0])) - outsmiles2.append(smi1) + self.assertEquals(len(mols3), 1) + smi1 = Chem.MolToSmiles(mols3[0]) + self.assertEquals(smi1, Chem.MolToSmiles(mols3[0])) + outsmiles2.append(smi1) self.assertEquals(outsmiles2, outsmiles[2:]) - def testRGroupState(self): if not rdChemReactions.EnumerateLibraryCanSerialize(): - print("-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled", file=sys.stderr) + print("-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled", + file=sys.stderr) return - + log("testRGroupState") smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - reagents = [ - [Chem.MolFromSmiles('C=CCN=C=S'), Chem.MolFromSmiles('CC=CCN=C=S')], - [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), - ] - ] + reagents = [[Chem.MolFromSmiles('C=CCN=C=S'), + Chem.MolFromSmiles('CC=CCN=C=S')], + [ + Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), + ]] def tostr(l): return [[str(x) for x in v] for v in l] + enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents) state = enumerator.GetState() p = enumerator.nextSmiles() @@ -515,7 +512,7 @@ class TestCase(unittest.TestCase) : enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents, rdChemReactions.RandomSampleStrategy()) - + state = enumerator.GetState() p = enumerator.nextSmiles() p2 = enumerator.nextSmiles() @@ -532,43 +529,42 @@ class TestCase(unittest.TestCase) : self.assertEquals(tostr(enumerator.nextSmiles()), tostr(p)) self.assertEquals(tostr(enumerator.nextSmiles()), tostr(p2)) - enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents) - smiresults = ['C=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', - 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', - 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br'] + smiresults = [ + 'C=CCNC(=S)NCc1ncc(Cl)cc1Br', 'CC=CCNC(=S)NCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br', 'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br', + 'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br' + ] smiresults = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults] enumerator.GetEnumerator().Skip(10) enumerator.ResetState() - results = [] + results = [] for result in enumerator: for prodSet in result: for mol in prodSet: - results.append( Chem.MolToSmiles(mol) ) + results.append(Chem.MolToSmiles(mol)) self.assertEquals(results, smiresults) def testRemovingBadMatches(self): log("testRemoveBadMatches") smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" - + rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) # invert matches so nothing matches reagents = [ - [Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), - Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), - ], - - [Chem.MolFromSmiles('C=CCN=C=S'), - Chem.MolFromSmiles('CC=CCN=C=S'), - Chem.MolFromSmiles('CCC'), - Chem.MolFromSmiles('CCCCC'), - ], + [ + Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'), + Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'), + ], + [ + Chem.MolFromSmiles('C=CCN=C=S'), + Chem.MolFromSmiles('CC=CCN=C=S'), + Chem.MolFromSmiles('CCC'), + Chem.MolFromSmiles('CCCCC'), + ], ] enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents) @@ -579,25 +575,28 @@ class TestCase(unittest.TestCase) : rxn = AllChem.ReactionFromRxnBlock(rxndata) bbs = [] - r1 = [ Chem.MolFromSmiles("CCNCC"), - Chem.MolFromSmiles("NCC"), - ] - r2 = [ Chem.MolFromSmiles("ClC1CCCC1"), - Chem.MolFromSmiles("ClC1CCCC1Cl"), - ] - r3 = [ Chem.MolFromSmiles("CCNCC"), - Chem.MolFromSmiles("NCC"), - ] + r1 = [ + Chem.MolFromSmiles("CCNCC"), + Chem.MolFromSmiles("NCC"), + ] + r2 = [ + Chem.MolFromSmiles("ClC1CCCC1"), + Chem.MolFromSmiles("ClC1CCCC1Cl"), + ] + r3 = [ + Chem.MolFromSmiles("CCNCC"), + Chem.MolFromSmiles("NCC"), + ] bbs = [r1, r2, r3] # nothing matches! - for i,reagent in enumerate(rxn.GetReactants()): + for i, reagent in enumerate(rxn.GetReactants()): for bb in bbs[i]: self.assertFalse(bb.HasSubstructMatch(reagent)) # everything matches - yay sanitization! rdChemReactions.SanitizeRxn(rxn) - for i,reagent in enumerate(rxn.GetReactants()): + for i, reagent in enumerate(rxn.GetReactants()): for bb in bbs[i]: self.assertTrue(bb.HasSubstructMatch(reagent)) @@ -609,9 +608,9 @@ class TestCase(unittest.TestCase) : ##################################################################################### # Match only at rgroups (ChemDraw style) rxn = AllChem.ReactionFromRxnBlock(rxndata) - expected_matches = [[False,True], [True,True],[False, True] ] + expected_matches = [[False, True], [True, True], [False, True]] rdChemReactions.SanitizeRxn(rxn, params=rdChemReactions.GetChemDrawRxnAdjustParams()) - for i,(reagent, expected) in enumerate(zip(rxn.GetReactants(), expected_matches)): + for i, (reagent, expected) in enumerate(zip(rxn.GetReactants(), expected_matches)): match = [bb.HasSubstructMatch(reagent) for reagent in bbs[i]] self.assertTrue(match, expected) @@ -621,10 +620,9 @@ class TestCase(unittest.TestCase) : self.assertTrue(len(en.GetReagents()[1]) == 2) self.assertTrue(len(en.GetReagents()[2]) == 1) - ##################################################################################### # now set the removal options ot only make one product per reagent set - rxn = AllChem.ReactionFromRxnBlock(rxndata) + rxn = AllChem.ReactionFromRxnBlock(rxndata) rdChemReactions.SanitizeRxn(rxn) opts = rdChemReactions.EnumerationParams() @@ -638,9 +636,7 @@ class TestCase(unittest.TestCase) : # now set the removal options ot only make one product per reagent set # but wt rxn = AllChem.ReactionFromRxnBlock(rxndata) - rdChemReactions.SanitizeRxn(rxn, - params=rdChemReactions.GetChemDrawRxnAdjustParams()) - + rdChemReactions.SanitizeRxn(rxn, params=rdChemReactions.GetChemDrawRxnAdjustParams()) opts = rdChemReactions.EnumerationParams() opts.reagentMaxMatchCount = 1 @@ -648,7 +644,6 @@ class TestCase(unittest.TestCase) : self.assertTrue(len(en.GetReagents()[0]) == 1) self.assertTrue(len(en.GetReagents()[1]) == 1) self.assertTrue(len(en.GetReagents()[2]) == 1) - if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py index bcd9b511c..9f6f36f5e 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py +++ b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py @@ -29,18 +29,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import importlib.util -import unittest import doctest +import importlib.util import os -import sys import pickle +import sys +import unittest -from rdkit import rdBase -from rdkit import Chem +from rdkit import Chem, Geometry, RDConfig, rdBase from rdkit.Chem import AllChem, rdChemReactions -from rdkit import Geometry -from rdkit import RDConfig from rdkit.Chem.SimpleEnum import Enumerator diff --git a/Code/GraphMol/ChemReactions/Wrap/testSanitize.py b/Code/GraphMol/ChemReactions/Wrap/testSanitize.py index bcb4de165..5b04461fa 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testSanitize.py +++ b/Code/GraphMol/ChemReactions/Wrap/testSanitize.py @@ -1,18 +1,18 @@ # Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. # @@ -29,19 +29,18 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # - -import unittest -import os,sys +import itertools +import os import pickle +import sys +import time +import unittest -from rdkit import rdBase -from rdkit import Chem -from rdkit.Chem import rdChemReactions, AllChem -from rdkit import Geometry -from rdkit import RDConfig -import itertools, time +from rdkit import Chem, Geometry, RDConfig, rdBase +from rdkit.Chem import AllChem, rdChemReactions -test_data = [("good", '''$RXN +test_data = [ + ("good", '''$RXN ISIS 052820091627 @@ -82,8 +81,7 @@ $MOL 1 2 1 0 0 0 0 M RGP 2 1 1 2 2 M END'''), - -("bad", '''$RXN + ("bad", '''$RXN ISIS 052820091627 @@ -124,8 +122,8 @@ $MOL 1 2 1 0 0 0 0 M RGP 2 1 1 2 2 M END'''), -# chemdraw style -("bad", '''$RXN + # chemdraw style + ("bad", '''$RXN ISIS 052820091627 @@ -163,7 +161,7 @@ $MOL 11.9811 -6.9292 0.0000 R2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 M END'''), -("fail", '''$RXN + ("fail", '''$RXN ISIS 052820091627 @@ -271,81 +269,80 @@ $MOL M END """ -good_res = (0,0,2,1,(((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))) -bad_res = (3,0,2,1,(((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))) +good_res = (0, 0, 2, 1, (((0, 'halogen.bromine.aromatic'), ), ((1, 'boronicacid'), ))) +bad_res = (3, 0, 2, 1, (((0, 'halogen.bromine.aromatic'), ), ((1, 'boronicacid'), ))) -class TestCase(unittest.TestCase) : - def test_sanitize(self): - for status, block in test_data: - print("*"*44) - rxna = AllChem.ReactionFromRxnBlock(block) - rxnb = AllChem.ReactionFromRxnBlock(block) - rxna.Initialize() - res = rdChemReactions.PreprocessReaction(rxna) - print(AllChem.ReactionToRxnBlock(rxna)) - if status == "good": - self.assertEquals(res, good_res) - elif status == "bad": - self.assertEquals(res, bad_res) - print (">"*44) - rxnb.Initialize() - try: - rdChemReactions.SanitizeRxn(rxnb) - res = rdChemReactions.PreprocessReaction(rxnb) - print(AllChem.ReactionToRxnBlock(rxnb)) - self.assertEquals(res, good_res) - assert not status == "fail" - except Exception: - print ("$RXN Failed") - if status == "fail": - continue - raise - - def test_unused_rlabel_in_product(self): - rxn = AllChem.ReactionFromRxnBlock(unused_rlabel_in_product) - # test was for a seg fault - rdChemReactions.SanitizeRxn(rxn) - def test_only_aromatize_if_possible(self): - rxn = AllChem.ReactionFromRxnBlock(kekule_rxn) - # test was for a seg fault - groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) - print(groups) - self.assertFalse(len(groups)) +class TestCase(unittest.TestCase): - # check normal sanitization - rdChemReactions.SanitizeRxn(rxn) - groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) - self.assertTrue(len(groups[0])) + def test_sanitize(self): + for status, block in test_data: + print("*" * 44) + rxna = AllChem.ReactionFromRxnBlock(block) + rxnb = AllChem.ReactionFromRxnBlock(block) + rxna.Initialize() + res = rdChemReactions.PreprocessReaction(rxna) + print(AllChem.ReactionToRxnBlock(rxna)) + if status == "good": + self.assertEquals(res, good_res) + elif status == "bad": + self.assertEquals(res, bad_res) + print(">" * 44) + rxnb.Initialize() + try: + rdChemReactions.SanitizeRxn(rxnb) + res = rdChemReactions.PreprocessReaction(rxnb) + print(AllChem.ReactionToRxnBlock(rxnb)) + self.assertEquals(res, good_res) + assert not status == "fail" + except Exception: + print("$RXN Failed") + if status == "fail": + continue + raise - # now check adjustparams with ONLY aromatize if possible - rxn = AllChem.ReactionFromRxnBlock(kekule_rxn) - rdChemReactions.SanitizeRxn(rxn) - - groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) - self.assertTrue(len(groups[0])) + def test_unused_rlabel_in_product(self): + rxn = AllChem.ReactionFromRxnBlock(unused_rlabel_in_product) + # test was for a seg fault + rdChemReactions.SanitizeRxn(rxn) - def test_github_4162(self): - rxn = rdChemReactions.ReactionFromSmarts( - "[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]") - rxn_copy = rdChemReactions.ChemicalReaction(rxn) - rdChemReactions.SanitizeRxn(rxn) - rdChemReactions.SanitizeRxn(rxn_copy) - pkl = rxn.ToBinary() - rxn_from_pickle = rdChemReactions.ChemicalReaction(pkl) - rdChemReactions.SanitizeRxn(rxn_from_pickle) - pkl = pickle.dumps(rxn) - rxn_from_pickle = pickle.loads(pkl) - rdChemReactions.SanitizeRxn(rxn_from_pickle) - pkl = rxn_from_pickle.ToBinary() - rxn_from_pickle = rdChemReactions.ChemicalReaction(pkl) - rdChemReactions.SanitizeRxn(rxn_from_pickle) - pkl = pickle.dumps(rxn_from_pickle) - rxn_from_pickle = pickle.loads(pkl) - rdChemReactions.SanitizeRxn(rxn_from_pickle) - + def test_only_aromatize_if_possible(self): + rxn = AllChem.ReactionFromRxnBlock(kekule_rxn) + # test was for a seg fault + groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) + print(groups) + self.assertFalse(len(groups)) + + # check normal sanitization + rdChemReactions.SanitizeRxn(rxn) + groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) + self.assertTrue(len(groups[0])) + + # now check adjustparams with ONLY aromatize if possible + rxn = AllChem.ReactionFromRxnBlock(kekule_rxn) + rdChemReactions.SanitizeRxn(rxn) + + groups = rxn.RunReactants([Chem.MolFromSmiles("c1ccccc1")]) + self.assertTrue(len(groups[0])) + + def test_github_4162(self): + rxn = rdChemReactions.ReactionFromSmarts("[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]") + rxn_copy = rdChemReactions.ChemicalReaction(rxn) + rdChemReactions.SanitizeRxn(rxn) + rdChemReactions.SanitizeRxn(rxn_copy) + pkl = rxn.ToBinary() + rxn_from_pickle = rdChemReactions.ChemicalReaction(pkl) + rdChemReactions.SanitizeRxn(rxn_from_pickle) + pkl = pickle.dumps(rxn) + rxn_from_pickle = pickle.loads(pkl) + rdChemReactions.SanitizeRxn(rxn_from_pickle) + pkl = rxn_from_pickle.ToBinary() + rxn_from_pickle = rdChemReactions.ChemicalReaction(pkl) + rdChemReactions.SanitizeRxn(rxn_from_pickle) + pkl = pickle.dumps(rxn_from_pickle) + rxn_from_pickle = pickle.loads(pkl) + rdChemReactions.SanitizeRxn(rxn_from_pickle) - if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/Depictor/Wrap/testDepictor.py b/Code/GraphMol/Depictor/Wrap/testDepictor.py index d61f33990..5df8ead09 100755 --- a/Code/GraphMol/Depictor/Wrap/testDepictor.py +++ b/Code/GraphMol/Depictor/Wrap/testDepictor.py @@ -2,6 +2,7 @@ import os import sys +import tempfile # # $Id: testDepictor.py 2112 2012-07-02 09:47:45Z glandrum $ # @@ -9,240 +10,248 @@ import sys import unittest import numpy as np -import tempfile -from rdkit import Chem -from rdkit import Geometry -from rdkit import RDConfig -from rdkit.Chem import rdDepictor -from rdkit.Chem import rdMolAlign +from rdkit import Chem, Geometry, RDConfig +from rdkit.Chem import rdDepictor, rdMolAlign from rdkit.Chem.ChemUtils import AlignDepict def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 def ptEq(pt1, pt2, tol=1e-4): - return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) + return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) def getDistMat(mol): - conf = mol.GetConformer() - nat = mol.GetNumAtoms() - nl = nat * (nat - 1) // 2 - res = np.zeros(nl, float) + conf = mol.GetConformer() + nat = mol.GetNumAtoms() + nl = nat * (nat - 1) // 2 + res = np.zeros(nl, float) - for i in range(1, nat): - pi = conf.GetAtomPosition(i) - idx = i * (i - 1) // 2 - for j in range(i): - pj = conf.GetAtomPosition(j) - pj -= pi - res[idx + j] = pj.Length() + for i in range(1, nat): + pi = conf.GetAtomPosition(i) + idx = i * (i - 1) // 2 + for j in range(i): + pj = conf.GetAtomPosition(j) + pj -= pi + res[idx + j] = pj.Length() - return res + return res def compareCoords(m, molFile): - mo = Chem.MolFromMolFile(molFile) - co = mo.GetConformer() + mo = Chem.MolFromMolFile(molFile) + co = mo.GetConformer() - ci = m.GetConformer() - nat = m.GetNumAtoms() - if (nat != mo.GetNumAtoms()): - return 0 + ci = m.GetConformer() + nat = m.GetNumAtoms() + if (nat != mo.GetNumAtoms()): + return 0 - for i in range(nat): - pos = ci.GetAtomPosition(i) - opos = co.GetAtomPosition(i) - if not ptEq(pos, opos): - print(Chem.MolToMolBlock(m)) - print(Chem.MolToMolBlock(mo)) - return 0 - return 1 + for i in range(nat): + pos = ci.GetAtomPosition(i) + opos = co.GetAtomPosition(i) + if not ptEq(pos, opos): + print(Chem.MolToMolBlock(m)) + print(Chem.MolToMolBlock(mo)) + return 0 + return 1 def compareWithOld(smilesFile, sdFile): - smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) - sdsup = Chem.SDMolSupplier(sdFile) - im = 0 - for mol in smiSup: - omol = sdsup[im] - rdDepictor.Compute2DCoords(mol, canonOrient=False) - conf = mol.GetConformer() - oconf = omol.GetConformer() - nat = mol.GetNumAtoms() - for i in range(nat): - pos = conf.GetAtomPosition(i) - opos = oconf.GetAtomPosition(i) - if not ptEq(pos, opos): - print(Chem.MolToMolBlock(omol), file=sys.stderr) - print('> \n%d\n' % i, file=sys.stderr) - print("$$$$", file=sys.stderr) - print(Chem.MolToMolBlock(mol), file=sys.stderr) - print('> \n%d\n' % i, file=sys.stderr) - print("$$$$", file=sys.stderr) - return 0 - im += 1 - return 1 + smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) + sdsup = Chem.SDMolSupplier(sdFile) + im = 0 + for mol in smiSup: + omol = sdsup[im] + rdDepictor.Compute2DCoords(mol, canonOrient=False) + conf = mol.GetConformer() + oconf = omol.GetConformer() + nat = mol.GetNumAtoms() + for i in range(nat): + pos = conf.GetAtomPosition(i) + opos = oconf.GetAtomPosition(i) + if not ptEq(pos, opos): + print(Chem.MolToMolBlock(omol), file=sys.stderr) + print('> \n%d\n' % i, file=sys.stderr) + print("$$$$", file=sys.stderr) + print(Chem.MolToMolBlock(mol), file=sys.stderr) + print('> \n%d\n' % i, file=sys.stderr) + print("$$$$", file=sys.stderr) + return 0 + im += 1 + return 1 def stereoCompare(smilesFile): - smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) - for mol in smiSup: - rdDepictor.Compute2DCoords(mol, canonOrient=False) - mb = Chem.MolToMolBlock(mol) - nmol = Chem.MolFromMolBlock(mb) - matches = nmol.GetSubstructMatches(mol, False) - dbnds = [x for x in mol.GetBonds() if (x.GetBondType() == Chem.BondType.DOUBLE and - x.GetStereo() > Chem.BondStereo.STEREOANY) ] - ok = True - for match in matches: - for bnd in dbnds: - obnd = nmol.GetBondBetweenAtoms( - match[bnd.GetBeginAtomIdx()], match[bnd.GetEndAtomIdx()]) - assert (obnd.GetBondType() == Chem.BondType.DOUBLE) - if ok: - break - if not ok: - print(Chem.MolToMolBlock(mol), file=sys.stderr) - print("$$$$", file=sys.stderr) - return 0 - return 1 + smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) + for mol in smiSup: + rdDepictor.Compute2DCoords(mol, canonOrient=False) + mb = Chem.MolToMolBlock(mol) + nmol = Chem.MolFromMolBlock(mb) + matches = nmol.GetSubstructMatches(mol, False) + dbnds = [ + x for x in mol.GetBonds() + if (x.GetBondType() == Chem.BondType.DOUBLE and x.GetStereo() > Chem.BondStereo.STEREOANY) + ] + ok = True + for match in matches: + for bnd in dbnds: + obnd = nmol.GetBondBetweenAtoms(match[bnd.GetBeginAtomIdx()], match[bnd.GetEndAtomIdx()]) + assert (obnd.GetBondType() == Chem.BondType.DOUBLE) + if ok: + break + if not ok: + print(Chem.MolToMolBlock(mol), file=sys.stderr) + print("$$$$", file=sys.stderr) + return 0 + return 1 class TestCase(unittest.TestCase): - def _test0First200(self): - # this test is disabled because it's not particularly useful and - # causes problems every time anything changes. - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - 'first_200.tpsa.csv') - #smiSup = Chem.SmilesMolSupplier(fileN, ",", 0, -1) + def _test0First200(self): + # this test is disabled because it's not particularly useful and + # causes problems every time anything changes. + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + 'first_200.tpsa.csv') + #smiSup = Chem.SmilesMolSupplier(fileN, ",", 0, -1) - ofile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - 'first_200.python.sdf') - self.assertTrue(compareWithOld(fileN, ofile)) + ofile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + 'first_200.python.sdf') + self.assertTrue(compareWithOld(fileN, ofile)) - def test1CisTrans(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - "cis_trans_cases.csv") - self.assertTrue(stereoCompare(fileN)) + def test1CisTrans(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + "cis_trans_cases.csv") + self.assertTrue(stereoCompare(fileN)) - def test2Coords(self): - m1 = Chem.MolFromSmiles('C1CCC1CC') - coordMap = {0: Geometry.Point2D(0, 0), - 1: Geometry.Point2D(1.5, 0), - 2: Geometry.Point2D(1.5, 1.5), - 3: Geometry.Point2D(0, 1.5)} - rdDepictor.Compute2DCoords(m1, coordMap=coordMap) - conf = m1.GetConformer(0) - for i in range(4): - self.assertTrue( - ptEq(conf.GetAtomPosition(i), Geometry.Point3D(coordMap[i].x, coordMap[i].y, 0.0))) + def test2Coords(self): + m1 = Chem.MolFromSmiles('C1CCC1CC') + coordMap = { + 0: Geometry.Point2D(0, 0), + 1: Geometry.Point2D(1.5, 0), + 2: Geometry.Point2D(1.5, 1.5), + 3: Geometry.Point2D(0, 1.5) + } + rdDepictor.Compute2DCoords(m1, coordMap=coordMap) + conf = m1.GetConformer(0) + for i in range(4): + self.assertTrue( + ptEq(conf.GetAtomPosition(i), Geometry.Point3D(coordMap[i].x, coordMap[i].y, 0.0))) - m1 = Chem.MolFromSmiles('CCC') - try: - rdDepictor.Compute2DCoords(m1, coordMap=coordMap) - ok = 0 - except ValueError: - ok = 1 - self.assertTrue(ok) + m1 = Chem.MolFromSmiles('CCC') + try: + rdDepictor.Compute2DCoords(m1, coordMap=coordMap) + ok = 0 + except ValueError: + ok = 1 + self.assertTrue(ok) - def test3IssueSF1526844(self): - t = Chem.MolFromSmiles('c1nc(N)ccc1') - rdDepictor.Compute2DCoords(t, canonOrient=False) + def test3IssueSF1526844(self): + t = Chem.MolFromSmiles('c1nc(N)ccc1') + rdDepictor.Compute2DCoords(t, canonOrient=False) - m2 = Chem.MolFromSmiles('c1nc(NC=O)ccc1') - AlignDepict.AlignDepict(m2, t) - expected = [Geometry.Point3D(1.5, 0.0, 0.0), Geometry.Point3D(0.75, -1.299, 0.0), - Geometry.Point3D(-0.75, -1.299, 0.0), Geometry.Point3D(-1.5, -2.5981, 0.0), - Geometry.Point3D(-3.0, -2.5981, 0.0), Geometry.Point3D(-3.75, -3.8971, 0.0), - Geometry.Point3D(-1.5, 0.0, 0.0), Geometry.Point3D(-0.75, 1.2990, 0.0), - Geometry.Point3D(0.75, 1.2990, 0.0)] + m2 = Chem.MolFromSmiles('c1nc(NC=O)ccc1') + AlignDepict.AlignDepict(m2, t) + expected = [ + Geometry.Point3D(1.5, 0.0, 0.0), + Geometry.Point3D(0.75, -1.299, 0.0), + Geometry.Point3D(-0.75, -1.299, 0.0), + Geometry.Point3D(-1.5, -2.5981, 0.0), + Geometry.Point3D(-3.0, -2.5981, 0.0), + Geometry.Point3D(-3.75, -3.8971, 0.0), + Geometry.Point3D(-1.5, 0.0, 0.0), + Geometry.Point3D(-0.75, 1.2990, 0.0), + Geometry.Point3D(0.75, 1.2990, 0.0) + ] - nat = m2.GetNumAtoms() - conf = m2.GetConformer() - for i in range(nat): - pos = conf.GetAtomPosition(i) - self.assertTrue(ptEq(pos, expected[i], 0.001)) + nat = m2.GetNumAtoms() + conf = m2.GetConformer() + for i in range(nat): + pos = conf.GetAtomPosition(i) + self.assertTrue(ptEq(pos, expected[i], 0.001)) - def test4SamplingSpread(self): - mol = Chem.MolFromMolFile( - os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) + def test4SamplingSpread(self): + mol = Chem.MolFromMolFile( + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) - # default mode - rdDepictor.Compute2DCoords(mol, canonOrient=False) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_default.mol'))) + # default mode + rdDepictor.Compute2DCoords(mol, canonOrient=False) + self.assertTrue( + compareCoords( + mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_default.mol'))) - # spread the structure as much as possible by sampling - rdDepictor.Compute2DCoords(mol, canonOrient=False, nFlipsPerSample=3, nSample=100, - sampleSeed=100, permuteDeg4Nodes=1) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_spread.mol'))) + # spread the structure as much as possible by sampling + rdDepictor.Compute2DCoords(mol, canonOrient=False, nFlipsPerSample=3, nSample=100, + sampleSeed=100, permuteDeg4Nodes=1) + self.assertTrue( + compareCoords( + mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_spread.mol'))) - def test5SamplingMimic3D(self): - mol = Chem.MolFromMolFile( - os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) - dmat3D = getDistMat(mol) + def test5SamplingMimic3D(self): + mol = Chem.MolFromMolFile( + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) + dmat3D = getDistMat(mol) - # now mimic the coordinate with a very small weight - rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.001) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_mimic3D_1.mol'))) + # now mimic the coordinate with a very small weight + rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.001) + self.assertTrue( + compareCoords( + mol, + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_mimic3D_1.mol'))) - # now mimic the coordinate with a very small weight - rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.003) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_mimic3D_2.mol'))) + # now mimic the coordinate with a very small weight + rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.003) + self.assertTrue( + compareCoords( + mol, + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_mimic3D_2.mol'))) - #mb = Chem.MolToMolBlock(mol) - #ofile = open('../test_data/7UPJ_mimic3D_2.mol', 'w') - # ofile.write(mb) - # ofile.close() + #mb = Chem.MolToMolBlock(mol) + #ofile = open('../test_data/7UPJ_mimic3D_2.mol', 'w') + # ofile.write(mb) + # ofile.close() - def test6ChangeBondLength(self): - m = Chem.MolFromSmiles('CC') - rdDepictor.Compute2DCoords(m) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) - rdDepictor.Compute2DCoords(m, bondLength=1.0) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.500, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.500, 3) - rdDepictor.Compute2DCoords(m) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) + def test6ChangeBondLength(self): + m = Chem.MolFromSmiles('CC') + rdDepictor.Compute2DCoords(m) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) + rdDepictor.Compute2DCoords(m, bondLength=1.0) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.500, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.500, 3) + rdDepictor.Compute2DCoords(m) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) - def testConstrainedCoords(self): - templ = Chem.MolFromSmiles('c1nccc2n1ccc2') - rdDepictor.Compute2DCoords(templ) - m1 = Chem.MolFromSmiles('c1cccc2ncn3cccc3c21') - rdDepictor.GenerateDepictionMatching2DStructure(m1, templ) - m2 = Chem.MolFromSmiles('c1cc(Cl)cc2ncn3cccc3c21') - rdDepictor.Compute2DCoords(m2) - refPatt1 = Chem.MolFromSmarts('*1****2*1***2') - rdDepictor.GenerateDepictionMatching2DStructure(m2, templ, -1, refPatt1) - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - '1XP0_ligand.sdf') + def testConstrainedCoords(self): + templ = Chem.MolFromSmiles('c1nccc2n1ccc2') + rdDepictor.Compute2DCoords(templ) + m1 = Chem.MolFromSmiles('c1cccc2ncn3cccc3c21') + rdDepictor.GenerateDepictionMatching2DStructure(m1, templ) + m2 = Chem.MolFromSmiles('c1cc(Cl)cc2ncn3cccc3c21') + rdDepictor.Compute2DCoords(m2) + refPatt1 = Chem.MolFromSmarts('*1****2*1***2') + rdDepictor.GenerateDepictionMatching2DStructure(m2, templ, -1, refPatt1) + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + '1XP0_ligand.sdf') - xp0_lig = Chem.MolFromMolFile(fileN) - xp0_lig_2d = Chem.Mol(xp0_lig) - rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig) - xp0_ref = Chem.MolFromSmarts('[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12') - rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig, -1, xp0_ref) + xp0_lig = Chem.MolFromMolFile(fileN) + xp0_lig_2d = Chem.Mol(xp0_lig) + rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig) + xp0_ref = Chem.MolFromSmarts('[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12') + rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig, -1, xp0_ref) - - def testGenerate2DDepictionRefPatternAtomMap(self): - indazoleMolblock = """ + def testGenerate2DDepictionRefPatternAtomMap(self): + indazoleMolblock = """ RDKit 2D 9 10 0 0 0 0 0 0 0 0999 V2000 @@ -266,65 +275,67 @@ class TestCase(unittest.TestCase): 7 9 1 0 7 5 1 0 M END""" - indazoleRef = Chem.MolFromMolBlock(indazoleMolblock) - cycloheptylPyrazole = Chem.MolFromSmiles("c1cc(C2CCCCCC2)[nH]n1") + indazoleRef = Chem.MolFromMolBlock(indazoleMolblock) + cycloheptylPyrazole = Chem.MolFromSmiles("c1cc(C2CCCCCC2)[nH]n1") - # test using refPattern - refPatt = Chem.MolFromSmarts("a1aan[nH]1") - rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, refPatt=refPatt) - self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1) - molMatchVect = cycloheptylPyrazole.GetSubstructMatch(refPatt) - self.assertEqual(len(molMatchVect), refPatt.GetNumAtoms()) - refMatchVect = indazoleRef.GetSubstructMatch(refPatt) - self.assertEqual(len(refMatchVect), refPatt.GetNumAtoms()) - atomMap = tuple(zip(refMatchVect, molMatchVect)) - msd = 0.0 - for refIdx, molIdx in atomMap: - msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) - - cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)).LengthSq() - msd /= len(molMatchVect) - self.assertAlmostEqual(msd, 0.0) - # try with a pattern larger than the reference molecule - hugePatt = Chem.MolFromSmarts("CCCCCCCCCCCCCCCCCCCCCCCCCCC") - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure( - cycloheptylPyrazole, indazoleRef, refPatt=hugePatt) + # test using refPattern + refPatt = Chem.MolFromSmarts("a1aan[nH]1") + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + refPatt=refPatt) + self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1) + molMatchVect = cycloheptylPyrazole.GetSubstructMatch(refPatt) + self.assertEqual(len(molMatchVect), refPatt.GetNumAtoms()) + refMatchVect = indazoleRef.GetSubstructMatch(refPatt) + self.assertEqual(len(refMatchVect), refPatt.GetNumAtoms()) + atomMap = tuple(zip(refMatchVect, molMatchVect)) + msd = 0.0 + for refIdx, molIdx in atomMap: + msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) - + cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)).LengthSq() + msd /= len(molMatchVect) + self.assertAlmostEqual(msd, 0.0) + # try with a pattern larger than the reference molecule + hugePatt = Chem.MolFromSmarts("CCCCCCCCCCCCCCCCCCCCCCCCCCC") + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + refPatt=hugePatt) - # try with an out of range confId - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure( - cycloheptylPyrazole, indazoleRef, confId=1, refPatt=refPatt) + # try with an out of range confId + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, confId=1, + refPatt=refPatt) - # test using atomMap directly - cycloheptylPyrazole.RemoveAllConformers() - rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, atomMap=atomMap) - self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1) - msd = 0.0 - for refIdx, molIdx in atomMap: - msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) - - cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)).LengthSq() - msd /= len(atomMap) - self.assertAlmostEqual(msd, 0.0) + # test using atomMap directly + cycloheptylPyrazole.RemoveAllConformers() + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + atomMap=atomMap) + self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1) + msd = 0.0 + for refIdx, molIdx in atomMap: + msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) - + cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)).LengthSq() + msd /= len(atomMap) + self.assertAlmostEqual(msd, 0.0) - # try with an atomMap larger than the reference molecule - atomMapHuge = list(atomMap) + [(0, 0) for i in range(indazoleRef.GetNumAtoms())] - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure( - cycloheptylPyrazole, indazoleRef, atomMap=atomMapHuge) + # try with an atomMap larger than the reference molecule + atomMapHuge = list(atomMap) + [(0, 0) for i in range(indazoleRef.GetNumAtoms())] + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + atomMap=atomMapHuge) - # try with an atomMap with out of range indices - atomMapOutOfRange = list(atomMap) + [(100, 100)] - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure( - cycloheptylPyrazole, indazoleRef, atomMap=atomMapOutOfRange) + # try with an atomMap with out of range indices + atomMapOutOfRange = list(atomMap) + [(100, 100)] + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + atomMap=atomMapOutOfRange) - # try with an out of range confId - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure( - cycloheptylPyrazole, indazoleRef, atomMap=atomMap, confId=1) + # try with an out of range confId + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole, indazoleRef, + atomMap=atomMap, confId=1) - def testGenerate2DDepictionAllowRGroups(self): - templateMolblock = """ + def testGenerate2DDepictionAllowRGroups(self): + templateMolblock = """ RDKit 2D 9 9 0 0 0 0 0 0 0 0999 V2000 @@ -348,33 +359,33 @@ M END""" 2 7 1 0 M RGP 3 7 1 8 2 9 3 M END""" - templateRef = Chem.MolFromMolBlock(templateMolblock) - orthoMeta = Chem.MolFromSmiles("c1ccc(-c2ccc(-c3ccccc3)c(-c3ccccc3)c2)cc1") - ortho = Chem.MolFromSmiles("c1ccc(-c2ccccc2-c2ccccc2)cc1") - meta = Chem.MolFromSmiles("c1ccc(-c2cccc(-c3ccccc3)c2)cc1") - biphenyl = Chem.MolFromSmiles("c1ccccc1-c1ccccc1") - phenyl = Chem.MolFromSmiles("c1ccccc1") + templateRef = Chem.MolFromMolBlock(templateMolblock) + orthoMeta = Chem.MolFromSmiles("c1ccc(-c2ccc(-c3ccccc3)c(-c3ccccc3)c2)cc1") + ortho = Chem.MolFromSmiles("c1ccc(-c2ccccc2-c2ccccc2)cc1") + meta = Chem.MolFromSmiles("c1ccc(-c2cccc(-c3ccccc3)c2)cc1") + biphenyl = Chem.MolFromSmiles("c1ccccc1-c1ccccc1") + phenyl = Chem.MolFromSmiles("c1ccccc1") - atomMap = rdDepictor.GenerateDepictionMatching2DStructure(orthoMeta, templateRef) - self.assertEqual(orthoMeta.GetNumConformers(), 1) + atomMap = rdDepictor.GenerateDepictionMatching2DStructure(orthoMeta, templateRef) + self.assertEqual(orthoMeta.GetNumConformers(), 1) - for mol in (ortho, meta, biphenyl, phenyl): - # fails as does not match template - with self.assertRaises(ValueError): - rdDepictor.GenerateDepictionMatching2DStructure(mol, templateRef) + for mol in (ortho, meta, biphenyl, phenyl): + # fails as does not match template + with self.assertRaises(ValueError): + rdDepictor.GenerateDepictionMatching2DStructure(mol, templateRef) - # succeeds with allowRGroups=true - atomMap = rdDepictor.GenerateDepictionMatching2DStructure(mol, templateRef, allowRGroups=True) - self.assertEqual(mol.GetNumConformers(), 1) - msd = 0.0 - for refIdx, molIdx in atomMap: - msd += (templateRef.GetConformer().GetAtomPosition(refIdx) - - mol.GetConformer().GetAtomPosition(molIdx)).LengthSq() - msd /= len(atomMap) - self.assertAlmostEqual(msd, 0.0) + # succeeds with allowRGroups=true + atomMap = rdDepictor.GenerateDepictionMatching2DStructure(mol, templateRef, allowRGroups=True) + self.assertEqual(mol.GetNumConformers(), 1) + msd = 0.0 + for refIdx, molIdx in atomMap: + msd += (templateRef.GetConformer().GetAtomPosition(refIdx) - + mol.GetConformer().GetAtomPosition(molIdx)).LengthSq() + msd /= len(atomMap) + self.assertAlmostEqual(msd, 0.0) - # test that using a refPattern with R groups and a reference without works - pyridineRef = Chem.MolFromMolBlock(""" + # test that using a refPattern with R groups and a reference without works + pyridineRef = Chem.MolFromMolBlock(""" RDKit 2D 6 6 0 0 0 0 0 0 0 0999 V2000 @@ -391,21 +402,21 @@ M END""" 5 6 2 0 6 1 1 0 M END""") - genericRefPatternWithRGroups = Chem.MolFromSmarts("[*:3]a1a([*:1])aa([*:2])aa1") + genericRefPatternWithRGroups = Chem.MolFromSmarts("[*:3]a1a([*:1])aa([*:2])aa1") - for mol in (ortho, meta, biphenyl, phenyl): - atomMap = rdDepictor.GenerateDepictionMatching2DStructure( - mol, pyridineRef, refPatt=genericRefPatternWithRGroups, allowRGroups=True) - self.assertEqual(mol.GetNumConformers(), 1) - msd = 0.0 - for refIdx, molIdx in atomMap: - msd += (pyridineRef.GetConformer().GetAtomPosition(refIdx) - - mol.GetConformer().GetAtomPosition(molIdx)).LengthSq() - msd /= len(atomMap) - self.assertAlmostEqual(msd, 0.0) + for mol in (ortho, meta, biphenyl, phenyl): + atomMap = rdDepictor.GenerateDepictionMatching2DStructure( + mol, pyridineRef, refPatt=genericRefPatternWithRGroups, allowRGroups=True) + self.assertEqual(mol.GetNumConformers(), 1) + msd = 0.0 + for refIdx, molIdx in atomMap: + msd += (pyridineRef.GetConformer().GetAtomPosition(refIdx) - + mol.GetConformer().GetAtomPosition(molIdx)).LengthSq() + msd /= len(atomMap) + self.assertAlmostEqual(msd, 0.0) - def testNormalizeStraighten(self): - noradrenalineMJ = Chem.MolFromMolBlock(""" + def testNormalizeStraighten(self): + noradrenalineMJ = Chem.MolFromMolBlock(""" MJ201100 12 12 0 0 1 0 0 0 0 0999 V2000 @@ -435,338 +446,358 @@ M END""") 5 12 1 0 0 0 0 M END)""") - noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) - conformer0 = noradrenalineMJCopy.GetConformer(0) - conformer1 = Chem.Conformer(conformer0) - noradrenalineMJCopy.AddConformer(conformer1, True) - conformer1 = noradrenalineMJCopy.GetConformer(1) - self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) - self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) - scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1) - self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) - self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) - self.assertAlmostEqual(scalingFactor, 1.875, 3) - conformer2 = Chem.Conformer(conformer1) - noradrenalineMJCopy.AddConformer(conformer2, True) - conformer2 = noradrenalineMJCopy.GetConformer(2) - bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) - self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 1.513, 3) - self.assertAlmostEqual(bond10_11Conf1.y, -0.321, 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 1.340, 3) - self.assertAlmostEqual(bond10_11Conf1.y, -0.773, 3) - bond4_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(4) - self.assertAlmostEqual(bond4_11Conf1.x, 0., 3) - self.assertAlmostEqual(bond4_11Conf1.y, 1.547, 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) - bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf2.x, 1.547, 3) - self.assertAlmostEqual(bond10_11Conf2.y, 0.0, 3) - bond4_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(4) - self.assertAlmostEqual(bond4_11Conf2.x, -0.773, 3) - self.assertAlmostEqual(bond4_11Conf2.y, 1.339, 3) + noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) + conformer0 = noradrenalineMJCopy.GetConformer(0) + conformer1 = Chem.Conformer(conformer0) + noradrenalineMJCopy.AddConformer(conformer1, True) + conformer1 = noradrenalineMJCopy.GetConformer(1) + self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) + self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) + scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1) + self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) + self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) + self.assertAlmostEqual(scalingFactor, 1.875, 3) + conformer2 = Chem.Conformer(conformer1) + noradrenalineMJCopy.AddConformer(conformer2, True) + conformer2 = noradrenalineMJCopy.GetConformer(2) + bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) + self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 1.513, 3) + self.assertAlmostEqual(bond10_11Conf1.y, -0.321, 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 1.340, 3) + self.assertAlmostEqual(bond10_11Conf1.y, -0.773, 3) + bond4_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(4) + self.assertAlmostEqual(bond4_11Conf1.x, 0., 3) + self.assertAlmostEqual(bond4_11Conf1.y, 1.547, 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) + bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf2.x, 1.547, 3) + self.assertAlmostEqual(bond10_11Conf2.y, 0.0, 3) + bond4_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(4) + self.assertAlmostEqual(bond4_11Conf2.x, -0.773, 3) + self.assertAlmostEqual(bond4_11Conf2.y, 1.339, 3) - noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) - conformer0 = noradrenalineMJCopy.GetConformer(0) - conformer1 = Chem.Conformer(conformer0) - noradrenalineMJCopy.AddConformer(conformer1, True) - conformer1 = noradrenalineMJCopy.GetConformer(1) - scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1, -1) - self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) - self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) - self.assertAlmostEqual(scalingFactor, 1.875, 3) - conformer2 = Chem.Conformer(conformer1) - noradrenalineMJCopy.AddConformer(conformer2, True) - conformer2 = noradrenalineMJCopy.GetConformer(2) - bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) - self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 0.321, 3) - self.assertAlmostEqual(bond10_11Conf1.y, 1.513, 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 0.0, 3) - self.assertAlmostEqual(bond10_11Conf1.y, 1.547, 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) - bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf2.x, bond10_11Conf1.x, 3) - self.assertAlmostEqual(bond10_11Conf2.y, bond10_11Conf1.y, 3) + noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) + conformer0 = noradrenalineMJCopy.GetConformer(0) + conformer1 = Chem.Conformer(conformer0) + noradrenalineMJCopy.AddConformer(conformer1, True) + conformer1 = noradrenalineMJCopy.GetConformer(1) + scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1, -1) + self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) + self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) + self.assertAlmostEqual(scalingFactor, 1.875, 3) + conformer2 = Chem.Conformer(conformer1) + noradrenalineMJCopy.AddConformer(conformer2, True) + conformer2 = noradrenalineMJCopy.GetConformer(2) + bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) + self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 0.321, 3) + self.assertAlmostEqual(bond10_11Conf1.y, 1.513, 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 0.0, 3) + self.assertAlmostEqual(bond10_11Conf1.y, 1.547, 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) + bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf2.x, bond10_11Conf1.x, 3) + self.assertAlmostEqual(bond10_11Conf2.y, bond10_11Conf1.y, 3) - noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) - conformer0 = noradrenalineMJCopy.GetConformer(0) - conformer1 = Chem.Conformer(conformer0) - noradrenalineMJCopy.AddConformer(conformer1, True) - conformer1 = noradrenalineMJCopy.GetConformer(1) - scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1, 0, 3.0) - self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) - self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) - self.assertAlmostEqual(scalingFactor, 3.0, 3) - conformer2 = Chem.Conformer(conformer1) - noradrenalineMJCopy.AddConformer(conformer2, True) - conformer2 = noradrenalineMJCopy.GetConformer(2) - conformer3 = Chem.Conformer(conformer1) - noradrenalineMJCopy.AddConformer(conformer3, True) - conformer3 = noradrenalineMJCopy.GetConformer(3) - bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) - self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 2.475, 3) - self.assertAlmostEqual(bond10_11Conf1.y, 0., 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) - bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf1.x, 2.143, 3) - self.assertAlmostEqual(bond10_11Conf1.y, -1.237, 3) - bond4_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(4) - self.assertAlmostEqual(bond4_11Conf1.x, 0., 3) - self.assertAlmostEqual(bond4_11Conf1.y, 2.475, 3) - rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) - bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) - bond10_11Conf3 = conformer3.GetAtomPosition(11) - conformer3.GetAtomPosition(10) - self.assertAlmostEqual(bond10_11Conf2.x, bond10_11Conf3.x, 3) - self.assertAlmostEqual(bond10_11Conf2.y, bond10_11Conf3.y, 3) - bond4_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(4) - bond4_11Conf3 = conformer3.GetAtomPosition(11) - conformer3.GetAtomPosition(4) - self.assertAlmostEqual(bond4_11Conf2.x, bond4_11Conf3.x, 3) - self.assertAlmostEqual(bond4_11Conf2.y, bond4_11Conf3.y, 3) + noradrenalineMJCopy = Chem.Mol(noradrenalineMJ) + conformer0 = noradrenalineMJCopy.GetConformer(0) + conformer1 = Chem.Conformer(conformer0) + noradrenalineMJCopy.AddConformer(conformer1, True) + conformer1 = noradrenalineMJCopy.GetConformer(1) + scalingFactor = rdDepictor.NormalizeDepiction(noradrenalineMJCopy, 1, 0, 3.0) + self.assertLess(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 0), 1.e-5) + self.assertGreater(rdMolAlign.CalcRMS(noradrenalineMJ, noradrenalineMJCopy, 0, 1), 1.e-5) + self.assertAlmostEqual(scalingFactor, 3.0, 3) + conformer2 = Chem.Conformer(conformer1) + noradrenalineMJCopy.AddConformer(conformer2, True) + conformer2 = noradrenalineMJCopy.GetConformer(2) + conformer3 = Chem.Conformer(conformer1) + noradrenalineMJCopy.AddConformer(conformer3, True) + conformer3 = noradrenalineMJCopy.GetConformer(3) + bond10_11Conf0 = conformer0.GetAtomPosition(11) - conformer0.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf0.x, 0.825, 3) + self.assertAlmostEqual(bond10_11Conf0.y, 0., 3) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 2.475, 3) + self.assertAlmostEqual(bond10_11Conf1.y, 0., 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 1) + bond10_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf1.x, 2.143, 3) + self.assertAlmostEqual(bond10_11Conf1.y, -1.237, 3) + bond4_11Conf1 = conformer1.GetAtomPosition(11) - conformer1.GetAtomPosition(4) + self.assertAlmostEqual(bond4_11Conf1.x, 0., 3) + self.assertAlmostEqual(bond4_11Conf1.y, 2.475, 3) + rdDepictor.StraightenDepiction(noradrenalineMJCopy, 2, True) + bond10_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(10) + bond10_11Conf3 = conformer3.GetAtomPosition(11) - conformer3.GetAtomPosition(10) + self.assertAlmostEqual(bond10_11Conf2.x, bond10_11Conf3.x, 3) + self.assertAlmostEqual(bond10_11Conf2.y, bond10_11Conf3.y, 3) + bond4_11Conf2 = conformer2.GetAtomPosition(11) - conformer2.GetAtomPosition(4) + bond4_11Conf3 = conformer3.GetAtomPosition(11) - conformer3.GetAtomPosition(4) + self.assertAlmostEqual(bond4_11Conf2.x, bond4_11Conf3.x, 3) + self.assertAlmostEqual(bond4_11Conf2.y, bond4_11Conf3.y, 3) - @unittest.skipIf(not rdDepictor.IsCoordGenSupportAvailable(), "CoordGen not available, skipping") - def testUsingCoordGenCtxtMgr(self): - default_status = rdDepictor.GetPreferCoordGen() + @unittest.skipIf(not rdDepictor.IsCoordGenSupportAvailable(), "CoordGen not available, skipping") + def testUsingCoordGenCtxtMgr(self): + default_status = rdDepictor.GetPreferCoordGen() - # This is the default; we shouldn't have changed it - self.assertEqual(default_status, False) + # This is the default; we shouldn't have changed it + self.assertEqual(default_status, False) - with rdDepictor.UsingCoordGen(True): - current_status = rdDepictor.GetPreferCoordGen() - self.assertEqual(current_status, True) + with rdDepictor.UsingCoordGen(True): + current_status = rdDepictor.GetPreferCoordGen() + self.assertEqual(current_status, True) - current_status = rdDepictor.GetPreferCoordGen() - self.assertEqual(current_status, False) + current_status = rdDepictor.GetPreferCoordGen() + self.assertEqual(current_status, False) - rdDepictor.SetPreferCoordGen(True) + rdDepictor.SetPreferCoordGen(True) - with rdDepictor.UsingCoordGen(False): - current_status = rdDepictor.GetPreferCoordGen() - self.assertEqual(current_status, False) + with rdDepictor.UsingCoordGen(False): + current_status = rdDepictor.GetPreferCoordGen() + self.assertEqual(current_status, False) - current_status = rdDepictor.GetPreferCoordGen() - self.assertEqual(current_status, True) + current_status = rdDepictor.GetPreferCoordGen() + self.assertEqual(current_status, True) - rdDepictor.SetPreferCoordGen(default_status) + rdDepictor.SetPreferCoordGen(default_status) + def molMatchesTemplate(self, mol, template): + """ + Determines if the shape/layout of the template and mol are the same. It + is ok if the mol and template are not centered at the same place, or if + the mol and template have different orientations. + """ + match = mol.GetSubstructMatch(template) + if not match or len(match) != template.GetNumAtoms(): + return False - def molMatchesTemplate(self, mol, template): - """ - Determines if the shape/layout of the template and mol are the same. It - is ok if the mol and template are not centered at the same place, or if - the mol and template have different orientations. - """ - match = mol.GetSubstructMatch(template) - if not match or len(match) != template.GetNumAtoms(): - return False + # get positions of atoms with centroid at origin, it is ok if the + # template or mol is not centered + template_match_positions = [ + mol.GetConformer().GetPositions()[mol_at_idx] for mol_at_idx in match + ] + template_match_center = sum(template_match_positions) / len(template_match_positions) + mol_positions = [p - template_match_center for p in mol.GetConformer().GetPositions()] - # get positions of atoms with centroid at origin, it is ok if the - # template or mol is not centered - template_match_positions = [mol.GetConformer().GetPositions()[mol_at_idx] for mol_at_idx in match] - template_match_center = sum(template_match_positions) / len(template_match_positions) - mol_positions = [p - template_match_center for p in mol.GetConformer().GetPositions()] + template_center = sum(template.GetConformer().GetPositions()) / template.GetNumAtoms() + template_positions = [p - template_center for p in template.GetConformer().GetPositions()] - template_center = sum(template.GetConformer().GetPositions()) / template.GetNumAtoms() - template_positions = [p - template_center for p in template.GetConformer().GetPositions()] + # the mol may match the template but be slightly rotated about the centroid + # or reflected across the x or y axis + rotations = [[], [], [], []] + for template_idx, idx in enumerate(match): + v1 = mol_positions[idx] - # the mol may match the template but be slightly rotated about the centroid - # or reflected across the x or y axis - rotations = [[], [], [], []] - for template_idx, idx in enumerate(match): - v1 = mol_positions[idx] + # no reflection + v2 = template_positions[template_idx] + val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) + rotations[0].append(np.arccos(val)) - # no reflection - v2 = template_positions[template_idx] - val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) - rotations[0].append(np.arccos(val)) + # reflect across x-axis + v2[0] = v2[0] * -1 + val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) + rotations[1].append(np.arccos(val)) - # reflect across x-axis - v2[0] = v2[0] * -1 - val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) - rotations[1].append(np.arccos(val)) + # reflect across y-axis + v2[0] = v2[0] * -1 + v2[1] = v2[1] * -1 + val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) + rotations[2].append(np.arccos(val)) - # reflect across y-axis - v2[0] = v2[0] * -1 - v2[1] = v2[1] * -1 - val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) - rotations[2].append(np.arccos(val)) + # reflect across y-axis and x-acis + v2[0] = v2[0] * -1 + # v2[1] = v2[1] * -1 + val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) + rotations[3].append(np.arccos(val)) - - # reflect across y-axis and x-acis - v2[0] = v2[0] * -1 - # v2[1] = v2[1] * -1 - val = round(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), 4) - rotations[3].append(np.arccos(val)) - - # if all the rotations are similar, then the shape is the same - return np.any([np.allclose(r, r[0], atol=.05) for r in rotations]) + # if all the rotations are similar, then the shape is the same + return np.any([np.allclose(r, r[0], atol=.05) for r in rotations]) def assertMolMatchesCoordMap(self, mol, coord_map): - for aid, expected_position in coord_map.items(): - actual_position = mol.GetConformer().GetAtomPosition(aid) - self.assertAlmostEqual(actual_position.x, expected_position.x) - self.assertAlmostEqual(actual_position.y, actual_position.y) + for aid, expected_position in coord_map.items(): + actual_position = mol.GetConformer().GetAtomPosition(aid) + self.assertAlmostEqual(actual_position.x, expected_position.x) + self.assertAlmostEqual(actual_position.y, actual_position.y) def testUseMultipleTemplates(self): - with rdDepictor.UsingCoordGen(False): - # templates that will be linked together - template1 = Chem.MolFromSmiles("C1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2 |(-0.04,3.43,;-0.04,1.93,;-1.34,1.18,;-2.64,1.93,;-3.94,1.18,;-5.24,1.93,;-6.54,1.18,;-6.54,-0.32,;-5.24,-1.07,;-3.94,-0.32,;-2.64,-1.07,;-2.64,-2.57,;-1.34,-3.32,;-0.04,-2.57,;1.26,-3.32,;1.26,-4.82,;2.56,-5.56,;3.86,-4.82,;3.86,-3.32,;5.16,-2.57,;5.16,-1.07,;3.86,-0.32,;3.86,1.18,;2.56,1.93,;2.56,3.43,;1.26,4.18,;2.56,-2.57,)|") - template2 = Chem.MolFromSmiles("C1CCC2C(C1)C1CCN2NN1 |(-2.94,-0.77,;-2.94,0.77,;-1.6,1.54,;-0.27,0.77,;-0.27,-0.77,;-1.6,-1.54,;1.06,-1.54,;2.4,-0.77,;2.4,0.77,;1.06,1.54,;1.33,0.51,;1.33,-0.51,)|") - template3 = Chem.MolFromSmiles("C1C2CC3CC1CC3C2 |(-7.01,3.13,;-7.71,4.35,;-7.01,5.56,;-5.61,5.56,;-4.91,4.35,;-5.61,3.13,;-4.28,3.57,;-4.28,5.13,;-6.34,4.05,)|") + with rdDepictor.UsingCoordGen(False): + # templates that will be linked together + template1 = Chem.MolFromSmiles( + "C1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2 |(-0.04,3.43,;-0.04,1.93,;-1.34,1.18,;-2.64,1.93,;-3.94,1.18,;-5.24,1.93,;-6.54,1.18,;-6.54,-0.32,;-5.24,-1.07,;-3.94,-0.32,;-2.64,-1.07,;-2.64,-2.57,;-1.34,-3.32,;-0.04,-2.57,;1.26,-3.32,;1.26,-4.82,;2.56,-5.56,;3.86,-4.82,;3.86,-3.32,;5.16,-2.57,;5.16,-1.07,;3.86,-0.32,;3.86,1.18,;2.56,1.93,;2.56,3.43,;1.26,4.18,;2.56,-2.57,)|" + ) + template2 = Chem.MolFromSmiles( + "C1CCC2C(C1)C1CCN2NN1 |(-2.94,-0.77,;-2.94,0.77,;-1.6,1.54,;-0.27,0.77,;-0.27,-0.77,;-1.6,-1.54,;1.06,-1.54,;2.4,-0.77,;2.4,0.77,;1.06,1.54,;1.33,0.51,;1.33,-0.51,)|" + ) + template3 = Chem.MolFromSmiles( + "C1C2CC3CC1CC3C2 |(-7.01,3.13,;-7.71,4.35,;-7.01,5.56,;-5.61,5.56,;-4.91,4.35,;-5.61,3.13,;-4.28,3.57,;-4.28,5.13,;-6.34,4.05,)|" + ) - # example with 2 templates linked together - two_linked_templates = Chem.MolFromSmiles("NC(CCC1CCC2C(C1)C1CCN2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2") - rdDepictor.Compute2DCoords(two_linked_templates,useRingTemplates=False) - assert not self.molMatchesTemplate(two_linked_templates, template1) - assert not self.molMatchesTemplate(two_linked_templates, template2) + # example with 2 templates linked together + two_linked_templates = Chem.MolFromSmiles( + "NC(CCC1CCC2C(C1)C1CCN2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2") + rdDepictor.Compute2DCoords(two_linked_templates, useRingTemplates=False) + assert not self.molMatchesTemplate(two_linked_templates, template1) + assert not self.molMatchesTemplate(two_linked_templates, template2) - rdDepictor.Compute2DCoords(two_linked_templates,useRingTemplates=True) - assert self.molMatchesTemplate(two_linked_templates, template1) - assert self.molMatchesTemplate(two_linked_templates, template2) + rdDepictor.Compute2DCoords(two_linked_templates, useRingTemplates=True) + assert self.molMatchesTemplate(two_linked_templates, template1) + assert self.molMatchesTemplate(two_linked_templates, template2) - # example with 3 templates linked together - three_linked_templates = Chem.MolFromSmiles("NC(CCC1CCC2C(C1)C1CC(CCC(=O)CC(N)CC~C3C4CC5CC3CC5C4)N2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2") - rdDepictor.Compute2DCoords(three_linked_templates,useRingTemplates=False) - assert not self.molMatchesTemplate(three_linked_templates, template1) - assert not self.molMatchesTemplate(three_linked_templates, template2) - assert not self.molMatchesTemplate(three_linked_templates, template3) - - rdDepictor.Compute2DCoords(three_linked_templates,useRingTemplates=True) - assert self.molMatchesTemplate(three_linked_templates, template1) - assert self.molMatchesTemplate(three_linked_templates, template2) - assert self.molMatchesTemplate(three_linked_templates, template3) + # example with 3 templates linked together + three_linked_templates = Chem.MolFromSmiles( + "NC(CCC1CCC2C(C1)C1CC(CCC(=O)CC(N)CC~C3C4CC5CC3CC5C4)N2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2" + ) + rdDepictor.Compute2DCoords(three_linked_templates, useRingTemplates=False) + assert not self.molMatchesTemplate(three_linked_templates, template1) + assert not self.molMatchesTemplate(three_linked_templates, template2) + assert not self.molMatchesTemplate(three_linked_templates, template3) + rdDepictor.Compute2DCoords(three_linked_templates, useRingTemplates=True) + assert self.molMatchesTemplate(three_linked_templates, template1) + assert self.molMatchesTemplate(three_linked_templates, template2) + assert self.molMatchesTemplate(three_linked_templates, template3) def testUseTemplateAndCoordMap(self): - with rdDepictor.UsingCoordGen(False): - template1 = Chem.MolFromSmiles("C1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2 |(-0.04,3.43,;-0.04,1.93,;-1.34,1.18,;-2.64,1.93,;-3.94,1.18,;-5.24,1.93,;-6.54,1.18,;-6.54,-0.32,;-5.24,-1.07,;-3.94,-0.32,;-2.64,-1.07,;-2.64,-2.57,;-1.34,-3.32,;-0.04,-2.57,;1.26,-3.32,;1.26,-4.82,;2.56,-5.56,;3.86,-4.82,;3.86,-3.32,;5.16,-2.57,;5.16,-1.07,;3.86,-0.32,;3.86,1.18,;2.56,1.93,;2.56,3.43,;1.26,4.18,;2.56,-2.57,)|") - template2 = Chem.MolFromSmiles("C1CCC2C(C1)C1CCN2NN1 |(-2.94,-0.77,;-2.94,0.77,;-1.6,1.54,;-0.27,0.77,;-0.27,-0.77,;-1.6,-1.54,;1.06,-1.54,;2.4,-0.77,;2.4,0.77,;1.06,1.54,;1.33,0.51,;1.33,-0.51,)|") - two_linked_templates = Chem.MolFromSmiles("NC(CCC1CCC2C(C1)C1CCN2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2") + with rdDepictor.UsingCoordGen(False): + template1 = Chem.MolFromSmiles( + "C1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2 |(-0.04,3.43,;-0.04,1.93,;-1.34,1.18,;-2.64,1.93,;-3.94,1.18,;-5.24,1.93,;-6.54,1.18,;-6.54,-0.32,;-5.24,-1.07,;-3.94,-0.32,;-2.64,-1.07,;-2.64,-2.57,;-1.34,-3.32,;-0.04,-2.57,;1.26,-3.32,;1.26,-4.82,;2.56,-5.56,;3.86,-4.82,;3.86,-3.32,;5.16,-2.57,;5.16,-1.07,;3.86,-0.32,;3.86,1.18,;2.56,1.93,;2.56,3.43,;1.26,4.18,;2.56,-2.57,)|" + ) + template2 = Chem.MolFromSmiles( + "C1CCC2C(C1)C1CCN2NN1 |(-2.94,-0.77,;-2.94,0.77,;-1.6,1.54,;-0.27,0.77,;-0.27,-0.77,;-1.6,-1.54,;1.06,-1.54,;2.4,-0.77,;2.4,0.77,;1.06,1.54,;1.33,0.51,;1.33,-0.51,)|" + ) + two_linked_templates = Chem.MolFromSmiles( + "NC(CCC1CCC2C(C1)C1CCN2NN1)CC(=O)CCC1=CCCC2CCCCC2CCCCC2CCCC(CCCCCCC1)C2") - # when a coord map doesn't contain any part of a ring system, ring system - # templates should still be adhered to - linker_coord_map = { - 16: Geometry.Point2D(1.5, 0), - 17: Geometry.Point2D(1.5, 1.5), - 19: Geometry.Point2D(0, 1.5) - } - rdDepictor.Compute2DCoords(two_linked_templates, coordMap=linker_coord_map, useRingTemplates=True) - self.assertMolMatchesCoordMap(two_linked_templates, linker_coord_map) - assert self.molMatchesTemplate(two_linked_templates, template1) - assert self.molMatchesTemplate(two_linked_templates, template2) + # when a coord map doesn't contain any part of a ring system, ring system + # templates should still be adhered to + linker_coord_map = { + 16: Geometry.Point2D(1.5, 0), + 17: Geometry.Point2D(1.5, 1.5), + 19: Geometry.Point2D(0, 1.5) + } + rdDepictor.Compute2DCoords(two_linked_templates, coordMap=linker_coord_map, + useRingTemplates=True) + self.assertMolMatchesCoordMap(two_linked_templates, linker_coord_map) + assert self.molMatchesTemplate(two_linked_templates, template1) + assert self.molMatchesTemplate(two_linked_templates, template2) + # when a coord map contains a partial ring system, ring system templates + # should not be used because they could be distorted by the user-provided + # templates + ring_system_coord_map = { + 31: Geometry.Point2D(1.5, 0), + 32: Geometry.Point2D(1.5, 1.5), + 33: Geometry.Point2D(0, 1.5) + } + rdDepictor.Compute2DCoords(two_linked_templates, coordMap=ring_system_coord_map, + useRingTemplates=True) + self.assertMolMatchesCoordMap(two_linked_templates, ring_system_coord_map) + # atoms 10, 11, and 13 are in this template so the ring template should not be used + assert not self.molMatchesTemplate(two_linked_templates, template1) + assert self.molMatchesTemplate(two_linked_templates, template2) - # when a coord map contains a partial ring system, ring system templates - # should not be used because they could be distorted by the user-provided - # templates - ring_system_coord_map = { - 31: Geometry.Point2D(1.5, 0), - 32: Geometry.Point2D(1.5, 1.5), - 33: Geometry.Point2D(0, 1.5) - } - rdDepictor.Compute2DCoords(two_linked_templates, coordMap=ring_system_coord_map, useRingTemplates=True) - self.assertMolMatchesCoordMap(two_linked_templates, ring_system_coord_map) - # atoms 10, 11, and 13 are in this template so the ring template should not be used - assert not self.molMatchesTemplate(two_linked_templates, template1) - assert self.molMatchesTemplate(two_linked_templates, template2) - - # when a coord map contains a single atom, even if it is a part of a ring - # system, ring system templates should be used and the coord map should be - # followed - single_atom_coord_map = { - 10: Geometry.Point2D(0, 0) - } - rdDepictor.Compute2DCoords(two_linked_templates, coordMap=single_atom_coord_map, useRingTemplates=True) - self.assertMolMatchesCoordMap(two_linked_templates, single_atom_coord_map) - assert self.molMatchesTemplate(two_linked_templates, template1) - assert self.molMatchesTemplate(two_linked_templates, template2) - + # when a coord map contains a single atom, even if it is a part of a ring + # system, ring system templates should be used and the coord map should be + # followed + single_atom_coord_map = {10: Geometry.Point2D(0, 0)} + rdDepictor.Compute2DCoords(two_linked_templates, coordMap=single_atom_coord_map, + useRingTemplates=True) + self.assertMolMatchesCoordMap(two_linked_templates, single_atom_coord_map) + assert self.molMatchesTemplate(two_linked_templates, template1) + assert self.molMatchesTemplate(two_linked_templates, template2) def testSetRingSystemTemplates(self): - with rdDepictor.UsingCoordGen(False): - mol = Chem.MolFromSmiles("C1CC2CCOC3OC4CCC(C1)C23OO4") - default_template = Chem.MolFromSmiles("C1CC2CCOC3OC4CCC(C1)C23OO4 |(3.53,-1.22,;3.53,0.3,;2.21,1.06,;2.21,2.59,;0.89,3.35,;-0.43,2.59,;-0.43,1.06,;-1.9,0.65,;-2.47,-0.76,;-1.71,-2.08,;-0.2,-2.29,;0.89,-1.22,;2.21,-1.99,;0.89,0.3,;0.12,-0.83,;-1.19,-1.25,)|") - user_provided_template = Chem.MolFromSmiles("C1CC2CCOC3OC4CCC(C1)C23OO4 |(-0.5537,-3.1595,;-1.6057,-2.003,;-1.4262,-0.4072,;-2.9804,0.0271,;-3.5191,1.502,;-2.2028,2.3562,;-0.6818,1.8511,;1.0592,1.4391,;2.6123,1.8366,;3.5191,0.5341,;2.6067,-0.7521,;1.0061,-0.773,;0.7888,-2.3546,;-0.0405,0.5251,;0.4049,2.3,;1.7604,3.1594,)|") + with rdDepictor.UsingCoordGen(False): + mol = Chem.MolFromSmiles("C1CC2CCOC3OC4CCC(C1)C23OO4") + default_template = Chem.MolFromSmiles( + "C1CC2CCOC3OC4CCC(C1)C23OO4 |(3.53,-1.22,;3.53,0.3,;2.21,1.06,;2.21,2.59,;0.89,3.35,;-0.43,2.59,;-0.43,1.06,;-1.9,0.65,;-2.47,-0.76,;-1.71,-2.08,;-0.2,-2.29,;0.89,-1.22,;2.21,-1.99,;0.89,0.3,;0.12,-0.83,;-1.19,-1.25,)|" + ) + user_provided_template = Chem.MolFromSmiles( + "C1CC2CCOC3OC4CCC(C1)C23OO4 |(-0.5537,-3.1595,;-1.6057,-2.003,;-1.4262,-0.4072,;-2.9804,0.0271,;-3.5191,1.502,;-2.2028,2.3562,;-0.6818,1.8511,;1.0592,1.4391,;2.6123,1.8366,;3.5191,0.5341,;2.6067,-0.7521,;1.0061,-0.773,;0.7888,-2.3546,;-0.0405,0.5251,;0.4049,2.3,;1.7604,3.1594,)|" + ) - # default templates are loaded automatically - rdDepictor.Compute2DCoords(mol, useRingTemplates=True) - assert self.molMatchesTemplate(mol, default_template) + # default templates are loaded automatically + rdDepictor.Compute2DCoords(mol, useRingTemplates=True) + assert self.molMatchesTemplate(mol, default_template) - # set to user-provided template, this will delete default templates - fpath = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', 'ring_system_templates.smi') - rdDepictor.SetRingSystemTemplates(fpath) - rdDepictor.Compute2DCoords(mol, useRingTemplates=True) - assert self.molMatchesTemplate(mol, user_provided_template) + # set to user-provided template, this will delete default templates + fpath = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + 'ring_system_templates.smi') + rdDepictor.SetRingSystemTemplates(fpath) + rdDepictor.Compute2DCoords(mol, useRingTemplates=True) + assert self.molMatchesTemplate(mol, user_provided_template) - # set back to default ring system templates - rdDepictor.LoadDefaultRingSystemTemplates() - rdDepictor.Compute2DCoords(mol, useRingTemplates=True) - assert self.molMatchesTemplate(mol, default_template) + # set back to default ring system templates + rdDepictor.LoadDefaultRingSystemTemplates() + rdDepictor.Compute2DCoords(mol, useRingTemplates=True) + assert self.molMatchesTemplate(mol, default_template) def testSetBadRingSystemTemplates(self): - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - tmp_file.write(b"invalidsmiles") - tmp_file.seek(0) - with self.assertRaises(ValueError): - rdDepictor.SetRingSystemTemplates(tmp_file.name) + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + tmp_file.write(b"invalidsmiles") + tmp_file.seek(0) + with self.assertRaises(ValueError): + rdDepictor.SetRingSystemTemplates(tmp_file.name) - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - # not a ring system - tmp_file.write(b"C |(-0.5537,-3.1595)|") - tmp_file.seek(0) - with self.assertRaises(ValueError): - rdDepictor.SetRingSystemTemplates(tmp_file.name) + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + # not a ring system + tmp_file.write(b"C |(-0.5537,-3.1595)|") + tmp_file.seek(0) + with self.assertRaises(ValueError): + rdDepictor.SetRingSystemTemplates(tmp_file.name) - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - # no coordinates - tmp_file.write(b"C1CCCCC1") - tmp_file.seek(0) - with self.assertRaises(ValueError): - rdDepictor.SetRingSystemTemplates(tmp_file.name) + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + # no coordinates + tmp_file.write(b"C1CCCCC1") + tmp_file.seek(0) + with self.assertRaises(ValueError): + rdDepictor.SetRingSystemTemplates(tmp_file.name) - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - # bridged ring system - tmp_file.write(b"c1ccccc1-c1ccccc1") - tmp_file.seek(0) - with self.assertRaises(ValueError): - rdDepictor.SetRingSystemTemplates(tmp_file.name) + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + # bridged ring system + tmp_file.write(b"c1ccccc1-c1ccccc1") + tmp_file.seek(0) + with self.assertRaises(ValueError): + rdDepictor.SetRingSystemTemplates(tmp_file.name) - # set back to default ring system templates - rdDepictor.LoadDefaultRingSystemTemplates() + # set back to default ring system templates + rdDepictor.LoadDefaultRingSystemTemplates() def testAddRingSystemTemplates(self): - with rdDepictor.UsingCoordGen(False): - # this is what is in the default ring system templates - mol = Chem.MolFromSmiles("C1CCCN2CCCC(CC1)C2") - default_template = Chem.MolFromSmiles("C1CCCN2CCCC(CC1)C2 |(2.64,1.53,;3.1,0.11,;2.52,-1.28,;1.17,-1.94,;-0.28,-1.57,;-1.7,-2.03,;-2.82,-1.03,;-2.51,0.44,;-1.08,0.9,;-0.13,2.06,;1.35,2.31,;0.03,-0.1,)|") - new_template_smi = "C1CCCN2CCCC(CC1)C2 |(3.2585,-0.7733,;3.2585,0.7733,;2.225,1.9568,;0.6347,2.17,;-0.8068,1.4216,;-2.4372,1.3464,;-3.2585,0,;-2.4372,-1.3464,;-0.8068,-1.4216,;0.6347,-2.17,;2.225,-1.9569,;-0.6701,0,)|" - new_template = Chem.MolFromSmiles(new_template_smi) + with rdDepictor.UsingCoordGen(False): + # this is what is in the default ring system templates + mol = Chem.MolFromSmiles("C1CCCN2CCCC(CC1)C2") + default_template = Chem.MolFromSmiles( + "C1CCCN2CCCC(CC1)C2 |(2.64,1.53,;3.1,0.11,;2.52,-1.28,;1.17,-1.94,;-0.28,-1.57,;-1.7,-2.03,;-2.82,-1.03,;-2.51,0.44,;-1.08,0.9,;-0.13,2.06,;1.35,2.31,;0.03,-0.1,)|" + ) + new_template_smi = "C1CCCN2CCCC(CC1)C2 |(3.2585,-0.7733,;3.2585,0.7733,;2.225,1.9568,;0.6347,2.17,;-0.8068,1.4216,;-2.4372,1.3464,;-3.2585,0,;-2.4372,-1.3464,;-0.8068,-1.4216,;0.6347,-2.17,;2.225,-1.9569,;-0.6701,0,)|" + new_template = Chem.MolFromSmiles(new_template_smi) - rdDepictor.Compute2DCoords(mol, useRingTemplates=True) - assert self.molMatchesTemplate(mol, default_template) + rdDepictor.Compute2DCoords(mol, useRingTemplates=True) + assert self.molMatchesTemplate(mol, default_template) - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - tmp_file.write(new_template_smi.encode('utf-8')) - tmp_file.seek(0) - rdDepictor.AddRingSystemTemplates(tmp_file.name) + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + tmp_file.write(new_template_smi.encode('utf-8')) + tmp_file.seek(0) + rdDepictor.AddRingSystemTemplates(tmp_file.name) - # now the new template should be used - rdDepictor.Compute2DCoords(mol, useRingTemplates=True) - assert self.molMatchesTemplate(mol, new_template) + # now the new template should be used + rdDepictor.Compute2DCoords(mol, useRingTemplates=True) + assert self.molMatchesTemplate(mol, new_template) + + # set back to default ring system templates + rdDepictor.LoadDefaultRingSystemTemplates() - # set back to default ring system templates - rdDepictor.LoadDefaultRingSystemTemplates() if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/Code/GraphMol/Deprotect/Wrap/rough_test.py b/Code/GraphMol/Deprotect/Wrap/rough_test.py index 5a4c2be97..76125d8e8 100644 --- a/Code/GraphMol/Deprotect/Wrap/rough_test.py +++ b/Code/GraphMol/Deprotect/Wrap/rough_test.py @@ -1,4 +1,5 @@ import unittest + from rdkit import Chem from rdkit.Chem import rdDeprotect as rd diff --git a/Code/GraphMol/Descriptors/Wrap/test3D.py b/Code/GraphMol/Descriptors/Wrap/test3D.py index 5d3b44f7c..8f3d4d497 100644 --- a/Code/GraphMol/Descriptors/Wrap/test3D.py +++ b/Code/GraphMol/Descriptors/Wrap/test3D.py @@ -1,14 +1,13 @@ -from rdkit import Chem -from rdkit import rdBase -from rdkit import RDConfig import os -from rdkit.Chem import rdMolDescriptors as rdMD +from rdkit import Chem, RDConfig, rdBase from rdkit.Chem import AllChem +from rdkit.Chem import rdMolDescriptors as rdMD haveDescrs3D = hasattr(rdMD, 'CalcAUTOCORR3D') -import time, unittest +import time +import unittest def _gen3D(m, is3d, calculator): diff --git a/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py b/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py index 65300fc9c..d17957490 100644 --- a/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py +++ b/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py @@ -1,16 +1,15 @@ # $Id$ # -from rdkit import Chem -from rdkit.Chem import rdMolDescriptors as rdMD, Descriptors -from rdkit.Chem import AllChem -from rdkit import DataStructs -from rdkit import RDConfig -from rdkit.Geometry import rdGeometry as rdG import unittest from os import environ from pathlib import Path +from rdkit import Chem, DataStructs, RDConfig +from rdkit.Chem import AllChem, Descriptors +from rdkit.Chem import rdMolDescriptors as rdMD +from rdkit.Geometry import rdGeometry as rdG + haveBCUT = hasattr(rdMD, 'BCUT2D') diff --git a/Code/GraphMol/Descriptors/test3D.py b/Code/GraphMol/Descriptors/test3D.py index 94236f1d5..6754ada8c 100644 --- a/Code/GraphMol/Descriptors/test3D.py +++ b/Code/GraphMol/Descriptors/test3D.py @@ -1,52 +1,54 @@ -from rdkit import Chem -from rdkit import rdBase -from rdkit import RDConfig import os - -from rdkit.Chem import rdMolDescriptors as rdMD -from rdkit.Chem import AllChem - import time -def get3D(m,is3d): - if not is3d: - m = Chem.AddHs(m) - # define the new code from RDKit Molecule 3D ETKDG. - ps = AllChem.ETKDG() - ps.randomSeed = 0xf00d - AllChem.EmbedMolecule(m,ps) - r= rdMD.CalcAUTOCORR3D(m)+rdMD.CalcRDF(m)+rdMD.CalcMORSE(m)+rdMD.CalcWHIM(m)+rdMD.CalcGETAWAY(m, precision=0.001) - return r +from rdkit import Chem, RDConfig, rdBase +from rdkit.Chem import AllChem +from rdkit.Chem import rdMolDescriptors as rdMD + + +def get3D(m, is3d): + if not is3d: + m = Chem.AddHs(m) + # define the new code from RDKit Molecule 3D ETKDG. + ps = AllChem.ETKDG() + ps.randomSeed = 0xf00d + AllChem.EmbedMolecule(m, ps) + r = rdMD.CalcAUTOCORR3D(m) + rdMD.CalcRDF(m) + rdMD.CalcMORSE(m) + rdMD.CalcWHIM( + m) + rdMD.CalcGETAWAY(m, precision=0.001) + return r + def generateAll(): - filename='/Users/GVALMTGG/Github/rdkit_mine/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf' - suppl = Chem.SDMolSupplier(filename,removeHs=False) - mols = [x for x in suppl] - start = time.time() - for m in mols: - r= get3D(m,True) - print(r) - end = time.time() - print end-start + filename = '/Users/GVALMTGG/Github/rdkit_mine/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf' + suppl = Chem.SDMolSupplier(filename, removeHs=False) + mols = [x for x in suppl] + start = time.time() + for m in mols: + r = get3D(m, True) + print(r) + end = time.time() + print end - start + def simple_case(): - start = time.time() - smi = 'CCC(C)COCCCC' - m = Chem.MolFromSmiles(smi) - T = get3D(m,False) - print T - end = time.time() - print end-start + start = time.time() + smi = 'CCC(C)COCCCC' + m = Chem.MolFromSmiles(smi) + T = get3D(m, False) + print T + end = time.time() + print end - start -if(__name__=='__main__'): - # FIX: We need to actually add some tests here, but this doees not need to - # to be done until the C++ code and tests are straightened out. - generateAll() - start = time.time() - smi = 'CCC(C)COCCCC' - m = Chem.MolFromSmiles(smi) - T = get3D(m,False) - print T - end = time.time() - print end-start +if (__name__ == '__main__'): + # FIX: We need to actually add some tests here, but this doees not need to + # to be done until the C++ code and tests are straightened out. + generateAll() + + start = time.time() + smi = 'CCC(C)COCCCC' + m = Chem.MolFromSmiles(smi) + T = get3D(m, False) + print T + end = time.time() + print end - start diff --git a/Code/GraphMol/Descriptors/test3D_old.py b/Code/GraphMol/Descriptors/test3D_old.py index 7d6fd3619..40395b8ac 100644 --- a/Code/GraphMol/Descriptors/test3D_old.py +++ b/Code/GraphMol/Descriptors/test3D_old.py @@ -1,96 +1,113 @@ -from rdkit import Chem -from rdkit import rdBase - -from rdkit.Chem import rdMolDescriptors as rdMD -from rdkit.Chem import AllChem -from rdkit.Chem.EState import EStateIndices -from rdkit.Chem.EState import AtomTypes - import time + +from rdkit import Chem, rdBase +from rdkit.Chem import AllChem +from rdkit.Chem import rdMolDescriptors as rdMD +from rdkit.Chem.EState import AtomTypes, EStateIndices + print rdBase.rdkitVersion print rdBase.boostVersion + def getEState(mol): - return EStateIndices(mol) + return EStateIndices(mol) -def localopt(mol, steps = 500): - if mol.GetNumConformers() == 0: - mol=make3D(mol) - AllChem.MMFFOptimizeMolecule(mol, maxIters = steps) - return mol -def make3D(mol, steps = 50): - mol = Chem.AddHs(mol) - success = AllChem.EmbedMolecule(mol) - if success == -1: # Failed - success = AllChem.EmbedMolecule(mol, useRandomCoords = True) - if success == -1: - raise Error, "Embedding failed!" - mol = localopt(mol, steps) - return mol +def localopt(mol, steps=500): + if mol.GetNumConformers() == 0: + mol = make3D(mol) + AllChem.MMFFOptimizeMolecule(mol, maxIters=steps) + return mol + + +def make3D(mol, steps=50): + mol = Chem.AddHs(mol) + success = AllChem.EmbedMolecule(mol) + if success == -1: # Failed + success = AllChem.EmbedMolecule(mol, useRandomCoords=True) + if success == -1: + raise Error, "Embedding failed!" + mol = localopt(mol, steps) + return mol + + +def get3D(m, is3d): + if not is3d: + m = Chem.AddHs(m) + AllChem.EmbedMolecule(m) + AllChem.MMFFOptimizeMolecule(m) + r = rdMD.CalcAUTOCORR3D(m) + rdMD.CalcRDF(m) + rdMD.CalcMORSE(m) + rdMD.CalcWHIM( + m) + rdMD.CalcGETAWAY(m) + return r -def get3D(m,is3d): - if not is3d: - m = Chem.AddHs(m) - AllChem.EmbedMolecule(m) - AllChem.MMFFOptimizeMolecule(m) - r= rdMD.CalcAUTOCORR3D(m)+rdMD.CalcRDF(m)+rdMD.CalcMORSE(m)+rdMD.CalcWHIM(m)+rdMD.CalcGETAWAY(m) - return r def generateALL(): - m = Chem.MolFromSmiles('Cc1ccccc1') - thefile = open('testAC.txt', 'w') - filename="/Users/mbp/Github/rdkit_mine/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf" - suppl = Chem.SDMolSupplier(filename,removeHs=False) - mols = [x for x in suppl] - start = time.time() - for m in mols: - r= get3D(m,True) - for item in r: - thefile.write("%.3f," % item) - thefile.write("\n") + m = Chem.MolFromSmiles('Cc1ccccc1') + thefile = open('testAC.txt', 'w') + filename = "/Users/mbp/Github/rdkit_mine/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf" + suppl = Chem.SDMolSupplier(filename, removeHs=False) + mols = [x for x in suppl] + start = time.time() + for m in mols: + r = get3D(m, True) + for item in r: + thefile.write("%.3f," % item) + thefile.write("\n") + + end = time.time() + print end - start - end = time.time() - print end - start thefile = open('testSMWHIM.txt', 'w') writer = Chem.SDWriter('3Dsmallmol.sdf') -A=['[H][H]','B','O=O','C','CC','CCC','CCCC','CCCCC','CCCCCC','CO','CCO','CCCO','CCCCO','CCCCCO','CCCCCCO','CCl','CCCl','CCCCl','CCCCCl','CCCCCCl','CCCCCCCl','CBr','CCBr','CCCBr','CCCCBr','CCCCCBr','CCCCCCBr','CI','CCI','CCCI','CCCCI','CCCCCI','CCCCCCI','CF','CCF','CCCF','CCCCF','CCCCCF','CCCCCCF','CS','CCS','CCCS','CCCCS','CCCCCS','CCCCCCS','CN','CCN','CCCN','CCCCN','CCCCCN','CCCCCCN'] +A = [ + '[H][H]', 'B', 'O=O', 'C', 'CC', 'CCC', 'CCCC', 'CCCCC', 'CCCCCC', 'CO', 'CCO', 'CCCO', 'CCCCO', + 'CCCCCO', 'CCCCCCO', 'CCl', 'CCCl', 'CCCCl', 'CCCCCl', 'CCCCCCl', 'CCCCCCCl', 'CBr', 'CCBr', + 'CCCBr', 'CCCCBr', 'CCCCCBr', 'CCCCCCBr', 'CI', 'CCI', 'CCCI', 'CCCCI', 'CCCCCI', 'CCCCCCI', 'CF', + 'CCF', 'CCCF', 'CCCCF', 'CCCCCF', 'CCCCCCF', 'CS', 'CCS', 'CCCS', 'CCCCS', 'CCCCCS', 'CCCCCCS', + 'CN', 'CCN', 'CCCN', 'CCCCN', 'CCCCCN', 'CCCCCCN' +] for smi in A: - m = Chem.MolFromSmiles(smi) - m=localopt(m,100) - #r=get3D(m,True) - print smi - print "---------" - r=rdMD.CalcWHIM(m) - print "Ei:"+str(r[0])+ "," + str(r[1]) + "," + str(r[2])+ "\n" - print "Gi:"+str(r[5])+ "," + str(r[6]) + "," + str(r[7])+ "\n" - print "SI:"+str(rdMD.CalcSpherocityIndex(m)) - print "AS:"+str(rdMD.CalcAsphericity(m)) - print "EX:"+str(rdMD.CalcEccentricity(m)) - for item in r: - thefile.write("%.3f," % item) - thefile.write("\n") - #m.SetProp("smi", smi) - #writer.write(m) - + m = Chem.MolFromSmiles(smi) + m = localopt(m, 100) + #r=get3D(m,True) + print smi + print "---------" + r = rdMD.CalcWHIM(m) + print "Ei:" + str(r[0]) + "," + str(r[1]) + "," + str(r[2]) + "\n" + print "Gi:" + str(r[5]) + "," + str(r[6]) + "," + str(r[7]) + "\n" + print "SI:" + str(rdMD.CalcSpherocityIndex(m)) + print "AS:" + str(rdMD.CalcAsphericity(m)) + print "EX:" + str(rdMD.CalcEccentricity(m)) + for item in r: + thefile.write("%.3f," % item) + thefile.write("\n") + #m.SetProp("smi", smi) + #writer.write(m) thefile = open('testBPA.txt', 'w') writer = Chem.SDWriter('3DBPAmol.sdf') -B=['CN(C)CC(Br)c1ccccc1','CN(C)CC(Br)c1ccc(F)cc1','CN(C)CC(Br)c1ccc(Cl)cc1','CN(C)CC(Br)c1ccc(Cl)cc1','CN(C)CC(Br)c1ccc(I)cc1','CN(C)CC(Br)c1ccc(C)cc1','CN(C)CC(Br)c1cccc(F)c1','CN(C)CC(Br)c1cccc(Cl)c1','CN(C)CC(Br)c1cccc(Br)c1','CN(C)CC(Br)c1cccc(I)c1','CN(C)CC(Br)c1cccc(C)c1','CN(C)CC(Br)c1ccc(F)c(Cl)c1','CN(C)CC(Br)c1ccc(F)c(Br)c1','CN(C)CC(Br)c1ccc(F)c(C)c1','CN(C)CC(Br)c1ccc(Cl)c(Cl)c1','CN(C)CC(Br)c1ccc(Cl)c(Br)c1','CN(C)CC(Br)c1ccc(Cl)c(C)c1','CN(C)CC(Br)c1ccc(Br)c(Cl)c1','CN(C)CC(Br)c1ccc(Br)c(Br)c1','CN(C)CC(Br)c1ccc(Br)c(C)c1','CN(C)CC(Br)c1ccc(C)c(C)c1','CN(C)CC(Br)c1ccc(C)c(Br)c1'] +B = [ + 'CN(C)CC(Br)c1ccccc1', 'CN(C)CC(Br)c1ccc(F)cc1', 'CN(C)CC(Br)c1ccc(Cl)cc1', + 'CN(C)CC(Br)c1ccc(Cl)cc1', 'CN(C)CC(Br)c1ccc(I)cc1', 'CN(C)CC(Br)c1ccc(C)cc1', + 'CN(C)CC(Br)c1cccc(F)c1', 'CN(C)CC(Br)c1cccc(Cl)c1', 'CN(C)CC(Br)c1cccc(Br)c1', + 'CN(C)CC(Br)c1cccc(I)c1', 'CN(C)CC(Br)c1cccc(C)c1', 'CN(C)CC(Br)c1ccc(F)c(Cl)c1', + 'CN(C)CC(Br)c1ccc(F)c(Br)c1', 'CN(C)CC(Br)c1ccc(F)c(C)c1', 'CN(C)CC(Br)c1ccc(Cl)c(Cl)c1', + 'CN(C)CC(Br)c1ccc(Cl)c(Br)c1', 'CN(C)CC(Br)c1ccc(Cl)c(C)c1', 'CN(C)CC(Br)c1ccc(Br)c(Cl)c1', + 'CN(C)CC(Br)c1ccc(Br)c(Br)c1', 'CN(C)CC(Br)c1ccc(Br)c(C)c1', 'CN(C)CC(Br)c1ccc(C)c(C)c1', + 'CN(C)CC(Br)c1ccc(C)c(Br)c1' +] for smi in B: - m = Chem.MolFromSmiles(smi) - m=localopt(m,100) - #r=get3D(m,True) - r=rdMD.CalcWHIM(m) - for item in r: - thefile.write("%.3f," % item) - thefile.write("\n") - #m.SetProp("smi", smi) - #writer.write(m) + m = Chem.MolFromSmiles(smi) + m = localopt(m, 100) + #r=get3D(m,True) + r = rdMD.CalcWHIM(m) + for item in r: + thefile.write("%.3f," % item) + thefile.write("\n") + #m.SetProp("smi", smi) + #writer.write(m) - - -A="G1w,G2w,G3w,Gw" +A = "G1w,G2w,G3w,Gw" print dir(rdMD) diff --git a/Code/GraphMol/Descriptors/test_data/pmi.py b/Code/GraphMol/Descriptors/test_data/pmi.py index 8809253de..4246d28a4 100644 --- a/Code/GraphMol/Descriptors/test_data/pmi.py +++ b/Code/GraphMol/Descriptors/test_data/pmi.py @@ -7,10 +7,11 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. +import numpy as np + # generates reference data for the PMI descriptors from rdkit import Chem from rdkit.Chem import AllChem -import numpy as np def GetMoments(mol, includeWeights): @@ -21,18 +22,18 @@ def GetMoments(mol, includeWeights): masses = [1.0] * mol.GetNumAtoms() ps = conf.GetPositions() - mps = [x*y for x,y in zip(ps,masses)] - centroid = np.sum(mps,axis=0)/sum(masses) + mps = [x * y for x, y in zip(ps, masses)] + centroid = np.sum(mps, axis=0) / sum(masses) cps = ps - centroid xx = xy = xz = yy = yz = zz = 0.0 - for m,p in zip(masses,cps): - xx += m*(p[1]*p[1] + p[2]*p[2]) - yy += m*(p[0]*p[0] + p[2]*p[2]) - zz += m*(p[1]*p[1] + p[0]*p[0]) - xy -= m*p[0]*p[1] - xz -= m*p[0]*p[2] - yz -= m*p[1]*p[2] - covm = np.array([[xx,xy,xz],[xy,yy,yz],[xz,yz,zz]]) + for m, p in zip(masses, cps): + xx += m * (p[1] * p[1] + p[2] * p[2]) + yy += m * (p[0] * p[0] + p[2] * p[2]) + zz += m * (p[1] * p[1] + p[0] * p[0]) + xy -= m * p[0] * p[1] + xz -= m * p[0] * p[2] + yz -= m * p[1] * p[2] + covm = np.array([[xx, xy, xz], [xy, yy, yz], [xz, yz, zz]]) res = np.linalg.eigvals(covm) return sorted(res) diff --git a/Code/GraphMol/DetermineBonds/Wrap/testDetermineBonds.py b/Code/GraphMol/DetermineBonds/Wrap/testDetermineBonds.py index 1b5ac0bc8..ab7beb752 100644 --- a/Code/GraphMol/DetermineBonds/Wrap/testDetermineBonds.py +++ b/Code/GraphMol/DetermineBonds/Wrap/testDetermineBonds.py @@ -7,12 +7,12 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. -from rdkit import RDConfig -from rdkit import Chem -from rdkit.Chem import rdDetermineBonds -import unittest -import os import glob +import os +import unittest + +from rdkit import Chem, RDConfig +from rdkit.Chem import rdDetermineBonds class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py index 7cce5a082..39985967c 100644 --- a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py +++ b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py @@ -1,18 +1,16 @@ -import unittest -import os import copy import math +import os +import pickle +import unittest + import numpy -import pickle - -from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import rdDistGeom, ChemicalForceFields, rdMolAlign import rdkit.DistanceGeometry as DG -from rdkit import RDConfig, rdBase -from rdkit.Geometry import rdGeometry as geom +from rdkit import Chem, RDConfig, rdBase +from rdkit.Chem import AllChem, ChemicalForceFields, rdDistGeom, rdMolAlign from rdkit.Geometry import ComputeSignedDihedralAngle +from rdkit.Geometry import rdGeometry as geom from rdkit.RDLogger import logger logger = logger() @@ -632,6 +630,7 @@ class TestCase(unittest.TestCase): """ test for a macrocycle molecule, ETKDGv3 samples trans amide """ + def get_atom_mapping(mol, smirks="[O:1]=[C:2]@;-[NX3:3]-[H:4]"): qmol = Chem.MolFromSmarts(smirks) ind_map = {} @@ -677,17 +676,18 @@ class TestCase(unittest.TestCase): self.assertEqual(list(ts[0]["signs"]), [1, 1, 1, 1, 1, 1]) self.assertEqual(list(ts[0]["atomIndices"]), [0, 1, 2, 3]) + def testTrackFailures(self): - params = AllChem.ETKDGv3() - params.trackFailures = True - params.maxIterations = 50 - params.randomSeed = 42 - mol = Chem.MolFromSmiles('C=CC1=C(N)Oc2cc1c(-c1cc(C(C)O)cc(=O)cc1C1NCC(=O)N1)c(OC)c2OC') - mol = Chem.AddHs(mol) - AllChem.EmbedMolecule(mol, params) - cnts = params.GetFailureCounts() - self.assertGreater(cnts[AllChem.EmbedFailureCauses.INITIAL_COORDS],5) - self.assertGreater(cnts[AllChem.EmbedFailureCauses.ETK_MINIMIZATION],10) + params = AllChem.ETKDGv3() + params.trackFailures = True + params.maxIterations = 50 + params.randomSeed = 42 + mol = Chem.MolFromSmiles('C=CC1=C(N)Oc2cc1c(-c1cc(C(C)O)cc(=O)cc1C1NCC(=O)N1)c(OC)c2OC') + mol = Chem.AddHs(mol) + AllChem.EmbedMolecule(mol, params) + cnts = params.GetFailureCounts() + self.assertGreater(cnts[AllChem.EmbedFailureCauses.INITIAL_COORDS], 5) + self.assertGreater(cnts[AllChem.EmbedFailureCauses.ETK_MINIMIZATION], 10) if __name__ == '__main__': diff --git a/Code/GraphMol/FMCS/Wrap/testFMCS.py b/Code/GraphMol/FMCS/Wrap/testFMCS.py index 7a263713a..58a1ee61d 100644 --- a/Code/GraphMol/FMCS/Wrap/testFMCS.py +++ b/Code/GraphMol/FMCS/Wrap/testFMCS.py @@ -1,700 +1,729 @@ -import unittest import sys +import unittest from io import StringIO + from rdkit import Chem from rdkit.Chem import rdFMCS class BondMatchOrderMatrix: - def __init__(self, ignoreAromatization): - self.MatchMatrix = [[False]*(Chem.BondType.ZERO + 1) - for i in range(Chem.BondType.ZERO + 1)] - for i in range(Chem.BondType.ZERO + 1): - # fill cells of the same and unspecified type - self.MatchMatrix[i][i] = True - self.MatchMatrix[Chem.BondType.UNSPECIFIED][i] = \ - self.MatchMatrix[i][Chem.BondType.UNSPECIFIED] = True - self.MatchMatrix[Chem.BondType.ZERO][i] = \ - self.MatchMatrix[i][Chem.BondType.ZERO] = True - if ignoreAromatization: - self.MatchMatrix[Chem.BondType.SINGLE][Chem.BondType.AROMATIC] = \ - self.MatchMatrix[Chem.BondType.AROMATIC][Chem.BondType.SINGLE] = True - self.MatchMatrix[Chem.BondType.DOUBLE][Chem.BondType.AROMATIC] = \ - self.MatchMatrix[Chem.BondType.AROMATIC][Chem.BondType.DOUBLE] = True - self.MatchMatrix[Chem.BondType.SINGLE][Chem.BondType.ONEANDAHALF] = \ - self.MatchMatrix[Chem.BondType.ONEANDAHALF][Chem.BondType.SINGLE] = True - self.MatchMatrix[Chem.BondType.DOUBLE][Chem.BondType.TWOANDAHALF] = \ - self.MatchMatrix[Chem.BondType.TWOANDAHALF][Chem.BondType.DOUBLE] = True - self.MatchMatrix[Chem.BondType.TRIPLE][Chem.BondType.THREEANDAHALF] = \ - self.MatchMatrix[Chem.BondType.THREEANDAHALF][Chem.BondType.TRIPLE] = True - self.MatchMatrix[Chem.BondType.QUADRUPLE][Chem.BondType.FOURANDAHALF] = \ - self.MatchMatrix[Chem.BondType.FOURANDAHALF][Chem.BondType.QUADRUPLE] = True - self.MatchMatrix[Chem.BondType.QUINTUPLE][Chem.BondType.FIVEANDAHALF] = \ - self.MatchMatrix[Chem.BondType.FIVEANDAHALF][Chem.BondType.QUINTUPLE] = True - def isEqual(self, i, j): - return self.MatchMatrix[i][j] + + def __init__(self, ignoreAromatization): + self.MatchMatrix = [[False] * (Chem.BondType.ZERO + 1) for i in range(Chem.BondType.ZERO + 1)] + for i in range(Chem.BondType.ZERO + 1): + # fill cells of the same and unspecified type + self.MatchMatrix[i][i] = True + self.MatchMatrix[Chem.BondType.UNSPECIFIED][i] = \ + self.MatchMatrix[i][Chem.BondType.UNSPECIFIED] = True + self.MatchMatrix[Chem.BondType.ZERO][i] = \ + self.MatchMatrix[i][Chem.BondType.ZERO] = True + if ignoreAromatization: + self.MatchMatrix[Chem.BondType.SINGLE][Chem.BondType.AROMATIC] = \ + self.MatchMatrix[Chem.BondType.AROMATIC][Chem.BondType.SINGLE] = True + self.MatchMatrix[Chem.BondType.DOUBLE][Chem.BondType.AROMATIC] = \ + self.MatchMatrix[Chem.BondType.AROMATIC][Chem.BondType.DOUBLE] = True + self.MatchMatrix[Chem.BondType.SINGLE][Chem.BondType.ONEANDAHALF] = \ + self.MatchMatrix[Chem.BondType.ONEANDAHALF][Chem.BondType.SINGLE] = True + self.MatchMatrix[Chem.BondType.DOUBLE][Chem.BondType.TWOANDAHALF] = \ + self.MatchMatrix[Chem.BondType.TWOANDAHALF][Chem.BondType.DOUBLE] = True + self.MatchMatrix[Chem.BondType.TRIPLE][Chem.BondType.THREEANDAHALF] = \ + self.MatchMatrix[Chem.BondType.THREEANDAHALF][Chem.BondType.TRIPLE] = True + self.MatchMatrix[Chem.BondType.QUADRUPLE][Chem.BondType.FOURANDAHALF] = \ + self.MatchMatrix[Chem.BondType.FOURANDAHALF][Chem.BondType.QUADRUPLE] = True + self.MatchMatrix[Chem.BondType.QUINTUPLE][Chem.BondType.FIVEANDAHALF] = \ + self.MatchMatrix[Chem.BondType.FIVEANDAHALF][Chem.BondType.QUINTUPLE] = True + + def isEqual(self, i, j): + return self.MatchMatrix[i][j] + class CompareAny(rdFMCS.MCSAtomCompare): - def __call__(self, p, mol1, atom1, mol2, atom2): - if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): - return False - if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): - return False - if (p.RingMatchesRingOnly): - return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) - return True + + def __call__(self, p, mol1, atom1, mol2, atom2): + if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): + return False + if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): + return False + if (p.RingMatchesRingOnly): + return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) + return True + class CompareAnyHeavyAtom(CompareAny): - def __call__(self, p, mol1, atom1, mol2, atom2): - a1 = mol1.GetAtomWithIdx(atom1) - a2 = mol2.GetAtomWithIdx(atom2) - # Any atom, including H, matches another atom of the same type, according to - # the other flags - if (a1.GetAtomicNum() == a2.GetAtomicNum() or - (a1.GetAtomicNum() > 1 and a2.GetAtomicNum() > 1)): - return CompareAny.__call__(self, p, mol1, atom1, mol2, atom2) - return False + + def __call__(self, p, mol1, atom1, mol2, atom2): + a1 = mol1.GetAtomWithIdx(atom1) + a2 = mol2.GetAtomWithIdx(atom2) + # Any atom, including H, matches another atom of the same type, according to + # the other flags + if (a1.GetAtomicNum() == a2.GetAtomicNum() + or (a1.GetAtomicNum() > 1 and a2.GetAtomicNum() > 1)): + return CompareAny.__call__(self, p, mol1, atom1, mol2, atom2) + return False + class CompareElements(rdFMCS.MCSAtomCompare): - def __call__(self, p, mol1, atom1, mol2, atom2): - a1 = mol1.GetAtomWithIdx(atom1) - a2 = mol2.GetAtomWithIdx(atom2) - if (a1.GetAtomicNum() != a2.GetAtomicNum()): - return False - if (p.MatchValences and a1.GetTotalValence() != a2.GetTotalValence()): - return False - if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): - return False - if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): - return False - if p.RingMatchesRingOnly: - return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) - return True + + def __call__(self, p, mol1, atom1, mol2, atom2): + a1 = mol1.GetAtomWithIdx(atom1) + a2 = mol2.GetAtomWithIdx(atom2) + if (a1.GetAtomicNum() != a2.GetAtomicNum()): + return False + if (p.MatchValences and a1.GetTotalValence() != a2.GetTotalValence()): + return False + if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): + return False + if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): + return False + if p.RingMatchesRingOnly: + return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) + return True + class CompareIsotopes(rdFMCS.MCSAtomCompare): - def __call__(self, p, mol1, atom1, mol2, atom2): - a1 = mol1.GetAtomWithIdx(atom1) - a2 = mol2.GetAtomWithIdx(atom2) - if (a1.GetIsotope() != a2.GetIsotope()): - return False - if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): - return False - if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): - return False - if p.RingMatchesRingOnly: - return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) - return True + + def __call__(self, p, mol1, atom1, mol2, atom2): + a1 = mol1.GetAtomWithIdx(atom1) + a2 = mol2.GetAtomWithIdx(atom2) + if (a1.GetIsotope() != a2.GetIsotope()): + return False + if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): + return False + if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): + return False + if p.RingMatchesRingOnly: + return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) + return True + class CompareOrder(rdFMCS.MCSBondCompare): - match = BondMatchOrderMatrix(True) # ignore Aromatization - def __call__(self, p, mol1, bond1, mol2, bond2): - b1 = mol1.GetBondWithIdx(bond1) - b2 = mol2.GetBondWithIdx(bond2) - t1 = b1.GetBondType() - t2 = b2.GetBondType() - if self.match.isEqual(t1, t2): - if (p.MatchStereo and not self.CheckBondStereo(p, mol1, bond1, mol2, bond2)): - return False - if p.RingMatchesRingOnly: - return self.CheckBondRingMatch(p, mol1, bond1, mol2, bond2) - return True + match = BondMatchOrderMatrix(True) # ignore Aromatization + + def __call__(self, p, mol1, bond1, mol2, bond2): + b1 = mol1.GetBondWithIdx(bond1) + b2 = mol2.GetBondWithIdx(bond2) + t1 = b1.GetBondType() + t2 = b2.GetBondType() + if self.match.isEqual(t1, t2): + if (p.MatchStereo and not self.CheckBondStereo(p, mol1, bond1, mol2, bond2)): return False + if p.RingMatchesRingOnly: + return self.CheckBondRingMatch(p, mol1, bond1, mol2, bond2) + return True + return False + class AtomCompareCompareIsInt(rdFMCS.MCSAtomCompare): - __call__ = 1 + __call__ = 1 + class AtomCompareNoCompare(rdFMCS.MCSAtomCompare): - pass + pass + class AtomCompareUserData(rdFMCS.MCSAtomCompare): - def __init__(self): - super().__init__() - self._matchAnyHet = False - def setMatchAnyHet(self, v): - self._matchAnyHet = v - def __call__(self, p, mol1, atom1, mol2, atom2): - a1 = mol1.GetAtomWithIdx(atom1) - a2 = mol2.GetAtomWithIdx(atom2) - if (a1.GetAtomicNum() != a2.GetAtomicNum() and - ((not self._matchAnyHet) or - a1.GetAtomicNum() == 6 or - a2.GetAtomicNum() == 6)): - return False - if (p.MatchValences and a1.GetTotalValence() != a2.GetTotalValence()): - return False - if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): - return False - if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): - return False - if p.RingMatchesRingOnly: - return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) - return True + + def __init__(self): + super().__init__() + self._matchAnyHet = False + + def setMatchAnyHet(self, v): + self._matchAnyHet = v + + def __call__(self, p, mol1, atom1, mol2, atom2): + a1 = mol1.GetAtomWithIdx(atom1) + a2 = mol2.GetAtomWithIdx(atom2) + if (a1.GetAtomicNum() != a2.GetAtomicNum() + and ((not self._matchAnyHet) or a1.GetAtomicNum() == 6 or a2.GetAtomicNum() == 6)): + return False + if (p.MatchValences and a1.GetTotalValence() != a2.GetTotalValence()): + return False + if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, atom1, mol2, atom2)): + return False + if (p.MatchFormalCharge and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): + return False + if p.RingMatchesRingOnly: + return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) + return True + class BondCompareCompareIsInt(rdFMCS.MCSBondCompare): - __call__ = 1 + __call__ = 1 + class BondCompareNoCompare(rdFMCS.MCSBondCompare): - pass + pass + class BondCompareUserData(rdFMCS.MCSBondCompare): - def __init__(self): - super().__init__() - self.match = None - def setIgnoreAromatization(self, v): - self.match = BondMatchOrderMatrix(v) - def __call__(self, p, mol1, bond1, mol2, bond2): - b1 = mol1.GetBondWithIdx(bond1) - b2 = mol2.GetBondWithIdx(bond2) - t1 = b1.GetBondType() - t2 = b2.GetBondType() - if self.match.isEqual(t1, t2): - if (p.MatchStereo and not self.CheckBondStereo(p, mol1, bond1, mol2, bond2)): - return False - if p.RingMatchesRingOnly: - return self.CheckBondRingMatch(p, mol1, bond1, mol2, bond2) - return True + + def __init__(self): + super().__init__() + self.match = None + + def setIgnoreAromatization(self, v): + self.match = BondMatchOrderMatrix(v) + + def __call__(self, p, mol1, bond1, mol2, bond2): + b1 = mol1.GetBondWithIdx(bond1) + b2 = mol2.GetBondWithIdx(bond2) + t1 = b1.GetBondType() + t2 = b2.GetBondType() + if self.match.isEqual(t1, t2): + if (p.MatchStereo and not self.CheckBondStereo(p, mol1, bond1, mol2, bond2)): return False + if p.RingMatchesRingOnly: + return self.CheckBondRingMatch(p, mol1, bond1, mol2, bond2) + return True + return False + class ProgressCallbackCallbackIsInt(rdFMCS.MCSProgress): - __call__ = 1 + __call__ = 1 + class ProgressCallbackNoCallback(rdFMCS.MCSProgress): - pass + pass + class ProgressCallback(rdFMCS.MCSProgress): - def __init__(self, parent): - super().__init__() - self.parent = parent - self.callCount = 0 - def __call__(self, stat, params): - self.callCount += 1 - self.parent.assertTrue(isinstance(stat, rdFMCS.MCSProgressData)) - self.parent.assertTrue(hasattr(stat, "numAtoms")) - self.parent.assertTrue(isinstance(stat.numAtoms, int)) - self.parent.assertTrue(hasattr(stat, "numBonds")) - self.parent.assertTrue(isinstance(stat.numBonds, int)) - self.parent.assertTrue(hasattr(stat, "seedProcessed")) - self.parent.assertTrue(isinstance(stat.seedProcessed, int)) - self.parent.assertTrue(isinstance(params, rdFMCS.MCSParameters)) - self.parent.assertTrue(isinstance(params.AtomTyper, rdFMCS.MCSAtomCompare)) - self.parent.assertTrue(isinstance(params.BondTyper, rdFMCS.BondCompare)) - self.parent.assertEqual(params.ProgressCallback, self) - return (self.callCount < 3) + + def __init__(self, parent): + super().__init__() + self.parent = parent + self.callCount = 0 + + def __call__(self, stat, params): + self.callCount += 1 + self.parent.assertTrue(isinstance(stat, rdFMCS.MCSProgressData)) + self.parent.assertTrue(hasattr(stat, "numAtoms")) + self.parent.assertTrue(isinstance(stat.numAtoms, int)) + self.parent.assertTrue(hasattr(stat, "numBonds")) + self.parent.assertTrue(isinstance(stat.numBonds, int)) + self.parent.assertTrue(hasattr(stat, "seedProcessed")) + self.parent.assertTrue(isinstance(stat.seedProcessed, int)) + self.parent.assertTrue(isinstance(params, rdFMCS.MCSParameters)) + self.parent.assertTrue(isinstance(params.AtomTyper, rdFMCS.MCSAtomCompare)) + self.parent.assertTrue(isinstance(params.BondTyper, rdFMCS.BondCompare)) + self.parent.assertEqual(params.ProgressCallback, self) + return (self.callCount < 3) + class Common: - @staticmethod - def getParams(**kwargs): - params = rdFMCS.MCSParameters() - for kw in ("AtomTyper", "BondTyper"): - v = kwargs.get(kw, None) - if v is not None: - v_instance = v() - setattr(params, kw, v_instance) - return params - @staticmethod - def test1(self, **kwargs): - smis = ( - "Cc1nc(CN(C(C)c2ncccc2)CCCCN)ccc1 CHEMBL1682991", # -- QUERY - "Cc1ccc(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682990", - "Cc1cccnc1CN(C(C)c1ccccn1)CCCCN CHEMBL1682998", - "CC(N(CCCCN)Cc1c(N)cccn1)c1ccccn1 CHEMBL1682987", - "Cc1cc(C)c(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682992", - "Cc1cc(C(C)N(CCCCN)Cc2c(C)cccn2)ncc1 CHEMBL1682993", - "Cc1nc(C(C)N(CCCCN)Cc2nc3c([nH]2)cccc3)ccc1 CHEMBL1682878", - "CC(c1ncccc1)N(CCCCN)Cc1nc2c([nH]1)cccc2 CHEMBL1682867", - "CC(N(CCCCN)Cc1c(C(C)(C)C)cccn1)c1ccccn1 CHEMBL1682989", - "CC(N(CCCCN)Cc1c(C(F)(F)F)cccn1)c1ccccn1 CHEMBL1682988", - ) - ms = [Chem.MolFromSmiles(x.split()[0]) for x in smis] - qm = ms[0] - ms = ms[1:] - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 21) - self.assertEqual(mcs.numAtoms, 21) - self.assertEqual( - mcs.smartsString, - '[#6](:[#6]:[#6]):[#6]:[#7]:[#6]-[#6]-[#7](-[#6](-[#6])-[#6]1:[#6]:[#6]:[#6]:[#6]:[#7]:1)-[#6]-[#6]-[#6]-[#6]-[#7]' - ) - qm = Chem.MolFromSmarts(mcs.smartsString) - self.assertTrue(qm is not None) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + @staticmethod + def getParams(**kwargs): + params = rdFMCS.MCSParameters() + for kw in ("AtomTyper", "BondTyper"): + v = kwargs.get(kw, None) + if v is not None: + v_instance = v() + setattr(params, kw, v_instance) + return params - if kwargs: - params = Common.getParams(**kwargs) - params.Threshold = 0.8 - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, threshold=0.8) - self.assertEqual(mcs.numBonds, 21) - self.assertEqual(mcs.numAtoms, 21) - self.assertEqual( - mcs.smartsString, - '[#6](:[#6]:[#6]):[#6]:[#7]:[#6]-[#6]-[#7](-[#6](-[#6])-[#6]1:[#6]:[#6]:[#6]:[#6]:[#7]:1)-[#6]-[#6]-[#6]-[#6]-[#7]' - ) - qm = Chem.MolFromSmarts(mcs.smartsString) - self.assertTrue(qm is not None) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + @staticmethod + def test1(self, **kwargs): + smis = ( + "Cc1nc(CN(C(C)c2ncccc2)CCCCN)ccc1 CHEMBL1682991", # -- QUERY + "Cc1ccc(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682990", + "Cc1cccnc1CN(C(C)c1ccccn1)CCCCN CHEMBL1682998", + "CC(N(CCCCN)Cc1c(N)cccn1)c1ccccn1 CHEMBL1682987", + "Cc1cc(C)c(CN(C(C)c2ccccn2)CCCCN)nc1 CHEMBL1682992", + "Cc1cc(C(C)N(CCCCN)Cc2c(C)cccn2)ncc1 CHEMBL1682993", + "Cc1nc(C(C)N(CCCCN)Cc2nc3c([nH]2)cccc3)ccc1 CHEMBL1682878", + "CC(c1ncccc1)N(CCCCN)Cc1nc2c([nH]1)cccc2 CHEMBL1682867", + "CC(N(CCCCN)Cc1c(C(C)(C)C)cccn1)c1ccccn1 CHEMBL1682989", + "CC(N(CCCCN)Cc1c(C(F)(F)F)cccn1)c1ccccn1 CHEMBL1682988", + ) + ms = [Chem.MolFromSmiles(x.split()[0]) for x in smis] + qm = ms[0] + ms = ms[1:] + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 21) + self.assertEqual(mcs.numAtoms, 21) + self.assertEqual( + mcs.smartsString, + '[#6](:[#6]:[#6]):[#6]:[#7]:[#6]-[#6]-[#7](-[#6](-[#6])-[#6]1:[#6]:[#6]:[#6]:[#6]:[#7]:1)-[#6]-[#6]-[#6]-[#6]-[#7]' + ) + qm = Chem.MolFromSmarts(mcs.smartsString) + self.assertTrue(qm is not None) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - def test2(self, **kwargs): - smis = ( - "CHEMBL122452 CN(CCCN(C)CCc1ccccc1)CCOC(c1ccccc1)c1ccccc1", - "CHEMBL123252 CN(CCCc1ccccc1)CCCN(C)CCOC(c1ccccc1)c1ccccc1", - "CHEMBL121611 Fc1ccc(C(OCCNCCCNCCc2ccccc2)c2ccc(F)cc2)cc1", - "CHEMBL121050 O=C(Cc1ccccc1)NCCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL333667 O=C(Cc1ccccc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL121486 O=C(Cc1ccc(Br)cc1)NC=CNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL123830 O=C(Cc1ccc(F)cc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL420900 O=C(Cc1ccccc1)NCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL121460 CN(CCOC(c1ccc(F)cc1)c1ccc(F)cc1)CCN(C)CCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL120901 COC(=O)C1C2CCC(CC1C(=O)Oc1ccccc1)N2C", - "CHEMBL122859 O=C1CN(CCc2ccccc2)CCN1CCOC(c1ccc(F)cc1)c1ccc(F)cc1", - "CHEMBL121027 CN(CCOC(c1ccccc1)c1ccccc1)CCN(C)CCc1ccc(F)cc1", - ) + if kwargs: + params = Common.getParams(**kwargs) + params.Threshold = 0.8 + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, threshold=0.8) + self.assertEqual(mcs.numBonds, 21) + self.assertEqual(mcs.numAtoms, 21) + self.assertEqual( + mcs.smartsString, + '[#6](:[#6]:[#6]):[#6]:[#7]:[#6]-[#6]-[#7](-[#6](-[#6])-[#6]1:[#6]:[#6]:[#6]:[#6]:[#7]:1)-[#6]-[#6]-[#6]-[#6]-[#7]' + ) + qm = Chem.MolFromSmarts(mcs.smartsString) + self.assertTrue(qm is not None) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - ms = [Chem.MolFromSmiles(x.split()[1]) for x in smis] - qm = ms[0] - ms = ms[1:] - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 9) - self.assertEqual(mcs.numAtoms, 10) - qm = Chem.MolFromSmarts(mcs.smartsString) - self.assertTrue(qm is not None) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) - # smarts too hard to canonicalize this - # self.assertEqual(mcs.smartsString,'[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-[#6]-[#8]-[#6]:,-[#6])-,:[#6]') + def test2(self, **kwargs): + smis = ( + "CHEMBL122452 CN(CCCN(C)CCc1ccccc1)CCOC(c1ccccc1)c1ccccc1", + "CHEMBL123252 CN(CCCc1ccccc1)CCCN(C)CCOC(c1ccccc1)c1ccccc1", + "CHEMBL121611 Fc1ccc(C(OCCNCCCNCCc2ccccc2)c2ccc(F)cc2)cc1", + "CHEMBL121050 O=C(Cc1ccccc1)NCCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL333667 O=C(Cc1ccccc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL121486 O=C(Cc1ccc(Br)cc1)NC=CNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL123830 O=C(Cc1ccc(F)cc1)NCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL420900 O=C(Cc1ccccc1)NCCCNCCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL121460 CN(CCOC(c1ccc(F)cc1)c1ccc(F)cc1)CCN(C)CCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL120901 COC(=O)C1C2CCC(CC1C(=O)Oc1ccccc1)N2C", + "CHEMBL122859 O=C1CN(CCc2ccccc2)CCN1CCOC(c1ccc(F)cc1)c1ccc(F)cc1", + "CHEMBL121027 CN(CCOC(c1ccccc1)c1ccccc1)CCN(C)CCc1ccc(F)cc1", + ) - if kwargs: - params = Common.getParams(**kwargs) - params.Threshold = 0.8 - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, threshold=0.8) - self.assertEqual(mcs.numBonds, 20) - self.assertEqual(mcs.numAtoms, 19) - qm = Chem.MolFromSmarts(mcs.smartsString) - self.assertTrue(qm is not None) - nHits = 0 - for m in ms: - if m.HasSubstructMatch(qm): - nHits += 1 - self.assertTrue(nHits >= int(0.8 * len(smis))) - # smarts too hard to canonicalize this - # self.assertEqual(mcs.smartsString,'[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#6](-[#8]-[#6]-[#6]-[#7]-[#6]-[#6])-[#6]2:[#6]:[#6]:[#6]:[#6]:[#6]:2') + ms = [Chem.MolFromSmiles(x.split()[1]) for x in smis] + qm = ms[0] + ms = ms[1:] + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 9) + self.assertEqual(mcs.numAtoms, 10) + qm = Chem.MolFromSmarts(mcs.smartsString) + self.assertTrue(qm is not None) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) + # smarts too hard to canonicalize this + # self.assertEqual(mcs.smartsString,'[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-[#6]-[#8]-[#6]:,-[#6])-,:[#6]') - def test3IsotopeMatch(self, **kwargs): - smis = ( - "CC[14NH2]", - "CC[14CH3]", - ) + if kwargs: + params = Common.getParams(**kwargs) + params.Threshold = 0.8 + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, threshold=0.8) + self.assertEqual(mcs.numBonds, 20) + self.assertEqual(mcs.numAtoms, 19) + qm = Chem.MolFromSmarts(mcs.smartsString) + self.assertTrue(qm is not None) + nHits = 0 + for m in ms: + if m.HasSubstructMatch(qm): + nHits += 1 + self.assertTrue(nHits >= int(0.8 * len(smis))) + # smarts too hard to canonicalize this + # self.assertEqual(mcs.smartsString,'[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#6](-[#8]-[#6]-[#6]-[#7]-[#6]-[#6])-[#6]2:[#6]:[#6]:[#6]:[#6]:[#6]:2') - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - qm = Chem.MolFromSmarts(mcs.smartsString) + def test3IsotopeMatch(self, **kwargs): + smis = ( + "CC[14NH2]", + "CC[14CH3]", + ) - if kwargs: - params = Common.getParams(**kwargs) - params.AtomTyper = CompareIsotopes() - params.AtomCompareParameters.MatchIsotope = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareIsotopes) - self.assertEqual(mcs.numBonds, 2) - self.assertEqual(mcs.numAtoms, 3) - qm = Chem.MolFromSmarts(mcs.smartsString) + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + qm = Chem.MolFromSmarts(mcs.smartsString) - self.assertTrue(Chem.MolFromSmiles('CC[14CH3]').HasSubstructMatch(qm)) - self.assertFalse(Chem.MolFromSmiles('CC[13CH3]').HasSubstructMatch(qm)) - self.assertTrue(Chem.MolFromSmiles('OO[14CH3]').HasSubstructMatch(qm)) - self.assertFalse(Chem.MolFromSmiles('O[13CH2][14CH3]').HasSubstructMatch(qm)) + if kwargs: + params = Common.getParams(**kwargs) + params.AtomTyper = CompareIsotopes() + params.AtomCompareParameters.MatchIsotope = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareIsotopes) + self.assertEqual(mcs.numBonds, 2) + self.assertEqual(mcs.numAtoms, 3) + qm = Chem.MolFromSmarts(mcs.smartsString) - def test4RingMatches(self, **kwargs): - smis = ['CCCCC', 'CCC1CCCCC1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 4) - self.assertEqual(mcs.numAtoms, 5) - self.assertEqual(mcs.smartsString, '[#6]-[#6]-[#6]-[#6]-[#6]') + self.assertTrue(Chem.MolFromSmiles('CC[14CH3]').HasSubstructMatch(qm)) + self.assertFalse(Chem.MolFromSmiles('CC[13CH3]').HasSubstructMatch(qm)) + self.assertTrue(Chem.MolFromSmiles('OO[14CH3]').HasSubstructMatch(qm)) + self.assertFalse(Chem.MolFromSmiles('O[13CH2][14CH3]').HasSubstructMatch(qm)) - if kwargs: - params = Common.getParams(**kwargs) - params.BondCompareParameters.CompleteRingsOnly = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True) - self.assertEqual(mcs.numBonds, 2) - self.assertEqual(mcs.numAtoms, 3) - self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') + def test4RingMatches(self, **kwargs): + smis = ['CCCCC', 'CCC1CCCCC1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 4) + self.assertEqual(mcs.numAtoms, 5) + self.assertEqual(mcs.smartsString, '[#6]-[#6]-[#6]-[#6]-[#6]') - if kwargs: - params = Common.getParams(**kwargs) - params.BondCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.MatchFusedRings = True - params.BondCompareParameters.MatchFusedRingsStrict = False - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True, - ringCompare=rdFMCS.RingCompare.PermissiveRingFusion) - self.assertEqual(mcs.numBonds, 2) - self.assertEqual(mcs.numAtoms, 3) - self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') + if kwargs: + params = Common.getParams(**kwargs) + params.BondCompareParameters.CompleteRingsOnly = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True) + self.assertEqual(mcs.numBonds, 2) + self.assertEqual(mcs.numAtoms, 3) + self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') - if kwargs: - params = Common.getParams(**kwargs) - params.BondCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.MatchFusedRings = True - params.BondCompareParameters.MatchFusedRingsStrict = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True, - ringCompare=rdFMCS.RingCompare.StrictRingFusion) - self.assertEqual(mcs.numBonds, 2) - self.assertEqual(mcs.numAtoms, 3) - self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') + if kwargs: + params = Common.getParams(**kwargs) + params.BondCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.MatchFusedRings = True + params.BondCompareParameters.MatchFusedRingsStrict = False + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True, + ringCompare=rdFMCS.RingCompare.PermissiveRingFusion) + self.assertEqual(mcs.numBonds, 2) + self.assertEqual(mcs.numAtoms, 3) + self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&!R]') + if kwargs: + params = Common.getParams(**kwargs) + params.BondCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.MatchFusedRings = True + params.BondCompareParameters.MatchFusedRingsStrict = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True, + ringCompare=rdFMCS.RingCompare.StrictRingFusion) + self.assertEqual(mcs.numBonds, 2) + self.assertEqual(mcs.numAtoms, 3) + self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]-&!@[#6]') - smis = ['CC1CCC1', 'CCC1CCCCC1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 4) - self.assertEqual(mcs.numAtoms, 5) - self.assertEqual(mcs.smartsString, '[#6]-[#6](-[#6]-[#6])-[#6]') + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&!R]') - if kwargs: - params = Common.getParams(**kwargs) - params.BondCompareParameters.CompleteRingsOnly = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]') + smis = ['CC1CCC1', 'CCC1CCCCC1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 4) + self.assertEqual(mcs.numAtoms, 5) + self.assertEqual(mcs.smartsString, '[#6]-[#6](-[#6]-[#6])-[#6]') - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') + if kwargs: + params = Common.getParams(**kwargs) + params.BondCompareParameters.CompleteRingsOnly = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, completeRingsOnly=True) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.smartsString, '[#6]-&!@[#6]') - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.MatchFusedRings = True - params.BondCompareParameters.MatchFusedRingsStrict = False - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True, - ringCompare=rdFMCS.RingCompare.PermissiveRingFusion) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.MatchFusedRings = True - params.BondCompareParameters.MatchFusedRingsStrict = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True, - ringCompare=rdFMCS.RingCompare.StrictRingFusion) - self.assertEqual(mcs.numBonds, 1) - self.assertEqual(mcs.numAtoms, 2) - self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.MatchFusedRings = True + params.BondCompareParameters.MatchFusedRingsStrict = False + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True, + ringCompare=rdFMCS.RingCompare.PermissiveRingFusion) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) - self.assertEqual(mcs.numBonds, 4) - self.assertEqual(mcs.numAtoms, 5) - self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R](-&@[#6&R]-&@[#6&R])-&@[#6&R]') + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.MatchFusedRings = True + params.BondCompareParameters.MatchFusedRingsStrict = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True, completeRingsOnly=True, + ringCompare=rdFMCS.RingCompare.StrictRingFusion) + self.assertEqual(mcs.numBonds, 1) + self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R]') - def test5AnyMatch(self, **kwargs): - smis = ('c1ccccc1C', 'c1ccccc1O', 'c1ccccc1Cl') - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - params.AtomTyper = CompareAny() - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAny) - self.assertEqual(mcs.numBonds, 7) - self.assertEqual(mcs.numAtoms, 7) - qm = Chem.MolFromSmarts(mcs.smartsString) + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) + self.assertEqual(mcs.numBonds, 4) + self.assertEqual(mcs.numAtoms, 5) + self.assertEqual(mcs.smartsString, '[#6&!R]-&!@[#6&R](-&@[#6&R]-&@[#6&R])-&@[#6&R]') - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + def test5AnyMatch(self, **kwargs): + smis = ('c1ccccc1C', 'c1ccccc1O', 'c1ccccc1Cl') + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + params.AtomTyper = CompareAny() + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAny) + self.assertEqual(mcs.numBonds, 7) + self.assertEqual(mcs.numAtoms, 7) + qm = Chem.MolFromSmarts(mcs.smartsString) - smis = ('c1cccnc1C', 'c1cnncc1O', 'c1cccnc1Cl') - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - params.AtomTyper = CompareAny() - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAny) - self.assertEqual(mcs.numBonds, 7) - self.assertEqual(mcs.numAtoms, 7) - qm = Chem.MolFromSmarts(mcs.smartsString) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + smis = ('c1cccnc1C', 'c1cnncc1O', 'c1cccnc1Cl') + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + params.AtomTyper = CompareAny() + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAny) + self.assertEqual(mcs.numBonds, 7) + self.assertEqual(mcs.numAtoms, 7) + qm = Chem.MolFromSmarts(mcs.smartsString) - def testAtomCompareAnyHeavyAtom(self, **kwargs): - # H matches H, O matches C - smis = ('[H]c1ccccc1C', '[H]c1ccccc1O') - ms = [Chem.MolFromSmiles(x, sanitize=False) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - params.AtomTyper = CompareAnyHeavyAtom() - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom) - self.assertEqual(mcs.numBonds, 8) - self.assertEqual(mcs.numAtoms, 8) - qm = Chem.MolFromSmarts(mcs.smartsString) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + def testAtomCompareAnyHeavyAtom(self, **kwargs): + # H matches H, O matches C + smis = ('[H]c1ccccc1C', '[H]c1ccccc1O') + ms = [Chem.MolFromSmiles(x, sanitize=False) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + params.AtomTyper = CompareAnyHeavyAtom() + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom) + self.assertEqual(mcs.numBonds, 8) + self.assertEqual(mcs.numAtoms, 8) + qm = Chem.MolFromSmarts(mcs.smartsString) - def testAtomCompareAnyHeavyAtom1(self, **kwargs): - # O matches C, H does not match O - smis = ('[H]c1ccccc1C', 'Oc1ccccc1O') - ms = [Chem.MolFromSmiles(x, sanitize=False) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - params.AtomTyper = CompareAnyHeavyAtom() - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom) - self.assertEqual(mcs.numBonds, 7) - self.assertEqual(mcs.numAtoms, 7) - qm = Chem.MolFromSmarts(mcs.smartsString) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - for m in ms: - self.assertTrue(m.HasSubstructMatch(qm)) + def testAtomCompareAnyHeavyAtom1(self, **kwargs): + # O matches C, H does not match O + smis = ('[H]c1ccccc1C', 'Oc1ccccc1O') + ms = [Chem.MolFromSmiles(x, sanitize=False) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + params.AtomTyper = CompareAnyHeavyAtom() + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom) + self.assertEqual(mcs.numBonds, 7) + self.assertEqual(mcs.numAtoms, 7) + qm = Chem.MolFromSmarts(mcs.smartsString) - def test6MatchValences(self, **kwargs): - ms = (Chem.MolFromSmiles('NC1OC1'), Chem.MolFromSmiles('C1OC1[N+](=O)[O-]')) - if kwargs: - params = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numBonds, 4) - self.assertEqual(mcs.numAtoms, 4) - if kwargs: - params = Common.getParams(**kwargs) - params.AtomCompareParameters.MatchValences = True - mcs = rdFMCS.FindMCS(ms, params) - else: - mcs = rdFMCS.FindMCS(ms, matchValences=True) - self.assertEqual(mcs.numBonds, 3) - self.assertEqual(mcs.numAtoms, 3) + for m in ms: + self.assertTrue(m.HasSubstructMatch(qm)) - def test7Seed(self, **kwargs): - smis = ['C1CCC1CC1CC1', 'C1CCC1OC1CC1', 'C1CCC1NC1CC1', 'C1CCC1SC1CC1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - params = Common.getParams(**kwargs) - r = rdFMCS.FindMCS(ms, params) - else: - r = rdFMCS.FindMCS(ms) - self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-[#6]-1") - if kwargs: - params = Common.getParams(**kwargs) - params.InitialSeed = 'C1CC1' - r = rdFMCS.FindMCS(ms, params) - else: - r = rdFMCS.FindMCS(ms, seedSmarts='C1CC1') - self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-1") - if kwargs: - params = Common.getParams(**kwargs) - params.InitialSeed = 'C1OC1' - r = rdFMCS.FindMCS(ms, params) - else: - r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1') - self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-[#6]-1") - self.assertEqual(r.numAtoms, 4) - self.assertEqual(r.numBonds, 4) - if kwargs: - params = Common.getParams(**kwargs) - params.InitialSeed = 'C1OC1' - params.AtomCompareParameters.RingMatchesRingOnly = True - params.BondCompareParameters.RingMatchesRingOnly = True - r = rdFMCS.FindMCS(ms, params) - else: - r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1', ringMatchesRingOnly=True) - self.assertEqual(r.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") - self.assertEqual(r.numAtoms, 4) - self.assertEqual(r.numBonds, 4) - if kwargs: - params = Common.getParams(**kwargs) - params.InitialSeed = 'C1OC1' - params.BondCompareParameters.CompleteRingsOnly = True - r = rdFMCS.FindMCS(ms, params) - else: - r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1', completeRingsOnly=True) - self.assertEqual(r.smartsString, "[#6]1-&@[#6]-&@[#6]-&@[#6]-&@1") - self.assertEqual(r.numAtoms, 4) - self.assertEqual(r.numBonds, 4) + def test6MatchValences(self, **kwargs): + ms = (Chem.MolFromSmiles('NC1OC1'), Chem.MolFromSmiles('C1OC1[N+](=O)[O-]')) + if kwargs: + params = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numBonds, 4) + self.assertEqual(mcs.numAtoms, 4) + if kwargs: + params = Common.getParams(**kwargs) + params.AtomCompareParameters.MatchValences = True + mcs = rdFMCS.FindMCS(ms, params) + else: + mcs = rdFMCS.FindMCS(ms, matchValences=True) + self.assertEqual(mcs.numBonds, 3) + self.assertEqual(mcs.numAtoms, 3) - def test8MatchParams(self, **kwargs): - smis = ("CCC1NC1", "CCC1N(C)C1", "CCC1OC1") - ms = [Chem.MolFromSmiles(x) for x in smis] + def test7Seed(self, **kwargs): + smis = ['C1CCC1CC1CC1', 'C1CCC1OC1CC1', 'C1CCC1NC1CC1', 'C1CCC1SC1CC1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + if kwargs: + params = Common.getParams(**kwargs) + r = rdFMCS.FindMCS(ms, params) + else: + r = rdFMCS.FindMCS(ms) + self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-[#6]-1") + if kwargs: + params = Common.getParams(**kwargs) + params.InitialSeed = 'C1CC1' + r = rdFMCS.FindMCS(ms, params) + else: + r = rdFMCS.FindMCS(ms, seedSmarts='C1CC1') + self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-1") + if kwargs: + params = Common.getParams(**kwargs) + params.InitialSeed = 'C1OC1' + r = rdFMCS.FindMCS(ms, params) + else: + r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1') + self.assertEqual(r.smartsString, "[#6]1-[#6]-[#6]-[#6]-1") + self.assertEqual(r.numAtoms, 4) + self.assertEqual(r.numBonds, 4) + if kwargs: + params = Common.getParams(**kwargs) + params.InitialSeed = 'C1OC1' + params.AtomCompareParameters.RingMatchesRingOnly = True + params.BondCompareParameters.RingMatchesRingOnly = True + r = rdFMCS.FindMCS(ms, params) + else: + r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1', ringMatchesRingOnly=True) + self.assertEqual(r.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") + self.assertEqual(r.numAtoms, 4) + self.assertEqual(r.numBonds, 4) + if kwargs: + params = Common.getParams(**kwargs) + params.InitialSeed = 'C1OC1' + params.BondCompareParameters.CompleteRingsOnly = True + r = rdFMCS.FindMCS(ms, params) + else: + r = rdFMCS.FindMCS(ms, seedSmarts='C1OC1', completeRingsOnly=True) + self.assertEqual(r.smartsString, "[#6]1-&@[#6]-&@[#6]-&@[#6]-&@1") + self.assertEqual(r.numAtoms, 4) + self.assertEqual(r.numBonds, 4) - if kwargs: - ps = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, ps) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numAtoms, 4) + def test8MatchParams(self, **kwargs): + smis = ("CCC1NC1", "CCC1N(C)C1", "CCC1OC1") + ms = [Chem.MolFromSmiles(x) for x in smis] - ps = Common.getParams(**kwargs) - ps.BondCompareParameters.CompleteRingsOnly = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 3) + if kwargs: + ps = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, ps) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numAtoms, 4) - ps = Common.getParams(**kwargs) - ps.BondCompareParameters.CompleteRingsOnly = True - ps.BondCompareParameters.MatchFusedRings = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 3) + ps = Common.getParams(**kwargs) + ps.BondCompareParameters.CompleteRingsOnly = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 3) - ps = Common.getParams(**kwargs) - ps.BondCompareParameters.CompleteRingsOnly = True - ps.BondCompareParameters.MatchFusedRingsStrict = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 3) + ps = Common.getParams(**kwargs) + ps.BondCompareParameters.CompleteRingsOnly = True + ps.BondCompareParameters.MatchFusedRings = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 3) - ps = Common.getParams(**kwargs) - if kwargs: - ps.SetAtomTyper(CompareAny()) - else: - ps.SetAtomTyper(rdFMCS.AtomCompare.CompareAny) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 5) + ps = Common.getParams(**kwargs) + ps.BondCompareParameters.CompleteRingsOnly = True + ps.BondCompareParameters.MatchFusedRingsStrict = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 3) - def test9MatchCharge(self, **kwargs): - smis = ("CCNC", "CCN(C)C", "CC[N+](C)C") - ms = [Chem.MolFromSmiles(x) for x in smis] + ps = Common.getParams(**kwargs) + if kwargs: + ps.SetAtomTyper(CompareAny()) + else: + ps.SetAtomTyper(rdFMCS.AtomCompare.CompareAny) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 5) - if kwargs: - ps = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, ps) - else: - mcs = rdFMCS.FindMCS(ms) + def test9MatchCharge(self, **kwargs): + smis = ("CCNC", "CCN(C)C", "CC[N+](C)C") + ms = [Chem.MolFromSmiles(x) for x in smis] - self.assertEqual(mcs.numAtoms, 4) + if kwargs: + ps = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, ps) + else: + mcs = rdFMCS.FindMCS(ms) - ps = Common.getParams(**kwargs) - ps.AtomCompareParameters.MatchFormalCharge = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 2) + self.assertEqual(mcs.numAtoms, 4) - def test10MatchChargeAndParams(self, **kwargs): - smis = ("CCNC", "CCN(C)C", "CC[N+](C)C", "CC[C+](C)C") - ms = [Chem.MolFromSmiles(x) for x in smis] + ps = Common.getParams(**kwargs) + ps.AtomCompareParameters.MatchFormalCharge = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 2) - if kwargs: - ps = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, ps) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numAtoms, 2) + def test10MatchChargeAndParams(self, **kwargs): + smis = ("CCNC", "CCN(C)C", "CC[N+](C)C", "CC[C+](C)C") + ms = [Chem.MolFromSmiles(x) for x in smis] - ps = Common.getParams(**kwargs) - if kwargs: - ps.AtomTyper = CompareAny() - else: - ps.AtomTyper = rdFMCS.AtomCompare.CompareAny - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 4) + if kwargs: + ps = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, ps) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numAtoms, 2) - ps = Common.getParams(**kwargs) - ps.AtomCompareParameters.MatchFormalCharge = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 2) + ps = Common.getParams(**kwargs) + if kwargs: + ps.AtomTyper = CompareAny() + else: + ps.AtomTyper = rdFMCS.AtomCompare.CompareAny + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 4) - def test11Github2034(self, **kwargs): - smis = ("C1CC1N2CC2", "C1CC1N") - ms = [Chem.MolFromSmiles(x) for x in smis] + ps = Common.getParams(**kwargs) + ps.AtomCompareParameters.MatchFormalCharge = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 2) - if kwargs: - ps = Common.getParams(**kwargs) - mcs = rdFMCS.FindMCS(ms, ps) - else: - mcs = rdFMCS.FindMCS(ms) - self.assertEqual(mcs.numAtoms, 4) - self.assertEqual(mcs.numBonds, 4) + def test11Github2034(self, **kwargs): + smis = ("C1CC1N2CC2", "C1CC1N") + ms = [Chem.MolFromSmiles(x) for x in smis] - if kwargs: - ps = Common.getParams(**kwargs) - ps.AtomCompareParameters.RingMatchesRingOnly = True - ps.BondCompareParameters.RingMatchesRingOnly = True - mcs = rdFMCS.FindMCS(ms, ps) - else: - mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) - self.assertEqual(mcs.numAtoms, 3) - self.assertEqual(mcs.numBonds, 3) + if kwargs: + ps = Common.getParams(**kwargs) + mcs = rdFMCS.FindMCS(ms, ps) + else: + mcs = rdFMCS.FindMCS(ms) + self.assertEqual(mcs.numAtoms, 4) + self.assertEqual(mcs.numBonds, 4) - ps = Common.getParams(**kwargs) - ps.AtomCompareParameters.RingMatchesRingOnly = True - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 3) - self.assertEqual(mcs.numBonds, 3) + if kwargs: + ps = Common.getParams(**kwargs) + ps.AtomCompareParameters.RingMatchesRingOnly = True + ps.BondCompareParameters.RingMatchesRingOnly = True + mcs = rdFMCS.FindMCS(ms, ps) + else: + mcs = rdFMCS.FindMCS(ms, ringMatchesRingOnly=True) + self.assertEqual(mcs.numAtoms, 3) + self.assertEqual(mcs.numBonds, 3) - def test19MCS3d(self, **kwargs): - block1 = """ + ps = Common.getParams(**kwargs) + ps.AtomCompareParameters.RingMatchesRingOnly = True + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 3) + self.assertEqual(mcs.numBonds, 3) + + def test19MCS3d(self, **kwargs): + block1 = """ RDKit 3D 17 17 0 0 0 0 0 0 0 0999 V2000 @@ -736,7 +765,7 @@ M END """ - block2 = """ + block2 = """ RDKit 3D 17 17 0 0 0 0 0 0 0 0999 V2000 @@ -778,385 +807,344 @@ M END """ - m1 = Chem.MolFromMolBlock(block1, removeHs=False) - m2 = Chem.MolFromMolBlock(block2, removeHs=False) - ps = Common.getParams(**kwargs) - ps.AtomCompareParameters.MaxDistance = 1.0 - mcs = rdFMCS.FindMCS([m1, m2], ps) - self.assertEqual(mcs.numAtoms, 14) - self.assertEqual(mcs.numBonds, 14) + m1 = Chem.MolFromMolBlock(block1, removeHs=False) + m2 = Chem.MolFromMolBlock(block2, removeHs=False) + ps = Common.getParams(**kwargs) + ps.AtomCompareParameters.MaxDistance = 1.0 + mcs = rdFMCS.FindMCS([m1, m2], ps) + self.assertEqual(mcs.numAtoms, 14) + self.assertEqual(mcs.numBonds, 14) class TestCase(unittest.TestCase): - def setUp(self): - pass - - def test1(self): - Common.test1(self) - - def test1PythonImpl(self): - Common.test1(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - # DEPRECATED: remove from here in release 2021.01 - def test1PythonImplDeprecated(self): - atom_call = CompareElements.__call__ - setattr(CompareElements, "compare", CompareElements.__call__) - delattr(CompareElements, "__call__") - bond_call = CompareOrder.__call__ - setattr(CompareOrder, "compare", CompareOrder.__call__) - delattr(CompareOrder, "__call__") - Common.test1(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - setattr(CompareElements, "__call__", atom_call) - delattr(CompareElements, "compare") - setattr(CompareOrder, "__call__", bond_call) - delattr(CompareOrder, "compare") - - def test1PythonImplDeprecatedTypo(self): - atom_call = CompareElements.__call__ - setattr(CompareElements, "comparx", CompareElements.__call__) - delattr(CompareElements, "__call__") - bond_call = CompareOrder.__call__ - setattr(CompareOrder, "comparx", CompareOrder.__call__) - delattr(CompareOrder, "__call__") - self.assertRaises(TypeError, lambda self: Common.test1(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder)) - setattr(CompareElements, "__call__", atom_call) - delattr(CompareElements, "comparx") - setattr(CompareOrder, "__call__", bond_call) - delattr(CompareOrder, "comparx") - # DEPRECATED: remove until here in release 2021.01 - - def test2(self): - Common.test2(self) - - def test2PythonImpl(self): - Common.test2(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test2PythonImplAtomTyperOnly(self): - Common.test2(self, - AtomTyper=CompareElements) - - def test2PythonImplBondTyperOnly(self): - Common.test2(self, - BondTyper=CompareOrder) - - def test3IsotopeMatch(self): - Common.test3IsotopeMatch(self) - - def test3IsotopeMatchPythonImpl(self): - Common.test3IsotopeMatch(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test3IsotopeMatchPythonImplAtomTyperOnly(self): - Common.test3IsotopeMatch(self, - AtomTyper=CompareElements) - - def test3IsotopeMatchPythonImplBondTyperOnly(self): - Common.test3IsotopeMatch(self, - BondTyper=CompareOrder) - - def test4RingMatches(self): - Common.test4RingMatches(self) - - def test4RingMatchesPythonImpl(self): - Common.test4RingMatches(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test4RingMatchesPythonImplAtomTyperOnly(self): - Common.test4RingMatches(self, - AtomTyper=CompareElements) - - def test4RingMatchesPythonImplBondTyperOnly(self): - Common.test4RingMatches(self, - BondTyper=CompareOrder) - - def test5AnyMatch(self): - Common.test5AnyMatch(self) - - def test5AnyMatchPythonImpl(self): - Common.test5AnyMatch(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test5AnyMatchPythonImplAtomTyperOnly(self): - Common.test5AnyMatch(self, - AtomTyper=CompareElements) - - def test5AnyMatchPythonImplBondTyperOnly(self): - Common.test5AnyMatch(self, - BondTyper=CompareOrder) - - def testAtomCompareAnyHeavyAtom(self): - Common.testAtomCompareAnyHeavyAtom(self) - - def testAtomCompareAnyHeavyAtomPythonImpl(self): - Common.testAtomCompareAnyHeavyAtom(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def testAtomCompareAnyHeavyAtomPythonImplAtomTyperOnly(self): - Common.testAtomCompareAnyHeavyAtom(self, - AtomTyper=CompareElements) - - def testAtomCompareAnyHeavyAtomPythonImplBondTyperOnly(self): - Common.testAtomCompareAnyHeavyAtom(self, - BondTyper=CompareOrder) - - def testAtomCompareAnyHeavyAtom1(self): - Common.testAtomCompareAnyHeavyAtom1(self) - - def testAtomCompareAnyHeavyAtom1PythonImpl(self): - Common.testAtomCompareAnyHeavyAtom1(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def testAtomCompareAnyHeavyAtom1PythonImplAtomTyperOnly(self): - Common.testAtomCompareAnyHeavyAtom1(self, - AtomTyper=CompareElements) - - def testAtomCompareAnyHeavyAtom1PythonImplBondTyperOnly(self): - Common.testAtomCompareAnyHeavyAtom1(self, - BondTyper=CompareOrder) - - def test6MatchValences(self): - Common.test6MatchValences(self) - - def test6MatchValencesPythonImpl(self): - Common.test6MatchValences(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test6MatchValencesPythonImplAtomTyperOnly(self): - Common.test6MatchValences(self, - AtomTyper=CompareElements) - - def test6MatchValencesPythonImplBondTyperOnly(self): - Common.test6MatchValences(self, - BondTyper=CompareOrder) - - def test7Seed(self): - Common.test7Seed(self) - - def test7SeedPythonImpl(self): - Common.test7Seed(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test7SeedPythonImplAtomTyperOnly(self): - Common.test7Seed(self, - AtomTyper=CompareElements) - - def test7SeedPythonImplBondTyperOnly(self): - Common.test7Seed(self, - BondTyper=CompareOrder) - - def test8MatchParams(self): - Common.test8MatchParams(self) - - def test8MatchParamsPythonImpl(self): - Common.test8MatchParams(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test8MatchParamsPythonImplAtomTyperOnly(self): - Common.test8MatchParams(self, - AtomTyper=CompareElements) - - def test8MatchParamsPythonImplBondTyperOnly(self): - Common.test8MatchParams(self, - BondTyper=CompareOrder) - - def test9MatchCharge(self): - Common.test9MatchCharge(self) - - def test9MatchChargePythonImpl(self): - Common.test9MatchCharge(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test9MatchChargePythonImplAtomTyperOnly(self): - Common.test9MatchCharge(self, - AtomTyper=CompareElements) - - def test9MatchChargePythonImplBondTyperOnly(self): - Common.test9MatchCharge(self, - BondTyper=CompareOrder) - - def test10MatchChargeAndParams(self): - Common.test10MatchChargeAndParams(self) - - def test10MatchChargeAndParamsPythonImpl(self): - Common.test10MatchChargeAndParams(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test10MatchChargeAndParamsPythonImplAtomTyperOnly(self): - Common.test10MatchChargeAndParams(self, - AtomTyper=CompareElements) - - def test10MatchChargeAndParamsPythonImplBondTyperOnly(self): - Common.test10MatchChargeAndParams(self, - BondTyper=CompareOrder) - - def test11Github2034(self): - Common.test11Github2034(self) - - def test11Github2034PythonImpl(self): - Common.test11Github2034(self, - AtomTyper=CompareElements, - BondTyper=CompareOrder) - - def test11Github2034PythonImplAtomTyperOnly(self): - Common.test11Github2034(self, - AtomTyper=CompareElements) - - def test11Github2034PythonImplBondTyperOnly(self): - Common.test11Github2034(self, - BondTyper=CompareOrder) - - def test12MCSAtomCompareExceptions(self): - ps = rdFMCS.MCSParameters() - smis = ['CCCCC', 'CCC1CCCCC1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - self.assertRaises(TypeError, lambda ps: setattr(ps, "AtomTyper", - AtomCompareCompareIsInt())) - self.assertRaises(TypeError, lambda ps: setattr(ps, "AtomTyper", - AtomCompareNoCompare())) - - def test13MCSAtomCompareUserData(self): - smis = ['CCOCCOC', 'CCNCCCC'] - ms = [Chem.MolFromSmiles(x) for x in smis] - ps = rdFMCS.MCSParameters() - ps.AtomTyper = AtomCompareUserData() - ps.AtomTyper.setMatchAnyHet(False) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 2) - ps.AtomTyper.setMatchAnyHet(True) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 5) - - def test14MCSBondCompareExceptions(self): - ps = rdFMCS.MCSParameters() - smis = ['CCCCC', 'CCC1CCCCC1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - self.assertRaises(TypeError, lambda ps: setattr(ps, "BondTyper", - BondCompareCompareIsInt())) - self.assertRaises(TypeError, lambda ps: setattr(ps, "BondTyper", - BondCompareNoCompare())) - - def test15MCSBondCompareUserData(self): - smis = ['C1CC=CCC1', 'c1ccccc1'] - ms = [Chem.MolFromSmiles(x) for x in smis] - ps = rdFMCS.MCSParameters() - ps.BondTyper = BondCompareUserData() - ps.BondCompareParameters.CompleteRingsOnly = True - ps.BondTyper.setIgnoreAromatization(False) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 0) - ps.BondTyper.setIgnoreAromatization(True) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertEqual(mcs.numAtoms, 6) - - def test16MCSProgressCallbackExceptions(self): - ps = rdFMCS.MCSParameters() - smis = ['CCCC(C)CC(CC)CC', 'OC(N)CC(C)CC(CC)CC'] - ms = [Chem.MolFromSmiles(x) for x in smis] - self.assertRaises(TypeError, lambda ps: setattr(ps, "ProgressCallback", - ProgressCallbackCallbackIsInt())) - self.assertRaises(TypeError, lambda ps: setattr(ps, "ProgressCallback", - ProgressCallbackNoCallback())) - - def test17MCSProgressCallbackCancel(self): - ps = rdFMCS.MCSParameters() - smis = ['CCCC(C)CC(CC)CC', 'OC(N)CC(C)CC(CC)CC'] - ms = [Chem.MolFromSmiles(x) for x in smis] - ps.AtomTyper = CompareElements() - ps.ProgressCallback = ProgressCallback(self) - mcs = rdFMCS.FindMCS(ms, ps) - self.assertTrue(mcs.canceled) - self.assertEqual(ps.ProgressCallback.callCount, 3) - - # DEPRECATED: remove from here in release 2021.01 - def test17MCSProgressCallbackCancelDeprecated(self): - callback = ProgressCallback.__call__ - setattr(ProgressCallback, "callback", ProgressCallback.__call__) - delattr(ProgressCallback, "__call__") - self.test17MCSProgressCallbackCancel() - setattr(ProgressCallback, "__call__", callback) - delattr(ProgressCallback, "callback") - - def test17MCSProgressCallbackCancelDeprecatedTypo(self): - callback = ProgressCallback.__call__ - setattr(ProgressCallback, "callbacx", ProgressCallback.__call__) - delattr(ProgressCallback, "__call__") - self.assertRaises(TypeError, self.test17MCSProgressCallbackCancel) - setattr(ProgressCallback, "__call__", callback) - delattr(ProgressCallback, "callbacx") - # DEPRECATED: remove until here in release 2021.01 - - def test18GitHub3693(self): - mols = [Chem.MolFromSmiles(smi) for smi in [ - "Nc1ccc(O)cc1c1ccc2ccccc2c1", "Oc1cnc(NC2CCC2)c(c1)c1ccc2ccccc2c1"]] - params = rdFMCS.MCSParameters() - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 17) - self.assertEqual(res.numBonds, 18) - self.assertEqual(res.smartsString, "[#7]-,:[#6]:[#6](:[#6]:[#6](:[#6])-[#8])-[#6]1:[#6]:[#6]:[#6]2:[#6](:[#6]:1):[#6]:[#6]:[#6]:[#6]:2") - - params = rdFMCS.MCSParameters() - params.BondCompareParameters.CompleteRingsOnly = True - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 11) - self.assertEqual(res.numBonds, 12) - self.assertEqual(res.smartsString, "[#6]-&!@[#6]1:&@[#6]:&@[#6]:&@[#6]2:&@[#6](:&@[#6]:&@1):&@[#6]:&@[#6]:&@[#6]:&@[#6]:&@2") - - params = rdFMCS.MCSParameters() - params.AtomCompareParameters.CompleteRingsOnly = True - params.BondCompareParameters.CompleteRingsOnly = True - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 10) - self.assertEqual(res.numBonds, 11) - self.assertEqual(res.smartsString, "[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]2:&@[#6&R](:&@[#6&R]:&@1):&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@2") - - params = rdFMCS.MCSParameters() - params.AtomCompareParameters.CompleteRingsOnly = True - # this will automatically be set to True - params.BondCompareParameters.CompleteRingsOnly = False - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 10) - self.assertEqual(res.numBonds, 11) - self.assertEqual(res.smartsString, "[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]2:&@[#6&R](:&@[#6&R]:&@1):&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@2") - - def test19MCS3d(self): - Common.test19MCS3d(self) - - def test20AtomCompareCompleteRingsOnly(self): - mols = [Chem.MolFromSmiles(smi) for smi in ["C1CCCC1C", "C1CCCC1C1CCCCC1"]] - params = rdFMCS.MCSParameters() - params.AtomCompareParameters.CompleteRingsOnly = True - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 5) - self.assertEqual(res.numBonds, 5) - self.assertEqual(res.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") - - params = rdFMCS.MCSParameters() - params.AtomCompareParameters.CompleteRingsOnly = True - # this will automatically be set to True - params.BondCompareParameters.CompleteRingsOnly = False - res = rdFMCS.FindMCS(mols, params) - self.assertEqual(res.numAtoms, 5) - self.assertEqual(res.numBonds, 5) - self.assertEqual(res.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") + def setUp(self): + pass + + def test1(self): + Common.test1(self) + + def test1PythonImpl(self): + Common.test1(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + # DEPRECATED: remove from here in release 2021.01 + def test1PythonImplDeprecated(self): + atom_call = CompareElements.__call__ + setattr(CompareElements, "compare", CompareElements.__call__) + delattr(CompareElements, "__call__") + bond_call = CompareOrder.__call__ + setattr(CompareOrder, "compare", CompareOrder.__call__) + delattr(CompareOrder, "__call__") + Common.test1(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + setattr(CompareElements, "__call__", atom_call) + delattr(CompareElements, "compare") + setattr(CompareOrder, "__call__", bond_call) + delattr(CompareOrder, "compare") + + def test1PythonImplDeprecatedTypo(self): + atom_call = CompareElements.__call__ + setattr(CompareElements, "comparx", CompareElements.__call__) + delattr(CompareElements, "__call__") + bond_call = CompareOrder.__call__ + setattr(CompareOrder, "comparx", CompareOrder.__call__) + delattr(CompareOrder, "__call__") + self.assertRaises( + TypeError, lambda self: Common.test1(self, AtomTyper=CompareElements, BondTyper=CompareOrder)) + setattr(CompareElements, "__call__", atom_call) + delattr(CompareElements, "comparx") + setattr(CompareOrder, "__call__", bond_call) + delattr(CompareOrder, "comparx") + + # DEPRECATED: remove until here in release 2021.01 + + def test2(self): + Common.test2(self) + + def test2PythonImpl(self): + Common.test2(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test2PythonImplAtomTyperOnly(self): + Common.test2(self, AtomTyper=CompareElements) + + def test2PythonImplBondTyperOnly(self): + Common.test2(self, BondTyper=CompareOrder) + + def test3IsotopeMatch(self): + Common.test3IsotopeMatch(self) + + def test3IsotopeMatchPythonImpl(self): + Common.test3IsotopeMatch(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test3IsotopeMatchPythonImplAtomTyperOnly(self): + Common.test3IsotopeMatch(self, AtomTyper=CompareElements) + + def test3IsotopeMatchPythonImplBondTyperOnly(self): + Common.test3IsotopeMatch(self, BondTyper=CompareOrder) + + def test4RingMatches(self): + Common.test4RingMatches(self) + + def test4RingMatchesPythonImpl(self): + Common.test4RingMatches(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test4RingMatchesPythonImplAtomTyperOnly(self): + Common.test4RingMatches(self, AtomTyper=CompareElements) + + def test4RingMatchesPythonImplBondTyperOnly(self): + Common.test4RingMatches(self, BondTyper=CompareOrder) + + def test5AnyMatch(self): + Common.test5AnyMatch(self) + + def test5AnyMatchPythonImpl(self): + Common.test5AnyMatch(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test5AnyMatchPythonImplAtomTyperOnly(self): + Common.test5AnyMatch(self, AtomTyper=CompareElements) + + def test5AnyMatchPythonImplBondTyperOnly(self): + Common.test5AnyMatch(self, BondTyper=CompareOrder) + + def testAtomCompareAnyHeavyAtom(self): + Common.testAtomCompareAnyHeavyAtom(self) + + def testAtomCompareAnyHeavyAtomPythonImpl(self): + Common.testAtomCompareAnyHeavyAtom(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def testAtomCompareAnyHeavyAtomPythonImplAtomTyperOnly(self): + Common.testAtomCompareAnyHeavyAtom(self, AtomTyper=CompareElements) + + def testAtomCompareAnyHeavyAtomPythonImplBondTyperOnly(self): + Common.testAtomCompareAnyHeavyAtom(self, BondTyper=CompareOrder) + + def testAtomCompareAnyHeavyAtom1(self): + Common.testAtomCompareAnyHeavyAtom1(self) + + def testAtomCompareAnyHeavyAtom1PythonImpl(self): + Common.testAtomCompareAnyHeavyAtom1(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def testAtomCompareAnyHeavyAtom1PythonImplAtomTyperOnly(self): + Common.testAtomCompareAnyHeavyAtom1(self, AtomTyper=CompareElements) + + def testAtomCompareAnyHeavyAtom1PythonImplBondTyperOnly(self): + Common.testAtomCompareAnyHeavyAtom1(self, BondTyper=CompareOrder) + + def test6MatchValences(self): + Common.test6MatchValences(self) + + def test6MatchValencesPythonImpl(self): + Common.test6MatchValences(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test6MatchValencesPythonImplAtomTyperOnly(self): + Common.test6MatchValences(self, AtomTyper=CompareElements) + + def test6MatchValencesPythonImplBondTyperOnly(self): + Common.test6MatchValences(self, BondTyper=CompareOrder) + + def test7Seed(self): + Common.test7Seed(self) + + def test7SeedPythonImpl(self): + Common.test7Seed(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test7SeedPythonImplAtomTyperOnly(self): + Common.test7Seed(self, AtomTyper=CompareElements) + + def test7SeedPythonImplBondTyperOnly(self): + Common.test7Seed(self, BondTyper=CompareOrder) + + def test8MatchParams(self): + Common.test8MatchParams(self) + + def test8MatchParamsPythonImpl(self): + Common.test8MatchParams(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test8MatchParamsPythonImplAtomTyperOnly(self): + Common.test8MatchParams(self, AtomTyper=CompareElements) + + def test8MatchParamsPythonImplBondTyperOnly(self): + Common.test8MatchParams(self, BondTyper=CompareOrder) + + def test9MatchCharge(self): + Common.test9MatchCharge(self) + + def test9MatchChargePythonImpl(self): + Common.test9MatchCharge(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test9MatchChargePythonImplAtomTyperOnly(self): + Common.test9MatchCharge(self, AtomTyper=CompareElements) + + def test9MatchChargePythonImplBondTyperOnly(self): + Common.test9MatchCharge(self, BondTyper=CompareOrder) + + def test10MatchChargeAndParams(self): + Common.test10MatchChargeAndParams(self) + + def test10MatchChargeAndParamsPythonImpl(self): + Common.test10MatchChargeAndParams(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test10MatchChargeAndParamsPythonImplAtomTyperOnly(self): + Common.test10MatchChargeAndParams(self, AtomTyper=CompareElements) + + def test10MatchChargeAndParamsPythonImplBondTyperOnly(self): + Common.test10MatchChargeAndParams(self, BondTyper=CompareOrder) + + def test11Github2034(self): + Common.test11Github2034(self) + + def test11Github2034PythonImpl(self): + Common.test11Github2034(self, AtomTyper=CompareElements, BondTyper=CompareOrder) + + def test11Github2034PythonImplAtomTyperOnly(self): + Common.test11Github2034(self, AtomTyper=CompareElements) + + def test11Github2034PythonImplBondTyperOnly(self): + Common.test11Github2034(self, BondTyper=CompareOrder) + + def test12MCSAtomCompareExceptions(self): + ps = rdFMCS.MCSParameters() + smis = ['CCCCC', 'CCC1CCCCC1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + self.assertRaises(TypeError, lambda ps: setattr(ps, "AtomTyper", AtomCompareCompareIsInt())) + self.assertRaises(TypeError, lambda ps: setattr(ps, "AtomTyper", AtomCompareNoCompare())) + + def test13MCSAtomCompareUserData(self): + smis = ['CCOCCOC', 'CCNCCCC'] + ms = [Chem.MolFromSmiles(x) for x in smis] + ps = rdFMCS.MCSParameters() + ps.AtomTyper = AtomCompareUserData() + ps.AtomTyper.setMatchAnyHet(False) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 2) + ps.AtomTyper.setMatchAnyHet(True) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 5) + + def test14MCSBondCompareExceptions(self): + ps = rdFMCS.MCSParameters() + smis = ['CCCCC', 'CCC1CCCCC1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + self.assertRaises(TypeError, lambda ps: setattr(ps, "BondTyper", BondCompareCompareIsInt())) + self.assertRaises(TypeError, lambda ps: setattr(ps, "BondTyper", BondCompareNoCompare())) + + def test15MCSBondCompareUserData(self): + smis = ['C1CC=CCC1', 'c1ccccc1'] + ms = [Chem.MolFromSmiles(x) for x in smis] + ps = rdFMCS.MCSParameters() + ps.BondTyper = BondCompareUserData() + ps.BondCompareParameters.CompleteRingsOnly = True + ps.BondTyper.setIgnoreAromatization(False) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 0) + ps.BondTyper.setIgnoreAromatization(True) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertEqual(mcs.numAtoms, 6) + + def test16MCSProgressCallbackExceptions(self): + ps = rdFMCS.MCSParameters() + smis = ['CCCC(C)CC(CC)CC', 'OC(N)CC(C)CC(CC)CC'] + ms = [Chem.MolFromSmiles(x) for x in smis] + self.assertRaises(TypeError, + lambda ps: setattr(ps, "ProgressCallback", ProgressCallbackCallbackIsInt())) + self.assertRaises(TypeError, + lambda ps: setattr(ps, "ProgressCallback", ProgressCallbackNoCallback())) + + def test17MCSProgressCallbackCancel(self): + ps = rdFMCS.MCSParameters() + smis = ['CCCC(C)CC(CC)CC', 'OC(N)CC(C)CC(CC)CC'] + ms = [Chem.MolFromSmiles(x) for x in smis] + ps.AtomTyper = CompareElements() + ps.ProgressCallback = ProgressCallback(self) + mcs = rdFMCS.FindMCS(ms, ps) + self.assertTrue(mcs.canceled) + self.assertEqual(ps.ProgressCallback.callCount, 3) + + # DEPRECATED: remove from here in release 2021.01 + def test17MCSProgressCallbackCancelDeprecated(self): + callback = ProgressCallback.__call__ + setattr(ProgressCallback, "callback", ProgressCallback.__call__) + delattr(ProgressCallback, "__call__") + self.test17MCSProgressCallbackCancel() + setattr(ProgressCallback, "__call__", callback) + delattr(ProgressCallback, "callback") + + def test17MCSProgressCallbackCancelDeprecatedTypo(self): + callback = ProgressCallback.__call__ + setattr(ProgressCallback, "callbacx", ProgressCallback.__call__) + delattr(ProgressCallback, "__call__") + self.assertRaises(TypeError, self.test17MCSProgressCallbackCancel) + setattr(ProgressCallback, "__call__", callback) + delattr(ProgressCallback, "callbacx") + + # DEPRECATED: remove until here in release 2021.01 + + def test18GitHub3693(self): + mols = [ + Chem.MolFromSmiles(smi) + for smi in ["Nc1ccc(O)cc1c1ccc2ccccc2c1", "Oc1cnc(NC2CCC2)c(c1)c1ccc2ccccc2c1"] + ] + params = rdFMCS.MCSParameters() + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 17) + self.assertEqual(res.numBonds, 18) + self.assertEqual( + res.smartsString, + "[#7]-,:[#6]:[#6](:[#6]:[#6](:[#6])-[#8])-[#6]1:[#6]:[#6]:[#6]2:[#6](:[#6]:1):[#6]:[#6]:[#6]:[#6]:2" + ) + + params = rdFMCS.MCSParameters() + params.BondCompareParameters.CompleteRingsOnly = True + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 11) + self.assertEqual(res.numBonds, 12) + self.assertEqual( + res.smartsString, + "[#6]-&!@[#6]1:&@[#6]:&@[#6]:&@[#6]2:&@[#6](:&@[#6]:&@1):&@[#6]:&@[#6]:&@[#6]:&@[#6]:&@2") + + params = rdFMCS.MCSParameters() + params.AtomCompareParameters.CompleteRingsOnly = True + params.BondCompareParameters.CompleteRingsOnly = True + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 10) + self.assertEqual(res.numBonds, 11) + self.assertEqual( + res.smartsString, + "[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]2:&@[#6&R](:&@[#6&R]:&@1):&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@2" + ) + + params = rdFMCS.MCSParameters() + params.AtomCompareParameters.CompleteRingsOnly = True + # this will automatically be set to True + params.BondCompareParameters.CompleteRingsOnly = False + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 10) + self.assertEqual(res.numBonds, 11) + self.assertEqual( + res.smartsString, + "[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]2:&@[#6&R](:&@[#6&R]:&@1):&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@2" + ) + + def test19MCS3d(self): + Common.test19MCS3d(self) + + def test20AtomCompareCompleteRingsOnly(self): + mols = [Chem.MolFromSmiles(smi) for smi in ["C1CCCC1C", "C1CCCC1C1CCCCC1"]] + params = rdFMCS.MCSParameters() + params.AtomCompareParameters.CompleteRingsOnly = True + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 5) + self.assertEqual(res.numBonds, 5) + self.assertEqual(res.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") + + params = rdFMCS.MCSParameters() + params.AtomCompareParameters.CompleteRingsOnly = True + # this will automatically be set to True + params.BondCompareParameters.CompleteRingsOnly = False + res = rdFMCS.FindMCS(mols, params) + self.assertEqual(res.numAtoms, 5) + self.assertEqual(res.numBonds, 5) + self.assertEqual(res.smartsString, "[#6&R]1-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@[#6&R]-&@1") + if __name__ == "__main__": - unittest.main() + unittest.main() diff --git a/Code/GraphMol/FileParsers/mol.py b/Code/GraphMol/FileParsers/mol.py index 1178ae526..1b0017de1 100755 --- a/Code/GraphMol/FileParsers/mol.py +++ b/Code/GraphMol/FileParsers/mol.py @@ -1,4 +1,3 @@ - from Chem import rdmol from Chem.rdmol import Atom, Bond, Mol diff --git a/Code/GraphMol/FilterCatalog/Wrap/rough_test.py b/Code/GraphMol/FilterCatalog/Wrap/rough_test.py index 2858efaa5..71b302215 100644 --- a/Code/GraphMol/FilterCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/FilterCatalog/Wrap/rough_test.py @@ -34,19 +34,18 @@ it is intended to be shallow but broad. """ - -import doctest, unittest, os +import doctest +import os import pickle +import unittest + from rdkit import RDConfig from rdkit.RDLogger import logger + logger = logger() -from rdkit import Chem -from rdkit import rdBase -from rdkit.Chem import rdfiltercatalog -from rdkit.Chem import FilterCatalog, rdMolDescriptors -from rdkit.Chem.FilterCatalog import FilterCatalogParams -from rdkit.Chem.FilterCatalog import FilterMatchOps -from rdkit import DataStructs +from rdkit import Chem, DataStructs, rdBase +from rdkit.Chem import FilterCatalog, rdfiltercatalog, rdMolDescriptors +from rdkit.Chem.FilterCatalog import FilterCatalogParams, FilterMatchOps def load_tests(loader, tests, ignore): @@ -124,9 +123,9 @@ class TestCase(unittest.TestCase): print(not_match) def test2FilterCatalogTest(self): - tests = ((FilterCatalogParams.FilterCatalogs.PAINS_A, 16), - (FilterCatalogParams.FilterCatalogs.PAINS_B, 55), - (FilterCatalogParams.FilterCatalogs.PAINS_C, 409), + tests = ((FilterCatalogParams.FilterCatalogs.PAINS_A, + 16), (FilterCatalogParams.FilterCatalogs.PAINS_B, + 55), (FilterCatalogParams.FilterCatalogs.PAINS_C, 409), (FilterCatalogParams.FilterCatalogs.PAINS, 409 + 16 + 55)) for catalog_idx, num in tests: @@ -149,8 +148,9 @@ class TestCase(unittest.TestCase): for index, catalog in enumerate(catalogs): self.assertEqual(catalog.GetNumEntries(), num) - if catalog_idx in [FilterCatalogParams.FilterCatalogs.PAINS_A, - FilterCatalogParams.FilterCatalogs.PAINS]: + if catalog_idx in [ + FilterCatalogParams.FilterCatalogs.PAINS_A, FilterCatalogParams.FilterCatalogs.PAINS + ]: # http://chemistrycompass.com/chemsearch/58909/ mol = Chem.MolFromSmiles("O=C(Cn1cnc2c1c(=O)n(C)c(=O)n2C)N/N=C/c1c(O)ccc2c1cccc2") entry = catalog.GetFirstMatch(mol) @@ -269,8 +269,8 @@ class TestCase(unittest.TestCase): entry = catalog.GetEntryWithIdx(10) desc = entry.GetDescription() count = 0 - descs = set([catalog.GetEntryWithIdx(i).GetDescription() - for i in range(catalog.GetNumEntries())]) + descs = set( + [catalog.GetEntryWithIdx(i).GetDescription() for i in range(catalog.GetNumEntries())]) for i in range(catalog.GetNumEntries()): if catalog.GetEntryWithIdx(i).GetDescription() == desc: count += 1 @@ -281,8 +281,8 @@ class TestCase(unittest.TestCase): del entry self.assertTrue(catalog.GetNumEntries() == sz - 1) - descs2 = set([catalog.GetEntryWithIdx(i).GetDescription() - for i in range(catalog.GetNumEntries())]) + descs2 = set( + [catalog.GetEntryWithIdx(i).GetDescription() for i in range(catalog.GetNumEntries())]) print(descs - descs2) newcount = 0 @@ -361,9 +361,10 @@ class TestCase(unittest.TestCase): def testFilterHierarchyMatcher(self): # test root = FilterCatalog.FilterHierarchyMatcher() - sm = h = FilterCatalog.SmartsMatcher("Halogen", "[$([F,Cl,Br,I]-!@[#6]);!$([F,Cl,Br,I]" - "-!@C-!@[F,Cl,Br,I]);!$([F,Cl,Br,I]-[C,S]" - "(=[O,S,N]))]", 1) + sm = h = FilterCatalog.SmartsMatcher( + "Halogen", "[$([F,Cl,Br,I]-!@[#6]);!$([F,Cl,Br,I]" + "-!@C-!@[F,Cl,Br,I]);!$([F,Cl,Br,I]-[C,S]" + "(=[O,S,N]))]", 1) root.SetPattern(sm) def hierarchy(matcher): @@ -374,35 +375,34 @@ class TestCase(unittest.TestCase): sm = FilterCatalog.SmartsMatcher("Halogen.Aromatic", "[F,Cl,Br,I;$(*-!@c)]") root.AddChild(hierarchy(sm)) - sm = FilterCatalog.SmartsMatcher("Halogen.NotFluorine", "[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]" - "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S]" - "(=[O,S,N]))]") + sm = FilterCatalog.SmartsMatcher( + "Halogen.NotFluorine", "[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]" + "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S]" + "(=[O,S,N]))]") node = hierarchy(sm) halogen_notf_children = [ - hierarchy(x) - for x in [ + hierarchy(x) for x in [ FilterCatalog.SmartsMatcher( "Halogen.NotFluorine.Aliphatic", "[$([Cl,Br,I]-!@C);!$([Cl,Br,I]" - "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]"), FilterCatalog.SmartsMatcher( - "Halogen.NotFluorine.Aromatic", "[$([Cl,Br,I]-!@c)]") + "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]"), + FilterCatalog.SmartsMatcher("Halogen.NotFluorine.Aromatic", "[$([Cl,Br,I]-!@c)]") ] ] for child in halogen_notf_children: node.AddChild(child) root.AddChild(node) - sm = FilterCatalog.SmartsMatcher("Halogen.Bromine", - "[Br;$([Br]-!@[#6]);!$([Br]-!@C-!@[F,Cl,Br,I])" - ";!$([Br]-[C,S](=[O,S,N]))]", 1) + sm = FilterCatalog.SmartsMatcher( + "Halogen.Bromine", "[Br;$([Br]-!@[#6]);!$([Br]-!@C-!@[F,Cl,Br,I])" + ";!$([Br]-[C,S](=[O,S,N]))]", 1) node = hierarchy(sm) halogen_bromine_children = [ - hierarchy(x) - for x in [ + hierarchy(x) for x in [ FilterCatalog.SmartsMatcher( "Halogen.Bromine.Aliphatic", "[Br;$(Br-!@C);!$(Br-!@C-!@[F,Cl,Br,I]);" - "!$(Br-[C,S](=[O,S,N]))]"), FilterCatalog.SmartsMatcher( - "Halogen.Bromine.Aromatic", "[Br;$(Br-!@c)]"), FilterCatalog.SmartsMatcher( - "Halogen.Bromine.BromoKetone", "[Br;$(Br-[CH2]-C(=O)-[#6])]") + "!$(Br-[C,S](=[O,S,N]))]"), + FilterCatalog.SmartsMatcher("Halogen.Bromine.Aromatic", "[Br;$(Br-!@c)]"), + FilterCatalog.SmartsMatcher("Halogen.Bromine.BromoKetone", "[Br;$(Br-[CH2]-C(=O)-[#6])]") ] ] for child in halogen_bromine_children: @@ -427,9 +427,9 @@ class TestCase(unittest.TestCase): res = root.GetMatches(m) self.assertEquals(len(res), 3) - self.assertEquals([match.filterMatch.GetName() for match in res], - ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic', - 'Halogen.Bromine.Aromatic']) + self.assertEquals( + [match.filterMatch.GetName() for match in res], + ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic', 'Halogen.Bromine.Aromatic']) m = Chem.MolFromSmiles("c1ccccc1F") assert h.HasMatch(m) @@ -448,12 +448,13 @@ class TestCase(unittest.TestCase): def testFunctionalGroupHierarchy(self): fc = FilterCatalog.GetFunctionalGroupHierarchy() - matches = [(Chem.MolFromSmiles("CCl"), ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic']), - (Chem.MolFromSmiles("c1ccccc1Cl"), - ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic']), - (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen.Aromatic']), ( - Chem.MolFromSmiles("CBr"), ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic', - 'Halogen.Bromine.Aliphatic'])] + matches = [ + (Chem.MolFromSmiles("CCl"), ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic']), + (Chem.MolFromSmiles("c1ccccc1Cl"), ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic']), + (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen.Aromatic']), + (Chem.MolFromSmiles("CBr"), + ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic', 'Halogen.Bromine.Aliphatic']) + ] catalogs = [fc] if FilterCatalog.FilterCatalogCanSerialize(): @@ -477,19 +478,22 @@ class TestCase(unittest.TestCase): queryDefs = FilterCatalog.GetFlattenedFunctionalGroupHierarchy() items = sorted(queryDefs.items()) - matches = [(Chem.MolFromSmiles("CCl"), ['Halogen', 'Halogen.Aliphatic', 'Halogen.NotFluorine', - 'Halogen.NotFluorine.Aliphatic']), - (Chem.MolFromSmiles("c1ccccc1Cl"), - ['Halogen', 'Halogen.Aromatic', 'Halogen.NotFluorine', - 'Halogen.NotFluorine.Aromatic']), (Chem.MolFromSmiles("c1ccccc1F"), - ['Halogen', 'Halogen.Aromatic']), - (Chem.MolFromSmiles("CBr"), ['Halogen', - 'Halogen.Aliphatic', - 'Halogen.Bromine', - 'Halogen.Bromine.Aliphatic', - 'Halogen.NotFluorine', - 'Halogen.NotFluorine.Aliphatic', ])] - + matches = [ + (Chem.MolFromSmiles("CCl"), + ['Halogen', 'Halogen.Aliphatic', 'Halogen.NotFluorine', 'Halogen.NotFluorine.Aliphatic']), + (Chem.MolFromSmiles("c1ccccc1Cl"), + ['Halogen', 'Halogen.Aromatic', 'Halogen.NotFluorine', 'Halogen.NotFluorine.Aromatic']), + (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen', 'Halogen.Aromatic']), + (Chem.MolFromSmiles("CBr"), [ + 'Halogen', + 'Halogen.Aliphatic', + 'Halogen.Bromine', + 'Halogen.Bromine.Aliphatic', + 'Halogen.NotFluorine', + 'Halogen.NotFluorine.Aliphatic', + ]) + ] + # test the normalized groups for mol, res in matches: hits = [name for name, pat in items if mol.HasSubstructMatch(pat)] @@ -498,18 +502,21 @@ class TestCase(unittest.TestCase): items = sorted(queryDefs.items()) - matches = [(Chem.MolFromSmiles("CCl"), ['halogen', 'halogen.aliphatic', 'halogen.notfluorine', - 'halogen.notfluorine.aliphatic']), - (Chem.MolFromSmiles("c1ccccc1Cl"), - ['halogen', 'halogen.aromatic', 'halogen.notfluorine', - 'halogen.notfluorine.aromatic']), (Chem.MolFromSmiles("c1ccccc1F"), - ['halogen', 'halogen.aromatic']), - (Chem.MolFromSmiles("CBr"), ['halogen', - 'halogen.aliphatic', - 'halogen.bromine', - 'halogen.bromine.aliphatic', - 'halogen.notfluorine', - 'halogen.notfluorine.aliphatic', ])] + matches = [ + (Chem.MolFromSmiles("CCl"), + ['halogen', 'halogen.aliphatic', 'halogen.notfluorine', 'halogen.notfluorine.aliphatic']), + (Chem.MolFromSmiles("c1ccccc1Cl"), + ['halogen', 'halogen.aromatic', 'halogen.notfluorine', 'halogen.notfluorine.aromatic']), + (Chem.MolFromSmiles("c1ccccc1F"), ['halogen', 'halogen.aromatic']), + (Chem.MolFromSmiles("CBr"), [ + 'halogen', + 'halogen.aliphatic', + 'halogen.bromine', + 'halogen.bromine.aliphatic', + 'halogen.notfluorine', + 'halogen.notfluorine.aliphatic', + ]) + ] for mol, res in matches: hits = [name for name, pat in items if mol.HasSubstructMatch(pat)] @@ -526,13 +533,11 @@ class TestCase(unittest.TestCase): params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B) params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C) fc = FilterCatalog.FilterCatalog(params) - + results = FilterCatalog.RunFilterCatalog(fc, smiles) self.assertEquals(len(results), 3) - descriptions = ["hzone_phenol_A(479)", - "cyano_imine_B(17)", - "keto_keto_gamma(5)"] + descriptions = ["hzone_phenol_A(479)", "cyano_imine_B(17)", "keto_keto_gamma(5)"] for i, res in enumerate(results): self.assertTrue(len(res) > 0) @@ -577,23 +582,25 @@ class TestCase(unittest.TestCase): self.assertTrue(entry.GetDescription() == "MW Violation") print("running") - results = FilterCatalog.RunFilterCatalog(fc, smiles*10, numThreads=3) + results = FilterCatalog.RunFilterCatalog(fc, smiles * 10, numThreads=3) def test_pw_chembl_filters(self): # just ensure we have the right numbers - tests = ((FilterCatalogParams.FilterCatalogs.CHEMBL_BMS, 180), - (FilterCatalogParams.FilterCatalogs.CHEMBL_LINT, 57), - (FilterCatalogParams.FilterCatalogs.CHEMBL_Glaxo, 55), - (FilterCatalogParams.FilterCatalogs.CHEMBL_MLSMR, 116), - (FilterCatalogParams.FilterCatalogs.CHEMBL_Dundee, 105), - (FilterCatalogParams.FilterCatalogs.CHEMBL_Inpharmatica, 91), - (FilterCatalogParams.FilterCatalogs.CHEMBL_SureChEMBL, 166), - ) + tests = ( + (FilterCatalogParams.FilterCatalogs.CHEMBL_BMS, 180), + (FilterCatalogParams.FilterCatalogs.CHEMBL_LINT, 57), + (FilterCatalogParams.FilterCatalogs.CHEMBL_Glaxo, 55), + (FilterCatalogParams.FilterCatalogs.CHEMBL_MLSMR, 116), + (FilterCatalogParams.FilterCatalogs.CHEMBL_Dundee, 105), + (FilterCatalogParams.FilterCatalogs.CHEMBL_Inpharmatica, 91), + (FilterCatalogParams.FilterCatalogs.CHEMBL_SureChEMBL, 166), + ) for catalog_idx, num in tests: params = FilterCatalog.FilterCatalogParams() self.assertTrue(params.AddCatalog(catalog_idx)) catalog = FilterCatalog.FilterCatalog(params) self.assertTrue(num == catalog.GetNumEntries()) - + + if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/FilterCatalog/update_pains.py b/Code/GraphMol/FilterCatalog/update_pains.py index 8496a31de..5d87b0f96 100644 --- a/Code/GraphMol/FilterCatalog/update_pains.py +++ b/Code/GraphMol/FilterCatalog/update_pains.py @@ -1,29 +1,34 @@ # must be run from this directory -import csv, os, sys +import csv +import os +import sys + py3 = sys.version_info[0] == 3 -pains_a = ['ene_six_het_A(483)', 'hzone_phenol_A(479)', 'anil_di_alk_A(478)', 'indol_3yl_alk(461)', - 'quinone_A(370)', 'azo_A(324)', 'imine_one_A(321)', 'mannich_A(296)', - 'anil_di_alk_B(251)', 'anil_di_alk_C(246)', 'ene_rhod_A(235)', 'hzone_phenol_B(215)', - 'ene_five_het_A(201)', 'anil_di_alk_D(198)', 'imine_one_isatin(189)', - 'anil_di_alk_E(186)'] +pains_a = [ + 'ene_six_het_A(483)', 'hzone_phenol_A(479)', 'anil_di_alk_A(478)', 'indol_3yl_alk(461)', + 'quinone_A(370)', 'azo_A(324)', 'imine_one_A(321)', 'mannich_A(296)', 'anil_di_alk_B(251)', + 'anil_di_alk_C(246)', 'ene_rhod_A(235)', 'hzone_phenol_B(215)', 'ene_five_het_A(201)', + 'anil_di_alk_D(198)', 'imine_one_isatin(189)', 'anil_di_alk_E(186)' +] -pains_b = ['thiaz_ene_A(128)', 'pyrrole_A(118)', 'catechol_A(92)', 'ene_five_het_B(90)', - 'imine_one_fives(89)', 'ene_five_het_C(85)', 'hzone_pipzn(79)', 'keto_keto_beta_A(68)', - 'hzone_pyrrol(64)', 'ene_one_ene_A(57)', 'cyano_ene_amine_A(56)', 'ene_five_one_A(55)', - 'cyano_pyridone_A(54)', 'anil_alk_ene(51)', 'amino_acridine_A(46)', 'ene_five_het_D(46)', - 'thiophene_amino_Aa(45)', 'ene_five_het_E(44)', 'sulfonamide_A(43)', 'thio_ketone(43)', - 'sulfonamide_B(41)', 'anil_no_alk(40)', 'thiophene_amino_Ab(40)', - 'het_pyridiniums_A(39)', 'anthranil_one_A(38)', 'cyano_imine_A(37)', - 'diazox_sulfon_A(36)', 'hzone_anil_di_alk(35)', 'rhod_sat_A(33)', 'hzone_enamin(30)', - 'pyrrole_B(29)', 'thiophene_hydroxy(28)', 'cyano_pyridone_B(27)', 'imine_one_sixes(27)', - 'dyes5A(27)', 'naphth_amino_A(25)', 'naphth_amino_B(25)', 'ene_one_ester(24)', - 'thio_dibenzo(23)', 'cyano_cyano_A(23)', 'hzone_acyl_naphthol(22)', 'het_65_A(21)', - 'imidazole_A(19)', 'ene_cyano_A(19)', 'anthranil_acid_A(19)', 'dyes3A(19)', - 'dhp_bis_amino_CN(19)', 'het_6_tetrazine(18)', 'ene_one_hal(17)', 'cyano_imine_B(17)', - 'thiaz_ene_B(17)', 'ene_rhod_B(16)', 'thio_carbonate_A(15)', 'anil_di_alk_furan_A(15)', - 'ene_five_het_F(15)'] +pains_b = [ + 'thiaz_ene_A(128)', 'pyrrole_A(118)', 'catechol_A(92)', 'ene_five_het_B(90)', + 'imine_one_fives(89)', 'ene_five_het_C(85)', 'hzone_pipzn(79)', 'keto_keto_beta_A(68)', + 'hzone_pyrrol(64)', 'ene_one_ene_A(57)', 'cyano_ene_amine_A(56)', 'ene_five_one_A(55)', + 'cyano_pyridone_A(54)', 'anil_alk_ene(51)', 'amino_acridine_A(46)', 'ene_five_het_D(46)', + 'thiophene_amino_Aa(45)', 'ene_five_het_E(44)', 'sulfonamide_A(43)', 'thio_ketone(43)', + 'sulfonamide_B(41)', 'anil_no_alk(40)', 'thiophene_amino_Ab(40)', 'het_pyridiniums_A(39)', + 'anthranil_one_A(38)', 'cyano_imine_A(37)', 'diazox_sulfon_A(36)', 'hzone_anil_di_alk(35)', + 'rhod_sat_A(33)', 'hzone_enamin(30)', 'pyrrole_B(29)', 'thiophene_hydroxy(28)', + 'cyano_pyridone_B(27)', 'imine_one_sixes(27)', 'dyes5A(27)', 'naphth_amino_A(25)', + 'naphth_amino_B(25)', 'ene_one_ester(24)', 'thio_dibenzo(23)', 'cyano_cyano_A(23)', + 'hzone_acyl_naphthol(22)', 'het_65_A(21)', 'imidazole_A(19)', 'ene_cyano_A(19)', + 'anthranil_acid_A(19)', 'dyes3A(19)', 'dhp_bis_amino_CN(19)', 'het_6_tetrazine(18)', + 'ene_one_hal(17)', 'cyano_imine_B(17)', 'thiaz_ene_B(17)', 'ene_rhod_B(16)', + 'thio_carbonate_A(15)', 'anil_di_alk_furan_A(15)', 'ene_five_het_F(15)' +] pains_c = [ 'anil_di_alk_F(14)', 'hzone_anil(14)', 'het_5_pyrazole_OH(14)', 'het_thio_666_A(13)', @@ -171,6 +176,7 @@ def write_pains(filename, data): if t != data: if py3: import io + # newline = don't convert to windows style with io.open(filename, 'w', newline='') as f: f.write(data) @@ -179,6 +185,7 @@ def write_pains(filename, data): with open(filename, 'wb') as f: f.write(data) + write_pains(PAINS_A_FILENAME, PAINS_A) write_pains(PAINS_B_FILENAME, PAINS_B) write_pains(PAINS_C_FILENAME, PAINS_C) diff --git a/Code/GraphMol/Fingerprints/Wrap/testGenerators.py b/Code/GraphMol/Fingerprints/Wrap/testGenerators.py index fa899e149..ffa3b26c8 100644 --- a/Code/GraphMol/Fingerprints/Wrap/testGenerators.py +++ b/Code/GraphMol/Fingerprints/Wrap/testGenerators.py @@ -1,7 +1,9 @@ +import unittest + +import numpy as np + from rdkit import Chem, DataStructs from rdkit.Chem import rdFingerprintGenerator -import numpy as np -import unittest class TestCase(unittest.TestCase): @@ -340,5 +342,6 @@ class TestCase(unittest.TestCase): nz = fp.GetNonzeroElements() self.assertEqual(len(nz), 1) + if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/Fingerprints/Wrap/testMHFP.py b/Code/GraphMol/Fingerprints/Wrap/testMHFP.py index b724889e5..7fcd4dee8 100644 --- a/Code/GraphMol/Fingerprints/Wrap/testMHFP.py +++ b/Code/GraphMol/Fingerprints/Wrap/testMHFP.py @@ -7,9 +7,10 @@ which is included in the file license.txt, found at the root of the RDKit source tree. """ +import unittest + from rdkit import Chem from rdkit.Chem import rdMHFPFingerprint -import unittest class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/ForceFieldHelpers/Wrap/testHelpers.py b/Code/GraphMol/ForceFieldHelpers/Wrap/testHelpers.py index 0d935d0f8..c5080240e 100644 --- a/Code/GraphMol/ForceFieldHelpers/Wrap/testHelpers.py +++ b/Code/GraphMol/ForceFieldHelpers/Wrap/testHelpers.py @@ -1,10 +1,11 @@ -from rdkit import Chem -from rdkit.Chem import ChemicalForceFields, rdDistGeom -from rdkit import RDConfig -import unittest import os +import unittest + import numpy +from rdkit import Chem, RDConfig +from rdkit.Chem import ChemicalForceFields, rdDistGeom + def feq(v1, v2, tol2=1e-4): return abs(v1 - v2) <= tol2 @@ -158,31 +159,30 @@ M END""" mp = ChemicalForceFields.MMFFGetMoleculeProperties(m) mmffBondStretchParams = mp.GetMMFFBondStretchParams(m, 6, 7) self.assertTrue(mmffBondStretchParams) - self.assertTrue((mmffBondStretchParams[0] == 0) and - (int(round(mmffBondStretchParams[1] * 1000) == 4258)) and - (int(round(mmffBondStretchParams[2] * 1000) == 1508))) + self.assertTrue((mmffBondStretchParams[0] == 0) + and (int(round(mmffBondStretchParams[1] * 1000) == 4258)) + and (int(round(mmffBondStretchParams[2] * 1000) == 1508))) mmffBondStretchParams = mp.GetMMFFBondStretchParams(m, 0, 7) self.assertFalse(mmffBondStretchParams) mmffAngleBendParams = mp.GetMMFFAngleBendParams(m, 6, 7, 8) self.assertTrue(mmffAngleBendParams) - self.assertTrue((mmffAngleBendParams[0] == 0) and - (int(round(mmffAngleBendParams[1] * 1000) == 777)) and - (int(round(mmffAngleBendParams[2] * 1000) == 108290))) + self.assertTrue((mmffAngleBendParams[0] == 0) + and (int(round(mmffAngleBendParams[1] * 1000) == 777)) + and (int(round(mmffAngleBendParams[2] * 1000) == 108290))) mmffAngleBendParams = mp.GetMMFFAngleBendParams(m, 0, 7, 8) self.assertFalse(mmffAngleBendParams) mmffStretchBendParams = mp.GetMMFFStretchBendParams(m, 6, 7, 8) self.assertTrue(mmffStretchBendParams) - self.assertTrue((mmffStretchBendParams[0] == 0) and - (int(round(mmffStretchBendParams[1] * 1000) == 136)) and - (int(round(mmffStretchBendParams[2] * 1000) == 282))) + self.assertTrue((mmffStretchBendParams[0] == 0) + and (int(round(mmffStretchBendParams[1] * 1000) == 136)) + and (int(round(mmffStretchBendParams[2] * 1000) == 282))) mmffStretchBendParams = mp.GetMMFFStretchBendParams(m, 0, 7, 8) self.assertFalse(mmffStretchBendParams) mmffTorsionParams = mp.GetMMFFTorsionParams(m, 6, 7, 8, 9) self.assertTrue(mmffTorsionParams) - self.assertTrue((mmffTorsionParams[0] == 0) and - (int(round(mmffTorsionParams[1] * 1000) == 0)) and - (int(round(mmffTorsionParams[2] * 1000) == -300)) and - (int(round(mmffTorsionParams[3] * 1000) == 500))) + self.assertTrue((mmffTorsionParams[0] == 0) and (int(round(mmffTorsionParams[1] * 1000) == 0)) + and (int(round(mmffTorsionParams[2] * 1000) == -300)) + and (int(round(mmffTorsionParams[3] * 1000) == 500))) mmffTorsionParams = mp.GetMMFFTorsionParams(m, 0, 7, 8, 9) self.assertFalse(mmffTorsionParams) mmffOopBendParams = mp.GetMMFFOopBendParams(m, 6, 5, 4, 0) @@ -196,24 +196,24 @@ M END""" hIdx = sub1[2] mmffVdWParams = mp.GetMMFFVdWParams(nIdx, hIdx) self.assertTrue(mmffVdWParams) - self.assertTrue((int(round(mmffVdWParams[0] * 1000)) == 3321) and - (int(round(mmffVdWParams[1] * 1000)) == 34) and - (int(round(mmffVdWParams[2] * 1000)) == 2657) and - (int(round(mmffVdWParams[3] * 1000)) == 17)) + self.assertTrue((int(round(mmffVdWParams[0] * 1000)) == 3321) + and (int(round(mmffVdWParams[1] * 1000)) == 34) + and (int(round(mmffVdWParams[2] * 1000)) == 2657) + and (int(round(mmffVdWParams[3] * 1000)) == 17)) def test10(self): m = Chem.MolFromSmiles('c1ccccc1CCNN') m = Chem.AddHs(m) uffBondStretchParams = ChemicalForceFields.GetUFFBondStretchParams(m, 6, 7) self.assertTrue(uffBondStretchParams) - self.assertTrue((int(round(uffBondStretchParams[0] * 1000) == 699592)) and - (int(round(uffBondStretchParams[1] * 1000) == 1514))) + self.assertTrue((int(round(uffBondStretchParams[0] * 1000) == 699592)) + and (int(round(uffBondStretchParams[1] * 1000) == 1514))) uffBondStretchParams = ChemicalForceFields.GetUFFBondStretchParams(m, 0, 7) self.assertFalse(uffBondStretchParams) uffAngleBendParams = ChemicalForceFields.GetUFFAngleBendParams(m, 6, 7, 8) self.assertTrue(uffAngleBendParams) - self.assertTrue((int(round(uffAngleBendParams[0] * 1000) == 303297)) and - (int(round(uffAngleBendParams[1] * 1000) == 109470))) + self.assertTrue((int(round(uffAngleBendParams[0] * 1000) == 303297)) + and (int(round(uffAngleBendParams[1] * 1000) == 109470))) uffAngleBendParams = ChemicalForceFields.GetUFFAngleBendParams(m, 0, 7, 8) self.assertFalse(uffAngleBendParams) uffTorsionParams = ChemicalForceFields.GetUFFTorsionParams(m, 6, 7, 8, 9) @@ -228,8 +228,8 @@ M END""" self.assertFalse(uffInversionParams) uffVdWParams = ChemicalForceFields.GetUFFVdWParams(m, 0, 9) self.assertTrue(uffVdWParams) - self.assertTrue((int(round(uffVdWParams[0] * 1000)) == 3754) and - (int(round(uffVdWParams[1] * 1000)) == 85)) + self.assertTrue((int(round(uffVdWParams[0] * 1000)) == 3754) + and (int(round(uffVdWParams[1] * 1000)) == 85)) def test11(self): query = Chem.MolFromSmarts('c1cccn1') @@ -369,12 +369,15 @@ M END""" self.assertEqual(len(cids), 10) mp = ChemicalForceFields.MMFFGetMoleculeProperties(m) ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp) - before = [ChemicalForceFields.MMFFGetMoleculeForceField(m, mp, confId=cid).CalcEnergy() for cid in cids] + before = [ + ChemicalForceFields.MMFFGetMoleculeForceField(m, mp, confId=cid).CalcEnergy() for cid in cids + ] res, after = tuple(zip(*ChemicalForceFields.OptimizeMoleculeConfs(m, ff, maxIters=200))) self.assertEqual(len(res), 10) self.assertEqual(len(before), len(after)) self.assertTrue(all(map(lambda i: i == 0, res))) self.assertTrue(all(after[i] < b for i, b in enumerate(before))) + if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/FragCatalog/Wrap/rough_test.py b/Code/GraphMol/FragCatalog/Wrap/rough_test.py index 5b3bfc10b..9bc6c0469 100755 --- a/Code/GraphMol/FragCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/FragCatalog/Wrap/rough_test.py @@ -8,14 +8,16 @@ it's intended to be shallow, but broad """ -import unittest, os +import os import pickle +import unittest + from rdkit import RDConfig from rdkit.RDLogger import logger + logger = logger() -from rdkit import Chem +from rdkit import Chem, DataStructs from rdkit.Chem import FragmentCatalog -from rdkit import DataStructs class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/MMPA/Wrap/testMMPA.py b/Code/GraphMol/MMPA/Wrap/testMMPA.py index 4976da670..7016d08a5 100644 --- a/Code/GraphMol/MMPA/Wrap/testMMPA.py +++ b/Code/GraphMol/MMPA/Wrap/testMMPA.py @@ -1,7 +1,6 @@ - -from rdkit import RDConfig import unittest -from rdkit import Chem + +from rdkit import Chem, RDConfig from rdkit.Chem import rdMMPA @@ -95,7 +94,8 @@ class TestCase(unittest.TestCase): def test5(self): m = Chem.MolFromSmiles( - "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N") # ALPHA-CONOTOXIN SI + "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N" + ) # ALPHA-CONOTOXIN SI frags = rdMMPA.FragmentMol(m, resultsAsMols=False) self.assertFalse(len(frags)) frags = rdMMPA.FragmentMol(m, maxCuts=2, maxCutBonds=21, resultsAsMols=False) @@ -103,46 +103,41 @@ class TestCase(unittest.TestCase): def test6(self): m = Chem.MolFromSmiles( - "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N") # ALPHA-CONOTOXIN SI + "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N" + ) # ALPHA-CONOTOXIN SI frags = rdMMPA.FragmentMol(m, resultsAsMols=False) self.assertFalse(len(frags)) - frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=1, maxCutBonds=21, - resultsAsMols=False) - frags2 = rdMMPA.FragmentMol(m, minCuts=2, maxCuts=2, maxCutBonds=21, - resultsAsMols=False) + frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=1, maxCutBonds=21, resultsAsMols=False) + frags2 = rdMMPA.FragmentMol(m, minCuts=2, maxCuts=2, maxCutBonds=21, resultsAsMols=False) frags = rdMMPA.FragmentMol(m, maxCuts=2, maxCutBonds=21, resultsAsMols=False) - self.assertEqual(set(frags1+frags2), set(frags)) + self.assertEqual(set(frags1 + frags2), set(frags)) self.assertEqual(set(frags1).intersection(set(frags2)), set()) - def test7(self): m = Chem.MolFromSmiles("Oc1ccccc1N") - frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=1, maxCutBonds=21, - resultsAsMols=False) + frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=1, maxCutBonds=21, resultsAsMols=False) - frags2 = rdMMPA.FragmentMol(m, minCuts=2, maxCuts=2, maxCutBonds=21, - resultsAsMols=False) + frags2 = rdMMPA.FragmentMol(m, minCuts=2, maxCuts=2, maxCutBonds=21, resultsAsMols=False) frags = rdMMPA.FragmentMol(m, maxCuts=2, maxCutBonds=21, resultsAsMols=False) - self.assertEqual(set(frags1+frags2), set(frags)) - + self.assertEqual(set(frags1 + frags2), set(frags)) def test8(self): m = Chem.MolFromSmiles('Cc1ccccc1NC(=O)C(C)[NH+]1CCCC1') # ZINC00000051 sm = Chem.MolFromSmarts("[#6+0;!$(*=,#[!#6])]!@!=!#[*]") matching_atoms = m.GetSubstructMatches(sm) bonds = [] - for a,b in matching_atoms: - bond = m.GetBondBetweenAtoms(a,b) + for a, b in matching_atoms: + bond = m.GetBondBetweenAtoms(a, b) bonds.append(bond.GetIdx()) frags = rdMMPA.FragmentMol(m, resultsAsMols=False) - frags2 = rdMMPA.FragmentMol(m, bonds, resultsAsMols=False) - frags3 = rdMMPA.FragmentMol(m, tuple(bonds), resultsAsMols=False) + frags2 = rdMMPA.FragmentMol(m, bonds, resultsAsMols=False) + frags3 = rdMMPA.FragmentMol(m, tuple(bonds), resultsAsMols=False) self.assertEqual(frags, frags2) self.assertEqual(frags2, frags3) @@ -150,21 +145,18 @@ class TestCase(unittest.TestCase): m = Chem.MolFromSmiles("Oc1ccccc1N") try: - - frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=0, maxCutBonds=21, - resultsAsMols=False) - self.assertTrue(False) # should not get here + + frags1 = rdMMPA.FragmentMol(m, minCuts=1, maxCuts=0, maxCutBonds=21, resultsAsMols=False) + self.assertTrue(False) # should not get here except ValueError as e: self.assertEqual(str(e), "supplied maxCuts is less than minCuts") try: - - frags1 = rdMMPA.FragmentMol(m, minCuts=0, maxCuts=0, maxCutBonds=21, - resultsAsMols=False) - self.assertTrue(False) # should not get here + + frags1 = rdMMPA.FragmentMol(m, minCuts=0, maxCuts=0, maxCutBonds=21, resultsAsMols=False) + self.assertTrue(False) # should not get here except ValueError as e: self.assertEqual(str(e), "minCuts must be greater than 0") - if __name__ == "__main__": diff --git a/Code/GraphMol/MolAlign/Wrap/testMolAlign.py b/Code/GraphMol/MolAlign/Wrap/testMolAlign.py index 56d47c059..2b3e32955 100644 --- a/Code/GraphMol/MolAlign/Wrap/testMolAlign.py +++ b/Code/GraphMol/MolAlign/Wrap/testMolAlign.py @@ -5,12 +5,15 @@ # @@ All Rights Reserved @@ # -from rdkit import RDConfig -import os, sys, copy -import unittest +import copy import math -from rdkit import Chem -from rdkit.Chem import rdMolAlign, rdMolTransforms, rdMolDescriptors, rdDistGeom, ChemicalForceFields +import os +import sys +import unittest + +from rdkit import Chem, RDConfig +from rdkit.Chem import (ChemicalForceFields, rdDistGeom, rdMolAlign, + rdMolDescriptors, rdMolTransforms) def lstFeq(l1, l2, tol=1.e-4): @@ -490,19 +493,23 @@ class TestCase(unittest.TestCase): params = Chem.SmilesParserParams() params.removeHs = False - scaffold = Chem.MolFromSmiles("N1C([H])([H])C([H])([H])C([H])([H])[N+]([H])([H])C([H])([H])C1([H])[H]", params) + scaffold = Chem.MolFromSmiles( + "N1C([H])([H])C([H])([H])C([H])([H])[N+]([H])([H])C([H])([H])C1([H])[H]", params) scaffoldMatch = ref.GetSubstructMatch(scaffold) scaffoldIndicesBitSet = [0] * ref.GetNumAtoms() for idx in scaffoldMatch: scaffoldIndicesBitSet[idx] = 1 - matches = tuple(tuple(enumerate(match)) for match in ref.GetSubstructMatches(prb, uniquify=False)) + matches = tuple( + tuple(enumerate(match)) for match in ref.GetSubstructMatches(prb, uniquify=False)) self.assertGreater(len(matches), 0) - matchesPruned = tuple(tuple(filter(lambda tup: scaffoldIndicesBitSet[tup[1]], match)) for match in matches) + matchesPruned = tuple( + tuple(filter(lambda tup: scaffoldIndicesBitSet[tup[1]], match)) for match in matches) rmsdInPlace = rdMolAlign.CalcRMS(prbCopy2, ref, map=matchesPruned) self.assertAlmostEqual(rmsdInPlace, 2.5672, 3) rmsd = rdMolAlign.GetBestRMS(prb, ref, map=matchesPruned) self.assertAlmostEqual(rmsd, 1.14329, 3) - rmsdCopy, bestTrans, bestMatch = rdMolAlign.GetBestAlignmentTransform(prbCopy2, ref, map=matchesPruned); + rmsdCopy, bestTrans, bestMatch = rdMolAlign.GetBestAlignmentTransform( + prbCopy2, ref, map=matchesPruned) self.assertEqual(str(type(bestTrans)), "") self.assertAlmostEqual(rmsd, rmsdCopy, 3) self.assertEqual(len(bestMatch), len(scaffoldMatch)) @@ -511,7 +518,8 @@ class TestCase(unittest.TestCase): self.assertAlmostEqual(rmsdInPlace, 17.7959, 3) rmsd = rdMolAlign.GetBestRMS(prb, ref, map=matches, weights=weights) self.assertAlmostEqual(rmsd, 10.9681, 3) - rmsdCopy, bestTrans, bestMatch = rdMolAlign.GetBestAlignmentTransform(prbCopy3, ref, map=matches, weights=weights) + rmsdCopy, bestTrans, bestMatch = rdMolAlign.GetBestAlignmentTransform( + prbCopy3, ref, map=matches, weights=weights) self.assertAlmostEqual(rmsd, rmsdCopy, 3) self.assertEqual(len(bestMatch), ref.GetNumAtoms()) self.assertTrue(all(len(tup) == 2 for tup in bestMatch)) diff --git a/Code/GraphMol/MolCatalog/Wrap/rough_test.py b/Code/GraphMol/MolCatalog/Wrap/rough_test.py index 6e6a2d090..ad61a2d9e 100755 --- a/Code/GraphMol/MolCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/MolCatalog/Wrap/rough_test.py @@ -2,11 +2,12 @@ # # Copyright (C) 2006 Greg Landrum # -import unittest, os, sys +import os import pickle -from rdkit import RDConfig -from rdkit import Chem -from rdkit import DataStructs +import sys +import unittest + +from rdkit import Chem, DataStructs, RDConfig from rdkit.Chem import MolCatalog diff --git a/Code/GraphMol/MolChemicalFeatures/Wrap/testChemicalFeatures.py b/Code/GraphMol/MolChemicalFeatures/Wrap/testChemicalFeatures.py index 9d1d7f30e..7cb70c24f 100644 --- a/Code/GraphMol/MolChemicalFeatures/Wrap/testChemicalFeatures.py +++ b/Code/GraphMol/MolChemicalFeatures/Wrap/testChemicalFeatures.py @@ -1,9 +1,8 @@ -from rdkit import DataStructs -from rdkit import RDConfig -from rdkit import Chem -from rdkit.Chem import ChemicalFeatures, rdDistGeom, AllChem -from rdkit import Geometry -import unittest, os +import os +import unittest + +from rdkit import Chem, DataStructs, Geometry, RDConfig +from rdkit.Chem import AllChem, ChemicalFeatures, rdDistGeom def lstFeq(l1, l2, tol=1.e-4): @@ -79,8 +78,8 @@ class TestCase(unittest.TestCase): self.failUnless(cfac.GetNumMolFeatures(mol, includeOnly="Bogus") == 0) self.failUnlessRaises(IndexError, lambda: cfac.GetMolFeature(mol, 1, includeOnly="HBondDonor")) - self.failUnlessRaises( - IndexError, lambda: cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor")) + self.failUnlessRaises(IndexError, + lambda: cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor")) f = cfac.GetMolFeature(mol, 0, includeOnly="HBondDonor") self.failUnless(f.GetFamily() == 'HBondDonor') @@ -126,15 +125,15 @@ EndFeature\r Weights 1.0 EndFeature """ - self.failUnlessRaises( - ValueError, lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)) + self.failUnlessRaises(ValueError, + lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)) fdefBlock = \ """DefineFeature HDonor1 [N,O;!H0] Family HBondDonor Weights 1.0 """ - self.failUnlessRaises( - ValueError, lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)) + self.failUnlessRaises(ValueError, + lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)) self.failUnlessRaises(IOError, lambda: ChemicalFeatures.BuildFeatureFactory('noSuchFile.txt')) diff --git a/Code/GraphMol/MolDraw2D/Qt/Wrap/testMolDraw2DQt.py b/Code/GraphMol/MolDraw2D/Qt/Wrap/testMolDraw2DQt.py index b54e972c3..430731085 100644 --- a/Code/GraphMol/MolDraw2D/Qt/Wrap/testMolDraw2DQt.py +++ b/Code/GraphMol/MolDraw2D/Qt/Wrap/testMolDraw2DQt.py @@ -1,11 +1,8 @@ import sys import unittest -from rdkit import Chem -from rdkit import RDConfig -from rdkit.Chem import AllChem -from rdkit.Chem import Draw -from rdkit.Chem import rdDepictor +from rdkit import Chem, RDConfig +from rdkit.Chem import AllChem, Draw, rdDepictor from rdkit.Chem.Draw import rdMolDraw2DQt try: diff --git a/Code/GraphMol/MolDraw2D/Wrap/testMolDraw2D.py b/Code/GraphMol/MolDraw2D/Wrap/testMolDraw2D.py index e63703fbf..7c6fe81b3 100644 --- a/Code/GraphMol/MolDraw2D/Wrap/testMolDraw2D.py +++ b/Code/GraphMol/MolDraw2D/Wrap/testMolDraw2D.py @@ -1,14 +1,14 @@ -from rdkit import RDConfig -import unittest import random import re +import unittest from os import environ -from rdkit import Chem -from rdkit.Chem import Draw, AllChem, rdDepictor -from rdkit.Chem.Draw import rdMolDraw2D -from rdkit import Geometry + import numpy as np +from rdkit import Chem, Geometry, RDConfig +from rdkit.Chem import AllChem, Draw, rdDepictor +from rdkit.Chem.Draw import rdMolDraw2D + class TestCase(unittest.TestCase): @@ -814,5 +814,6 @@ M END''') text = d2d.GetDrawingText() self.assertTrue("#7F7F7F" not in text) + if __name__ == "__main__": unittest.main() diff --git a/Code/GraphMol/MolDraw2D/side_by_side_images.py b/Code/GraphMol/MolDraw2D/side_by_side_images.py index 27d952b7f..9a5db3038 100755 --- a/Code/GraphMol/MolDraw2D/side_by_side_images.py +++ b/Code/GraphMol/MolDraw2D/side_by_side_images.py @@ -16,14 +16,14 @@ d1 = args.dir1 d2 = args.dir2 if not Path(d1).exists(): - print(f'Directory {d1} missing.') - exit(1) + print(f'Directory {d1} missing.') + exit(1) if not Path(d2).exists(): - print(f'Directory {d2} missing.') - exit(1) + print(f'Directory {d2} missing.') + exit(1) with open(args.outfile, 'w') as f: - f.write(f''' + f.write(f''' @@ -40,21 +40,21 @@ with open(args.outfile, 'w') as f: {d2} \n''') - inglob = Path(d1) - fns = [Path(fn) for fn in inglob.glob(args.file_glob)] - fns.sort(key = lambda f: f.stat().st_mtime, reverse=True) + inglob = Path(d1) + fns = [Path(fn) for fn in inglob.glob(args.file_glob)] + fns.sort(key=lambda f: f.stat().st_mtime, reverse=True) - for fp in fns: - fn = fp.name - if not fn.endswith('.svg') and not fn.endswith('.png'): - continue - fns = fn.replace('.svg', '') - f.write(f''' + for fp in fns: + fn = fp.name + if not fn.endswith('.svg') and not fn.endswith('.png'): + continue + fns = fn.replace('.svg', '') + f.write(f''' {fns} {fns} {fns} \n''') - - f.write(''' + + f.write(''' \n''') diff --git a/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py b/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py index a36abca69..7feff4289 100755 --- a/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py +++ b/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -from rdkit import RDConfig -import os, sys -from rdkit import Chem -from rdkit.Chem import Draw, AllChem +import os +import sys + +from rdkit import Chem, RDConfig +from rdkit.Chem import AllChem, Draw suppl = Chem.SDMolSupplier() diff --git a/Code/GraphMol/MolDraw2D/update_hash_codes.py b/Code/GraphMol/MolDraw2D/update_hash_codes.py index c7f70063c..5acd19457 100644 --- a/Code/GraphMol/MolDraw2D/update_hash_codes.py +++ b/Code/GraphMol/MolDraw2D/update_hash_codes.py @@ -12,40 +12,40 @@ import json import re import sys - updates = {} with open(sys.argv[1], 'r') as f: - for line in f.readlines(): - if line.startswith('file'): - line_bits = line.strip().split() - updates[(line_bits[1], line_bits[8])] = (line_bits[1], line_bits[4]) + for line in f.readlines(): + if line.startswith('file'): + line_bits = line.strip().split() + updates[(line_bits[1], line_bits[8])] = (line_bits[1], line_bits[4]) re_str = r'{"([\w.-]*.\w*)", (\d*U)},' patt = re.compile(re_str) with open(sys.argv[2], 'r') as f: - cpp_file = f.read() + cpp_file = f.read() hashes = patt.findall(cpp_file) + def hash_replace(match): - match_bits = match.group().split(',') - match_tuple = (match_bits[0][2:-1], match_bits[1][1:-1]) - if match_tuple in updates: - rep_tuple = updates[match_tuple] - rep_str = f'{{"{rep_tuple[0]}", {rep_tuple[1]}}},' - print(f'replacing {match.group()} with {rep_str}') - return rep_str - else: - return match.group() - + match_bits = match.group().split(',') + match_tuple = (match_bits[0][2:-1], match_bits[1][1:-1]) + if match_tuple in updates: + rep_tuple = updates[match_tuple] + rep_str = f'{{"{rep_tuple[0]}", {rep_tuple[1]}}},' + print(f'replacing {match.group()} with {rep_str}') + return rep_str + else: + return match.group() + + for hash in hashes: - print(hash) - if hash in updates: - print(hash, updates[hash]) - + print(hash) + if hash in updates: + print(hash, updates[hash]) + new_cpp_file = patt.sub(hash_replace, cpp_file) with open(f'{sys.argv[2]}.update', 'w') as f: - f.write(new_cpp_file) - + f.write(new_cpp_file) diff --git a/Code/GraphMol/MolEnumerator/Wrap/rough_test.py b/Code/GraphMol/MolEnumerator/Wrap/rough_test.py index d3cc874a9..77a415a7c 100644 --- a/Code/GraphMol/MolEnumerator/Wrap/rough_test.py +++ b/Code/GraphMol/MolEnumerator/Wrap/rough_test.py @@ -8,11 +8,11 @@ # of the RDKit source tree. # -from rdkit import RDConfig -from rdkit import Chem -from rdkit.Chem import rdMolEnumerator import unittest +from rdkit import Chem, RDConfig +from rdkit.Chem import rdMolEnumerator + class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/MolHash/Wrap/testMolHash.py b/Code/GraphMol/MolHash/Wrap/testMolHash.py index dda54c310..431d6770e 100644 --- a/Code/GraphMol/MolHash/Wrap/testMolHash.py +++ b/Code/GraphMol/MolHash/Wrap/testMolHash.py @@ -1,7 +1,8 @@ -from rdkit import RDConfig -import os, sys +import os +import sys import unittest -from rdkit import Chem + +from rdkit import Chem, RDConfig from rdkit.Chem import rdMolHash diff --git a/Code/GraphMol/MolInterchange/Wrap/testMolInterchange.py b/Code/GraphMol/MolInterchange/Wrap/testMolInterchange.py index 0b70bc05a..5eb486a5f 100644 --- a/Code/GraphMol/MolInterchange/Wrap/testMolInterchange.py +++ b/Code/GraphMol/MolInterchange/Wrap/testMolInterchange.py @@ -1,8 +1,7 @@ import unittest -from rdkit import Chem +from rdkit import Chem, RDConfig from rdkit.Chem import rdMolInterchange -from rdkit import RDConfig class TestCase(unittest.TestCase): @@ -11,50 +10,48 @@ class TestCase(unittest.TestCase): pass def test1(self): - smis = ('c1ccccc1','C[C@H](F)Cl') + smis = ('c1ccccc1', 'C[C@H](F)Cl') for smi in smis: m = Chem.MolFromSmiles(smi) csmi = Chem.MolToSmiles(m) json = rdMolInterchange.MolToJSON(m) nms = rdMolInterchange.JSONToMols(json) - self.assertEqual(len(nms),1) + self.assertEqual(len(nms), 1) smi2 = Chem.MolToSmiles(nms[0]) - self.assertEqual(csmi,smi2) + self.assertEqual(csmi, smi2) ms = [Chem.MolFromSmiles(smi) for smi in smis] json = rdMolInterchange.MolsToJSON(ms) nms = rdMolInterchange.JSONToMols(json) - self.assertEqual(len(ms),len(nms)) - self.assertEqual([Chem.MolToSmiles(x) for x in ms],[Chem.MolToSmiles(x) for x in nms]) + self.assertEqual(len(ms), len(nms)) + self.assertEqual([Chem.MolToSmiles(x) for x in ms], [Chem.MolToSmiles(x) for x in nms]) def test2(self): - smis = ("C[C@H](O)C[C@@H](C)F |o1:1,4|","C[C@H](O)CC[C@@H](C)F |&1:1,5|") + smis = ("C[C@H](O)C[C@@H](C)F |o1:1,4|", "C[C@H](O)CC[C@@H](C)F |&1:1,5|") ms = [Chem.MolFromSmiles(x) for x in smis] json = rdMolInterchange.MolToJSON(ms[0]) - self.assertIn('stereoGroups',json) - self.assertIn('"or"',json) - self.assertIn('[1,4]',json) - + self.assertIn('stereoGroups', json) + self.assertIn('"or"', json) + self.assertIn('[1,4]', json) + json = rdMolInterchange.MolToJSON(ms[1]) - self.assertIn('stereoGroups',json) - self.assertIn('"and"',json) - self.assertIn('[1,5]',json) - + self.assertIn('stereoGroups', json) + self.assertIn('"and"', json) + self.assertIn('[1,5]', json) + json = rdMolInterchange.MolsToJSON(ms) - self.assertIn('stereoGroups',json) - self.assertIn('"or"',json) - self.assertIn('[1,4]',json) - self.assertIn('"and"',json) - self.assertIn('[1,5]',json) - + self.assertIn('stereoGroups', json) + self.assertIn('"or"', json) + self.assertIn('[1,4]', json) + self.assertIn('"and"', json) + self.assertIn('[1,5]', json) + ps = rdMolInterchange.JSONWriteParameters() ps.useRDKitExtensions = False - json = rdMolInterchange.MolToJSON(ms[1],ps) - self.assertNotIn('stereoGroups',json) - json = rdMolInterchange.MolsToJSON(ms,ps) - self.assertNotIn('stereoGroups',json) + json = rdMolInterchange.MolToJSON(ms[1], ps) + self.assertNotIn('stereoGroups', json) + json = rdMolInterchange.MolsToJSON(ms, ps) + self.assertNotIn('stereoGroups', json) - - if __name__ == '__main__': unittest.main() diff --git a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py index b5cbd1e72..05bf7c7d3 100644 --- a/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py +++ b/Code/GraphMol/MolStandardize/Wrap/testMolStandardize.py @@ -2,17 +2,16 @@ # Copyright (C) 2018 Susan H. Leung # All Rights Reserved # -from rdkit import RDConfig +import math import os import sys -import math -from datetime import datetime, timedelta import unittest -from rdkit import DataStructs -from rdkit import Chem -from rdkit.Geometry import rdGeometry as geom -from rdkit.Chem.rdchem import Atom +from datetime import datetime, timedelta + +from rdkit import Chem, DataStructs, RDConfig from rdkit.Chem.MolStandardize import rdMolStandardize +from rdkit.Chem.rdchem import Atom +from rdkit.Geometry import rdGeometry as geom class TestCase(unittest.TestCase): @@ -90,7 +89,8 @@ class TestCase(unittest.TestCase): 'ruthenium.mol') rumol = Chem.MolFromMolFile(rufile) disrumol = rdMolStandardize.DisconnectOrganometallics(rumol) - self.assertEqual(Chem.MolToSmiles(disrumol), "[Cl-].[Cl-].[Cl-].[Cl-].[Ru+2].[Ru+2].c1ccccc1.c1ccccc1") + self.assertEqual(Chem.MolToSmiles(disrumol), + "[Cl-].[Cl-].[Cl-].[Cl-].[Ru+2].[Ru+2].c1ccccc1.c1ccccc1") opts = rdMolStandardize.MetalDisconnectorOptions() opts.splitGrignards = True @@ -102,18 +102,19 @@ class TestCase(unittest.TestCase): self.assertNotEqual(def_opts.splitAromaticC, opts.splitAromaticC) self.assertNotEqual(def_opts.adjustCharges, opts.adjustCharges) self.assertNotEqual(def_opts.removeHapticDummies, opts.removeHapticDummies) - + md = rdMolStandardize.MetalDisconnector(opts) grigfile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data', - 'grignard_2.mol') + 'grignard_2.mol') grigmol = Chem.MolFromMolFile(grigfile) disgrigmol = md.Disconnect(grigmol) self.assertEqual(Chem.MolToSmiles(disgrigmol), "[Cl-].[Mg+2].[c-]1ccccc1") # and passing in the options explicitly disrumol = rdMolStandardize.DisconnectOrganometallics(rumol, opts) - self.assertEqual(Chem.MolToSmiles(disrumol), "[Cl-].[Cl-].[Cl-].[Cl-].[Ru+2].[Ru+2].c1ccccc1.c1ccccc1") + self.assertEqual(Chem.MolToSmiles(disrumol), + "[Cl-].[Cl-].[Cl-].[Cl-].[Ru+2].[Ru+2].c1ccccc1.c1ccccc1") def test6Charge(self): mol = Chem.MolFromSmiles("C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O") diff --git a/Code/GraphMol/MolTransforms/Wrap/testMolTransforms.py b/Code/GraphMol/MolTransforms/Wrap/testMolTransforms.py index 9b08f2423..bcbc60e59 100644 --- a/Code/GraphMol/MolTransforms/Wrap/testMolTransforms.py +++ b/Code/GraphMol/MolTransforms/Wrap/testMolTransforms.py @@ -1,11 +1,14 @@ -from rdkit import RDConfig -import os, sys, math +import math +import os +import sys import unittest + import numpy as np -from rdkit import DataStructs -from rdkit import Chem -from rdkit.Geometry import rdGeometry as geom + +from rdkit import Chem, DataStructs, RDConfig from rdkit.Chem import rdMolTransforms as rdmt +from rdkit.Geometry import rdGeometry as geom + def feq(v1, v2, tol=1.0e-4): return abs(v1 - v2) < tol @@ -81,12 +84,8 @@ class TestCase(unittest.TestCase): 2 4 1 0 M END ''' - axesRef = ( - (-0.9997, -0.0246, 0.0009), - ( 0.0246, -0.9981, 0.0559), - ( 0.0004, -0.0559, -0.9984) - ) - momentsRef = ( 3.4220, 4.7230, 7.1757) + axesRef = ((-0.9997, -0.0246, 0.0009), (0.0246, -0.9981, 0.0559), (0.0004, -0.0559, -0.9984)) + momentsRef = (3.4220, 4.7230, 7.1757) m = Chem.MolFromMolBlock(molBlock) axes, moments = rdmt.ComputePrincipalAxesAndMoments(m.GetConformer()) self.assertIsNotNone(axes) @@ -97,18 +96,15 @@ M END self.assertAlmostEqual(moments[y], momentsRef[y], 3) failed = False try: - axes, moments = rdmt.ComputePrincipalAxesAndMoments(m.GetConformer(), weights = (0.5, 0.5)) + axes, moments = rdmt.ComputePrincipalAxesAndMoments(m.GetConformer(), weights=(0.5, 0.5)) except Exception: failed = True self.assertTrue(failed) - axesWeightedRef = ( - (-0.9998, -0.0114, -0.0189), - (-0.0153, 0.9744, 0.2245), - ( 0.0158, 0.2247, -0.9743) - ) - momentsWeightedRef = ( 0.5496, 1.5559, 1.9361) + axesWeightedRef = ((-0.9998, -0.0114, -0.0189), (-0.0153, 0.9744, 0.2245), (0.0158, 0.2247, + -0.9743)) + momentsWeightedRef = (0.5496, 1.5559, 1.9361) axesWeighted, momentsWeighted = rdmt.ComputePrincipalAxesAndMoments( - m.GetConformer(), weights = (0.1, 0.2, 0.3, 0.4)) + m.GetConformer(), weights=(0.1, 0.2, 0.3, 0.4)) self.assertIsNotNone(axesWeighted) self.assertIsNotNone(momentsWeighted) for y in range(3): @@ -133,12 +129,8 @@ M END 2 4 1 0 M END ''' - axesRef = ( - (-0.0009, -0.0246, 0.9997), - (-0.0559, -0.9981, -0.0246), - ( 0.9984, -0.0559, -0.0004) - ) - momentsRef = ( 0.1212, 0.7343, 1.0596) + axesRef = ((-0.0009, -0.0246, 0.9997), (-0.0559, -0.9981, -0.0246), (0.9984, -0.0559, -0.0004)) + momentsRef = (0.1212, 0.7343, 1.0596) m = Chem.MolFromMolBlock(molBlock) axes, moments = rdmt.ComputePrincipalAxesAndMomentsFromGyrationMatrix(m.GetConformer()) self.assertIsNotNone(axes) @@ -149,18 +141,16 @@ M END self.assertAlmostEqual(moments[y], momentsRef[y], 3) failed = False try: - axes, moments = rdmt.ComputePrincipalAxesAndMomentsFromGyrationMatrix(m.GetConformer(), weights = (0.5, 0.5)) + axes, moments = rdmt.ComputePrincipalAxesAndMomentsFromGyrationMatrix( + m.GetConformer(), weights=(0.5, 0.5)) except Exception: failed = True self.assertTrue(failed) - axesWeightedRef = ( - ( 0.0189, -0.0114, 0.9998), - (-0.2245, 0.9744, 0.0153), - ( 0.9743, 0.2247, -0.0158) - ) - momentsWeightedRef = ( 0.0847, 0.4649, 1.4712) + axesWeightedRef = ((0.0189, -0.0114, 0.9998), (-0.2245, 0.9744, 0.0153), (0.9743, 0.2247, + -0.0158)) + momentsWeightedRef = (0.0847, 0.4649, 1.4712) axesWeighted, momentsWeighted = rdmt.ComputePrincipalAxesAndMomentsFromGyrationMatrix( - m.GetConformer(), weights = (0.1, 0.2, 0.3, 0.4)) + m.GetConformer(), weights=(0.1, 0.2, 0.3, 0.4)) self.assertIsNotNone(axesWeighted) self.assertIsNotNone(momentsWeighted) for y in range(3): @@ -248,6 +238,7 @@ M END self.assertTrue(exceptionRaised) def testEigen3CanonicalTransformAgainstNumpy(self): + def canonicalize_conf_rdkit(mol, conf_id=-1): mol = Chem.Mol(mol) conf = mol.GetConformer(conf_id) @@ -265,7 +256,9 @@ M END cov_mat = np.cov(trans_pos, bias=1, rowvar=False) * conf.GetNumAtoms() eigval, eigvect = np.linalg.eig(cov_mat) eigval_sorted = sorted(enumerate(eigval), key=lambda x: x[1], reverse=True) - eigvect_sorted = [eigvect[:, i] * (1.0 if eigvect[:, i].sum() > 0.0 else -1.0) for i, _ in eigval_sorted] + eigvect_sorted = [ + eigvect[:, i] * (1.0 if eigvect[:, i].sum() > 0.0 else -1.0) for i, _ in eigval_sorted + ] canon_trans = np.array([ [*eigvect_sorted[0], 0.], [*eigvect_sorted[1], 0.], diff --git a/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py b/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py index 038eba2ae..f3894fcee 100644 --- a/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py +++ b/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py @@ -1,139 +1,137 @@ - -import unittest -import os import io - +import os import pickle +import unittest -from rdkit import Chem +from rdkit import Chem, RDConfig from rdkit.Chem import rdPartialCharges -from rdkit import RDConfig def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test0HalgrenSet(self): - smiSup = Chem.SmilesMolSupplier( - os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', - 'halgren.smi'), delimiter='\t') + def test0HalgrenSet(self): + smiSup = Chem.SmilesMolSupplier( + os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', + 'halgren.smi'), delimiter='\t') - # parse the original file - with open( - os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', - 'halgren_out.txt'), 'r') as infil: - lines = infil.readlines() + # parse the original file + with open( + os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', + 'halgren_out.txt'), 'r') as infil: + lines = infil.readlines() - tab = Chem.GetPeriodicTable() + tab = Chem.GetPeriodicTable() - olst = [] - for mol in smiSup: - rdPartialCharges.ComputeGasteigerCharges(mol) - tstr = "Molecule: " - tstr += mol.GetProp("_Name") - olst.append(tstr) - for i in range(mol.GetNumAtoms()): - at = mol.GetAtomWithIdx(i) - en = tab.GetElementSymbol(at.GetAtomicNum()) - chg = float(at.GetProp("_GasteigerCharge")) - tstr = "%i %s %6.4f" % (i, en, chg) - olst.append(tstr) + olst = [] + for mol in smiSup: + rdPartialCharges.ComputeGasteigerCharges(mol) + tstr = "Molecule: " + tstr += mol.GetProp("_Name") + olst.append(tstr) + for i in range(mol.GetNumAtoms()): + at = mol.GetAtomWithIdx(i) + en = tab.GetElementSymbol(at.GetAtomicNum()) + chg = float(at.GetProp("_GasteigerCharge")) + tstr = "%i %s %6.4f" % (i, en, chg) + olst.append(tstr) - i = 0 - for line in lines: - self.assertTrue(line.strip() == olst[i]) - i += 1 + i = 0 + for line in lines: + self.assertTrue(line.strip() == olst[i]) + i += 1 - def test1PPDataset(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', - 'test_data', 'PP_descrs_regress.2.csv') - infil = open(fileN, 'r') - lines = infil.readlines() - infil.close() + def test1PPDataset(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', + 'test_data', 'PP_descrs_regress.2.csv') + infil = open(fileN, 'r') + lines = infil.readlines() + infil.close() - infile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', - 'test_data', 'PP_combi_charges.pkl') - with open(infile, 'r') as cchtFile: - buf = cchtFile.read().replace('\r\n', '\n').encode('utf-8') - cchtFile.close() - with io.BytesIO(buf) as cchFile: - combiCharges = pickle.load(cchFile) + infile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', + 'test_data', 'PP_combi_charges.pkl') + with open(infile, 'r') as cchtFile: + buf = cchtFile.read().replace('\r\n', '\n').encode('utf-8') + cchtFile.close() + with io.BytesIO(buf) as cchFile: + combiCharges = pickle.load(cchFile) - for lin in lines: - if (lin[0] == '#'): - continue - tlst = lin.strip().split(',') - smi = tlst[0] - rdmol = Chem.MolFromSmiles(smi) - rdPartialCharges.ComputeGasteigerCharges(rdmol) + for lin in lines: + if (lin[0] == '#'): + continue + tlst = lin.strip().split(',') + smi = tlst[0] + rdmol = Chem.MolFromSmiles(smi) + rdPartialCharges.ComputeGasteigerCharges(rdmol) - nat = rdmol.GetNumAtoms() - failed = False - for ai in range(nat): - rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge')) - if not feq(rdch, combiCharges[smi][ai], 1.e-2): - failed = True - print(smi, ai, rdch, combiCharges[smi][ai]) - if failed: - rdmol.Debug() - self.assertFalse(failed) + nat = rdmol.GetNumAtoms() + failed = False + for ai in range(nat): + rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge')) + if not feq(rdch, combiCharges[smi][ai], 1.e-2): + failed = True + print(smi, ai, rdch, combiCharges[smi][ai]) + if failed: + rdmol.Debug() + self.assertFalse(failed) - def test2Params(self): - """ tests handling of Issue187 """ - m1 = Chem.MolFromSmiles('C(=O)[O-]') - rdPartialCharges.ComputeGasteigerCharges(m1) + def test2Params(self): + """ tests handling of Issue187 """ + m1 = Chem.MolFromSmiles('C(=O)[O-]') + rdPartialCharges.ComputeGasteigerCharges(m1) - m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') - rdPartialCharges.ComputeGasteigerCharges(m2) + m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') + rdPartialCharges.ComputeGasteigerCharges(m2) - for i in range(m1.GetNumAtoms()): - c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - c2 = float(m2.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - self.assertTrue(feq(c1, c2, 1e-4)) + for i in range(m1.GetNumAtoms()): + c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + c2 = float(m2.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + self.assertTrue(feq(c1, c2, 1e-4)) - def test3Params(self): - """ tests handling of Issue187 """ - m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') - with self.assertRaisesRegex(Exception, ""): - rdPartialCharges.ComputeGasteigerCharges(m2, 12, 1) + def test3Params(self): + """ tests handling of Issue187 """ + m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') + with self.assertRaisesRegex(Exception, ""): + rdPartialCharges.ComputeGasteigerCharges(m2, 12, 1) - def testGithubIssue20(self): - """ tests handling of Github issue 20 """ - m1 = Chem.MolFromSmiles('CB(O)O') - rdPartialCharges.ComputeGasteigerCharges(m1) - chgs = [-0.030, 0.448, -0.427, -0.427] - for i in range(m1.GetNumAtoms()): - c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - self.assertAlmostEqual(c1, chgs[i], 3) + def testGithubIssue20(self): + """ tests handling of Github issue 20 """ + m1 = Chem.MolFromSmiles('CB(O)O') + rdPartialCharges.ComputeGasteigerCharges(m1) + chgs = [-0.030, 0.448, -0.427, -0.427] + for i in range(m1.GetNumAtoms()): + c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + self.assertAlmostEqual(c1, chgs[i], 3) - def testGithubIssue577(self): - """ tests handling of Github issue 577 """ - m1 = Chem.MolFromSmiles('CCO') - from locale import setlocale, LC_NUMERIC - try: - setlocale(LC_NUMERIC, "de_DE") - except Exception: - # can't set the required locale, might as well just return - return - try: - rdPartialCharges.ComputeGasteigerCharges(m1) - for at in m1.GetAtoms(): - float(at.GetProp('_GasteigerCharge')) - finally: - setlocale(LC_NUMERIC, "C") - rdPartialCharges.ComputeGasteigerCharges(m1) - for at in m1.GetAtoms(): - float(at.GetProp('_GasteigerCharge')) - def testGithub2480(self): - with self.assertRaisesRegex(Exception, "^Python argument types"): - rdPartialCharges.ComputeGasteigerCharges(None) + def testGithubIssue577(self): + """ tests handling of Github issue 577 """ + m1 = Chem.MolFromSmiles('CCO') + from locale import LC_NUMERIC, setlocale + try: + setlocale(LC_NUMERIC, "de_DE") + except Exception: + # can't set the required locale, might as well just return + return + try: + rdPartialCharges.ComputeGasteigerCharges(m1) + for at in m1.GetAtoms(): + float(at.GetProp('_GasteigerCharge')) + finally: + setlocale(LC_NUMERIC, "C") + rdPartialCharges.ComputeGasteigerCharges(m1) + for at in m1.GetAtoms(): + float(at.GetProp('_GasteigerCharge')) + + def testGithub2480(self): + with self.assertRaisesRegex(Exception, "^Python argument types"): + rdPartialCharges.ComputeGasteigerCharges(None) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py index 3fec91f8b..1979f7e13 100644 --- a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py +++ b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py @@ -30,20 +30,20 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import unittest -import os, sys, copy - +import copy +import os import pickle - -from rdkit import rdBase -from rdkit import Chem -from rdkit.Chem.rdRGroupDecomposition import (RGroupDecompose, RGroupDecomposition, - RGroupDecompositionParameters, RGroupLabels, - RGroupCoreAlignment) +import sys +import unittest from collections import OrderedDict # the RGD code can generate a lot of warnings. disable them -from rdkit import RDLogger +from rdkit import Chem, RDLogger, rdBase +from rdkit.Chem.rdRGroupDecomposition import (RGroupCoreAlignment, + RGroupDecompose, + RGroupDecomposition, + RGroupDecompositionParameters, + RGroupLabels) RDLogger.DisableLog("rdApp.warning") diff --git a/Code/GraphMol/ReducedGraphs/Wrap/testReducedGraphs.py b/Code/GraphMol/ReducedGraphs/Wrap/testReducedGraphs.py index adf9ae680..7a5e967e2 100644 --- a/Code/GraphMol/ReducedGraphs/Wrap/testReducedGraphs.py +++ b/Code/GraphMol/ReducedGraphs/Wrap/testReducedGraphs.py @@ -1,11 +1,12 @@ # $Id$ # -from rdkit import Chem -from rdkit.Chem import rdReducedGraphs as rdRG -from rdkit import RDConfig -import numpy import unittest +import numpy + +from rdkit import Chem, RDConfig +from rdkit.Chem import rdReducedGraphs as rdRG + class TestCase(unittest.TestCase): @@ -55,7 +56,7 @@ class TestCase(unittest.TestCase): m = Chem.MolFromSmiles('OCCc1ccccc1') mrg = rdRG.GenerateMolExtendedReducedGraph(m) erg_types = [tuple(atom.GetPropsAsDict().get('_ErGAtomTypes')) for atom in mrg.GetAtoms()] - self.assertEqual(erg_types, [(0, 1), (), (), (), (5,)]) + self.assertEqual(erg_types, [(0, 1), (), (), (), (5, )]) if __name__ == '__main__': diff --git a/Code/GraphMol/SLNParse/Wrap/testSLN.py b/Code/GraphMol/SLNParse/Wrap/testSLN.py index e77e01ae7..152e0d5ce 100644 --- a/Code/GraphMol/SLNParse/Wrap/testSLN.py +++ b/Code/GraphMol/SLNParse/Wrap/testSLN.py @@ -2,19 +2,19 @@ # # Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior # written permission. # @@ -32,12 +32,12 @@ # # Created by Greg Landrum, September 2006 # -from rdkit import Chem -from rdkit.Chem import rdSLNParse -from rdkit import Geometry -from rdkit import RDConfig +import os +import sys import unittest -import os, sys + +from rdkit import Chem, Geometry, RDConfig +from rdkit.Chem import rdSLNParse class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/ScaffoldNetwork/Wrap/testPickleScaffoldNetwork.py b/Code/GraphMol/ScaffoldNetwork/Wrap/testPickleScaffoldNetwork.py index 37d31d447..d563b8d30 100644 --- a/Code/GraphMol/ScaffoldNetwork/Wrap/testPickleScaffoldNetwork.py +++ b/Code/GraphMol/ScaffoldNetwork/Wrap/testPickleScaffoldNetwork.py @@ -11,9 +11,7 @@ import pickle import unittest -from rdkit import Chem -from rdkit import RDConfig -from rdkit import rdBase +from rdkit import Chem, RDConfig, rdBase from rdkit.Chem.Scaffolds import rdScaffoldNetwork rdBase.DisableLog("rdApp.info") diff --git a/Code/GraphMol/ScaffoldNetwork/Wrap/testScaffoldNetwork.py b/Code/GraphMol/ScaffoldNetwork/Wrap/testScaffoldNetwork.py index 7104358fe..e57cbf49d 100644 --- a/Code/GraphMol/ScaffoldNetwork/Wrap/testScaffoldNetwork.py +++ b/Code/GraphMol/ScaffoldNetwork/Wrap/testScaffoldNetwork.py @@ -11,9 +11,7 @@ import pickle import unittest -from rdkit import Chem -from rdkit import RDConfig -from rdkit import rdBase +from rdkit import Chem, RDConfig, rdBase from rdkit.Chem.Scaffolds import rdScaffoldNetwork rdBase.DisableLog("rdApp.info") diff --git a/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py b/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py index 7ac7405af..53e456b1b 100644 --- a/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py +++ b/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py @@ -1,105 +1,102 @@ - +import math import os import sys import unittest -import math -from rdkit import RDConfig -from rdkit import DataStructs -from rdkit import Chem +from rdkit import Chem, DataStructs, RDConfig from rdkit.Chem import rdMolAlign -from rdkit.Geometry import rdGeometry as geom -from rdkit.Chem import rdShapeHelpers as rdshp from rdkit.Chem import rdMolTransforms as rdmt +from rdkit.Chem import rdShapeHelpers as rdshp +from rdkit.Geometry import rdGeometry as geom class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test1Shape(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data', - '1oir.mol') - m = Chem.MolFromMolFile(fileN) - rdmt.CanonicalizeMol(m) - dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer()) - grd = geom.UniformGrid3D(30.0, 16.0, 10.0) - rdshp.EncodeShape(m, grd, 0) - ovect = grd.GetOccupancyVect() - self.assertEqual(ovect.GetTotalVal(), 7405) + def test1Shape(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data', + '1oir.mol') + m = Chem.MolFromMolFile(fileN) + rdmt.CanonicalizeMol(m) + dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer()) + grd = geom.UniformGrid3D(30.0, 16.0, 10.0) + rdshp.EncodeShape(m, grd, 0) + ovect = grd.GetOccupancyVect() + self.assertEqual(ovect.GetTotalVal(), 7405) - m = Chem.MolFromMolFile(fileN) - trans = rdmt.ComputeCanonicalTransform(m.GetConformer()) - dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(), trans=trans) - dims -= dims1 - offset -= offset1 - self.assertAlmostEqual(dims.Length(), 0.0, 4) - self.assertAlmostEqual(offset.Length(), 0.0, 4) + m = Chem.MolFromMolFile(fileN) + trans = rdmt.ComputeCanonicalTransform(m.GetConformer()) + dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(), trans=trans) + dims -= dims1 + offset -= offset1 + self.assertAlmostEqual(dims.Length(), 0.0, 4) + self.assertAlmostEqual(offset.Length(), 0.0, 4) - grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0) - rdshp.EncodeShape(m, grd1, 0, trans) - ovect = grd1.GetOccupancyVect() + grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0) + rdshp.EncodeShape(m, grd1, 0, trans) + ovect = grd1.GetOccupancyVect() - self.assertEqual(ovect.GetTotalVal(), 7405) + self.assertEqual(ovect.GetTotalVal(), 7405) - grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0) - rdshp.EncodeShape(m, grd2, 0) + grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0) + rdshp.EncodeShape(m, grd2, 0) - fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data', - '1oir_conf.mol') - m2 = Chem.MolFromMolFile(fileN2) + fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data', + '1oir_conf.mol') + m2 = Chem.MolFromMolFile(fileN2) - self.assertAlmostEqual(rdshp.ShapeTanimotoDist(m, m), 0.0, 4) - self.assertAlmostEqual(rdshp.ShapeTverskyIndex(m, m, 1.0, 1.0), 1.0, 4) + self.assertAlmostEqual(rdshp.ShapeTanimotoDist(m, m), 0.0, 4) + self.assertAlmostEqual(rdshp.ShapeTverskyIndex(m, m, 1.0, 1.0), 1.0, 4) - rmsd = rdMolAlign.AlignMol(m, m2) - self.assertAlmostEqual(rdshp.ShapeTanimotoDist(m, m2), 0.31, 2) - self.assertAlmostEqual(rdshp.ShapeTverskyIndex(m, m2, 1.0, 1.0), 0.686, 2) + rmsd = rdMolAlign.AlignMol(m, m2) + self.assertAlmostEqual(rdshp.ShapeTanimotoDist(m, m2), 0.31, 2) + self.assertAlmostEqual(rdshp.ShapeTverskyIndex(m, m2, 1.0, 1.0), 0.686, 2) - dist = rdshp.ShapeTanimotoDist(mol1=m, mol2=m2, confId1=0, confId2=0, gridSpacing=0.25, - stepSize=0.125) - self.assertAlmostEqual(dist, 0.339, 2) + dist = rdshp.ShapeTanimotoDist(mol1=m, mol2=m2, confId1=0, confId2=0, gridSpacing=0.25, + stepSize=0.125) + self.assertAlmostEqual(dist, 0.339, 2) - m = Chem.MolFromMolFile(fileN) - cpt = rdmt.ComputeCentroid(m.GetConformer()) - dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer()) + m = Chem.MolFromMolFile(fileN) + cpt = rdmt.ComputeCentroid(m.GetConformer()) + dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer()) - grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5, DataStructs.DiscreteValueType.TWOBITVALUE, - offset) - dims -= geom.Point3D(13.927, 16.97, 9.775) - offset -= geom.Point3D(-4.353, 16.829, 2.782) - self.assertAlmostEqual(dims.Length(), 0.0, 4) - self.assertAlmostEqual(offset.Length(), 0.0, 4) - rdshp.EncodeShape(m, grd, 0) + grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5, DataStructs.DiscreteValueType.TWOBITVALUE, + offset) + dims -= geom.Point3D(13.927, 16.97, 9.775) + offset -= geom.Point3D(-4.353, 16.829, 2.782) + self.assertAlmostEqual(dims.Length(), 0.0, 4) + self.assertAlmostEqual(offset.Length(), 0.0, 4) + rdshp.EncodeShape(m, grd, 0) - ovect = grd.GetOccupancyVect() + ovect = grd.GetOccupancyVect() - self.assertEqual(ovect.GetTotalVal(), 7417) - geom.WriteGridToFile(grd, '1oir_shape.grd') + self.assertEqual(ovect.GetTotalVal(), 7417) + geom.WriteGridToFile(grd, '1oir_shape.grd') - m = Chem.MolFromMolFile(fileN) - lc, uc = rdshp.ComputeConfBox(m.GetConformer()) - rdmt.CanonicalizeMol(m) - lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer()) + m = Chem.MolFromMolFile(fileN) + lc, uc = rdshp.ComputeConfBox(m.GetConformer()) + rdmt.CanonicalizeMol(m) + lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer()) - lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1)) - lc -= geom.Point3D(-4.353, 16.829, 2.782) - uc -= geom.Point3D(9.574, 33.799, 12.557) - self.assertAlmostEqual(lc.Length(), 0.0, 4) - self.assertAlmostEqual(uc.Length(), 0.0, 4) + lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1)) + lc -= geom.Point3D(-4.353, 16.829, 2.782) + uc -= geom.Point3D(9.574, 33.799, 12.557) + self.assertAlmostEqual(lc.Length(), 0.0, 4) + self.assertAlmostEqual(uc.Length(), 0.0, 4) - lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123) - uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621) - self.assertAlmostEqual(lc1.Length(), 0.0, 4) - self.assertAlmostEqual(uc1.Length(), 0.0, 3) + lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123) + uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621) + self.assertAlmostEqual(lc1.Length(), 0.0, 4) + self.assertAlmostEqual(uc1.Length(), 0.0, 3) - lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226) - uc2 -= geom.Point3D(9.574, 33.799, 12.557) - self.assertAlmostEqual(lc2.Length(), 0.0, 4) - self.assertAlmostEqual(uc2.Length(), 0.0, 4) + lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226) + uc2 -= geom.Point3D(9.574, 33.799, 12.557) + self.assertAlmostEqual(lc2.Length(), 0.0, 4) + self.assertAlmostEqual(uc2.Length(), 0.0, 4) if __name__ == '__main__': - print("Testing Shape Helpers wrapper") - unittest.main() + print("Testing Shape Helpers wrapper") + unittest.main() diff --git a/Code/GraphMol/StructChecker/Wrap/rough_test.py b/Code/GraphMol/StructChecker/Wrap/rough_test.py index f2cbc6d1b..dabb5846b 100644 --- a/Code/GraphMol/StructChecker/Wrap/rough_test.py +++ b/Code/GraphMol/StructChecker/Wrap/rough_test.py @@ -30,12 +30,11 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # +import unittest from rdkit import Chem from rdkit.Chem import rdStructChecker -import unittest - data = """310929550 -OEChem-07211613022D diff --git a/Code/GraphMol/Substruct/UnitTestSubstruct.py b/Code/GraphMol/Substruct/UnitTestSubstruct.py index fb259610a..63f75fca0 100755 --- a/Code/GraphMol/Substruct/UnitTestSubstruct.py +++ b/Code/GraphMol/Substruct/UnitTestSubstruct.py @@ -2,8 +2,11 @@ """basic unit testing code for the substructure matching """ +import os +import sys +import unittest + import RDConfig -import unittest, os, sys class TestCase(unittest.TestCase): @@ -24,22 +27,24 @@ class TestCase(unittest.TestCase): def testAtomListPass(self): """ testing atom list matches which should pass """ - smis = ['C1=CC=CC=C1', - 'C1C=CC=CC=1', - 'N1=CC=CC=C1', - 'C1=NC=CC=C1', - 'C1=CN=CC=C1', - 'C1=CC=NC=C1', - 'C1=CC=CN=C1', - 'C1=CC=CC=N1', - 'P1=CC=CC=C1', - 'C1=PC=CC=C1', - 'C1=CP=CC=C1', - 'C1=CC=PC=C1', - 'C1=CC=CP=C1', - 'C1=CC=CC=P1', - 'C1=C(C)C=CC=C1', - 'C1C=C(CC)C=C(C)C=1', ] + smis = [ + 'C1=CC=CC=C1', + 'C1C=CC=CC=1', + 'N1=CC=CC=C1', + 'C1=NC=CC=C1', + 'C1=CN=CC=C1', + 'C1=CC=NC=C1', + 'C1=CC=CN=C1', + 'C1=CC=CC=N1', + 'P1=CC=CC=C1', + 'C1=PC=CC=C1', + 'C1=CP=CC=C1', + 'C1=CC=PC=C1', + 'C1=CC=CP=C1', + 'C1=CC=CC=P1', + 'C1=C(C)C=CC=C1', + 'C1C=C(CC)C=C(C)C=1', + ] cdxFile = '%s/list-query.cdxml' % (self.basePath) for smi in smis: p = os.popen('%s %s "%s"' % (self.exe, cdxFile, smi), 'r') @@ -48,11 +53,13 @@ class TestCase(unittest.TestCase): def testAtomListFail(self): """ testing atom list matches which should fail """ - smis = ['C1CC=CC=C1', - 'c1ccccc1', - 'O1=CC=CC=C1', - 'C1=NC=CN=C1', - 'C1=CP=CO=C1', ] + smis = [ + 'C1CC=CC=C1', + 'c1ccccc1', + 'O1=CC=CC=C1', + 'C1=NC=CN=C1', + 'C1=CP=CO=C1', + ] cdxFile = '%s/list-query.cdxml' % (self.basePath) for smi in smis: p = os.popen('%s %s "%s"' % (self.exe, cdxFile, smi), 'r') @@ -61,14 +68,16 @@ class TestCase(unittest.TestCase): def testBondListPass(self): """ testing bond list matches which should pass """ - smis = ['CCCC=C', - 'C=CCCC', - 'CC=CC=C', - 'C=CC=CC', - 'C1CCC=CC1', - 'C1CC=CC=C1' - 'C=CC=CCCCO', - 'CC=C(C=C)COC', ] + smis = [ + 'CCCC=C', + 'C=CCCC', + 'CC=CC=C', + 'C=CC=CC', + 'C1CCC=CC1', + 'C1CC=CC=C1' + 'C=CC=CCCCO', + 'CC=C(C=C)COC', + ] cdxFile = '%s/bond-query.cdxml' % (self.basePath) for smi in smis: p = os.popen('%s %s "%s"' % (self.exe, cdxFile, smi), 'r') @@ -77,8 +86,10 @@ class TestCase(unittest.TestCase): def testBondListFail(self): """ testing bond list matches which should fail """ - smis = ['CCCCC', - 'C=COCC', ] + smis = [ + 'CCCCC', + 'C=COCC', + ] cdxFile = '%s/bond-query.cdxml' % (self.basePath) for smi in smis: p = os.popen('%s %s "%s"' % (self.exe, cdxFile, smi), 'r') diff --git a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py index f3fc37a34..3bfd03b5f 100644 --- a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py +++ b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py @@ -34,18 +34,22 @@ it is intended to be shallow but broad. """ -import doctest, unittest, os, sys - -from rdkit import rdBase +import doctest import logging -from rdkit import RDConfig, RDLogger +import os +import sys +import unittest + +from rdkit import RDConfig, RDLogger, rdBase from rdkit.RDLogger import logger + logger = logger() -from rdkit import Chem -from rdkit.Chem import rdSubstructLibrary -import time import pickle import tempfile +import time + +from rdkit import Chem +from rdkit.Chem import rdSubstructLibrary def load_tests(loader, tests, ignore): @@ -396,7 +400,7 @@ class TestCase(unittest.TestCase): slib2.InitFromStream(file) self.assertEqual(len(slib), len(slib2)) - from io import StringIO, BytesIO + from io import BytesIO, StringIO s = StringIO() slib.ToStream(s) diff --git a/Code/GraphMol/TautomerQuery/Wrap/rough_test.py b/Code/GraphMol/TautomerQuery/Wrap/rough_test.py index 488b357f3..445f9fd52 100644 --- a/Code/GraphMol/TautomerQuery/Wrap/rough_test.py +++ b/Code/GraphMol/TautomerQuery/Wrap/rough_test.py @@ -12,11 +12,12 @@ Rough in that only basic functionality is evaluated. """ -from rdkit import Chem, DataStructs -from rdkit.Chem import rdTautomerQuery -from unittest import TestCase, main import os import pickle +from unittest import TestCase, main + +from rdkit import Chem, DataStructs +from rdkit.Chem import rdTautomerQuery class TautomerQueryTestCase(TestCase): diff --git a/Code/GraphMol/UnitTestQueryMol.py b/Code/GraphMol/UnitTestQueryMol.py index 5a3e7b15c..042338511 100755 --- a/Code/GraphMol/UnitTestQueryMol.py +++ b/Code/GraphMol/UnitTestQueryMol.py @@ -3,9 +3,11 @@ """ +import os +import sys +import unittest from rdkit import RDConfig -import unittest, os, sys class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 2fce864de..b2ee6dded 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -7212,7 +7212,6 @@ CAS<~> self.assertEqual(mae, iomae[ctBlockStart:]) - def test_HapticBondsToDative(self): fefile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data', 'ferrocene.mol') @@ -7220,8 +7219,7 @@ CAS<~> newfemol = Chem.rdmolops.HapticBondsToDative(femol) self.assertEqual(Chem.MolToSmiles(newfemol), 'c12->[Fe+2]3456789(<-c1c->3[cH-]->4c->52)<-c1c->6c->7[cH-]->8c->91') - - + def test_DativeBondsToHaptic(self): fefile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolStandardize', 'test_data', 'ferrocene.mol') @@ -7229,22 +7227,22 @@ CAS<~> newfemol = Chem.rdmolops.HapticBondsToDative(femol) backfemol = Chem.rdmolops.DativeBondsToHaptic(newfemol) self.assertEqual(Chem.MolToSmiles(femol), Chem.MolToSmiles(backfemol)) - + def testTranslateChiralFlag(self): mol = Chem.MolFromSmiles("C[C@@](N)(F)C[C@](C)(O)F |a:1|") flagMol = Chem.Mol(mol) - flagMol.SetIntProp("_MolFileChiralFlag",1) + flagMol.SetIntProp("_MolFileChiralFlag", 1) Chem.TranslateChiralFlagToStereoGroups(flagMol) sgs = flagMol.GetStereoGroups() - self.assertEqual(len(sgs),1) + self.assertEqual(len(sgs), 1) self.assertEqual(len(sgs[0].GetAtoms()), 2) self.assertEqual(sgs[0].GetGroupType(), Chem.StereoGroupType.STEREO_ABSOLUTE) flagMol = Chem.Mol(mol) - flagMol.SetIntProp("_MolFileChiralFlag",0) + flagMol.SetIntProp("_MolFileChiralFlag", 0) Chem.TranslateChiralFlagToStereoGroups(flagMol) sgs = flagMol.GetStereoGroups() - self.assertEqual(len(sgs),2) + self.assertEqual(len(sgs), 2) self.assertEqual(sgs[0].GetGroupType(), Chem.StereoGroupType.STEREO_ABSOLUTE) self.assertEqual(len(sgs[0].GetAtoms()), 1) @@ -7252,10 +7250,10 @@ CAS<~> self.assertEqual(len(sgs[1].GetAtoms()), 1) flagMol = Chem.Mol(mol) - flagMol.SetIntProp("_MolFileChiralFlag",0) + flagMol.SetIntProp("_MolFileChiralFlag", 0) Chem.TranslateChiralFlagToStereoGroups(flagMol, Chem.StereoGroupType.STEREO_OR) sgs = flagMol.GetStereoGroups() - self.assertEqual(len(sgs),2) + self.assertEqual(len(sgs), 2) self.assertEqual(sgs[0].GetGroupType(), Chem.StereoGroupType.STEREO_ABSOLUTE) self.assertEqual(len(sgs[0].GetAtoms()), 1) diff --git a/Code/GraphMol/Wrap/testConformer.py b/Code/GraphMol/Wrap/testConformer.py index 5ce00ee2e..ce1e99c05 100644 --- a/Code/GraphMol/Wrap/testConformer.py +++ b/Code/GraphMol/Wrap/testConformer.py @@ -3,11 +3,11 @@ # Copyright (C) 2004 Rational Discovery LLC # All Rights Reserved # -from rdkit import RDConfig -import os, sys +import os +import sys import unittest -from rdkit import Chem -from rdkit import Geometry + +from rdkit import Chem, Geometry, RDConfig from rdkit.Geometry import Point3D diff --git a/Code/GraphMol/Wrap/testMultithreadedMolSupplier.py b/Code/GraphMol/Wrap/testMultithreadedMolSupplier.py index feae7a0f6..4cd4d9916 100644 --- a/Code/GraphMol/Wrap/testMultithreadedMolSupplier.py +++ b/Code/GraphMol/Wrap/testMultithreadedMolSupplier.py @@ -1,139 +1,140 @@ -import os, sys, unittest, doctest +import doctest import gzip -from rdkit import RDConfig, rdBase -from rdkit import Chem -from rdkit import __version__ +import os import sys +import unittest + +from rdkit import Chem, RDConfig, __version__, rdBase + class TestCase(unittest.TestCase): - def testMultiSmiMolSupplier(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', - 'FileParsers', 'test_data', 'first_200.tpsa.csv') - # fileN = "../FileParsers/test_data/first_200.tpsa.csv" - smiSup = Chem.MultithreadedSmilesMolSupplier(fileN, ",", 0, - 1) - i = 0 - while not smiSup.atEnd(): - mol = next(smiSup) - if(mol): - i += 1 - self.assertTrue(i == 200) - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', - 'FileParsers', 'test_data', 'fewSmi.csv') - # fileN = "../FileParsers/test_data/fewSmi.csv" - smiSup = Chem.MultithreadedSmilesMolSupplier( - fileN, delimiter=",", smilesColumn=1, nameColumn=0, titleLine=0) - names = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] - props = ["34.14", "25.78", "106.51", "82.78", "60.16", - "87.74", "37.38", "77.28", "65.18", "0.00"] - confusedNames = [] - confusedProps = [] - i = 0 - for mol in smiSup: - if mol is not None: - self.assertTrue(mol.HasProp("_Name")) - self.assertTrue(mol.HasProp("Column_2")) - prop = mol.GetProp("Column_2") - name = mol.GetProp("_Name") - confusedProps.append(prop) - confusedNames.append(name) - i += 1 - self.assertTrue(i == 10) - self.assertTrue(sorted(confusedNames) == sorted(names)) - self.assertTrue(sorted(confusedProps) == sorted(props)) - # context manager - confusedNames = [] - confusedProps = [] - i = 0 - with Chem.MultithreadedSmilesMolSupplier(fileN,delimiter=",", smilesColumn=1, - nameColumn=0, titleLine=0) as smiSup: - for mol in smiSup: - if mol is not None: - self.assertTrue(mol.HasProp("_Name")) - self.assertTrue(mol.HasProp("Column_2")) - prop = mol.GetProp("Column_2") - name = mol.GetProp("_Name") - confusedProps.append(prop) - confusedNames.append(name) - i += 1 - self.assertTrue(i == 10) - self.assertTrue(sorted(confusedNames) == sorted(names)) - self.assertTrue(sorted(confusedProps) == sorted(props)) + def testMultiSmiMolSupplier(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', + 'first_200.tpsa.csv') + # fileN = "../FileParsers/test_data/first_200.tpsa.csv" + smiSup = Chem.MultithreadedSmilesMolSupplier(fileN, ",", 0, -1) + i = 0 + while not smiSup.atEnd(): + mol = next(smiSup) + if (mol): + i += 1 + self.assertTrue(i == 200) + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', + 'fewSmi.csv') + # fileN = "../FileParsers/test_data/fewSmi.csv" + smiSup = Chem.MultithreadedSmilesMolSupplier(fileN, delimiter=",", smilesColumn=1, nameColumn=0, + titleLine=0) + names = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + props = [ + "34.14", "25.78", "106.51", "82.78", "60.16", "87.74", "37.38", "77.28", "65.18", "0.00" + ] + confusedNames = [] + confusedProps = [] + i = 0 + for mol in smiSup: + if mol is not None: + self.assertTrue(mol.HasProp("_Name")) + self.assertTrue(mol.HasProp("Column_2")) + prop = mol.GetProp("Column_2") + name = mol.GetProp("_Name") + confusedProps.append(prop) + confusedNames.append(name) + i += 1 + self.assertTrue(i == 10) + self.assertTrue(sorted(confusedNames) == sorted(names)) + self.assertTrue(sorted(confusedProps) == sorted(props)) + + # context manager + confusedNames = [] + confusedProps = [] + i = 0 + with Chem.MultithreadedSmilesMolSupplier(fileN, delimiter=",", smilesColumn=1, nameColumn=0, + titleLine=0) as smiSup: + for mol in smiSup: + if mol is not None: + self.assertTrue(mol.HasProp("_Name")) + self.assertTrue(mol.HasProp("Column_2")) + prop = mol.GetProp("Column_2") + name = mol.GetProp("_Name") + confusedProps.append(prop) + confusedNames.append(name) + i += 1 + self.assertTrue(i == 10) + self.assertTrue(sorted(confusedNames) == sorted(names)) + self.assertTrue(sorted(confusedProps) == sorted(props)) + + def testMultiSDMolSupplier(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', + 'NCI_aids_few.sdf') + # fileN = "../FileParsers/test_data/NCI_aids_few.sdf" + sdSup = Chem.MultithreadedSDMolSupplier(fileN) + molNames = [ + "48", "78", "128", "163", "164", "170", "180", "186", "192", "203", "210", "211", "213", + "220", "229", "256" + ] + confusedMolNames = [] + i = 0 + for mol in sdSup: + if mol is not None: + confusedMolNames.append(mol.GetProp("_Name")) + i += 1 + self.assertTrue(len(molNames) == i) + self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) + + # context manager + confusedMolNames = [] + i = 0 + with Chem.MultithreadedSDMolSupplier(fileN) as sdSup: + for mol in sdSup: + if mol is not None: + confusedMolNames.append(mol.GetProp("_Name")) + i += 1 + self.assertTrue(len(molNames) == i) + self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) + + # NOTE these are disabled until we rewrite the code to construct a + # MultithreadedSDMolSupplier from a python stream + @unittest.skip("Skipping construction from stream") + def testMultiSDMolSupplierFromStream(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', + 'NCI_aids_few.sdf') + molNames = [ + "48", "78", "128", "163", "164", "170", "180", "186", "192", "203", "210", "211", "213", + "220", "229", "256" + ] + # try opening with streambuf + inf = open(fileN, 'rb') + if (inf): + gSup = Chem.SDMolSupplierFromStream(inf) + confusedMolNames = [] + i = 0 + for mol in gSup: + # print("!!",i,file=sys.stderr);sys.stderr.flush() + if (mol): + confusedMolNames.append(mol.GetProp("_Name")) + i += 1 + self.assertTrue(len(molNames) == i) + self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) + # print("done!",file=sys.stderr);sys.stderr.flush() + # try opening with streambuf + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', + 'NCI_aids_few.sdf.gz') + # try opening with gzip + inf = gzip.open(fileN) + if (inf): + gSup = Chem.SDMolSupplierFromStream(inf) + confusedMolNames = [] + i = 0 + for mol in gSup: + # print("!",i,file=sys.stderr);sys.stderr.flush() + if (mol): + confusedMolNames.append(mol.GetProp("_Name")) + i += 1 + self.assertTrue(len(molNames) == i) + self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) - def testMultiSDMolSupplier(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', - 'FileParsers', 'test_data', 'NCI_aids_few.sdf') - # fileN = "../FileParsers/test_data/NCI_aids_few.sdf" - sdSup = Chem.MultithreadedSDMolSupplier(fileN) - molNames = ["48", "78", "128", "163", "164", "170", "180", "186", - "192", "203", "210", "211", "213", "220", "229", "256"] - confusedMolNames = [] - i = 0 - for mol in sdSup: - if mol is not None: - confusedMolNames.append(mol.GetProp("_Name")) - i += 1 - self.assertTrue(len(molNames) == i) - self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) - - # context manager - confusedMolNames = [] - i = 0 - with Chem.MultithreadedSDMolSupplier(fileN) as sdSup: - for mol in sdSup: - if mol is not None: - confusedMolNames.append(mol.GetProp("_Name")) - i += 1 - self.assertTrue(len(molNames) == i) - self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) - - - - - - # NOTE these are disabled until we rewrite the code to construct a - # MultithreadedSDMolSupplier from a python stream - @unittest.skip("Skipping construction from stream") - def testMultiSDMolSupplierFromStream(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', - 'FileParsers', 'test_data', 'NCI_aids_few.sdf') - molNames = ["48", "78", "128", "163", "164", "170", "180", "186", - "192", "203", "210", "211", "213", "220", "229", "256"] - # try opening with streambuf - inf = open(fileN,'rb') - if(inf): - gSup = Chem.SDMolSupplierFromStream(inf) - confusedMolNames = [] - i = 0 - for mol in gSup: - # print("!!",i,file=sys.stderr);sys.stderr.flush() - if(mol): - confusedMolNames.append(mol.GetProp("_Name")) - i += 1 - self.assertTrue(len(molNames) == i) - self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) - # print("done!",file=sys.stderr);sys.stderr.flush() - # try opening with streambuf - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', - 'NCI_aids_few.sdf.gz') - # try opening with gzip - inf = gzip.open(fileN) - if(inf): - gSup = Chem.SDMolSupplierFromStream(inf) - confusedMolNames = [] - i = 0 - for mol in gSup: - # print("!",i,file=sys.stderr);sys.stderr.flush() - if(mol): - confusedMolNames.append(mol.GetProp("_Name")) - i += 1 - self.assertTrue(len(molNames) == i) - self.assertTrue(sorted(confusedMolNames) == sorted(molNames)) - - - - if __name__ == '__main__': - print("Testing Smiles and SD MultithreadedMolSupplier") - unittest.main() + print("Testing Smiles and SD MultithreadedMolSupplier") + unittest.main() diff --git a/Code/GraphMol/Wrap/testPropertyLists.py b/Code/GraphMol/Wrap/testPropertyLists.py index e753b94f6..43865e258 100644 --- a/Code/GraphMol/Wrap/testPropertyLists.py +++ b/Code/GraphMol/Wrap/testPropertyLists.py @@ -2,15 +2,16 @@ # Copyright (C) 2019 Greg Landrum # All Rights Reserved # -from rdkit import RDConfig, rdBase -from rdkit import Chem -from io import BytesIO import unittest +from io import BytesIO + +from rdkit import Chem, RDConfig, rdBase class TestCase(unittest.TestCase): - def setUp(self): - self.sdf = b""" + + def setUp(self): + self.sdf = b""" RDKit 2D 3 3 0 0 0 0 0 0 0 0999 V2000 @@ -40,77 +41,75 @@ one n/a three [?] 2 2 ? $$$$""" - def testForwardSupplier(self): - sio = BytesIO(self.sdf) - suppl = Chem.ForwardSDMolSupplier(sio) - suppl.SetProcessPropertyLists(False) - m = next(suppl) - self.assertTrue(m.HasProp("atom.prop.AtomLabel")) - self.assertFalse(m.GetAtomWithIdx(0).HasProp("AtomLabel")) + def testForwardSupplier(self): + sio = BytesIO(self.sdf) + suppl = Chem.ForwardSDMolSupplier(sio) + suppl.SetProcessPropertyLists(False) + m = next(suppl) + self.assertTrue(m.HasProp("atom.prop.AtomLabel")) + self.assertFalse(m.GetAtomWithIdx(0).HasProp("AtomLabel")) - sio = BytesIO(self.sdf) - suppl = Chem.ForwardSDMolSupplier(sio) - self.assertTrue(suppl.GetProcessPropertyLists()) - m = next(suppl) - self.assertTrue(m.HasProp("atom.prop.AtomLabel")) - self.assertTrue(m.GetAtomWithIdx(0).HasProp("AtomLabel")) + sio = BytesIO(self.sdf) + suppl = Chem.ForwardSDMolSupplier(sio) + self.assertTrue(suppl.GetProcessPropertyLists()) + m = next(suppl) + self.assertTrue(m.HasProp("atom.prop.AtomLabel")) + self.assertTrue(m.GetAtomWithIdx(0).HasProp("AtomLabel")) - def testSupplier(self): - suppl = Chem.SDMolSupplier() - suppl.SetData(self.sdf) - suppl.SetProcessPropertyLists(False) - m = suppl[0] - self.assertFalse(suppl.GetProcessPropertyLists()) - self.assertTrue(m.HasProp("atom.prop.AtomLabel")) - self.assertFalse(m.GetAtomWithIdx(0).HasProp("AtomLabel")) + def testSupplier(self): + suppl = Chem.SDMolSupplier() + suppl.SetData(self.sdf) + suppl.SetProcessPropertyLists(False) + m = suppl[0] + self.assertFalse(suppl.GetProcessPropertyLists()) + self.assertTrue(m.HasProp("atom.prop.AtomLabel")) + self.assertFalse(m.GetAtomWithIdx(0).HasProp("AtomLabel")) - suppl.SetProcessPropertyLists(True) - m = suppl[0] - self.assertTrue(m.HasProp("atom.prop.AtomLabel")) - self.assertTrue(m.GetAtomWithIdx(0).HasProp("AtomLabel")) + suppl.SetProcessPropertyLists(True) + m = suppl[0] + self.assertTrue(m.HasProp("atom.prop.AtomLabel")) + self.assertTrue(m.GetAtomWithIdx(0).HasProp("AtomLabel")) - def testCreateLists(self): - suppl = Chem.SDMolSupplier() - suppl.SetData(self.sdf) - m = suppl[0] - self.assertTrue(m.GetAtomWithIdx(0).HasProp("NumHeavyNeighbors")) - m.ClearProp("atom.iprop.NumHeavyNeighbors") - self.assertFalse(m.HasProp("atom.iprop.NumHeavyNeighbors")) - Chem.CreateAtomIntPropertyList(m,"NumHeavyNeighbors") - self.assertTrue(m.HasProp("atom.iprop.NumHeavyNeighbors")) + def testCreateLists(self): + suppl = Chem.SDMolSupplier() + suppl.SetData(self.sdf) + m = suppl[0] + self.assertTrue(m.GetAtomWithIdx(0).HasProp("NumHeavyNeighbors")) + m.ClearProp("atom.iprop.NumHeavyNeighbors") + self.assertFalse(m.HasProp("atom.iprop.NumHeavyNeighbors")) + Chem.CreateAtomIntPropertyList(m, "NumHeavyNeighbors") + self.assertTrue(m.HasProp("atom.iprop.NumHeavyNeighbors")) - self.assertTrue(m.GetAtomWithIdx(0).HasProp("PartialCharge")) - m.ClearProp("atom.dprop.PartialCharge") - self.assertFalse(m.HasProp("atom.dprop.PartialCharge")) - Chem.CreateAtomDoublePropertyList(m,"PartialCharge") - self.assertTrue(m.HasProp("atom.dprop.PartialCharge")) + self.assertTrue(m.GetAtomWithIdx(0).HasProp("PartialCharge")) + m.ClearProp("atom.dprop.PartialCharge") + self.assertFalse(m.HasProp("atom.dprop.PartialCharge")) + Chem.CreateAtomDoublePropertyList(m, "PartialCharge") + self.assertTrue(m.HasProp("atom.dprop.PartialCharge")) - self.assertTrue(m.GetAtomWithIdx(0).HasProp("IsCarbon")) - m.ClearProp("atom.bprop.IsCarbon") - self.assertFalse(m.HasProp("atom.bprop.IsCarbon")) - Chem.CreateAtomBoolPropertyList(m,"IsCarbon") - self.assertTrue(m.HasProp("atom.bprop.IsCarbon")) + self.assertTrue(m.GetAtomWithIdx(0).HasProp("IsCarbon")) + m.ClearProp("atom.bprop.IsCarbon") + self.assertFalse(m.HasProp("atom.bprop.IsCarbon")) + Chem.CreateAtomBoolPropertyList(m, "IsCarbon") + self.assertTrue(m.HasProp("atom.bprop.IsCarbon")) - self.assertTrue(m.GetAtomWithIdx(0).HasProp("PartiallyMissing")) - m.ClearProp("atom.prop.PartiallyMissing") - self.assertFalse(m.HasProp("atom.prop.PartiallyMissing")) - Chem.CreateAtomStringPropertyList(m,"PartiallyMissing") - self.assertTrue(m.HasProp("atom.prop.PartiallyMissing")) - self.assertEqual(m.GetProp("atom.prop.PartiallyMissing"),"one n/a three") - Chem.CreateAtomStringPropertyList(m,"PartiallyMissing",missingValueMarker="?") - self.assertTrue(m.HasProp("atom.prop.PartiallyMissing")) - self.assertEqual(m.GetProp("atom.prop.PartiallyMissing"),"[?] one ? three") + self.assertTrue(m.GetAtomWithIdx(0).HasProp("PartiallyMissing")) + m.ClearProp("atom.prop.PartiallyMissing") + self.assertFalse(m.HasProp("atom.prop.PartiallyMissing")) + Chem.CreateAtomStringPropertyList(m, "PartiallyMissing") + self.assertTrue(m.HasProp("atom.prop.PartiallyMissing")) + self.assertEqual(m.GetProp("atom.prop.PartiallyMissing"), "one n/a three") + Chem.CreateAtomStringPropertyList(m, "PartiallyMissing", missingValueMarker="?") + self.assertTrue(m.HasProp("atom.prop.PartiallyMissing")) + self.assertEqual(m.GetProp("atom.prop.PartiallyMissing"), "[?] one ? three") - def testGithubPR4160(self): - # this shouldn't fail with a bad any cast anymore - from rdkit import Chem - m = Chem.MolFromSmiles("CC") - for a in m.GetAtoms(): - a.SetIntProp("foo", 1) - Chem.CreateAtomIntPropertyList(m, "foo") - + def testGithubPR4160(self): + # this shouldn't fail with a bad any cast anymore + from rdkit import Chem + m = Chem.MolFromSmiles("CC") + for a in m.GetAtoms(): + a.SetIntProp("foo", 1) + Chem.CreateAtomIntPropertyList(m, "foo") if __name__ == '__main__': unittest.main() - diff --git a/Code/GraphMol/Wrap/testSGroups.py b/Code/GraphMol/Wrap/testSGroups.py index 3c812abcb..1d303c363 100644 --- a/Code/GraphMol/Wrap/testSGroups.py +++ b/Code/GraphMol/Wrap/testSGroups.py @@ -8,13 +8,13 @@ # of the RDKit source tree. # -from rdkit import Chem -from rdkit import Geometry -from rdkit.Chem import RDConfig import os import sys import unittest +from rdkit import Chem, Geometry +from rdkit.Chem import RDConfig + class TestCase(unittest.TestCase): diff --git a/Code/GraphMol/Wrap/testThreads.py b/Code/GraphMol/Wrap/testThreads.py index 846854a9f..2388a887e 100644 --- a/Code/GraphMol/Wrap/testThreads.py +++ b/Code/GraphMol/Wrap/testThreads.py @@ -1,7 +1,8 @@ -import sys -from rdkit import Chem -import threading import multiprocessing +import sys +import threading + +from rdkit import Chem # this just tests some threading stuff to ensure it doesn't crash with python # releasing the GIL smarts are recursive... @@ -10,39 +11,39 @@ ref_mol = Chem.MolFromMolBlock(ref_sdf) core_smarts = '[#6]-!@[#6]-!@[#8]-!@[#6]:1:[#6](-!@[#6]#!@[#7]):[#6](-!@[#7]):[#6]:[#6](-!@[#7]-!@[#6](-!@[#6]-!@[#6]:2:[#6]:[#6]:[#6]:[#6]:[#6]:2)=!@[#8]):[#7]:1' if ref_mol is None: - raise ValueError('Bad ref structure') + raise ValueError('Bad ref structure') core_mol = Chem.MolFromSmarts(core_smarts) if core_mol is None: - raise ValueError('Bad core structure') + raise ValueError('Bad core structure') expected = {} def runner(func, args): - if args: - res = getattr(ref_mol, func)(args) - else: - res = getattr(ref_mol, func)() - if func in expected: - assert res == expected[func], "Got %r expected %r" % (ers, expected[func]) - return res + if args: + res = getattr(ref_mol, func)(args) + else: + res = getattr(ref_mol, func)() + if func in expected: + assert res == expected[func], "Got %r expected %r" % (ers, expected[func]) + return res funcs = ["GetSubstructMatch", "GetSubstructMatches", "HasSubstructMatch"] # get the expected results from the non-thread version for func in funcs: - expected[func] = runner(func, core_mol) + expected[func] = runner(func, core_mol) nthreads = int(multiprocessing.cpu_count() * 100 / 4) # 100 threads per cpu threads = [] for i in range(0, nthreads): - for func in funcs: - t = threading.Thread(target=runner, args=(func, core_mol)) - t.start() - threads.append(t) - t = threading.Thread(target=runner, args=("ToBinary", None)) + for func in funcs: + t = threading.Thread(target=runner, args=(func, core_mol)) t.start() threads.append(t) + t = threading.Thread(target=runner, args=("ToBinary", None)) + t.start() + threads.append(t) for t in threads: - t.join() + t.join() diff --git a/Code/GraphMol/Wrap/testTrajectory.py b/Code/GraphMol/Wrap/testTrajectory.py index e4b5c3a44..7ef1240bb 100644 --- a/Code/GraphMol/Wrap/testTrajectory.py +++ b/Code/GraphMol/Wrap/testTrajectory.py @@ -1,12 +1,11 @@ - -from rdkit import Chem -from rdkit.Chem import ChemicalForceFields, rdtrajectory -from rdkit.Chem.rdtrajectory import Snapshot, \ - Trajectory, ReadAmberTrajectory, ReadGromosTrajectory -import os, sys +import os +import sys import unittest -from rdkit import RDConfig +from rdkit import Chem, RDConfig +from rdkit.Chem import ChemicalForceFields, rdtrajectory +from rdkit.Chem.rdtrajectory import (ReadAmberTrajectory, ReadGromosTrajectory, + Snapshot, Trajectory) def feq(v1, v2, tol=1.0e-4): diff --git a/Code/GraphMol/Wrap/test_cdxml.py b/Code/GraphMol/Wrap/test_cdxml.py index 0c877547f..ec14b4c96 100644 --- a/Code/GraphMol/Wrap/test_cdxml.py +++ b/Code/GraphMol/Wrap/test_cdxml.py @@ -20,9 +20,11 @@ from io import StringIO from rdkit import Chem + class TestCase(unittest.TestCase): + def test_cdxml(self): - cdxml=""" + cdxml = """ """ mols = Chem.MolsFromCDXML(cdxml) self.assertEqual(len(mols), 1) - self.assertEqual(Chem.MolToSmiles(mols[0]), "CC(C)(C)OC(=O)C1CCCCCC1"); - + self.assertEqual(Chem.MolToSmiles(mols[0]), "CC(C)(C)OC(=O)C1CCCCCC1") + + if __name__ == '__main__': if "RDTESTCASE" in os.environ: suite = unittest.TestSuite() diff --git a/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py b/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py index 108b75df3..bf56c1fa5 100755 --- a/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py +++ b/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py @@ -1,6 +1,5 @@ - - import re + splitExpr = re.compile('[\t ]') from Chem import rdmol diff --git a/Code/GraphMol/Wrap/test_data/do_smiles.py b/Code/GraphMol/Wrap/test_data/do_smiles.py index 46a076c7d..16fec8560 100755 --- a/Code/GraphMol/Wrap/test_data/do_smiles.py +++ b/Code/GraphMol/Wrap/test_data/do_smiles.py @@ -1,5 +1,5 @@ - import re + splitExpr = re.compile('[\t ]') from rdkit import Chem diff --git a/Code/JavaWrappers/make_templates.py b/Code/JavaWrappers/make_templates.py index 44db1e2dc..01d2fd6cd 100644 --- a/Code/JavaWrappers/make_templates.py +++ b/Code/JavaWrappers/make_templates.py @@ -7,8 +7,8 @@ becomes this %template(OnBitProjSimilarityEBV) OnBitProjSimilarity; """ - import re + template_match = re.compile(r"""template\s*\<(.+)\>\s*.*\s+(\w+)\s*\(.*""") diff --git a/Code/JavaWrappers/parse_doxy_html.py b/Code/JavaWrappers/parse_doxy_html.py index c1f65fbef..5f1a5e716 100644 --- a/Code/JavaWrappers/parse_doxy_html.py +++ b/Code/JavaWrappers/parse_doxy_html.py @@ -9,11 +9,11 @@ files don't have that section and for now this program can't handle them without by hand. """ - -from BeautifulSoup import * import os import re +from BeautifulSoup import * + def list_class_files(dir): return [ @@ -306,8 +306,8 @@ Notes: ''' _renote = re.compile(r'^\w*(Notes?[:]?)(?:.*?$)(.*?)((^\w)|\Z)', flags=(re.M | re.I | re.DOTALL)) -_reparam = re.compile(r'^\w*(Param(?:eter)?s?[:]?)(?:.*?$)(.*?)((^\w)|\Z)', flags=(re.M | re.I | - re.DOTALL)) +_reparam = re.compile(r'^\w*(Param(?:eter)?s?[:]?)(?:.*?$)(.*?)((^\w)|\Z)', + flags=(re.M | re.I | re.DOTALL)) _rereturn = re.compile(r'^\w*(Returns[:])(?:.*?$)(.*?)((^\w)|\Z)', flags=(re.M | re.I | re.DOTALL)) _rereturn2 = re.compile(r'^\w*(Returns)\s+(.*?)((^\w)|\Z)', flags=(re.M | re.I | re.DOTALL)) _reusage = re.compile(r'^\w*(Usage[:]?)(?:.*?$)(.*?)((^\w)|\Z)', flags=(re.M | re.I | re.DOTALL)) diff --git a/Code/ML/InfoTheory/Wrap/testRanker.py b/Code/ML/InfoTheory/Wrap/testRanker.py index aacb8c580..c624cb445 100644 --- a/Code/ML/InfoTheory/Wrap/testRanker.py +++ b/Code/ML/InfoTheory/Wrap/testRanker.py @@ -1,13 +1,12 @@ -import unittest -import numpy -import os import io - +import os import pickle +import unittest -from rdkit import RDConfig, RDRandom +import numpy + +from rdkit import DataStructs, RDConfig, RDRandom from rdkit.ML.InfoTheory import rdInfoTheory as rdit -from rdkit import DataStructs def feq(a, b, tol=1e-4): diff --git a/Code/MinimalLib/simple.py b/Code/MinimalLib/simple.py index 725f42cf7..45365e0de 100644 --- a/Code/MinimalLib/simple.py +++ b/Code/MinimalLib/simple.py @@ -1,4 +1,5 @@ import ctypes + rdk = ctypes.cdll.LoadLibrary('./lib/librdkitcffi.so') rdk.get_smiles.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_char_p] rdk.get_smiles.restype = ctypes.c_char_p diff --git a/Code/Numerics/Alignment/Wrap/testAlignment.py b/Code/Numerics/Alignment/Wrap/testAlignment.py index 7cd08d429..6819a9f8b 100644 --- a/Code/Numerics/Alignment/Wrap/testAlignment.py +++ b/Code/Numerics/Alignment/Wrap/testAlignment.py @@ -4,13 +4,14 @@ Replaced numpy.oldnumeric with numpy methods - Jan 2015, PGedeck """ #pylint: disable=E1101,C0111,R0904 +import copy +import math +import unittest + +import numpy as np import rdkit.Numerics.rdAlignment as rdAlg from rdkit import Geometry -import unittest -import numpy as np -import math -import copy def lstFeq(l1, l2, tol=1.e-4): @@ -77,9 +78,9 @@ class TestCase(unittest.TestCase): refPts = np.array([[-math.cos(math.pi / 6), -math.sin(math.pi / 6), 0.0], [math.cos(math.pi / 6), -math.sin(math.pi / 6), 0.0], [0.0, 1.0, 0.0]], float) - prbPts = np.array([[-2 * math.sin(math.pi / 6) + 3.0, 2 * math.cos(math.pi / 6), 4.0], - [-2 * math.sin(math.pi / 6) + 3.0, -2 * math.cos(math.pi / 6), 4.0], - [5.0, 0.0, 4.0]], float) + prbPts = np.array( + [[-2 * math.sin(math.pi / 6) + 3.0, 2 * math.cos(math.pi / 6), 4.0], + [-2 * math.sin(math.pi / 6) + 3.0, -2 * math.cos(math.pi / 6), 4.0], [5.0, 0.0, 4.0]], float) res = rdAlg.GetAlignmentTransform(refPts, prbPts) self.assertTrue(feq(res[0], 3.0)) target = [[-1.732, -1., 0.], [1.732, -1., 0.], [0., 2., 0.]] @@ -105,13 +106,11 @@ class TestCase(unittest.TestCase): self.assertTrue(feq(res[0], 4.8)) def test3tetra(self): - refPts = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], - float) + refPts = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], float) prbPts = np.array([[2.0, 2.0, 3.0], [3.0, 2.0, 3.0], [2.0, 3.0, 3.0]], float) self.assertRaises(ValueError, lambda: rdAlg.GetAlignmentTransform(refPts, prbPts)) - prbPts = np.array([[2.0, 2.0, 3.0], [3.0, 2.0, 3.0], [2.0, 3.0, 3.0], [2.0, 2.0, 4.0]], - float) + prbPts = np.array([[2.0, 2.0, 3.0], [3.0, 2.0, 3.0], [2.0, 3.0, 3.0], [2.0, 2.0, 4.0]], float) res = rdAlg.GetAlignmentTransform(refPts, prbPts) self.assertTrue(feq(res[0], 0.0)) @@ -123,8 +122,7 @@ class TestCase(unittest.TestCase): self.assertTrue(feq(res[0], 0.0)) # test reflection - prbPts = np.array([[2.0, 2.0, 3.0], [3.0, 2.0, 3.0], [2.0, 2.0, 4.0], [2.0, 3.0, 3.0]], - float) + prbPts = np.array([[2.0, 2.0, 3.0], [3.0, 2.0, 3.0], [2.0, 2.0, 4.0], [2.0, 3.0, 3.0]], float) res = rdAlg.GetAlignmentTransform(refPts, prbPts, wts) self.assertTrue(feq(res[0], 1.0)) @@ -137,27 +135,35 @@ class TestCase(unittest.TestCase): cnt += 1 def test4points(self): - refPts = (Geometry.Point3D(0.0, 0.0, 0.0), - Geometry.Point3D(1.0, 0.0, 0.0), - Geometry.Point3D(0.0, 1.0, 0.0), - Geometry.Point3D(0.0, 0.0, 1.0), ) - prbPts = (Geometry.Point3D(2.0, 2.0, 3.0), - Geometry.Point3D(3.0, 2.0, 3.0), - Geometry.Point3D(2.0, 3.0, 3.0), - Geometry.Point3D(2.0, 2.0, 4.0), ) + refPts = ( + Geometry.Point3D(0.0, 0.0, 0.0), + Geometry.Point3D(1.0, 0.0, 0.0), + Geometry.Point3D(0.0, 1.0, 0.0), + Geometry.Point3D(0.0, 0.0, 1.0), + ) + prbPts = ( + Geometry.Point3D(2.0, 2.0, 3.0), + Geometry.Point3D(3.0, 2.0, 3.0), + Geometry.Point3D(2.0, 3.0, 3.0), + Geometry.Point3D(2.0, 2.0, 4.0), + ) res = rdAlg.GetAlignmentTransform(refPts, prbPts) self.assertTrue(feq(res[0], 0.0)) def test5errorHandling(self): - refPts = (Geometry.Point3D(0.0, 0.0, 0.0), - Geometry.Point3D(1.0, 0.0, 0.0), - Geometry.Point3D(0.0, 1.0, 0.0), - Geometry.Point3D(0.0, 0.0, 1.0), ) - prbPts = (1, - 2, - 3, - 4, ) + refPts = ( + Geometry.Point3D(0.0, 0.0, 0.0), + Geometry.Point3D(1.0, 0.0, 0.0), + Geometry.Point3D(0.0, 1.0, 0.0), + Geometry.Point3D(0.0, 0.0, 1.0), + ) + prbPts = ( + 1, + 2, + 3, + 4, + ) self.assertRaises(ValueError, lambda: rdAlg.GetAlignmentTransform(refPts, prbPts)) prbPts = () self.assertRaises(ValueError, lambda: rdAlg.GetAlignmentTransform(refPts, prbPts)) @@ -165,10 +171,12 @@ class TestCase(unittest.TestCase): prbPts = 1 self.assertRaises(ValueError, lambda: rdAlg.GetAlignmentTransform(refPts, prbPts)) - prbPts = (Geometry.Point3D(2.0, 2.0, 3.0), - Geometry.Point3D(3.0, 2.0, 3.0), - Geometry.Point3D(2.0, 3.0, 3.0), - (2.0, 2.0, 5.0), ) + prbPts = ( + Geometry.Point3D(2.0, 2.0, 3.0), + Geometry.Point3D(3.0, 2.0, 3.0), + Geometry.Point3D(2.0, 3.0, 3.0), + (2.0, 2.0, 5.0), + ) self.assertRaises(ValueError, lambda: rdAlg.GetAlignmentTransform(refPts, prbPts)) diff --git a/Code/SimDivPickers/Wrap/testMaxMin.py b/Code/SimDivPickers/Wrap/testMaxMin.py index 7507321e5..eb8c94121 100644 --- a/Code/SimDivPickers/Wrap/testMaxMin.py +++ b/Code/SimDivPickers/Wrap/testMaxMin.py @@ -1,7 +1,8 @@ - -from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv import numpy + from rdkit import RDRandom +from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv + RDRandom.seed(23) pkr = rdsimdiv.MaxMinPicker() diff --git a/Code/SimDivPickers/Wrap/testPickers.py b/Code/SimDivPickers/Wrap/testPickers.py index 9705139a4..5ad071e6d 100755 --- a/Code/SimDivPickers/Wrap/testPickers.py +++ b/Code/SimDivPickers/Wrap/testPickers.py @@ -1,10 +1,12 @@ -from rdkit import RDConfig -import unittest, os -from rdkit.SimDivFilters import rdSimDivPickers -from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc -from rdkit import DataStructs -import numpy +import os import random +import unittest + +import numpy + +from rdkit import DataStructs, RDConfig +from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc +from rdkit.SimDivFilters import rdSimDivPickers class TestCase(unittest.TestCase): @@ -240,13 +242,17 @@ class TestCase(unittest.TestCase): fps.append(fp) mmp = rdSimDivPickers.MaxMinPicker() ids = list(mmp.LazyBitVectorPick(fps, len(fps), 20, seed=42)) - self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630, - 881,516,497,412,718,869,407]) + self.assertEqual(ids, [ + 374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630, 881, 516, 497, 412, 718, 869, + 407 + ]) ids = list( mmp.LazyBitVectorPick(fps, len(fps), 20, firstPicks=[374, 720, 690, 339, 875], seed=42)) - self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630, - 881,516,497,412,718,869,407]) + self.assertEqual(ids, [ + 374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630, 881, 516, 497, 412, 718, 869, + 407 + ]) def testBitVectorMaxMin4(self): # threshold tests @@ -259,8 +265,10 @@ class TestCase(unittest.TestCase): fps.append(fp) mmp = rdSimDivPickers.MaxMinPicker() ids, threshold = mmp.LazyBitVectorPickWithThreshold(fps, len(fps), 20, -1.0, seed=42) - self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630, - 881,516,497,412,718,869,407]) + self.assertEqual(list(ids), [ + 374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630, 881, 516, 497, 412, 718, 869, + 407 + ]) self.assertAlmostEqual(threshold, 0.8977, 4) diff --git a/Code/cmake/Modules/fixup_coverage.py b/Code/cmake/Modules/fixup_coverage.py index a10dd8bef..ff3e62e83 100644 --- a/Code/cmake/Modules/fixup_coverage.py +++ b/Code/cmake/Modules/fixup_coverage.py @@ -4,7 +4,9 @@ It replaces the paths with the ones from the source tree n.b. if a file with the same name (i.e. sln.yy) is found twice in the source tree, this will break""" -import os, sys +import os +import sys + source_dir, info_file = sys.argv[1:3] print(source_dir, info_file) diff --git a/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py b/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py index ffac8d606..1f12aa9e4 100644 --- a/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py +++ b/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py @@ -1,18 +1,16 @@ -# This is a reference implementation of the Atom-Atom-Path (AAP) similarity metric +# This is a reference implementation of the Atom-Atom-Path (AAP) similarity metric # from Gobbi et al, J. Cheminf. (2015) 7:11. https://doi.org/10.1186/s13321-015-0056-8 # # Original author: Richard Hall # -import numpy import time import unittest +import numpy from scipy.optimize import linear_sum_assignment -from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import rdmolops -from rdkit import DataStructs +from rdkit import Chem, DataStructs +from rdkit.Chem import AllChem, rdmolops from rdkit.Chem.Fingerprints import FingerprintMols _BK_ = { @@ -128,6 +126,8 @@ def getpathintegers(m1, uptolength=7): val4 = val2 pathuniqueint = val4 pathintegers[idx].append(pathuniqueint) + + #sorted lists allow for a quicker comparison algorithm for p in pathintegers.values(): p.sort() @@ -260,8 +260,10 @@ def test1(): def test2(): '''generate a matrix molecules from the Gobbi source AAPathComparator2Test.java''' - smileslist = ["*", "C", "N", "CCO", "CC(=O)N", "c1ccccc1", "c1ncncc1", "c1[nH]ccc1", - "c1ncncc1CC(=O)N", "c1ccccc1c1ncncc1"] + smileslist = [ + "*", "C", "N", "CCO", "CC(=O)N", "c1ccccc1", "c1ncncc1", "c1[nH]ccc1", "c1ncncc1CC(=O)N", + "c1ccccc1c1ncncc1" + ] sims = [] for s1 in smileslist: for s2 in smileslist: @@ -350,33 +352,39 @@ class TestAtomAtomPathSimilarity(unittest.TestCase): self.assertEqual(getcommon([2, 2, 2, 3, 3, 3], 6, [1, 2, 3, 3, 4, 5], 6), 3) def test_pathintegers(self): - self.assertEqual(test1(), [ - {0: []}, {0: [1160, 2270], - 1: [2260, 30692], - 2: [1145, 33761]}, {0: [752, 1150, 1155, 3826, 38221, 43791], - 1: [1145, 1150, 1596, 4670, 32641, 38211], - 2: [1145, 1155, 5785, 32646, 43786, 65173]} - ]) + self.assertEqual(test1(), [{ + 0: [] + }, { + 0: [1160, 2270], + 1: [2260, 30692], + 2: [1145, 33761] + }, { + 0: [752, 1150, 1155, 3826, 38221, 43791], + 1: [1145, 1150, 1596, 4670, 32641, 38211], + 2: [1145, 1155, 5785, 32646, 43786, 65173] + }]) def test_AAPathComparator2Test(self): - self.assertEqual( - test2(), - ['1.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', - '0.0000', '0.0000', '1.0000', '0.0000', '0.0345', '0.0182', '0.0000', '0.0000', '0.0000', - '0.0017', '0.0000', '0.0000', '0.0000', '1.0000', '0.0000', '0.0182', '0.0000', '0.0000', - '0.0000', '0.0020', '0.0000', '0.0000', '0.0345', '0.0000', '1.0000', '0.1126', '0.0000', - '0.0000', '0.0000', '0.0088', '0.0000', '0.0000', '0.0182', '0.0182', '0.1126', '1.0000', - '0.0000', '0.0000', '0.0000', '0.0336', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', - '0.0000', '1.0000', '0.0373', '0.0645', '0.0148', '0.0869', '0.0000', '0.0000', '0.0000', - '0.0000', '0.0000', '0.0373', '1.0000', '0.1101', '0.1767', '0.0869', '0.0000', '0.0000', - '0.0000', '0.0000', '0.0000', '0.0645', '0.1101', '1.0000', '0.0387', '0.0219', '0.0000', - '0.0017', '0.0020', '0.0088', '0.0336', '0.0148', '0.1767', '0.0387', '1.0000', '0.0869', - '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0869', '0.0869', '0.0219', '0.0869', - '1.0000']) + self.assertEqual(test2(), [ + '1.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', + '0.0000', '0.0000', '1.0000', '0.0000', '0.0345', '0.0182', '0.0000', '0.0000', '0.0000', + '0.0017', '0.0000', '0.0000', '0.0000', '1.0000', '0.0000', '0.0182', '0.0000', '0.0000', + '0.0000', '0.0020', '0.0000', '0.0000', '0.0345', '0.0000', '1.0000', '0.1126', '0.0000', + '0.0000', '0.0000', '0.0088', '0.0000', '0.0000', '0.0182', '0.0182', '0.1126', '1.0000', + '0.0000', '0.0000', '0.0000', '0.0336', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', + '0.0000', '1.0000', '0.0373', '0.0645', '0.0148', '0.0869', '0.0000', '0.0000', '0.0000', + '0.0000', '0.0000', '0.0373', '1.0000', '0.1101', '0.1767', '0.0869', '0.0000', '0.0000', + '0.0000', '0.0000', '0.0000', '0.0645', '0.1101', '1.0000', '0.0387', '0.0219', '0.0000', + '0.0017', '0.0020', '0.0088', '0.0336', '0.0148', '0.1767', '0.0387', '1.0000', '0.0869', + '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0869', '0.0869', '0.0219', '0.0869', + '1.0000' + ]) def test_tableS1(self): - self.assertEqual(test3(), ['1.00', '0.19', '0.06', '0.09', '0.19', '1.00', '0.12', '0.05', - '0.06', '0.12', '1.00', '0.15', '0.09', '0.05', '0.15', '1.00']) + self.assertEqual(test3(), [ + '1.00', '0.19', '0.06', '0.09', '0.19', '1.00', '0.12', '0.05', '0.06', '0.12', '1.00', + '0.15', '0.09', '0.05', '0.15', '1.00' + ]) if __name__ == "__main__": diff --git a/Contrib/CalcLigRMSD/CalcLigRMSD.py b/Contrib/CalcLigRMSD/CalcLigRMSD.py index 149ac5717..a1488edfa 100644 --- a/Contrib/CalcLigRMSD/CalcLigRMSD.py +++ b/Contrib/CalcLigRMSD/CalcLigRMSD.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2021 Carmen Esposito +# Copyright (C) 2021 Carmen Esposito # # @@ All Rights Reserved @@ # This file is part of the RDKit. @@ -7,13 +7,14 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from rdkit import Chem -from rdkit.Chem import rdFMCS import numpy as np +from rdkit import Chem +from rdkit.Chem import rdFMCS -def CalcLigRMSD(lig1, lig2, rename_lig2 = True, output_filename="tmp.pdb"): - """ + +def CalcLigRMSD(lig1, lig2, rename_lig2=True, output_filename="tmp.pdb"): + """ Calculate the Root-mean-square deviation (RMSD) between two prealigned ligands, even when atom names between the two ligands are not matching. The symmetry of the molecules is taken into consideration (e.g. tri-methyl groups). @@ -36,41 +37,40 @@ def CalcLigRMSD(lig1, lig2, rename_lig2 = True, output_filename="tmp.pdb"): rmsd : float Root-mean-square deviation between the two input molecules """ - - # Exclude hydrogen atoms from the RMSD calculation - lig1 = Chem.RemoveHs(lig1) - lig2 = Chem.RemoveHs(lig2) - # Extract coordinates - coordinates_lig2 = lig2.GetConformer().GetPositions() - coordinates_lig1 = lig1.GetConformer().GetPositions() - # Calculate the RMSD between the MCS of lig1 and lig2 (useful if e.g. the crystal structures has missing atoms) - res=rdFMCS.FindMCS([lig1,lig2]) - ref_mol = Chem.MolFromSmarts(res.smartsString) - # Match the ligands to the MCS - # For lig2, the molecular symmetry is considered: - # If 2 atoms are symmetric (3 and 4), two indeces combinations are printed out - # ((0,1,2,3,4), (0,1,2,4,3)) and stored in mas2_list - mas1 = list(lig1.GetSubstructMatch(ref_mol)) # match lig1 to MCS - mas2_list = lig2.GetSubstructMatches(ref_mol, uniquify =False) - # Reorder the coordinates of the ligands and calculate the RMSD between all possible symmetrical atom matches - coordinates_lig1 = coordinates_lig1[mas1] - list_rmsd = [] - for match1 in mas2_list: - coordinates_lig2_tmp = coordinates_lig2[list(match1)] - diff = coordinates_lig2_tmp - coordinates_lig1 - list_rmsd.append(np.sqrt((diff * diff).sum() / len(coordinates_lig2_tmp))) # rmsd - # Return the minimum RMSD - lig_rmsd = min(list_rmsd) - # Write out a PDB file with matched atom names - if rename_lig2: - mas2 = mas2_list[np.argmin(list_rmsd)] - correspondence_key2_item1 = dict(zip(mas2, mas1)) - atom_names_lig1 = [atom1.GetPDBResidueInfo().GetName() for atom1 in lig1.GetAtoms()] - lig1_ResName = lig1.GetAtoms()[0].GetPDBResidueInfo().GetResidueName() - for i, atom1 in enumerate(lig2.GetAtoms()): - atom1.GetPDBResidueInfo().SetResidueName(lig1_ResName) - if i in correspondence_key2_item1.keys(): - atom1.GetPDBResidueInfo().SetName(atom_names_lig1[correspondence_key2_item1[i]]) - Chem.MolToPDBFile(lig2, output_filename) - return lig_rmsd + # Exclude hydrogen atoms from the RMSD calculation + lig1 = Chem.RemoveHs(lig1) + lig2 = Chem.RemoveHs(lig2) + # Extract coordinates + coordinates_lig2 = lig2.GetConformer().GetPositions() + coordinates_lig1 = lig1.GetConformer().GetPositions() + # Calculate the RMSD between the MCS of lig1 and lig2 (useful if e.g. the crystal structures has missing atoms) + res = rdFMCS.FindMCS([lig1, lig2]) + ref_mol = Chem.MolFromSmarts(res.smartsString) + # Match the ligands to the MCS + # For lig2, the molecular symmetry is considered: + # If 2 atoms are symmetric (3 and 4), two indeces combinations are printed out + # ((0,1,2,3,4), (0,1,2,4,3)) and stored in mas2_list + mas1 = list(lig1.GetSubstructMatch(ref_mol)) # match lig1 to MCS + mas2_list = lig2.GetSubstructMatches(ref_mol, uniquify=False) + # Reorder the coordinates of the ligands and calculate the RMSD between all possible symmetrical atom matches + coordinates_lig1 = coordinates_lig1[mas1] + list_rmsd = [] + for match1 in mas2_list: + coordinates_lig2_tmp = coordinates_lig2[list(match1)] + diff = coordinates_lig2_tmp - coordinates_lig1 + list_rmsd.append(np.sqrt((diff * diff).sum() / len(coordinates_lig2_tmp))) # rmsd + # Return the minimum RMSD + lig_rmsd = min(list_rmsd) + # Write out a PDB file with matched atom names + if rename_lig2: + mas2 = mas2_list[np.argmin(list_rmsd)] + correspondence_key2_item1 = dict(zip(mas2, mas1)) + atom_names_lig1 = [atom1.GetPDBResidueInfo().GetName() for atom1 in lig1.GetAtoms()] + lig1_ResName = lig1.GetAtoms()[0].GetPDBResidueInfo().GetResidueName() + for i, atom1 in enumerate(lig2.GetAtoms()): + atom1.GetPDBResidueInfo().SetResidueName(lig1_ResName) + if i in correspondence_key2_item1.keys(): + atom1.GetPDBResidueInfo().SetName(atom_names_lig1[correspondence_key2_item1[i]]) + Chem.MolToPDBFile(lig2, output_filename) + return lig_rmsd diff --git a/Contrib/ChiralPairs/ChiralDescriptors.py b/Contrib/ChiralPairs/ChiralDescriptors.py index f01d69b22..7b1dbd674 100644 --- a/Contrib/ChiralPairs/ChiralDescriptors.py +++ b/Contrib/ChiralPairs/ChiralDescriptors.py @@ -1,19 +1,19 @@ # # Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,272 +30,298 @@ # # Created by Nadine Schneider & Peter Ertl, July 2017 - -from collections import defaultdict, Counter, namedtuple - -import seaborn as sns -import numpy as np import re +from collections import Counter, defaultdict, namedtuple + +import numpy as np +import seaborn as sns from numpy import linalg from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem.Draw import rdMolDraw2D + # build an svg grid image to print -def _svgsToGrid(svgs, labels, svgsPerRow=4,molSize=(250,150),fontSize=12): - - matcher = re.compile(r'^(<.*>\n)(\n)(.*)',re.DOTALL) - hdr='' - ftr='' - rect='' - nRows = len(svgs)//svgsPerRow - if len(svgs)%svgsPerRow : nRows+=1 - blocks = ['']*(nRows*svgsPerRow) - labelSizeDist = fontSize*5 - fullSize=(svgsPerRow*(molSize[0]+molSize[0]/10.0),nRows*(molSize[1]+labelSizeDist)) +def _svgsToGrid(svgs, labels, svgsPerRow=4, molSize=(250, 150), fontSize=12): - count=0 - for svg,name in zip(svgs,labels): - h,r,b = matcher.match(svg).groups() - if hdr == '': - hdr = h.replace("width='{}px'".format(molSize[0]),"width='{}px'".format(fullSize[0])) - hdr = hdr.replace("height='{}px'".format(molSize[1]),"height='{}px'".format(fullSize[1])) - if rect == '': - rect = r - - tspanFmt = '{2}' - names = name.split('|') - legend = [] - legend.append(''.format(fontSize)) - legend.append(tspanFmt.format(molSize[0]/2., molSize[1]+fontSize*2, names[0])) - if len(names) > 1: - legend.append(tspanFmt.format(molSize[0]/2., molSize[1]+fontSize*3.5, names[1])) - legend.append('') - legend = '\n'.join(legend) + matcher = re.compile(r'^(<.*>\n)(\n)(.*)', re.DOTALL) + hdr = '' + ftr = '' + rect = '' + nRows = len(svgs) // svgsPerRow + if len(svgs) % svgsPerRow: + nRows += 1 + blocks = [''] * (nRows * svgsPerRow) + labelSizeDist = fontSize * 5 + fullSize = (svgsPerRow * (molSize[0] + molSize[0] / 10.0), nRows * (molSize[1] + labelSizeDist)) - blocks[count] = b + legend - count+=1 + count = 0 + for svg, name in zip(svgs, labels): + h, r, b = matcher.match(svg).groups() + if hdr == '': + hdr = h.replace("width='{}px'".format(molSize[0]), "width='{}px'".format(fullSize[0])) + hdr = hdr.replace("height='{}px'".format(molSize[1]), "height='{}px'".format(fullSize[1])) + if rect == '': + rect = r - for i,elem in enumerate(blocks): - row = i//svgsPerRow - col = i%svgsPerRow - elem = rect+elem - blocks[i] = '%s'%(col*(molSize[0]+molSize[0]/10.0),row*(molSize[1]+labelSizeDist),elem) - res = hdr + '\n'.join(blocks)+ftr - return res + tspanFmt = '{2}' + names = name.split('|') + legend = [] + legend.append( + ''.format( + fontSize)) + legend.append(tspanFmt.format(molSize[0] / 2., molSize[1] + fontSize * 2, names[0])) + if len(names) > 1: + legend.append(tspanFmt.format(molSize[0] / 2., molSize[1] + fontSize * 3.5, names[1])) + legend.append('') + legend = '\n'.join(legend) + + blocks[count] = b + legend + count += 1 + + for i, elem in enumerate(blocks): + row = i // svgsPerRow + col = i % svgsPerRow + elem = rect + elem + blocks[i] = '%s' % (col * + (molSize[0] + molSize[0] / 10.0), row * + (molSize[1] + labelSizeDist), elem) + res = hdr + '\n'.join(blocks) + ftr + return res def determineAtomSubstituents(atomID, mol, distanceMatrix, verbose=False): - atomPaths = distanceMatrix[atomID] - # determine the direct neighbors of the atom - neighbors = [n for n,i in enumerate(atomPaths) if i == 1] - # store the ids of the neighbors (substituents) - subs = defaultdict(list) - # track in how many substituents an atom is involved (can happen in rings) - sharedNeighbors = defaultdict(int) - # determine the max path length for each substituent - maxShell=defaultdict(int) - for n in neighbors: - subs[n].append(n) - sharedNeighbors[n]+=1 - maxShell[n]=0 - # second shell of neighbors - mindist=2 - # max distance from atom - maxdist=int(np.max(atomPaths)) - for d in range(mindist,maxdist+1): - if verbose: - print("Shell: ",d) - newShell = [n for n,i in enumerate(atomPaths) if i == d] - for aidx in newShell: - if verbose: - print("Atom ", aidx," in shell ",d) - atom = mol.GetAtomWithIdx(aidx) - # find neighbors of the current atom that are part of the substituent already - for n in atom.GetNeighbors(): - nidx = n.GetIdx() - for k,v in subs.items(): - # is the neighbor in the substituent and is not inthe same shell as the current atom - # and we haven't added the current atom already then put it in the correct substituent list - if nidx in v and nidx not in newShell and aidx not in v: - subs[k].append(aidx) - sharedNeighbors[aidx]+=1 - maxShell[k]=d - if verbose: - print("Atom ",aidx," assigned to ",nidx) + atomPaths = distanceMatrix[atomID] + # determine the direct neighbors of the atom + neighbors = [n for n, i in enumerate(atomPaths) if i == 1] + # store the ids of the neighbors (substituents) + subs = defaultdict(list) + # track in how many substituents an atom is involved (can happen in rings) + sharedNeighbors = defaultdict(int) + # determine the max path length for each substituent + maxShell = defaultdict(int) + for n in neighbors: + subs[n].append(n) + sharedNeighbors[n] += 1 + maxShell[n] = 0 + # second shell of neighbors + mindist = 2 + # max distance from atom + maxdist = int(np.max(atomPaths)) + for d in range(mindist, maxdist + 1): if verbose: - print(subs) - print(sharedNeighbors) - - return subs, sharedNeighbors, maxShell + print("Shell: ", d) + newShell = [n for n, i in enumerate(atomPaths) if i == d] + for aidx in newShell: + if verbose: + print("Atom ", aidx, " in shell ", d) + atom = mol.GetAtomWithIdx(aidx) + # find neighbors of the current atom that are part of the substituent already + for n in atom.GetNeighbors(): + nidx = n.GetIdx() + for k, v in subs.items(): + # is the neighbor in the substituent and is not inthe same shell as the current atom + # and we haven't added the current atom already then put it in the correct substituent list + if nidx in v and nidx not in newShell and aidx not in v: + subs[k].append(aidx) + sharedNeighbors[aidx] += 1 + maxShell[k] = d + if verbose: + print("Atom ", aidx, " assigned to ", nidx) + if verbose: + print(subs) + print(sharedNeighbors) + + return subs, sharedNeighbors, maxShell + def _getSizeOfSubstituents(sub, sharedNeighbors, weighdownShared=True): - if weighdownShared: - return sum(1.0 / sharedNeighbors[a] for a in sub) - else: - return len(sub) - -def getBondsSubstituent(mol, atoms): - bonds=[] - for b in mol.GetBonds(): - a1 = b.GetBeginAtomIdx() - a2 = b.GetEndAtomIdx() - if a1 in atoms and a2 in atoms: - bonds.append(b.GetIdx()) - return bonds + if weighdownShared: + return sum(1.0 / sharedNeighbors[a] for a in sub) + else: + return len(sub) + + +def getBondsSubstituent(mol, atoms): + bonds = [] + for b in mol.GetBonds(): + a1 = b.GetBeginAtomIdx() + a2 = b.GetEndAtomIdx() + if a1 in atoms and a2 in atoms: + bonds.append(b.GetIdx()) + return bonds + + +def getNumAromaticBondsSubstituent(mol, subAtoms): + return sum(1 for b in getBondsSubstituent(mol, subAtoms) if mol.GetBondWithIdx(b).GetIsAromatic()) -def getNumAromaticBondsSubstituent(mol, subAtoms): - return sum(1 for b in getBondsSubstituent(mol, subAtoms) - if mol.GetBondWithIdx(b).GetIsAromatic()) def getNumRotatableBondsSubstituent(mol, subAtoms): - rotatableBond = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]') - matches = mol.GetSubstructMatches(rotatableBond) - numRotBonds=0 - for a1,a2 in matches: - if a1 in subAtoms and a2 in subAtoms: - numRotBonds+=1 - return numRotBonds + rotatableBond = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]') + matches = mol.GetSubstructMatches(rotatableBond) + numRotBonds = 0 + for a1, a2 in matches: + if a1 in subAtoms and a2 in subAtoms: + numRotBonds += 1 + return numRotBonds -substituentDescriptor = namedtuple('substituentDescriptor', - ['size','relSize','numNO','relNumNO','relNumNO_2','pathLength','relPathLength','relPathLength_2', - 'sharedNeighbors', 'numRotBonds', 'numAroBonds']) +substituentDescriptor = namedtuple('substituentDescriptor', [ + 'size', 'relSize', 'numNO', 'relNumNO', 'relNumNO_2', 'pathLength', 'relPathLength', + 'relPathLength_2', 'sharedNeighbors', 'numRotBonds', 'numAroBonds' +]) -def calcSizeSubstituents(mol, subs, sharedNeighbors, maxShell): - sizeDict=defaultdict() - numAtoms = mol.GetNumAtoms() - for sidx, sub in sorted(subs.items(), key= lambda x: len(x[1])): - size = _getSizeOfSubstituents(sub, sharedNeighbors) - numNOs=0 - numShared=0 - # determine the number of oxygen and nitrogen atoms - for i in sub: - if mol.GetAtomWithIdx(i).GetAtomicNum() in [7,8]: - numNOs+=1.0/sharedNeighbors[i] - if sharedNeighbors[i] > 1: - numShared+=1 - numRotBs = getNumRotatableBondsSubstituent(mol, set(sub)) - aroBonds = getNumAromaticBondsSubstituent(mol, set(sub)) - # fill the substituentDescriptor tuple - sizeDict[sidx]=substituentDescriptor(size=size,relSize=size/numAtoms,numNO=numNOs,relNumNO=numNOs/numAtoms, - relNumNO_2=numNOs/size,pathLength=maxShell[sidx], - relPathLength=maxShell[sidx]/numAtoms,relPathLength_2=maxShell[sidx]/size, - sharedNeighbors=numShared, numRotBonds=numRotBs, numAroBonds=aroBonds) - # if we have less then 4 substituents the missing ones need to be an hydrogen atoms - if len(sizeDict) < 4: - for i in range(4-len(sizeDict)): - sizeDict['H'+str(i)]=substituentDescriptor(size=0,relSize=0,numNO=0,relNumNO=0,relNumNO_2=0, - pathLength=0,relPathLength=0,relPathLength_2=0,sharedNeighbors=0, numRotBonds=0, - numAroBonds=0) - return sizeDict + +def calcSizeSubstituents(mol, subs, sharedNeighbors, maxShell): + sizeDict = defaultdict() + numAtoms = mol.GetNumAtoms() + for sidx, sub in sorted(subs.items(), key=lambda x: len(x[1])): + size = _getSizeOfSubstituents(sub, sharedNeighbors) + numNOs = 0 + numShared = 0 + # determine the number of oxygen and nitrogen atoms + for i in sub: + if mol.GetAtomWithIdx(i).GetAtomicNum() in [7, 8]: + numNOs += 1.0 / sharedNeighbors[i] + if sharedNeighbors[i] > 1: + numShared += 1 + numRotBs = getNumRotatableBondsSubstituent(mol, set(sub)) + aroBonds = getNumAromaticBondsSubstituent(mol, set(sub)) + # fill the substituentDescriptor tuple + sizeDict[sidx] = substituentDescriptor( + size=size, relSize=size / numAtoms, numNO=numNOs, relNumNO=numNOs / numAtoms, + relNumNO_2=numNOs / size, pathLength=maxShell[sidx], relPathLength=maxShell[sidx] / numAtoms, + relPathLength_2=maxShell[sidx] / size, sharedNeighbors=numShared, numRotBonds=numRotBs, + numAroBonds=aroBonds) + # if we have less then 4 substituents the missing ones need to be an hydrogen atoms + if len(sizeDict) < 4: + for i in range(4 - len(sizeDict)): + sizeDict['H' + str(i)] = substituentDescriptor(size=0, relSize=0, numNO=0, relNumNO=0, + relNumNO_2=0, pathLength=0, relPathLength=0, + relPathLength_2=0, sharedNeighbors=0, + numRotBonds=0, numAroBonds=0) + return sizeDict # Visualization of the substituents -def visualizeSubstituentsGrid(mol, aIdx, molSize=(300,150), kekulize=True,): - dists = Chem.GetDistanceMatrix(mol) - idxChiral = Chem.FindMolChiralCenters(mol)[0][0] - subs, sharedNeighbors, maxShell = determineAtomSubstituents(aIdx, mol, dists, False) - - colors = sns.husl_palette(len(subs), s=.6) - mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize) - count=0 - svgs=[] - labels=[] - for sub in sorted(subs.values(), key= lambda x: _getSizeOfSubstituents(x, sharedNeighbors)): - color = tuple(colors[count]) - count+=1 - atColors = {atom: color for atom in sub} - - bonds = getBondsSubstituent(mol, set(sub)) - bnColors = {bond: color for bond in bonds} - - drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1]) - drawer.DrawMolecule(mc,highlightAtoms=atColors.keys(), - highlightAtomColors=atColors,highlightBonds=bonds,highlightBondColors=bnColors) - drawer.FinishDrawing() - svg = drawer.GetDrawingText() - svgs.append(svg.replace('svg:','')) - labels.append("Substituent "+str(count)+" (#atoms: "+str(len(sub))+", size normed: "+ - str(_getSizeOfSubstituents(sub, sharedNeighbors))+")") - return _svgsToGrid(svgs, labels, svgsPerRow=len(svgs),molSize=molSize,fontSize=12) - -def visualizeChiralSubstituentsGrid(mol): - idxChiral = Chem.FindMolChiralCenters(mol)[0][0] - return visualizeSubstituentsGrid(mol, idxChiral) +def visualizeSubstituentsGrid( + mol, + aIdx, + molSize=(300, 150), + kekulize=True, +): + dists = Chem.GetDistanceMatrix(mol) + idxChiral = Chem.FindMolChiralCenters(mol)[0][0] + subs, sharedNeighbors, maxShell = determineAtomSubstituents(aIdx, mol, dists, False) -# Chiral moment descriptor + colors = sns.husl_palette(len(subs), s=.6) + mc = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize) + count = 0 + svgs = [] + labels = [] + for sub in sorted(subs.values(), key=lambda x: _getSizeOfSubstituents(x, sharedNeighbors)): + color = tuple(colors[count]) + count += 1 + atColors = {atom: color for atom in sub} + + bonds = getBondsSubstituent(mol, set(sub)) + bnColors = {bond: color for bond in bonds} + + drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1]) + drawer.DrawMolecule(mc, highlightAtoms=atColors.keys(), highlightAtomColors=atColors, + highlightBonds=bonds, highlightBondColors=bnColors) + drawer.FinishDrawing() + svg = drawer.GetDrawingText() + svgs.append(svg.replace('svg:', '')) + labels.append("Substituent " + str(count) + " (#atoms: " + str(len(sub)) + ", size normed: " + + str(_getSizeOfSubstituents(sub, sharedNeighbors)) + ")") + return _svgsToGrid(svgs, labels, svgsPerRow=len(svgs), molSize=molSize, fontSize=12) + + +def visualizeChiralSubstituentsGrid(mol): + idxChiral = Chem.FindMolChiralCenters(mol)[0][0] + return visualizeSubstituentsGrid(mol, idxChiral) + + +# Chiral moment descriptor def calcSP3CarbonSubstituentMoment(subSizes): - if len(subSizes) != 4: - raise ValueError('Function "calcSP3CarbonSubstituentMoment" expects an array of size 4 as parameter') - - # tetrahedron unit vectors - x1=np.array([1,1,1]) - x2=np.array([-1,1,-1]) - x3=np.array([1,-1,-1]) - x4=np.array([-1,-1,1]) + if len(subSizes) != 4: + raise ValueError( + 'Function "calcSP3CarbonSubstituentMoment" expects an array of size 4 as parameter') + + # tetrahedron unit vectors + x1 = np.array([1, 1, 1]) + x2 = np.array([-1, 1, -1]) + x3 = np.array([1, -1, -1]) + x4 = np.array([-1, -1, 1]) + + substituentMoment = linalg.norm((subSizes[0] * x1) + (subSizes[1] * x2) + (subSizes[2] * x3) + + (subSizes[3] * x4)) + return substituentMoment - substituentMoment= linalg.norm((subSizes[0]*x1)+(subSizes[1]*x2)+(subSizes[2]*x3)+(subSizes[3]*x4)) - return substituentMoment def calculateChiralDescriptors(mol, idxChiral, dists, verbose=False): - - desc = {} - subs, sharedNeighbors, maxShell = determineAtomSubstituents(idxChiral, mol, dists, verbose) - sizes = calcSizeSubstituents(mol, subs, sharedNeighbors, maxShell) - paths = dists[idxChiral] - # set some basic descriptors - desc['numAtoms'] = mol.GetNumAtoms() - desc['numBonds'] = mol.GetNumBonds() - desc['numRotBonds'] = AllChem.CalcNumRotatableBonds(mol) - desc['ringChiralCenter'] = int(mol.GetAtomWithIdx(idxChiral).IsInRing()) - # determine the max path length in the molecule and the mean pairwise distance of all atom pairs - desc['meanDist'] = np.sum(dists)/((desc['numAtoms']-1)*(desc['numAtoms'])) - desc['maxDist'] = int(np.max(dists)) - # determine the max path length from the chiral center and the mean pairwise distance of - # all atom pairs from the chiral center - desc['meanDistFromCC'] = np.sum(paths)/(desc['numAtoms']-1) - desc['maxDistfromCC'] = int(np.max(paths)) - # determine the number of neighbors per shell/distance level - nlevels=Counter(paths.astype(int)) - # consider the levels until a path length of 10 - for i in range(1,11): - desc['nLevel'+str(i)]=nlevels[i] - # determine the number of nitrogen and oxygen atoms in a certain level around the chiral center - for i in range(1,4): - desc['phLevel'+str(i)]=len([n for n,j in enumerate(paths) if j==i and mol.GetAtomWithIdx(n).GetAtomicNum() in [7,8]]) - # determine the number of aromatic atoms in a certain level around the chiral center - for i in range(1,4): - desc['arLevel'+str(i)]=len([n for n,j in enumerate(paths) if j==i and mol.GetAtomWithIdx(n).GetIsAromatic()]) - # set the size descriptors for each substituent, sort them from smallest to largest - for n, v in enumerate(sorted(sizes.values(), key=lambda x: x.size), 1): - sn = 's' + str(n) - desc[sn+'_size'] = v.size - desc[sn+'_relSize'] = v.relSize - desc[sn+'_phSize'] = v.numNO - desc[sn+'_phRelSize'] = v.relNumNO - desc[sn+'_phRelSize_2'] = v.relNumNO_2 - desc[sn+'_pathLength'] = v.pathLength - desc[sn+'_relPathLength'] = v.relPathLength - desc[sn+'_relPathLength_2'] = v.relPathLength_2 - desc[sn+'_numSharedNeighbors']=v.sharedNeighbors - desc[sn+'_numRotBonds']=v.numRotBonds - desc[sn+'_numAroBonds']=v.numAroBonds - # some combination of substituent sizes - desc['s34_size'] = desc['s3_size']+desc['s4_size'] - desc['s34_phSize'] = desc['s3_phSize']+desc['s4_phSize'] - desc['s34_relSize'] = desc['s3_relSize']+desc['s4_relSize'] - desc['s34_phRelSize'] = desc['s3_phRelSize']+desc['s4_phRelSize'] - # calculate the chiral moment --> kind of 3D descriptor - desc['chiralMoment'] = calcSP3CarbonSubstituentMoment([desc['s1_size'],desc['s2_size'],desc['s3_size'],desc['s4_size']]) - desc['chiralPhMoment'] = calcSP3CarbonSubstituentMoment([desc['s1_phSize'],desc['s2_phSize'], - desc['s3_phSize'],desc['s4_phSize']]) - return desc + + desc = {} + subs, sharedNeighbors, maxShell = determineAtomSubstituents(idxChiral, mol, dists, verbose) + sizes = calcSizeSubstituents(mol, subs, sharedNeighbors, maxShell) + paths = dists[idxChiral] + # set some basic descriptors + desc['numAtoms'] = mol.GetNumAtoms() + desc['numBonds'] = mol.GetNumBonds() + desc['numRotBonds'] = AllChem.CalcNumRotatableBonds(mol) + desc['ringChiralCenter'] = int(mol.GetAtomWithIdx(idxChiral).IsInRing()) + # determine the max path length in the molecule and the mean pairwise distance of all atom pairs + desc['meanDist'] = np.sum(dists) / ((desc['numAtoms'] - 1) * (desc['numAtoms'])) + desc['maxDist'] = int(np.max(dists)) + # determine the max path length from the chiral center and the mean pairwise distance of + # all atom pairs from the chiral center + desc['meanDistFromCC'] = np.sum(paths) / (desc['numAtoms'] - 1) + desc['maxDistfromCC'] = int(np.max(paths)) + # determine the number of neighbors per shell/distance level + nlevels = Counter(paths.astype(int)) + # consider the levels until a path length of 10 + for i in range(1, 11): + desc['nLevel' + str(i)] = nlevels[i] + # determine the number of nitrogen and oxygen atoms in a certain level around the chiral center + for i in range(1, 4): + desc['phLevel' + str(i)] = len( + [n for n, j in enumerate(paths) if j == i and mol.GetAtomWithIdx(n).GetAtomicNum() in [7, 8]]) + # determine the number of aromatic atoms in a certain level around the chiral center + for i in range(1, 4): + desc['arLevel' + str(i)] = len( + [n for n, j in enumerate(paths) if j == i and mol.GetAtomWithIdx(n).GetIsAromatic()]) + # set the size descriptors for each substituent, sort them from smallest to largest + for n, v in enumerate(sorted(sizes.values(), key=lambda x: x.size), 1): + sn = 's' + str(n) + desc[sn + '_size'] = v.size + desc[sn + '_relSize'] = v.relSize + desc[sn + '_phSize'] = v.numNO + desc[sn + '_phRelSize'] = v.relNumNO + desc[sn + '_phRelSize_2'] = v.relNumNO_2 + desc[sn + '_pathLength'] = v.pathLength + desc[sn + '_relPathLength'] = v.relPathLength + desc[sn + '_relPathLength_2'] = v.relPathLength_2 + desc[sn + '_numSharedNeighbors'] = v.sharedNeighbors + desc[sn + '_numRotBonds'] = v.numRotBonds + desc[sn + '_numAroBonds'] = v.numAroBonds + # some combination of substituent sizes + desc['s34_size'] = desc['s3_size'] + desc['s4_size'] + desc['s34_phSize'] = desc['s3_phSize'] + desc['s4_phSize'] + desc['s34_relSize'] = desc['s3_relSize'] + desc['s4_relSize'] + desc['s34_phRelSize'] = desc['s3_phRelSize'] + desc['s4_phRelSize'] + # calculate the chiral moment --> kind of 3D descriptor + desc['chiralMoment'] = calcSP3CarbonSubstituentMoment( + [desc['s1_size'], desc['s2_size'], desc['s3_size'], desc['s4_size']]) + desc['chiralPhMoment'] = calcSP3CarbonSubstituentMoment( + [desc['s1_phSize'], desc['s2_phSize'], desc['s3_phSize'], desc['s4_phSize']]) + return desc + def generateChiralDescriptorsForAllCenters(mol, verbose=False): - """ + """ Generates descriptors for all chiral centers in the molecule. Details of these descriptors are described in: Schneider et al., Chiral Cliffs: Investigating the Influence of Chirality on Binding Affinity @@ -317,15 +343,16 @@ def generateChiralDescriptorsForAllCenters(mol, verbose=False): >>> desc[6]['maxDistfromCC'] 7 """ - - desc={} - dists = Chem.GetDistanceMatrix(mol) - for idxChiral, _ in Chem.FindMolChiralCenters(mol): - desc[idxChiral] = calculateChiralDescriptors(mol, idxChiral, dists, verbose=False) - return desc + + desc = {} + dists = Chem.GetDistanceMatrix(mol) + for idxChiral, _ in Chem.FindMolChiralCenters(mol): + desc[idxChiral] = calculateChiralDescriptors(mol, idxChiral, dists, verbose=False) + return desc + def generateChiralDescriptors(mol, verbose=False): - """ + """ Generates descriptors for the 'first' chiral centers in the molecule. Details of these descriptors are described in: Schneider et al., Chiral Cliffs: Investigating the Influence of Chirality on Binding Affinity @@ -367,11 +394,11 @@ def generateChiralDescriptors(mol, verbose=False): >>> desc['s2_numSharedNeighbors'] 0 - """ - - dists = Chem.GetDistanceMatrix(mol) - idxChiral = Chem.FindMolChiralCenters(mol)[0][0] - return calculateChiralDescriptors(mol, idxChiral, dists, verbose=False) + """ + + dists = Chem.GetDistanceMatrix(mol) + idxChiral = Chem.FindMolChiralCenters(mol)[0][0] + return calculateChiralDescriptors(mol, idxChiral, dists, verbose=False) #------------------------------------ @@ -379,13 +406,12 @@ def generateChiralDescriptors(mol, verbose=False): # doctest boilerplate # def _test(): - import doctest, sys - return doctest.testmod(sys.modules["__main__"]) + import doctest + import sys + return doctest.testmod(sys.modules["__main__"]) if __name__ == '__main__': - import sys - failed, tried = _test() - sys.exit(failed) - - + import sys + failed, tried = _test() + sys.exit(failed) diff --git a/Contrib/ConformerParser/Wrap/testConformerParser.py b/Contrib/ConformerParser/Wrap/testConformerParser.py index 34eddc109..6779a930c 100644 --- a/Contrib/ConformerParser/Wrap/testConformerParser.py +++ b/Contrib/ConformerParser/Wrap/testConformerParser.py @@ -1,9 +1,10 @@ -from rdkit import Chem -from rdkit.Chem import rdConformerParser -from rdkit import RDConfig -import unittest import os +import unittest + +from rdkit import Chem, RDConfig +from rdkit.Chem import rdConformerParser from rdkit.RDLogger import logger + logger = logger() diff --git a/Contrib/ConformerParser/Wrap/test_list.py b/Contrib/ConformerParser/Wrap/test_list.py index 8cfa9116c..13dd6a619 100644 --- a/Contrib/ConformerParser/Wrap/test_list.py +++ b/Contrib/ConformerParser/Wrap/test_list.py @@ -1,9 +1,12 @@ -tests = [("python", "testConformerParser.py", {}), ] +tests = [ + ("python", "testConformerParser.py", {}), +] longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/Contrib/ConformerParser/test_list.py b/Contrib/ConformerParser/test_list.py index bbc05f36f..06a3babaf 100755 --- a/Contrib/ConformerParser/test_list.py +++ b/Contrib/ConformerParser/test_list.py @@ -1,9 +1,12 @@ -tests = [("testExecs/testConformerParser.exe", "", {}), ] +tests = [ + ("testExecs/testConformerParser.exe", "", {}), +] longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/Contrib/Fastcluster/fastcluster.py b/Contrib/Fastcluster/fastcluster.py index 5d6f69a92..2270a1abe 100644 --- a/Contrib/Fastcluster/fastcluster.py +++ b/Contrib/Fastcluster/fastcluster.py @@ -15,53 +15,59 @@ Please see more details in README. """ import argparse -import subprocess -import pickle import os +import pickle +import subprocess + from rdkit import Chem from rdkit.Chem import AllChem + def getArgParser(): - """ Create the argument parser """ - parser = argparse.ArgumentParser("Fast clustering for chemoinformatics") - parser.add_argument("input", help="filename of input file") - parser.add_argument("nclusters", metavar="N", help="the number of clusters") - parser.add_argument("--output", help="filename of output, tab separated format", default="clustered.tsv") - parser.add_argument("--centroid", metavar="CENTROID", help="filename of centroid information. tab separated format", default="centroid.tsv") - return parser + """ Create the argument parser """ + parser = argparse.ArgumentParser("Fast clustering for chemoinformatics") + parser.add_argument("input", help="filename of input file") + parser.add_argument("nclusters", metavar="N", help="the number of clusters") + parser.add_argument("--output", help="filename of output, tab separated format", + default="clustered.tsv") + parser.add_argument("--centroid", metavar="CENTROID", + help="filename of centroid information. tab separated format", + default="centroid.tsv") + return parser + def smi2fp(molid, smiles): - mol = Chem.MolFromSmiles(smiles) - onbits = AllChem.GetMorganFingerprintAsBitVect(mol, 2).GetOnBits() - row = molid - for bit in onbits: - row += "\tFP_{}\t1.0".format(bit) - row += "\n" - return row + mol = Chem.MolFromSmiles(smiles) + onbits = AllChem.GetMorganFingerprintAsBitVect(mol, 2).GetOnBits() + row = molid + for bit in onbits: + row += "\tFP_{}\t1.0".format(bit) + row += "\n" + return row if __name__ == "__main__": - parser = getArgParser() - args = parser.parse_args() - with open(args.input, "r") as inputf: - with open("fp.tsv", "w") as tempf: - for line in inputf: - molid,smiles = line.rstrip().split("\t") - tempf.write(smi2fp(molid, smiles)) - res = subprocess.call("time bayon -p -c {0.centroid} -n {0.nclusters} fp.tsv > {0.output}".format(args), shell=True) + parser = getArgParser() + args = parser.parse_args() + with open(args.input, "r") as inputf: + with open("fp.tsv", "w") as tempf: + for line in inputf: + molid, smiles = line.rstrip().split("\t") + tempf.write(smi2fp(molid, smiles)) + res = subprocess.call( + "time bayon -p -c {0.centroid} -n {0.nclusters} fp.tsv > {0.output}".format(args), shell=True) - #parse results - parsefile = open(args.output.split(".")[0]+"_parse.tsv", "w") - inputf = open(args.output, "r") - for line in inputf: - line = line.rstrip().split("\t") - cluster_id = line[0] - for i in range(1, len(line)-1, 2) : - molid = line[ i ] - point = line[ i + 1 ] - parsefile.write("{}\t{}\tCLS_ID_{}\n".format(molid, point, cluster_id)) - parsefile.close() + #parse results + parsefile = open(args.output.split(".")[0] + "_parse.tsv", "w") + inputf = open(args.output, "r") + for line in inputf: + line = line.rstrip().split("\t") + cluster_id = line[0] + for i in range(1, len(line) - 1, 2): + molid = line[i] + point = line[i + 1] + parsefile.write("{}\t{}\tCLS_ID_{}\n".format(molid, point, cluster_id)) + parsefile.close() - - if res != 0: - parser.exit("Error running bayon") + if res != 0: + parser.exit("Error running bayon") diff --git a/Contrib/Fastcluster/testdata/sdf2smi.py b/Contrib/Fastcluster/testdata/sdf2smi.py index dd8492f96..e661a11c6 100644 --- a/Contrib/Fastcluster/testdata/sdf2smi.py +++ b/Contrib/Fastcluster/testdata/sdf2smi.py @@ -1,14 +1,14 @@ import argparse + from rdkit import Chem from rdkit.Chem.rdmolfiles import SmilesWriter parser = argparse.ArgumentParser() parser.add_argument('inputfile', help="sdf filename for convert to smiles") args = parser.parse_args() -sdf = Chem.SDMolSupplier( args.inputfile ) +sdf = Chem.SDMolSupplier(args.inputfile) writer = SmilesWriter("converted.smi") for mol in sdf: - writer.write( mol ) + writer.write(mol) writer.close() - diff --git a/Contrib/FreeWilson/freewilson.py b/Contrib/FreeWilson/freewilson.py index c6d0b1ed8..41605eb15 100644 --- a/Contrib/FreeWilson/freewilson.py +++ b/Contrib/FreeWilson/freewilson.py @@ -6,7 +6,6 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # - """ Free Wilson Analysis @@ -133,128 +132,134 @@ input structures ``` """ -from rdkit.Chem import rdRGroupDecomposition as rgd -from rdkit.Chem import molzip, Descriptors -from rdkit import rdBase -from rdkit import Chem +import csv +import itertools +import logging +import math +import re +import sys +from collections import defaultdict, namedtuple +from typing import Generator, List + from sklearn.linear_model import Ridge from sklearn.metrics import r2_score -from collections import defaultdict, namedtuple from tqdm import tqdm -import itertools -import math -import sys -from typing import List -import logging -import csv -import re -from typing import Generator + +from rdkit import Chem, rdBase +from rdkit.Chem import Descriptors, molzip +from rdkit.Chem import rdRGroupDecomposition as rgd + logger = logging.getLogger("freewilson") FreeWilsonPrediction = namedtuple("FreeWilsonPrediction", ['prediction', 'smiles', 'rgroups']) -# match dummy atoms in a smiles string to extract atom maps +# match dummy atoms in a smiles string to extract atom maps dummypat = re.compile(r"\*:([0-9]+)") + # molzip doesn't handle some of the forms that the RGroupDecomposition # returns, this solves these issues. def molzip_smi(smiles): - """Fix a rgroup smiles for molzip, note that the core MUST come first + """Fix a rgroup smiles for molzip, note that the core MUST come first in the smiles string, ala core.rgroup1.rgroup2 ... """ - dupes = set() - sl = [] - for s in smiles.split("."): - if s.count("*") >= 1: - if s in dupes: - continue - else: - dupes.add(s) - sl.append(s) - - smiles = ".".join(sl) - m = Chem.RWMol(Chem.MolFromSmiles(smiles, sanitize=False)) - frags = Chem.GetMolFrags(m) - core = frags[0] - atommaps = {} - counts = defaultdict(int) - for idx in core: - atommap = m.GetAtomWithIdx(idx).GetAtomMapNum() - if atommap: - atommaps[atommap] = idx - counts[atommap] += 1 - - next_atommap = max(atommaps) + 1 - add_atommap = [] - for fragment in frags[1:]: - for idx in fragment: - atommap = m.GetAtomWithIdx(idx).GetAtomMapNum() - if atommap: - count = counts[atommap] = counts[atommap] + 1 - if count > 2: - m.GetAtomWithIdx(idx).SetAtomMapNum(next_atommap) - add_atommap.append((atommaps[atommap], next_atommap)) - next_atommap += 1 - - for atomidx, atommap in add_atommap: - atom = m.GetAtomWithIdx(atomidx) - bonds = list(atom.GetBonds()) - if len(bonds) == 1: - oatom = bonds[0].GetOtherAtom(atom) - xatom = Chem.Atom(0) - idx = m.AddAtom(xatom) - xatom = m.GetAtomWithIdx(idx) - xatom.SetAtomMapNum(atommap) - m.AddBond(oatom.GetIdx(), xatom.GetIdx(), Chem.BondType.SINGLE) - return Chem.molzip(m) + dupes = set() + sl = [] + for s in smiles.split("."): + if s.count("*") >= 1: + if s in dupes: + continue + else: + dupes.add(s) + sl.append(s) + + smiles = ".".join(sl) + m = Chem.RWMol(Chem.MolFromSmiles(smiles, sanitize=False)) + frags = Chem.GetMolFrags(m) + core = frags[0] + atommaps = {} + counts = defaultdict(int) + for idx in core: + atommap = m.GetAtomWithIdx(idx).GetAtomMapNum() + if atommap: + atommaps[atommap] = idx + counts[atommap] += 1 + + next_atommap = max(atommaps) + 1 + add_atommap = [] + for fragment in frags[1:]: + for idx in fragment: + atommap = m.GetAtomWithIdx(idx).GetAtomMapNum() + if atommap: + count = counts[atommap] = counts[atommap] + 1 + if count > 2: + m.GetAtomWithIdx(idx).SetAtomMapNum(next_atommap) + add_atommap.append((atommaps[atommap], next_atommap)) + next_atommap += 1 + + for atomidx, atommap in add_atommap: + atom = m.GetAtomWithIdx(atomidx) + bonds = list(atom.GetBonds()) + if len(bonds) == 1: + oatom = bonds[0].GetOtherAtom(atom) + xatom = Chem.Atom(0) + idx = m.AddAtom(xatom) + xatom = m.GetAtomWithIdx(idx) + xatom.SetAtomMapNum(atommap) + m.AddBond(oatom.GetIdx(), xatom.GetIdx(), Chem.BondType.SINGLE) + return Chem.molzip(m) + class RGroup: - """FreeWilson RGroup + """FreeWilson RGroup smiles - isomeric smiles of the rgroup rgroup - Rgroup name ['Core', 'R1', 'R2'...] count - number of molecules with the rgroup coefficient - ridge regression coefficient idx - one-hot encoding for the rgroup """ - def __init__(self, smiles, rgroup, count, coefficient, idx=None): - self.smiles = smiles # smiles for the sidechain (n.b. can be a core as well) - self.rgroup = rgroup # rgroup Core, R1, R2,... - self.count = count # num molecules with this rgruop - self.coefficient = coefficient # ridge coefficient - self.idx = idx # descriptor index - self.dummies = tuple([int(x) for x in sorted(dummypat.findall(smiles))]) - # Assemble some additive properties - - # will fail on some structures - m = Chem.MolFromSmiles(smiles) - if m: - self.mw = Descriptors.MolWt(m) - self.hvyct = Descriptors.HeavyAtomCount(m) - else: - # guess the MW if we can't sanitize - table = Chem.GetPeriodicTable() - m = Chem.MolFromSmiles(smiles, sanitize=False) - if m: - self.mw = 0. - self.hvyct = 0 - for atom in m.GetAtoms(): - atomicnum = atom.GetAtomicNum() - self.mw += table.GetAtomicWeight(atomicnum) - if atomicnum > 1: - self.hvyct += 1 - self.hvyct = Descriptors.HeavyAtomCount(m) - else: - self.mw = 0 # dunno - self.hvyct = 0 - - def __str__(self): - return f"RGroup(smiles={repr(self.smiles)}, rgroup={repr(self.rgroup)}, count={repr(self.count)}, coefficient={repr(self.coefficient)}, idx={repr(self.idx)})" - def __repr__(self): - return self.__str__() + def __init__(self, smiles, rgroup, count, coefficient, idx=None): + self.smiles = smiles # smiles for the sidechain (n.b. can be a core as well) + self.rgroup = rgroup # rgroup Core, R1, R2,... + self.count = count # num molecules with this rgruop + self.coefficient = coefficient # ridge coefficient + self.idx = idx # descriptor index + self.dummies = tuple([int(x) for x in sorted(dummypat.findall(smiles))]) + + # Assemble some additive properties + + # will fail on some structures + m = Chem.MolFromSmiles(smiles) + if m: + self.mw = Descriptors.MolWt(m) + self.hvyct = Descriptors.HeavyAtomCount(m) + else: + # guess the MW if we can't sanitize + table = Chem.GetPeriodicTable() + m = Chem.MolFromSmiles(smiles, sanitize=False) + if m: + self.mw = 0. + self.hvyct = 0 + for atom in m.GetAtoms(): + atomicnum = atom.GetAtomicNum() + self.mw += table.GetAtomicWeight(atomicnum) + if atomicnum > 1: + self.hvyct += 1 + self.hvyct = Descriptors.HeavyAtomCount(m) + else: + self.mw = 0 # dunno + self.hvyct = 0 + + def __str__(self): + return f"RGroup(smiles={repr(self.smiles)}, rgroup={repr(self.rgroup)}, count={repr(self.count)}, coefficient={repr(self.coefficient)}, idx={repr(self.idx)})" + + def __repr__(self): + return self.__str__() + class FreeWilsonDecomposition: - """FreeWilson decomposition + """FreeWilson decomposition rgroups - dictionary of rgroup to list of RGroups i.e. {'Core': [RGroup(...), ...] 'R1': [ RGroup(...), RGroup(...)], @@ -267,20 +272,18 @@ class FreeWilsonDecomposition: used to not enumerate existing molecules row_decomposition - original rgroup decomposition (With row key 'molecule' is an rdkit molecule) """ - - def __init__(self, - rgroups, rgroup_to_descriptor_idx, fitter, - r2, descriptors, row_decomposition, - num_training, num_reconstructed): - self.rgroups = rgroups # dictionary 'Core':[core1, core1], 'R1': [rgroup1, rgroup2], ... - self.rgroup_to_descriptor_idx = rgroup_to_descriptor_idx # dictionary {smi:descriptor_idx} - self.fitter = fitter # fitter rgroup indices -> prediction - self.N = len(rgroup_to_descriptor_idx) - self.r2 = r2 - self.descriptors = set([tuple(d) for d in descriptors]) - self.row_decomposition = row_decomposition - self.num_training = num_training - self.num_reconstructed = num_reconstructed + + def __init__(self, rgroups, rgroup_to_descriptor_idx, fitter, r2, descriptors, row_decomposition, + num_training, num_reconstructed): + self.rgroups = rgroups # dictionary 'Core':[core1, core1], 'R1': [rgroup1, rgroup2], ... + self.rgroup_to_descriptor_idx = rgroup_to_descriptor_idx # dictionary {smi:descriptor_idx} + self.fitter = fitter # fitter rgroup indices -> prediction + self.N = len(rgroup_to_descriptor_idx) + self.r2 = r2 + self.descriptors = set([tuple(d) for d in descriptors]) + self.row_decomposition = row_decomposition + self.num_training = num_training + self.num_reconstructed = num_reconstructed default_decomp_params = rgd.RGroupDecompositionParameters() @@ -298,8 +301,11 @@ default_decomp_params.scoreMethod = rgd.RGroupScore.FingerprintVariance # we need to keep hydrogens so molzip will work default_decomp_params.removeHydrogensPostMatch = False -def FWDecompose(scaffolds, mols, scores, decomp_params=default_decomp_params) -> FreeWilsonDecomposition: - """ + + +def FWDecompose(scaffolds, mols, scores, + decomp_params=default_decomp_params) -> FreeWilsonDecomposition: + """ Perform a free wilson analysis : param scaffolds : scaffold or list of scaffolds to use for the rgroup decomposition : param mols : molecules to decompose @@ -333,176 +339,178 @@ def FWDecompose(scaffolds, mols, scores, decomp_params=default_decomp_params) -> See FWBuild docs to see how to filter predictions, molecular weight or molecular properties. """ - descriptors = [] # list of descriptors, one per matched molecules - # descriptors are 1/0 if a sidechain is present - matched_scores = [] # scores from the matching molecules - rgroup_idx = {} # rgroup index into descriptor { smiles: idx } - rgroups = defaultdict(list) # final list of rgrups/sidechains + descriptors = [] # list of descriptors, one per matched molecules + # descriptors are 1/0 if a sidechain is present + matched_scores = [] # scores from the matching molecules + rgroup_idx = {} # rgroup index into descriptor { smiles: idx } + rgroups = defaultdict(list) # final list of rgrups/sidechains - if len(mols) != len(scores): - raise ValueError(f"The number of molecules must match the number of scores #mols {len(mols)} #scores {len(scores)}") - # decompose the rgroups - logger.info(f"Decomposing {len(mols)} molecules...") - decomposer = rgd.RGroupDecomposition(scaffolds, decomp_params) - matched = [] - matched_indices = [] - for i,(mol, score) in enumerate(tqdm(zip(mols, scores))): - if decomposer.Add(mol) >= 0: - matched_scores.append(score) - matched.append(mol) - matched_indices.append(i) + if len(mols) != len(scores): + raise ValueError( + f"The number of molecules must match the number of scores #mols {len(mols)} #scores {len(scores)}" + ) + # decompose the rgroups + logger.info(f"Decomposing {len(mols)} molecules...") + decomposer = rgd.RGroupDecomposition(scaffolds, decomp_params) + matched = [] + matched_indices = [] + for i, (mol, score) in enumerate(tqdm(zip(mols, scores))): + if decomposer.Add(mol) >= 0: + matched_scores.append(score) + matched.append(mol) + matched_indices.append(i) - decomposer.Process() - logger.info(f"Matched {len(matched_scores)} out of {len(mols)}") - if not(matched_scores): - logger.error("No scaffolds matched the input molecules") - return + decomposer.Process() + logger.info(f"Matched {len(matched_scores)} out of {len(mols)}") + if not (matched_scores): + logger.error("No scaffolds matched the input molecules") + return - decomposition = decomposer.GetRGroupsAsRows(asSmiles=True) + decomposition = decomposer.GetRGroupsAsRows(asSmiles=True) - logger.info("Get unique rgroups...") - blocker = rdBase.BlockLogs() - rgroup_counts = defaultdict(int) - num_reconstructed = 0 - for num_mols, (row, idx) in enumerate(zip(decomposition, matched_indices)): - row_smiles = [] - for rgroup,smiles in row.items(): - row_smiles.append(smiles) - rgroup_counts[smiles] += 1 - if smiles not in rgroup_idx: - rgroup_idx[smiles] = len(rgroup_idx) - rgroups[rgroup].append(RGroup(smiles, rgroup, 0, 0)) - row['original_idx'] = idx - reconstructed = ".".join(row_smiles) - try: - blocker = rdBase.BlockLogs() - mol = molzip_smi(reconstructed) - num_reconstructed += 1 - except: - print("failed:", Chem.MolToSmiles(matched[num_mols]), reconstructed) - + logger.info("Get unique rgroups...") + blocker = rdBase.BlockLogs() + rgroup_counts = defaultdict(int) + num_reconstructed = 0 + for num_mols, (row, idx) in enumerate(zip(decomposition, matched_indices)): + row_smiles = [] + for rgroup, smiles in row.items(): + row_smiles.append(smiles) + rgroup_counts[smiles] += 1 + if smiles not in rgroup_idx: + rgroup_idx[smiles] = len(rgroup_idx) + rgroups[rgroup].append(RGroup(smiles, rgroup, 0, 0)) + row['original_idx'] = idx + reconstructed = ".".join(row_smiles) + try: + blocker = rdBase.BlockLogs() + mol = molzip_smi(reconstructed) + num_reconstructed += 1 + except: + print("failed:", Chem.MolToSmiles(matched[num_mols]), reconstructed) - logger.info(f"Descriptor size {len(rgroup_idx)}") - logger.info(f"Reconstructed {num_reconstructed} out of {num_mols}") + logger.info(f"Descriptor size {len(rgroup_idx)}") + logger.info(f"Reconstructed {num_reconstructed} out of {num_mols}") - # get the descriptors list, one-hot encoding per rgroup - if num_reconstructed == 0: - logging.warning("Could only reconstruct %s out of %s training molecules", - num_mols, num_reconstructed) + # get the descriptors list, one-hot encoding per rgroup + if num_reconstructed == 0: + logging.warning("Could only reconstruct %s out of %s training molecules", num_mols, + num_reconstructed) - for mol, row in zip(matched, decomposition): - row['molecule'] = mol - descriptor = [0] * len(rgroup_idx) - descriptors.append(descriptor) - for smiles in row.values(): - if smiles in rgroup_idx: - descriptor[rgroup_idx[smiles]] = 1 + for mol, row in zip(matched, decomposition): + row['molecule'] = mol + descriptor = [0] * len(rgroup_idx) + descriptors.append(descriptor) + for smiles in row.values(): + if smiles in rgroup_idx: + descriptor[rgroup_idx[smiles]] = 1 - - assert len(descriptors) == len(matched_scores), f"Number of descriptors({len(descriptors)}) doesn't match number of matcved scores({len(matched_scores)})" + assert len(descriptors) == len( + matched_scores + ), f"Number of descriptors({len(descriptors)}) doesn't match number of matcved scores({len(matched_scores)})" - # Perform the Ridge Regression - logger.info("Ridge Regressing...") - lm = Ridge() - lm.fit(descriptors, matched_scores) - preds = lm.predict(descriptors) - r2 = r2_score(matched_scores, preds) - logger.info(f"R2 {r2}") - logger.info(f"Intercept = {lm.intercept_:.2f}") + # Perform the Ridge Regression + logger.info("Ridge Regressing...") + lm = Ridge() + lm.fit(descriptors, matched_scores) + preds = lm.predict(descriptors) + r2 = r2_score(matched_scores, preds) + logger.info(f"R2 {r2}") + logger.info(f"Intercept = {lm.intercept_:.2f}") - for sidechains in rgroups.values(): - for rgroup in sidechains: - rgroup.count = rgroup_counts[rgroup.smiles] - rgroup.coefficient = lm.coef_[rgroup_idx[rgroup.smiles]] - rgroup.idx = rgroup_idx[rgroup.smiles] + for sidechains in rgroups.values(): + for rgroup in sidechains: + rgroup.count = rgroup_counts[rgroup.smiles] + rgroup.coefficient = lm.coef_[rgroup_idx[rgroup.smiles]] + rgroup.idx = rgroup_idx[rgroup.smiles] - return FreeWilsonDecomposition(rgroups, rgroup_idx, lm, - r2, descriptors, decomposition, - num_mols, num_reconstructed) + return FreeWilsonDecomposition(rgroups, rgroup_idx, lm, r2, descriptors, decomposition, num_mols, + num_reconstructed) -def _enumerate(rgroups, fw, - mw_filter=None, hvy_filter=None, pred_filter=None, mol_filter=None): - N = fw.N - fitter = fw.fitter - num_products = 1 - for r in rgroups: - num_products*=len(r) - wrote = 0 - in_training_set = 0 - rejected_pred = 0 - rejected_mw = 0 - rejected_hvy = 0 - good_pred = 0 - rejected_filters = 0 - rejected_bad = 0 - min_mw = 10000000 - max_mw = 0 - min_hvy = 10000000 - max_hvy = 0 - max_pred = -1e10 - min_pred = 1e10 - delta = num_products//10 or 1 - for i,groups in tqdm(enumerate(itertools.product(*rgroups)), total=num_products): - if i and i % delta == 0: - logging.debug(f"Wrote {wrote} results out of {num_products}\n\t\n\tIn Training Set{in_training_set}\n\tBad MW: {rejected_mw}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\t\n\tmin pred: {min_pred}\n\tmax pred: {max_pred}", file=sys.stderr) +def _enumerate(rgroups, fw, mw_filter=None, hvy_filter=None, pred_filter=None, mol_filter=None): + N = fw.N + fitter = fw.fitter + num_products = 1 + for r in rgroups: + num_products *= len(r) - mw = 0 - hvy = 0 - descriptors = [0] * N - for g in groups: - descriptors[g.idx] = 1 - mw += g.mw - hvy += g.hvyct + wrote = 0 + in_training_set = 0 + rejected_pred = 0 + rejected_mw = 0 + rejected_hvy = 0 + good_pred = 0 + rejected_filters = 0 + rejected_bad = 0 + min_mw = 10000000 + max_mw = 0 + min_hvy = 10000000 + max_hvy = 0 + max_pred = -1e10 + min_pred = 1e10 + delta = num_products // 10 or 1 + for i, groups in tqdm(enumerate(itertools.product(*rgroups)), total=num_products): + if i and i % delta == 0: + logging.debug( + f"Wrote {wrote} results out of {num_products}\n\t\n\tIn Training Set{in_training_set}\n\tBad MW: {rejected_mw}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\t\n\tmin pred: {min_pred}\n\tmax pred: {max_pred}", + file=sys.stderr) - if tuple(descriptors) in fw.descriptors: - in_training_set += 1 - continue - - min_mw = min(min_mw, mw) - max_mw = max(max_mw, mw) - if mw_filter and not mw_filter(mw): - rejected_mw += 1 - continue + mw = 0 + hvy = 0 + descriptors = [0] * N + for g in groups: + descriptors[g.idx] = 1 + mw += g.mw + hvy += g.hvyct - min_hvy = min(min_hvy, hvy) - max_hvy = max(max_hvy, hvy) - if hvy_filter and not hvy_filter(hvy): - rejected_hvy += 1 - continue + if tuple(descriptors) in fw.descriptors: + in_training_set += 1 + continue - pred = fitter.predict([descriptors])[0] - max_pred = max(pred, max_pred) - min_pred = min(pred, min_pred) - if pred_filter and not pred_filter(pred): - rejected_pred += 1 - continue - good_pred+=1 - smiles = set([g.smiles for g in groups]) # remove dupes - smi = ".".join(set([g.smiles for g in groups])) - try: - mol = molzip_smi(smi) - except: - rejected_bad += 1 - continue - - rejected = False - if mol_filter and not mol_filter(mol): - rejected_filters += 1 - continue + min_mw = min(min_mw, mw) + max_mw = max(max_mw, mw) + if mw_filter and not mw_filter(mw): + rejected_mw += 1 + continue - out_smi = Chem.MolToSmiles(mol) - yield FreeWilsonPrediction(pred, out_smi, groups) - wrote += 1 - logging.info(f"Wrote {wrote} results out of {num_products}\n\tIn Training set: {in_training_set}\n\tBad MW: {rejected_mw}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\tBad HVY: {rejected_hvy}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\tmin hvy: {min_hvy}\n\tmax hvy: {max_hvy}\n\t\n\tmin pred: {min_pred}\n\tmax pred: {max_pred}") + min_hvy = min(min_hvy, hvy) + max_hvy = max(max_hvy, hvy) + if hvy_filter and not hvy_filter(hvy): + rejected_hvy += 1 + continue - -def FWBuild(fw: FreeWilsonDecomposition, - pred_filter=None, - mw_filter=None, - hvy_filter=None, - mol_filter=None) -> Generator[FreeWilsonPrediction,None,None]: - """Enumerate the freewilson decomposition and return their predictions + pred = fitter.predict([descriptors])[0] + max_pred = max(pred, max_pred) + min_pred = min(pred, min_pred) + if pred_filter and not pred_filter(pred): + rejected_pred += 1 + continue + good_pred += 1 + smiles = set([g.smiles for g in groups]) # remove dupes + smi = ".".join(set([g.smiles for g in groups])) + try: + mol = molzip_smi(smi) + except: + rejected_bad += 1 + continue + + rejected = False + if mol_filter and not mol_filter(mol): + rejected_filters += 1 + continue + + out_smi = Chem.MolToSmiles(mol) + yield FreeWilsonPrediction(pred, out_smi, groups) + wrote += 1 + logging.info( + f"Wrote {wrote} results out of {num_products}\n\tIn Training set: {in_training_set}\n\tBad MW: {rejected_mw}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\tBad HVY: {rejected_hvy}\n\tBad Pred: {rejected_pred}\n\tBad Filters: {rejected_filters}\n\tBad smi: {rejected_bad}\n\tmin mw: {min_mw}\n\tmax mw: {max_mw}\n\tmin hvy: {min_hvy}\n\tmax hvy: {max_hvy}\n\t\n\tmin pred: {min_pred}\n\tmax pred: {max_pred}" + ) + + +def FWBuild(fw: FreeWilsonDecomposition, pred_filter=None, mw_filter=None, hvy_filter=None, + mol_filter=None) -> Generator[FreeWilsonPrediction, None, None]: + """Enumerate the freewilson decomposition and return their predictions :param fw: FreeWilsonDecomposition generated from FWDecompose :param pred_filter: return True if the prediction is in a desireable range @@ -514,150 +522,144 @@ def FWBuild(fw: FreeWilsonDecomposition, :param mol_filter: return True if the molecule is ok to be enumerated e.g. lambda mol: -3 < Descriptors.MolLogp(mol) < 5 """ - blocker = rdBase.BlockLogs() - # check groups for cycles - cycles = set() - rgroups_no_cycles = defaultdict(list) - rgroup_cycles = defaultdict(list) + blocker = rdBase.BlockLogs() + # check groups for cycles + cycles = set() + rgroups_no_cycles = defaultdict(list) + rgroup_cycles = defaultdict(list) - # we can handle cycles now? - for key, rgroup in fw.rgroups.items(): - if key == 'Core': - rgroups_no_cycles[key] = rgroup - continue - no_cycles = rgroups_no_cycles[key] - for g in rgroup: - no_cycles.append(g) - continue - - if len(g.dummies) > 1: - cycles.add(g.dummies) - rgroup_cycles[g.dummies].append(g) - else: - no_cycles.append(g) + # we can handle cycles now? + for key, rgroup in fw.rgroups.items(): + if key == 'Core': + rgroups_no_cycles[key] = rgroup + continue + no_cycles = rgroups_no_cycles[key] + for g in rgroup: + no_cycles.append(g) + continue - logging.info("Enumerating rgroups with no broken cycles...") - for k,v in rgroups_no_cycles.items(): - logging.info(f"\t{k}\t{len(v)}") - # do the ones that have no cycles first - rgroups = [rgroup for key, rgroup in sorted(rgroups_no_cycles.items())] - # core is always first - for res in _enumerate(rgroups, - fw, - pred_filter=pred_filter, - mw_filter=mw_filter, - hvy_filter=hvy_filter, + if len(g.dummies) > 1: + cycles.add(g.dummies) + rgroup_cycles[g.dummies].append(g) + else: + no_cycles.append(g) + + logging.info("Enumerating rgroups with no broken cycles...") + for k, v in rgroups_no_cycles.items(): + logging.info(f"\t{k}\t{len(v)}") + # do the ones that have no cycles first + rgroups = [rgroup for key, rgroup in sorted(rgroups_no_cycles.items())] + # core is always first + for res in _enumerate(rgroups, fw, pred_filter=pred_filter, mw_filter=mw_filter, + hvy_filter=hvy_filter, mol_filter=mol_filter): + yield res + + # iterate on rgroups with cycles + # basically only let one set of RGroups show up once. + indices = set() + for k in fw.rgroups: + if k[0] == "R": + indices.add(int(k[1:])) + if cycles: + logging.info("Enumerating rgroups with broken cycles...") + + for rgroup_indices in cycles: + missing = indices - set(rgroup_indices) + rgroups = {'Core': fw.rgroups['Core']} + rgroups["R%s" % ".".join([str(x) for x in rgroup_indices])] = rgroup_cycles[rgroup_indices] + for m in missing: + k = "R%s" % m + rgroups[k] = rgroups_no_cycles[k] + + for k, v in rgroups.items(): + logging.info(f"\t{k}\t{len(v)}") + + for res in _enumerate([rgroup for key, rgroup in sorted(rgroups.items())], fw, + pred_filter=pred_filter, mw_filter=mw_filter, hvy_filter=hvy_filter, mol_filter=mol_filter): - yield res + yield res - # iterate on rgroups with cycles - # basically only let one set of RGroups show up once. - indices = set() - for k in fw.rgroups: - if k[0] == "R": - indices.add(int(k[1:])) - if cycles: - logging.info("Enumerating rgroups with broken cycles...") - - for rgroup_indices in cycles: - missing = indices - set(rgroup_indices) - rgroups = {'Core': fw.rgroups['Core']} - rgroups["R%s"%".".join([str(x) for x in rgroup_indices])] = rgroup_cycles[rgroup_indices] - for m in missing: - k = "R%s"%m - rgroups[k] = rgroups_no_cycles[k] - - for k,v in rgroups.items(): - logging.info(f"\t{k}\t{len(v)}") - - for res in _enumerate([rgroup for key, rgroup in sorted(rgroups.items())], - fw, - pred_filter=pred_filter, - mw_filter=mw_filter, - hvy_filter=hvy_filter, - mol_filter=mol_filter): - yield res def _rgroup_sort(r): - """Sort groups like R1 R2 R10 not R1 R10 R2 + """Sort groups like R1 R2 R10 not R1 R10 R2 """ - if r[0] == "R": return ("R", int(r[1:])) - return (r, None) + if r[0] == "R": + return ("R", int(r[1:])) + return (r, None) + def predictions_to_csv(outstream, decomposition: FreeWilsonDecomposition, predictions): - """Output predictions in csv format to the output stream + """Output predictions in csv format to the output stream :param outstream: output stream to write results :param decomposition: freewillson decomposition :param predictions: list of Predictions to output """ - writer = None - for pred in predictions: - if not writer: - rgroups = set() - for rgroup in decomposition.rgroups: - rgroups.add(rgroup) - rgroups = sorted(rgroups, key=_rgroup_sort) + writer = None + for pred in predictions: + if not writer: + rgroups = set() + for rgroup in decomposition.rgroups: + rgroups.add(rgroup) + rgroups = sorted(rgroups, key=_rgroup_sort) + + lookup = {} + for i, rg in enumerate(rgroups): + lookup[rg] = i + writer = csv.writer(outstream) + header = ['smiles', 'prediction'] + [f"{rg}_smiles" for rg in list(rgroups)] + writer.writerow(header) + rg = [""] * len(lookup) + for s in pred.rgroups: + rg[lookup[s.rgroup]] = s.smiles + + row = [pred.smiles, repr(pred.prediction)] + rg + writer.writerow(row) + return header - lookup = {} - for i, rg in enumerate(rgroups): - lookup[rg] = i - writer = csv.writer(outstream) - header = ['smiles', 'prediction'] + [f"{rg}_smiles" for rg in list(rgroups)] - writer.writerow(header) - rg = [""] * len(lookup) - for s in pred.rgroups: - rg[lookup[s.rgroup]] = s.smiles - row = [pred.smiles, repr(pred.prediction)] + rg - writer.writerow(row) - return header - def test_freewilson(): - # some simple tests - from rdkit import Chem - from rdkit.Chem import Descriptors - assert dummypat.findall("C[*:1]N.[H][*:2]") == ['1', '2'] - assert dummypat.findall("C[*:1]N.[HH][*:2]") == ['1', '2'] - assert dummypat.findall("C[*:1]N.[2H][*:2]") == ['1', '2'] - assert dummypat.findall("C[*:1]N.[CH2][*:2]") == ['1', '2'] + # some simple tests + from rdkit import Chem + from rdkit.Chem import Descriptors + assert dummypat.findall("C[*:1]N.[H][*:2]") == ['1', '2'] + assert dummypat.findall("C[*:1]N.[HH][*:2]") == ['1', '2'] + assert dummypat.findall("C[*:1]N.[2H][*:2]") == ['1', '2'] + assert dummypat.findall("C[*:1]N.[CH2][*:2]") == ['1', '2'] - scaffold = Chem.MolFromSmiles("[*:2]c1cccnc1[*:1]") - mols = [Chem.MolFromSmiles("N"*(i+1) + "c1cccnc1"+"C"*(i+1)) for i in range(10)] - scores = [Descriptors.MolLogP(m) for m in mols] - fw = FWDecompose(scaffold, mols, scores) + scaffold = Chem.MolFromSmiles("[*:2]c1cccnc1[*:1]") + mols = [Chem.MolFromSmiles("N" * (i + 1) + "c1cccnc1" + "C" * (i + 1)) for i in range(10)] + scores = [Descriptors.MolLogP(m) for m in mols] + fw = FWDecompose(scaffold, mols, scores) + + for pred in FWBuild(fw, pred_filter=lambda x: -3 < x < 3, mw_filter=lambda mw: 100 < mw < 450, + hvy_filter=lambda hvy: 10 < hvy < 50, + mol_filter=lambda m: -3 < Descriptors.MolLogP(m) < 3): + rgroups = set() + for sidechain in pred.rgroups: + rgroups.add(sidechain.rgroup) + rgroups = sorted(rgroups) + assert list(rgroups) == ['Core', 'R1', 'R2'] - for pred in FWBuild(fw, - pred_filter=lambda x: -3 < x < 3, - mw_filter=lambda mw: 100 initial a Property + + def __init__(self, name, propIdx, minValue, maxValue, scaffoldoffset=0.0): + """name, propIdx, minValue, maxValue, scaffoldoffset -> initial a Property name is the name of the property. propIdx: the index of the property in the property vector minValue: the minimum acceptable value for the property maxValue: the maximum acceptable value for the property scaffoldoffset: any offset from the reaction scaffold (defaults to 0) """ - self.name = name - self.propIdx = propIdx - self.minValue = minValue - self.maxValue = maxValue - self.offset = scaffoldoffset - + self.name = name + self.propIdx = propIdx + self.minValue = minValue + self.maxValue = maxValue + self.offset = scaffoldoffset - def evaluate(self, sidechains): - """sidechains -> Evaluate a list of sidechains to see if they + def evaluate(self, sidechains): + """sidechains -> Evaluate a list of sidechains to see if they pass the property values. Each sidechain must have a property vector e.g. (s.props for s in sidechains) which is a vector of values where s.props[propIdx] is the property being inspected """ - product = self.offset - propIdx = self.propIdx - for s in sidechains: - product += s.props[propIdx] - return self.minValue <= product <= self.maxValue + product = self.offset + propIdx = self.propIdx + for s in sidechains: + product += s.props[propIdx] + return self.minValue <= product <= self.maxValue + class Sidechain: - """Holds the name (identifier) and property list for the + """Holds the name (identifier) and property list for the given sidechain/fragment. Properties are assumed to be numerical values""" - def __init__(self, name, props, goodCount=0, **extra_data): - """name, props, goodCount=0 -> initialize a Sidechain + + def __init__(self, name, props, goodCount=0, **extra_data): + """name, props, goodCount=0 -> initialize a Sidechain initialize a sidechain. name: the unique name for the sidechain props: the property vector (see Properties class for details) @@ -83,193 +88,195 @@ class Sidechain: a good product, where good is a product that is in the desired property space. """ - self.name = name - self.props = props - self.good_count = goodCount # shared variable - self.dropped = False # shared variable - self.extra_data = extra_data + self.name = name + self.props = props + self.good_count = goodCount # shared variable + self.dropped = False # shared variable + self.extra_data = extra_data + + def data(self): + return self.extra_data + + def __str__(self): + return "Sidechain %s(%s, goodCount=%s, **%r)" % (self.name, self.props, self.good_count, + self.extra_data) + + def __repr__(self): + return "Sidechain(%r, %r, %s, **%r)" % (self.name, self.props, self.good_count, self.extra_data) + - def data(self): - return self.extra_data - - def __str__(self): - return "Sidechain %s(%s, goodCount=%s, **%r)"%(self.name, - self.props, self.good_count, self.extra_data) - def __repr__(self): - return "Sidechain(%r, %r, %s, **%r)"%(self.name, self.props, self.good_count, self.extra_data) - class RGroups: - """Holds a collection of sidechains for the given RGroup""" - def __init__(self, sidechains): - """Sidechains -> RGroups + """Holds a collection of sidechains for the given RGroup""" + + def __init__(self, sidechains): + """Sidechains -> RGroups sidechains: the list of Sidechains that make up the potential sidechains at this rgroup position""" - self.sidechains = sidechains - - self.rejected = [] # list of rejected sidechains - self.initial_size = len(sidechains) - - def count(self): - """Returns the number of possible sidechains""" - return len(self.sidechains) + self.sidechains = sidechains - def randomize(self): - """Randomly shuffles the sidechains and reset the goodness counts""" - random.shuffle(self.sidechains) - for s in self.sidechains: - s.good_count = 0 + self.rejected = [] # list of rejected sidechains + self.initial_size = len(sidechains) - def effectiveness(self): - """-> return the current effectiveness of this collection + def count(self): + """Returns the number of possible sidechains""" + return len(self.sidechains) + + def randomize(self): + """Randomly shuffles the sidechains and reset the goodness counts""" + random.shuffle(self.sidechains) + for s in self.sidechains: + s.good_count = 0 + + def effectiveness(self): + """-> return the current effectiveness of this collection effectiveness is the number of items left divided by the initial amount""" - - return len(self.sidechains)/float(self.initial_size) - - def chunk_size(self, num_chunks): - """num_chunks -> return the number of sidechains in each chunk - if the sidechains are split into num_chunks chunks""" - return int(math.ceil(float(len(self.sidechains))/num_chunks)) - - def chunk(self, chunk_idx, num_chunks): - """chunk_idx, num)chunks -> RGroups - return the chunk_idxth chunk given num_chunks total chunks""" - assert chunk_idx >=0 and chunk_idx < num_chunks, "%s %s"%( - chunk_idx, num_chunks) - - n = self.chunk_size(num_chunks) - return RGroups(self.sidechains[chunk_idx*n:(chunk_idx+1)*n]) - def prune(self, fractionToKeep): - """fractionToKeep -> Sort the sidechains from the most often + return len(self.sidechains) / float(self.initial_size) + + def chunk_size(self, num_chunks): + """num_chunks -> return the number of sidechains in each chunk + if the sidechains are split into num_chunks chunks""" + return int(math.ceil(float(len(self.sidechains)) / num_chunks)) + + def chunk(self, chunk_idx, num_chunks): + """chunk_idx, num)chunks -> RGroups + return the chunk_idxth chunk given num_chunks total chunks""" + assert chunk_idx >= 0 and chunk_idx < num_chunks, "%s %s" % (chunk_idx, num_chunks) + + n = self.chunk_size(num_chunks) + return RGroups(self.sidechains[chunk_idx * n:(chunk_idx + 1) * n]) + + def prune(self, fractionToKeep): + """fractionToKeep -> Sort the sidechains from the most often found if good products to the least, and keep the best fractionToKeep percentage""" - assert 0 < fractionToKeep <= 1.0, "fractionToKeep: %s"%fractionToKeep + assert 0 < fractionToKeep <= 1.0, "fractionToKeep: %s" % fractionToKeep + + self.sidechains.sort(key=lambda x: x.good_count, reverse=True) + fragment_index = int(len(self.sidechains) * fractionToKeep + 0.5) + + # update rejected set + self.rejected += self.sidechains[fragment_index:] + self.sidechains = self.sidechains[:fragment_index] - self.sidechains.sort(key=lambda x: x.good_count, reverse=True) - fragment_index = int(len(self.sidechains) * fractionToKeep + 0.5) - # update rejected set - self.rejected += self.sidechains[fragment_index:] - self.sidechains = self.sidechains[:fragment_index] - class Library: - """A library is a collection of RGroups that need to be combinitorially + """A library is a collection of RGroups that need to be combinitorially combined""" - def __init__(self, rgroups): - """rgroups -> Initialize the Library. + + def __init__(self, rgroups): + """rgroups -> Initialize the Library. rgroups: the list of possible RGroups that is combinitorially combined to make the library""" - self.rgroups = rgroups - - def isValid(self): - """If we have an empty set for any rgroup, return False""" - for rg in self.rgroups: - if len(rg.sidechains) == 0: - return False - return True - - def randomize(self): - """randomize the order of the sidechains""" - for rg in self.rgroups: - rg.randomize() + self.rgroups = rgroups - def getSidechainsPerPartition( self, total_num_partitions_per_rgroup ): - """total_num_partitions -> [num_fragments/partition for rgroup1, + def isValid(self): + """If we have an empty set for any rgroup, return False""" + for rg in self.rgroups: + if len(rg.sidechains) == 0: + return False + return True + + def randomize(self): + """randomize the order of the sidechains""" + for rg in self.rgroups: + rg.randomize() + + def getSidechainsPerPartition(self, total_num_partitions_per_rgroup): + """total_num_partitions -> [num_fragments/partition for rgroup1, num_fragments/partition for rgroup2] return the number of sidechains in a partition for each rgroup""" - sizes = [ (libIdx, max(rg.count()//total_num_partitions_per_rgroup, 1)) - for libIdx, rg in enumerate(self.rgroups) ] + sizes = [(libIdx, max(rg.count() // total_num_partitions_per_rgroup, 1)) + for libIdx, rg in enumerate(self.rgroups)] - # "optimally" apportion the partitions according the - # the glare paper see Appendix eq (8) and (9) - # sort by size - sizes.sort(key=lambda sz: sz[1]) - last_size = 1 - opt_sizes = [] - for libIdx, current_size in sizes[:-1]: - opt_sizes.append( (libIdx, - current_size - (current_size % last_size)) ) - last_size = current_size + # "optimally" apportion the partitions according the + # the glare paper see Appendix eq (8) and (9) + # sort by size + sizes.sort(key=lambda sz: sz[1]) + last_size = 1 + opt_sizes = [] + for libIdx, current_size in sizes[:-1]: + opt_sizes.append((libIdx, current_size - (current_size % last_size))) + last_size = current_size - # From the Glare paper: - # the last library size is set equal to the second to last - # From Table 3, it is easy to understand that, if the fourth dimension - # was split in 24 instead of 12, a factor of 2 would be gained from the - # reduced size of the sublibraries. However, twice as many sublibraries - # would be needed, and the net speedup would be null, hence, the decision to - # set p4=p3. (p4 here is the last library) - libIdx, current_size = sizes[-1] - opt_sizes.append((libIdx, last_size)) - # back to the original library order - opt_sizes.sort() - res = [size for libIdx, size in opt_sizes] - return res + # From the Glare paper: + # the last library size is set equal to the second to last + # From Table 3, it is easy to understand that, if the fourth dimension + # was split in 24 instead of 12, a factor of 2 would be gained from the + # reduced size of the sublibraries. However, twice as many sublibraries + # would be needed, and the net speedup would be null, hence, the decision to + # set p4=p3. (p4 here is the last library) + libIdx, current_size = sizes[-1] + opt_sizes.append((libIdx, last_size)) + # back to the original library order + opt_sizes.sort() + res = [size for libIdx, size in opt_sizes] + return res - def chunk(self, num_partitions): - """num_partitions -> [Library(..), Library(...)] + def chunk(self, num_partitions): + """num_partitions -> [Library(..), Library(...)] Return new libraries that are chunks of this one. These are the libraries that get sampled to see of sidechains participate in good products. """ - partitions = self.getSidechainsPerPartition(num_partitions) - max_subsets = max(partitions) - enumeration_indices = [] - for i in range(max_subsets): - combinations = [] - for size in partitions: - combinations.append( i % size ) - enumeration_indices.append( combinations ) - - library_sets = [] - for subset_index, combinations in enumerate(enumeration_indices): - libs = [] - partitioned_rgroups = [] - for lib_index, libpart_index in enumerate(combinations): - lib = self.rgroups[lib_index] - num_chunks = partitions[lib_index] - partitioned_rgroups.append( lib.chunk(chunk_idx=libpart_index, - num_chunks=num_chunks)) - lib = Library(partitioned_rgroups) - if lib.isValid(): - library_sets.append(lib) + partitions = self.getSidechainsPerPartition(num_partitions) + max_subsets = max(partitions) + enumeration_indices = [] + for i in range(max_subsets): + combinations = [] + for size in partitions: + combinations.append(i % size) + enumeration_indices.append(combinations) - return library_sets + library_sets = [] + for subset_index, combinations in enumerate(enumeration_indices): + libs = [] + partitioned_rgroups = [] + for lib_index, libpart_index in enumerate(combinations): + lib = self.rgroups[lib_index] + num_chunks = partitions[lib_index] + partitioned_rgroups.append(lib.chunk(chunk_idx=libpart_index, num_chunks=num_chunks)) + lib = Library(partitioned_rgroups) + if lib.isValid(): + library_sets.append(lib) - def effectiveness(self): - """-> returns the average effectiveness of this library set""" - sum = 0.0 - for rg in self.rgroups: - sum += rg.effectiveness() - return sum/len(self.rgroups) - - def evaluate(self, props): - """props -> num_good_enumerations, total_enumerations + return library_sets + + def effectiveness(self): + """-> returns the average effectiveness of this library set""" + sum = 0.0 + for rg in self.rgroups: + sum += rg.effectiveness() + return sum / len(self.rgroups) + + def evaluate(self, props): + """props -> num_good_enumerations, total_enumerations props: a list of Property evaluators for the fragments. returns the number of good enumerations and the total number of enumerations for this Library""" - frags = [rg.sidechains for rg in self.rgroups] - good = 0 - bad = 0 - for i,frag in enumerate(itertools.product(*frags)): - for p in props: - if not p.evaluate(frag): - bad += 1 - break - else: - good += 1 - for sidechain in frag: - sidechain.good_count += 1 - return good, i+1 - - + frags = [rg.sidechains for rg in self.rgroups] + good = 0 + bad = 0 + for i, frag in enumerate(itertools.product(*frags)): + for p in props: + if not p.evaluate(frag): + bad += 1 + break + else: + good += 1 + for sidechain in frag: + sidechain.good_count += 1 + return good, i + 1 + + class Glare: - """Glare Algorithm. Implementation of + """Glare Algorithm. Implementation of GLARE: A New Approach for Filtering Large Reagent Lists in Combinatorial Library Design Using Product Properties @@ -291,171 +298,171 @@ class Glare: glare = Glare() glare.optimize(lib, props) """ - def __init__(self, - desiredFinalGoodness=0.95, - maxIterations=100, - rgroupScale=6.0, # None if no scaling - initialFraction=None,#None=auto -100., - numPartitions=16): - self.fractionGood = self.desiredFinalGoodness = desiredFinalGoodness - self.maxIterations = maxIterations - self.rgroupScale = rgroupScale - - if initialFraction is not None: - self.initialFraction = initialFraction/100. - else: - self.initialFraction = initialFraction - self.numPartitions = numPartitions - - def optimize(self, library, props): - """library, props + + def __init__( + self, + desiredFinalGoodness=0.95, + maxIterations=100, + rgroupScale=6.0, # None if no scaling + initialFraction=None, #None=auto -100., + numPartitions=16): + self.fractionGood = self.desiredFinalGoodness = desiredFinalGoodness + self.maxIterations = maxIterations + self.rgroupScale = rgroupScale + + if initialFraction is not None: + self.initialFraction = initialFraction / 100. + else: + self.initialFraction = initialFraction + self.numPartitions = numPartitions + + def optimize(self, library, props): + """library, props Given a Library and the list of Propery evaluators, optimize the library. The library is modified in place by removing building blocks (sidechains) that are not likely to pass the property criteria. """ - # attempt to generate report like glare application - print ("------- PARAMETERS: --------------") - print ("GOOODNESS THRESHOLD : %s%%"%(self.desiredFinalGoodness * 100)) - print ("MIN PARTITION SIZE : %s"%self.numPartitions) - if self.initialFraction is None or self.initialFraction > 0.999: - print ("INITIAL FRACTION TO KEEP : AUTOMATIC") + # attempt to generate report like glare application + print("------- PARAMETERS: --------------") + print("GOOODNESS THRESHOLD : %s%%" % (self.desiredFinalGoodness * 100)) + print("MIN PARTITION SIZE : %s" % self.numPartitions) + if self.initialFraction is None or self.initialFraction > 0.999: + print("INITIAL FRACTION TO KEEP : AUTOMATIC") + else: + print("INITIAL FRACTION TO KEEP : %s%%" % (self.initialFraction * 100)) + print("Actual SIZE : %s = %s" % + (" x ".join([str(len(rg.sidechains)) for rg in library.rgroups + ]), reduce(operator.mul, [len(rg.sidechains) for rg in library.rgroups]))) + + running_total = 0.0 + Gt = self.desiredFinalGoodness + + for iteration in range(1, self.maxIterations + 1): + # chunk of the total library into smaller more manageable sets + # and run combinatorial analysis on the sub libraries + # each of these records the number of times a sidechain is used + # in a successful enumeration which is then used to prune the + # library at the end + # + for rg in library.rgroups: + rg.randomize() + + good = total = 0.0 + chunked_libs = library.chunk(self.numPartitions) + # for each chunk, do the combinatorial check to see + # if reagents make good products + for libidx, chunk in enumerate(chunked_libs): + g, t = chunk.evaluate(props) + good += g + total += t + running_total += total + Gi = good / total # current goodness + + if Gi < 1e-12: + # I think we're done here :) + fraction = 0.0 + elif iteration == 1: + G0 = Gi # Goodness at first iteration + + # the first time, use the initalFraction or a "good enough" + # value + if self.initialFraction is not None: + fraction = K0 = self.initialFraction else: - print ("INITIAL FRACTION TO KEEP : %s%%"%(self.initialFraction*100)) - print ("Actual SIZE : %s = %s"%( - " x ".join([str(len(rg.sidechains)) for rg in library.rgroups]), - reduce(operator.mul, [len(rg.sidechains) for rg in library.rgroups]) - )) - - running_total = 0.0 - Gt = self.desiredFinalGoodness + # auto choose the fraction based on the current good percentage + # and the desired + fraction = K0 = min(-1.1 * (Gt - G0) + 1.2, 0.9) + else: + # the second time, gradually eliminate reagents slowing + # down as the number of iterations increases + # see equation (5) in reference + if abs(Gt - G0) < 1e-4: + Ki = 1.0 + else: + Ki = (1.0 - K0) * (Gi - G0) / (Gt - G0) + K0 + fraction = min(1.0, Ki) - for iteration in range(1, self.maxIterations+1): - # chunk of the total library into smaller more manageable sets - # and run combinatorial analysis on the sub libraries - # each of these records the number of times a sidechain is used - # in a successful enumeration which is then used to prune the - # library at the end - # - for rg in library.rgroups: - rg.randomize() - - good = total = 0.0 - chunked_libs = library.chunk(self.numPartitions) - # for each chunk, do the combinatorial check to see - # if reagents make good products - for libidx, chunk in enumerate(chunked_libs): - g,t = chunk.evaluate(props) - good += g - total += t - running_total += total - Gi = good/total # current goodness - - if Gi < 1e-12: - # I think we're done here :) - fraction = 0.0 - elif iteration == 1: - G0 = Gi # Goodness at first iteration + # prune the library to keep the highest occurring sidechains + # note that even if all sidechains are acceptable, + # some will always get pruned - # the first time, use the initalFraction or a "good enough" - # value - if self.initialFraction is not None: - fraction = K0 = self.initialFraction - else: - # auto choose the fraction based on the current good percentage - # and the desired - fraction = K0 = min(-1.1 * ( Gt - G0) + 1.2, - 0.9) - else: - # the second time, gradually eliminate reagents slowing - # down as the number of iterations increases - # see equation (5) in reference - if abs(Gt-G0) < 1e-4: - Ki = 1.0 - else: - Ki = (1.0 - K0) * (Gi - G0) / (Gt - G0) + K0 - fraction = min(1.0, Ki) + max_size = float(max([len(rg.sidechains) for rg in library.rgroups])) + for rg in library.rgroups: + scale = 1.0 + if self.rgroupScale is not None: + # scale differently size rgroups via equation (6) in paper + numSidechains = len(rg.sidechains) + numer = 1.0 + denom = 1.0 + math.exp(-self.rgroupScale * ((numSidechains / max_size) - 0.5)) + scale = numer / denom + fraction_to_reject = (1.0 - fraction) * scale + # keep the best fraction... + rg.prune(1.0 - fraction_to_reject) - # prune the library to keep the highest occurring sidechains - # note that even if all sidechains are acceptable, - # some will always get pruned - - max_size = float(max([len(rg.sidechains) for rg in library.rgroups])) - for rg in library.rgroups: - scale = 1.0 - if self.rgroupScale is not None: - # scale differently size rgroups via equation (6) in paper - numSidechains = len(rg.sidechains) - numer = 1.0 - denom = 1.0 + math.exp(-self.rgroupScale * - ((numSidechains/max_size) - 0.5)) - scale = numer/denom - fraction_to_reject = (1.0 - fraction) * scale - # keep the best fraction... - rg.prune(1.0 - fraction_to_reject) - - print ("-------------- ITERATION : %s ----------------------"%iteration) - print ("GOODNESS : %s%%"%(Gi * 100)) - print ("NUMBER EVAL : %s"%(total)) - print ("CUMUL EVAL : %s"%(running_total)) - print ("KEPT IN STEP : %s%%"%(fraction*100.)) - if not iteration: - print ("GOODNESS THRESHOLD : %s"%self.desiredFinalGoodness) - print ("MIN PARTITION SIZE : %s"%self.numPartitions) - print ("INITIAL FRACTION TO KEEP : ") - if self.fractionToKeep > 0.999: - print ("AUTOMATIC") - else: - print ("%s%%"%self.fractionToKeep) + print("-------------- ITERATION : %s ----------------------" % iteration) + print("GOODNESS : %s%%" % (Gi * 100)) + print("NUMBER EVAL : %s" % (total)) + print("CUMUL EVAL : %s" % (running_total)) + print("KEPT IN STEP : %s%%" % (fraction * 100.)) + if not iteration: + print("GOODNESS THRESHOLD : %s" % self.desiredFinalGoodness) + print("MIN PARTITION SIZE : %s" % self.numPartitions) + print("INITIAL FRACTION TO KEEP : ") + if self.fractionToKeep > 0.999: + print("AUTOMATIC") + else: + print("%s%%" % self.fractionToKeep) - print ("Actual SIZE : %s = %s"%( - " x ".join([str(len(rg.sidechains)) for rg in library.rgroups]), - reduce(operator.mul, [len(rg.sidechains) for rg in library.rgroups]) - )) - print ("EFFECTIVENESS : %s%%"%(library.effectiveness()*100.)) + print("Actual SIZE : %s = %s" % + (" x ".join([str(len(rg.sidechains)) for rg in library.rgroups + ]), reduce(operator.mul, [len(rg.sidechains) for rg in library.rgroups]))) + print("EFFECTIVENESS : %s%%" % (library.effectiveness() * 100.)) - # stopping criteria - if iteration and Gi < 1e-12: - return - elif abs(Gi - self.desiredFinalGoodness) < 0.001 or \ - Gi > self.desiredFinalGoodness: - return + # stopping criteria + if iteration and Gi < 1e-12: + return + elif abs(Gi - self.desiredFinalGoodness) < 0.001 or \ + Gi > self.desiredFinalGoodness: + return -###################################################################### + +###################################################################### # testing codes def makeFakeProps(): - mw = random.randint(10,500) - alogp = random.randint(-10,10) - tpsa = random.randint(0,180) - return [mw, alogp, tpsa] + mw = random.randint(10, 500) + alogp = random.randint(-10, 10) + tpsa = random.randint(0, 180) + return [mw, alogp, tpsa] + def makeFakeSidechains(lib, num): - res = [] - for i in range(num): - res.append(Sidechain(lib + "_" + str(i), makeFakeProps())) - return res + res = [] + for i in range(num): + res.append(Sidechain(lib + "_" + str(i), makeFakeProps())) + return res + def testGlare(): - a = RGroups(makeFakeSidechains("aldehydes", 1000)) - b = RGroups(makeFakeSidechains("boronic_acids", 1500)) - - lib = Library([a,b]) - props = [ - Property("mw", 0, 0, 500, 230.1419), - Property("alogp", 1, -2.4, 5, 2.212749), - Property("tpsa", 2, 0, 90, 24.5) - ] - - glare = Glare() - glare.optimize(lib, props) - # print out the selected reactants - for reactant_idx, rgroup in enumerate(lib.rgroups): - print(f"Reactants for reactant {reactant_idx}") - for reactant in rgroup.sidechains: - print(reactant.name) + a = RGroups(makeFakeSidechains("aldehydes", 1000)) + b = RGroups(makeFakeSidechains("boronic_acids", 1500)) + + lib = Library([a, b]) + props = [ + Property("mw", 0, 0, 500, 230.1419), + Property("alogp", 1, -2.4, 5, 2.212749), + Property("tpsa", 2, 0, 90, 24.5) + ] + + glare = Glare() + glare.optimize(lib, props) + # print out the selected reactants + for reactant_idx, rgroup in enumerate(lib.rgroups): + print(f"Reactants for reactant {reactant_idx}") + for reactant in rgroup.sidechains: + print(reactant.name) + if __name__ == "__main__": - testGlare() - - + testGlare() diff --git a/Contrib/IFG/ifg.py b/Contrib/IFG/ifg.py index 21a01b71a..ceab5ecbf 100644 --- a/Contrib/IFG/ifg.py +++ b/Contrib/IFG/ifg.py @@ -5,6 +5,8 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. +from collections import namedtuple + # # # Richard hall 2017 @@ -13,21 +15,22 @@ # refine output function # astex_ifg: identify functional groups a la Ertl, J. Cheminform (2017) 9:36 from rdkit import Chem -from collections import namedtuple + def merge(mol, marked, aset): - bset = set() - for idx in aset: - atom = mol.GetAtomWithIdx(idx) - for nbr in atom.GetNeighbors(): - jdx = nbr.GetIdx() - if jdx in marked: - marked.remove(jdx) - bset.add(jdx) - if not bset: - return - merge(mol, marked, bset) - aset.update(bset) + bset = set() + for idx in aset: + atom = mol.GetAtomWithIdx(idx) + for nbr in atom.GetNeighbors(): + jdx = nbr.GetIdx() + if jdx in marked: + marked.remove(jdx) + bset.add(jdx) + if not bset: + return + merge(mol, marked, bset) + aset.update(bset) + # atoms connected by non-aromatic double or triple bond to any heteroatom # c=O should not match (see fig1, box 15). I think using A instead of * should sort that out? @@ -41,65 +44,70 @@ PATT_OXIRANE_ETC = Chem.MolFromSmarts('[O,N,S]1CC1') PATT_TUPLE = (PATT_DOUBLE_TRIPLE, PATT_CC_DOUBLE_TRIPLE, PATT_ACETAL, PATT_OXIRANE_ETC) + def identify_functional_groups(mol): - marked = set() -#mark all heteroatoms in a molecule, including halogens - for atom in mol.GetAtoms(): - if atom.GetAtomicNum() not in (6,1): # would we ever have hydrogen? - marked.add(atom.GetIdx()) + marked = set() + #mark all heteroatoms in a molecule, including halogens + for atom in mol.GetAtoms(): + if atom.GetAtomicNum() not in (6, 1): # would we ever have hydrogen? + marked.add(atom.GetIdx()) #mark the four specific types of carbon atom - for patt in PATT_TUPLE: - for path in mol.GetSubstructMatches(patt): - for atomindex in path: - marked.add(atomindex) + for patt in PATT_TUPLE: + for path in mol.GetSubstructMatches(patt): + for atomindex in path: + marked.add(atomindex) #merge all connected marked atoms to a single FG - groups = [] - while marked: - grp = set([marked.pop()]) - merge(mol, marked, grp) - groups.append(grp) + groups = [] + while marked: + grp = set([marked.pop()]) + merge(mol, marked, grp) + groups.append(grp) + #extract also connected unmarked carbon atoms - ifg = namedtuple('IFG', ['atomIds', 'atoms', 'type']) - ifgs = [] - for g in groups: - uca = set() - for atomidx in g: - for n in mol.GetAtomWithIdx(atomidx).GetNeighbors(): - if n.GetAtomicNum() == 6: - uca.add(n.GetIdx()) - ifgs.append(ifg(atomIds=tuple(list(g)), atoms=Chem.MolFragmentToSmiles(mol, g, canonical=True), type=Chem.MolFragmentToSmiles(mol, g.union(uca),canonical=True))) - return ifgs + ifg = namedtuple('IFG', ['atomIds', 'atoms', 'type']) + ifgs = [] + for g in groups: + uca = set() + for atomidx in g: + for n in mol.GetAtomWithIdx(atomidx).GetNeighbors(): + if n.GetAtomicNum() == 6: + uca.add(n.GetIdx()) + ifgs.append( + ifg(atomIds=tuple(list(g)), atoms=Chem.MolFragmentToSmiles(mol, g, canonical=True), + type=Chem.MolFragmentToSmiles(mol, g.union(uca), canonical=True))) + return ifgs + def main(): - for ix, smiles in enumerate([ - 'Cc1nc(NS(=O)(=O)c2ccc(N)cc2)nc(C)c1', # fig1, 1 - 'NC(=N)c1ccc(C=Cc2ccc(cc2O)C(=N)N)cc1', # 2 - 'CC(=O)Nc1nnc(s1)S(=O)(=O)N', # 3 - 'NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl', # 4 - 'CNC1=Nc2ccc(Cl)cc2C(=N(=O)C1)c3ccccc3', # 5 - 'Cc1onc(c1C(=O)NC2C3SC(C)(C)C(N3C2=O)C(=O)O)c4ccccc4', # 6 - 'Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O', # 7 - 'COc1cc(cc(C(=O)NCC2CCCN2CC=C)c1OC)S(=O)(=O)N', # 8 - 'Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl', # 9 - 'Clc1ccc2Oc3ccccc3N=C(N4CCNCC4)c2c1', # 10 - there is a discrepancy with the paper here! I wonder if Peter has the ring as aromatic? - 'FC(F)(F)CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13', # 11 - 'OCC1OC(CC1O)n2cnc3C(O)CNC=Nc32', # 12 - 'CCNC1CC(C)S(=O)(=O)c2sc(cc12)S(=O)(=O)N', # 13 - 'CC(O)C1C2C(C)C(=C(N2C1=O)C(=O)O)SC3CNC(C3)C(=O)N(C)C', # 14 - 'CC1CN(CC(C)N1)c2c(F)c(N)c3c(=O)c(cn(C4CC4)c3c2F)C(=O)O', # 15 - 'CC(=CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3)C', # 16 - 'Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl', # 17 - 'CC(=O)NC1C(NC(=N)N)C=C(OC1C(O)C(O)CO)C(=O)O', # 18 - 'CC(O)C(O)C1CNc2nc(N)nc(O)c2N1', # 19 - 'NC1CCCCN(C1)c2c(Cl)cc3c(=O)c(cn(C4CC4)c3c2Cl)C(=O)O', # 20 - ]): - m = Chem.MolFromSmiles(smiles) - fgs = identify_functional_groups(m) - print('%2d: %d fgs'%(ix+1, len(fgs)), fgs) + for ix, smiles in enumerate([ + 'Cc1nc(NS(=O)(=O)c2ccc(N)cc2)nc(C)c1', # fig1, 1 + 'NC(=N)c1ccc(C=Cc2ccc(cc2O)C(=N)N)cc1', # 2 + 'CC(=O)Nc1nnc(s1)S(=O)(=O)N', # 3 + 'NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl', # 4 + 'CNC1=Nc2ccc(Cl)cc2C(=N(=O)C1)c3ccccc3', # 5 + 'Cc1onc(c1C(=O)NC2C3SC(C)(C)C(N3C2=O)C(=O)O)c4ccccc4', # 6 + 'Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O', # 7 + 'COc1cc(cc(C(=O)NCC2CCCN2CC=C)c1OC)S(=O)(=O)N', # 8 + 'Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl', # 9 + 'Clc1ccc2Oc3ccccc3N=C(N4CCNCC4)c2c1', # 10 - there is a discrepancy with the paper here! I wonder if Peter has the ring as aromatic? + 'FC(F)(F)CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13', # 11 + 'OCC1OC(CC1O)n2cnc3C(O)CNC=Nc32', # 12 + 'CCNC1CC(C)S(=O)(=O)c2sc(cc12)S(=O)(=O)N', # 13 + 'CC(O)C1C2C(C)C(=C(N2C1=O)C(=O)O)SC3CNC(C3)C(=O)N(C)C', # 14 + 'CC1CN(CC(C)N1)c2c(F)c(N)c3c(=O)c(cn(C4CC4)c3c2F)C(=O)O', # 15 + 'CC(=CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3)C', # 16 + 'Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl', # 17 + 'CC(=O)NC1C(NC(=N)N)C=C(OC1C(O)C(O)CO)C(=O)O', # 18 + 'CC(O)C(O)C1CNc2nc(N)nc(O)c2N1', # 19 + 'NC1CCCCN(C1)c2c(Cl)cc3c(=O)c(cn(C4CC4)c3c2Cl)C(=O)O', # 20 + ]): + m = Chem.MolFromSmiles(smiles) + fgs = identify_functional_groups(m) + print('%2d: %d fgs' % (ix + 1, len(fgs)), fgs) if __name__ == "__main__": - main() + main() diff --git a/Contrib/LEF/AddLabels.py b/Contrib/LEF/AddLabels.py index 148efdb67..1bbdc03d3 100644 --- a/Contrib/LEF/AddLabels.py +++ b/Contrib/LEF/AddLabels.py @@ -1,19 +1,19 @@ # # Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,9 +30,12 @@ # # Created by Greg Landrum and Anna Vulpetti, March 2009 +import pickle +import re +import sys + from rdkit import Chem from rdkit.Chem import BRICS -import sys, pickle, re inF = file(sys.argv[1], 'r') inLs = inF.readlines() diff --git a/Contrib/LEF/ClusterFps.py b/Contrib/LEF/ClusterFps.py index f72b01644..d07e6843a 100644 --- a/Contrib/LEF/ClusterFps.py +++ b/Contrib/LEF/ClusterFps.py @@ -1,19 +1,19 @@ # # Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,9 +30,11 @@ # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from rdkit.ML.Cluster import Butina +import pickle +import sys + from rdkit import DataStructs -import sys, pickle +from rdkit.ML.Cluster import Butina # sims is the list of similarity thresholds used to generate clusters sims = [.9, .8, .7, .6] diff --git a/Contrib/LEF/CreateFps.py b/Contrib/LEF/CreateFps.py index d5c43781d..105ee4426 100644 --- a/Contrib/LEF/CreateFps.py +++ b/Contrib/LEF/CreateFps.py @@ -1,19 +1,19 @@ # # Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,11 +30,12 @@ # # Created by Greg Landrum and Anna Vulpetti, March 2009 +import pickle +import sys from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem.AtomPairs import Pairs, Torsions -import sys, pickle # maxPathLength is the maximum path length in atoms # maxPathLength=6 corresponds to F-FP-5 @@ -47,11 +48,13 @@ maxPathLength = 8 nameField = 'Compound_orig' #nameField = '_Name' -extraQueries = (('SCF3?', Chem.MolFromSmarts('SC(F)(F)F')), - ('COCF3?', Chem.MolFromSmarts('C(=O)C(F)(F)F')), - ('OCF3?', Chem.MolFromSmarts('OC(F)(F)F')), - ('NCF3?', Chem.MolFromSmarts('NC(F)(F)F')), - ('CF3?', Chem.MolFromSmarts('C(F)(F)F')), ) +extraQueries = ( + ('SCF3?', Chem.MolFromSmarts('SC(F)(F)F')), + ('COCF3?', Chem.MolFromSmarts('C(=O)C(F)(F)F')), + ('OCF3?', Chem.MolFromSmarts('OC(F)(F)F')), + ('NCF3?', Chem.MolFromSmarts('NC(F)(F)F')), + ('CF3?', Chem.MolFromSmarts('C(F)(F)F')), +) def GetMolFingerprint(mol, maxPathLength): @@ -95,8 +98,8 @@ if __name__ == '__main__': pickle.dump(colNames, outF) pickle.dump(fps, outF) - print('name1 smiles1 name2 smiles2 name12 smiles12 environment_id ' + ' '.join( - [x for x, y in extraQueries])) + print('name1 smiles1 name2 smiles2 name12 smiles12 environment_id ' + + ' '.join([x for x, y in extraQueries])) if 1: seen = [] smis = [] diff --git a/Contrib/LEF/DistancePlot.py b/Contrib/LEF/DistancePlot.py index e686ddcb6..fcd20ec0f 100644 --- a/Contrib/LEF/DistancePlot.py +++ b/Contrib/LEF/DistancePlot.py @@ -1,19 +1,19 @@ # # Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -29,10 +29,11 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from rdkit import Chem -from rdkit import DataStructs from CreateFps import GetMolFingerprint + +from rdkit import Chem, DataStructs from rdkit.RDLogger import logger + logger = logger() import sys diff --git a/Contrib/LEF/DistancePredict.py b/Contrib/LEF/DistancePredict.py index b7b33e7e0..4ad9f7453 100644 --- a/Contrib/LEF/DistancePredict.py +++ b/Contrib/LEF/DistancePredict.py @@ -1,19 +1,19 @@ # # Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -29,11 +29,12 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from rdkit import Chem -from rdkit import DataStructs from CreateFps import GetMolFingerprint + +from rdkit import Chem, DataStructs from rdkit.ML.KNN.KNNRegressionModel import KNNRegressionModel from rdkit.RDLogger import logger + logger = logger() import sys @@ -47,8 +48,9 @@ nameField = 'Compound_orig' propField = 'chemical_shift_1' weightedAverage = True -import types, copy -from optparse import OptionParser, Option, OptionValueError +import copy +import types +from optparse import Option, OptionParser, OptionValueError def check_floatlist(option, opt, value): diff --git a/Contrib/M_Kossner/Frames.py b/Contrib/M_Kossner/Frames.py index 5ac6a81f1..7ae44ba43 100644 --- a/Contrib/M_Kossner/Frames.py +++ b/Contrib/M_Kossner/Frames.py @@ -3,23 +3,24 @@ # Jan 2011 (markus kossner) Cleaned up the code, added some documentation # somewhere around Aug 2008 (markus kossner) created -# +# # This script extracts the molecular framework for a database of molecules. -# You can use two modes (hard coded): +# You can use two modes (hard coded): # - Scaff: The molecular frame is extracted # - RedScaff: All linking chains between rings are deleted. The rings are directly connected. -# -# You can comment in/out the code snippets indicated by the comments +# +# You can comment in/out the code snippets indicated by the comments # to force each atom of the frame to be a Carbon. -# +# # Usage: Frames.py -# Output: +# Output: # - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc) # - frames.smi containing the (canonical) smiles and count of occurrence # +import os +import sys -import os, sys from Chem import AllChem as Chem @@ -68,7 +69,7 @@ def GetFrame(mol, mode='Scaff'): #print 'PosConnectors:' #print PosConnectors Framework = [x for x in RingAtoms] - #Start a list of pathways which we will have to walk + #Start a list of pathways which we will have to walk #print 'Path atoms:' #print Paths Linkers = [] @@ -165,7 +166,9 @@ if __name__ == '__main__': if cansmiles in FrameDict: FrameDict[cansmiles].append(mol) else: - FrameDict[cansmiles] = [mol, ] + FrameDict[cansmiles] = [ + mol, + ] counter = 0 w = open('frames.smi', 'w') diff --git a/Contrib/MolVS/molvs_cli.py b/Contrib/MolVS/molvs_cli.py index 0a1ad3493..bb266218d 100644 --- a/Contrib/MolVS/molvs_cli.py +++ b/Contrib/MolVS/molvs_cli.py @@ -17,89 +17,96 @@ import sys from rdkit import Chem from rdkit.Chem.MolStandardize import Standardizer, Validator - log = logging.getLogger(__name__) - FILETYPES = ['smi', 'mol', 'sdf'] class MolvsParser(argparse.ArgumentParser): - def error(self, message): - sys.stderr.write('Error: %s\n\n'.encode() % message) - self.print_help() - sys.exit(2) + def error(self, message): + sys.stderr.write('Error: %s\n\n'.encode() % message) + self.print_help() + sys.exit(2) def _read_mol(args): - if args.smiles: - return Chem.MolFromSmiles(args.smiles) - elif args.intype in {'smi', 'smiles'} or args.infile.name.endswith('smi') or args.infile.name.endswith('smiles'): - return Chem.MolFromSmiles(args.infile.read()) - elif args.intype in {'mol', 'sdf'} or args.infile.name.endswith('mol') or args.infile.name.endswith('sdf'): - return Chem.MolFromMolBlock(args.infile.read()) - else: - return Chem.MolFromSmiles(args.infile.read()) + if args.smiles: + return Chem.MolFromSmiles(args.smiles) + elif args.intype in {'smi', 'smiles' + } or args.infile.name.endswith('smi') or args.infile.name.endswith('smiles'): + return Chem.MolFromSmiles(args.infile.read()) + elif args.intype in {'mol', 'sdf' + } or args.infile.name.endswith('mol') or args.infile.name.endswith('sdf'): + return Chem.MolFromMolBlock(args.infile.read()) + else: + return Chem.MolFromSmiles(args.infile.read()) def _write_mol(mol, args): - if args.outtype in {'smi', 'smiles'} or args.outfile.name.endswith('smi') or args.outfile.name.endswith('smiles'): - args.outfile.write(Chem.MolToSmiles(mol)) - args.outfile.write('\n') - elif args.outtype in {'mol', 'sdf'} or args.outfile.name.endswith('mol') or args.outfile.name.endswith('sdf'): - args.outfile.write(Chem.MolToMolBlock(mol)) - else: - args.outfile.write(Chem.MolToSmiles(mol)) - args.outfile.write('\n') + if args.outtype in { + 'smi', 'smiles' + } or args.outfile.name.endswith('smi') or args.outfile.name.endswith('smiles'): + args.outfile.write(Chem.MolToSmiles(mol)) + args.outfile.write('\n') + elif args.outtype in {'mol', 'sdf' + } or args.outfile.name.endswith('mol') or args.outfile.name.endswith('sdf'): + args.outfile.write(Chem.MolToMolBlock(mol)) + else: + args.outfile.write(Chem.MolToSmiles(mol)) + args.outfile.write('\n') def standardize_main(args): - mol = _read_mol(args) - s = Standardizer() - mol = s.standardize(mol) - _write_mol(mol, args) + mol = _read_mol(args) + s = Standardizer() + mol = s.standardize(mol) + _write_mol(mol, args) def validate_main(args): - mol = _read_mol(args) - v = Validator() - logs = v.validate(mol) - for log in logs: - args.outfile.write(log) - args.outfile.write('\n') + mol = _read_mol(args) + v = Validator() + logs = v.validate(mol) + for log in logs: + args.outfile.write(log) + args.outfile.write('\n') -if __name__=='__main__': - """Main function for molvs command line interface.""" +if __name__ == '__main__': + """Main function for molvs command line interface.""" - # Root options -# parser = MolvsParser(epilog='use "molvs -h" to show help for a specific command') - parser = MolvsParser(usage="usage: python cli.py [-h] {standardize,validate} ...") - subparsers = parser.add_subparsers(title='Available commands') + # Root options + # parser = MolvsParser(epilog='use "molvs -h" to show help for a specific command') + parser = MolvsParser(usage="usage: python cli.py [-h] {standardize,validate} ...") + subparsers = parser.add_subparsers(title='Available commands') - # Options common to all commands + # Options common to all commands - common_parser = MolvsParser(add_help=False) - common_parser.add_argument('infile', nargs='?', help='input filename', type=argparse.FileType('r'), default=sys.stdin) - common_parser.add_argument('-i', '--intype', help='input filetype', choices=FILETYPES) - common_parser.add_argument('-:', '--smiles', help='input SMILES instead of file', metavar='') - common_parser.add_argument('-O', '--outfile', help='output filename', type=argparse.FileType('w'), default=sys.stdout, metavar='') + common_parser = MolvsParser(add_help=False) + common_parser.add_argument('infile', nargs='?', help='input filename', + type=argparse.FileType('r'), default=sys.stdin) + common_parser.add_argument('-i', '--intype', help='input filetype', choices=FILETYPES) + common_parser.add_argument('-:', '--smiles', help='input SMILES instead of file', + metavar='') + common_parser.add_argument('-O', '--outfile', help='output filename', type=argparse.FileType('w'), + default=sys.stdout, metavar='') - # Standardize options - standardize_parser = subparsers.add_parser('standardize', help='standardize a molecule', parents=[common_parser]) - standardize_parser.add_argument('-o', '--outtype', help='output filetype', choices=FILETYPES) - standardize_parser.set_defaults(func=standardize_main) + # Standardize options + standardize_parser = subparsers.add_parser('standardize', help='standardize a molecule', + parents=[common_parser]) + standardize_parser.add_argument('-o', '--outtype', help='output filetype', choices=FILETYPES) + standardize_parser.set_defaults(func=standardize_main) - # Validate options - validate_parser = subparsers.add_parser('validate', help='validate a molecule', parents=[common_parser]) - validate_parser.set_defaults(func=validate_main) - - args = parser.parse_args() - try: - args.func(args) - except Exception as e: - sys.stderr.write('Error: %s\n\n'.encode() % e.message) - parser.print_help() - sys.exit(2) + # Validate options + validate_parser = subparsers.add_parser('validate', help='validate a molecule', + parents=[common_parser]) + validate_parser.set_defaults(func=validate_main) + args = parser.parse_args() + try: + args.func(args) + except Exception as e: + sys.stderr.write('Error: %s\n\n'.encode() % e.message) + parser.print_help() + sys.exit(2) diff --git a/Contrib/NIBRSubstructureFilters/assignSubstructureFilters.py b/Contrib/NIBRSubstructureFilters/assignSubstructureFilters.py index 76c1fc379..d673decfd 100644 --- a/Contrib/NIBRSubstructureFilters/assignSubstructureFilters.py +++ b/Contrib/NIBRSubstructureFilters/assignSubstructureFilters.py @@ -1,133 +1,141 @@ -import pandas as pd import argparse -import numpy as np import operator -from collections import Counter, defaultdict, namedtuple import sys +from collections import Counter, defaultdict, namedtuple + +import numpy as np +import pandas as pd from rdkit import Chem -from rdkit.Chem import FilterCatalog -from rdkit.Chem import rdMolDescriptors -from rdkit.Chem import RDConfig - -FilterMatch = namedtuple('FilterMatch', ('SubstructureMatches', 'Min_N_O_filter', 'Frac_N_O', 'Covalent', 'SpecialMol', 'SeverityScore')) +from rdkit.Chem import FilterCatalog, RDConfig, rdMolDescriptors + +FilterMatch = namedtuple( + 'FilterMatch', + ('SubstructureMatches', 'Min_N_O_filter', 'Frac_N_O', 'Covalent', 'SpecialMol', 'SeverityScore')) + # Build the filter catalog using the RDKit filterCatalog module def buildFilterCatalog(): - inhousefilter = pd.read_csv(f'{RDConfig.RDContribDir}/NIBRSubstructureFilters/SubstructureFilter_HitTriaging_wPubChemExamples.csv') - inhouseFiltersCat = FilterCatalog.FilterCatalog() - for i in range(inhousefilter.shape[0]): - mincount=1 - if inhousefilter['MIN_COUNT'][i] != 0: - mincount = int(inhousefilter['MIN_COUNT'][i]) - pname = inhousefilter['PATTERN_NAME'][i] - sname = inhousefilter['SET_NAME'][i] - pname_final='{0}_min({1})__{2}__{3}__{4}'.format(pname,mincount, - inhousefilter['SEVERITY_SCORE'][i], - inhousefilter['COVALENT'][i], - inhousefilter['SPECIAL_MOL'][i]) - fil = FilterCatalog.SmartsMatcher(pname_final,inhousefilter['SMARTS'][i], mincount) - inhouseFiltersCat.AddEntry(FilterCatalog.FilterCatalogEntry(pname_final,fil)) - inhouseFiltersCat.GetEntry(i).SetProp('Scope', sname) - return inhouseFiltersCat + inhousefilter = pd.read_csv( + f'{RDConfig.RDContribDir}/NIBRSubstructureFilters/SubstructureFilter_HitTriaging_wPubChemExamples.csv' + ) + inhouseFiltersCat = FilterCatalog.FilterCatalog() + for i in range(inhousefilter.shape[0]): + mincount = 1 + if inhousefilter['MIN_COUNT'][i] != 0: + mincount = int(inhousefilter['MIN_COUNT'][i]) + pname = inhousefilter['PATTERN_NAME'][i] + sname = inhousefilter['SET_NAME'][i] + pname_final = '{0}_min({1})__{2}__{3}__{4}'.format(pname, mincount, + inhousefilter['SEVERITY_SCORE'][i], + inhousefilter['COVALENT'][i], + inhousefilter['SPECIAL_MOL'][i]) + fil = FilterCatalog.SmartsMatcher(pname_final, inhousefilter['SMARTS'][i], mincount) + inhouseFiltersCat.AddEntry(FilterCatalog.FilterCatalogEntry(pname_final, fil)) + inhouseFiltersCat.GetEntry(i).SetProp('Scope', sname) + return inhouseFiltersCat + # Assign substructure filters and fraction of Nitrogen and Oxygen atoms def assignFilters(data, nameSmilesColumn='smiles'): - - results=[] - - inhouseFiltersCat = buildFilterCatalog() - - NO_filter = '[#7,#8]' - sma = Chem.MolFromSmarts(NO_filter, mergeHs=True) - - for smi in data[nameSmilesColumn]: - qc,NO_filter,fracNO,co,sc,sm = [np.NaN]*6 - - try: - mol = Chem.MolFromSmiles(smi) - # fraction of N and O atoms - numHeavyAtoms = mol.GetNumHeavyAtoms() - numNO = len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#7,#8]'))) - fracNO = float(numNO)/numHeavyAtoms + results = [] - # all substructure filters - entries = inhouseFiltersCat.GetMatches(mol) - if len(list(entries)): - # initialize empty lists - fs,sev,cov,spm = ([] for _ in range(4)) - # get the matches - for entry in entries: - pname=entry.GetDescription() - n, s, c, m = pname.split('__') - fs.append(entry.GetProp("Scope")+'_'+n) - sev.append(int(s)) - cov.append(int(c)) - spm.append(int(m)) - # concatenate all matching filters - qc = ' | '.join(fs) - # assign overall severity - if sev.count(2): - sc = 10 - else: - sc = sum(sev) - # get number of covalent flags and special molecule flags - co = sum(cov) - sm = sum(spm) - # if non of the filters matches - else: - qc = 'no match' - sc = 0 - co = 0 - sm = 0 + inhouseFiltersCat = buildFilterCatalog() + + NO_filter = '[#7,#8]' + sma = Chem.MolFromSmarts(NO_filter, mergeHs=True) + + for smi in data[nameSmilesColumn]: + qc, NO_filter, fracNO, co, sc, sm = [np.NaN] * 6 + + try: + mol = Chem.MolFromSmiles(smi) + + # fraction of N and O atoms + numHeavyAtoms = mol.GetNumHeavyAtoms() + numNO = len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#7,#8]'))) + fracNO = float(numNO) / numHeavyAtoms + + # all substructure filters + entries = inhouseFiltersCat.GetMatches(mol) + if len(list(entries)): + # initialize empty lists + fs, sev, cov, spm = ([] for _ in range(4)) + # get the matches + for entry in entries: + pname = entry.GetDescription() + n, s, c, m = pname.split('__') + fs.append(entry.GetProp("Scope") + '_' + n) + sev.append(int(s)) + cov.append(int(c)) + spm.append(int(m)) + # concatenate all matching filters + qc = ' | '.join(fs) + # assign overall severity + if sev.count(2): + sc = 10 + else: + sc = sum(sev) + # get number of covalent flags and special molecule flags + co = sum(cov) + sm = sum(spm) + # if non of the filters matches + else: + qc = 'no match' + sc = 0 + co = 0 + sm = 0 + + # special NO filter + if not mol.HasSubstructMatch(sma): + NO_filter = 'no_oxygen_or_nitrogen' + else: + NO_filter = 'no match' + except Exception: + print("Failed on compound {0}\n".format(smi)) + pass + results.append(FilterMatch(qc, NO_filter, fracNO, co, sm, sc)) + return results - # special NO filter - if not mol.HasSubstructMatch(sma): - NO_filter = 'no_oxygen_or_nitrogen' - else: - NO_filter = 'no match' - except Exception: - print("Failed on compound {0}\n".format(smi)) - pass - results.append(FilterMatch(qc,NO_filter,fracNO,co,sm,sc)) - return results if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, required=True, help='Please specify the path to your data file. Required format: csv') - parser.add_argument('--smilesColumn', type=str, required=True, help='Please specify the name of your SMILES column.') - parser.add_argument('--result', type=str, required=True, help='Please specify the name of your result file.') - parser.add_argument('--verbose', type=bool, default=1, help='Generate output? Default: False') - args = parser.parse_args() - - if args.verbose: - print('---> Reading data') - datafile = args.data - try: - data = pd.read_csv(datafile) - except Exception: - if args.verbose: - print('Data could not be read. Please check your file.') - sys.exit() - - smiCol = args.smilesColumn + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, required=True, + help='Please specify the path to your data file. Required format: csv') + parser.add_argument('--smilesColumn', type=str, required=True, + help='Please specify the name of your SMILES column.') + parser.add_argument('--result', type=str, required=True, + help='Please specify the name of your result file.') + parser.add_argument('--verbose', type=bool, default=1, help='Generate output? Default: False') + args = parser.parse_args() - if args.verbose: - print('---> Apply filters to data') - try: - results = assignFilters(data, nameSmilesColumn=smiCol) - except Exception: - if args.verbose: - print('Smiles column does not exist. Please check.') - sys.exit() - - df_tmp = pd.DataFrame.from_records(results, columns=FilterMatch._fields) - - data = data.merge(df_tmp, how='left', left_index=True, right_index=True) - data.to_csv(args.result, index=False) - - if args.verbose: - print('---> Done') - + if args.verbose: + print('---> Reading data') + datafile = args.data + try: + data = pd.read_csv(datafile) + except Exception: + if args.verbose: + print('Data could not be read. Please check your file.') + sys.exit() + + smiCol = args.smilesColumn + + if args.verbose: + print('---> Apply filters to data') + try: + results = assignFilters(data, nameSmilesColumn=smiCol) + except Exception: + if args.verbose: + print('Smiles column does not exist. Please check.') + sys.exit() + + df_tmp = pd.DataFrame.from_records(results, columns=FilterMatch._fields) + + data = data.merge(df_tmp, how='left', left_index=True, right_index=True) + data.to_csv(args.result, index=False) + + if args.verbose: + print('---> Done') diff --git a/Contrib/NP_Score/npscorer.py b/Contrib/NP_Score/npscorer.py index 737c40205..483e47fa3 100644 --- a/Contrib/NP_Score/npscorer.py +++ b/Contrib/NP_Score/npscorer.py @@ -13,12 +13,15 @@ # peter ertl, august 2015 # +import gzip +import math +import os.path +import pickle +import sys +from collections import namedtuple from rdkit import Chem from rdkit.Chem import rdMolDescriptors -import sys, math, gzip, pickle -import os.path -from collections import namedtuple def readNPModel(filename=os.path.join(os.path.dirname(__file__), 'publicnp.model.gz')): diff --git a/Contrib/PBF/pbf.py b/Contrib/PBF/pbf.py index 7349c742c..653818ad6 100644 --- a/Contrib/PBF/pbf.py +++ b/Contrib/PBF/pbf.py @@ -7,11 +7,12 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. -from rdkit import Chem -from rdkit.Chem import AllChem import numpy as np from numpy import linalg +from rdkit import Chem +from rdkit.Chem import AllChem + def GetBestFitPlane(pts, weights=None): if weights is None: diff --git a/Contrib/RxnRoleAssignment/identifyReactants.py b/Contrib/RxnRoleAssignment/identifyReactants.py index 256b21aee..e9529d9ad 100644 --- a/Contrib/RxnRoleAssignment/identifyReactants.py +++ b/Contrib/RxnRoleAssignment/identifyReactants.py @@ -1,19 +1,19 @@ # # Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,404 +30,431 @@ # # Created by Nadine Schneider, July 2016 +import itertools +from collections import Counter, defaultdict - +import numpy as np from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import rdqueries - -from collections import defaultdict, Counter -import itertools -import numpy as np +from rdkit.Chem import AllChem, rdqueries from . import utils class MoleculeDetails(object): - - __slots__ = ['detailFP','scaffoldFP','bitInfoDetailFP','bitInfoScaffoldFP','reactivity','bitReactivity','molecule'] - - def _atomDetailInvariant(self, mol): - mol.UpdatePropertyCache(False) - num_atoms = mol.GetNumAtoms() - Chem.GetSSSR(mol) - rinfo = mol.GetRingInfo() - invariants = [0]*num_atoms - for i,a in enumerate(mol.GetAtoms()): - descriptors=[] - descriptors.append(a.GetAtomicNum()) - descriptors.append(a.GetTotalDegree()) - descriptors.append(a.GetTotalNumHs()) - descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(),6)) - descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(),5)) - descriptors.append(a.IsInRing()) - descriptors.append(a.GetIsAromatic()) - invariants[i]=hash(tuple(descriptors))& 0xffffffff - return invariants - def _atomScaffoldInvariant(self, mol): - num_atoms = mol.GetNumAtoms() - invariants = [0]*num_atoms - for i,a in enumerate(mol.GetAtoms()): - descriptors=[] - descriptors.append(a.GetAtomicNum()) - invariants[i]=hash(tuple(descriptors))& 0xffffffff - return invariants - - def _createFP(self, mol, invariant, bitinfo, useBondTypes=True, radius=1): - return AllChem.GetMorganFingerprint(mol=mol, radius=radius, invariants=invariant, useBondTypes=useBondTypes, bitInfo=bitinfo) - - def _isHeteroAtom(self, a): - return a.GetAtomicNum() not in (6, 1) + __slots__ = [ + 'detailFP', 'scaffoldFP', 'bitInfoDetailFP', 'bitInfoScaffoldFP', 'reactivity', 'bitReactivity', + 'molecule' + ] - def _isSp3OrAromaticCarbon(self, a): - if a.GetAtomicNum() != 6: - return False - if a.GetIsAromatic(): - return True - for b in a.GetBonds(): - if b.GetBondTypeAsDouble() > 1.5: - return False - return True + def _atomDetailInvariant(self, mol): + mol.UpdatePropertyCache(False) + num_atoms = mol.GetNumAtoms() + Chem.GetSSSR(mol) + rinfo = mol.GetRingInfo() + invariants = [0] * num_atoms + for i, a in enumerate(mol.GetAtoms()): + descriptors = [] + descriptors.append(a.GetAtomicNum()) + descriptors.append(a.GetTotalDegree()) + descriptors.append(a.GetTotalNumHs()) + descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(), 6)) + descriptors.append(rinfo.IsAtomInRingOfSize(a.GetIdx(), 5)) + descriptors.append(a.IsInRing()) + descriptors.append(a.GetIsAromatic()) + invariants[i] = hash(tuple(descriptors)) & 0xffffffff + return invariants - def _calcReactivityAtom(self, a): - # exclude sp3 carbons or uncharged single heavy atoms such as water molecules - if self._isSp3OrAromaticCarbon(a) or (len(a.GetNeighbors())==0 and a.GetFormalCharge()==0): - return 0 - # all other atoms have at least a reactivity of one - reactivity=1 - b = a.GetBonds() - # if it is a heteroatom or has an H (we already know it's not SP3 or aromatic) increase the reactivity - if self._isHeteroAtom(a) or a.GetTotalNumHs() > 0: - reactivity += 1 - # slightly increase reactivity for atoms in aromatic rings compared to aliphatic rings - if a.IsInRing(): - if a.GetIsAromatic(): - reactivity += 0.5 - # but prefer non-ring atoms - else: - reactivity += 1 - # increase reactivity of charged atoms - if a.GetFormalCharge(): - reactivity += 2 - for bo in b: - # look at the direct neighbors of the atom - ni = bo.GetOtherAtom(a) - # for non-single bonds increase the reactivity - if bo.GetBondTypeAsDouble() > 1.5: - reactivity += 1 - # if there are hydrogens attached, increase the reactivity - if ni.GetTotalNumHs() > 0: - reactivity+=1 - # if it is a bond to a hetero atom further increase the reactivity - if self._isHeteroAtom(ni): - reactivity += 1 - # bonds between nitrogens and oxygen or between oxygen and oxygen or between nitrogen and nitrogen are more reactive - if a.GetAtomicNum() in (7,8) and ni.GetAtomicNum() in (7,8): - reactivity += 2 - # if the neighbor is a Mg, Si, P, Pd, or Sn atom increase the reactivity - elif ni.GetAtomicNum() in (12,14,15,46,50): - reactivity += 1 - return reactivity + def _atomScaffoldInvariant(self, mol): + num_atoms = mol.GetNumAtoms() + invariants = [0] * num_atoms + for i, a in enumerate(mol.GetAtoms()): + descriptors = [] + descriptors.append(a.GetAtomicNum()) + invariants[i] = hash(tuple(descriptors)) & 0xffffffff + return invariants - def _calcReactivityMolecule(self, mol): - reactivityAtoms = [self._calcReactivityAtom(a) for a in mol.GetAtoms()] - return reactivityAtoms + def _createFP(self, mol, invariant, bitinfo, useBondTypes=True, radius=1): + return AllChem.GetMorganFingerprint(mol=mol, radius=radius, invariants=invariant, + useBondTypes=useBondTypes, bitInfo=bitinfo) - def __init__(self, molecule, verbose=0): - self.molecule= molecule - self.bitInfoDetailFP={} - self.detailFP = self._createFP(molecule, self._atomDetailInvariant(molecule), self.bitInfoDetailFP) - self.bitInfoScaffoldFP={} - self.scaffoldFP = self._createFP(molecule, self._atomScaffoldInvariant(molecule), self.bitInfoScaffoldFP, useBondTypes=False) - reactivityAtoms = self._calcReactivityMolecule(molecule) - reactivity = sum(reactivityAtoms) - if Chem.MolToSmiles(molecule) in frequentReagents: - reactivity*=0.8 - self.reactivity = reactivity - - -def _calcScore(reactantFP,productFP,bitInfoProd=None,output=False): - if output: - print("--- _calcScore ---") - score=0 - dFP = productFP-reactantFP - numRBits = float(utils.getNumPositiveCounts(reactantFP)) + def _isHeteroAtom(self, a): + return a.GetAtomicNum() not in (6, 1) + + def _isSp3OrAromaticCarbon(self, a): + if a.GetAtomicNum() != 6: + return False + if a.GetIsAromatic(): + return True + for b in a.GetBonds(): + if b.GetBondTypeAsDouble() > 1.5: + return False + return True + + def _calcReactivityAtom(self, a): + # exclude sp3 carbons or uncharged single heavy atoms such as water molecules + if self._isSp3OrAromaticCarbon(a) or (len(a.GetNeighbors()) == 0 and a.GetFormalCharge() == 0): + return 0 + # all other atoms have at least a reactivity of one + reactivity = 1 + b = a.GetBonds() + # if it is a heteroatom or has an H (we already know it's not SP3 or aromatic) increase the reactivity + if self._isHeteroAtom(a) or a.GetTotalNumHs() > 0: + reactivity += 1 + # slightly increase reactivity for atoms in aromatic rings compared to aliphatic rings + if a.IsInRing(): + if a.GetIsAromatic(): + reactivity += 0.5 + # but prefer non-ring atoms + else: + reactivity += 1 + # increase reactivity of charged atoms + if a.GetFormalCharge(): + reactivity += 2 + for bo in b: + # look at the direct neighbors of the atom + ni = bo.GetOtherAtom(a) + # for non-single bonds increase the reactivity + if bo.GetBondTypeAsDouble() > 1.5: + reactivity += 1 + # if there are hydrogens attached, increase the reactivity + if ni.GetTotalNumHs() > 0: + reactivity += 1 + # if it is a bond to a hetero atom further increase the reactivity + if self._isHeteroAtom(ni): + reactivity += 1 + # bonds between nitrogens and oxygen or between oxygen and oxygen or between nitrogen and nitrogen are more reactive + if a.GetAtomicNum() in (7, 8) and ni.GetAtomicNum() in (7, 8): + reactivity += 2 + # if the neighbor is a Mg, Si, P, Pd, or Sn atom increase the reactivity + elif ni.GetAtomicNum() in (12, 14, 15, 46, 50): + reactivity += 1 + return reactivity + + def _calcReactivityMolecule(self, mol): + reactivityAtoms = [self._calcReactivityAtom(a) for a in mol.GetAtoms()] + return reactivityAtoms + + def __init__(self, molecule, verbose=0): + self.molecule = molecule + self.bitInfoDetailFP = {} + self.detailFP = self._createFP(molecule, self._atomDetailInvariant(molecule), + self.bitInfoDetailFP) + self.bitInfoScaffoldFP = {} + self.scaffoldFP = self._createFP(molecule, self._atomScaffoldInvariant(molecule), + self.bitInfoScaffoldFP, useBondTypes=False) + reactivityAtoms = self._calcReactivityMolecule(molecule) + reactivity = sum(reactivityAtoms) + if Chem.MolToSmiles(molecule) in frequentReagents: + reactivity *= 0.8 + self.reactivity = reactivity + + +def _calcScore(reactantFP, productFP, bitInfoProd=None, output=False): + if output: + print("--- _calcScore ---") + score = 0 + dFP = productFP - reactantFP + numRBits = float(utils.getNumPositiveCounts(reactantFP)) + if output > 2: + print("num RBits: ", numRBits) + numPBits = float(utils.getNumPositiveCounts(productFP)) + if output > 2: + print("num PBits: ", numPBits) + numUnmappedPBits = float(utils.getNumPositiveCounts(dFP)) + if output > 2: + print("num UnmappedPBits: ", numUnmappedPBits) + numUnmappedRBits = float(utils.getNumNegativeCounts(dFP)) + if output > 2: + print("num UnmappedRBits: ", numUnmappedRBits) + + numUnmappedPAtoms = -1 + bitsUnmappedPAtoms = -1 + if bitInfoProd is not None: + numUnmappedPAtoms, bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0(dFP, bitInfoProd) if output > 2: - print("num RBits: ",numRBits) - numPBits = float(utils.getNumPositiveCounts(productFP)) - if output > 2: - print("num PBits: ",numPBits) - numUnmappedPBits = float(utils.getNumPositiveCounts(dFP)) - if output > 2: - print("num UnmappedPBits: ",numUnmappedPBits) - numUnmappedRBits = float(utils.getNumNegativeCounts(dFP)) - if output > 2: - print("num UnmappedRBits: ",numUnmappedRBits) + print("num UnmappedPAtoms: ", numUnmappedPAtoms) + ratioMappedPBits = 1 - (numUnmappedPBits / numPBits) + ratioUnmappedRBits = numUnmappedRBits / numRBits + score = max(ratioMappedPBits - ratioUnmappedRBits * ratioUnmappedRBits, 0) - numUnmappedPAtoms=-1 - bitsUnmappedPAtoms=-1 - if bitInfoProd is not None: - numUnmappedPAtoms,bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0(dFP,bitInfoProd) - if output > 2: - print("num UnmappedPAtoms: ", numUnmappedPAtoms) - ratioMappedPBits = 1-(numUnmappedPBits/numPBits) - ratioUnmappedRBits = numUnmappedRBits/numRBits - score = max(ratioMappedPBits - ratioUnmappedRBits*ratioUnmappedRBits,0) + if output > 1: + print("score: ", score, "(", ratioMappedPBits, ",", ratioUnmappedRBits * ratioUnmappedRBits, + ",", ratioUnmappedRBits, ")") + + return [score, numUnmappedPBits, numUnmappedPAtoms, bitsUnmappedPAtoms] - if output > 1: - print("score: ",score, "(",ratioMappedPBits,",",ratioUnmappedRBits*ratioUnmappedRBits,",",ratioUnmappedRBits,")") - - return [score,numUnmappedPBits,numUnmappedPAtoms,bitsUnmappedPAtoms] # Set of frequent reagents derived from all patent reactions -frequentReagents = set(['CCN(CC)CC', '[Li+]', '[Na+]', 'O=C(O)CC(O)(CC(=O)O)C(=O)O', 'O=S(=O)(O)O', 'CN1CCCC1=O', 'CCN(C(C)C)C(C)C', - 'c1ccncc1', '[K]', 'CC(C)(C)O', 'CCO', 'Cc1ccc(S(=O)(=O)O)cc1', 'ClC(Cl)(Cl)Cl', '[Na]', 'CC(C)(C)[O-]', 'O=C([O-])O', 'COCCOC', '[NH4+]', - 'CC(C)OC(C)C', 'O=C([O-])[O-]', 'CC(=O)OC(C)=O', 'O=C=O', '[Cl-]', 'c1ccc(P(c2ccccc2)c2ccccc2)cc1', '[H-]', 'N#N', 'CN1CCOCC1', - 'C1COCCO1', 'c1ccccc1', '[Cs+]', '[K+]', '[OH-]', 'CCCCCC', 'CCCCC', 'CN(C)C=O', 'C[O-]', 'Cc1ccccc1', 'C1CCC2=NCCCN2CC1', 'CO', - 'CCCCO', 'O=C(O)C(F)(F)F', 'O=P([O-])([O-])[O-]', 'CCOC(C)=O', '[Mg+2]', 'C1CCCCC1', 'O', 'N', 'II', 'O=CO', 'CC(=O)N(C)C', 'CC(=O)O', - 'CCOCC', 'CC(C)O', 'C[Si](C)(C)Cl', 'Cc1ccccc1C', 'CC(C)=O', 'CS(=O)(=O)O', 'CN(C)c1ccncc1', 'Cl', 'ClCCCl', 'O=S(Cl)Cl', 'ClC(Cl)Cl', - '[Li]CCCC', '[Pd]', '[H][H]', '[Br-]', 'CS(C)=O', 'COC(C)(C)C', 'O=S(=O)([O-])[O-]', 'CC(Cl)Cl', 'CC(=O)[O-]', 'CCCC[N+](CCCC)(CCCC)CCCC', - 'ClCCl', 'CC#N', 'C1CCOC1', 'CCCCCCC']) +frequentReagents = set([ + 'CCN(CC)CC', '[Li+]', '[Na+]', 'O=C(O)CC(O)(CC(=O)O)C(=O)O', 'O=S(=O)(O)O', 'CN1CCCC1=O', + 'CCN(C(C)C)C(C)C', 'c1ccncc1', '[K]', 'CC(C)(C)O', 'CCO', 'Cc1ccc(S(=O)(=O)O)cc1', + 'ClC(Cl)(Cl)Cl', '[Na]', 'CC(C)(C)[O-]', 'O=C([O-])O', 'COCCOC', '[NH4+]', 'CC(C)OC(C)C', + 'O=C([O-])[O-]', 'CC(=O)OC(C)=O', 'O=C=O', '[Cl-]', 'c1ccc(P(c2ccccc2)c2ccccc2)cc1', '[H-]', + 'N#N', 'CN1CCOCC1', 'C1COCCO1', 'c1ccccc1', '[Cs+]', '[K+]', '[OH-]', 'CCCCCC', 'CCCCC', + 'CN(C)C=O', 'C[O-]', 'Cc1ccccc1', 'C1CCC2=NCCCN2CC1', 'CO', 'CCCCO', 'O=C(O)C(F)(F)F', + 'O=P([O-])([O-])[O-]', 'CCOC(C)=O', '[Mg+2]', 'C1CCCCC1', 'O', 'N', 'II', 'O=CO', 'CC(=O)N(C)C', + 'CC(=O)O', 'CCOCC', 'CC(C)O', 'C[Si](C)(C)Cl', 'Cc1ccccc1C', 'CC(C)=O', 'CS(=O)(=O)O', + 'CN(C)c1ccncc1', 'Cl', 'ClCCCl', 'O=S(Cl)Cl', 'ClC(Cl)Cl', '[Li]CCCC', '[Pd]', '[H][H]', '[Br-]', + 'CS(C)=O', 'COC(C)(C)C', 'O=S(=O)([O-])[O-]', 'CC(Cl)Cl', 'CC(=O)[O-]', + 'CCCC[N+](CCCC)(CCCC)CCCC', 'ClCCl', 'CC#N', 'C1CCOC1', 'CCCCCCC' +]) -def _getBestCombination(rfps,pfps,output=False): +def _getBestCombination(rfps, pfps, output=False): - if output: - print("--- _getBestCombination ---") + if output: + print("--- _getBestCombination ---") - tests=[] - numReactants=len(rfps) - # generate first all reactant combinations - for i in range(1,numReactants+1): - for x in itertools.combinations(range(numReactants),i): - temp=[] - for j in x: - # don't include frequent reagents - if not rfps[j][1]: - numAtms = rfps[j][0].molecule.GetNumAtoms() - # not test single ions - if numAtms > 1: - # store the number of reactant atoms for later - temp.append((rfps[j][0].molecule.GetNumAtoms(),j)) - else: - if output > 3: - print("Frequent reagent found: ", j) - if temp not in tests: - tests.append(temp) - # initialisation of the results - maxScore=0 - maxDetailScore=0 - finalReacts=[[]] - # get the product fingerprints - productsDetailFP = utils.getSumFps([i.detailFP for i in pfps]) - productsScaffoldFP = utils.getSumFps([i.scaffoldFP for i in pfps]) - # get the number of atoms for the product - numProductAtoms = 0 - for i in pfps: - numProductAtoms += i.molecule.GetNumAtoms() - # get the bitinfo for the product FP - productsDetailFPBitInfo={} - productsScaffoldFPBitInfo={} - for i in pfps: - productsDetailFPBitInfo.update(i.bitInfoDetailFP) - productsScaffoldFPBitInfo.update(i.bitInfoScaffoldFP) - # set some initial values - numUnmappedPAtoms,bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0(productsScaffoldFP,productsScaffoldFPBitInfo) - finalNumUnmappedProdAtoms=[[len(productsDetailFP.GetNonzeroElements()), - len(productsScaffoldFP.GetNonzeroElements()),numUnmappedPAtoms,bitsUnmappedPAtoms]] + tests = [] + numReactants = len(rfps) + # generate first all reactant combinations + for i in range(1, numReactants + 1): + for x in itertools.combinations(range(numReactants), i): + temp = [] + for j in x: + # don't include frequent reagents + if not rfps[j][1]: + numAtms = rfps[j][0].molecule.GetNumAtoms() + # not test single ions + if numAtms > 1: + # store the number of reactant atoms for later + temp.append((rfps[j][0].molecule.GetNumAtoms(), j)) + else: + if output > 3: + print("Frequent reagent found: ", j) + if temp not in tests: + tests.append(temp) + # initialisation of the results + maxScore = 0 + maxDetailScore = 0 + finalReacts = [[]] + # get the product fingerprints + productsDetailFP = utils.getSumFps([i.detailFP for i in pfps]) + productsScaffoldFP = utils.getSumFps([i.scaffoldFP for i in pfps]) + # get the number of atoms for the product + numProductAtoms = 0 + for i in pfps: + numProductAtoms += i.molecule.GetNumAtoms() + # get the bitinfo for the product FP + productsDetailFPBitInfo = {} + productsScaffoldFPBitInfo = {} + for i in pfps: + productsDetailFPBitInfo.update(i.bitInfoDetailFP) + productsScaffoldFPBitInfo.update(i.bitInfoScaffoldFP) + # set some initial values + numUnmappedPAtoms, bitsUnmappedPAtoms = utils.getNumPositiveBitCountsOfRadius0( + productsScaffoldFP, productsScaffoldFPBitInfo) + finalNumUnmappedProdAtoms = [[ + len(productsDetailFP.GetNonzeroElements()), + len(productsScaffoldFP.GetNonzeroElements()), numUnmappedPAtoms, bitsUnmappedPAtoms + ]] - for test in tests: - if len(test) < 1: - continue - # get the number of involved reactant atoms - numReactantAtoms = np.array(test)[:,0].sum() - # ignore combinations including too many or too few atoms - if numReactantAtoms > 5*numProductAtoms or numReactantAtoms < numProductAtoms*0.8: - continue - - if output > 0: - print("Combination: ",test) - - #build the combined reactant FPs - reactantsDetailFP = utils.getSumFps([rfps[i[1]][0].detailFP for i in test]) - reactantsScaffoldFP = utils.getSumFps([rfps[i[1]][0].scaffoldFP for i in test]) + for test in tests: + if len(test) < 1: + continue + # get the number of involved reactant atoms + numReactantAtoms = np.array(test)[:, 0].sum() + # ignore combinations including too many or too few atoms + if numReactantAtoms > 5 * numProductAtoms or numReactantAtoms < numProductAtoms * 0.8: + continue - # get the scores for both FPs - detailFPScore = _calcScore(reactantsDetailFP,productsDetailFP,bitInfoProd=productsDetailFPBitInfo,output=output) - scaffoldFPScore = _calcScore(reactantsScaffoldFP,productsScaffoldFP,bitInfoProd=productsScaffoldFPBitInfo,output=output) - # final score - score = detailFPScore[0] + scaffoldFPScore[0] - - if output > 0: - print(">>>> score: ", score) - print(">>>> scores (detail, scaffold): ", detailFPScore[0], scaffoldFPScore[0]) - print(">>>> num unmapped productFP bits: ", detailFPScore[1], scaffoldFPScore[1], detailFPScore[2], scaffoldFPScore[2]) + if output > 0: + print("Combination: ", test) + + #build the combined reactant FPs + reactantsDetailFP = utils.getSumFps([rfps[i[1]][0].detailFP for i in test]) + reactantsScaffoldFP = utils.getSumFps([rfps[i[1]][0].scaffoldFP for i in test]) + + # get the scores for both FPs + detailFPScore = _calcScore(reactantsDetailFP, productsDetailFP, + bitInfoProd=productsDetailFPBitInfo, output=output) + scaffoldFPScore = _calcScore(reactantsScaffoldFP, productsScaffoldFP, + bitInfoProd=productsScaffoldFPBitInfo, output=output) + # final score + score = detailFPScore[0] + scaffoldFPScore[0] + + if output > 0: + print(">>>> score: ", score) + print(">>>> scores (detail, scaffold): ", detailFPScore[0], scaffoldFPScore[0]) + print(">>>> num unmapped productFP bits: ", detailFPScore[1], scaffoldFPScore[1], + detailFPScore[2], scaffoldFPScore[2]) + + if score > maxScore: + maxScore = score + maxDetailScore = detailFPScore[0] + del finalReacts[:] + del finalNumUnmappedProdAtoms[:] + # set the final reactants + finalReacts.append([i[1] for i in test]) + # for tracking the mapping of the product atoms include the number of unmapped detailedFP bits, the number of unmapped + # atoms based on the scaffold FP, the number of unmapped scaffoldFP bits, and the unmapped scaffoldFP bits + finalNumUnmappedProdAtoms.append( + [detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1], scaffoldFPScore[-1]]) + if output > 0: + print(" >> maxScore: ", maxScore) + print(" >> Final reactants: ", finalReacts) + # test for almost perfect matchings (e.g. oxidations, reduction etc.) + if scaffoldFPScore[0] > 0.9999 and detailFPScore[0] > 0.8: + return finalReacts, finalNumUnmappedProdAtoms + # test for number of mapped product atoms e.g. to capture deprotections earlier + if len(finalNumUnmappedProdAtoms) > 0 and len(test) == 1: + if finalNumUnmappedProdAtoms[0][1] == 0 and finalNumUnmappedProdAtoms[0][0] <= 3: + return finalReacts, finalNumUnmappedProdAtoms + # include alternative solutions + elif abs(score - maxScore) < 0.0000001 and score > 0.0: + finalReacts.append([i[1] for i in test]) + finalNumUnmappedProdAtoms.append( + [detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1], scaffoldFPScore[-1]]) + if output > 0: + print(" >> Added alternative result") + print(" >> Final reactants: ", finalReacts) + + return finalReacts, finalNumUnmappedProdAtoms - if score > maxScore: - maxScore=score - maxDetailScore=detailFPScore[0] - del finalReacts[:] - del finalNumUnmappedProdAtoms[:] - # set the final reactants - finalReacts.append([i[1] for i in test]) - # for tracking the mapping of the product atoms include the number of unmapped detailedFP bits, the number of unmapped - # atoms based on the scaffold FP, the number of unmapped scaffoldFP bits, and the unmapped scaffoldFP bits - finalNumUnmappedProdAtoms.append([detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1],scaffoldFPScore[-1]]) - if output > 0: - print(" >> maxScore: ", maxScore) - print(" >> Final reactants: ", finalReacts) - # test for almost perfect matchings (e.g. oxidations, reduction etc.) - if scaffoldFPScore[0] > 0.9999 and detailFPScore[0] > 0.8: - return finalReacts, finalNumUnmappedProdAtoms - # test for number of mapped product atoms e.g. to capture deprotections earlier - if len(finalNumUnmappedProdAtoms) > 0 and len(test) == 1: - if finalNumUnmappedProdAtoms[0][1] == 0 and finalNumUnmappedProdAtoms[0][0] <= 3: - return finalReacts, finalNumUnmappedProdAtoms - # include alternative solutions - elif abs(score - maxScore) < 0.0000001 and score > 0.0: - finalReacts.append([i[1] for i in test]) - finalNumUnmappedProdAtoms.append([detailFPScore[1], scaffoldFPScore[2], scaffoldFPScore[1],scaffoldFPScore[-1]]) - if output > 0: - print(" >> Added alternative result") - print(" >> Final reactants: ", finalReacts) - - return finalReacts, finalNumUnmappedProdAtoms def _findMissingReactiveReactants(rfps, pfps, currentReactants, unmappedPAtoms, output=False): - if output: - print("--- _findMissingReactiveReactants ---") - if not len(unmappedPAtoms): - return currentReactants - # if there are unmapped product bits find possible reactants for those - else: - finalReactants = [] - numReactants=len(rfps) - # investigate all possible solutions of the scoring before - for reacts,umPA in zip(currentReactants,unmappedPAtoms): - # if there are unmapped product atoms find possible reactants for those - finalReactants.append(reacts) - if umPA[1] > 0: - remainingReactants=set(range(numReactants)).difference(set(reacts)) - # sort the possible reactants by the reactivity - remainingReactants = sorted(remainingReactants, key=lambda x: rfps[x].reactivity/float(rfps[x].molecule.GetNumAtoms()), - reverse=True) - missingPAtoms = [] - # get the missing atoms and counts - for bit,c in umPA[-1]: - for pbi in range(len(pfps)): - if bit in pfps[pbi].bitInfoScaffoldFP: - a = pfps[pbi].bitInfoScaffoldFP[bit][0] - missingPAtoms.extend([pfps[pbi].molecule.GetAtomWithIdx(a[0]).GetAtomicNum()]*c) - missingPAtoms = Counter(missingPAtoms) - if output > 0: - print(missingPAtoms) - # build queries for the missing atoms - queries=[(rdqueries.AtomNumEqualsQueryAtom(a),a) for a in missingPAtoms] - maxFullfilledQueries=0 - maxReactivity=-1 - addReactants=[] - # search for the most reactive reactants capturing all/most of the unmapped product atoms - for r in remainingReactants: - if output > 0: - print(" >> Reactant", r, rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms())) - countFullfilledQueries=0 - for q,a in queries: - if len(rfps[r].molecule.GetAtomsMatchingQuery(q)) >= missingPAtoms[a]: - countFullfilledQueries+=1 - if output > 0: - print(" Max reactivity", maxReactivity) - print(" Max fulfilled queries", maxFullfilledQueries) - if countFullfilledQueries > maxFullfilledQueries: - maxFullfilledQueries = countFullfilledQueries - maxReactivity = rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms()) - addReactants = [r] - elif maxFullfilledQueries and countFullfilledQueries == maxFullfilledQueries and \ - rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms()) >= maxReactivity: - maxFullfilledQueries = countFullfilledQueries - addReactants.append(r) - if output > 0: - print(" Added reactants", addReactants) - finalReactants[-1].extend(addReactants) - if output > 0: - print(" >> Final reactants", finalReactants) - return finalReactants + if output: + print("--- _findMissingReactiveReactants ---") + if not len(unmappedPAtoms): + return currentReactants + # if there are unmapped product bits find possible reactants for those + else: + finalReactants = [] + numReactants = len(rfps) + # investigate all possible solutions of the scoring before + for reacts, umPA in zip(currentReactants, unmappedPAtoms): + # if there are unmapped product atoms find possible reactants for those + finalReactants.append(reacts) + if umPA[1] > 0: + remainingReactants = set(range(numReactants)).difference(set(reacts)) + # sort the possible reactants by the reactivity + remainingReactants = sorted( + remainingReactants, + key=lambda x: rfps[x].reactivity / float(rfps[x].molecule.GetNumAtoms()), reverse=True) + missingPAtoms = [] + # get the missing atoms and counts + for bit, c in umPA[-1]: + for pbi in range(len(pfps)): + if bit in pfps[pbi].bitInfoScaffoldFP: + a = pfps[pbi].bitInfoScaffoldFP[bit][0] + missingPAtoms.extend([pfps[pbi].molecule.GetAtomWithIdx(a[0]).GetAtomicNum()] * c) + missingPAtoms = Counter(missingPAtoms) + if output > 0: + print(missingPAtoms) + # build queries for the missing atoms + queries = [(rdqueries.AtomNumEqualsQueryAtom(a), a) for a in missingPAtoms] + maxFullfilledQueries = 0 + maxReactivity = -1 + addReactants = [] + # search for the most reactive reactants capturing all/most of the unmapped product atoms + for r in remainingReactants: + if output > 0: + print(" >> Reactant", r, rfps[r].reactivity / float(rfps[r].molecule.GetNumAtoms())) + countFullfilledQueries = 0 + for q, a in queries: + if len(rfps[r].molecule.GetAtomsMatchingQuery(q)) >= missingPAtoms[a]: + countFullfilledQueries += 1 + if output > 0: + print(" Max reactivity", maxReactivity) + print(" Max fulfilled queries", maxFullfilledQueries) + if countFullfilledQueries > maxFullfilledQueries: + maxFullfilledQueries = countFullfilledQueries + maxReactivity = rfps[r].reactivity / float(rfps[r].molecule.GetNumAtoms()) + addReactants = [r] + elif maxFullfilledQueries and countFullfilledQueries == maxFullfilledQueries and \ + rfps[r].reactivity/float(rfps[r].molecule.GetNumAtoms()) >= maxReactivity: + maxFullfilledQueries = countFullfilledQueries + addReactants.append(r) + if output > 0: + print(" Added reactants", addReactants) + finalReactants[-1].extend(addReactants) + if output > 0: + print(" >> Final reactants", finalReactants) + return finalReactants + def _detectObviousReagents(reactants, products): - unchangedReacts=set() - unchangedProds=set() - for i,r in enumerate(reactants): - for j,p in enumerate(products): - if r==p: - unchangedReacts.add(i) - unchangedProds.add(j) - return unchangedReacts,unchangedProds + unchangedReacts = set() + unchangedProds = set() + for i, r in enumerate(reactants): + for j, p in enumerate(products): + if r == p: + unchangedReacts.add(i) + unchangedProds.add(j) + return unchangedReacts, unchangedProds -def identifyReactants(reaction,output=False): - rxn = AllChem.ChemicalReaction(reaction) - AllChem.RemoveMappingNumbersFromReactions(rxn) - if output: - print("--- identifyReactants ---") - reactants = rxn.GetReactants() - products = rxn.GetProducts() - ### Preprocessing - uniqueReactants,reactantSmiles = utils.uniqueMolecules(reactants) - uniqueProducts,productSmiles = utils.uniqueMolecules(products) - # find molecules which do not change in the rxn - unmodifiedReactants,unmodifiedProducts = _detectObviousReagents(reactantSmiles, productSmiles) - if output: - print(" >>> Found reagents in reactants:", unmodifiedReactants) - print(" >>> Found reagents in products:", unmodifiedProducts) - if len(products) == len(unmodifiedProducts): - unmodifiedProducts=set() - uniquePotentialReactants = [r for r in sorted(set(uniqueReactants.values()))] - uniquePotentialProducts = [p for p in sorted(set(uniqueProducts.values())) if p not in unmodifiedProducts] - - ### Find the most probable reactants - # only generate moleculeDetail objects for unique, potential reactants and products - rfps = [MoleculeDetails(reactants[r]) for r in uniquePotentialReactants] - pfps = [MoleculeDetails(products[p]) for p in uniquePotentialProducts] - - rfpsPrep = [(MoleculeDetails(reactants[r]),reactantSmiles[r] in frequentReagents) for r in uniquePotentialReactants] - reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep,pfps,output=output) - # no reactants where found try again including the frequent reagents - if np.array(reacts).shape == (1,0): - rfpsPrep = [(MoleculeDetails(reactants[r]),0) for r in uniquePotentialReactants] - reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep,pfps,output=output) - - ### Postprocessing - # identify missing reactants - reacts = _findMissingReactiveReactants(rfps, pfps, reacts, unmappedProdAtoms, output=output) - finalreacts = [] - for i in reacts: - temp=[uniquePotentialReactants[j] for j in i] - finalreacts.append(set(temp)) - - return finalreacts, unmodifiedReactants, unmodifiedProducts +def identifyReactants(reaction, output=False): + rxn = AllChem.ChemicalReaction(reaction) + AllChem.RemoveMappingNumbersFromReactions(rxn) + if output: + print("--- identifyReactants ---") + reactants = rxn.GetReactants() + products = rxn.GetProducts() + ### Preprocessing + uniqueReactants, reactantSmiles = utils.uniqueMolecules(reactants) + uniqueProducts, productSmiles = utils.uniqueMolecules(products) + # find molecules which do not change in the rxn + unmodifiedReactants, unmodifiedProducts = _detectObviousReagents(reactantSmiles, productSmiles) + if output: + print(" >>> Found reagents in reactants:", unmodifiedReactants) + print(" >>> Found reagents in products:", unmodifiedProducts) + if len(products) == len(unmodifiedProducts): + unmodifiedProducts = set() + uniquePotentialReactants = [r for r in sorted(set(uniqueReactants.values()))] + uniquePotentialProducts = [ + p for p in sorted(set(uniqueProducts.values())) if p not in unmodifiedProducts + ] + + ### Find the most probable reactants + # only generate moleculeDetail objects for unique, potential reactants and products + rfps = [MoleculeDetails(reactants[r]) for r in uniquePotentialReactants] + pfps = [MoleculeDetails(products[p]) for p in uniquePotentialProducts] + + rfpsPrep = [(MoleculeDetails(reactants[r]), reactantSmiles[r] in frequentReagents) + for r in uniquePotentialReactants] + + reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep, pfps, output=output) + # no reactants where found try again including the frequent reagents + if np.array(reacts).shape == (1, 0): + rfpsPrep = [(MoleculeDetails(reactants[r]), 0) for r in uniquePotentialReactants] + reacts, unmappedProdAtoms = _getBestCombination(rfpsPrep, pfps, output=output) + + ### Postprocessing + # identify missing reactants + reacts = _findMissingReactiveReactants(rfps, pfps, reacts, unmappedProdAtoms, output=output) + finalreacts = [] + for i in reacts: + temp = [uniquePotentialReactants[j] for j in i] + finalreacts.append(set(temp)) + + return finalreacts, unmodifiedReactants, unmodifiedProducts + # reassign the reaction roles of a reaction def reassignRXNRoles(rxn): - utils.transferAgentsToReactants(rxn) - reacts, rAgents, pAgents = identifyReactants(rxn) - if len(reacts) < 1: - return None - new_rxn = AllChem.ChemicalReaction() - for i in range(rxn.GetNumProductTemplates()): - new_rxn.AddProductTemplate(rxn.GetProductTemplate(i)) - for i in range(rxn.GetNumReactantTemplates()): - if i in reacts[0]: - new_rxn.AddReactantTemplate(rxn.GetReactantTemplate(i)) - else: - new_rxn.AddAgentTemplate(rxn.GetReactantTemplate(i)) - return new_rxn + utils.transferAgentsToReactants(rxn) + reacts, rAgents, pAgents = identifyReactants(rxn) + if len(reacts) < 1: + return None + new_rxn = AllChem.ChemicalReaction() + for i in range(rxn.GetNumProductTemplates()): + new_rxn.AddProductTemplate(rxn.GetProductTemplate(i)) + for i in range(rxn.GetNumReactantTemplates()): + if i in reacts[0]: + new_rxn.AddReactantTemplate(rxn.GetReactantTemplate(i)) + else: + new_rxn.AddAgentTemplate(rxn.GetReactantTemplate(i)) + return new_rxn + # clean-up the reaction smiles def reassignReactionRoles(smi): - rxn = AllChem.ReactionFromSmarts(smi,useSmiles=True) - new_rxn = reassignRXNRoles(rxn) - if new_rxn is None: - return '' - smi_new = AllChem.ReactionToSmiles(new_rxn) - return smi_new + rxn = AllChem.ReactionFromSmarts(smi, useSmiles=True) + new_rxn = reassignRXNRoles(rxn) + if new_rxn is None: + return '' + smi_new = AllChem.ReactionToSmiles(new_rxn) + return smi_new diff --git a/Contrib/RxnRoleAssignment/utils.py b/Contrib/RxnRoleAssignment/utils.py index 639116d1d..ae9076f53 100644 --- a/Contrib/RxnRoleAssignment/utils.py +++ b/Contrib/RxnRoleAssignment/utils.py @@ -1,19 +1,19 @@ # # Copyright (c) 2016, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -30,58 +30,64 @@ # # Created by Nadine Schneider, July 2016 +import copy +from collections import defaultdict from rdkit import Chem from rdkit.Chem import AllChem -from collections import defaultdict -import copy def transferAgentsToReactants(rxn): - for a in range(rxn.GetNumAgentTemplates()): - agent = rxn.GetAgentTemplate(a) - rxn.AddReactantTemplate(agent) + for a in range(rxn.GetNumAgentTemplates()): + agent = rxn.GetAgentTemplate(a) + rxn.AddReactantTemplate(agent) + def removeAgentsAndTransferToReactants(rxn): - tmp=[] - rxn.RemoveAgentTemplates(tmp) - for a in tmp: - rxn.AddReactantTemplate(a) + tmp = [] + rxn.RemoveAgentTemplates(tmp) + for a in tmp: + rxn.AddReactantTemplate(a) + def getNumPositiveCounts(fp): - count=0 - for k,v in fp.GetNonzeroElements().items(): - if v > 0: - count+=v - return count + count = 0 + for k, v in fp.GetNonzeroElements().items(): + if v > 0: + count += v + return count + def getNumNegativeCounts(fp): - count=0 - for k,v in fp.GetNonzeroElements().items(): - if v < 0: - count+=abs(v) - return count + count = 0 + for k, v in fp.GetNonzeroElements().items(): + if v < 0: + count += abs(v) + return count + + +def getNumPositiveBitCountsOfRadius0(fp, bitinfo): + count = 0 + bitsUnmappedAtoms = [] + for k in bitinfo: + if bitinfo[k][0][1] == 0: + v = fp[k] + if v > 0: + count += 1 + bitsUnmappedAtoms.append((k, v)) + return count, bitsUnmappedAtoms -def getNumPositiveBitCountsOfRadius0(fp,bitinfo): - count=0 - bitsUnmappedAtoms=[] - for k in bitinfo: - if bitinfo[k][0][1] == 0: - v = fp[k] - if v>0: - count += 1 - bitsUnmappedAtoms.append((k,v)) - return count,bitsUnmappedAtoms def getSumFps(fps): - summedFP = copy.deepcopy(fps[0]) - for fp in fps[1:]: - summedFP += fp - return summedFP + summedFP = copy.deepcopy(fps[0]) + for fp in fps[1:]: + summedFP += fp + return summedFP + def uniqueMolecules(mols): - smiles = [Chem.MolToSmiles(mol) for mol in mols] - uniqueMolecules=defaultdict(int) - for n,smi in enumerate(smiles): - uniqueMolecules[n]=smiles.index(smi) - return uniqueMolecules,smiles + smiles = [Chem.MolToSmiles(mol) for mol in mols] + uniqueMolecules = defaultdict(int) + for n, smi in enumerate(smiles): + uniqueMolecules[n] = smiles.index(smi) + return uniqueMolecules, smiles diff --git a/Contrib/SA_Score/UnitTestSAScore.py b/Contrib/SA_Score/UnitTestSAScore.py index 335c688dd..ce5c1798d 100644 --- a/Contrib/SA_Score/UnitTestSAScore.py +++ b/Contrib/SA_Score/UnitTestSAScore.py @@ -1,8 +1,10 @@ +import os.path +import unittest -from rdkit import RDConfig -from rdkit import Chem -import unittest, os.path import sascorer + +from rdkit import Chem, RDConfig + print(sascorer.__file__) @@ -21,7 +23,9 @@ class TestCase(unittest.TestCase): if __name__ == '__main__': - import sys, getopt, re + import getopt + import re + import sys doLong = 0 if len(sys.argv) > 1: args, extras = getopt.getopt(sys.argv[1:], 'l') diff --git a/Contrib/SA_Score/sascorer.py b/Contrib/SA_Score/sascorer.py index 4440dd2de..88566f73d 100644 --- a/Contrib/SA_Score/sascorer.py +++ b/Contrib/SA_Score/sascorer.py @@ -16,130 +16,128 @@ # peter ertl & greg landrum, september 2013 # +import math +import os.path as op +import pickle +from collections import defaultdict from rdkit import Chem from rdkit.Chem import rdMolDescriptors -import pickle - -import math -from collections import defaultdict - -import os.path as op _fscores = None def readFragmentScores(name='fpscores'): - import gzip - global _fscores - # generate the full path filename: - if name == "fpscores": - name = op.join(op.dirname(__file__), name) - data = pickle.load(gzip.open('%s.pkl.gz' % name)) - outDict = {} - for i in data: - for j in range(1, len(i)): - outDict[i[j]] = float(i[0]) - _fscores = outDict + import gzip + global _fscores + # generate the full path filename: + if name == "fpscores": + name = op.join(op.dirname(__file__), name) + data = pickle.load(gzip.open('%s.pkl.gz' % name)) + outDict = {} + for i in data: + for j in range(1, len(i)): + outDict[i[j]] = float(i[0]) + _fscores = outDict def numBridgeheadsAndSpiro(mol, ri=None): - nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol) - nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol) - return nBridgehead, nSpiro + nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol) + nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol) + return nBridgehead, nSpiro def calculateScore(m): - if _fscores is None: - readFragmentScores() + if _fscores is None: + readFragmentScores() - # fragment score - fp = rdMolDescriptors.GetMorganFingerprint(m, - 2) # <- 2 is the *radius* of the circular fingerprint - fps = fp.GetNonzeroElements() - score1 = 0. - nf = 0 - for bitId, v in fps.items(): - nf += v - sfp = bitId - score1 += _fscores.get(sfp, -4) * v - score1 /= nf + # fragment score + fp = rdMolDescriptors.GetMorganFingerprint(m, + 2) # <- 2 is the *radius* of the circular fingerprint + fps = fp.GetNonzeroElements() + score1 = 0. + nf = 0 + for bitId, v in fps.items(): + nf += v + sfp = bitId + score1 += _fscores.get(sfp, -4) * v + score1 /= nf - # features score - nAtoms = m.GetNumAtoms() - nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True)) - ri = m.GetRingInfo() - nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri) - nMacrocycles = 0 - for x in ri.AtomRings(): - if len(x) > 8: - nMacrocycles += 1 + # features score + nAtoms = m.GetNumAtoms() + nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True)) + ri = m.GetRingInfo() + nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri) + nMacrocycles = 0 + for x in ri.AtomRings(): + if len(x) > 8: + nMacrocycles += 1 - sizePenalty = nAtoms**1.005 - nAtoms - stereoPenalty = math.log10(nChiralCenters + 1) - spiroPenalty = math.log10(nSpiro + 1) - bridgePenalty = math.log10(nBridgeheads + 1) - macrocyclePenalty = 0. - # --------------------------------------- - # This differs from the paper, which defines: - # macrocyclePenalty = math.log10(nMacrocycles+1) - # This form generates better results when 2 or more macrocycles are present - if nMacrocycles > 0: - macrocyclePenalty = math.log10(2) + sizePenalty = nAtoms**1.005 - nAtoms + stereoPenalty = math.log10(nChiralCenters + 1) + spiroPenalty = math.log10(nSpiro + 1) + bridgePenalty = math.log10(nBridgeheads + 1) + macrocyclePenalty = 0. + # --------------------------------------- + # This differs from the paper, which defines: + # macrocyclePenalty = math.log10(nMacrocycles+1) + # This form generates better results when 2 or more macrocycles are present + if nMacrocycles > 0: + macrocyclePenalty = math.log10(2) - score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty + score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty - # correction for the fingerprint density - # not in the original publication, added in version 1.1 - # to make highly symmetrical molecules easier to synthetise - score3 = 0. - if nAtoms > len(fps): - score3 = math.log(float(nAtoms) / len(fps)) * .5 + # correction for the fingerprint density + # not in the original publication, added in version 1.1 + # to make highly symmetrical molecules easier to synthetise + score3 = 0. + if nAtoms > len(fps): + score3 = math.log(float(nAtoms) / len(fps)) * .5 - sascore = score1 + score2 + score3 + sascore = score1 + score2 + score3 - # need to transform "raw" value into scale between 1 and 10 - min = -4.0 - max = 2.5 - sascore = 11. - (sascore - min + 1) / (max - min) * 9. - # smooth the 10-end - if sascore > 8.: - sascore = 8. + math.log(sascore + 1. - 9.) - if sascore > 10.: - sascore = 10.0 - elif sascore < 1.: - sascore = 1.0 + # need to transform "raw" value into scale between 1 and 10 + min = -4.0 + max = 2.5 + sascore = 11. - (sascore - min + 1) / (max - min) * 9. + # smooth the 10-end + if sascore > 8.: + sascore = 8. + math.log(sascore + 1. - 9.) + if sascore > 10.: + sascore = 10.0 + elif sascore < 1.: + sascore = 1.0 - return sascore + return sascore def processMols(mols): - print('smiles\tName\tsa_score') - for i, m in enumerate(mols): - if m is None: - continue + print('smiles\tName\tsa_score') + for i, m in enumerate(mols): + if m is None: + continue - s = calculateScore(m) + s = calculateScore(m) - smiles = Chem.MolToSmiles(m) - print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s) + smiles = Chem.MolToSmiles(m) + print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s) if __name__ == '__main__': - import sys - import time + import sys + import time - t1 = time.time() - readFragmentScores("fpscores") - t2 = time.time() + t1 = time.time() + readFragmentScores("fpscores") + t2 = time.time() - suppl = Chem.SmilesMolSupplier(sys.argv[1]) - t3 = time.time() - processMols(suppl) - t4 = time.time() + suppl = Chem.SmilesMolSupplier(sys.argv[1]) + t3 = time.time() + processMols(suppl) + t4 = time.time() - print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)), - file=sys.stderr) + print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)), + file=sys.stderr) # # Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc. diff --git a/Contrib/fraggle/atomcontrib.py b/Contrib/fraggle/atomcontrib.py index 26aeb7df2..c019e5fb2 100644 --- a/Contrib/fraggle/atomcontrib.py +++ b/Contrib/fraggle/atomcontrib.py @@ -31,10 +31,11 @@ # Created by Jameed Hussain, May 2013 import sys -from optparse import OptionParser -from rdkit import Chem -from rdkit import DataStructs from collections import defaultdict +from optparse import OptionParser + +from rdkit import Chem, DataStructs +from rdkit.Chem.Fraggle import FraggleSim #input format #query_substructs,query_smiles,SMILES,ID,Tversky_sim @@ -44,15 +45,15 @@ from collections import defaultdict #feed to atomcontrib function to return generalised_SMILES #use Tanimoto to compare generalised_SMILES with query smiles to give fraggle similarity -from rdkit.Chem.Fraggle import FraggleSim parser = OptionParser( - description="Program to post-process Tversky search results as part of Fraggle", - epilog="Format of input file: query_frag_smiles,query_smiles,query_id,retrieved_smi,retrieved_id,tversky_sim\t" + description="Program to post-process Tversky search results as part of Fraggle", epilog= + "Format of input file: query_frag_smiles,query_smiles,query_id,retrieved_smi,retrieved_id,tversky_sim\t" "Output: SMILES,ID,QuerySMI,QueryID,Fraggle_Similarity,RDK5_Similarity") parser.add_option( - '-c', '--cutoff', action='store', dest='cutoff', type='float', default=0.7, - help="Cutoff for fraggle similarity. Only results with similarity greater than the cutoff will be output. DEFAULT = 0.7") + '-c', '--cutoff', action='store', dest='cutoff', type='float', default=0.7, help= + "Cutoff for fraggle similarity. Only results with similarity greater than the cutoff will be output. DEFAULT = 0.7" +) parser.add_option('-p', '--pfp', action='store', dest='pfp', type='float', default=0.8, help="Cutoff for partial fp similarity. DEFAULT = 0.8") diff --git a/Contrib/fraggle/cxn_tversky.py b/Contrib/fraggle/cxn_tversky.py index 928422dc6..da8c964d9 100644 --- a/Contrib/fraggle/cxn_tversky.py +++ b/Contrib/fraggle/cxn_tversky.py @@ -32,16 +32,14 @@ # # Created by Jameed Hussain, July 2013 - -import sys -import re import array +import re +import sys -from chemaxon.util import MolHandler +from chemaxon.descriptors import (CFParameters, ChemicalFingerprint, + SimilarityCalculatorFactory) from chemaxon.struc import Molecule -from chemaxon.descriptors import ChemicalFingerprint -from chemaxon.descriptors import CFParameters -from chemaxon.descriptors import SimilarityCalculatorFactory +from chemaxon.util import MolHandler def desalt(mol): @@ -59,7 +57,8 @@ def desalt(mol): cfp = CFParameters( - " ") + " " +) cfp.setLength(1024) cfp.setBondCount(7) cfp.setBitCount(4) diff --git a/Contrib/fraggle/fraggle.py b/Contrib/fraggle/fraggle.py index a3c1536a2..e6531df12 100644 --- a/Contrib/fraggle/fraggle.py +++ b/Contrib/fraggle/fraggle.py @@ -34,10 +34,12 @@ from rdkit import Chem from rdkit.Chem.Fraggle import FraggleSim if __name__ == '__main__': - import sys, re + import re + import sys if (len(sys.argv) >= 2): print( - "Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n") + "Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n" + ) print("USAGE: ./fraggle.py = 2): print( - "Program that canonicalises an input SMIRKS so its in same format as MMP identification program.\n") + "Program that canonicalises an input SMIRKS so its in same format as MMP identification program.\n" + ) print("USAGE: ./cansmirks.py int(matchObj.group(b))): #if(int(matchObj.group(1)) > int(matchObj.group(2))): - smi = re.sub(r'\[\*\:' + matchObj.group(a) + r'\]', '[*:XX' + matchObj.group(b) + 'XX]', smi) - smi = re.sub(r'\[\*\:' + matchObj.group(b) + r'\]', '[*:XX' + matchObj.group(a) + 'XX]', smi) + smi = re.sub(r'\[\*\:' + matchObj.group(a) + r'\]', '[*:XX' + matchObj.group(b) + 'XX]', + smi) + smi = re.sub(r'\[\*\:' + matchObj.group(b) + r'\]', '[*:XX' + matchObj.group(a) + 'XX]', + smi) smi = re.sub('XX', '', smi) return smi @@ -404,16 +407,18 @@ if __name__ == '__main__': #parser = OptionParser() parser = OptionParser(description="Program to generate MMPs") parser.add_option( - '-s', '--symmetric', default=False, action='store_true', dest='sym', - help='Output symmetrically equivalent MMPs, i.e output both cmpd1,cmpd2, SMIRKS:A>>B and cmpd2,cmpd1, SMIRKS:B>>A') + '-s', '--symmetric', default=False, action='store_true', dest='sym', help= + 'Output symmetrically equivalent MMPs, i.e output both cmpd1,cmpd2, SMIRKS:A>>B and cmpd2,cmpd1, SMIRKS:B>>A' + ) parser.add_option( - '-m', '--maxsize', action='store', dest='maxsize', type='int', - help='Maximum size of change (in heavy atoms) allowed in matched molecular pairs identified. DEFAULT=10. \ + '-m', '--maxsize', action='store', dest='maxsize', type='int', help= + 'Maximum size of change (in heavy atoms) allowed in matched molecular pairs identified. DEFAULT=10. \ Note: This option overrides the ratio option if both are specified.') parser.add_option( - '-r', '--ratio', action='store', dest='ratio', type='float', - help='Maximum ratio of change allowed in matched molecular pairs identified. The ratio is: size of change / \ - size of cmpd (in terms of heavy atoms). DEFAULT=0.3. Note: If this option is used with the maxsize option, the maxsize option will be used.') + '-r', '--ratio', action='store', dest='ratio', type='float', help= + 'Maximum ratio of change allowed in matched molecular pairs identified. The ratio is: size of change / \ + size of cmpd (in terms of heavy atoms). DEFAULT=0.3. Note: If this option is used with the maxsize option, the maxsize option will be used.' + ) #parse the command line options (options, args) = parser.parse_args() diff --git a/Contrib/mmpa/mol_transform.py b/Contrib/mmpa/mol_transform.py index 95ec2c4d6..f551b1cab 100644 --- a/Contrib/mmpa/mol_transform.py +++ b/Contrib/mmpa/mol_transform.py @@ -30,9 +30,10 @@ # # Created by Jameed Hussain, July 2013 -import sys import re +import sys from optparse import OptionParser + from rdkit import Chem from rdkit.Chem import AllChem diff --git a/Contrib/mmpa/rfrag.py b/Contrib/mmpa/rfrag.py index 789a11613..59cb00a5c 100644 --- a/Contrib/mmpa/rfrag.py +++ b/Contrib/mmpa/rfrag.py @@ -33,8 +33,9 @@ # Modifications and optimizations by Greg Landrum, July 2015 # -import sys import re +import sys + from rdkit import Chem from rdkit.Chem import rdMMPA @@ -119,8 +120,8 @@ def delete_bonds(smi, id, mol, bonds, out): s2 = Chem.MolFromSmiles(fragments[1]) #need to cansmi again as smiles can be different - output = '%s,%s,,%s.%s' % (smi, id, Chem.MolToSmiles(s1, isomericSmiles=True), - Chem.MolToSmiles(s2, isomericSmiles=True)) + output = '%s,%s,,%s.%s' % (smi, id, Chem.MolToSmiles( + s1, isomericSmiles=True), Chem.MolToSmiles(s2, isomericSmiles=True)) if output not in out: out.add(output) @@ -197,7 +198,7 @@ def fragment_mol(smi, cid): outlines.add(output) if not outlines: # for molecules with no cuts, output the parent molecule itself - outlines.add('%s,%s,,' % (smi,cid)) + outlines.add('%s,%s,,' % (smi, cid)) return outlines @@ -207,7 +208,8 @@ if __name__ == '__main__': if (len(sys.argv) >= 2): print("Program that fragments a user input set of smiles.") print( - "The program enumerates every single,double and triple acyclic single bond cuts in a molecule.\n") + "The program enumerates every single,double and triple acyclic single bond cuts in a molecule.\n" + ) print("USAGE: ./rfrag.py <\b\b\b\b\b\b\b\b\b\b\b') @@ -413,8 +419,9 @@ class p_con: else: continue - self.cmpd_data = requests.get("https://www.ebi.ac.uk/chemblws/compounds/{}.json".format( - bioactivity['ingredient_cmpd_chemblid']), proxies=self.proxy).json() + self.cmpd_data = requests.get( + "https://www.ebi.ac.uk/chemblws/compounds/{}.json".format( + bioactivity['ingredient_cmpd_chemblid']), proxies=self.proxy).json() my_smiles = self.cmpd_data['compound']['smiles'] bioactivity['Smiles'] = my_smiles @@ -559,16 +566,17 @@ class p_con: def step_5_remove_descriptors(self): """remove list of Properties from each compound (hardcoded) which would corrupt process of creating Prediction-Models""" - sd_tags = ['activity__comment', 'alogp', 'assay__chemblid', 'assay__description', 'assay__type', - 'bioactivity__type', 'activity_comment', 'assay_chemblid', 'assay_description', - 'assay_type', 'bioactivity_type', 'cansmirdkit', 'ingredient__cmpd__chemblid', - 'ingredient_cmpd_chemblid', 'knownDrug', 'medChemFriendly', 'molecularFormula', - 'name__in__reference', 'name_in_reference', 'numRo5Violations', 'operator', - 'organism', 'parent__cmpd__chemblid', 'parent_cmpd_chemblid', 'passesRuleOfThree', - 'preferredCompoundName', 'reference', 'rotatableBonds', 'smiles', 'Smiles', - 'stdInChiKey', 'synonyms', 'target__chemblid', 'target_chemblid', - 'target__confidence', 'target__name', 'target_confidence', 'target_name', 'units', - 'value_avg', 'value_stddev'] + ['value'] + sd_tags = [ + 'activity__comment', 'alogp', 'assay__chemblid', 'assay__description', 'assay__type', + 'bioactivity__type', 'activity_comment', 'assay_chemblid', 'assay_description', 'assay_type', + 'bioactivity_type', 'cansmirdkit', 'ingredient__cmpd__chemblid', 'ingredient_cmpd_chemblid', + 'knownDrug', 'medChemFriendly', 'molecularFormula', 'name__in__reference', + 'name_in_reference', 'numRo5Violations', 'operator', 'organism', 'parent__cmpd__chemblid', + 'parent_cmpd_chemblid', 'passesRuleOfThree', 'preferredCompoundName', 'reference', + 'rotatableBonds', 'smiles', 'Smiles', 'stdInChiKey', 'synonyms', 'target__chemblid', + 'target_chemblid', 'target__confidence', 'target__name', 'target_confidence', 'target_name', + 'units', 'value_avg', 'value_stddev' + ] + ['value'] result = [] for mol in self.sd_entries: @@ -595,8 +603,10 @@ class p_con: """train models according to trafficlight using sklearn.ensamble.RandomForestClassifier self.model contains up to 10 models afterwards, use save_model_info(type) to create csv or html containing data for each model""" - title_line = ["#", "accuracy", "MCC", "precision", "recall", "f1", "auc", "kappa", "prevalence", - "bias", "pickel-File"] + title_line = [ + "#", "accuracy", "MCC", "precision", "recall", "f1", "auc", "kappa", "prevalence", "bias", + "pickel-File" + ] self.csv_text = [title_line] TL_list = [] @@ -712,16 +722,20 @@ class p_con: print(conf_matrix2) result_string_cut = [ - randomseedcounter, str(accuracy_CV) + "_" + str(accuracy_std_CV), - str(MCC_CV) + "_" + str(MCC_std_CV), str(precision_CV) + "_" + str(precision_std_CV), - str(recall_CV) + "_" + str(recall_std_CV), str(f1_CV) + "_" + str(f1_std_CV), - str(auc_CV) + "_" + str(auc_std_CV), str(kappa) + "_" + str(kappa_stdev), kappa_prevalence, - kappa_bias, "model_file.pkl" + randomseedcounter, + str(accuracy_CV) + "_" + str(accuracy_std_CV), + str(MCC_CV) + "_" + str(MCC_std_CV), + str(precision_CV) + "_" + str(precision_std_CV), + str(recall_CV) + "_" + str(recall_std_CV), + str(f1_CV) + "_" + str(f1_std_CV), + str(auc_CV) + "_" + str(auc_std_CV), + str(kappa) + "_" + str(kappa_stdev), kappa_prevalence, kappa_bias, "model_file.pkl" ] self.model.append(clf_RF) self.csv_text.append(result_string_cut) + # except Exception as e: # print "got %d models" % len(self.model) # print e diff --git a/Data/DTDs/validate.py b/Data/DTDs/validate.py index f2fb8c6ee..f8dfb17a4 100644 --- a/Data/DTDs/validate.py +++ b/Data/DTDs/validate.py @@ -1,6 +1,6 @@ +import sys import pyRXP -import sys parser = pyRXP.Parser() res = parser.parse(open(sys.argv[1], 'r').read()) diff --git a/Data/Fonts/font_dumper.py b/Data/Fonts/font_dumper.py index fa6d804dc..510e86138 100644 --- a/Data/Fonts/font_dumper.py +++ b/Data/Fonts/font_dumper.py @@ -6,30 +6,28 @@ import argparse parser = argparse.ArgumentParser(description='Dump TTF file to char array.') parser.add_argument('--ttf-file', required=True, help='Name of TTF file.') -parser.add_argument('--output-file', required=True, - help='Name of output file.') -parser.add_argument('--variable-name', default='raw_data', - help='Name of variable for char array.' - ' Default=%(default)s.') +parser.add_argument('--output-file', required=True, help='Name of output file.') +parser.add_argument('--variable-name', default='raw_data', help='Name of variable for char array.' + ' Default=%(default)s.') parser.add_argument('--string-name', default='ttf_font_data', help='Name of variable for string array.' - ' Default=%(default)s.') + ' Default=%(default)s.') args = parser.parse_args() with open(args.ttf_file, 'rb') as f: - hexdata = f.read().hex() + hexdata = f.read().hex() num = 0 with open(args.output_file, 'w') as f: - f.write('namespace {\n') - f.write(f'const unsigned char {args.variable_name}[] = {{\n ') - for i in range(0, len(hexdata), 2): - f.write(f' 0x{hexdata[i:i+2]},') - num += 1 - if num == 12: - f.write('\n ') - num = 0 - f.write('\n};\n') - f.write('} // namespace\n') - f.write(f'const std::string {args.string_name}((const char *){args.variable_name},' - f' (const char *){args.variable_name} + sizeof({args.variable_name}));') \ No newline at end of file + f.write('namespace {\n') + f.write(f'const unsigned char {args.variable_name}[] = {{\n ') + for i in range(0, len(hexdata), 2): + f.write(f' 0x{hexdata[i:i+2]},') + num += 1 + if num == 12: + f.write('\n ') + num = 0 + f.write('\n};\n') + f.write('} // namespace\n') + f.write(f'const std::string {args.string_name}((const char *){args.variable_name},' + f' (const char *){args.variable_name} + sizeof({args.variable_name}));') diff --git a/Data/Pains/test_data/run_tests.py b/Data/Pains/test_data/run_tests.py index e676a002b..a765e8014 100644 --- a/Data/Pains/test_data/run_tests.py +++ b/Data/Pains/test_data/run_tests.py @@ -6,8 +6,10 @@ # of the RDKit source tree. # +import csv +import os +import unittest -import unittest, os, csv from rdkit import Chem, RDConfig diff --git a/Data/SmartsLib/tests/bench2.py b/Data/SmartsLib/tests/bench2.py index 104a2c068..a09d13ce9 100644 --- a/Data/SmartsLib/tests/bench2.py +++ b/Data/SmartsLib/tests/bench2.py @@ -1,9 +1,11 @@ - -from rdkit import Chem -from rdkit import RDConfig -import time, sys, gzip +import gzip import pickle +import sys +import time + +from rdkit import Chem, RDConfig from rdkit.RDLogger import logger + logger = logger() logger.info('reading smarts') diff --git a/Docs/Book/conf.py b/Docs/Book/conf.py index 105d28652..b9fc622b8 100644 --- a/Docs/Book/conf.py +++ b/Docs/Book/conf.py @@ -11,8 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -26,10 +26,7 @@ sys.path.insert(0, os.path.abspath('exts')) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.doctest', - 'myst_parser' -] # , 'extapi'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'myst_parser'] # , 'extapi'] #autosummary_generate = True doctest_test_doctest_blocks = "" diff --git a/Docs/Book/data/test_multi_colours.py b/Docs/Book/data/test_multi_colours.py index fb670397c..0ca0524d6 100644 --- a/Docs/Book/data/test_multi_colours.py +++ b/Docs/Book/data/test_multi_colours.py @@ -2,78 +2,73 @@ from json import dumps -from rdkit import Chem -from rdkit import rdBase -from rdkit.Chem import AllChem -from rdkit.Chem import Draw -from rdkit.Chem import rdDepictor +from rdkit import Chem, rdBase +from rdkit.Chem import AllChem, Draw, rdDepictor from rdkit.Chem.Draw import rdMolDraw2D -COLS = [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), - (0.0, 0.0, 1.0), (1.0, 0.55, 1.0)] +COLS = [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0), (1.0, 0.55, 1.0)] def get_hit_atoms_and_bonds(mol, smt): - alist = [] - blist = [] - q = Chem.MolFromSmarts(smt) - for match in mol.GetSubstructMatches(q): - alist.extend(match) + alist = [] + blist = [] + q = Chem.MolFromSmarts(smt) + for match in mol.GetSubstructMatches(q): + alist.extend(match) - for ha1 in alist: - for ha2 in alist: - if ha1 > ha2: - b = mol.GetBondBetweenAtoms(ha1, ha2) - if b: - blist.append(b.GetIdx()) + for ha1 in alist: + for ha2 in alist: + if ha1 > ha2: + b = mol.GetBondBetweenAtoms(ha1, ha2) + if b: + blist.append(b.GetIdx()) - return alist, blist + return alist, blist def add_colours_to_map(els, cols, col_num): - for el in els: - if el not in cols: - cols[el] = [] - if COLS[col_num] not in cols[el]: - cols[el].append(COLS[col_num]) + for el in els: + if el not in cols: + cols[el] = [] + if COLS[col_num] not in cols[el]: + cols[el].append(COLS[col_num]) def do_a_picture(smi, smarts, filename, label, fmt='svg'): - with rdDepictor.UsingCoordGen(True): - mol = Chem.MolFromSmiles(smi) - mol = Draw.PrepareMolForDrawing(mol) + with rdDepictor.UsingCoordGen(True): + mol = Chem.MolFromSmiles(smi) + mol = Draw.PrepareMolForDrawing(mol) - acols = {} - bcols = {} - h_rads = {} - h_lw_mult = {} + acols = {} + bcols = {} + h_rads = {} + h_lw_mult = {} - for i, smt in enumerate(smarts): - alist, blist = get_hit_atoms_and_bonds(mol, smt) - col = i % 4 - add_colours_to_map(alist, acols, col) - add_colours_to_map(blist, bcols, col) + for i, smt in enumerate(smarts): + alist, blist = get_hit_atoms_and_bonds(mol, smt) + col = i % 4 + add_colours_to_map(alist, acols, col) + add_colours_to_map(blist, bcols, col) - if fmt == 'svg': - d = rdMolDraw2D.MolDraw2DSVG(300, 300) - mode = 'w' - elif fmt == 'png': - d = rdMolDraw2D.MolDraw2DCairo(300, 300) - mode = 'wb' - else: - print('unknown format {}'.format(fmt)) - return + if fmt == 'svg': + d = rdMolDraw2D.MolDraw2DSVG(300, 300) + mode = 'w' + elif fmt == 'png': + d = rdMolDraw2D.MolDraw2DCairo(300, 300) + mode = 'wb' + else: + print('unknown format {}'.format(fmt)) + return - d.drawOptions().fillHighlights = False - d.DrawMoleculeWithHighlights(mol, label, acols, bcols, h_rads, h_lw_mult, -1) - d.FinishDrawing() + d.drawOptions().fillHighlights = False + d.DrawMoleculeWithHighlights(mol, label, acols, bcols, h_rads, h_lw_mult, -1) + d.FinishDrawing() - with open(filename, mode) as f: - f.write(d.GetDrawingText()) + with open(filename, mode) as f: + f.write(d.GetDrawingText()) smi = 'CO[C@@H](O)C1=C(O[C@H](F)Cl)C(C#N)=C1ONNC[NH3+]' smarts = ['CONN', 'N#CC~CO', 'C=CON', 'CONNCN'] do_a_picture(smi, smarts, 'atom_highlights_3.png', '', fmt='png') - diff --git a/Docs/Book/exts/extapi.py b/Docs/Book/exts/extapi.py index e61372688..e92413712 100644 --- a/Docs/Book/exts/extapi.py +++ b/Docs/Book/exts/extapi.py @@ -20,6 +20,7 @@ # import os.path + from docutils import nodes diff --git a/Docs/Book_jp/conf.py b/Docs/Book_jp/conf.py index e87be6db2..976a2d24e 100644 --- a/Docs/Book_jp/conf.py +++ b/Docs/Book_jp/conf.py @@ -21,7 +21,6 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) - # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -159,45 +158,40 @@ pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False - # -- Options for LaTeX output --------------------------------------------- latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'RDKit_unofficial_translation_JP.tex', 'RDKit\\_unofficial\\_translation\\_JP Documentation', - 'anonymous', 'manual'), + (master_doc, 'RDKit_unofficial_translation_JP.tex', + 'RDKit\\_unofficial\\_translation\\_JP Documentation', 'anonymous', 'manual'), ] - # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'rdkit_unofficial_translation_jp', 'RDKit_unofficial_translation_JP Documentation', - [author], 1) -] - +man_pages = [(master_doc, 'rdkit_unofficial_translation_jp', + 'RDKit_unofficial_translation_JP Documentation', [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -205,7 +199,6 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'RDKit_unofficial_translation_JP', 'RDKit_unofficial_translation_JP Documentation', - author, 'RDKit_unofficial_translation_JP', 'One line description of project.', - 'Miscellaneous'), + (master_doc, 'RDKit_unofficial_translation_JP', 'RDKit_unofficial_translation_JP Documentation', + author, 'RDKit_unofficial_translation_JP', 'One line description of project.', 'Miscellaneous'), ] diff --git a/External/AvalonTools/Wrap/testAvalonTools.py b/External/AvalonTools/Wrap/testAvalonTools.py index 908814748..32c7c7a3a 100755 --- a/External/AvalonTools/Wrap/testAvalonTools.py +++ b/External/AvalonTools/Wrap/testAvalonTools.py @@ -3,10 +3,11 @@ # Created by Greg Landrum, July 2008 # -from rdkit import RDConfig -import os, sys +import os +import sys import unittest -from rdkit import DataStructs, Chem + +from rdkit import Chem, DataStructs, RDConfig from rdkit.Avalon import pyAvalonTools struchk_conf_path = os.path.join(RDConfig.RDDataDir, 'struchk', '') @@ -179,6 +180,7 @@ CC[C@H](C)[C@@H](C(=O)N[C@@H](CC(C)C)C(=O)O)NC(=O)[C@H](Cc1ccccc1)CC(=O)NO $$$$ """ + def feq(v1, v2, tol=1e-4): return abs(v1 - v2) < tol @@ -196,11 +198,13 @@ class TestCase(unittest.TestCase): self.assertTrue(smi == 'c1ccncc1') def test2(self): - tgts = ['CC1=CC(=O)C=CC1=O', 'c2ccc1SC(=Nc1c2)SSC4=Nc3ccccc3S4', - '[O-][N+](=O)c1cc(Cl)c(O)c(c1)[N+]([O-])=O', 'N=C1NC=C(S1)[N+]([O-])=O', - 'Nc3ccc2C(=O)c1ccccc1C(=O)c2c3', 'OC(=O)c1ccccc1C3=C2C=CC(=O)C(Br)=C2Oc4c3ccc(O)c4Br', - 'CN(C)C2C(=O)c1ccccc1C(=O)C=2Cl', 'Cc3ccc2C(=O)c1ccccc1C(=O)c2c3[N+]([O-])=O', - r'C/C(=N\O)/C(/C)=N/O', 'c1ccc(cc1)P(c2ccccc2)c3ccccc3'] + tgts = [ + 'CC1=CC(=O)C=CC1=O', 'c2ccc1SC(=Nc1c2)SSC4=Nc3ccccc3S4', + '[O-][N+](=O)c1cc(Cl)c(O)c(c1)[N+]([O-])=O', 'N=C1NC=C(S1)[N+]([O-])=O', + 'Nc3ccc2C(=O)c1ccccc1C(=O)c2c3', 'OC(=O)c1ccccc1C3=C2C=CC(=O)C(Br)=C2Oc4c3ccc(O)c4Br', + 'CN(C)C2C(=O)c1ccccc1C(=O)C=2Cl', 'Cc3ccc2C(=O)c1ccccc1C(=O)c2c3[N+]([O-])=O', + r'C/C(=N\O)/C(/C)=N/O', 'c1ccc(cc1)P(c2ccccc2)c3ccccc3' + ] with open(os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf'), 'r') as f: d = f.read() mbs = d.split('$$$$\n')[:10] @@ -285,7 +289,6 @@ class TestCase(unittest.TestCase): s2 = Chem.MolToSmiles(m2) self.assertEqual(s1, s2) - def testRDK151(self): smi = "C[C@H](F)Cl" m = Chem.MolFromSmiles(smi) @@ -323,18 +326,17 @@ class TestCase(unittest.TestCase): r = pyAvalonTools.InitializeCheckMol(STRUCHK_INIT_IN_MEMORY_LOGGING) try: (err, fixed_mol) = pyAvalonTools.CheckMoleculeString(atom_clash, False) - log = pyAvalonTools.GetCheckMolLog() + log = pyAvalonTools.GetCheckMolLog() self.assertTrue("of average bond length from bond" in log) # make sure that the log is cleared for the next molecule (err, fixed_mol) = pyAvalonTools.CheckMoleculeString("c1ccccc1", True) - log = pyAvalonTools.GetCheckMolLog() + log = pyAvalonTools.GetCheckMolLog() self.assertFalse(log) finally: pyAvalonTools.CloseCheckMolFiles() - # def testIsotopeBug(self): # mb="""D isotope problem.mol # Mrv0541 08141217122D diff --git a/External/AvalonTools/Wrap/test_list.py b/External/AvalonTools/Wrap/test_list.py index 2d94f0bb9..521e47b5a 100755 --- a/External/AvalonTools/Wrap/test_list.py +++ b/External/AvalonTools/Wrap/test_list.py @@ -1,8 +1,11 @@ -tests = [("python", "testAvalonTools.py", {}), ] +tests = [ + ("python", "testAvalonTools.py", {}), +] longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/External/AvalonTools/test_list.py b/External/AvalonTools/test_list.py index 299f2d3e0..5790140e0 100755 --- a/External/AvalonTools/test_list.py +++ b/External/AvalonTools/test_list.py @@ -1,11 +1,14 @@ tests = [ ("testExecs/test1.exe", "", {}), - ("python", "test_list.py", {"dir": "Wrap"}), + ("python", "test_list.py", { + "dir": "Wrap" + }), ] longTests = [] if __name__ == '__main__': import sys + import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/External/CoordGen/Wrap/testCoordGen.py b/External/CoordGen/Wrap/testCoordGen.py index fb2b39eb3..1d7259d82 100644 --- a/External/CoordGen/Wrap/testCoordGen.py +++ b/External/CoordGen/Wrap/testCoordGen.py @@ -6,63 +6,71 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. - +import copy +import os +import sys import unittest -import os,sys, copy -from rdkit.Chem import rdCoordGen, rdMolAlign from rdkit import Chem, Geometry +from rdkit.Chem import rdCoordGen, rdMolAlign -def compareConfs(c1,c2,match, tol=1e-2, alignIt=False): - for i,j in enumerate(match): + +def compareConfs(c1, c2, match, tol=1e-2, alignIt=False): + for i, j in enumerate(match): pi = c2.GetAtomPosition(i) pj = c1.GetAtomPosition(j) - if (pj-pi).Length()>=tol: + if (pj - pi).Length() >= tol: return False return True -class TestCase(unittest.TestCase) : + +class TestCase(unittest.TestCase): + def test_basics(self): mol = Chem.MolFromSmiles('CCOC') - self.assertEqual(mol.GetNumConformers(),0) + self.assertEqual(mol.GetNumConformers(), 0) rdCoordGen.AddCoords(mol) - self.assertEqual(mol.GetNumConformers(),1) + self.assertEqual(mol.GetNumConformers(), 1) rdCoordGen.AddCoords(mol) - self.assertEqual(mol.GetNumConformers(),1) + self.assertEqual(mol.GetNumConformers(), 1) rwmol = Chem.RWMol(mol) rdCoordGen.AddCoords(rwmol) - self.assertEqual(rwmol.GetNumConformers(),1) + self.assertEqual(rwmol.GetNumConformers(), 1) + def test_template1(self): template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') - template.SetProp("_Name",'template') + template.SetProp("_Name", 'template') mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') - mol.SetProp("_Name","mol") + mol.SetProp("_Name", "mol") rdCoordGen.AddCoords(template) rdCoordGen.AddCoords(mol) - self.assertFalse(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + self.assertFalse( + compareConfs(mol.GetConformer(), template.GetConformer(), mol.GetSubstructMatch(template))) match = mol.GetSubstructMatch(template) mapd = dict() - for i,aid in enumerate(match): + for i, aid in enumerate(match): p = template.GetConformer().GetAtomPosition(i) - mapd[aid] = Geometry.Point2D(p.x,p.y) + mapd[aid] = Geometry.Point2D(p.x, p.y) ps = rdCoordGen.CoordGenParams() ps.SetCoordMap(mapd) ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + rdCoordGen.AddCoords(mol, ps) + self.assertTrue( + compareConfs(mol.GetConformer(), template.GetConformer(), mol.GetSubstructMatch(template))) def test_template2(self): # the easier way... template = Chem.MolFromSmiles('C1OCOCOCOCCCNCCC1') - template.SetProp("_Name",'template') + template.SetProp("_Name", 'template') mol = Chem.MolFromSmiles('C1OCOCOCOCCCNC(OC(=O)C2CC2)CC1') - mol.SetProp("_Name","mol") + mol.SetProp("_Name", "mol") rdCoordGen.AddCoords(template) ps = rdCoordGen.CoordGenParams() ps.SetTemplateMol(template) ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + rdCoordGen.AddCoords(mol, ps) + self.assertTrue( + compareConfs(mol.GetConformer(), template.GetConformer(), mol.GetSubstructMatch(template))) def test_template3(self): # the easier way, test lifetime... @@ -74,8 +82,10 @@ class TestCase(unittest.TestCase) : ps.SetTemplateMol(template2) template2 = None ps.dbg_useFixed = True - rdCoordGen.AddCoords(mol,ps) - self.assertTrue(compareConfs(mol.GetConformer(),template.GetConformer(),mol.GetSubstructMatch(template))) + rdCoordGen.AddCoords(mol, ps) + self.assertTrue( + compareConfs(mol.GetConformer(), template.GetConformer(), mol.GetSubstructMatch(template))) + if __name__ == '__main__': unittest.main() diff --git a/External/FreeSASA/Wrap/testFreeSASA.py b/External/FreeSASA/Wrap/testFreeSASA.py index 8b9c9036f..a4044b335 100644 --- a/External/FreeSASA/Wrap/testFreeSASA.py +++ b/External/FreeSASA/Wrap/testFreeSASA.py @@ -29,364 +29,265 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # - +import copy +import os +import sys import unittest -import os,sys, copy -from rdkit.Chem import rdFreeSASA from rdkit import Chem +from rdkit.Chem import rdFreeSASA -expected = [ - [0, 1, "Polar", 1.64], [1, 0, "Apolar", 1.88], - [2, 0, "Apolar", 1.61], [3, 1, "Polar", 1.42], - [4, 0, "Apolar", 1.88], [5, 0, "Apolar", 1.88], - [6, 1, "Polar", 1.77], [7, 0, "Apolar", 1.88], - [8, 1, "Polar", 1.64], [9, 0, "Apolar", 1.88], - [10, 0, "Apolar", 1.61], [11, 1, "Polar", 1.42], - [12, 0, "Apolar", 1.88], [13, 0, "Apolar", 1.88], - [14, 0, "Apolar", 1.61], [15, 1, "Polar", 1.42], - [16, 1, "Polar", 1.64], [17, 1, "Polar", 1.64], - [18, 0, "Apolar", 1.88], [19, 0, "Apolar", 1.61], - [20, 1, "Polar", 1.42], [21, 0, "Apolar", 1.88], - [22, 0, "Apolar", 1.88], [23, 0, "Apolar", 1.88], - [24, 0, "Apolar", 1.88], [25, 1, "Polar", 1.64], - [26, 0, "Apolar", 1.88], [27, 0, "Apolar", 1.61], - [28, 1, "Polar", 1.42], [29, 0, "Apolar", 1.88], - [30, 0, "Apolar", 1.61], [31, 0, "Apolar", 1.76], - [32, 0, "Apolar", 1.76], [33, 0, "Apolar", 1.76], - [34, 0, "Apolar", 1.76], [35, 0, "Apolar", 1.76], - [36, 1, "Polar", 1.64], [37, 0, "Apolar", 1.88], - [38, 0, "Apolar", 1.61], [39, 1, "Polar", 1.42], - [40, 0, "Apolar", 1.88], [41, 0, "Apolar", 1.88], - [42, 0, "Apolar", 1.88], [43, 1, "Polar", 1.64], - [44, 0, "Apolar", 1.88], [45, 0, "Apolar", 1.61], - [46, 1, "Polar", 1.42], [47, 0, "Apolar", 1.88], - [48, 0, "Apolar", 1.88], [49, 0, "Apolar", 1.88], - [50, 0, "Apolar", 1.88], [51, 1, "Polar", 1.64], - [52, 1, "Polar", 1.64], [53, 0, "Apolar", 1.88], - [54, 0, "Apolar", 1.61], [55, 1, "Polar", 1.42], - [56, 0, "Apolar", 1.88], [57, 1, "Polar", 1.46], - [58, 0, "Apolar", 1.88], [59, 1, "Polar", 1.64], - [60, 0, "Apolar", 1.88], [61, 0, "Apolar", 1.61], - [62, 1, "Polar", 1.42], [63, 0, "Apolar", 1.88], - [64, 0, "Apolar", 1.88], [65, 0, "Apolar", 1.88], - [66, 0, "Apolar", 1.88], [67, 1, "Polar", 1.64], - [68, 0, "Apolar", 1.88], [69, 0, "Apolar", 1.61], - [70, 1, "Polar", 1.42], [71, 0, "Apolar", 1.88], - [72, 1, "Polar", 1.46], [73, 0, "Apolar", 1.88], - [74, 1, "Polar", 1.64], [75, 0, "Apolar", 1.88], - [76, 0, "Apolar", 1.61], [77, 1, "Polar", 1.42], - [78, 1, "Polar", 1.64], [79, 0, "Apolar", 1.88], - [80, 0, "Apolar", 1.61], [81, 1, "Polar", 1.42], - [82, 0, "Apolar", 1.88], [83, 0, "Apolar", 1.88], - [84, 0, "Apolar", 1.88], [85, 0, "Apolar", 1.88], - [86, 1, "Polar", 1.64], [87, 1, "Polar", 1.64], - [88, 0, "Apolar", 1.88], [89, 0, "Apolar", 1.61], - [90, 1, "Polar", 1.42], [91, 0, "Apolar", 1.88], - [92, 1, "Polar", 1.46], [93, 0, "Apolar", 1.88], - [94, 1, "Polar", 1.64], [95, 0, "Apolar", 1.88], - [96, 0, "Apolar", 1.61], [97, 1, "Polar", 1.42], - [98, 0, "Apolar", 1.88], [99, 0, "Apolar", 1.88], - [100, 0, "Apolar", 1.88], [101, 0, "Apolar", 1.88], - [102, 1, "Polar", 1.64], [103, 0, "Apolar", 1.88], - [104, 0, "Apolar", 1.61], [105, 1, "Polar", 1.42], - [106, 0, "Apolar", 1.88], [107, 1, "Polar", 1.46], - [108, 0, "Apolar", 1.88], [109, 1, "Polar", 1.64], - [110, 0, "Apolar", 1.88], [111, 0, "Apolar", 1.61], - [112, 1, "Polar", 1.42], [113, 0, "Apolar", 1.88], - [114, 0, "Apolar", 1.88], [115, 0, "Apolar", 1.88], - [116, 0, "Apolar", 1.88], [117, 1, "Polar", 1.64], - [118, 0, "Apolar", 1.88], [119, 0, "Apolar", 1.61], - [120, 1, "Polar", 1.42], [121, 0, "Apolar", 1.88], - [122, 0, "Apolar", 1.88], [123, 0, "Apolar", 1.61], - [124, 1, "Polar", 1.42], [125, 1, "Polar", 1.46], - [126, 1, "Polar", 1.64], [127, 0, "Apolar", 1.88], - [128, 0, "Apolar", 1.61], [129, 1, "Polar", 1.42], - [130, 0, "Apolar", 1.88], [131, 0, "Apolar", 1.88], - [132, 0, "Apolar", 1.88], [133, 1, "Polar", 1.64], - [134, 0, "Apolar", 1.88], [135, 0, "Apolar", 1.61], - [136, 1, "Polar", 1.42], [137, 0, "Apolar", 1.88], - [138, 0, "Apolar", 1.88], [139, 0, "Apolar", 1.61], - [140, 1, "Polar", 1.42], [141, 1, "Polar", 1.46], - [142, 1, "Polar", 1.64], [143, 0, "Apolar", 1.88], - [144, 0, "Apolar", 1.61], [145, 1, "Polar", 1.42], - [146, 0, "Apolar", 1.88], [147, 0, "Apolar", 1.88], - [148, 0, "Apolar", 1.88], [149, 1, "Polar", 1.64], - [150, 0, "Apolar", 1.88], [151, 0, "Apolar", 1.61], - [152, 1, "Polar", 1.42], [153, 0, "Apolar", 1.88], - [154, 1, "Polar", 1.46], [155, 1, "Polar", 1.64], - [156, 0, "Apolar", 1.88], [157, 0, "Apolar", 1.61], - [158, 1, "Polar", 1.42], [159, 0, "Apolar", 1.88], - [160, 0, "Apolar", 1.61], [161, 1, "Polar", 1.42], - [162, 1, "Polar", 1.46], [163, 1, "Polar", 1.64], - [164, 0, "Apolar", 1.88], [165, 0, "Apolar", 1.61], - [166, 1, "Polar", 1.42], [167, 0, "Apolar", 1.88], - [168, 1, "Polar", 1.46], [169, 0, "Apolar", 1.88], - [170, 1, "Polar", 1.64], [171, 0, "Apolar", 1.88], - [172, 0, "Apolar", 1.61], [173, 1, "Polar", 1.42], - [174, 0, "Apolar", 1.88], [175, 0, "Apolar", 1.88], - [176, 0, "Apolar", 1.88], [177, 0, "Apolar", 1.88], - [178, 1, "Polar", 1.64], [179, 0, "Apolar", 1.88], - [180, 0, "Apolar", 1.61], [181, 1, "Polar", 1.42], - [182, 0, "Apolar", 1.88], [183, 0, "Apolar", 1.88], - [184, 0, "Apolar", 1.61], [185, 1, "Polar", 1.42], - [186, 1, "Polar", 1.46], [187, 1, "Polar", 1.64], - [188, 0, "Apolar", 1.88], [189, 0, "Apolar", 1.61], - [190, 1, "Polar", 1.42], [191, 0, "Apolar", 1.88], - [192, 0, "Apolar", 1.61], [193, 1, "Polar", 1.42], - [194, 1, "Polar", 1.64], [195, 1, "Polar", 1.64], - [196, 0, "Apolar", 1.88], [197, 0, "Apolar", 1.61], - [198, 1, "Polar", 1.42], [199, 0, "Apolar", 1.88], - [200, 0, "Apolar", 1.88], [201, 0, "Apolar", 1.88], - [202, 1, "Polar", 1.64], [203, 0, "Apolar", 1.88], - [204, 0, "Apolar", 1.61], [205, 1, "Polar", 1.42], - [206, 0, "Apolar", 1.88], [207, 0, "Apolar", 1.88], - [208, 0, "Apolar", 1.88], [209, 0, "Apolar", 1.88], - [210, 1, "Polar", 1.64], [211, 1, "Polar", 1.64], - [212, 0, "Apolar", 1.88], [213, 0, "Apolar", 1.61], - [214, 1, "Polar", 1.42], [215, 0, "Apolar", 1.88], - [216, 1, "Polar", 1.64], [217, 0, "Apolar", 1.88], - [218, 0, "Apolar", 1.61], [219, 1, "Polar", 1.42], - [220, 0, "Apolar", 1.88], [221, 0, "Apolar", 1.88], - [222, 0, "Apolar", 1.88], [223, 0, "Apolar", 1.88], - [224, 1, "Polar", 1.64], [225, 1, "Polar", 1.64], - [226, 0, "Apolar", 1.88], [227, 0, "Apolar", 1.61], - [228, 1, "Polar", 1.42], [229, 0, "Apolar", 1.88], - [230, 0, "Apolar", 1.88], [231, 0, "Apolar", 1.88], - [232, 0, "Apolar", 1.88], [233, 1, "Polar", 1.64], - [234, 0, "Apolar", 1.88], [235, 0, "Apolar", 1.61], - [236, 1, "Polar", 1.42], [237, 0, "Apolar", 1.88], - [238, 0, "Apolar", 1.88], [239, 0, "Apolar", 1.61], - [240, 1, "Polar", 1.42], [241, 1, "Polar", 1.64], - [242, 1, "Polar", 1.64], [243, 0, "Apolar", 1.88], - [244, 0, "Apolar", 1.61], [245, 1, "Polar", 1.42], - [246, 0, "Apolar", 1.88], [247, 0, "Apolar", 1.61], - [248, 1, "Polar", 1.42], [249, 1, "Polar", 1.46], - [250, 1, "Polar", 1.64], [251, 0, "Apolar", 1.88], - [252, 0, "Apolar", 1.61], [253, 1, "Polar", 1.42], - [254, 0, "Apolar", 1.88], [255, 0, "Apolar", 1.88], - [256, 0, "Apolar", 1.88], [257, 0, "Apolar", 1.88], - [258, 1, "Polar", 1.64], [259, 1, "Polar", 1.64], - [260, 0, "Apolar", 1.88], [261, 0, "Apolar", 1.61], - [262, 1, "Polar", 1.42], [263, 0, "Apolar", 1.88], - [264, 0, "Apolar", 1.88], [265, 0, "Apolar", 1.61], - [266, 1, "Polar", 1.42], [267, 1, "Polar", 1.46], - [268, 1, "Polar", 1.64], [269, 0, "Apolar", 1.88], - [270, 0, "Apolar", 1.61], [271, 1, "Polar", 1.42], - [272, 1, "Polar", 1.64], [273, 0, "Apolar", 1.88], - [274, 0, "Apolar", 1.61], [275, 1, "Polar", 1.42], - [276, 0, "Apolar", 1.88], [277, 0, "Apolar", 1.88], - [278, 0, "Apolar", 1.88], [279, 0, "Apolar", 1.88], - [280, 1, "Polar", 1.64], [281, 0, "Apolar", 1.88], - [282, 0, "Apolar", 1.61], [283, 1, "Polar", 1.42], - [284, 0, "Apolar", 1.88], [285, 0, "Apolar", 1.88], - [286, 0, "Apolar", 1.88], [287, 1, "Polar", 1.64], - [288, 0, "Apolar", 1.88], [289, 0, "Apolar", 1.61], - [290, 1, "Polar", 1.42], [291, 0, "Apolar", 1.88], - [292, 0, "Apolar", 1.88], [293, 0, "Apolar", 1.88], - [294, 1, "Polar", 1.64], [295, 0, "Apolar", 1.88], - [296, 0, "Apolar", 1.61], [297, 1, "Polar", 1.42], - [298, 0, "Apolar", 1.88], [299, 0, "Apolar", 1.61], - [300, 1, "Polar", 1.42], [301, 1, "Polar", 1.46], - [302, 1, "Polar", 1.64], [303, 0, "Apolar", 1.88], - [304, 0, "Apolar", 1.61], [305, 1, "Polar", 1.42], - [306, 0, "Apolar", 1.88], [307, 0, "Apolar", 1.88], - [308, 0, "Apolar", 1.61], [309, 1, "Polar", 1.42], - [310, 1, "Polar", 1.64], [311, 1, "Polar", 1.64], - [312, 0, "Apolar", 1.88], [313, 0, "Apolar", 1.61], - [314, 1, "Polar", 1.42], [315, 0, "Apolar", 1.88], - [316, 0, "Apolar", 1.88], [317, 0, "Apolar", 1.61], - [318, 1, "Polar", 1.42], [319, 1, "Polar", 1.64], - [320, 1, "Polar", 1.64], [321, 0, "Apolar", 1.88], - [322, 0, "Apolar", 1.61], [323, 1, "Polar", 1.42], - [324, 0, "Apolar", 1.88], [325, 0, "Apolar", 1.88], - [326, 0, "Apolar", 1.88], [327, 1, "Polar", 1.64], - [328, 0, "Apolar", 1.61], [329, 1, "Polar", 1.64], - [330, 1, "Polar", 1.64], [331, 1, "Polar", 1.64], - [332, 0, "Apolar", 1.88], [333, 0, "Apolar", 1.61], - [334, 1, "Polar", 1.42], [335, 0, "Apolar", 1.88], - [336, 0, "Apolar", 1.88], [337, 0, "Apolar", 1.88], - [338, 0, "Apolar", 1.88], [339, 1, "Polar", 1.64], - [340, 0, "Apolar", 1.88], [341, 0, "Apolar", 1.61], - [342, 1, "Polar", 1.42], [343, 0, "Apolar", 1.88], - [344, 0, "Apolar", 1.88], [345, 0, "Apolar", 1.88], - [346, 0, "Apolar", 1.88], [347, 1, "Polar", 1.64], - [348, 0, "Apolar", 1.88], [349, 0, "Apolar", 1.61], - [350, 1, "Polar", 1.42], [351, 0, "Apolar", 1.88], - [352, 0, "Apolar", 1.61], [353, 0, "Apolar", 1.76], - [354, 0, "Apolar", 1.76], [355, 0, "Apolar", 1.76], - [356, 0, "Apolar", 1.76], [357, 0, "Apolar", 1.76], - [358, 1, "Polar", 1.64], [359, 0, "Apolar", 1.88], - [360, 0, "Apolar", 1.61], [361, 1, "Polar", 1.42], - [362, 0, "Apolar", 1.88], [363, 1, "Polar", 1.64], - [364, 0, "Apolar", 1.88], [365, 0, "Apolar", 1.61], - [366, 1, "Polar", 1.42], [367, 1, "Polar", 1.64], - [368, 0, "Apolar", 1.88], [369, 0, "Apolar", 1.61], - [370, 1, "Polar", 1.42], [371, 0, "Apolar", 1.88], - [372, 0, "Apolar", 1.88], [373, 0, "Apolar", 1.88], - [374, 0, "Apolar", 1.88], [375, 1, "Polar", 1.64], - [376, 1, "Polar", 1.64], [377, 0, "Apolar", 1.88], - [378, 0, "Apolar", 1.61], [379, 1, "Polar", 1.42], - [380, 0, "Apolar", 1.88], [381, 0, "Apolar", 1.88], - [382, 0, "Apolar", 1.61], [383, 1, "Polar", 1.42], - [384, 1, "Polar", 1.64], [385, 1, "Polar", 1.64], - [386, 0, "Apolar", 1.88], [387, 0, "Apolar", 1.61], - [388, 1, "Polar", 1.42], [389, 0, "Apolar", 1.88], - [390, 0, "Apolar", 1.88], [391, 0, "Apolar", 1.88], - [392, 0, "Apolar", 1.88], [393, 1, "Polar", 1.64], - [394, 0, "Apolar", 1.88], [395, 0, "Apolar", 1.61], - [396, 1, "Polar", 1.42], [397, 0, "Apolar", 1.88], - [398, 0, "Apolar", 1.88], [399, 0, "Apolar", 1.61], - [400, 1, "Polar", 1.42], [401, 1, "Polar", 1.46], - [402, 1, "Polar", 1.64], [403, 0, "Apolar", 1.88], - [404, 0, "Apolar", 1.61], [405, 1, "Polar", 1.42], - [406, 0, "Apolar", 1.88], [407, 0, "Apolar", 1.61], - [408, 1, "Polar", 1.42], [409, 1, "Polar", 1.46], - [410, 1, "Polar", 1.64], [411, 0, "Apolar", 1.88], - [412, 0, "Apolar", 1.61], [413, 1, "Polar", 1.42], - [414, 1, "Polar", 1.64], [415, 0, "Apolar", 1.88], - [416, 0, "Apolar", 1.61], [417, 1, "Polar", 1.42], - [418, 0, "Apolar", 1.88], [419, 0, "Apolar", 1.88], - [420, 0, "Apolar", 1.88], [421, 1, "Polar", 1.64], - [422, 0, "Apolar", 1.61], [423, 1, "Polar", 1.64], - [424, 1, "Polar", 1.64], [425, 1, "Polar", 1.64], - [426, 0, "Apolar", 1.88], [427, 0, "Apolar", 1.61], - [428, 1, "Polar", 1.42], [429, 0, "Apolar", 1.88], - [430, 1, "Polar", 1.46], [431, 0, "Apolar", 1.88], - [432, 1, "Polar", 1.64], [433, 0, "Apolar", 1.88], - [434, 0, "Apolar", 1.61], [435, 1, "Polar", 1.42], - [436, 0, "Apolar", 1.88], [437, 0, "Apolar", 1.88], - [438, 0, "Apolar", 1.88], [439, 0, "Apolar", 1.88], - [440, 1, "Polar", 1.64], [441, 0, "Apolar", 1.88], - [442, 0, "Apolar", 1.61], [443, 1, "Polar", 1.42], - [444, 0, "Apolar", 1.88], [445, 1, "Polar", 1.46], - [446, 1, "Polar", 1.64], [447, 0, "Apolar", 1.88], - [448, 0, "Apolar", 1.61], [449, 1, "Polar", 1.42], - [450, 0, "Apolar", 1.88], [451, 0, "Apolar", 1.61], - [452, 1, "Polar", 1.42], [453, 1, "Polar", 1.46], - [454, 1, "Polar", 1.64], [455, 0, "Apolar", 1.88], - [456, 0, "Apolar", 1.61], [457, 1, "Polar", 1.42], - [458, 0, "Apolar", 1.88], [459, 0, "Apolar", 1.61], - [460, 0, "Apolar", 1.76], [461, 0, "Apolar", 1.76], - [462, 0, "Apolar", 1.76], [463, 0, "Apolar", 1.76], - [464, 0, "Apolar", 1.61], [465, 1, "Polar", 1.46], - [466, 1, "Polar", 1.64], [467, 0, "Apolar", 1.88], - [468, 0, "Apolar", 1.61], [469, 1, "Polar", 1.42], - [470, 0, "Apolar", 1.88], [471, 0, "Apolar", 1.61], - [472, 1, "Polar", 1.42], [473, 1, "Polar", 1.64], - [474, 1, "Polar", 1.64], [475, 0, "Apolar", 1.88], - [476, 0, "Apolar", 1.61], [477, 1, "Polar", 1.42], - [478, 0, "Apolar", 1.88], [479, 0, "Apolar", 1.88], - [480, 0, "Apolar", 1.88], [481, 0, "Apolar", 1.88], - [482, 1, "Polar", 1.64], [483, 0, "Apolar", 1.88], - [484, 0, "Apolar", 1.61], [485, 1, "Polar", 1.42], - [486, 0, "Apolar", 1.88], [487, 0, "Apolar", 1.88], - [488, 0, "Apolar", 1.61], [489, 1, "Polar", 1.42], - [490, 1, "Polar", 1.64], [491, 1, "Polar", 1.64], - [492, 0, "Apolar", 1.88], [493, 0, "Apolar", 1.61], - [494, 1, "Polar", 1.42], [495, 0, "Apolar", 1.88], - [496, 0, "Apolar", 1.88], [497, 0, "Apolar", 1.88], - [498, 0, "Apolar", 1.88], [499, 1, "Polar", 1.64], - [500, 1, "Polar", 1.64], [501, 0, "Apolar", 1.88], - [502, 0, "Apolar", 1.61], [503, 1, "Polar", 1.42], - [504, 0, "Apolar", 1.88], [505, 0, "Apolar", 1.88], - [506, 0, "Apolar", 1.61], [507, 1, "Polar", 1.42], - [508, 1, "Polar", 1.46], [509, 1, "Polar", 1.64], - [510, 0, "Apolar", 1.88], [511, 0, "Apolar", 1.61], - [512, 1, "Polar", 1.42], [513, 0, "Apolar", 1.88], - [514, 1, "Polar", 1.46], [515, 1, "Polar", 1.64], - [516, 0, "Apolar", 1.88], [517, 0, "Apolar", 1.61], - [518, 1, "Polar", 1.42], [519, 0, "Apolar", 1.88], - [520, 1, "Polar", 1.46], [521, 0, "Apolar", 1.88], - [522, 1, "Polar", 1.64], [523, 0, "Apolar", 1.88], - [524, 0, "Apolar", 1.61], [525, 1, "Polar", 1.42], - [526, 0, "Apolar", 1.88], [527, 0, "Apolar", 1.88], - [528, 0, "Apolar", 1.88], [529, 0, "Apolar", 1.88], - [530, 1, "Polar", 1.64], [531, 0, "Apolar", 1.88], - [532, 0, "Apolar", 1.61], [533, 1, "Polar", 1.42], - [534, 0, "Apolar", 1.88], [535, 0, "Apolar", 1.61], - [536, 1, "Polar", 1.64], [537, 0, "Apolar", 1.76], - [538, 0, "Apolar", 1.76], [539, 1, "Polar", 1.64], - [540, 1, "Polar", 1.64], [541, 0, "Apolar", 1.88], - [542, 0, "Apolar", 1.61], [543, 1, "Polar", 1.42], - [544, 0, "Apolar", 1.88], [545, 0, "Apolar", 1.88], - [546, 0, "Apolar", 1.88], [547, 0, "Apolar", 1.88], - [548, 1, "Polar", 1.64], [549, 0, "Apolar", 1.88], - [550, 0, "Apolar", 1.61], [551, 1, "Polar", 1.42], - [552, 0, "Apolar", 1.88], [553, 0, "Apolar", 1.88], - [554, 0, "Apolar", 1.88], [555, 1, "Polar", 1.64], - [556, 0, "Apolar", 1.88], [557, 0, "Apolar", 1.61], - [558, 1, "Polar", 1.42], [559, 0, "Apolar", 1.88], - [560, 0, "Apolar", 1.88], [561, 0, "Apolar", 1.88], - [562, 0, "Apolar", 1.88], [563, 1, "Polar", 1.64], - [564, 0, "Apolar", 1.88], [565, 0, "Apolar", 1.61], - [566, 1, "Polar", 1.42], [567, 0, "Apolar", 1.88], - [568, 0, "Apolar", 1.88], [569, 0, "Apolar", 1.88], - [570, 1, "Polar", 1.64], [571, 0, "Apolar", 1.61], - [572, 1, "Polar", 1.64], [573, 1, "Polar", 1.64], - [574, 1, "Polar", 1.64], [575, 0, "Apolar", 1.88], - [576, 0, "Apolar", 1.61], [577, 1, "Polar", 1.42], - [578, 0, "Apolar", 1.88], [579, 0, "Apolar", 1.88], - [580, 0, "Apolar", 1.88], [581, 0, "Apolar", 1.88], - [582, 1, "Polar", 1.64], [583, 0, "Apolar", 1.88], - [584, 0, "Apolar", 1.61], [585, 1, "Polar", 1.42], - [586, 0, "Apolar", 1.88], [587, 0, "Apolar", 1.88], - [588, 0, "Apolar", 1.88], [589, 1, "Polar", 1.64], - [590, 0, "Apolar", 1.61], [591, 1, "Polar", 1.64], - [592, 1, "Polar", 1.64], [593, 1, "Polar", 1.64], - [594, 0, "Apolar", 1.88], [595, 0, "Apolar", 1.61], - [596, 1, "Polar", 1.42], [597, 1, "Polar", 1.64], - [598, 0, "Apolar", 1.88], [599, 0, "Apolar", 1.61], - [600, 1, "Polar", 1.42], [601, 1, "Polar", 1.46] - ] +expected = [[0, 1, "Polar", 1.64], [1, 0, "Apolar", 1.88], [2, 0, "Apolar", 1.61], + [3, 1, "Polar", 1.42], [4, 0, "Apolar", 1.88], [5, 0, "Apolar", 1.88], + [6, 1, "Polar", 1.77], [7, 0, "Apolar", 1.88], [8, 1, "Polar", 1.64], + [9, 0, "Apolar", 1.88], [10, 0, "Apolar", 1.61], [11, 1, "Polar", 1.42], + [12, 0, "Apolar", 1.88], [13, 0, "Apolar", 1.88], [14, 0, "Apolar", 1.61], + [15, 1, "Polar", 1.42], [16, 1, "Polar", 1.64], [17, 1, "Polar", 1.64], + [18, 0, "Apolar", 1.88], [19, 0, "Apolar", 1.61], [20, 1, "Polar", 1.42], + [21, 0, "Apolar", 1.88], [22, 0, "Apolar", 1.88], [23, 0, "Apolar", 1.88], + [24, 0, "Apolar", 1.88], [25, 1, "Polar", 1.64], [26, 0, "Apolar", 1.88], + [27, 0, "Apolar", 1.61], [28, 1, "Polar", 1.42], [29, 0, "Apolar", 1.88], + [30, 0, "Apolar", 1.61], [31, 0, "Apolar", 1.76], [32, 0, "Apolar", 1.76], + [33, 0, "Apolar", 1.76], [34, 0, "Apolar", 1.76], [35, 0, "Apolar", 1.76], + [36, 1, "Polar", 1.64], [37, 0, "Apolar", 1.88], [38, 0, "Apolar", 1.61], + [39, 1, "Polar", 1.42], [40, 0, "Apolar", 1.88], [41, 0, "Apolar", 1.88], + [42, 0, "Apolar", 1.88], [43, 1, "Polar", 1.64], [44, 0, "Apolar", 1.88], + [45, 0, "Apolar", 1.61], [46, 1, "Polar", 1.42], [47, 0, "Apolar", 1.88], + [48, 0, "Apolar", 1.88], [49, 0, "Apolar", 1.88], [50, 0, "Apolar", 1.88], + [51, 1, "Polar", 1.64], [52, 1, "Polar", 1.64], [53, 0, "Apolar", 1.88], + [54, 0, "Apolar", 1.61], [55, 1, "Polar", 1.42], [56, 0, "Apolar", 1.88], + [57, 1, "Polar", 1.46], [58, 0, "Apolar", 1.88], [59, 1, "Polar", 1.64], + [60, 0, "Apolar", 1.88], [61, 0, "Apolar", 1.61], [62, 1, "Polar", 1.42], + [63, 0, "Apolar", 1.88], [64, 0, "Apolar", 1.88], [65, 0, "Apolar", 1.88], + [66, 0, "Apolar", 1.88], [67, 1, "Polar", 1.64], [68, 0, "Apolar", 1.88], + [69, 0, "Apolar", 1.61], [70, 1, "Polar", 1.42], [71, 0, "Apolar", 1.88], + [72, 1, "Polar", 1.46], [73, 0, "Apolar", 1.88], [74, 1, "Polar", 1.64], + [75, 0, "Apolar", 1.88], [76, 0, "Apolar", 1.61], [77, 1, "Polar", 1.42], + [78, 1, "Polar", 1.64], [79, 0, "Apolar", 1.88], [80, 0, "Apolar", 1.61], + [81, 1, "Polar", 1.42], [82, 0, "Apolar", 1.88], [83, 0, "Apolar", 1.88], + [84, 0, "Apolar", 1.88], [85, 0, "Apolar", 1.88], [86, 1, "Polar", 1.64], + [87, 1, "Polar", 1.64], [88, 0, "Apolar", 1.88], [89, 0, "Apolar", 1.61], + [90, 1, "Polar", 1.42], [91, 0, "Apolar", 1.88], [92, 1, "Polar", 1.46], + [93, 0, "Apolar", 1.88], [94, 1, "Polar", 1.64], [95, 0, "Apolar", 1.88], + [96, 0, "Apolar", 1.61], [97, 1, "Polar", 1.42], [98, 0, "Apolar", 1.88], + [99, 0, "Apolar", 1.88], [100, 0, "Apolar", 1.88], [101, 0, "Apolar", 1.88], + [102, 1, "Polar", 1.64], [103, 0, "Apolar", 1.88], [104, 0, "Apolar", 1.61], + [105, 1, "Polar", 1.42], [106, 0, "Apolar", 1.88], [107, 1, "Polar", 1.46], + [108, 0, "Apolar", 1.88], [109, 1, "Polar", 1.64], [110, 0, "Apolar", 1.88], + [111, 0, "Apolar", 1.61], [112, 1, "Polar", 1.42], [113, 0, "Apolar", 1.88], + [114, 0, "Apolar", 1.88], [115, 0, "Apolar", 1.88], [116, 0, "Apolar", 1.88], + [117, 1, "Polar", 1.64], [118, 0, "Apolar", 1.88], [119, 0, "Apolar", 1.61], + [120, 1, "Polar", 1.42], [121, 0, "Apolar", 1.88], [122, 0, "Apolar", 1.88], + [123, 0, "Apolar", 1.61], [124, 1, "Polar", 1.42], [125, 1, "Polar", 1.46], + [126, 1, "Polar", 1.64], [127, 0, "Apolar", 1.88], [128, 0, "Apolar", 1.61], + [129, 1, "Polar", 1.42], [130, 0, "Apolar", 1.88], [131, 0, "Apolar", 1.88], + [132, 0, "Apolar", 1.88], [133, 1, "Polar", 1.64], [134, 0, "Apolar", 1.88], + [135, 0, "Apolar", 1.61], [136, 1, "Polar", 1.42], [137, 0, "Apolar", 1.88], + [138, 0, "Apolar", 1.88], [139, 0, "Apolar", 1.61], [140, 1, "Polar", 1.42], + [141, 1, "Polar", 1.46], [142, 1, "Polar", 1.64], [143, 0, "Apolar", 1.88], + [144, 0, "Apolar", 1.61], [145, 1, "Polar", 1.42], [146, 0, "Apolar", 1.88], + [147, 0, "Apolar", 1.88], [148, 0, "Apolar", 1.88], [149, 1, "Polar", 1.64], + [150, 0, "Apolar", 1.88], [151, 0, "Apolar", 1.61], [152, 1, "Polar", 1.42], + [153, 0, "Apolar", 1.88], [154, 1, "Polar", 1.46], [155, 1, "Polar", 1.64], + [156, 0, "Apolar", 1.88], [157, 0, "Apolar", 1.61], [158, 1, "Polar", 1.42], + [159, 0, "Apolar", 1.88], [160, 0, "Apolar", 1.61], [161, 1, "Polar", 1.42], + [162, 1, "Polar", 1.46], [163, 1, "Polar", 1.64], [164, 0, "Apolar", 1.88], + [165, 0, "Apolar", 1.61], [166, 1, "Polar", 1.42], [167, 0, "Apolar", 1.88], + [168, 1, "Polar", 1.46], [169, 0, "Apolar", 1.88], [170, 1, "Polar", 1.64], + [171, 0, "Apolar", 1.88], [172, 0, "Apolar", 1.61], [173, 1, "Polar", 1.42], + [174, 0, "Apolar", 1.88], [175, 0, "Apolar", 1.88], [176, 0, "Apolar", 1.88], + [177, 0, "Apolar", 1.88], [178, 1, "Polar", 1.64], [179, 0, "Apolar", 1.88], + [180, 0, "Apolar", 1.61], [181, 1, "Polar", 1.42], [182, 0, "Apolar", 1.88], + [183, 0, "Apolar", 1.88], [184, 0, "Apolar", 1.61], [185, 1, "Polar", 1.42], + [186, 1, "Polar", 1.46], [187, 1, "Polar", 1.64], [188, 0, "Apolar", 1.88], + [189, 0, "Apolar", 1.61], [190, 1, "Polar", 1.42], [191, 0, "Apolar", 1.88], + [192, 0, "Apolar", 1.61], [193, 1, "Polar", 1.42], [194, 1, "Polar", 1.64], + [195, 1, "Polar", 1.64], [196, 0, "Apolar", 1.88], [197, 0, "Apolar", 1.61], + [198, 1, "Polar", 1.42], [199, 0, "Apolar", 1.88], [200, 0, "Apolar", 1.88], + [201, 0, "Apolar", 1.88], [202, 1, "Polar", 1.64], [203, 0, "Apolar", 1.88], + [204, 0, "Apolar", 1.61], [205, 1, "Polar", 1.42], [206, 0, "Apolar", 1.88], + [207, 0, "Apolar", 1.88], [208, 0, "Apolar", 1.88], [209, 0, "Apolar", 1.88], + [210, 1, "Polar", 1.64], [211, 1, "Polar", 1.64], [212, 0, "Apolar", 1.88], + [213, 0, "Apolar", 1.61], [214, 1, "Polar", 1.42], [215, 0, "Apolar", 1.88], + [216, 1, "Polar", 1.64], [217, 0, "Apolar", 1.88], [218, 0, "Apolar", 1.61], + [219, 1, "Polar", 1.42], [220, 0, "Apolar", 1.88], [221, 0, "Apolar", 1.88], + [222, 0, "Apolar", 1.88], [223, 0, "Apolar", 1.88], [224, 1, "Polar", 1.64], + [225, 1, "Polar", 1.64], [226, 0, "Apolar", 1.88], [227, 0, "Apolar", 1.61], + [228, 1, "Polar", 1.42], [229, 0, "Apolar", 1.88], [230, 0, "Apolar", 1.88], + [231, 0, "Apolar", 1.88], [232, 0, "Apolar", 1.88], [233, 1, "Polar", 1.64], + [234, 0, "Apolar", 1.88], [235, 0, "Apolar", 1.61], [236, 1, "Polar", 1.42], + [237, 0, "Apolar", 1.88], [238, 0, "Apolar", 1.88], [239, 0, "Apolar", 1.61], + [240, 1, "Polar", 1.42], [241, 1, "Polar", 1.64], [242, 1, "Polar", 1.64], + [243, 0, "Apolar", 1.88], [244, 0, "Apolar", 1.61], [245, 1, "Polar", 1.42], + [246, 0, "Apolar", 1.88], [247, 0, "Apolar", 1.61], [248, 1, "Polar", 1.42], + [249, 1, "Polar", 1.46], [250, 1, "Polar", 1.64], [251, 0, "Apolar", 1.88], + [252, 0, "Apolar", 1.61], [253, 1, "Polar", 1.42], [254, 0, "Apolar", 1.88], + [255, 0, "Apolar", 1.88], [256, 0, "Apolar", 1.88], [257, 0, "Apolar", 1.88], + [258, 1, "Polar", 1.64], [259, 1, "Polar", 1.64], [260, 0, "Apolar", 1.88], + [261, 0, "Apolar", 1.61], [262, 1, "Polar", 1.42], [263, 0, "Apolar", 1.88], + [264, 0, "Apolar", 1.88], [265, 0, "Apolar", 1.61], [266, 1, "Polar", 1.42], + [267, 1, "Polar", 1.46], [268, 1, "Polar", 1.64], [269, 0, "Apolar", 1.88], + [270, 0, "Apolar", 1.61], [271, 1, "Polar", 1.42], [272, 1, "Polar", 1.64], + [273, 0, "Apolar", 1.88], [274, 0, "Apolar", 1.61], [275, 1, "Polar", 1.42], + [276, 0, "Apolar", 1.88], [277, 0, "Apolar", 1.88], [278, 0, "Apolar", 1.88], + [279, 0, "Apolar", 1.88], [280, 1, "Polar", 1.64], [281, 0, "Apolar", 1.88], + [282, 0, "Apolar", 1.61], [283, 1, "Polar", 1.42], [284, 0, "Apolar", 1.88], + [285, 0, "Apolar", 1.88], [286, 0, "Apolar", 1.88], [287, 1, "Polar", 1.64], + [288, 0, "Apolar", 1.88], [289, 0, "Apolar", 1.61], [290, 1, "Polar", 1.42], + [291, 0, "Apolar", 1.88], [292, 0, "Apolar", 1.88], [293, 0, "Apolar", 1.88], + [294, 1, "Polar", 1.64], [295, 0, "Apolar", 1.88], [296, 0, "Apolar", 1.61], + [297, 1, "Polar", 1.42], [298, 0, "Apolar", 1.88], [299, 0, "Apolar", 1.61], + [300, 1, "Polar", 1.42], [301, 1, "Polar", 1.46], [302, 1, "Polar", 1.64], + [303, 0, "Apolar", 1.88], [304, 0, "Apolar", 1.61], [305, 1, "Polar", 1.42], + [306, 0, "Apolar", 1.88], [307, 0, "Apolar", 1.88], [308, 0, "Apolar", 1.61], + [309, 1, "Polar", 1.42], [310, 1, "Polar", 1.64], [311, 1, "Polar", 1.64], + [312, 0, "Apolar", 1.88], [313, 0, "Apolar", 1.61], [314, 1, "Polar", 1.42], + [315, 0, "Apolar", 1.88], [316, 0, "Apolar", 1.88], [317, 0, "Apolar", 1.61], + [318, 1, "Polar", 1.42], [319, 1, "Polar", 1.64], [320, 1, "Polar", 1.64], + [321, 0, "Apolar", 1.88], [322, 0, "Apolar", 1.61], [323, 1, "Polar", 1.42], + [324, 0, "Apolar", 1.88], [325, 0, "Apolar", 1.88], [326, 0, "Apolar", 1.88], + [327, 1, "Polar", 1.64], [328, 0, "Apolar", 1.61], [329, 1, "Polar", 1.64], + [330, 1, "Polar", 1.64], [331, 1, "Polar", 1.64], [332, 0, "Apolar", 1.88], + [333, 0, "Apolar", 1.61], [334, 1, "Polar", 1.42], [335, 0, "Apolar", 1.88], + [336, 0, "Apolar", 1.88], [337, 0, "Apolar", 1.88], [338, 0, "Apolar", 1.88], + [339, 1, "Polar", 1.64], [340, 0, "Apolar", 1.88], [341, 0, "Apolar", 1.61], + [342, 1, "Polar", 1.42], [343, 0, "Apolar", 1.88], [344, 0, "Apolar", 1.88], + [345, 0, "Apolar", 1.88], [346, 0, "Apolar", 1.88], [347, 1, "Polar", 1.64], + [348, 0, "Apolar", 1.88], [349, 0, "Apolar", 1.61], [350, 1, "Polar", 1.42], + [351, 0, "Apolar", 1.88], [352, 0, "Apolar", 1.61], [353, 0, "Apolar", 1.76], + [354, 0, "Apolar", 1.76], [355, 0, "Apolar", 1.76], [356, 0, "Apolar", 1.76], + [357, 0, "Apolar", 1.76], [358, 1, "Polar", 1.64], [359, 0, "Apolar", 1.88], + [360, 0, "Apolar", 1.61], [361, 1, "Polar", 1.42], [362, 0, "Apolar", 1.88], + [363, 1, "Polar", 1.64], [364, 0, "Apolar", 1.88], [365, 0, "Apolar", 1.61], + [366, 1, "Polar", 1.42], [367, 1, "Polar", 1.64], [368, 0, "Apolar", 1.88], + [369, 0, "Apolar", 1.61], [370, 1, "Polar", 1.42], [371, 0, "Apolar", 1.88], + [372, 0, "Apolar", 1.88], [373, 0, "Apolar", 1.88], [374, 0, "Apolar", 1.88], + [375, 1, "Polar", 1.64], [376, 1, "Polar", 1.64], [377, 0, "Apolar", 1.88], + [378, 0, "Apolar", 1.61], [379, 1, "Polar", 1.42], [380, 0, "Apolar", 1.88], + [381, 0, "Apolar", 1.88], [382, 0, "Apolar", 1.61], [383, 1, "Polar", 1.42], + [384, 1, "Polar", 1.64], [385, 1, "Polar", 1.64], [386, 0, "Apolar", 1.88], + [387, 0, "Apolar", 1.61], [388, 1, "Polar", 1.42], [389, 0, "Apolar", 1.88], + [390, 0, "Apolar", 1.88], [391, 0, "Apolar", 1.88], [392, 0, "Apolar", 1.88], + [393, 1, "Polar", 1.64], [394, 0, "Apolar", 1.88], [395, 0, "Apolar", 1.61], + [396, 1, "Polar", 1.42], [397, 0, "Apolar", 1.88], [398, 0, "Apolar", 1.88], + [399, 0, "Apolar", 1.61], [400, 1, "Polar", 1.42], [401, 1, "Polar", 1.46], + [402, 1, "Polar", 1.64], [403, 0, "Apolar", 1.88], [404, 0, "Apolar", 1.61], + [405, 1, "Polar", 1.42], [406, 0, "Apolar", 1.88], [407, 0, "Apolar", 1.61], + [408, 1, "Polar", 1.42], [409, 1, "Polar", 1.46], [410, 1, "Polar", 1.64], + [411, 0, "Apolar", 1.88], [412, 0, "Apolar", 1.61], [413, 1, "Polar", 1.42], + [414, 1, "Polar", 1.64], [415, 0, "Apolar", 1.88], [416, 0, "Apolar", 1.61], + [417, 1, "Polar", 1.42], [418, 0, "Apolar", 1.88], [419, 0, "Apolar", 1.88], + [420, 0, "Apolar", 1.88], [421, 1, "Polar", 1.64], [422, 0, "Apolar", 1.61], + [423, 1, "Polar", 1.64], [424, 1, "Polar", 1.64], [425, 1, "Polar", 1.64], + [426, 0, "Apolar", 1.88], [427, 0, "Apolar", 1.61], [428, 1, "Polar", 1.42], + [429, 0, "Apolar", 1.88], [430, 1, "Polar", 1.46], [431, 0, "Apolar", 1.88], + [432, 1, "Polar", 1.64], [433, 0, "Apolar", 1.88], [434, 0, "Apolar", 1.61], + [435, 1, "Polar", 1.42], [436, 0, "Apolar", 1.88], [437, 0, "Apolar", 1.88], + [438, 0, "Apolar", 1.88], [439, 0, "Apolar", 1.88], [440, 1, "Polar", 1.64], + [441, 0, "Apolar", 1.88], [442, 0, "Apolar", 1.61], [443, 1, "Polar", 1.42], + [444, 0, "Apolar", 1.88], [445, 1, "Polar", 1.46], [446, 1, "Polar", 1.64], + [447, 0, "Apolar", 1.88], [448, 0, "Apolar", 1.61], [449, 1, "Polar", 1.42], + [450, 0, "Apolar", 1.88], [451, 0, "Apolar", 1.61], [452, 1, "Polar", 1.42], + [453, 1, "Polar", 1.46], [454, 1, "Polar", 1.64], [455, 0, "Apolar", 1.88], + [456, 0, "Apolar", 1.61], [457, 1, "Polar", 1.42], [458, 0, "Apolar", 1.88], + [459, 0, "Apolar", 1.61], [460, 0, "Apolar", 1.76], [461, 0, "Apolar", 1.76], + [462, 0, "Apolar", 1.76], [463, 0, "Apolar", 1.76], [464, 0, "Apolar", 1.61], + [465, 1, "Polar", 1.46], [466, 1, "Polar", 1.64], [467, 0, "Apolar", 1.88], + [468, 0, "Apolar", 1.61], [469, 1, "Polar", 1.42], [470, 0, "Apolar", 1.88], + [471, 0, "Apolar", 1.61], [472, 1, "Polar", 1.42], [473, 1, "Polar", 1.64], + [474, 1, "Polar", 1.64], [475, 0, "Apolar", 1.88], [476, 0, "Apolar", 1.61], + [477, 1, "Polar", 1.42], [478, 0, "Apolar", 1.88], [479, 0, "Apolar", 1.88], + [480, 0, "Apolar", 1.88], [481, 0, "Apolar", 1.88], [482, 1, "Polar", 1.64], + [483, 0, "Apolar", 1.88], [484, 0, "Apolar", 1.61], [485, 1, "Polar", 1.42], + [486, 0, "Apolar", 1.88], [487, 0, "Apolar", 1.88], [488, 0, "Apolar", 1.61], + [489, 1, "Polar", 1.42], [490, 1, "Polar", 1.64], [491, 1, "Polar", 1.64], + [492, 0, "Apolar", 1.88], [493, 0, "Apolar", 1.61], [494, 1, "Polar", 1.42], + [495, 0, "Apolar", 1.88], [496, 0, "Apolar", 1.88], [497, 0, "Apolar", 1.88], + [498, 0, "Apolar", 1.88], [499, 1, "Polar", 1.64], [500, 1, "Polar", 1.64], + [501, 0, "Apolar", 1.88], [502, 0, "Apolar", 1.61], [503, 1, "Polar", 1.42], + [504, 0, "Apolar", 1.88], [505, 0, "Apolar", 1.88], [506, 0, "Apolar", 1.61], + [507, 1, "Polar", 1.42], [508, 1, "Polar", 1.46], [509, 1, "Polar", 1.64], + [510, 0, "Apolar", 1.88], [511, 0, "Apolar", 1.61], [512, 1, "Polar", 1.42], + [513, 0, "Apolar", 1.88], [514, 1, "Polar", 1.46], [515, 1, "Polar", 1.64], + [516, 0, "Apolar", 1.88], [517, 0, "Apolar", 1.61], [518, 1, "Polar", 1.42], + [519, 0, "Apolar", 1.88], [520, 1, "Polar", 1.46], [521, 0, "Apolar", 1.88], + [522, 1, "Polar", 1.64], [523, 0, "Apolar", 1.88], [524, 0, "Apolar", 1.61], + [525, 1, "Polar", 1.42], [526, 0, "Apolar", 1.88], [527, 0, "Apolar", 1.88], + [528, 0, "Apolar", 1.88], [529, 0, "Apolar", 1.88], [530, 1, "Polar", 1.64], + [531, 0, "Apolar", 1.88], [532, 0, "Apolar", 1.61], [533, 1, "Polar", 1.42], + [534, 0, "Apolar", 1.88], [535, 0, "Apolar", 1.61], [536, 1, "Polar", 1.64], + [537, 0, "Apolar", 1.76], [538, 0, "Apolar", 1.76], [539, 1, "Polar", 1.64], + [540, 1, "Polar", 1.64], [541, 0, "Apolar", 1.88], [542, 0, "Apolar", 1.61], + [543, 1, "Polar", 1.42], [544, 0, "Apolar", 1.88], [545, 0, "Apolar", 1.88], + [546, 0, "Apolar", 1.88], [547, 0, "Apolar", 1.88], [548, 1, "Polar", 1.64], + [549, 0, "Apolar", 1.88], [550, 0, "Apolar", 1.61], [551, 1, "Polar", 1.42], + [552, 0, "Apolar", 1.88], [553, 0, "Apolar", 1.88], [554, 0, "Apolar", 1.88], + [555, 1, "Polar", 1.64], [556, 0, "Apolar", 1.88], [557, 0, "Apolar", 1.61], + [558, 1, "Polar", 1.42], [559, 0, "Apolar", 1.88], [560, 0, "Apolar", 1.88], + [561, 0, "Apolar", 1.88], [562, 0, "Apolar", 1.88], [563, 1, "Polar", 1.64], + [564, 0, "Apolar", 1.88], [565, 0, "Apolar", 1.61], [566, 1, "Polar", 1.42], + [567, 0, "Apolar", 1.88], [568, 0, "Apolar", 1.88], [569, 0, "Apolar", 1.88], + [570, 1, "Polar", 1.64], [571, 0, "Apolar", 1.61], [572, 1, "Polar", 1.64], + [573, 1, "Polar", 1.64], [574, 1, "Polar", 1.64], [575, 0, "Apolar", 1.88], + [576, 0, "Apolar", 1.61], [577, 1, "Polar", 1.42], [578, 0, "Apolar", 1.88], + [579, 0, "Apolar", 1.88], [580, 0, "Apolar", 1.88], [581, 0, "Apolar", 1.88], + [582, 1, "Polar", 1.64], [583, 0, "Apolar", 1.88], [584, 0, "Apolar", 1.61], + [585, 1, "Polar", 1.42], [586, 0, "Apolar", 1.88], [587, 0, "Apolar", 1.88], + [588, 0, "Apolar", 1.88], [589, 1, "Polar", 1.64], [590, 0, "Apolar", 1.61], + [591, 1, "Polar", 1.64], [592, 1, "Polar", 1.64], [593, 1, "Polar", 1.64], + [594, 0, "Apolar", 1.88], [595, 0, "Apolar", 1.61], [596, 1, "Polar", 1.42], + [597, 1, "Polar", 1.64], [598, 0, "Apolar", 1.88], [599, 0, "Apolar", 1.61], + [600, 1, "Polar", 1.42], [601, 1, "Polar", 1.46]] -class TestCase(unittest.TestCase) : - def test_basics(self): - fname = os.path.join(os.environ["RDBASE"], - "External", "FreeSASA", "test_data", "1d3z.pdb") - mol = Chem.MolFromPDBFile(fname) - radii = rdFreeSASA.classifyAtoms(mol) - for atom in mol.GetAtoms(): - self.assertEqual( expected[atom.GetIdx()][3], radii[atom.GetIdx()] ) - leeRichards = 5004.79964427 - shrakerupley = 5000.340175 - sasa = rdFreeSASA.CalcSASA(mol, radii=radii) - self.assertTrue( (sasa-leeRichards) < 1e-5 ) +class TestCase(unittest.TestCase): - opts = rdFreeSASA.SASAOpts(rdFreeSASA.ShrakeRupley, rdFreeSASA.Protor) - sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) - self.assertTrue( (sasa-shrakerupley) < 1e-5 ) + def test_basics(self): + fname = os.path.join(os.environ["RDBASE"], "External", "FreeSASA", "test_data", "1d3z.pdb") + mol = Chem.MolFromPDBFile(fname) + radii = rdFreeSASA.classifyAtoms(mol) + for atom in mol.GetAtoms(): + self.assertEqual(expected[atom.GetIdx()][3], radii[atom.GetIdx()]) + leeRichards = 5004.79964427 + shrakerupley = 5000.340175 - apolar = rdFreeSASA.CalcSASA(mol, radii, query=rdFreeSASA.MakeFreeSasaAPolarAtomQuery(), opts=opts) - polar = rdFreeSASA.CalcSASA(mol, radii, query=rdFreeSASA.MakeFreeSasaPolarAtomQuery(), opts=opts) + sasa = rdFreeSASA.CalcSASA(mol, radii=radii) + self.assertTrue((sasa - leeRichards) < 1e-5) - self.assertTrue( (polar + apolar - 5000.340175) < 1e-5 ) + opts = rdFreeSASA.SASAOpts(rdFreeSASA.ShrakeRupley, rdFreeSASA.Protor) + sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) + self.assertTrue((sasa - shrakerupley) < 1e-5) - def test_opts(self): - fname = os.path.join(os.environ["RDBASE"], - "External", "FreeSASA", "test_data", "1d3z.pdb") - mol = Chem.MolFromPDBFile(fname) - radii = rdFreeSASA.classifyAtoms(mol) - for atom in mol.GetAtoms(): - self.assertEqual( expected[atom.GetIdx()][3], radii[atom.GetIdx()] ) - leeRichards = 5004.79964427 - shrakerupley = 5000.340175 - opts = rdFreeSASA.SASAOpts() - for alg, res in ( (rdFreeSASA.ShrakeRupley, shrakerupley), - (rdFreeSASA.LeeRichards, leeRichards)): - opts.algorithm = alg - sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) - self.assertTrue( abs(sasa-res) < 1e-5 ) - leeRichards = 5009.93014166 - shrakerupley = 4977.7709106 - opts = rdFreeSASA.SASAOpts() - opts.probeRadius = 2.0 - for alg, res in ( (rdFreeSASA.ShrakeRupley, shrakerupley), - (rdFreeSASA.LeeRichards, leeRichards)): - opts.algorithm = alg - sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) - self.assertTrue( abs(sasa-res) < 1e-5 ) + apolar = rdFreeSASA.CalcSASA(mol, radii, query=rdFreeSASA.MakeFreeSasaAPolarAtomQuery(), + opts=opts) + polar = rdFreeSASA.CalcSASA(mol, radii, query=rdFreeSASA.MakeFreeSasaPolarAtomQuery(), + opts=opts) + + self.assertTrue((polar + apolar - 5000.340175) < 1e-5) + + def test_opts(self): + fname = os.path.join(os.environ["RDBASE"], "External", "FreeSASA", "test_data", "1d3z.pdb") + mol = Chem.MolFromPDBFile(fname) + radii = rdFreeSASA.classifyAtoms(mol) + for atom in mol.GetAtoms(): + self.assertEqual(expected[atom.GetIdx()][3], radii[atom.GetIdx()]) + leeRichards = 5004.79964427 + shrakerupley = 5000.340175 + opts = rdFreeSASA.SASAOpts() + for alg, res in ((rdFreeSASA.ShrakeRupley, shrakerupley), (rdFreeSASA.LeeRichards, + leeRichards)): + opts.algorithm = alg + sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) + self.assertTrue(abs(sasa - res) < 1e-5) + leeRichards = 5009.93014166 + shrakerupley = 4977.7709106 + opts = rdFreeSASA.SASAOpts() + opts.probeRadius = 2.0 + for alg, res in ((rdFreeSASA.ShrakeRupley, shrakerupley), (rdFreeSASA.LeeRichards, + leeRichards)): + opts.algorithm = alg + sasa = rdFreeSASA.CalcSASA(mol, radii=radii, opts=opts) + self.assertTrue(abs(sasa - res) < 1e-5) if __name__ == '__main__': diff --git a/External/INCHI-API/python/inchi.py b/External/INCHI-API/python/inchi.py index fdcd6c4c9..5c3a1ce8a 100644 --- a/External/INCHI-API/python/inchi.py +++ b/External/INCHI-API/python/inchi.py @@ -1,19 +1,19 @@ # # Copyright (c) 2011, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -33,8 +33,9 @@ INCHI_AVAILABLE = True import logging -from rdkit.Chem import rdinchi from rdkit import RDLogger +from rdkit.Chem import rdinchi + logger = RDLogger.logger() logLevelToLogFunctionLookup = { @@ -122,6 +123,7 @@ def MolToInchiAndAuxInfo(mol, options="", logLevel=None, treatWarningAsError=Fal raise InchiReadWriteError(inchi, aux, message) return inchi, aux + def MolBlockToInchiAndAuxInfo(molblock, options="", logLevel=None, treatWarningAsError=False): """Returns the standard InChI string and InChI auxInfo for a mol block @@ -154,6 +156,7 @@ def MolBlockToInchiAndAuxInfo(molblock, options="", logLevel=None, treatWarningA raise InchiReadWriteError(inchi, aux, message) return inchi, aux + def MolToInchi(mol, options="", logLevel=None, treatWarningAsError=False): """Returns the standard InChI string for a molecule @@ -182,6 +185,7 @@ def MolToInchi(mol, options="", logLevel=None, treatWarningAsError=False): raise InchiReadWriteError(inchi, message) return inchi + def MolBlockToInchi(molblock, options="", logLevel=None, treatWarningAsError=False): """Returns the standard InChI string for a mol block @@ -204,12 +208,13 @@ def MolBlockToInchi(molblock, options="", logLevel=None, treatWarningAsError=Fal try: inchi, aux = MolBlockToInchiAndAuxInfo(molblock, options, logLevel=logLevel, - treatWarningAsError=treatWarningAsError) + treatWarningAsError=treatWarningAsError) except InchiReadWriteError as inst: inchi, aux, message = inst.args raise InchiReadWriteError(inchi, message) return inchi + def InchiToInchiKey(inchi): """Return the InChI key for the given InChI string. Return None on error""" ret = rdinchi.InchiToInchiKey(inchi) @@ -218,17 +223,17 @@ def InchiToInchiKey(inchi): else: return None + def MolToInchiKey(mol, options=""): """Returns the standard InChI key for a molecule Returns: the standard InChI key returned by InChI API for the input molecule """ - return rdinchi.MolToInchiKey(mol,options) + return rdinchi.MolToInchiKey(mol, options) - - - -__all__ = ['MolToInchiAndAuxInfo', 'MolToInchi', 'MolBlockToInchiAndAuxInfo', 'MolBlockToInchi', 'MolFromInchi', 'InchiReadWriteError', - 'InchiToInchiKey', 'MolToInchiKey', 'INCHI_AVAILABLE'] +__all__ = [ + 'MolToInchiAndAuxInfo', 'MolToInchi', 'MolBlockToInchiAndAuxInfo', 'MolBlockToInchi', + 'MolFromInchi', 'InchiReadWriteError', 'InchiToInchiKey', 'MolToInchiKey', 'INCHI_AVAILABLE' +] diff --git a/External/INCHI-API/python/noinchi.py b/External/INCHI-API/python/noinchi.py index 5f44882a3..a074b1dc0 100644 --- a/External/INCHI-API/python/noinchi.py +++ b/External/INCHI-API/python/noinchi.py @@ -2,19 +2,19 @@ # Copyright (c) 2011, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS diff --git a/External/YAeHMOP/Wrap/testEHTTools.py b/External/YAeHMOP/Wrap/testEHTTools.py index 936d5157a..8e349f730 100755 --- a/External/YAeHMOP/Wrap/testEHTTools.py +++ b/External/YAeHMOP/Wrap/testEHTTools.py @@ -5,11 +5,11 @@ # The contents are covered by the terms of the BSD license # which is included in the file license.txt, found at the root # of the RDKit source tree. -from rdkit import RDConfig import os import sys import unittest -from rdkit import DataStructs, Chem + +from rdkit import Chem, DataStructs, RDConfig from rdkit.Chem import rdEHTTools diff --git a/External/pymol/modules/pymol/rpc.py b/External/pymol/modules/pymol/rpc.py index e84037630..37a99c4d7 100644 --- a/External/pymol/modules/pymol/rpc.py +++ b/External/pymol/modules/pymol/rpc.py @@ -14,9 +14,15 @@ RD Version: $Rev$ """ +import os +import sys +import tempfile +import threading +import time +import types + import SimpleXMLRPCServer -import threading, sys, time, types, os, tempfile -from pymol import cmd, cgo +from pymol import cgo, cmd # initial port to try for the server _xmlPort = 9123 @@ -248,20 +254,22 @@ is white o = [] else: o = [cgo.ALPHA, 1 - transparency] - o.extend([cgo.CYLINDER, - x1, - y1, - z1, - x2, - y2, - z2, - rad, - r1, - g1, - b1, - r2, - g2, - b2, ]) + o.extend([ + cgo.CYLINDER, + x1, + y1, + z1, + x2, + y2, + z2, + rad, + r1, + g1, + b1, + r2, + g2, + b2, + ]) obj.extend(o) cgoDict[id] = obj cmd.load_cgo(obj, id, 1) diff --git a/Projects/DbCLI/CreateDb.py b/Projects/DbCLI/CreateDb.py index f62f855df..361172112 100644 --- a/Projects/DbCLI/CreateDb.py +++ b/Projects/DbCLI/CreateDb.py @@ -2,19 +2,19 @@ # # Copyright (c) 2007, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -54,108 +54,116 @@ _description = """ """ import argparse -from rdkit import RDConfig -from rdkit import Chem -from rdkit.Dbase.DbConnection import DbConnect -from rdkit.Dbase import DbModule -from rdkit.RDLogger import logger +from rdkit import Chem, RDConfig from rdkit.Chem.MolDb import Loader +from rdkit.Dbase import DbModule +from rdkit.Dbase.DbConnection import DbConnect +from rdkit.RDLogger import logger logger = logger() -import sys, os import io +import os import pickle -from rdkit.Chem.MolDb.FingerprintUtils import BuildSigFactory, LayeredOptions +import sys + from rdkit.Chem.MolDb import FingerprintUtils +from rdkit.Chem.MolDb.FingerprintUtils import BuildSigFactory, LayeredOptions # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + def initParser(): - """ Initialize the command line parser """ + """ Initialize the command line parser """ parser = argparse.ArgumentParser(usage='CreateDB [optional arguments] ', description=_description, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('filename', nargs='?', help='File containg molecules to load into database') parser.add_argument('--version', action='version', version='%(prog)s ' + _version) - + parser.add_argument('--outDir', '--dbDir', default='', help='name of the output directory') parser.add_argument('--molDbName', default='Compounds.sqlt', help='name of the molecule database') parser.add_argument('--molIdName', default='compound_id', help='name of the database key column') parser.add_argument('--regName', default='molecules', help='name of the molecular registry table') - parser.add_argument('--pairDbName', default='AtomPairs.sqlt', help='name of the atom pairs database') + parser.add_argument('--pairDbName', default='AtomPairs.sqlt', + help='name of the atom pairs database') parser.add_argument('--pairTableName', default='atompairs', help='name of the atom pairs table') parser.add_argument('--fpDbName', default='Fingerprints.sqlt', - help='name of the 2D fingerprints database') + help='name of the 2D fingerprints database') parser.add_argument('--fpTableName', default='rdkitfps', help='name of the 2D fingerprints table') parser.add_argument('--layeredTableName', default='layeredfps', - help='name of the layered fingerprints table') + help='name of the layered fingerprints table') parser.add_argument('--descrDbName', default='Descriptors.sqlt', - help='name of the descriptor database') - parser.add_argument('--descrTableName', default='descriptors_v1', help='name of the descriptor table') - parser.add_argument('--descriptorCalcFilename', default=os.path.join(RDConfig.RDBaseDir, 'Projects', - 'DbCLI', 'moe_like.dsc'), - help='name of the file containing the descriptor calculator') - parser.add_argument('--errFilename', default='loadErrors.txt', - help='name of the file to contain information about molecules that fail to load') - parser.add_argument('--noPairs', default=True, dest='doPairs', action='store_false', - help='skip calculating atom pairs') - parser.add_argument('--noFingerprints', default=True, dest='doFingerprints', action='store_false', - help='skip calculating 2D fingerprints') - parser.add_argument('--noLayeredFps', default=True, dest='doLayered', action='store_false', - help='skip calculating layered fingerprints') - parser.add_argument('--noDescriptors', default=True, dest='doDescriptors', action='store_false', - help='skip calculating descriptors') - parser.add_argument('--noProps', default=False, dest='skipProps', action='store_true', - help="don't include molecular properties in the database") - parser.add_argument('--noSmiles', default=False, dest='skipSmiles', action='store_true', - help="don't include SMILES in the database (can make loading somewhat faster)") - parser.add_argument('--maxRowsCached', default=-1, - help="maximum number of rows to cache before doing a database commit") - - parser.add_argument('--silent', default=False, action='store_true', - help='do not provide status messages') - - parser.add_argument('--molFormat', default='', choices=('smiles', 'sdf', ''), - help='specify the format of the input file') + help='name of the descriptor database') + parser.add_argument('--descrTableName', default='descriptors_v1', + help='name of the descriptor table') + parser.add_argument('--descriptorCalcFilename', + default=os.path.join(RDConfig.RDBaseDir, 'Projects', 'DbCLI', 'moe_like.dsc'), + help='name of the file containing the descriptor calculator') parser.add_argument( - '--nameProp', default='_Name', - help='specify the SD property to be used for the molecule names. Default is to use the mol block name') + '--errFilename', default='loadErrors.txt', + help='name of the file to contain information about molecules that fail to load') + parser.add_argument('--noPairs', default=True, dest='doPairs', action='store_false', + help='skip calculating atom pairs') + parser.add_argument('--noFingerprints', default=True, dest='doFingerprints', action='store_false', + help='skip calculating 2D fingerprints') + parser.add_argument('--noLayeredFps', default=True, dest='doLayered', action='store_false', + help='skip calculating layered fingerprints') + parser.add_argument('--noDescriptors', default=True, dest='doDescriptors', action='store_false', + help='skip calculating descriptors') + parser.add_argument('--noProps', default=False, dest='skipProps', action='store_true', + help="don't include molecular properties in the database") + parser.add_argument( + '--noSmiles', default=False, dest='skipSmiles', action='store_true', + help="don't include SMILES in the database (can make loading somewhat faster)") + parser.add_argument('--maxRowsCached', default=-1, + help="maximum number of rows to cache before doing a database commit") + + parser.add_argument('--silent', default=False, action='store_true', + help='do not provide status messages') + + parser.add_argument('--molFormat', default='', choices=('smiles', 'sdf', ''), + help='specify the format of the input file') + parser.add_argument( + '--nameProp', default='_Name', help= + 'specify the SD property to be used for the molecule names. Default is to use the mol block name' + ) parser.add_argument( '--missingPropertyVal', default='N/A', help='value to insert in the database if a property value is missing. Default is %(default)s.') parser.add_argument('--addProps', default=False, action='store_true', - help='add computed properties to the output') + help='add computed properties to the output') parser.add_argument('--noExtras', default=False, action='store_true', - help='skip all non-molecule databases') - parser.add_argument('--skipLoad', '--skipMols', action="store_false", dest='loadMols', default=True, - help='skip the molecule loading (assumes mol db already exists)') + help='skip all non-molecule databases') + parser.add_argument('--skipLoad', '--skipMols', action="store_false", dest='loadMols', + default=True, + help='skip the molecule loading (assumes mol db already exists)') parser.add_argument('--updateDb', '--update', default=False, action='store_true', - help='add to an existing database') + help='add to an existing database') parser.add_argument('--doPharm2D', default=False, action='store_true', - help='skip calculating Pharm2D fingerprints') + help='skip calculating Pharm2D fingerprints') parser.add_argument('--pharm2DTableName', default='pharm2dfps', - help='name of the Pharm2D fingerprints table') - parser.add_argument('--fdefFile', '--fdef', - default=os.path.join(RDConfig.RDDataDir, 'Novartis1.fdef'), - help='provide the name of the fdef file to use for 2d pharmacophores') + help='name of the Pharm2D fingerprints table') + parser.add_argument('--fdefFile', '--fdef', default=os.path.join(RDConfig.RDDataDir, + 'Novartis1.fdef'), + help='provide the name of the fdef file to use for 2d pharmacophores') parser.add_argument('--doGobbi2D', default=False, action='store_true', - help='skip calculating Gobbi 2D fingerprints') + help='skip calculating Gobbi 2D fingerprints') parser.add_argument('--gobbi2DTableName', default='gobbi2dfps', - help='name of the Gobbi 2D fingerprints table') - + help='name of the Gobbi 2D fingerprints table') + parser.add_argument('--noMorganFps', '--noCircularFps', default=True, dest='doMorganFps', - action='store_false', help='skip calculating Morgan (circular) fingerprints') + action='store_false', help='skip calculating Morgan (circular) fingerprints') parser.add_argument('--morganFpTableName', default='morganfps', - help='name of the Morgan fingerprints table') - + help='name of the Morgan fingerprints table') + parser.add_argument('--delimiter', '--delim', default=' ', help='the delimiter in the input file') parser.add_argument('--titleLine', default=False, action='store_true', - help='the input file contains a title line') + help='the input file contains a title line') parser.add_argument('--smilesColumn', '--smilesCol', default=0, type=int, - help='the column index with smiles') + help='the column index with smiles') parser.add_argument('--nameColumn', '--nameCol', default=1, type=int, - help='the column index with mol names') + help='the column index with mol names') return parser @@ -202,9 +210,10 @@ def CreateDb(options, dataFilename='', supplier=None): if options.molFormat == 'smiles': if options.delimiter == '\\t': options.delimiter = '\t' - supplier = Chem.SmilesMolSupplier( - dataFilename, titleLine=options.titleLine, delimiter=options.delimiter, - smilesColumn=options.smilesColumn, nameColumn=options.nameColumn) + supplier = Chem.SmilesMolSupplier(dataFilename, titleLine=options.titleLine, + delimiter=options.delimiter, + smilesColumn=options.smilesColumn, + nameColumn=options.nameColumn) else: supplier = Chem.SDMolSupplier(dataFilename) if not options.silent: @@ -212,10 +221,9 @@ def CreateDb(options, dataFilename='', supplier=None): Loader.LoadDb(supplier, os.path.join(options.outDir, options.molDbName), errorsTo=errFile, regName=options.regName, nameCol=options.molIdName, skipProps=options.skipProps, defaultVal=options.missingPropertyVal, addComputedProps=options.addProps, - uniqNames=True, skipSmiles=options.skipSmiles, - maxRowsCached=int(options.maxRowsCached), silent=options.silent, - nameProp=options.nameProp, lazySupplier=int(options.maxRowsCached) > 0, - startAnew=not options.updateDb) + uniqNames=True, skipSmiles=options.skipSmiles, maxRowsCached=int( + options.maxRowsCached), silent=options.silent, nameProp=options.nameProp, + lazySupplier=int(options.maxRowsCached) > 0, startAnew=not options.updateDb) if options.doPairs: pairConn = DbConnect(os.path.join(options.outDir, options.pairDbName)) @@ -256,8 +264,8 @@ def CreateDb(options, dataFilename='', supplier=None): layeredQs = ','.join('?' * LayeredOptions.nWords) colDefs = ','.join(['Col_%d integer' % (x + 1) for x in range(LayeredOptions.nWords)]) fpCurs.execute( - 'create table %s (guid integer not null primary key,%s varchar not null unique,%s)' % ( - options.layeredTableName, options.molIdName, colDefs)) + 'create table %s (guid integer not null primary key,%s varchar not null unique,%s)' % + (options.layeredTableName, options.molIdName, colDefs)) if options.doPharm2D: fpCurs.execute( diff --git a/Projects/DbCLI/SearchDb.py b/Projects/DbCLI/SearchDb.py index fcb2c88a2..e70299a4d 100644 --- a/Projects/DbCLI/SearchDb.py +++ b/Projects/DbCLI/SearchDb.py @@ -2,19 +2,19 @@ # # Copyright (c) 2007-2013, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -48,22 +48,23 @@ _description = """ - Property names are not case sensitive in the database. """ -import os import argparse -import sys, time +import os +import sys +import time from rdkit import RDConfig from rdkit.Dbase.DbConnection import DbConnect - from rdkit.RDLogger import logger + logger = logger() import zlib -from rdkit import Chem -from rdkit.Chem.MolDb.FingerprintUtils import supportedSimilarityMethods, BuildSigFactory, DepickleFP, LayeredOptions +from rdkit import Chem, DataStructs from rdkit.Chem.MolDb import FingerprintUtils - -from rdkit import DataStructs +from rdkit.Chem.MolDb.FingerprintUtils import (BuildSigFactory, DepickleFP, + LayeredOptions, + supportedSimilarityMethods) def _molFromPkl(pkl): @@ -443,8 +444,8 @@ def RunSearch(options, queryFilename): if not options.transpose: for i, nm in ks: nbrs = nbrLists[(i, nm)] - nbrTxt = options.outputDelim.join([nm] + ['%s%s%.3f' % (nmDict[id], options.outputDelim, - score) for id, score in nbrs]) + nbrTxt = options.outputDelim.join( + [nm] + ['%s%s%.3f' % (nmDict[id], options.outputDelim, score) for id, score in nbrs]) if outF: print(nbrTxt, file=outF) else: @@ -510,23 +511,28 @@ def RunSearch(options, queryFilename): if not options.silent: logger.info('Done!') + # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + def initParser(): - """ Initialize the command line parser """ + """ Initialize the command line parser """ parser = argparse.ArgumentParser(usage='SearchDB [optional arguments] ', description=_description, formatter_class=argparse.RawDescriptionHelpFormatter) - + parser.add_argument('filename', nargs='?', help='File containg molecules for searching') parser.add_argument('--version', action='version', version='%(prog)s ' + _version) - parser.add_argument('--dbDir', default='', - help='name of the directory containing the database information. The default is the current directory') + parser.add_argument( + '--dbDir', default='', help= + 'name of the directory containing the database information. The default is the current directory' + ) parser.add_argument('--molDbName', default='Compounds.sqlt', help='name of the molecule database') parser.add_argument('--molIdName', default='compound_id', help='name of the database key column') parser.add_argument('--regName', default='molecules', help='name of the molecular registry table') - parser.add_argument('--pairDbName', default='AtomPairs.sqlt', help='name of the atom pairs database') + parser.add_argument('--pairDbName', default='AtomPairs.sqlt', + help='name of the atom pairs database') parser.add_argument('--pairTableName', default='atompairs', help='name of the atom pairs table') parser.add_argument('--pairColName', default='atompairfp', help='name of the atom pair column') parser.add_argument( @@ -537,93 +543,102 @@ def initParser(): help='name of the topological torsions table (usually the same as the atom pairs table)') parser.add_argument('--torsionsColName', default='torsionfp', help='name of the atom pair column') parser.add_argument('--fpDbName', default='Fingerprints.sqlt', - help='name of the 2D fingerprints database') + help='name of the 2D fingerprints database') parser.add_argument('--fpTableName', default='rdkitfps', help='name of the 2D fingerprints table') parser.add_argument('--layeredTableName', default='layeredfps', - help='name of the layered fingerprints table') + help='name of the layered fingerprints table') parser.add_argument('--fpColName', default='', - help='name of the 2D fingerprint column, a sensible default is used') + help='name of the 2D fingerprint column, a sensible default is used') parser.add_argument('--descrDbName', default='Descriptors.sqlt', - help='name of the descriptor database') - parser.add_argument('--descrTableName', default='descriptors_v1', help='name of the descriptor table') - parser.add_argument('--descriptorCalcFilename', default=os.path.join(RDConfig.RDBaseDir, 'Projects', - 'DbCLI', 'moe_like.dsc'), - help='name of the file containing the descriptor calculator') + help='name of the descriptor database') + parser.add_argument('--descrTableName', default='descriptors_v1', + help='name of the descriptor table') + parser.add_argument('--descriptorCalcFilename', + default=os.path.join(RDConfig.RDBaseDir, 'Projects', 'DbCLI', 'moe_like.dsc'), + help='name of the file containing the descriptor calculator') parser.add_argument('--outputDelim', default=',', - help='the delimiter for the output file. The default is %(default)s') + help='the delimiter for the output file. The default is %(default)s') parser.add_argument( '--topN', default=20, type=int, help='the number of neighbors to keep for each query compound. The default is %(default)s') - + parser.add_argument('--outF', '--outFile', default='-', - help='The name of the output file. The default is the console (stdout).') - + help='The name of the output file. The default is the console (stdout).') + parser.add_argument( - '--transpose', default=False, action="store_true", - help='print the results out in a transposed form: e.g. neighbors in rows and probe compounds in columns') - + '--transpose', default=False, action="store_true", help= + 'print the results out in a transposed form: e.g. neighbors in rows and probe compounds in columns' + ) + parser.add_argument('--molFormat', default='sdf', choices=('smiles', 'sdf'), - help='specify the format of the input file') + help='specify the format of the input file') parser.add_argument( - '--nameProp', default='_Name', - help='specify the SD property to be used for the molecule names. Default is to use the mol block name') - + '--nameProp', default='_Name', help= + 'specify the SD property to be used for the molecule names. Default is to use the mol block name' + ) + parser.add_argument('--smartsQuery', '--smarts', '--sma', default='', - help='provide a SMARTS to be used as a substructure query') + help='provide a SMARTS to be used as a substructure query') parser.add_argument('--smilesQuery', '--smiles', '--smi', default='', - help='provide a SMILES to be used as a substructure query') + help='provide a SMILES to be used as a substructure query') parser.add_argument('--negateQuery', '--negate', default=False, action='store_true', - help='negate the results of the smarts query.') + help='negate the results of the smarts query.') parser.add_argument('--propQuery', '--query', '-q', default='', - help='provide a property query (see the NOTE about property names)') - + help='provide a property query (see the NOTE about property names)') + parser.add_argument('--sdfOut', '--sdOut', default='', - help='export an SD file with the matching molecules') + help='export an SD file with the matching molecules') parser.add_argument('--smilesOut', '--smiOut', default='', - help='export a smiles file with the matching molecules') + help='export a smiles file with the matching molecules') parser.add_argument('--nonchiralSmiles', dest='chiralSmiles', default=True, action='store_false', - help='do not use chiral SMILES in the output') + help='do not use chiral SMILES in the output') parser.add_argument('--silent', default=False, action='store_true', - help='Do not generate status messages.') - + help='Do not generate status messages.') + parser.add_argument('--zipMols', '--zip', default=False, action='store_true', - help='read compressed mols from the database') - + help='read compressed mols from the database') + parser.add_argument('--pharm2DTableName', default='pharm2dfps', - help='name of the Pharm2D fingerprints table') - parser.add_argument('--fdefFile', '--fdef', - default=os.path.join(RDConfig.RDDataDir, 'Novartis1.fdef'), - help='provide the name of the fdef file to use for 2d pharmacophores') + help='name of the Pharm2D fingerprints table') + parser.add_argument('--fdefFile', '--fdef', default=os.path.join(RDConfig.RDDataDir, + 'Novartis1.fdef'), + help='provide the name of the fdef file to use for 2d pharmacophores') parser.add_argument('--gobbi2DTableName', default='gobbi2dfps', - help='name of the Gobbi2D fingerprints table') - + help='name of the Gobbi2D fingerprints table') + parser.add_argument( '--similarityType', '--simType', '--sim', default='RDK', choices=supportedSimilarityMethods, - help='Choose the type of similarity to use, possible values: RDK, AtomPairs, TopologicalTorsions, Pharm2D, Gobbi2D, Avalon, Morgan. The default is %(default)s') + help= + 'Choose the type of similarity to use, possible values: RDK, AtomPairs, TopologicalTorsions, Pharm2D, Gobbi2D, Avalon, Morgan. The default is %(default)s' + ) parser.add_argument('--morganFpDbName', default='Fingerprints.sqlt', - help='name of the morgan fingerprints database') + help='name of the morgan fingerprints database') parser.add_argument('--morganFpTableName', default='morganfps', - help='name of the morgan fingerprints table') + help='name of the morgan fingerprints table') parser.add_argument('--morganFpColName', default='morganfp', - help='name of the morgan fingerprint column') - + help='name of the morgan fingerprint column') + parser.add_argument( '--similarityMetric', '--simMetric', '--metric', default='', - choices=('tanimoto', 'dice', 'tversky', ''), - help='Choose the type of similarity to use, possible values: tanimoto, dice, tversky. The default is determined by the fingerprint type') + choices=('tanimoto', 'dice', 'tversky', ''), help= + 'Choose the type of similarity to use, possible values: tanimoto, dice, tversky. The default is determined by the fingerprint type' + ) parser.add_argument('--tverskyA', default=0.5, type=float, help='Tversky A value') parser.add_argument('--tverskyB', default=0.5, type=float, help='Tversky B value') parser.add_argument( '--simThresh', default=-1, type=float, - help='threshold to use for similarity searching. If provided, this supersedes the topN argument') + help='threshold to use for similarity searching. If provided, this supersedes the topN argument' + ) return parser + if __name__ == '__main__': parser = initParser() options = parser.parse_args() - - if options.filename is None and not (options.smilesQuery or options.smartsQuery or options.propQuery): + + if options.filename is None and not (options.smilesQuery or options.smartsQuery + or options.propQuery): parser.error('please either provide a query filename argument or do a data or smarts query') queryFilename = options.filename diff --git a/Projects/DbCLI/TestDbCLI.py b/Projects/DbCLI/TestDbCLI.py index 786374984..26119880c 100644 --- a/Projects/DbCLI/TestDbCLI.py +++ b/Projects/DbCLI/TestDbCLI.py @@ -4,16 +4,20 @@ # # @@ All Rights Reserved @@ # -import unittest, subprocess, os +import os +import subprocess +import sys +import unittest + from rdkit import RDConfig from rdkit.Dbase.DbConnection import DbConnect -import sys + class TestCase(unittest.TestCase): def test1Create(self): - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - 'testData/bzr.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -196,11 +200,13 @@ class TestCase(unittest.TestCase): os.unlink('testData/bzr/search.out') def test2_5SearchSmarts(self): - p = subprocess.Popen((sys.executable, - 'SearchDb.py', - '--dbDir=testData/bzr', - '--outF=testData/bzr/search.out', - '--smarts=cncncc', )) + p = subprocess.Popen(( + sys.executable, + 'SearchDb.py', + '--dbDir=testData/bzr', + '--outF=testData/bzr/search.out', + '--smarts=cncncc', + )) res = p.wait() self.assertFalse(res) @@ -213,19 +219,23 @@ class TestCase(unittest.TestCase): os.unlink('testData/bzr/search.out') if os.path.exists('/dev/null'): - p = subprocess.Popen((sys.executable, - 'SearchDb.py', - '--dbDir=testData/bzr', - '--outF=/dev/null', - '--smilesOut=testData/bzr/search.out', - '--smarts=cncncc', )) + p = subprocess.Popen(( + sys.executable, + 'SearchDb.py', + '--dbDir=testData/bzr', + '--outF=/dev/null', + '--smilesOut=testData/bzr/search.out', + '--smarts=cncncc', + )) else: - p = subprocess.Popen((sys.executable, - 'SearchDb.py', - '--dbDir=testData/bzr', - '--outF=testData/crud.out', - '--smilesOut=testData/bzr/search.out', - '--smarts=cncncc', )) + p = subprocess.Popen(( + sys.executable, + 'SearchDb.py', + '--dbDir=testData/bzr', + '--outF=testData/crud.out', + '--smilesOut=testData/bzr/search.out', + '--smarts=cncncc', + )) res = p.wait() self.assertFalse(res) p = None @@ -238,12 +248,14 @@ class TestCase(unittest.TestCase): if os.path.exists('testData/crud.out'): os.unlink('testData/crud.out') - p = subprocess.Popen((sys.executable, - 'SearchDb.py', - '--dbDir=testData/bzr', - '--outF=testData/bzr/search.out', - '--negate', - '--smarts=cncncc', )) + p = subprocess.Popen(( + sys.executable, + 'SearchDb.py', + '--dbDir=testData/bzr', + '--outF=testData/bzr/search.out', + '--negate', + '--smarts=cncncc', + )) res = p.wait() self.assertFalse(res) @@ -354,8 +366,8 @@ class TestCase(unittest.TestCase): if os.path.exists('testData/bzr/Fingerprints.sqlt'): os.unlink('testData/bzr/Fingerprints.sqlt') - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - '--noExtras', '--noSmiles', 'testData/bzr.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', '--noExtras', '--noSmiles', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -385,9 +397,9 @@ class TestCase(unittest.TestCase): if os.path.exists('testData/bzr/Fingerprints.sqlt'): os.unlink('testData/bzr/Fingerprints.sqlt') - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - '--noSmiles', '--noFingerprints', '--noLayeredFps', '--noMorganFps', - '--noPairs', '--noDescriptors', 'testData/bzr.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', '--noSmiles', '--noFingerprints', '--noLayeredFps', + '--noMorganFps', '--noPairs', '--noDescriptors', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -408,32 +420,9 @@ class TestCase(unittest.TestCase): conn.KillCursor() conn = None - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - '--noProps', '--noFingerprints', '--noLayeredFps', '--noMorganFps', - '--noPairs', '--noDescriptors', 'testData/bzr.smi')) - res = p.wait() - self.assertFalse(res) - p = None - - self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt')) - self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt')) - self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt')) - self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt')) - - conn = DbConnect('testData/bzr/Compounds.sqlt') - d = conn.GetData('molecules', fields='count(*)') - self.assertEqual(d[0][0], 10) - d = conn.GetData('molecules', fields='*') - self.assertEqual(len(d), 10) - cns = [x.lower() for x in d.GetColumnNames()] - self.assertTrue('smiles' in cns) - d = None - conn.KillCursor() - conn = None - - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - '--noFingerprints', '--noLayeredFps', '--noMorganFps', '--noPairs', - '--noDescriptors', '--maxRowsCached=4', 'testData/bzr.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', '--noProps', '--noFingerprints', '--noLayeredFps', + '--noMorganFps', '--noPairs', '--noDescriptors', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -455,8 +444,32 @@ class TestCase(unittest.TestCase): conn = None p = subprocess.Popen( - (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', '--noFingerprints', - '--noPairs', '--noDescriptors', '--maxRowsCached=4', 'testData/bzr.smi')) + (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', + '--noFingerprints', '--noLayeredFps', '--noMorganFps', '--noPairs', '--noDescriptors', + '--maxRowsCached=4', 'testData/bzr.smi')) + res = p.wait() + self.assertFalse(res) + p = None + + self.assertTrue(os.path.exists('testData/bzr/Compounds.sqlt')) + self.assertFalse(os.path.exists('testData/bzr/AtomPairs.sqlt')) + self.assertFalse(os.path.exists('testData/bzr/Descriptors.sqlt')) + self.assertFalse(os.path.exists('testData/bzr/Fingerprints.sqlt')) + + conn = DbConnect('testData/bzr/Compounds.sqlt') + d = conn.GetData('molecules', fields='count(*)') + self.assertEqual(d[0][0], 10) + d = conn.GetData('molecules', fields='*') + self.assertEqual(len(d), 10) + cns = [x.lower() for x in d.GetColumnNames()] + self.assertTrue('smiles' in cns) + d = None + conn.KillCursor() + conn = None + + p = subprocess.Popen( + (sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', + '--noFingerprints', '--noPairs', '--noDescriptors', '--maxRowsCached=4', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -512,8 +525,8 @@ class TestCase(unittest.TestCase): os.unlink('testData/bzr/search.out') def test6Update(self): - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - 'testData/bzr.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', 'testData/bzr.smi')) res = p.wait() self.assertFalse(res) p = None @@ -541,8 +554,8 @@ class TestCase(unittest.TestCase): d = None conn.KillCursor() - p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', '--molFormat=smiles', - '--updateDb', 'testData/bzr.2.smi')) + p = subprocess.Popen((sys.executable, 'CreateDb.py', '--dbDir=testData/bzr', + '--molFormat=smiles', '--updateDb', 'testData/bzr.2.smi')) res = p.wait() self.assertFalse(res) p = None diff --git a/Projects/DbCLI/test_list.py b/Projects/DbCLI/test_list.py index ecf68c5c6..9857d5bb2 100755 --- a/Projects/DbCLI/test_list.py +++ b/Projects/DbCLI/test_list.py @@ -1,9 +1,12 @@ -tests = [("python", "TestDbCLI.py", {}), ] +tests = [ + ("python", "TestDbCLI.py", {}), +] longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/Projects/test_list.py b/Projects/test_list.py index 7ec9f94f1..2074012da 100644 --- a/Projects/test_list.py +++ b/Projects/test_list.py @@ -1,9 +1,14 @@ -tests = [("python", "TestDbCLI.py", {'dir': 'DbCLI'}), ] +tests = [ + ("python", "TestDbCLI.py", { + 'dir': 'DbCLI' + }), +] longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner failed, tests = TestRunner.RunScript('test_list.py', 0, 1) sys.exit(len(failed)) diff --git a/Regress/Scripts/chiral_embed.py b/Regress/Scripts/chiral_embed.py index 615f2921f..61f72c02b 100644 --- a/Regress/Scripts/chiral_embed.py +++ b/Regress/Scripts/chiral_embed.py @@ -1,8 +1,8 @@ +import gzip from rdkit import Chem -from rdkit.Chem.PyMol import MolViewer from rdkit.Chem import AllChem -import gzip +from rdkit.Chem.PyMol import MolViewer try: v = MolViewer() diff --git a/Regress/Scripts/fingerprint_screenout.py b/Regress/Scripts/fingerprint_screenout.py index 3325ecde1..57f599059 100644 --- a/Regress/Scripts/fingerprint_screenout.py +++ b/Regress/Scripts/fingerprint_screenout.py @@ -7,19 +7,19 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -import time +import argparse import gzip +import time + import rdkit -from rdkit import Chem -from rdkit import DataStructs +from rdkit import Chem, DataStructs from rdkit.Chem import AllChem from rdkit.RDLogger import logger -import argparse logger = logger() parser = argparse.ArgumentParser( - description="benchmark and test fingerprint screenout and substructure searching") + description="benchmark and test fingerprint screenout and substructure searching") parser.add_argument("--validate", dest='validateResults', default=False, action='store_true', help="validate that the screenout isn't missing anything") parser.add_argument("--short", dest='doShort', default=False, action='store_true', @@ -33,19 +33,18 @@ mols = [] t1 = time.time() # find this file here: https://raw.githubusercontent.com/greglandrum/rdkit_blog/master/data/chembl21_25K.pairs.txt.gz with gzip.open('../Data/chembl21_25K.pairs.txt.gz', 'rb') as inf: - for line in inf: - line = line.decode().strip().split() - smi1 = line[1] - smi2 = line[3] - mols.append(Chem.MolFromSmiles(smi1)) - mols.append(Chem.MolFromSmiles(smi2)) - if args.doShort and len(mols) >= 1000: - break + for line in inf: + line = line.decode().strip().split() + smi1 = line[1] + smi2 = line[3] + mols.append(Chem.MolFromSmiles(smi1)) + mols.append(Chem.MolFromSmiles(smi2)) + if args.doShort and len(mols) >= 1000: + break t2 = time.time() ts.append(t2 - t1) logger.info(f'Results{len(ts)}: {t2-t1 : .2f} seconds, {len(mols)} mols') - logger.info('queries from smiles') t1 = time.time() # find this file here: https://raw.githubusercontent.com/greglandrum/rdkit_blog/master/data/zinc.frags.500.q.smi @@ -58,7 +57,6 @@ t2 = time.time() ts.append(t2 - t1) logger.info(f'Results{len(ts)}: {t2-t1 : .2f} seconds') - logger.info('generating pattern fingerprints for mols') t1 = time.time() mfps = [Chem.PatternFingerprint(m) for m in mols] @@ -66,7 +64,6 @@ t2 = time.time() ts.append(t2 - t1) logger.info(f'Results{len(ts)}: {t2-t1 : .2f} seconds') - logger.info('generating pattern fingerprints for queries') t1 = time.time() fragsfps = [Chem.PatternFingerprint(m, 2048) for m in frags] @@ -76,35 +73,36 @@ t2 = time.time() ts.append(t2 - t1) logger.info(f'Results{len(ts)}: {t2-t1 : .2f} seconds') -for nm, qs, qfps in [('frags', frags, fragsfps), ('leads', leads, leadsfps), ('pieces', pieces, piecesfps)]: - logger.info(f'testing {nm} queries') - t1 = time.time() - nPossible = 0 - nTested = 0 - nFound = 0 - nErrors = 0 - for i, fragfp in enumerate(qfps): - for j, mfp in enumerate(mfps): - nPossible += 1 - if args.validateResults: - matched = mols[j].HasSubstructMatch(qs[i]) - fpMatch = DataStructs.AllProbeBitsMatch(fragfp, mfp) - if fpMatch: - nTested += 1 - if matched: - nFound += 1 - if not fpMatch: - nErrors += 1 - logger.error(f"ERROR: mol {j} query {i}") - else: - if DataStructs.AllProbeBitsMatch(fragfp, mfp): - nTested += 1 - if mols[j].HasSubstructMatch(qs[i]): - nFound += 1 - t2 = time.time() - ts.append(t2 - t1) - logger.info( - f'Results{len(ts)}: {t2-t1 : .2f} seconds. {nTested} tested ({nTested/nPossible :.4f} of total), {nFound} found, {nFound/nTested : .2f} accuracy. {nErrors} errors.') - +for nm, qs, qfps in [('frags', frags, fragsfps), ('leads', leads, leadsfps), + ('pieces', pieces, piecesfps)]: + logger.info(f'testing {nm} queries') + t1 = time.time() + nPossible = 0 + nTested = 0 + nFound = 0 + nErrors = 0 + for i, fragfp in enumerate(qfps): + for j, mfp in enumerate(mfps): + nPossible += 1 + if args.validateResults: + matched = mols[j].HasSubstructMatch(qs[i]) + fpMatch = DataStructs.AllProbeBitsMatch(fragfp, mfp) + if fpMatch: + nTested += 1 + if matched: + nFound += 1 + if not fpMatch: + nErrors += 1 + logger.error(f"ERROR: mol {j} query {i}") + else: + if DataStructs.AllProbeBitsMatch(fragfp, mfp): + nTested += 1 + if mols[j].HasSubstructMatch(qs[i]): + nFound += 1 + t2 = time.time() + ts.append(t2 - t1) + logger.info( + f'Results{len(ts)}: {t2-t1 : .2f} seconds. {nTested} tested ({nTested/nPossible :.4f} of total), {nFound} found, {nFound/nTested : .2f} accuracy. {nErrors} errors.' + ) print(f"| {rdkit.__version__} | {' | '.join(['%.1f' % x for x in ts])} |") diff --git a/Regress/Scripts/new_timings.py b/Regress/Scripts/new_timings.py index 20d7dc7d9..43391bba0 100644 --- a/Regress/Scripts/new_timings.py +++ b/Regress/Scripts/new_timings.py @@ -1,9 +1,9 @@ - -import time import gzip -import random import os +import random import sys +import time + import rdkit from rdkit import Chem from rdkit.Chem import AllChem @@ -13,189 +13,189 @@ dname = os.path.dirname(__file__) def data(fname): - return os.path.join(dname, '..', 'Data', fname) + return os.path.join(dname, '..', 'Data', fname) logger = logger() tests = [1] * 1001 if len(sys.argv) > 1: - tests = [0] * 1001 - for x in sys.argv[1:]: - x = int(x) - tests[x] = 1 + tests = [0] * 1001 + for x in sys.argv[1:]: + x = int(x) + tests[x] = 1 ts = [] mols = [] if tests[0]: - lines = gzip.open(data('znp.50k.smi.gz'), 'rt').readlines() - logger.info('mols from smiles') - nMols = 0 - nBad = 0 - t1 = time.time() - for line in lines: - line = line.strip().split(' ') - m = Chem.MolFromSmiles(line[0]) - if m: - nMols += 1 - mols.append(m) - else: - nBad += 1 + lines = gzip.open(data('znp.50k.smi.gz'), 'rt').readlines() + logger.info('mols from smiles') + nMols = 0 + nBad = 0 + t1 = time.time() + for line in lines: + line = line.strip().split(' ') + m = Chem.MolFromSmiles(line[0]) + if m: + nMols += 1 + mols.append(m) + else: + nBad += 1 - t2 = time.time() - logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) - ts.append(t2 - t1) + t2 = time.time() + logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) + ts.append(t2 - t1) if tests[1]: - logger.info('Writing: Canonical SMILES') - t1 = time.time() - for mol in mols: - smi = Chem.MolToSmiles(mol, True) - t2 = time.time() - logger.info('Results2: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Writing: Canonical SMILES') + t1 = time.time() + for mol in mols: + smi = Chem.MolToSmiles(mol, True) + t2 = time.time() + logger.info('Results2: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[2]: - sdData = gzip.open(data('mols.1000.sdf.gz'), 'rb').read() - logger.info('mols from sdf') - suppl = Chem.SDMolSupplier() - suppl.SetData(sdData) - nMols = 0 - nBad = 0 - t1 = time.time() - for i in range(10): - for m in suppl: - if m: - nMols += 1 - # mols.append(m) - else: - nBad += 1 - t2 = time.time() - logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) - ts.append(t2 - t1) + sdData = gzip.open(data('mols.1000.sdf.gz'), 'rb').read() + logger.info('mols from sdf') + suppl = Chem.SDMolSupplier() + suppl.SetData(sdData) + nMols = 0 + nBad = 0 + t1 = time.time() + for i in range(10): + for m in suppl: + if m: + nMols += 1 + # mols.append(m) + else: + nBad += 1 + t2 = time.time() + logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) + ts.append(t2 - t1) if tests[3] or tests[4] or tests[5]: - pattData = gzip.open(data('queries.txt.gz'), 'rt').readlines() - pattData = [x.strip().replace('[H]', '').replace('()', '') for x in pattData] - logger.info('patterns from smiles') - patts = [] - nMols = 0 - t1 = time.time() - for line in pattData: - m = Chem.MolFromSmarts(line) - if m: - nMols += 1 - patts.append(m) - else: - nBad += 1 - t2 = time.time() - logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) - ts.append(t2 - t1) - random.seed(23) - random.shuffle(patts) - patts = patts[:100] + pattData = gzip.open(data('queries.txt.gz'), 'rt').readlines() + pattData = [x.strip().replace('[H]', '').replace('()', '') for x in pattData] + logger.info('patterns from smiles') + patts = [] + nMols = 0 + t1 = time.time() + for line in pattData: + m = Chem.MolFromSmarts(line) + if m: + nMols += 1 + patts.append(m) + else: + nBad += 1 + t2 = time.time() + logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) + ts.append(t2 - t1) + random.seed(23) + random.shuffle(patts) + patts = patts[:100] if tests[4]: - logger.info('Matching1: HasSubstructMatch') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.HasSubstructMatch(patt) - t2 = time.time() - logger.info('Results4: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching1: HasSubstructMatch') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.HasSubstructMatch(patt) + t2 = time.time() + logger.info('Results4: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[5]: - logger.info('Matching2: GetSubstructMatches') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.GetSubstructMatches(patt) - t2 = time.time() - logger.info('Results5: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching2: GetSubstructMatches') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.GetSubstructMatches(patt) + t2 = time.time() + logger.info('Results5: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[6] or tests[7] or tests[8]: - logger.info('reading SMARTS') - patts = [] - t1 = time.time() - for line in open(data('RLewis_smarts.txt')): - line = line.strip() - if line == '' or line[0] == '#': - continue - splitL = line.split(' ') - sma = splitL[0] - m = Chem.MolFromSmarts(sma) - if m: - patts.append(m) - t2 = time.time() - logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts))) - ts.append(t2 - t1) + logger.info('reading SMARTS') + patts = [] + t1 = time.time() + for line in open(data('RLewis_smarts.txt')): + line = line.strip() + if line == '' or line[0] == '#': + continue + splitL = line.split(' ') + sma = splitL[0] + m = Chem.MolFromSmarts(sma) + if m: + patts.append(m) + t2 = time.time() + logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts))) + ts.append(t2 - t1) if tests[7]: - logger.info('Matching3: HasSubstructMatch') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.HasSubstructMatch(patt) - t2 = time.time() - logger.info('Results7: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching3: HasSubstructMatch') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.HasSubstructMatch(patt) + t2 = time.time() + logger.info('Results7: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[8]: - logger.info('Matching4: GetSubstructMatches') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.GetSubstructMatches(patt) - t2 = time.time() - logger.info('Results8: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching4: GetSubstructMatches') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.GetSubstructMatches(patt) + t2 = time.time() + logger.info('Results8: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[9]: - logger.info('Writing: Mol blocks') - t1 = time.time() - for mol in mols: - mb = Chem.MolToMolBlock(mol) - t2 = time.time() - logger.info('Results10: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Writing: Mol blocks') + t1 = time.time() + for mol in mols: + mb = Chem.MolToMolBlock(mol) + t2 = time.time() + logger.info('Results10: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[10]: - from rdkit.Chem import BRICS - logger.info('BRICS decomposition') - t1 = time.time() - for mol in mols: - d = BRICS.BreakBRICSBonds(mol) - t2 = time.time() - logger.info('Results11: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + from rdkit.Chem import BRICS + logger.info('BRICS decomposition') + t1 = time.time() + for mol in mols: + d = BRICS.BreakBRICSBonds(mol) + t2 = time.time() + logger.info('Results11: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[11]: - logger.info('Generate 2D coords') - t1 = time.time() - for mol in mols: - AllChem.Compute2DCoords(mol) - t2 = time.time() - logger.info('Results12: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate 2D coords') + t1 = time.time() + for mol in mols: + AllChem.Compute2DCoords(mol) + t2 = time.time() + logger.info('Results12: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[12]: - logger.info('Generate topological fingerprints') - t1 = time.time() - for mol in mols: - Chem.RDKFingerprint(mol) - t2 = time.time() - logger.info('Results16: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate topological fingerprints') + t1 = time.time() + for mol in mols: + Chem.RDKFingerprint(mol) + t2 = time.time() + logger.info('Results16: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[13]: - logger.info('Generate morgan fingerprints') - t1 = time.time() - for mol in mols: - AllChem.GetMorganFingerprint(mol, radius=2) - t2 = time.time() - logger.info('Results16: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate morgan fingerprints') + t1 = time.time() + for mol in mols: + AllChem.GetMorganFingerprint(mol, radius=2) + t2 = time.time() + logger.info('Results16: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) print(f"| {rdkit.__version__} | {' | '.join(['%.1f' % x for x in ts])} |") diff --git a/Regress/Scripts/timings.py b/Regress/Scripts/timings.py index 343cad2e7..8f5a5fdda 100644 --- a/Regress/Scripts/timings.py +++ b/Regress/Scripts/timings.py @@ -1,24 +1,23 @@ - -import time import gzip -import random import os +import random import sys +import time + import rdkit from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import Recap +from rdkit.Chem import AllChem, Recap from rdkit.RDLogger import logger logger = logger() tests = [1] * 1001 if len(sys.argv) > 1: - tests = [0] * 1001 - tests[1] = 1 - for x in sys.argv[1:]: - x = int(x) - tests[x] = 1 + tests = [0] * 1001 + tests[1] = 1 + for x in sys.argv[1:]: + x = int(x) + tests[x] = 1 ts = [] sdData = gzip.open('../Data/mols.1000.sdf.gz').read() @@ -30,219 +29,219 @@ nMols = 0 nBad = 0 t1 = time.time() for m in suppl: - if m: - nMols += 1 - mols.append(m) - else: - nBad += 1 + if m: + nMols += 1 + mols.append(m) + else: + nBad += 1 t2 = time.time() logger.info('Results1: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) ts.append(t2 - t1) if tests[2]: - lines = gzip.open('../Data/mols.1000.txt.gz').readlines() - logger.info('mols from smiles') - nMols = 0 - nBad = 0 - t1 = time.time() - for line in lines: - line = line.decode().strip().split(' ') - m = Chem.MolFromSmiles(line[1]) - if m: - nMols += 1 - else: - nBad += 1 - t2 = time.time() - logger.info('Results2: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) - ts.append(t2 - t1) + lines = gzip.open('../Data/mols.1000.txt.gz').readlines() + logger.info('mols from smiles') + nMols = 0 + nBad = 0 + t1 = time.time() + for line in lines: + line = line.decode().strip().split(' ') + m = Chem.MolFromSmiles(line[1]) + if m: + nMols += 1 + else: + nBad += 1 + t2 = time.time() + logger.info('Results2: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) + ts.append(t2 - t1) if tests[3] or tests[4] or tests[5]: - pattData = gzip.open('../Data/queries.txt.gz').readlines() - pattData = [x.decode().strip().replace('[H]', '').replace('()', '') for x in pattData] - logger.info('patterns from smiles') - patts = [] - nMols = 0 - t1 = time.time() - for line in pattData: - m = Chem.MolFromSmarts(line) - if m: - nMols += 1 - patts.append(m) - else: - nBad += 1 - t2 = time.time() - logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) - ts.append(t2 - t1) - random.seed(23) - random.shuffle(patts) - patts = patts[:100] + pattData = gzip.open('../Data/queries.txt.gz').readlines() + pattData = [x.decode().strip().replace('[H]', '').replace('()', '') for x in pattData] + logger.info('patterns from smiles') + patts = [] + nMols = 0 + t1 = time.time() + for line in pattData: + m = Chem.MolFromSmarts(line) + if m: + nMols += 1 + patts.append(m) + else: + nBad += 1 + t2 = time.time() + logger.info('Results3: %.2f seconds, %d passed, %d failed' % (t2 - t1, nMols, nBad)) + ts.append(t2 - t1) + random.seed(23) + random.shuffle(patts) + patts = patts[:100] if tests[4]: - logger.info('Matching1: HasSubstructMatch') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.HasSubstructMatch(patt) - t2 = time.time() - logger.info('Results4: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching1: HasSubstructMatch') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.HasSubstructMatch(patt) + t2 = time.time() + logger.info('Results4: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[5]: - logger.info('Matching2: GetSubstructMatches') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.GetSubstructMatches(patt) - t2 = time.time() - logger.info('Results5: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching2: GetSubstructMatches') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.GetSubstructMatches(patt) + t2 = time.time() + logger.info('Results5: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[6] or tests[7] or tests[8]: - logger.info('reading SMARTS') - patts = [] - t1 = time.time() - for line in open('../Data/RLewis_smarts.txt'): - line = line.strip() - if line == '' or line[0] == '#': - continue - splitL = line.split(' ') - sma = splitL[0] - m = Chem.MolFromSmarts(sma) - if m: - patts.append(m) - t2 = time.time() - logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts))) - ts.append(t2 - t1) + logger.info('reading SMARTS') + patts = [] + t1 = time.time() + for line in open('../Data/RLewis_smarts.txt'): + line = line.strip() + if line == '' or line[0] == '#': + continue + splitL = line.split(' ') + sma = splitL[0] + m = Chem.MolFromSmarts(sma) + if m: + patts.append(m) + t2 = time.time() + logger.info('Results6: %.2f seconds for %d patterns' % (t2 - t1, len(patts))) + ts.append(t2 - t1) if tests[7]: - logger.info('Matching3: HasSubstructMatch') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.HasSubstructMatch(patt) - t2 = time.time() - logger.info('Results7: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching3: HasSubstructMatch') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.HasSubstructMatch(patt) + t2 = time.time() + logger.info('Results7: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[8]: - logger.info('Matching4: GetSubstructMatches') - t1 = time.time() - for mol in mols: - for patt in patts: - mol.GetSubstructMatches(patt) - t2 = time.time() - logger.info('Results8: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Matching4: GetSubstructMatches') + t1 = time.time() + for mol in mols: + for patt in patts: + mol.GetSubstructMatches(patt) + t2 = time.time() + logger.info('Results8: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[9]: - logger.info('Writing: Canonical SMILES') - t1 = time.time() - for mol in mols: - smi = Chem.MolToSmiles(mol) - t2 = time.time() - logger.info('Results9: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Writing: Canonical SMILES') + t1 = time.time() + for mol in mols: + smi = Chem.MolToSmiles(mol) + t2 = time.time() + logger.info('Results9: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[10]: - logger.info('Generate 2D coords') - t1 = time.time() - for mol in mols: - AllChem.Compute2DCoords(mol) - t2 = time.time() - logger.info('Results10: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate 2D coords') + t1 = time.time() + for mol in mols: + AllChem.Compute2DCoords(mol) + t2 = time.time() + logger.info('Results10: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[11]: - logger.info('Writing: Mol blocks') - t1 = time.time() - for mol in mols: - mb = Chem.MolToMolBlock(mol) - t2 = time.time() - logger.info('Results11: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Writing: Mol blocks') + t1 = time.time() + for mol in mols: + mb = Chem.MolToMolBlock(mol) + t2 = time.time() + logger.info('Results11: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[12]: - logger.info('RECAP decomposition') - t1 = time.time() - for mol in mols: - d = Recap.RecapDecompose(mol) - t2 = time.time() - logger.info('Results12: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('RECAP decomposition') + t1 = time.time() + for mol in mols: + d = Recap.RecapDecompose(mol) + t2 = time.time() + logger.info('Results12: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[13]: - logger.info('Generate 3D coords for 50 molecules with ETKDG') - mols3d = mols[200:250] - t1 = time.time() - nBad = 0 - for mol in mols3d: - cid = AllChem.EmbedMolecule(mol, randomSeed=0xF00D, - useExpTorsionAnglePrefs=True, useBasicKnowledge=True) - if cid < 0: - nBad += 1 - t2 = time.time() - logger.info('Results13: %.2f seconds %d failures' % (t2 - t1, nBad)) - ts.append(t2 - t1) + logger.info('Generate 3D coords for 50 molecules with ETKDG') + mols3d = mols[200:250] + t1 = time.time() + nBad = 0 + for mol in mols3d: + cid = AllChem.EmbedMolecule(mol, randomSeed=0xF00D, useExpTorsionAnglePrefs=True, + useBasicKnowledge=True) + if cid < 0: + nBad += 1 + t2 = time.time() + logger.info('Results13: %.2f seconds %d failures' % (t2 - t1, nBad)) + ts.append(t2 - t1) if tests[14]: - logger.info('UFF optimizing those:') - t1 = time.time() - for mol in mols3d: - if not mol.GetNumConformers(): - continue - mol = Chem.Mol(mol) - needMore = 1 - while needMore: - needMore = AllChem.UFFOptimizeMolecule(mol, maxIters=200) - t2 = time.time() - logger.info('Results14: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('UFF optimizing those:') + t1 = time.time() + for mol in mols3d: + if not mol.GetNumConformers(): + continue + mol = Chem.Mol(mol) + needMore = 1 + while needMore: + needMore = AllChem.UFFOptimizeMolecule(mol, maxIters=200) + t2 = time.time() + logger.info('Results14: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[15]: - logger.info('MMFF optimizing the molecules:') - t1 = time.time() - for i, mol in enumerate(mols3d): - mol = Chem.Mol(mol) - if not mol.GetNumConformers(): - continue - if not AllChem.MMFFHasAllMoleculeParams(mol): - continue - needMore = 1 - while needMore: - try: - needMore = AllChem.MMFFOptimizeMolecule(mol, maxIters=200) - except ValueError: - logger.warning('Problems with MMFF and mol %d' % i) - break - t2 = time.time() - logger.info('Results15: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('MMFF optimizing the molecules:') + t1 = time.time() + for i, mol in enumerate(mols3d): + mol = Chem.Mol(mol) + if not mol.GetNumConformers(): + continue + if not AllChem.MMFFHasAllMoleculeParams(mol): + continue + needMore = 1 + while needMore: + try: + needMore = AllChem.MMFFOptimizeMolecule(mol, maxIters=200) + except ValueError: + logger.warning('Problems with MMFF and mol %d' % i) + break + t2 = time.time() + logger.info('Results15: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[16]: - logger.info('Find unique subgraphs') - t1 = time.time() - for mol in mols: - Chem.FindUniqueSubgraphsOfLengthN(mol, 6) - t2 = time.time() - logger.info('Results16: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Find unique subgraphs') + t1 = time.time() + for mol in mols: + Chem.FindUniqueSubgraphsOfLengthN(mol, 6) + t2 = time.time() + logger.info('Results16: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[17]: - logger.info('Generate topological fingerprints') - t1 = time.time() - for mol in mols: - Chem.RDKFingerprint(mol) - t2 = time.time() - logger.info('Results17: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate topological fingerprints') + t1 = time.time() + for mol in mols: + Chem.RDKFingerprint(mol) + t2 = time.time() + logger.info('Results17: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) if tests[18]: - logger.info('Generate morgan fingerprints') - t1 = time.time() - for mol in mols: - AllChem.GetMorganFingerprint(mol, radius=2) - t2 = time.time() - logger.info('Results18: %.2f seconds' % (t2 - t1)) - ts.append(t2 - t1) + logger.info('Generate morgan fingerprints') + t1 = time.time() + for mol in mols: + AllChem.GetMorganFingerprint(mol, radius=2) + t2 = time.time() + logger.info('Results18: %.2f seconds' % (t2 - t1)) + ts.append(t2 - t1) print(f"| {rdkit.__version__} | {' | '.join(['%.1f' % x for x in ts])} |") diff --git a/Scripts/FeatFinderCLI.py b/Scripts/FeatFinderCLI.py index 1e51996dc..36a768c4d 100644 --- a/Scripts/FeatFinderCLI.py +++ b/Scripts/FeatFinderCLI.py @@ -6,4 +6,5 @@ separated file and annotates the atoms of the molecules with their pharmacophore Use 'FeatFinderCLI.py --help' for further information ''' from rdkit.Chem import FeatFinderCLI + FeatFinderCLI.main() diff --git a/Scripts/PythonFormat.py b/Scripts/PythonFormat.py index 3b186c0b1..562135ef5 100644 --- a/Scripts/PythonFormat.py +++ b/Scripts/PythonFormat.py @@ -13,14 +13,17 @@ If changes are found, the script will exit with error code 1, otherwise 0. ''' import os -from yapf.yapflib.yapf_api import FormatCode import sys +from yapf.yapflib.yapf_api import FormatCode + rdbase = os.environ.get('RDBASE', '') styleConfig = os.path.join(rdbase, 'setup.cfg') -excludeDirs = [os.path.join(rdbase, 'build'), - os.path.join(rdbase, 'External'), ] +excludeDirs = [ + os.path.join(rdbase, 'build'), + os.path.join(rdbase, 'External'), +] def pythonFiles(dirname=rdbase): diff --git a/Scripts/run_python_tests.py b/Scripts/run_python_tests.py index bdae3f719..501da9087 100644 --- a/Scripts/run_python_tests.py +++ b/Scripts/run_python_tests.py @@ -7,16 +7,17 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from rdkit import RDConfig -from rdkit import TestRunner -import os,time,sys +import os +import sys +import time + +from rdkit import RDConfig, TestRunner if __name__ == '__main__': - script = 'test_list.py' - os.chdir(RDConfig.RDCodeDir) - t1 = time.time() - failed,nTests = TestRunner.RunScript(script,doLongTests=False, - verbose=True) - t2 = time.time() - TestRunner.ReportResults(script,failed,nTests,t2-t1,verbose=True,dest=sys.stderr) - sys.exit(len(failed)) + script = 'test_list.py' + os.chdir(RDConfig.RDCodeDir) + t1 = time.time() + failed, nTests = TestRunner.RunScript(script, doLongTests=False, verbose=True) + t2 = time.time() + TestRunner.ReportResults(script, failed, nTests, t2 - t1, verbose=True, dest=sys.stderr) + sys.exit(len(failed)) diff --git a/Web/RDExtras/MolDepict.py b/Web/RDExtras/MolDepict.py index 026bc438e..4f93e9cd5 100644 --- a/Web/RDExtras/MolDepict.py +++ b/Web/RDExtras/MolDepict.py @@ -1,5 +1,9 @@ +import os +import sys +import tempfile +import urllib + from mod_python import apache -import sys, os, tempfile, urllib from WebUtils import General General._version = "1.0.0" diff --git a/Web/RDExtras/MolImage.py b/Web/RDExtras/MolImage.py index 664f2ae2d..76c13c9df 100644 --- a/Web/RDExtras/MolImage.py +++ b/Web/RDExtras/MolImage.py @@ -3,13 +3,16 @@ # Copyright (C) 2004, 2005 Rational Discovery LLC # All Rights Reserved # +import os +import sys +import tempfile + import Chem -from Chem.Draw.MolDrawing import MolDrawing from Chem import TemplateAlign -from sping.SVG.pidSVG import SVGCanvas as Canvas +from Chem.Draw.MolDrawing import MolDrawing from mod_python import apache +from sping.SVG.pidSVG import SVGCanvas as Canvas from utils import cactvs -import sys, os, tempfile def gif(req, smiles, width=100, height=100, highlight='[]', frame=0, dblSize=0, **kwargs): diff --git a/build_support/pkg_version.py b/build_support/pkg_version.py index 87d0fea5a..9954173c0 100644 --- a/build_support/pkg_version.py +++ b/build_support/pkg_version.py @@ -1,7 +1,9 @@ from __future__ import print_function + import os import re from datetime import datetime + from setuptools import setup pkg_version = '' @@ -16,20 +18,17 @@ if (not have_src_dir): raise OSError('Could not find SRC_DIR, got: ' + str(src_dir)) # parse root CMakeLists.txt and Code/cmake/Modules/RDKitUtils.cmake root_cmakelists_path = os.path.join(src_dir, 'CMakeLists.txt') -rdkitutils_path = os.path.join(src_dir, 'Code', 'cmake', - 'Modules', 'RDKitUtils.cmake') +rdkitutils_path = os.path.join(src_dir, 'Code', 'cmake', 'Modules', 'RDKitUtils.cmake') var_dict = {} for file in (root_cmakelists_path, rdkitutils_path): with open(file, 'rt') as hnd: # vars we want to read - var_set = set(['RDKit_Year', 'RDKit_Month', 'RDKit_Revision', - 'RDKit_ABI', 'RDKit_RELEASENAME']) + var_set = set(['RDKit_Year', 'RDKit_Month', 'RDKit_Revision', 'RDKit_ABI', 'RDKit_RELEASENAME']) line = hnd.readline() while (line): # is this an uncommented set command? - m = re.match(r'^\s*set\s*\((\w+)\s*\"(.*)\"\s*\)', - line, re.IGNORECASE) + m = re.match(r'^\s*set\s*\((\w+)\s*\"(.*)\"\s*\)', line, re.IGNORECASE) # if it is if (m is not None): # extract the var name @@ -66,7 +65,7 @@ if (rdkitVersion is not None): pkg_version = rdkitVersion else: # if extracting rdkitVersion somehow failed, use the date - pkg_version = d + pkg_version = d print('rdkitVersion:', pkg_version) -setup(rdkitVersion = pkg_version) +setup(rdkitVersion=pkg_version) diff --git a/rdkit/Chem/AllChem.py b/rdkit/Chem/AllChem.py index 85da4573e..2d88beab5 100644 --- a/rdkit/Chem/AllChem.py +++ b/rdkit/Chem/AllChem.py @@ -16,28 +16,26 @@ from collections import namedtuple import numpy -from rdkit import DataStructs -from rdkit import ForceField -from rdkit import RDConfig -from rdkit import rdBase +from rdkit import DataStructs, ForceField, RDConfig, rdBase from rdkit.Chem import * from rdkit.Chem.ChemicalFeatures import * +from rdkit.Chem.EnumerateStereoisomers import (EnumerateStereoisomers, + StereoEnumerationOptions) from rdkit.Chem.rdChemReactions import * from rdkit.Chem.rdDepictor import * from rdkit.Chem.rdDistGeom import * +from rdkit.Chem.rdFingerprintGenerator import * from rdkit.Chem.rdForceFieldHelpers import * from rdkit.Chem.rdMolAlign import * from rdkit.Chem.rdMolDescriptors import * +from rdkit.Chem.rdMolEnumerator import * from rdkit.Chem.rdMolTransforms import * from rdkit.Chem.rdPartialCharges import * +from rdkit.Chem.rdqueries import * from rdkit.Chem.rdReducedGraphs import * from rdkit.Chem.rdShapeHelpers import * -from rdkit.Chem.rdqueries import * -from rdkit.Chem.rdMolEnumerator import * -from rdkit.Chem.rdFingerprintGenerator import * from rdkit.Geometry import rdGeometry from rdkit.RDLogger import logger -from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions, EnumerateStereoisomers try: from rdkit.Chem.rdSLNParse import * @@ -445,8 +443,8 @@ def AssignBondOrdersFromTemplate(refmol, mol): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys import doctest + import sys failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) sys.exit(failed) diff --git a/rdkit/Chem/AtomPairs/Pairs.py b/rdkit/Chem/AtomPairs/Pairs.py index 6f66d7e49..7402275a9 100755 --- a/rdkit/Chem/AtomPairs/Pairs.py +++ b/rdkit/Chem/AtomPairs/Pairs.py @@ -24,8 +24,9 @@ The fingerprints can be accessed through the following functions: from rdkit import DataStructs from rdkit.Chem import rdMolDescriptors from rdkit.Chem.AtomPairs import Utils +from rdkit.Chem.rdMolDescriptors import (GetAtomPairFingerprint, + GetHashedAtomPairFingerprint) -from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetHashedAtomPairFingerprint GetAtomPairFingerprintAsIntVect = rdMolDescriptors.GetAtomPairFingerprint numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits @@ -56,14 +57,14 @@ def pyScorePair(at1, at2, dist, atomCodes=None, includeChirality=False): """ if not atomCodes: - code1 = Utils.GetAtomCode(at1, includeChirality = includeChirality) - code2 = Utils.GetAtomCode(at2, includeChirality = includeChirality) + code1 = Utils.GetAtomCode(at1, includeChirality=includeChirality) + code2 = Utils.GetAtomCode(at2, includeChirality=includeChirality) else: code1, code2 = atomCodes codeSize = rdMolDescriptors.AtomPairsParameters.codeSize if includeChirality: - codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits + codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits accum = int(dist) % _maxPathLen accum |= min(code1, code2) << numPathBits @@ -71,7 +72,7 @@ def pyScorePair(at1, at2, dist, atomCodes=None, includeChirality=False): return accum -def ExplainPairScore(score,includeChirality=False): +def ExplainPairScore(score, includeChirality=False): """ >>> from rdkit import Chem >>> m = Chem.MolFromSmiles('C=CC') @@ -104,7 +105,7 @@ def ExplainPairScore(score,includeChirality=False): """ codeSize = rdMolDescriptors.AtomPairsParameters.codeSize if includeChirality: - codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits + codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits codeMask = (1 << codeSize) - 1 pathMask = (1 << numPathBits) - 1 @@ -115,8 +116,7 @@ def ExplainPairScore(score,includeChirality=False): score = score >> codeSize code2 = score & codeMask - return (Utils.ExplainAtomCode(code1, includeChirality=includeChirality), - dist, + return (Utils.ExplainAtomCode(code1, includeChirality=includeChirality), dist, Utils.ExplainAtomCode(code2, includeChirality=includeChirality)) @@ -155,8 +155,8 @@ def GetAtomPairFingerprintAsBitVect(mol): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys import doctest + import sys failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) sys.exit(failed) diff --git a/rdkit/Chem/AtomPairs/Sheridan.py b/rdkit/Chem/AtomPairs/Sheridan.py index 21ca598d6..74ff48f55 100644 --- a/rdkit/Chem/AtomPairs/Sheridan.py +++ b/rdkit/Chem/AtomPairs/Sheridan.py @@ -21,13 +21,13 @@ The fingerprints can be accessed through the following functions: import os.path import re -from rdkit import Chem -from rdkit import RDConfig +from rdkit import Chem, RDConfig from rdkit.Chem import rdMolDescriptors -from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetTopologicalTorsionFingerprint +from rdkit.Chem.rdMolDescriptors import (GetAtomPairFingerprint, + GetTopologicalTorsionFingerprint) numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits -_maxPathLen = (1 << numPathBits) - 1 # Unused variable +_maxPathLen = (1 << numPathBits) - 1 # Unused variable numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize fpLen = 1 << numFpBits @@ -110,8 +110,8 @@ def GetBTFingerprint(mol, fpfn=GetTopologicalTorsionFingerprint): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys import doctest + import sys failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) sys.exit(failed) diff --git a/rdkit/Chem/AtomPairs/Torsions.py b/rdkit/Chem/AtomPairs/Torsions.py index 044faad14..c412bac87 100755 --- a/rdkit/Chem/AtomPairs/Torsions.py +++ b/rdkit/Chem/AtomPairs/Torsions.py @@ -25,8 +25,9 @@ The fingerprints can be accessed through the following functions: """ from rdkit.Chem import rdMolDescriptors from rdkit.Chem.AtomPairs import Utils -from rdkit.Chem.rdMolDescriptors import (GetTopologicalTorsionFingerprint, - GetHashedTopologicalTorsionFingerprint) +from rdkit.Chem.rdMolDescriptors import ( + GetHashedTopologicalTorsionFingerprint, GetTopologicalTorsionFingerprint) + GetTopologicalTorsionFingerprintAsIntVect = rdMolDescriptors.GetTopologicalTorsionFingerprint @@ -57,7 +58,7 @@ def pyScorePath(mol, path, size, atomCodes=None): >>> pyScorePath(m, (0, 1, 2, 4), 4) == t 1 - """ + """ codes = [None] * size for i in range(size): if i == 0 or i == size - 1: @@ -80,7 +81,7 @@ def pyScorePath(mol, path, size, atomCodes=None): if codes[beg] > codes[end]: codes.reverse() break - + accum = 0 codeSize = rdMolDescriptors.AtomPairsParameters.codeSize for i, code in enumerate(codes): @@ -166,8 +167,8 @@ def GetTopologicalTorsionFingerprintAsIds(mol, targetSize=4): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys import doctest + import sys failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) sys.exit(failed) diff --git a/rdkit/Chem/AtomPairs/UnitTestDescriptors.py b/rdkit/Chem/AtomPairs/UnitTestDescriptors.py index ae10e09be..a3b9b8319 100755 --- a/rdkit/Chem/AtomPairs/UnitTestDescriptors.py +++ b/rdkit/Chem/AtomPairs/UnitTestDescriptors.py @@ -9,14 +9,14 @@ # of the RDKit source tree. # -import unittest import doctest -import os import gzip +import os import pickle -from rdkit import Chem -from rdkit import RDConfig -from rdkit.Chem.AtomPairs import Pairs, Torsions, Utils, Sheridan +import unittest + +from rdkit import Chem, RDConfig +from rdkit.Chem.AtomPairs import Pairs, Sheridan, Torsions, Utils def load_tests(loader, tests, ignore): @@ -60,8 +60,8 @@ class TestCase(unittest.TestCase): tt = Torsions.GetTopologicalTorsionFingerprint(mol) self.assertEqual(tt.GetNonzeroElements(), {4437590049: 2, 8732557345: 2, 4445978657: 2}) tt = Torsions.GetTopologicalTorsionFingerprintAsIds(mol) - self.assertEqual( - sorted(tt), [4437590049, 4437590049, 4445978657, 4445978657, 8732557345, 8732557345]) + self.assertEqual(sorted(tt), + [4437590049, 4437590049, 4445978657, 4445978657, 8732557345, 8732557345]) tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol) self.assertEqual(tt.GetNonzeroElements(), {4437590049: 2, 8732557345: 2, 4445978657: 2}) diff --git a/rdkit/Chem/AtomPairs/Utils.py b/rdkit/Chem/AtomPairs/Utils.py index 8ccd9730a..7fff9273f 100755 --- a/rdkit/Chem/AtomPairs/Utils.py +++ b/rdkit/Chem/AtomPairs/Utils.py @@ -8,13 +8,14 @@ # of the RDKit source tree. # +import math + from rdkit import Chem from rdkit.Chem import rdMolDescriptors -import math def ExplainAtomCode(code, branchSubtract=0, includeChirality=False): - """ + """ **Arguments**: @@ -74,38 +75,38 @@ def ExplainAtomCode(code, branchSubtract=0, includeChirality=False): ('C', 3, 0, 'S') """ - typeMask = (1 << rdMolDescriptors.AtomPairsParameters.numTypeBits) - 1 - branchMask = (1 << rdMolDescriptors.AtomPairsParameters.numBranchBits) - 1 - piMask = (1 << rdMolDescriptors.AtomPairsParameters.numPiBits) - 1 - chiMask = (1 << rdMolDescriptors.AtomPairsParameters.numChiralBits) - 1 + typeMask = (1 << rdMolDescriptors.AtomPairsParameters.numTypeBits) - 1 + branchMask = (1 << rdMolDescriptors.AtomPairsParameters.numBranchBits) - 1 + piMask = (1 << rdMolDescriptors.AtomPairsParameters.numPiBits) - 1 + chiMask = (1 << rdMolDescriptors.AtomPairsParameters.numChiralBits) - 1 - nBranch = int(code & branchMask) - code = code >> rdMolDescriptors.AtomPairsParameters.numBranchBits + nBranch = int(code & branchMask) + code = code >> rdMolDescriptors.AtomPairsParameters.numBranchBits - nPi = int(code & piMask) - code = code >> rdMolDescriptors.AtomPairsParameters.numPiBits + nPi = int(code & piMask) + code = code >> rdMolDescriptors.AtomPairsParameters.numPiBits - typeIdx = int(code & typeMask) - if typeIdx < len(rdMolDescriptors.AtomPairsParameters.atomTypes): - atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx] - atomSymbol = Chem.GetPeriodicTable().GetElementSymbol(atomNum) - else: - atomSymbol = 'X' + typeIdx = int(code & typeMask) + if typeIdx < len(rdMolDescriptors.AtomPairsParameters.atomTypes): + atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx] + atomSymbol = Chem.GetPeriodicTable().GetElementSymbol(atomNum) + else: + atomSymbol = 'X' - if not includeChirality: - return (atomSymbol, nBranch, nPi) - - code = code >> rdMolDescriptors.AtomPairsParameters.numTypeBits - chiDict = {0: '', 1: 'R', 2: 'S'} - chiCode = int(code & chiMask) - return (atomSymbol, nBranch, nPi, chiDict[chiCode]) + if not includeChirality: + return (atomSymbol, nBranch, nPi) + + code = code >> rdMolDescriptors.AtomPairsParameters.numTypeBits + chiDict = {0: '', 1: 'R', 2: 'S'} + chiCode = int(code & chiMask) + return (atomSymbol, nBranch, nPi, chiDict[chiCode]) GetAtomCode = rdMolDescriptors.GetAtomPairAtomCode def NumPiElectrons(atom): - """ Returns the number of electrons an atom is using for pi bonding + """ Returns the number of electrons an atom is using for pi bonding >>> m = Chem.MolFromSmiles('C=C') >>> NumPiElectrons(m.GetAtomWithIdx(0)) @@ -145,21 +146,21 @@ def NumPiElectrons(atom): """ - res = 0 - if atom.GetIsAromatic(): - res = 1 - elif atom.GetHybridization() != Chem.HybridizationType.SP3: - # the number of pi electrons is just the number of - # unsaturations (valence - degree): - res = atom.GetExplicitValence() - atom.GetNumExplicitHs() - if res < atom.GetDegree(): - raise ValueError("explicit valence exceeds atom degree") - res -= atom.GetDegree() - return res + res = 0 + if atom.GetIsAromatic(): + res = 1 + elif atom.GetHybridization() != Chem.HybridizationType.SP3: + # the number of pi electrons is just the number of + # unsaturations (valence - degree): + res = atom.GetExplicitValence() - atom.GetNumExplicitHs() + if res < atom.GetDegree(): + raise ValueError("explicit valence exceeds atom degree") + res -= atom.GetDegree() + return res def BitsInCommon(v1, v2): - """ Returns the number of bits in common between two vectors + """ Returns the number of bits in common between two vectors **Arguments**: @@ -182,22 +183,22 @@ def BitsInCommon(v1, v2): 3 """ - res = 0 - v2Pos = 0 - nV2 = len(v2) - for val in v1: - while v2Pos < nV2 and v2[v2Pos] < val: - v2Pos += 1 - if v2Pos >= nV2: - break - if v2[v2Pos] == val: - res += 1 - v2Pos += 1 - return res + res = 0 + v2Pos = 0 + nV2 = len(v2) + for val in v1: + while v2Pos < nV2 and v2[v2Pos] < val: + v2Pos += 1 + if v2Pos >= nV2: + break + if v2[v2Pos] == val: + res += 1 + v2Pos += 1 + return res def DiceSimilarity(v1, v2, bounds=None): - """ Implements the DICE similarity metric. + """ Implements the DICE similarity metric. This is the recommended metric in both the Topological torsions and Atom pairs papers. @@ -248,20 +249,20 @@ def DiceSimilarity(v1, v2, bounds=None): 0.0 """ - denom = 1.0 * (len(v1) + len(v2)) - if not denom: - res = 0.0 + denom = 1.0 * (len(v1) + len(v2)) + if not denom: + res = 0.0 + else: + if bounds and (min(len(v1), len(v2)) / denom) < bounds: + numer = 0.0 else: - if bounds and (min(len(v1), len(v2)) / denom) < bounds: - numer = 0.0 - else: - numer = 2.0 * BitsInCommon(v1, v2) - res = numer / denom - return res + numer = 2.0 * BitsInCommon(v1, v2) + res = numer / denom + return res def Dot(v1, v2): - """ Returns the Dot product between two vectors: + """ Returns the Dot product between two vectors: **Arguments**: @@ -290,35 +291,35 @@ def Dot(v1, v2): 0 """ - res = 0 - nV1 = len(v1) - nV2 = len(v2) - i = 0 - j = 0 - while i < nV1: - v1Val = v1[i] - v1Count = 1 - i += 1 - while i < nV1 and v1[i] == v1Val: - v1Count += 1 - i += 1 - while j < nV2 and v2[j] < v1Val: - j += 1 - if j < nV2 and v2[j] == v1Val: - v2Count = 1 - j += 1 - while j < nV2 and v2[j] == v1Val: - v2Count += 1 - j += 1 - commonCount = min(v1Count, v2Count) - res += commonCount * commonCount - elif j >= nV2: - break - return res + res = 0 + nV1 = len(v1) + nV2 = len(v2) + i = 0 + j = 0 + while i < nV1: + v1Val = v1[i] + v1Count = 1 + i += 1 + while i < nV1 and v1[i] == v1Val: + v1Count += 1 + i += 1 + while j < nV2 and v2[j] < v1Val: + j += 1 + if j < nV2 and v2[j] == v1Val: + v2Count = 1 + j += 1 + while j < nV2 and v2[j] == v1Val: + v2Count += 1 + j += 1 + commonCount = min(v1Count, v2Count) + res += commonCount * commonCount + elif j >= nV2: + break + return res def CosineSimilarity(v1, v2): - """ Implements the Cosine similarity metric. + """ Implements the Cosine similarity metric. This is the recommended metric in the LaSSI paper **Arguments**: @@ -343,15 +344,15 @@ def CosineSimilarity(v1, v2): 0.000 """ - d1 = Dot(v1, v1) - d2 = Dot(v2, v2) - denom = math.sqrt(d1 * d2) - if not denom: - res = 0.0 - else: - numer = Dot(v1, v2) - res = numer / denom - return res + d1 = Dot(v1, v1) + d2 = Dot(v2, v2) + denom = math.sqrt(d1 * d2) + if not denom: + res = 0.0 + else: + numer = Dot(v1, v2) + res = numer / denom + return res # ------------------------------------ @@ -359,11 +360,11 @@ def CosineSimilarity(v1, v2): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import doctest + import sys + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/Chem/AtomPairs/test_list.py b/rdkit/Chem/AtomPairs/test_list.py index 094398150..6df62d250 100755 --- a/rdkit/Chem/AtomPairs/test_list.py +++ b/rdkit/Chem/AtomPairs/test_list.py @@ -1,4 +1,5 @@ import sys + from rdkit import TestRunner tests = [ diff --git a/rdkit/Chem/BRICS.py b/rdkit/Chem/BRICS.py index 045fe683b..9a0490d57 100644 --- a/rdkit/Chem/BRICS.py +++ b/rdkit/Chem/BRICS.py @@ -30,13 +30,15 @@ # # Created by Greg Landrum, Nov 2008 import copy + """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) """ -import sys import re -from rdkit import RDRandom as random +import sys + from rdkit import Chem +from rdkit import RDRandom as random from rdkit.Chem import rdChemReactions as Reactions # These are the definitions that will be applied to fragment molecules: @@ -185,56 +187,57 @@ reactionDefs = ( # L16 [ ('16', '16', '-'), # not in original paper - ], ) + ], +) smartsGps = copy.deepcopy(reactionDefs) for gp in smartsGps: - for j, defn in enumerate(gp): - g1, g2, bnd = defn - r1 = environs['L' + g1] - r2 = environs['L' + g2] - g1 = re.sub('[a-z,A-Z]', '', g1) - g2 = re.sub('[a-z,A-Z]', '', g2) - sma = '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' % (r1, bnd, r2, g1, g2) - gp[j] = sma + for j, defn in enumerate(gp): + g1, g2, bnd = defn + r1 = environs['L' + g1] + r2 = environs['L' + g2] + g1 = re.sub('[a-z,A-Z]', '', g1) + g2 = re.sub('[a-z,A-Z]', '', g2) + sma = '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' % (r1, bnd, r2, g1, g2) + gp[j] = sma for gp in smartsGps: - for defn in gp: - try: - t = Reactions.ReactionFromSmarts(defn) - t.Initialize() - except Exception: - print(defn) - raise + for defn in gp: + try: + t = Reactions.ReactionFromSmarts(defn) + t.Initialize() + except Exception: + print(defn) + raise environMatchers = {} for env, sma in environs.items(): - environMatchers[env] = Chem.MolFromSmarts(sma) + environMatchers[env] = Chem.MolFromSmarts(sma) bondMatchers = [] for i, compats in enumerate(reactionDefs): - tmp = [] - for i1, i2, bType in compats: - e1 = environs['L%s' % i1] - e2 = environs['L%s' % i2] - patt = '[$(%s)]%s;!@[$(%s)]' % (e1, bType, e2) - patt = Chem.MolFromSmarts(patt) - tmp.append((i1, i2, bType, patt)) - bondMatchers.append(tmp) + tmp = [] + for i1, i2, bType in compats: + e1 = environs['L%s' % i1] + e2 = environs['L%s' % i2] + patt = '[$(%s)]%s;!@[$(%s)]' % (e1, bType, e2) + patt = Chem.MolFromSmarts(patt) + tmp.append((i1, i2, bType, patt)) + bondMatchers.append(tmp) reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps]) reverseReactions = [] for i, rxnSet in enumerate(smartsGps): - for j, sma in enumerate(rxnSet): - rs, ps = sma.split('>>') - sma = '%s>>%s' % (ps, rs) - rxn = Reactions.ReactionFromSmarts(sma) - labels = re.findall(r'\[([0-9]+?)\*\]', ps) - rxn._matchers = [Chem.MolFromSmiles('[%s*]' % x) for x in labels] - reverseReactions.append(rxn) + for j, sma in enumerate(rxnSet): + rs, ps = sma.split('>>') + sma = '%s>>%s' % (ps, rs) + rxn = Reactions.ReactionFromSmarts(sma) + labels = re.findall(r'\[([0-9]+?)\*\]', ps) + rxn._matchers = [Chem.MolFromSmiles('[%s*]' % x) for x in labels] + reverseReactions.append(rxn) def FindBRICSBonds(mol, randomizeOrder=False, silent=True): - """ returns the bonds in a molecule that BRICS would cleave + """ returns the bonds in a molecule that BRICS would cleave >>> from rdkit import Chem >>> m = Chem.MolFromSmiles('CCCOCC') @@ -283,35 +286,35 @@ def FindBRICSBonds(mol, randomizeOrder=False, silent=True): [] """ - letter = re.compile('[a-z,A-Z]') - indices = list(range(len(bondMatchers))) - bondsDone = set() - if randomizeOrder: - random.shuffle(indices, random=random.random) + letter = re.compile('[a-z,A-Z]') + indices = list(range(len(bondMatchers))) + bondsDone = set() + if randomizeOrder: + random.shuffle(indices, random=random.random) - envMatches = {} - for env, patt in environMatchers.items(): - envMatches[env] = mol.HasSubstructMatch(patt) - for gpIdx in indices: - if randomizeOrder: - compats = bondMatchers[gpIdx][:] - random.shuffle(compats, random=random.random) - else: - compats = bondMatchers[gpIdx] - for i1, i2, bType, patt in compats: - if not envMatches['L' + i1] or not envMatches['L' + i2]: - continue - matches = mol.GetSubstructMatches(patt) - i1 = letter.sub('', i1) - i2 = letter.sub('', i2) - for match in matches: - if match not in bondsDone and (match[1], match[0]) not in bondsDone: - bondsDone.add(match) - yield (((match[0], match[1]), (i1, i2))) + envMatches = {} + for env, patt in environMatchers.items(): + envMatches[env] = mol.HasSubstructMatch(patt) + for gpIdx in indices: + if randomizeOrder: + compats = bondMatchers[gpIdx][:] + random.shuffle(compats, random=random.random) + else: + compats = bondMatchers[gpIdx] + for i1, i2, bType, patt in compats: + if not envMatches['L' + i1] or not envMatches['L' + i2]: + continue + matches = mol.GetSubstructMatches(patt) + i1 = letter.sub('', i1) + i2 = letter.sub('', i2) + for match in matches: + if match not in bondsDone and (match[1], match[0]) not in bondsDone: + bondsDone.add(match) + yield (((match[0], match[1]), (i1, i2))) def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True): - """ breaks the BRICS bonds in a molecule and returns the results + """ breaks the BRICS bonds in a molecule and returns the results >>> from rdkit import Chem >>> m = Chem.MolFromSmiles('CCCOCC') @@ -344,55 +347,55 @@ def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True): ['[4*]CCC', '[3*]O[3*]', '[4*]CC'] """ - if not bonds: - #bonds = FindBRICSBonds(mol) - res = Chem.FragmentOnBRICSBonds(mol) - if sanitize: - Chem.SanitizeMol(res) - return res - eMol = Chem.EditableMol(mol) - nAts = mol.GetNumAtoms() - - dummyPositions = [] - for indices, dummyTypes in bonds: - ia, ib = indices - obond = mol.GetBondBetweenAtoms(ia, ib) - bondType = obond.GetBondType() - eMol.RemoveBond(ia, ib) - - da, db = dummyTypes - atoma = Chem.Atom(0) - atoma.SetIsotope(int(da)) - atoma.SetNoImplicit(True) - idxa = nAts - nAts += 1 - eMol.AddAtom(atoma) - eMol.AddBond(ia, idxa, bondType) - - atomb = Chem.Atom(0) - atomb.SetIsotope(int(db)) - atomb.SetNoImplicit(True) - idxb = nAts - nAts += 1 - eMol.AddAtom(atomb) - eMol.AddBond(ib, idxb, bondType) - if mol.GetNumConformers(): - dummyPositions.append((idxa, ib)) - dummyPositions.append((idxb, ia)) - res = eMol.GetMol() + if not bonds: + #bonds = FindBRICSBonds(mol) + res = Chem.FragmentOnBRICSBonds(mol) if sanitize: - Chem.SanitizeMol(res) - if mol.GetNumConformers(): - for conf in mol.GetConformers(): - resConf = res.GetConformer(conf.GetId()) - for ia, pa in dummyPositions: - resConf.SetAtomPosition(ia, conf.GetAtomPosition(pa)) + Chem.SanitizeMol(res) return res + eMol = Chem.EditableMol(mol) + nAts = mol.GetNumAtoms() + + dummyPositions = [] + for indices, dummyTypes in bonds: + ia, ib = indices + obond = mol.GetBondBetweenAtoms(ia, ib) + bondType = obond.GetBondType() + eMol.RemoveBond(ia, ib) + + da, db = dummyTypes + atoma = Chem.Atom(0) + atoma.SetIsotope(int(da)) + atoma.SetNoImplicit(True) + idxa = nAts + nAts += 1 + eMol.AddAtom(atoma) + eMol.AddBond(ia, idxa, bondType) + + atomb = Chem.Atom(0) + atomb.SetIsotope(int(db)) + atomb.SetNoImplicit(True) + idxb = nAts + nAts += 1 + eMol.AddAtom(atomb) + eMol.AddBond(ib, idxb, bondType) + if mol.GetNumConformers(): + dummyPositions.append((idxa, ib)) + dummyPositions.append((idxb, ia)) + res = eMol.GetMol() + if sanitize: + Chem.SanitizeMol(res) + if mol.GetNumConformers(): + for conf in mol.GetConformers(): + resConf = res.GetConformer(conf.GetId()) + for ia, pa in dummyPositions: + resConf.SetAtomPosition(ia, conf.GetAtomPosition(pa)) + return res def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None, silent=True, keepNonLeafNodes=False, singlePass=False, returnMols=False): - """ returns the BRICS decomposition for a molecule + """ returns the BRICS decomposition for a molecule >>> from rdkit import Chem >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') @@ -449,79 +452,78 @@ def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None, """ - global reactions - mSmi = Chem.MolToSmiles(mol, 1) + global reactions + mSmi = Chem.MolToSmiles(mol, 1) - if allNodes is None: - allNodes = set() + if allNodes is None: + allNodes = set() - if mSmi in allNodes: - return set() + if mSmi in allNodes: + return set() - activePool = {mSmi: mol} - allNodes.add(mSmi) - foundMols = {mSmi: mol} - for gpIdx, reactionGp in enumerate(reactions): - newPool = {} - while activePool: - matched = False - nSmi = next(iter(activePool)) - mol = activePool.pop(nSmi) - for rxnIdx, reaction in enumerate(reactionGp): - if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions: - continue - if not silent: - print('--------') - print(smartsGps[gpIdx][rxnIdx]) - ps = reaction.RunReactants((mol, )) - if ps: - if not silent: - print(nSmi, '->', len(ps), 'products') - for prodSeq in ps: - seqOk = True - # we want to disqualify small fragments, so sort the product sequence by size - tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx) - for idx, prod in enumerate(prodSeq)] - tSeq.sort() - for nats, idx in tSeq: - prod = prodSeq[idx] - try: - Chem.SanitizeMol(prod) - except Exception: - continue - pSmi = Chem.MolToSmiles(prod, 1) - if minFragmentSize > 0: - nDummies = pSmi.count('*') - if nats - nDummies < minFragmentSize: - seqOk = False - break - prod.pSmi = pSmi - ts = [(x, prodSeq[y]) for x, y in tSeq] - prodSeq = ts - if seqOk: - matched = True - for nats, prod in prodSeq: - pSmi = prod.pSmi - # print('\t',nats,pSmi) - if pSmi not in allNodes: - if not singlePass: - activePool[pSmi] = prod - allNodes.add(pSmi) - foundMols[pSmi] = prod - if singlePass or keepNonLeafNodes or not matched: - newPool[nSmi] = mol - activePool = newPool - if not (singlePass or keepNonLeafNodes): - if not returnMols: - res = set(activePool.keys()) - else: - res = activePool.values() + activePool = {mSmi: mol} + allNodes.add(mSmi) + foundMols = {mSmi: mol} + for gpIdx, reactionGp in enumerate(reactions): + newPool = {} + while activePool: + matched = False + nSmi = next(iter(activePool)) + mol = activePool.pop(nSmi) + for rxnIdx, reaction in enumerate(reactionGp): + if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions: + continue + if not silent: + print('--------') + print(smartsGps[gpIdx][rxnIdx]) + ps = reaction.RunReactants((mol, )) + if ps: + if not silent: + print(nSmi, '->', len(ps), 'products') + for prodSeq in ps: + seqOk = True + # we want to disqualify small fragments, so sort the product sequence by size + tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx) for idx, prod in enumerate(prodSeq)] + tSeq.sort() + for nats, idx in tSeq: + prod = prodSeq[idx] + try: + Chem.SanitizeMol(prod) + except Exception: + continue + pSmi = Chem.MolToSmiles(prod, 1) + if minFragmentSize > 0: + nDummies = pSmi.count('*') + if nats - nDummies < minFragmentSize: + seqOk = False + break + prod.pSmi = pSmi + ts = [(x, prodSeq[y]) for x, y in tSeq] + prodSeq = ts + if seqOk: + matched = True + for nats, prod in prodSeq: + pSmi = prod.pSmi + # print('\t',nats,pSmi) + if pSmi not in allNodes: + if not singlePass: + activePool[pSmi] = prod + allNodes.add(pSmi) + foundMols[pSmi] = prod + if singlePass or keepNonLeafNodes or not matched: + newPool[nSmi] = mol + activePool = newPool + if not (singlePass or keepNonLeafNodes): + if not returnMols: + res = set(activePool.keys()) else: - if not returnMols: - res = allNodes - else: - res = foundMols.values() - return res + res = activePool.values() + else: + if not returnMols: + res = allNodes + else: + res = foundMols.values() + return res dummyPattern = Chem.MolFromSmiles('[*]') @@ -529,291 +531,292 @@ dummyPattern = Chem.MolFromSmiles('[*]') def BRICSBuild(fragments, onlyCompleteMols=True, seeds=None, uniquify=True, scrambleReagents=True, maxDepth=3): - seen = set() - if not seeds: - seeds = list(fragments) - if scrambleReagents: - seeds = list(seeds) - random.shuffle(seeds, random=random.random) - if scrambleReagents: - tempReactions = list(reverseReactions) - random.shuffle(tempReactions, random=random.random) - else: - tempReactions = reverseReactions - for seed in seeds: - seedIsR1 = False - seedIsR2 = False - nextSteps = [] - for rxn in tempReactions: - if seed.HasSubstructMatch(rxn._matchers[0]): - seedIsR1 = True - if seed.HasSubstructMatch(rxn._matchers[1]): - seedIsR2 = True - for fragment in fragments: - ps = None - if fragment.HasSubstructMatch(rxn._matchers[0]): - if seedIsR2: - ps = rxn.RunReactants((fragment, seed)) - if fragment.HasSubstructMatch(rxn._matchers[1]): - if seedIsR1: - ps = rxn.RunReactants((seed, fragment)) - if ps: - for p in ps: - if uniquify: - pSmi = Chem.MolToSmiles(p[0], True) - if pSmi in seen: - continue - else: - seen.add(pSmi) - if p[0].HasSubstructMatch(dummyPattern): - nextSteps.append(p[0]) - if not onlyCompleteMols: - yield p[0] - else: - yield p[0] - if nextSteps and maxDepth > 0: - for p in BRICSBuild(fragments, onlyCompleteMols=onlyCompleteMols, seeds=nextSteps, - uniquify=uniquify, maxDepth=maxDepth - 1, - scrambleReagents=scrambleReagents): - if uniquify: - pSmi = Chem.MolToSmiles(p, True) - if pSmi in seen: - continue - else: - seen.add(pSmi) - yield p + seen = set() + if not seeds: + seeds = list(fragments) + if scrambleReagents: + seeds = list(seeds) + random.shuffle(seeds, random=random.random) + if scrambleReagents: + tempReactions = list(reverseReactions) + random.shuffle(tempReactions, random=random.random) + else: + tempReactions = reverseReactions + for seed in seeds: + seedIsR1 = False + seedIsR2 = False + nextSteps = [] + for rxn in tempReactions: + if seed.HasSubstructMatch(rxn._matchers[0]): + seedIsR1 = True + if seed.HasSubstructMatch(rxn._matchers[1]): + seedIsR2 = True + for fragment in fragments: + ps = None + if fragment.HasSubstructMatch(rxn._matchers[0]): + if seedIsR2: + ps = rxn.RunReactants((fragment, seed)) + if fragment.HasSubstructMatch(rxn._matchers[1]): + if seedIsR1: + ps = rxn.RunReactants((seed, fragment)) + if ps: + for p in ps: + if uniquify: + pSmi = Chem.MolToSmiles(p[0], True) + if pSmi in seen: + continue + else: + seen.add(pSmi) + if p[0].HasSubstructMatch(dummyPattern): + nextSteps.append(p[0]) + if not onlyCompleteMols: + yield p[0] + else: + yield p[0] + if nextSteps and maxDepth > 0: + for p in BRICSBuild(fragments, onlyCompleteMols=onlyCompleteMols, seeds=nextSteps, + uniquify=uniquify, maxDepth=maxDepth - 1, + scrambleReagents=scrambleReagents): + if uniquify: + pSmi = Chem.MolToSmiles(p, True) + if pSmi in seen: + continue + else: + seen.add(pSmi) + yield p - # ------- ------- ------- ------- ------- ------- ------- ------- - # Begin testing code + # ------- ------- ------- ------- ------- ------- ------- ------- + # Begin testing code - # ------------------------------------ - # - # doctest boilerplate - # + # ------------------------------------ + # + # doctest boilerplate + # def _test(): - import doctest - import sys - return doctest.testmod(sys.modules["__main__"], - optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE) + import doctest + import sys + return doctest.testmod(sys.modules["__main__"], + optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE) if __name__ == '__main__': - import unittest + import unittest - class TestCase(unittest.TestCase): + class TestCase(unittest.TestCase): - def test1(self): - m = Chem.MolFromSmiles('CC(=O)OC') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2) + def test1(self): + m = Chem.MolFromSmiles('CC(=O)OC') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2) - m = Chem.MolFromSmiles('CC(=O)N1CCC1=O') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2, res) + m = Chem.MolFromSmiles('CC(=O)N1CCC1=O') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2, res) - m = Chem.MolFromSmiles('c1ccccc1N(C)C') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2, res) + m = Chem.MolFromSmiles('c1ccccc1N(C)C') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2, res) - m = Chem.MolFromSmiles('c1cccnc1N(C)C') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2, res) + m = Chem.MolFromSmiles('c1cccnc1N(C)C') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2, res) - m = Chem.MolFromSmiles('o1ccnc1N(C)C') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2) + m = Chem.MolFromSmiles('o1ccnc1N(C)C') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2) - m = Chem.MolFromSmiles('c1ccccc1OC') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2) + m = Chem.MolFromSmiles('c1ccccc1OC') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2) - m = Chem.MolFromSmiles('o1ccnc1OC') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2) + m = Chem.MolFromSmiles('o1ccnc1OC') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2) - m = Chem.MolFromSmiles('O1CCNC1OC') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 2) + m = Chem.MolFromSmiles('O1CCNC1OC') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 2) - m = Chem.MolFromSmiles('CCCSCC') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 3, res) - self.assertTrue('[11*]S[11*]' in res, res) + m = Chem.MolFromSmiles('CCCSCC') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 3, res) + self.assertTrue('[11*]S[11*]' in res, res) - m = Chem.MolFromSmiles('CCNC(=O)C1CC1') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 4, res) - self.assertTrue('[5*]N[5*]' in res, res) + m = Chem.MolFromSmiles('CCNC(=O)C1CC1') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 4, res) + self.assertTrue('[5*]N[5*]' in res, res) - def test2(self): - # example from the paper, nexavar: - m = Chem.MolFromSmiles( - 'CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 9, res) + def test2(self): + # example from the paper, nexavar: + m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 9, res) - def test3(self): - m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1') - res = BRICSDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res) == 5, res) - self.assertTrue('[5*]N[5*]' in res, res) - self.assertTrue('[16*]c1ccccc1' in res, res) - self.assertTrue('[8*]C(F)(F)F' in res, res) + def test3(self): + m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1') + res = BRICSDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res) == 5, res) + self.assertTrue('[5*]N[5*]' in res, res) + self.assertTrue('[16*]c1ccccc1' in res, res) + self.assertTrue('[8*]C(F)(F)F' in res, res) - def test4(self): - allNodes = set() - m = Chem.MolFromSmiles('c1ccccc1OCCC') - res = BRICSDecompose(m, allNodes=allNodes) - self.assertTrue(res) - leaves = res - self.assertTrue(len(leaves) == 3, leaves) - self.assertTrue(len(allNodes) == 6, allNodes) - res = BRICSDecompose(m, allNodes=allNodes) - self.assertFalse(res) - self.assertTrue(len(allNodes) == 6, allNodes) + def test4(self): + allNodes = set() + m = Chem.MolFromSmiles('c1ccccc1OCCC') + res = BRICSDecompose(m, allNodes=allNodes) + self.assertTrue(res) + leaves = res + self.assertTrue(len(leaves) == 3, leaves) + self.assertTrue(len(allNodes) == 6, allNodes) + res = BRICSDecompose(m, allNodes=allNodes) + self.assertFalse(res) + self.assertTrue(len(allNodes) == 6, allNodes) - m = Chem.MolFromSmiles('c1ccccc1OCCCC') - res = BRICSDecompose(m, allNodes=allNodes) - self.assertTrue(res) - leaves.update(res) - self.assertTrue(len(allNodes) == 9, allNodes) - self.assertTrue(len(leaves) == 4, leaves) + m = Chem.MolFromSmiles('c1ccccc1OCCCC') + res = BRICSDecompose(m, allNodes=allNodes) + self.assertTrue(res) + leaves.update(res) + self.assertTrue(len(allNodes) == 9, allNodes) + self.assertTrue(len(leaves) == 4, leaves) - m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC') - res = BRICSDecompose(m, allNodes=allNodes) - self.assertTrue(res) - leaves.update(res) - self.assertTrue(len(leaves) == 8, leaves) - self.assertTrue(len(allNodes) == 18, allNodes) + m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC') + res = BRICSDecompose(m, allNodes=allNodes) + self.assertTrue(res) + leaves.update(res) + self.assertTrue(len(leaves) == 8, leaves) + self.assertTrue(len(allNodes) == 18, allNodes) - def test5(self): - allNodes = set() - frags = [ - '[14*]c1ncncn1', - '[16*]c1ccccc1', - '[14*]c1ncccc1', - ] - frags = [Chem.MolFromSmiles(x) for x in frags] - res = BRICSBuild(frags) - self.assertTrue(res) - res = list(res) - self.assertTrue(len(res) == 6) - smis = [Chem.MolToSmiles(x, True) for x in res] - self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) - self.assertTrue('c1ccc(-c2ccccn2)cc1' in smis) + def test5(self): + allNodes = set() + frags = [ + '[14*]c1ncncn1', + '[16*]c1ccccc1', + '[14*]c1ncccc1', + ] + frags = [Chem.MolFromSmiles(x) for x in frags] + res = BRICSBuild(frags) + self.assertTrue(res) + res = list(res) + self.assertTrue(len(res) == 6) + smis = [Chem.MolToSmiles(x, True) for x in res] + self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) + self.assertTrue('c1ccc(-c2ccccn2)cc1' in smis) - def test5a(self): - allNodes = set() - frags = [ - '[3*]O[3*]', - '[16*]c1ccccc1', - ] - frags = [Chem.MolFromSmiles(x) for x in frags] - res = BRICSBuild(frags) - self.assertTrue(res) - res = list(res) - smis = [Chem.MolToSmiles(x, True) for x in res] - self.assertTrue(len(smis) == 2, smis) - self.assertTrue('c1ccc(Oc2ccccc2)cc1' in smis) - self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) + def test5a(self): + allNodes = set() + frags = [ + '[3*]O[3*]', + '[16*]c1ccccc1', + ] + frags = [Chem.MolFromSmiles(x) for x in frags] + res = BRICSBuild(frags) + self.assertTrue(res) + res = list(res) + smis = [Chem.MolToSmiles(x, True) for x in res] + self.assertTrue(len(smis) == 2, smis) + self.assertTrue('c1ccc(Oc2ccccc2)cc1' in smis) + self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) - def test6(self): - allNodes = set() - frags = [ - '[16*]c1ccccc1', - '[3*]OC', - '[9*]n1cccc1', - ] - frags = [Chem.MolFromSmiles(x) for x in frags] - res = BRICSBuild(frags) - self.assertTrue(res) - res = list(res) - self.assertTrue(len(res) == 3) - smis = [Chem.MolToSmiles(x, True) for x in res] - self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) - self.assertTrue('COc1ccccc1' in smis) - self.assertTrue('c1ccc(-n2cccc2)cc1' in smis, smis) + def test6(self): + allNodes = set() + frags = [ + '[16*]c1ccccc1', + '[3*]OC', + '[9*]n1cccc1', + ] + frags = [Chem.MolFromSmiles(x) for x in frags] + res = BRICSBuild(frags) + self.assertTrue(res) + res = list(res) + self.assertTrue(len(res) == 3) + smis = [Chem.MolToSmiles(x, True) for x in res] + self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) + self.assertTrue('COc1ccccc1' in smis) + self.assertTrue('c1ccc(-n2cccc2)cc1' in smis, smis) - def test7(self): - allNodes = set() - frags = [ - '[16*]c1ccccc1', - '[3*]OC', - '[3*]OCC(=O)[6*]', - ] - frags = [Chem.MolFromSmiles(x) for x in frags] - res = BRICSBuild(frags) - self.assertTrue(res) - res = list(res) - smis = [Chem.MolToSmiles(x, True) for x in res] - self.assertTrue(len(res) == 3) - self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) - self.assertTrue('COc1ccccc1' in smis) - self.assertTrue('O=C(COc1ccccc1)c1ccccc1' in smis) + def test7(self): + allNodes = set() + frags = [ + '[16*]c1ccccc1', + '[3*]OC', + '[3*]OCC(=O)[6*]', + ] + frags = [Chem.MolFromSmiles(x) for x in frags] + res = BRICSBuild(frags) + self.assertTrue(res) + res = list(res) + smis = [Chem.MolToSmiles(x, True) for x in res] + self.assertTrue(len(res) == 3) + self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) + self.assertTrue('COc1ccccc1' in smis) + self.assertTrue('O=C(COc1ccccc1)c1ccccc1' in smis) - def test8(self): - random.seed(23) - base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1") - catalog = BRICSDecompose(base) - self.assertTrue(len(catalog) == 5, catalog) - catalog = [Chem.MolFromSmiles(x) for x in catalog] - ms = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=False)) - for m in ms: - Chem.SanitizeMol(m) - ms = [Chem.MolToSmiles(x) for x in ms] - self.assertEqual(len(ms), 36) + def test8(self): + random.seed(23) + base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1") + catalog = BRICSDecompose(base) + self.assertTrue(len(catalog) == 5, catalog) + catalog = [Chem.MolFromSmiles(x) for x in catalog] + ms = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=False)) + for m in ms: + Chem.SanitizeMol(m) + ms = [Chem.MolToSmiles(x) for x in ms] + self.assertEqual(len(ms), 36) - ts = ['n1cnc(C2CNC2)nc1', 'n1cnc(-c2ncncn2)nc1', 'C(OC1CNC1)C(C1CC1)OC1CNC1', - 'n1cnc(OC(COC2CNC2)C2CC2)nc1', 'n1cnc(OCC(OC2CNC2)C2CNC2)nc1'] - ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x), True) for x in ts] - for t in ts: - self.assertTrue(t in ms, (t, ms)) + ts = [ + 'n1cnc(C2CNC2)nc1', 'n1cnc(-c2ncncn2)nc1', 'C(OC1CNC1)C(C1CC1)OC1CNC1', + 'n1cnc(OC(COC2CNC2)C2CC2)nc1', 'n1cnc(OCC(OC2CNC2)C2CNC2)nc1' + ] + ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x), True) for x in ts] + for t in ts: + self.assertTrue(t in ms, (t, ms)) - ms2 = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=False)) - for m in ms2: - Chem.SanitizeMol(m) - ms2 = [Chem.MolToSmiles(x) for x in ms2] - self.assertEqual(ms, ms2) + ms2 = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=False)) + for m in ms2: + Chem.SanitizeMol(m) + ms2 = [Chem.MolToSmiles(x) for x in ms2] + self.assertEqual(ms, ms2) - ms2 = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=True)) - for m in ms2: - Chem.SanitizeMol(m) - ms2 = [Chem.MolToSmiles(x) for x in ms2] - self.assertNotEqual(ms, ms2) + ms2 = list(BRICSBuild(catalog, maxDepth=4, scrambleReagents=True)) + for m in ms2: + Chem.SanitizeMol(m) + ms2 = [Chem.MolToSmiles(x) for x in ms2] + self.assertNotEqual(ms, ms2) - def test9(self): - m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1') - res = BRICSDecompose(m) - self.assertEqual(len(res), 7) - self.assertTrue('[3*]O[3*]' in res) - self.assertFalse('[14*]c1ncnc(NCCCC)n1' in res) - res = BRICSDecompose(m, singlePass=True) - self.assertEqual(len(res), 13) - self.assertTrue('[3*]OCC' in res) - self.assertTrue('[14*]c1ncnc(NCCCC)n1' in res) + def test9(self): + m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1') + res = BRICSDecompose(m) + self.assertEqual(len(res), 7) + self.assertTrue('[3*]O[3*]' in res) + self.assertFalse('[14*]c1ncnc(NCCCC)n1' in res) + res = BRICSDecompose(m, singlePass=True) + self.assertEqual(len(res), 13) + self.assertTrue('[3*]OCC' in res) + self.assertTrue('[14*]c1ncnc(NCCCC)n1' in res) - def test10(self): - m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1') - res = BRICSDecompose(m) - self.assertEqual(len(res), 2, res) + def test10(self): + m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1') + res = BRICSDecompose(m) + self.assertEqual(len(res), 2, res) - def test11(self): - # test coordinate preservation: - molblock = """ + def test11(self): + # test coordinate preservation: + molblock = """ RDKit 3D 13 14 0 0 0 0 0 0 0 0999 V2000 @@ -846,48 +849,48 @@ if __name__ == '__main__': 13 8 1 0 M END """ - m = Chem.MolFromMolBlock(molblock) - pieces = BreakBRICSBonds(m) + m = Chem.MolFromMolBlock(molblock) + pieces = BreakBRICSBonds(m) - frags = Chem.GetMolFrags(pieces, asMols=True) - self.assertEqual(len(frags), 3) - self.assertEqual(frags[0].GetNumAtoms(), 7) - self.assertEqual(frags[1].GetNumAtoms(), 3) - self.assertEqual(frags[2].GetNumAtoms(), 7) + frags = Chem.GetMolFrags(pieces, asMols=True) + self.assertEqual(len(frags), 3) + self.assertEqual(frags[0].GetNumAtoms(), 7) + self.assertEqual(frags[1].GetNumAtoms(), 3) + self.assertEqual(frags[2].GetNumAtoms(), 7) - c1 = m.GetConformer() - c2 = frags[0].GetConformer() - for i in range(6): - p1 = c1.GetAtomPosition(i) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c1 = m.GetConformer() + c2 = frags[0].GetConformer() + for i in range(6): + p1 = c1.GetAtomPosition(i) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[2].GetConformer() - for i in range(6): - p1 = c1.GetAtomPosition(i + 7) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[2].GetConformer() + for i in range(6): + p1 = c1.GetAtomPosition(i + 7) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[1].GetConformer() - for i in range(1): - p1 = c1.GetAtomPosition(i + 6) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(5) - p2 = c2.GetAtomPosition(1) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(0) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[1].GetConformer() + for i in range(1): + p1 = c1.GetAtomPosition(i + 6) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(5) + p2 = c2.GetAtomPosition(1) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(0) + self.assertEqual((p1 - p2).Length(), 0.0) - # make sure multiple conformations (include 2D) also work: - molblock = """ + # make sure multiple conformations (include 2D) also work: + molblock = """ RDKit 2D 13 14 0 0 0 0 0 0 0 0999 V2000 @@ -920,101 +923,101 @@ M END 13 8 1 0 M END """ - m2 = Chem.MolFromMolBlock(molblock) - m.AddConformer(m2.GetConformer(), assignId=True) - self.assertEqual(m.GetNumConformers(), 2) + m2 = Chem.MolFromMolBlock(molblock) + m.AddConformer(m2.GetConformer(), assignId=True) + self.assertEqual(m.GetNumConformers(), 2) - pieces = BreakBRICSBonds(m) - frags = Chem.GetMolFrags(pieces, asMols=True) - self.assertEqual(len(frags), 3) - self.assertEqual(frags[0].GetNumAtoms(), 7) - self.assertEqual(frags[1].GetNumAtoms(), 3) - self.assertEqual(frags[2].GetNumAtoms(), 7) - self.assertEqual(frags[0].GetNumConformers(), 2) - self.assertEqual(frags[1].GetNumConformers(), 2) - self.assertEqual(frags[2].GetNumConformers(), 2) + pieces = BreakBRICSBonds(m) + frags = Chem.GetMolFrags(pieces, asMols=True) + self.assertEqual(len(frags), 3) + self.assertEqual(frags[0].GetNumAtoms(), 7) + self.assertEqual(frags[1].GetNumAtoms(), 3) + self.assertEqual(frags[2].GetNumAtoms(), 7) + self.assertEqual(frags[0].GetNumConformers(), 2) + self.assertEqual(frags[1].GetNumConformers(), 2) + self.assertEqual(frags[2].GetNumConformers(), 2) - c1 = m.GetConformer(0) - c2 = frags[0].GetConformer(0) - for i in range(6): - p1 = c1.GetAtomPosition(i) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c1 = m.GetConformer(0) + c2 = frags[0].GetConformer(0) + for i in range(6): + p1 = c1.GetAtomPosition(i) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[2].GetConformer(0) - for i in range(6): - p1 = c1.GetAtomPosition(i + 7) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[2].GetConformer(0) + for i in range(6): + p1 = c1.GetAtomPosition(i + 7) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[1].GetConformer(0) - for i in range(1): - p1 = c1.GetAtomPosition(i + 6) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(5) - p2 = c2.GetAtomPosition(1) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(0) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[1].GetConformer(0) + for i in range(1): + p1 = c1.GetAtomPosition(i + 6) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(5) + p2 = c2.GetAtomPosition(1) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(0) + self.assertEqual((p1 - p2).Length(), 0.0) - c1 = m.GetConformer(1) - c2 = frags[0].GetConformer(1) - for i in range(6): - p1 = c1.GetAtomPosition(i) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c1 = m.GetConformer(1) + c2 = frags[0].GetConformer(1) + for i in range(6): + p1 = c1.GetAtomPosition(i) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[2].GetConformer(1) - for i in range(6): - p1 = c1.GetAtomPosition(i + 7) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(6) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[2].GetConformer(1) + for i in range(6): + p1 = c1.GetAtomPosition(i + 7) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(6) + self.assertEqual((p1 - p2).Length(), 0.0) - c2 = frags[1].GetConformer(1) - for i in range(1): - p1 = c1.GetAtomPosition(i + 6) - p2 = c2.GetAtomPosition(i) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(5) - p2 = c2.GetAtomPosition(1) - self.assertEqual((p1 - p2).Length(), 0.0) - p1 = c1.GetAtomPosition(6) - p2 = c2.GetAtomPosition(0) - self.assertEqual((p1 - p2).Length(), 0.0) + c2 = frags[1].GetConformer(1) + for i in range(1): + p1 = c1.GetAtomPosition(i + 6) + p2 = c2.GetAtomPosition(i) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(5) + p2 = c2.GetAtomPosition(1) + self.assertEqual((p1 - p2).Length(), 0.0) + p1 = c1.GetAtomPosition(6) + p2 = c2.GetAtomPosition(0) + self.assertEqual((p1 - p2).Length(), 0.0) - def test12(self): - m = Chem.MolFromSmiles('CCS(=O)(=O)NCC') - res = list(FindBRICSBonds(m)) - self.assertEqual(len(res), 2, res) - atIds = [x[0] for x in res] - atIds.sort() - self.assertEqual(atIds, [(5, 2), (6, 5)]) + def test12(self): + m = Chem.MolFromSmiles('CCS(=O)(=O)NCC') + res = list(FindBRICSBonds(m)) + self.assertEqual(len(res), 2, res) + atIds = [x[0] for x in res] + atIds.sort() + self.assertEqual(atIds, [(5, 2), (6, 5)]) - def testGithub1734(self): - m = Chem.MolFromSmiles('c1ccccc1[C@H](C)NC') - res = BRICSDecompose(m) - self.assertEqual(len(res), 3) - self.assertTrue('[4*][C@H]([8*])C' in res) - res = BreakBRICSBonds(m) - self.assertEqual(Chem.MolToSmiles(res, isomericSmiles=True), - '[16*]c1ccccc1.[4*][C@H]([8*])C.[5*]NC') + def testGithub1734(self): + m = Chem.MolFromSmiles('c1ccccc1[C@H](C)NC') + res = BRICSDecompose(m) + self.assertEqual(len(res), 3) + self.assertTrue('[4*][C@H]([8*])C' in res) + res = BreakBRICSBonds(m) + self.assertEqual(Chem.MolToSmiles(res, isomericSmiles=True), + '[16*]c1ccccc1.[4*][C@H]([8*])C.[5*]NC') - failed, tried = _test() - if failed: - sys.exit(failed) + failed, tried = _test() + if failed: + sys.exit(failed) - unittest.main() + unittest.main() diff --git a/rdkit/Chem/BuildFragmentCatalog.py b/rdkit/Chem/BuildFragmentCatalog.py index 98b23f85e..620f6c7f6 100755 --- a/rdkit/Chem/BuildFragmentCatalog.py +++ b/rdkit/Chem/BuildFragmentCatalog.py @@ -63,8 +63,8 @@ """ - import os +import pickle import sys import numpy @@ -74,15 +74,13 @@ from rdkit.Chem import FragmentCatalog from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import InfoTheory -import pickle - def message(msg, dest=sys.stdout): - dest.write(msg) + dest.write(msg) def BuildCatalog(suppl, maxPts=-1, groupFileName=None, minPath=2, maxPath=6, reportFreq=10): - """ builds a fragment catalog from a set of molecules in a delimited text block + """ builds a fragment catalog from a set of molecules in a delimited text block **Arguments** @@ -104,33 +102,33 @@ def BuildCatalog(suppl, maxPts=-1, groupFileName=None, minPath=2, maxPath=6, rep a FragmentCatalog """ - if groupFileName is None: - groupFileName = os.path.join(RDConfig.RDDataDir, "FunctionalGroups.txt") + if groupFileName is None: + groupFileName = os.path.join(RDConfig.RDDataDir, "FunctionalGroups.txt") - fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName) - catalog = FragmentCatalog.FragCatalog(fpParams) - fgen = FragmentCatalog.FragCatGenerator() - if maxPts > 0: - nPts = maxPts + fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName) + catalog = FragmentCatalog.FragCatalog(fpParams) + fgen = FragmentCatalog.FragCatGenerator() + if maxPts > 0: + nPts = maxPts + else: + if hasattr(suppl, '__len__'): + nPts = len(suppl) else: - if hasattr(suppl, '__len__'): - nPts = len(suppl) - else: - nPts = -1 - for i, mol in enumerate(suppl): - if i == nPts: - break - if i and not i % reportFreq: - if nPts > -1: - message('Done %d of %d, %d paths\n' % (i, nPts, catalog.GetFPLength())) - else: - message('Done %d, %d paths\n' % (i, catalog.GetFPLength())) - fgen.AddFragsFromMol(mol, catalog) - return catalog + nPts = -1 + for i, mol in enumerate(suppl): + if i == nPts: + break + if i and not i % reportFreq: + if nPts > -1: + message('Done %d of %d, %d paths\n' % (i, nPts, catalog.GetFPLength())) + else: + message('Done %d, %d paths\n' % (i, catalog.GetFPLength())) + fgen.AddFragsFromMol(mol, catalog) + return catalog def ScoreMolecules(suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, reportFreq=10): - """ scores the compounds in a supplier using a catalog + """ scores the compounds in a supplier using a catalog **Arguments** @@ -160,40 +158,40 @@ def ScoreMolecules(suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, re 2) a list containing the on bit lists for each molecule """ - nBits = catalog.GetFPLength() - resTbl = numpy.zeros((nBits, 2, nActs), numpy.int32) - obls = [] + nBits = catalog.GetFPLength() + resTbl = numpy.zeros((nBits, 2, nActs), numpy.int32) + obls = [] - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - fpgen = FragmentCatalog.FragFPGenerator() - suppl.reset() - i = 1 - for mol in suppl: - if i and not i % reportFreq: - message('Done %d.\n' % (i)) - if mol: - if not acts: - act = int(mol.GetProp(actName)) - else: - act = acts[i - 1] - fp = fpgen.GetFPForMol(mol, catalog) - obls.append([x for x in fp.GetOnBits()]) - for j in range(nBits): - resTbl[j, 0, act] += 1 - for id_ in obls[i - 1]: - resTbl[id_ - 1, 0, act] -= 1 - resTbl[id_ - 1, 1, act] += 1 - else: - obls.append([]) - i += 1 - return resTbl, obls + fpgen = FragmentCatalog.FragFPGenerator() + suppl.reset() + i = 1 + for mol in suppl: + if i and not i % reportFreq: + message('Done %d.\n' % (i)) + if mol: + if not acts: + act = int(mol.GetProp(actName)) + else: + act = acts[i - 1] + fp = fpgen.GetFPForMol(mol, catalog) + obls.append([x for x in fp.GetOnBits()]) + for j in range(nBits): + resTbl[j, 0, act] += 1 + for id_ in obls[i - 1]: + resTbl[id_ - 1, 0, act] -= 1 + resTbl[id_ - 1, 1, act] += 1 + else: + obls.append([]) + i += 1 + return resTbl, obls def ScoreFromLists(bitLists, suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, reportFreq=10): - """ similar to _ScoreMolecules()_, but uses pre-calculated bit lists + """ similar to _ScoreMolecules()_, but uses pre-calculated bit lists for the molecules (this speeds things up a lot) @@ -220,37 +218,37 @@ def ScoreFromLists(bitLists, suppl, catalog, maxPts=-1, actName='', acts=None, n the results table (a 3D array of ints nBits x 2 x nActs) """ - nBits = catalog.GetFPLength() - if maxPts > 0: - nPts = maxPts + nBits = catalog.GetFPLength() + if maxPts > 0: + nPts = maxPts + else: + nPts = len(bitLists) + resTbl = numpy.zeros((nBits, 2, nActs), numpy.int32) + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] + suppl.reset() + for i in range(1, nPts + 1): + mol = next(suppl) + if not acts: + act = int(mol.GetProp(actName)) else: - nPts = len(bitLists) - resTbl = numpy.zeros((nBits, 2, nActs), numpy.int32) - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] - suppl.reset() - for i in range(1, nPts + 1): - mol = next(suppl) - if not acts: - act = int(mol.GetProp(actName)) - else: - act = acts[i - 1] - if i and not i % reportFreq: - message('Done %d of %d\n' % (i, nPts)) - ids = set() - for id_ in bitLists[i - 1]: - ids.add(id_ - 1) - for j in range(nBits): - resTbl[j, 0, act] += 1 - for id_ in ids: - resTbl[id_, 0, act] -= 1 - resTbl[id_, 1, act] += 1 - return resTbl + act = acts[i - 1] + if i and not i % reportFreq: + message('Done %d of %d\n' % (i, nPts)) + ids = set() + for id_ in bitLists[i - 1]: + ids.add(id_ - 1) + for j in range(nBits): + resTbl[j, 0, act] += 1 + for id_ in ids: + resTbl[id_, 0, act] -= 1 + resTbl[id_, 1, act] += 1 + return resTbl def CalcGains(suppl, catalog, topN=-1, actName='', acts=None, nActs=2, reportFreq=10, biasList=None, collectFps=0): - """ calculates info gains by constructing fingerprints + """ calculates info gains by constructing fingerprints *DOC* Returns a 2-tuple: @@ -258,405 +256,405 @@ def CalcGains(suppl, catalog, topN=-1, actName='', acts=None, nActs=2, reportFre 2) list of fingerprints """ - nBits = catalog.GetFPLength() - if topN < 0: - topN = nBits - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + nBits = catalog.GetFPLength() + if topN < 0: + topN = nBits + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - if hasattr(suppl, '__len__'): - nMols = len(suppl) + if hasattr(suppl, '__len__'): + nMols = len(suppl) + else: + nMols = -1 + fpgen = FragmentCatalog.FragFPGenerator() + # ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.ENTROPY) + if biasList: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) + ranker.SetBiasList(biasList) + else: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) + i = 0 + fps = [] + for mol in suppl: + if not acts: + try: + act = int(mol.GetProp(actName)) + except KeyError: + message('ERROR: Molecule has no property: %s\n' % (actName)) + message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) + raise KeyError(actName) else: - nMols = -1 - fpgen = FragmentCatalog.FragFPGenerator() - # ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.ENTROPY) - if biasList: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) - ranker.SetBiasList(biasList) - else: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) - i = 0 - fps = [] - for mol in suppl: - if not acts: - try: - act = int(mol.GetProp(actName)) - except KeyError: - message('ERROR: Molecule has no property: %s\n' % (actName)) - message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) - raise KeyError(actName) - else: - act = acts[i] - if i and not i % reportFreq: - if nMols > 0: - message('Done %d of %d.\n' % (i, nMols)) - else: - message('Done %d.\n' % (i)) - fp = fpgen.GetFPForMol(mol, catalog) - ranker.AccumulateVotes(fp, act) - i += 1 - if collectFps: - fps.append(fp) - gains = ranker.GetTopN(topN) - return gains, fps + act = acts[i] + if i and not i % reportFreq: + if nMols > 0: + message('Done %d of %d.\n' % (i, nMols)) + else: + message('Done %d.\n' % (i)) + fp = fpgen.GetFPForMol(mol, catalog) + ranker.AccumulateVotes(fp, act) + i += 1 + if collectFps: + fps.append(fp) + gains = ranker.GetTopN(topN) + return gains, fps def CalcGainsFromFps(suppl, fps, topN=-1, actName='', acts=None, nActs=2, reportFreq=10, biasList=None): - """ calculates info gains from a set of fingerprints + """ calculates info gains from a set of fingerprints *DOC* """ - nBits = len(fps[0]) - if topN < 0: - topN = nBits - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + nBits = len(fps[0]) + if topN < 0: + topN = nBits + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - if hasattr(suppl, '__len__'): - nMols = len(suppl) + if hasattr(suppl, '__len__'): + nMols = len(suppl) + else: + nMols = -1 + if biasList: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) + ranker.SetBiasList(biasList) + else: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) + for i, mol in enumerate(suppl): + if not acts: + try: + act = int(mol.GetProp(actName)) + except KeyError: + message('ERROR: Molecule has no property: %s\n' % (actName)) + message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) + raise KeyError(actName) else: - nMols = -1 - if biasList: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) - ranker.SetBiasList(biasList) - else: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) - for i, mol in enumerate(suppl): - if not acts: - try: - act = int(mol.GetProp(actName)) - except KeyError: - message('ERROR: Molecule has no property: %s\n' % (actName)) - message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) - raise KeyError(actName) - else: - act = acts[i] - if i and not i % reportFreq: - if nMols > 0: - message('Done %d of %d.\n' % (i, nMols)) - else: - message('Done %d.\n' % (i)) - fp = fps[i] - ranker.AccumulateVotes(fp, act) - gains = ranker.GetTopN(topN) - return gains + act = acts[i] + if i and not i % reportFreq: + if nMols > 0: + message('Done %d of %d.\n' % (i, nMols)) + else: + message('Done %d.\n' % (i)) + fp = fps[i] + ranker.AccumulateVotes(fp, act) + gains = ranker.GetTopN(topN) + return gains def OutputGainsData(outF, gains, cat, nActs=2): - actHeaders = ['Act-%d' % (x) for x in range(nActs)] + actHeaders = ['Act-%d' % (x) for x in range(nActs)] + if cat: + outF.write('id,Description,Gain,%s\n' % (','.join(actHeaders))) + else: + outF.write('id,Gain,%s\n' % (','.join(actHeaders))) + for entry in gains: + id_ = int(entry[0]) + outL = [str(id_)] if cat: - outF.write('id,Description,Gain,%s\n' % (','.join(actHeaders))) - else: - outF.write('id,Gain,%s\n' % (','.join(actHeaders))) - for entry in gains: - id_ = int(entry[0]) - outL = [str(id_)] - if cat: - descr = cat.GetBitDescription(id_) - outL.append(descr) - outL.append('%.6f' % entry[1]) - outL += ['%d' % x for x in entry[2:]] - outF.write(','.join(outL)) - outF.write('\n') + descr = cat.GetBitDescription(id_) + outL.append(descr) + outL.append('%.6f' % entry[1]) + outL += ['%d' % x for x in entry[2:]] + outF.write(','.join(outL)) + outF.write('\n') def ProcessGainsData(inF, delim=',', idCol=0, gainCol=1): - """ reads a list of ids and info gains out of an input file + """ reads a list of ids and info gains out of an input file """ - res = [] - _ = inF.readline() - for line in inF: - splitL = line.strip().split(delim) - res.append((splitL[idCol], float(splitL[gainCol]))) - return res + res = [] + _ = inF.readline() + for line in inF: + splitL = line.strip().split(delim) + res.append((splitL[idCol], float(splitL[gainCol]))) + return res def ShowDetails(catalog, gains, nToDo=-1, outF=sys.stdout, idCol=0, gainCol=1, outDelim=','): - """ + """ gains should be a sequence of sequences. The idCol entry of each sub-sequence should be a catalog ID. _ProcessGainsData()_ provides suitable input. """ - if nToDo < 0: - nToDo = len(gains) - for i in range(nToDo): - id_ = int(gains[i][idCol]) - gain = float(gains[i][gainCol]) - descr = catalog.GetFragDescription(id_) - if descr: - outF.write('%s\n' % (outDelim.join((str(id_), descr, str(gain))))) + if nToDo < 0: + nToDo = len(gains) + for i in range(nToDo): + id_ = int(gains[i][idCol]) + gain = float(gains[i][gainCol]) + descr = catalog.GetFragDescription(id_) + if descr: + outF.write('%s\n' % (outDelim.join((str(id_), descr, str(gain))))) def SupplierFromDetails(details): - from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode - from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode + from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode + from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode - if details.dbName: - conn = DbConnect(details.dbName, details.tableName) - suppl = DbMolSupplyNode(conn.GetData()) - else: - suppl = SmilesSupplyNode(details.inFileName, delim=details.delim, nameColumn=details.nameCol, - smilesColumn=details.smiCol, titleLine=details.hasTitle) - if isinstance(details.actCol, int): - suppl.reset() - m = next(suppl) - actName = m.GetPropNames()[details.actCol] - details.actCol = actName - if isinstance(details.nameCol, int): - suppl.reset() - m = next(suppl) - nameName = m.GetPropNames()[details.nameCol] - details.nameCol = nameName - suppl.reset() + if details.dbName: + conn = DbConnect(details.dbName, details.tableName) + suppl = DbMolSupplyNode(conn.GetData()) + else: + suppl = SmilesSupplyNode(details.inFileName, delim=details.delim, nameColumn=details.nameCol, + smilesColumn=details.smiCol, titleLine=details.hasTitle) if isinstance(details.actCol, int): - suppl.reset() - m = next(suppl) - actName = m.GetPropNames()[details.actCol] - details.actCol = actName + suppl.reset() + m = next(suppl) + actName = m.GetPropNames()[details.actCol] + details.actCol = actName if isinstance(details.nameCol, int): - suppl.reset() - m = next(suppl) - nameName = m.GetPropNames()[details.nameCol] - details.nameCol = nameName - suppl.reset() - return suppl + suppl.reset() + m = next(suppl) + nameName = m.GetPropNames()[details.nameCol] + details.nameCol = nameName + suppl.reset() + if isinstance(details.actCol, int): + suppl.reset() + m = next(suppl) + actName = m.GetPropNames()[details.actCol] + details.actCol = actName + if isinstance(details.nameCol, int): + suppl.reset() + m = next(suppl) + nameName = m.GetPropNames()[details.nameCol] + details.nameCol = nameName + suppl.reset() + return suppl def Usage(): - print("This is BuildFragmentCatalog") - print('usage error') - # print(__doc__) - sys.exit(-1) + print("This is BuildFragmentCatalog") + print('usage error') + # print(__doc__) + sys.exit(-1) class RunDetails(object): - numMols = -1 - doBuild = 0 - doSigs = 0 - doScore = 0 - doGains = 0 - doDetails = 0 - catalogName = None - onBitsName = None - scoresName = None - gainsName = None - dbName = '' - tableName = None - detailsName = None - inFileName = None - fpName = None - minPath = 2 - maxPath = 6 - smiCol = 1 - actCol = -1 - nameCol = -1 - hasTitle = 1 - nActs = 2 - nBits = -1 - delim = ',' - biasList = None - topN = -1 + numMols = -1 + doBuild = 0 + doSigs = 0 + doScore = 0 + doGains = 0 + doDetails = 0 + catalogName = None + onBitsName = None + scoresName = None + gainsName = None + dbName = '' + tableName = None + detailsName = None + inFileName = None + fpName = None + minPath = 2 + maxPath = 6 + smiCol = 1 + actCol = -1 + nameCol = -1 + hasTitle = 1 + nActs = 2 + nBits = -1 + delim = ',' + biasList = None + topN = -1 def ParseArgs(details): - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'n:d:cst', - ['catalog=', 'onbits=', 'scoresFile=', 'gainsFile=', - 'detailsFile=', 'fpFile=', 'minPath=', 'maxPath=', 'smiCol=', - 'actCol=', 'nameCol=', 'nActs=', 'nBits=', 'biasList=', 'topN=', - 'build', 'sigs', 'gains', 'details', 'score', 'noTitle']) - except Exception: - sys.stderr.write('Error parsing command line:\n') - import traceback - traceback.print_exc() - Usage() - for arg, val in args: - if arg == '-n': - details.numMols = int(val) - elif arg == '-c': - details.delim = ',' - elif arg == '-s': - details.delim = ' ' - elif arg == '-t': - details.delim = '\t' - elif arg == '-d': - details.dbName = val - elif arg == '--build': - details.doBuild = 1 - elif arg == '--score': - details.doScore = 1 - elif arg == '--gains': - details.doGains = 1 - elif arg == '--sigs': - details.doSigs = 1 - elif arg == '-details': - details.doDetails = 1 - elif arg == '--catalog': - details.catalogName = val - elif arg == '--onbits': - details.onBitsName = val - elif arg == '--scoresFile': - details.scoresName = val - elif arg == '--gainsFile': - details.gainsName = val - elif arg == '--detailsFile': - details.detailsName = val - elif arg == '--fpFile': - details.fpName = val - elif arg == '--minPath': - details.minPath = int(val) - elif arg == '--maxPath': - details.maxPath = int(val) - elif arg == '--smiCol': - try: - details.smiCol = int(val) - except ValueError: - details.smiCol = val - elif arg == '--actCol': - try: - details.actCol = int(val) - except ValueError: - details.actCol = val - elif arg == '--nameCol': - try: - details.nameCol = int(val) - except ValueError: - details.nameCol = val - elif arg == '--nActs': - details.nActs = int(val) - elif arg == '--nBits': - details.nBits = int(val) - elif arg == '--noTitle': - details.hasTitle = 0 - elif arg == '--biasList': - details.biasList = tuple(eval(val)) - elif arg == '--topN': - details.topN = int(val) - elif arg == '-h': - Usage() - sys.exit(0) - else: - Usage() - if len(extras): - if details.dbName: - details.tableName = extras[0] - else: - details.inFileName = extras[0] + import getopt + try: + args, extras = getopt.getopt(sys.argv[1:], 'n:d:cst', [ + 'catalog=', 'onbits=', 'scoresFile=', 'gainsFile=', 'detailsFile=', 'fpFile=', 'minPath=', + 'maxPath=', 'smiCol=', 'actCol=', 'nameCol=', 'nActs=', 'nBits=', 'biasList=', 'topN=', + 'build', 'sigs', 'gains', 'details', 'score', 'noTitle' + ]) + except Exception: + sys.stderr.write('Error parsing command line:\n') + import traceback + traceback.print_exc() + Usage() + for arg, val in args: + if arg == '-n': + details.numMols = int(val) + elif arg == '-c': + details.delim = ',' + elif arg == '-s': + details.delim = ' ' + elif arg == '-t': + details.delim = '\t' + elif arg == '-d': + details.dbName = val + elif arg == '--build': + details.doBuild = 1 + elif arg == '--score': + details.doScore = 1 + elif arg == '--gains': + details.doGains = 1 + elif arg == '--sigs': + details.doSigs = 1 + elif arg == '-details': + details.doDetails = 1 + elif arg == '--catalog': + details.catalogName = val + elif arg == '--onbits': + details.onBitsName = val + elif arg == '--scoresFile': + details.scoresName = val + elif arg == '--gainsFile': + details.gainsName = val + elif arg == '--detailsFile': + details.detailsName = val + elif arg == '--fpFile': + details.fpName = val + elif arg == '--minPath': + details.minPath = int(val) + elif arg == '--maxPath': + details.maxPath = int(val) + elif arg == '--smiCol': + try: + details.smiCol = int(val) + except ValueError: + details.smiCol = val + elif arg == '--actCol': + try: + details.actCol = int(val) + except ValueError: + details.actCol = val + elif arg == '--nameCol': + try: + details.nameCol = int(val) + except ValueError: + details.nameCol = val + elif arg == '--nActs': + details.nActs = int(val) + elif arg == '--nBits': + details.nBits = int(val) + elif arg == '--noTitle': + details.hasTitle = 0 + elif arg == '--biasList': + details.biasList = tuple(eval(val)) + elif arg == '--topN': + details.topN = int(val) + elif arg == '-h': + Usage() + sys.exit(0) else: - Usage() + Usage() + if len(extras): + if details.dbName: + details.tableName = extras[0] + else: + details.inFileName = extras[0] + else: + Usage() if __name__ == '__main__': - import time - details = RunDetails() - ParseArgs(details) - from io import StringIO - suppl = SupplierFromDetails(details) + import time + details = RunDetails() + ParseArgs(details) + from io import StringIO + suppl = SupplierFromDetails(details) - cat = None - obls = None - if details.doBuild: - if not suppl: - message("We require inData to generate a catalog\n") - sys.exit(-2) - message("Building catalog\n") - t1 = time.time() - cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, - maxPath=details.maxPath) - t2 = time.time() - message("\tThat took %.2f seconds.\n" % (t2 - t1)) - if details.catalogName: - message("Dumping catalog data\n") - pickle.dump(cat, open(details.catalogName, 'wb+')) - elif details.catalogName: - message("Loading catalog\n") - cat = pickle.load(open(details.catalogName, 'rb')) - if details.onBitsName: - try: - obls = pickle.load(open(details.onBitsName, 'rb')) - except Exception: - obls = None - else: - if len(obls) < (inD.count('\n') - 1): - obls = None - scores = None - if details.doScore: - if not suppl: - message("We require inData to score molecules\n") - sys.exit(-2) - if not cat: - message("We require a catalog to score molecules\n") - sys.exit(-2) - message("Scoring compounds\n") - if not obls or len(obls) < details.numMols: - scores, obls = ScoreMolecules(suppl, cat, maxPts=details.numMols, actName=details.actCol, - nActs=details.nActs) - if details.scoresName: - pickle.dump(scores, open(details.scoresName, 'wb+')) - if details.onBitsName: - pickle.dump(obls, open(details.onBitsName, 'wb+')) - else: - scores = ScoreFromLists(obls, suppl, cat, maxPts=details.numMols, actName=details.actCol, + cat = None + obls = None + if details.doBuild: + if not suppl: + message("We require inData to generate a catalog\n") + sys.exit(-2) + message("Building catalog\n") + t1 = time.time() + cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, + maxPath=details.maxPath) + t2 = time.time() + message("\tThat took %.2f seconds.\n" % (t2 - t1)) + if details.catalogName: + message("Dumping catalog data\n") + pickle.dump(cat, open(details.catalogName, 'wb+')) + elif details.catalogName: + message("Loading catalog\n") + cat = pickle.load(open(details.catalogName, 'rb')) + if details.onBitsName: + try: + obls = pickle.load(open(details.onBitsName, 'rb')) + except Exception: + obls = None + else: + if len(obls) < (inD.count('\n') - 1): + obls = None + scores = None + if details.doScore: + if not suppl: + message("We require inData to score molecules\n") + sys.exit(-2) + if not cat: + message("We require a catalog to score molecules\n") + sys.exit(-2) + message("Scoring compounds\n") + if not obls or len(obls) < details.numMols: + scores, obls = ScoreMolecules(suppl, cat, maxPts=details.numMols, actName=details.actCol, nActs=details.nActs) - elif details.scoresName: - scores = pickle.load(open(details.scoresName, 'rb')) - - if details.fpName and os.path.exists(details.fpName) and not details.doSigs: - message("Reading fingerprints from file.\n") - fps = pickle.load(open(details.fpName, 'rb')) + if details.scoresName: + pickle.dump(scores, open(details.scoresName, 'wb+')) + if details.onBitsName: + pickle.dump(obls, open(details.onBitsName, 'wb+')) else: - fps = [] - gains = None - if details.doGains: - if not suppl: - message("We require inData to calculate gains\n") - sys.exit(-2) - if not (cat or fps): - message("We require either a catalog or fingerprints to calculate gains\n") - sys.exit(-2) - message("Calculating Gains\n") - t1 = time.time() - if details.fpName: - collectFps = 1 - else: - collectFps = 0 - if not fps: - gains, fps = CalcGains(suppl, cat, topN=details.topN, actName=details.actCol, - nActs=details.nActs, biasList=details.biasList, collectFps=collectFps) - if details.fpName: - message("Writing fingerprint file.\n") - tmpF = open(details.fpName, 'wb+') - pickle.dump(fps, tmpF, 1) - tmpF.close() - else: - gains = CalcGainsFromFps(suppl, fps, topN=details.topN, actName=details.actCol, - nActs=details.nActs, biasList=details.biasList) - t2 = time.time() - message("\tThat took %.2f seconds.\n" % (t2 - t1)) - if details.gainsName: - outF = open(details.gainsName, 'w+') - OutputGainsData(outF, gains, cat, nActs=details.nActs) - else: - if details.gainsName: - inF = open(details.gainsName, 'r') - gains = ProcessGainsData(inF) + scores = ScoreFromLists(obls, suppl, cat, maxPts=details.numMols, actName=details.actCol, + nActs=details.nActs) + elif details.scoresName: + scores = pickle.load(open(details.scoresName, 'rb')) - if details.doDetails: - if not cat: - message("We require a catalog to get details\n") - sys.exit(-2) - if not gains: - message("We require gains data to get details\n") - sys.exit(-2) - io = StringIO() - io.write('id,SMILES,gain\n') - ShowDetails(cat, gains, nToDo=details.nBits, outF=io) - if details.detailsName: - open(details.detailsName, 'w+').write(io.getvalue()) - else: - sys.stderr.write(io.getvalue()) + if details.fpName and os.path.exists(details.fpName) and not details.doSigs: + message("Reading fingerprints from file.\n") + fps = pickle.load(open(details.fpName, 'rb')) + else: + fps = [] + gains = None + if details.doGains: + if not suppl: + message("We require inData to calculate gains\n") + sys.exit(-2) + if not (cat or fps): + message("We require either a catalog or fingerprints to calculate gains\n") + sys.exit(-2) + message("Calculating Gains\n") + t1 = time.time() + if details.fpName: + collectFps = 1 + else: + collectFps = 0 + if not fps: + gains, fps = CalcGains(suppl, cat, topN=details.topN, actName=details.actCol, + nActs=details.nActs, biasList=details.biasList, collectFps=collectFps) + if details.fpName: + message("Writing fingerprint file.\n") + tmpF = open(details.fpName, 'wb+') + pickle.dump(fps, tmpF, 1) + tmpF.close() + else: + gains = CalcGainsFromFps(suppl, fps, topN=details.topN, actName=details.actCol, + nActs=details.nActs, biasList=details.biasList) + t2 = time.time() + message("\tThat took %.2f seconds.\n" % (t2 - t1)) + if details.gainsName: + outF = open(details.gainsName, 'w+') + OutputGainsData(outF, gains, cat, nActs=details.nActs) + else: + if details.gainsName: + inF = open(details.gainsName, 'r') + gains = ProcessGainsData(inF) + + if details.doDetails: + if not cat: + message("We require a catalog to get details\n") + sys.exit(-2) + if not gains: + message("We require gains data to get details\n") + sys.exit(-2) + io = StringIO() + io.write('id,SMILES,gain\n') + ShowDetails(cat, gains, nToDo=details.nBits, outF=io) + if details.detailsName: + open(details.detailsName, 'w+').write(io.getvalue()) + else: + sys.stderr.write(io.getvalue()) diff --git a/rdkit/Chem/ChemUtils/AlignDepict.py b/rdkit/Chem/ChemUtils/AlignDepict.py index 4469e915c..fee1d3a5d 100644 --- a/rdkit/Chem/ChemUtils/AlignDepict.py +++ b/rdkit/Chem/ChemUtils/AlignDepict.py @@ -2,11 +2,10 @@ # Copyright (C) 2006 Greg Landrum # This file is part of RDKit and covered by $RDBASE/license.txt # -import sys import argparse +import sys -from rdkit import Chem -from rdkit import Geometry +from rdkit import Chem, Geometry from rdkit.Chem import rdDepictor @@ -30,12 +29,12 @@ def AlignDepict(mol, core, corePattern=None, acceptFailure=False): raise ValueError("Core does not map to itself") else: coreMatch = list(range(core.GetNumAtoms(onlyExplicit=True))) - + if corePattern: match = mol.GetSubstructMatch(corePattern) else: match = mol.GetSubstructMatch(core) - + if not match: if not acceptFailure: raise ValueError('Substructure match with core not found.') diff --git a/rdkit/Chem/ChemUtils/BulkTester.py b/rdkit/Chem/ChemUtils/BulkTester.py index 81dad7150..97a163bf7 100644 --- a/rdkit/Chem/ChemUtils/BulkTester.py +++ b/rdkit/Chem/ChemUtils/BulkTester.py @@ -10,6 +10,7 @@ # import sys + from rdkit import Chem from rdkit.Chem import Randomize diff --git a/rdkit/Chem/ChemUtils/DescriptorUtilities.py b/rdkit/Chem/ChemUtils/DescriptorUtilities.py index cbebddeaa..372800173 100644 --- a/rdkit/Chem/ChemUtils/DescriptorUtilities.py +++ b/rdkit/Chem/ChemUtils/DescriptorUtilities.py @@ -18,51 +18,56 @@ def setDescriptorVersion(version='1.0.0'): """ Set the version on the descriptor function. Use as a decorator """ + def wrapper(func): func.version = version return func + return wrapper class VectorDescriptorNamespace(dict): - def __init__(self, **kwargs): - self.update(kwargs) + + def __init__(self, **kwargs): + self.update(kwargs) class VectorDescriptorWrapper: - """Wrap a function that returns a vector and make it seem like there + """Wrap a function that returns a vector and make it seem like there is one function for each entry. These functions are added to the global namespace with the names provided""" - def __init__(self, func, names, version, namespace): - self.func = func - self.names = names - self.func_key = "__%s"%(func.__name__) - function_namespace = {} - for i,n in enumerate(names): - def f(mol, index=i): - return self.call_desc(mol, index=index) - f.__name__ = n - f.__qualname__ = n - f.version = version - function_namespace[n] = f - self.namespace = VectorDescriptorNamespace(**function_namespace) - self.namespace.update(namespace) - namespace.update(function_namespace) - def _get_key(self, index): - return "%s%s"%(self.func_key, index) + def __init__(self, func, names, version, namespace): + self.func = func + self.names = names + self.func_key = "__%s" % (func.__name__) + function_namespace = {} + for i, n in enumerate(names): - def call_desc(self, mol, index): - if hasattr(mol, self.func_key): - results = getattr(mol, self.func_key, None) - if results is not None: - return results[index] + def f(mol, index=i): + return self.call_desc(mol, index=index) - try: - results = self.func(mol) - except Exception: - return math.nan + f.__name__ = n + f.__qualname__ = n + f.version = version + function_namespace[n] = f + self.namespace = VectorDescriptorNamespace(**function_namespace) + self.namespace.update(namespace) + namespace.update(function_namespace) - setattr(mol, self.func_key, results) + def _get_key(self, index): + return "%s%s" % (self.func_key, index) + + def call_desc(self, mol, index): + if hasattr(mol, self.func_key): + results = getattr(mol, self.func_key, None) + if results is not None: return results[index] + try: + results = self.func(mol) + except Exception: + return math.nan + + setattr(mol, self.func_key, results) + return results[index] diff --git a/rdkit/Chem/ChemUtils/SDFToCSV.py b/rdkit/Chem/ChemUtils/SDFToCSV.py index 9c85a2205..6c5b8346b 100644 --- a/rdkit/Chem/ChemUtils/SDFToCSV.py +++ b/rdkit/Chem/ChemUtils/SDFToCSV.py @@ -37,7 +37,7 @@ def Convert(suppl, outFile, keyCol=None, stopAfter=-1, includeChirality=False, s else: smi = mol.GetProp(smilesFrom) smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi), isomericSmiles=includeChirality) - + outL = [] if keyCol: outL.append(str(mol.GetProp(keyCol))) @@ -67,10 +67,10 @@ def initParser(): def existingFile(filename): - """ 'type' for argparse - check that filename exists """ - if not os.path.exists(filename): - raise argparse.ArgumentTypeError("{0} does not exist".format(filename)) - return filename + """ 'type' for argparse - check that filename exists """ + if not os.path.exists(filename): + raise argparse.ArgumentTypeError("{0} does not exist".format(filename)) + return filename def main(): diff --git a/rdkit/Chem/ChemUtils/TemplateExpand.py b/rdkit/Chem/ChemUtils/TemplateExpand.py index c9218c31e..24ec99522 100644 --- a/rdkit/Chem/ChemUtils/TemplateExpand.py +++ b/rdkit/Chem/ChemUtils/TemplateExpand.py @@ -5,14 +5,15 @@ # from rdkit import RDLogger as logging + logger = logging.logger() logger.setLevel(logging.INFO) +import sys + from rdkit import Chem -from rdkit.Chem import Crippen -from rdkit.Chem import AllChem +from rdkit.Chem import AllChem, Crippen from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict -import sys _version = "0.8.0" _greet = "This is TemplateExpand version %s" % _version @@ -81,6 +82,7 @@ def Usage(): print(_usage, file=sys.stderr) sys.exit(-1) + nDumped = 0 @@ -162,7 +164,7 @@ def Explode(template, sidechains, outF, autoNames=True, do3D=False, useTethers=F templateName = template.GetProp('_Name') except KeyError: templateName = "template" - + for mol in _exploder(template, 0, sidechains, core, chainIndices, autoNames=autoNames, templateName=templateName, do3D=do3D, useTethers=useTethers): outF.write(Chem.MolToMolBlock(mol)) @@ -248,10 +250,10 @@ def ConstructSidechains(suppl, sma=None, replace=True, useAll=False): tmp = None else: tmp = [(idx + 1, mol)] - + if tmp: res.extend(tmp) - + return res diff --git a/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py b/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py index 4f15b4954..7909caebc 100644 --- a/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py +++ b/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py @@ -1,58 +1,59 @@ -from contextlib import contextmanager import sys import unittest +from contextlib import contextmanager +from io import StringIO from rdkit import Chem -from rdkit.Chem.ChemUtils.AlignDepict import initParser, processArgs, AlignDepict -from io import StringIO +from rdkit.Chem.ChemUtils.AlignDepict import (AlignDepict, initParser, + processArgs) class TestCase(unittest.TestCase): - def test1(self): - parser = initParser() - with outputRedirect() as (out, err): - args = parser.parse_args('--smiles CC CCC'.split()) - args.outF = out - processArgs(args) - self.assertIn('RDKit', out.getvalue()) - self.assertIn('2D', out.getvalue()) - self.assertEqual(err.getvalue(), '') + def test1(self): + parser = initParser() + with outputRedirect() as (out, err): + args = parser.parse_args('--smiles CC CCC'.split()) + args.outF = out + processArgs(args) + self.assertIn('RDKit', out.getvalue()) + self.assertIn('2D', out.getvalue()) + self.assertEqual(err.getvalue(), '') - def test_AlignDepict(self): - mol = Chem.MolFromSmiles('CNC') - core = Chem.MolFromSmiles('CC') - pattern = Chem.MolFromSmarts('CCC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + def test_AlignDepict(self): + mol = Chem.MolFromSmiles('CNC') + core = Chem.MolFromSmiles('CC') + pattern = Chem.MolFromSmarts('CCC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CN') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CN') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - mol = Chem.MolFromSmiles('CCC') - Chem.rdDepictor.Compute2DCoords(core) - AlignDepict(mol, core, pattern) + mol = Chem.MolFromSmiles('CCC') + Chem.rdDepictor.Compute2DCoords(core) + AlignDepict(mol, core, pattern) - mol = Chem.MolFromSmiles('CNC') - AlignDepict(mol, core, pattern, acceptFailure=True) + mol = Chem.MolFromSmiles('CNC') + AlignDepict(mol, core, pattern, acceptFailure=True) @contextmanager def outputRedirect(): - """ Redirect standard output and error to String IO and return """ - try: - _stdout, _stderr = sys.stdout, sys.stderr - sys.stdout = sStdout = StringIO() - sys.stderr = sStderr = StringIO() - yield (sStdout, sStderr) - finally: - sys.stdout, sys.stderr = _stdout, _stderr + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py b/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py index 70438ce16..91950e6c9 100644 --- a/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py +++ b/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py @@ -1,85 +1,84 @@ -from contextlib import contextmanager import os import sys import unittest - -from rdkit import Chem -from rdkit import RDConfig -from rdkit.Chem.ChemUtils.SDFToCSV import Convert, initParser +from contextlib import contextmanager from io import StringIO +from rdkit import Chem, RDConfig +from rdkit.Chem.ChemUtils.SDFToCSV import Convert, initParser + class TestCase(unittest.TestCase): - def test1(self): - fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') - suppl = Chem.SDMolSupplier(fName) - io = StringIO() - try: - Convert(suppl, io) - except Exception: - import traceback - traceback.print_exc() - self.fail('conversion failed') - txt = io.getvalue() - lines = txt.split('\n') - if not lines[-1]: - del lines[-1] - self.assertTrue(len(lines) == 201, 'bad num lines: %d' % len(lines)) - line0 = lines[0].split(',') - self.assertEqual(len(line0), 20) - self.assertTrue(line0[0] == 'SMILES') + def test1(self): + fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') + suppl = Chem.SDMolSupplier(fName) + io = StringIO() + try: + Convert(suppl, io) + except Exception: + import traceback + traceback.print_exc() + self.fail('conversion failed') + txt = io.getvalue() + lines = txt.split('\n') + if not lines[-1]: + del lines[-1] + self.assertTrue(len(lines) == 201, 'bad num lines: %d' % len(lines)) + line0 = lines[0].split(',') + self.assertEqual(len(line0), 20) + self.assertTrue(line0[0] == 'SMILES') - def test2(self): - fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') - suppl = Chem.SDMolSupplier(fName) - io = StringIO() - try: - Convert(suppl, io, keyCol='AMW', stopAfter=5) - except Exception: - import traceback - traceback.print_exc() - self.fail('conversion failed') - txt = io.getvalue() - lines = [line for line in txt.split('\n') if line.strip() != ''] - self.assertTrue(len(lines) == 6, 'bad num lines: %d' % len(lines)) - line0 = lines[0].split(',') - self.assertEqual(len(line0), 20) - self.assertTrue(line0[0] == 'AMW') - self.assertTrue(line0[1] == 'SMILES') + def test2(self): + fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') + suppl = Chem.SDMolSupplier(fName) + io = StringIO() + try: + Convert(suppl, io, keyCol='AMW', stopAfter=5) + except Exception: + import traceback + traceback.print_exc() + self.fail('conversion failed') + txt = io.getvalue() + lines = [line for line in txt.split('\n') if line.strip() != ''] + self.assertTrue(len(lines) == 6, 'bad num lines: %d' % len(lines)) + line0 = lines[0].split(',') + self.assertEqual(len(line0), 20) + self.assertTrue(line0[0] == 'AMW') + self.assertTrue(line0[1] == 'SMILES') - def test_parser(self): - parser = initParser() - # User want's help - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args(['-h']) - self.assertNotEqual(out.getvalue(), '') - self.assertEqual(err.getvalue(), '') + def test_parser(self): + parser = initParser() + # User want's help + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args(['-h']) + self.assertNotEqual(out.getvalue(), '') + self.assertEqual(err.getvalue(), '') - # Missing input file - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args([]) - self.assertEqual(out.getvalue(), '') - self.assertNotEqual(err.getvalue(), '') + # Missing input file + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args([]) + self.assertEqual(out.getvalue(), '') + self.assertNotEqual(err.getvalue(), '') - # Input file doesn't exist - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args(['incorrectFilename']) - self.assertEqual(out.getvalue(), '') - self.assertNotEqual(err.getvalue(), '') + # Input file doesn't exist + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args(['incorrectFilename']) + self.assertEqual(out.getvalue(), '') + self.assertNotEqual(err.getvalue(), '') @contextmanager def outputRedirect(): - """ Redirect standard output and error to String IO and return """ - try: - _stdout, _stderr = sys.stdout, sys.stderr - sys.stdout = sStdout = StringIO() - sys.stderr = sStderr = StringIO() - yield (sStdout, sStderr) - finally: - sys.stdout, sys.stderr = _stdout, _stderr + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/ChemUtils/test_list.py b/rdkit/Chem/ChemUtils/test_list.py index 8f7f2c75b..0730f217a 100644 --- a/rdkit/Chem/ChemUtils/test_list.py +++ b/rdkit/Chem/ChemUtils/test_list.py @@ -6,6 +6,7 @@ longTests = [] if __name__ == '__main__': import sys + from rdkit import TestRunner doLong = 0 if '-l' in sys.argv: diff --git a/rdkit/Chem/Crippen.py b/rdkit/Chem/Crippen.py index 31ba8c118..d740f26c3 100755 --- a/rdkit/Chem/Crippen.py +++ b/rdkit/Chem/Crippen.py @@ -18,11 +18,12 @@ """ import os -from rdkit import RDConfig -from rdkit import Chem -from rdkit.Chem import rdMolDescriptors + import numpy +from rdkit import Chem, RDConfig +from rdkit.Chem import rdMolDescriptors + _smartsPatterns = {} _patternOrder = [] # this is the file containing the atom contributions diff --git a/rdkit/Chem/DSViewer.py b/rdkit/Chem/DSViewer.py index 45cc2e58f..00c2db49c 100644 --- a/rdkit/Chem/DSViewer.py +++ b/rdkit/Chem/DSViewer.py @@ -11,9 +11,12 @@ """ uses DSViewer to interact with molecules """ -from rdkit import Chem +import os +import tempfile + from win32com.client import Dispatch -import tempfile, os + +from rdkit import Chem _nextDisplayId = 1 @@ -324,7 +327,7 @@ class MolViewer(object): self.doc.DoCommand('UnSelectAll') tmp = self.doc.DoCommand('SetProperty object RD_Visual=%d;object id="*":select=on' % o.id) tmp = self.doc.DoCommand('SelectByRadius inside %f atom' % distance) - # that selects all atoms in the radius, now we need to make sure + # that selects all atoms in the radius, now we need to make sure # only atoms in _inObj_ are selected: for obj in self.displayables.values(): if obj.id != p.id: diff --git a/rdkit/Chem/Descriptors.py b/rdkit/Chem/Descriptors.py index 239043189..7afd09e21 100755 --- a/rdkit/Chem/Descriptors.py +++ b/rdkit/Chem/Descriptors.py @@ -7,14 +7,16 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from collections import abc # this won't work in python2, but we don't support that any more +from collections import \ + abc # this won't work in python2, but we don't support that any more -from rdkit import Chem -from rdkit.Chem import rdMolDescriptors as _rdMolDescriptors -from rdkit.Chem import rdPartialCharges, rdMolDescriptors import rdkit.Chem.ChemUtils.DescriptorUtilities as _du -from rdkit.Chem.EState.EState import (MaxEStateIndex, MinEStateIndex, MaxAbsEStateIndex, - MinAbsEStateIndex) +from rdkit import Chem +from rdkit.Chem import rdMolDescriptors +from rdkit.Chem import rdMolDescriptors as _rdMolDescriptors +from rdkit.Chem import rdPartialCharges +from rdkit.Chem.EState.EState import (MaxAbsEStateIndex, MaxEStateIndex, + MinAbsEStateIndex, MinEStateIndex) from rdkit.Chem.QED import qed @@ -28,7 +30,8 @@ _descList = [] def _setupDescriptors(namespace): global _descList, descList - from rdkit.Chem import GraphDescriptors, MolSurf, Lipinski, Fragments, Crippen, Descriptors3D + from rdkit.Chem import (Crippen, Descriptors3D, Fragments, GraphDescriptors, + Lipinski, MolSurf) from rdkit.Chem.EState import EState_VSA _descList.clear() @@ -267,8 +270,9 @@ class PropertyFunctor(rdMolDescriptors.PythonPropertyFunctor): def __call__(self, mol): raise NotImplementedError("Please implement the __call__ method") + def CalcMolDescriptors(mol, missingVal=None, silent=True): - ''' calculate the full set of descriptors for a molecule + ''' calculate the full set of descriptors for a molecule Parameters ---------- @@ -283,18 +287,18 @@ def CalcMolDescriptors(mol, missingVal=None, silent=True): dict A dictionary with decriptor names as keys and the descriptor values as values ''' - res = {} - for nm,fn in _descList: - # some of the descriptor fucntions can throw errors if they fail, catch those here: - try: - val = fn(mol) - except: - if not silent: - import traceback - traceback.print_exc() - val = missingVal - res[nm] = val - return res + res = {} + for nm, fn in _descList: + # some of the descriptor fucntions can throw errors if they fail, catch those here: + try: + val = fn(mol) + except: + if not silent: + import traceback + traceback.print_exc() + val = missingVal + res[nm] = val + return res # ------------------------------------ @@ -302,8 +306,8 @@ def CalcMolDescriptors(mol, missingVal=None, silent=True): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys import doctest + import sys failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) sys.exit(failed) diff --git a/rdkit/Chem/Descriptors3D.py b/rdkit/Chem/Descriptors3D.py index a296ebe84..479514980 100644 --- a/rdkit/Chem/Descriptors3D.py +++ b/rdkit/Chem/Descriptors3D.py @@ -13,7 +13,6 @@ from rdkit.Chem import rdMolDescriptors - if hasattr(rdMolDescriptors, 'CalcPMI1'): PMI1 = lambda *x, **y: rdMolDescriptors.CalcPMI1(*x, **y) PMI1.version = rdMolDescriptors._CalcPMI1_version diff --git a/rdkit/Chem/Draw/IPythonConsole.py b/rdkit/Chem/Draw/IPythonConsole.py index b2daa7810..bc26f60d9 100644 --- a/rdkit/Chem/Draw/IPythonConsole.py +++ b/rdkit/Chem/Draw/IPythonConsole.py @@ -8,15 +8,18 @@ # of the RDKit source tree. # import base64 -import html import copy +import html import warnings from io import BytesIO + import IPython from IPython.display import HTML, SVG + from rdkit import Chem from rdkit.Chem import Draw, rdchem, rdChemReactions from rdkit.Chem.Draw import rdMolDraw2D + from . import InteractiveRenderer if IPython.release.version < '0.11': @@ -121,8 +124,8 @@ def _toHTML(mol): content = InteractiveRenderer.generateHTMLBody(mol, molSize, legend=nm, useSVG=ipython_useSVG) else: if not ipython_useSVG: - png = Draw._moltoimg(mol, molSize, [], nm, returnPNG=True, - kekulize=kekulizeStructures, drawOptions=drawOptions) + png = Draw._moltoimg(mol, molSize, [], nm, returnPNG=True, kekulize=kekulizeStructures, + drawOptions=drawOptions) png = base64.b64encode(png) content = f'' else: diff --git a/rdkit/Chem/Draw/InteractiveRenderer.py b/rdkit/Chem/Draw/InteractiveRenderer.py index e6d14cb80..3fa1b5472 100644 --- a/rdkit/Chem/Draw/InteractiveRenderer.py +++ b/rdkit/Chem/Draw/InteractiveRenderer.py @@ -9,17 +9,20 @@ # of the RDKit source tree. # """ Interactive molecule rendering through rdkit-structure-renderer.js """ -from xml.dom import minidom -import uuid import base64 import json -import re import logging -from . import rdMolDraw2D +import re +import uuid +from xml.dom import minidom + from IPython.display import HTML, display + from rdkit import Chem from rdkit.Chem import Draw +from . import rdMolDraw2D + log = logging.getLogger(__name__) rdkitStructureRendererJsUrl = "https://unpkg.com/rdkit-structure-renderer/dist/rdkit-structure-renderer-module.js" @@ -82,8 +85,8 @@ def setEnabled(shouldEnable=True, quiet=False): def _wrapMsgIntoDiv(uuid, msg, quiet): return ('
{"" if quiet else msg}
') + 'class="lm-Widget p-Widget jp-RenderedText jp-mod-trusted jp-OutputArea-output"' + f'id="{uuid}">{"" if quiet else msg}') global _enabled_div_uuid loadingMsg = "Loading rdkit-structure-renderer.js..." @@ -98,8 +101,9 @@ def setEnabled(shouldEnable=True, quiet=False): if _enabled_div_uuid: return display(HTML(_wrapMsgIntoDiv(_enabled_div_uuid, renderingEnabledMsg, quiet))) _enabled_div_uuid = str(uuid.uuid1()) - return display(HTML(_wrapMsgIntoDiv(_enabled_div_uuid, loadingMsg, quiet) + -f"""