From a5bcf726e11bbfd17111483409fe7fc5fe0f9f4b Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Tue, 23 Sep 2025 18:56:03 +0200 Subject: [PATCH] allow custom shape points (#8799) * support providing custom features * expose that to python * switch to tuples (on get) and objects (on set) for attributes * allow aligning mols directly with ShapeInputOptions * changes for review * add warning --- External/pubchem_shape/PubChemShape.cpp | 83 +++++++++++++--- External/pubchem_shape/PubChemShape.hpp | 52 ++++++++++ External/pubchem_shape/Wrap/cshapealign.cpp | 97 +++++++++++++++++-- .../pubchem_shape/Wrap/test_rdshapealign.py | 42 ++++++++ External/pubchem_shape/test.cpp | 70 +++++++++++++ 5 files changed, 322 insertions(+), 22 deletions(-) diff --git a/External/pubchem_shape/PubChemShape.cpp b/External/pubchem_shape/PubChemShape.cpp index 40d4ac215..50c078e3a 100644 --- a/External/pubchem_shape/PubChemShape.cpp +++ b/External/pubchem_shape/PubChemShape.cpp @@ -368,6 +368,28 @@ void extractFeatureCoords( } } +void extractCustomFeatureCoords(const unsigned int nSelectedAtoms, + const ShapeInputOptions &shapeOpts, + const RDGeom::Point3D &ave, + unsigned int &numFeatures, ShapeInput &res, + std::vector &rad_vector) { + for (const auto &feature : shapeOpts.customFeatures) { + unsigned int feature_type = std::get<0>(feature); + RDGeom::Point3D floc = std::get<1>(feature); + double radius = std::get<2>(feature); + floc -= ave; + DEBUG_MSG("custom feature type " << feature_type << " (" << floc << ")"); + + auto array_idx = nSelectedAtoms + numFeatures; + res.coord[array_idx * 3] = floc.x; + res.coord[(array_idx * 3) + 1] = floc.y; + res.coord[(array_idx * 3) + 2] = floc.z; + rad_vector[array_idx] = radius; + res.atom_type_vector[array_idx] = feature_type; + ++numFeatures; + } +} + } // namespace // The conformer is left where it is, the shape is translated to the origin. ShapeInput PrepareConformer(const ROMol &mol, int confId, @@ -386,7 +408,12 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, // Start with the arrays as large as they will possibly have to be. // They will be re-sized later. - unsigned int nAlignmentAtoms = nAtoms + feature_idx_type.size(); + unsigned int nAlignmentAtoms = nAtoms; + if (shapeOpts.customFeatures.empty()) { + nAlignmentAtoms += feature_idx_type.size(); + } else { + nAlignmentAtoms += shapeOpts.customFeatures.size(); + } std::vector rad_vector(nAlignmentAtoms); res.atom_type_vector.resize(nAlignmentAtoms, 0); @@ -402,8 +429,13 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, extractAtomCoords(conformer, nAtoms, shapeOpts, ave, res.coord); unsigned int numFeatures = 0; - extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type, - shapeOpts, ave, numFeatures, res, rad_vector); + if (shapeOpts.customFeatures.empty()) { + extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type, + shapeOpts, ave, numFeatures, res, rad_vector); + } else if (shapeOpts.useColors) { + extractCustomFeatureCoords(nSelectedAtoms, shapeOpts, ave, numFeatures, res, + rad_vector); + } // Now cut the final vectors down to the actual number of atoms and // features used. @@ -511,14 +543,12 @@ void TransformConformer(const std::vector &finalTrans, std::pair AlignMolecule( const ShapeInput &refShape, ROMol &fit, std::vector &matrix, - int fitConfId, bool useColors, double opt_param, unsigned int max_preiters, - unsigned int max_postiters, bool applyRefShift) { + const ShapeInputOptions &shapeOpts, int fitConfId, double opt_param, + unsigned int max_preiters, unsigned int max_postiters, bool applyRefShift) { PRECONDITION(matrix.size() == 12, "bad matrix size"); Align3D::setUseCutOff(true); DEBUG_MSG("Fit details:"); - ShapeInputOptions shapeOpts; - shapeOpts.useColors = useColors; auto fitShape = PrepareConformer(fit, fitConfId, shapeOpts); auto tanis = AlignShape(refShape, fitShape, matrix, opt_param, max_preiters, max_postiters); @@ -536,6 +566,38 @@ std::pair AlignMolecule( return tanis; } +std::pair AlignMolecule( + const ShapeInput &refShape, ROMol &fit, std::vector &matrix, + int fitConfId, bool useColors, double opt_param, unsigned int max_preiters, + unsigned int max_postiters, bool applyRefShift) { + ShapeInputOptions shapeOpts; + shapeOpts.useColors = useColors; + return AlignMolecule(refShape, fit, matrix, shapeOpts, fitConfId, opt_param, + max_preiters, max_postiters, applyRefShift); +} + +std::pair AlignMolecule( + const ROMol &ref, ROMol &fit, std::vector &matrix, + const ShapeInputOptions &refShapeOpts, + const ShapeInputOptions &probeShapeOpts, int refConfId, int fitConfId, + double opt_param, unsigned int max_preiters, unsigned int max_postiters) { + Align3D::setUseCutOff(true); + + if (refShapeOpts.useColors != probeShapeOpts.useColors) { + BOOST_LOG(rdWarningLog) + << "useColor values inconsistent between the reference and fit molecules. Colors will not be used in the alignment." + << std::endl; + } + + DEBUG_MSG("Reference details:"); + auto refShape = PrepareConformer(ref, refConfId, refShapeOpts); + bool applyRefShift = true; + auto scores = + AlignMolecule(refShape, fit, matrix, probeShapeOpts, fitConfId, opt_param, + max_preiters, max_postiters, applyRefShift); + return scores; +} + std::pair AlignMolecule(const ROMol &ref, ROMol &fit, std::vector &matrix, int refConfId, int fitConfId, @@ -547,9 +609,6 @@ std::pair AlignMolecule(const ROMol &ref, ROMol &fit, DEBUG_MSG("Reference details:"); ShapeInputOptions shapeOpts; shapeOpts.useColors = useColors; - auto refShape = PrepareConformer(ref, refConfId, shapeOpts); - - auto scores = AlignMolecule(refShape, fit, matrix, fitConfId, useColors, - opt_param, max_preiters, max_postiters, true); - return scores; + return AlignMolecule(ref, fit, matrix, shapeOpts, shapeOpts, refConfId, + fitConfId, opt_param, max_preiters, max_postiters); } diff --git a/External/pubchem_shape/PubChemShape.hpp b/External/pubchem_shape/PubChemShape.hpp index 1b093ce22..62f9fff50 100644 --- a/External/pubchem_shape/PubChemShape.hpp +++ b/External/pubchem_shape/PubChemShape.hpp @@ -84,6 +84,10 @@ struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInputOptions { atomRadii; // Use these non-standard radii for these atoms. // The int is for the atom index in the molecule, not // the atomic number. + std::vector> + customFeatures; // use these feature definitions instead of the defaults. + // Each feature is defined by a tuple of: + // (feature type, position, radius) }; //! Prepare the input for the shape comparison @@ -152,6 +156,54 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignMolecule( unsigned int max_preiters = 10u, unsigned int max_postiters = 30u, bool applyRefShift = false); +//! Align a molecule to a reference shape +/*! + \param refShape the reference shape + \param fit the molecule to align + \param matrix the transformation matrix (populated on return) + \param shapeOpts options for constructing the shape for the fit molecule + \param fitConfId (optional) the conformer to use for the fit molecule + \param opt_param (optional) the optimization parameter + \param max_preiters (optional) the max number of pre-optimization iterations + \param max_postiters (optional) the max number of post-optimization iterations + \param applyRefShift (optional) if true, apply the reference shape's shift + translation to the final coordinates + + \return a pair of the shape Tanimoto value and the color Tanimoto value (zero + if useColors is false) +*/ +RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignMolecule( + const ShapeInput &refShape, RDKit::ROMol &fit, std::vector &matrix, + const ShapeInputOptions &shapeOpts, int fitConfId = -1, + double opt_param = 1.0, unsigned int max_preiters = 10u, + unsigned int max_postiters = 30u, bool applyRefShift = false); + +//! Align a molecule to a reference molecule +/*! + \param ref the reference molecule + \param fit the molecule to align + \param matrix the transformation matrix (populated on return) + \param refShapeOpts options for constructing the shape for the ref molecule + \param fitShapeOpts options for constructing the shape for the fit molecule + \param refConfId (optional) the conformer to use for the reference + molecule + \param fitConfId (optional) the conformer to use for the fit + molecule + \param opt_param (optional) the optimization parameter + \param max_preiters (optional) the max number of pre-optimization iterations + \param max_postiters (optional) the max number of post-optimization + iterations + + \return a pair of the shape Tanimoto value and the color Tanimoto value (zero + if useColors is false) +*/ +RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignMolecule( + const RDKit::ROMol &ref, RDKit::ROMol &fit, std::vector &matrix, + const ShapeInputOptions &refShapeOpts, + const ShapeInputOptions &probeShapeOpts, int refConfId = -1, + int fitConfId = -1, double opt_param = 1.0, unsigned int max_preiters = 10u, + unsigned int max_postiters = 30u); + //! Align a molecule to a reference molecule /*! \param ref the reference molecule diff --git a/External/pubchem_shape/Wrap/cshapealign.cpp b/External/pubchem_shape/Wrap/cshapealign.cpp index 3e8a7d0c5..7209be2e8 100644 --- a/External/pubchem_shape/Wrap/cshapealign.cpp +++ b/External/pubchem_shape/Wrap/cshapealign.cpp @@ -43,6 +43,17 @@ python::tuple alignMol(const RDKit::ROMol &ref, RDKit::ROMol &probe, opt_param, max_preiters, max_postiters); return python::make_tuple(nbr_st, nbr_ct); } +python::tuple alignMol3(const RDKit::ROMol &ref, RDKit::ROMol &probe, + const ShapeInputOptions &refShapeOpts, + const ShapeInputOptions &probeShapeOpts, int refConfId, + int probeConfId, double opt_param, + unsigned int max_preiters, unsigned int max_postiters) { + std::vector matrix(12, 0.0); + auto [nbr_st, nbr_ct] = + AlignMolecule(ref, probe, matrix, refShapeOpts, probeShapeOpts, refConfId, + probeConfId, opt_param, max_preiters, max_postiters); + return python::make_tuple(nbr_st, nbr_ct); +} python::tuple alignMol2(const ShapeInput &ref, RDKit::ROMol &probe, int probeConfId, bool useColors, double opt_param, unsigned int max_preiters, unsigned int max_postiters, @@ -92,31 +103,31 @@ ShapeInput *prepConf(const RDKit::ROMol &mol, int confId, } return new ShapeInput(PrepareConformer(mol, confId, opts)); } -void set_atomSubset(ShapeInputOptions &opts, const python::list &as) { +void set_atomSubset(ShapeInputOptions &opts, const python::object &as) { pythonObjectToVect(as, opts.atomSubset); } -python::list get_atomSubset(const ShapeInputOptions &opts) { +python::tuple get_atomSubset(const ShapeInputOptions &opts) { python::list py_list; for (const auto &val : opts.atomSubset) { py_list.append(val); } - return py_list; + return python::tuple(py_list); } -void set_notColorAtoms(ShapeInputOptions &opts, const python::list &nca) { +void set_notColorAtoms(ShapeInputOptions &opts, const python::object &nca) { pythonObjectToVect(nca, opts.notColorAtoms); } -python::list get_notColorAtoms(const ShapeInputOptions &opts) { +python::tuple get_notColorAtoms(const ShapeInputOptions &opts) { python::list py_list; for (const auto &val : opts.notColorAtoms) { py_list.append(val); } - return py_list; + return python::tuple(py_list); } -void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) { +void set_atomRadii(ShapeInputOptions &opts, const python::object &ar) { int len = python::len(ar); opts.atomRadii.resize(len); for (int i = 0; i < len; i++) { @@ -126,15 +137,15 @@ void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) { } } -python::list get_atomRadii(const ShapeInputOptions &opts) { +python::tuple get_atomRadii(const ShapeInputOptions &opts) { python::list py_list; for (const auto &val : opts.atomRadii) { py_list.append(python::make_tuple(static_cast(val.first), val.second)); } - return py_list; + return python::tuple(py_list); } -void set_shapeShift(ShapeInput &shp, const python::list &s) { +void set_shapeShift(ShapeInput &shp, const python::object &s) { pythonObjectToVect(s, shp.shift); } python::list get_shapeShift(const ShapeInput &shp) { @@ -144,6 +155,31 @@ python::list get_shapeShift(const ShapeInput &shp) { } return py_list; } + +void set_customFeatures(ShapeInputOptions &shp, const python::object &s) { + shp.customFeatures.clear(); + auto len = python::len(s); + shp.customFeatures.reserve(len); + for (auto i = 0u; i < len; ++i) { + const auto elem = s[i]; + unsigned int featType = python::extract(elem[0]); + RDGeom::Point3D pos = python::extract(elem[1]); + double radius = python::extract(elem[2]); + shp.customFeatures.emplace_back(featType, pos, radius); + } +} +python::tuple get_customFeatures(const ShapeInputOptions &shp) { + python::list py_list; + for (const auto &val : shp.customFeatures) { + python::list elem; + elem.append(static_cast(std::get<0>(val))); + elem.append(std::get<1>(val)); + elem.append(std::get<2>(val)); + py_list.append(python::tuple(elem)); + } + return python::tuple(py_list); +} + } // namespace helpers void wrap_pubchemshape() { @@ -174,6 +210,9 @@ void wrap_pubchemshape() { "atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii, "Non-standard radii to use for the atoms specified by their indices" " in the molecule. A list of tuples of [int, float].") + .add_property("customFeatures", &helpers::get_customFeatures, + &helpers::set_customFeatures, + "Custom features for the shape.") .def("__setattr__", &safeSetattr); python::def( @@ -206,6 +245,44 @@ max_postiters : int, optional only the best poses from the first phase +Returns +------- + 2-tuple of doubles + The results are (shape_score, color_score) + The color_score is zero if useColors is False)DOC"); + + python::def( + "AlignMol", &helpers::alignMol3, + (python::arg("ref"), python::arg("probe"), python::arg("refShapeOpts"), + python::arg("probeShapeOpts"), python::arg("refConfId") = -1, + python::arg("probeConfId") = -1, python::arg("opt_param") = 1.0, + python::arg("max_preiters") = 10, python::arg("max_postiters") = 30), + R"DOC(Aligns a probe molecule to a reference molecule. The probe is modified. + +Parameters +---------- +ref : RDKit.ROMol + Reference molecule +probe : RDKit.ROMol + Probe molecule +refShapeOpts : ShapeInputOptions + Options for constructing the shape for the reference molecule +probeShapeOpts : ShapeInputOptions + Options for constructing the shape for the probe molecule +refConfId : int, optional + Reference conformer ID (default is -1) +probeConfId : int, optional + Probe conformer ID (default is -1) +opt_param : float, optional + Balance of shape and color for optimization. + 0 is only color, 0.5 is equal weight, and 1.0 is only shape +max_preiters : int, optional + In the two phase optimization, the maximum iterations done on all poses. +max_postiters : int, optional + In the two phase optimization, the maximum iterations during the second phase on + only the best poses from the first phase + + Returns ------- 2-tuple of doubles diff --git a/External/pubchem_shape/Wrap/test_rdshapealign.py b/External/pubchem_shape/Wrap/test_rdshapealign.py index 0e1137ead..c705bad39 100644 --- a/External/pubchem_shape/Wrap/test_rdshapealign.py +++ b/External/pubchem_shape/Wrap/test_rdshapealign.py @@ -2,6 +2,7 @@ import unittest from rdkit import Chem from rdkit.Chem import rdShapeAlign from rdkit import RDConfig +from rdkit.Geometry import Point3D datadir = RDConfig.RDBaseDir + '/External/pubchem_shape/test_data' @@ -76,5 +77,46 @@ class TestCase(unittest.TestCase): shp = rdShapeAlign.PrepareConformer(m1, -1, opts) self.assertAlmostEqual(shp.sof, 5.074, places=3) + def test7_customFeatures(self): + m1 = Chem.MolFromSmiles( + "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|") + opts = rdShapeAlign.ShapeInputOptions() + opts.customFeatures = ((1, Point3D(-1.75978, 0.148897, + 0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0)) + shp = rdShapeAlign.PrepareConformer(m1, -1, opts) + self.assertEqual(len(shp.coord), 18) + m2 = Chem.Mol(m1) + opts2 = rdShapeAlign.ShapeInputOptions() + opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897, + 0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0)) + shp2 = rdShapeAlign.PrepareConformer(m2, -1, opts2) + tpl = rdShapeAlign.AlignShapes(shp, shp2, opt_param=0.5) + self.assertAlmostEqual(tpl[0], 0.997, places=3) + self.assertAlmostEqual(tpl[1], 0.978, places=3) + tf = tpl[2] + self.assertGreater(0.0, tf[0]) + self.assertLess(0.0, tf[3 * 3]) + + # check the getter: + cfs = opts2.customFeatures + self.assertEqual(len(cfs), 2) + self.assertEqual(cfs[0][0], 2) + self.assertEqual(cfs[1][0], 1) + + def test8_customFeatures(self): + m1 = Chem.MolFromSmiles( + "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|") + opts = rdShapeAlign.ShapeInputOptions() + opts.customFeatures = ((1, Point3D(-1.75978, 0.148897, + 0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0)) + m2 = Chem.Mol(m1) + opts2 = rdShapeAlign.ShapeInputOptions() + opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897, + 0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0)) + tpl = rdShapeAlign.AlignMol(m1, m2, opts, opts2, opt_param=0.5) + self.assertAlmostEqual(tpl[0], 0.997, places=3) + self.assertAlmostEqual(tpl[1], 0.978, places=3) + + if __name__ == '__main__': unittest.main() diff --git a/External/pubchem_shape/test.cpp b/External/pubchem_shape/test.cpp index bc4d9c186..59416f9e7 100644 --- a/External/pubchem_shape/test.cpp +++ b/External/pubchem_shape/test.cpp @@ -523,3 +523,73 @@ TEST_CASE("Hs not properly transformed when hcount = feature count") { } } } + +TEST_CASE("custom feature points") { + auto m1 = + "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|"_smiles; + SECTION("using shapes") { + auto shape1 = PrepareConformer(*m1, -1); + // each carbonyl O gets one feature: + CHECK(shape1.coord.size() == 18); + ShapeInputOptions opts2; + opts2.customFeatures = {{1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0}, + {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}}; + auto shape2 = PrepareConformer(*m1, -1, opts2); + CHECK(shape2.coord.size() == 18); + + { + // confirm that we don't add the features if useColors is false + ShapeInputOptions topts; + topts.customFeatures = { + {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0}, + {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}}; + topts.useColors = false; + auto tshape = PrepareConformer(*m1, -1, topts); + CHECK(tshape.coord.size() == 12); + } + + // we'll swap the features on the second shape so that the alignment has to + // be inverted + ShapeInputOptions opts3; + opts3.customFeatures = {{2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0}, + {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}}; + + auto m2 = ROMol(*m1); + auto shape3 = PrepareConformer(m2, -1, opts3); + CHECK(shape3.coord.size() == 18); + double opt_param = 0.5; + std::vector matrix(12, 0.0); + auto [st, ct] = AlignShape(shape2, shape3, matrix, opt_param); + CHECK_THAT(st, Catch::Matchers::WithinAbs(0.997, 0.001)); + CHECK_THAT(ct, Catch::Matchers::WithinAbs(0.978, 0.001)); + CHECK(shape3.coord[0] > 0); // x coord of first atom + CHECK(shape3.coord[3 * 3] < 0); // x coord of fourth atom + + auto conf = m2.getConformer(-1); + TransformConformer(shape2.shift, matrix, shape3, conf); + CHECK(conf.getAtomPos(0).x > 0); + CHECK(conf.getAtomPos(3).x < 0); + } + SECTION("using molecules") { + ShapeInputOptions opts2; + opts2.customFeatures = {{1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0}, + {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}}; + + auto m2 = ROMol(*m1); + // we'll swap the features on the second shape so that the alignment has to + // be inverted + ShapeInputOptions opts3; + opts3.customFeatures = {{2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0}, + {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}}; + + double opt_param = 0.5; + std::vector matrix(12, 0.0); + auto [st, ct] = + AlignMolecule(*m1, m2, matrix, opts2, opts3, -2, -2, opt_param); + CHECK_THAT(st, Catch::Matchers::WithinAbs(0.997, 0.001)); + CHECK_THAT(ct, Catch::Matchers::WithinAbs(0.978, 0.001)); + auto conf = m2.getConformer(-1); + CHECK(conf.getAtomPos(0).x > 0); + CHECK(conf.getAtomPos(3).x < 0); + } +}