allow custom shape points (#8799)

* support providing custom features

* expose that to python

* switch to tuples (on get) and objects (on set) for attributes

* allow aligning mols directly with ShapeInputOptions

* changes for review

* add warning
This commit is contained in:
Greg Landrum
2025-09-23 18:56:03 +02:00
committed by GitHub
parent 805294c27f
commit a5bcf726e1
5 changed files with 322 additions and 22 deletions

View File

@@ -368,6 +368,28 @@ void extractFeatureCoords(
}
}
void extractCustomFeatureCoords(const unsigned int nSelectedAtoms,
const ShapeInputOptions &shapeOpts,
const RDGeom::Point3D &ave,
unsigned int &numFeatures, ShapeInput &res,
std::vector<double> &rad_vector) {
for (const auto &feature : shapeOpts.customFeatures) {
unsigned int feature_type = std::get<0>(feature);
RDGeom::Point3D floc = std::get<1>(feature);
double radius = std::get<2>(feature);
floc -= ave;
DEBUG_MSG("custom feature type " << feature_type << " (" << floc << ")");
auto array_idx = nSelectedAtoms + numFeatures;
res.coord[array_idx * 3] = floc.x;
res.coord[(array_idx * 3) + 1] = floc.y;
res.coord[(array_idx * 3) + 2] = floc.z;
rad_vector[array_idx] = radius;
res.atom_type_vector[array_idx] = feature_type;
++numFeatures;
}
}
} // namespace
// The conformer is left where it is, the shape is translated to the origin.
ShapeInput PrepareConformer(const ROMol &mol, int confId,
@@ -386,7 +408,12 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId,
// Start with the arrays as large as they will possibly have to be.
// They will be re-sized later.
unsigned int nAlignmentAtoms = nAtoms + feature_idx_type.size();
unsigned int nAlignmentAtoms = nAtoms;
if (shapeOpts.customFeatures.empty()) {
nAlignmentAtoms += feature_idx_type.size();
} else {
nAlignmentAtoms += shapeOpts.customFeatures.size();
}
std::vector<double> rad_vector(nAlignmentAtoms);
res.atom_type_vector.resize(nAlignmentAtoms, 0);
@@ -402,8 +429,13 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId,
extractAtomCoords(conformer, nAtoms, shapeOpts, ave, res.coord);
unsigned int numFeatures = 0;
extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type,
shapeOpts, ave, numFeatures, res, rad_vector);
if (shapeOpts.customFeatures.empty()) {
extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type,
shapeOpts, ave, numFeatures, res, rad_vector);
} else if (shapeOpts.useColors) {
extractCustomFeatureCoords(nSelectedAtoms, shapeOpts, ave, numFeatures, res,
rad_vector);
}
// Now cut the final vectors down to the actual number of atoms and
// features used.
@@ -511,14 +543,12 @@ void TransformConformer(const std::vector<double> &finalTrans,
std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, ROMol &fit, std::vector<float> &matrix,
int fitConfId, bool useColors, double opt_param, unsigned int max_preiters,
unsigned int max_postiters, bool applyRefShift) {
const ShapeInputOptions &shapeOpts, int fitConfId, double opt_param,
unsigned int max_preiters, unsigned int max_postiters, bool applyRefShift) {
PRECONDITION(matrix.size() == 12, "bad matrix size");
Align3D::setUseCutOff(true);
DEBUG_MSG("Fit details:");
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
auto fitShape = PrepareConformer(fit, fitConfId, shapeOpts);
auto tanis = AlignShape(refShape, fitShape, matrix, opt_param, max_preiters,
max_postiters);
@@ -536,6 +566,38 @@ std::pair<double, double> AlignMolecule(
return tanis;
}
std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, ROMol &fit, std::vector<float> &matrix,
int fitConfId, bool useColors, double opt_param, unsigned int max_preiters,
unsigned int max_postiters, bool applyRefShift) {
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
return AlignMolecule(refShape, fit, matrix, shapeOpts, fitConfId, opt_param,
max_preiters, max_postiters, applyRefShift);
}
std::pair<double, double> AlignMolecule(
const ROMol &ref, ROMol &fit, std::vector<float> &matrix,
const ShapeInputOptions &refShapeOpts,
const ShapeInputOptions &probeShapeOpts, int refConfId, int fitConfId,
double opt_param, unsigned int max_preiters, unsigned int max_postiters) {
Align3D::setUseCutOff(true);
if (refShapeOpts.useColors != probeShapeOpts.useColors) {
BOOST_LOG(rdWarningLog)
<< "useColor values inconsistent between the reference and fit molecules. Colors will not be used in the alignment."
<< std::endl;
}
DEBUG_MSG("Reference details:");
auto refShape = PrepareConformer(ref, refConfId, refShapeOpts);
bool applyRefShift = true;
auto scores =
AlignMolecule(refShape, fit, matrix, probeShapeOpts, fitConfId, opt_param,
max_preiters, max_postiters, applyRefShift);
return scores;
}
std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
std::vector<float> &matrix,
int refConfId, int fitConfId,
@@ -547,9 +609,6 @@ std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
DEBUG_MSG("Reference details:");
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
auto refShape = PrepareConformer(ref, refConfId, shapeOpts);
auto scores = AlignMolecule(refShape, fit, matrix, fitConfId, useColors,
opt_param, max_preiters, max_postiters, true);
return scores;
return AlignMolecule(ref, fit, matrix, shapeOpts, shapeOpts, refConfId,
fitConfId, opt_param, max_preiters, max_postiters);
}

View File

@@ -84,6 +84,10 @@ struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInputOptions {
atomRadii; // Use these non-standard radii for these atoms.
// The int is for the atom index in the molecule, not
// the atomic number.
std::vector<std::tuple<unsigned int, RDGeom::Point3D, double>>
customFeatures; // use these feature definitions instead of the defaults.
// Each feature is defined by a tuple of:
// (feature type, position, radius)
};
//! Prepare the input for the shape comparison
@@ -152,6 +156,54 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
unsigned int max_preiters = 10u, unsigned int max_postiters = 30u,
bool applyRefShift = false);
//! Align a molecule to a reference shape
/*!
\param refShape the reference shape
\param fit the molecule to align
\param matrix the transformation matrix (populated on return)
\param shapeOpts options for constructing the shape for the fit molecule
\param fitConfId (optional) the conformer to use for the fit molecule
\param opt_param (optional) the optimization parameter
\param max_preiters (optional) the max number of pre-optimization iterations
\param max_postiters (optional) the max number of post-optimization iterations
\param applyRefShift (optional) if true, apply the reference shape's shift
translation to the final coordinates
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, RDKit::ROMol &fit, std::vector<float> &matrix,
const ShapeInputOptions &shapeOpts, int fitConfId = -1,
double opt_param = 1.0, unsigned int max_preiters = 10u,
unsigned int max_postiters = 30u, bool applyRefShift = false);
//! Align a molecule to a reference molecule
/*!
\param ref the reference molecule
\param fit the molecule to align
\param matrix the transformation matrix (populated on return)
\param refShapeOpts options for constructing the shape for the ref molecule
\param fitShapeOpts options for constructing the shape for the fit molecule
\param refConfId (optional) the conformer to use for the reference
molecule
\param fitConfId (optional) the conformer to use for the fit
molecule
\param opt_param (optional) the optimization parameter
\param max_preiters (optional) the max number of pre-optimization iterations
\param max_postiters (optional) the max number of post-optimization
iterations
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const RDKit::ROMol &ref, RDKit::ROMol &fit, std::vector<float> &matrix,
const ShapeInputOptions &refShapeOpts,
const ShapeInputOptions &probeShapeOpts, int refConfId = -1,
int fitConfId = -1, double opt_param = 1.0, unsigned int max_preiters = 10u,
unsigned int max_postiters = 30u);
//! Align a molecule to a reference molecule
/*!
\param ref the reference molecule

View File

@@ -43,6 +43,17 @@ python::tuple alignMol(const RDKit::ROMol &ref, RDKit::ROMol &probe,
opt_param, max_preiters, max_postiters);
return python::make_tuple(nbr_st, nbr_ct);
}
python::tuple alignMol3(const RDKit::ROMol &ref, RDKit::ROMol &probe,
const ShapeInputOptions &refShapeOpts,
const ShapeInputOptions &probeShapeOpts, int refConfId,
int probeConfId, double opt_param,
unsigned int max_preiters, unsigned int max_postiters) {
std::vector<float> matrix(12, 0.0);
auto [nbr_st, nbr_ct] =
AlignMolecule(ref, probe, matrix, refShapeOpts, probeShapeOpts, refConfId,
probeConfId, opt_param, max_preiters, max_postiters);
return python::make_tuple(nbr_st, nbr_ct);
}
python::tuple alignMol2(const ShapeInput &ref, RDKit::ROMol &probe,
int probeConfId, bool useColors, double opt_param,
unsigned int max_preiters, unsigned int max_postiters,
@@ -92,31 +103,31 @@ ShapeInput *prepConf(const RDKit::ROMol &mol, int confId,
}
return new ShapeInput(PrepareConformer(mol, confId, opts));
}
void set_atomSubset(ShapeInputOptions &opts, const python::list &as) {
void set_atomSubset(ShapeInputOptions &opts, const python::object &as) {
pythonObjectToVect<unsigned int>(as, opts.atomSubset);
}
python::list get_atomSubset(const ShapeInputOptions &opts) {
python::tuple get_atomSubset(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.atomSubset) {
py_list.append(val);
}
return py_list;
return python::tuple(py_list);
}
void set_notColorAtoms(ShapeInputOptions &opts, const python::list &nca) {
void set_notColorAtoms(ShapeInputOptions &opts, const python::object &nca) {
pythonObjectToVect<unsigned int>(nca, opts.notColorAtoms);
}
python::list get_notColorAtoms(const ShapeInputOptions &opts) {
python::tuple get_notColorAtoms(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.notColorAtoms) {
py_list.append(val);
}
return py_list;
return python::tuple(py_list);
}
void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) {
void set_atomRadii(ShapeInputOptions &opts, const python::object &ar) {
int len = python::len(ar);
opts.atomRadii.resize(len);
for (int i = 0; i < len; i++) {
@@ -126,15 +137,15 @@ void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) {
}
}
python::list get_atomRadii(const ShapeInputOptions &opts) {
python::tuple get_atomRadii(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.atomRadii) {
py_list.append(python::make_tuple(static_cast<int>(val.first), val.second));
}
return py_list;
return python::tuple(py_list);
}
void set_shapeShift(ShapeInput &shp, const python::list &s) {
void set_shapeShift(ShapeInput &shp, const python::object &s) {
pythonObjectToVect<double>(s, shp.shift);
}
python::list get_shapeShift(const ShapeInput &shp) {
@@ -144,6 +155,31 @@ python::list get_shapeShift(const ShapeInput &shp) {
}
return py_list;
}
void set_customFeatures(ShapeInputOptions &shp, const python::object &s) {
shp.customFeatures.clear();
auto len = python::len(s);
shp.customFeatures.reserve(len);
for (auto i = 0u; i < len; ++i) {
const auto elem = s[i];
unsigned int featType = python::extract<unsigned int>(elem[0]);
RDGeom::Point3D pos = python::extract<RDGeom::Point3D>(elem[1]);
double radius = python::extract<double>(elem[2]);
shp.customFeatures.emplace_back(featType, pos, radius);
}
}
python::tuple get_customFeatures(const ShapeInputOptions &shp) {
python::list py_list;
for (const auto &val : shp.customFeatures) {
python::list elem;
elem.append(static_cast<int>(std::get<0>(val)));
elem.append(std::get<1>(val));
elem.append(std::get<2>(val));
py_list.append(python::tuple(elem));
}
return python::tuple(py_list);
}
} // namespace helpers
void wrap_pubchemshape() {
@@ -174,6 +210,9 @@ void wrap_pubchemshape() {
"atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii,
"Non-standard radii to use for the atoms specified by their indices"
" in the molecule. A list of tuples of [int, float].")
.add_property("customFeatures", &helpers::get_customFeatures,
&helpers::set_customFeatures,
"Custom features for the shape.")
.def("__setattr__", &safeSetattr);
python::def(
@@ -206,6 +245,44 @@ max_postiters : int, optional
only the best poses from the first phase
Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
python::def(
"AlignMol", &helpers::alignMol3,
(python::arg("ref"), python::arg("probe"), python::arg("refShapeOpts"),
python::arg("probeShapeOpts"), python::arg("refConfId") = -1,
python::arg("probeConfId") = -1, python::arg("opt_param") = 1.0,
python::arg("max_preiters") = 10, python::arg("max_postiters") = 30),
R"DOC(Aligns a probe molecule to a reference molecule. The probe is modified.
Parameters
----------
ref : RDKit.ROMol
Reference molecule
probe : RDKit.ROMol
Probe molecule
refShapeOpts : ShapeInputOptions
Options for constructing the shape for the reference molecule
probeShapeOpts : ShapeInputOptions
Options for constructing the shape for the probe molecule
refConfId : int, optional
Reference conformer ID (default is -1)
probeConfId : int, optional
Probe conformer ID (default is -1)
opt_param : float, optional
Balance of shape and color for optimization.
0 is only color, 0.5 is equal weight, and 1.0 is only shape
max_preiters : int, optional
In the two phase optimization, the maximum iterations done on all poses.
max_postiters : int, optional
In the two phase optimization, the maximum iterations during the second phase on
only the best poses from the first phase
Returns
-------
2-tuple of doubles

View File

@@ -2,6 +2,7 @@ import unittest
from rdkit import Chem
from rdkit.Chem import rdShapeAlign
from rdkit import RDConfig
from rdkit.Geometry import Point3D
datadir = RDConfig.RDBaseDir + '/External/pubchem_shape/test_data'
@@ -76,5 +77,46 @@ class TestCase(unittest.TestCase):
shp = rdShapeAlign.PrepareConformer(m1, -1, opts)
self.assertAlmostEqual(shp.sof, 5.074, places=3)
def test7_customFeatures(self):
m1 = Chem.MolFromSmiles(
"O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
opts = rdShapeAlign.ShapeInputOptions()
opts.customFeatures = ((1, Point3D(-1.75978, 0.148897,
0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
shp = rdShapeAlign.PrepareConformer(m1, -1, opts)
self.assertEqual(len(shp.coord), 18)
m2 = Chem.Mol(m1)
opts2 = rdShapeAlign.ShapeInputOptions()
opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897,
0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
shp2 = rdShapeAlign.PrepareConformer(m2, -1, opts2)
tpl = rdShapeAlign.AlignShapes(shp, shp2, opt_param=0.5)
self.assertAlmostEqual(tpl[0], 0.997, places=3)
self.assertAlmostEqual(tpl[1], 0.978, places=3)
tf = tpl[2]
self.assertGreater(0.0, tf[0])
self.assertLess(0.0, tf[3 * 3])
# check the getter:
cfs = opts2.customFeatures
self.assertEqual(len(cfs), 2)
self.assertEqual(cfs[0][0], 2)
self.assertEqual(cfs[1][0], 1)
def test8_customFeatures(self):
m1 = Chem.MolFromSmiles(
"O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
opts = rdShapeAlign.ShapeInputOptions()
opts.customFeatures = ((1, Point3D(-1.75978, 0.148897,
0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
m2 = Chem.Mol(m1)
opts2 = rdShapeAlign.ShapeInputOptions()
opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897,
0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
tpl = rdShapeAlign.AlignMol(m1, m2, opts, opts2, opt_param=0.5)
self.assertAlmostEqual(tpl[0], 0.997, places=3)
self.assertAlmostEqual(tpl[1], 0.978, places=3)
if __name__ == '__main__':
unittest.main()

View File

@@ -523,3 +523,73 @@ TEST_CASE("Hs not properly transformed when hcount = feature count") {
}
}
}
TEST_CASE("custom feature points") {
auto m1 =
"O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|"_smiles;
SECTION("using shapes") {
auto shape1 = PrepareConformer(*m1, -1);
// each carbonyl O gets one feature:
CHECK(shape1.coord.size() == 18);
ShapeInputOptions opts2;
opts2.customFeatures = {{1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
{2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
auto shape2 = PrepareConformer(*m1, -1, opts2);
CHECK(shape2.coord.size() == 18);
{
// confirm that we don't add the features if useColors is false
ShapeInputOptions topts;
topts.customFeatures = {
{1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
{2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
topts.useColors = false;
auto tshape = PrepareConformer(*m1, -1, topts);
CHECK(tshape.coord.size() == 12);
}
// we'll swap the features on the second shape so that the alignment has to
// be inverted
ShapeInputOptions opts3;
opts3.customFeatures = {{2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
{1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
auto m2 = ROMol(*m1);
auto shape3 = PrepareConformer(m2, -1, opts3);
CHECK(shape3.coord.size() == 18);
double opt_param = 0.5;
std::vector<float> matrix(12, 0.0);
auto [st, ct] = AlignShape(shape2, shape3, matrix, opt_param);
CHECK_THAT(st, Catch::Matchers::WithinAbs(0.997, 0.001));
CHECK_THAT(ct, Catch::Matchers::WithinAbs(0.978, 0.001));
CHECK(shape3.coord[0] > 0); // x coord of first atom
CHECK(shape3.coord[3 * 3] < 0); // x coord of fourth atom
auto conf = m2.getConformer(-1);
TransformConformer(shape2.shift, matrix, shape3, conf);
CHECK(conf.getAtomPos(0).x > 0);
CHECK(conf.getAtomPos(3).x < 0);
}
SECTION("using molecules") {
ShapeInputOptions opts2;
opts2.customFeatures = {{1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
{2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
auto m2 = ROMol(*m1);
// we'll swap the features on the second shape so that the alignment has to
// be inverted
ShapeInputOptions opts3;
opts3.customFeatures = {{2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
{1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
double opt_param = 0.5;
std::vector<float> matrix(12, 0.0);
auto [st, ct] =
AlignMolecule(*m1, m2, matrix, opts2, opts3, -2, -2, opt_param);
CHECK_THAT(st, Catch::Matchers::WithinAbs(0.997, 0.001));
CHECK_THAT(ct, Catch::Matchers::WithinAbs(0.978, 0.001));
auto conf = m2.getConformer(-1);
CHECK(conf.getAtomPos(0).x > 0);
CHECK(conf.getAtomPos(3).x < 0);
}
}