Add function to compute shape scores without overlay. (#8950)

* Add function to compute shape scores without overlay.

* Update docstrings.

* Rename function to ScoreMolecule/ScoreMol.
Add extra functions analogous to the overlay ones.

* Windows export crap.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
David Cosgrove
2025-11-19 16:02:38 +00:00
committed by GitHub
parent bea9b11a34
commit 2c3010d5ce
5 changed files with 309 additions and 12 deletions

View File

@@ -315,7 +315,9 @@ void extractAtomCoords(const Conformer &conformer, const unsigned int nAtoms,
unsigned int Z = conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum();
if (Z > 1 || (shapeOpts.includeDummies && Z == 0)) {
RDGeom::Point3D pos = conformer.getAtomPos(i);
pos -= ave;
if (shapeOpts.normalize) {
pos -= ave;
}
coords[j * 3] = pos.x;
coords[(j * 3) + 1] = pos.y;
coords[(j * 3) + 2] = pos.z;
@@ -352,7 +354,9 @@ void extractFeatureCoords(
}
if (nSel == feature_idx_type[i].first.size()) {
floc /= nSel;
floc -= ave;
if (shapeOpts.normalize) {
floc -= ave;
}
DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc
<< ")");
@@ -611,3 +615,60 @@ std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
return AlignMolecule(ref, fit, matrix, shapeOpts, shapeOpts, refConfId,
fitConfId, opt_param, max_preiters, max_postiters);
}
std::pair<double, double> ScoreShape(const ShapeInput &shape1,
ShapeInput &shape2, bool useColors) {
double shapeOverlap = Align3D::ComputeShapeOverlap(
shape1.coord.data(), shape1.alpha_vector, shape1.volumeAtomIndexVector,
shape2.coord.data(), shape2.alpha_vector, shape2.volumeAtomIndexVector);
double shape = shapeOverlap / (shape1.sov + shape2.sov - shapeOverlap);
std::set<unsigned int> jointColorAtomTypeSet;
Align3D::getJointColorTypeSet(
shape1.atom_type_vector.data(), shape1.atom_type_vector.size(),
shape2.atom_type_vector.data(), shape2.atom_type_vector.size(),
jointColorAtomTypeSet);
// Take copy of the color atom mappings so as not to alter the input shape
// which might be re-used.
double color = 0.0;
if (useColors) {
auto shape1MapCp = shape1.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(shape1MapCp,
jointColorAtomTypeSet);
auto shape2MapCp = shape2.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(shape2MapCp,
jointColorAtomTypeSet);
double featureOverlap = Align3D::ComputeFeatureOverlap(
shape1.coord.data(), shape1.alpha_vector, shape1MapCp,
shape2.coord.data(), shape2.alpha_vector, shape2MapCp, nullptr);
color = featureOverlap / (shape1.sof + shape2.sof - featureOverlap);
}
return std::make_pair(shape, color);
}
std::pair<double, double> ScoreMolecule(const ShapeInput &shape,
RDKit::ROMol &mol,
const ShapeInputOptions &molShapeOpts,
int molConfId) {
ShapeInputOptions shapeOpts = molShapeOpts;
shapeOpts.normalize = false;
auto shape2 = PrepareConformer(mol, molConfId, shapeOpts);
return ScoreShape(shape, shape2, shapeOpts.useColors);
}
std::pair<double, double> ScoreMolecule(const RDKit::ROMol &mol1,
RDKit::ROMol &mol2,
const ShapeInputOptions &mol1ShapeOpts,
const ShapeInputOptions &mol2ShapeOpts,
int mol1ConfId, int mol2ConfId) {
ShapeInputOptions shapeOpts = mol1ShapeOpts;
shapeOpts.normalize = false;
auto shape1 = PrepareConformer(mol1, mol1ConfId, shapeOpts);
shapeOpts = mol2ShapeOpts;
shapeOpts.normalize = false;
auto shape2 = PrepareConformer(mol2, mol2ConfId, shapeOpts);
// If either shapeOpts has useColors of false, we have to go with that.
bool useColors = mol1ShapeOpts.useColors && mol2ShapeOpts.useColors;
return ScoreShape(shape1, shape2, useColors);
}

View File

@@ -88,6 +88,8 @@ struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInputOptions {
customFeatures; // use these feature definitions instead of the defaults.
// Each feature is defined by a tuple of:
// (feature type, position, radius)
bool normalize{true}; // Whether to normalise the shape by putting into
// its inertial frame.
};
//! Prepare the input for the shape comparison
@@ -148,7 +150,7 @@ RDKIT_PUBCHEMSHAPE_EXPORT void TransformConformer(
translation to the final coordinates
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
if opt_param is 1.0.)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, RDKit::ROMol &fit, std::vector<float> &matrix,
@@ -170,7 +172,7 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
translation to the final coordinates
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
if opt_param is 1.0.)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, RDKit::ROMol &fit, std::vector<float> &matrix,
@@ -195,7 +197,7 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
iterations
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
if opt_param is 1.0.)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const RDKit::ROMol &ref, RDKit::ROMol &fit, std::vector<float> &matrix,
@@ -221,7 +223,7 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
iterations
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
if opt_param is 1.0.)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const RDKit::ROMol &ref, RDKit::ROMol &fit, std::vector<float> &matrix,
@@ -229,4 +231,54 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
double opt_param = 1.0, unsigned int max_preiters = 10u,
unsigned int max_postiters = 30u);
//! Calculate the scores between 2 shapes without moving them.
/*!
\param shape1 the first shape. It's essential that the shape was
created with ShapeInputOptions::normalize = false.
\param shape2 the second shape. It's essential that the shape was
created with ShapeInputOptions::normalize = false.
\param useColors (optional) whether to return a color score
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> ScoreShape(
const ShapeInput &shape1, ShapeInput &shape2, bool useColors);
//! Calculate the scores between a shape and a molecule without moving them.
/*!
\param shape the shape. It's essential that the shape was created
with ShapeInputOptions::normalize = false.
\param mol the molecule
\param molShapeOpts options for constructing the shape for molecule
\param molConfId (optional) the conformer to use for the
molecule
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if molShapeOpts.useColors is false)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> ScoreMolecule(
const ShapeInput &shape, RDKit::ROMol &mol,
const ShapeInputOptions &molShapeOpts, int molConfId = -1);
//! Calculate the scores between 2 molecules without moving them.
/*!
\param mol1 the first molecule
\param mol2 the second molecule
\param mol1ShapeOpts options for constructing the shape for molecule 1
\param mol2ShapeOpts options for constructing the shape for molecule 2
\param mol1ConfId (optional) the conformer to use for the first
molecule
\param mol2ConfId (optional) the conformer to use for the second
molecule
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false in either ShapeInputOptions parameter.)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> ScoreMolecule(
const RDKit::ROMol &mol1, RDKit::ROMol &mol2,
const ShapeInputOptions &mol1ShapeOpts,
const ShapeInputOptions &mol2ShapeOpts, int mol1ConfId = -1,
int mol2ConfId = -1);
#endif

View File

@@ -76,6 +76,25 @@ python::tuple alignShapes(const ShapeInput &refShape, ShapeInput &fitShape,
}
return python::make_tuple(nbr_st, nbr_ct, pyMatrix);
}
python::tuple scoreMolecule1(const RDKit::ROMol &mol1, RDKit::ROMol &mol2,
const ShapeInputOptions &mol1ShapeOpts,
const ShapeInputOptions &mol2ShapeOpts,
int mol1ConfId, int mol2ConfId) {
auto [nbr_st, nbr_ct] = ScoreMolecule(mol1, mol2, mol1ShapeOpts,
mol2ShapeOpts, mol1ConfId, mol2ConfId);
return python::make_tuple(nbr_st, nbr_ct);
}
python::tuple scoreMolecule2(const ShapeInput &shape, RDKit::ROMol &mol,
const ShapeInputOptions &molShapeOpts,
int molConfId) {
auto [nbr_st, nbr_ct] = ScoreMolecule(shape, mol, molShapeOpts, molConfId);
return python::make_tuple(nbr_st, nbr_ct);
}
python::tuple scoreShape(const ShapeInput &shape1, ShapeInput &shape2,
bool useColors) {
auto [nbr_st, nbr_ct] = ScoreShape(shape1, shape2, useColors);
return python::make_tuple(nbr_st, nbr_ct);
}
void transformConformer(const python::list &pyFinalTrans,
const python::list &pyMatrix, ShapeInput probeShape,
RDKit::Conformer &probeConf) {
@@ -213,6 +232,9 @@ void wrap_pubchemshape() {
.add_property("customFeatures", &helpers::get_customFeatures,
&helpers::set_customFeatures,
"Custom features for the shape.")
.def_readwrite("normalize", &ShapeInputOptions::normalize,
"Whether to normalise the shape by putting into"
"its inertial frame. Default=True.")
.def("__setattr__", &safeSetattr);
python::def(
@@ -237,7 +259,8 @@ useColors : bool, optional
Whether or not to use colors in the scoring (default is True)
opt_param : float, optional
Balance of shape and color for optimization.
0 is only color, 0.5 is equal weight, and 1.0 is only shape
0 is only color, 0.5 is equal weight, and 1.0 is only shape.
Default is 1.0.
max_preiters : int, optional
In the two phase optimization, the maximum iterations done on all poses.
max_postiters : int, optional
@@ -249,7 +272,7 @@ Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
The color_score is zero if opt_param is 1.0.)DOC");
python::def(
"AlignMol", &helpers::alignMol3,
@@ -275,7 +298,8 @@ probeConfId : int, optional
Probe conformer ID (default is -1)
opt_param : float, optional
Balance of shape and color for optimization.
0 is only color, 0.5 is equal weight, and 1.0 is only shape
0 is only color, 0.5 is equal weight, and 1.0 is only shape.
Default is 1.0.
max_preiters : int, optional
In the two phase optimization, the maximum iterations done on all poses.
max_postiters : int, optional
@@ -287,7 +311,7 @@ Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
The color_score is zero if opt_param is 1.0.)DOC");
python::def(
"AlignMol", &helpers::alignMol2,
@@ -310,7 +334,8 @@ useColors : bool, optional
Whether or not to use colors in the scoring (default is True)
opt_param : float, optional
Balance of shape and color for optimization.
0 is only color, 0.5 is equal weight, and 1.0 is only shape
0 is only color, 0.5 is equal weight, and 1.0 is only shape.
Default is 1.0.
max_preiters : int, optional
In the two phase optimization, the maximum iterations done on all poses.
max_postiters : int, optional
@@ -325,7 +350,7 @@ Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
The color_score is zero if opt_param is 1.0.)DOC");
python::def(
"AlignShapes", &helpers::alignShapes,
@@ -405,6 +430,78 @@ Returns
"Translation of centre of shape coordinates to origin.")
.def_readwrite("sov", &ShapeInput::sov)
.def_readwrite("sof", &ShapeInput::sof);
python::def(
"ScoreMol", &helpers::scoreMolecule1,
(python::arg("mol1"), python::arg("mol2"), python::arg("mol1ShapeOpts"),
python::arg("mol2ShapeOpts"), python::arg("mol1ConfId") = -1,
python::arg("mol2ConfId") = -1),
R"DOC(Calculate the scores between a shape and a molecule without moving them.
Parameters
----------
mol1 : RDKit.ROMol
First molecule
mol2 : RDKit.ROMol
Second molecule
mol1ShapeOptions:
Options for constructing the shape for molecule 1
mol2ShapeOptions:
Options for constructing the shape for molecule 2
mol1ConfId : int, optional
First molecule conformer ID (default is -1)
mol2ConfId : int, optional
Second conformer ID (default is -1)
Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False for either of the
shape options)DOC");
python::def(
"ScoreMol", &helpers::scoreMolecule2,
(python::arg("shape"), python::arg("mol"), python::arg("molShapeOpts"),
python::arg("molConfId") = -1),
R"DOC(Calculate the scores between 2 molecules without moving them.
Parameters
----------
shape : ShapeInput
Shape
mol : RDKit.ROMol
Molecule
molShapeOptions:
Options for constructing the shape for molecule
molConfId : int, optional
Molecule conformer ID (default is -1)
Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if shape.useColors is False)DOC");
python::def("ScoreShape", &helpers::scoreShape,
(python::arg("shape1"), python::arg("shape2"),
python::arg("useColors") = false),
R"DOC(Calculate the scores between 2 shapes without moving them.
Parameters
----------
shape1 : ShapeInput
Shape
shape2 : ShapeInput
Shape
useColors : bool
Whether to use colors for the score or not.
Returns
-------
2-tuple of doubles
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
}
BOOST_PYTHON_MODULE(rdShapeAlign) { wrap_pubchemshape(); }

View File

@@ -117,6 +117,28 @@ class TestCase(unittest.TestCase):
self.assertAlmostEqual(tpl[0], 0.997, places=3)
self.assertAlmostEqual(tpl[1], 0.978, places=3)
def test9_FixedScore(self):
# Just to make sure it's there and returns a value.
opts = rdShapeAlign.ShapeInputOptions()
tpl = rdShapeAlign.ScoreMol(self.ref, self.ref, opts, opts)
self.assertAlmostEqual(tpl[0], 1.0, places=3)
self.assertAlmostEqual(tpl[1], 1.0, places=3)
opts = rdShapeAlign.ShapeInputOptions()
opts.useColors = False
opts.normalize = False
shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts)
tpl = rdShapeAlign.ScoreMol(shp, self.probe, opts)
self.assertAlmostEqual(tpl[0], 0.0, places=3)
self.assertAlmostEqual(tpl[1], 0.0, places=3)
opts.useColors = True
shp1 = rdShapeAlign.PrepareConformer(self.probe, -1, opts)
shp2 = rdShapeAlign.PrepareConformer(self.probe, -1, opts)
tpl = rdShapeAlign.ScoreShape(shp1, shp2, True)
self.assertAlmostEqual(tpl[0], 1.0, places=3)
self.assertAlmostEqual(tpl[1], 1.0, places=3)
if __name__ == '__main__':
unittest.main()

View File

@@ -592,3 +592,68 @@ TEST_CASE("custom feature points") {
CHECK(conf.getAtomPos(3).x < 0);
}
}
TEST_CASE("Score No Overlay") {
// These are 2 ligands used by Andy Grant and Co in their original paper
// https://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
// Ligands as extracted from PDB, with a bit of munging to get them as
// SMILES strings (downloaded the Ideal ligand structures from RCSB
// as SDFs and transferred the corresponding atom coords from 3tmn and 1tmn).
auto pdb_trp_3tmn =
R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
REQUIRE(pdb_trp_3tmn);
auto pdb_0zn_1tmn =
R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
REQUIRE(pdb_0zn_1tmn);
ShapeInputOptions mol1Opts, mol2Opts;
{
auto [singleShape, singleColor] =
ScoreMolecule(*pdb_trp_3tmn, *pdb_trp_3tmn, mol1Opts, mol2Opts);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(1.0, 0.001));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(1.0, 0.001));
}
{
auto pdb_trp_3tmn_cp =
R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
RDGeom::Point3D trans{100.0, 100.0, 100.0};
RDGeom::Transform3D transform_3d;
transform_3d.SetTranslation(trans);
MolTransforms::transformConformer(pdb_trp_3tmn_cp->getConformer(),
transform_3d);
auto [singleShape, singleColor] =
ScoreMolecule(*pdb_trp_3tmn, *pdb_trp_3tmn_cp, mol1Opts, mol2Opts);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(0.0, 0.001));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(0.0, 0.001));
}
{
auto [singleShape, singleColor] =
ScoreMolecule(*pdb_0zn_1tmn, *pdb_0zn_1tmn, mol1Opts, mol2Opts);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(1.0, 0.001));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(1.0, 0.001));
}
{
auto [singleShape, singleColor] =
ScoreMolecule(*pdb_trp_3tmn, *pdb_0zn_1tmn, mol1Opts, mol2Opts);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(0.3376, 0.0005));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(0.2674, 0.0005));
}
{
ShapeInputOptions opts;
opts.normalize = false;
auto shape = PrepareConformer(*pdb_trp_3tmn, -1, opts);
auto [singleShape, singleColor] =
ScoreMolecule(shape, *pdb_0zn_1tmn, mol1Opts);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(0.3376, 0.0005));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(0.2674, 0.0005));
}
{
ShapeInputOptions opts;
opts.normalize = false;
auto shape1 = PrepareConformer(*pdb_trp_3tmn, -1, opts);
auto shape2 = PrepareConformer(*pdb_0zn_1tmn, -1, opts);
auto [singleShape, singleColor] = ScoreShape(shape1, shape2, true);
CHECK_THAT(singleShape, Catch::Matchers::WithinAbs(0.3376, 0.0005));
CHECK_THAT(singleColor, Catch::Matchers::WithinAbs(0.2674, 0.0005));
}
}