ShapeInput from subset of atoms in molecule (#8449)

* Make shape on subset of atoms in molecule.
Optional radius for dummy atoms.

* Re-factor.
Add ShapeInputOptions.

* Python wrapper for options.

* Add non-standard radii option.
Add function to align a shape on a shape.
Add function to align conformer based on shape-shape transformation.

* Python wrappers for AlignShapes and TransformConformer.

* Add the new thing for Python options.

* Add option for excluding atoms from color features.
No partial color features if using atomSubset.

* In AlignShapes, don't change the fit shape (apart from transforming it) in case it will be used again.

* Rename it AlignShape to be consistent with AlignMolecule.

* Response to review.

* Undo clang-format

* Fix botched merge.

* Typo.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
This commit is contained in:
David Cosgrove
2025-04-27 05:51:53 +01:00
committed by GitHub
parent fc58b61c8b
commit 2de1dabdc1
5 changed files with 635 additions and 131 deletions

View File

@@ -1,4 +1,5 @@
#include <iostream>
#include <ranges>
#include <sstream>
#include <stdexcept>
@@ -25,8 +26,11 @@ using namespace RDKit;
// Bondi radii
// can find more of these in Table 12 of this publication:
// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/
// The dummy atom radius (atomic number 0) is set to
// 2.16 in ShapeInputOptions and may be varied there, as
// may all the other radii if required, including the
// addition of atoms not covered here.
const std::map<unsigned int, double> vdw_radii = {
{0, 1.10}, // dummy atom (value copied from H)
{1, 1.10}, // H
{2, 1.40}, // He
{3, 1.81}, // Li
@@ -159,21 +163,18 @@ std::vector<std::vector<const ROMol *>> *getPh4Patterns() {
return patterns.get();
}
// the conformer is translated to the origin
ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) {
Align3D::setUseCutOff(true);
ShapeInput res;
namespace {
std::vector<std::pair<std::vector<unsigned int>, unsigned int>> extractFeatures(
const ROMol &mol, const ShapeInputOptions &shapeOpts) {
// unpack features (PubChem-specific property from SDF)
// NOTE: this unpacking assumes that RWMol-atom-index = SDF-atom-number - 1
// e.g. RWMol uses [0..N-1] and SDF uses [1..N], with atoms in the same
// order
// If there are no PubChem features, falls back on RDKit pphore types.
std::vector<std::pair<std::vector<unsigned int>, unsigned int>>
feature_idx_type;
if (useColors) {
if (shapeOpts.useColors) {
std::string features;
if (mol.getPropIfPresent(pubchemFeatureName, features)) {
// regular atoms have type 0; feature "atoms" (features represented by a
@@ -246,8 +247,135 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) {
}
}
}
return feature_idx_type;
}
// unpack atoms
bool atomInSubset(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) {
if (shapeOpts.atomSubset.empty()) {
return true;
}
return std::ranges::find(shapeOpts.atomSubset, atomIdx) !=
shapeOpts.atomSubset.end();
}
bool atomAllowedInColor(unsigned int atomIdx,
const ShapeInputOptions &shapeOpts) {
return std::ranges::find(shapeOpts.notColorAtoms, atomIdx) ==
shapeOpts.notColorAtoms.end();
}
double getAtomRadius(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) {
auto it = std::ranges::find_if(
shapeOpts.atomRadii,
[atomIdx](const auto &p) -> bool { return p.first == atomIdx; });
return it == shapeOpts.atomRadii.end() ? -1.0 : it->second;
}
// Get the atom radii. rad_vector is expected to be big enough to hold them
// all. Also computes the average coordinates of the selected atoms.
void extractAtomRadii(const Conformer &conformer, unsigned int nAtoms,
const ShapeInputOptions &shapeOpts, RDGeom::Point3D &ave,
unsigned int &nSelectedAtoms,
std::vector<double> &rad_vector) {
nSelectedAtoms = 0;
for (unsigned int i = 0u; i < nAtoms; ++i) {
if (!atomInSubset(i, shapeOpts)) {
continue;
}
double rad = getAtomRadius(i, shapeOpts);
if (rad > 0.0) {
rad_vector[nSelectedAtoms++] = rad;
ave += conformer.getAtomPos(i);
} else {
unsigned int Z =
conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum();
if (Z > 1) {
ave += conformer.getAtomPos(i);
if (auto rad = vdw_radii.find(Z); rad != vdw_radii.end()) {
rad_vector[nSelectedAtoms++] = rad->second;
} else {
throw ValueErrorException("No VdW radius for atom with Z=" +
std::to_string(Z));
}
} else if (shapeOpts.includeDummies && Z == 0) {
ave += conformer.getAtomPos(i);
rad_vector[nSelectedAtoms++] = shapeOpts.dummyRadius;
}
}
}
ave /= nSelectedAtoms;
}
void extractAtomCoords(const Conformer &conformer, const unsigned int nAtoms,
const ShapeInputOptions &shapeOpts,
const RDGeom::Point3D &ave, std::vector<float> &coords) {
for (unsigned i = 0, j = 0; i < nAtoms; ++i) {
if (!atomInSubset(i, shapeOpts)) {
continue;
}
// use only non-H for alignment, optionally with dummy atoms.
unsigned int Z = conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum();
if (Z > 1 || (shapeOpts.includeDummies && Z == 0)) {
RDGeom::Point3D pos = conformer.getAtomPos(i);
pos -= ave;
coords[j * 3] = pos.x;
coords[(j * 3) + 1] = pos.y;
coords[(j * 3) + 2] = pos.z;
++j;
}
}
}
void extractFeatureCoords(
const Conformer &conformer, const unsigned int nAtoms,
const unsigned int nSelectedAtoms,
const std::vector<std::pair<std::vector<unsigned int>, unsigned int>>
&feature_idx_type,
const ShapeInputOptions &shapeOpts, const RDGeom::Point3D &ave,
unsigned int &numFeatures, ShapeInput &res,
std::vector<double> &rad_vector) {
// get feature coordinates - simply the average of coords of all atoms in the
// feature
for (unsigned i = 0; i < feature_idx_type.size(); ++i) {
RDGeom::Point3D floc;
unsigned int nSel = 0;
for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) {
unsigned int idx = feature_idx_type[i].first[j];
if (!atomInSubset(idx, shapeOpts) ||
!atomAllowedInColor(idx, shapeOpts)) {
continue;
}
if (idx >= nAtoms ||
conformer.getOwningMol().getAtomWithIdx(idx)->getAtomicNum() <= 1) {
throw ValueErrorException("Invalid feature atom index");
}
floc += conformer.getAtomPos(idx);
++nSel;
}
if (nSel == feature_idx_type[i].first.size()) {
floc /= nSel;
floc -= ave;
DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc
<< ")");
auto array_idx = nSelectedAtoms + numFeatures;
res.coord[array_idx * 3] = floc.x;
res.coord[(array_idx * 3) + 1] = floc.y;
res.coord[(array_idx * 3) + 2] = floc.z;
rad_vector[array_idx] = radius_color;
res.atom_type_vector[array_idx] = feature_idx_type[i].second;
++numFeatures;
}
}
}
} // namespace
// The conformer is left where it is, the shape is translated to the origin.
ShapeInput PrepareConformer(const ROMol &mol, int confId,
const ShapeInputOptions &shapeOpts) {
Align3D::setUseCutOff(true);
ShapeInput res;
auto feature_idx_type = extractFeatures(mol, shapeOpts);
auto &conformer = mol.getConformer(confId);
if (!conformer.is3D()) {
@@ -256,72 +384,33 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) {
unsigned int nAtoms = mol.getNumAtoms();
// DEBUG_MSG("num atoms: " << nAtoms);
unsigned int nHeavyAtoms = mol.getNumHeavyAtoms();
DEBUG_MSG("num heavy atoms: " << nHeavyAtoms);
unsigned int nAlignmentAtoms = nHeavyAtoms + feature_idx_type.size();
// Start with the arrays as large as they will possibly have to be.
// They will be re-sized later.
unsigned int nAlignmentAtoms = nAtoms + feature_idx_type.size();
std::vector<double> rad_vector(nAlignmentAtoms);
res.atom_type_vector.resize(nAlignmentAtoms, 0);
RDGeom::Point3D ave;
for (unsigned i = 0, activeAtomIdx = 0; i < nAtoms; ++i) {
unsigned int Z = mol.getAtomWithIdx(i)->getAtomicNum();
if (Z > 1) {
ave += conformer.getAtomPos(i);
if (auto rad = vdw_radii.find(Z); rad != vdw_radii.end()) {
rad_vector[activeAtomIdx++] = rad->second;
} else {
throw ValueErrorException("No VdW radius for atom with Z=" +
std::to_string(Z));
}
}
}
unsigned int nSelectedAtoms = 0;
extractAtomRadii(conformer, nAtoms, shapeOpts, ave, nSelectedAtoms,
rad_vector);
// translate steric center to origin
ave /= nHeavyAtoms;
DEBUG_MSG("steric center: (" << ave << ")");
res.shift = {-ave.x, -ave.y, -ave.z};
res.coord.resize(nAlignmentAtoms * 3);
for (unsigned i = 0, j = 0; i < nAtoms; ++i) {
// use only non-H for alignment
if (mol.getAtomWithIdx(i)->getAtomicNum() > 1) {
RDGeom::Point3D pos = conformer.getAtomPos(i);
pos -= ave;
res.coord[j * 3] = pos.x;
res.coord[(j * 3) + 1] = pos.y;
res.coord[(j * 3) + 2] = pos.z;
++j;
}
}
// get feature coordinates - simply the average of coords of all atoms in the
// feature
for (unsigned i = 0; i < feature_idx_type.size(); ++i) {
RDGeom::Point3D floc;
for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) {
unsigned int idx = feature_idx_type[i].first[j];
if (idx >= nAtoms || mol.getAtomWithIdx(idx)->getAtomicNum() <= 1) {
throw ValueErrorException("Invalid feature atom index");
}
floc += conformer.getAtomPos(idx);
}
floc /= feature_idx_type[i].first.size();
floc -= ave;
DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc
<< ")");
auto array_idx = nHeavyAtoms + i;
res.coord[array_idx * 3] = floc.x;
res.coord[(array_idx * 3) + 1] = floc.y;
res.coord[(array_idx * 3) + 2] = floc.z;
rad_vector[array_idx] = radius_color;
res.atom_type_vector[array_idx] = feature_idx_type[i].second;
}
extractAtomCoords(conformer, nAtoms, shapeOpts, ave, res.coord);
unsigned int numFeatures = 0;
extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type,
shapeOpts, ave, numFeatures, res, rad_vector);
// Now cut the final vectors down to the actual number of atoms and
// features used.
nAlignmentAtoms = nSelectedAtoms + numFeatures;
res.coord.resize(nAlignmentAtoms * 3);
rad_vector.resize(nAlignmentAtoms);
res.atom_type_vector.resize(nAlignmentAtoms);
Align3D::setAlpha(rad_vector.data(), rad_vector.size(), res.alpha_vector);
// regular atom self overlap
@@ -346,18 +435,12 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) {
return res;
}
std::pair<double, double> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
std::vector<float> &matrix,
int fitConfId, bool useColors,
double opt_param,
unsigned int max_preiters,
unsigned int max_postiters) {
PRECONDITION(matrix.size() == 12, "bad matrix size");
Align3D::setUseCutOff(true);
DEBUG_MSG("Fit details:");
auto fitShape = PrepareConformer(fit, fitConfId, useColors);
std::pair<double, double> AlignShape(const ShapeInput &refShape,
ShapeInput &fitShape,
std::vector<float> &matrix,
double opt_param,
unsigned int max_preiters,
unsigned int max_postiters) {
std::set<unsigned int> jointColorAtomTypeSet;
Align3D::getJointColorTypeSet(
refShape.atom_type_vector.data(), refShape.atom_type_vector.size(),
@@ -365,8 +448,11 @@ std::pair<double, double> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
jointColorAtomTypeSet);
auto mapCp = refShape.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(mapCp, jointColorAtomTypeSet);
Align3D::restrictColorAtomType2IndexVectorMap(
fitShape.colorAtomType2IndexVectorMap, jointColorAtomTypeSet);
// Take copy of the color atom mappings so as not to alter the input shape
// which might be re-used.
auto fitMapCp = fitShape.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(fitMapCp,
jointColorAtomTypeSet);
DEBUG_MSG("Running alignment...");
double nbr_st = 0.0;
@@ -375,40 +461,74 @@ std::pair<double, double> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
refShape.coord.data(), refShape.alpha_vector,
refShape.volumeAtomIndexVector, mapCp, refShape.sov, refShape.sof,
fitShape.coord.data(), fitShape.alpha_vector,
fitShape.volumeAtomIndexVector, fitShape.colorAtomType2IndexVectorMap,
fitShape.sov, fitShape.sof, !jointColorAtomTypeSet.empty(), max_preiters,
max_postiters, opt_param, matrix.data(), nbr_st, nbr_ct);
fitShape.volumeAtomIndexVector, fitMapCp, fitShape.sov, fitShape.sof,
!jointColorAtomTypeSet.empty(), max_preiters, max_postiters, opt_param,
matrix.data(), nbr_st, nbr_ct);
DEBUG_MSG("Done!");
DEBUG_MSG("nbr_st: " << nbr_st);
DEBUG_MSG("nbr_ct: " << nbr_ct);
// transform fit coords
Conformer &fit_conformer = fit.getConformer(fitConfId);
if (3 * fit.getNumAtoms() != fitShape.coord.size()) {
std::vector<float> transformed(fitShape.coord.size());
Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(),
fitShape.coord.size() / 3, matrix.data());
fitShape.coord = transformed;
return std::make_pair(nbr_st, nbr_ct);
}
void TransformConformer(const std::vector<double> &finalTrans,
const std::vector<float> &matrix, ShapeInput &fitShape,
Conformer &fitConf) {
const unsigned int nAtoms = fitConf.getOwningMol().getNumAtoms();
if (3 * nAtoms != fitShape.coord.size()) {
// Hs were removed
fitShape.coord.resize(3 * fit.getNumAtoms());
for (unsigned int i = 0; i < fit.getNumAtoms(); ++i) {
const auto &pos = fit_conformer.getAtomPos(i);
fitShape.coord.resize(3 * nAtoms);
for (unsigned int i = 0; i < nAtoms; ++i) {
const auto &pos = fitConf.getAtomPos(i);
fitShape.coord[i * 3] = pos.x + fitShape.shift[0];
fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1];
fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2];
}
}
std::vector<float> transformed(fit.getNumAtoms() * 3);
std::vector<float> transformed(nAtoms * 3);
Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(),
fit.getNumAtoms(), matrix.data());
fitConf.getOwningMol().getNumAtoms(),
matrix.data());
for (unsigned i = 0; i < fit.getNumAtoms(); ++i) {
RDGeom::Point3D &pos = fit_conformer.getAtomPos(i);
pos.x = transformed[i * 3];
pos.y = transformed[(i * 3) + 1];
pos.z = transformed[(i * 3) + 2];
for (unsigned i = 0; i < nAtoms; ++i) {
RDGeom::Point3D &pos = fitConf.getAtomPos(i);
pos.x = transformed[i * 3] - finalTrans[0];
pos.y = transformed[(i * 3) + 1] - finalTrans[1];
pos.z = transformed[(i * 3) + 2] - finalTrans[2];
}
fit.setProp("shape_align_shape_tanimoto", nbr_st);
fit.setProp("shape_align_color_tanimoto", nbr_ct);
}
return std::make_pair(nbr_st, nbr_ct);
std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, ROMol &fit, std::vector<float> &matrix,
int fitConfId, bool useColors, double opt_param, unsigned int max_preiters,
unsigned int max_postiters, bool applyRefShift) {
PRECONDITION(matrix.size() == 12, "bad matrix size");
Align3D::setUseCutOff(true);
DEBUG_MSG("Fit details:");
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
auto fitShape = PrepareConformer(fit, fitConfId, shapeOpts);
auto tanis = AlignShape(refShape, fitShape, matrix, opt_param, max_preiters,
max_postiters);
// transform fit coords
Conformer &fit_conformer = fit.getConformer(fitConfId);
std::vector<double> finalTrans{0.0, 0.0, 0.0};
if (applyRefShift) {
finalTrans = refShape.shift;
}
TransformConformer(finalTrans, matrix, fitShape, fit_conformer);
fit.setProp("shape_align_shape_tanimoto", tanis.first);
fit.setProp("shape_align_color_tanimoto", tanis.second);
return tanis;
}
std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
@@ -420,17 +540,11 @@ std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
Align3D::setUseCutOff(true);
DEBUG_MSG("Reference details:");
auto refShape = PrepareConformer(ref, refConfId, useColors);
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
auto refShape = PrepareConformer(ref, refConfId, shapeOpts);
auto scores = AlignMolecule(refShape, fit, matrix, fitConfId, useColors,
opt_param, max_preiters, max_postiters);
// translate the fit conformer back to the steric center of the reference.
Conformer &fit_conformer = fit.getConformer(fitConfId);
for (unsigned i = 0; i < fit.getNumAtoms(); ++i) {
RDGeom::Point3D &pos = fit_conformer.getAtomPos(i);
pos.x -= refShape.shift[0];
pos.y -= refShape.shift[1];
pos.z -= refShape.shift[2];
}
opt_param, max_preiters, max_postiters, true);
return scores;
}

View File

@@ -68,17 +68,66 @@ struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput {
double sof{0.0};
};
struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInputOptions {
bool useColors{true}; // Whether to use colors (pharmacophore features) in
// the score.
bool includeDummies{false}; // Hydrogen and dummy atoms are normally skipped.
// This forces the inclusion of dummy atoms.
double dummyRadius{2.16}; // This is the radius used for Xe.
std::vector<unsigned int> atomSubset; // If not empty, use just these atoms
// in the molecule to form the
// ShapeInput object.
std::vector<unsigned int> notColorAtoms; // Any atoms mentioned here by index
// should not be used in a color
// feature.
std::vector<std::pair<unsigned int, double>>
atomRadii; // Use these non-standard radii for these atoms.
// The int is for the atom index in the molecule, not
// the atomic number.
};
//! Prepare the input for the shape comparison
/*!
\param mol the molecule to prepare
\param confId (optional) the conformer to use
\param useColors (optional) whether to generate info about colors
\param shapeOpts (optional) Change the default behaviour.
\return a ShapeInput object
\return a ShapeInput object, translated to the origin
*/
RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol,
int confId = -1,
bool useColors = true);
RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput
PrepareConformer(const RDKit::ROMol &mol, int confId = -1,
const ShapeInputOptions &shapeOpts = ShapeInputOptions());
//! Align a shape onto a reference shape. Assumes the shapes are both
//! centred on the origin.
/*!
\param refShape the reference shape
\param fit the shape to align
\param matrix the transformation matrix (populated on return)
\param opt_param (optional) the optimization parameter \param
max_preiters (optional) the max number of pre-optimization iterations \param
max_postiters (optional) the max number of post-optimization iterationsa
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
*/
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignShape(
const ShapeInput &refShape, ShapeInput &fitShape,
std::vector<float> &matrix, double opt_param = 1.0,
unsigned int max_preiters = 10u, unsigned int max_postiters = 30u);
//! Assuming that fitShape has been overlaid onto a reference shape to give
//! the ! transformation matrix, apply the same transformation to the given
//! conformer.
/*!
\param finalTrans the final translation to apply to the fitConf coords.
\param matrix the transformation matrix produced from alignment
\param fitShape the shape that was aligned
\param fitConf the conformation to be transformed
*/
RDKIT_PUBCHEMSHAPE_EXPORT void TransformConformer(
const std::vector<double> &finalTrans, const std::vector<float> &matrix,
ShapeInput &fitShape, RDKit::Conformer &fitConf);
//! Align a molecule to a reference shape
/*!
@@ -90,6 +139,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol,
\param opt_param (optional) the optimization parameter
\param max_preiters (optional) the max number of pre-optimization iterations
\param max_postiters (optional) the max number of post-optimization iterations
\param applyRefShift (optional) if true, apply the reference shape's shift
translation to the final coordinates
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)
@@ -97,7 +148,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol,
RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, RDKit::ROMol &fit, std::vector<float> &matrix,
int fitConfId = -1, bool useColors = true, double opt_param = 1.0,
unsigned int max_preiters = 10u, unsigned int max_postiters = 30u);
unsigned int max_preiters = 10u, unsigned int max_postiters = 30u,
bool applyRefShift = false);
//! Align a molecule to a reference molecule
/*!
@@ -105,11 +157,15 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignMolecule(
\param fit the molecule to align
\param matrix the transformation matrix (populated on return)
\param refConfId (optional) the conformer to use for the reference
molecule \param fitConfId (optional) the conformer to use for the fit
molecule \param useColors (optional) whether or not to use colors in the
scoring \param opt_param (optional) the optimization parameter \param
max_preiters (optional) the max number of pre-optimization iterations \param
max_postiters (optional) the max number of post-optimization iterationsa
molecule
\param fitConfId (optional) the conformer to use for the fit
molecule
\param useColors (optional) whether or not to use colors in the
scoring
\param opt_param (optional) the optimization parameter
\param max_preiters (optional) the max number of pre-optimization iterations
\param max_postiters (optional) the max number of post-optimization
iterations
\return a pair of the shape Tanimoto value and the color Tanimoto value (zero
if useColors is false)

View File

@@ -45,15 +45,104 @@ python::tuple alignMol(const RDKit::ROMol &ref, RDKit::ROMol &probe,
}
python::tuple alignMol2(const ShapeInput &ref, RDKit::ROMol &probe,
int probeConfId, bool useColors, double opt_param,
unsigned int max_preiters, unsigned int max_postiters) {
unsigned int max_preiters, unsigned int max_postiters,
bool applyRefShift) {
std::vector<float> matrix(12, 0.0);
auto [nbr_st, nbr_ct] =
AlignMolecule(ref, probe, matrix, probeConfId, useColors, opt_param,
max_preiters, max_postiters);
max_preiters, max_postiters, applyRefShift);
return python::make_tuple(nbr_st, nbr_ct);
}
ShapeInput *prepConf(const RDKit::ROMol &mol, int confId, bool useColors) {
return new ShapeInput(PrepareConformer(mol, confId, useColors));
python::tuple alignShapes(const ShapeInput &refShape, ShapeInput &fitShape,
double opt_param, unsigned int max_preiters,
unsigned int max_postiters) {
std::vector<float> matrix(12, 0.0);
auto [nbr_st, nbr_ct] = AlignShape(refShape, fitShape, matrix, opt_param,
max_preiters, max_postiters);
python::list pyMatrix;
for (auto m : matrix) {
pyMatrix.append(m);
}
return python::make_tuple(nbr_st, nbr_ct, pyMatrix);
}
void transformConformer(const python::list &pyFinalTrans,
const python::list &pyMatrix, ShapeInput probeShape,
RDKit::Conformer &probeConf) {
std::vector<float> matrix;
pythonObjectToVect<float>(pyMatrix, matrix);
if (matrix.size() != 12) {
throw_value_error(
"The transformation matrix must have 12 values. It had " +
std::to_string(matrix.size()) + ".");
}
std::vector<double> finalTrans;
pythonObjectToVect<double>(pyFinalTrans, finalTrans);
if (finalTrans.size() != 3) {
throw_value_error(
"The final translation vector must have 3 values. It had " +
std::to_string(finalTrans.size()) + ".");
}
TransformConformer(finalTrans, matrix, probeShape, probeConf);
}
ShapeInput *prepConf(const RDKit::ROMol &mol, int confId,
const python::object &py_opts) {
ShapeInputOptions opts;
if (!py_opts.is_none()) {
opts = python::extract<ShapeInputOptions>(py_opts);
}
return new ShapeInput(PrepareConformer(mol, confId, opts));
}
void set_atomSubset(ShapeInputOptions &opts, const python::list &as) {
pythonObjectToVect<unsigned int>(as, opts.atomSubset);
}
python::list get_atomSubset(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.atomSubset) {
py_list.append(val);
}
return py_list;
}
void set_notColorAtoms(ShapeInputOptions &opts, const python::list &nca) {
pythonObjectToVect<unsigned int>(nca, opts.notColorAtoms);
}
python::list get_notColorAtoms(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.notColorAtoms) {
py_list.append(val);
}
return py_list;
}
void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) {
int len = python::len(ar);
opts.atomRadii.resize(len);
for (int i = 0; i < len; i++) {
unsigned int atomIdx = python::extract<unsigned int>(ar[i][0]);
double radius = python::extract<double>(ar[i][1]);
opts.atomRadii[i] = std::make_pair(atomIdx, radius);
}
}
python::list get_atomRadii(const ShapeInputOptions &opts) {
python::list py_list;
for (const auto &val : opts.atomRadii) {
py_list.append(python::make_tuple(static_cast<int>(val.first), val.second));
}
return py_list;
}
void set_shapeShift(ShapeInput &shp, const python::list &s) {
pythonObjectToVect<double>(s, shp.shift);
}
python::list get_shapeShift(const ShapeInput &shp) {
python::list py_list;
for (const auto &val : shp.shift) {
py_list.append(val);
}
return py_list;
}
} // namespace helpers
@@ -62,6 +151,31 @@ void wrap_pubchemshape() {
RegisterVectorConverter<double>("DoubleVector");
RegisterVectorConverter<unsigned int>("UnsignedIntVector");
python::class_<ShapeInputOptions, boost::noncopyable>("ShapeInputOptions",
"Shape Input Options")
.def_readwrite(
"useColors", &ShapeInputOptions::useColors,
"Whether to use colors (pharmacophore features) in the score. Default=True.")
.def_readwrite(
"includeDummies", &ShapeInputOptions::includeDummies,
"Whether to use dummy atoms in the alignment. Default=False.")
.def_readwrite(
"dummyRadius", &ShapeInputOptions::dummyRadius,
"If using dummy atoms in the alignment, what radius to use for them."
" Default=2.16 (the radius of Xe).")
.add_property(
"atomSubset", &helpers::get_atomSubset, &helpers::set_atomSubset,
"If not empty, use just these atoms in the molecule to form the ShapeInput object.")
.add_property(
"notColorAtoms", &helpers::get_notColorAtoms,
&helpers::set_notColorAtoms,
"Any atoms mentioned here by index should not be used in a color feature.")
.add_property(
"atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii,
"Non-standard radii to use for the atoms specified by their indices"
" in the molecule. A list of tuples of [int, float].")
.def("__setattr__", &safeSetattr);
python::def(
"AlignMol", &helpers::alignMol,
(python::arg("ref"), python::arg("probe"), python::arg("refConfId") = -1,
@@ -98,13 +212,14 @@ Returns
(python::arg("refShape"), python::arg("probe"),
python::arg("probeConfId") = -1, python::arg("useColors") = true,
python::arg("opt_param") = 1.0, python::arg("max_preiters") = 10,
python::arg("max_postiters") = 30),
python::arg("max_postiters") = 30, python::arg("applyRefShift") = false),
R"DOC(Aligns a probe molecule to a reference shape. The probe is modified.
Assumes the shapes are both centred on the origin.
Parameters
----------
refShape : ShapeInput
Reference molecule
Reference shape
probe : RDKit.ROMol
Probe molecule
probeConfId : int, optional
@@ -114,6 +229,9 @@ useColors : bool, optional
optParam : float, optional
max_preiters : int, optional
max_postiters : int, optional
applyRefShift : bool, optional
If True, apply the reference shape's shift translation to the final
coordinates.
Returns
@@ -122,9 +240,54 @@ Returns
The results are (shape_score, color_score)
The color_score is zero if useColors is False)DOC");
python::def(
"AlignShapes", &helpers::alignShapes,
(python::arg("refShape"), python::arg("probeShape"),
python::arg("opt_param") = 1.0, python::arg("max_preiters") = 10,
python::arg("max_postiters") = 30),
R"DOC(Aligns a probe shape to a reference shape. The probe is modified.
Parameters
----------
refShape : ShapeInput
Reference shape
probeShape : ShapeInput
Probe shape
opt_param : float, optional
max_preiters : int, optional
max_postiters : int, optional
Returns
-------
3-tuple of double, double, list of doubles
The results are (shape_score, color_score, matrix)
The matrix is a 12-float list giving the transformation matrix that
overlays the probe onto the reference.)DOC");
python::def(
"TransformConformer", &helpers::transformConformer,
(python::arg("finalTrans"), python::arg("matrix"),
python::arg("probeShape"), python::arg("probeConformer")),
R"DOC(Assuming that probeShape has been overlaid onto refShape to give
the supplied transformation matrix, applies that transformation to the
given conformer.
Parameters
----------
refShape : ShapeInput
Reference shape
matrix: list[float * 12]
The transformation matrix
probeShape : ShapeInput
Probe shape
probeConformer : Conformer
Probe conformer
)DOC");
python::def("PrepareConformer", &helpers::prepConf,
(python::arg("mol"), python::arg("confId") = -1,
python::arg("useColors") = true),
python::arg("opts") = python::object()),
R"DOC(Generates a ShapeInput object for a molecule
Parameters
@@ -146,7 +309,8 @@ Returns
.def_readwrite("atom_type_vector", &ShapeInput::atom_type_vector)
.def_readwrite("volumeAtomIndexVector",
&ShapeInput::volumeAtomIndexVector)
.def_readwrite("shift", &ShapeInput::shift)
.add_property("shift", &helpers::get_shapeShift, &helpers::set_shapeShift,
"Translation of centre of shape coordinates to origin.")
.def_readwrite("sov", &ShapeInput::sov)
.def_readwrite("sof", &ShapeInput::sof);
}

View File

@@ -32,6 +32,49 @@ class TestCase(unittest.TestCase):
self.assertAlmostEqual(tpl[0], 0.773, places=3)
self.assertAlmostEqual(tpl[1], 0.303, places=3)
def test4_ShapeInputOptions(self):
opts = rdShapeAlign.ShapeInputOptions()
opts.useColors = False
shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts)
tpl = rdShapeAlign.AlignMol(shp, self.probe, opt_param=0.5, max_preiters=3, max_postiters=16)
self.assertAlmostEqual(tpl[0], 0.773, places=3)
self.assertAlmostEqual(tpl[1], 0.0, places=3)
opts.atomSubset = [4, 5, 6, 7, 8, 9]
shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts)
self.assertAlmostEqual(shp.sov, 251.946, places=3)
self.assertAlmostEqual(shp.sof, 0.0, places=3)
opts.atomRadii = [(4, 1.9)]
shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts)
self.assertAlmostEqual(shp.sov, 274.576, places=3)
with self.assertRaises(AttributeError):
opts.rhubarb = True
def test5_ShapeShapeOverlay(self):
refShp = rdShapeAlign.PrepareConformer(self.ref, -1)
probeShp = rdShapeAlign.PrepareConformer(self.probe, -1)
tpl = rdShapeAlign.AlignShapes(refShp, probeShp)
probeCp = Chem.Mol(self.probe)
rdShapeAlign.TransformConformer(refShp.shift, tpl[2], probeShp, probeCp.GetConformer(-1))
# Just show it did something. The full test is in the C++ layer.
self.assertNotEqual(self.probe.GetConformer().GetAtomPosition(0),
probeCp.GetConformer().GetAtomPosition(0))
matrix = tpl[2][:10]
with self.assertRaises(ValueError):
rdShapeAlign.TransformConformer(refShp.shift, matrix, probeShp, probeCp.GetConformer(-1))
def test6_notColorAtoms(self):
m1 = Chem.MolFromSmiles("Nc1ccccc1 |(0.392086,-2.22477,0.190651;"
"0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;"
"-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;"
"1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097)"
",atomProp:0.dummyLabel.*|")
opts = rdShapeAlign.ShapeInputOptions()
opts.notColorAtoms = [0]
shp = rdShapeAlign.PrepareConformer(m1, -1, opts)
self.assertAlmostEqual(shp.sof, 5.074, places=3)
if __name__ == '__main__':
unittest.main()

View File

@@ -312,13 +312,13 @@ TEST_CASE("d2CutOff set") {
auto m1 =
"c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles;
REQUIRE(m1);
auto shape1 = PrepareConformer(*m1, -1, true);
auto shape1 = PrepareConformer(*m1, -1);
std::vector<float> matrix(12, 0.0);
auto m2 =
"c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles;
REQUIRE(m2);
AlignMolecule(*m2, *m2, matrix);
auto shape2 = PrepareConformer(*m1, -1, true);
auto shape2 = PrepareConformer(*m1, -1);
CHECK(shape1.sov == shape2.sov);
}
@@ -344,7 +344,7 @@ TEST_CASE("Overlay onto shape bug (Github8462)") {
CHECK_THAT((pos1 - pos2).length(), Catch::Matchers::WithinAbs(0.0, 0.005));
}
auto s1 = PrepareConformer(*m1, -1, true);
auto s1 = PrepareConformer(*m1, -1);
auto [st1, ct1] = AlignMolecule(s1, m3, matrix);
CHECK_THAT(st1, Catch::Matchers::WithinAbs(1.0, 0.005));
CHECK_THAT(ct1, Catch::Matchers::WithinAbs(1.0, 0.005));
@@ -354,4 +354,131 @@ TEST_CASE("Overlay onto shape bug (Github8462)") {
auto pos2 = m3.getConformer().getAtomPos(i);
CHECK_THAT((pos1 - pos2).length(), Catch::Matchers::WithinAbs(0.0, 0.005));
}
}
}
TEST_CASE("Shape subset") {
auto m1 =
"c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles;
REQUIRE(m1);
ShapeInputOptions shapeOpts;
shapeOpts.atomSubset = std::vector<unsigned int>{0, 1, 2, 3, 10, 11};
auto partShape = PrepareConformer(*m1, -1, shapeOpts);
CHECK(partShape.coord.size() == 21);
CHECK_THAT(partShape.sov, Catch::Matchers::WithinAbs(253.929, 0.005));
CHECK_THAT(partShape.sof, Catch::Matchers::WithinAbs(5.074, 0.005));
shapeOpts.atomSubset.clear();
auto wholeShape = PrepareConformer(*m1, -1, shapeOpts);
CHECK(wholeShape.coord.size() == 42);
CHECK_THAT(wholeShape.sov, Catch::Matchers::WithinAbs(542.04, 0.005));
CHECK_THAT(wholeShape.sof, Catch::Matchers::WithinAbs(10.148, 0.005));
}
TEST_CASE("Dummy radii") {
auto m1 =
"[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
auto shape1 = PrepareConformer(*m1, -1);
CHECK(shape1.coord.size() == 24);
// The dummy radius defaults to 2.16, the same as Xe, so these shapes should
// come out the same.
auto m2 =
"*c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
ShapeInputOptions shapeOpts;
shapeOpts.includeDummies = true;
auto shape2 = PrepareConformer(*m2, -1, shapeOpts);
CHECK(shape2.coord.size() == 24);
CHECK(shape1.sov == shape2.sov);
shapeOpts.dummyRadius = 2.5;
auto shape3 = PrepareConformer(*m2, -1, shapeOpts);
CHECK_THAT(shape3.sov, Catch::Matchers::WithinAbs(427.925, 0.005));
shapeOpts.includeDummies = false;
auto shape4 = PrepareConformer(*m2, -1, shapeOpts);
CHECK(shape4.coord.size() == 21);
CHECK(shape4.sov < shape2.sov);
CHECK_THAT(shape4.sov, Catch::Matchers::WithinAbs(254.578, 0.005));
}
TEST_CASE("Non-standard radii") {
auto m1 =
"[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
auto shape1 = PrepareConformer(*m1, -1);
CHECK(shape1.coord.size() == 24);
CHECK_THAT(shape1.sov, Catch::Matchers::WithinAbs(376.434, 0.005));
ShapeInputOptions shapeOpts;
// Benzene derivative with atom 4 with an N radius.
shapeOpts.atomRadii =
std::vector<std::pair<unsigned int, double>>{{0, 2.5}, {4, 1.55}};
auto shape2 = PrepareConformer(*m1, -1, shapeOpts);
CHECK_THAT(shape2.sov, Catch::Matchers::WithinAbs(412.666, 0.005));
// Corresponding pyridine derivative.
auto m2 =
"[Xe]c1ccncc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
auto shape3 = PrepareConformer(*m2, -1, shapeOpts);
CHECK(shape3.sov == shape2.sov);
}
TEST_CASE("Shape-Shape alignment") {
std::string dirName = getenv("RDBASE");
dirName += "/External/pubchem_shape/test_data";
auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
auto ref = suppl[0];
REQUIRE(ref);
auto probe = suppl[1];
REQUIRE(probe);
RWMol probeCP(*probe);
auto refShape = PrepareConformer(*ref, -1);
auto probeShape = PrepareConformer(*probe, -1);
std::vector<float> matrix(12, 0.0);
auto [mol_st, mol_ct] = AlignMolecule(*ref, *probe, matrix, -1, -1);
auto [shape_st, shape_ct] = AlignShape(refShape, probeShape, matrix);
// Check that the same results are achieved when overlaying the probe
// molecule onto the reference and the probe shape onto the reference shape
CHECK_THAT(shape_st, Catch::Matchers::WithinAbs(mol_st, 0.001));
CHECK_THAT(shape_ct, Catch::Matchers::WithinAbs(mol_ct, 0.001));
for (unsigned int i = 0; i < probe->getNumAtoms(); i++) {
const auto &pos = probe->getConformer().getAtomPos(i);
CHECK_THAT(pos.x, Catch::Matchers::WithinAbs(
probeShape.coord[3 * i] - refShape.shift[0], 0.001));
CHECK_THAT(pos.y,
Catch::Matchers::WithinAbs(
probeShape.coord[3 * i + 1] - refShape.shift[1], 0.001));
CHECK_THAT(pos.z,
Catch::Matchers::WithinAbs(
probeShape.coord[3 * i + 2] - refShape.shift[2], 0.001));
}
// Also check the TransformConformer function
TransformConformer(refShape.shift, matrix, probeShape,
probeCP.getConformer(-1));
for (unsigned int i = 0; i < probe->getNumAtoms(); i++) {
const auto &pos = probe->getConformer().getAtomPos(i);
const auto &poscp = probeCP.getConformer().getAtomPos(i);
CHECK_THAT(pos.x, Catch::Matchers::WithinAbs(poscp.x, 0.001));
CHECK_THAT(pos.y, Catch::Matchers::WithinAbs(poscp.y, 0.001));
CHECK_THAT(pos.z, Catch::Matchers::WithinAbs(poscp.z, 0.001));
}
}
TEST_CASE("Atoms excluded from Color features") {
auto m1 =
"Nc1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
auto shape1 = PrepareConformer(*m1, -1);
// The aniline N comes out as a donor, acceptor and basic so gets 3
// features.
CHECK(shape1.coord.size() == 33);
ShapeInputOptions opts;
opts.notColorAtoms = std::vector<unsigned int>{0};
auto shape2 = PrepareConformer(*m1, -1, opts);
CHECK(shape2.coord.size() == 24);
}