diff --git a/External/pubchem_shape/PubChemShape.cpp b/External/pubchem_shape/PubChemShape.cpp index b800a23bc..b3b608d76 100644 --- a/External/pubchem_shape/PubChemShape.cpp +++ b/External/pubchem_shape/PubChemShape.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -25,8 +26,11 @@ using namespace RDKit; // Bondi radii // can find more of these in Table 12 of this publication: // https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/ +// The dummy atom radius (atomic number 0) is set to +// 2.16 in ShapeInputOptions and may be varied there, as +// may all the other radii if required, including the +// addition of atoms not covered here. const std::map vdw_radii = { - {0, 1.10}, // dummy atom (value copied from H) {1, 1.10}, // H {2, 1.40}, // He {3, 1.81}, // Li @@ -159,21 +163,18 @@ std::vector> *getPh4Patterns() { return patterns.get(); } -// the conformer is translated to the origin -ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) { - Align3D::setUseCutOff(true); - - ShapeInput res; - +namespace { +std::vector, unsigned int>> extractFeatures( + const ROMol &mol, const ShapeInputOptions &shapeOpts) { // unpack features (PubChem-specific property from SDF) // NOTE: this unpacking assumes that RWMol-atom-index = SDF-atom-number - 1 // e.g. RWMol uses [0..N-1] and SDF uses [1..N], with atoms in the same // order + // If there are no PubChem features, falls back on RDKit pphore types. std::vector, unsigned int>> feature_idx_type; - - if (useColors) { + if (shapeOpts.useColors) { std::string features; if (mol.getPropIfPresent(pubchemFeatureName, features)) { // regular atoms have type 0; feature "atoms" (features represented by a @@ -246,8 +247,135 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) { } } } + return feature_idx_type; +} - // unpack atoms +bool atomInSubset(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) { + if (shapeOpts.atomSubset.empty()) { + return true; + } + return std::ranges::find(shapeOpts.atomSubset, atomIdx) != + shapeOpts.atomSubset.end(); +} +bool atomAllowedInColor(unsigned int atomIdx, + const ShapeInputOptions &shapeOpts) { + return std::ranges::find(shapeOpts.notColorAtoms, atomIdx) == + shapeOpts.notColorAtoms.end(); +} + +double getAtomRadius(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) { + auto it = std::ranges::find_if( + shapeOpts.atomRadii, + [atomIdx](const auto &p) -> bool { return p.first == atomIdx; }); + return it == shapeOpts.atomRadii.end() ? -1.0 : it->second; +} + +// Get the atom radii. rad_vector is expected to be big enough to hold them +// all. Also computes the average coordinates of the selected atoms. +void extractAtomRadii(const Conformer &conformer, unsigned int nAtoms, + const ShapeInputOptions &shapeOpts, RDGeom::Point3D &ave, + unsigned int &nSelectedAtoms, + std::vector &rad_vector) { + nSelectedAtoms = 0; + for (unsigned int i = 0u; i < nAtoms; ++i) { + if (!atomInSubset(i, shapeOpts)) { + continue; + } + double rad = getAtomRadius(i, shapeOpts); + if (rad > 0.0) { + rad_vector[nSelectedAtoms++] = rad; + ave += conformer.getAtomPos(i); + } else { + unsigned int Z = + conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum(); + if (Z > 1) { + ave += conformer.getAtomPos(i); + if (auto rad = vdw_radii.find(Z); rad != vdw_radii.end()) { + rad_vector[nSelectedAtoms++] = rad->second; + } else { + throw ValueErrorException("No VdW radius for atom with Z=" + + std::to_string(Z)); + } + } else if (shapeOpts.includeDummies && Z == 0) { + ave += conformer.getAtomPos(i); + rad_vector[nSelectedAtoms++] = shapeOpts.dummyRadius; + } + } + } + ave /= nSelectedAtoms; +} + +void extractAtomCoords(const Conformer &conformer, const unsigned int nAtoms, + const ShapeInputOptions &shapeOpts, + const RDGeom::Point3D &ave, std::vector &coords) { + for (unsigned i = 0, j = 0; i < nAtoms; ++i) { + if (!atomInSubset(i, shapeOpts)) { + continue; + } + // use only non-H for alignment, optionally with dummy atoms. + unsigned int Z = conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum(); + if (Z > 1 || (shapeOpts.includeDummies && Z == 0)) { + RDGeom::Point3D pos = conformer.getAtomPos(i); + pos -= ave; + coords[j * 3] = pos.x; + coords[(j * 3) + 1] = pos.y; + coords[(j * 3) + 2] = pos.z; + ++j; + } + } +} + +void extractFeatureCoords( + const Conformer &conformer, const unsigned int nAtoms, + const unsigned int nSelectedAtoms, + const std::vector, unsigned int>> + &feature_idx_type, + const ShapeInputOptions &shapeOpts, const RDGeom::Point3D &ave, + unsigned int &numFeatures, ShapeInput &res, + std::vector &rad_vector) { + // get feature coordinates - simply the average of coords of all atoms in the + // feature + for (unsigned i = 0; i < feature_idx_type.size(); ++i) { + RDGeom::Point3D floc; + unsigned int nSel = 0; + for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) { + unsigned int idx = feature_idx_type[i].first[j]; + if (!atomInSubset(idx, shapeOpts) || + !atomAllowedInColor(idx, shapeOpts)) { + continue; + } + if (idx >= nAtoms || + conformer.getOwningMol().getAtomWithIdx(idx)->getAtomicNum() <= 1) { + throw ValueErrorException("Invalid feature atom index"); + } + floc += conformer.getAtomPos(idx); + ++nSel; + } + if (nSel == feature_idx_type[i].first.size()) { + floc /= nSel; + floc -= ave; + DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc + << ")"); + + auto array_idx = nSelectedAtoms + numFeatures; + res.coord[array_idx * 3] = floc.x; + res.coord[(array_idx * 3) + 1] = floc.y; + res.coord[(array_idx * 3) + 2] = floc.z; + rad_vector[array_idx] = radius_color; + res.atom_type_vector[array_idx] = feature_idx_type[i].second; + ++numFeatures; + } + } +} + +} // namespace +// The conformer is left where it is, the shape is translated to the origin. +ShapeInput PrepareConformer(const ROMol &mol, int confId, + const ShapeInputOptions &shapeOpts) { + Align3D::setUseCutOff(true); + ShapeInput res; + + auto feature_idx_type = extractFeatures(mol, shapeOpts); auto &conformer = mol.getConformer(confId); if (!conformer.is3D()) { @@ -256,72 +384,33 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) { unsigned int nAtoms = mol.getNumAtoms(); // DEBUG_MSG("num atoms: " << nAtoms); - unsigned int nHeavyAtoms = mol.getNumHeavyAtoms(); - DEBUG_MSG("num heavy atoms: " << nHeavyAtoms); - - unsigned int nAlignmentAtoms = nHeavyAtoms + feature_idx_type.size(); - + // Start with the arrays as large as they will possibly have to be. + // They will be re-sized later. + unsigned int nAlignmentAtoms = nAtoms + feature_idx_type.size(); std::vector rad_vector(nAlignmentAtoms); res.atom_type_vector.resize(nAlignmentAtoms, 0); RDGeom::Point3D ave; - for (unsigned i = 0, activeAtomIdx = 0; i < nAtoms; ++i) { - unsigned int Z = mol.getAtomWithIdx(i)->getAtomicNum(); - if (Z > 1) { - ave += conformer.getAtomPos(i); - - if (auto rad = vdw_radii.find(Z); rad != vdw_radii.end()) { - rad_vector[activeAtomIdx++] = rad->second; - } else { - throw ValueErrorException("No VdW radius for atom with Z=" + - std::to_string(Z)); - } - } - } + unsigned int nSelectedAtoms = 0; + extractAtomRadii(conformer, nAtoms, shapeOpts, ave, nSelectedAtoms, + rad_vector); // translate steric center to origin - ave /= nHeavyAtoms; DEBUG_MSG("steric center: (" << ave << ")"); res.shift = {-ave.x, -ave.y, -ave.z}; res.coord.resize(nAlignmentAtoms * 3); - for (unsigned i = 0, j = 0; i < nAtoms; ++i) { - // use only non-H for alignment - if (mol.getAtomWithIdx(i)->getAtomicNum() > 1) { - RDGeom::Point3D pos = conformer.getAtomPos(i); - pos -= ave; - - res.coord[j * 3] = pos.x; - res.coord[(j * 3) + 1] = pos.y; - res.coord[(j * 3) + 2] = pos.z; - ++j; - } - } - - // get feature coordinates - simply the average of coords of all atoms in the - // feature - for (unsigned i = 0; i < feature_idx_type.size(); ++i) { - RDGeom::Point3D floc; - for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) { - unsigned int idx = feature_idx_type[i].first[j]; - if (idx >= nAtoms || mol.getAtomWithIdx(idx)->getAtomicNum() <= 1) { - throw ValueErrorException("Invalid feature atom index"); - } - floc += conformer.getAtomPos(idx); - } - floc /= feature_idx_type[i].first.size(); - floc -= ave; - DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc - << ")"); - - auto array_idx = nHeavyAtoms + i; - res.coord[array_idx * 3] = floc.x; - res.coord[(array_idx * 3) + 1] = floc.y; - res.coord[(array_idx * 3) + 2] = floc.z; - rad_vector[array_idx] = radius_color; - res.atom_type_vector[array_idx] = feature_idx_type[i].second; - } + extractAtomCoords(conformer, nAtoms, shapeOpts, ave, res.coord); + unsigned int numFeatures = 0; + extractFeatureCoords(conformer, nAtoms, nSelectedAtoms, feature_idx_type, + shapeOpts, ave, numFeatures, res, rad_vector); + // Now cut the final vectors down to the actual number of atoms and + // features used. + nAlignmentAtoms = nSelectedAtoms + numFeatures; + res.coord.resize(nAlignmentAtoms * 3); + rad_vector.resize(nAlignmentAtoms); + res.atom_type_vector.resize(nAlignmentAtoms); Align3D::setAlpha(rad_vector.data(), rad_vector.size(), res.alpha_vector); // regular atom self overlap @@ -346,18 +435,12 @@ ShapeInput PrepareConformer(const ROMol &mol, int confId, bool useColors) { return res; } -std::pair AlignMolecule(const ShapeInput &refShape, ROMol &fit, - std::vector &matrix, - int fitConfId, bool useColors, - double opt_param, - unsigned int max_preiters, - unsigned int max_postiters) { - PRECONDITION(matrix.size() == 12, "bad matrix size"); - Align3D::setUseCutOff(true); - - DEBUG_MSG("Fit details:"); - auto fitShape = PrepareConformer(fit, fitConfId, useColors); - +std::pair AlignShape(const ShapeInput &refShape, + ShapeInput &fitShape, + std::vector &matrix, + double opt_param, + unsigned int max_preiters, + unsigned int max_postiters) { std::set jointColorAtomTypeSet; Align3D::getJointColorTypeSet( refShape.atom_type_vector.data(), refShape.atom_type_vector.size(), @@ -365,8 +448,11 @@ std::pair AlignMolecule(const ShapeInput &refShape, ROMol &fit, jointColorAtomTypeSet); auto mapCp = refShape.colorAtomType2IndexVectorMap; Align3D::restrictColorAtomType2IndexVectorMap(mapCp, jointColorAtomTypeSet); - Align3D::restrictColorAtomType2IndexVectorMap( - fitShape.colorAtomType2IndexVectorMap, jointColorAtomTypeSet); + // Take copy of the color atom mappings so as not to alter the input shape + // which might be re-used. + auto fitMapCp = fitShape.colorAtomType2IndexVectorMap; + Align3D::restrictColorAtomType2IndexVectorMap(fitMapCp, + jointColorAtomTypeSet); DEBUG_MSG("Running alignment..."); double nbr_st = 0.0; @@ -375,40 +461,74 @@ std::pair AlignMolecule(const ShapeInput &refShape, ROMol &fit, refShape.coord.data(), refShape.alpha_vector, refShape.volumeAtomIndexVector, mapCp, refShape.sov, refShape.sof, fitShape.coord.data(), fitShape.alpha_vector, - fitShape.volumeAtomIndexVector, fitShape.colorAtomType2IndexVectorMap, - fitShape.sov, fitShape.sof, !jointColorAtomTypeSet.empty(), max_preiters, - max_postiters, opt_param, matrix.data(), nbr_st, nbr_ct); + fitShape.volumeAtomIndexVector, fitMapCp, fitShape.sov, fitShape.sof, + !jointColorAtomTypeSet.empty(), max_preiters, max_postiters, opt_param, + matrix.data(), nbr_st, nbr_ct); DEBUG_MSG("Done!"); DEBUG_MSG("nbr_st: " << nbr_st); DEBUG_MSG("nbr_ct: " << nbr_ct); - // transform fit coords - Conformer &fit_conformer = fit.getConformer(fitConfId); - if (3 * fit.getNumAtoms() != fitShape.coord.size()) { + std::vector transformed(fitShape.coord.size()); + Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(), + fitShape.coord.size() / 3, matrix.data()); + fitShape.coord = transformed; + return std::make_pair(nbr_st, nbr_ct); +} + +void TransformConformer(const std::vector &finalTrans, + const std::vector &matrix, ShapeInput &fitShape, + Conformer &fitConf) { + const unsigned int nAtoms = fitConf.getOwningMol().getNumAtoms(); + if (3 * nAtoms != fitShape.coord.size()) { // Hs were removed - fitShape.coord.resize(3 * fit.getNumAtoms()); - for (unsigned int i = 0; i < fit.getNumAtoms(); ++i) { - const auto &pos = fit_conformer.getAtomPos(i); + fitShape.coord.resize(3 * nAtoms); + for (unsigned int i = 0; i < nAtoms; ++i) { + const auto &pos = fitConf.getAtomPos(i); fitShape.coord[i * 3] = pos.x + fitShape.shift[0]; fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1]; fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2]; } } - std::vector transformed(fit.getNumAtoms() * 3); + + std::vector transformed(nAtoms * 3); Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(), - fit.getNumAtoms(), matrix.data()); + fitConf.getOwningMol().getNumAtoms(), + matrix.data()); - for (unsigned i = 0; i < fit.getNumAtoms(); ++i) { - RDGeom::Point3D &pos = fit_conformer.getAtomPos(i); - pos.x = transformed[i * 3]; - pos.y = transformed[(i * 3) + 1]; - pos.z = transformed[(i * 3) + 2]; + for (unsigned i = 0; i < nAtoms; ++i) { + RDGeom::Point3D &pos = fitConf.getAtomPos(i); + pos.x = transformed[i * 3] - finalTrans[0]; + pos.y = transformed[(i * 3) + 1] - finalTrans[1]; + pos.z = transformed[(i * 3) + 2] - finalTrans[2]; } - fit.setProp("shape_align_shape_tanimoto", nbr_st); - fit.setProp("shape_align_color_tanimoto", nbr_ct); +} - return std::make_pair(nbr_st, nbr_ct); +std::pair AlignMolecule( + const ShapeInput &refShape, ROMol &fit, std::vector &matrix, + int fitConfId, bool useColors, double opt_param, unsigned int max_preiters, + unsigned int max_postiters, bool applyRefShift) { + PRECONDITION(matrix.size() == 12, "bad matrix size"); + Align3D::setUseCutOff(true); + + DEBUG_MSG("Fit details:"); + ShapeInputOptions shapeOpts; + shapeOpts.useColors = useColors; + auto fitShape = PrepareConformer(fit, fitConfId, shapeOpts); + auto tanis = AlignShape(refShape, fitShape, matrix, opt_param, max_preiters, + max_postiters); + + // transform fit coords + Conformer &fit_conformer = fit.getConformer(fitConfId); + std::vector finalTrans{0.0, 0.0, 0.0}; + if (applyRefShift) { + finalTrans = refShape.shift; + } + TransformConformer(finalTrans, matrix, fitShape, fit_conformer); + fit.setProp("shape_align_shape_tanimoto", tanis.first); + fit.setProp("shape_align_color_tanimoto", tanis.second); + + return tanis; } std::pair AlignMolecule(const ROMol &ref, ROMol &fit, @@ -420,17 +540,11 @@ std::pair AlignMolecule(const ROMol &ref, ROMol &fit, Align3D::setUseCutOff(true); DEBUG_MSG("Reference details:"); - auto refShape = PrepareConformer(ref, refConfId, useColors); + ShapeInputOptions shapeOpts; + shapeOpts.useColors = useColors; + auto refShape = PrepareConformer(ref, refConfId, shapeOpts); auto scores = AlignMolecule(refShape, fit, matrix, fitConfId, useColors, - opt_param, max_preiters, max_postiters); - // translate the fit conformer back to the steric center of the reference. - Conformer &fit_conformer = fit.getConformer(fitConfId); - for (unsigned i = 0; i < fit.getNumAtoms(); ++i) { - RDGeom::Point3D &pos = fit_conformer.getAtomPos(i); - pos.x -= refShape.shift[0]; - pos.y -= refShape.shift[1]; - pos.z -= refShape.shift[2]; - } + opt_param, max_preiters, max_postiters, true); return scores; } diff --git a/External/pubchem_shape/PubChemShape.hpp b/External/pubchem_shape/PubChemShape.hpp index 19090167b..8d7717e61 100644 --- a/External/pubchem_shape/PubChemShape.hpp +++ b/External/pubchem_shape/PubChemShape.hpp @@ -68,17 +68,66 @@ struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput { double sof{0.0}; }; +struct RDKIT_PUBCHEMSHAPE_EXPORT ShapeInputOptions { + bool useColors{true}; // Whether to use colors (pharmacophore features) in + // the score. + bool includeDummies{false}; // Hydrogen and dummy atoms are normally skipped. + // This forces the inclusion of dummy atoms. + double dummyRadius{2.16}; // This is the radius used for Xe. + std::vector atomSubset; // If not empty, use just these atoms + // in the molecule to form the + // ShapeInput object. + std::vector notColorAtoms; // Any atoms mentioned here by index + // should not be used in a color + // feature. + std::vector> + atomRadii; // Use these non-standard radii for these atoms. + // The int is for the atom index in the molecule, not + // the atomic number. +}; + //! Prepare the input for the shape comparison /*! \param mol the molecule to prepare \param confId (optional) the conformer to use - \param useColors (optional) whether to generate info about colors + \param shapeOpts (optional) Change the default behaviour. - \return a ShapeInput object + \return a ShapeInput object, translated to the origin */ -RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol, - int confId = -1, - bool useColors = true); +RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput +PrepareConformer(const RDKit::ROMol &mol, int confId = -1, + const ShapeInputOptions &shapeOpts = ShapeInputOptions()); + +//! Align a shape onto a reference shape. Assumes the shapes are both +//! centred on the origin. +/*! + \param refShape the reference shape + \param fit the shape to align + \param matrix the transformation matrix (populated on return) + \param opt_param (optional) the optimization parameter \param + max_preiters (optional) the max number of pre-optimization iterations \param + max_postiters (optional) the max number of post-optimization iterationsa + + \return a pair of the shape Tanimoto value and the color Tanimoto value (zero + if useColors is false) +*/ +RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignShape( + const ShapeInput &refShape, ShapeInput &fitShape, + std::vector &matrix, double opt_param = 1.0, + unsigned int max_preiters = 10u, unsigned int max_postiters = 30u); + +//! Assuming that fitShape has been overlaid onto a reference shape to give +//! the ! transformation matrix, apply the same transformation to the given +//! conformer. +/*! + \param finalTrans the final translation to apply to the fitConf coords. + \param matrix the transformation matrix produced from alignment + \param fitShape the shape that was aligned + \param fitConf the conformation to be transformed +*/ +RDKIT_PUBCHEMSHAPE_EXPORT void TransformConformer( + const std::vector &finalTrans, const std::vector &matrix, + ShapeInput &fitShape, RDKit::Conformer &fitConf); //! Align a molecule to a reference shape /*! @@ -90,6 +139,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol, \param opt_param (optional) the optimization parameter \param max_preiters (optional) the max number of pre-optimization iterations \param max_postiters (optional) the max number of post-optimization iterations + \param applyRefShift (optional) if true, apply the reference shape's shift + translation to the final coordinates \return a pair of the shape Tanimoto value and the color Tanimoto value (zero if useColors is false) @@ -97,7 +148,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT ShapeInput PrepareConformer(const RDKit::ROMol &mol, RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignMolecule( const ShapeInput &refShape, RDKit::ROMol &fit, std::vector &matrix, int fitConfId = -1, bool useColors = true, double opt_param = 1.0, - unsigned int max_preiters = 10u, unsigned int max_postiters = 30u); + unsigned int max_preiters = 10u, unsigned int max_postiters = 30u, + bool applyRefShift = false); //! Align a molecule to a reference molecule /*! @@ -105,11 +157,15 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignMolecule( \param fit the molecule to align \param matrix the transformation matrix (populated on return) \param refConfId (optional) the conformer to use for the reference - molecule \param fitConfId (optional) the conformer to use for the fit - molecule \param useColors (optional) whether or not to use colors in the - scoring \param opt_param (optional) the optimization parameter \param - max_preiters (optional) the max number of pre-optimization iterations \param - max_postiters (optional) the max number of post-optimization iterationsa + molecule + \param fitConfId (optional) the conformer to use for the fit + molecule + \param useColors (optional) whether or not to use colors in the + scoring + \param opt_param (optional) the optimization parameter + \param max_preiters (optional) the max number of pre-optimization iterations + \param max_postiters (optional) the max number of post-optimization + iterations \return a pair of the shape Tanimoto value and the color Tanimoto value (zero if useColors is false) diff --git a/External/pubchem_shape/Wrap/cshapealign.cpp b/External/pubchem_shape/Wrap/cshapealign.cpp index 154122f9d..e2b1ea4a9 100644 --- a/External/pubchem_shape/Wrap/cshapealign.cpp +++ b/External/pubchem_shape/Wrap/cshapealign.cpp @@ -45,15 +45,104 @@ python::tuple alignMol(const RDKit::ROMol &ref, RDKit::ROMol &probe, } python::tuple alignMol2(const ShapeInput &ref, RDKit::ROMol &probe, int probeConfId, bool useColors, double opt_param, - unsigned int max_preiters, unsigned int max_postiters) { + unsigned int max_preiters, unsigned int max_postiters, + bool applyRefShift) { std::vector matrix(12, 0.0); auto [nbr_st, nbr_ct] = AlignMolecule(ref, probe, matrix, probeConfId, useColors, opt_param, - max_preiters, max_postiters); + max_preiters, max_postiters, applyRefShift); return python::make_tuple(nbr_st, nbr_ct); } -ShapeInput *prepConf(const RDKit::ROMol &mol, int confId, bool useColors) { - return new ShapeInput(PrepareConformer(mol, confId, useColors)); +python::tuple alignShapes(const ShapeInput &refShape, ShapeInput &fitShape, + double opt_param, unsigned int max_preiters, + unsigned int max_postiters) { + std::vector matrix(12, 0.0); + auto [nbr_st, nbr_ct] = AlignShape(refShape, fitShape, matrix, opt_param, + max_preiters, max_postiters); + python::list pyMatrix; + for (auto m : matrix) { + pyMatrix.append(m); + } + return python::make_tuple(nbr_st, nbr_ct, pyMatrix); +} +void transformConformer(const python::list &pyFinalTrans, + const python::list &pyMatrix, ShapeInput probeShape, + RDKit::Conformer &probeConf) { + std::vector matrix; + pythonObjectToVect(pyMatrix, matrix); + if (matrix.size() != 12) { + throw_value_error( + "The transformation matrix must have 12 values. It had " + + std::to_string(matrix.size()) + "."); + } + std::vector finalTrans; + pythonObjectToVect(pyFinalTrans, finalTrans); + if (finalTrans.size() != 3) { + throw_value_error( + "The final translation vector must have 3 values. It had " + + std::to_string(finalTrans.size()) + "."); + } + TransformConformer(finalTrans, matrix, probeShape, probeConf); +} +ShapeInput *prepConf(const RDKit::ROMol &mol, int confId, + const python::object &py_opts) { + ShapeInputOptions opts; + if (!py_opts.is_none()) { + opts = python::extract(py_opts); + } + return new ShapeInput(PrepareConformer(mol, confId, opts)); +} +void set_atomSubset(ShapeInputOptions &opts, const python::list &as) { + pythonObjectToVect(as, opts.atomSubset); +} + +python::list get_atomSubset(const ShapeInputOptions &opts) { + python::list py_list; + for (const auto &val : opts.atomSubset) { + py_list.append(val); + } + return py_list; +} + +void set_notColorAtoms(ShapeInputOptions &opts, const python::list &nca) { + pythonObjectToVect(nca, opts.notColorAtoms); +} + +python::list get_notColorAtoms(const ShapeInputOptions &opts) { + python::list py_list; + for (const auto &val : opts.notColorAtoms) { + py_list.append(val); + } + return py_list; +} + +void set_atomRadii(ShapeInputOptions &opts, const python::list &ar) { + int len = python::len(ar); + opts.atomRadii.resize(len); + for (int i = 0; i < len; i++) { + unsigned int atomIdx = python::extract(ar[i][0]); + double radius = python::extract(ar[i][1]); + opts.atomRadii[i] = std::make_pair(atomIdx, radius); + } +} + +python::list get_atomRadii(const ShapeInputOptions &opts) { + python::list py_list; + for (const auto &val : opts.atomRadii) { + py_list.append(python::make_tuple(static_cast(val.first), val.second)); + } + return py_list; +} + +void set_shapeShift(ShapeInput &shp, const python::list &s) { + pythonObjectToVect(s, shp.shift); +} +python::list get_shapeShift(const ShapeInput &shp) { + python::list py_list; + for (const auto &val : shp.shift) { + py_list.append(val); + } + return py_list; } } // namespace helpers @@ -62,6 +151,31 @@ void wrap_pubchemshape() { RegisterVectorConverter("DoubleVector"); RegisterVectorConverter("UnsignedIntVector"); + python::class_("ShapeInputOptions", + "Shape Input Options") + .def_readwrite( + "useColors", &ShapeInputOptions::useColors, + "Whether to use colors (pharmacophore features) in the score. Default=True.") + .def_readwrite( + "includeDummies", &ShapeInputOptions::includeDummies, + "Whether to use dummy atoms in the alignment. Default=False.") + .def_readwrite( + "dummyRadius", &ShapeInputOptions::dummyRadius, + "If using dummy atoms in the alignment, what radius to use for them." + " Default=2.16 (the radius of Xe).") + .add_property( + "atomSubset", &helpers::get_atomSubset, &helpers::set_atomSubset, + "If not empty, use just these atoms in the molecule to form the ShapeInput object.") + .add_property( + "notColorAtoms", &helpers::get_notColorAtoms, + &helpers::set_notColorAtoms, + "Any atoms mentioned here by index should not be used in a color feature.") + .add_property( + "atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii, + "Non-standard radii to use for the atoms specified by their indices" + " in the molecule. A list of tuples of [int, float].") + .def("__setattr__", &safeSetattr); + python::def( "AlignMol", &helpers::alignMol, (python::arg("ref"), python::arg("probe"), python::arg("refConfId") = -1, @@ -98,13 +212,14 @@ Returns (python::arg("refShape"), python::arg("probe"), python::arg("probeConfId") = -1, python::arg("useColors") = true, python::arg("opt_param") = 1.0, python::arg("max_preiters") = 10, - python::arg("max_postiters") = 30), + python::arg("max_postiters") = 30, python::arg("applyRefShift") = false), R"DOC(Aligns a probe molecule to a reference shape. The probe is modified. +Assumes the shapes are both centred on the origin. Parameters ---------- refShape : ShapeInput - Reference molecule + Reference shape probe : RDKit.ROMol Probe molecule probeConfId : int, optional @@ -114,6 +229,9 @@ useColors : bool, optional optParam : float, optional max_preiters : int, optional max_postiters : int, optional +applyRefShift : bool, optional + If True, apply the reference shape's shift translation to the final + coordinates. Returns @@ -122,9 +240,54 @@ Returns The results are (shape_score, color_score) The color_score is zero if useColors is False)DOC"); + python::def( + "AlignShapes", &helpers::alignShapes, + (python::arg("refShape"), python::arg("probeShape"), + python::arg("opt_param") = 1.0, python::arg("max_preiters") = 10, + python::arg("max_postiters") = 30), + R"DOC(Aligns a probe shape to a reference shape. The probe is modified. + +Parameters +---------- +refShape : ShapeInput + Reference shape +probeShape : ShapeInput + Probe shape +opt_param : float, optional +max_preiters : int, optional +max_postiters : int, optional + + +Returns +------- + 3-tuple of double, double, list of doubles + The results are (shape_score, color_score, matrix) + The matrix is a 12-float list giving the transformation matrix that + overlays the probe onto the reference.)DOC"); + + python::def( + "TransformConformer", &helpers::transformConformer, + (python::arg("finalTrans"), python::arg("matrix"), + python::arg("probeShape"), python::arg("probeConformer")), + R"DOC(Assuming that probeShape has been overlaid onto refShape to give +the supplied transformation matrix, applies that transformation to the + given conformer. + +Parameters +---------- +refShape : ShapeInput + Reference shape +matrix: list[float * 12] + The transformation matrix +probeShape : ShapeInput + Probe shape +probeConformer : Conformer + Probe conformer +)DOC"); + python::def("PrepareConformer", &helpers::prepConf, (python::arg("mol"), python::arg("confId") = -1, - python::arg("useColors") = true), + python::arg("opts") = python::object()), R"DOC(Generates a ShapeInput object for a molecule Parameters @@ -146,7 +309,8 @@ Returns .def_readwrite("atom_type_vector", &ShapeInput::atom_type_vector) .def_readwrite("volumeAtomIndexVector", &ShapeInput::volumeAtomIndexVector) - .def_readwrite("shift", &ShapeInput::shift) + .add_property("shift", &helpers::get_shapeShift, &helpers::set_shapeShift, + "Translation of centre of shape coordinates to origin.") .def_readwrite("sov", &ShapeInput::sov) .def_readwrite("sof", &ShapeInput::sof); } diff --git a/External/pubchem_shape/Wrap/test_rdshapealign.py b/External/pubchem_shape/Wrap/test_rdshapealign.py index 9996fd374..0e1137ead 100644 --- a/External/pubchem_shape/Wrap/test_rdshapealign.py +++ b/External/pubchem_shape/Wrap/test_rdshapealign.py @@ -32,6 +32,49 @@ class TestCase(unittest.TestCase): self.assertAlmostEqual(tpl[0], 0.773, places=3) self.assertAlmostEqual(tpl[1], 0.303, places=3) + def test4_ShapeInputOptions(self): + opts = rdShapeAlign.ShapeInputOptions() + opts.useColors = False + shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts) + tpl = rdShapeAlign.AlignMol(shp, self.probe, opt_param=0.5, max_preiters=3, max_postiters=16) + self.assertAlmostEqual(tpl[0], 0.773, places=3) + self.assertAlmostEqual(tpl[1], 0.0, places=3) + + opts.atomSubset = [4, 5, 6, 7, 8, 9] + shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts) + self.assertAlmostEqual(shp.sov, 251.946, places=3) + self.assertAlmostEqual(shp.sof, 0.0, places=3) + + opts.atomRadii = [(4, 1.9)] + shp = rdShapeAlign.PrepareConformer(self.ref, -1, opts) + self.assertAlmostEqual(shp.sov, 274.576, places=3) + + with self.assertRaises(AttributeError): + opts.rhubarb = True + + def test5_ShapeShapeOverlay(self): + refShp = rdShapeAlign.PrepareConformer(self.ref, -1) + probeShp = rdShapeAlign.PrepareConformer(self.probe, -1) + tpl = rdShapeAlign.AlignShapes(refShp, probeShp) + probeCp = Chem.Mol(self.probe) + rdShapeAlign.TransformConformer(refShp.shift, tpl[2], probeShp, probeCp.GetConformer(-1)) + # Just show it did something. The full test is in the C++ layer. + self.assertNotEqual(self.probe.GetConformer().GetAtomPosition(0), + probeCp.GetConformer().GetAtomPosition(0)) + matrix = tpl[2][:10] + with self.assertRaises(ValueError): + rdShapeAlign.TransformConformer(refShp.shift, matrix, probeShp, probeCp.GetConformer(-1)) + + def test6_notColorAtoms(self): + m1 = Chem.MolFromSmiles("Nc1ccccc1 |(0.392086,-2.22477,0.190651;" + "0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;" + "-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;" + "1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097)" + ",atomProp:0.dummyLabel.*|") + opts = rdShapeAlign.ShapeInputOptions() + opts.notColorAtoms = [0] + shp = rdShapeAlign.PrepareConformer(m1, -1, opts) + self.assertAlmostEqual(shp.sof, 5.074, places=3) if __name__ == '__main__': unittest.main() diff --git a/External/pubchem_shape/test.cpp b/External/pubchem_shape/test.cpp index 5bb5d7f84..8ce34c441 100644 --- a/External/pubchem_shape/test.cpp +++ b/External/pubchem_shape/test.cpp @@ -312,13 +312,13 @@ TEST_CASE("d2CutOff set") { auto m1 = "c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles; REQUIRE(m1); - auto shape1 = PrepareConformer(*m1, -1, true); + auto shape1 = PrepareConformer(*m1, -1); std::vector matrix(12, 0.0); auto m2 = "c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles; REQUIRE(m2); AlignMolecule(*m2, *m2, matrix); - auto shape2 = PrepareConformer(*m1, -1, true); + auto shape2 = PrepareConformer(*m1, -1); CHECK(shape1.sov == shape2.sov); } @@ -344,7 +344,7 @@ TEST_CASE("Overlay onto shape bug (Github8462)") { CHECK_THAT((pos1 - pos2).length(), Catch::Matchers::WithinAbs(0.0, 0.005)); } - auto s1 = PrepareConformer(*m1, -1, true); + auto s1 = PrepareConformer(*m1, -1); auto [st1, ct1] = AlignMolecule(s1, m3, matrix); CHECK_THAT(st1, Catch::Matchers::WithinAbs(1.0, 0.005)); CHECK_THAT(ct1, Catch::Matchers::WithinAbs(1.0, 0.005)); @@ -354,4 +354,131 @@ TEST_CASE("Overlay onto shape bug (Github8462)") { auto pos2 = m3.getConformer().getAtomPos(i); CHECK_THAT((pos1 - pos2).length(), Catch::Matchers::WithinAbs(0.0, 0.005)); } -} \ No newline at end of file +} + +TEST_CASE("Shape subset") { + auto m1 = + "c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles; + REQUIRE(m1); + ShapeInputOptions shapeOpts; + shapeOpts.atomSubset = std::vector{0, 1, 2, 3, 10, 11}; + auto partShape = PrepareConformer(*m1, -1, shapeOpts); + CHECK(partShape.coord.size() == 21); + CHECK_THAT(partShape.sov, Catch::Matchers::WithinAbs(253.929, 0.005)); + CHECK_THAT(partShape.sof, Catch::Matchers::WithinAbs(5.074, 0.005)); + + shapeOpts.atomSubset.clear(); + auto wholeShape = PrepareConformer(*m1, -1, shapeOpts); + CHECK(wholeShape.coord.size() == 42); + CHECK_THAT(wholeShape.sov, Catch::Matchers::WithinAbs(542.04, 0.005)); + CHECK_THAT(wholeShape.sof, Catch::Matchers::WithinAbs(10.148, 0.005)); +} + +TEST_CASE("Dummy radii") { + auto m1 = + "[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles; + auto shape1 = PrepareConformer(*m1, -1); + CHECK(shape1.coord.size() == 24); + + // The dummy radius defaults to 2.16, the same as Xe, so these shapes should + // come out the same. + auto m2 = + "*c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles; + ShapeInputOptions shapeOpts; + shapeOpts.includeDummies = true; + auto shape2 = PrepareConformer(*m2, -1, shapeOpts); + CHECK(shape2.coord.size() == 24); + CHECK(shape1.sov == shape2.sov); + + shapeOpts.dummyRadius = 2.5; + auto shape3 = PrepareConformer(*m2, -1, shapeOpts); + CHECK_THAT(shape3.sov, Catch::Matchers::WithinAbs(427.925, 0.005)); + + shapeOpts.includeDummies = false; + auto shape4 = PrepareConformer(*m2, -1, shapeOpts); + CHECK(shape4.coord.size() == 21); + CHECK(shape4.sov < shape2.sov); + CHECK_THAT(shape4.sov, Catch::Matchers::WithinAbs(254.578, 0.005)); +} + +TEST_CASE("Non-standard radii") { + auto m1 = + "[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles; + auto shape1 = PrepareConformer(*m1, -1); + CHECK(shape1.coord.size() == 24); + CHECK_THAT(shape1.sov, Catch::Matchers::WithinAbs(376.434, 0.005)); + + ShapeInputOptions shapeOpts; + // Benzene derivative with atom 4 with an N radius. + shapeOpts.atomRadii = + std::vector>{{0, 2.5}, {4, 1.55}}; + auto shape2 = PrepareConformer(*m1, -1, shapeOpts); + CHECK_THAT(shape2.sov, Catch::Matchers::WithinAbs(412.666, 0.005)); + + // Corresponding pyridine derivative. + auto m2 = + "[Xe]c1ccncc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles; + auto shape3 = PrepareConformer(*m2, -1, shapeOpts); + CHECK(shape3.sov == shape2.sov); +} + +TEST_CASE("Shape-Shape alignment") { + std::string dirName = getenv("RDBASE"); + dirName += "/External/pubchem_shape/test_data"; + + auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf"); + auto ref = suppl[0]; + REQUIRE(ref); + auto probe = suppl[1]; + REQUIRE(probe); + RWMol probeCP(*probe); + + auto refShape = PrepareConformer(*ref, -1); + auto probeShape = PrepareConformer(*probe, -1); + + std::vector matrix(12, 0.0); + auto [mol_st, mol_ct] = AlignMolecule(*ref, *probe, matrix, -1, -1); + + auto [shape_st, shape_ct] = AlignShape(refShape, probeShape, matrix); + + // Check that the same results are achieved when overlaying the probe + // molecule onto the reference and the probe shape onto the reference shape + + CHECK_THAT(shape_st, Catch::Matchers::WithinAbs(mol_st, 0.001)); + CHECK_THAT(shape_ct, Catch::Matchers::WithinAbs(mol_ct, 0.001)); + for (unsigned int i = 0; i < probe->getNumAtoms(); i++) { + const auto &pos = probe->getConformer().getAtomPos(i); + CHECK_THAT(pos.x, Catch::Matchers::WithinAbs( + probeShape.coord[3 * i] - refShape.shift[0], 0.001)); + CHECK_THAT(pos.y, + Catch::Matchers::WithinAbs( + probeShape.coord[3 * i + 1] - refShape.shift[1], 0.001)); + CHECK_THAT(pos.z, + Catch::Matchers::WithinAbs( + probeShape.coord[3 * i + 2] - refShape.shift[2], 0.001)); + } + + // Also check the TransformConformer function + TransformConformer(refShape.shift, matrix, probeShape, + probeCP.getConformer(-1)); + for (unsigned int i = 0; i < probe->getNumAtoms(); i++) { + const auto &pos = probe->getConformer().getAtomPos(i); + const auto &poscp = probeCP.getConformer().getAtomPos(i); + CHECK_THAT(pos.x, Catch::Matchers::WithinAbs(poscp.x, 0.001)); + CHECK_THAT(pos.y, Catch::Matchers::WithinAbs(poscp.y, 0.001)); + CHECK_THAT(pos.z, Catch::Matchers::WithinAbs(poscp.z, 0.001)); + } +} + +TEST_CASE("Atoms excluded from Color features") { + auto m1 = + "Nc1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles; + auto shape1 = PrepareConformer(*m1, -1); + // The aniline N comes out as a donor, acceptor and basic so gets 3 + // features. + CHECK(shape1.coord.size() == 33); + ShapeInputOptions opts; + opts.notColorAtoms = std::vector{0}; + auto shape2 = PrepareConformer(*m1, -1, opts); + CHECK(shape2.coord.size() == 24); +}