Files
rdkit/External/pubchem_shape/PubChemShape.cpp
David Cosgrove 21b3c4e6f1 Shape overlay initial start aligned to principal axes (#8999)
* Add MolTransforms library.

* Transform to inertial frame of reference before overlay.

* Vital fix of typo in comment.

* Tidy up debugging cruft.

* Comment in CMakeLists.txt.

* Fix python wrappers.

* Extra tidy.

* Response to review.

* Tidy includes.

---------

Co-authored-by: David Cosgrove <david@cozchemix.co.uk>
2025-12-20 08:35:28 +01:00

767 lines
30 KiB
C++

#include <ranges>
#include <sstream>
#include <stdexcept>
#include <GraphMol/RWMol.h>
#include <Geometry/Transform3D.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/flyweight.hpp>
#include <boost/flyweight/key_value.hpp>
#include <boost/flyweight/no_tracking.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include "pubchem-align3d/shape_functions.hpp"
#include "PubChemShape.hpp"
constexpr auto pubchemFeatureName = "PUBCHEM_PHARMACOPHORE_FEATURES";
// #define DEBUG_MSG(msg_stream) cout << msg_stream << '\n'
#define DEBUG_MSG(msg_stream)
using namespace RDKit;
// Bondi radii
// can find more of these in Table 12 of this publication:
// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/
// The dummy atom radius (atomic number 0) is set to
// 2.16 in ShapeInputOptions and may be varied there, as
// may all the other radii if required, including the
// addition of atoms not covered here.
const std::map<unsigned int, double> vdw_radii = {
{1, 1.10}, // H
{2, 1.40}, // He
{3, 1.81}, // Li
{4, 1.53}, // Be
{5, 1.92}, // B
{6, 1.70}, // C
{7, 1.55}, // N
{8, 1.52}, // O
{9, 1.47}, // F
{10, 1.54}, // Ne
{11, 2.27}, // Na
{12, 1.73}, // Mg
{13, 1.84}, // Al
{14, 2.10}, // Si
{15, 1.80}, // P
{16, 1.80}, // S
{17, 1.75}, // Cl
{18, 1.88}, // Ar
{19, 2.75}, // K
{20, 2.31}, // Ca
{31, 1.87}, // Ga
{32, 2.11}, // Ge
{33, 1.85}, // As
{34, 1.90}, // Se
{35, 1.83}, // Br
{36, 2.02}, // Kr
{37, 3.03}, // Rb
{38, 2.49}, // Sr
{49, 1.93}, // In
{50, 2.17}, // Sn
{51, 2.06}, // Sb
{52, 2.06}, // Te
{53, 1.98}, // I
{54, 2.16}, // Xe
{55, 3.43}, // Cs
{56, 2.68}, // Ba
{81, 1.96}, // Tl
{82, 2.02}, // Pb
{83, 2.07}, // Bi
{84, 1.97}, // Po
{85, 2.02}, // At
{86, 2.20}, // Rn
{87, 3.48}, // Fr
{88, 2.83}, // Ra
};
constexpr double radius_color =
1.08265; // same radius for all feature/color "atoms"
namespace {
class ss_matcher {
public:
ss_matcher(const std::string &pattern) : m_pattern(pattern) {
m_needCopies = (pattern.find_first_of("$") != std::string::npos);
RDKit::RWMol *p = RDKit::SmartsToMol(pattern);
m_matcher = p;
POSTCONDITION(m_matcher, "no matcher");
};
const RDKit::ROMol *getMatcher() const { return m_matcher; };
unsigned int countMatches(const RDKit::ROMol &mol) const {
PRECONDITION(m_matcher, "no matcher");
std::vector<RDKit::MatchVectType> matches;
// This is an ugly one. Recursive queries aren't thread safe.
// Unfortunately we have to take a performance hit here in order
// to guarantee thread safety
if (m_needCopies) {
const RDKit::ROMol nm(*(m_matcher), true);
RDKit::SubstructMatch(mol, nm, matches);
} else {
const RDKit::ROMol &nm = *m_matcher;
RDKit::SubstructMatch(mol, nm, matches);
}
return matches.size();
}
~ss_matcher() { delete m_matcher; };
private:
ss_matcher() : m_pattern("") {};
std::string m_pattern;
bool m_needCopies{false};
const RDKit::ROMol *m_matcher{nullptr};
};
} // namespace
typedef boost::flyweight<boost::flyweights::key_value<std::string, ss_matcher>,
boost::flyweights::no_tracking>
pattern_flyweight;
// Definitions for feature points adapted from:
// Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
const std::vector<std::vector<std::string>> smartsPatterns = {
{"[$([N;!H0;v3,v4&+1]),\
$([O,S;H1;+0]),\
n&H1&+0]"}, // Donor
{"[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),\
$([O,S;H0;v2]),\
$([O,S;-]),\
$([N;v3;!$(N-*=[O,N,P,S])]),\
n&H0&+0,\
$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"}, // Acceptor
{
"[r]1[r][r]1",
"[r]1[r][r][r]1",
"[r]1[r][r][r][r]1",
"[r]1[r][r][r][r][r]1",
"[r]1[r][r][r][r][r][r]1",
}, // rings
// "[a]", //
// Aromatic
// "[F,Cl,Br,I]", // Halogen
{"[#7;+,\
$([N;H2&+0][$([C,a]);!$([C,a](=O))]),\
$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),\
$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"}, // Basic
{"[$([C,S](=[O,S,P])-[O;H1,-1])]"} // Acidic
};
std::vector<std::vector<const ROMol *>> *getPh4Patterns() {
static std::unique_ptr<std::vector<std::vector<const ROMol *>>> patterns;
if (!patterns) {
patterns.reset(new std::vector<std::vector<const ROMol *>>());
for (const auto &smartsV : smartsPatterns) {
std::vector<const ROMol *> v;
for (const auto &smarts : smartsV) {
const ROMol *matcher = pattern_flyweight(smarts).get().getMatcher();
CHECK_INVARIANT(matcher, "bad smarts");
v.push_back(matcher);
}
patterns->push_back(std::move(v));
}
}
return patterns.get();
}
namespace {
std::vector<std::pair<std::vector<unsigned int>, unsigned int>> extractFeatures(
const ROMol &mol, const ShapeInputOptions &shapeOpts) {
// unpack features (PubChem-specific property from SDF)
// NOTE: this unpacking assumes that RWMol-atom-index = SDF-atom-number - 1
// e.g. RWMol uses [0..N-1] and SDF uses [1..N], with atoms in the same
// order
// If there are no PubChem features, falls back on RDKit pphore types.
std::vector<std::pair<std::vector<unsigned int>, unsigned int>>
feature_idx_type;
if (shapeOpts.useColors) {
std::string features;
if (mol.getPropIfPresent(pubchemFeatureName, features)) {
// regular atoms have type 0; feature "atoms" (features represented by a
// single point+radius) must have type > 0
static const std::map<std::string, unsigned int> atomTypes = {
{"acceptor", 1}, {"anion", 2}, {"cation", 3},
{"donor", 4}, {"hydrophobe", 5}, {"rings", 6},
};
std::istringstream iss(features);
std::string line;
unsigned int n = 0;
while (std::getline(iss, line)) {
if (n == 0) {
feature_idx_type.resize(stoul(line));
}
else {
unsigned int f = n - 1;
if (f >= feature_idx_type.size()) {
throw ValueErrorException("Too many features");
}
std::istringstream iss2(line);
std::string token;
unsigned int t = 0;
while (std::getline(iss2, token, ' ')) {
if (t == 0) {
feature_idx_type[f].first.resize(stoul(token));
} else if (t <= feature_idx_type[f].first.size()) {
feature_idx_type[f].first[t - 1] = stoul(token) - 1;
} else {
auto type = atomTypes.find(token);
if (type == atomTypes.end()) {
throw ValueErrorException("Invalid feature type: " + token);
}
feature_idx_type[f].second = type->second;
}
++t;
}
if (t != (feature_idx_type[f].first.size() + 2)) {
throw ValueErrorException("Wrong number of tokens in feature");
}
}
++n;
}
if (n != (feature_idx_type.size() + 1)) {
throw ValueErrorException("Wrong number of features");
}
DEBUG_MSG("# features: " << feature_idx_type.size());
} else {
const auto pattVects = getPh4Patterns();
feature_idx_type.clear();
unsigned pattIdx = 1;
for (const auto &patts : *pattVects) {
for (const auto patt : patts) {
auto matches = SubstructMatch(mol, *patt);
for (auto match : matches) {
std::vector<unsigned int> ats;
for (const auto &pr : match) {
ats.push_back(pr.second);
}
feature_idx_type.emplace_back(ats, pattIdx);
}
}
++pattIdx;
}
}
}
return feature_idx_type;
}
bool atomInSubset(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) {
if (shapeOpts.atomSubset.empty()) {
return true;
}
return std::ranges::find(shapeOpts.atomSubset, atomIdx) !=
shapeOpts.atomSubset.end();
}
bool atomAllowedInColor(unsigned int atomIdx,
const ShapeInputOptions &shapeOpts) {
return std::ranges::find(shapeOpts.notColorAtoms, atomIdx) ==
shapeOpts.notColorAtoms.end();
}
double getAtomRadius(unsigned int atomIdx, const ShapeInputOptions &shapeOpts) {
auto it = std::ranges::find_if(
shapeOpts.atomRadii,
[atomIdx](const auto &p) -> bool { return p.first == atomIdx; });
return it == shapeOpts.atomRadii.end() ? -1.0 : it->second;
}
// Get the atom radii. rad_vector is expected to be big enough to hold them
// all. Also computes the average coordinates of the selected atoms.
void extractAtomRadii(const Conformer &conformer, unsigned int nAtoms,
const ShapeInputOptions &shapeOpts, RDGeom::Point3D &ave,
unsigned int &nSelectedAtoms,
std::vector<double> &rad_vector) {
nSelectedAtoms = 0;
for (unsigned int i = 0u; i < nAtoms; ++i) {
if (!atomInSubset(i, shapeOpts)) {
continue;
}
double rad = getAtomRadius(i, shapeOpts);
if (rad > 0.0) {
rad_vector[nSelectedAtoms++] = rad;
ave += conformer.getAtomPos(i);
} else {
unsigned int Z =
conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum();
if (Z > 1) {
ave += conformer.getAtomPos(i);
if (auto rad = vdw_radii.find(Z); rad != vdw_radii.end()) {
rad_vector[nSelectedAtoms++] = rad->second;
} else {
throw ValueErrorException("No VdW radius for atom with Z=" +
std::to_string(Z));
}
} else if (shapeOpts.includeDummies && Z == 0) {
ave += conformer.getAtomPos(i);
rad_vector[nSelectedAtoms++] = shapeOpts.dummyRadius;
}
}
}
ave /= nSelectedAtoms;
}
void extractAtomCoords(const Conformer &conformer, const unsigned int nAtoms,
const ShapeInputOptions &shapeOpts,
const RDGeom::Point3D &ave, std::vector<float> &coords) {
for (unsigned i = 0, j = 0; i < nAtoms; ++i) {
if (!atomInSubset(i, shapeOpts)) {
continue;
}
// use only non-H for alignment, optionally with dummy atoms.
unsigned int Z = conformer.getOwningMol().getAtomWithIdx(i)->getAtomicNum();
if (Z > 1 || (shapeOpts.includeDummies && Z == 0)) {
RDGeom::Point3D pos = conformer.getAtomPos(i);
if (shapeOpts.normalize) {
pos -= ave;
}
coords[j * 3] = pos.x;
coords[(j * 3) + 1] = pos.y;
coords[(j * 3) + 2] = pos.z;
++j;
}
}
}
void extractFeatureCoords(
const Conformer &conformer, const unsigned int nAtoms,
const unsigned int nSelectedAtoms,
const std::vector<std::pair<std::vector<unsigned int>, unsigned int>>
&feature_idx_type,
const ShapeInputOptions &shapeOpts, const RDGeom::Point3D &ave,
unsigned int &numFeatures, ShapeInput &res,
std::vector<double> &rad_vector) {
// get feature coordinates - simply the average of coords of all atoms in the
// feature
for (unsigned i = 0; i < feature_idx_type.size(); ++i) {
RDGeom::Point3D floc;
unsigned int nSel = 0;
for (unsigned int j = 0; j < feature_idx_type[i].first.size(); ++j) {
unsigned int idx = feature_idx_type[i].first[j];
if (!atomInSubset(idx, shapeOpts) ||
!atomAllowedInColor(idx, shapeOpts)) {
continue;
}
if (idx >= nAtoms ||
conformer.getOwningMol().getAtomWithIdx(idx)->getAtomicNum() <= 1) {
throw ValueErrorException("Invalid feature atom index");
}
floc += conformer.getAtomPos(idx);
++nSel;
}
if (nSel == feature_idx_type[i].first.size()) {
floc /= nSel;
if (shapeOpts.normalize) {
floc -= ave;
}
DEBUG_MSG("feature type " << feature_idx_type[i].second << " (" << floc
<< ")");
auto array_idx = nSelectedAtoms + numFeatures;
res.coord[array_idx * 3] = floc.x;
res.coord[(array_idx * 3) + 1] = floc.y;
res.coord[(array_idx * 3) + 2] = floc.z;
rad_vector[array_idx] = radius_color;
res.atom_type_vector[array_idx] = feature_idx_type[i].second;
++numFeatures;
}
}
}
void extractCustomFeatureCoords(
const unsigned int nSelectedAtoms, const ShapeInputOptions &shapeOpts,
const RDGeom::Point3D &ave, unsigned int &numFeatures, ShapeInput &res,
std::vector<double> &rad_vector,
const std::unique_ptr<RDGeom::Transform3D> &trans) {
for (const auto &feature : shapeOpts.customFeatures) {
unsigned int feature_type = std::get<0>(feature);
RDGeom::Point3D floc = std::get<1>(feature);
if (trans) {
trans->TransformPoint(floc);
}
double radius = std::get<2>(feature);
floc -= ave;
DEBUG_MSG("custom feature type " << feature_type << " (" << floc << ")");
auto array_idx = nSelectedAtoms + numFeatures;
res.coord[array_idx * 3] = floc.x;
res.coord[(array_idx * 3) + 1] = floc.y;
res.coord[(array_idx * 3) + 2] = floc.z;
rad_vector[array_idx] = radius;
res.atom_type_vector[array_idx] = feature_type;
++numFeatures;
}
}
} // namespace
// The conformer is left where it is, the shape is translated to the origin
// and aligned to the principal axes.
ShapeInput PrepareConformer(const ROMol &mol, int confId,
const ShapeInputOptions &shapeOpts) {
Align3D::setUseCutOff(true);
ShapeInput res;
auto feature_idx_type = extractFeatures(mol, shapeOpts);
auto &conformer = mol.getConformer(confId);
if (!conformer.is3D()) {
throw ValueErrorException("Conformer must be 3D");
}
unsigned int nAtoms = mol.getNumAtoms();
// DEBUG_MSG("num atoms: " << nAtoms);
// Start with the arrays as large as they will possibly have to be.
// They will be re-sized later.
unsigned int nAlignmentAtoms = nAtoms;
if (shapeOpts.customFeatures.empty()) {
nAlignmentAtoms += feature_idx_type.size();
} else {
nAlignmentAtoms += shapeOpts.customFeatures.size();
}
std::vector<double> rad_vector(nAlignmentAtoms);
res.atom_type_vector.resize(nAlignmentAtoms, 0);
Conformer confCp(conformer);
std::unique_ptr<RDGeom::Transform3D> trans;
RDGeom::Point3D initialCentroid;
if (shapeOpts.normalize) {
// This ignores hydrogens by default, which is what is needed here.
trans.reset(MolTransforms::computeCanonicalTransform(confCp));
MolTransforms::transformConformer(confCp, *trans);
// Save the rotation part of the transformation. The coords will be centred
// below and left that way so we don't need to keep the translation.
for (unsigned int i = 0, k = 0; i < 3; ++i) {
for (unsigned int j = 0; j < 3; ++j, ++k) {
res.inertialRot[k] = trans->getValUnchecked(i, j);
}
}
// Work back from the canonical transformation to get the initial
// centroid by the inverse rotation of the current translation
// but not taking its negative so it can be used directly as the shift.
initialCentroid = RDGeom::Point3D{trans->getValUnchecked(0, 3),
trans->getValUnchecked(1, 3),
trans->getValUnchecked(2, 3)};
RDGeom::Transform3D invTrans;
invTrans.setValUnchecked(0, 0, trans->getValUnchecked(0, 0));
invTrans.setValUnchecked(0, 1, trans->getValUnchecked(1, 0));
invTrans.setValUnchecked(0, 2, trans->getValUnchecked(2, 0));
invTrans.setValUnchecked(1, 0, trans->getValUnchecked(0, 1));
invTrans.setValUnchecked(1, 1, trans->getValUnchecked(1, 1));
invTrans.setValUnchecked(1, 2, trans->getValUnchecked(2, 1));
invTrans.setValUnchecked(2, 0, trans->getValUnchecked(0, 2));
invTrans.setValUnchecked(2, 1, trans->getValUnchecked(1, 2));
invTrans.setValUnchecked(2, 2, trans->getValUnchecked(2, 2));
invTrans.TransformPoint(initialCentroid);
}
RWMol molCp(mol);
molCp.removeConformer(0);
molCp.addConformer(new Conformer(confCp), true);
RDGeom::Point3D ave;
unsigned int nSelectedAtoms = 0;
// Calculates the average coord as by-product
extractAtomRadii(confCp, nAtoms, shapeOpts, ave, nSelectedAtoms, rad_vector);
// translate steric center to origin
DEBUG_MSG("steric center: (" << ave << ")");
res.shift = {-ave.x, -ave.y, -ave.z};
res.coord.resize(nAlignmentAtoms * 3);
extractAtomCoords(confCp, nAtoms, shapeOpts, ave, res.coord);
unsigned int numFeatures = 0;
if (shapeOpts.customFeatures.empty()) {
extractFeatureCoords(confCp, nAtoms, nSelectedAtoms, feature_idx_type,
shapeOpts, ave, numFeatures, res, rad_vector);
} else if (shapeOpts.useColors) {
extractCustomFeatureCoords(nSelectedAtoms, shapeOpts, ave, numFeatures, res,
rad_vector, trans);
}
// Now cut the final vectors down to the actual number of atoms and
// features used.
nAlignmentAtoms = nSelectedAtoms + numFeatures;
res.coord.resize(nAlignmentAtoms * 3);
rad_vector.resize(nAlignmentAtoms);
res.atom_type_vector.resize(nAlignmentAtoms);
Align3D::setAlpha(rad_vector.data(), rad_vector.size(), res.alpha_vector);
// regular atom self overlap
Align3D::getVolumeAtomIndexVector(res.atom_type_vector.data(),
res.atom_type_vector.size(),
res.volumeAtomIndexVector);
res.sov = Align3D::ComputeShapeOverlap(
res.coord.data(), res.alpha_vector, res.volumeAtomIndexVector,
res.coord.data(), res.alpha_vector, res.volumeAtomIndexVector);
DEBUG_MSG("sov: " << res.sov);
// feature self overlap
if (feature_idx_type.size() > 0) {
Align3D::getColorAtomType2IndexVectorMap(res.atom_type_vector.data(),
res.atom_type_vector.size(),
res.colorAtomType2IndexVectorMap);
res.sof = Align3D::ComputeFeatureOverlap(
res.coord.data(), res.alpha_vector, res.colorAtomType2IndexVectorMap,
res.coord.data(), res.alpha_vector, res.colorAtomType2IndexVectorMap);
DEBUG_MSG("sof: " << res.sof);
}
// Finally set the shift to the negative of the original average coords.
res.shift = {initialCentroid.x, initialCentroid.y, initialCentroid.z};
return res;
}
std::pair<double, double> AlignShape(const ShapeInput &refShape,
ShapeInput &fitShape,
std::vector<float> &matrix,
double opt_param,
unsigned int max_preiters,
unsigned int max_postiters) {
std::set<unsigned int> jointColorAtomTypeSet;
Align3D::getJointColorTypeSet(
refShape.atom_type_vector.data(), refShape.atom_type_vector.size(),
fitShape.atom_type_vector.data(), fitShape.atom_type_vector.size(),
jointColorAtomTypeSet);
auto mapCp = refShape.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(mapCp, jointColorAtomTypeSet);
// Take copy of the color atom mappings so as not to alter the input shape
// which might be re-used.
auto fitMapCp = fitShape.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(fitMapCp,
jointColorAtomTypeSet);
DEBUG_MSG("Running alignment...");
double nbr_st = 0.0;
double nbr_ct = 0.0;
Align3D::Neighbor_Conformers(
refShape.coord.data(), refShape.alpha_vector,
refShape.volumeAtomIndexVector, mapCp, refShape.sov, refShape.sof,
fitShape.coord.data(), fitShape.alpha_vector,
fitShape.volumeAtomIndexVector, fitMapCp, fitShape.sov, fitShape.sof,
!jointColorAtomTypeSet.empty(), max_preiters, max_postiters, opt_param,
matrix.data(), nbr_st, nbr_ct);
DEBUG_MSG("Done!");
DEBUG_MSG("nbr_st: " << nbr_st);
DEBUG_MSG("nbr_ct: " << nbr_ct);
std::vector<float> transformed(fitShape.coord.size());
Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(),
fitShape.coord.size() / 3, matrix.data());
fitShape.coord = transformed;
return std::make_pair(nbr_st, nbr_ct);
}
void TransformConformer(const std::vector<double> &finalTrans,
const std::vector<double> &finalRot,
const std::vector<float> &matrix,
const ShapeInput &fitShape, Conformer &fitConf) {
// Move fitConf to fitShape's initial centroid and principal axes
RDGeom::Transform3D transform0;
transform0.SetTranslation(
RDGeom::Point3D{fitShape.shift[0], fitShape.shift[1], fitShape.shift[2]});
RDGeom::Transform3D transform1;
transform1.setValUnchecked(0, 0, fitShape.inertialRot[0]);
transform1.setValUnchecked(0, 1, fitShape.inertialRot[1]);
transform1.setValUnchecked(0, 2, fitShape.inertialRot[2]);
transform1.setValUnchecked(1, 0, fitShape.inertialRot[3]);
transform1.setValUnchecked(1, 1, fitShape.inertialRot[4]);
transform1.setValUnchecked(1, 2, fitShape.inertialRot[5]);
transform1.setValUnchecked(2, 0, fitShape.inertialRot[6]);
transform1.setValUnchecked(2, 1, fitShape.inertialRot[7]);
transform1.setValUnchecked(2, 2, fitShape.inertialRot[8]);
transform1.setValUnchecked(2, 3, 0.0);
transform1.setValUnchecked(3, 0, 0.0);
transform1.setValUnchecked(3, 1, 0.0);
transform1.setValUnchecked(3, 2, 0.0);
transform1.setValUnchecked(3, 3, 1.0);
// Apply the overlay matrix.
// The matrix comes in as a linear form of a 3x3 rotation matrix and a
// 3x1 translation. Convert that into a RDGeom::Trans3D
RDGeom::Transform3D transform2;
transform2.setValUnchecked(0, 0, matrix[0]);
transform2.setValUnchecked(0, 1, matrix[1]);
transform2.setValUnchecked(0, 2, matrix[2]);
transform2.setValUnchecked(0, 3, matrix[9]);
transform2.setValUnchecked(1, 0, matrix[3]);
transform2.setValUnchecked(1, 1, matrix[4]);
transform2.setValUnchecked(1, 2, matrix[5]);
transform2.setValUnchecked(1, 3, matrix[10]);
transform2.setValUnchecked(2, 0, matrix[6]);
transform2.setValUnchecked(2, 1, matrix[7]);
transform2.setValUnchecked(2, 2, matrix[8]);
transform2.setValUnchecked(2, 3, matrix[11]);
transform2.setValUnchecked(3, 0, 0.0);
transform2.setValUnchecked(3, 1, 0.0);
transform2.setValUnchecked(3, 2, 0.0);
transform2.setValUnchecked(3, 3, 1.0);
// Now apply the final rotation and final translation, which
// is to put it into the reference shape's frame of reference.
RDGeom::Transform3D transform3;
transform3.setValUnchecked(0, 0, finalRot[0]);
transform3.setValUnchecked(0, 1, finalRot[1]);
transform3.setValUnchecked(0, 2, finalRot[2]);
transform3.setValUnchecked(0, 3, finalTrans[0]);
transform3.setValUnchecked(1, 0, finalRot[3]);
transform3.setValUnchecked(1, 1, finalRot[4]);
transform3.setValUnchecked(1, 2, finalRot[5]);
transform3.setValUnchecked(1, 3, finalTrans[1]);
transform3.setValUnchecked(2, 0, finalRot[6]);
transform3.setValUnchecked(2, 1, finalRot[7]);
transform3.setValUnchecked(2, 2, finalRot[8]);
transform3.setValUnchecked(2, 3, finalTrans[2]);
transform3.setValUnchecked(3, 0, 0.0);
transform3.setValUnchecked(3, 1, 0.0);
transform3.setValUnchecked(3, 2, 0.0);
transform3.setValUnchecked(3, 3, 1.0);
auto finalTransform = transform3 * transform2 * transform1 * transform0;
MolTransforms::transformConformer(fitConf, finalTransform);
}
std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, ROMol &fit, std::vector<float> &matrix,
const ShapeInputOptions &shapeOpts, int fitConfId, double opt_param,
unsigned int max_preiters, unsigned int max_postiters, bool applyRefShift) {
PRECONDITION(matrix.size() == 12, "bad matrix size");
Align3D::setUseCutOff(true);
DEBUG_MSG("Fit details:");
auto fitShape = PrepareConformer(fit, fitConfId, shapeOpts);
auto tanis = AlignShape(refShape, fitShape, matrix, opt_param, max_preiters,
max_postiters);
// transform fit coords
Conformer &fit_conformer = fit.getConformer(fitConfId);
std::vector<double> finalTrans{0.0, 0.0, 0.0};
std::vector<double> finalRot{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
if (applyRefShift) {
finalTrans = std::vector<double>{-refShape.shift[0], -refShape.shift[1],
-refShape.shift[2]};
// set the final rotation to the inverse of the original inertialRot
// of the reference.
finalRot =
std::vector<double>{refShape.inertialRot[0], refShape.inertialRot[3],
refShape.inertialRot[6], refShape.inertialRot[1],
refShape.inertialRot[4], refShape.inertialRot[7],
refShape.inertialRot[2], refShape.inertialRot[5],
refShape.inertialRot[8]};
}
TransformConformer(finalTrans, finalRot, matrix, fitShape, fit_conformer);
fit.setProp("shape_align_shape_tanimoto", tanis.first);
fit.setProp("shape_align_color_tanimoto", tanis.second);
return tanis;
}
std::pair<double, double> AlignMolecule(
const ShapeInput &refShape, ROMol &fit, std::vector<float> &matrix,
int fitConfId, bool useColors, double opt_param, unsigned int max_preiters,
unsigned int max_postiters, bool applyRefShift) {
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
return AlignMolecule(refShape, fit, matrix, shapeOpts, fitConfId, opt_param,
max_preiters, max_postiters, applyRefShift);
}
std::pair<double, double> AlignMolecule(
const ROMol &ref, ROMol &fit, std::vector<float> &matrix,
const ShapeInputOptions &refShapeOpts,
const ShapeInputOptions &fitShapeOpts, int refConfId, int fitConfId,
double opt_param, unsigned int max_preiters, unsigned int max_postiters) {
Align3D::setUseCutOff(true);
if (refShapeOpts.useColors != fitShapeOpts.useColors) {
BOOST_LOG(rdWarningLog)
<< "useColor values inconsistent between the reference and fit molecules. Colors will not be used in the alignment."
<< std::endl;
}
DEBUG_MSG("Reference details:");
auto refShape = PrepareConformer(ref, refConfId, refShapeOpts);
bool applyRefShift = true;
auto scores =
AlignMolecule(refShape, fit, matrix, fitShapeOpts, fitConfId, opt_param,
max_preiters, max_postiters, applyRefShift);
return scores;
}
std::pair<double, double> AlignMolecule(const ROMol &ref, ROMol &fit,
std::vector<float> &matrix,
int refConfId, int fitConfId,
bool useColors, double opt_param,
unsigned int max_preiters,
unsigned int max_postiters) {
Align3D::setUseCutOff(true);
DEBUG_MSG("Reference details:");
ShapeInputOptions shapeOpts;
shapeOpts.useColors = useColors;
return AlignMolecule(ref, fit, matrix, shapeOpts, shapeOpts, refConfId,
fitConfId, opt_param, max_preiters, max_postiters);
}
std::pair<double, double> ScoreShape(const ShapeInput &shape1,
ShapeInput &shape2, bool useColors) {
double shapeOverlap = Align3D::ComputeShapeOverlap(
shape1.coord.data(), shape1.alpha_vector, shape1.volumeAtomIndexVector,
shape2.coord.data(), shape2.alpha_vector, shape2.volumeAtomIndexVector);
double shape = shapeOverlap / (shape1.sov + shape2.sov - shapeOverlap);
std::set<unsigned int> jointColorAtomTypeSet;
Align3D::getJointColorTypeSet(
shape1.atom_type_vector.data(), shape1.atom_type_vector.size(),
shape2.atom_type_vector.data(), shape2.atom_type_vector.size(),
jointColorAtomTypeSet);
// Take copy of the color atom mappings so as not to alter the input shape
// which might be re-used.
double color = 0.0;
if (useColors) {
auto shape1MapCp = shape1.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(shape1MapCp,
jointColorAtomTypeSet);
auto shape2MapCp = shape2.colorAtomType2IndexVectorMap;
Align3D::restrictColorAtomType2IndexVectorMap(shape2MapCp,
jointColorAtomTypeSet);
double featureOverlap = Align3D::ComputeFeatureOverlap(
shape1.coord.data(), shape1.alpha_vector, shape1MapCp,
shape2.coord.data(), shape2.alpha_vector, shape2MapCp, nullptr);
color = featureOverlap / (shape1.sof + shape2.sof - featureOverlap);
}
return std::make_pair(shape, color);
}
std::pair<double, double> ScoreMolecule(const ShapeInput &shape,
RDKit::ROMol &mol,
const ShapeInputOptions &molShapeOpts,
int molConfId) {
ShapeInputOptions shapeOpts = molShapeOpts;
shapeOpts.normalize = false;
auto shape2 = PrepareConformer(mol, molConfId, shapeOpts);
return ScoreShape(shape, shape2, shapeOpts.useColors);
}
std::pair<double, double> ScoreMolecule(const RDKit::ROMol &mol1,
RDKit::ROMol &mol2,
const ShapeInputOptions &mol1ShapeOpts,
const ShapeInputOptions &mol2ShapeOpts,
int mol1ConfId, int mol2ConfId) {
ShapeInputOptions shapeOpts = mol1ShapeOpts;
shapeOpts.normalize = false;
auto shape1 = PrepareConformer(mol1, mol1ConfId, shapeOpts);
shapeOpts = mol2ShapeOpts;
shapeOpts.normalize = false;
auto shape2 = PrepareConformer(mol2, mol2ConfId, shapeOpts);
// If either shapeOpts has useColors of false, we have to go with that.
bool useColors = mol1ShapeOpts.useColors && mol2ShapeOpts.useColors;
return ScoreShape(shape1, shape2, useColors);
}