Files
rdkit/Code/GraphMol/GaussianShape/GaussianShape.cpp
2026-03-26 21:53:54 +01:00

501 lines
21 KiB
C++

//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// This is an implementation of the Gaussian overlap molecular overlay
// method of Grant, Pickup and Gallardo.
// J. Comp. Chem., 17, 1653-1666 (1996)
// https://doi.org/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
// It uses implementation ideas and some code from the PubChem implementation
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp.
#include <cmath>
#include <Geometry/Transform3D.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/GaussianShape/GaussianShape.h>
#include "GraphMol/SmilesParse/SmilesWrite.h"
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
namespace RDKit {
namespace GaussianShape {
namespace {
// Compute final overlay transform, which applies fitShape's
// initial canonical transformation, followed by the overlay transform and
// finally the inverse of refShape's initial canonical transformation.
RDGeom::Transform3D computeFinalTransform(
const std::array<double, 3> &inRefTrans,
const std::array<double, 9> &inRefRot,
const std::array<double, 3> &inFitTrans,
const std::array<double, 9> &inFitRot, RDGeom::Transform3D &ovXform) {
// Move to fitShape's initial centroid and principal axes
RDGeom::Transform3D transform0;
transform0.SetTranslation(
RDGeom::Point3D{inFitTrans[0], inFitTrans[1], inFitTrans[2]});
RDGeom::Transform3D transform1;
transform1.setValUnchecked(0, 0, inFitRot[0]);
transform1.setValUnchecked(0, 1, inFitRot[1]);
transform1.setValUnchecked(0, 2, inFitRot[2]);
transform1.setValUnchecked(1, 0, inFitRot[3]);
transform1.setValUnchecked(1, 1, inFitRot[4]);
transform1.setValUnchecked(1, 2, inFitRot[5]);
transform1.setValUnchecked(2, 0, inFitRot[6]);
transform1.setValUnchecked(2, 1, inFitRot[7]);
transform1.setValUnchecked(2, 2, inFitRot[8]);
RDGeom::Transform3D toRefRefFrame;
// Rotate by the inverse of the ref shape's canonical rotation and
// translate by the negative of its canonical translation.
toRefRefFrame.setValUnchecked(0, 0, inRefRot[0]);
toRefRefFrame.setValUnchecked(0, 1, inRefRot[3]);
toRefRefFrame.setValUnchecked(0, 2, inRefRot[6]);
toRefRefFrame.setValUnchecked(0, 3, -inRefTrans[0]);
toRefRefFrame.setValUnchecked(1, 0, inRefRot[1]);
toRefRefFrame.setValUnchecked(1, 1, inRefRot[4]);
toRefRefFrame.setValUnchecked(1, 2, inRefRot[7]);
toRefRefFrame.setValUnchecked(1, 3, -inRefTrans[1]);
toRefRefFrame.setValUnchecked(2, 0, inRefRot[2]);
toRefRefFrame.setValUnchecked(2, 1, inRefRot[5]);
toRefRefFrame.setValUnchecked(2, 2, inRefRot[8]);
toRefRefFrame.setValUnchecked(2, 3, -inRefTrans[2]);
auto finalTransform = toRefRefFrame * ovXform * transform1 * transform0;
return finalTransform;
}
// Return the original transformation quaternion for the given index.
// Different optimisation modes have different numbers of starting
// orientations to try. In order these are no transformation, rotate 180
// degrees about each axis and rotate +/- 45 degrees about 2 axes at a time.
std::array<double, 4> getInitialRotationPlain(
int index, const ShapeInput &refShape, const ShapeInput &fitShape,
const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
double &score) {
static const double sinpi_4 = std::sin(std::atan(1.0));
const static std::vector<std::array<double, 4>> quats{
{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0}, {0.0, 0.0, 0.0, 1.0},
{sinpi_4, -sinpi_4, 0.0, 0.0}, {sinpi_4, sinpi_4, 0.0, 0.0},
{0.0, 0.0, -sinpi_4, sinpi_4}, {0.0, 0.0, sinpi_4, sinpi_4},
{sinpi_4, 0.0, 0.0, -sinpi_4}, {0.0, sinpi_4, sinpi_4, 0.0},
{sinpi_4, 0.0, 0.0, sinpi_4}, {0.0, -sinpi_4, sinpi_4, 0.0},
{sinpi_4, 0.0, sinpi_4, 0.0}, {0.0, sinpi_4, 0.0, sinpi_4},
{0.0, -sinpi_4, 0.0, sinpi_4}, {sinpi_4, 0.0, -sinpi_4, 0.0}};
bool useColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
std::array<double, 7> quatTrans{
quats[index][0], quats[index][1], quats[index][2], quats[index][3],
refDisp[0], refDisp[1], refDisp[2]};
SingleConformerAlignment sca(
refShape.getCoords(), refShape.getTypes().data(),
refShape.getCarbonRadii(), refShape.getNumAtoms(),
refShape.getNumFeatures(), refShape.getShapeVolume(),
refShape.getColorVolume(), fitShape.getCoords(),
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
auto scores = sca.calcScores(useColor);
score = scores[0];
return quats[index];
}
// Return the initial transformation matrix in the manner of the PubChem
// overlay code. Rotate 180 degrees about each axis, and then
// add +/ ~25 degrees from that. It is not revealed where that
// angle comes from.
std::array<double, 4> getInitialRotationWiggle(
int index, const ShapeInput &refShape, const ShapeInput &fitShape,
const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
double &score) {
const static double qrot1 = 0.977659114061,
qrot = 0.210196709523; // 0.215 (un-normalized)
const static std::vector<std::array<double, 4>> quats{
{1.0, 0.0, 0.0, 0.0}, // 0 X, Y, Z
{qrot1, qrot, 0.0, 0.0}, {qrot1, -qrot, 0.0, 0.0},
{qrot1, 0.0, qrot, 0.0}, {qrot1, 0.0, -qrot, 0.0},
{qrot1, 0.0, 0.0, qrot}, {qrot1, 0.0, 0.0, -qrot},
{0.0, 1.0, 0.0, 0.0}, // 1 X, -Y, -Z
{qrot, qrot1, 0.0, 0.0}, {qrot, -qrot1, 0.0, 0.0},
{0.0, qrot1, qrot, 0.0}, {0.0, qrot1, -qrot, 0.0},
{0.0, qrot1, 0.0, qrot}, {0.0, qrot1, 0.0, -qrot},
{0.0, 0.0, 0.0, 1.0}, // 2 -X, -Y, Z
{qrot, 0.0, 0.0, qrot1}, {qrot, 0.0, 0.0, -qrot1},
{0.0, qrot, 0.0, qrot1}, {0.0, -qrot, 0.0, qrot1},
{0.0, 0.0, qrot, qrot1}, {0.0, 0.0, -qrot, qrot1},
{0.0, 0.0, 1.0, 0.0}, // 3 -X, Y, -Z
{qrot, 0.0, qrot1, 0.0}, {qrot, 0.0, -qrot1, 0.0},
{0.0, qrot, qrot1, 0.0}, {0.0, -qrot, qrot1, 0.0},
{0.0, 0.0, qrot1, qrot}, {0.0, 0.0, qrot1, -qrot}};
unsigned int start_quat = index * 7;
unsigned int bestQuat = 0;
double bestScore = 0.0;
bool useColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
std::array<double, 7> tmpQuatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
SingleConformerAlignment sca(
refShape.getCoords(), refShape.getTypes().data(),
refShape.getCarbonRadii(), refShape.getNumAtoms(),
refShape.getNumFeatures(), refShape.getShapeVolume(),
refShape.getColorVolume(), fitShape.getCoords(),
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
fitShape.getShapeVolume(), fitShape.getColorVolume(), tmpQuatTrans,
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
for (unsigned int i = start_quat; i < start_quat + 7; ++i) {
std::array<double, 7> quatTrans{quats[i][0], quats[i][1], quats[i][2],
quats[i][3], refDisp[0], refDisp[1],
refDisp[2]};
sca.setQuatTrans(quatTrans);
auto scores = sca.calcScores(useColor);
if (scores[0] > bestScore) {
bestScore = scores[0];
bestQuat = i;
}
}
score = bestScore;
return quats[bestQuat];
}
// Return the translation that puts the extreme of refShape at the
// extreme of the fitShape along the appropriate axis.
RDGeom::Point3D getInitialTranslation(int index, ShapeInput &refShape,
ShapeInput fitShape) {
auto getDisp = [](ShapeInput &shape, size_t i) -> RDGeom::Point3D {
const double *coord =
shape.getCoords().data() + shape.calcExtremes()[i] * 4;
return RDGeom::Point3D(coord[0], coord[1], coord[2]);
};
RDGeom::Point3D disp;
RDGeom::Point3D refDisp, fitDisp;
switch (index) {
case 1:
refDisp = getDisp(refShape, 0);
fitDisp = getDisp(fitShape, 0);
disp = fitDisp - refDisp;
break;
case 2:
refDisp = getDisp(refShape, 1);
fitDisp = getDisp(fitShape, 1);
disp = fitDisp - refDisp;
break;
case 3:
refDisp = getDisp(refShape, 2);
fitDisp = getDisp(fitShape, 2);
disp = fitDisp - refDisp;
break;
case 4:
refDisp = getDisp(refShape, 3);
fitDisp = getDisp(fitShape, 3);
disp = fitDisp - refDisp;
break;
case 5:
refDisp = getDisp(refShape, 4);
fitDisp = getDisp(fitShape, 4);
disp = fitDisp - refDisp;
break;
case 6:
refDisp = getDisp(refShape, 5);
fitDisp = getDisp(fitShape, 5);
disp = fitDisp - refDisp;
break;
default:
break;
}
return disp;
}
// This is how the PubChem code decides between ROTATE_180_WIGGLE and
// ROTATE_45. I have no clue.
unsigned int calculateQrat(const std::array<double, 3> &eigenValues) {
double double_ev_oe[3]{eigenValues[1] + eigenValues[2] - eigenValues[0],
eigenValues[0] + eigenValues[2] - eigenValues[1],
eigenValues[0] + eigenValues[1] - eigenValues[2]};
std::sort(double_ev_oe, double_ev_oe + 3, std::greater<double>());
const static double qrat_threshold = 0.7225; // 0.85*0.85;
unsigned int qrat = 1000;
unsigned int u_rqyx, u_rqzy;
if (double_ev_oe[1] > 0) {
if (qrat_threshold < (double_ev_oe[1] / double_ev_oe[0])) {
u_rqyx = 1;
} else {
u_rqyx = 0;
}
if (qrat_threshold < (double_ev_oe[2] / double_ev_oe[1])) {
u_rqzy = 1;
} else {
u_rqzy = 0;
}
qrat = u_rqyx + u_rqzy;
}
return qrat;
}
StartMode decideStartModeFromEigenValues(ShapeInput &refShape,
ShapeInput &fitShape) {
// The PubChem code uses the moments of inertia for this, rather than the
// canonical transformation.
auto rqratwf = calculateQrat(refShape.calcMomentsOfInertia(true));
auto fqratwf = calculateQrat(fitShape.calcMomentsOfInertia(true));
StartMode startModeWF{StartMode::ROTATE_180_WIGGLE};
if (rqratwf > 0 || fqratwf > 0) {
startModeWF = StartMode::ROTATE_45;
}
return startModeWF;
}
std::array<double, 3> alignShape(ShapeInput &refShape, ShapeInput &fitShape,
RDGeom::Transform3D &bestXform,
const ShapeOverlayOptions &overlayOpts) {
unsigned int finalRotIndex = 1;
auto startMode = overlayOpts.startMode;
if (startMode == StartMode::A_LA_PUBCHEM) {
startMode = decideStartModeFromEigenValues(refShape, fitShape);
}
switch (startMode) {
case StartMode::ROTATE_0:
case StartMode::ROTATE_0_FRAGMENT:
break;
case StartMode::ROTATE_180:
case StartMode::ROTATE_180_FRAGMENT:
case StartMode::ROTATE_180_WIGGLE:
finalRotIndex = 4;
break;
case StartMode::ROTATE_45:
case StartMode::ROTATE_45_FRAGMENT:
finalRotIndex = 16;
break;
default:
break;
}
unsigned int finalTransIndex = 1;
if (startMode == StartMode::ROTATE_0_FRAGMENT ||
startMode == StartMode::ROTATE_45_FRAGMENT ||
startMode == StartMode::ROTATE_180_FRAGMENT) {
finalTransIndex = 7;
}
std::array<double, 3> bestScore;
double bestTotal = -1.0;
// Get together the start transformations.
std::vector<std::unique_ptr<SingleConformerAlignment>> aligners;
std::vector<std::pair<double, unsigned int>> bestScoreForStart;
bestScoreForStart.reserve(finalTransIndex * finalRotIndex);
unsigned int k = 0;
for (unsigned int j = 0; j < finalTransIndex; j++) {
auto refDisp = getInitialTranslation(j, refShape, fitShape);
std::array<double, 4> quat;
for (unsigned int i = 0; i < finalRotIndex; i++, k++) {
double score = 0.0;
if (startMode == StartMode::ROTATE_180_WIGGLE) {
quat = getInitialRotationWiggle(i, refShape, fitShape, refDisp,
overlayOpts, score);
} else {
quat = getInitialRotationPlain(i, refShape, fitShape, refDisp,
overlayOpts, score);
}
std::array<double, 7> initQuat{quat[0], quat[1], quat[2], quat[3],
refDisp.x, refDisp.y, refDisp.z};
aligners.emplace_back(std::make_unique<SingleConformerAlignment>(
refShape.getCoords(), refShape.getTypes().data(),
refShape.getCarbonRadii(), refShape.getNumAtoms(),
refShape.getNumFeatures(), refShape.getShapeVolume(),
refShape.getColorVolume(), fitShape.getCoords(),
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
fitShape.getShapeVolume(), fitShape.getColorVolume(), initQuat,
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
overlayOpts.optParam, overlayOpts.useDistCutoff,
overlayOpts.distCutoff, overlayOpts.shapeConvergenceCriterion,
overlayOpts.nSteps));
bestScoreForStart.push_back({score, k});
}
}
// Do it in 2 cycles, a quick optimisation first, followed by an additional
// longer one for those that look like they're going to win.
for (unsigned int cycle = 0; cycle < 2; cycle++) {
std::ranges::sort(bestScoreForStart,
[](const auto &p1, const auto &p2) -> bool {
return p1.first > p2.first;
});
std::vector<std::pair<double, unsigned int>> nextBestScoreForStart;
nextBestScoreForStart.reserve(finalTransIndex * finalRotIndex);
for (const auto &[bssf, k] : bestScoreForStart) {
if (cycle == 1) {
if (bssf < 0.7 * bestScore[0]) {
continue;
}
}
std::array<double, 20> outScores;
aligners[k]->doOverlay(outScores, cycle);
nextBestScoreForStart.emplace_back(outScores[0], k);
if (outScores[0] > bestTotal) {
bestTotal = outScores[0];
bestScore =
std::array<double, 3>{outScores[0], outScores[1], outScores[2]};
aligners[k]->getFinalQuatTrans(bestXform);
}
}
bestScoreForStart = nextBestScoreForStart;
}
return bestScore;
}
} // namespace
std::array<double, 3> AlignShape(const ShapeInput &refShape,
ShapeInput &fitShape,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts) {
// The shapes aren't necessarily normalized (it's not done on creation, for
// example) but they might need to be.
auto workingRefShape = std::make_unique<ShapeInput>(refShape);
auto workingFitShape = std::make_unique<ShapeInput>(fitShape);
auto inRefTrans = workingRefShape->calcCanonicalTranslation();
auto inRefRot = workingRefShape->calcCanonicalRotation();
auto inFitTrans = workingFitShape->calcCanonicalTranslation();
auto inFitRot = workingFitShape->calcCanonicalRotation();
// If we're not normalizing, translate both shapes so that the fit
// is at the origin, so the rotations work.
RDGeom::Transform3D moveToOrigin;
RDGeom::Transform3D moveFromOrigin;
if (overlayOpts.normalize) {
if (!workingRefShape->getNormalized()) {
workingRefShape->normalizeCoords();
}
if (!workingFitShape->getNormalized()) {
workingFitShape->normalizeCoords();
}
} else {
moveToOrigin.SetTranslation(
RDGeom::Point3D{workingFitShape->calcCanonicalTranslation()[0],
workingFitShape->calcCanonicalTranslation()[1],
workingFitShape->calcCanonicalTranslation()[2]});
moveFromOrigin.SetTranslation(
RDGeom::Point3D{-workingFitShape->calcCanonicalTranslation()[0],
-workingFitShape->calcCanonicalTranslation()[1],
-workingFitShape->calcCanonicalTranslation()[2]});
workingFitShape->transformCoords(moveToOrigin);
workingRefShape->transformCoords(moveToOrigin);
}
RDGeom::Transform3D bestXform;
auto scores =
alignShape(*workingRefShape, *workingFitShape, bestXform, overlayOpts);
if (!overlayOpts.normalize) {
// Shove it back again.
auto finalXform = moveFromOrigin * bestXform * moveToOrigin;
bestXform = finalXform;
} else {
auto finalXform = computeFinalTransform(inRefTrans, inRefRot, inFitTrans,
inFitRot, bestXform);
bestXform = finalXform;
}
fitShape.transformCoords(bestXform);
if (xform) {
*xform = bestXform;
}
return scores;
}
std::array<double, 3> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
const ShapeInputOptions &fitOpts,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts,
int fitConfId) {
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
RDGeom::Transform3D tmpXform;
auto scores = AlignShape(refShape, fitShape, &tmpXform, overlayOpts);
MolTransforms::transformConformer(fit.getConformer(fitConfId), tmpXform);
if (xform) {
*xform = tmpXform;
}
return scores;
}
std::array<double, 3> AlignMolecule(const ROMol &ref, ROMol &fit,
const ShapeInputOptions &refOpts,
const ShapeInputOptions &fitOpts,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts,
int refConfId, int fitConfId) {
auto refShape = ShapeInput(ref, refConfId, refOpts, overlayOpts);
auto scores =
AlignMolecule(refShape, fit, fitOpts, xform, overlayOpts, fitConfId);
return scores;
}
std::array<double, 3> ScoreShape(const ShapeInput &refShape,
const ShapeInput &fitShape,
const ShapeOverlayOptions &overlayOpts) {
auto refWorking = refShape.getCoords();
auto fitWorking = fitShape.getCoords();
std::array<double, 7> quatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
SingleConformerAlignment sca(
refShape.getCoords(), refShape.getTypes().data(),
refShape.getCarbonRadii(), refShape.getNumAtoms(),
refShape.getNumFeatures(), refShape.getShapeVolume(),
refShape.getColorVolume(), fitShape.getCoords(),
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
bool includeColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
auto scores = sca.calcScores(refShape.getCoords().data(),
fitShape.getCoords().data(), includeColor);
return std::array{scores[0], scores[1], scores[2]};
}
std::array<double, 3> ScoreMolecule(const ShapeInput &refShape,
const ROMol &fit,
const ShapeInputOptions &fitOpts,
const ShapeOverlayOptions &overlayOpts,
int fitConfId) {
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
return ScoreShape(refShape, fitShape, overlayOpts);
}
std::array<double, 3> ScoreMolecule(const ROMol &ref, const ROMol &fit,
const ShapeInputOptions &refOpts,
const ShapeInputOptions &fitOpts,
const ShapeOverlayOptions &overlayOpts,
int refConfId, int fitConfId) {
ShapeOverlayOptions tmpOpts = overlayOpts;
tmpOpts.normalize = false;
tmpOpts.startMode = StartMode::ROTATE_0;
ShapeInputOptions tmpRefOpts = refOpts;
auto refShape = ShapeInput(ref, refConfId, refOpts, tmpOpts);
ShapeInputOptions tmpFitOpts = fitOpts;
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, tmpOpts);
return ScoreShape(refShape, fitShape, tmpOpts);
}
} // namespace GaussianShape
} // namespace RDKit