Gaussian shape overlays (#9095)

This commit is contained in:
David Cosgrove
2026-03-26 20:53:54 +00:00
committed by GitHub
parent adf060c881
commit 5235f53910
16 changed files with 4070 additions and 10 deletions

View File

@@ -78,6 +78,7 @@ add_subdirectory(MolAlign)
add_subdirectory(MolChemicalFeatures)
add_subdirectory(ShapeHelpers)
add_subdirectory(MolCatalog)
add_subdirectory(GaussianShape)
add_subdirectory(MolDraw2D)

View File

@@ -0,0 +1,13 @@
# Shared library holding the Gaussian shape overlay implementation.
rdkit_library(GaussianShape
GaussianShape.cpp ShapeInput.cpp SingleConformerAlignment.cpp
SHARED LINK_LIBRARIES SmilesParse SubstructMatch MolTransforms)
# RDKIT_GAUSSIANSHAPE_BUILD is PRIVATE, so it is only defined while the
# library's own sources are being compiled, not for code that links to it.
target_compile_definitions(GaussianShape PRIVATE RDKIT_GAUSSIANSHAPE_BUILD)
# Headers registered for this library. SingleConformerAlignment.h is not
# listed here - presumably it is an internal header; confirm.
rdkit_headers(GaussianShape.h ShapeInput.h ShapeOverlayOptions.h)
# Catch-based unit tests for the library.
rdkit_catch_test(testGaussianShape catch_tests.cpp LINK_LIBRARIES GaussianShape
FileParsers MolAlign MolTransforms)
# The Wrap subdirectory is only added when the Python wrappers are enabled.
if(RDK_BUILD_PYTHON_WRAPPERS)
add_subdirectory(Wrap)
endif()

View File

@@ -0,0 +1,500 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// This is an implementation of the Gaussian overlap molecular overlay
// method of Grant, Pickup and Gallardo.
// J. Comp. Chem., 17, 1653-1666 (1996)
// https://doi.org/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
// It uses implementation ideas and some code from the PubChem implementation
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp.
#include <cmath>
#include <Geometry/Transform3D.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/GaussianShape/GaussianShape.h>
#include "GraphMol/SmilesParse/SmilesWrite.h"
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
namespace RDKit {
namespace GaussianShape {
namespace {
// Assemble the transformation that takes the fit shape from its input
// coordinates to its overlaid position in the reference shape's input
// coordinates. Reading the product right to left it:
//   1. translates by the fit shape's canonical translation (inFitTrans),
//   2. rotates by the fit shape's canonical rotation (inFitRot),
//   3. applies the overlay transformation found by the optimiser (ovXform),
//   4. applies the transpose of the reference shape's canonical rotation
//      (inRefRot) and subtracts its canonical translation (inRefTrans),
//      i.e. undoes the reference shape's canonical transformation.
// Both rotations are read as row-major 3x3 matrices.
RDGeom::Transform3D computeFinalTransform(
    const std::array<double, 3> &inRefTrans,
    const std::array<double, 9> &inRefRot,
    const std::array<double, 3> &inFitTrans,
    const std::array<double, 9> &inFitRot, RDGeom::Transform3D &ovXform) {
  // Step 1: the fit shape's canonical translation.
  RDGeom::Transform3D fitShift;
  fitShift.SetTranslation(
      RDGeom::Point3D{inFitTrans[0], inFitTrans[1], inFitTrans[2]});

  // Steps 2 and 4 are filled in together, one row at a time.
  RDGeom::Transform3D fitSpin;
  RDGeom::Transform3D undoRefCanon;
  for (unsigned int row = 0; row < 3; ++row) {
    for (unsigned int col = 0; col < 3; ++col) {
      fitSpin.setValUnchecked(row, col, inFitRot[3 * row + col]);
      // Swapped indices: the transpose of the reference rotation.
      undoRefCanon.setValUnchecked(row, col, inRefRot[3 * col + row]);
    }
    undoRefCanon.setValUnchecked(row, 3, -inRefTrans[row]);
  }

  return undoRefCanon * ovXform * fitSpin * fitShift;
}
// Return the starting rotation quaternion with the given index, and put the
// score of that start (before any optimisation) into score.
// Different start modes use different numbers of these orientations. As
// described by the original author, in order they are: no transformation,
// rotations of 180 degrees about each axis, and then rotations of +/- 45
// degrees about 2 axes at a time. In the table, entry 0 is the identity,
// entries 1-3 have a single unit component and the other 12 have two
// components of magnitude sin(pi/4).
// The index is not range-checked; the caller must pass 0-15.
std::array<double, 4> getInitialRotationPlain(
    int index, const ShapeInput &refShape, const ShapeInput &fitShape,
    const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
    double &score) {
  // atan(1) is pi/4.
  static const double s = std::sin(std::atan(1.0));
  static const std::vector<std::array<double, 4>> startQuats{
      {1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0},
      {0.0, 0.0, 0.0, 1.0}, {s, -s, 0.0, 0.0},    {s, s, 0.0, 0.0},
      {0.0, 0.0, -s, s},    {0.0, 0.0, s, s},     {s, 0.0, 0.0, -s},
      {0.0, s, s, 0.0},     {s, 0.0, 0.0, s},     {0.0, -s, s, 0.0},
      {s, 0.0, s, 0.0},     {0.0, s, 0.0, s},     {0.0, -s, 0.0, s},
      {s, 0.0, -s, 0.0}};
  const auto &chosen = startQuats[index];

  // Quaternion followed by the translation, as the aligner expects.
  std::array<double, 7> quatTrans{chosen[0],  chosen[1],  chosen[2], chosen[3],
                                  refDisp[0], refDisp[1], refDisp[2]};
  SingleConformerAlignment sca(
      refShape.getCoords(), refShape.getTypes().data(),
      refShape.getCarbonRadii(), refShape.getNumAtoms(),
      refShape.getNumFeatures(), refShape.getShapeVolume(),
      refShape.getColorVolume(), fitShape.getCoords(),
      fitShape.getTypes().data(), fitShape.getCarbonRadii(),
      fitShape.getNumAtoms(), fitShape.getNumFeatures(),
      fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
      overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
      overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
      overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);

  // Colour is scored in every mode except shape-only.
  const bool withColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
  auto startScores = sca.calcScores(withColor);
  score = startScores[0];
  return chosen;
}
// Return the starting rotation quaternion in the manner of the PubChem
// overlay code, and put its (unoptimised) score into score.
// The table holds 4 groups of 7 quaternions. Each group starts with a base
// orientation (identity, or a 180 degree rotation about one axis) followed by
// 6 small perturbations of it (+/- ~25 degrees according to the original
// author; it is not revealed where that angle comes from). index selects the
// group (0-3, not range-checked); all 7 members are scored and the best
// scoring one is returned.
// If no member of the group scores above 0.0 the group's base orientation is
// returned, so that each group still contributes a distinct starting point.
std::array<double, 4> getInitialRotationWiggle(
    int index, const ShapeInput &refShape, const ShapeInput &fitShape,
    const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
    double &score) {
  const static double qrot1 = 0.977659114061,
                      qrot = 0.210196709523;  // 0.215 (un-normalized)
  const static std::vector<std::array<double, 4>> quats{
      {1.0, 0.0, 0.0, 0.0},  // 0 X, Y, Z
      {qrot1, qrot, 0.0, 0.0}, {qrot1, -qrot, 0.0, 0.0},
      {qrot1, 0.0, qrot, 0.0}, {qrot1, 0.0, -qrot, 0.0},
      {qrot1, 0.0, 0.0, qrot}, {qrot1, 0.0, 0.0, -qrot},
      {0.0, 1.0, 0.0, 0.0},  // 1 X, -Y, -Z
      {qrot, qrot1, 0.0, 0.0}, {qrot, -qrot1, 0.0, 0.0},
      {0.0, qrot1, qrot, 0.0}, {0.0, qrot1, -qrot, 0.0},
      {0.0, qrot1, 0.0, qrot}, {0.0, qrot1, 0.0, -qrot},
      {0.0, 0.0, 0.0, 1.0},  // 2 -X, -Y, Z
      {qrot, 0.0, 0.0, qrot1}, {qrot, 0.0, 0.0, -qrot1},
      {0.0, qrot, 0.0, qrot1}, {0.0, -qrot, 0.0, qrot1},
      {0.0, 0.0, qrot, qrot1}, {0.0, 0.0, -qrot, qrot1},
      {0.0, 0.0, 1.0, 0.0},  // 3 -X, Y, -Z
      {qrot, 0.0, qrot1, 0.0}, {qrot, 0.0, -qrot1, 0.0},
      {0.0, qrot, qrot1, 0.0}, {0.0, -qrot, qrot1, 0.0},
      {0.0, 0.0, qrot1, qrot}, {0.0, 0.0, qrot1, -qrot}};
  constexpr unsigned int quatsPerGroup = 7;
  const unsigned int start_quat = index * quatsPerGroup;
  // Start from the group's own base orientation rather than entry 0 of the
  // whole table, so a group where nothing overlaps doesn't silently fall back
  // to the identity.
  unsigned int bestQuat = start_quat;
  double bestScore = 0.0;
  const bool useColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;

  // One aligner is built and then re-pointed at each candidate in turn.
  std::array<double, 7> tmpQuatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  SingleConformerAlignment sca(
      refShape.getCoords(), refShape.getTypes().data(),
      refShape.getCarbonRadii(), refShape.getNumAtoms(),
      refShape.getNumFeatures(), refShape.getShapeVolume(),
      refShape.getColorVolume(), fitShape.getCoords(),
      fitShape.getTypes().data(), fitShape.getCarbonRadii(),
      fitShape.getNumAtoms(), fitShape.getNumFeatures(),
      fitShape.getShapeVolume(), fitShape.getColorVolume(), tmpQuatTrans,
      overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
      overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
      overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
  for (unsigned int i = start_quat; i < start_quat + quatsPerGroup; ++i) {
    std::array<double, 7> quatTrans{quats[i][0], quats[i][1], quats[i][2],
                                    quats[i][3], refDisp[0],  refDisp[1],
                                    refDisp[2]};
    sca.setQuatTrans(quatTrans);
    auto scores = sca.calcScores(useColor);
    if (scores[0] > bestScore) {
      bestScore = scores[0];
      bestQuat = i;
    }
  }
  score = bestScore;
  return quats[bestQuat];
}
// Return the translation that puts the extreme of refShape at the
// corresponding extreme of fitShape: the fit extreme point minus the ref
// extreme point.
// index 0 (or anything outside 1-6) means "no translation" and gives a zero
// vector; index 1-6 selects entry index-1 of each shape's calcExtremes()
// array.
// Both shapes are taken by reference: calcExtremes() is non-const, but there
// is no need to deep-copy the fit shape on every call.
RDGeom::Point3D getInitialTranslation(int index, ShapeInput &refShape,
                                      ShapeInput &fitShape) {
  RDGeom::Point3D disp;
  if (index < 1 || index > 6) {
    return disp;
  }
  // Coordinates are stored 4 doubles per point; only x, y, z are wanted here.
  auto extremePoint = [](ShapeInput &shape, size_t i) -> RDGeom::Point3D {
    const double *coord =
        shape.getCoords().data() + shape.calcExtremes()[i] * 4;
    return RDGeom::Point3D(coord[0], coord[1], coord[2]);
  };
  const auto which = static_cast<size_t>(index - 1);
  const RDGeom::Point3D refDisp = extremePoint(refShape, which);
  const RDGeom::Point3D fitDisp = extremePoint(fitShape, which);
  disp = fitDisp - refDisp;
  return disp;
}
// Helper for the PubChem-style choice between ROTATE_180_WIGGLE and
// ROTATE_45 (the original author notes the rationale is not documented).
// From the three input values a, b, c it forms b+c-a, a+c-b and a+b-c, sorts
// them into descending order and counts how many of the two successive ratios
// (second/first, third/second) exceed 0.85 squared. The result is therefore
// 0, 1 or 2, or 1000 if the middle value is not positive.
unsigned int calculateQrat(const std::array<double, 3> &eigenValues) {
  constexpr double ratioThreshold = 0.7225;  // 0.85 * 0.85
  constexpr unsigned int undetermined = 1000;

  std::array<double, 3> combos{
      eigenValues[1] + eigenValues[2] - eigenValues[0],
      eigenValues[0] + eigenValues[2] - eigenValues[1],
      eigenValues[0] + eigenValues[1] - eigenValues[2]};
  std::sort(combos.begin(), combos.end(), std::greater<double>());

  if (!(combos[1] > 0)) {
    return undetermined;
  }
  const unsigned int secondOverFirst =
      ratioThreshold < (combos[1] / combos[0]) ? 1u : 0u;
  const unsigned int thirdOverSecond =
      ratioThreshold < (combos[2] / combos[1]) ? 1u : 0u;
  return secondOverFirst + thirdOverSecond;
}
// Pick the start mode the way the PubChem code does. Despite the name, the
// values fed to calculateQrat() are the moments of inertia of each shape
// (colour features included) rather than the eigenvalues of the canonical
// transformation, because that is what the PubChem code uses.
// ROTATE_180_WIGGLE is chosen only when both shapes give a qrat of 0;
// anything else gives ROTATE_45.
StartMode decideStartModeFromEigenValues(ShapeInput &refShape,
                                         ShapeInput &fitShape) {
  const auto refQrat = calculateQrat(refShape.calcMomentsOfInertia(true));
  const auto fitQrat = calculateQrat(fitShape.calcMomentsOfInertia(true));
  if (refQrat == 0 && fitQrat == 0) {
    return StartMode::ROTATE_180_WIGGLE;
  }
  return StartMode::ROTATE_45;
}
// Overlay fitShape onto refShape from a set of starting orientations and keep
// the best result.
// \param refShape   the reference shape
// \param fitShape   the shape being fitted
// \param bestXform  receives the transformation of the best overlay found
// \param overlayOpts controls the start mode, the optimisation and scoring
// \return the first three scores reported by the best overlay (the first of
//         which is the total used to rank overlays). All zeros if no overlay
//         produced a total above -1.
//
// The number of starts is (number of rotations) x (number of translations):
// 1, 4 or 16 rotations depending on the start mode, and 7 translations for
// the *_FRAGMENT modes, otherwise 1.
std::array<double, 3> alignShape(ShapeInput &refShape, ShapeInput &fitShape,
                                 RDGeom::Transform3D &bestXform,
                                 const ShapeOverlayOptions &overlayOpts) {
  unsigned int finalRotIndex = 1;
  auto startMode = overlayOpts.startMode;
  if (startMode == StartMode::A_LA_PUBCHEM) {
    startMode = decideStartModeFromEigenValues(refShape, fitShape);
  }
  switch (startMode) {
    case StartMode::ROTATE_0:
    case StartMode::ROTATE_0_FRAGMENT:
      break;
    case StartMode::ROTATE_180:
    case StartMode::ROTATE_180_FRAGMENT:
    case StartMode::ROTATE_180_WIGGLE:
      finalRotIndex = 4;
      break;
    case StartMode::ROTATE_45:
    case StartMode::ROTATE_45_FRAGMENT:
      finalRotIndex = 16;
      break;
    default:
      break;
  }
  unsigned int finalTransIndex = 1;
  if (startMode == StartMode::ROTATE_0_FRAGMENT ||
      startMode == StartMode::ROTATE_45_FRAGMENT ||
      startMode == StartMode::ROTATE_180_FRAGMENT) {
    finalTransIndex = 7;
  }
  // Initialised so that the cycle 1 threshold test and the return value are
  // well defined even if no overlay ever beats bestTotal.
  std::array<double, 3> bestScore{0.0, 0.0, 0.0};
  double bestTotal = -1.0;

  // Get together the start transformations: one aligner per start, plus the
  // unoptimised score of that start paired with the aligner's index.
  const unsigned int numStarts = finalTransIndex * finalRotIndex;
  std::vector<std::unique_ptr<SingleConformerAlignment>> aligners;
  aligners.reserve(numStarts);
  std::vector<std::pair<double, unsigned int>> bestScoreForStart;
  bestScoreForStart.reserve(numStarts);
  unsigned int k = 0;
  for (unsigned int j = 0; j < finalTransIndex; j++) {
    auto refDisp = getInitialTranslation(j, refShape, fitShape);
    std::array<double, 4> quat;
    for (unsigned int i = 0; i < finalRotIndex; i++, k++) {
      double score = 0.0;
      if (startMode == StartMode::ROTATE_180_WIGGLE) {
        quat = getInitialRotationWiggle(i, refShape, fitShape, refDisp,
                                        overlayOpts, score);
      } else {
        quat = getInitialRotationPlain(i, refShape, fitShape, refDisp,
                                       overlayOpts, score);
      }
      std::array<double, 7> initQuat{quat[0],   quat[1],   quat[2],  quat[3],
                                     refDisp.x, refDisp.y, refDisp.z};
      aligners.emplace_back(std::make_unique<SingleConformerAlignment>(
          refShape.getCoords(), refShape.getTypes().data(),
          refShape.getCarbonRadii(), refShape.getNumAtoms(),
          refShape.getNumFeatures(), refShape.getShapeVolume(),
          refShape.getColorVolume(), fitShape.getCoords(),
          fitShape.getTypes().data(), fitShape.getCarbonRadii(),
          fitShape.getNumAtoms(), fitShape.getNumFeatures(),
          fitShape.getShapeVolume(), fitShape.getColorVolume(), initQuat,
          overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
          overlayOpts.optParam, overlayOpts.useDistCutoff,
          overlayOpts.distCutoff, overlayOpts.shapeConvergenceCriterion,
          overlayOpts.nSteps));
      bestScoreForStart.push_back({score, k});
    }
  }

  // Do it in 2 cycles, a quick optimisation first, followed by an additional
  // longer one for those that look like they're going to win.
  for (unsigned int cycle = 0; cycle < 2; cycle++) {
    // Best-scoring starts first.
    std::ranges::sort(bestScoreForStart,
                      [](const auto &p1, const auto &p2) -> bool {
                        return p1.first > p2.first;
                      });
    std::vector<std::pair<double, unsigned int>> nextBestScoreForStart;
    nextBestScoreForStart.reserve(bestScoreForStart.size());
    for (const auto &[startScore, alignerIdx] : bestScoreForStart) {
      // In the second cycle, drop any start that came out of the first cycle
      // below 70% of the best total seen so far.
      if (cycle == 1 && startScore < 0.7 * bestScore[0]) {
        continue;
      }
      std::array<double, 20> outScores;
      aligners[alignerIdx]->doOverlay(outScores, cycle);
      nextBestScoreForStart.emplace_back(outScores[0], alignerIdx);
      if (outScores[0] > bestTotal) {
        bestTotal = outScores[0];
        bestScore =
            std::array<double, 3>{outScores[0], outScores[1], outScores[2]};
        aligners[alignerIdx]->getFinalQuatTrans(bestXform);
      }
    }
    // Hand this cycle's results to the next one without copying.
    bestScoreForStart.swap(nextBestScoreForStart);
  }
  return bestScore;
}
} // namespace
std::array<double, 3> AlignShape(const ShapeInput &refShape,
ShapeInput &fitShape,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts) {
// The shapes aren't necessarily normalized (it's not done on creation, for
// example) but they might need to be.
auto workingRefShape = std::make_unique<ShapeInput>(refShape);
auto workingFitShape = std::make_unique<ShapeInput>(fitShape);
auto inRefTrans = workingRefShape->calcCanonicalTranslation();
auto inRefRot = workingRefShape->calcCanonicalRotation();
auto inFitTrans = workingFitShape->calcCanonicalTranslation();
auto inFitRot = workingFitShape->calcCanonicalRotation();
// If we're not normalizing, translate both shapes so that the fit
// is at the origin, so the rotations work.
RDGeom::Transform3D moveToOrigin;
RDGeom::Transform3D moveFromOrigin;
if (overlayOpts.normalize) {
if (!workingRefShape->getNormalized()) {
workingRefShape->normalizeCoords();
}
if (!workingFitShape->getNormalized()) {
workingFitShape->normalizeCoords();
}
} else {
moveToOrigin.SetTranslation(
RDGeom::Point3D{workingFitShape->calcCanonicalTranslation()[0],
workingFitShape->calcCanonicalTranslation()[1],
workingFitShape->calcCanonicalTranslation()[2]});
moveFromOrigin.SetTranslation(
RDGeom::Point3D{-workingFitShape->calcCanonicalTranslation()[0],
-workingFitShape->calcCanonicalTranslation()[1],
-workingFitShape->calcCanonicalTranslation()[2]});
workingFitShape->transformCoords(moveToOrigin);
workingRefShape->transformCoords(moveToOrigin);
}
RDGeom::Transform3D bestXform;
auto scores =
alignShape(*workingRefShape, *workingFitShape, bestXform, overlayOpts);
if (!overlayOpts.normalize) {
// Shove it back again.
auto finalXform = moveFromOrigin * bestXform * moveToOrigin;
bestXform = finalXform;
} else {
auto finalXform = computeFinalTransform(inRefTrans, inRefRot, inFitTrans,
inFitRot, bestXform);
bestXform = finalXform;
}
fitShape.transformCoords(bestXform);
if (xform) {
*xform = bestXform;
}
return scores;
}
std::array<double, 3> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
const ShapeInputOptions &fitOpts,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts,
int fitConfId) {
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
RDGeom::Transform3D tmpXform;
auto scores = AlignShape(refShape, fitShape, &tmpXform, overlayOpts);
MolTransforms::transformConformer(fit.getConformer(fitConfId), tmpXform);
if (xform) {
*xform = tmpXform;
}
return scores;
}
std::array<double, 3> AlignMolecule(const ROMol &ref, ROMol &fit,
const ShapeInputOptions &refOpts,
const ShapeInputOptions &fitOpts,
RDGeom::Transform3D *xform,
const ShapeOverlayOptions &overlayOpts,
int refConfId, int fitConfId) {
auto refShape = ShapeInput(ref, refConfId, refOpts, overlayOpts);
auto scores =
AlignMolecule(refShape, fit, fitOpts, xform, overlayOpts, fitConfId);
return scores;
}
// Score the overlap of fitShape with refShape in their current positions;
// neither shape is moved or copied. The aligner is set up with the identity
// quaternion and a zero translation and is only used for its scoring
// function. See GaussianShape.h for the parameter documentation.
std::array<double, 3> ScoreShape(const ShapeInput &refShape,
                                 const ShapeInput &fitShape,
                                 const ShapeOverlayOptions &overlayOpts) {
  std::array<double, 7> quatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  SingleConformerAlignment sca(
      refShape.getCoords(), refShape.getTypes().data(),
      refShape.getCarbonRadii(), refShape.getNumAtoms(),
      refShape.getNumFeatures(), refShape.getShapeVolume(),
      refShape.getColorVolume(), fitShape.getCoords(),
      fitShape.getTypes().data(), fitShape.getCarbonRadii(),
      fitShape.getNumAtoms(), fitShape.getNumFeatures(),
      fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
      overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
      overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
      overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
  // Colour is scored in every mode except shape-only.
  bool includeColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
  auto scores = sca.calcScores(refShape.getCoords().data(),
                               fitShape.getCoords().data(), includeColor);
  return std::array{scores[0], scores[1], scores[2]};
}
std::array<double, 3> ScoreMolecule(const ShapeInput &refShape,
const ROMol &fit,
const ShapeInputOptions &fitOpts,
const ShapeOverlayOptions &overlayOpts,
int fitConfId) {
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
return ScoreShape(refShape, fitShape, overlayOpts);
}
// Build shapes from the given conformers of ref and fit and score their
// overlap without moving either. The caller's overlay options are copied and
// the copy has normalize forced to false and startMode forced to ROTATE_0
// before it is used for building the shapes and for the scoring. See
// GaussianShape.h for the parameter documentation.
std::array<double, 3> ScoreMolecule(const ROMol &ref, const ROMol &fit,
                                    const ShapeInputOptions &refOpts,
                                    const ShapeInputOptions &fitOpts,
                                    const ShapeOverlayOptions &overlayOpts,
                                    int refConfId, int fitConfId) {
  ShapeOverlayOptions tmpOpts = overlayOpts;
  tmpOpts.normalize = false;
  tmpOpts.startMode = StartMode::ROTATE_0;
  auto refShape = ShapeInput(ref, refConfId, refOpts, tmpOpts);
  auto fitShape = ShapeInput(fit, fitConfId, fitOpts, tmpOpts);
  return ScoreShape(refShape, fitShape, tmpOpts);
}
} // namespace GaussianShape
} // namespace RDKit

View File

@@ -0,0 +1,157 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// This is the interface for the functions to perform shape-based molecule
// alignments and scoring. It is experimental code and the API and/or
// results may change in future releases.
#ifndef RDKIT_GAUSSIANSHAPE_GUARD
#define RDKIT_GAUSSIANSHAPE_GUARD
#include <RDGeneral/export.h>
#include <Geometry/Transform3D.h>
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
namespace RDKit {
class ROMol;
class Conformer;
namespace GaussianShape {
//! Align a shape onto a reference shape.
/*!
\param refShape the reference shape; it is not modified
\param fitShape the shape to align; its coordinates are transformed in place
by the best overlay found
\param xform if passed in as non-null, will be populated with the
transformation matrix that aligns fit onto ref.
\param overlayOpts options for the overlay
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignShape(
const ShapeInput &refShape, ShapeInput &fitShape,
RDGeom::Transform3D *xform = nullptr,
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
//! Align a molecule to a reference shape
/*!
\param refShape the reference shape
\param fit the molecule to align; the conformer used is transformed in
place by the best overlay found
\param fitOpts the options for creating the fit shape
\param xform if passed in as non-null, will be populated with the
transformation matrix that aligns fit onto ref.
\param overlayOpts options for setting up and running the overlay
\param fitConfId (optional) the conformer to use for the fit
molecule
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignMolecule(
const ShapeInput &refShape, ROMol &fit,
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
RDGeom::Transform3D *xform = nullptr,
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
int fitConfId = -1);
//! Align a molecule to a reference molecule
/*!
\param ref the reference molecule; it is not modified
\param fit the molecule to align; the conformer used is transformed in
place by the best overlay found
\param refOpts the options for creating the ref shape
\param fitOpts the options for creating the fit shape
\param xform if passed in as non-null, will be populated with the
transformation matrix that aligns fit onto ref.
\param overlayOpts options for setting up and running the overlay
\param refConfId (optional) the conformer to use for the reference
molecule
\param fitConfId (optional) the conformer to use for the fit
molecule
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignMolecule(
const ROMol &ref, ROMol &fit,
const ShapeInputOptions &refOpts = ShapeInputOptions(),
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
RDGeom::Transform3D *xform = nullptr,
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
int refConfId = -1, int fitConfId = -1);
//! Score the overlap of a shape to a reference shape without moving either.
/*!
\param refShape the reference shape
\param fitShape the shape to score
\param overlayOpts options for controlling the volume calculation
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreShape(
const ShapeInput &refShape, const ShapeInput &fitShape,
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
//! Score the overlap of a molecule to a reference shape without moving either.
/*!
\param refShape the reference shape
\param fit the molecule to score
\param fitOpts the options for creating the fit shape
\param overlayOpts options for controlling the volume calculation
\param fitConfId (optional) the conformer to use for the fit
molecule
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreMolecule(
const ShapeInput &refShape, const ROMol &fit,
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
int fitConfId = -1);
//! Score the overlap of a molecule to a reference molecule without moving
//! either.
/*!
\param ref the reference molecule
\param fit the molecule to score
\param refOpts the options for creating the ref shape
\param fitOpts the options for creating the fit shape
\param overlayOpts options for controlling the volume calculation. The
implementation works on a copy of these with normalize set to false and
startMode set to ROTATE_0, whatever values are passed in.
\param refConfId (optional) the conformer to use for the reference
molecule
\param fitConfId (optional) the conformer to use for the fit
molecule
\return an array holding the combination score of the shape Tversky value and
the color Tversky value (zero if colors not used), followed by the individual
values. If using color features, defaults to RDKit pharmacophore types for the
features.
*/
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreMolecule(
const ROMol &ref, const ROMol &fit,
const ShapeInputOptions &refOpts = ShapeInputOptions(),
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
int refConfId = -1, int fitConfId = -1);
} // namespace GaussianShape
} // namespace RDKit
#endif // RDKIT_GAUSSIANSHAPE_GUARD

View File

@@ -0,0 +1,604 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
#include <Geometry/point.h>
#include <Geometry/Transform3D.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/RWMol.h>
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/flyweight.hpp>
#include <boost/flyweight/key_value.hpp>
#include <boost/flyweight/no_tracking.hpp>
#include <RDGeneral/BoostEndInclude.h>
#ifdef RDK_HAS_EIGEN3
#include <Eigen/Dense>
#endif
// NOTE(review): this mutex is declared in the global namespace, outside
// namespace RDKit, with external linkage and a very generic name. Nothing in
// the part of this file visible at review time locks it, and <mutex> is not
// among the headers included directly above. Confirm where it is used and
// consider making it file-local.
std::mutex mtx;
namespace RDKit {
namespace GaussianShape {
// Bondi radii, keyed by atomic number.
// You can find more of these in Table 12 of this publication:
// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/
// The dummy atom radius (atomic number 0) is set to
// 2.16.
// The table only has entries for atomic numbers 0-20, 31-38, 49-56 and
// 81-88; there are none for 21-30, 39-48, 57-80 or anything above 88.
// getStandardAtomRadius() throws for an atomic number that is not here.
const std::map<unsigned int, double> vdw_radii = {
{0, 2.16}, // Dummy, same as Xe.
{1, 1.10}, // H
{2, 1.40}, // He
{3, 1.81}, // Li
{4, 1.53}, // Be
{5, 1.92}, // B
{6, 1.70}, // C
{7, 1.55}, // N
{8, 1.52}, // O
{9, 1.47}, // F
{10, 1.54}, // Ne
{11, 2.27}, // Na
{12, 1.73}, // Mg
{13, 1.84}, // Al
{14, 2.10}, // Si
{15, 1.80}, // P
{16, 1.80}, // S
{17, 1.75}, // Cl
{18, 1.88}, // Ar
{19, 2.75}, // K
{20, 2.31}, // Ca
{31, 1.87}, // Ga
{32, 2.11}, // Ge
{33, 1.85}, // As
{34, 1.90}, // Se
{35, 1.83}, // Br
{36, 2.02}, // Kr
{37, 3.03}, // Rb
{38, 2.49}, // Sr
{49, 1.93}, // In
{50, 2.17}, // Sn
{51, 2.06}, // Sb
{52, 2.06}, // Te
{53, 1.98}, // I
{54, 2.16}, // Xe
{55, 3.43}, // Cs
{56, 2.68}, // Ba
{81, 1.96}, // Tl
{82, 2.02}, // Pb
{83, 2.07}, // Bi
{84, 1.97}, // Po
{85, 2.02}, // At
{86, 2.20}, // Rn
{87, 3.48}, // Fr
{88, 2.83}, // Ra
};
// Radius given to every feature/color "atom", whatever its type.
// NOTE(review): not referenced in the part of this file visible at review
// time - presumably used when the features are extracted; confirm.
constexpr double radius_color =
1.08265; // same radius for all feature/color "atoms", as used by the
// PubChem code.
// Build a shape from one conformer of a molecule.
// \param mol         the molecule; it must have at least one conformer
// \param confId      the conformer to take the coordinates from
// \param opts        controls which atoms/features go into the shape and the
//                    radii used
// \param overlayOpts supplies the distance cutoff settings used for the
//                    self-overlap volumes
// Extracts the atoms (and the features, if opts.useColors is set), works out
// the normalization and the extreme points, and stores the self-overlap
// volumes of the atoms and of the features.
ShapeInput::ShapeInput(const ROMol &mol, int confId,
                       const ShapeInputOptions &opts,
                       const ShapeOverlayOptions &overlayOpts) {
  // Only called if the precondition fails. getPropIfPresent() is used rather
  // than getProp() so that a molecule with no _Name still produces the
  // intended message instead of a key error that hides it.
  const auto molName = [&mol]() {
    std::string name;
    mol.getPropIfPresent("_Name", name);
    return name;
  };
  PRECONDITION(mol.getNumConformers() > 0,
               "ShapeInput object needs the molecule to have conformers. " +
                   molName() + " " + MolToSmiles(mol));
  if (opts.allCarbonRadii && !opts.atomRadii.empty()) {
    BOOST_LOG(rdWarningLog)
        << "Specifying allCarbonRadii and providing custom atom radii doesn't"
           " make sense.  Ignoring the radii."
        << std::endl;
  }
  extractAtoms(mol, confId, opts);
  if (opts.useColors) {
    extractFeatures(mol, confId, opts);
  }
  calcNormalization();
  calcExtremes();
  // Scratch space that calcVolAndGrads() needs; its contents aren't used
  // here, only the returned volumes.
  std::vector<double> gradConverters(12 * (d_numAtoms + d_numFeats));
  const double distCutoffSq = overlayOpts.distCutoff * overlayOpts.distCutoff;
  // Overlap of the atoms with themselves.
  d_selfOverlapVol =
      calcVolAndGrads(d_coords.data(), d_numAtoms, d_carbonRadii.get(),
                      d_coords.data(), d_numAtoms, d_carbonRadii.get(),
                      gradConverters, overlayOpts.useDistCutoff, distCutoffSq);
  // Overlap of the features with themselves. The features follow the atoms
  // in both d_coords (4 values per point) and d_types.
  d_selfOverlapColor = calcVolAndGrads(
      d_coords.data() + 4 * d_numAtoms, d_numFeats, d_types.data() + d_numAtoms,
      d_coords.data() + 4 * d_numAtoms, d_numFeats, d_types.data() + d_numAtoms,
      d_numAtoms, gradConverters, overlayOpts.useDistCutoff, distCutoffSq,
      nullptr, nullptr);
}
// Copy constructor. Everything is copied member by member except the
// carbon-radii bitset, which is held by pointer and so is deep-copied when
// other has one and left unset when it doesn't.
ShapeInput::ShapeInput(const ShapeInput &other)
    : d_coords(other.d_coords),
      d_types(other.d_types),
      d_numAtoms(other.d_numAtoms),
      d_numFeats(other.d_numFeats),
      d_selfOverlapVol(other.d_selfOverlapVol),
      d_selfOverlapColor(other.d_selfOverlapColor),
      d_extremePoints(other.d_extremePoints),
      d_normalized(other.d_normalized),
      d_normalizationOK(other.d_normalizationOK),
      d_canonRot(other.d_canonRot),
      d_canonTrans(other.d_canonTrans),
      d_eigenValues(other.d_eigenValues) {
  if (!other.d_carbonRadii) {
    return;
  }
  d_carbonRadii.reset(new boost::dynamic_bitset<>(*other.d_carbonRadii));
}
// Copy assignment. Self-assignment is a no-op. The carbon-radii bitset is
// deep-copied when other has one, and released when it doesn't.
ShapeInput &ShapeInput::operator=(const ShapeInput &other) {
  if (this != &other) {
    // Coordinates, types and counts.
    d_coords = other.d_coords;
    d_types = other.d_types;
    d_numAtoms = other.d_numAtoms;
    d_numFeats = other.d_numFeats;
    // Cached volumes and extreme points.
    d_selfOverlapVol = other.d_selfOverlapVol;
    d_selfOverlapColor = other.d_selfOverlapColor;
    d_extremePoints = other.d_extremePoints;
    // Normalization state.
    d_normalized = other.d_normalized;
    d_normalizationOK = other.d_normalizationOK;
    d_canonRot = other.d_canonRot;
    d_canonTrans = other.d_canonTrans;
    d_eigenValues = other.d_eigenValues;
    d_carbonRadii.reset(
        other.d_carbonRadii
            ? new boost::dynamic_bitset<>(*other.d_carbonRadii)
            : nullptr);
  }
  return *this;
}
std::vector<RDGeom::Point3D> ShapeInput::getAtomPoints(
bool includeColors) const {
std::vector<RDGeom::Point3D> atomPoints;
unsigned int numPoints = getNumAtoms();
if (includeColors) {
numPoints += getNumFeatures();
}
atomPoints.reserve(numPoints);
for (unsigned int i = 0; i < 4 * numPoints; i += 4) {
atomPoints.emplace_back(
RDGeom::Point3D(d_coords[i], d_coords[i + 1], d_coords[i + 2]));
}
return atomPoints;
}
// Return the canonical rotation (9 values), recalculating the normalization
// first if it is flagged as out of date.
const std::array<double, 9> &ShapeInput::calcCanonicalRotation() {
  if (d_normalizationOK) {
    return d_canonRot;
  }
  calcNormalization();
  return d_canonRot;
}
// Return the canonical translation, recalculating the normalization first if
// it is flagged as out of date.
const std::array<double, 3> &ShapeInput::calcCanonicalTranslation() {
  if (d_normalizationOK) {
    return d_canonTrans;
  }
  calcNormalization();
  return d_canonTrans;
}
// Return the eigenvalues stored with the normalization, recalculating the
// normalization first if it is flagged as out of date.
const std::array<double, 3> &ShapeInput::calcEigenValues() {
  if (d_normalizationOK) {
    return d_eigenValues;
  }
  calcNormalization();
  return d_eigenValues;
}
// Return the indices of the 6 extreme points, recalculating them first if the
// normalization is flagged as out of date.
// NOTE(review): the refresh is keyed on d_normalizationOK rather than on a
// flag of its own. Whether calcNormalization() and calculateExtremes() keep
// the extremes in step with that flag can't be seen from here - confirm.
const std::array<size_t, 6> &ShapeInput::calcExtremes() {
  if (d_normalizationOK) {
    return d_extremePoints;
  }
  calculateExtremes();
  return d_extremePoints;
}
// Return the 3 moments of inertia of the shape, optionally including the
// colour features as extra points. The calculation is done on a temporary
// molecule built from the shape's coordinates.
// With Eigen available the moments come from
// MolTransforms::computePrincipalAxesAndMoments(); otherwise they are the
// eigenvalues filled in by MolTransforms::computeCanonicalTransform().
std::array<double, 3> ShapeInput::calcMomentsOfInertia(
    bool includeColors) const {
  auto tmpMol = shapeToMol(includeColors);
  // Zero-initialised so the result is well defined on every path.
  std::array<double, 3> eVals{0.0, 0.0, 0.0};
  // #ifdef, to match the test that guards the Eigen include at the top of
  // the file.
#ifdef RDK_HAS_EIGEN3
  Eigen::Matrix3d axes;
  Eigen::Vector3d moments;
  MolTransforms::computePrincipalAxesAndMoments(tmpMol->getConformer(), axes,
                                                moments);
  eVals[0] = moments[0];
  eVals[1] = moments[1];
  eVals[2] = moments[2];
#else
  // Only the eigenvalues are wanted; the unique_ptr is just there to free the
  // transform that comes back.
  std::unique_ptr<RDGeom::Transform3D> canonXform(
      MolTransforms::computeCanonicalTransform(tmpMol->getConformer(), nullptr,
                                               false, true, eVals.data()));
#endif
  return eVals;
}
void ShapeInput::normalizeCoords() {
if (d_normalized) {
return;
}
if (!d_normalizationOK) {
calcNormalization();
}
RDGeom::Transform3D canonRot;
for (unsigned int i = 0, k = 0; i < 3; ++i) {
for (unsigned int j = 0; j < 3; ++j, ++k) {
canonRot.setValUnchecked(i, j, d_canonRot[k]);
}
}
RDGeom::Point3D trans{d_canonTrans[0], d_canonTrans[1], d_canonTrans[2]};
canonRot.TransformPoint(trans);
canonRot.SetTranslation(trans);
transformCoords(canonRot);
d_normalized = true;
// Recalculate the extremes now we've changed the coordinates.
calcExtremes();
}
// Transform every packed point in d_coords by xform and flag both the
// "normalized" state and the cached normalization as no longer valid.
void ShapeInput::transformCoords(RDGeom::Transform3D &xform) {
  applyTransformToShape(d_coords, xform);
  d_normalizationOK = false;
  d_normalized = false;
}
std::unique_ptr<RWMol> ShapeInput::shapeToMol(bool includeColors) const {
auto mol = std::make_unique<RWMol>();
for (unsigned int i = 0; i < getNumAtoms(); i++) {
Atom *atom = new Atom(6);
mol->addAtom(atom, true, true);
}
if (includeColors) {
for (unsigned int i = 0; i < getNumFeatures(); i++) {
Atom *atom = new Atom(7);
mol->addAtom(atom, true, true);
}
}
unsigned int num = getNumAtoms();
if (includeColors) {
num += getNumFeatures();
}
Conformer *conf = new Conformer(num);
const auto &shapeCds = getCoords();
for (unsigned int i = 0; i < num; i++) {
auto &pos = conf->getAtomPos(i);
pos.x = shapeCds[4 * i];
pos.y = shapeCds[4 * i + 1];
pos.z = shapeCds[4 * i + 2];
}
mol->addConformer(conf, true);
return mol;
}
namespace {
// Look up the van der Waals radius for the given atomic number in the
// vdw_radii table. Throws ValueErrorException if the element is not there.
double getStandardAtomRadius(unsigned int atomicNum) {
  const auto found = vdw_radii.find(atomicNum);
  if (found == vdw_radii.end()) {
    throw ValueErrorException("No VdW radius for atom with Z=" +
                              std::to_string(atomicNum));
  }
  return found->second;
}
}  // namespace
// Fill d_coords with one packed record (x, y, z, alpha) for each heavy atom
// of the requested conformer, optionally restricted to opts.atomSubset.
// alpha is KAPPA / (r * r). When opts.allCarbonRadii is false, d_carbonRadii
// is also created and the bit for each shape point given the carbon radius
// via the default-radius path is set.
// Throws ValueErrorException (from getStandardAtomRadius) if a radius is
// needed for an element that has none.
void ShapeInput::extractAtoms(const ROMol &mol, int confId,
                              const ShapeInputOptions &opts) {
  d_coords.reserve(mol.getNumAtoms() * 4);
  if (!opts.allCarbonRadii) {
    // An upper bound on the number of shape points; hydrogens are skipped
    // below so not all bits need be used.
    d_carbonRadii.reset(new boost::dynamic_bitset<>(
        !opts.atomSubset.empty() ? opts.atomSubset.size() : mol.getNumAtoms()));
  }
  // Bind by reference: there is no need to copy the whole conformer.
  const auto &conf = mol.getConformer(confId);
  // Index of the next point to be added to the shape. It must only advance
  // when a point is actually added, because d_carbonRadii is later indexed
  // by shape-point index, not by molecule atom index. Skipped hydrogens
  // must therefore not consume an index.
  unsigned int shapeIdx = 0;
  for (const auto atom : mol.atoms()) {
    const auto atIdx = atom->getIdx();
    if (!opts.atomSubset.empty() &&
        std::ranges::find(opts.atomSubset, atIdx) == opts.atomSubset.end()) {
      continue;
    }
    if (atom->getAtomicNum() <= 1) {
      // Hydrogens (and dummies) don't contribute to the shape.
      continue;
    }
    const auto &pos = conf.getAtomPos(atIdx);
    d_coords.push_back(pos.x);
    d_coords.push_back(pos.y);
    d_coords.push_back(pos.z);
    if (opts.allCarbonRadii) {
      d_coords.push_back(KAPPA / (1.7 * 1.7));
    } else {
      double rad = 0.0;
      if (opts.atomRadii.empty()) {
        if (atom->getAtomicNum() == 6) {
          rad = 1.7;
          (*d_carbonRadii)[shapeIdx] = true;
        } else {
          rad = getStandardAtomRadius(atom->getAtomicNum());
        }
      } else {
        // Per-atom overrides are keyed on the atom index in the molecule;
        // atoms without an override fall back to the standard radius.
        const auto it = std::ranges::find_if(
            opts.atomRadii,
            [atIdx](const auto &p) -> bool { return p.first == atIdx; });
        rad = it == opts.atomRadii.end()
                  ? getStandardAtomRadius(atom->getAtomicNum())
                  : it->second;
      }
      d_coords.push_back(KAPPA / (rad * rad));
    }
    ++shapeIdx;
  }
  d_numAtoms = d_coords.size() / 4;
  // One type entry per atom, value 0; feature types are appended later.
  d_types.resize(d_numAtoms);
  d_numFeats = 0;
}
namespace {
class ss_matcher {
public:
ss_matcher(const std::string &pattern) : m_pattern(pattern) {
m_needCopies = (pattern.find_first_of("$") != std::string::npos);
RDKit::RWMol *p = RDKit::SmartsToMol(pattern);
m_matcher = p;
POSTCONDITION(m_matcher, "no matcher");
};
const RDKit::ROMol *getMatcher() const { return m_matcher; };
unsigned int countMatches(const RDKit::ROMol &mol) const {
PRECONDITION(m_matcher, "no matcher");
std::vector<RDKit::MatchVectType> matches;
// This is an ugly one. Recursive queries aren't thread safe.
// Unfortunately we have to take a performance hit here in order
// to guarantee thread safety
if (m_needCopies) {
const RDKit::ROMol nm(*(m_matcher), true);
RDKit::SubstructMatch(mol, nm, matches);
} else {
const RDKit::ROMol &nm = *m_matcher;
RDKit::SubstructMatch(mol, nm, matches);
}
return matches.size();
}
~ss_matcher() { delete m_matcher; };
private:
ss_matcher() : m_pattern("") {};
std::string m_pattern;
bool m_needCopies{false};
const RDKit::ROMol *m_matcher{nullptr};
};
} // namespace
// This came from the original PubChemShape.cpp
// A flyweight keyed on the SMARTS string whose value is an ss_matcher built
// from that string. It is declared with the no_tracking policy.
typedef boost::flyweight<boost::flyweights::key_value<std::string, ss_matcher>,
boost::flyweights::no_tracking>
pattern_flyweight;
// Definitions for feature points adapted from:
// Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
// Each inner vector is one feature family; extractFeatures() numbers the
// families from 1 in the order given here, so the feature types are
// 1 = donor, 2 = acceptor, 3 = rings (sizes 3 to 7), 4 = basic, 5 = acidic.
// Every SMARTS in a family produces features of that family's type.
const std::vector<std::vector<std::string>> smartsPatterns = {
{"[$([N;!H0;v3,v4&+1]),\
$([O,S;H1;+0]),\
n&H1&+0]"}, // Donor
{"[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),\
$([O,S;H0;v2]),\
$([O,S;-]),\
$([N;v3;!$(N-*=[O,N,P,S])]),\
n&H0&+0,\
$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"}, // Acceptor
{
"[r]1[r][r]1",
"[r]1[r][r][r]1",
"[r]1[r][r][r][r]1",
"[r]1[r][r][r][r][r]1",
"[r]1[r][r][r][r][r][r]1",
}, // rings
// "[a]", //
// Aromatic
// "[F,Cl,Br,I]", // Halogen
{"[#7;+,\
$([N;H2&+0][$([C,a]);!$([C,a](=O))]),\
$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),\
$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"}, // Basic
{"[$([C,S](=[O,S,P])-[O;H1,-1])]"} // Acidic
};
// Return the table of query molecules for the built-in feature definitions,
// one inner vector per entry of smartsPatterns and in the same order. The
// pointers are non-owning; the query molecules belong to the
// pattern_flyweight values.
//
// The table is built exactly once, on first use. A function-local static
// initialised from a lambda is used so that concurrent first calls are
// serialised by the language, and so that an exception thrown part-way
// through (e.g. a SMARTS that fails to parse) leaves nothing cached: the
// next call will simply try again.
std::vector<std::vector<const ROMol *>> *getPh4Patterns() {
  static std::vector<std::vector<const ROMol *>> patterns = [] {
    std::vector<std::vector<const ROMol *>> table;
    table.reserve(smartsPatterns.size());
    for (const auto &smartsV : smartsPatterns) {
      std::vector<const ROMol *> family;
      family.reserve(smartsV.size());
      for (const auto &smarts : smartsV) {
        const ROMol *matcher = pattern_flyweight(smarts).get().getMatcher();
        CHECK_INVARIANT(matcher, "bad smarts");
        family.push_back(matcher);
      }
      table.push_back(std::move(family));
    }
    return table;
  }();
  return &patterns;
}
// Extract the features for the color scores, using RDKit pphore features
// for now. Other options to be added later.
void ShapeInput::extractFeatures(const ROMol &mol, int confId,
const ShapeInputOptions &opts) {
if (opts.customFeatures.empty()) {
unsigned pattIdx = 1;
const auto pattVects = getPh4Patterns();
for (const auto &patts : *pattVects) {
for (const auto &patt : patts) {
std::vector<MatchVectType> matches;
{
// recursive queries aren't thread safe.
matches = SubstructMatch(mol, *patt);
}
for (const auto &match : matches) {
std::vector<unsigned int> ats;
bool featOk = true;
for (const auto &pr : match) {
// make sure all the atoms are in the subset, if there is one
if (!opts.atomSubset.empty()) {
if (std::ranges::find_if(
opts.atomSubset, [pr](const auto &p) -> bool {
return p == static_cast<unsigned int>(pr.second);
}) == opts.atomSubset.end()) {
featOk = false;
break;
}
}
ats.push_back(pr.second);
}
if (!featOk) {
continue;
}
auto featPos = computeFeaturePos(mol, confId, ats);
d_types.push_back(pattIdx);
d_coords.push_back(featPos.x);
d_coords.push_back(featPos.y);
d_coords.push_back(featPos.z);
d_coords.push_back(KAPPA / (radius_color * radius_color));
d_numFeats++;
}
}
++pattIdx;
}
} else {
// Just copy them directly
for (const auto &f : opts.customFeatures) {
d_types.push_back(std::get<0>(f));
d_numFeats++;
const auto &pos = std::get<1>(f);
d_coords.push_back(pos.x);
d_coords.push_back(pos.y);
d_coords.push_back(pos.z);
d_coords.push_back(std::get<2>(f));
}
}
}
// Compute and cache the canonical rotation, the canonical translation (the
// negated mean position of the shape atoms) and the eigenvalues, then mark
// the cache as valid. Color features play no part.
void ShapeInput::calcNormalization() {
  d_eigenValues.fill(0.0);
  // The canonical transformation is always computed from a mock molecule of
  // just the shape points, never the input molecule, because the shape may
  // have been built from a subset of that molecule's atoms.
  auto atomsOnly = shapeToMol(false);
  std::unique_ptr<RDGeom::Transform3D> canonXform(
      MolTransforms::computeCanonicalTransform(atomsOnly->getConformer(),
                                               nullptr, false, true,
                                               d_eigenValues.data()));
  // Keep only the 3x3 rotation part, stored row-major.
  for (unsigned int row = 0; row < 3; ++row) {
    for (unsigned int col = 0; col < 3; ++col) {
      d_canonRot[3 * row + col] = canonXform->getValUnchecked(row, col);
    }
  }
  // Accumulate minus the sum of the atom positions, then average.
  d_canonTrans.fill(0.0);
  for (unsigned int base = 0; base < 4 * d_numAtoms; base += 4) {
    for (unsigned int axis = 0; axis < 3; ++axis) {
      d_canonTrans[axis] -= d_coords[base + axis];
    }
  }
  for (auto &component : d_canonTrans) {
    component /= d_numAtoms;
  }
  d_normalizationOK = true;
}
// Record in d_extremePoints the index of the point with the smallest x, y
// and z (slots 0-2) and the largest x, y and z (slots 3-5). All packed
// points in d_coords are considered. On ties the first such point wins.
void ShapeInput::calculateExtremes() {
  d_extremePoints.fill(0);
  for (size_t base = 0, pt = 0; base < d_coords.size(); base += 4, ++pt) {
    for (size_t axis = 0; axis < 3; ++axis) {
      const double val = d_coords[base + axis];
      auto &minPt = d_extremePoints[axis];
      auto &maxPt = d_extremePoints[axis + 3];
      if (val < d_coords[4 * minPt + axis]) {
        minPt = pt;
      }
      if (val > d_coords[4 * maxPt + axis]) {
        maxPt = pt;
      }
    }
  }
}
// Return the mean position, in conformer confId of mol, of the atoms whose
// indices are given in ats. The sum is divided by ats.size() with no check
// for an empty list.
RDGeom::Point3D computeFeaturePos(const ROMol &mol, int confId,
                                  const std::vector<unsigned int> &ats) {
  const auto &conf = mol.getConformer(confId);
  RDGeom::Point3D centroid;
  for (size_t i = 0; i < ats.size(); ++i) {
    centroid += conf.getAtomPos(ats[i]);
  }
  centroid /= ats.size();
  return centroid;
}
// Transform, in place, every point of a shape packed 4 doubles per point.
// TransformPoint() is handed a pointer to the start of each record.
void applyTransformToShape(std::vector<double> &shape,
                           RDGeom::Transform3D &xform) {
  double *const records = shape.data();
  const size_t numValues = shape.size();
  for (size_t base = 0; base < numValues; base += 4) {
    xform.TransformPoint(records + base);
  }
}
// Copy numPoints packed records (4 doubles each) from inShape to outShape
// and transform each copied record in outShape by xform.
void applyTransformToShape(const double *inShape, double *outShape,
                           size_t numPoints, RDGeom::Transform3D &xform) {
  for (size_t pt = 0; pt < numPoints; ++pt) {
    const double *src = inShape + 4 * pt;
    double *dst = outShape + 4 * pt;
    for (int c = 0; c < 4; ++c) {
      dst[c] = src[c];
    }
    xform.TransformPoint(dst);
  }
}
// Add translation to the x, y, z of every packed record in shape, in place.
// The 4th value of each record is left alone.
void translateShape(std::vector<double> &shape,
                    const RDGeom::Point3D &translation) {
  const size_t numValues = shape.size();
  for (size_t base = 0; base < numValues; base += 4) {
    double *rec = &shape[base];
    rec[0] += translation.x;
    rec[1] += translation.y;
    rec[2] += translation.z;
  }
}
// Write to outShape a copy of the numPoints packed records in inShape with
// translation added to each x, y, z. The 4th value is copied unchanged.
void translateShape(const double *inShape, double *outShape, size_t numPoints,
                    const RDGeom::Point3D &translation) {
  for (size_t pt = 0; pt < numPoints; ++pt) {
    const double *src = inShape + 4 * pt;
    double *dst = outShape + 4 * pt;
    dst[0] = src[0] + translation.x;
    dst[1] = src[1] + translation.y;
    dst[2] = src[2] + translation.z;
    dst[3] = src[3];
  }
}
} // namespace GaussianShape
} // namespace RDKit

View File

@@ -0,0 +1,265 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
#ifndef RDKIT_SHAPEINPUT_GUARD
#define RDKIT_SHAPEINPUT_GUARD
#include <array>
#include <vector>
#include <RDGeneral/export.h>
#include <Geometry/Transform3D.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/dynamic_bitset.hpp>
#ifdef RDK_USE_BOOST_SERIALIZATION
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/array.hpp>
#include <boost/serialization/unique_ptr.hpp>
#endif
#include <RDGeneral/BoostEndInclude.h>
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
// The code below was provided by Claude (Sonnet 4.6).
// It first tried to get me to use boost/serialization/dynamic_bitset.hpp
// and then admitted that it had made that up.
namespace boost {
namespace serialization {
// Serialization support for boost::dynamic_bitset: the number of bits is
// archived, followed by the underlying blocks as a vector.
template <class Archive, typename Block, typename Allocator>
void serialize(Archive &ar, boost::dynamic_bitset<Block, Allocator> &bs,
               const unsigned int /*version*/) {
  size_t bitCount = bs.size();
  ar & bitCount;

  std::vector<Block> rawBlocks;
  if constexpr (Archive::is_saving::value) {
    to_block_range(bs, std::back_inserter(rawBlocks));
  }
  ar & rawBlocks;

  if constexpr (Archive::is_loading::value) {
    // Size the bitset before the blocks are copied in, then resize again
    // to trim any excess bits.
    bs.resize(bitCount);
    from_block_range(rawBlocks.begin(), rawBlocks.end(), bs);
    bs.resize(bitCount);
  }
}
}  // namespace serialization
}  // namespace boost
namespace RDKit {
class ROMol;
class RWMol;
namespace GaussianShape {
// From Grant et al.
// NOTE(review): P is not used in the code visible in this header - confirm
// its role against the reference.
constexpr double P = 2.7;
// The alpha value stored with each point of a shape is KAPPA / (r * r),
// where r is the radius of the atom or feature.
constexpr double KAPPA = 2.41798793102;
// User-supplied color features: each tuple holds the integer feature type,
// the feature's coordinates and a double that is stored with the point.
using CustomFeatures =
std::vector<std::tuple<unsigned int, RDGeom::Point3D, double>>;
//! Options controlling how a ShapeInput is built from a molecule.
struct ShapeInputOptions {
  ShapeInputOptions() = default;
  ShapeInputOptions(const ShapeInputOptions &) = default;
  ShapeInputOptions(ShapeInputOptions &&) = default;
  ShapeInputOptions &operator=(const ShapeInputOptions &) = default;
  ShapeInputOptions &operator=(ShapeInputOptions &&) = default;
  ~ShapeInputOptions() = default;

  //! Whether to build the color features. By default, it will create
  //! features using the RDKit pharmacophore definitions.
  bool useColors{true};

  //! Custom color features used verbatim. A vector of tuples of integer
  //! type, Point3D coords, double radius.
  //! NOTE(review): the double is copied unchanged into the slot that holds
  //! alpha (KAPPA / r^2) for other points - confirm whether a radius or an
  //! alpha is expected.
  CustomFeatures customFeatures;

  //! If not empty, use just these atoms in the molecule to form the
  //! ShapeInput object.
  std::vector<unsigned int> atomSubset;

  //! Use these non-standard radii for these atoms. The int is for the atom
  //! index in the molecule, not the atomic number. Not all atoms need be
  //! specified, just some radii can be over-ridden, with the rest left as
  //! standard.
  std::vector<std::pair<unsigned int, double>> atomRadii;

  //! Whether to use carbon radii for all atoms (which is quicker but less
  //! accurate) or vdw radii appropriate for the elements.
  bool allCarbonRadii{true};
};
// Data for shape alignment code. Holds the packed coordinates and alpha
// values of the shape atoms and color features, plus cached values for
// putting the shape into its canonical orientation.
class RDKIT_GAUSSIANSHAPE_EXPORT ShapeInput {
 public:
  //! Create the ShapeInput object.
  //! @param mol: The molecule of interest
  //! @param confId: The conformer to use
  //! @param opts: Options for setting up the shape
  //! @param overlayOpts: Options for the overlay
  ShapeInput(const ROMol &mol, int confId = -1,
             const ShapeInputOptions &opts = ShapeInputOptions(),
             const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
  //! Re-create a ShapeInput from the string produced by toString().
  //! Fails a precondition if Boost serialization is not enabled.
  ShapeInput(const std::string &str) {
#ifndef RDK_USE_BOOST_SERIALIZATION
    PRECONDITION(0, "Boost SERIALIZATION is not enabled")
#else
    std::stringstream ss(str);
    boost::archive::text_iarchive ia(ss);
    ia & *this;
#endif
  }
  ShapeInput(const ShapeInput &other);
  ShapeInput(ShapeInput &&other) = default;
  ShapeInput &operator=(const ShapeInput &other);
  ShapeInput &operator=(ShapeInput &&other) = default;
  virtual ~ShapeInput() = default;

  //! Serialize the object to a string using a Boost text archive.
  //! Fails a precondition if Boost serialization is not enabled.
  std::string toString() const {
#ifndef RDK_USE_BOOST_SERIALIZATION
    PRECONDITION(0, "Boost SERIALIZATION is not enabled")
#else
    std::stringstream ss;
    boost::archive::text_oarchive oa(ss);
    oa & *this;
    return ss.str();
#endif
  }

  // Note that the coords returned is a vector of 4 values per point (x, y,
  // z and the alpha parameter), with the getNumAtoms() atoms first and the
  // getNumFeatures() features after them.
  const std::vector<double> &getCoords() const { return d_coords; }
  //! Fetch the coordinates of the atoms and optionally features.
  std::vector<RDGeom::Point3D> getAtomPoints(bool includeColors = false) const;
  bool getNormalized() const { return d_normalized; }
  const std::vector<int> &getTypes() const { return d_types; }
  unsigned int getNumAtoms() const { return d_numAtoms; }
  unsigned int getNumFeatures() const { return d_numFeats; }
  double getShapeVolume() const { return d_selfOverlapVol; }
  double getColorVolume() const { return d_selfOverlapColor; }
  // May be null: the bitset is only created when the shape is not built
  // with carbon radii for all atoms.
  const boost::dynamic_bitset<> *getCarbonRadii() const {
    return d_carbonRadii.get();
  }

  // These functions use cached values if available.
  const std::array<double, 9> &calcCanonicalRotation();
  const std::array<double, 3> &calcCanonicalTranslation();
  const std::array<double, 3> &calcEigenValues();
  const std::array<size_t, 6> &calcExtremes();
  // Return the principal moments of inertia, if Eigen3 is available, and the
  // eigenvalues of the canonical transformation if not.
  std::array<double, 3> calcMomentsOfInertia(bool includeColors = false) const;

  // Align the principal axes to the cartesian axes and centre on the origin.
  // Doesn't require that the shape was created from a molecule.  Creates
  // the necessary transformation if not already done.
  void normalizeCoords();
  // Apply xform to all the points. Afterwards the shape is flagged as not
  // normalized and the cached normalization as out of date.
  void transformCoords(RDGeom::Transform3D &xform);

  // Mock a molecule up from the shape for visual inspection and sometimes
  // calculation of the normalization matrices.  No bonds.
  // Atoms are C, features are N.
  virtual std::unique_ptr<RWMol> shapeToMol(bool includeColors = true) const;

#ifdef RDK_USE_BOOST_SERIALIZATION
  // Every data member is archived, whether or not its cache is valid.
  template <class Archive>
  void serialize(Archive &ar, const unsigned int) {
    ar & d_coords;
    ar & d_types;
    ar & d_numAtoms;
    ar & d_numFeats;
    ar & d_selfOverlapVol;
    ar & d_selfOverlapColor;
    ar & d_extremePoints;
    ar & d_carbonRadii;
    ar & d_normalized;
    ar & d_normalizationOK;
    ar & d_canonRot;
    ar & d_canonTrans;
    ar & d_eigenValues;
  }
#endif

 private:
  void extractAtoms(const ROMol &mol, int confId,
                    const ShapeInputOptions &opts);
  // Extract the features for the color scores, using RDKit pphore features
  // for now.  Other options to be added later.
  void extractFeatures(const ROMol &mol, int confId,
                       const ShapeInputOptions &shapeOpts);

  // Calculate the rotation and translation that will align the principal axes
  // to the cartesian axes and centre on the origin.
  void calcNormalization();
  void calculateExtremes();

  // All members below carry an initializer so that the object never holds
  // indeterminate values: serialize() reads every one of them regardless of
  // whether the corresponding cache has been filled in yet.

  // The coordinates and alpha parameter for the atoms and features, packed
  // as 4 doubles per item - x, y, z and alpha.  alpha is KAPPA / (r * r)
  // where r is the radius of the atom.
  std::vector<double> d_coords;
  // The feature types.  There is one entry per point, with 0 for the atoms.
  std::vector<int> d_types;
  unsigned int d_numAtoms{0};      // The number of atoms
  unsigned int d_numFeats{0};      // The number of features
  double d_selfOverlapVol{0.0};    // Shape volume
  double d_selfOverlapColor{0.0};  // Color volume
  // These are the points at the extremes of the x, y and z axes.
  // they are min_x, min_y, min_z and max_x, max_y, max_z.
  std::array<size_t, 6> d_extremePoints{};
  // Flags those atoms with a carbon radius, for faster calculation later.
  std::unique_ptr<boost::dynamic_bitset<>> d_carbonRadii;

  // This is the rotation and translation to align the principal axes of the
  // shape with cartesian axes.  If d_normalized is true, it has been applied
  // to the coordinates.
  bool d_normalized{false};
  // If the shape is moved, the normalization matrices are no longer valid.
  // This flags that so it is re-computed as required.
  bool d_normalizationOK{false};
  // Row-major 3x3 rotation; the identity until calculated.
  std::array<double, 9> d_canonRot{1.0, 0.0, 0.0, 0.0, 1.0,
                                   0.0, 0.0, 0.0, 1.0};
  std::array<double, 3> d_canonTrans{};
  // The sorted eigenvalues of the principal axes.
  std::array<double, 3> d_eigenValues{};
};
// Calculate the mean position of the given atoms.
// ats holds atom indices into conformer confId of mol.
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Point3D computeFeaturePos(
const ROMol &mol, int confId, const std::vector<unsigned int> &ats);
// NOTE(review): presumably builds a transformation from a 4-value quaternion
// and a 3-value translation - the definition is not in view, confirm.
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Transform3D quatTransToTransform(
const double *quat, const double *trans);
// Apply the transformation to the coordinates assumed to be in
// ShapeInput.d_coords form.
// This overload transforms the points in place.
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
std::vector<double> &shape, RDGeom::Transform3D &xform);
// This overload copies numPoints points (4 doubles each) from inShape to
// outShape and transforms the copies, leaving inShape untouched.
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
const double *inShape, double *outShape, size_t numPoints,
RDGeom::Transform3D &xform);
// Add translation to the x, y, z of every point of the shape, in place.
// The 4th value of each point is not changed.
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
std::vector<double> &shape, const RDGeom::Point3D &translation);
// As above, but writes the translated copy of numPoints points (4 doubles
// each) to outShape.
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
const double *inShape, double *outShape, size_t numPoints,
const RDGeom::Point3D &translation);
} // namespace GaussianShape
} // namespace RDKit
#endif // RDKIT_SHAPEINPUT_GUARD

View File

@@ -0,0 +1,119 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// Options for the Roshambo2-based shape overlay.
#ifndef RDKIT_SHAPEOVERLAYOPTIONS_GUARD
#define RDKIT_SHAPEOVERLAYOPTIONS_GUARD
#include <RDGeneral/export.h>
namespace RDKit {
class ROMol;
namespace GaussianShape {
//! The different modes for starting the optimisation.
enum class RDKIT_GAUSSIANSHAPE_EXPORT StartMode {
ROTATE_0, //!< No rotation, just normalization if requested
ROTATE_180, //!< ROTATE_0 plus rotate by 180 degrees about each of x, y, z
ROTATE_180_WIGGLE, //!< ROTATE_180 plus, as the PubChem code does, rotate by a
//!< small amount (~25 degrees) about each axis and use the
//!< highest scoring of those as the start point for that
//!< rotation angle
ROTATE_45, //!< ROTATE_180 plus rotate by 45 degrees about pairs of each of x,
//!< y, z
ROTATE_0_FRAGMENT, //!< No rotation, translate probe to each end of ref
ROTATE_180_FRAGMENT, //!< Translate probe to each end of ref and then
//!< ROTATE_180
ROTATE_45_FRAGMENT, //!< Translate probe to each end of ref and then ROTATE_45
A_LA_PUBCHEM, //!< Uses the eigenvalues of the principal vectors to decide
//!< whether to do ROTATE_180_WIGGLE or ROTATE_45
};
//! What drives the optimisation and what goes into the score.
enum class RDKIT_GAUSSIANSHAPE_EXPORT OptimMode {
SHAPE_ONLY, //!< Drive the optimisation by shape overlap only.
SHAPE_PLUS_COLOR_SCORE, //!< Drive the optimisation by shape, but include
//!< color in the score to determine the best
//!< solution. Color never used in the optimisation
//!< stage.
SHAPE_PLUS_COLOR, //!< Drive the optimisation by overlap of shape and color
//!< features.
};
//! Options controlling how one shape is overlaid onto another.
struct RDKIT_GAUSSIANSHAPE_EXPORT ShapeOverlayOptions {
  //! Different modes for starting the optimisation.  Default is as used by
  //! the PubChem code.  The molecules are normalized so the principal axes
  //! are along the cartesian axes rather than the shape quadrupole axes as
  //! Grant et al. did.
  StartMode startMode{StartMode::A_LA_PUBCHEM};

  //! Optimisation mode.
  OptimMode optimMode{OptimMode::SHAPE_PLUS_COLOR_SCORE};

  //! When doing a Tversky similarity, the alpha value.  If alpha and beta
  //! are both the default 1.0, it's a Tanimoto similarity.  A high alpha and
  //! low beta emphasize the fit volume in the similarity and vice versa.
  //! Tversky is O / (A * (R - O) + B * (F - O) + O) where O is the overlap
  //! volume, R is the reference's volume and F is the fit's volume.  This is
  //! different from that used by OpenEye (O / (A * R + B * F)).
  double simAlpha{1.0};

  //! When doing a Tversky similarity, the beta value.
  double simBeta{1.0};

  //! If using colors, the relative weights of shape and color scores.
  double optParam{0.5};

  //! Maximum number of steps for optimiser to take.
  int nSteps{100};

  //! Whether to normalise the shapes by putting them into their canonical
  //! conformations (centred at the origin, aligned along its principal
  //! axes) before starting.
  bool normalize{true};

  //! Whether to use a distance cutoff for the volume calculation.
  bool useDistCutoff{true};

  //! The distance cutoff.  If 2 atoms are more than this distance apart,
  //! they are not included in the volume calculation.  A smaller value is
  //! faster but less precise.
  double distCutoff{4.5};

  //! Optimisation stops when the shape score changes by less than this
  //! amount.  A larger number is faster but less precise.
  double shapeConvergenceCriterion{0.001};
};
//! Write the name of the StartMode enumerator to the stream.  Nothing is
//! written for a value that isn't one of the enumerators.
inline std::ostream &operator<<(std::ostream &os, const StartMode &sm) {
  const char *label = nullptr;
  // No default case, so the compiler can warn if an enumerator is missed.
  switch (sm) {
    case StartMode::ROTATE_0:
      label = "ROTATE_0";
      break;
    case StartMode::ROTATE_180:
      label = "ROTATE_180";
      break;
    case StartMode::ROTATE_180_WIGGLE:
      label = "ROTATE_180_WIGGLE";
      break;
    case StartMode::ROTATE_45:
      label = "ROTATE_45";
      break;
    case StartMode::ROTATE_0_FRAGMENT:
      label = "ROTATE_0_FRAGMENT";
      break;
    case StartMode::ROTATE_180_FRAGMENT:
      label = "ROTATE_180_FRAGMENT";
      break;
    case StartMode::ROTATE_45_FRAGMENT:
      label = "ROTATE_45_FRAGMENT";
      break;
    case StartMode::A_LA_PUBCHEM:
      label = "A_LA_PUBCHEM";
      break;
  }
  if (label != nullptr) {
    os << label;
  }
  return os;
}
} // namespace GaussianShape
} // namespace RDKit
#endif // RDKIT_SHAPEOVERLAYOPTIONS_GUARD

View File

@@ -0,0 +1,638 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
#include <algorithm>
#include <array>
#include <cmath>
#include <numbers>
#include <numeric>
#include <Geometry/point.h>
#include <Geometry/Transform3D.h>
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
// Number of doubles per point in a packed shape array; used as the stride
// when stepping past the shape atoms to reach the color features.
constexpr int D = 4;
namespace RDKit {
namespace GaussianShape {
// Set up an alignment of the fit shape onto the reference shape. The
// reference is moved by the translation in initQuatTrans[4..6] and the fit
// is rotated by the quaternion in initQuatTrans[0..3]. Scratch buffers the
// same size as each shape are also allocated, along with 12 gradient
// converter values for every fit point (shape and color).
SingleConformerAlignment::SingleConformerAlignment(
    const std::vector<double> &ref, const int *refTypes,
    const boost::dynamic_bitset<> *refCarbonRadii, int nRefShape, int nRefColor,
    double refShapeVol, double refColorVol, const std::vector<double> &fit,
    const int *fitTypes, const boost::dynamic_bitset<> *fitCarbonRadii,
    int nFitShape, int nFitColor, double fitShapeVol, double fitColorVol,
    const std::array<double, 7> &initQuatTrans, OptimMode optimMode,
    double simAlpha, double simBeta, double mixingParam, bool useCutoff,
    double distCutoff, double shapeConvergenceCriterion, unsigned int maxIts)
    : d_ref(ref),
      d_refTypes(refTypes),
      d_refCarbonRadii(refCarbonRadii),
      d_nRefShape(nRefShape),
      d_nRefColor(nRefColor),
      d_refShapeVol(refShapeVol),
      d_refColorVol(refColorVol),
      d_fit(fit),
      d_fitTypes(fitTypes),
      d_fitCarbonRadii(fitCarbonRadii),
      d_nFitShape(nFitShape),
      d_nFitColor(nFitColor),
      d_fitShapeVol(fitShapeVol),
      d_fitColorVol(fitColorVol),
      d_initQuatTrans(initQuatTrans),
      d_optimMode(optimMode),
      d_simAlpha(simAlpha),
      d_simBeta(simBeta),
      d_mixingParam(mixingParam),
      d_useCutoff(useCutoff),
      d_distCutoff2(distCutoff * distCutoff),
      d_shapeConvergenceCriterion(shapeConvergenceCriterion),
      d_maxIts(maxIts) {
  // The reference is the one that gets translated, so that the fit stays at
  // the origin where the rotations work properly.
  const RDGeom::Point3D initialTrans{d_initQuatTrans[4], d_initQuatTrans[5],
                                     d_initQuatTrans[6]};
  translateShape(d_ref, initialTrans);

  // The initial rotation goes onto the fit.
  RDGeom::Transform3D initialRot;
  initialRot.SetRotationFromQuaternion(d_initQuatTrans.data());
  applyTransformToShape(d_fit, initialRot);

  d_refTemp.resize(d_ref.size());
  d_fitTemp.resize(d_fit.size());
  d_gradConverters.resize(12 * (d_nFitShape + d_nFitColor));
}
// Set xform to the product of three transformations: the reverse of the
// initial translation, the current quaternion and translation held in
// d_quatTrans, and the initial rotation, multiplied in that order.
void SingleConformerAlignment::getFinalQuatTrans(
    RDGeom::Transform3D &xform) const {
  RDGeom::Transform3D current;
  current.SetRotationFromQuaternion(d_quatTrans.data());
  const RDGeom::Point3D currentTrans{d_quatTrans[4], d_quatTrans[5],
                                     d_quatTrans[6]};
  current.SetTranslation(currentTrans);

  RDGeom::Transform3D undoInitialTrans;
  const RDGeom::Point3D negInitialTrans{
      -d_initQuatTrans[4], -d_initQuatTrans[5], -d_initQuatTrans[6]};
  undoInitialTrans.SetTranslation(negInitialTrans);

  RDGeom::Transform3D initialRot;
  initialRot.SetRotationFromQuaternion(d_initQuatTrans.data());

  xform = undoInitialTrans * current * initialRot;
}
std::array<double, 5> SingleConformerAlignment::calcScores(const double *ref,
const double *fit,
bool includeColor) {
std::array<double, 5> scores{0.0, 0.0, 0.0, 0.0, 0.0};
scores[3] = calcVolAndGrads(ref, d_nRefShape, d_refCarbonRadii, fit,
d_nFitShape, d_fitCarbonRadii, d_gradConverters,
d_useCutoff, d_distCutoff2, nullptr, nullptr);
if (d_nRefColor && d_nFitColor &&
(d_optimMode == OptimMode::SHAPE_PLUS_COLOR || includeColor)) {
scores[4] = calcVolAndGrads(ref + d_nRefShape * D, d_nRefColor,
d_refTypes + d_nRefShape, fit + d_nFitShape * D,
d_nFitColor, d_fitTypes + d_nFitShape,
d_nFitShape, d_gradConverters, d_useCutoff,
d_distCutoff2, nullptr, nullptr);
}
scores = calcScores(scores[3], scores[4], includeColor);
return scores;
}
std::array<double, 5> SingleConformerAlignment::calcScores(bool includeColor) {
applyQuatTrans(d_quatTrans);
return calcScores(d_refTemp.data(), d_fitTemp.data(), includeColor);
}
std::array<double, 5> SingleConformerAlignment::calcScores(
const double shapeOvVol, const double colorOvVol, bool includeColor) const {
std::array<double, 5> scores{0.0, 0.0, 0.0, 0.0, 0.0};
scores[3] = shapeOvVol;
scores[4] = colorOvVol;
scores[1] =
shapeOvVol / (d_simAlpha * (d_refShapeVol - shapeOvVol) +
d_simBeta * (d_fitShapeVol - shapeOvVol) + shapeOvVol);
if (d_nRefColor && d_nFitColor && d_refColorVol > 0.0 &&
d_fitColorVol > 0.0 && includeColor) {
scores[2] =
colorOvVol / (d_simAlpha * (d_refColorVol - colorOvVol) +
d_simBeta * (d_fitColorVol - colorOvVol) + colorOvVol);
scores[0] = scores[1] * (1 - d_mixingParam) + scores[2] * d_mixingParam;
} else {
scores[0] = scores[1];
}
return scores;
}
namespace {
// Fill in the values that convert cartesian gradients into quaternion
// gradients.  This uses the chain rule: dV/dQ = (dV/dr) * (dr/dQ) where V is
// the volume overlap and r is the cartesian space.
// quat is the quaternion (4 values); mol is numBPts points packed 4 doubles
// apiece, of which x, y, z are used.  12 values are written per point,
// starting at gradConverters[12 * gradConvOffset], in the order d(x,y,z)/dq,
// d(x,y,z)/dr, d(x,y,z)/ds, d(x,y,z)/du.  gradConverters must already be
// large enough.
void cartToQuatGrads(const double *quat, const double *mol, int numBPts,
                     std::vector<double> &gradConverters, int gradConvOffset) {
  const auto q = quat[0];
  const auto r = quat[1];
  const auto s = quat[2];
  const auto u = quat[3];
  const auto coef = 1.0 / (q * q + r * r + s * s + u * u);

  for (int pt = 0; pt < numBPts; ++pt) {
    const double *xyz = mol + 4 * pt;
    const auto x = xyz[0];
    const auto y = xyz[1];
    const auto z = xyz[2];

    // Only 4 of the 12 values are independent; the other 8 are one of
    // these or its negative.
    const auto dx_dq = coef * 2.0 * (q * x + u * y - s * z);
    const auto dx_dr = coef * 2.0 * (r * x + s * y + u * z);
    const auto dy_dr = coef * 2.0 * (s * x - r * y + q * z);
    const auto dx_du = coef * 2.0 * (-u * x + q * y + r * z);

    double *out = &gradConverters[12 * (gradConvOffset + pt)];
    // d/dq
    out[0] = dx_dq;
    out[1] = dx_du;   // dy_dq
    out[2] = dy_dr;   // dz_dq
    // d/dr
    out[3] = dx_dr;
    out[4] = dy_dr;
    out[5] = -dx_du;  // dz_dr
    // d/ds
    out[6] = -dy_dr;  // dx_ds
    out[7] = dx_dr;   // dy_ds
    out[8] = dx_dq;   // dz_ds
    // d/du
    out[9] = dx_du;
    out[10] = -dx_dq;  // dy_du
    out[11] = dx_dr;   // dz_du
  }
}
}  // namespace
// atoms/shape features
// Return the overlap volume between the numRefPts points of ref and the
// numFitPts points of fit (both packed 4 doubles per point: x, y, z,
// alpha).  If gradients is non-null, quat must be supplied and the overlap
// gradients are accumulated into gradients[1..6] (3 quaternion components,
// then x, y, z); gradients[0] is never touched.  Pairs further apart than
// the cutoff (given as a squared distance) are skipped if useCutoff is true.
double calcVolAndGrads(const double *ref, int numRefPts,
                       const boost::dynamic_bitset<> *refCarbonRadii,
                       const double *fit, int numFitPts,
                       const boost::dynamic_bitset<> *fitCarbonRadii,
                       std::vector<double> &gradConverters, bool useCutoff,
                       double distCutoff2, const double *quat,
                       double *gradients) {
  if (gradients) {
    cartToQuatGrads(quat, fit, numFitPts, gradConverters, 0);
  }
  // Pre-computed prefactor for a pair of points that both have the carbon
  // radius.
  static const double CARBON_A = KAPPA / (1.7 * 1.7);
  static const double CARBON_BIT =
      8.0 * pow(std::numbers::pi / (2 * CARBON_A), 1.5);
  // If either of the carbon radii flags aren't supplied, treat them
  // both as being all carbon.  There isn't enough information to do
  // otherwise.
  const bool allCarbon = !refCarbonRadii || !fitCarbonRadii;

  double vol = 0.0;
  for (int refPt = 0; refPt < numRefPts; ++refPt) {
    const double *rp = ref + 4 * refPt;
    const auto ai = rp[3];
    for (int fitPt = 0; fitPt < numFitPts; ++fitPt) {
      const double *fp = fit + 4 * fitPt;
      const auto dx = rp[0] - fp[0];
      const auto dy = rp[1] - fp[1];
      const auto dz = rp[2] - fp[2];
      const auto d2 = dx * dx + dy * dy + dz * dz;
      if (useCutoff && d2 > distCutoff2) {
        continue;
      }
      const auto aj = fp[3];
      const auto mult = -(ai * aj) / (ai + aj);
      const auto kij = exp(mult * d2);

      const bool useCarbonBit =
          allCarbon || ((*refCarbonRadii)[refPt] && (*fitCarbonRadii)[fitPt]);
      double vij;
      if (useCarbonBit) {
        vij = kij * CARBON_BIT;
      } else {
        const auto pi_ai_aj = std::numbers::pi / (ai + aj);
        vij = 8 * kij * pi_ai_aj * std::sqrt(pi_ai_aj);
      }
      vol += vij;

      if (!gradients) {
        continue;
      }
      const auto r = 2.0 * vij * mult;
      // The 12 converters for this fit point: 3 each for the 4 quaternion
      // components.  The first 3 would give gradients[0], which is never
      // used, so it isn't calculated.
      const double *conv = gradConverters.data() + 12 * fitPt;
      gradients[1] += r * (dx * conv[3] + dy * conv[4] + dz * conv[5]);
      gradients[2] += r * (dx * conv[6] + dy * conv[7] + dz * conv[8]);
      gradients[3] += r * (dx * conv[9] + dy * conv[10] + dz * conv[11]);
      gradients[4] += r * dx;
      gradients[5] += r * dy;
      gradients[6] += r * dz;
    }
  }
  return vol;
}
// color features
// Return the overlap volume between the numRefPts features of ref and the
// numFitPts features of fit (both packed 4 doubles per point: x, y, z,
// alpha).  Only pairs of features with the same type overlap; refTypes and
// fitTypes give the type of each feature.  If gradients is non-null, quat
// must be supplied and the overlap gradients are accumulated into
// gradients[1..6] (3 quaternion components, then x, y, z); gradients[0] is
// never touched.  Pairs further apart than the cutoff (given as a squared
// distance) are skipped if useCutoff is true.
//
// numFitShape is the number of shape atoms that precede the features in the
// fit.  The gradient converters for the features are stored after those of
// the shape atoms, i.e. starting at gradConverters[12 * numFitShape], and
// must be read back from there too: reading from the start of the vector
// would pick up the converters belonging to the shape atoms.
double calcVolAndGrads(const double *ref, int numRefPts, const int *refTypes,
                       const double *fit, int numFitPts, const int *fitTypes,
                       int numFitShape, std::vector<double> &gradConverters,
                       const bool useCutoff, const double distCutoff2,
                       const double *quat, double *gradients) {
  double vol = 0.0;
  if (gradients) {
    cartToQuatGrads(quat, fit, numFitPts, gradConverters, numFitShape);
  }
  for (int refPt = 0; refPt < numRefPts; ++refPt) {
    const double *rp = ref + 4 * refPt;
    const auto ai = rp[3];
    const auto aType = refTypes[refPt];
    for (int fitPt = 0; fitPt < numFitPts; ++fitPt) {
      if (aType != fitTypes[fitPt]) {
        continue;
      }
      const double *fp = fit + 4 * fitPt;
      const auto dx = rp[0] - fp[0];
      const auto dy = rp[1] - fp[1];
      const auto dz = rp[2] - fp[2];
      const auto d2 = dx * dx + dy * dy + dz * dz;
      if (useCutoff && d2 > distCutoff2) {
        continue;
      }
      const auto aj = fp[3];
      const auto mult = -(ai * aj) / (ai + aj);
      const auto kij = exp(mult * d2);
      const auto pi_ai_aj = std::numbers::pi / (ai + aj);
      const auto vij = 8 * kij * pi_ai_aj * std::sqrt(pi_ai_aj);
      vol += vij;

      if (!gradients) {
        continue;
      }
      const auto r = 2.0 * vij * mult;
      // The 12 converters for this feature, at the same offset that
      // cartToQuatGrads() wrote them.  The first 3 would give gradients[0],
      // which is never used, so it isn't calculated.
      const double *conv = gradConverters.data() + 12 * (numFitShape + fitPt);
      gradients[1] += r * (dx * conv[3] + dy * conv[4] + dz * conv[5]);
      gradients[2] += r * (dx * conv[6] + dy * conv[7] + dz * conv[8]);
      gradients[3] += r * (dx * conv[9] + dy * conv[10] + dz * conv[11]);
      gradients[4] += r * dx;
      gradients[5] += r * dy;
      gradients[6] += r * dz;
    }
  }
  return vol;
}
void SingleConformerAlignment::applyQuatTrans(
const std::array<double, 7> &quatTrans) {
// Leave fit at the origin, and move ref to meet it.
RDGeom::Point3D translateA{-quatTrans[4], -quatTrans[5], -quatTrans[6]};
translateShape(d_ref.data(), d_refTemp.data(), d_nRefShape + d_nRefColor,
translateA);
// Rotate fit by quaternion
// double tq[4]{quatTrans[0], quatTrans[1], quatTrans[2], quatTrans[3]};
RDGeom::Transform3D transformB;
transformB.SetRotationFromQuaternion(quatTrans.data());
applyTransformToShape(d_fit.data(), d_fitTemp.data(),
d_nFitShape + d_nFitColor, transformB);
}
// Apply quatTrans to the working copies of ref and fit, then compute the
// shape overlap volume and its gradients. In SHAPE_PLUS_COLOR mode the color
// overlap volume and gradients are computed too and blended into gradients;
// in every other mode colorOvlpVol is returned as 0.0.
// gradients[0] is never filled in by the volume routines and stays 0.0.
void SingleConformerAlignment::calcVolumeAndGradients(
    const std::array<double, 7> &quatTrans, double &shapeOvlpVol,
    double &colorOvlpVol, std::array<double, 7> &gradients) {
  // Set the coords up. We assume that d_refTemp was once initialised to d_ref
  // and the same with fit so that the radii are already there.
  applyQuatTrans(quatTrans);
  gradients.fill(0.0);
  shapeOvlpVol = calcVolAndGrads(
      d_refTemp.data(), d_nRefShape, d_refCarbonRadii, d_fitTemp.data(),
      d_nFitShape, d_fitCarbonRadii, d_gradConverters, d_useCutoff,
      d_distCutoff2, quatTrans.data(), gradients.data());
  if (d_optimMode != OptimMode::SHAPE_PLUS_COLOR) {
    colorOvlpVol = 0.0;
    return;
  }

  // The color features follow the shape points in the packed arrays.
  std::array<double, 7> colorGrads{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  colorOvlpVol = calcVolAndGrads(
      d_refTemp.data() + 4 * d_nRefShape, d_nRefColor,
      d_refTypes + d_nRefShape, d_fitTemp.data() + 4 * d_nFitShape,
      d_nFitColor, d_fitTypes + d_nFitShape, d_nFitShape, d_gradConverters,
      d_useCutoff, d_distCutoff2, quatTrans.data(), colorGrads.data());

  // The color gradients are normally dwarfed by the shape gradients, so
  // normalize them and then mix by the same rule as the final score.
  // Element 0 is skipped throughout because it is never computed.
  const auto gradNorm = [](const std::array<double, 7> &g) -> double {
    return sqrt(std::accumulate(
        g.begin() + 1, g.end(), 0.0,
        [](const auto init, const auto v) -> double { return init + v * v; }));
  };
  const double shapeSum = gradNorm(gradients);
  const double colorSum = gradNorm(colorGrads);
  // With no overlapping color features every color gradient is zero; avoid
  // the 0 * (x / 0) = NaN that would otherwise poison the whole gradient.
  const double ratio = colorSum > 0.0 ? shapeSum / colorSum : 0.0;
  // Both ranges must start at element 1 so that each shape gradient is mixed
  // with the color gradient of the same component.
  std::transform(
      gradients.begin() + 1, gradients.end(), colorGrads.begin() + 1,
      gradients.begin() + 1, [&](const auto g1, const auto g2) -> double {
        return g1 * (1 - d_mixingParam) + g2 * ratio * d_mixingParam;
      });
}
// Run one optimisation cycle and report the outcome in scores.
// Cycle 0 is a short run of 10 iterations; any later cycle gets whatever is
// left of the d_maxIts budget after those 10.
// scores layout as written below:
//   0-4   the values returned by calcScores
//   5, 6  shape and color volume of ref
//   7, 8  shape and color volume of fit
//   9-12  the optimised quaternion, 13-15 the optimised translation
//   16-19 always 0.0
// Returns the value from optimise(), i.e. false if the iteration limit was
// reached before convergence.
bool SingleConformerAlignment::doOverlay(std::array<double, 20> &scores,
                                         unsigned int cycle) {
  constexpr unsigned int initialIters = 10;
  // d_maxIts is unsigned: subtracting unconditionally would wrap round to a
  // huge iteration limit whenever d_maxIts < 10.
  const unsigned int maxIters =
      cycle == 0
          ? initialIters
          : (d_maxIts > initialIters ? d_maxIts - initialIters : 0u);
  const bool finished = optimise(maxIters);

  // Get the final coords for fit into d_fitTemp, and compute the scores
  RDGeom::Transform3D xform;
  xform.SetRotationFromQuaternion(d_quatTrans.data());
  xform.SetTranslation(
      RDGeom::Point3D{d_quatTrans[4], d_quatTrans[5], d_quatTrans[6]});
  applyTransformToShape(d_fit.data(), d_fitTemp.data(),
                        d_nFitShape + d_nFitColor, xform);
  const auto tscores = calcScores(d_ref.data(), d_fitTemp.data(), true);

  for (unsigned int i = 0; i < 5; ++i) {
    scores[i] = tscores[i];
  }
  scores[5] = d_refShapeVol;
  scores[6] = d_refColorVol;
  scores[7] = d_fitShapeVol;
  scores[8] = d_fitColorVol;
  for (unsigned int i = 0; i < 7; ++i) {
    scores[9 + i] = d_quatTrans[i];
  }
  for (unsigned int i = 16; i < 20; ++i) {
    scores[i] = 0.0;
  }
  return finished;
}
namespace {
// Step for a single quaternion/translation component.
// If the gradient kept its sign since the previous iteration the step is just
// stepSize * grad. If the sign flipped, the step goes to the linearly
// interpolated zero of the gradient between the old and current positions,
// with its magnitude capped at that of the plain gradient step.
double oneStep(double grad, double stepSize, double quatTrans, double oldGrad,
               double oldQuatTrans) {
  const double plainStep = stepSize * grad;
  if (std::signbit(grad) == std::signbit(oldGrad)) {
    return plainStep;
  }
  const double absGrad = fabs(grad);
  const double absOldGrad = fabs(oldGrad);
  double interpStep =
      (((quatTrans * absOldGrad) + (oldQuatTrans * absGrad)) /
       (absOldGrad + absGrad)) -
      quatTrans;
  if (fabs(interpStep) > fabs(plainStep)) {
    // As in the PubChem code: keep the sign of the interpolated step but
    // shrink it to the size of the plain gradient step.
    interpStep *= fabs(plainStep / interpStep);
  }
  return interpStep;
}
// Fill step[1..6] from the current gradients; step[0] is always set to 0.0.
// Components 1-3 (quaternion) use qStepSize, components 4-6 (translation) use
// tStepSize. On the first iteration there is no history, so the step is the
// plain stepSize * grad; afterwards oneStep() decides using the previous
// gradient and position.
void calcStep(std::array<double, 7> &grad, double qStepSize, double tStepSize,
              std::array<double, 7> &oldGrad, std::array<double, 7> &quatTrans,
              std::array<double, 7> &oldQuatTrans, unsigned int iter,
              std::array<double, 7> &step) {
  const bool firstIteration = iter == 0;
  step[0] = 0.0;
  for (unsigned int comp = 1; comp < 7; ++comp) {
    const double stepSize = comp < 4 ? qStepSize : tStepSize;
    if (firstIteration) {
      // The old values are not read at all on the first iteration.
      step[comp] = stepSize * grad[comp];
    } else {
      step[comp] = oneStep(grad[comp], stepSize, quatTrans[comp],
                           oldGrad[comp], oldQuatTrans[comp]);
    }
  }
}
// Limit the size of a 3-component step in place.
// @param maxStep   largest allowed absolute value for a component
// @param step      pointer to the 3 components to constrain
// @param checkSize if true, additionally shrink the step when its squared
//                  length exceeds 1.0 (used for the quaternion part, whose
//                  zeroth component is later derived as sqrt(1 - |step|^2))
// @return the largest absolute component as it was on entry.
// When the largest component exceeds maxStep, every component whose magnitude
// exceeds maxStep is multiplied by maxStep / largest.
double constrainStep(double maxStep, double *step, bool checkSize) {
  const double mStep =
      std::max({fabs(step[0]), fabs(step[1]), fabs(step[2])});
  if (mStep > maxStep) {
    const double scaleFactor = maxStep / mStep;
    for (int i = 0; i < 3; ++i) {
      // The comparison has to be on the magnitude: previously the fabs()
      // wrapped the comparison itself, so large negative components were
      // never scaled.
      if (fabs(step[i]) > maxStep) {
        step[i] *= scaleFactor;
      }
    }
  }
  if (checkSize) {
    const double quatSquared =
        step[0] * step[0] + step[1] * step[1] + step[2] * step[2];
    if (quatSquared > 1.0) {
      const double scaleFactor = 1.0 / (2.0 * quatSquared);
      step[0] *= scaleFactor;
      step[1] *= scaleFactor;
      step[2] *= scaleFactor;
    }
  }
  return mStep;
}
// Compose two 7-element quaternion+translation arrays: elements 0-3 of the
// result are the quaternion product q1 * q2 (both assumed to be normalised),
// elements 4-6 are the sum of the two translations.
std::array<double, 7> combineQuatTrans(const std::array<double, 7> &q1,
                                       const std::array<double, 7> &q2) {
  const auto &[w1, x1, y1, z1, tx1, ty1, tz1] = q1;
  const auto &[w2, x2, y2, z2, tx2, ty2, tz2] = q2;
  return {
      // Quaternion product.
      w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
      w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
      w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
      w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
      // Translations simply add.
      tx1 + tx2,
      ty1 + ty2,
      tz1 + tz2,
  };
}
// Revised step for one component after a trial step made the score worse.
// @param grad          gradient at the trial position
// @param oldGrad       gradient at the position the trial started from
// @param quatTrans     component value at the trial position
// @param oldQuatTrans  component value at the starting position
// @param stepSize      step size for this kind of component
// @param step          the step that was just tried
// @return the new step to try.
// Four cases:
//  - gradient changed sign: aim at the linearly interpolated zero of the
//    gradient, capped in magnitude at stepSize * grad;
//  - small gradient (|grad| <= 1): plain stepSize * grad;
//  - gradient grew: add stepSize * grad to the previous step;
//  - otherwise: secant-style correction of the previous step, limited in
//    size.
double oneReduceStep(double grad, double oldGrad, double quatTrans,
                     double oldQuatTrans, double stepSize, double step) {
  if (std::signbit(grad) != std::signbit(oldGrad)) {
    // The interpolation weights must be normalised by |oldGrad| + |grad|,
    // as in oneStep(). The previous fabs(oldGrad + fabs(grad)) could be
    // zero for gradients of equal size and opposite sign.
    step = (((quatTrans * fabs(oldGrad)) + (oldQuatTrans * fabs(grad))) /
            (fabs(oldGrad) + fabs(grad))) -
           quatTrans;
    const double newStep = stepSize * grad;
    if (fabs(step) > fabs(newStep)) {
      step *= fabs(newStep / step);
    }
  } else if (fabs(grad) <= 1.0) {
    step = stepSize * grad;
  } else if (fabs(grad) > fabs(oldGrad)) {
    // Going wrong way relative to other components?
    step += stepSize * grad;
  } else {
    double delta = grad * (step / (oldGrad - grad));
    if (fabs(delta) > fabs(step * 0.1) && fabs(delta) > 0.001) {
      delta *= 0.0005 / fabs(delta);
    }
    step += delta;
  }
  return step;
}
// Work out a new step[1..6] after a trial step failed to improve the score.
// On the third attempt (lineIter == 2) the step is rebuilt from the old
// gradients with step sizes cut to a tenth; the step sizes are then
// multiplied by 5, so they leave this function at half their incoming value.
// On every other attempt each component is revised by oneReduceStep().
void reduceStep(std::array<double, 7> &grad, std::array<double, 7> &oldGrad,
                std::array<double, 7> quatTrans,
                std::array<double, 7> &oldQuatTrans, unsigned int lineIter,
                std::array<double, 7> &step, double &qStepSize,
                double &tStepSize) {
  // Components 1-3 are the quaternion, 4-6 the translation.
  constexpr unsigned int firstTransComp = 4;
  if (lineIter != 2) {
    for (unsigned int comp = 1; comp < 7; ++comp) {
      // The original PubChem code used qStepSize for all 6 of these updates,
      // but that would appear to be a cut-and-paste error.
      const double stepSize = comp < firstTransComp ? qStepSize : tStepSize;
      step[comp] = oneReduceStep(grad[comp], oldGrad[comp], quatTrans[comp],
                                 oldQuatTrans[comp], stepSize, step[comp]);
    }
    return;
  }
  qStepSize *= 0.1;
  tStepSize *= 0.1;
  for (unsigned int comp = 1; comp < 7; ++comp) {
    const double stepSize = comp < firstTransComp ? qStepSize : tStepSize;
    step[comp] = stepSize * oldGrad[comp];
  }
  qStepSize *= 5.0;
  tStepSize *= 5.0;
}
} // namespace
// The optimisation follows closely the procedure used in the PubChem
// code from
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp
// Original Authors: Evan Bolton, Leonid Zaslavsky, Paul Thiessen
// Iteratively improve d_quatTrans so as to increase the combo score
// (element 0 of calcScores) computed from the shape and color overlap
// volumes. Runs at most maxIters outer iterations; each one takes a gradient
// based step followed by a short back-tracking "line search".
// Returns true if the convergence test ended the loop, false if maxIters ran
// out first (including when maxIters is 0).
// Side effects: updates d_quatTrans, and d_qStepSize/d_tStepSize may be
// altered by reduceStep().
bool SingleConformerAlignment::optimise(unsigned int maxIters) {
const double maxQuaternionStep = 0.075; // Maximum step size for quaternion
const double maxTranslationStep = 0.500; // Maximum step size for translation
const double minQuaternionStep =
0.0002; // Convergence criteria for quaternion
const double minTranslationStep =
0.0020; // Convergence criteria for translation
// grad is filled in by calcVolumeAndGradients before it is read.
std::array<double, 7> grad;
std::array<double, 7> oldGrad{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
// oldQuatTrans is not read by calcStep on iteration 0, and is assigned
// before any later use.
std::array<double, 7> oldQuatTrans;
std::array<double, 7> step;
double shapeOvlpVol, colorOvlpVol, comboScore = 0.0;
bool finished = false;
for (unsigned iter = 0; iter < maxIters; iter++) {
calcVolumeAndGradients(d_quatTrans, shapeOvlpVol, colorOvlpVol, grad);
// Note that the combo score will have a zero color score so will be half
// the shape score unless we're optimising with color gradients.
auto scores = calcScores(shapeOvlpVol, colorOvlpVol);
comboScore = scores[0];
// The step is computed while oldGrad/oldQuatTrans still hold the values
// from the previous iteration; they are refreshed just below.
calcStep(grad, d_qStepSize, d_tStepSize, oldGrad, d_quatTrans, oldQuatTrans,
iter, step);
// In case we have to backtrack
double oldComboScore = comboScore;
oldQuatTrans = d_quatTrans;
oldGrad = grad;
// What the PubChem code calls "Line search (sort of) loop"
bool converged = false;
for (unsigned int lineIter = 0; !converged; lineIter++) {
// Check that the absolute max step size does not go beyond some
// reasonable size
double mqStep = constrainStep(maxQuaternionStep, step.data() + 1, true);
double mtStep = constrainStep(maxTranslationStep, step.data() + 4, false);
// Both parts of the step are too small to be worth taking: stop.
if (mqStep <= minQuaternionStep && mtStep <= minTranslationStep) {
converged = true;
comboScore = 0.0; // Make sure we return to the old one.
break;
}
// Calculate the 0th component of the quaternion. Obviously it
// relies on the other 3 components being small
double quatSquared =
step[1] * step[1] + step[2] * step[2] + step[3] * step[3];
step[0] = sqrt(1.0 - quatSquared);
// Update the quaternion with the step, multiplying them.
auto newQuatTrans = combineQuatTrans(d_quatTrans, step);
// grad now holds the gradients at the trial position; oldGrad keeps those
// at the start of this iteration.
calcVolumeAndGradients(newQuatTrans, shapeOvlpVol, colorOvlpVol, grad);
auto scores = calcScores(shapeOvlpVol, colorOvlpVol);
comboScore = scores[0];
// if we made a good step, keep the quaternion and we're done
if (comboScore > oldComboScore) {
d_quatTrans = newQuatTrans;
break;
}
// Give up after 4 unsuccessful trial steps. The trial position is stored
// but, if its score is lower, the check after the loop restores the old
// one.
if (lineIter > 2) {
converged = true;
d_quatTrans = newQuatTrans;
break;
}
// It got worse, so reduce the step.
reduceStep(grad, oldGrad, newQuatTrans, oldQuatTrans, lineIter, step,
d_qStepSize, d_tStepSize);
d_quatTrans = oldQuatTrans;
} // End of line search
// Did it converge?
// Either the line search gave up, or the score improved by less than
// d_shapeConvergenceCriterion.
if (converged ||
d_shapeConvergenceCriterion > (comboScore - oldComboScore)) {
if (oldComboScore > comboScore) {
// The previous step was better, so keep it.
comboScore = oldComboScore;
d_quatTrans = oldQuatTrans;
}
finished = true;
break;
}
}
return finished;
}
} // namespace GaussianShape
} // namespace RDKit

View File

@@ -0,0 +1,204 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// This is the class that optimises a moving molecule (fit)
// to maximise its Gaussian overlap with the reference molecule (ref).
// The optimiser is a modified BFGS taken in large part, but re-arranged
// for readability, from the PubChem shape overlay code
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp
#ifndef RDKIT_SINGLECONFORMERALIGNMENT_GUARD
#define RDKIT_SINGLECONFORMERALIGNMENT_GUARD
#include <array>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/dynamic_bitset.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include <RDGeneral/export.h>
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
namespace RDKit {
namespace GaussianShape {
// Holds everything needed to optimise the overlay of one conformer of the fit
// molecule onto one conformer of the reference molecule, and runs that
// optimisation. Not default-constructible, copyable or movable.
struct RDKIT_GAUSSIANSHAPE_EXPORT SingleConformerAlignment {
SingleConformerAlignment() = delete;
/// @brief Set up the overlay of a single conformer of fit against a single
/// conformer of ref. The overlay itself is done by doOverlay(), whose
/// output is the rotation and translation that moves fit to optimise its
/// score with ref.
/// @param ref - the query molecule as 1D array of 4 * N entries. Each
/// block of 4 is the coords and atom radius
/// @param refTypes - the feature types for molecule ref
/// @param refCarbonRadii - whether each atom has a carbon radius
/// @param nRefShape - the number of atoms in ref
/// @param nRefColor - the number of features in ref
/// @param refShapeVol - overlap volume of ref with itself
/// @param refColorVol - color overlap of ref with itself
/// @param fit - the fit molecule as 1D array of 4 * N entries. Each
/// block of 4 is the coords and atom radius.
/// @param fitTypes - the feature types for fit molecule
/// @param fitCarbonRadii - whether each atom has a carbon radius
/// @param nFitShape - the number of atoms in fit
/// @param nFitColor - the number of features in fit
/// @param fitShapeVol - overlap volume of fit with itself
/// @param fitColorVol - color overlap of fit with itself
/// @param initQuatTrans - initial quaternion (4 values) and translation
/// (3 values). NOTE(review): presumably the starting transformation that
/// getFinalQuatTrans() combines with the optimised one - confirm against
/// the constructor.
/// @param optimMode - optimisation mode
/// @param simAlpha - for the Tversky similarity, the alpha value
/// @param simBeta - for the Tversky similarity, the beta value
/// @param mixingParam - how to mix the 2 Tversky values
/// @param useCutoff - whether to use a distance cutoff in the volume
/// calculation
/// @param distCutoff - the cutoff to use if we're doing it.
/// @param shapeConvergenceCriterion - the optimiser stops once an
/// iteration improves the score by less than this amount
/// @param maxIts - maximum number of iterations for the optimiser
SingleConformerAlignment(
const std::vector<double> &ref, const int *refTypes,
const boost::dynamic_bitset<> *refCarbonRadii, int nRefShape,
int nRefColor, double refShapeVol, double refColorVol,
const std::vector<double> &fit, const int *fitTypes,
const boost::dynamic_bitset<> *fitCarbonRadii, int nFitShape,
int nFitColor, double fitShapeVol, double fitColorVol,
const std::array<double, 7> &initQuatTrans, OptimMode optimMode,
double simAlpha, double simBeta, double mixingParam, bool useCutoff,
double distCutoff, double shapeConvergenceCriterion, unsigned int maxIts);
SingleConformerAlignment(const SingleConformerAlignment &other) = delete;
SingleConformerAlignment(SingleConformerAlignment &&other) = delete;
SingleConformerAlignment &operator=(const SingleConformerAlignment &other) =
delete;
SingleConformerAlignment &operator=(SingleConformerAlignment &&other) =
delete;
~SingleConformerAlignment() = default;
// Replace the current working quaternion/translation.
void setQuatTrans(const std::array<double, 7> &quatTrans) {
d_quatTrans = quatTrans;
}
// Get the final transformation by adding the initial transformation
// and the optimised final answer.
void getFinalQuatTrans(RDGeom::Transform3D &xform) const;
// Calculate the combined, shape, and color Tversky scores as appropriate,
// plus the volume of the shape and color overlaps, in that order.
// Assumes that ref and fit are already in the correct configurations.
// If includeColor is passed in true, it will compute the color score
// irrespective of the value in d_optimMode. We still want the color
// score even if doing SHAPE_ONLY optimisation, for example.
std::array<double, 5> calcScores(const double *ref, const double *fit,
bool includeColor = false);
// This one applies the current quatTrans to the coords and then calculates
// the score.
std::array<double, 5> calcScores(bool includeColor = false);
// This one computes the scores from the given overlap volumes. Color score
// only calculated if the color volumes are non-zero.
std::array<double, 5> calcScores(const double shapeOvVol,
const double colorOvVol,
bool includeColor = true) const;
// Apply the quatTrans to the ref and fit shapes and put the results
// into their tmp equivalents. Ref is translated by the -ve of the
// translation, fit is rotated by the rotation bit.
void applyQuatTrans(const std::array<double, 7> &quatTrans);
// Calculate the overlap volume between A and B after the given "quaternion"
// has been applied. The "quaternion" is 7 elements, the first 4 the
// quaternion the last 3 the translation that currently form the
// transformation that overlays B onto A.
void calcVolumeAndGradients(const std::array<double, 7> &quatTrans,
double &shapeOvlpVol, double &colorOvlpVol,
std::array<double, 7> &gradients);
/// @brief Do the overlay, feeding the results into scores.
/// @param scores - the output scores and transformation to reproduce the
/// overlay - an array of size 20. Only the first 16 are used here. They are:
/// 0 - the combo score
/// 1 - the shape Tversky score
/// 2 - the color Tversky score
/// 3 - the shape overlap volume
/// 4 - the color overlap volume
/// 5 - the shape volume of ref
/// 6 - the color volume of ref
/// 7 - the shape volume of fit
/// 8 - the color volume of fit
/// 9-12 - the quaternion to rotate fit onto ref. Applied first.
/// 13-15 - the translation to move fit onto ref. Applied second.
/// 16-19 - not used at present, returned as zeros.
/// @param cycle - 0 for the initial short optimisation of 10 iterations;
/// any other value uses the remainder of the maxIts budget.
/// @return false if it didn't finish within the allowed maximum number of
/// iterations.
bool doOverlay(std::array<double, 20> &scores, unsigned int cycle);
// Find the quaternion and translation that maximises the volume
// overlap appropriate to d_optimMode. Returns false if it didn't finish
// within the allowed maximum number of iterations.
bool optimise(unsigned int maxIters);
// Reference shape: original coords/radii and the working copy that
// applyQuatTrans() fills in.
std::vector<double> d_ref;
std::vector<double> d_refTemp;
const int *d_refTypes;
const boost::dynamic_bitset<> *d_refCarbonRadii;
const int d_nRefShape;
const int d_nRefColor;
const double d_refShapeVol;
const double d_refColorVol;
// Fit shape: original coords/radii and the working copy.
std::vector<double> d_fit;
std::vector<double> d_fitTemp;
const int *d_fitTypes;
const boost::dynamic_bitset<> *d_fitCarbonRadii;
const int d_nFitShape;
const int d_nFitColor;
double d_fitShapeVol;
double d_fitColorVol;
std::array<double, 7> d_initQuatTrans;
const OptimMode d_optimMode;
const double d_simAlpha;
const double d_simBeta;
const double d_mixingParam;
const bool d_useCutoff;
// NOTE(review): the name suggests this is the square of the distCutoff
// constructor argument; it is compared against squared distances.
const double d_distCutoff2;
const double d_shapeConvergenceCriterion;
const unsigned int d_maxIts;
// The quaternion/translation as the optimisation proceeds
std::array<double, 7> d_quatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
// The step sizes of the quaternion and translation during the
// optimisation. Taken from the PubChem code.
double d_qStepSize{-0.001};
double d_tStepSize{-0.01};
// Scratch space for the gradients dr/dQ of the fit molecule.
std::vector<double> d_gradConverters;
};
// Compute the volume overlap and optionally "quaternion" gradients for the
// overlap volume of ref and fit, wrt fit. fit is the original coords of
// the fit molecule, fitTemp is those subject to any transformation applied
// by the quaternion we're using to optimise the overlap volume. If
// gradients is null, they won't be calculated. They are assumed to be
// initialised correctly.
// This is for the atoms/shape features.
double calcVolAndGrads(const double *ref, int numRefPts,
const boost::dynamic_bitset<> *refCarbonRadii,
const double *fit, int numFitPts,
const boost::dynamic_bitset<> *fitCarbonRadii,
std::vector<double> &gradConverters,
const bool useCutoff, const double distCutoff2,
const double *quat = nullptr,
double *gradients = nullptr);
// This one is for the features, and only calculates values if the types
// of 2 features match.
double calcVolAndGrads(const double *ref, int numRefPts, const int *refTypes,
const double *fit, int numFitPts, const int *fitTypes,
int numFitShape, std::vector<double> &gradConverters,
const bool useCutoff, const double distCutoff2,
const double *quat, double *gradients);
} // namespace GaussianShape
} // namespace RDKit
#endif // RDKIT_SINGLECONFORMERALIGNMENT_GUARD

View File

@@ -0,0 +1,6 @@
# Python extension module rdGaussianShape, built from rdGaussianShape.cpp and
# linked against the GaussianShape library.
# NOTE(review): DEST Chem presumably installs it under the Chem package -
# confirm against the rdkit_python_extension macro.
rdkit_python_extension(rdGaussianShape
rdGaussianShape.cpp
DEST Chem
LINK_LIBRARIES GaussianShape)
# Register the Python test script for the wrappers under the name
# pyShapeAlign.
add_pytest(pyShapeAlign
${CMAKE_CURRENT_SOURCE_DIR}/test_rdgaussian_shape.py)

View File

@@ -0,0 +1,513 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
#include <string>
#include <boost/python.hpp>
#include <Geometry/point.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/GaussianShape/GaussianShape.h>
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
#include <RDBoost/Wrap.h>
namespace python = boost::python;
namespace RDKit {
namespace helpers {
// Setter for the Python customFeatures property: replaces
// shp.customFeatures with the contents of the Python sequence s. Each entry
// of s must itself be indexable as (feature type, Point3D position, radius).
void set_customFeatures(GaussianShape::ShapeInputOptions &shp,
                        const python::object &s) {
  auto &features = shp.customFeatures;
  features.clear();
  const auto numEntries = python::len(s);
  features.reserve(numEntries);
  for (auto idx = 0u; idx < numEntries; ++idx) {
    const auto entry = s[idx];
    const unsigned int type = python::extract<unsigned int>(entry[0]);
    const RDGeom::Point3D coords = python::extract<RDGeom::Point3D>(entry[1]);
    const double rad = python::extract<double>(entry[2]);
    features.emplace_back(type, coords, rad);
  }
}
// Getter for the Python customFeatures property: returns a tuple with one
// (int feature type, position, radius) tuple per custom feature.
python::tuple get_customFeatures(const GaussianShape::ShapeInputOptions &shp) {
  python::list features;
  for (const auto &feature : shp.customFeatures) {
    features.append(python::make_tuple(static_cast<int>(std::get<0>(feature)),
                                       std::get<1>(feature),
                                       std::get<2>(feature)));
  }
  return python::tuple(features);
}
// Python-facing AlignMol(ref, fit, ...): aligns molecule fit onto molecule
// ref. Any options argument passed as None leaves the default-constructed
// C++ options in place. Returns the first three results of AlignMolecule
// (combo, shape and color scores) as a Python tuple.
python::tuple alignMol1(const ROMol &ref, ROMol &fit,
                        const python::object &py_refOpts,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int refConfId,
                        int fitConfId) {
  GaussianShape::ShapeInputOptions refShapeOpts;
  GaussianShape::ShapeInputOptions fitShapeOpts;
  if (!py_refOpts.is_none()) {
    refShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_refOpts);
  }
  if (!py_fitOpts.is_none()) {
    fitShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_fitOpts);
  }
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  // No transformation matrix is requested, hence the nullptr.
  const auto scores =
      GaussianShape::AlignMolecule(ref, fit, refShapeOpts, fitShapeOpts,
                                   nullptr, ovOpts, refConfId, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
// Python-facing AlignMol(refShape, fit, ...): aligns molecule fit onto a
// pre-built reference shape. Options passed as None fall back to the
// default-constructed C++ options. Returns the first three results of
// AlignMolecule (combo, shape and color scores) as a Python tuple.
python::tuple alignMol2(const GaussianShape::ShapeInput &refShape, ROMol &fit,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int fitConfId) {
  GaussianShape::ShapeInputOptions fitShapeOpts;
  if (!py_fitOpts.is_none()) {
    fitShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_fitOpts);
  }
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  // No transformation matrix is requested, hence the nullptr.
  const auto scores = GaussianShape::AlignMolecule(
      refShape, fit, fitShapeOpts, nullptr, ovOpts, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
// Python-facing shape/shape alignment. Returns a 4-tuple: the first three
// results of AlignShape followed by the 4x4 transformation flattened
// row by row into a Python list of 16 values.
python::tuple alignShapes(const GaussianShape::ShapeInput &refShape,
                          GaussianShape::ShapeInput &fitShape,
                          const python::object &py_overlayOpts) {
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  RDGeom::Transform3D xform;
  const auto scores =
      GaussianShape::AlignShape(refShape, fitShape, &xform, ovOpts);

  python::list flatMatrix;
  constexpr int dim = 4;
  for (int cell = 0; cell < dim * dim; ++cell) {
    const int row = cell / dim;
    const int col = cell % dim;
    flatMatrix.append(xform.getValUnchecked(row, col));
  }
  return python::make_tuple(scores[0], scores[1], scores[2], flatMatrix);
}
// Python-facing scoring of molecule fit against molecule ref; neither
// molecule is passed as mutable. Options passed as None fall back to the
// default-constructed C++ options. Returns the first three results of
// ScoreMolecule as a Python tuple.
python::tuple scoreMol1(const ROMol &ref, const ROMol &fit,
                        const python::object &py_refOpts,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int refConfId,
                        int fitConfId) {
  GaussianShape::ShapeInputOptions refShapeOpts;
  GaussianShape::ShapeInputOptions fitShapeOpts;
  if (!py_refOpts.is_none()) {
    refShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_refOpts);
  }
  if (!py_fitOpts.is_none()) {
    fitShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_fitOpts);
  }
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  const auto scores = GaussianShape::ScoreMolecule(
      ref, fit, refShapeOpts, fitShapeOpts, ovOpts, refConfId, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
// Python-facing scoring of molecule fit against a pre-built reference shape.
// Options passed as None fall back to the default-constructed C++ options.
// Returns the first three results of ScoreMolecule as a Python tuple.
python::tuple scoreMol2(const GaussianShape::ShapeInput &refShape,
                        const ROMol &fit, const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int fitConfId) {
  GaussianShape::ShapeInputOptions fitShapeOpts;
  if (!py_fitOpts.is_none()) {
    fitShapeOpts =
        python::extract<GaussianShape::ShapeInputOptions>(py_fitOpts);
  }
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  const auto scores = GaussianShape::ScoreMolecule(refShape, fit, fitShapeOpts,
                                                   ovOpts, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
// Python-facing scoring of two pre-built shapes as they stand. overlayOpts
// passed as None falls back to the default-constructed C++ options. Returns
// the first three results of ScoreShape as a Python tuple.
python::tuple scoreShape(const GaussianShape::ShapeInput &refShape,
                         const GaussianShape::ShapeInput &fitShape,
                         const python::object &py_overlayOpts) {
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }
  const auto scores = GaussianShape::ScoreShape(refShape, fitShape, ovOpts);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
// Setter for the Python atomSubset property: converts the Python object as
// into opts.atomSubset using pythonObjectToVect<unsigned int>.
// NOTE(review): behaviour for an empty sequence or None depends on
// pythonObjectToVect - confirm that assigning an empty list clears the subset.
void set_atomSubset(GaussianShape::ShapeInputOptions &opts,
const python::object &as) {
pythonObjectToVect<unsigned int>(as, opts.atomSubset);
}
// Getter for the Python atomSubset property: returns the entries of
// opts.atomSubset, in order, as a Python tuple.
python::tuple get_atomSubset(const GaussianShape::ShapeInputOptions &opts) {
  python::list atomIndices;
  for (const auto &atomIdx : opts.atomSubset) {
    atomIndices.append(atomIdx);
  }
  return python::tuple(atomIndices);
}
// Setter for the Python atomRadii property: ar is a sequence whose entries
// are indexable as (atom index, radius). opts.atomRadii is resized to the
// length of ar and then overwritten entry by entry.
void set_atomRadii(GaussianShape::ShapeInputOptions &opts,
                   const python::object &ar) {
  const int numEntries = python::len(ar);
  auto &radii = opts.atomRadii;
  radii.resize(numEntries);
  for (int entry = 0; entry < numEntries; ++entry) {
    const unsigned int idx = python::extract<unsigned int>(ar[entry][0]);
    const double rad = python::extract<double>(ar[entry][1]);
    radii[entry] = std::make_pair(idx, rad);
  }
}
// Getter for the Python atomRadii property: returns a tuple of
// (int atom index, radius) tuples, one per entry of opts.atomRadii.
python::tuple get_atomRadii(const GaussianShape::ShapeInputOptions &opts) {
  python::list radii;
  for (const auto &entry : opts.atomRadii) {
    const auto atomIdx = static_cast<int>(entry.first);
    radii.append(python::make_tuple(atomIdx, entry.second));
  }
  return python::tuple(radii);
}
} // namespace helpers
void wrap_rdGaussianShape() {
python::scope().attr("__doc__") =
"Module containing implementation of Gaussian-based shape overlay and"
" scoring."
"NOTE: This functionality is experimental and the API"
" and/or results may change in future releases.";
python::enum_<RDKit::GaussianShape::StartMode>("StartMode")
.value("ROTATE_0", RDKit::GaussianShape::StartMode::ROTATE_0)
.value("ROTATE_180", RDKit::GaussianShape::StartMode::ROTATE_180)
.value("ROTATE_180_WIGGLE",
RDKit::GaussianShape::StartMode::ROTATE_180_WIGGLE)
.value("ROTATE_45", RDKit::GaussianShape::StartMode::ROTATE_45)
.value("ROTATE_0_FRAGMENT",
RDKit::GaussianShape::StartMode::ROTATE_0_FRAGMENT)
.value("ROTATE_180_FRAGMENT",
RDKit::GaussianShape::StartMode::ROTATE_180_FRAGMENT)
.value("ROTATE_45_FRAGMENT",
RDKit::GaussianShape::StartMode::ROTATE_45_FRAGMENT)
.value("A_LA_PUBCHEM", GaussianShape::StartMode::A_LA_PUBCHEM)
.export_values();
python::enum_<RDKit::GaussianShape::OptimMode>("OptimMode")
.value("SHAPE_ONLY", RDKit::GaussianShape::OptimMode::SHAPE_ONLY)
.value("SHAPE_PLUS_COLOR_SCORE",
RDKit::GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE)
.value("SHAPE_PLUS_COLOR",
RDKit::GaussianShape::OptimMode::SHAPE_PLUS_COLOR)
.export_values();
python::class_<GaussianShape::ShapeInputOptions, boost::noncopyable>(
"ShapeInputOptions",
"ShapeInputOptions - options for setting up ShapeInput objects.")
.def_readwrite("useColors", &GaussianShape::ShapeInputOptions::useColors,
"Whether to use color features in overlay. Default=True.")
.def_readwrite(
"allCarbonRadii", &GaussianShape::ShapeInputOptions::allCarbonRadii,
"Whether to use the same radius, appropriate for Carbon, for all atoms. There is a"
" slight accuracy penalty but significant speed gain if used. Default=True.")
.add_property(
"atomSubset", &helpers::get_atomSubset, &helpers::set_atomSubset,
"If not empty, use just these atoms in the molecule to form the ShapeInput object.")
.add_property(
"customFeatures", &helpers::get_customFeatures,
&helpers::set_customFeatures,
"Custom features for the shape. Requires a list of tuples of"
" int (the feature type), Point3D (the coordinates) and float (the radius).")
.add_property(
"atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii,
"Non-standard radii to use for the atoms specified by their indices"
" in the molecule. Not all atoms need have a radius specified."
" A list of tuples of [int, float].")
.def("__setattr__", &safeSetattr);
python::class_<GaussianShape::ShapeOverlayOptions, boost::noncopyable>(
"ShapeOverlayOptions",
"ShapeOverlayOptions - options for controlling the shape overlay process.")
.def_readwrite(
"startMode", &RDKit::GaussianShape::ShapeOverlayOptions::startMode,
"Start modes for optimisation. Default is A_LA_PUBCHEM - as used by the"
" PubChem code - either ROTATE_180_WIGGLE or ROTATE_45 depending on the shape"
" of the two molecules. ROTATE_180_WIGGLE means 180 rotations about"
" the x, y and z axes, then a small"
" rotation about each axis from that point, using the best scoring one of"
" those. ROTATE_180 uses 180 degree rotations for 4 start points,"
" ROTATE_45 uses 45 degree rotations for 9 start points and ROTATE_0"
" leaves the relative orientations of the 2 molecules as passed in before"
" optimisation. There are also ROTATE_0_FRAGMENT, ROTATE_45_FRAGMENT"
" and ROTATE_180_FRAGMENT that as well as the above move the fit"
" molecule to the ends of each of the principal axes and then does"
" the appropriate rotations. This is useful when the fit molecule is"
" a lot smaller than the reference molecule, but requires a large number"
" of optimisations so is relatively slow.")
.def_readwrite(
"optimMode", &GaussianShape::ShapeOverlayOptions::optimMode,
"Optimisation mode, controlling what parameters are used"
" to drive the overlay. Default=SHAPE_PLUS_COLOR_SCORE which"
" optimises using just the overlap of shape, but uses the"
" color to decide which is the best overlay. Other options"
" are SHAPE_ONLY and SHAPE_AND_COLOR with the latter using"
" the overlap of color features as well. ")
.def_readwrite(
"simAlpha", &GaussianShape::ShapeOverlayOptions::simAlpha,
"When doing a Tversky similarity, the alpha value. If alpha and"
" beta are both the default 1.0, it's a Tanimoto similarity. A"
" high alpha and low beta emphasize the fit volume in the"
" similarity and vice versa. Tversky is O / (A * (R - O) + B * (F"
" - O) + O) where O is the overlap volume, R is the reference's"
" volume and F is the fit's volume. This is different from that"
" used by OpenEye (O / (A * R + B * F)).")
.def_readwrite("simBeta", &GaussianShape::ShapeOverlayOptions::simBeta,
"When doing a Tversky similarity, the beta value.")
.def_readwrite(
"optParam", &GaussianShape::ShapeOverlayOptions::optParam,
"If using colors, the relative weights of the shape and color scores,"
" as a fraction of 1. Default=0.5.")
.def_readwrite(
"nSteps", &GaussianShape::ShapeOverlayOptions::nSteps,
"Maximum number of steps for the shape overlay process. Default=100.")
.def_readwrite(
"normalize", &GaussianShape::ShapeOverlayOptions::normalize,
"Whether to normalize the shapes before overlay by putting them into their"
" canonical orientation (centred on the origin, aligned along its"
" principal axes. Default=True.")
.def_readwrite(
"useDistCutoff", &GaussianShape::ShapeOverlayOptions::useDistCutoff,
"Whether to use distance cutoff when calculating the shape volumes. If used,"
" there will be a small penalty in accuracy but a significant increase in speed."
" Default=True.")
.def_readwrite(
"distCutoff", &GaussianShape::ShapeOverlayOptions::distCutoff,
"If using a distance cutoff, this is the value used. Default=4.5 of whatever"
" units the coordinates are in.")
.def_readwrite(
"shapeConvergenceCriterion",
&GaussianShape::ShapeOverlayOptions::shapeConvergenceCriterion,
"Optimisation stops when the shape Tversky score changes by less"
" than this amount after an optimisation step. A larger number is"
" faster but gives less precise overlays. Default=0.001.")
.def("__setattr__", &safeSetattr);
std::string docString("ShapeInput object");
python::class_<GaussianShape::ShapeInput, boost::noncopyable>(
"ShapeInput", docString.c_str(),
python::init<const ROMol &, int, const GaussianShape::ShapeInputOptions &,
const GaussianShape::ShapeOverlayOptions &>(
python::args("self", "confId", "shapeOpt", "overlayOpts")))
.add_property("NumAtoms", &GaussianShape::ShapeInput::getNumAtoms,
"Get the number of atoms defining the shape.")
.add_property("NumFeatures", &GaussianShape::ShapeInput::getNumFeatures,
"Get the number of features in the shape.")
.add_property("ShapeVolume", &GaussianShape::ShapeInput::getShapeVolume,
"Get the shape's volume due to the atoms.")
.add_property("ColorVolume", &GaussianShape::ShapeInput::getColorVolume,
"Get the volume of the shape's color features.")
.def("__setattr__", &safeSetattr);
python::def(
"AlignMol", &helpers::alignMol1,
(python::arg("ref"), python::arg("fit"),
python::arg("refOpts") = python::object(),
python::arg("fitOpts") = python::object(),
python::arg("overlayOpts") = python::object(),
python::arg("refConfId") = -1, python::arg("fitConfId") = -1),
R"DOC(Aligns a fit molecule onto a reference molecule. The fit is modified.
Parameters
----------
ref: RDKit.ROMol
Reference molecule
fit: RDKit.ROMol
Fit molecule that will be overlaid
refOpts: ShapeInputOptions, optional
Options for building the ref shape
fitOpts: ShapeInputOptions, optional
Options for building the fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the overlay
refConfId : int, optional
Reference conformer ID (default is -1)
fitConfId : int, optional
fit conformer ID (default is -1)
Returns
-------
3-tuple of floats
The results are (combo_score, shape_score, color_score). The color_score is
0.0 if color features not used, in which case combo_score and shape_score will
be the same.
)DOC");
python::def(
"AlignMol", &helpers::alignMol2,
(python::arg("refShape"), python::arg("fit"),
python::arg("fitOpts") = python::object(),
python::arg("overlayOpts") = python::object(),
python::arg("fitConfId") = -1),
R"DOC(Aligns a fit molecule onto a reference shape. The fit is modified.
Parameters
----------
refShape: ShapeInput
Reference shape
fit: RDKit.ROMol
Fit molecule that will be overlaid
fitOpts: ShapeInputOptions, optional
Options for building the fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the overlay
fitConfId : int, optional
Fit conformer ID (default is -1)
Returns
-------
3-tuple of floats
The results are (combo_score, shape_score, color_score). The color_score is
0.0 if color features not used, in which case combo_score and shape_score will
be the same.)DOC");
python::def(
"AlignShapes", &helpers::alignShapes,
(python::arg("refShape"), python::arg("fitShape"),
python::arg("overlayOpts") = python::object()),
R"DOC(Aligns a fit shape to a reference shape. The fit is modified.
Parameters
----------
refShape : ShapeInput
Reference shape
fitShape : ShapeInput
fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the overlay
Returns
-------
4-tuple of float, float, list of floats
The results are (combo_score, shape_score, color_score, matrix)
The matrix is a 16-float list giving the transformation matrix that
overlays the fit onto the reference.)DOC");
python::def("ScoreMol", &helpers::scoreMol1,
(python::arg("ref"), python::arg("fit"),
python::arg("refOpts") = python::object(),
python::arg("fitOpts") = python::object(),
python::arg("overlayOpts") = python::object(),
python::arg("refConfId") = -1, python::arg("fitConfId") = -1),
R"DOC(Calculates the scores between a reference molecule and a fit
molecule without overlay.
Parameters
----------
ref: RDKit.ROMol
Reference molecule
fit: RDKit.ROMol
Fit molecule that will be scored
refOpts: ShapeInputOptions, optional
Options for building the ref shape
fitOpts: ShapeInputOptions, optional
Options for building the fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the volume calculation
refConfId : int, optional
Reference conformer ID (default is -1)
fitConfId : int, optional
fit conformer ID (default is -1)
Returns
-------
3-tuple of floats
The results are (combo_score, shape_score, color_score). The color_score is
0.0 if color features not used, in which case combo_score and shape_score will
be the same.
)DOC");
python::def(
"ScoreMol", &helpers::scoreMol2,
(python::arg("refShape"), python::arg("fit"),
python::arg("fitOpts") = python::object(),
python::arg("overlayOpts") = python::object(),
python::arg("fitConfId") = -1),
R"DOC(Calculates the scores between a reference shape and a fit molecule
without overlay.
Parameters
----------
refShape: ShapeInput
Reference shape
fit: RDKit.ROMol
Fit molecule that will be scored
fitOpts: ShapeInputOptions, optional
Options for building the fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the volume calculation
fitConfId : int, optional
fit conformer ID (default is -1)
Returns
-------
3-tuple of floats
The results are (combo_score, shape_score, color_score). The color_score is
0.0 if color features not used, in which case combo_score and shape_score will
be the same.
)DOC");
python::def(
"ScoreShape", &helpers::scoreShape,
(python::arg("refShape"), python::arg("fitShape"),
python::arg("overlayOpts") = python::object()),
R"DOC(Calculates the scores between a reference shape and a fit shape without
overlay.
Parameters
----------
refShape: ShapeInput
Reference shape
fitShape: ShapeInput
Fit shape
fitOpts: ShapeInputOptions, optional
Options for building the fit shape
overlayOpts: ShapeOverlayOptions, optional
Options for controlling the volume calculation
Returns
-------
3-tuple of floats
The results are (combo_score, shape_score, color_score). The color_score is
0.0 if color features not used, in which case combo_score and shape_score will
be the same.
)DOC");
}
// Entry point of the rdGaussianShape Python extension module; all of the
// registrations are done by wrap_rdGaussianShape().
BOOST_PYTHON_MODULE(rdGaussianShape) {
  wrap_rdGaussianShape();
}
} // namespace RDKit

View File

@@ -0,0 +1,161 @@
import unittest
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdGaussianShape, rdMolTransforms
from rdkit import RDConfig
from rdkit.Geometry import Point3D
# Directory holding the SD files used by these tests (shared with the
# pubchem_shape tests).
datadir = '/'.join((RDConfig.RDBaseDir, 'External/pubchem_shape/test_data'))
class TestCase(unittest.TestCase):
  """Tests for the rdGaussianShape Python wrappers (AlignMol, AlignShapes,
  ScoreMol, ScoreShape, ShapeInput and the two options classes)."""

  def setUp(self):
    # The first two records of test1.sdf are used as reference and probe.
    # They are re-read for every test because AlignMol modifies the fit
    # molecule in place.
    suppl = Chem.SDMolSupplier(datadir + '/test1.sdf')
    self.ref = suppl[0]
    self.probe = suppl[1]
    self.assertIsNotNone(self.ref)
    self.assertIsNotNone(self.probe)

  def test1_Defaults(self):
    """AlignMol with default options; the result is (combo, shape, color)."""
    tpl = rdGaussianShape.AlignMol(self.ref, self.probe)
    self.assertAlmostEqual(tpl[0], 0.497, places=3)
    self.assertAlmostEqual(tpl[1], 0.760, places=3)
    self.assertAlmostEqual(tpl[2], 0.233, places=3)

  def test2_NoColor(self):
    """Shape-only overlay: the combo score equals the shape score and the
    color score is 0."""
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    ovOpts.optimMode = rdGaussianShape.OptimMode.SHAPE_ONLY
    shpOpts = rdGaussianShape.ShapeInputOptions()
    shpOpts.useColors = False
    tpl = rdGaussianShape.AlignMol(self.ref, self.probe, shpOpts, shpOpts, ovOpts)
    self.assertAlmostEqual(tpl[0], 0.760, places=3)
    self.assertAlmostEqual(tpl[1], 0.760, places=3)
    self.assertAlmostEqual(tpl[2], 0.0, places=3)

  def test3_FromShape(self):
    """Build a ShapeInput for the reference and align the probe onto it."""
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    shpOpts = rdGaussianShape.ShapeInputOptions()
    shp = rdGaussianShape.ShapeInput(self.ref, -1, shpOpts, ovOpts)
    self.assertAlmostEqual(shp.ShapeVolume, 591.058, places=3)
    self.assertAlmostEqual(shp.ColorVolume, 31.935, places=3)
    self.assertIsInstance(shp, rdGaussianShape.ShapeInput)
    tpl = rdGaussianShape.AlignMol(shp, self.probe)
    self.assertAlmostEqual(tpl[0], 0.497, places=3)
    self.assertAlmostEqual(tpl[1], 0.760, places=3)
    self.assertAlmostEqual(tpl[2], 0.233, places=3)

  def test4_customFeatures(self):
    """Custom features with AlignShapes: the two shapes carry the same two
    feature positions with the feature types swapped."""
    m1 = Chem.MolFromSmiles(
      "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
    opts = rdGaussianShape.ShapeInputOptions()
    opts.customFeatures = ((1, Point3D(-1.75978, 0.148897, 0), 1.0),
                           (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    shp = rdGaussianShape.ShapeInput(m1, -1, opts, ovOpts)
    self.assertEqual(shp.NumAtoms, 4)
    self.assertEqual(shp.NumFeatures, 2)

    m2 = Chem.Mol(m1)
    opts2 = rdGaussianShape.ShapeInputOptions()
    opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897, 0), 1.0),
                            (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
    shp2 = rdGaussianShape.ShapeInput(m2, -1, opts2, ovOpts)

    tpl = rdGaussianShape.AlignShapes(shp, shp2, ovOpts)
    self.assertAlmostEqual(tpl[0], 0.999, places=3)
    self.assertAlmostEqual(tpl[1], 1.000, places=3)
    self.assertAlmostEqual(tpl[2], 0.999, places=3)
    # The 4th element is the 16-float transformation matrix.
    tf = tpl[3]
    self.assertGreater(0.0, tf[0])
    self.assertEqual(1.0, tf[15])

    # check the getter:
    cfs = opts2.customFeatures
    self.assertEqual(len(cfs), 2)
    self.assertEqual(cfs[0][0], 2)
    self.assertEqual(cfs[1][0], 1)

  def test5_customFeatures(self):
    """Same custom-feature setup as test4, but going through AlignMol."""
    m1 = Chem.MolFromSmiles(
      "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
    opts = rdGaussianShape.ShapeInputOptions()
    opts.customFeatures = ((1, Point3D(-1.75978, 0.148897, 0), 1.0),
                           (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
    m2 = Chem.Mol(m1)
    opts2 = rdGaussianShape.ShapeInputOptions()
    opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897, 0), 1.0),
                            (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    tpl = rdGaussianShape.AlignMol(m1, m2, opts, opts2, ovOpts)
    self.assertAlmostEqual(tpl[0], 0.999, places=3)
    self.assertAlmostEqual(tpl[1], 1.000, places=3)
    self.assertAlmostEqual(tpl[2], 0.999, places=3)

  def test6_FixedScore(self):
    """Scoring without overlay via ScoreMol (both overloads) and ScoreShape."""
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    # Just to make sure it's there and returns a value.
    opts = rdGaussianShape.ShapeInputOptions()
    tpl = rdGaussianShape.ScoreMol(self.ref, self.ref, opts, opts, ovOpts)
    self.assertAlmostEqual(tpl[0], 1.0, places=3)
    self.assertAlmostEqual(tpl[1], 1.0, places=3)
    self.assertAlmostEqual(tpl[2], 1.0, places=3)

    opts = rdGaussianShape.ShapeInputOptions()
    opts.useColors = False
    ovOpts.normalize = False
    shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
    tpl = rdGaussianShape.ScoreMol(shp, self.probe, opts)
    self.assertAlmostEqual(tpl[0], 0.0, places=3)
    self.assertAlmostEqual(tpl[1], 0.0, places=3)
    self.assertAlmostEqual(tpl[2], 0.0, places=3)

    opts.useColors = True
    shp1 = rdGaussianShape.ShapeInput(self.probe, -1, opts, ovOpts)
    shp2 = rdGaussianShape.ShapeInput(self.probe, -1, opts, ovOpts)
    tpl = rdGaussianShape.ScoreShape(shp1, shp2, ovOpts)
    self.assertAlmostEqual(tpl[0], 1.0, places=3)
    self.assertAlmostEqual(tpl[1], 1.0, places=3)
    self.assertAlmostEqual(tpl[2], 1.0, places=3)

  def test7_customAtomRadii(self):
    """Setting atomRadii changes the computed shape volume."""
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    opts = rdGaussianShape.ShapeInputOptions()
    opts.allCarbonRadii = False
    opts.atomRadii = [(4, 1.9)]
    shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
    self.assertAlmostEqual(shp.ShapeVolume, 559.361, places=3)

  def test8_atomSubset(self):
    """Build a shape from a subset of the atoms, without color features."""
    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    opts = rdGaussianShape.ShapeInputOptions()
    opts.atomSubset = [4, 5, 6, 7, 8, 9]
    opts.useColors = False
    opts.allCarbonRadii = False
    shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
    self.assertAlmostEqual(shp.ShapeVolume, 259.144, places=3)
    self.assertEqual(shp.ColorVolume, 0.0)

  def test9_tversky(self):
    """Changing simAlpha/simBeta changes the scores relative to the
    defaults."""
    scores = rdGaussianShape.AlignMol(self.ref, self.probe)
    self.assertAlmostEqual(scores[0], 0.497, places=3)
    self.assertAlmostEqual(scores[1], 0.760, places=3)
    self.assertAlmostEqual(scores[2], 0.233, places=3)

    ovOpts = rdGaussianShape.ShapeOverlayOptions()
    ovOpts.simAlpha = 0.95
    ovOpts.simBeta = 0.05
    ref_tversky = rdGaussianShape.AlignMol(self.ref, self.probe, overlayOpts=ovOpts)
    self.assertAlmostEqual(ref_tversky[0], 0.700, places=3)
    self.assertAlmostEqual(ref_tversky[1], 0.968, places=3)
    self.assertAlmostEqual(ref_tversky[2], 0.433, places=3)

    ovOpts.simAlpha = 0.05
    ovOpts.simBeta = 0.95
    fit_tversky = rdGaussianShape.AlignMol(self.ref, self.probe, overlayOpts=ovOpts)
    self.assertAlmostEqual(fit_tversky[0], 0.557, places=3)
    self.assertAlmostEqual(fit_tversky[1], 0.780, places=3)
    self.assertAlmostEqual(fit_tversky[2], 0.335, places=3)
# Run the tests when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()

View File

@@ -0,0 +1,861 @@
//
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// Original author: David Cosgrove (CozChemIx Limited)
//
// Tests for the Roshambo2-based shape alignment code.
#include <chrono>
#include <random>
#include <algorithm>
#include <execution>
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
#include <GraphMol/MolOps.h>
#include <GraphMol/FileParsers/MolWriters.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/GaussianShape/GaussianShape.h>
#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
using namespace RDKit;
bool checkMolsHaveRoughlySameCoords(const ROMol &m1, const ROMol &m2,
double margin = 0.005) {
for (unsigned int i = 0; i < m1.getNumAtoms(); ++i) {
auto pos1 = m1.getConformer().getAtomPos(i);
auto pos2 = m2.getConformer().getAtomPos(i);
if ((pos1 - pos2).length() > margin) {
// So the error is printed in a relevant place.
std::cout << i << " : " << m1.getAtomWithIdx(i)->getAtomicNum()
<< " :: " << (pos1 - pos2).length() << std::endl;
CHECK_THAT((pos1 - pos2).length(),
Catch::Matchers::WithinAbs(0.0, margin));
return false;
}
}
return true;
}
// Overlays the second molecule of test1.sdf onto the first with each of the
// optimisation modes and checks the (combo, shape, color) scores, and that
// re-scoring the moved probe without overlay reproduces them.
TEST_CASE("basic alignment") {
  // A std::string can't be constructed from a null pointer, so fail cleanly
  // if the environment isn't set up.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
  // Both molecules are round-tripped through CXSMILES before use.
  auto refT = suppl[0];
  REQUIRE(refT);
  auto ref = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*refT));
  REQUIRE(ref);
  auto probeT = suppl[1];
  REQUIRE(probeT);
  auto probe = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*probeT));
  REQUIRE(probe);

  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
  overlayOpts.nSteps = 50;
  GaussianShape::ShapeInputOptions shapeOpts;

  SECTION("setup") {
    auto refShape = GaussianShape::ShapeInput(*ref, -1, shapeOpts);
    CHECK_THAT(refShape.getShapeVolume(),
               Catch::Matchers::WithinAbs(591.057, 0.005));
    CHECK_THAT(refShape.getColorVolume(),
               Catch::Matchers::WithinAbs(31.935, 0.005));
    auto probeShape = GaussianShape::ShapeInput(*probe, -1, shapeOpts);
    CHECK_THAT(probeShape.getShapeVolume(),
               Catch::Matchers::WithinAbs(751.013, 0.005));
    CHECK_THAT(probeShape.getColorVolume(),
               Catch::Matchers::WithinAbs(42.530, 0.005));
  }
  SECTION("shape only") {
    ROMol cp(*probe);
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_ONLY;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    GaussianShape::ShapeInputOptions tShapeOpts;
    tShapeOpts.useColors = false;
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, tShapeOpts, tShapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.0, 0.005));
    // Check that a re-score gives the same answer.
    auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts, shapeOpts,
                                                 overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("shape plus color score") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.494, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.236, 0.005));
    // Check that a re-score gives the same answer.
    const auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts,
                                                       shapeOpts, overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("shape and color") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.477, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.747, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.207, 0.005));
    const auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts,
                                                       shapeOpts, overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("collect transform") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    RDGeom::Transform3D xform;
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, &xform, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.494, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.236, 0.005));
    // Check a few values from the transform, just to be sure
    CHECK_THAT(xform.getValUnchecked(0, 0),
               Catch::Matchers::WithinAbs(-0.886, 0.005));
    CHECK_THAT(xform.getValUnchecked(1, 1),
               Catch::Matchers::WithinAbs(-0.828, 0.005));
    CHECK_THAT(xform.getValUnchecked(2, 2),
               Catch::Matchers::WithinAbs(0.816, 0.005));
    CHECK_THAT(xform.getValUnchecked(3, 3),
               Catch::Matchers::WithinAbs(1.0, 0.005));
  }
  SECTION("shape plus color score a la pubchem") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
    GaussianShape::ShapeInputOptions shapeOpts2;
    // Run once with allCarbonRadii on and once with it off.
    for (const bool acr : {true, false}) {
      shapeOpts2.allCarbonRadii = acr;
      ROMol cp(*probe);
      const auto scores = GaussianShape::AlignMolecule(
          *ref, cp, shapeOpts2, shapeOpts2, nullptr, overlayOpts);
      if (acr) {
        CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.498, 0.005));
        CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.758, 0.005));
        CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.237, 0.005));
      } else {
        CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.503, 0.005));
        CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.761, 0.005));
        CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.245, 0.005));
      }
      // Check that a re-score gives the same answer.
      const auto rescores = GaussianShape::ScoreMolecule(
          *ref, cp, shapeOpts2, shapeOpts2, overlayOpts);
      CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
      CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
      CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
    }
  }
}
// Overlays the second molecule of bulk.pubchem.sdf onto the first, checks the
// scores and the re-score, and writes the reference and the overlaid probe to
// bulk.pubchem_out.sdf in the test data directory.
TEST_CASE("bulk") {
  // A std::string can't be constructed from a null pointer, so fail cleanly
  // if the environment isn't set up.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/bulk.pubchem.sdf");
  auto ref = suppl[0];
  REQUIRE(ref);
  std::string testout = dirName + "/bulk.pubchem_out.sdf";
  auto writer = SDWriter(testout);
  writer.write(*ref);

  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  overlayOpts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
  GaussianShape::ShapeInputOptions shapeOpts;

  // Only one probe is overlaid: the expected scores below are specific to
  // the second molecule in the file.  Require that it is there so the test
  // can't pass without checking anything.
  REQUIRE(suppl.length() > 1u);
  auto probe = suppl[1];
  REQUIRE(probe);
  auto scores = GaussianShape::AlignMolecule(*ref, *probe, shapeOpts,
                                             shapeOpts, nullptr, overlayOpts);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.575, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.818, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.332, 0.005));
  const auto rescores = GaussianShape::ScoreMolecule(*ref, *probe);
  CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
  CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
  CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  writer.write(*probe);
  writer.close();
}
// Aligns two ShapeInput objects built from test1.sdf, checks the scores, and
// checks the returned transform by applying it to a copy of the probe and
// comparing with stored coordinates.  Then repeats the alignment after
// calling normalizeCoords() on both shapes.
TEST_CASE("shape alignment") {
  // A std::string can't be constructed from a null pointer, so fail cleanly
  // if the environment isn't set up.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
  auto ref = suppl[0];
  REQUIRE(ref);
  auto probe = suppl[1];
  REQUIRE(probe);
  auto refShape = GaussianShape::ShapeInput(*ref, -1);
  auto probeShape = GaussianShape::ShapeInput(*probe, -1);
  // The expected coordinates of the probe after overlay.
  const auto ovProbe =
      "FC1(F)C[C@H](C(O)=O)N(Cc2ocnc2)C1 |(-13.7799,-5.76066,4.42449;-13.5271,-6.62223,3.41219;-12.4707,-7.37583,3.76844;-13.2679,-5.8659,2.12715;-14.6435,-5.78022,1.46335;-15.139,-4.37081,1.41003;-14.7786,-3.78046,0.244433;-15.7838,-3.81972,2.28974;-15.5606,-6.52351,2.33643;-16.628,-7.19488,1.60806;-17.7234,-6.24049,1.21312;-18.3383,-5.54964,2.21298;-19.2578,-4.78996,1.55674;-19.2808,-4.93485,0.251035;-18.298,-5.86438,0.0244256;-14.7486,-7.4588,3.11797),wU:4.4|"_smiles;
  REQUIRE(ovProbe);

  RDGeom::Transform3D xform;
  auto scores = GaussianShape::AlignShape(refShape, probeShape, &xform);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.498, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.235, 0.005));
  // This effectively checks that xform is correct.
  auto rescores = GaussianShape::ScoreShape(refShape, probeShape);
  CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.001));
  CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.001));
  CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.001));

  SmilesWriteParams params;
  params.canonical = false;
  // The input structure being from an SDF doesn't have the atoms in an order
  // that will make a SMILES string so bounce it through one for comparison.
  auto probeCp1 = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*probe, params));
  REQUIRE(probeCp1);
  MolTransforms::transformConformer(probeCp1->getConformer(), xform);
  CHECK(checkMolsHaveRoughlySameCoords(*ovProbe, *probeCp1));

  // And pre-normalizing the shapes
  refShape.normalizeCoords();
  probeShape.normalizeCoords();
  RDGeom::Transform3D xform1;
  auto scores1 = GaussianShape::AlignShape(refShape, probeShape, &xform1);
  CHECK_THAT(scores1[0], Catch::Matchers::WithinAbs(0.498, 0.005));
  CHECK_THAT(scores1[1], Catch::Matchers::WithinAbs(0.760, 0.005));
  CHECK_THAT(scores1[2], Catch::Matchers::WithinAbs(0.235, 0.005));
}
// Regression test: a translated copy of a molecule is overlaid back onto the
// original, once molecule-onto-molecule and once molecule-onto-ShapeInput.
TEST_CASE("Overlay onto shape bug (Github8462)") {
  using Catch::Matchers::WithinAbs;
  auto m1 =
      R"(c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|)"_smiles;
  REQUIRE(m1);

  // m2 is m1 shifted by (3, 2, 0); m3 keeps an untouched copy of the shifted
  // coordinates for the second overlay.
  ROMol m2(*m1);
  for (auto atom : m2.atoms()) {
    auto &xyz = m2.getConformer().getAtomPos(atom->getIdx());
    xyz.x += 3.0;
    xyz.y += 2.0;
  }
  ROMol m3(m2);

  auto molScores = GaussianShape::AlignMolecule(*m1, m2);
  CHECK_THAT(molScores[0], WithinAbs(1.0, 0.005));
  CHECK_THAT(molScores[1], WithinAbs(1.0, 0.005));
  CHECK_THAT(molScores[2], WithinAbs(1.0, 0.005));
  CHECK(checkMolsHaveRoughlySameCoords(*m1, m2, 0.005));

  // Create the shape without normalization to mimic an arbitrary shape.
  auto refShape = GaussianShape::ShapeInput(*m1, -1);
  auto shapeScores = AlignMolecule(refShape, m3);
  CHECK_THAT(shapeScores[0], WithinAbs(1.0, 0.005));
  CHECK_THAT(shapeScores[1], WithinAbs(1.0, 0.005));
  CHECK_THAT(shapeScores[2], WithinAbs(1.0, 0.005));

  // The shape coordinates are read 4 values per atom, x, y, z first; they
  // should coincide with the overlaid m3.
  for (unsigned int i = 0; i < m3.getNumAtoms(); ++i) {
    const auto base = 4 * i;
    RDGeom::Point3D shapePos(refShape.getCoords()[base],
                             refShape.getCoords()[base + 1],
                             refShape.getCoords()[base + 2]);
    auto molPos = m3.getConformer().getAtomPos(i);
    CHECK_THAT((shapePos - molPos).length(), WithinAbs(0.0, 0.01));
  }
}
// Overlays molecules read with their explicit Hs retained and checks that
// the scores are as expected and the coordinates stay sane.
TEST_CASE("handling molecules with Hs") {
  // A std::string can't be constructed from a null pointer, so fail cleanly
  // if the environment isn't set up.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  v2::FileParsers::MolFileParserParams params;
  params.removeHs = false;
  auto suppl =
      v2::FileParsers::SDMolSupplier(dirName + "/align_with_hs.sdf", params);
  auto ref = suppl[0];
  REQUIRE(ref);
  auto probe = suppl[1];
  REQUIRE(probe);

  SECTION("basics") {
    RWMol cp(*probe);
    auto scores = GaussianShape::AlignMolecule(*ref, cp);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.700, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.834, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.566, 0.005));
    for (auto i = 0u; i < cp.getNumAtoms(); ++i) {
      // the failure mode here was that Hs had HUGE coordinates
      auto pos = cp.getConformer().getAtomPos(i);
      CHECK((pos.x > -10 && pos.x < 10));
    }
    // Check the rescore
    auto rescores = GaussianShape::ScoreMolecule(*ref, cp);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
}
// Regression test: two benzenes with explicit Hs, one carrying a deuterium,
// should overlay with all three scores equal to 1.
TEST_CASE("Github #8096") {
  SECTION("as reported") {
    using Catch::Matchers::WithinAbs;
    auto deuterated =
        R"([H]c1c([H])c([H])c([2H])c([H])c1[H] |(1.55967,1.91617,0.0546381;0.885536,1.07172,0.030849;1.38172,-0.23747,0.0274262;2.44539,-0.439501,0.0483424;0.470206,-1.27516,-0.00361916;0.856925,-2.30002,-0.00633525;-0.896665,-1.07227,-0.0310991;-1.60071,-1.87642,-0.0551085;-1.36315,0.22877,-0.0271173;-2.43593,0.379132,-0.0487835;-0.479018,1.29083,0.00359778;-0.823965,2.31421,0.00720933)|)"_smiles;
    REQUIRE(deuterated);
    auto plain =
        R"([H]c1c([H])c([H])c([H])c([H])c1[H] |(-2.06264,-0.844763,-0.0261403;-1.04035,-0.481453,-0.0114878;-0.00743655,-1.41861,-0.0137121;-0.215455,-2.47997,-0.0295909;1.29853,-0.949412,0.00507497;2.12524,-1.65277,0.00390664;1.58501,0.395878,0.0254188;2.61997,0.704365,0.0394811;0.550242,1.31385,0.0273741;0.783172,2.37039,0.0434262;-0.763786,0.88847,0.00908113;-1.60557,1.58532,0.0100194)|)"_smiles;
    REQUIRE(plain);

    const auto result = GaussianShape::AlignMolecule(*deuterated, *plain);
    CHECK_THAT(result[0], WithinAbs(1.0, 0.005));
    CHECK_THAT(result[1], WithinAbs(1.0, 0.005));
    CHECK_THAT(result[2], WithinAbs(1.0, 0.005));
  }
}
// Regression test for H atoms being left in the wrong place after overlay;
// detected by checking the lengths of the bonds to the Hs afterwards.
TEST_CASE("Hs not properly transformed when hcount = feature count") {
  // A std::string can't be constructed from a null pointer, so fail cleanly
  // if the environment isn't set up.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  SECTION("as reported") {
    v2::FileParsers::MolFileParserParams ps;
    ps.removeHs = false;
    auto mol1 =
        v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_1.mol", ps);
    REQUIRE(mol1);
    auto mol2 =
        v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_2.mol", ps);
    REQUIRE(mol2);
    {
      RWMol cp(*mol2);
      auto scores = GaussianShape::AlignMolecule(*mol1, cp);
      CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.744, 0.005));
      CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.918, 0.005));
      CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.570, 0.005));
      // the bug led to H atoms in stupid positions, so we can detect it by just
      // looking at bond lengths to Hs:
      // NOTE(review): starting at getNumHeavyAtoms() assumes the Hs come
      // after all the heavy atoms in these files - confirm.
      for (auto i = cp.getNumHeavyAtoms(); i < cp.getNumAtoms(); ++i) {
        INFO("checking atom " << i);
        auto at = cp.getAtomWithIdx(i);
        for (auto nbr : cp.atomNeighbors(at)) {
          auto dist = (cp.getConformer().getAtomPos(i) -
                       cp.getConformer().getAtomPos(nbr->getIdx()))
                          .length();
          CHECK(dist < 1.2);  // should be a bond to H
        }
      }
    }
  }
}
// Scores pairs of molecules and shapes in the positions they are given in,
// without any optimisation of the overlay.
TEST_CASE("Score No Overlay") {
  // These are 2 ligands used by Andy Grant and Co in their original paper
  // https://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
  // Ligands as extracted from PDB, with a bit of munging to get them as
  // SMILES strings (downloaded the Ideal ligand structures from RCSB
  // as SDFs and transferred the corresponding atom coords from 3tmn and 1tmn).
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);
  GaussianShape::ShapeInputOptions shapeOpts;

  // Checks the (combo, shape, color) triple against the expected values.
  const auto expectScores = [](const auto &got, double combo, double shape,
                               double color, double tol) {
    CHECK_THAT(got[0], Catch::Matchers::WithinAbs(combo, tol));
    CHECK_THAT(got[1], Catch::Matchers::WithinAbs(shape, tol));
    CHECK_THAT(got[2], Catch::Matchers::WithinAbs(color, tol));
  };

  {
    // A molecule against itself.
    auto selfScores = ScoreMolecule(*pdb_trp_3tmn, *pdb_trp_3tmn, shapeOpts);
    expectScores(selfScores, 1.0, 1.0, 1.0, 0.001);
  }
  {
    // The same molecule translated by 100 along each axis.
    auto pdb_trp_3tmn_cp =
        R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
    RDGeom::Point3D shift{100.0, 100.0, 100.0};
    RDGeom::Transform3D shiftXform;
    shiftXform.SetTranslation(shift);
    MolTransforms::transformConformer(pdb_trp_3tmn_cp->getConformer(),
                                      shiftXform);
    auto farScores = ScoreMolecule(*pdb_trp_3tmn, *pdb_trp_3tmn_cp, shapeOpts);
    expectScores(farScores, 0.0, 0.0, 0.0, 0.001);
  }
  {
    auto selfScores = ScoreMolecule(*pdb_0zn_1tmn, *pdb_0zn_1tmn, shapeOpts);
    expectScores(selfScores, 1.0, 1.0, 1.0, 0.001);
  }
  {
    // Molecule against molecule.
    auto pairScores = ScoreMolecule(*pdb_trp_3tmn, *pdb_0zn_1tmn, shapeOpts);
    expectScores(pairScores, 0.307, 0.349, 0.265, 0.005);
  }
  {
    // Shape against molecule.
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto pairScores = ScoreMolecule(refShape, *pdb_0zn_1tmn, shapeOpts);
    expectScores(pairScores, 0.307, 0.349, 0.265, 0.005);
  }
  {
    // Shape against shape.
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto fitShape = GaussianShape::ShapeInput(*pdb_0zn_1tmn, -1, shapeOpts);
    auto pairScores = GaussianShape::ScoreShape(refShape, fitShape);
    expectScores(pairScores, 0.307, 0.349, 0.265, 0.005);
  }
}
// Aligns Iressa onto Tagrisso using shape + colour optimisation, then checks
// the scores, that re-scoring the result reproduces them, and that the moved
// Iressa coordinates match a stored reference overlay.
TEST_CASE("Iressa onto Tagrisso") {
  // Conformations from PubChem produced by Omega.  Iressa has been rotated and
  // translated by a random amount: PubChem puts both molecules in their
  // inertial frames, which makes things too easy.
  auto tagrisso =
      R"(C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C |(-0.9161,3.8415,-2.9811;0.1848,3.1933,-2.588;0.1064,1.7789,-2.12;-0.9619,1.1797,-2.0847;1.3654,1.2872,-1.7553;1.6841,0.0144,-1.273;0.6638,-0.9235,-1.1146;0.9578,-2.1997,-0.6343;-0.0813,-3.1358,-0.4783;-1.4556,-2.9979,-0.1847;-2.1716,-4.1359,-0.1085;-3.4803,-3.9673,0.173;-4.0689,-2.7353,0.3728;-3.2269,-1.647,0.2676;-3.7311,-0.317,0.4568;-5.0275,0.0291,0.153;-5.1887,1.3569,0.4454;-6.4231,2.0889,0.2595;-4.0141,1.8811,0.9361;-3.7121,3.1796,1.3615;-2.4139,3.4249,1.8179;-1.4588,2.4106,1.8467;-1.7752,1.1164,1.4181;-3.0776,0.8453,0.9537;-1.9103,-1.7423,-0.011;2.2723,-2.5382,-0.3127;2.58,-3.7798,0.1575;2.539,-3.9651,1.571;3.2927,-1.6003,-0.4713;2.9986,-0.324,-0.9514;4.0475,0.61,-1.1047;4.7738,0.6956,-2.3546;4.4021,1.497,-0.0162;5.4401,0.8254,0.8736;5.8294,1.7155,1.9601;4.8213,1.7057,3.0218;7.1361,1.3324,2.4981)|)"_smiles;
  REQUIRE(tagrisso);
  auto iressa =
      R"(COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 |(11.4672,-0.467948,5.63989;12.0133,0.532631,6.49693;11.2039,1.5801,6.81985;11.2014,2.71958,6.00975;10.3926,3.81652,6.29699;10.4038,4.90395,5.50623;9.58889,5.91871,5.85946;8.76443,5.96486,6.91838;8.77814,4.86059,7.68868;7.92337,4.86224,8.81914;7.44878,5.8925,9.64622;8.22182,7.03851,9.85619;7.75051,8.06265,10.6777;6.50441,7.94546,11.2936;6.06567,8.93802,12.0809;5.72932,6.80403,11.0875;4.19056,6.65372,11.8447;6.20047,5.78015,10.2656;9.57161,3.74547,7.43436;9.56851,2.60328,8.25407;10.3868,1.52933,7.93769;10.3797,0.419365,8.74203;11.3064,0.402096,9.81907;10.7104,-0.399165,10.9685;9.40938,0.22121,11.4678;9.64205,1.59049,11.9223;8.38006,2.22985,12.3199;8.64991,3.65266,12.8011;9.56883,3.64192,13.8942;10.8103,3.05101,13.5078;10.5931,1.61394,13.0425)|)"_smiles;
  REQUIRE(iressa);

  GaussianShape::ShapeOverlayOptions opts;
  opts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  opts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
  opts.nSteps = 100;
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.allCarbonRadii = false;

  auto scores = GaussianShape::AlignMolecule(*tagrisso, *iressa, shapeOpts,
                                             shapeOpts, nullptr, opts);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.332, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.569, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.095, 0.005));

  // Scoring the molecules where they now sit must agree with the scores the
  // alignment itself reported.
  auto rescores = GaussianShape::ScoreMolecule(*tagrisso, *iressa, shapeOpts,
                                               shapeOpts, opts);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(rescores[0], 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(rescores[1], 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(rescores[2], 0.005));

  // Stored coordinates that iressa is expected to have after the alignment.
  auto aligned_iressa =
      "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 |(3.34206,-4.82098,0.224565;2.562,-4.29938,-0.849374;1.40029,-3.66768,-0.520786;0.217637,-4.40844,-0.435429;-0.995315,-3.80966,-0.103538;-2.12266,-4.53841,-0.026421;-3.25097,-3.87666,0.301586;-3.3749,-2.56477,0.559961;-2.22774,-1.8651,0.473675;-2.30832,-0.475114,0.738276;-3.33657,0.464391,0.56286;-4.2686,0.303124,-0.466868;-5.2907,1.23626,-0.641364;-5.38531,2.33529,0.212458;-6.37287,3.22379,0.031359;-4.45782,2.50088,1.24127;-4.5695,3.85508,2.29834;-3.43604,1.56746,1.41601;-0.997368,-2.42737,0.145328;0.187601,-1.67548,0.061391;1.37756,-2.30488,-0.27166;2.52893,-1.56544,-0.352965;2.83995,-1.00034,-1.61907;3.59724,0.302963,-1.40382;2.76275,1.31266,-0.622178;1.52096,1.60458,-1.33518;0.667075,2.50553,-0.54864;-0.616491,2.80465,-1.31789;-0.312021,3.38752,-2.58555;0.491386,2.50505,-3.3701;1.80144,2.19743,-2.65039)|"_smiles;
  REQUIRE(aligned_iressa);
  // The comparison result must be asserted; calling the helper on its own
  // discards the outcome and the check can never fail.
  CHECK(checkMolsHaveRoughlySameCoords(*iressa, *aligned_iressa));
}
// Starts from the crystal-structure overlay of two ligands and checks that
// AlignMolecule / AlignShape only tweak it when told to start from the input
// pose (ROTATE_0, no normalisation), through each of the molecule/shape entry
// points.
TEST_CASE("Optimise in place") {
  // These are 2 ligands used by Andy Grant and Co in their original paper
  // https://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
  // Ligands as extracted from PDB, with a bit of munging to get them as
  // SMILES strings (downloaded the Ideal ligand structures from RCSB
  // as SDFs and transferred the corresponding atom coords from 3tmn and 1tmn).
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);
  // This is the overlay produced by the first test below, to make sure we
  // haven't broken anything.
  auto ov_pdb_0zn_1tmn =
      R"(CC(C)C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O |(38.4182,43.8068,-0.910588;38.2709,42.2972,-0.731075;38.1304,41.9045,0.733229;37.0364,41.7932,-1.45451;37.1781,42.1406,-2.93763;35.8859,41.9023,-3.62687;35.9519,42.0509,-5.13225;34.6741,41.7191,-5.92414;34.7628,40.2524,-6.36478;33.5811,39.6611,-7.16439;33.5076,39.8293,-8.54156;32.4798,39.2259,-9.29221;31.5187,38.4455,-8.64246;31.6293,38.2599,-7.2498;32.6704,38.8605,-6.52059;36.3695,43.3994,-5.66929;37.2254,43.4931,-6.57465;35.7737,44.4114,-5.28982;38.3937,41.376,-3.47909;39.5342,41.9164,-3.54877;38.1091,40.1191,-3.80052;39.1087,39.1759,-4.32928;39.9624,38.5242,-3.20191;39.1024,37.5196,-2.43949;37.8503,37.6527,-2.00683;37.3343,36.383,-1.67591;38.2646,35.5274,-1.83827;38.2218,34.1585,-1.6274;39.4752,33.5046,-1.63354;40.6899,34.1995,-1.85784;40.7011,35.5753,-2.15222;39.4587,36.2072,-2.14728;38.5746,38.1494,-5.32118;37.3737,38.2894,-5.71533;39.3977,37.4441,-6.00821),wD:6.6,wU:4.3,21.22|)"_smiles;
  // Dereferenced in every section below, so a parse failure must stop the
  // test here rather than crash later.
  REQUIRE(ov_pdb_0zn_1tmn);

  auto initScores = GaussianShape::ScoreMolecule(*pdb_trp_3tmn, *pdb_0zn_1tmn);
  CHECK_THAT(initScores[0], Catch::Matchers::WithinAbs(0.307, 0.001));
  CHECK_THAT(initScores[1], Catch::Matchers::WithinAbs(0.349, 0.001));
  CHECK_THAT(initScores[2], Catch::Matchers::WithinAbs(0.265, 0.001));

  // The PDB atom order isn't canonical, so bounce in and out of SMILES
  // to make it easier to check.
  auto canon_probe =
      v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*pdb_0zn_1tmn));
  REQUIRE(canon_probe);

  {
    // This should just tweak the input overlay.
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    ROMol cp(*canon_probe);
    RDGeom::Transform3D xform;
    auto scores = GaussianShape::AlignMolecule(*pdb_trp_3tmn, cp, shapeOpts,
                                               shapeOpts, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    CHECK(checkMolsHaveRoughlySameCoords(cp, *ov_pdb_0zn_1tmn));
  }
  {
    // With default settings, it does a poor job.
    GaussianShape::ShapeOverlayOptions opts;
    GaussianShape::ShapeInputOptions shapeOpts;
    ROMol cp(*canon_probe);
    auto scores = GaussianShape::AlignMolecule(*pdb_trp_3tmn, cp, shapeOpts,
                                               shapeOpts, nullptr, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.197, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.361, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.033, 0.001));
  }
  {
    // And with reference as a shape the same
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    ROMol cp(*canon_probe);
    RDGeom::Transform3D xform;
    auto scores =
        GaussianShape::AlignMolecule(refShape, cp, shapeOpts, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    CHECK(checkMolsHaveRoughlySameCoords(cp, *ov_pdb_0zn_1tmn));
    // Applying the returned transform to an untouched copy of the probe must
    // land it on the same overlay as the in-place alignment of cp.
    ROMol cp1(*canon_probe);
    MolTransforms::transformConformer(cp1.getConformer(), xform);
    CHECK(checkMolsHaveRoughlySameCoords(cp1, *ov_pdb_0zn_1tmn));
  }
  {
    // And with both as shapes
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto fitShape = GaussianShape::ShapeInput(*canon_probe, -1, shapeOpts);
    RDGeom::Transform3D xform;
    auto scores = GaussianShape::AlignShape(refShape, fitShape, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    // Convert the fitted shape back to a molecule to compare coordinates.
    auto cp = fitShape.shapeToMol(false);
    CHECK(checkMolsHaveRoughlySameCoords(*cp, *ov_pdb_0zn_1tmn));
  }
}
// Exercises the ROTATE_180_FRAGMENT start mode by fitting the smaller PDB
// ligand onto the larger one with shape + colour optimisation.
TEST_CASE("Fragment Mode") {
  // On the PDB overlay.
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);

  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180_FRAGMENT;
  overlayOpts.nSteps = 100;

  auto probeShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1);
  auto refShape = GaussianShape::ShapeInput(*pdb_0zn_1tmn, -1);

  // Use the smaller molecule as the probe
  RDGeom::Transform3D fitXform;
  const auto fragScores =
      GaussianShape::AlignShape(refShape, probeShape, &fitXform, overlayOpts);

  // These are close to the values above for starting from the xtal structures.
  CHECK_THAT(fragScores[0], Catch::Matchers::WithinAbs(0.311, 0.005));
  CHECK_THAT(fragScores[1], Catch::Matchers::WithinAbs(0.408, 0.005));
  CHECK_THAT(fragScores[2], Catch::Matchers::WithinAbs(0.215, 0.005));

  // Apply the returned transform to the probe molecule's conformer; nothing
  // is asserted about the resulting coordinates.
  MolTransforms::transformConformer(pdb_trp_3tmn->getConformer(), fitXform);
}
// Checks that user-supplied feature points are added to a shape (and only
// when colours are enabled), and that swapping the feature types between two
// copies of a molecule makes the best overlay the inverted one.
TEST_CASE("custom feature points") {
  auto m1 =
      "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|"_smiles;
  // m1 is dereferenced in both sections, so fail cleanly if parsing broke.
  REQUIRE(m1);
  SECTION("using shapes") {
    auto shape1 = GaussianShape::ShapeInput(*m1, -1);
    // each carbonyl O gets one feature:
    CHECK(shape1.getCoords().size() == 24);
    GaussianShape::ShapeInputOptions opts2;
    opts2.customFeatures = GaussianShape::CustomFeatures{
        {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
    auto shape2 = GaussianShape::ShapeInput(*m1, -1, opts2);
    CHECK(shape2.getCoords().size() == 24);
    {
      // confirm that we don't add the features if not requested.
      GaussianShape::ShapeInputOptions topts;
      topts.customFeatures = GaussianShape::CustomFeatures{
          {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
          {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
      topts.useColors = false;
      auto tshape = GaussianShape::ShapeInput(*m1, -1, topts);
      CHECK(tshape.getCoords().size() == 16);
    }
    // we'll swap the features on the second shape so that the alignment has to
    // be inverted
    GaussianShape::ShapeInputOptions opts3;
    opts3.customFeatures = GaussianShape::CustomFeatures{
        {2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
    auto m2 = ROMol(*m1);
    auto shape3 = GaussianShape::ShapeInput(m2, -1, opts3);
    CHECK(shape3.getCoords().size() == 24);
    GaussianShape::ShapeOverlayOptions overlayOpts;
    overlayOpts.optParam = 0.5;
    RDGeom::Transform3D xform;
    auto scores = AlignShape(shape2, shape3, &xform, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.999, 0.001));
    CHECK(shape3.getCoords()[0] > 0);      // x coord of first atom
    CHECK(shape3.getCoords()[3 * 4] < 0);  // x coord of fourth atom
    // The returned transform applied to a copy of the molecule's conformer
    // must give the same inversion as seen in the fitted shape.
    auto conf = m2.getConformer(-1);
    MolTransforms::transformConformer(conf, xform);
    CHECK(conf.getAtomPos(0).x > 0);
    CHECK(conf.getAtomPos(3).x < 0);
  }
  SECTION("using molecules") {
    GaussianShape::ShapeInputOptions opts2;
    opts2.customFeatures = GaussianShape::CustomFeatures{
        {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
    auto m2 = ROMol(*m1);
    // we'll swap the features on the second shape so that the alignment has to
    // be inverted
    GaussianShape::ShapeInputOptions opts3;
    opts3.customFeatures = GaussianShape::CustomFeatures{
        {2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
    GaussianShape::ShapeOverlayOptions overlayOpts;
    overlayOpts.optParam = 0.5;
    auto scores = AlignMolecule(*m1, m2, opts2, opts3, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.999, 0.001));
    // m2 itself is inspected here (no transform was requested), so a
    // read-only reference to its conformer is sufficient.
    const auto &conf = m2.getConformer(-1);
    CHECK(conf.getAtomPos(0).x > 0);
    CHECK(conf.getAtomPos(3).x < 0);
  }
}
// Checks that per-atom radius overrides change the shape volume, and that
// overriding an atom's radius with the N value gives the same volume as the
// molecule that has a real N at that position.
TEST_CASE("Non-standard radii") {
  auto m1 =
      "[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
  REQUIRE(m1);
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.useColors = false;
  shapeOpts.allCarbonRadii = false;
  auto shape1 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  CHECK(shape1.getCoords().size() == 28);
  CHECK_THAT(shape1.getShapeVolume(),
             Catch::Matchers::WithinAbs(387.396, 0.005));
  // mol1 with atom 4 with an N radius and a bigger Xe.
  shapeOpts.atomRadii =
      std::vector<std::pair<unsigned int, double>>{{0, 2.5}, {4, 1.55}};
  auto shape2 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  CHECK_THAT(shape2.getShapeVolume(),
             Catch::Matchers::WithinAbs(425.051, 0.005));
  // Corresponding pyridine derivative.
  auto m2 =
      "[Xe]c1ccncc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
  REQUIRE(m2);
  auto shape3 = GaussianShape::ShapeInput(*m2, -1, shapeOpts);
  // Compare with a tolerance, like the other volume checks, rather than with
  // exact floating-point equality.
  CHECK_THAT(shape3.getShapeVolume(),
             Catch::Matchers::WithinAbs(shape2.getShapeVolume(), 0.005));
}
// Builds a shape from a subset of a molecule's atoms and from the whole
// molecule, checking coordinate counts and shape/colour volumes for each.
TEST_CASE("Shape subset") {
  auto m1 =
      "c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles;
  REQUIRE(m1);

  // Atoms 0-3, 10 and 11 are the first ring written in the SMILES.
  GaussianShape::ShapeInputOptions inputOpts;
  inputOpts.atomSubset = std::vector<unsigned int>{0, 1, 2, 3, 10, 11};
  const auto subsetShape = GaussianShape::ShapeInput(*m1, -1, inputOpts);
  CHECK(subsetShape.getCoords().size() == 28);
  CHECK_THAT(subsetShape.getShapeVolume(),
             Catch::Matchers::WithinAbs(261.166, 0.005));
  CHECK_THAT(subsetShape.getColorVolume(),
             Catch::Matchers::WithinAbs(5.316, 0.005));

  // With the subset emptied, the shape is built from the whole molecule.
  inputOpts.atomSubset.clear();
  const auto fullShape = GaussianShape::ShapeInput(*m1, -1, inputOpts);
  CHECK(fullShape.getCoords().size() == 56);
  CHECK_THAT(fullShape.getShapeVolume(),
             Catch::Matchers::WithinAbs(556.266, 0.005));
  CHECK_THAT(fullShape.getColorVolume(),
             Catch::Matchers::WithinAbs(10.631, 0.005));
}
// CXSMILES (with 3D coordinates) for the LOBSTER structures 437_A_355,
// YIW_A_1353, LSA_A_503, SU0_A_263, VHC_A_1, 40Z_A_301, 0J8_A_1401,
// 5QQ_A_1401, 054_A_578 and 053_A_578, in that order.  LOBSTER is published
// at https://doi.org/10.1007/s10822-024-00581-1 by the Rarey and BioSolveIT
// group.
std::vector<std::string> lobstersText = {
    "CC(C)(C)c1cc(NC(=O)Nc2cccc3ccccc23)n(-c2ccc(CO)cc2)n1 |(4.1858,1.2187,12.6749;3.4917,2.128,11.6409;4.5532,3.0576,11.0244;2.9098,1.2685,10.5255;2.3016,2.8612,12.2277;1.3306,2.2548,13.0356;0.4334,3.2937,13.302;-0.7316,3.3518,14.0275;-0.978,2.5581,15.0932;-0.147,1.7378,15.4901;-2.1645,2.6924,15.6938;-2.626,2.0319,16.8298;-1.8457,2.1016,18.0083;-2.2654,1.5179,19.2117;-3.4858,0.8316,19.2546;-4.2646,0.7339,18.1056;-5.4724,0.0393,18.1895;-6.2813,-0.103,17.0695;-5.8756,0.4693,15.8666;-4.6776,1.1996,15.7839;-3.8502,1.3304,16.8984;0.8824,4.3943,12.6425;0.3828,5.6491,12.5902;-0.1458,6.3636,13.6677;-0.6231,7.6713,13.5508;-0.5802,8.3258,12.3036;-1.1083,9.738,12.1288;-0.2929,10.4745,11.2057;-0.0699,7.6584,11.21;0.4028,6.3542,11.3691;2.0606,4.1572,11.9546)|",
    "CC(C)c1nnc2ccc(Sc3ccccc3CNC(=O)Nc3cc(C(C)(C)C)nn3-c3ccccc3)cn12 |(-2.677,-1.147,25.057;-1.383,-1.713,24.501;-0.937,-2.645,25.64;-1.654,-2.496,23.218;-1.533,-3.804,23.035;-1.814,-4.126,21.82;-2.125,-3.054,21.15;-2.484,-2.842,19.8;-2.737,-1.521,19.397;-2.633,-0.45,20.292;-2.934,1.2,19.817;-4.086,0.989,18.473;-5.306,0.233,18.595;-6.145,0.057,17.523;-5.802,0.63,16.3;-4.651,1.427,16.155;-3.805,1.585,17.237;-2.532,2.33,17.092;-2.317,3.124,15.849;-1.208,2.896,15.139;-0.293,2.051,15.461;-0.934,3.704,14.164;0.194,3.633,13.319;1.093,2.626,13.06;2.064,3.138,12.153;3.254,2.438,11.547;2.765,1.497,10.446;4.297,3.407,10.979;3.923,1.592,12.556;1.755,4.357,11.974;0.557,4.674,12.592;0.026,5.998,12.604;-0.011,6.769,11.405;-0.52,8.07,11.439;-0.946,8.61,12.635;-0.914,7.879,13.819;-0.396,6.572,13.782;-2.292,-0.677,21.581;-2.025,-1.986,22.016)|",
    "O=S1(=O)N=C(O)c2ccccc21 |(-5.7089,1.0252,18.4004;-6.3943,1.0779,17.1227;-7.8251,0.8757,17.0645;-5.6185,0.1418,15.9961;-4.8784,0.7874,15.1629;-4.3563,0.192,14.2273;-5.1972,2.2654,15.1438;-4.6774,3.2806,14.3416;-5.0543,4.5982,14.6171;-5.8378,4.8973,15.7426;-6.3084,3.8774,16.5767;-5.9587,2.5637,16.2652)|",
    "COc1ccc2c(CC(=O)Nc3ccc(S(N)(=O)=O)cc3)cc(=O)oc2c1 |(-2.8164,14.7062,11.3592;-3.6624,13.537,11.2643;-3.2822,12.2442,11.6717;-3.8543,11.0312,10.9854;-3.454,9.7632,11.3397;-2.4737,9.5893,12.4478;-2.0245,8.2354,12.8485;-2.4284,7.0271,12.0534;-3.4577,6.2276,12.8381;-4.6459,6.5341,12.9744;-2.9341,5.1799,13.652;-3.8043,4.2751,14.3593;-3.0353,3.1465,14.8909;-3.6696,2.1888,15.6247;-5.1279,2.3197,15.867;-5.8526,1.0165,16.8148;-5.1597,-0.2684,16.1766;-7.2644,1.0484,16.6895;-5.3596,1.2234,18.1529;-5.863,3.3573,15.3579;-5.1676,4.4008,14.5479;-1.2733,8.1678,14.0258;-0.8262,9.3446,14.6451;-0.1397,9.3025,15.6623;-1.0563,10.6342,14.1829;-1.9446,10.7231,13.0793;-2.3948,12.0761,12.7036)|",
    "Cc1cc(C)c2cc1C(=O)NCCCOc1cccc(c1)Sc1cc-2nc(N)n1 |(73.8435,34.0723,26.5156;72.3815,34.1628,26.0388;71.6766,32.9652,25.9327;70.3451,32.9485,25.4823;69.6347,31.5936,25.3834;69.7284,34.1492,25.0935;70.4497,35.3542,25.1896;71.7747,35.3826,25.6823;72.4585,36.7442,25.8391;73.1172,37.0066,26.8611;72.1633,37.6292,24.8945;72.7135,39.0047,24.8716;71.7025,40.047,24.3677;71.3796,39.8568,22.8758;70.4153,38.7939,22.9499;69.8006,38.3095,21.8404;70.2406,38.4827,20.5148;69.5441,37.8479,19.489;68.407,37.0853,19.7393;68.0155,36.8722,21.0628;68.6923,37.5061,22.0951;66.5592,35.8914,21.365;66.9463,35.055,22.8617;68.2573,34.8374,23.3077;68.4281,34.247,24.558;67.3421,33.831,25.243;66.0874,33.9792,24.7404;65.0509,33.4765,25.428;65.8935,34.6016,23.5436)|",
    "Cc1c2c(n3c1CCN(Cc1ccco1)c1cc(C(N)=O)c(Cl)cc1-3)CC(C)(C)CC2=O |(74.8244,36.0896,26.0638;73.6879,35.186,25.6743;73.8428,33.8098,25.2429;72.5848,33.3216,24.9555;71.6743,34.332,25.2034;72.3429,35.4561,25.6334;71.7112,36.7663,26.0021;70.9895,37.4573,24.8514;70.1361,36.585,24.0367;69.7698,37.0101,22.6942;70.4319,38.0048,21.8175;70.8144,39.2778,21.9818;71.3289,39.7044,20.7598;71.23,38.6778,19.9306;70.6882,37.6078,20.5486;69.5068,35.3697,24.5106;68.1241,35.239,24.3599;67.4459,34.0963,24.7908;65.9689,34.0392,24.5456;65.1976,33.5111,25.4835;65.5111,34.4884,23.4842;68.1905,33.0839,25.4012;67.4671,31.5959,25.9018;69.5536,33.1987,25.5786;70.2398,34.3166,25.103;72.2711,31.9512,24.4312;73.4524,31.2236,23.7805;73.1395,29.7332,23.6933;73.6857,31.7544,22.3618;74.7055,31.4432,24.6434;74.982,32.872,25.0594;76.1434,33.2319,25.2012)|",
    "O=[N+]([O-])c1cccc(CNc2nc(C(F)(F)F)nc3ncc(-c4cnn(C5CCNCC5)c4)cc23)c1 |(-1.438,-13.226,20.761;-2.668,-13.702,20.695;-3.225,-14.278,21.715;-3.449,-13.606,19.5;-4.865,-13.703,19.611;-5.66,-13.615,18.465;-5.057,-13.44,17.21;-3.642,-13.329,17.113;-2.985,-13.176,15.754;-3.208,-14.453,15.102;-2.183,-15.47,15.231;-1.174,-15.247,16.105;-0.207,-16.166,16.24;0.913,-15.872,17.233;1.607,-16.968,17.501;1.653,-14.932,16.679;0.431,-15.434,18.382;-0.155,-17.307,15.532;-1.134,-17.587,14.626;-1.051,-18.759,13.937;-2.004,-19.081,13.034;-3.067,-18.213,12.807;-4.134,-18.597,11.829;-4.958,-17.727,11.105;-5.807,-18.503,10.373;-5.518,-19.836,10.648;-6.22,-21.008,10.059;-7.613,-21.132,10.635;-8.385,-22.302,9.993;-8.344,-22.271,8.528;-7.465,-21.299,7.857;-6.163,-20.922,8.548;-4.513,-19.902,11.543;-3.18,-16.987,13.52;-2.183,-16.667,14.458;-2.84,-13.422,18.253)|",
    "Fc1ccc(-c2cnc3nnc(C(F)(F)c4ccc5ncccc5c4)n3n2)cc1 |(-8.9341,-13.5345,15.2941;-7.8624,-13.6169,16.1019;-8.0323,-13.604,17.4747;-6.927,-13.6949,18.3088;-5.6563,-13.7867,17.7687;-4.5363,-13.8655,18.5962;-4.6798,-14.0273,19.9861;-3.5769,-14.1,20.7695;-2.3321,-14.0007,20.1927;-1.0691,-14.049,20.7489;-0.1759,-13.9183,19.7353;-0.8597,-13.8052,18.581;-0.2877,-13.6262,17.1957;1.0077,-13.3321,17.3153;-0.932,-12.6293,16.5758;-0.3953,-14.89,16.3793;0.3679,-16.0044,16.7321;0.2944,-17.1762,15.9833;-0.5658,-17.2197,14.8472;-0.6484,-18.3768,14.1035;-1.4592,-18.4618,13.0237;-2.2424,-17.3707,12.6345;-2.1971,-16.184,13.3658;-1.3263,-16.1253,14.5037;-1.2424,-14.9308,15.2731;-2.1994,-13.8502,18.8681;-3.3091,-13.7741,18.0449;-5.4848,-13.7974,16.3944;-6.5897,-13.7169,15.5608)|",
    "Nc1cc(Cn2c(C(=O)O)c(-n3c(=O)[nH]c4cscc4c3=O)c3cc(C(F)(F)F)ccc32)ccn1 |(29.5323,45.8636,43.104;28.8655,44.7866,42.5152;27.6833,44.3191,43.0964;27.0256,43.2322,42.5026;25.769,42.5937,43.0062;25.1539,43.3311,44.0877;24.4106,44.4698,43.8927;24.1642,45.0354,42.563;23.9009,46.3564,42.5697;24.0503,44.4039,41.5301;23.9917,44.8584,45.1623;23.2031,45.9947,45.3911;21.8869,45.9724,44.834;21.3954,44.9674,44.2853;21.1644,47.1439,44.9613;21.7067,48.2779,45.5571;21.0928,49.4972,45.696;22.1115,50.6335,46.4923;23.3622,49.4671,46.6456;23.0143,48.2553,46.0988;23.8418,47.054,46.0577;24.9808,47.0096,46.5158;24.5049,43.9455,46.1213;24.4196,43.8288,47.5202;25.0722,42.7404,48.1424;25.0173,42.5333,49.6217;24.5189,41.3151,49.9335;26.2412,42.5919,50.1868;24.2862,43.4348,50.3041;25.7949,41.7994,47.3917;25.8973,41.9011,46.0107;25.2386,42.9843,45.4045;27.5907,42.6386,41.3749;28.7511,43.1735,40.8552;29.3972,44.2337,41.3948)|",
    "Nc1cc(Cn2c(C(=O)O)c(-c3ccc[nH]c3=O)c3cc(C(F)(F)F)ccc32)ccn1 |(29.4427,46.05,43.4472;28.8505,44.9996,42.7518;27.6724,44.4529,43.2669;27.0734,43.3813,42.5955;25.8189,42.6999,43.052;25.2163,43.3606,44.2071;24.4581,44.4943,44.0838;24.1931,45.0901,42.7808;24.1204,46.4333,42.7998;23.8923,44.4562,41.7912;24.0461,44.8297,45.3707;23.2097,45.9536,45.7413;23.5041,46.7963,46.7667;22.6287,47.9099,47.1092;21.5037,48.1244,46.3872;21.1624,47.2732,45.3588;21.9195,46.1834,44.9822;21.563,45.4266,44.0641;24.5817,43.8725,46.2645;24.4958,43.6833,47.6538;25.1452,42.5857,48.2292;25.0531,42.3252,49.6881;24.5934,41.0857,49.9422;26.2447,42.4118,50.3073;24.2575,43.1875,50.3428;25.8943,41.6963,47.4464;25.9994,41.8646,46.0771;25.324,42.958,45.5057;27.6791,42.8877,41.4291;28.8324,43.5019,40.9842;29.4267,44.5435,41.6155)|",
};
// Parsed molecules for the entries of lobstersText, filled on first use.
std::vector<std::shared_ptr<RWMol>> lobsters;

// Parses every lobstersText entry into `lobsters`, requiring each parse to
// succeed.  Does nothing when `lobsters` already holds molecules.
void initLobsters() {
  if (!lobsters.empty()) {
    return;
  }
  for (const auto &cxsmiles : lobstersText) {
    lobsters.emplace_back(v2::SmilesParse::MolFromSmiles(cxsmiles));
    REQUIRE(lobsters.back());
  }
}
// Scores the PDB overlay of the two ligands with the default similarity
// settings and then with two opposite simAlpha/simBeta weightings.
TEST_CASE("Tversky") {
  // Score the PDB overlay.
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);

  GaussianShape::ShapeOverlayOptions overlayOpts;
  GaussianShape::ShapeInputOptions inputOpts;

  // Default settings.
  const auto tanimoto =
      GaussianShape::ScoreMolecule(*pdb_0zn_1tmn, *pdb_trp_3tmn);
  CHECK_THAT(tanimoto[0], Catch::Matchers::WithinAbs(0.307, 0.001));
  CHECK_THAT(tanimoto[1], Catch::Matchers::WithinAbs(0.349, 0.001));
  CHECK_THAT(tanimoto[2], Catch::Matchers::WithinAbs(0.265, 0.001));

  // simAlpha = 0.95, simBeta = 0.05.
  overlayOpts.simAlpha = 0.95;
  overlayOpts.simBeta = 0.05;
  const auto refWeighted = GaussianShape::ScoreMolecule(
      *pdb_0zn_1tmn, *pdb_trp_3tmn, inputOpts, inputOpts, overlayOpts);
  CHECK_THAT(refWeighted[0], Catch::Matchers::WithinAbs(0.362, 0.001));
  CHECK_THAT(refWeighted[1], Catch::Matchers::WithinAbs(0.383, 0.001));
  CHECK_THAT(refWeighted[2], Catch::Matchers::WithinAbs(0.342, 0.001));

  // The weights swapped: simAlpha = 0.05, simBeta = 0.95.
  overlayOpts.simAlpha = 0.05;
  overlayOpts.simBeta = 0.95;
  const auto fitWeighted = GaussianShape::ScoreMolecule(
      *pdb_0zn_1tmn, *pdb_trp_3tmn, inputOpts, inputOpts, overlayOpts);
  CHECK_THAT(fitWeighted[0], Catch::Matchers::WithinAbs(0.668, 0.001));
  CHECK_THAT(fitWeighted[1], Catch::Matchers::WithinAbs(0.795, 0.001));
  CHECK_THAT(fitWeighted[2], Catch::Matchers::WithinAbs(0.540, 0.001));
}
#ifdef RDK_USE_BOOST_SERIALIZATION
// Round-trips a ShapeInput through toString() and the string constructor and
// checks that the reconstructed object matches the original, both with and
// without per-atom carbon radii.
TEST_CASE("Serialization") {
  auto m1 =
      "[H]c1c([H])c([H])c([H])c([H])c1[H] |(-2.06264,-0.844763,-0.0261403;-1.04035,-0.481453,-0.0114878;-0.00743655,-1.41861,-0.0137121;-0.215455,-2.47997,-0.0295909;1.29853,-0.949412,0.00507497;2.12524,-1.65277,0.00390664;1.58501,0.395878,0.0254188;2.61997,0.704365,0.0394811;0.550242,1.31385,0.0273741;0.783172,2.37039,0.0434262;-0.763786,0.88847,0.00908113;-1.60557,1.58532,0.0100194)|"_smiles;
  REQUIRE(m1);
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.allCarbonRadii = false;
  auto shape = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  auto istr = shape.toString();
  GaussianShape::ShapeInput shape2(istr);
  CHECK(shape2.getCoords() == shape.getCoords());
  CHECK(shape2.getTypes() == shape.getTypes());
  CHECK(shape2.getNumAtoms() == shape.getNumAtoms());
  CHECK(shape2.getNumFeatures() == shape.getNumFeatures());
  CHECK(shape2.getNormalized() == shape.getNormalized());
  CHECK(shape2.calcExtremes() == shape.calcExtremes());
  CHECK(shape2.calcCanonicalRotation() == shape.calcCanonicalRotation());
  CHECK(shape2.calcCanonicalTranslation() == shape.calcCanonicalTranslation());
  // getCarbonRadii() can be empty (see the last check in this test), so
  // make sure both sides hold a value before dereferencing them; otherwise a
  // serialization bug would crash the test instead of failing it.
  REQUIRE(shape.getCarbonRadii());
  REQUIRE(shape2.getCarbonRadii());
  CHECK(*shape2.getCarbonRadii() == *shape.getCarbonRadii());
  CHECK_THAT(shape2.getShapeVolume(),
             Catch::Matchers::WithinAbs(261.0145, 0.005));
  CHECK_THAT(shape2.getColorVolume(),
             Catch::Matchers::WithinAbs(5.316, 0.005));
  // Check it handles the case of no d_carbonRadii in the ShapeInput.
  shapeOpts.allCarbonRadii = true;
  auto shape3 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  auto istr2 = shape3.toString();
  GaussianShape::ShapeInput shape4(istr2);
  CHECK(!shape4.getCarbonRadii());
}
#endif
#ifdef RDK_TEST_MULTITHREADED
#include <thread>
#include <future>
namespace {
// Worker for the "multithreaded" test.  Handles entries idx, idx + count,
// idx + 2 * count, ... of `pairs`: copies both molecules of the pair, aligns
// the second copy onto the first with default options and stores the scores
// in the matching slot of `test`.
void runblock(
    const std::vector<std::pair<std::shared_ptr<RWMol>, std::shared_ptr<RWMol>>>
        &pairs,
    unsigned int count, unsigned int idx,
    std::vector<std::array<double, 3>> &test) {
  unsigned int slot = idx;
  while (slot < pairs.size()) {
    // Work on private copies of the molecules held in `pairs`.
    auto refMol = *pairs[slot].first;
    auto fitMol = *pairs[slot].second;
    test[slot] = GaussianShape::AlignMolecule(refMol, fitMol);
    slot += count;
  }
}
}  // namespace
// Aligns consecutive LOBSTER molecule pairs many times, first serially to
// produce reference scores and then split across several async tasks, and
// checks that the parallel results are identical to the serial ones.
TEST_CASE("multithreaded") {
  initLobsters();
  constexpr size_t numRepeats = 1000;
  std::vector<std::pair<std::shared_ptr<RWMol>, std::shared_ptr<RWMol>>> pairs;
  pairs.reserve(numRepeats * (lobsters.size() / 2));
  for (auto r = 0u; r < numRepeats; ++r) {
    // Pair up consecutive molecules: (0, 1), (2, 3), ...
    for (size_t i = 0; i + 1 < lobsters.size(); i += 2) {
      pairs.emplace_back(lobsters[i], lobsters[i + 1]);
    }
  }

  // generate reference data
  std::cerr << " generating reference data" << std::endl;
  auto start = std::chrono::steady_clock::now();
  std::vector<std::array<double, 3>> ref;
  ref.reserve(pairs.size());
  for (const auto &pr : pairs) {
    // Align copies so the shared molecules in `pairs` stay untouched.
    auto p1 = *pr.first;
    auto p2 = *pr.second;
    ref.push_back(GaussianShape::AlignMolecule(p1, p2));
  }
  auto end = std::chrono::steady_clock::now();
  auto ref_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
          .count();
  std::cerr << " reference time: " << ref_time << "ms" << std::endl;

  // run the same calculations in parallel and check they match the reference
  std::cerr << " parallel loop" << std::endl;
  std::vector<std::array<double, 3>> test(pairs.size());
  auto start2 = std::chrono::steady_clock::now();
  const unsigned int count = 4;
  std::vector<std::future<void>> tg;
  tg.reserve(count);
  for (unsigned int i = 0; i < count; ++i) {
    // `pairs` is only read by the workers and outlives them (every future is
    // waited on below), so hand it over by reference instead of copying the
    // whole vector into each task.
    tg.emplace_back(std::async(std::launch::async, runblock, std::cref(pairs),
                               count, i, std::ref(test)));
  }
  for (auto &fut : tg) {
    fut.get();
  }
  tg.clear();
  auto end2 = std::chrono::steady_clock::now();
  auto test_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(end2 - start2)
          .count();
  std::cerr << " parallel time: " << test_time << "ms" << std::endl;
  CHECK(test == ref);
}
#endif

View File

@@ -247,8 +247,10 @@ bool computePrincipalAxesAndMomentsFromGyrationMatrix(
return true;
}
auto origin = computeCentroid(conf, ignoreHs, weights);
// Note that this may not return a right-handed axis.
bool res = getEigenValEigenVectFromCovMat(conf, axes, moments, origin,
ignoreHs, true, weights);
if (res && !weights) {
conf.getOwningMol().setProp(axesPropName, axes, true);
conf.getOwningMol().setProp(momentsPropName, moments, true);
@@ -259,7 +261,8 @@ bool computePrincipalAxesAndMomentsFromGyrationMatrix(
RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
const RDGeom::Point3D *center,
bool normalizeCovar,
bool ignoreHs) {
bool ignoreHs,
double *eigenValues) {
constexpr unsigned int DIM = 3;
RDGeom::Point3D origin;
if (!center) {
@@ -271,8 +274,8 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
auto *trans = new RDGeom::Transform3D;
trans->setToIdentity();
// if we have a single atom system we don't need to do anyhting setting
// translation is sufficient
// If we have a single atom system we don't need to do anything setting
// translation is sufficient.
if (nAtms > 1) {
Eigen::Matrix3d eigVecs;
Eigen::Vector3d eigVals;
@@ -291,6 +294,9 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
});
for (unsigned int col = 0; col < DIM; ++col) {
unsigned int colSorted = eigValsSorted.at(col).first;
if (eigenValues) {
eigenValues[colSorted] = eigValsSorted.at(col).second;
}
for (unsigned int row = 0; row < DIM; ++row) {
trans->setVal(col, row, eigVecs(row, colSorted));
}
@@ -327,7 +333,8 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
const RDGeom::Point3D *center,
bool normalizeCovar,
bool ignoreHs) {
bool ignoreHs,
double *retEigenValues) {
RDGeom::Point3D origin;
if (!center) {
origin = computeCentroid(conf, ignoreHs);
@@ -339,7 +346,7 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
// find the eigen values and eigen vectors for the covMat
RDNumeric::DoubleMatrix eigVecs(3, 3);
RDNumeric::DoubleVector eigVals(3);
// if we have a single atom system we don't need to do anyhting other than
// if we have a single atom system we don't need to do anything other than
// setting the translation
unsigned int nAtms = conf.getNumAtoms();
@@ -348,11 +355,16 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
// set the translation
origin *= -1.0;
// trans->SetTranslation(origin);
// if we have a single atom system we don't need to do anyhting setting
// if we have a single atom system we don't need to do anything else: setting
// the translation is sufficient
if (nAtms > 1) {
RDNumeric::EigenSolvers::powerEigenSolver(3, *covMat, eigVals, eigVecs,
conf.getNumAtoms());
if (retEigenValues) {
retEigenValues[0] = eigVals[0];
retEigenValues[1] = eigVals[1];
retEigenValues[2] = eigVals[2];
}
// deal with zero eigen value systems
unsigned int i, j, dim = 3;
for (i = 0; i < 3; ++i) {

View File

@@ -110,10 +110,13 @@ computePrincipalAxesAndMomentsFromGyrationMatrix(
\param normalizeCovar Normalize the covariance matrix with the number of
atoms
\param ignoreHs Optionally ignore hydrogens
\param eigenVals Optional output array for the eigenvalues, sorted in
ascending order. If given, it must have room for 3 doubles. It is only
filled in when the conformer has more than one atom.
*/
RDKIT_MOLTRANSFORMS_EXPORT RDGeom::Transform3D *computeCanonicalTransform(
const RDKit::Conformer &conf, const RDGeom::Point3D *center = nullptr,
bool normalizeCovar = false, bool ignoreHs = true);
bool normalizeCovar = false, bool ignoreHs = true,
double *eigenVals = nullptr);
//! Transform the conformation using the specified transformation
RDKIT_MOLTRANSFORMS_EXPORT void transformConformer(