mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Gaussian shape overlays (#9095)
This commit is contained in:
@@ -78,6 +78,7 @@ add_subdirectory(MolAlign)
|
||||
add_subdirectory(MolChemicalFeatures)
|
||||
add_subdirectory(ShapeHelpers)
|
||||
add_subdirectory(MolCatalog)
|
||||
add_subdirectory(GaussianShape)
|
||||
|
||||
add_subdirectory(MolDraw2D)
|
||||
|
||||
|
||||
13
Code/GraphMol/GaussianShape/CMakeLists.txt
Normal file
13
Code/GraphMol/GaussianShape/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
# Shared library holding the Gaussian shape overlay code.
rdkit_library(GaussianShape
        GaussianShape.cpp
        ShapeInput.cpp
        SingleConformerAlignment.cpp
        SHARED
        LINK_LIBRARIES SmilesParse SubstructMatch MolTransforms)
# RDKIT_GAUSSIANSHAPE_BUILD is defined only while compiling the library itself.
target_compile_definitions(GaussianShape PRIVATE RDKIT_GAUSSIANSHAPE_BUILD)

# Public headers of the library.
rdkit_headers(GaussianShape.h ShapeInput.h ShapeOverlayOptions.h)

# Catch-based test executable.
rdkit_catch_test(testGaussianShape catch_tests.cpp
        LINK_LIBRARIES GaussianShape FileParsers MolAlign MolTransforms)

# The Python wrappers are only built when requested.
if(RDK_BUILD_PYTHON_WRAPPERS)
  add_subdirectory(Wrap)
endif()
|
||||
500
Code/GraphMol/GaussianShape/GaussianShape.cpp
Normal file
500
Code/GraphMol/GaussianShape/GaussianShape.cpp
Normal file
@@ -0,0 +1,500 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
// This is an implementation of the Gaussian overlap molecular overlay
|
||||
// method of Grant, Pickup and Gallardo.
|
||||
// J. Comp. Chem., 17, 1653-1666 (1996)
|
||||
// https://doi.org/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
|
||||
// It uses implementation ideas and some code from the PubChem implementation
|
||||
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp.
|
||||
|
||||
#include <algorithm>
#include <array>
#include <cmath>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

#include <Geometry/Transform3D.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/GaussianShape/GaussianShape.h>

#include "GraphMol/SmilesParse/SmilesWrite.h"

#include <GraphMol/GaussianShape/ShapeInput.h>
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
#include <GraphMol/MolTransforms/MolTransforms.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
|
||||
namespace RDKit {
|
||||
namespace GaussianShape {
|
||||
|
||||
namespace {
|
||||
// Compute final overlay transform, which applies fitShape's
|
||||
// initial canonical transformation, followed by the overlay transform and
|
||||
// finally the inverse of refShape's initial canonical transformation.
|
||||
RDGeom::Transform3D computeFinalTransform(
|
||||
const std::array<double, 3> &inRefTrans,
|
||||
const std::array<double, 9> &inRefRot,
|
||||
const std::array<double, 3> &inFitTrans,
|
||||
const std::array<double, 9> &inFitRot, RDGeom::Transform3D &ovXform) {
|
||||
// Move to fitShape's initial centroid and principal axes
|
||||
RDGeom::Transform3D transform0;
|
||||
transform0.SetTranslation(
|
||||
RDGeom::Point3D{inFitTrans[0], inFitTrans[1], inFitTrans[2]});
|
||||
|
||||
RDGeom::Transform3D transform1;
|
||||
transform1.setValUnchecked(0, 0, inFitRot[0]);
|
||||
transform1.setValUnchecked(0, 1, inFitRot[1]);
|
||||
transform1.setValUnchecked(0, 2, inFitRot[2]);
|
||||
transform1.setValUnchecked(1, 0, inFitRot[3]);
|
||||
transform1.setValUnchecked(1, 1, inFitRot[4]);
|
||||
transform1.setValUnchecked(1, 2, inFitRot[5]);
|
||||
transform1.setValUnchecked(2, 0, inFitRot[6]);
|
||||
transform1.setValUnchecked(2, 1, inFitRot[7]);
|
||||
transform1.setValUnchecked(2, 2, inFitRot[8]);
|
||||
|
||||
RDGeom::Transform3D toRefRefFrame;
|
||||
// Rotate by the inverse of the ref shape's canonical rotation and
|
||||
// translate by the negative of its canonical translation.
|
||||
toRefRefFrame.setValUnchecked(0, 0, inRefRot[0]);
|
||||
toRefRefFrame.setValUnchecked(0, 1, inRefRot[3]);
|
||||
toRefRefFrame.setValUnchecked(0, 2, inRefRot[6]);
|
||||
toRefRefFrame.setValUnchecked(0, 3, -inRefTrans[0]);
|
||||
toRefRefFrame.setValUnchecked(1, 0, inRefRot[1]);
|
||||
toRefRefFrame.setValUnchecked(1, 1, inRefRot[4]);
|
||||
toRefRefFrame.setValUnchecked(1, 2, inRefRot[7]);
|
||||
toRefRefFrame.setValUnchecked(1, 3, -inRefTrans[1]);
|
||||
toRefRefFrame.setValUnchecked(2, 0, inRefRot[2]);
|
||||
toRefRefFrame.setValUnchecked(2, 1, inRefRot[5]);
|
||||
toRefRefFrame.setValUnchecked(2, 2, inRefRot[8]);
|
||||
toRefRefFrame.setValUnchecked(2, 3, -inRefTrans[2]);
|
||||
|
||||
auto finalTransform = toRefRefFrame * ovXform * transform1 * transform0;
|
||||
return finalTransform;
|
||||
}
|
||||
|
||||
// Return the original transformation quaternion for the given index.
|
||||
// Different optimisation modes have different numbers of starting
|
||||
// orientations to try. In order these are no transformation, rotate 180
|
||||
// degrees about each axis and rotate +/- 45 degrees about 2 axes at a time.
|
||||
std::array<double, 4> getInitialRotationPlain(
|
||||
int index, const ShapeInput &refShape, const ShapeInput &fitShape,
|
||||
const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
|
||||
double &score) {
|
||||
static const double sinpi_4 = std::sin(std::atan(1.0));
|
||||
const static std::vector<std::array<double, 4>> quats{
|
||||
{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0},
|
||||
{0.0, 0.0, 1.0, 0.0}, {0.0, 0.0, 0.0, 1.0},
|
||||
{sinpi_4, -sinpi_4, 0.0, 0.0}, {sinpi_4, sinpi_4, 0.0, 0.0},
|
||||
{0.0, 0.0, -sinpi_4, sinpi_4}, {0.0, 0.0, sinpi_4, sinpi_4},
|
||||
{sinpi_4, 0.0, 0.0, -sinpi_4}, {0.0, sinpi_4, sinpi_4, 0.0},
|
||||
{sinpi_4, 0.0, 0.0, sinpi_4}, {0.0, -sinpi_4, sinpi_4, 0.0},
|
||||
{sinpi_4, 0.0, sinpi_4, 0.0}, {0.0, sinpi_4, 0.0, sinpi_4},
|
||||
{0.0, -sinpi_4, 0.0, sinpi_4}, {sinpi_4, 0.0, -sinpi_4, 0.0}};
|
||||
bool useColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
|
||||
std::array<double, 7> quatTrans{
|
||||
quats[index][0], quats[index][1], quats[index][2], quats[index][3],
|
||||
refDisp[0], refDisp[1], refDisp[2]};
|
||||
SingleConformerAlignment sca(
|
||||
refShape.getCoords(), refShape.getTypes().data(),
|
||||
refShape.getCarbonRadii(), refShape.getNumAtoms(),
|
||||
refShape.getNumFeatures(), refShape.getShapeVolume(),
|
||||
refShape.getColorVolume(), fitShape.getCoords(),
|
||||
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
|
||||
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
|
||||
fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
|
||||
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
|
||||
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
|
||||
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
|
||||
auto scores = sca.calcScores(useColor);
|
||||
score = scores[0];
|
||||
return quats[index];
|
||||
}
|
||||
|
||||
// Return the initial transformation matrix in the manner of the PubChem
|
||||
// overlay code. Rotate 180 degrees about each axis, and then
|
||||
// add +/ ~25 degrees from that. It is not revealed where that
|
||||
// angle comes from.
|
||||
std::array<double, 4> getInitialRotationWiggle(
|
||||
int index, const ShapeInput &refShape, const ShapeInput &fitShape,
|
||||
const RDGeom::Point3D &refDisp, const ShapeOverlayOptions &overlayOpts,
|
||||
double &score) {
|
||||
const static double qrot1 = 0.977659114061,
|
||||
qrot = 0.210196709523; // 0.215 (un-normalized)
|
||||
const static std::vector<std::array<double, 4>> quats{
|
||||
{1.0, 0.0, 0.0, 0.0}, // 0 X, Y, Z
|
||||
{qrot1, qrot, 0.0, 0.0}, {qrot1, -qrot, 0.0, 0.0},
|
||||
{qrot1, 0.0, qrot, 0.0}, {qrot1, 0.0, -qrot, 0.0},
|
||||
{qrot1, 0.0, 0.0, qrot}, {qrot1, 0.0, 0.0, -qrot},
|
||||
{0.0, 1.0, 0.0, 0.0}, // 1 X, -Y, -Z
|
||||
{qrot, qrot1, 0.0, 0.0}, {qrot, -qrot1, 0.0, 0.0},
|
||||
{0.0, qrot1, qrot, 0.0}, {0.0, qrot1, -qrot, 0.0},
|
||||
{0.0, qrot1, 0.0, qrot}, {0.0, qrot1, 0.0, -qrot},
|
||||
{0.0, 0.0, 0.0, 1.0}, // 2 -X, -Y, Z
|
||||
{qrot, 0.0, 0.0, qrot1}, {qrot, 0.0, 0.0, -qrot1},
|
||||
{0.0, qrot, 0.0, qrot1}, {0.0, -qrot, 0.0, qrot1},
|
||||
{0.0, 0.0, qrot, qrot1}, {0.0, 0.0, -qrot, qrot1},
|
||||
{0.0, 0.0, 1.0, 0.0}, // 3 -X, Y, -Z
|
||||
{qrot, 0.0, qrot1, 0.0}, {qrot, 0.0, -qrot1, 0.0},
|
||||
{0.0, qrot, qrot1, 0.0}, {0.0, -qrot, qrot1, 0.0},
|
||||
{0.0, 0.0, qrot1, qrot}, {0.0, 0.0, qrot1, -qrot}};
|
||||
unsigned int start_quat = index * 7;
|
||||
unsigned int bestQuat = 0;
|
||||
double bestScore = 0.0;
|
||||
bool useColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
|
||||
std::array<double, 7> tmpQuatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
SingleConformerAlignment sca(
|
||||
refShape.getCoords(), refShape.getTypes().data(),
|
||||
refShape.getCarbonRadii(), refShape.getNumAtoms(),
|
||||
refShape.getNumFeatures(), refShape.getShapeVolume(),
|
||||
refShape.getColorVolume(), fitShape.getCoords(),
|
||||
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
|
||||
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
|
||||
fitShape.getShapeVolume(), fitShape.getColorVolume(), tmpQuatTrans,
|
||||
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
|
||||
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
|
||||
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
|
||||
|
||||
for (unsigned int i = start_quat; i < start_quat + 7; ++i) {
|
||||
std::array<double, 7> quatTrans{quats[i][0], quats[i][1], quats[i][2],
|
||||
quats[i][3], refDisp[0], refDisp[1],
|
||||
refDisp[2]};
|
||||
sca.setQuatTrans(quatTrans);
|
||||
auto scores = sca.calcScores(useColor);
|
||||
if (scores[0] > bestScore) {
|
||||
bestScore = scores[0];
|
||||
bestQuat = i;
|
||||
}
|
||||
}
|
||||
score = bestScore;
|
||||
return quats[bestQuat];
|
||||
}
|
||||
|
||||
// Return the translation that puts the extreme of refShape at the
|
||||
// extreme of the fitShape along the appropriate axis.
|
||||
RDGeom::Point3D getInitialTranslation(int index, ShapeInput &refShape,
|
||||
ShapeInput fitShape) {
|
||||
auto getDisp = [](ShapeInput &shape, size_t i) -> RDGeom::Point3D {
|
||||
const double *coord =
|
||||
shape.getCoords().data() + shape.calcExtremes()[i] * 4;
|
||||
return RDGeom::Point3D(coord[0], coord[1], coord[2]);
|
||||
};
|
||||
RDGeom::Point3D disp;
|
||||
RDGeom::Point3D refDisp, fitDisp;
|
||||
switch (index) {
|
||||
case 1:
|
||||
refDisp = getDisp(refShape, 0);
|
||||
fitDisp = getDisp(fitShape, 0);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
case 2:
|
||||
refDisp = getDisp(refShape, 1);
|
||||
fitDisp = getDisp(fitShape, 1);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
case 3:
|
||||
refDisp = getDisp(refShape, 2);
|
||||
fitDisp = getDisp(fitShape, 2);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
case 4:
|
||||
refDisp = getDisp(refShape, 3);
|
||||
fitDisp = getDisp(fitShape, 3);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
case 5:
|
||||
refDisp = getDisp(refShape, 4);
|
||||
fitDisp = getDisp(fitShape, 4);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
case 6:
|
||||
refDisp = getDisp(refShape, 5);
|
||||
fitDisp = getDisp(fitShape, 5);
|
||||
disp = fitDisp - refDisp;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return disp;
|
||||
}
|
||||
|
||||
// Helper for the PubChem-style choice between ROTATE_180_WIGGLE and
// ROTATE_45.  The rationale is not documented in the PubChem code.
//
// From the three input values a, b, c it forms b+c-a, a+c-b and a+b-c and
// sorts them in descending order (call them q0 >= q1 >= q2).
//
// \param eigenValues  the three values to combine
// \return 1000 if q1 is not positive; otherwise the number (0, 1 or 2) of
//         the ratios q1/q0 and q2/q1 that exceed 0.7225 (= 0.85 * 0.85)
unsigned int calculateQrat(const std::array<double, 3> &eigenValues) {
  constexpr double ratioThreshold = 0.7225;  // 0.85 * 0.85
  constexpr unsigned int undecided = 1000;

  const double a = eigenValues[0];
  const double b = eigenValues[1];
  const double c = eigenValues[2];
  std::array<double, 3> q{b + c - a, a + c - b, a + b - c};
  std::sort(q.begin(), q.end(), std::greater<double>());

  if (!(q[1] > 0)) {
    return undecided;
  }
  unsigned int numAbove = 0;
  if (q[1] / q[0] > ratioThreshold) {
    ++numAbove;
  }
  if (q[2] / q[1] > ratioThreshold) {
    ++numAbove;
  }
  return numAbove;
}
|
||||
|
||||
// Pick the start mode the way the PubChem code does.  That code uses the
// moments of inertia for this, rather than the canonical transformation.
//
// calculateQrat() is evaluated on the moments of inertia of each shape.  If
// either result is greater than 0 the answer is ROTATE_45, otherwise it is
// ROTATE_180_WIGGLE.
//
// \param refShape  the reference shape
// \param fitShape  the shape being fitted
// \return StartMode::ROTATE_45 or StartMode::ROTATE_180_WIGGLE
StartMode decideStartModeFromEigenValues(ShapeInput &refShape,
                                         ShapeInput &fitShape) {
  const auto refQrat = calculateQrat(refShape.calcMomentsOfInertia(true));
  const auto fitQrat = calculateQrat(fitShape.calcMomentsOfInertia(true));
  const bool anyPositive = refQrat > 0 || fitQrat > 0;
  return anyPositive ? StartMode::ROTATE_45 : StartMode::ROTATE_180_WIGGLE;
}
|
||||
|
||||
// Overlay fitShape onto refShape starting from a set of initial
// orientations and keep the best result.
//
// The number of starts is (number of rotations) x (number of translations):
//   - rotations: 1 for the ROTATE_0 modes, 4 for the ROTATE_180 modes
//     (including ROTATE_180_WIGGLE), 16 for the ROTATE_45 modes;
//   - translations: 7 for the *_FRAGMENT modes, otherwise 1.
// StartMode::A_LA_PUBCHEM is first resolved to a concrete mode by
// decideStartModeFromEigenValues().
//
// \param refShape     the reference shape
// \param fitShape     the shape being fitted
// \param bestXform    receives the transformation of the best overlay found
// \param overlayOpts  overlay options
// \return the first 3 scores reported by the best overlay; all zeros if no
//         overlay produced a score above -1.0
std::array<double, 3> alignShape(ShapeInput &refShape, ShapeInput &fitShape,
                                 RDGeom::Transform3D &bestXform,
                                 const ShapeOverlayOptions &overlayOpts) {
  auto startMode = overlayOpts.startMode;
  if (startMode == StartMode::A_LA_PUBCHEM) {
    startMode = decideStartModeFromEigenValues(refShape, fitShape);
  }

  unsigned int finalRotIndex = 1;
  switch (startMode) {
    case StartMode::ROTATE_0:
    case StartMode::ROTATE_0_FRAGMENT:
      break;
    case StartMode::ROTATE_180:
    case StartMode::ROTATE_180_FRAGMENT:
    case StartMode::ROTATE_180_WIGGLE:
      finalRotIndex = 4;
      break;
    case StartMode::ROTATE_45:
    case StartMode::ROTATE_45_FRAGMENT:
      finalRotIndex = 16;
      break;
    default:
      break;
  }
  unsigned int finalTransIndex = 1;
  if (startMode == StartMode::ROTATE_0_FRAGMENT ||
      startMode == StartMode::ROTATE_45_FRAGMENT ||
      startMode == StartMode::ROTATE_180_FRAGMENT) {
    finalTransIndex = 7;
  }
  const unsigned int numStarts = finalTransIndex * finalRotIndex;

  // Value-initialised so that a defined result is returned, and a defined
  // threshold is used in the second cycle, even if no overlay ever beats
  // bestTotal.
  std::array<double, 3> bestScore{0.0, 0.0, 0.0};
  double bestTotal = -1.0;

  // Get together the start transformations.  Each start gets its own
  // aligner; bestScoreForStart pairs a score with the aligner's index.
  std::vector<std::unique_ptr<SingleConformerAlignment>> aligners;
  aligners.reserve(numStarts);
  std::vector<std::pair<double, unsigned int>> bestScoreForStart;
  bestScoreForStart.reserve(numStarts);
  unsigned int startIdx = 0;
  for (unsigned int j = 0; j < finalTransIndex; j++) {
    const auto refDisp = getInitialTranslation(j, refShape, fitShape);
    for (unsigned int i = 0; i < finalRotIndex; i++, startIdx++) {
      double score = 0.0;
      std::array<double, 4> quat;
      if (startMode == StartMode::ROTATE_180_WIGGLE) {
        quat = getInitialRotationWiggle(i, refShape, fitShape, refDisp,
                                        overlayOpts, score);
      } else {
        quat = getInitialRotationPlain(i, refShape, fitShape, refDisp,
                                       overlayOpts, score);
      }
      std::array<double, 7> initQuat{quat[0],   quat[1],   quat[2],  quat[3],
                                     refDisp.x, refDisp.y, refDisp.z};
      aligners.emplace_back(std::make_unique<SingleConformerAlignment>(
          refShape.getCoords(), refShape.getTypes().data(),
          refShape.getCarbonRadii(), refShape.getNumAtoms(),
          refShape.getNumFeatures(), refShape.getShapeVolume(),
          refShape.getColorVolume(), fitShape.getCoords(),
          fitShape.getTypes().data(), fitShape.getCarbonRadii(),
          fitShape.getNumAtoms(), fitShape.getNumFeatures(),
          fitShape.getShapeVolume(), fitShape.getColorVolume(), initQuat,
          overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
          overlayOpts.optParam, overlayOpts.useDistCutoff,
          overlayOpts.distCutoff, overlayOpts.shapeConvergenceCriterion,
          overlayOpts.nSteps));
      bestScoreForStart.emplace_back(score, startIdx);
    }
  }

  // Do it in 2 cycles, a quick optimisation first, followed by an additional
  // longer one for those that look like they're going to win.
  for (unsigned int cycle = 0; cycle < 2; cycle++) {
    // Best-scoring starts first.
    std::ranges::sort(bestScoreForStart,
                      [](const auto &p1, const auto &p2) -> bool {
                        return p1.first > p2.first;
                      });
    std::vector<std::pair<double, unsigned int>> nextBestScoreForStart;
    nextBestScoreForStart.reserve(bestScoreForStart.size());
    for (const auto &[startScore, alignerIdx] : bestScoreForStart) {
      // In the second cycle, skip starts that are below 70% of the best
      // score so far.
      if (cycle == 1 && startScore < 0.7 * bestScore[0]) {
        continue;
      }
      std::array<double, 20> outScores{};
      aligners[alignerIdx]->doOverlay(outScores, cycle);
      nextBestScoreForStart.emplace_back(outScores[0], alignerIdx);
      if (outScores[0] > bestTotal) {
        bestTotal = outScores[0];
        bestScore =
            std::array<double, 3>{outScores[0], outScores[1], outScores[2]};
        aligners[alignerIdx]->getFinalQuatTrans(bestXform);
      }
    }
    bestScoreForStart.swap(nextBestScoreForStart);
  }
  return bestScore;
}
|
||||
} // namespace
|
||||
|
||||
std::array<double, 3> AlignShape(const ShapeInput &refShape,
|
||||
ShapeInput &fitShape,
|
||||
RDGeom::Transform3D *xform,
|
||||
const ShapeOverlayOptions &overlayOpts) {
|
||||
// The shapes aren't necessarily normalized (it's not done on creation, for
|
||||
// example) but they might need to be.
|
||||
auto workingRefShape = std::make_unique<ShapeInput>(refShape);
|
||||
auto workingFitShape = std::make_unique<ShapeInput>(fitShape);
|
||||
auto inRefTrans = workingRefShape->calcCanonicalTranslation();
|
||||
auto inRefRot = workingRefShape->calcCanonicalRotation();
|
||||
auto inFitTrans = workingFitShape->calcCanonicalTranslation();
|
||||
auto inFitRot = workingFitShape->calcCanonicalRotation();
|
||||
// If we're not normalizing, translate both shapes so that the fit
|
||||
// is at the origin, so the rotations work.
|
||||
RDGeom::Transform3D moveToOrigin;
|
||||
RDGeom::Transform3D moveFromOrigin;
|
||||
if (overlayOpts.normalize) {
|
||||
if (!workingRefShape->getNormalized()) {
|
||||
workingRefShape->normalizeCoords();
|
||||
}
|
||||
if (!workingFitShape->getNormalized()) {
|
||||
workingFitShape->normalizeCoords();
|
||||
}
|
||||
} else {
|
||||
moveToOrigin.SetTranslation(
|
||||
RDGeom::Point3D{workingFitShape->calcCanonicalTranslation()[0],
|
||||
workingFitShape->calcCanonicalTranslation()[1],
|
||||
workingFitShape->calcCanonicalTranslation()[2]});
|
||||
moveFromOrigin.SetTranslation(
|
||||
RDGeom::Point3D{-workingFitShape->calcCanonicalTranslation()[0],
|
||||
-workingFitShape->calcCanonicalTranslation()[1],
|
||||
-workingFitShape->calcCanonicalTranslation()[2]});
|
||||
workingFitShape->transformCoords(moveToOrigin);
|
||||
workingRefShape->transformCoords(moveToOrigin);
|
||||
}
|
||||
|
||||
RDGeom::Transform3D bestXform;
|
||||
auto scores =
|
||||
alignShape(*workingRefShape, *workingFitShape, bestXform, overlayOpts);
|
||||
if (!overlayOpts.normalize) {
|
||||
// Shove it back again.
|
||||
auto finalXform = moveFromOrigin * bestXform * moveToOrigin;
|
||||
bestXform = finalXform;
|
||||
} else {
|
||||
auto finalXform = computeFinalTransform(inRefTrans, inRefRot, inFitTrans,
|
||||
inFitRot, bestXform);
|
||||
bestXform = finalXform;
|
||||
}
|
||||
fitShape.transformCoords(bestXform);
|
||||
if (xform) {
|
||||
*xform = bestXform;
|
||||
}
|
||||
|
||||
return scores;
|
||||
}
|
||||
|
||||
std::array<double, 3> AlignMolecule(const ShapeInput &refShape, ROMol &fit,
|
||||
const ShapeInputOptions &fitOpts,
|
||||
RDGeom::Transform3D *xform,
|
||||
const ShapeOverlayOptions &overlayOpts,
|
||||
int fitConfId) {
|
||||
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
|
||||
RDGeom::Transform3D tmpXform;
|
||||
auto scores = AlignShape(refShape, fitShape, &tmpXform, overlayOpts);
|
||||
MolTransforms::transformConformer(fit.getConformer(fitConfId), tmpXform);
|
||||
if (xform) {
|
||||
*xform = tmpXform;
|
||||
}
|
||||
return scores;
|
||||
}
|
||||
|
||||
std::array<double, 3> AlignMolecule(const ROMol &ref, ROMol &fit,
|
||||
const ShapeInputOptions &refOpts,
|
||||
const ShapeInputOptions &fitOpts,
|
||||
RDGeom::Transform3D *xform,
|
||||
const ShapeOverlayOptions &overlayOpts,
|
||||
int refConfId, int fitConfId) {
|
||||
auto refShape = ShapeInput(ref, refConfId, refOpts, overlayOpts);
|
||||
auto scores =
|
||||
AlignMolecule(refShape, fit, fitOpts, xform, overlayOpts, fitConfId);
|
||||
return scores;
|
||||
}
|
||||
|
||||
std::array<double, 3> ScoreShape(const ShapeInput &refShape,
|
||||
const ShapeInput &fitShape,
|
||||
const ShapeOverlayOptions &overlayOpts) {
|
||||
auto refWorking = refShape.getCoords();
|
||||
auto fitWorking = fitShape.getCoords();
|
||||
std::array<double, 7> quatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
SingleConformerAlignment sca(
|
||||
refShape.getCoords(), refShape.getTypes().data(),
|
||||
refShape.getCarbonRadii(), refShape.getNumAtoms(),
|
||||
refShape.getNumFeatures(), refShape.getShapeVolume(),
|
||||
refShape.getColorVolume(), fitShape.getCoords(),
|
||||
fitShape.getTypes().data(), fitShape.getCarbonRadii(),
|
||||
fitShape.getNumAtoms(), fitShape.getNumFeatures(),
|
||||
fitShape.getShapeVolume(), fitShape.getColorVolume(), quatTrans,
|
||||
overlayOpts.optimMode, overlayOpts.simAlpha, overlayOpts.simBeta,
|
||||
overlayOpts.optParam, overlayOpts.useDistCutoff, overlayOpts.distCutoff,
|
||||
overlayOpts.shapeConvergenceCriterion, overlayOpts.nSteps);
|
||||
bool includeColor = overlayOpts.optimMode != OptimMode::SHAPE_ONLY;
|
||||
auto scores = sca.calcScores(refShape.getCoords().data(),
|
||||
fitShape.getCoords().data(), includeColor);
|
||||
return std::array{scores[0], scores[1], scores[2]};
|
||||
}
|
||||
|
||||
std::array<double, 3> ScoreMolecule(const ShapeInput &refShape,
|
||||
const ROMol &fit,
|
||||
const ShapeInputOptions &fitOpts,
|
||||
const ShapeOverlayOptions &overlayOpts,
|
||||
int fitConfId) {
|
||||
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, overlayOpts);
|
||||
return ScoreShape(refShape, fitShape, overlayOpts);
|
||||
}
|
||||
|
||||
std::array<double, 3> ScoreMolecule(const ROMol &ref, const ROMol &fit,
|
||||
const ShapeInputOptions &refOpts,
|
||||
const ShapeInputOptions &fitOpts,
|
||||
const ShapeOverlayOptions &overlayOpts,
|
||||
int refConfId, int fitConfId) {
|
||||
ShapeOverlayOptions tmpOpts = overlayOpts;
|
||||
tmpOpts.normalize = false;
|
||||
tmpOpts.startMode = StartMode::ROTATE_0;
|
||||
ShapeInputOptions tmpRefOpts = refOpts;
|
||||
auto refShape = ShapeInput(ref, refConfId, refOpts, tmpOpts);
|
||||
|
||||
ShapeInputOptions tmpFitOpts = fitOpts;
|
||||
auto fitShape = ShapeInput(fit, fitConfId, fitOpts, tmpOpts);
|
||||
|
||||
return ScoreShape(refShape, fitShape, tmpOpts);
|
||||
}
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
157
Code/GraphMol/GaussianShape/GaussianShape.h
Normal file
157
Code/GraphMol/GaussianShape/GaussianShape.h
Normal file
@@ -0,0 +1,157 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
// This is the interface for the functions to perform shape-based molecule
|
||||
// alignments and scoring. It is experimental code and the API and/or
|
||||
// results may change in future releases.
|
||||
|
||||
#ifndef RDKIT_GAUSSIANSHAPE_GUARD
|
||||
#define RDKIT_GAUSSIANSHAPE_GUARD
|
||||
|
||||
#include <RDGeneral/export.h>
|
||||
#include <Geometry/Transform3D.h>
|
||||
#include <GraphMol/GaussianShape/ShapeInput.h>
|
||||
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
||||
|
||||
namespace RDKit {
|
||||
class ROMol;
|
||||
class Conformer;
|
||||
|
||||
namespace GaussianShape {
|
||||
|
||||
//! Align a shape onto a reference shape.
|
||||
/*!
|
||||
\param refShape the reference shape
|
||||
\param fitShape the shape to align
|
||||
\param xform if passed in as non-null, will be populated with the
|
||||
transformation matrix that aligns fit onto ref.
|
||||
\param overlayOpts options for the overlay
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignShape(
|
||||
const ShapeInput &refShape, ShapeInput &fitShape,
|
||||
RDGeom::Transform3D *xform = nullptr,
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
|
||||
|
||||
//! Align a molecule to a reference shape
|
||||
/*!
|
||||
\param refShape the reference shape
|
||||
\param fit the molecule to align
|
||||
\param fitOpts the options for creating the fit shape
|
||||
\param xform if passed in as non-null, will be populated with the
|
||||
transformation matrix that aligns fit onto ref.
|
||||
\param overlayOpts options for setting up and running the overlay
|
||||
\param fitConfId (optional) the conformer to use for the fit
|
||||
molecule
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignMolecule(
|
||||
const ShapeInput &refShape, ROMol &fit,
|
||||
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
|
||||
RDGeom::Transform3D *xform = nullptr,
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
|
||||
int fitConfId = -1);
|
||||
|
||||
//! Align a molecule to a reference molecule
|
||||
/*!
|
||||
\param ref the reference molecule
|
||||
\param fit the molecule to align
|
||||
\param refOpts the options for creating the ref shape
|
||||
\param fitOpts the options for creating the fit shape
|
||||
\param xform if passed in as non-null, will be populated with the
|
||||
transformation matrix that aligns fit onto ref.
|
||||
\param overlayOpts options for setting up and running the overlay
|
||||
\param refConfId (optional) the conformer to use for the reference
|
||||
molecule
|
||||
\param fitConfId (optional) the conformer to use for the fit
|
||||
molecule
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> AlignMolecule(
|
||||
const ROMol &ref, ROMol &fit,
|
||||
const ShapeInputOptions &refOpts = ShapeInputOptions(),
|
||||
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
|
||||
RDGeom::Transform3D *xform = nullptr,
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
|
||||
int refConfId = -1, int fitConfId = -1);
|
||||
|
||||
//! Score the overlap of a shape to a reference shape without moving
|
||||
// either.
|
||||
/*!
|
||||
\param refShape the reference shape
|
||||
\param fitShape the shape to score
|
||||
\param overlayOpts options for controlling the volume calculation
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreShape(
|
||||
const ShapeInput &refShape, const ShapeInput &fitShape,
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
|
||||
|
||||
//! Score the overlap of a molecule to a reference shape without moving
|
||||
// either.
|
||||
/*!
|
||||
\param refShape the reference shape
|
||||
\param fit the molecule to score
|
||||
\param fitOpts the options for creating the fit shape
|
||||
\param overlayOpts options for controlling the volume calculation
|
||||
\param fitConfId (optional) the conformer to use for the fit
|
||||
molecule
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreMolecule(
|
||||
const ShapeInput &refShape, const ROMol &fit,
|
||||
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
|
||||
int fitConfId = -1);
|
||||
|
||||
//! Score the overlap of a molecule to a reference molecule without moving
|
||||
// either.
|
||||
/*!
|
||||
\param ref the reference molecule
|
||||
\param fit the molecule to score
|
||||
\param refOpts the options for creating the ref shape
|
||||
\param fitOpts the options for creating the fit shape
|
||||
\param overlayOpts options for controlling the volume calculation
|
||||
\param refConfId (optional) the conformer to use for the reference
|
||||
molecule
|
||||
\param fitConfId (optional) the conformer to use for the fit
|
||||
molecule
|
||||
|
||||
\return an array of the combination score of the shape Tversky value and the
|
||||
color Tversky value (zero if colors not used) and the individual values. If
|
||||
using color features, defaults to RDKit pharmacophore types for the features.
|
||||
*/
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT std::array<double, 3> ScoreMolecule(
|
||||
const ROMol &ref, const ROMol &fit,
|
||||
const ShapeInputOptions &refOpts = ShapeInputOptions(),
|
||||
const ShapeInputOptions &fitOpts = ShapeInputOptions(),
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions(),
|
||||
int refConfId = -1, int fitConfId = -1);
|
||||
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
|
||||
#endif // RDKIT_GAUSSIANSHAPE_GUARD
|
||||
604
Code/GraphMol/GaussianShape/ShapeInput.cpp
Normal file
604
Code/GraphMol/GaussianShape/ShapeInput.cpp
Normal file
@@ -0,0 +1,604 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
|
||||
#include <Geometry/point.h>
|
||||
#include <Geometry/Transform3D.h>
|
||||
#include <GraphMol/ROMol.h>
|
||||
#include <GraphMol/RWMol.h>
|
||||
#include <GraphMol/GaussianShape/ShapeInput.h>
|
||||
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
|
||||
#include <GraphMol/MolTransforms/MolTransforms.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
#include <GraphMol/Substruct/SubstructMatch.h>
|
||||
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/flyweight.hpp>
|
||||
#include <boost/flyweight/key_value.hpp>
|
||||
#include <boost/flyweight/no_tracking.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
#ifdef RDK_HAS_EIGEN3
|
||||
#include <Eigen/Dense>
|
||||
#endif
|
||||
|
||||
// File-scope mutex.  It has internal linkage so that its very generic name
// cannot clash at link time with a global of the same name defined in
// another translation unit.
// NOTE(review): the code that locks it is outside the visible part of this
// file - confirm that nothing elsewhere declares it `extern`.
static std::mutex mtx;
|
||||
|
||||
namespace RDKit {
|
||||
namespace GaussianShape {
|
||||
|
||||
// Van der Waals (Bondi) radii keyed by atomic number. More values can be
// found in Table 12 of
// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3658832/
// Atomic number 0 (dummy atom) is given the same radius as Xe, 2.16.
// Elements missing from the table have no standard radius here.
const std::map<unsigned int, double> vdw_radii = {
    // Dummy
    {0, 2.16},
    // H, He
    {1, 1.10}, {2, 1.40},
    // Li, Be, B, C, N, O, F, Ne
    {3, 1.81}, {4, 1.53}, {5, 1.92}, {6, 1.70},
    {7, 1.55}, {8, 1.52}, {9, 1.47}, {10, 1.54},
    // Na, Mg, Al, Si, P, S, Cl, Ar
    {11, 2.27}, {12, 1.73}, {13, 1.84}, {14, 2.10},
    {15, 1.80}, {16, 1.80}, {17, 1.75}, {18, 1.88},
    // K, Ca
    {19, 2.75}, {20, 2.31},
    // Ga, Ge, As, Se, Br, Kr
    {31, 1.87}, {32, 2.11}, {33, 1.85},
    {34, 1.90}, {35, 1.83}, {36, 2.02},
    // Rb, Sr
    {37, 3.03}, {38, 2.49},
    // In, Sn, Sb, Te, I, Xe
    {49, 1.93}, {50, 2.17}, {51, 2.06},
    {52, 2.06}, {53, 1.98}, {54, 2.16},
    // Cs, Ba
    {55, 3.43}, {56, 2.68},
    // Tl, Pb, Bi, Po, At, Rn
    {81, 1.96}, {82, 2.02}, {83, 2.07},
    {84, 1.97}, {85, 2.02}, {86, 2.20},
    // Fr, Ra
    {87, 3.48}, {88, 2.83},
};
|
||||
// Every feature/color "atom" shares this single radius, the value used by
// the PubChem code.
constexpr double radius_color = 1.08265;
|
||||
|
||||
// Build the shape for one conformer of a molecule: extract the atoms,
// optionally the color features, work out the normalization and the extreme
// points, and cache the shape and color self-overlap volumes.
//   mol         - source molecule; must have at least one conformer.
//   confId      - id of the conformer to take coordinates from.
//   opts        - atom subset, radii and color feature settings.
//   overlayOpts - supplies the distance cutoff settings used when computing
//                 the self-overlap volumes.
ShapeInput::ShapeInput(const ROMol &mol, int confId,
                       const ShapeInputOptions &opts,
                       const ShapeOverlayOptions &overlayOpts) {
  // The _Name property is optional. Fetch it without throwing so that a
  // nameless molecule still reports the real problem (no conformers) rather
  // than a missing-property error.
  const auto molName = [&mol]() {
    std::string name;
    mol.getPropIfPresent("_Name", name);
    return name;
  };
  PRECONDITION(mol.getNumConformers() > 0,
               "ShapeInput object needs the molecule to have conformers. " +
                   molName() + " " + MolToSmiles(mol));

  if (opts.allCarbonRadii && !opts.atomRadii.empty()) {
    BOOST_LOG(rdWarningLog)
        << "Specifying allCarbonRadii and providing custom atom radii doesn't"
           " make sense.  Ignoring the radii."
        << std::endl;
  }
  extractAtoms(mol, confId, opts);
  if (opts.useColors) {
    extractFeatures(mol, confId, opts);
  }
  calcNormalization();
  // Compute the extreme points directly rather than through the caching
  // accessor, so d_extremePoints is guaranteed to be populated for the
  // coordinates that have just been extracted.
  calculateExtremes();

  std::vector<double> gradConverters(12 * (d_numAtoms + d_numFeats));
  const double distCutoffSq = overlayOpts.distCutoff * overlayOpts.distCutoff;
  // Shape volume: overlap of the atoms with themselves.
  d_selfOverlapVol =
      calcVolAndGrads(d_coords.data(), d_numAtoms, d_carbonRadii.get(),
                      d_coords.data(), d_numAtoms, d_carbonRadii.get(),
                      gradConverters, overlayOpts.useDistCutoff, distCutoffSq);
  // Color volume: overlap of the features with themselves. The features
  // follow the atoms in d_coords (4 doubles per point) and in d_types.
  d_selfOverlapColor = calcVolAndGrads(
      d_coords.data() + 4 * d_numAtoms, d_numFeats, d_types.data() + d_numAtoms,
      d_coords.data() + 4 * d_numAtoms, d_numFeats, d_types.data() + d_numAtoms,
      d_numAtoms, gradConverters, overlayOpts.useDistCutoff, distCutoffSq,
      nullptr, nullptr);
}
|
||||
|
||||
// Copy constructor. Everything is copied member-wise except the carbon-radii
// bitset, which is owned through a unique_ptr and therefore deep-copied (or
// left empty when the source has none).
ShapeInput::ShapeInput(const ShapeInput &other)
    : d_coords(other.d_coords),
      d_types(other.d_types),
      d_numAtoms(other.d_numAtoms),
      d_numFeats(other.d_numFeats),
      d_selfOverlapVol(other.d_selfOverlapVol),
      d_selfOverlapColor(other.d_selfOverlapColor),
      d_extremePoints(other.d_extremePoints),
      d_carbonRadii(other.d_carbonRadii
                        ? std::make_unique<boost::dynamic_bitset<>>(
                              *other.d_carbonRadii)
                        : nullptr),
      d_normalized(other.d_normalized),
      d_normalizationOK(other.d_normalizationOK),
      d_canonRot(other.d_canonRot),
      d_canonTrans(other.d_canonTrans),
      d_eigenValues(other.d_eigenValues) {}
|
||||
|
||||
// Copy assignment. Self-assignment is a no-op; otherwise every member is
// copied and the carbon-radii bitset is deep-copied or cleared to match the
// source.
ShapeInput &ShapeInput::operator=(const ShapeInput &other) {
  if (this != &other) {
    if (other.d_carbonRadii) {
      d_carbonRadii =
          std::make_unique<boost::dynamic_bitset<>>(*other.d_carbonRadii);
    } else {
      d_carbonRadii.reset();
    }
    d_eigenValues = other.d_eigenValues;
    d_canonTrans = other.d_canonTrans;
    d_canonRot = other.d_canonRot;
    d_normalizationOK = other.d_normalizationOK;
    d_normalized = other.d_normalized;
    d_extremePoints = other.d_extremePoints;
    d_selfOverlapColor = other.d_selfOverlapColor;
    d_selfOverlapVol = other.d_selfOverlapVol;
    d_numFeats = other.d_numFeats;
    d_numAtoms = other.d_numAtoms;
    d_types = other.d_types;
    d_coords = other.d_coords;
  }
  return *this;
}
|
||||
|
||||
std::vector<RDGeom::Point3D> ShapeInput::getAtomPoints(
|
||||
bool includeColors) const {
|
||||
std::vector<RDGeom::Point3D> atomPoints;
|
||||
unsigned int numPoints = getNumAtoms();
|
||||
if (includeColors) {
|
||||
numPoints += getNumFeatures();
|
||||
}
|
||||
atomPoints.reserve(numPoints);
|
||||
for (unsigned int i = 0; i < 4 * numPoints; i += 4) {
|
||||
atomPoints.emplace_back(
|
||||
RDGeom::Point3D(d_coords[i], d_coords[i + 1], d_coords[i + 2]));
|
||||
}
|
||||
return atomPoints;
|
||||
}
|
||||
|
||||
const std::array<double, 9> &ShapeInput::calcCanonicalRotation() {
|
||||
if (!d_normalizationOK) {
|
||||
calcNormalization();
|
||||
}
|
||||
return d_canonRot;
|
||||
}
|
||||
|
||||
const std::array<double, 3> &ShapeInput::calcCanonicalTranslation() {
|
||||
if (!d_normalizationOK) {
|
||||
calcNormalization();
|
||||
}
|
||||
return d_canonTrans;
|
||||
}
|
||||
|
||||
const std::array<double, 3> &ShapeInput::calcEigenValues() {
|
||||
if (!d_normalizationOK) {
|
||||
calcNormalization();
|
||||
}
|
||||
return d_eigenValues;
|
||||
}
|
||||
|
||||
const std::array<size_t, 6> &ShapeInput::calcExtremes() {
|
||||
if (!d_normalizationOK) {
|
||||
calculateExtremes();
|
||||
}
|
||||
return d_extremePoints;
|
||||
}
|
||||
|
||||
std::array<double, 3> ShapeInput::calcMomentsOfInertia(
|
||||
bool includeColors) const {
|
||||
auto tmpMol = shapeToMol(includeColors);
|
||||
std::array<double, 3> eVals;
|
||||
#if RDK_HAS_EIGEN3
|
||||
Eigen::Matrix3d axes;
|
||||
Eigen::Vector3d moments;
|
||||
MolTransforms::computePrincipalAxesAndMoments(tmpMol->getConformer(), axes,
|
||||
moments);
|
||||
eVals[0] = moments[0];
|
||||
eVals[1] = moments[1];
|
||||
eVals[2] = moments[2];
|
||||
#else
|
||||
std::unique_ptr<RDGeom::Transform3D> canonXform(
|
||||
MolTransforms::computeCanonicalTransform(tmpMol->getConformer(), nullptr,
|
||||
false, true, eVals.data()));
|
||||
#endif
|
||||
return eVals;
|
||||
}
|
||||
|
||||
void ShapeInput::normalizeCoords() {
|
||||
if (d_normalized) {
|
||||
return;
|
||||
}
|
||||
if (!d_normalizationOK) {
|
||||
calcNormalization();
|
||||
}
|
||||
RDGeom::Transform3D canonRot;
|
||||
for (unsigned int i = 0, k = 0; i < 3; ++i) {
|
||||
for (unsigned int j = 0; j < 3; ++j, ++k) {
|
||||
canonRot.setValUnchecked(i, j, d_canonRot[k]);
|
||||
}
|
||||
}
|
||||
RDGeom::Point3D trans{d_canonTrans[0], d_canonTrans[1], d_canonTrans[2]};
|
||||
canonRot.TransformPoint(trans);
|
||||
canonRot.SetTranslation(trans);
|
||||
|
||||
transformCoords(canonRot);
|
||||
d_normalized = true;
|
||||
// Recalculate the extremes now we've changed the coordinates.
|
||||
calcExtremes();
|
||||
}
|
||||
|
||||
// Apply an arbitrary transformation to all the shape's points. Afterwards
// the shape is no longer flagged as normalized and the cached normalization
// is marked as stale.
void ShapeInput::transformCoords(RDGeom::Transform3D &xform) {
  d_normalizationOK = false;
  d_normalized = false;
  applyTransformToShape(d_coords, xform);
}
|
||||
|
||||
std::unique_ptr<RWMol> ShapeInput::shapeToMol(bool includeColors) const {
|
||||
auto mol = std::make_unique<RWMol>();
|
||||
for (unsigned int i = 0; i < getNumAtoms(); i++) {
|
||||
Atom *atom = new Atom(6);
|
||||
mol->addAtom(atom, true, true);
|
||||
}
|
||||
if (includeColors) {
|
||||
for (unsigned int i = 0; i < getNumFeatures(); i++) {
|
||||
Atom *atom = new Atom(7);
|
||||
mol->addAtom(atom, true, true);
|
||||
}
|
||||
}
|
||||
unsigned int num = getNumAtoms();
|
||||
if (includeColors) {
|
||||
num += getNumFeatures();
|
||||
}
|
||||
Conformer *conf = new Conformer(num);
|
||||
const auto &shapeCds = getCoords();
|
||||
for (unsigned int i = 0; i < num; i++) {
|
||||
auto &pos = conf->getAtomPos(i);
|
||||
pos.x = shapeCds[4 * i];
|
||||
pos.y = shapeCds[4 * i + 1];
|
||||
pos.z = shapeCds[4 * i + 2];
|
||||
}
|
||||
mol->addConformer(conf, true);
|
||||
return mol;
|
||||
}
|
||||
|
||||
namespace {
|
||||
double getStandardAtomRadius(unsigned int atomicNum) {
|
||||
if (auto rad = vdw_radii.find(static_cast<unsigned int>(atomicNum));
|
||||
rad != vdw_radii.end()) {
|
||||
return rad->second;
|
||||
}
|
||||
throw ValueErrorException("No VdW radius for atom with Z=" +
|
||||
std::to_string(atomicNum));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
void ShapeInput::extractAtoms(const ROMol &mol, int confId,
|
||||
const ShapeInputOptions &opts) {
|
||||
d_coords.reserve(mol.getNumAtoms() * 4);
|
||||
if (!opts.allCarbonRadii) {
|
||||
d_carbonRadii.reset(new boost::dynamic_bitset<>(
|
||||
!opts.atomSubset.empty() ? opts.atomSubset.size() : mol.getNumAtoms()));
|
||||
}
|
||||
auto conf = mol.getConformer(confId);
|
||||
// Index of atoms that have been added to the shape.
|
||||
unsigned int idx = 0;
|
||||
for (const auto atom : mol.atoms()) {
|
||||
if (!opts.atomSubset.empty()) {
|
||||
const auto atomIdx = atom->getIdx();
|
||||
if (auto it = std::ranges::find_if(
|
||||
opts.atomSubset,
|
||||
[atomIdx](const auto &p) -> bool { return p == atomIdx; });
|
||||
it == opts.atomSubset.end()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (atom->getAtomicNum() > 1) {
|
||||
auto atIdx = atom->getIdx();
|
||||
auto &pos = conf.getAtomPos(atIdx);
|
||||
d_coords.push_back(pos.x);
|
||||
d_coords.push_back(pos.y);
|
||||
d_coords.push_back(pos.z);
|
||||
if (opts.allCarbonRadii) {
|
||||
d_coords.push_back(KAPPA / (1.7 * 1.7));
|
||||
} else {
|
||||
double rad = 0.0;
|
||||
if (opts.atomRadii.empty()) {
|
||||
if (atom->getAtomicNum() == 6) {
|
||||
rad = 1.7;
|
||||
(*d_carbonRadii)[idx] = true;
|
||||
} else {
|
||||
rad = getStandardAtomRadius(atom->getAtomicNum());
|
||||
}
|
||||
} else {
|
||||
auto it = std::ranges::find_if(
|
||||
opts.atomRadii,
|
||||
[atIdx](const auto &p) -> bool { return p.first == atIdx; });
|
||||
if (it == opts.atomRadii.end()) {
|
||||
rad = getStandardAtomRadius(atom->getAtomicNum());
|
||||
} else {
|
||||
rad = it->second;
|
||||
}
|
||||
}
|
||||
d_coords.push_back(KAPPA / (rad * rad));
|
||||
}
|
||||
}
|
||||
++idx;
|
||||
}
|
||||
d_numAtoms = d_coords.size() / 4;
|
||||
d_types.resize(d_numAtoms);
|
||||
d_numFeats = 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
class ss_matcher {
|
||||
public:
|
||||
ss_matcher(const std::string &pattern) : m_pattern(pattern) {
|
||||
m_needCopies = (pattern.find_first_of("$") != std::string::npos);
|
||||
RDKit::RWMol *p = RDKit::SmartsToMol(pattern);
|
||||
m_matcher = p;
|
||||
POSTCONDITION(m_matcher, "no matcher");
|
||||
};
|
||||
const RDKit::ROMol *getMatcher() const { return m_matcher; };
|
||||
unsigned int countMatches(const RDKit::ROMol &mol) const {
|
||||
PRECONDITION(m_matcher, "no matcher");
|
||||
std::vector<RDKit::MatchVectType> matches;
|
||||
// This is an ugly one. Recursive queries aren't thread safe.
|
||||
// Unfortunately we have to take a performance hit here in order
|
||||
// to guarantee thread safety
|
||||
if (m_needCopies) {
|
||||
const RDKit::ROMol nm(*(m_matcher), true);
|
||||
RDKit::SubstructMatch(mol, nm, matches);
|
||||
} else {
|
||||
const RDKit::ROMol &nm = *m_matcher;
|
||||
RDKit::SubstructMatch(mol, nm, matches);
|
||||
}
|
||||
return matches.size();
|
||||
}
|
||||
~ss_matcher() { delete m_matcher; };
|
||||
|
||||
private:
|
||||
ss_matcher() : m_pattern("") {};
|
||||
std::string m_pattern;
|
||||
bool m_needCopies{false};
|
||||
const RDKit::ROMol *m_matcher{nullptr};
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// This came from the original PubChemShape.cpp
|
||||
typedef boost::flyweight<boost::flyweights::key_value<std::string, ss_matcher>,
|
||||
boost::flyweights::no_tracking>
|
||||
pattern_flyweight;
|
||||
// Definitions for feature points adapted from:
// Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
// One inner vector per feature type, holding the SMARTS that define it. The
// long patterns are split into adjacent string literals, which the compiler
// joins into a single string.
const std::vector<std::vector<std::string>> smartsPatterns = {
    // Donor
    {"[$([N;!H0;v3,v4&+1]),"
     "$([O,S;H1;+0]),"
     "n&H1&+0]"},
    // Acceptor
    {"[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),"
     "$([O,S;H0;v2]),"
     "$([O,S;-]),"
     "$([N;v3;!$(N-*=[O,N,P,S])]),"
     "n&H0&+0,"
     "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"},
    // Rings of 3 to 7 atoms
    {
        "[r]1[r][r]1",
        "[r]1[r][r][r]1",
        "[r]1[r][r][r][r]1",
        "[r]1[r][r][r][r][r]1",
        "[r]1[r][r][r][r][r][r]1",
    },
    // Not currently used:
    //    "[a]",          // Aromatic
    //    "[F,Cl,Br,I]",  // Halogen
    // Basic
    {"[#7;+,"
     "$([N;H2&+0][$([C,a]);!$([C,a](=O))]),"
     "$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),"
     "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"},
    // Acidic
    {"[$([C,S](=[O,S,P])-[O;H1,-1])]"},
};
|
||||
// Return the query molecules for the feature SMARTS in smartsPatterns, in
// the same nested layout. The queries themselves are owned by the
// pattern_flyweight, so the pointers handed out here are non-owning.
//
// The table is built by the initialiser of a function-local static, which
// the language guarantees runs exactly once even when several threads make
// the first call together. If building it throws, nothing is cached and the
// next call starts again, so callers never see a partially filled table.
std::vector<std::vector<const ROMol *>> *getPh4Patterns() {
  static std::vector<std::vector<const ROMol *>> patterns = [] {
    std::vector<std::vector<const ROMol *>> built;
    built.reserve(smartsPatterns.size());
    for (const auto &smartsV : smartsPatterns) {
      std::vector<const ROMol *> v;
      v.reserve(smartsV.size());
      for (const auto &smarts : smartsV) {
        const ROMol *matcher = pattern_flyweight(smarts).get().getMatcher();
        CHECK_INVARIANT(matcher, "bad smarts");
        v.push_back(matcher);
      }
      built.push_back(std::move(v));
    }
    return built;
  }();

  return &patterns;
}
|
||||
|
||||
// Extract the features for the color scores, using RDKit pphore features
|
||||
// for now. Other options to be added later.
|
||||
void ShapeInput::extractFeatures(const ROMol &mol, int confId,
|
||||
const ShapeInputOptions &opts) {
|
||||
if (opts.customFeatures.empty()) {
|
||||
unsigned pattIdx = 1;
|
||||
const auto pattVects = getPh4Patterns();
|
||||
for (const auto &patts : *pattVects) {
|
||||
for (const auto &patt : patts) {
|
||||
std::vector<MatchVectType> matches;
|
||||
{
|
||||
// recursive queries aren't thread safe.
|
||||
matches = SubstructMatch(mol, *patt);
|
||||
}
|
||||
for (const auto &match : matches) {
|
||||
std::vector<unsigned int> ats;
|
||||
bool featOk = true;
|
||||
for (const auto &pr : match) {
|
||||
// make sure all the atoms are in the subset, if there is one
|
||||
if (!opts.atomSubset.empty()) {
|
||||
if (std::ranges::find_if(
|
||||
opts.atomSubset, [pr](const auto &p) -> bool {
|
||||
return p == static_cast<unsigned int>(pr.second);
|
||||
}) == opts.atomSubset.end()) {
|
||||
featOk = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ats.push_back(pr.second);
|
||||
}
|
||||
if (!featOk) {
|
||||
continue;
|
||||
}
|
||||
auto featPos = computeFeaturePos(mol, confId, ats);
|
||||
d_types.push_back(pattIdx);
|
||||
d_coords.push_back(featPos.x);
|
||||
d_coords.push_back(featPos.y);
|
||||
d_coords.push_back(featPos.z);
|
||||
d_coords.push_back(KAPPA / (radius_color * radius_color));
|
||||
d_numFeats++;
|
||||
}
|
||||
}
|
||||
++pattIdx;
|
||||
}
|
||||
} else {
|
||||
// Just copy them directly
|
||||
for (const auto &f : opts.customFeatures) {
|
||||
d_types.push_back(std::get<0>(f));
|
||||
d_numFeats++;
|
||||
const auto &pos = std::get<1>(f);
|
||||
d_coords.push_back(pos.x);
|
||||
d_coords.push_back(pos.y);
|
||||
d_coords.push_back(pos.z);
|
||||
d_coords.push_back(std::get<2>(f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShapeInput::calcNormalization() {
|
||||
d_eigenValues = std::array<double, 3>{0.0, 0.0, 0.0};
|
||||
// Build a "molecule" just of the shape points, not the color features
|
||||
// with which to calculate the canonical transformation. Doesn't ever
|
||||
// use the input molecule in case the shape was built from a subset of
|
||||
// atoms in that molecule.
|
||||
auto tmpMol = shapeToMol(false);
|
||||
std::unique_ptr<RDGeom::Transform3D> canonXform(
|
||||
MolTransforms::computeCanonicalTransform(
|
||||
tmpMol->getConformer(), nullptr, false, true, d_eigenValues.data()));
|
||||
d_canonRot =
|
||||
std::array<double, 9>{1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
|
||||
for (unsigned int i = 0, k = 0; i < 3; ++i) {
|
||||
for (unsigned int j = 0; j < 3; ++j, ++k) {
|
||||
d_canonRot[k] = canonXform->getValUnchecked(i, j);
|
||||
}
|
||||
}
|
||||
d_canonTrans = std::array<double, 3>{0.0, 0.0, 0.0};
|
||||
for (unsigned int i = 0; i < 4 * d_numAtoms; i += 4) {
|
||||
d_canonTrans[0] -= d_coords[i];
|
||||
d_canonTrans[1] -= d_coords[i + 1];
|
||||
d_canonTrans[2] -= d_coords[i + 2];
|
||||
}
|
||||
d_canonTrans[0] /= d_numAtoms;
|
||||
d_canonTrans[1] /= d_numAtoms;
|
||||
d_canonTrans[2] /= d_numAtoms;
|
||||
d_normalizationOK = true;
|
||||
}
|
||||
|
||||
// Find the indices of the points with the smallest and largest x, y and z
// coordinates. Slots 0-2 of d_extremePoints hold the minima for x, y, z and
// slots 3-5 the maxima. All points in d_coords are scanned, and on ties the
// first point found is kept.
void ShapeInput::calculateExtremes() {
  d_extremePoints.fill(0);
  for (size_t pt = 0; 4 * pt < d_coords.size(); ++pt) {
    for (size_t axis = 0; axis < 3; ++axis) {
      const double val = d_coords[4 * pt + axis];
      size_t &lowest = d_extremePoints[axis];
      size_t &highest = d_extremePoints[axis + 3];
      if (val < d_coords[4 * lowest + axis]) {
        lowest = pt;
      }
      if (val > d_coords[4 * highest + axis]) {
        highest = pt;
      }
    }
  }
}
|
||||
|
||||
// Mean position, in conformer confId of mol, of the atoms whose indices are
// given in ats.
RDGeom::Point3D computeFeaturePos(const ROMol &mol, int confId,
                                  const std::vector<unsigned int> &ats) {
  auto &conformer = mol.getConformer(confId);
  RDGeom::Point3D centroid;
  for (auto atomIt = ats.begin(); atomIt != ats.end(); ++atomIt) {
    centroid += conformer.getAtomPos(*atomIt);
  }
  centroid /= ats.size();
  return centroid;
}
|
||||
|
||||
void applyTransformToShape(std::vector<double> &shape,
|
||||
RDGeom::Transform3D &xform) {
|
||||
for (size_t i = 0; i < shape.size(); i += 4) {
|
||||
xform.TransformPoint(shape.data() + i);
|
||||
}
|
||||
}
|
||||
|
||||
void applyTransformToShape(const double *inShape, double *outShape,
|
||||
size_t numPoints, RDGeom::Transform3D &xform) {
|
||||
for (size_t i = 0; i < 4 * numPoints; i += 4) {
|
||||
outShape[i] = inShape[i];
|
||||
outShape[i + 1] = inShape[i + 1];
|
||||
outShape[i + 2] = inShape[i + 2];
|
||||
outShape[i + 3] = inShape[i + 3];
|
||||
xform.TransformPoint(outShape + i);
|
||||
}
|
||||
}
|
||||
|
||||
void translateShape(std::vector<double> &shape,
|
||||
const RDGeom::Point3D &translation) {
|
||||
for (size_t i = 0; i < shape.size(); i += 4) {
|
||||
shape[i] += translation.x;
|
||||
shape[i + 1] += translation.y;
|
||||
shape[i + 2] += translation.z;
|
||||
}
|
||||
}
|
||||
|
||||
void translateShape(const double *inShape, double *outShape, size_t numPoints,
|
||||
const RDGeom::Point3D &translation) {
|
||||
for (size_t i = 0; i < 4 * numPoints; i += 4) {
|
||||
outShape[i] = inShape[i] + translation.x;
|
||||
outShape[i + 1] = inShape[i + 1] + translation.y;
|
||||
outShape[i + 2] = inShape[i + 2] + translation.z;
|
||||
outShape[i + 3] = inShape[i + 3];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
265
Code/GraphMol/GaussianShape/ShapeInput.h
Normal file
265
Code/GraphMol/GaussianShape/ShapeInput.h
Normal file
@@ -0,0 +1,265 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
|
||||
#ifndef RDKIT_SHAPEINPUT_GUARD
|
||||
#define RDKIT_SHAPEINPUT_GUARD
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include <RDGeneral/export.h>
|
||||
#include <Geometry/Transform3D.h>
|
||||
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <boost/serialization/vector.hpp>
|
||||
#include <boost/serialization/array.hpp>
|
||||
#include <boost/serialization/unique_ptr.hpp>
|
||||
#endif
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
||||
|
||||
// The code below was provided by Claude (Sonnet 4.6).
|
||||
// It first tried to get me to use boost/serialization/dynamic_bitset.hpp
|
||||
// and then admitted that it had made that up.
|
||||
namespace boost {
|
||||
namespace serialization {
|
||||
|
||||
template <class Archive, typename Block, typename Allocator>
|
||||
void serialize(Archive &ar, boost::dynamic_bitset<Block, Allocator> &bs,
|
||||
const unsigned int /*version*/) {
|
||||
size_t num_bits = bs.size();
|
||||
ar & num_bits;
|
||||
|
||||
std::vector<Block> blocks;
|
||||
|
||||
if (Archive::is_saving::value) {
|
||||
to_block_range(bs, std::back_inserter(blocks));
|
||||
}
|
||||
|
||||
ar & blocks;
|
||||
|
||||
if (Archive::is_loading::value) {
|
||||
bs.resize(num_bits);
|
||||
from_block_range(blocks.begin(), blocks.end(), bs);
|
||||
bs.resize(num_bits); // trim any excess bits
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace serialization
|
||||
} // namespace boost
|
||||
|
||||
namespace RDKit {
|
||||
class ROMol;
|
||||
class RWMol;
|
||||
namespace GaussianShape {
|
||||
|
||||
// From Grant et al.
|
||||
constexpr double P = 2.7;
|
||||
constexpr double KAPPA = 2.41798793102;
|
||||
using CustomFeatures =
|
||||
std::vector<std::tuple<unsigned int, RDGeom::Point3D, double>>;
|
||||
|
||||
struct ShapeInputOptions {
|
||||
ShapeInputOptions() = default;
|
||||
ShapeInputOptions(const ShapeInputOptions &) = default;
|
||||
ShapeInputOptions(ShapeInputOptions &&) = default;
|
||||
ShapeInputOptions &operator=(const ShapeInputOptions &) = default;
|
||||
ShapeInputOptions &operator=(ShapeInputOptions &&) = default;
|
||||
|
||||
~ShapeInputOptions() = default;
|
||||
|
||||
// By default, it will create features using the RDKit pharmacophore
|
||||
// definitions.
|
||||
bool useColors{
|
||||
true}; //! Whether to build the color features. By default, it will
|
||||
//! create features using the RDKit pharmacophore definitions.
|
||||
|
||||
CustomFeatures customFeatures; //! Custom color features used verbatim. A
|
||||
//! vector of tuples of integer type, Point3D
|
||||
//! coords, double radius.
|
||||
std::vector<unsigned int>
|
||||
atomSubset; //! If not empty, use just these atoms in the molecule to
|
||||
//! form the ShapeInput object.
|
||||
std::vector<std::pair<unsigned int, double>>
|
||||
atomRadii; //! Use these non-standard radii for these atoms. The int is
|
||||
//! for the atom index in the molecule, not the atomic number.
|
||||
//! Not all atoms need be specified, just some radii can be
|
||||
//! over-ridden, with the rest left as standard.
|
||||
bool allCarbonRadii{
|
||||
true}; //! Whether to use carbon radii for all atoms (which is quicker
|
||||
//! but less accurate) or vdw radii appropriate for the elements.
|
||||
};
|
||||
|
||||
// Data for shape alignment code
|
||||
class RDKIT_GAUSSIANSHAPE_EXPORT ShapeInput {
|
||||
public:
|
||||
//! Create the ShapeInput object.
|
||||
//! @param mol: The molecule of interest
|
||||
//! @param confId: The conformer to use
|
||||
//! @param opts: Options for setting up the shape
|
||||
ShapeInput(const ROMol &mol, int confId = -1,
|
||||
const ShapeInputOptions &opts = ShapeInputOptions(),
|
||||
const ShapeOverlayOptions &overlayOpts = ShapeOverlayOptions());
|
||||
ShapeInput(const std::string &str) {
|
||||
#ifndef RDK_USE_BOOST_SERIALIZATION
|
||||
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
|
||||
#else
|
||||
std::stringstream ss(str);
|
||||
boost::archive::text_iarchive ia(ss);
|
||||
ia &*this;
|
||||
#endif
|
||||
}
|
||||
ShapeInput(const ShapeInput &other);
|
||||
ShapeInput(ShapeInput &&other) = default;
|
||||
ShapeInput &operator=(const ShapeInput &other);
|
||||
ShapeInput &operator=(ShapeInput &&other) = default;
|
||||
virtual ~ShapeInput() = default;
|
||||
|
||||
std::string toString() const {
|
||||
#ifndef RDK_USE_BOOST_SERIALIZATION
|
||||
PRECONDITION(0, "Boost SERIALIZATION is not enabled")
|
||||
#else
|
||||
std::stringstream ss;
|
||||
boost::archive::text_oarchive oa(ss);
|
||||
oa &*this;
|
||||
return ss.str();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Note that the coords returned is a vector size 4*getNumAtoms()
|
||||
// with the 4th value per atom being the alpha paramter.
|
||||
const std::vector<double> &getCoords() const { return d_coords; }
|
||||
//! Fetch the coordinates of the atoms and optionally features.
|
||||
std::vector<RDGeom::Point3D> getAtomPoints(bool includeColors = false) const;
|
||||
bool getNormalized() const { return d_normalized; }
|
||||
const std::vector<int> &getTypes() const { return d_types; }
|
||||
unsigned int getNumAtoms() const { return d_numAtoms; }
|
||||
unsigned int getNumFeatures() const { return d_numFeats; }
|
||||
double getShapeVolume() const { return d_selfOverlapVol; }
|
||||
double getColorVolume() const { return d_selfOverlapColor; }
|
||||
const boost::dynamic_bitset<> *getCarbonRadii() const {
|
||||
return d_carbonRadii.get();
|
||||
}
|
||||
// These functions use cached values if available.
|
||||
const std::array<double, 9> &calcCanonicalRotation();
|
||||
const std::array<double, 3> &calcCanonicalTranslation();
|
||||
const std::array<double, 3> &calcEigenValues();
|
||||
const std::array<size_t, 6> &calcExtremes();
|
||||
// Return the principal moments of inertia, if Eigen3 is available, and the
|
||||
// eigenvalues of the canonical transformation if not.
|
||||
std::array<double, 3> calcMomentsOfInertia(bool includeColors = false) const;
|
||||
|
||||
// Align the principal axes to the cartesian axes and centre on the origin.
|
||||
// Doesn't require that the shape was created from a molecule. Creates
|
||||
// the necessary transformation if not already done.
|
||||
void normalizeCoords();
|
||||
|
||||
void transformCoords(RDGeom::Transform3D &xform);
|
||||
|
||||
// Mock a molecule up from the shape for visual inspection and sometimes
|
||||
// calculation of the normalization matrices. No bonds.
|
||||
// Atoms are C, features are N.
|
||||
virtual std::unique_ptr<RWMol> shapeToMol(bool includeColors = true) const;
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
template <class Archive>
|
||||
void serialize(Archive &ar, const unsigned int) {
|
||||
ar & d_coords;
|
||||
ar & d_types;
|
||||
ar & d_numAtoms;
|
||||
ar & d_numFeats;
|
||||
ar & d_selfOverlapVol;
|
||||
ar & d_selfOverlapColor;
|
||||
ar & d_extremePoints;
|
||||
ar & d_carbonRadii;
|
||||
ar & d_normalized;
|
||||
ar & d_normalizationOK;
|
||||
ar & d_canonRot;
|
||||
ar & d_canonTrans;
|
||||
ar & d_eigenValues;
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
void extractAtoms(const ROMol &mol, int confId,
|
||||
const ShapeInputOptions &opts);
|
||||
// Extract the features for the color scores, using RDKit pphore features
|
||||
// for now. Other options to be added later.
|
||||
void extractFeatures(const ROMol &mol, int confId,
|
||||
const ShapeInputOptions &shapeOpts);
|
||||
// Calculate the rotation and translation that will align the principal axes
|
||||
// to the cartesian axes and centre on the origin.
|
||||
void calcNormalization();
|
||||
|
||||
void calculateExtremes();
|
||||
|
||||
std::vector<double> d_coords; // The coordinates and alpha parameter for the
|
||||
// atoms and features, packed as 4 floats per
|
||||
// item - x, y, z and alpha. alpha is KAPPA / (r * r) where r is the radius
|
||||
// of the atom. This is not used if using all_atoms_carbon mode.
|
||||
std::vector<int> d_types; // The feature types. The size is the same
|
||||
// as the number of coordinates, padded with 0
|
||||
// for the atoms.
|
||||
unsigned int d_numAtoms; // The number of atoms
|
||||
unsigned int d_numFeats; // The number of features
|
||||
double d_selfOverlapVol{0.0}; // Shape volume
|
||||
double d_selfOverlapColor{0.0}; // Color volume
|
||||
// These are the points at the extremes of the x, y and z axes.
|
||||
// they are min_x, min_y, min_z and max_x, max_y, max_z.
|
||||
std::array<size_t, 6> d_extremePoints;
|
||||
std::unique_ptr<boost::dynamic_bitset<>>
|
||||
d_carbonRadii; // Flags those atoms with a carbon radius, for faster
|
||||
// calculation later.
|
||||
|
||||
// This is the rotation and translation to align the principal axes of the
|
||||
// shape with cartesian axes. If d_normalized is true, it has been applied
|
||||
// to the coordinates.
|
||||
bool d_normalized{false};
|
||||
// If the shape is moved, the normalization matrices are no longer valid.
|
||||
// This flags that so it is re-computed as required.
|
||||
bool d_normalizationOK{false};
|
||||
|
||||
std::array<double, 9> d_canonRot;
|
||||
std::array<double, 3> d_canonTrans;
|
||||
// The sorted eigenvalues of the principal axes.
|
||||
std::array<double, 3> d_eigenValues;
|
||||
};
|
||||
|
||||
// Calculate the mean position of the given atoms.
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Point3D computeFeaturePos(
|
||||
const ROMol &mol, int confId, const std::vector<unsigned int> &ats);
|
||||
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT RDGeom::Transform3D quatTransToTransform(
|
||||
const double *quat, const double *trans);
|
||||
|
||||
// Apply the transformation to the coordinates assumed to be in
|
||||
// ShapeInput.d_coords form.
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
|
||||
std::vector<double> &shape, RDGeom::Transform3D &xform);
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT void applyTransformToShape(
|
||||
const double *inShape, double *outShape, size_t numPoints,
|
||||
RDGeom::Transform3D &xform);
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
|
||||
std::vector<double> &shape, const RDGeom::Point3D &translation);
|
||||
RDKIT_GAUSSIANSHAPE_EXPORT void translateShape(
|
||||
const double *inShape, double *outShape, size_t numPoints,
|
||||
const RDGeom::Point3D &translation);
|
||||
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
|
||||
#endif // RDKIT_SHAPEINPUT_GUARD
|
||||
119
Code/GraphMol/GaussianShape/ShapeOverlayOptions.h
Normal file
119
Code/GraphMol/GaussianShape/ShapeOverlayOptions.h
Normal file
@@ -0,0 +1,119 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
// Options for the Roshambo2-based shape overlay.
|
||||
|
||||
#ifndef RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|
||||
#define RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|
||||
|
||||
#include <RDGeneral/export.h>
|
||||
|
||||
namespace RDKit {
|
||||
class ROMol;
|
||||
namespace GaussianShape {
|
||||
|
||||
//! The starting orientations tried for the optimisation.
// The comments use the Doxygen trailing form (//!<) so that each one is
// attached to the enumerator it follows rather than to the next one.
enum class RDKIT_GAUSSIANSHAPE_EXPORT StartMode {
  ROTATE_0,    //!< No rotation, just normalization if requested
  ROTATE_180,  //!< ROTATE_0 plus rotate by 180 degrees about each of x, y, z
  ROTATE_180_WIGGLE,  //!< ROTATE_180 plus, as the PubChem code does, rotate
                      //!< by a small amount (~25 degrees) about each axis
                      //!< and use the highest scoring of those as the start
                      //!< point for that rotation angle
  ROTATE_45,  //!< ROTATE_180 plus rotate by 45 degrees about pairs of each of
              //!< x, y, z
  ROTATE_0_FRAGMENT,    //!< No rotation, translate probe to each end of ref
  ROTATE_180_FRAGMENT,  //!< Translate probe to each end of ref and then
                        //!< ROTATE_180
  ROTATE_45_FRAGMENT,   //!< Translate probe to each end of ref and then
                        //!< ROTATE_45
  A_LA_PUBCHEM,  //!< Uses the eigenvalues of the principal vectors to decide
                 //!< whether to do ROTATE_180_WIGGLE or ROTATE_45
};
|
||||
|
||||
//! What drives the optimisation and what goes into the final score.
enum class RDKIT_GAUSSIANSHAPE_EXPORT OptimMode {
  SHAPE_ONLY,  //!< Drive the optimisation by shape overlap only.
  SHAPE_PLUS_COLOR_SCORE,  //!< Drive the optimisation by shape, but include
                           //!< color in the score to determine the best
                           //!< solution. Color never used in the optimisation
                           //!< stage.
  SHAPE_PLUS_COLOR,  //!< Drive the optimisation by overlap of shape and color
                     //!< features.
};
|
||||
|
||||
//! The user-settable options for a shape overlay, with their defaults.
struct RDKIT_GAUSSIANSHAPE_EXPORT ShapeOverlayOptions {
  //! Different modes for starting the optimisation. Default is as used by the
  //! PubChem code. The molecules are normalized so the principal axes are
  //! along the cartesian axes rather than the shape quadrupole axes as Grant et
  //! al. did.
  StartMode startMode{StartMode::A_LA_PUBCHEM};
  //! Optimisation mode.
  OptimMode optimMode{
      OptimMode::SHAPE_PLUS_COLOR_SCORE};
  //! When doing a Tversky similarity, the alpha value. If alpha and
  //! beta are both the default 1.0, it's a Tanimoto similarity. A
  //! high alpha and low beta emphasize the fit volume in the
  //! similarity and vice versa. Tversky is O / (A * (R - O) + B * (F
  //! - O) + O) where O is the overlap volume, R is the reference's
  //! volume and F is the fit's volume. This is different from that
  //! used by OpenEye (O / (A * R + B * F)).
  double simAlpha{
      1.0};
  //! When doing a Tversky similarity, the beta value.
  double simBeta{1.0};
  //! If using colors, the relative weights of shape and color scores.
  double optParam{0.5};
  //! Maximum number of steps for optimiser to take.
  int nSteps{100};

  //! Whether to normalise the shapes by putting them into their
  //! canonical conformations (centred at the origin, aligned along
  //! its principal axes) before starting.
  bool normalize{
      true};
  //! Whether to use a distance cutoff for the volume calculation.
  bool useDistCutoff{
      true};
  //! The distance cutoff. If 2 atoms are more than this distance apart,
  //! they are not included in the volume calculation. A smaller value is
  //! faster but less precise.
  double distCutoff{4.5};
  //! Optimisation stops when the shape score changes by less than this
  //! amount. A larger number is faster but less precise.
  double shapeConvergenceCriterion{
      0.001};
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const StartMode &sm) {
|
||||
switch (sm) {
|
||||
case StartMode::ROTATE_0:
|
||||
os << "ROTATE_0";
|
||||
break;
|
||||
case StartMode::ROTATE_0_FRAGMENT:
|
||||
os << "ROTATE_0_FRAGMENT";
|
||||
break;
|
||||
case StartMode::ROTATE_180:
|
||||
os << "ROTATE_180";
|
||||
break;
|
||||
case StartMode::ROTATE_180_FRAGMENT:
|
||||
os << "ROTATE_180_FRAGMENT";
|
||||
break;
|
||||
case StartMode::ROTATE_180_WIGGLE:
|
||||
os << "ROTATE_180_WIGGLE";
|
||||
break;
|
||||
case StartMode::ROTATE_45:
|
||||
os << "ROTATE_45";
|
||||
break;
|
||||
case StartMode::ROTATE_45_FRAGMENT:
|
||||
os << "ROTATE_45_FRAGMENT";
|
||||
break;
|
||||
case StartMode::A_LA_PUBCHEM:
|
||||
os << "A_LA_PUBCHEM";
|
||||
break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
|
||||
#endif // RDKIT_SHAPEOVERLAYOPTIONS_GUARD
|
||||
638
Code/GraphMol/GaussianShape/SingleConformerAlignment.cpp
Normal file
638
Code/GraphMol/GaussianShape/SingleConformerAlignment.cpp
Normal file
@@ -0,0 +1,638 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <numbers>
|
||||
#include <numeric>
|
||||
|
||||
#include <Geometry/point.h>
|
||||
#include <Geometry/Transform3D.h>
|
||||
#include <GraphMol/GaussianShape/ShapeInput.h>
|
||||
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
||||
#include <GraphMol/GaussianShape/SingleConformerAlignment.h>
|
||||
|
||||
// Number of doubles stored per point in the flattened coordinate arrays.
// Used below to step past the shape points to reach the color features.
constexpr int D = 4;
|
||||
|
||||
namespace RDKit {
|
||||
namespace GaussianShape {
|
||||
|
||||
// Sets up the working state for overlaying one fit conformer on one ref
// conformer.  Copies of the ref and fit coordinates are taken and put into
// the start position described by initQuatTrans: entries 0-3 are used as the
// rotation quaternion and entries 4-6 as the translation.
SingleConformerAlignment::SingleConformerAlignment(
    const std::vector<double> &ref, const int *refTypes,
    const boost::dynamic_bitset<> *refCarbonRadii, int nRefShape, int nRefColor,
    double refShapeVol, double refColorVol, const std::vector<double> &fit,
    const int *fitTypes, const boost::dynamic_bitset<> *fitCarbonRadii,
    int nFitShape, int nFitColor, double fitShapeVol, double fitColorVol,
    const std::array<double, 7> &initQuatTrans, OptimMode optimMode,
    double simAlpha, double simBeta, double mixingParam, bool useCutoff,
    double distCutoff, double shapeConvergenceCriterion, unsigned int maxIts)
    : d_ref(ref),
      d_refTypes(refTypes),
      d_refCarbonRadii(refCarbonRadii),
      d_nRefShape(nRefShape),
      d_nRefColor(nRefColor),
      d_refShapeVol(refShapeVol),
      d_refColorVol(refColorVol),
      d_fit(fit),
      d_fitTypes(fitTypes),
      d_fitCarbonRadii(fitCarbonRadii),
      d_nFitShape(nFitShape),
      d_nFitColor(nFitColor),
      d_fitShapeVol(fitShapeVol),
      d_fitColorVol(fitColorVol),
      d_initQuatTrans(initQuatTrans),
      d_optimMode(optimMode),
      d_simAlpha(simAlpha),
      d_simBeta(simBeta),
      d_mixingParam(mixingParam),
      d_useCutoff(useCutoff),
      // Stored squared so it can be compared directly with squared distances.
      d_distCutoff2(distCutoff * distCutoff),
      d_shapeConvergenceCriterion(shapeConvergenceCriterion),
      d_maxIts(maxIts) {
  // Move the reference by initialTrans, leaving fit at the origin where
  // the rotations work properly.  Apply the initial rotation to the fit.
  translateShape(d_ref, RDGeom::Point3D{d_initQuatTrans[4], d_initQuatTrans[5],
                                        d_initQuatTrans[6]});
  RDGeom::Transform3D xform;
  xform.SetRotationFromQuaternion(d_initQuatTrans.data());
  applyTransformToShape(d_fit, xform);
  // Scratch copies that receive the moved coordinates at each step.
  d_refTemp.resize(d_ref.size());
  d_fitTemp.resize(d_fit.size());
  // 12 converter values (3 Cartesian derivatives for each of the 4 quaternion
  // components) for every fit point, shape points first then color features.
  d_gradConverters.resize(12 * (d_nFitShape + d_nFitColor));
}
|
||||
|
||||
// Builds the overall transformation for the fit molecule by composing, from
// right to left: the initial rotation, the optimised rotation + translation,
// and the reverse of the initial translation.  The result is written to xform.
void SingleConformerAlignment::getFinalQuatTrans(
    RDGeom::Transform3D &xform) const {
  // The rotation that was applied to the fit in the constructor.
  RDGeom::Transform3D startRotation;
  startRotation.SetRotationFromQuaternion(d_initQuatTrans.data());

  // The rotation and translation found by the optimiser.
  RDGeom::Transform3D optimised;
  optimised.SetRotationFromQuaternion(d_quatTrans.data());
  optimised.SetTranslation(
      RDGeom::Point3D{d_quatTrans[4], d_quatTrans[5], d_quatTrans[6]});

  // The negative of the translation that was applied to the ref.
  RDGeom::Transform3D undoStartTranslation;
  undoStartTranslation.SetTranslation(RDGeom::Point3D{
      -d_initQuatTrans[4], -d_initQuatTrans[5], -d_initQuatTrans[6]});

  xform = undoStartTranslation * optimised * startRotation;
}
|
||||
|
||||
std::array<double, 5> SingleConformerAlignment::calcScores(const double *ref,
|
||||
const double *fit,
|
||||
bool includeColor) {
|
||||
std::array<double, 5> scores{0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
scores[3] = calcVolAndGrads(ref, d_nRefShape, d_refCarbonRadii, fit,
|
||||
d_nFitShape, d_fitCarbonRadii, d_gradConverters,
|
||||
d_useCutoff, d_distCutoff2, nullptr, nullptr);
|
||||
if (d_nRefColor && d_nFitColor &&
|
||||
(d_optimMode == OptimMode::SHAPE_PLUS_COLOR || includeColor)) {
|
||||
scores[4] = calcVolAndGrads(ref + d_nRefShape * D, d_nRefColor,
|
||||
d_refTypes + d_nRefShape, fit + d_nFitShape * D,
|
||||
d_nFitColor, d_fitTypes + d_nFitShape,
|
||||
d_nFitShape, d_gradConverters, d_useCutoff,
|
||||
d_distCutoff2, nullptr, nullptr);
|
||||
}
|
||||
scores = calcScores(scores[3], scores[4], includeColor);
|
||||
return scores;
|
||||
}
|
||||
|
||||
// Scores the overlay described by the current d_quatTrans: moves the
// coordinates into d_refTemp and d_fitTemp and scores those.
std::array<double, 5> SingleConformerAlignment::calcScores(bool includeColor) {
  applyQuatTrans(d_quatTrans);
  return calcScores(d_refTemp.data(), d_fitTemp.data(), includeColor);
}
|
||||
|
||||
std::array<double, 5> SingleConformerAlignment::calcScores(
|
||||
const double shapeOvVol, const double colorOvVol, bool includeColor) const {
|
||||
std::array<double, 5> scores{0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
scores[3] = shapeOvVol;
|
||||
scores[4] = colorOvVol;
|
||||
scores[1] =
|
||||
shapeOvVol / (d_simAlpha * (d_refShapeVol - shapeOvVol) +
|
||||
d_simBeta * (d_fitShapeVol - shapeOvVol) + shapeOvVol);
|
||||
if (d_nRefColor && d_nFitColor && d_refColorVol > 0.0 &&
|
||||
d_fitColorVol > 0.0 && includeColor) {
|
||||
scores[2] =
|
||||
colorOvVol / (d_simAlpha * (d_refColorVol - colorOvVol) +
|
||||
d_simBeta * (d_fitColorVol - colorOvVol) + colorOvVol);
|
||||
scores[0] = scores[1] * (1 - d_mixingParam) + scores[2] * d_mixingParam;
|
||||
} else {
|
||||
scores[0] = scores[1];
|
||||
}
|
||||
return scores;
|
||||
}
|
||||
|
||||
namespace {
// Fills gradConverters with the values that convert Cartesian gradients to
// quaternion gradients via the chain rule, dV/dQ = (dV/dr) * (dr/dQ), where
// V is the volume overlap, r a Cartesian coordinate and Q a quaternion
// component.
//
// quat holds the 4 quaternion components, mol holds numBPts points of 4
// doubles each (only the first 3, x, y and z, are read).  Point i writes 12
// values starting at gradConverters[12 * (gradConvOffset + i)], ordered as
// d(x,y,z)/dq, d(x,y,z)/dr, d(x,y,z)/ds, d(x,y,z)/du.  gradConverters must
// already be big enough.
void cartToQuatGrads(const double *quat, const double *mol, int numBPts,
                     std::vector<double> &gradConverters, int gradConvOffset) {
  const auto q = quat[0];
  const auto r = quat[1];
  const auto s = quat[2];
  const auto u = quat[3];
  const auto coef = 1.0 / (q * q + r * r + s * s + u * u);

  for (int pt = 0; pt < numBPts; ++pt) {
    const double *xyz = mol + 4 * pt;
    const auto x = xyz[0];
    const auto y = xyz[1];
    const auto z = xyz[2];

    // Only 4 distinct values are needed; the other 8 derivatives are equal
    // to one of these or its negative.
    const auto dx_dq = coef * 2.0 * (q * x + u * y - s * z);
    const auto dx_dr = coef * 2.0 * (r * x + s * y + u * z);
    const auto dy_dr = coef * 2.0 * (s * x - r * y + q * z);
    const auto dx_du = coef * 2.0 * (-u * x + q * y + r * z);

    const auto k = 12 * (gradConvOffset + pt);
    // with respect to q
    gradConverters[k] = dx_dq;
    gradConverters[k + 1] = dx_du;   // dy_dq
    gradConverters[k + 2] = dy_dr;   // dz_dq
    // with respect to r
    gradConverters[k + 3] = dx_dr;
    gradConverters[k + 4] = dy_dr;
    gradConverters[k + 5] = -dx_du;  // dz_dr
    // with respect to s
    gradConverters[k + 6] = -dy_dr;  // dx_ds
    gradConverters[k + 7] = dx_dr;   // dy_ds
    gradConverters[k + 8] = dx_dq;   // dz_ds
    // with respect to u
    gradConverters[k + 9] = dx_du;
    gradConverters[k + 10] = -dx_dq;  // dy_du
    gradConverters[k + 11] = dx_dr;   // dz_du
  }
}
}  // namespace
|
||||
|
||||
// atoms/shape features
|
||||
double calcVolAndGrads(const double *ref, int numRefPts,
|
||||
const boost::dynamic_bitset<> *refCarbonRadii,
|
||||
const double *fit, int numFitPts,
|
||||
const boost::dynamic_bitset<> *fitCarbonRadii,
|
||||
std::vector<double> &gradConverters, bool useCutoff,
|
||||
double distCutoff2, const double *quat,
|
||||
double *gradients) {
|
||||
if (gradients) {
|
||||
cartToQuatGrads(quat, fit, numFitPts, gradConverters, 0);
|
||||
}
|
||||
static const double CARBON_A = KAPPA / (1.7 * 1.7);
|
||||
static const double CARBON_BIT =
|
||||
8.0 * pow(std::numbers::pi / (2 * CARBON_A), 1.5);
|
||||
double vol = 0.0;
|
||||
double vij;
|
||||
// If either of the carbon radii flags aren't supplied, treat them
|
||||
// both as being all carbon. There isn't enough information to do
|
||||
// otherwise.
|
||||
bool allCarbon = !refCarbonRadii || !fitCarbonRadii;
|
||||
for (int i = 0, i_idx = 0; i < numRefPts * 4; i += 4, i_idx++) {
|
||||
const auto ai = ref[i + 3];
|
||||
for (int j = 0, j_idx = 0, k = 0; j < numFitPts * 4;
|
||||
j += 4, j_idx++, k += 12) {
|
||||
auto dx = ref[i] - fit[j];
|
||||
auto dy = ref[i + 1] - fit[j + 1];
|
||||
auto dz = ref[i + 2] - fit[j + 2];
|
||||
auto d2 = dx * dx + dy * dy + dz * dz;
|
||||
if (useCutoff && d2 > distCutoff2) {
|
||||
continue;
|
||||
}
|
||||
const auto aj = fit[j + 3];
|
||||
auto mult = -(ai * aj) / (ai + aj);
|
||||
auto kij = exp(mult * d2);
|
||||
if (allCarbon || (!allCarbon && (*refCarbonRadii)[i_idx] &&
|
||||
(*fitCarbonRadii)[j_idx])) {
|
||||
vij = kij * CARBON_BIT;
|
||||
} else {
|
||||
auto pi_ai_aj = std::numbers::pi / (ai + aj);
|
||||
vij = 8 * kij * pi_ai_aj * std::sqrt(pi_ai_aj);
|
||||
}
|
||||
vol += vij;
|
||||
if (gradients) {
|
||||
auto r = 2.0 * vij * mult;
|
||||
// Use the gradient converters to calculate the gradients in quaternion
|
||||
// space.
|
||||
// The zeroth gradient is never used, so don't waste time calculating
|
||||
// it but leave the code here for completeness and possible future use.
|
||||
// gradients[0] +=
|
||||
// r * (dx * gradConverters[k] + dy *
|
||||
// gradConverters[k + 1] +
|
||||
// dz * gradConverters[k + 2]);
|
||||
gradients[1] +=
|
||||
r * (dx * gradConverters[k + 3] + dy * gradConverters[k + 4] +
|
||||
dz * gradConverters[k + 5]);
|
||||
gradients[2] +=
|
||||
r * (dx * gradConverters[k + 6] + dy * gradConverters[k + 7] +
|
||||
dz * gradConverters[k + 8]);
|
||||
gradients[3] +=
|
||||
r * (dx * gradConverters[k + 9] + dy * gradConverters[k + 10] +
|
||||
dz * gradConverters[k + 11]);
|
||||
gradients[4] += r * dx;
|
||||
gradients[5] += r * dy;
|
||||
gradients[6] += r * dz;
|
||||
}
|
||||
}
|
||||
}
|
||||
return vol;
|
||||
}
|
||||
|
||||
// color features
|
||||
double calcVolAndGrads(const double *ref, int numRefPts, const int *refTypes,
|
||||
const double *fit, int numFitPts, const int *fitTypes,
|
||||
int numFitShape, std::vector<double> &gradConverters,
|
||||
const bool useCutoff, const double distCutoff2,
|
||||
const double *quat, double *gradients) {
|
||||
double vol = 0.0;
|
||||
if (gradients) {
|
||||
cartToQuatGrads(quat, fit, numFitPts, gradConverters, numFitShape);
|
||||
}
|
||||
|
||||
for (int i = 0, i_idx = 0; i < numRefPts * 4; i += 4, i_idx++) {
|
||||
const auto ai = ref[i + 3];
|
||||
const auto aType = refTypes[i_idx];
|
||||
for (int j = 0, j_idx = 0, k = 0; j < numFitPts * 4;
|
||||
j += 4, j_idx++, k += 12) {
|
||||
const auto bType = fitTypes[j_idx];
|
||||
if (aType != bType) {
|
||||
continue;
|
||||
}
|
||||
auto dx = ref[i] - fit[j];
|
||||
auto dy = ref[i + 1] - fit[j + 1];
|
||||
auto dz = ref[i + 2] - fit[j + 2];
|
||||
auto d2 = dx * dx + dy * dy + dz * dz;
|
||||
if (useCutoff && d2 > distCutoff2) {
|
||||
continue;
|
||||
}
|
||||
const auto aj = fit[j + 3];
|
||||
auto mult = -(ai * aj) / (ai + aj);
|
||||
auto kij = exp(mult * d2);
|
||||
|
||||
auto pi_ai_aj = std::numbers::pi / (ai + aj);
|
||||
auto vij = 8 * kij * pi_ai_aj * std::sqrt(pi_ai_aj);
|
||||
vol += vij;
|
||||
if (gradients) {
|
||||
auto r = 2.0 * vij * mult;
|
||||
// Use the converters to calculate the gradients in quaternion space.
|
||||
// The zeroth gradient is never used, so don't waste time calculating
|
||||
// it but leave the code here for completeness and possible future use.
|
||||
// gradients[0] +=
|
||||
// r * (dx * gradConverters[k + 0] + dy *
|
||||
// gradConverters[k + 1] +
|
||||
// dz * gradConverters[k + 2]);
|
||||
gradients[1] +=
|
||||
r * (dx * gradConverters[k + 3] + dy * gradConverters[k + 4] +
|
||||
dz * gradConverters[k + 5]);
|
||||
gradients[2] +=
|
||||
r * (dx * gradConverters[k + 6] + dy * gradConverters[k + 7] +
|
||||
dz * gradConverters[k + 8]);
|
||||
gradients[3] +=
|
||||
r * (dx * gradConverters[k + 9] + dy * gradConverters[k + 10] +
|
||||
dz * gradConverters[k + 11]);
|
||||
gradients[4] += r * dx;
|
||||
gradients[5] += r * dy;
|
||||
gradients[6] += r * dz;
|
||||
}
|
||||
}
|
||||
}
|
||||
return vol;
|
||||
}
|
||||
|
||||
void SingleConformerAlignment::applyQuatTrans(
|
||||
const std::array<double, 7> &quatTrans) {
|
||||
// Leave fit at the origin, and move ref to meet it.
|
||||
RDGeom::Point3D translateA{-quatTrans[4], -quatTrans[5], -quatTrans[6]};
|
||||
translateShape(d_ref.data(), d_refTemp.data(), d_nRefShape + d_nRefColor,
|
||||
translateA);
|
||||
// Rotate fit by quaternion
|
||||
// double tq[4]{quatTrans[0], quatTrans[1], quatTrans[2], quatTrans[3]};
|
||||
RDGeom::Transform3D transformB;
|
||||
transformB.SetRotationFromQuaternion(quatTrans.data());
|
||||
applyTransformToShape(d_fit.data(), d_fitTemp.data(),
|
||||
d_nFitShape + d_nFitColor, transformB);
|
||||
}
|
||||
|
||||
void SingleConformerAlignment::calcVolumeAndGradients(
|
||||
const std::array<double, 7> &quatTrans, double &shapeOvlpVol,
|
||||
double &colorOvlpVol, std::array<double, 7> &gradients) {
|
||||
// Set the coords up.
|
||||
applyQuatTrans(quatTrans);
|
||||
// We assume that d_refTemp was once initialised to d_ref and the same with
|
||||
// fit so that the radii are already there.
|
||||
gradients[0] = gradients[1] = gradients[2] = gradients[3] = gradients[4] =
|
||||
gradients[5] = gradients[6] = 0.0;
|
||||
shapeOvlpVol = calcVolAndGrads(
|
||||
d_refTemp.data(), d_nRefShape, d_refCarbonRadii, d_fitTemp.data(),
|
||||
d_nFitShape, d_fitCarbonRadii, d_gradConverters, d_useCutoff,
|
||||
d_distCutoff2, quatTrans.data(), gradients.data());
|
||||
if (d_optimMode == OptimMode::SHAPE_PLUS_COLOR) {
|
||||
std::array<double, 7> colorGrads{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
colorOvlpVol = calcVolAndGrads(
|
||||
d_refTemp.data() + 4 * d_nRefShape, d_nRefColor,
|
||||
d_refTypes + d_nRefShape, d_fitTemp.data() + 4 * d_nFitShape,
|
||||
d_nFitColor, d_fitTypes + d_nFitShape, d_nFitShape, d_gradConverters,
|
||||
d_useCutoff, d_distCutoff2, quatTrans.data(), colorGrads.data());
|
||||
// The color gradients are normally dwarfed by the shape gradients, so
|
||||
// normalize them and then mix by the same rule as the final score.
|
||||
auto shapeSum = sqrt(std::accumulate(
|
||||
gradients.begin() + 1, gradients.end(), 0.0,
|
||||
[](const auto init, const auto g) -> double { return init + g * g; }));
|
||||
auto colorSum = sqrt(std::accumulate(
|
||||
colorGrads.begin() + 1, colorGrads.end(), 0.0,
|
||||
[](const auto init, const auto g) -> double { return init + g * g; }));
|
||||
auto ratio = shapeSum / colorSum;
|
||||
std::transform(
|
||||
gradients.begin() + 1, gradients.end(), colorGrads.begin(),
|
||||
gradients.begin() + 1, [&](const auto g1, const auto g2) -> double {
|
||||
return g1 * (1 - d_mixingParam) + g2 * ratio * d_mixingParam;
|
||||
});
|
||||
} else {
|
||||
colorOvlpVol = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
bool SingleConformerAlignment::doOverlay(std::array<double, 20> &scores,
|
||||
unsigned int cycle) {
|
||||
unsigned int maxIters = cycle == 0 ? 10 : d_maxIts - 10;
|
||||
auto res = optimise(maxIters);
|
||||
|
||||
// Get the final coords for fit into d_fitTemp, and compute the scores
|
||||
RDGeom::Transform3D xform;
|
||||
xform.SetRotationFromQuaternion(d_quatTrans.data());
|
||||
xform.SetTranslation(
|
||||
RDGeom::Point3D{d_quatTrans[4], d_quatTrans[5], d_quatTrans[6]});
|
||||
applyTransformToShape(d_fit.data(), d_fitTemp.data(),
|
||||
d_nFitShape + d_nFitColor, xform);
|
||||
|
||||
auto tscores = calcScores(d_ref.data(), d_fitTemp.data(), true);
|
||||
scores[0] = tscores[0];
|
||||
scores[1] = tscores[1];
|
||||
scores[2] = tscores[2];
|
||||
scores[3] = tscores[3];
|
||||
scores[4] = tscores[4];
|
||||
scores[5] = d_refShapeVol;
|
||||
scores[6] = d_refColorVol;
|
||||
scores[7] = d_fitShapeVol;
|
||||
scores[8] = d_fitColorVol;
|
||||
scores[9] = d_quatTrans[0];
|
||||
scores[10] = d_quatTrans[1];
|
||||
scores[11] = d_quatTrans[2];
|
||||
scores[12] = d_quatTrans[3];
|
||||
scores[13] = d_quatTrans[4];
|
||||
scores[14] = d_quatTrans[5];
|
||||
scores[15] = d_quatTrans[6];
|
||||
scores[16] = 0.0;
|
||||
scores[17] = 0.0;
|
||||
scores[18] = 0.0;
|
||||
scores[19] = 0.0;
|
||||
return res;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Calculates the step for one component of the quaternion/translation.
//   grad, oldGrad           - gradient of the component now and at the
//                             previous position
//   quatTrans, oldQuatTrans - value of the component now and previously
//   stepSize                - multiplier for the plain gradient step
// If the gradient hasn't changed sign, the step is stepSize * grad.  If it
// has, the step is interpolated from the two positions and gradients and
// then reduced to the size of the plain step if it is larger than that.
double oneStep(double grad, double stepSize, double quatTrans, double oldGrad,
               double oldQuatTrans) {
  const double plainStep = stepSize * grad;
  if (std::signbit(grad) == std::signbit(oldGrad)) {
    return plainStep;
  }
  // NOTE(review): fabs(grad) appears twice in the denominator, unlike the
  // corresponding expression in oneReduceStep - confirm against the PubChem
  // source that this is intended.
  double step =
      (((quatTrans * std::fabs(oldGrad)) + (oldQuatTrans * std::fabs(grad))) /
       (std::fabs(oldGrad) + std::fabs(grad) + std::fabs(grad))) -
      quatTrans;
  if (std::fabs(step) > std::fabs(plainStep)) {
    // This is definitely what the PubChem code says!  It keeps the sign of
    // step, but takes the size of the plain step.
    step *= std::fabs(plainStep / step);
  }
  return step;
}
|
||||
|
||||
// Calculates the step to take for all the components.  step[0] is set to 0,
// entries 1-3 (quaternion) use qStepSize and entries 4-6 (translation) use
// tStepSize.  On the first iteration (iter == 0) the step is just the step
// size times the gradient and the old values are not read; after that it
// comes from oneStep().
void calcStep(std::array<double, 7> &grad, double qStepSize, double tStepSize,
              std::array<double, 7> &oldGrad, std::array<double, 7> &quatTrans,
              std::array<double, 7> &oldQuatTrans, unsigned int iter,
              std::array<double, 7> &step) {
  step[0] = 0.0;
  const bool firstIteration = iter == 0;
  for (unsigned int i = 1; i < 7; ++i) {
    const double stepSize = i < 4 ? qStepSize : tStepSize;
    if (firstIteration) {
      // 1st iteration, use default step sizes
      step[i] = stepSize * grad[i];
    } else {
      step[i] = oneStep(grad[i], stepSize, quatTrans[i], oldGrad[i],
                        oldQuatTrans[i]);
    }
  }
}
|
||||
|
||||
// Keeps the 3 values in step to a reasonable size.
//   maxStep   - if the largest absolute value in step is more than this,
//               every component whose absolute value is more than maxStep is
//               scaled by maxStep / largest
//   step      - the 3 values, updated in place
//   checkSize - if true and the sum of squares of the (possibly scaled)
//               values is more than 1, all 3 are scaled by
//               1 / (2 * sum of squares)
// Returns the largest absolute value in step as it was on entry.
double constrainStep(double maxStep, double *step, bool checkSize) {
  const double mStep =
      std::max({std::fabs(step[0]), std::fabs(step[1]), std::fabs(step[2])});
  if (mStep > maxStep) {
    const double scaleFactor = maxStep / mStep;
    for (int i = 0; i < 3; ++i) {
      // Compare the magnitude, so that negative components that are too
      // large are scaled as well.
      if (std::fabs(step[i]) > maxStep) {
        step[i] *= scaleFactor;
      }
    }
  }
  if (checkSize) {
    const double quatSquared =
        step[0] * step[0] + step[1] * step[1] + step[2] * step[2];
    if (quatSquared > 1.0) {
      const double scaleFactor = 1.0 / (2.0 * quatSquared);
      step[0] *= scaleFactor;
      step[1] *= scaleFactor;
      step[2] *= scaleFactor;
    }
  }
  return mStep;
}
|
||||
|
||||
// Combines two quaternion/translation sets.  Entries 0-3 of the result are
// the quaternion product q1 * q2 (the quaternions are assumed to be
// normalised), entries 4-6 are the sum of the two translations.
std::array<double, 7> combineQuatTrans(const std::array<double, 7> &q1,
                                       const std::array<double, 7> &q2) {
  const auto &[w1, x1, y1, z1, tx1, ty1, tz1] = q1;
  const auto &[w2, x2, y2, z2, tx2, ty2, tz2] = q2;
  return {
      // Multiply the quaternions
      w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
      w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
      w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
      w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
      // Add the translations
      tx1 + tx2,
      ty1 + ty2,
      tz1 + tz2,
  };
}
|
||||
|
||||
// Calculates a reduced step for one component of the quaternion/translation
// after a step that made the score worse.
//   grad, oldGrad           - gradient of the component at the trial
//                             position and at the position stepped from
//   quatTrans, oldQuatTrans - value of the component at those 2 positions
//   stepSize                - multiplier for the plain gradient step
//   step                    - the step that was just tried
// Returns the new step:
//   - if the gradient changed sign, the step to the point between the 2
//     positions where linear interpolation puts the zero of the gradient,
//     reduced to the size of stepSize * grad if it is larger than that
//   - otherwise, if the gradient is no larger than 1, stepSize * grad
//   - otherwise, if the gradient grew, step plus stepSize * grad
//   - otherwise, step plus a small correction.
double oneReduceStep(double grad, double oldGrad, double quatTrans,
                     double oldQuatTrans, double stepSize, double step) {
  const double absGrad = std::fabs(grad);
  const double absOldGrad = std::fabs(oldGrad);
  if (std::signbit(grad) != std::signbit(oldGrad)) {
    // The weights are the magnitudes of the 2 gradients, so the absolute
    // values must be taken before they are added.
    const double denom = absOldGrad + absGrad;
    if (denom == 0.0) {
      // Both gradients are zero (of opposite sign), so there's nowhere to go.
      return 0.0;
    }
    step = ((quatTrans * absOldGrad + oldQuatTrans * absGrad) / denom) -
           quatTrans;
    const double newStep = stepSize * grad;
    if (std::fabs(step) > std::fabs(newStep)) {
      // Keep the sign of step, but the size of newStep.
      step *= std::fabs(newStep / step);
    }
  } else if (absGrad <= 1.0) {
    step = stepSize * grad;
  } else if (absGrad > absOldGrad) {
    // Going wrong way relative to other components?
    step += stepSize * grad;
  } else {
    double delta = grad * (step / (oldGrad - grad));
    if (std::fabs(delta) > std::fabs(step * 0.1) && std::fabs(delta) > 0.001) {
      delta *= 0.0005 / std::fabs(delta);
    }
    step += delta;
  }
  return step;
}
|
||||
|
||||
// Works out a smaller step after a trial step made the score worse.
// Entries 1-3 of step (quaternion) use qStepSize, entries 4-6 (translation)
// use tStepSize; entry 0 is not touched.
// When lineIter is 2, the step is re-made from the old gradients with step
// sizes a tenth of their current value, and the step sizes are left at half
// of what they were on entry.  For any other lineIter each component comes
// from oneReduceStep() and the step sizes are not changed.
void reduceStep(std::array<double, 7> &grad, std::array<double, 7> &oldGrad,
                std::array<double, 7> quatTrans,
                std::array<double, 7> &oldQuatTrans, unsigned int lineIter,
                std::array<double, 7> &step, double &qStepSize,
                double &tStepSize) {
  if (lineIter == 2) {
    qStepSize *= 0.1;
    tStepSize *= 0.1;
    for (unsigned int i = 1; i < 7; ++i) {
      step[i] = (i < 4 ? qStepSize : tStepSize) * oldGrad[i];
    }
    qStepSize *= 5.0;
    tStepSize *= 5.0;
    return;
  }
  // The original PubChem code used qStepSize for all 6 of these updates, but
  // that would appear to be a cut-and-paste error.
  for (unsigned int i = 1; i < 7; ++i) {
    step[i] = oneReduceStep(grad[i], oldGrad[i], quatTrans[i], oldQuatTrans[i],
                            i < 4 ? qStepSize : tStepSize, step[i]);
  }
}
|
||||
} // namespace
|
||||
|
||||
// The optimisation follows closely the procedure used in the PubChem
// code from
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp
// Original Authors: Evan Bolton, Leonid Zaslavsky, Paul Thiessen
//
// Takes at most maxIters steps, updating d_quatTrans (and possibly
// d_qStepSize and d_tStepSize) as it goes.  Returns true if it stopped
// because it converged, false if it ran out of iterations first.
bool SingleConformerAlignment::optimise(unsigned int maxIters) {
  const double maxQuaternionStep = 0.075;   // Maximum step size for quaternion
  const double maxTranslationStep = 0.500;  // Maximum step size for translation
  const double minQuaternionStep =
      0.0002;  // Convergence criteria for quaternion
  const double minTranslationStep =
      0.0020;  // Convergence criteria for translation

  std::array<double, 7> grad;
  std::array<double, 7> oldGrad{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  // Not read by calcStep on the first iteration, and assigned before the
  // second.
  std::array<double, 7> oldQuatTrans;
  std::array<double, 7> step;
  double shapeOvlpVol, colorOvlpVol, comboScore = 0.0;
  bool finished = false;
  for (unsigned iter = 0; iter < maxIters; iter++) {
    calcVolumeAndGradients(d_quatTrans, shapeOvlpVol, colorOvlpVol, grad);

    // Note that the combo score will have a zero color score so will be half
    // the shape score unless we're optimising with color gradients.
    auto scores = calcScores(shapeOvlpVol, colorOvlpVol);
    comboScore = scores[0];
    calcStep(grad, d_qStepSize, d_tStepSize, oldGrad, d_quatTrans, oldQuatTrans,
             iter, step);

    // In case we have to backtrack
    double oldComboScore = comboScore;
    oldQuatTrans = d_quatTrans;
    oldGrad = grad;

    // What the PubChem code calls "Line search (sort of) loop"
    bool converged = false;
    for (unsigned int lineIter = 0; !converged; lineIter++) {
      // Check that the absolute max step size does not go beyond some
      // reasonable size
      double mqStep = constrainStep(maxQuaternionStep, step.data() + 1, true);
      double mtStep = constrainStep(maxTranslationStep, step.data() + 4, false);
      if (mqStep <= minQuaternionStep && mtStep <= minTranslationStep) {
        converged = true;
        comboScore = 0.0;  // Make sure we return to the old one.
        break;
      }
      // Calculate the 0th component of the quaternion.  Obviously it
      // relies on the other 3 components being small
      double quatSquared =
          step[1] * step[1] + step[2] * step[2] + step[3] * step[3];
      step[0] = sqrt(1.0 - quatSquared);
      // Update the quaternion with the step, multiplying them.
      auto newQuatTrans = combineQuatTrans(d_quatTrans, step);
      calcVolumeAndGradients(newQuatTrans, shapeOvlpVol, colorOvlpVol, grad);
      auto scores = calcScores(shapeOvlpVol, colorOvlpVol);
      comboScore = scores[0];
      // if we made a good step, keep the quaternion and we're done
      if (comboScore > oldComboScore) {
        d_quatTrans = newQuatTrans;
        break;
      }
      // Give up on the line search after this many reductions.  The trial
      // position is kept for now; the check after the loop goes back to the
      // old one if that scored better.
      if (lineIter > 2) {
        converged = true;
        d_quatTrans = newQuatTrans;
        break;
      }
      // It got worse, so reduce the step.
      reduceStep(grad, oldGrad, newQuatTrans, oldQuatTrans, lineIter, step,
                 d_qStepSize, d_tStepSize);
      d_quatTrans = oldQuatTrans;
    }  // End of line search
    // Did it converge?
    if (converged ||
        d_shapeConvergenceCriterion > (comboScore - oldComboScore)) {
      if (oldComboScore > comboScore) {
        // The previous step was better, so keep it.
        comboScore = oldComboScore;
        d_quatTrans = oldQuatTrans;
      }
      finished = true;
      break;
    }
  }
  return finished;
}
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
204
Code/GraphMol/GaussianShape/SingleConformerAlignment.h
Normal file
204
Code/GraphMol/GaussianShape/SingleConformerAlignment.h
Normal file
@@ -0,0 +1,204 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
// This is the class that optimises a moving molecule (fit)
|
||||
// to maximise its Gaussian overlap with the reference molecule (ref).
|
||||
// The optimiser is a modified BFGS taken in large part, but re-arranged
|
||||
// for readability, from the PubChem shape overlay code
|
||||
// https://github.com/ncbi/pubchem-align3d/blob/main/shape_neighbor.cpp
|
||||
|
||||
#ifndef RDKIT_SINGLECONFORMERALIGNMENT_GUARD
|
||||
#define RDKIT_SINGLECONFORMERALIGNMENT_GUARD
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
|
||||
#include <RDGeneral/export.h>
|
||||
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
||||
|
||||
namespace RDKit {
|
||||
namespace GaussianShape {
|
||||
/// Holds the state needed to overlay a single conformer of a fit (moving)
/// molecule onto a single conformer of a reference molecule, and the
/// functions that score and optimise that overlay. The transformation being
/// optimised is a 7-element quaternion + translation (see d_quatTrans).
/// The feature-type and carbon-radii inputs are kept as raw pointers and the
/// destructor is defaulted, so this object never frees them.
/// NOTE(review): presumably the caller must keep those arrays alive for as
/// long as this object is used - confirm against the constructor definition.
/// Default construction, copying and moving are all disabled.
struct RDKIT_GAUSSIANSHAPE_EXPORT SingleConformerAlignment {
|
||||
SingleConformerAlignment() = delete;
|
||||
/// @brief Set up the overlay of a single conformer of fit against a single
|
||||
/// conformer of ref. doOverlay() then produces the rotation and translation
|
||||
/// that moves fit to optimise its score with ref.
|
||||
/// @param ref - the query molecule as 1D array of 4 * N entries. Each
|
||||
/// block of 4 is the coords and atom radius
|
||||
/// @param refTypes - the feature types for molecule ref
|
||||
/// @param refCarbonRadii - whether each atom has a carbon radius
|
||||
/// @param nRefShape - the number of atoms in ref
|
||||
/// @param nRefColor - the number of features in ref
|
||||
/// @param refShapeVol - overlap volume of ref with itself
|
||||
/// @param refColorVol - color overlap of ref with itself
|
||||
/// @param fit - the fit molecule as 1D array of 4 * N entries. Each
|
||||
/// block of 4 is the coords and atom radius.
|
||||
/// @param fitTypes - the feature types for fit molecule
|
||||
/// @param fitCarbonRadii - whether each atom has a carbon radius
|
||||
/// @param nFitShape - the number of atoms in fit
|
||||
/// @param nFitColor - the number of features in fit
|
||||
/// @param fitShapeVol - overlap volume of fit with itself
|
||||
/// @param fitColorVol - color overlap of fit with itself
/// @param initQuatTrans - the initial transformation; getFinalQuatTrans()
/// combines it with the optimised result. NOTE(review): presumably laid out
/// as 4 quaternion values followed by 3 translation values, like the other
/// quatTrans arrays in this struct - confirm.
|
||||
/// @param optimMode - optimisation mode
|
||||
/// @param simAlpha - for the Tversky similarity, the alpha value
|
||||
/// @param simBeta - for the Tversky similarity, the beta value
|
||||
/// @param mixingParam - how to mix the 2 Tversky values
|
||||
/// @param useCutoff - whether to use a distance cutoff in the volume
|
||||
/// calculation
|
||||
/// @param distCutoff - the cutoff to use if we're doing it.
|
||||
/// @param shapeConvergenceCriterion - convergence threshold for the optimiser
|
||||
/// @param maxIts - maximum number of iterations for the optimiser
|
||||
SingleConformerAlignment(
|
||||
const std::vector<double> &ref, const int *refTypes,
|
||||
const boost::dynamic_bitset<> *refCarbonRadii, int nRefShape,
|
||||
int nRefColor, double refShapeVol, double refColorVol,
|
||||
const std::vector<double> &fit, const int *fitTypes,
|
||||
const boost::dynamic_bitset<> *fitCarbonRadii, int nFitShape,
|
||||
int nFitColor, double fitShapeVol, double fitColorVol,
|
||||
const std::array<double, 7> &initQuatTrans, OptimMode optimMode,
|
||||
double simAlpha, double simBeta, double mixingParam, bool useCutoff,
|
||||
double distCutoff, double shapeConvergenceCriterion, unsigned int maxIts);
|
||||
|
||||
SingleConformerAlignment(const SingleConformerAlignment &other) = delete;
|
||||
SingleConformerAlignment(SingleConformerAlignment &&other) = delete;
|
||||
SingleConformerAlignment &operator=(const SingleConformerAlignment &other) =
|
||||
delete;
|
||||
SingleConformerAlignment &operator=(SingleConformerAlignment &&other) =
|
||||
delete;
|
||||
~SingleConformerAlignment() = default;
|
||||
|
||||
// Overwrite the quaternion/translation currently being optimised.
void setQuatTrans(const std::array<double, 7> &quatTrans) {
|
||||
d_quatTrans = quatTrans;
|
||||
}
|
||||
|
||||
// Get the final transformation by adding the initial transformation
|
||||
// and the optimised final answer.
|
||||
void getFinalQuatTrans(RDGeom::Transform3D &xform) const;
|
||||
|
||||
// Calculate the combined, shape, and color Tversky scores as appropriate,
|
||||
// plus the volume of the shape and color overlaps, in that order.
|
||||
// Assumes that ref and fit are already in the correct configurations.
|
||||
// If includeColor is passed in true, it will compute the color score
|
||||
// irrespective of the value in d_optimMode. We still want the color
|
||||
// score even if doing SHAPE_ONLY optimisation, for example.
|
||||
std::array<double, 5> calcScores(const double *ref, const double *fit,
|
||||
bool includeColor = false);
|
||||
// This one applies the current quatTrans to the coords and then calculates
|
||||
// the score.
|
||||
std::array<double, 5> calcScores(bool includeColor = false);
|
||||
// This one computes the scores from the given overlap volumes. Color score
|
||||
// only calculated if the color volumes are non-zero.
// Note that includeColor defaults to true here, unlike the two overloads
// above.
|
||||
std::array<double, 5> calcScores(const double shapeOvVol,
|
||||
const double colorOvVol,
|
||||
bool includeColor = true) const;
|
||||
|
||||
// Apply the quatTrans to the ref and fit shapes and put the results
|
||||
// into their tmp equivalents. Ref is translated by the -ve of the
|
||||
// translation, fit is rotated by the rotation bit.
|
||||
void applyQuatTrans(const std::array<double, 7> &quatTrans);
|
||||
|
||||
// Calculate the overlap volume between A and B after the given "quaternion"
|
||||
// has been applied. The "quaternion" is 7 elements, the first 4 the
|
||||
// quaternion the last 3 the translation that currently form the
|
||||
// transformation that overlays B onto A.
// The overlap volumes and the 7 gradients are returned through the
// reference parameters.
|
||||
void calcVolumeAndGradients(const std::array<double, 7> &quatTrans,
|
||||
double &shapeOvlpVol, double &colorOvlpVol,
|
||||
std::array<double, 7> &gradients);
|
||||
|
||||
/// @brief Do the overlay, feeding the results into scores.
|
||||
/// @param scores - the output scores and transformation to reproduce the
|
||||
/// overlay - an array of size 20. Only the first 16 are used here. They are:
|
||||
/// 0 - the combo score
|
||||
/// 1 - the shape Tversky score
|
||||
/// 2 - the color Tversky score
|
||||
/// 3 - the shape overlap volume
|
||||
/// 4 - the color overlap volume
|
||||
/// 5 - the shape volume of fit
|
||||
/// 6 - the shape volume of ref
|
||||
/// 7 - the color volume of fit
|
||||
/// 8 - the color volume of ref
|
||||
/// 9-12 - the quaternion to rotate fit onto ref. Applied first.
|
||||
/// 13-15 - the translation to move fit onto ref. Applied second.
|
||||
/// 16-19 - not used at present, returned as zeros.
/// @param cycle - NOTE(review): not described in this header; see the
/// definition for its meaning.
|
||||
/// @return false if it didn't finish within the allowed maximum number of
|
||||
/// iterations.
|
||||
bool doOverlay(std::array<double, 20> &scores, unsigned int cycle);
|
||||
|
||||
// Find the quaternion and translation that maximises the volume
|
||||
// overlap appropriate to d_optimMode. Returns false if it didn't finish
|
||||
// within the allowed maximum number of iterations.
|
||||
bool optimise(unsigned int maxIters);
|
||||
|
||||
std::vector<double> d_ref;
|
||||
std::vector<double> d_refTemp;
|
||||
const int *d_refTypes;
|
||||
const boost::dynamic_bitset<> *d_refCarbonRadii;
|
||||
const int d_nRefShape;
|
||||
const int d_nRefColor;
|
||||
const double d_refShapeVol;
|
||||
const double d_refColorVol;
|
||||
std::vector<double> d_fit;
|
||||
std::vector<double> d_fitTemp;
|
||||
const int *d_fitTypes;
|
||||
const boost::dynamic_bitset<> *d_fitCarbonRadii;
|
||||
const int d_nFitShape;
|
||||
const int d_nFitColor;
|
||||
double d_fitShapeVol;
|
||||
double d_fitColorVol;
|
||||
std::array<double, 7> d_initQuatTrans;
|
||||
const OptimMode d_optimMode;
|
||||
const double d_simAlpha;
|
||||
const double d_simBeta;
|
||||
const double d_mixingParam;
|
||||
const bool d_useCutoff;
|
||||
// NOTE(review): the name suggests this holds the squared distance cutoff -
// confirm against the constructor definition.
const double d_distCutoff2;
|
||||
const double d_shapeConvergenceCriterion;
|
||||
const unsigned int d_maxIts;
|
||||
// The quaternion/translation as the optimisation proceeds
|
||||
std::array<double, 7> d_quatTrans{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
|
||||
// The step sizes of the quaternion and translation during the
|
||||
// optimisation. Taken from the PubChem code.
|
||||
double d_qStepSize{-0.001};
|
||||
double d_tStepSize{-0.01};
|
||||
// Scratch space for the gradients dr/dQ of the fit molecule.
|
||||
std::vector<double> d_gradConverters;
|
||||
};
|
||||
|
||||
// Compute the volume overlap and optionally "quaternion" gradients for the
|
||||
// overlap volume of ref and fit, wrt fit. fit is the original coords of
|
||||
// the fit molecule, fitTemp is those subject to any transformation applied
|
||||
// by the quaternion we're using to optimise the overlap volume. If
|
||||
// gradients is null, they won't be calculated. They are assumed to be
|
||||
// initialised correctly.
|
||||
// This is for the atoms/shape features.
// NOTE(review): the text above mentions fitTemp, but this signature has no
// such parameter - confirm which set of coordinates callers are expected to
// pass as fit.
// quat and gradients both default to nullptr, so the function can be called
// for the volume alone. The return value is a double; per the description
// above it is the overlap volume.
|
||||
double calcVolAndGrads(const double *ref, int numRefPts,
|
||||
const boost::dynamic_bitset<> *refCarbonRadii,
|
||||
const double *fit, int numFitPts,
|
||||
const boost::dynamic_bitset<> *fitCarbonRadii,
|
||||
std::vector<double> &gradConverters,
|
||||
const bool useCutoff, const double distCutoff2,
|
||||
const double *quat = nullptr,
|
||||
double *gradients = nullptr);
|
||||
// This one is for the features, and only calculates values if the types
|
||||
// of 2 features match.
// The feature types are passed as parallel int arrays (refTypes, fitTypes).
// In this overload quat and gradients have no default values, so both must
// always be supplied.
// NOTE(review): numFitShape is presumably the number of shape atoms in fit -
// confirm its role against the definition.
|
||||
double calcVolAndGrads(const double *ref, int numRefPts, const int *refTypes,
|
||||
const double *fit, int numFitPts, const int *fitTypes,
|
||||
int numFitShape, std::vector<double> &gradConverters,
|
||||
const bool useCutoff, const double distCutoff2,
|
||||
const double *quat, double *gradients);
|
||||
|
||||
} // namespace GaussianShape
|
||||
} // namespace RDKit
|
||||
|
||||
#endif // RDKIT_SINGLECONFORMERALIGNMENT_GUARD
|
||||
6
Code/GraphMol/GaussianShape/Wrap/CMakeLists.txt
Normal file
6
Code/GraphMol/GaussianShape/Wrap/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# Python wrapper module for the GaussianShape library, built from
# rdGaussianShape.cpp. The Python tests import it as
# rdkit.Chem.rdGaussianShape.
rdkit_python_extension(rdGaussianShape
|
||||
rdGaussianShape.cpp
|
||||
DEST Chem
|
||||
LINK_LIBRARIES GaussianShape)
|
||||
# Register the Python unit tests for the wrapper under the name pyShapeAlign.
add_pytest(pyShapeAlign
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_rdgaussian_shape.py)
|
||||
513
Code/GraphMol/GaussianShape/Wrap/rdGaussianShape.cpp
Normal file
513
Code/GraphMol/GaussianShape/Wrap/rdGaussianShape.cpp
Normal file
@@ -0,0 +1,513 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <boost/python.hpp>
|
||||
|
||||
#include <Geometry/point.h>
|
||||
#include <GraphMol/ROMol.h>
|
||||
#include <GraphMol/GaussianShape/GaussianShape.h>
|
||||
#include <GraphMol/GaussianShape/ShapeInput.h>
|
||||
#include <GraphMol/GaussianShape/ShapeOverlayOptions.h>
|
||||
#include <RDBoost/Wrap.h>
|
||||
|
||||
namespace python = boost::python;
|
||||
|
||||
namespace RDKit {
|
||||
|
||||
namespace helpers {
|
||||
/// Replace the custom features held in \c shp with the contents of a Python
/// sequence.
///
/// \param shp  the options object whose customFeatures are replaced
/// \param s    a Python sequence; each element must be indexable as
///             [0] feature type (unsigned int), [1] position (Point3D),
///             [2] radius (double)
///
/// The new features are assembled in a scratch container and only swapped
/// into \c shp once every element has been extracted. A malformed element
/// (which makes python::extract throw) therefore leaves the existing
/// features untouched rather than half-overwritten.
void set_customFeatures(GaussianShape::ShapeInputOptions &shp,
                        const python::object &s) {
  const python::ssize_t numFeats = python::len(s);
  decltype(shp.customFeatures) newFeats;
  newFeats.reserve(numFeats);
  // The index has the same signed type as python::len()'s result, which
  // avoids a signed/unsigned comparison in the loop condition.
  for (python::ssize_t i = 0; i < numFeats; ++i) {
    // Materialise the element once; holding the item proxy instead would
    // repeat the s[i] lookup for every field access below.
    const python::object elem = s[i];
    const unsigned int featType = python::extract<unsigned int>(elem[0]);
    const RDGeom::Point3D pos = python::extract<RDGeom::Point3D>(elem[1]);
    const double radius = python::extract<double>(elem[2]);
    newFeats.emplace_back(featType, pos, radius);
  }
  shp.customFeatures.swap(newFeats);
}
|
||||
/// Return the custom features of \c shp as a Python tuple of
/// (feature type, position, radius) tuples. The feature type is exposed as
/// an int.
python::tuple get_customFeatures(const GaussianShape::ShapeInputOptions &shp) {
  python::list features;
  for (const auto &[featType, pos, radius] : shp.customFeatures) {
    features.append(
        python::make_tuple(static_cast<int>(featType), pos, radius));
  }
  return python::tuple(features);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::AlignMolecule taking two
/// molecules. Any options argument passed as None falls back to a
/// default-constructed options object.
///
/// \return the first three entries of the result array as a Python tuple;
///         the Python docstring describes them as
///         (combo_score, shape_score, color_score).
python::tuple alignMol1(const ROMol &ref, ROMol &fit,
                        const python::object &py_refOpts,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int refConfId,
                        int fitConfId) {
  // Shared None-or-extract logic for the two shape-input option arguments.
  const auto inputOptsFrom = [](const python::object &pyOpts) {
    GaussianShape::ShapeInputOptions opts;
    if (!pyOpts.is_none()) {
      opts = python::extract<GaussianShape::ShapeInputOptions>(pyOpts);
    }
    return opts;
  };
  auto refInputOpts = inputOptsFrom(py_refOpts);
  auto fitInputOpts = inputOptsFrom(py_fitOpts);

  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }

  // NOTE(review): the nullptr is presumably an optional output transform that
  // this wrapper does not expose - confirm against AlignMolecule's signature.
  const auto scores =
      GaussianShape::AlignMolecule(ref, fit, refInputOpts, fitInputOpts,
                                   nullptr, ovOpts, refConfId, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::AlignMolecule taking a prebuilt
/// reference shape and a fit molecule. Options passed as None fall back to
/// default-constructed objects.
///
/// \return the first three entries of the result array as a Python tuple;
///         the Python docstring describes them as
///         (combo_score, shape_score, color_score).
python::tuple alignMol2(const GaussianShape::ShapeInput &refShape, ROMol &fit,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int fitConfId) {
  using GaussianShape::ShapeInputOptions;
  using GaussianShape::ShapeOverlayOptions;

  ShapeInputOptions fitInputOpts;
  if (const bool haveFitOpts = !py_fitOpts.is_none(); haveFitOpts) {
    fitInputOpts = python::extract<ShapeInputOptions>(py_fitOpts);
  }

  ShapeOverlayOptions ovOpts;
  if (const bool haveOvOpts = !py_overlayOpts.is_none(); haveOvOpts) {
    ovOpts = python::extract<ShapeOverlayOptions>(py_overlayOpts);
  }

  // NOTE(review): the nullptr is presumably an optional output transform that
  // this wrapper does not expose - confirm against AlignMolecule's signature.
  const auto scores = GaussianShape::AlignMolecule(
      refShape, fit, fitInputOpts, nullptr, ovOpts, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::AlignShape.
///
/// \return a Python tuple of the first three entries of the result array
///         followed by a flat 16-element list holding the 4x4 transform in
///         row-major order (row index varies slowest).
python::tuple alignShapes(const GaussianShape::ShapeInput &refShape,
                          GaussianShape::ShapeInput &fitShape,
                          const python::object &py_overlayOpts) {
  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }

  RDGeom::Transform3D transform;
  const auto scores =
      GaussianShape::AlignShape(refShape, fitShape, &transform, ovOpts);

  // Flatten the 4x4 matrix: flat index = 4 * row + column.
  python::list flatMatrix;
  constexpr unsigned int dim = 4;
  for (unsigned int flat = 0; flat < dim * dim; ++flat) {
    flatMatrix.append(transform.getValUnchecked(flat / dim, flat % dim));
  }
  return python::make_tuple(scores[0], scores[1], scores[2], flatMatrix);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::ScoreMolecule taking two
/// molecules. Any options argument passed as None falls back to a
/// default-constructed options object.
///
/// \return the first three entries of the result array as a Python tuple;
///         the Python docstring describes them as
///         (combo_score, shape_score, color_score).
python::tuple scoreMol1(const ROMol &ref, const ROMol &fit,
                        const python::object &py_refOpts,
                        const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int refConfId,
                        int fitConfId) {
  // Shared None-or-extract logic for the two shape-input option arguments.
  const auto inputOptsFrom = [](const python::object &pyOpts) {
    GaussianShape::ShapeInputOptions opts;
    if (!pyOpts.is_none()) {
      opts = python::extract<GaussianShape::ShapeInputOptions>(pyOpts);
    }
    return opts;
  };
  auto refInputOpts = inputOptsFrom(py_refOpts);
  auto fitInputOpts = inputOptsFrom(py_fitOpts);

  GaussianShape::ShapeOverlayOptions ovOpts;
  if (!py_overlayOpts.is_none()) {
    ovOpts =
        python::extract<GaussianShape::ShapeOverlayOptions>(py_overlayOpts);
  }

  const auto scores = GaussianShape::ScoreMolecule(
      ref, fit, refInputOpts, fitInputOpts, ovOpts, refConfId, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::ScoreMolecule taking a prebuilt
/// reference shape and a fit molecule. Options passed as None fall back to
/// default-constructed objects.
///
/// \return the first three entries of the result array as a Python tuple;
///         the Python docstring describes them as
///         (combo_score, shape_score, color_score).
python::tuple scoreMol2(const GaussianShape::ShapeInput &refShape,
                        const ROMol &fit, const python::object &py_fitOpts,
                        const python::object &py_overlayOpts, int fitConfId) {
  using GaussianShape::ShapeInputOptions;
  using GaussianShape::ShapeOverlayOptions;

  ShapeInputOptions fitInputOpts;
  if (const bool haveFitOpts = !py_fitOpts.is_none(); haveFitOpts) {
    fitInputOpts = python::extract<ShapeInputOptions>(py_fitOpts);
  }

  ShapeOverlayOptions ovOpts;
  if (const bool haveOvOpts = !py_overlayOpts.is_none(); haveOvOpts) {
    ovOpts = python::extract<ShapeOverlayOptions>(py_overlayOpts);
  }

  const auto scores = GaussianShape::ScoreMolecule(refShape, fit, fitInputOpts,
                                                   ovOpts, fitConfId);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
|
||||
|
||||
/// Python-facing wrapper for GaussianShape::ScoreShape on two prebuilt
/// shapes. Overlay options passed as None fall back to a default-constructed
/// object.
///
/// \return the first three entries of the result array as a Python tuple;
///         the Python docstring describes them as
///         (combo_score, shape_score, color_score).
python::tuple scoreShape(const GaussianShape::ShapeInput &refShape,
                         const GaussianShape::ShapeInput &fitShape,
                         const python::object &py_overlayOpts) {
  using GaussianShape::ShapeOverlayOptions;

  ShapeOverlayOptions ovOpts;
  if (const bool haveOvOpts = !py_overlayOpts.is_none(); haveOvOpts) {
    ovOpts = python::extract<ShapeOverlayOptions>(py_overlayOpts);
  }

  const auto scores = GaussianShape::ScoreShape(refShape, fitShape, ovOpts);
  return python::make_tuple(scores[0], scores[1], scores[2]);
}
|
||||
|
||||
/// Replace the atom subset held in \c opts with the contents of a Python
/// sequence of atom indices.
///
/// \param opts  the options object whose atomSubset is replaced
/// \param as    a Python iterable of unsigned integers
void set_atomSubset(GaussianShape::ShapeInputOptions &opts,
                    const python::object &as) {
  // Clear explicitly so that assigning an empty sequence always empties the
  // subset, independent of how pythonObjectToVect treats falsy objects.
  opts.atomSubset.clear();
  pythonObjectToVect<unsigned int>(as, opts.atomSubset);
}
|
||||
|
||||
/// Return the atom subset of \c opts as a Python tuple of atom indices.
python::tuple get_atomSubset(const GaussianShape::ShapeInputOptions &opts) {
  python::list indices;
  const auto &subset = opts.atomSubset;
  for (auto it = subset.begin(); it != subset.end(); ++it) {
    indices.append(*it);
  }
  return python::tuple(indices);
}
|
||||
|
||||
/// Replace the per-atom radii held in \c opts with the contents of a Python
/// sequence.
///
/// \param opts  the options object whose atomRadii are replaced
/// \param ar    a Python sequence; each element must be indexable as
///              [0] atom index (unsigned int), [1] radius (double)
///
/// The new values are assembled in a scratch container and only swapped into
/// \c opts once every element has been extracted. A malformed element (which
/// makes python::extract throw) therefore leaves the existing radii untouched
/// instead of a resized container holding zero-filled placeholder entries.
void set_atomRadii(GaussianShape::ShapeInputOptions &opts,
                   const python::object &ar) {
  // Keep python::len()'s own type rather than narrowing it to int.
  const python::ssize_t numRadii = python::len(ar);
  decltype(opts.atomRadii) newRadii;
  newRadii.reserve(numRadii);
  for (python::ssize_t i = 0; i < numRadii; ++i) {
    // Materialise the element once instead of looking up ar[i] per field.
    const python::object entry = ar[i];
    const unsigned int atomIdx = python::extract<unsigned int>(entry[0]);
    const double radius = python::extract<double>(entry[1]);
    newRadii.push_back(std::make_pair(atomIdx, radius));
  }
  opts.atomRadii.swap(newRadii);
}
|
||||
|
||||
/// Return the per-atom radii of \c opts as a Python tuple of
/// (atom index, radius) tuples. The atom index is exposed as an int.
python::tuple get_atomRadii(const GaussianShape::ShapeInputOptions &opts) {
  python::list radii;
  for (const auto &[atomIdx, radius] : opts.atomRadii) {
    radii.append(python::make_tuple(static_cast<int>(atomIdx), radius));
  }
  return python::tuple(radii);
}
|
||||
|
||||
} // namespace helpers
|
||||
|
||||
void wrap_rdGaussianShape() {
|
||||
python::scope().attr("__doc__") =
|
||||
"Module containing implementation of Gaussian-based shape overlay and"
|
||||
" scoring."
|
||||
"NOTE: This functionality is experimental and the API"
|
||||
" and/or results may change in future releases.";
|
||||
|
||||
python::enum_<RDKit::GaussianShape::StartMode>("StartMode")
|
||||
.value("ROTATE_0", RDKit::GaussianShape::StartMode::ROTATE_0)
|
||||
.value("ROTATE_180", RDKit::GaussianShape::StartMode::ROTATE_180)
|
||||
.value("ROTATE_180_WIGGLE",
|
||||
RDKit::GaussianShape::StartMode::ROTATE_180_WIGGLE)
|
||||
.value("ROTATE_45", RDKit::GaussianShape::StartMode::ROTATE_45)
|
||||
.value("ROTATE_0_FRAGMENT",
|
||||
RDKit::GaussianShape::StartMode::ROTATE_0_FRAGMENT)
|
||||
.value("ROTATE_180_FRAGMENT",
|
||||
RDKit::GaussianShape::StartMode::ROTATE_180_FRAGMENT)
|
||||
.value("ROTATE_45_FRAGMENT",
|
||||
RDKit::GaussianShape::StartMode::ROTATE_45_FRAGMENT)
|
||||
.value("A_LA_PUBCHEM", GaussianShape::StartMode::A_LA_PUBCHEM)
|
||||
.export_values();
|
||||
|
||||
python::enum_<RDKit::GaussianShape::OptimMode>("OptimMode")
|
||||
.value("SHAPE_ONLY", RDKit::GaussianShape::OptimMode::SHAPE_ONLY)
|
||||
.value("SHAPE_PLUS_COLOR_SCORE",
|
||||
RDKit::GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE)
|
||||
.value("SHAPE_PLUS_COLOR",
|
||||
RDKit::GaussianShape::OptimMode::SHAPE_PLUS_COLOR)
|
||||
.export_values();
|
||||
|
||||
python::class_<GaussianShape::ShapeInputOptions, boost::noncopyable>(
|
||||
"ShapeInputOptions",
|
||||
"ShapeInputOptions - options for setting up ShapeInput objects.")
|
||||
.def_readwrite("useColors", &GaussianShape::ShapeInputOptions::useColors,
|
||||
"Whether to use color features in overlay. Default=True.")
|
||||
.def_readwrite(
|
||||
"allCarbonRadii", &GaussianShape::ShapeInputOptions::allCarbonRadii,
|
||||
"Whether to use the same radius, appropriate for Carbon, for all atoms. There is a"
|
||||
" slight accuracy penalty but significant speed gain if used. Default=True.")
|
||||
.add_property(
|
||||
"atomSubset", &helpers::get_atomSubset, &helpers::set_atomSubset,
|
||||
"If not empty, use just these atoms in the molecule to form the ShapeInput object.")
|
||||
.add_property(
|
||||
"customFeatures", &helpers::get_customFeatures,
|
||||
&helpers::set_customFeatures,
|
||||
"Custom features for the shape. Requires a list of tuples of"
|
||||
" int (the feature type), Point3D (the coordinates) and float (the radius).")
|
||||
.add_property(
|
||||
"atomRadii", &helpers::get_atomRadii, &helpers::set_atomRadii,
|
||||
"Non-standard radii to use for the atoms specified by their indices"
|
||||
" in the molecule. Not all atoms need have a radius specified."
|
||||
" A list of tuples of [int, float].")
|
||||
.def("__setattr__", &safeSetattr);
|
||||
|
||||
python::class_<GaussianShape::ShapeOverlayOptions, boost::noncopyable>(
|
||||
"ShapeOverlayOptions",
|
||||
"ShapeOverlayOptions - options for controlling the shape overlay process.")
|
||||
.def_readwrite(
|
||||
"startMode", &RDKit::GaussianShape::ShapeOverlayOptions::startMode,
|
||||
"Start modes for optimisation. Default is A_LA_PUBCHEM - as used by the"
|
||||
" PubChem code - either ROTATE_180_WIGGLE or ROTATE_45 depending on the shape"
|
||||
" of the two molecules. ROTATE_180_WIGGLE means 180 rotations about"
|
||||
" the x, y and z axes, then a small"
|
||||
" rotation about each axis from that point, using the best scoring one of"
|
||||
" those. ROTATE_180 uses 180 degree rotations for 4 start points,"
|
||||
" ROTATE_45 uses 45 degree rotations for 9 start points and ROTATE_0"
|
||||
" leaves the relative orientations of the 2 molecules as passed in before"
|
||||
" optimisation. There are also ROTATE_0_FRAGMENT, ROTATE_45_FRAGMENT"
|
||||
" and ROTATE_180_FRAGMENT that as well as the above move the fit"
|
||||
" molecule to the ends of each of the principal axes and then does"
|
||||
" the appropriate rotations. This is useful when the fit molecule is"
|
||||
" a lot smaller than the reference molecule, but requires a large number"
|
||||
" of optimisations so is relatively slow.")
|
||||
.def_readwrite(
|
||||
"optimMode", &GaussianShape::ShapeOverlayOptions::optimMode,
|
||||
"Optimisation mode, controlling what parameters are used"
|
||||
" to drive the overlay. Default=SHAPE_PLUS_COLOR_SCORE which"
|
||||
" optimises using just the overlap of shape, but uses the"
|
||||
" color to decide which is the best overlay. Other options"
|
||||
" are SHAPE_ONLY and SHAPE_AND_COLOR with the latter using"
|
||||
" the overlap of color features as well. ")
|
||||
.def_readwrite(
|
||||
"simAlpha", &GaussianShape::ShapeOverlayOptions::simAlpha,
|
||||
"When doing a Tversky similarity, the alpha value. If alpha and"
|
||||
" beta are both the default 1.0, it's a Tanimoto similarity. A"
|
||||
" high alpha and low beta emphasize the fit volume in the"
|
||||
" similarity and vice versa. Tversky is O / (A * (R - O) + B * (F"
|
||||
" - O) + O) where O is the overlap volume, R is the reference's"
|
||||
" volume and F is the fit's volume. This is different from that"
|
||||
" used by OpenEye (O / (A * R + B * F)).")
|
||||
.def_readwrite("simBeta", &GaussianShape::ShapeOverlayOptions::simBeta,
|
||||
"When doing a Tversky similarity, the beta value.")
|
||||
.def_readwrite(
|
||||
"optParam", &GaussianShape::ShapeOverlayOptions::optParam,
|
||||
"If using colors, the relative weights of the shape and color scores,"
|
||||
" as a fraction of 1. Default=0.5.")
|
||||
.def_readwrite(
|
||||
"nSteps", &GaussianShape::ShapeOverlayOptions::nSteps,
|
||||
"Maximum number of steps for the shape overlay process. Default=100.")
|
||||
.def_readwrite(
|
||||
"normalize", &GaussianShape::ShapeOverlayOptions::normalize,
|
||||
"Whether to normalize the shapes before overlay by putting them into their"
|
||||
" canonical orientation (centred on the origin, aligned along its"
|
||||
" principal axes. Default=True.")
|
||||
.def_readwrite(
|
||||
"useDistCutoff", &GaussianShape::ShapeOverlayOptions::useDistCutoff,
|
||||
"Whether to use distance cutoff when calculating the shape volumes. If used,"
|
||||
" there will be a small penalty in accuracy but a significant increase in speed."
|
||||
" Default=True.")
|
||||
.def_readwrite(
|
||||
"distCutoff", &GaussianShape::ShapeOverlayOptions::distCutoff,
|
||||
"If using a distance cutoff, this is the value used. Default=4.5 of whatever"
|
||||
" units the coordinates are in.")
|
||||
.def_readwrite(
|
||||
"shapeConvergenceCriterion",
|
||||
&GaussianShape::ShapeOverlayOptions::shapeConvergenceCriterion,
|
||||
"Optimisation stops when the shape Tversky score changes by less"
|
||||
" than this amount after an optimisation step. A larger number is"
|
||||
" faster but gives less precise overlays. Default=0.001.")
|
||||
.def("__setattr__", &safeSetattr);
|
||||
|
||||
std::string docString("ShapeInput object");
|
||||
python::class_<GaussianShape::ShapeInput, boost::noncopyable>(
|
||||
"ShapeInput", docString.c_str(),
|
||||
python::init<const ROMol &, int, const GaussianShape::ShapeInputOptions &,
|
||||
const GaussianShape::ShapeOverlayOptions &>(
|
||||
python::args("self", "confId", "shapeOpt", "overlayOpts")))
|
||||
.add_property("NumAtoms", &GaussianShape::ShapeInput::getNumAtoms,
|
||||
"Get the number of atoms defining the shape.")
|
||||
.add_property("NumFeatures", &GaussianShape::ShapeInput::getNumFeatures,
|
||||
"Get the number of features in the shape.")
|
||||
.add_property("ShapeVolume", &GaussianShape::ShapeInput::getShapeVolume,
|
||||
"Get the shape's volume due to the atoms.")
|
||||
.add_property("ColorVolume", &GaussianShape::ShapeInput::getColorVolume,
|
||||
"Get the volume of the shape's color features.")
|
||||
.def("__setattr__", &safeSetattr);
|
||||
|
||||
python::def(
|
||||
"AlignMol", &helpers::alignMol1,
|
||||
(python::arg("ref"), python::arg("fit"),
|
||||
python::arg("refOpts") = python::object(),
|
||||
python::arg("fitOpts") = python::object(),
|
||||
python::arg("overlayOpts") = python::object(),
|
||||
python::arg("refConfId") = -1, python::arg("fitConfId") = -1),
|
||||
R"DOC(Aligns a fit molecule onto a reference molecule. The fit is modified.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ref: RDKit.ROMol
|
||||
Reference molecule
|
||||
fit: RDKit.ROMol
|
||||
Fit molecule that will be overlaid
|
||||
refOpts: ShapeInputOptions, optional
|
||||
Options for building the ref shape
|
||||
fitOpts: ShapeInputOptions, optional
|
||||
Options for building the fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the overlay
|
||||
refConfId : int, optional
|
||||
Reference conformer ID (default is -1)
|
||||
fitConfId : int, optional
|
||||
fit conformer ID (default is -1)
|
||||
|
||||
Returns
|
||||
-------
|
||||
3-tuple of floats
|
||||
The results are (combo_score, shape_score, color_score). The color_score is
|
||||
0.0 if color features not used, in which case combo_score and shape_score will
|
||||
be the same.
|
||||
)DOC");
|
||||
|
||||
python::def(
|
||||
"AlignMol", &helpers::alignMol2,
|
||||
(python::arg("refShape"), python::arg("fit"),
|
||||
python::arg("fitOpts") = python::object(),
|
||||
python::arg("overlayOpts") = python::object(),
|
||||
python::arg("fitConfId") = -1),
|
||||
R"DOC(Aligns a fit molecule onto a reference shape. The fit is modified.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
refShape: ShapeInput
|
||||
Reference shape
|
||||
fit: RDKit.ROMol
|
||||
Fit molecule that will be overlaid
|
||||
fitOpts: ShapeInputOptions, optional
|
||||
Options for building the fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the overlay
|
||||
fitConfId : int, optional
|
||||
Fit conformer ID (default is -1)
|
||||
|
||||
Returns
|
||||
-------
|
||||
3-tuple of floats
|
||||
The results are (combo_score, shape_score, color_score). The color_score is
|
||||
0.0 if color features not used, in which case combo_score and shape_score will
|
||||
be the same.)DOC");
|
||||
|
||||
python::def(
|
||||
"AlignShapes", &helpers::alignShapes,
|
||||
(python::arg("refShape"), python::arg("fitShape"),
|
||||
python::arg("overlayOpts") = python::object()),
|
||||
R"DOC(Aligns a fit shape to a reference shape. The fit is modified.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
refShape : ShapeInput
|
||||
Reference shape
|
||||
fitShape : ShapeInput
|
||||
fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the overlay
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
4-tuple of float, float, list of floats
|
||||
The results are (combo_score, shape_score, color_score, matrix)
|
||||
The matrix is a 16-float list giving the transformation matrix that
|
||||
overlays the fit onto the reference.)DOC");
|
||||
|
||||
python::def("ScoreMol", &helpers::scoreMol1,
|
||||
(python::arg("ref"), python::arg("fit"),
|
||||
python::arg("refOpts") = python::object(),
|
||||
python::arg("fitOpts") = python::object(),
|
||||
python::arg("overlayOpts") = python::object(),
|
||||
python::arg("refConfId") = -1, python::arg("fitConfId") = -1),
|
||||
R"DOC(Calculates the scores between a reference molecule and a fit
|
||||
molecule without overlay.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ref: RDKit.ROMol
|
||||
Reference molecule
|
||||
fit: RDKit.ROMol
|
||||
Fit molecule that will be scored
|
||||
refOpts: ShapeInputOptions, optional
|
||||
Options for building the ref shape
|
||||
fitOpts: ShapeInputOptions, optional
|
||||
Options for building the fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the volume calculation
|
||||
refConfId : int, optional
|
||||
Reference conformer ID (default is -1)
|
||||
fitConfId : int, optional
|
||||
fit conformer ID (default is -1)
|
||||
|
||||
Returns
|
||||
-------
|
||||
3-tuple of floats
|
||||
The results are (combo_score, shape_score, color_score). The color_score is
|
||||
0.0 if color features not used, in which case combo_score and shape_score will
|
||||
be the same.
|
||||
)DOC");
|
||||
|
||||
python::def(
|
||||
"ScoreMol", &helpers::scoreMol2,
|
||||
(python::arg("refShape"), python::arg("fit"),
|
||||
python::arg("fitOpts") = python::object(),
|
||||
python::arg("overlayOpts") = python::object(),
|
||||
python::arg("fitConfId") = -1),
|
||||
R"DOC(Calculates the scores between a reference shape and a fit molecule
|
||||
without overlay.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
refShape: ShapeInput
|
||||
Reference shape
|
||||
fit: RDKit.ROMol
|
||||
Fit molecule that will be scored
|
||||
fitOpts: ShapeInputOptions, optional
|
||||
Options for building the fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the volume calculation
|
||||
fitConfId : int, optional
|
||||
fit conformer ID (default is -1)
|
||||
|
||||
Returns
|
||||
-------
|
||||
3-tuple of floats
|
||||
The results are (combo_score, shape_score, color_score). The color_score is
|
||||
0.0 if color features not used, in which case combo_score and shape_score will
|
||||
be the same.
|
||||
)DOC");
|
||||
|
||||
python::def(
|
||||
"ScoreShape", &helpers::scoreShape,
|
||||
(python::arg("refShape"), python::arg("fitShape"),
|
||||
python::arg("overlayOpts") = python::object()),
|
||||
R"DOC(Calculates the scores between a reference shape and a fit shape without
|
||||
overlay.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
refShape: ShapeInput
|
||||
Reference shape
|
||||
fitShape: ShapeInput
|
||||
Fit shape
|
||||
fitOpts: ShapeInputOptions, optional
|
||||
Options for building the fit shape
|
||||
overlayOpts: ShapeOverlayOptions, optional
|
||||
Options for controlling the volume calculation
|
||||
|
||||
Returns
|
||||
-------
|
||||
3-tuple of floats
|
||||
The results are (combo_score, shape_score, color_score). The color_score is
|
||||
0.0 if color features not used, in which case combo_score and shape_score will
|
||||
be the same.
|
||||
)DOC");
|
||||
}
|
||||
|
||||
// Python module entry point: all registration happens in
// wrap_rdGaussianShape().
BOOST_PYTHON_MODULE(rdGaussianShape) {
  wrap_rdGaussianShape();
}
|
||||
|
||||
} // namespace RDKit
|
||||
161
Code/GraphMol/GaussianShape/Wrap/test_rdgaussian_shape.py
Normal file
161
Code/GraphMol/GaussianShape/Wrap/test_rdgaussian_shape.py
Normal file
@@ -0,0 +1,161 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import rdGaussianShape, rdMolTransforms
|
||||
from rdkit import RDConfig
|
||||
from rdkit.Geometry import Point3D
|
||||
|
||||
|
||||
datadir = RDConfig.RDBaseDir + '/External/pubchem_shape/test_data'
|
||||
|
||||
|
||||
class TestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
  """Load the reference and probe molecules from the shared test SD file."""
  supplier = Chem.SDMolSupplier(datadir + '/test1.sdf')
  # The first record is the reference, the second the probe to be overlaid.
  self.ref, self.probe = supplier[0], supplier[1]
|
||||
|
||||
def test1_Defaults(self):
|
||||
tpl = rdGaussianShape.AlignMol(self.ref, self.probe)
|
||||
self.assertAlmostEqual(tpl[0], 0.497, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.760, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.233, places=3)
|
||||
|
||||
|
||||
def test2_NoColor(self):
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
ovOpts.optimMode = rdGaussianShape.OptimMode.SHAPE_ONLY
|
||||
shpOpts= rdGaussianShape.ShapeInputOptions()
|
||||
shpOpts.useColors = False
|
||||
tpl = rdGaussianShape.AlignMol(self.ref, self.probe , shpOpts, shpOpts, ovOpts)
|
||||
self.assertAlmostEqual(tpl[0], 0.760, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.760, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.0, places=3)
|
||||
|
||||
def test3_FromShape(self):
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
shpOpts= rdGaussianShape.ShapeInputOptions()
|
||||
shp = rdGaussianShape.ShapeInput(self.ref, -1, shpOpts, ovOpts)
|
||||
self.assertAlmostEqual(shp.ShapeVolume, 591.058, places=3)
|
||||
self.assertAlmostEqual(shp.ColorVolume, 31.935, places=3)
|
||||
self.assertTrue(type(shp) == rdGaussianShape.ShapeInput)
|
||||
tpl = rdGaussianShape.AlignMol(shp, self.probe)
|
||||
self.assertAlmostEqual(tpl[0], 0.497, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.760, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.233, places=3)
|
||||
|
||||
def test4_customFeatures(self):
|
||||
m1 = Chem.MolFromSmiles(
|
||||
"O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
opts.customFeatures = ((1, Point3D(-1.75978, 0.148897,
|
||||
0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
shp = rdGaussianShape.ShapeInput(m1, -1, opts, ovOpts)
|
||||
self.assertEqual(shp.NumAtoms, 4)
|
||||
self.assertEqual(shp.NumFeatures, 2)
|
||||
m2 = Chem.Mol(m1)
|
||||
opts2 = rdGaussianShape.ShapeInputOptions()
|
||||
opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897,
|
||||
0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
|
||||
shp2 = rdGaussianShape.ShapeInput(m2, -1, opts2, ovOpts)
|
||||
tpl = rdGaussianShape.AlignShapes(shp, shp2, ovOpts)
|
||||
self.assertAlmostEqual(tpl[0], 0.999, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 1.000, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.999, places=3)
|
||||
tf = tpl[3]
|
||||
self.assertGreater(0.0, tf[0])
|
||||
self.assertEqual(1.0, tf[15])
|
||||
|
||||
# check the getter:
|
||||
cfs = opts2.customFeatures
|
||||
self.assertEqual(len(cfs), 2)
|
||||
self.assertEqual(cfs[0][0], 2)
|
||||
self.assertEqual(cfs[1][0], 1)
|
||||
|
||||
def test5_customFeatures(self):
|
||||
m1 = Chem.MolFromSmiles(
|
||||
"O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|")
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
opts.customFeatures = ((1, Point3D(-1.75978, 0.148897,
|
||||
0), 1.0), (2, Point3D(1.7571, -0.120174, 0.1), 1.0))
|
||||
m2 = Chem.Mol(m1)
|
||||
opts2 = rdGaussianShape.ShapeInputOptions()
|
||||
opts2.customFeatures = ((2, Point3D(-1.75978, 0.148897,
|
||||
0), 1.0), (1, Point3D(1.7571, -0.120174, 0.1), 1.0))
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
tpl = rdGaussianShape.AlignMol(m1, m2, opts, opts2, ovOpts)
|
||||
self.assertAlmostEqual(tpl[0], 0.999, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 1.000, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.999, places=3)
|
||||
|
||||
def test6_FixedScore(self):
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
# Just to make sure it's there and returns a value.
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
tpl = rdGaussianShape.ScoreMol(self.ref, self.ref, opts, opts, ovOpts)
|
||||
self.assertAlmostEqual(tpl[0], 1.0, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 1.0, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 1.0, places=3)
|
||||
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
opts.useColors = False
|
||||
ovOpts.normalize = False
|
||||
shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
|
||||
tpl = rdGaussianShape.ScoreMol(shp, self.probe, opts)
|
||||
self.assertAlmostEqual(tpl[0], 0.0, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 0.0, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 0.0, places=3)
|
||||
|
||||
opts.useColors = True
|
||||
shp1 = rdGaussianShape.ShapeInput(self.probe, -1, opts, ovOpts)
|
||||
shp2 = rdGaussianShape.ShapeInput(self.probe, -1, opts, ovOpts)
|
||||
tpl = rdGaussianShape.ScoreShape(shp1, shp2, ovOpts)
|
||||
self.assertAlmostEqual(tpl[0], 1.0, places=3)
|
||||
self.assertAlmostEqual(tpl[1], 1.0, places=3)
|
||||
self.assertAlmostEqual(tpl[2], 1.0, places=3)
|
||||
|
||||
def test7_customAtomRadii(self):
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
opts.allCarbonRadii = False
|
||||
opts.atomRadii = [(4, 1.9)]
|
||||
shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
|
||||
self.assertAlmostEqual(shp.ShapeVolume, 559.361, places=3)
|
||||
|
||||
def test8_atomSubset(self):
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
opts = rdGaussianShape.ShapeInputOptions()
|
||||
opts.atomSubset = [4, 5, 6, 7, 8, 9]
|
||||
opts.useColors = False
|
||||
opts.allCarbonRadii = False
|
||||
shp = rdGaussianShape.ShapeInput(self.ref, -1, opts, ovOpts)
|
||||
self.assertAlmostEqual(shp.ShapeVolume, 259.144, places=3)
|
||||
self.assertEqual(shp.ColorVolume, 0.0)
|
||||
|
||||
def test9_tversky(self):
|
||||
scores = rdGaussianShape.AlignMol(self.ref, self.probe)
|
||||
self.assertAlmostEqual(scores[0], 0.497, places=3)
|
||||
self.assertAlmostEqual(scores[1], 0.760, places=3)
|
||||
self.assertAlmostEqual(scores[2], 0.233, places=3)
|
||||
|
||||
ovOpts = rdGaussianShape.ShapeOverlayOptions()
|
||||
ovOpts.simAlpha = 0.95
|
||||
ovOpts.simBeta = 0.05
|
||||
ref_tversky = rdGaussianShape.AlignMol(self.ref, self.probe, overlayOpts=ovOpts)
|
||||
self.assertAlmostEqual(ref_tversky[0], 0.700, places=3)
|
||||
self.assertAlmostEqual(ref_tversky[1], 0.968, places=3)
|
||||
self.assertAlmostEqual(ref_tversky[2], 0.433, places=3)
|
||||
|
||||
ovOpts.simAlpha = 0.05
|
||||
ovOpts.simBeta = 0.95
|
||||
fit_tversky = rdGaussianShape.AlignMol(self.ref, self.probe, overlayOpts=ovOpts)
|
||||
self.assertAlmostEqual(fit_tversky[0], 0.557, places=3)
|
||||
self.assertAlmostEqual(fit_tversky[1], 0.780, places=3)
|
||||
self.assertAlmostEqual(fit_tversky[2], 0.335, places=3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
861
Code/GraphMol/GaussianShape/catch_tests.cpp
Normal file
861
Code/GraphMol/GaussianShape/catch_tests.cpp
Normal file
@@ -0,0 +1,861 @@
|
||||
//
|
||||
// Copyright (C) 2026 David Cosgrove and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
// Original author: David Cosgrove (CozChemIx Limited)
|
||||
//
|
||||
// Tests for the Roshambo2-based shape alignment code.
|
||||
|
||||
#include <chrono>
|
||||
#include <random>
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers_floating_point.hpp>
|
||||
|
||||
#include <GraphMol/MolOps.h>
|
||||
#include <GraphMol/FileParsers/MolWriters.h>
|
||||
#include <GraphMol/FileParsers/MolSupplier.h>
|
||||
#include <GraphMol/GaussianShape/GaussianShape.h>
|
||||
#include <GraphMol/GaussianShape/ShapeInput.h>
|
||||
#include <GraphMol/MolTransforms/MolTransforms.h>
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
|
||||
using namespace RDKit;
|
||||
|
||||
bool checkMolsHaveRoughlySameCoords(const ROMol &m1, const ROMol &m2,
|
||||
double margin = 0.005) {
|
||||
for (unsigned int i = 0; i < m1.getNumAtoms(); ++i) {
|
||||
auto pos1 = m1.getConformer().getAtomPos(i);
|
||||
auto pos2 = m2.getConformer().getAtomPos(i);
|
||||
if ((pos1 - pos2).length() > margin) {
|
||||
// So the error is printed in a relevant place.
|
||||
std::cout << i << " : " << m1.getAtomWithIdx(i)->getAtomicNum()
|
||||
<< " :: " << (pos1 - pos2).length() << std::endl;
|
||||
CHECK_THAT((pos1 - pos2).length(),
|
||||
Catch::Matchers::WithinAbs(0.0, margin));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Aligns the second molecule of test1.sdf onto the first using each of the
// optimisation modes and two start modes, and checks that re-scoring the
// moved probe reproduces the alignment scores.
TEST_CASE("basic alignment") {
  // Building a std::string from a null pointer is undefined behaviour, so
  // stop with a readable failure if RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
  // Both molecules are round-tripped through CXSMILES before use; check the
  // supplier results before dereferencing them.
  auto refT = suppl[0];
  REQUIRE(refT);
  auto ref = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*refT));
  REQUIRE(ref);
  auto probeT = suppl[1];
  REQUIRE(probeT);
  auto probe = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*probeT));
  REQUIRE(probe);
  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
  overlayOpts.nSteps = 50;
  GaussianShape::ShapeInputOptions shapeOpts;
  SECTION("setup") {
    auto refShape = GaussianShape::ShapeInput(*ref, -1, shapeOpts);
    CHECK_THAT(refShape.getShapeVolume(),
               Catch::Matchers::WithinAbs(591.057, 0.005));
    CHECK_THAT(refShape.getColorVolume(),
               Catch::Matchers::WithinAbs(31.935, 0.005));

    auto probeShape = GaussianShape::ShapeInput(*probe, -1, shapeOpts);
    CHECK_THAT(probeShape.getShapeVolume(),
               Catch::Matchers::WithinAbs(751.013, 0.005));
    CHECK_THAT(probeShape.getColorVolume(),
               Catch::Matchers::WithinAbs(42.530, 0.005));
  }
  SECTION("shape only") {
    ROMol cp(*probe);
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_ONLY;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;

    GaussianShape::ShapeInputOptions tShapeOpts;
    tShapeOpts.useColors = false;
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, tShapeOpts, tShapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.0, 0.005));
    // Check that a re-score gives the same answer.
    auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts, shapeOpts,
                                                 overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("shape plus color score") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.494, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.236, 0.005));
    // Check that a re-score gives the same answer.
    const auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts,
                                                       shapeOpts, overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("shape and color") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.477, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.747, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.207, 0.005));
    const auto rescores = GaussianShape::ScoreMolecule(*ref, cp, shapeOpts,
                                                       shapeOpts, overlayOpts);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
  SECTION("collect transform") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180;
    ROMol cp(*probe);
    RDGeom::Transform3D xform;
    const auto scores = GaussianShape::AlignMolecule(
        *ref, cp, shapeOpts, shapeOpts, &xform, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.494, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.236, 0.005));
    // Check a few values from the transform, just to be sure
    CHECK_THAT(xform.getValUnchecked(0, 0),
               Catch::Matchers::WithinAbs(-0.886, 0.005));
    CHECK_THAT(xform.getValUnchecked(1, 1),
               Catch::Matchers::WithinAbs(-0.828, 0.005));
    CHECK_THAT(xform.getValUnchecked(2, 2),
               Catch::Matchers::WithinAbs(0.816, 0.005));
    CHECK_THAT(xform.getValUnchecked(3, 3),
               Catch::Matchers::WithinAbs(1.0, 0.005));
  }
  SECTION("shape plus color score a la pubchem") {
    overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
    overlayOpts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
    GaussianShape::ShapeInputOptions shapeOpts2;
    // Run once with allCarbonRadii on and once with it off; the expected
    // scores differ slightly between the two settings.
    for (const auto acr : std::vector{true, false}) {
      shapeOpts2.allCarbonRadii = acr;
      ROMol cp(*probe);
      const auto scores = GaussianShape::AlignMolecule(
          *ref, cp, shapeOpts2, shapeOpts2, nullptr, overlayOpts);
      if (acr) {
        CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.498, 0.005));
        CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.758, 0.005));
        CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.237, 0.005));
      } else {
        CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.503, 0.005));
        CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.761, 0.005));
        CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.245, 0.005));
      }
      // Check that a re-score gives the same answer.
      const auto rescores = GaussianShape::ScoreMolecule(
          *ref, cp, shapeOpts2, shapeOpts2, overlayOpts);
      CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
      CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
      CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
    }
  }
}
|
||||
|
||||
// Aligns a probe from bulk.pubchem.sdf onto the first molecule in the file,
// checks the scores, and writes the reference and the aligned probe to
// bulk.pubchem_out.sdf in the same directory.
TEST_CASE("bulk") {
  // Building a std::string from a null pointer is undefined behaviour, so
  // stop with a readable failure if RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";
  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/bulk.pubchem.sdf");
  auto ref = suppl[0];
  REQUIRE(ref);
  std::string testout = dirName + "/bulk.pubchem_out.sdf";
  auto writer = SDWriter(testout);
  writer.write(*ref);
  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  overlayOpts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
  GaussianShape::ShapeInputOptions shapeOpts;
  for (auto i = 1u; i < suppl.length(); ++i) {
    // Index with the loop variable so the body stays correct if the break
    // below is ever removed.
    auto probe = suppl[i];
    REQUIRE(probe);
    auto scores = GaussianShape::AlignMolecule(*ref, *probe, shapeOpts,
                                               shapeOpts, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.575, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.818, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.332, 0.005));
    const auto rescores = GaussianShape::ScoreMolecule(*ref, *probe);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));

    writer.write(*probe);
    // The expected scores above are for the first probe only.
    break;
  }
  writer.close();
}
|
||||
|
||||
// Aligns two ShapeInput objects built from test1.sdf, checks that the
// returned transform moves the probe onto stored reference coordinates, and
// repeats the alignment after normalizeCoords() has been called on both
// shapes.
TEST_CASE("shape alignment") {
  // Building a std::string from a null pointer is undefined behaviour, so
  // stop with a readable failure if RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  auto suppl = v2::FileParsers::SDMolSupplier(dirName + "/test1.sdf");
  auto ref = suppl[0];
  REQUIRE(ref);
  auto probe = suppl[1];
  REQUIRE(probe);
  auto refShape = GaussianShape::ShapeInput(*ref, -1);
  auto probeShape = GaussianShape::ShapeInput(*probe, -1);
  // Expected coordinates of the probe after the overlay.
  const auto ovProbe =
      "FC1(F)C[C@H](C(O)=O)N(Cc2ocnc2)C1 |(-13.7799,-5.76066,4.42449;-13.5271,-6.62223,3.41219;-12.4707,-7.37583,3.76844;-13.2679,-5.8659,2.12715;-14.6435,-5.78022,1.46335;-15.139,-4.37081,1.41003;-14.7786,-3.78046,0.244433;-15.7838,-3.81972,2.28974;-15.5606,-6.52351,2.33643;-16.628,-7.19488,1.60806;-17.7234,-6.24049,1.21312;-18.3383,-5.54964,2.21298;-19.2578,-4.78996,1.55674;-19.2808,-4.93485,0.251035;-18.298,-5.86438,0.0244256;-14.7486,-7.4588,3.11797),wU:4.4|"_smiles;
  // It is dereferenced below, so make sure the parse succeeded.
  REQUIRE(ovProbe);
  RDGeom::Transform3D xform;
  auto scores = GaussianShape::AlignShape(refShape, probeShape, &xform);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.498, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.760, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.235, 0.005));
  // This effectively checks that xform is correct.
  auto rescores = GaussianShape::ScoreShape(refShape, probeShape);
  CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.001));
  CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.001));
  CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.001));

  SmilesWriteParams params;
  params.canonical = false;
  // The input structure being from an SDF doesn't have the atoms in an order
  // that will make a SMILES string so bounce it through one for comparison.
  auto probeCp1 = v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*probe, params));
  MolTransforms::transformConformer(probeCp1->getConformer(), xform);
  CHECK(checkMolsHaveRoughlySameCoords(*ovProbe, *probeCp1));
  // And pre-normalizing the shapes
  refShape.normalizeCoords();
  probeShape.normalizeCoords();
  RDGeom::Transform3D xform1;
  auto scores1 = GaussianShape::AlignShape(refShape, probeShape, &xform1);
  CHECK_THAT(scores1[0], Catch::Matchers::WithinAbs(0.498, 0.005));
  CHECK_THAT(scores1[1], Catch::Matchers::WithinAbs(0.760, 0.005));
  CHECK_THAT(scores1[2], Catch::Matchers::WithinAbs(0.235, 0.005));
}
|
||||
|
||||
// Regression test: a translated copy of a molecule is overlaid back onto the
// original, first molecule-onto-molecule and then molecule-onto-ShapeInput.
TEST_CASE("Overlay onto shape bug (Github8462)") {
  auto m1 =
      R"(c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|)"_smiles;
  REQUIRE(m1);

  // m2 is m1 shifted by (3, 2, 0); m3 keeps a second shifted copy for the
  // shape-based overlay further down.
  ROMol m2(*m1);
  auto &shiftedConf = m2.getConformer();
  for (unsigned int idx = 0; idx < m2.getNumAtoms(); ++idx) {
    auto &atomPos = shiftedConf.getAtomPos(idx);
    atomPos.x += 3.0;
    atomPos.y += 2.0;
  }
  ROMol m3(m2);

  // Molecule onto molecule.
  auto scores = GaussianShape::AlignMolecule(*m1, m2);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.0, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.0, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(1.0, 0.005));
  CHECK(checkMolsHaveRoughlySameCoords(*m1, m2, 0.005));

  // Create the shape without normalization to mimic an arbitrary shape.
  auto s1 = GaussianShape::ShapeInput(*m1, -1);
  auto scores1 = AlignMolecule(s1, m3);
  CHECK_THAT(scores1[0], Catch::Matchers::WithinAbs(1.0, 0.005));
  CHECK_THAT(scores1[1], Catch::Matchers::WithinAbs(1.0, 0.005));
  CHECK_THAT(scores1[2], Catch::Matchers::WithinAbs(1.0, 0.005));

  // The shape's coordinates are read four values per atom, x, y and z first.
  const auto &movedConf = m3.getConformer();
  for (unsigned int idx = 0; idx < m3.getNumAtoms(); ++idx) {
    const auto base = 4 * idx;
    RDGeom::Point3D pos1(s1.getCoords()[base], s1.getCoords()[base + 1],
                         s1.getCoords()[base + 2]);
    auto pos2 = movedConf.getAtomPos(idx);
    CHECK_THAT((pos1 - pos2).length(), Catch::Matchers::WithinAbs(0.0, 0.01));
  }
}
|
||||
|
||||
// Aligns two molecules that are read with their hydrogens kept and checks
// that no atom ends up with an out-of-range x coordinate afterwards.
TEST_CASE("handling molecules with Hs") {
  // Building a std::string from a null pointer is undefined behaviour, so
  // stop with a readable failure if RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  v2::FileParsers::MolFileParserParams params;
  params.removeHs = false;
  auto suppl =
      v2::FileParsers::SDMolSupplier(dirName + "/align_with_hs.sdf", params);
  auto ref = suppl[0];
  REQUIRE(ref);
  auto probe = suppl[1];
  REQUIRE(probe);
  SECTION("basics") {
    RWMol cp(*probe);
    auto scores = GaussianShape::AlignMolecule(*ref, cp);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.700, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.834, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.566, 0.005));
    for (auto i = 0u; i < cp.getNumAtoms(); ++i) {
      // the failure mode here was that Hs had HUGE coordinates
      auto pos = cp.getConformer().getAtomPos(i);
      CHECK((pos.x > -10 && pos.x < 10));
    }
    // Check the rescore
    auto rescores = GaussianShape::ScoreMolecule(*ref, cp);
    CHECK_THAT(rescores[0], Catch::Matchers::WithinAbs(scores[0], 0.005));
    CHECK_THAT(rescores[1], Catch::Matchers::WithinAbs(scores[1], 0.005));
    CHECK_THAT(rescores[2], Catch::Matchers::WithinAbs(scores[2], 0.005));
  }
}
|
||||
|
||||
// Regression test: two benzene molecules with explicit hydrogens, one of
// which carries a [2H] atom, should overlay with scores of 1.
TEST_CASE("Github #8096") {
  SECTION("as reported") {
    auto m1 =
        R"([H]c1c([H])c([H])c([2H])c([H])c1[H] |(1.55967,1.91617,0.0546381;0.885536,1.07172,0.030849;1.38172,-0.23747,0.0274262;2.44539,-0.439501,0.0483424;0.470206,-1.27516,-0.00361916;0.856925,-2.30002,-0.00633525;-0.896665,-1.07227,-0.0310991;-1.60071,-1.87642,-0.0551085;-1.36315,0.22877,-0.0271173;-2.43593,0.379132,-0.0487835;-0.479018,1.29083,0.00359778;-0.823965,2.31421,0.00720933)|)"_smiles;
    auto m2 =
        R"([H]c1c([H])c([H])c([H])c([H])c1[H] |(-2.06264,-0.844763,-0.0261403;-1.04035,-0.481453,-0.0114878;-0.00743655,-1.41861,-0.0137121;-0.215455,-2.47997,-0.0295909;1.29853,-0.949412,0.00507497;2.12524,-1.65277,0.00390664;1.58501,0.395878,0.0254188;2.61997,0.704365,0.0394811;0.550242,1.31385,0.0273741;0.783172,2.37039,0.0434262;-0.763786,0.88847,0.00908113;-1.60557,1.58532,0.0100194)|)"_smiles;
    // Both parses must succeed before either molecule is dereferenced.
    REQUIRE(m1);
    REQUIRE(m2);

    const auto overlayScores = GaussianShape::AlignMolecule(*m1, *m2);
    // Combo, shape and color scores are all expected to be 1.
    for (auto which = 0u; which < 3u; ++which) {
      CHECK_THAT(overlayScores[which], Catch::Matchers::WithinAbs(1.0, 0.005));
    }
  }
}
|
||||
|
||||
// Regression test: after an overlay of molecules read with their hydrogens
// kept, every hydrogen must still be at bonding distance from its neighbour.
TEST_CASE("Hs not properly transformed when hcount = feature count") {
  // Building a std::string from a null pointer is undefined behaviour, so
  // stop with a readable failure if RDBASE is not set.
  const char *rdbase = getenv("RDBASE");
  REQUIRE(rdbase != nullptr);
  std::string dirName = rdbase;
  dirName += "/External/pubchem_shape/test_data";

  SECTION("as reported") {
    v2::FileParsers::MolFileParserParams ps;
    ps.removeHs = false;
    auto mol1 =
        v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_1.mol", ps);
    REQUIRE(mol1);
    auto mol2 =
        v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_2.mol", ps);
    REQUIRE(mol2);
    {
      RWMol cp(*mol2);
      auto scores = GaussianShape::AlignMolecule(*mol1, cp);
      CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.744, 0.005));
      CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.918, 0.005));
      CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.570, 0.005));
      // the bug led to H atoms in stupid positions, so we can detect it by just
      // looking at bond lengths to Hs:
      // NOTE(review): starting at getNumHeavyAtoms() presumes the hydrogens
      // come after all heavy atoms in the input file - confirm.
      for (auto i = cp.getNumHeavyAtoms(); i < cp.getNumAtoms(); ++i) {
        INFO("checking atom " << i);
        auto at = cp.getAtomWithIdx(i);
        for (auto nbr : cp.atomNeighbors(at)) {
          auto dist = (cp.getConformer().getAtomPos(i) -
                       cp.getConformer().getAtomPos(nbr->getIdx()))
                          .length();
          CHECK(dist < 1.2);  // should be a bond to H
        }
      }
    }
  }
}
|
||||
|
||||
// Scores pairs of molecules and shapes in their input positions, without any
// overlay being performed.
TEST_CASE("Score No Overlay") {
  // These are 2 ligands used by Andy Grant and Co in their original paper
  // https://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
  // Ligands as extracted from PDB, with a bit of munging to get them as
  // SMILES strings (downloaded the Ideal ligand structures from RCSB
  // as SDFs and transferred the corresponding atom coords from 3tmn and 1tmn).
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);
  GaussianShape::ShapeInputOptions shapeOpts;
  {
    // A molecule scored against itself.
    auto scores = ScoreMolecule(*pdb_trp_3tmn, *pdb_trp_3tmn, shapeOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.0, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.0, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(1.0, 0.001));
  }
  {
    // A copy of the same molecule translated by (100, 100, 100): the scores
    // are expected to be 0. The copy is taken from the molecule that was
    // already checked above rather than re-parsing the same SMILES.
    ROMol pdb_trp_3tmn_cp(*pdb_trp_3tmn);
    RDGeom::Point3D trans{100.0, 100.0, 100.0};
    RDGeom::Transform3D transform_3d;
    transform_3d.SetTranslation(trans);
    MolTransforms::transformConformer(pdb_trp_3tmn_cp.getConformer(),
                                      transform_3d);

    auto scores = ScoreMolecule(*pdb_trp_3tmn, pdb_trp_3tmn_cp, shapeOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.0, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.0, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.0, 0.001));
  }
  {
    auto scores = ScoreMolecule(*pdb_0zn_1tmn, *pdb_0zn_1tmn, shapeOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.0, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.0, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(1.0, 0.001));
  }
  {
    // Molecule against molecule.
    auto scores = ScoreMolecule(*pdb_trp_3tmn, *pdb_0zn_1tmn, shapeOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.307, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.349, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.265, 0.005));
  }
  {
    // Shape against molecule: same expected scores as above.
    auto shape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto scores = ScoreMolecule(shape, *pdb_0zn_1tmn, shapeOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.307, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.349, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.265, 0.005));
  }
  {
    // Shape against shape: same expected scores as above.
    auto shape1 = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto shape2 = GaussianShape::ShapeInput(*pdb_0zn_1tmn, -1, shapeOpts);
    auto scores = GaussianShape::ScoreShape(shape1, shape2);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.307, 0.005));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.349, 0.005));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.265, 0.005));
  }
}
|
||||
|
||||
// Overlays Iressa onto Tagrisso and checks both the scores and the final
// coordinates of the moved molecule.
TEST_CASE("Iressa onto Tagrisso") {
  // Conformations from PubChem produced by Omega. Iressa rotated and translated
  // by a random amount. PubChem puts them both in their inertial frame which
  // makes things too easy.
  auto tagrisso =
      R"(C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C |(-0.9161,3.8415,-2.9811;0.1848,3.1933,-2.588;0.1064,1.7789,-2.12;-0.9619,1.1797,-2.0847;1.3654,1.2872,-1.7553;1.6841,0.0144,-1.273;0.6638,-0.9235,-1.1146;0.9578,-2.1997,-0.6343;-0.0813,-3.1358,-0.4783;-1.4556,-2.9979,-0.1847;-2.1716,-4.1359,-0.1085;-3.4803,-3.9673,0.173;-4.0689,-2.7353,0.3728;-3.2269,-1.647,0.2676;-3.7311,-0.317,0.4568;-5.0275,0.0291,0.153;-5.1887,1.3569,0.4454;-6.4231,2.0889,0.2595;-4.0141,1.8811,0.9361;-3.7121,3.1796,1.3615;-2.4139,3.4249,1.8179;-1.4588,2.4106,1.8467;-1.7752,1.1164,1.4181;-3.0776,0.8453,0.9537;-1.9103,-1.7423,-0.011;2.2723,-2.5382,-0.3127;2.58,-3.7798,0.1575;2.539,-3.9651,1.571;3.2927,-1.6003,-0.4713;2.9986,-0.324,-0.9514;4.0475,0.61,-1.1047;4.7738,0.6956,-2.3546;4.4021,1.497,-0.0162;5.4401,0.8254,0.8736;5.8294,1.7155,1.9601;4.8213,1.7057,3.0218;7.1361,1.3324,2.4981)|)"_smiles;
  REQUIRE(tagrisso);
  auto iressa =
      R"(COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 |(11.4672,-0.467948,5.63989;12.0133,0.532631,6.49693;11.2039,1.5801,6.81985;11.2014,2.71958,6.00975;10.3926,3.81652,6.29699;10.4038,4.90395,5.50623;9.58889,5.91871,5.85946;8.76443,5.96486,6.91838;8.77814,4.86059,7.68868;7.92337,4.86224,8.81914;7.44878,5.8925,9.64622;8.22182,7.03851,9.85619;7.75051,8.06265,10.6777;6.50441,7.94546,11.2936;6.06567,8.93802,12.0809;5.72932,6.80403,11.0875;4.19056,6.65372,11.8447;6.20047,5.78015,10.2656;9.57161,3.74547,7.43436;9.56851,2.60328,8.25407;10.3868,1.52933,7.93769;10.3797,0.419365,8.74203;11.3064,0.402096,9.81907;10.7104,-0.399165,10.9685;9.40938,0.22121,11.4678;9.64205,1.59049,11.9223;8.38006,2.22985,12.3199;8.64991,3.65266,12.8011;9.56883,3.64192,13.8942;10.8103,3.05101,13.5078;10.5931,1.61394,13.0425)|)"_smiles;
  REQUIRE(iressa);
  GaussianShape::ShapeOverlayOptions opts;
  opts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;
  opts.startMode = GaussianShape::StartMode::A_LA_PUBCHEM;
  opts.nSteps = 100;
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.allCarbonRadii = false;
  auto scores = GaussianShape::AlignMolecule(*tagrisso, *iressa, shapeOpts,
                                             shapeOpts, nullptr, opts);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.332, 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.569, 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.095, 0.005));

  // Re-scoring the moved molecule in place should reproduce the scores.
  auto rescores = GaussianShape::ScoreMolecule(*tagrisso, *iressa, shapeOpts,
                                               shapeOpts, opts);
  CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(rescores[0], 0.005));
  CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(rescores[1], 0.005));
  CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(rescores[2], 0.005));
  // Expected coordinates of Iressa after the overlay.
  auto aligned_iressa =
      "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 |(3.34206,-4.82098,0.224565;2.562,-4.29938,-0.849374;1.40029,-3.66768,-0.520786;0.217637,-4.40844,-0.435429;-0.995315,-3.80966,-0.103538;-2.12266,-4.53841,-0.026421;-3.25097,-3.87666,0.301586;-3.3749,-2.56477,0.559961;-2.22774,-1.8651,0.473675;-2.30832,-0.475114,0.738276;-3.33657,0.464391,0.56286;-4.2686,0.303124,-0.466868;-5.2907,1.23626,-0.641364;-5.38531,2.33529,0.212458;-6.37287,3.22379,0.031359;-4.45782,2.50088,1.24127;-4.5695,3.85508,2.29834;-3.43604,1.56746,1.41601;-0.997368,-2.42737,0.145328;0.187601,-1.67548,0.061391;1.37756,-2.30488,-0.27166;2.52893,-1.56544,-0.352965;2.83995,-1.00034,-1.61907;3.59724,0.302963,-1.40382;2.76275,1.31266,-0.622178;1.52096,1.60458,-1.33518;0.667075,2.50553,-0.54864;-0.616491,2.80465,-1.31789;-0.312021,3.38752,-2.58555;0.491386,2.50505,-3.3701;1.80144,2.19743,-2.65039)|"_smiles;
  REQUIRE(aligned_iressa);
  // Assert on the result, as the other callers of this helper do, instead of
  // discarding it.
  CHECK(checkMolsHaveRoughlySameCoords(*iressa, *aligned_iressa));
}
|
||||
|
||||
// Regression test for in-place optimisation of a crystal-structure overlay.
// The probe (0ZN from 1tmn) starts in its PDB frame, which already overlays
// the reference (TRP from 3tmn) reasonably well.  The same overlay is then
// produced three ways: molecule/molecule, shape/molecule and shape/shape.
TEST_CASE("Optimise in place") {
  // These are 2 ligands used by Andy Grant and Co in their original paper
  // https://onlinelibrary.wiley.com/doi/10.1002/(SICI)1096-987X(19961115)17:14%3C1653::AID-JCC7%3E3.0.CO;2-K
  // Ligands as extracted from PDB, with a bit of munging to get them as
  // SMILES strings (downloaded the Ideal ligand structures from RCSB
  // as SDFs and transferred the corresponding atom coords from 3tmn and 1tmn).
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);
  // This is the overlay produced by the first test below, to make sure we
  // haven't broken anything.
  auto ov_pdb_0zn_1tmn =
      R"(CC(C)C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O |(38.4182,43.8068,-0.910588;38.2709,42.2972,-0.731075;38.1304,41.9045,0.733229;37.0364,41.7932,-1.45451;37.1781,42.1406,-2.93763;35.8859,41.9023,-3.62687;35.9519,42.0509,-5.13225;34.6741,41.7191,-5.92414;34.7628,40.2524,-6.36478;33.5811,39.6611,-7.16439;33.5076,39.8293,-8.54156;32.4798,39.2259,-9.29221;31.5187,38.4455,-8.64246;31.6293,38.2599,-7.2498;32.6704,38.8605,-6.52059;36.3695,43.3994,-5.66929;37.2254,43.4931,-6.57465;35.7737,44.4114,-5.28982;38.3937,41.376,-3.47909;39.5342,41.9164,-3.54877;38.1091,40.1191,-3.80052;39.1087,39.1759,-4.32928;39.9624,38.5242,-3.20191;39.1024,37.5196,-2.43949;37.8503,37.6527,-2.00683;37.3343,36.383,-1.67591;38.2646,35.5274,-1.83827;38.2218,34.1585,-1.6274;39.4752,33.5046,-1.63354;40.6899,34.1995,-1.85784;40.7011,35.5753,-2.15222;39.4587,36.2072,-2.14728;38.5746,38.1494,-5.32118;37.3737,38.2894,-5.71533;39.3977,37.4441,-6.00821),wD:6.6,wU:4.3,21.22|)"_smiles;
  // Dereferenced in several checks below, so a parse failure must stop the
  // test here rather than crash it.
  REQUIRE(ov_pdb_0zn_1tmn);

  // Scores of the two ligands exactly as they sit in their crystal frames.
  auto initScores = GaussianShape::ScoreMolecule(*pdb_trp_3tmn, *pdb_0zn_1tmn);
  CHECK_THAT(initScores[0], Catch::Matchers::WithinAbs(0.307, 0.001));
  CHECK_THAT(initScores[1], Catch::Matchers::WithinAbs(0.349, 0.001));
  CHECK_THAT(initScores[2], Catch::Matchers::WithinAbs(0.265, 0.001));

  // The PDB atom order isn't canonical, so bounce in and out of SMILES
  // to make it easier to check.
  auto canon_probe =
      v2::SmilesParse::MolFromSmiles(MolToCXSmiles(*pdb_0zn_1tmn));
  REQUIRE(canon_probe);

  {
    // This should just tweak the input overlay.
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    ROMol cp(*canon_probe);
    RDGeom::Transform3D xform;
    auto scores = GaussianShape::AlignMolecule(*pdb_trp_3tmn, cp, shapeOpts,
                                               shapeOpts, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    // The probe copy is expected to have been moved onto the stored overlay.
    CHECK(checkMolsHaveRoughlySameCoords(cp, *ov_pdb_0zn_1tmn));
  }
  {
    // With default settings, it does a poor job.
    GaussianShape::ShapeOverlayOptions opts;
    GaussianShape::ShapeInputOptions shapeOpts;
    ROMol cp(*canon_probe);
    auto scores = GaussianShape::AlignMolecule(*pdb_trp_3tmn, cp, shapeOpts,
                                               shapeOpts, nullptr, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.197, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.361, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.033, 0.001));
  }
  {
    // And with reference as a shape the same
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    ROMol cp(*canon_probe);
    RDGeom::Transform3D xform;
    auto scores =
        GaussianShape::AlignMolecule(refShape, cp, shapeOpts, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    CHECK(checkMolsHaveRoughlySameCoords(cp, *ov_pdb_0zn_1tmn));
    // The returned transform, applied to an untouched copy of the probe,
    // must reproduce the same overlay.  (cp itself is already aligned, so it
    // is deliberately not transformed a second time.)
    ROMol cp1(*canon_probe);
    MolTransforms::transformConformer(cp1.getConformer(), xform);
    CHECK(checkMolsHaveRoughlySameCoords(cp1, *ov_pdb_0zn_1tmn));
  }
  {
    // And with both as shapes
    GaussianShape::ShapeOverlayOptions opts;
    opts.startMode = GaussianShape::StartMode::ROTATE_0;
    opts.normalize = false;
    GaussianShape::ShapeInputOptions shapeOpts;
    auto refShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1, shapeOpts);
    auto fitShape = GaussianShape::ShapeInput(*canon_probe, -1, shapeOpts);
    RDGeom::Transform3D xform;
    auto scores = GaussianShape::AlignShape(refShape, fitShape, &xform, opts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(0.322, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(0.396, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.247, 0.001));
    // Convert the fitted shape back to a molecule to compare coordinates.
    auto cp = fitShape.shapeToMol(false);
    REQUIRE(cp);
    CHECK(checkMolsHaveRoughlySameCoords(*cp, *ov_pdb_0zn_1tmn));
  }
}
|
||||
|
||||
// Overlays the small TRP ligand (probe) onto the larger 0ZN ligand
// (reference) using the fragment start mode, optimising shape plus colour.
TEST_CASE("Fragment Mode") {
  // On the PDB overlay.
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);

  GaussianShape::ShapeOverlayOptions overlayOpts;
  overlayOpts.nSteps = 100;
  overlayOpts.startMode = GaussianShape::StartMode::ROTATE_180_FRAGMENT;
  overlayOpts.optimMode = GaussianShape::OptimMode::SHAPE_PLUS_COLOR_SCORE;

  // The smaller molecule is the one that gets moved.
  auto smallShape = GaussianShape::ShapeInput(*pdb_trp_3tmn, -1);
  auto largeShape = GaussianShape::ShapeInput(*pdb_0zn_1tmn, -1);
  RDGeom::Transform3D fitXform;
  const auto fragScores =
      GaussianShape::AlignShape(largeShape, smallShape, &fitXform, overlayOpts);

  // Expected to land near the scores obtained when starting from the
  // crystal-structure placement.
  const double tol = 0.005;
  CHECK_THAT(fragScores[0], Catch::Matchers::WithinAbs(0.311, tol));
  CHECK_THAT(fragScores[1], Catch::Matchers::WithinAbs(0.408, tol));
  CHECK_THAT(fragScores[2], Catch::Matchers::WithinAbs(0.215, tol));

  // Apply the resulting transform to the probe molecule's conformer.
  MolTransforms::transformConformer(pdb_trp_3tmn->getConformer(), fitXform);
}
|
||||
|
||||
// Checks user-supplied feature points: they replace the automatically
// perceived ones, they are dropped when colours are switched off, and swapping
// the feature types between two copies of the same molecule forces the
// overlay to flip the molecule end-for-end.
TEST_CASE("custom feature points") {
  auto m1 =
      "O=CC=O |(-1.75978,0.148897,0;-0.621382,-0.394324,0;0.624061,0.3656,.1;1.7571,-0.120174,.1)|"_smiles;
  // m1 is dereferenced in both sections below.
  REQUIRE(m1);
  SECTION("using shapes") {
    auto shape1 = GaussianShape::ShapeInput(*m1, -1);
    // each carbonyl O gets one feature:
    // (4 atoms + 2 features, 4 values stored per point)
    CHECK(shape1.getCoords().size() == 24);
    GaussianShape::ShapeInputOptions opts2;
    opts2.customFeatures = GaussianShape::CustomFeatures{
        {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
    auto shape2 = GaussianShape::ShapeInput(*m1, -1, opts2);
    CHECK(shape2.getCoords().size() == 24);

    {
      // confirm that we don't add the features if not requested.
      GaussianShape::ShapeInputOptions topts;
      topts.customFeatures = GaussianShape::CustomFeatures{
          {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
          {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};
      topts.useColors = false;
      auto tshape = GaussianShape::ShapeInput(*m1, -1, topts);
      CHECK(tshape.getCoords().size() == 16);
    }

    // we'll swap the features on the second shape so that the alignment has to
    // be inverted
    GaussianShape::ShapeInputOptions opts3;
    opts3.customFeatures = GaussianShape::CustomFeatures{
        {2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};

    auto m2 = ROMol(*m1);
    auto shape3 = GaussianShape::ShapeInput(m2, -1, opts3);
    CHECK(shape3.getCoords().size() == 24);
    GaussianShape::ShapeOverlayOptions overlayOpts;
    overlayOpts.optParam = 0.5;
    RDGeom::Transform3D xform;
    auto scores = AlignShape(shape2, shape3, &xform, overlayOpts);

    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.999, 0.001));
    // Atom 0 started at negative x and atom 3 at positive x, so these signs
    // show the fitted shape has been flipped.
    CHECK(shape3.getCoords()[0] > 0);      // x coord of first atom
    CHECK(shape3.getCoords()[3 * 4] < 0);  // x coord of fourth atom

    // The same flip must result from applying the returned transform to a
    // copy of the molecule's conformer.
    auto conf = m2.getConformer(-1);
    MolTransforms::transformConformer(conf, xform);
    CHECK(conf.getAtomPos(0).x > 0);
    CHECK(conf.getAtomPos(3).x < 0);
  }
  SECTION("using molecules") {
    GaussianShape::ShapeInputOptions opts2;
    opts2.customFeatures = GaussianShape::CustomFeatures{
        {1, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {2, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};

    auto m2 = ROMol(*m1);
    // we'll swap the features on the second shape so that the alignment has to
    // be inverted
    GaussianShape::ShapeInputOptions opts3;
    opts3.customFeatures = GaussianShape::CustomFeatures{
        {2, RDGeom::Point3D(-1.75978, 0.148897, 0), 1.0},
        {1, RDGeom::Point3D(1.7571, -0.120174, 0.1), 1.0}};

    GaussianShape::ShapeOverlayOptions overlayOpts;
    overlayOpts.optParam = 0.5;
    auto scores = AlignMolecule(*m1, m2, opts2, opts3, nullptr, overlayOpts);
    CHECK_THAT(scores[0], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[1], Catch::Matchers::WithinAbs(1.000, 0.001));
    CHECK_THAT(scores[2], Catch::Matchers::WithinAbs(0.999, 0.001));
    // m2's own conformer is expected to show the flip; it is only read here,
    // so no copy is needed.
    const auto &conf = m2.getConformer(-1);
    CHECK(conf.getAtomPos(0).x > 0);
    CHECK(conf.getAtomPos(3).x < 0);
  }
}
|
||||
|
||||
// Checks that per-atom radius overrides change the shape volume, and that
// overriding a carbon's radius with the N value gives the same volume as the
// real pyridine analogue built with the same options.
TEST_CASE("Non-standard radii") {
  auto m1 =
      "[Xe]c1ccccc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
  REQUIRE(m1);
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.useColors = false;
  shapeOpts.allCarbonRadii = false;
  auto shape1 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);

  // 7 atoms, 4 values stored per atom, no features as colours are off.
  CHECK(shape1.getCoords().size() == 28);
  CHECK_THAT(shape1.getShapeVolume(),
             Catch::Matchers::WithinAbs(387.396, 0.005));
  // mol1 with atom 4 with an N radius and a bigger Xe.
  // (allCarbonRadii is still false from above.)
  shapeOpts.atomRadii =
      std::vector<std::pair<unsigned int, double>>{{0, 2.5}, {4, 1.55}};
  auto shape2 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  CHECK_THAT(shape2.getShapeVolume(),
             Catch::Matchers::WithinAbs(425.051, 0.005));

  // Corresponding pyridine derivative.
  auto m2 =
      "[Xe]c1ccncc1 |(0.392086,-2.22477,0.190651;0.232269,-1.38667,0.118385;-1.06274,-0.918982,0.0342466;-1.26098,0.446053,-0.0811879;-0.244035,1.36265,-0.11691;1.05134,0.875929,-0.031248;1.28797,-0.499563,0.0864097),atomProp:0.dummyLabel.*|"_smiles;
  REQUIRE(m2);
  auto shape3 = GaussianShape::ShapeInput(*m2, -1, shapeOpts);
  CHECK(shape3.getShapeVolume() == shape2.getShapeVolume());
}
|
||||
|
||||
// Builds a shape from one ring of biphenyl via atomSubset and compares its
// size and volumes with the shape built from the whole molecule.
TEST_CASE("Shape subset") {
  auto m1 =
      "c1ccc(-c2ccccc2)cc1 |(-3.26053,-0.0841607,-0.741909;-2.93383,0.123873,0.593407;-1.60713,0.377277,0.917966;-0.644758,0.654885,-0.0378428;0.743308,0.219134,0.168663;1.82376,1.0395,-0.0112769;3.01462,0.695405,0.613858;3.18783,-0.589771,1.09649;2.15761,-1.50458,1.01949;0.988307,-1.1313,0.385783;-1.1048,0.797771,-1.34022;-2.39754,0.435801,-1.69921)|"_smiles;
  REQUIRE(m1);

  // All volume comparisons in this test share the same absolute tolerance.
  auto closeTo = [](double expected) {
    return Catch::Matchers::WithinAbs(expected, 0.005);
  };

  GaussianShape::ShapeInputOptions shapeOpts;

  // First pass: only the atoms of the first ring.
  shapeOpts.atomSubset = std::vector<unsigned int>{0, 1, 2, 3, 10, 11};
  auto ringShape = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  CHECK(ringShape.getCoords().size() == 28);
  CHECK_THAT(ringShape.getShapeVolume(), closeTo(261.166));
  CHECK_THAT(ringShape.getColorVolume(), closeTo(5.316));

  // Second pass: an empty subset, which is expected to give the full
  // molecule.
  shapeOpts.atomSubset.clear();
  auto fullShape = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  CHECK(fullShape.getCoords().size() == 56);
  CHECK_THAT(fullShape.getShapeVolume(), closeTo(556.266));
  CHECK_THAT(fullShape.getColorVolume(), closeTo(10.631));
}
|
||||
|
||||
// These are LOBSTER structures 437_A_355, YIW_A_1353, LSA_A_503, SU0_A_263,
|
||||
// VHC_A_1, 40Z_A_301, 0J8_A_1401, 5QQ_A_1401, 054_A_578, 053_A_578
|
||||
// respectively. LOBSTER is published at
|
||||
// https://doi.org/10.1007/s10822-024-00581-1 from the Rarey and BioSolveIT
|
||||
// group.
|
||||
std::
|
||||
vector<std::string>
|
||||
lobstersText =
|
||||
{
|
||||
"CC(C)(C)c1cc(NC(=O)Nc2cccc3ccccc23)n(-c2ccc(CO)cc2)n1 |(4.1858,1.2187,12.6749;3.4917,2.128,11.6409;4.5532,3.0576,11.0244;2.9098,1.2685,10.5255;2.3016,2.8612,12.2277;1.3306,2.2548,13.0356;0.4334,3.2937,13.302;-0.7316,3.3518,14.0275;-0.978,2.5581,15.0932;-0.147,1.7378,15.4901;-2.1645,2.6924,15.6938;-2.626,2.0319,16.8298;-1.8457,2.1016,18.0083;-2.2654,1.5179,19.2117;-3.4858,0.8316,19.2546;-4.2646,0.7339,18.1056;-5.4724,0.0393,18.1895;-6.2813,-0.103,17.0695;-5.8756,0.4693,15.8666;-4.6776,1.1996,15.7839;-3.8502,1.3304,16.8984;0.8824,4.3943,12.6425;0.3828,5.6491,12.5902;-0.1458,6.3636,13.6677;-0.6231,7.6713,13.5508;-0.5802,8.3258,12.3036;-1.1083,9.738,12.1288;-0.2929,10.4745,11.2057;-0.0699,7.6584,11.21;0.4028,6.3542,11.3691;2.0606,4.1572,11.9546)|",
|
||||
"CC(C)c1nnc2ccc(Sc3ccccc3CNC(=O)Nc3cc(C(C)(C)C)nn3-c3ccccc3)cn12 |(-2.677,-1.147,25.057;-1.383,-1.713,24.501;-0.937,-2.645,25.64;-1.654,-2.496,23.218;-1.533,-3.804,23.035;-1.814,-4.126,21.82;-2.125,-3.054,21.15;-2.484,-2.842,19.8;-2.737,-1.521,19.397;-2.633,-0.45,20.292;-2.934,1.2,19.817;-4.086,0.989,18.473;-5.306,0.233,18.595;-6.145,0.057,17.523;-5.802,0.63,16.3;-4.651,1.427,16.155;-3.805,1.585,17.237;-2.532,2.33,17.092;-2.317,3.124,15.849;-1.208,2.896,15.139;-0.293,2.051,15.461;-0.934,3.704,14.164;0.194,3.633,13.319;1.093,2.626,13.06;2.064,3.138,12.153;3.254,2.438,11.547;2.765,1.497,10.446;4.297,3.407,10.979;3.923,1.592,12.556;1.755,4.357,11.974;0.557,4.674,12.592;0.026,5.998,12.604;-0.011,6.769,11.405;-0.52,8.07,11.439;-0.946,8.61,12.635;-0.914,7.879,13.819;-0.396,6.572,13.782;-2.292,-0.677,21.581;-2.025,-1.986,22.016)|",
|
||||
"O=S1(=O)N=C(O)c2ccccc21 |(-5.7089,1.0252,18.4004;-6.3943,1.0779,17.1227;-7.8251,0.8757,17.0645;-5.6185,0.1418,15.9961;-4.8784,0.7874,15.1629;-4.3563,0.192,14.2273;-5.1972,2.2654,15.1438;-4.6774,3.2806,14.3416;-5.0543,4.5982,14.6171;-5.8378,4.8973,15.7426;-6.3084,3.8774,16.5767;-5.9587,2.5637,16.2652)|",
|
||||
"COc1ccc2c(CC(=O)Nc3ccc(S(N)(=O)=O)cc3)cc(=O)oc2c1 |(-2.8164,14.7062,11.3592;-3.6624,13.537,11.2643;-3.2822,12.2442,11.6717;-3.8543,11.0312,10.9854;-3.454,9.7632,11.3397;-2.4737,9.5893,12.4478;-2.0245,8.2354,12.8485;-2.4284,7.0271,12.0534;-3.4577,6.2276,12.8381;-4.6459,6.5341,12.9744;-2.9341,5.1799,13.652;-3.8043,4.2751,14.3593;-3.0353,3.1465,14.8909;-3.6696,2.1888,15.6247;-5.1279,2.3197,15.867;-5.8526,1.0165,16.8148;-5.1597,-0.2684,16.1766;-7.2644,1.0484,16.6895;-5.3596,1.2234,18.1529;-5.863,3.3573,15.3579;-5.1676,4.4008,14.5479;-1.2733,8.1678,14.0258;-0.8262,9.3446,14.6451;-0.1397,9.3025,15.6623;-1.0563,10.6342,14.1829;-1.9446,10.7231,13.0793;-2.3948,12.0761,12.7036)|",
|
||||
"Cc1cc(C)c2cc1C(=O)NCCCOc1cccc(c1)Sc1cc-2nc(N)n1 |(73.8435,34.0723,26.5156;72.3815,34.1628,26.0388;71.6766,32.9652,25.9327;70.3451,32.9485,25.4823;69.6347,31.5936,25.3834;69.7284,34.1492,25.0935;70.4497,35.3542,25.1896;71.7747,35.3826,25.6823;72.4585,36.7442,25.8391;73.1172,37.0066,26.8611;72.1633,37.6292,24.8945;72.7135,39.0047,24.8716;71.7025,40.047,24.3677;71.3796,39.8568,22.8758;70.4153,38.7939,22.9499;69.8006,38.3095,21.8404;70.2406,38.4827,20.5148;69.5441,37.8479,19.489;68.407,37.0853,19.7393;68.0155,36.8722,21.0628;68.6923,37.5061,22.0951;66.5592,35.8914,21.365;66.9463,35.055,22.8617;68.2573,34.8374,23.3077;68.4281,34.247,24.558;67.3421,33.831,25.243;66.0874,33.9792,24.7404;65.0509,33.4765,25.428;65.8935,34.6016,23.5436)|",
|
||||
"Cc1c2c(n3c1CCN(Cc1ccco1)c1cc(C(N)=O)c(Cl)cc1-3)CC(C)(C)CC2=O |(74.8244,36.0896,26.0638;73.6879,35.186,25.6743;73.8428,33.8098,25.2429;72.5848,33.3216,24.9555;71.6743,34.332,25.2034;72.3429,35.4561,25.6334;71.7112,36.7663,26.0021;70.9895,37.4573,24.8514;70.1361,36.585,24.0367;69.7698,37.0101,22.6942;70.4319,38.0048,21.8175;70.8144,39.2778,21.9818;71.3289,39.7044,20.7598;71.23,38.6778,19.9306;70.6882,37.6078,20.5486;69.5068,35.3697,24.5106;68.1241,35.239,24.3599;67.4459,34.0963,24.7908;65.9689,34.0392,24.5456;65.1976,33.5111,25.4835;65.5111,34.4884,23.4842;68.1905,33.0839,25.4012;67.4671,31.5959,25.9018;69.5536,33.1987,25.5786;70.2398,34.3166,25.103;72.2711,31.9512,24.4312;73.4524,31.2236,23.7805;73.1395,29.7332,23.6933;73.6857,31.7544,22.3618;74.7055,31.4432,24.6434;74.982,32.872,25.0594;76.1434,33.2319,25.2012)|",
|
||||
"O=[N+]([O-])c1cccc(CNc2nc(C(F)(F)F)nc3ncc(-c4cnn(C5CCNCC5)c4)cc23)c1 |(-1.438,-13.226,20.761;-2.668,-13.702,20.695;-3.225,-14.278,21.715;-3.449,-13.606,19.5;-4.865,-13.703,19.611;-5.66,-13.615,18.465;-5.057,-13.44,17.21;-3.642,-13.329,17.113;-2.985,-13.176,15.754;-3.208,-14.453,15.102;-2.183,-15.47,15.231;-1.174,-15.247,16.105;-0.207,-16.166,16.24;0.913,-15.872,17.233;1.607,-16.968,17.501;1.653,-14.932,16.679;0.431,-15.434,18.382;-0.155,-17.307,15.532;-1.134,-17.587,14.626;-1.051,-18.759,13.937;-2.004,-19.081,13.034;-3.067,-18.213,12.807;-4.134,-18.597,11.829;-4.958,-17.727,11.105;-5.807,-18.503,10.373;-5.518,-19.836,10.648;-6.22,-21.008,10.059;-7.613,-21.132,10.635;-8.385,-22.302,9.993;-8.344,-22.271,8.528;-7.465,-21.299,7.857;-6.163,-20.922,8.548;-4.513,-19.902,11.543;-3.18,-16.987,13.52;-2.183,-16.667,14.458;-2.84,-13.422,18.253)|",
|
||||
"Fc1ccc(-c2cnc3nnc(C(F)(F)c4ccc5ncccc5c4)n3n2)cc1 |(-8.9341,-13.5345,15.2941;-7.8624,-13.6169,16.1019;-8.0323,-13.604,17.4747;-6.927,-13.6949,18.3088;-5.6563,-13.7867,17.7687;-4.5363,-13.8655,18.5962;-4.6798,-14.0273,19.9861;-3.5769,-14.1,20.7695;-2.3321,-14.0007,20.1927;-1.0691,-14.049,20.7489;-0.1759,-13.9183,19.7353;-0.8597,-13.8052,18.581;-0.2877,-13.6262,17.1957;1.0077,-13.3321,17.3153;-0.932,-12.6293,16.5758;-0.3953,-14.89,16.3793;0.3679,-16.0044,16.7321;0.2944,-17.1762,15.9833;-0.5658,-17.2197,14.8472;-0.6484,-18.3768,14.1035;-1.4592,-18.4618,13.0237;-2.2424,-17.3707,12.6345;-2.1971,-16.184,13.3658;-1.3263,-16.1253,14.5037;-1.2424,-14.9308,15.2731;-2.1994,-13.8502,18.8681;-3.3091,-13.7741,18.0449;-5.4848,-13.7974,16.3944;-6.5897,-13.7169,15.5608)|",
|
||||
"Nc1cc(Cn2c(C(=O)O)c(-n3c(=O)[nH]c4cscc4c3=O)c3cc(C(F)(F)F)ccc32)ccn1 |(29.5323,45.8636,43.104;28.8655,44.7866,42.5152;27.6833,44.3191,43.0964;27.0256,43.2322,42.5026;25.769,42.5937,43.0062;25.1539,43.3311,44.0877;24.4106,44.4698,43.8927;24.1642,45.0354,42.563;23.9009,46.3564,42.5697;24.0503,44.4039,41.5301;23.9917,44.8584,45.1623;23.2031,45.9947,45.3911;21.8869,45.9724,44.834;21.3954,44.9674,44.2853;21.1644,47.1439,44.9613;21.7067,48.2779,45.5571;21.0928,49.4972,45.696;22.1115,50.6335,46.4923;23.3622,49.4671,46.6456;23.0143,48.2553,46.0988;23.8418,47.054,46.0577;24.9808,47.0096,46.5158;24.5049,43.9455,46.1213;24.4196,43.8288,47.5202;25.0722,42.7404,48.1424;25.0173,42.5333,49.6217;24.5189,41.3151,49.9335;26.2412,42.5919,50.1868;24.2862,43.4348,50.3041;25.7949,41.7994,47.3917;25.8973,41.9011,46.0107;25.2386,42.9843,45.4045;27.5907,42.6386,41.3749;28.7511,43.1735,40.8552;29.3972,44.2337,41.3948)|",
|
||||
"Nc1cc(Cn2c(C(=O)O)c(-c3ccc[nH]c3=O)c3cc(C(F)(F)F)ccc32)ccn1 |(29.4427,46.05,43.4472;28.8505,44.9996,42.7518;27.6724,44.4529,43.2669;27.0734,43.3813,42.5955;25.8189,42.6999,43.052;25.2163,43.3606,44.2071;24.4581,44.4943,44.0838;24.1931,45.0901,42.7808;24.1204,46.4333,42.7998;23.8923,44.4562,41.7912;24.0461,44.8297,45.3707;23.2097,45.9536,45.7413;23.5041,46.7963,46.7667;22.6287,47.9099,47.1092;21.5037,48.1244,46.3872;21.1624,47.2732,45.3588;21.9195,46.1834,44.9822;21.563,45.4266,44.0641;24.5817,43.8725,46.2645;24.4958,43.6833,47.6538;25.1452,42.5857,48.2292;25.0531,42.3252,49.6881;24.5934,41.0857,49.9422;26.2447,42.4118,50.3073;24.2575,43.1875,50.3428;25.8943,41.6963,47.4464;25.9994,41.8646,46.0771;25.324,42.958,45.5057;27.6791,42.8877,41.4291;28.8324,43.5019,40.9842;29.4267,44.5435,41.6155)|",
|
||||
};
|
||||
std::vector<std::shared_ptr<RWMol>> lobsters;
|
||||
void initLobsters() {
|
||||
if (lobsters.empty()) {
|
||||
for (const auto &text : lobstersText) {
|
||||
lobsters.emplace_back(v2::SmilesParse::MolFromSmiles(text));
|
||||
REQUIRE(lobsters.back());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scores the crystal-structure placement of the two ligands with the default
// similarity and with two complementary simAlpha/simBeta weightings.
TEST_CASE("Tversky") {
  // Score the PDB overlay.
  auto pdb_trp_3tmn =
      R"(N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2 |(37.935,40.394,-3.825;39.119,39.593,-4.13;38.758,38.486,-5.101;37.526,38.337,-5.395;39.716,37.852,-5.605;39.883,39.108,-2.906;39.086,38.098,-2.209;38.093,38.363,-1.34;37.565,37.179,-0.881;38.201,36.136,-1.471;39.193,36.684,-2.308;40.015,35.812,-3.036;39.846,34.441,-2.913;38.844,33.933,-2.075;38.015,34.752,-1.333),wU:1.0|)"_smiles;
  REQUIRE(pdb_trp_3tmn);
  auto pdb_0zn_1tmn =
      R"([C@H](CCc1ccccc1)(C(=O)O)N[C@H](C(=O)N[C@H](C(=O)O)Cc1c[nH]c2c1cccc2)CC(C)C |(35.672,41.482,-5.722;34.516,40.842,-6.512;34.843,39.355,-6.7;33.819,38.475,-7.45;33.825,38.414,-8.838;32.951,37.553,-9.53;32.064,36.747,-8.81;32.096,36.799,-7.402;32.985,37.656,-6.73;35.934,42.778,-6.452;36.833,42.858,-7.316;35.175,43.735,-6.275;35.516,41.561,-4.218;36.707,42.096,-3.513;38.055,41.449,-3.859;39.11,42.138,-3.959;37.975,40.129,-3.983;39.134,39.277,-4.298;38.825,38.04,-5.133;37.649,37.934,-5.605;39.788,37.369,-5.652;39.985,38.945,-3.037;39.221,37.953,-2.164;37.934,37.961,-1.823;37.579,36.695,-1.314;38.63,35.975,-1.286;39.736,36.771,-1.642;41.052,36.341,-1.48;41.213,35.042,-0.964;40.095,34.215,-0.69;38.765,34.665,-0.855;36.506,41.966,-2.002;37.6,42.757,-1.31;37.546,44.225,-1.728;37.408,42.58,0.19),wD:0.0,wU:17.21,13.33|)"_smiles;
  REQUIRE(pdb_0zn_1tmn);

  const double tol = 0.001;
  GaussianShape::ShapeInputOptions inOpts;
  GaussianShape::ShapeOverlayOptions ovlyOpts;

  // Default options first.
  {
    const auto tanScores =
        GaussianShape::ScoreMolecule(*pdb_0zn_1tmn, *pdb_trp_3tmn);
    CHECK_THAT(tanScores[0], Catch::Matchers::WithinAbs(0.307, tol));
    CHECK_THAT(tanScores[1], Catch::Matchers::WithinAbs(0.349, tol));
    CHECK_THAT(tanScores[2], Catch::Matchers::WithinAbs(0.265, tol));
  }

  // simAlpha dominant.
  {
    ovlyOpts.simAlpha = 0.95;
    ovlyOpts.simBeta = 0.05;
    const auto refWeighted = GaussianShape::ScoreMolecule(
        *pdb_0zn_1tmn, *pdb_trp_3tmn, inOpts, inOpts, ovlyOpts);
    CHECK_THAT(refWeighted[0], Catch::Matchers::WithinAbs(0.362, tol));
    CHECK_THAT(refWeighted[1], Catch::Matchers::WithinAbs(0.383, tol));
    CHECK_THAT(refWeighted[2], Catch::Matchers::WithinAbs(0.342, tol));
  }

  // simBeta dominant.
  {
    ovlyOpts.simAlpha = 0.05;
    ovlyOpts.simBeta = 0.95;
    const auto fitWeighted = GaussianShape::ScoreMolecule(
        *pdb_0zn_1tmn, *pdb_trp_3tmn, inOpts, inOpts, ovlyOpts);
    CHECK_THAT(fitWeighted[0], Catch::Matchers::WithinAbs(0.668, tol));
    CHECK_THAT(fitWeighted[1], Catch::Matchers::WithinAbs(0.795, tol));
    CHECK_THAT(fitWeighted[2], Catch::Matchers::WithinAbs(0.540, tol));
  }
}
|
||||
|
||||
#ifdef RDK_USE_BOOST_SERIALIZATION
|
||||
// Round-trips a ShapeInput through toString() and the string constructor and
// checks that every accessor compared below agrees with the original, both
// with and without per-atom carbon radii.
TEST_CASE("Serialization") {
  auto m1 =
      "[H]c1c([H])c([H])c([H])c([H])c1[H] |(-2.06264,-0.844763,-0.0261403;-1.04035,-0.481453,-0.0114878;-0.00743655,-1.41861,-0.0137121;-0.215455,-2.47997,-0.0295909;1.29853,-0.949412,0.00507497;2.12524,-1.65277,0.00390664;1.58501,0.395878,0.0254188;2.61997,0.704365,0.0394811;0.550242,1.31385,0.0273741;0.783172,2.37039,0.0434262;-0.763786,0.88847,0.00908113;-1.60557,1.58532,0.0100194)|"_smiles;
  REQUIRE(m1);
  GaussianShape::ShapeInputOptions shapeOpts;
  shapeOpts.allCarbonRadii = false;
  auto shape = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  auto istr = shape.toString();

  GaussianShape::ShapeInput shape2(istr);
  CHECK(shape2.getCoords() == shape.getCoords());
  CHECK(shape2.getTypes() == shape.getTypes());
  CHECK(shape2.getNumAtoms() == shape.getNumAtoms());
  CHECK(shape2.getNumFeatures() == shape.getNumFeatures());
  CHECK(shape2.getNormalized() == shape.getNormalized());
  CHECK(shape2.calcExtremes() == shape.calcExtremes());
  CHECK(shape2.calcCanonicalRotation() == shape.calcCanonicalRotation());
  CHECK(shape2.calcCanonicalTranslation() == shape.calcCanonicalTranslation());
  // Both radii holders are dereferenced on the next line, and the second half
  // of this test shows the accessor can be empty, so stop here with a test
  // failure rather than dereferencing an empty one.
  REQUIRE(shape.getCarbonRadii());
  REQUIRE(shape2.getCarbonRadii());
  CHECK(*shape2.getCarbonRadii() == *shape.getCarbonRadii());
  CHECK_THAT(shape2.getShapeVolume(),
             Catch::Matchers::WithinAbs(261.0145, 0.005));
  CHECK_THAT(shape2.getColorVolume(), Catch::Matchers::WithinAbs(5.316, 0.005));

  // Check it handles the case of no d_carbonRadii in the ShapeInput.
  shapeOpts.allCarbonRadii = true;
  auto shape3 = GaussianShape::ShapeInput(*m1, -1, shapeOpts);
  auto istr2 = shape3.toString();
  GaussianShape::ShapeInput shape4(istr2);
  CHECK(!shape4.getCarbonRadii());
}
|
||||
#endif
|
||||
|
||||
#ifdef RDK_TEST_MULTITHREADED
|
||||
#include <thread>
|
||||
#include <future>
|
||||
|
||||
namespace {
|
||||
void runblock(
|
||||
const std::vector<std::pair<std::shared_ptr<RWMol>, std::shared_ptr<RWMol>>>
|
||||
&pairs,
|
||||
unsigned int count, unsigned int idx,
|
||||
std::vector<std::array<double, 3>> &test) {
|
||||
for (unsigned int i = idx; i < pairs.size(); i += count) {
|
||||
auto p1 = *pairs[i].first;
|
||||
auto p2 = *pairs[i].second;
|
||||
test[i] = GaussianShape::AlignMolecule(p1, p2);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Runs the same set of pairwise alignments serially and then split across
// several asynchronous tasks, and requires the two sets of scores to be
// identical.
TEST_CASE("multithreaded") {
  initLobsters();

  constexpr size_t numRepeats = 1000;
  std::vector<std::pair<std::shared_ptr<RWMol>, std::shared_ptr<RWMol>>> pairs;
  pairs.reserve(numRepeats * (lobsters.size() / 2));
  for (auto r = 0u; r < numRepeats; ++r) {
    // Pair consecutive molecules: (0,1), (2,3), ...  Bounding the loop by the
    // container size keeps the indexing valid whatever the table length.
    for (unsigned int i = 0; i + 1 < lobsters.size(); i += 2) {
      pairs.emplace_back(lobsters[i], lobsters[i + 1]);
    }
  }
  REQUIRE(!pairs.empty());

  // generate reference data
  std::cerr << " generating reference data" << '\n';
  auto start = std::chrono::steady_clock::now();
  std::vector<std::array<double, 3>> ref;
  ref.reserve(pairs.size());
  for (const auto &pr : pairs) {
    // Align private copies so the shared molecules are left untouched.
    auto p1 = *pr.first;
    auto p2 = *pr.second;
    ref.push_back(GaussianShape::AlignMolecule(p1, p2));
  }
  auto end = std::chrono::steady_clock::now();
  auto ref_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
          .count();
  std::cerr << " reference time: " << ref_time << "ms" << '\n';

  // run the same calculations in parallel and check they match the reference
  std::cerr << " parallel loop" << '\n';
  std::vector<std::array<double, 3>> test(pairs.size());
  auto start2 = std::chrono::steady_clock::now();
  std::vector<std::future<void>> tg;
  unsigned int count = 4;
  tg.reserve(count);
  for (unsigned int i = 0; i < count; ++i) {
    // std::async decay-copies its arguments, so pass the pair list by
    // reference wrapper instead of copying it into every task.  pairs and
    // test are declared before tg, so they outlive every future.
    tg.emplace_back(std::async(std::launch::async, runblock, std::cref(pairs),
                               count, i, std::ref(test)));
  }
  for (auto &fut : tg) {
    fut.get();
  }
  auto end2 = std::chrono::steady_clock::now();
  auto test_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(end2 - start2)
          .count();
  std::cerr << " parallel time: " << test_time << "ms" << '\n';
  CHECK(test == ref);
}
|
||||
#endif
|
||||
@@ -247,8 +247,10 @@ bool computePrincipalAxesAndMomentsFromGyrationMatrix(
|
||||
return true;
|
||||
}
|
||||
auto origin = computeCentroid(conf, ignoreHs, weights);
|
||||
// Note that this may not return a right-handed axis.
|
||||
bool res = getEigenValEigenVectFromCovMat(conf, axes, moments, origin,
|
||||
ignoreHs, true, weights);
|
||||
|
||||
if (res && !weights) {
|
||||
conf.getOwningMol().setProp(axesPropName, axes, true);
|
||||
conf.getOwningMol().setProp(momentsPropName, moments, true);
|
||||
@@ -259,7 +261,8 @@ bool computePrincipalAxesAndMomentsFromGyrationMatrix(
|
||||
RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
const RDGeom::Point3D *center,
|
||||
bool normalizeCovar,
|
||||
bool ignoreHs) {
|
||||
bool ignoreHs,
|
||||
double *eigenValues) {
|
||||
constexpr unsigned int DIM = 3;
|
||||
RDGeom::Point3D origin;
|
||||
if (!center) {
|
||||
@@ -271,8 +274,8 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
auto *trans = new RDGeom::Transform3D;
|
||||
trans->setToIdentity();
|
||||
|
||||
// if we have a single atom system we don't need to do anyhting setting
|
||||
// translation is sufficient
|
||||
// If we have a single atom system we don't need to do anything setting
|
||||
// translation is sufficient.
|
||||
if (nAtms > 1) {
|
||||
Eigen::Matrix3d eigVecs;
|
||||
Eigen::Vector3d eigVals;
|
||||
@@ -291,6 +294,9 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
});
|
||||
for (unsigned int col = 0; col < DIM; ++col) {
|
||||
unsigned int colSorted = eigValsSorted.at(col).first;
|
||||
if (eigenValues) {
|
||||
eigenValues[colSorted] = eigValsSorted.at(col).second;
|
||||
}
|
||||
for (unsigned int row = 0; row < DIM; ++row) {
|
||||
trans->setVal(col, row, eigVecs(row, colSorted));
|
||||
}
|
||||
@@ -327,7 +333,8 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
const RDGeom::Point3D *center,
|
||||
bool normalizeCovar,
|
||||
bool ignoreHs) {
|
||||
bool ignoreHs,
|
||||
double *retEigenValues) {
|
||||
RDGeom::Point3D origin;
|
||||
if (!center) {
|
||||
origin = computeCentroid(conf, ignoreHs);
|
||||
@@ -339,7 +346,7 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
// find the eigen values and eigen vectors for the covMat
|
||||
RDNumeric::DoubleMatrix eigVecs(3, 3);
|
||||
RDNumeric::DoubleVector eigVals(3);
|
||||
// if we have a single atom system we don't need to do anyhting other than
|
||||
// if we have a single atom system we don't need to do anything other than
|
||||
// setting translation
|
||||
// translation
|
||||
unsigned int nAtms = conf.getNumAtoms();
|
||||
@@ -348,11 +355,16 @@ RDGeom::Transform3D *computeCanonicalTransform(const Conformer &conf,
|
||||
// set the translation
|
||||
origin *= -1.0;
|
||||
// trans->SetTranslation(origin);
|
||||
// if we have a single atom system we don't need to do anyhting setting
|
||||
// if we have a single atom system we don't need to do anything setting
|
||||
// translation is sufficient
|
||||
if (nAtms > 1) {
|
||||
RDNumeric::EigenSolvers::powerEigenSolver(3, *covMat, eigVals, eigVecs,
|
||||
conf.getNumAtoms());
|
||||
if (retEigenValues) {
|
||||
retEigenValues[0] = eigVals[0];
|
||||
retEigenValues[1] = eigVals[1];
|
||||
retEigenValues[2] = eigVals[2];
|
||||
}
|
||||
// deal with zero eigen value systems
|
||||
unsigned int i, j, dim = 3;
|
||||
for (i = 0; i < 3; ++i) {
|
||||
|
||||
@@ -110,10 +110,13 @@ computePrincipalAxesAndMomentsFromGyrationMatrix(
|
||||
\param normalizeCovar Normalize the covariance matrix with the number of
|
||||
atoms
|
||||
\param ignoreHs Optionally ignore hydrogens
|
||||
\param eigenVals Optionally return the values for the eigenvalues,
|
||||
sorted in ascending order. If given, must be big enough to hold 3 values.
|
||||
*/
|
||||
RDKIT_MOLTRANSFORMS_EXPORT RDGeom::Transform3D *computeCanonicalTransform(
|
||||
const RDKit::Conformer &conf, const RDGeom::Point3D *center = nullptr,
|
||||
bool normalizeCovar = false, bool ignoreHs = true);
|
||||
bool normalizeCovar = false, bool ignoreHs = true,
|
||||
double *eigenVals = nullptr);
|
||||
|
||||
//! Transform the conformation using the specified transformation
|
||||
RDKIT_MOLTRANSFORMS_EXPORT void transformConformer(
|
||||
|
||||
Reference in New Issue
Block a user