Fixes a bug with bad H positions in output conformer (#8731)

* Fixes a bug with bad H positions in output conformer

This would only trigger when the number of H atoms is equal
to the number of features, but it was terrible when it happened.

* spell that better

* docs

---------

Co-authored-by: = <=>
This commit is contained in:
Greg Landrum
2025-08-27 13:28:50 +02:00
committed by GitHub
parent be2b1899ae
commit 81226d84ad
5 changed files with 199 additions and 12 deletions

View File

@@ -479,23 +479,28 @@ std::pair<double, double> AlignShape(const ShapeInput &refShape,
void TransformConformer(const std::vector<double> &finalTrans,
const std::vector<float> &matrix, ShapeInput &fitShape,
Conformer &fitConf) {
// we reuse/modify the coord member of fitShape in order to avoid memory
// allocations
const unsigned int nAtoms = fitConf.getOwningMol().getNumAtoms();
if (3 * nAtoms != fitShape.coord.size()) {
// Hs were removed
if (nAtoms > fitShape.volumeAtomIndexVector.size()) {
// Hs are missing... make sure we have space for them
fitShape.coord.resize(3 * nAtoms);
for (unsigned int i = 0; i < nAtoms; ++i) {
const auto &pos = fitConf.getAtomPos(i);
fitShape.coord[i * 3] = pos.x + fitShape.shift[0];
fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1];
fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2];
}
}
// initialize the fitShape coords with the starting atomic positions from
// the conformer shifted to the center of "mass" coordinates.
for (unsigned int i = 0; i < nAtoms; ++i) {
const auto &pos = fitConf.getAtomPos(i);
fitShape.coord[i * 3] = pos.x + fitShape.shift[0];
fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1];
fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2];
}
std::vector<float> transformed(nAtoms * 3);
Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(),
fitConf.getOwningMol().getNumAtoms(),
matrix.data());
nAtoms, matrix.data());
// now set the coordinates in the conformer; undo the shift of the reference
// shape to center of "mass" coordinates
for (unsigned i = 0; i < nAtoms; ++i) {
RDGeom::Point3D &pos = fitConf.getAtomPos(i);
pos.x = transformed[i * 3] - finalTrans[0];

View File

@@ -122,7 +122,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair<double, double> AlignShape(
/*!
\param finalTrans the final translation to apply to the fitConf coords.
\param matrix the transformation matrix produced from alignment
\param fitShape the shape that was aligned
\param fitShape the shape that was aligned. The coord vector of this is
modified
\param fitConf the conformation to be transformed
*/
RDKIT_PUBCHEMSHAPE_EXPORT void TransformConformer(

View File

@@ -18,7 +18,7 @@ using namespace RDKit;
constexpr double test_opt_param = 0.5;
constexpr unsigned int test_max_preiters = 3;
constexpr unsigned int test_max_postiters = 16;
constexpr double test_use_colors = true;
constexpr bool test_use_colors = true;
TEST_CASE("basic alignment") {
std::string dirName = getenv("RDBASE");
@@ -482,3 +482,44 @@ TEST_CASE("Atoms excluded from Color features") {
auto shape2 = PrepareConformer(*m1, -1, opts);
CHECK(shape2.coord.size() == 24);
}
TEST_CASE("Hs not properly transformed when hcount = feature count") {
std::string dirName = getenv("RDBASE");
dirName += "/External/pubchem_shape/test_data";
SECTION("as reported") {
v2::FileParsers::MolFileParserParams ps;
ps.removeHs = false;
auto mol1 =
v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_1.mol", ps);
REQUIRE(mol1);
auto mol2 =
v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_2.mol", ps);
REQUIRE(mol2);
{
RWMol cp(*mol2);
std::vector<float> matrix(12, 0.0);
auto [nbr_st, nbr_ct] =
AlignMolecule(*mol1, cp, matrix, -1, -1, true, 1.0, 30, 30);
CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.911, 0.005));
CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.555, 0.005));
// the bug led to H atoms in stupid positions, so we can detect it by just
// looking at bond lengths to Hs:
for (auto i = cp.getNumHeavyAtoms(); i < cp.getNumAtoms(); ++i) {
INFO("checking atom " << i);
auto at = cp.getAtomWithIdx(i);
for (auto nbr : cp.atomNeighbors(at)) {
auto dist = (cp.getConformer().getAtomPos(i) -
cp.getConformer().getAtomPos(nbr->getIdx()))
.length();
CHECK(dist < 1.2); // should be a bond to H
}
}
MolToMolFile(*mol1, "m1_out.mol");
MolToMolFile(cp, "m2_out.mol");
}
}
}

View File

@@ -0,0 +1,70 @@
RDKit 3D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 29 29 0 0 0
M V30 BEGIN ATOM
M V30 1 C 1.927916 -1.410215 0.383862 0 CFG=2
M V30 2 C 2.864997 -0.453384 -0.304805 0 CFG=1
M V30 3 C 2.541536 0.930071 0.181463 0 CFG=2
M V30 4 C 1.121018 1.210562 -0.183540 0 CFG=1
M V30 5 C 0.261228 0.172390 0.501275 0 CFG=1
M V30 6 C -1.159913 0.416657 0.019581 0 CFG=2
M V30 7 O 2.169981 -2.687972 -0.116436 0
M V30 8 O 4.193894 -0.793759 -0.128449 0
M V30 9 O 3.433090 1.889378 -0.256256 0
M V30 10 O 0.700107 2.482459 0.148348 0
M V30 11 O 0.605389 -1.111397 0.092338 0
M V30 12 C -2.160471 -0.528416 0.600029 0 CFG=1
M V30 13 P -3.780695 -0.072511 -0.095206 0 CFG=2
M V30 14 O -3.776864 1.409843 -0.346918 0
M V30 15 O -4.984733 -0.488974 1.023641 0 CHG=-1
M V30 16 O -3.956480 -0.964732 -1.518928 0 CHG=-1
M V30 17 H 2.150729 -1.413020 1.453945 0
M V30 18 H 2.667693 -0.515429 -1.399309 0
M V30 19 H 2.589219 0.897700 1.312392 0
M V30 20 H 0.935618 1.063894 -1.281071 0
M V30 21 H 0.300664 0.328483 1.587864 0
M V30 22 H -1.207924 0.346742 -1.088884 0
M V30 23 H -1.414630 1.447862 0.341830 0
M V30 24 H 2.009793 -3.373820 0.575214 0
M V30 25 H 4.515501 -0.644791 0.774293 0
M V30 26 H 4.195831 1.894158 0.403443 0
M V30 27 H 0.770315 3.058647 -0.652077 0
M V30 28 H -1.892592 -1.579459 0.409174 0
M V30 29 H -2.164770 -0.391014 1.718212 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 1 7
M V30 3 1 1 11
M V30 4 1 2 3
M V30 5 1 2 8
M V30 6 1 3 4
M V30 7 1 3 9
M V30 8 1 4 5
M V30 9 1 4 10
M V30 10 1 5 6
M V30 11 1 5 11
M V30 12 1 6 12
M V30 13 1 12 13
M V30 14 2 13 14
M V30 15 1 13 15 CFG=1
M V30 16 1 13 16
M V30 17 1 1 17 CFG=1
M V30 18 1 2 18 CFG=3
M V30 19 1 3 19 CFG=1
M V30 20 1 4 20 CFG=3
M V30 21 1 5 21 CFG=1
M V30 22 1 6 22 CFG=3
M V30 23 1 6 23
M V30 24 1 7 24
M V30 25 1 8 25
M V30 26 1 9 26
M V30 27 1 10 27
M V30 28 1 12 28 CFG=3
M V30 29 1 12 29
M V30 END BOND
M V30 END CTAB
M END

View File

@@ -0,0 +1,70 @@
RDKit 3D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 29 29 0 0 0
M V30 BEGIN ATOM
M V30 1 C -2.271719 -1.297574 -0.275743 0 CFG=2
M V30 2 C -2.960742 -0.108151 0.333533 0 CFG=1
M V30 3 C -2.276603 1.142820 -0.215630 0 CFG=2
M V30 4 C -0.807701 1.051813 0.200191 0 CFG=1
M V30 5 C -0.257332 -0.229023 -0.412159 0 CFG=1
M V30 6 C 1.176341 -0.487698 -0.052713 0 CFG=2
M V30 7 O -2.896061 -2.497679 0.097593 0
M V30 8 O -4.297986 -0.147526 0.010192 0
M V30 9 O -2.895009 2.238490 0.335298 0
M V30 10 O -0.151207 2.118049 -0.366709 0
M V30 11 O -0.955838 -1.335529 0.032629 0
M V30 12 C 2.155019 0.550499 -0.466310 0 CFG=1
M V30 13 P 3.787109 -0.071015 0.108124 0 CFG=2
M V30 14 O 3.682037 -0.278070 1.605143 0
M V30 15 O 5.020474 0.957677 -0.343256 0 CHG=-1
M V30 16 O 3.949218 -1.607084 -0.590182 0 CHG=-1
M V30 17 H -2.337968 -1.187192 -1.406886 0
M V30 18 H -2.815253 -0.154589 1.439834 0
M V30 19 H -2.399154 1.143637 -1.297038 0
M V30 20 H -0.756646 1.105598 1.281380 0
M V30 21 H -0.384448 -0.168903 -1.509574 0
M V30 22 H 1.462397 -1.433844 -0.578569 0
M V30 23 H 1.289312 -0.706517 1.026045 0
M V30 24 H -3.498652 -2.751301 -0.628180 0
M V30 25 H -4.411239 0.237229 -0.877694 0
M V30 26 H -3.390002 2.052112 1.183332 0
M V30 27 H -0.316442 2.057553 -1.353106 0
M V30 28 H 2.207336 0.733167 -1.537732 0
M V30 29 H 2.028167 1.510524 0.115644 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 1 7
M V30 3 1 1 11
M V30 4 1 2 3
M V30 5 1 2 8
M V30 6 1 3 4
M V30 7 1 3 9
M V30 8 1 4 5
M V30 9 1 4 10
M V30 10 1 5 6
M V30 11 1 5 11
M V30 12 1 6 12
M V30 13 1 12 13
M V30 14 2 13 14
M V30 15 1 13 15 CFG=1
M V30 16 1 13 16
M V30 17 1 1 17 CFG=3
M V30 18 1 2 18 CFG=1
M V30 19 1 3 19 CFG=3
M V30 20 1 4 20 CFG=1
M V30 21 1 5 21 CFG=3
M V30 22 1 6 22 CFG=3
M V30 23 1 6 23
M V30 24 1 7 24
M V30 25 1 8 25
M V30 26 1 9 26
M V30 27 1 10 27
M V30 28 1 12 28 CFG=1
M V30 29 1 12 29
M V30 END BOND
M V30 END CTAB
M END