diff --git a/External/pubchem_shape/PubChemShape.cpp b/External/pubchem_shape/PubChemShape.cpp index b3b608d76..40d4ac215 100644 --- a/External/pubchem_shape/PubChemShape.cpp +++ b/External/pubchem_shape/PubChemShape.cpp @@ -479,23 +479,28 @@ std::pair AlignShape(const ShapeInput &refShape, void TransformConformer(const std::vector &finalTrans, const std::vector &matrix, ShapeInput &fitShape, Conformer &fitConf) { + // we reuse/modify the coord member of fitShape in order to avoid memory + // allocations const unsigned int nAtoms = fitConf.getOwningMol().getNumAtoms(); - if (3 * nAtoms != fitShape.coord.size()) { - // Hs were removed + if (nAtoms > fitShape.volumeAtomIndexVector.size()) { + // Hs are missing... make sure we have space for them fitShape.coord.resize(3 * nAtoms); - for (unsigned int i = 0; i < nAtoms; ++i) { - const auto &pos = fitConf.getAtomPos(i); - fitShape.coord[i * 3] = pos.x + fitShape.shift[0]; - fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1]; - fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2]; - } + } + // initialize the fitShape coords with the starting atomic positions from + // the conformer shifted to the center of "mass" coordinates. + for (unsigned int i = 0; i < nAtoms; ++i) { + const auto &pos = fitConf.getAtomPos(i); + fitShape.coord[i * 3] = pos.x + fitShape.shift[0]; + fitShape.coord[i * 3 + 1] = pos.y + fitShape.shift[1]; + fitShape.coord[i * 3 + 2] = pos.z + fitShape.shift[2]; } std::vector transformed(nAtoms * 3); Align3D::VApplyRotTransMatrix(transformed.data(), fitShape.coord.data(), - fitConf.getOwningMol().getNumAtoms(), - matrix.data()); + nAtoms, matrix.data()); + // now set the coordinates in the conformer; undo the shift of the reference + // shape to center of "mass" coordinates for (unsigned i = 0; i < nAtoms; ++i) { RDGeom::Point3D &pos = fitConf.getAtomPos(i); pos.x = transformed[i * 3] - finalTrans[0]; diff --git a/External/pubchem_shape/PubChemShape.hpp b/External/pubchem_shape/PubChemShape.hpp index 8d7717e61..1b093ce22 100644 --- a/External/pubchem_shape/PubChemShape.hpp +++ b/External/pubchem_shape/PubChemShape.hpp @@ -122,7 +122,8 @@ RDKIT_PUBCHEMSHAPE_EXPORT std::pair AlignShape( /*! \param finalTrans the final translation to apply to the fitConf coords. \param matrix the transformation matrix produced from alignment - \param fitShape the shape that was aligned + \param fitShape the shape that was aligned. The coord vector of this is + modified \param fitConf the conformation to be transformed */ RDKIT_PUBCHEMSHAPE_EXPORT void TransformConformer( diff --git a/External/pubchem_shape/test.cpp b/External/pubchem_shape/test.cpp index 8ce34c441..bc4d9c186 100644 --- a/External/pubchem_shape/test.cpp +++ b/External/pubchem_shape/test.cpp @@ -18,7 +18,7 @@ using namespace RDKit; constexpr double test_opt_param = 0.5; constexpr unsigned int test_max_preiters = 3; constexpr unsigned int test_max_postiters = 16; -constexpr double test_use_colors = true; +constexpr bool test_use_colors = true; TEST_CASE("basic alignment") { std::string dirName = getenv("RDBASE"); @@ -482,3 +482,44 @@ TEST_CASE("Atoms excluded from Color features") { auto shape2 = PrepareConformer(*m1, -1, opts); CHECK(shape2.coord.size() == 24); } + +TEST_CASE("Hs not properly transformed when hcount = feature count") { + std::string dirName = getenv("RDBASE"); + dirName += "/External/pubchem_shape/test_data"; + + SECTION("as reported") { + v2::FileParsers::MolFileParserParams ps; + ps.removeHs = false; + auto mol1 = + v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_1.mol", ps); + REQUIRE(mol1); + auto mol2 = + v2::FileParsers::MolFromMolFile(dirName + "/hcount_ex1_2.mol", ps); + REQUIRE(mol2); + + { + RWMol cp(*mol2); + std::vector matrix(12, 0.0); + auto [nbr_st, nbr_ct] = + AlignMolecule(*mol1, cp, matrix, -1, -1, true, 1.0, 30, 30); + CHECK_THAT(nbr_st, Catch::Matchers::WithinAbs(0.911, 0.005)); + CHECK_THAT(nbr_ct, Catch::Matchers::WithinAbs(0.555, 0.005)); + + // the bug led to H atoms in stupid positions, so we can detect it by just + // looking at bond lengths to Hs: + for (auto i = cp.getNumHeavyAtoms(); i < cp.getNumAtoms(); ++i) { + INFO("checking atom " << i); + auto at = cp.getAtomWithIdx(i); + for (auto nbr : cp.atomNeighbors(at)) { + auto dist = (cp.getConformer().getAtomPos(i) - + cp.getConformer().getAtomPos(nbr->getIdx())) + .length(); + CHECK(dist < 1.2); // should be a bond to H + } + } + + MolToMolFile(*mol1, "m1_out.mol"); + MolToMolFile(cp, "m2_out.mol"); + } + } +} diff --git a/External/pubchem_shape/test_data/hcount_ex1_1.mol b/External/pubchem_shape/test_data/hcount_ex1_1.mol new file mode 100644 index 000000000..855a97bc2 --- /dev/null +++ b/External/pubchem_shape/test_data/hcount_ex1_1.mol @@ -0,0 +1,70 @@ + + RDKit 3D + + 0 0 0 0 0 0 0 0 0 0999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 29 29 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 1.927916 -1.410215 0.383862 0 CFG=2 +M V30 2 C 2.864997 -0.453384 -0.304805 0 CFG=1 +M V30 3 C 2.541536 0.930071 0.181463 0 CFG=2 +M V30 4 C 1.121018 1.210562 -0.183540 0 CFG=1 +M V30 5 C 0.261228 0.172390 0.501275 0 CFG=1 +M V30 6 C -1.159913 0.416657 0.019581 0 CFG=2 +M V30 7 O 2.169981 -2.687972 -0.116436 0 +M V30 8 O 4.193894 -0.793759 -0.128449 0 +M V30 9 O 3.433090 1.889378 -0.256256 0 +M V30 10 O 0.700107 2.482459 0.148348 0 +M V30 11 O 0.605389 -1.111397 0.092338 0 +M V30 12 C -2.160471 -0.528416 0.600029 0 CFG=1 +M V30 13 P -3.780695 -0.072511 -0.095206 0 CFG=2 +M V30 14 O -3.776864 1.409843 -0.346918 0 +M V30 15 O -4.984733 -0.488974 1.023641 0 CHG=-1 +M V30 16 O -3.956480 -0.964732 -1.518928 0 CHG=-1 +M V30 17 H 2.150729 -1.413020 1.453945 0 +M V30 18 H 2.667693 -0.515429 -1.399309 0 +M V30 19 H 2.589219 0.897700 1.312392 0 +M V30 20 H 0.935618 1.063894 -1.281071 0 +M V30 21 H 0.300664 0.328483 1.587864 0 +M V30 22 H -1.207924 0.346742 -1.088884 0 +M V30 23 H -1.414630 1.447862 0.341830 0 +M V30 24 H 2.009793 -3.373820 0.575214 0 +M V30 25 H 4.515501 -0.644791 0.774293 0 +M V30 26 H 4.195831 1.894158 0.403443 0 +M V30 27 H 0.770315 3.058647 -0.652077 0 +M V30 28 H -1.892592 -1.579459 0.409174 0 +M V30 29 H -2.164770 -0.391014 1.718212 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 1 7 +M V30 3 1 1 11 +M V30 4 1 2 3 +M V30 5 1 2 8 +M V30 6 1 3 4 +M V30 7 1 3 9 +M V30 8 1 4 5 +M V30 9 1 4 10 +M V30 10 1 5 6 +M V30 11 1 5 11 +M V30 12 1 6 12 +M V30 13 1 12 13 +M V30 14 2 13 14 +M V30 15 1 13 15 CFG=1 +M V30 16 1 13 16 +M V30 17 1 1 17 CFG=1 +M V30 18 1 2 18 CFG=3 +M V30 19 1 3 19 CFG=1 +M V30 20 1 4 20 CFG=3 +M V30 21 1 5 21 CFG=1 +M V30 22 1 6 22 CFG=3 +M V30 23 1 6 23 +M V30 24 1 7 24 +M V30 25 1 8 25 +M V30 26 1 9 26 +M V30 27 1 10 27 +M V30 28 1 12 28 CFG=3 +M V30 29 1 12 29 +M V30 END BOND +M V30 END CTAB +M END diff --git a/External/pubchem_shape/test_data/hcount_ex1_2.mol b/External/pubchem_shape/test_data/hcount_ex1_2.mol new file mode 100644 index 000000000..3566c928d --- /dev/null +++ b/External/pubchem_shape/test_data/hcount_ex1_2.mol @@ -0,0 +1,70 @@ + + RDKit 3D + + 0 0 0 0 0 0 0 0 0 0999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 29 29 0 0 0 +M V30 BEGIN ATOM +M V30 1 C -2.271719 -1.297574 -0.275743 0 CFG=2 +M V30 2 C -2.960742 -0.108151 0.333533 0 CFG=1 +M V30 3 C -2.276603 1.142820 -0.215630 0 CFG=2 +M V30 4 C -0.807701 1.051813 0.200191 0 CFG=1 +M V30 5 C -0.257332 -0.229023 -0.412159 0 CFG=1 +M V30 6 C 1.176341 -0.487698 -0.052713 0 CFG=2 +M V30 7 O -2.896061 -2.497679 0.097593 0 +M V30 8 O -4.297986 -0.147526 0.010192 0 +M V30 9 O -2.895009 2.238490 0.335298 0 +M V30 10 O -0.151207 2.118049 -0.366709 0 +M V30 11 O -0.955838 -1.335529 0.032629 0 +M V30 12 C 2.155019 0.550499 -0.466310 0 CFG=1 +M V30 13 P 3.787109 -0.071015 0.108124 0 CFG=2 +M V30 14 O 3.682037 -0.278070 1.605143 0 +M V30 15 O 5.020474 0.957677 -0.343256 0 CHG=-1 +M V30 16 O 3.949218 -1.607084 -0.590182 0 CHG=-1 +M V30 17 H -2.337968 -1.187192 -1.406886 0 +M V30 18 H -2.815253 -0.154589 1.439834 0 +M V30 19 H -2.399154 1.143637 -1.297038 0 +M V30 20 H -0.756646 1.105598 1.281380 0 +M V30 21 H -0.384448 -0.168903 -1.509574 0 +M V30 22 H 1.462397 -1.433844 -0.578569 0 +M V30 23 H 1.289312 -0.706517 1.026045 0 +M V30 24 H -3.498652 -2.751301 -0.628180 0 +M V30 25 H -4.411239 0.237229 -0.877694 0 +M V30 26 H -3.390002 2.052112 1.183332 0 +M V30 27 H -0.316442 2.057553 -1.353106 0 +M V30 28 H 2.207336 0.733167 -1.537732 0 +M V30 29 H 2.028167 1.510524 0.115644 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 1 7 +M V30 3 1 1 11 +M V30 4 1 2 3 +M V30 5 1 2 8 +M V30 6 1 3 4 +M V30 7 1 3 9 +M V30 8 1 4 5 +M V30 9 1 4 10 +M V30 10 1 5 6 +M V30 11 1 5 11 +M V30 12 1 6 12 +M V30 13 1 12 13 +M V30 14 2 13 14 +M V30 15 1 13 15 CFG=1 +M V30 16 1 13 16 +M V30 17 1 1 17 CFG=3 +M V30 18 1 2 18 CFG=1 +M V30 19 1 3 19 CFG=3 +M V30 20 1 4 20 CFG=1 +M V30 21 1 5 21 CFG=3 +M V30 22 1 6 22 CFG=3 +M V30 23 1 6 23 +M V30 24 1 7 24 +M V30 25 1 8 25 +M V30 26 1 9 26 +M V30 27 1 10 27 +M V30 28 1 12 28 CFG=1 +M V30 29 1 12 29 +M V30 END BOND +M V30 END CTAB +M END