Add the useRingTemplates option to generateDepictionMatching2DStructure (#8688)

* Add the param

* add a C++ test

* wrap it for Python

* mirror the C++ test in Python
This commit is contained in:
Ricardo Rodriguez
2025-08-25 13:39:57 -04:00
committed by greg landrum
parent 5d082663b3
commit f1621c4e30
5 changed files with 194 additions and 24 deletions

View File

@@ -772,7 +772,8 @@ void generateDepictionMatching2DStructure(
RDGeom::Transform3D trans;
if (p.alignOnly) {
if (!hasExistingCoords) {
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false, p.forceRDKit);
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false, p.forceRDKit,
p.useRingTemplates);
}
RDKit::MatchVectType atomMap(refMatchVect.size());
std::transform(
@@ -796,7 +797,7 @@ void generateDepictionMatching2DStructure(
auto newConfId = compute2DCoords(
mol, &coordMap, false /* canonOrient */,
!(p.adjustMolBlockWedging && hasExistingCoords) /* clearConfs */, 0, 0,
0, false, p.forceRDKit);
0, false, p.forceRDKit, p.useRingTemplates);
if (p.adjustMolBlockWedging) {
// we need to clear the existing wedging information if:
// 1. the original molecule had no coordinates to start with
@@ -1032,8 +1033,8 @@ RDKit::MatchVectType generateDepictionMatching2DStructure(
// need to generate some before attempting the alignment
// and clear any existing wedging info if requested
if (!mol.getNumConformers()) {
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false,
p.forceRDKit);
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false, p.forceRDKit,
p.useRingTemplates);
if (p.adjustMolBlockWedging) {
RDKit::Chirality::clearMolBlockWedgingInfo(mol);
p.adjustMolBlockWedging = false;
@@ -1088,7 +1089,8 @@ RDKit::MatchVectType generateDepictionMatching2DStructure(
if (p.acceptFailure) {
// if we accept failure, we generate a standard set of
// coordinates and clear any existing wedging info if requested
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false, p.forceRDKit);
compute2DCoords(mol, nullptr, false, true, 0, 0, 0, false, p.forceRDKit,
p.useRingTemplates);
if (p.adjustMolBlockWedging) {
RDKit::Chirality::clearMolBlockWedgingInfo(mol);
}

View File

@@ -51,7 +51,7 @@ class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
void RDKIT_DEPICTOR_EXPORT
setRingSystemTemplates(const std::string templatePath);
//! \brief Add ring system templates to be used in 2D coordinater generation.
//! \brief Add ring system templates to be used in 2D coordinate generation.
/// If there are duplicates, the most recently added template will be used.
/*!
@@ -90,7 +90,6 @@ struct RDKIT_DEPICTOR_EXPORT Compute2DCoordParameters {
//!< preferCoordGen is set to true
bool useRingTemplates = false; //!< whether to use ring system templates for
//!< generating initial coordinates
};
//! \brief Generate 2D coordinates (a depiction) for a molecule
@@ -181,7 +180,7 @@ RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(
mol before adding a conformation
\param weightDistMat - A value between 0.0 and 1.0, this
determines the importance of mimicing the inter atoms
determines the importance of mimicking the inter atoms
distances in dmat. (1.0 - weightDistMat) is the weight associated
to spreading out the structure (density) in the cost function
@@ -238,6 +237,8 @@ struct RDKIT_DEPICTOR_EXPORT ConstrainedDepictionParams {
/// can be preserved following the constrained depiction (if
/// adjustMolBlockWedging is true)
int existingConfId = -1;
bool useRingTemplates = false; //!< whether to use ring system templates for
//!< generating initial coordinates
};
//! \brief Compute 2D coordinates where a piece of the molecule is
@@ -313,7 +314,7 @@ RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(
Existing coordinates in the conformation identified by
ConstrainedDepictionParams::existingConfId are preserved if present,
otherwise unconstrained new coordinates are generated.
Subsequently, coodinates undergo a rigid-body alignment to the reference.
Subsequently, coordinates undergo a rigid-body alignment to the reference.
If ConstrainedDepictionParams::adjustMolBlockWedging is false
(default), existing molblock wedging information is always preserved.
If ConstrainedDepictionParams::adjustMolBlockWedging is true,
@@ -337,7 +338,7 @@ RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(
\param params - (optional) a ConstrainedDepictionParams instance
RETURNS:
\return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
\return MatchVectType with (queryAtomIdx, molAtomIdx) pairs used for
the constrained depiction
*/
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(
@@ -378,7 +379,7 @@ RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(
depiction is still attempted
RETURNS:
\return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
\return MatchVectType with (queryAtomIdx, molAtomIdx) pairs used for
the constrained depiction
*/
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(

View File

@@ -325,7 +325,10 @@ BOOST_PYTHON_MODULE(rdDepictor) {
"rigid-body aligned to the reference (if alignOnly is True), "
"or used to determine whether existing molblock wedging information "
"can be preserved following the constrained depiction (if "
"adjustMolBlockWedging is True");
"adjustMolBlockWedging is True")
.def_readwrite(
"useRingTemplates", &ConstrainedDepictionParams::useRingTemplates,
"use templates to generate coordinates of complex ring systems");
std::string docString;
docString =

View File

@@ -1067,6 +1067,76 @@ M END
self.assertEqual(
len(rdDepictor.GenerateDepictionMatching2DStructure(mol, templateRef, params=p)), 7)
def testGenerateDepictionMatching2DStructureWithRingTemplates(self):
align_ref_mol = Chem.MolFromMolBlock(R"""
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 6 6 0 0 0
M V30 BEGIN ATOM
M V30 1 N -5.242424 0.787879 0.000000 0
M V30 2 C -4.492420 2.086915 0.000000 0
M V30 3 C -2.992419 2.086916 0.000000 0
M V30 4 C -2.242418 0.787879 0.000000 0
M V30 5 C -2.992416 -0.511157 0.000000 0
M V30 6 C -4.492420 -0.511158 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 3
M V30 2 2 3 4
M V30 3 1 4 5
M V30 4 2 5 6
M V30 5 2 2 1
M V30 6 1 1 6
M V30 END BOND
M V30 END CTAB
M END
$$$$
""")
mol = Chem.MolFromSmiles(R"CC1=CC(C23C4C5C6C4C2C6C53)=CN=C1")
self.assertIsNotNone(align_ref_mol)
self.assertIsNotNone(mol)
ref_coords = align_ref_mol.GetConformer().GetPositions()
def check_match_coords(mol, match):
coords = mol.GetConformer().GetPositions()
return all(np.allclose(ref_coords[ref_idx], coords[idx]) for ref_idx, idx in match)
def has_weird_bonds(mol):
coords = mol.GetConformer().GetPositions()
for bond in mol.GetBonds():
length = np.linalg.norm(coords[bond.GetBeginAtomIdx()] - coords[bond.GetEndAtomIdx()])
if length < 1.0 or length > 2.0:
return True
return False
params = rdDepictor.ConstrainedDepictionParams()
params.useRingTemplates = False
matches = rdDepictor.GenerateDepictionMatching2DStructure(mol, align_ref_mol, params=params)
self.assertEqual(len(matches), 6)
self.assertTrue(check_match_coords(mol, matches))
# by default, RDkit's coordinate generation creates some weird bonds for cubane
self.assertTrue(has_weird_bonds(mol))
params.useRingTemplates = True
matches = rdDepictor.GenerateDepictionMatching2DStructure(mol, align_ref_mol, params=params)
self.assertEqual(len(matches), 6)
self.assertTrue(check_match_coords(mol, matches))
# when using ring templates, cubane bonds are all approximately
# the same length, which is reasonable
self.assertFalse(has_weird_bonds(mol))
if __name__ == '__main__':
unittest.main()

View File

@@ -8,6 +8,7 @@
// of the RDKit source tree.
//
#include <ranges>
#include <catch2/catch_all.hpp>
#include <GraphMol/MolAlign/AlignMolecules.h>
#include <GraphMol/RDKitBase.h>
@@ -215,19 +216,112 @@ TEST_CASE("octahedral", "[nontetrahedral]") {
}
TEST_CASE("use ring system templates") {
auto mol = "C1CCC2C(C1)C1CCN2NN1"_smiles;
RDDepict::Compute2DCoordParameters params;
RDDepict::compute2DCoords(*mol, params);
auto diff =
mol->getConformer().getAtomPos(10) - mol->getConformer().getAtomPos(11);
// when templates are not used, bond from 10-11 is very short
TEST_ASSERT(RDKit::feq(diff.length(), 0.116, .1));
SECTION("in compute2DCoords") {
auto mol = "C1CCC2C(C1)C1CCN2NN1"_smiles;
RDDepict::Compute2DCoordParameters params;
RDDepict::compute2DCoords(*mol, params);
auto diff =
mol->getConformer().getAtomPos(10) - mol->getConformer().getAtomPos(11);
// when templates are not used, bond from 10-11 is very short
TEST_ASSERT(RDKit::feq(diff.length(), 0.116, .1));
params.useRingTemplates = true;
RDDepict::compute2DCoords(*mol, params);
diff =
mol->getConformer().getAtomPos(10) - mol->getConformer().getAtomPos(11);
TEST_ASSERT(RDKit::feq(diff.length(), 1.0, .1))
params.useRingTemplates = true;
RDDepict::compute2DCoords(*mol, params);
diff =
mol->getConformer().getAtomPos(10) - mol->getConformer().getAtomPos(11);
TEST_ASSERT(RDKit::feq(diff.length(), 1.0, .1))
}
SECTION("in generateDepictionMatching2DStructure") {
auto align_ref_mol = R"CTAB(
RDKit 2D
0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 6 6 0 0 0
M V30 BEGIN ATOM
M V30 1 N -5.242424 0.787879 0.000000 0
M V30 2 C -4.492420 2.086915 0.000000 0
M V30 3 C -2.992419 2.086916 0.000000 0
M V30 4 C -2.242418 0.787879 0.000000 0
M V30 5 C -2.992416 -0.511157 0.000000 0
M V30 6 C -4.492420 -0.511158 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 3
M V30 2 2 3 4
M V30 3 1 4 5
M V30 4 2 5 6
M V30 5 2 2 1
M V30 6 1 1 6
M V30 END BOND
M V30 END CTAB
M END
$$$$
)CTAB"_ctab;
auto mol = "CC1=CC(C23C4C5C6C4C2C6C53)=CN=C1"_smiles;
REQUIRE(align_ref_mol);
REQUIRE(mol);
const auto &ref_coords = align_ref_mol->getConformer().getPositions();
// coordinates of the matched atoms must match the reference
auto check_match_coords = [](const auto &mol, const auto &ref_coords,
const MatchVectType &match) {
const auto &coords = mol.getConformer().getPositions();
return std::ranges::all_of(
match,
[&coords, &ref_coords](const auto &match_pair) {
auto diff =
ref_coords[match_pair.first] - coords[match_pair.second];
return RDKit::feq(diff.length(), 0.);
}
);
};
// whether the molecule has too long or too short bonds (thresholds are
// arbitrary)
auto has_weird_bonds = [](const auto &mol) {
const auto &coords = mol.getConformer().getPositions();
for (auto bond : mol.bonds()) {
auto diff =
coords[bond->getBeginAtomIdx()] - coords[bond->getEndAtomIdx()];
if (auto length = diff.length(); length < 1.0 || length > 2.0) {
return true;
}
};
return false;
};
RDDepict::ConstrainedDepictionParams params;
// without ring templates
params.useRingTemplates = false;
auto match = RDDepict::generateDepictionMatching2DStructure(
*mol, *align_ref_mol, -1, nullptr, params);
REQUIRE(match.size() == 6);
CHECK(check_match_coords(*mol, ref_coords, match) == true);
// by default, RDkit's coordinate generation creates some
// weird bonds for cubane
CHECK(has_weird_bonds(*mol) == true);
// with ring templates
params.useRingTemplates = true;
match = RDDepict::generateDepictionMatching2DStructure(*mol, *align_ref_mol,
-1, nullptr, params);
REQUIRE(match.size() == 6);
CHECK(check_match_coords(*mol, ref_coords, match) == true);
// when using ring templates, cubane bonds are all approximately
// the same length, which is reasonable
CHECK(has_weird_bonds(*mol) == false);
}
}
TEST_CASE("find core rings") {