* Fixes #8654

* fix a logic error

* SWIG is sooooo much fun
This commit is contained in:
Greg Landrum
2025-07-30 05:56:52 +02:00
committed by GitHub
parent c20b2a906b
commit ccefda882d
8 changed files with 84 additions and 20 deletions

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2019-2022 Greg Landrum
// Copyright (C) 2019-2025 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -1084,4 +1084,36 @@ TEST_CASE("github #8405: some tautomer mismatches") {
}
}
}
}
TEST_CASE("github #8654: stereogroups incorrectly included in hash") {
SECTION("as reported") {
auto m =
"O=C(N[C@H]1C[C@@H](C(=O)O)[C@@H]2C[C@H]12)C1CC(=O)N(Cc2ccccn2)C1 |&1:3,5,9,11|"_smiles;
REQUIRE(m);
auto m2 =
"O=C(N[C@H]1C[C@@H](C(=O)O)[C@@H]2C[C@@H]21)C1CC(=O)N(Cc2ccccn2)C1"_smiles;
REQUIRE(m2);
bool useCxSmiles = true;
{
auto cxToSkip = SmilesWrite::CXSmilesFields::CX_ALL;
auto hsh1 =
MolHash::MolHash(m.get(), MolHash::HashFunction::HetAtomTautomerv2,
useCxSmiles, cxToSkip);
auto hsh2 =
MolHash::MolHash(m2.get(), MolHash::HashFunction::HetAtomTautomerv2,
useCxSmiles, cxToSkip);
CHECK(hsh1 == hsh2);
}
{
auto cxToSkip = SmilesWrite::CXSmilesFields::CX_ENHANCEDSTEREO;
auto hsh1 =
MolHash::MolHash(m.get(), MolHash::HashFunction::HetAtomTautomerv2,
useCxSmiles, cxToSkip);
auto hsh2 =
MolHash::MolHash(m2.get(), MolHash::HashFunction::HetAtomTautomerv2,
useCxSmiles, cxToSkip);
CHECK(hsh1 == hsh2);
}
}
}

View File

@@ -299,9 +299,12 @@ void NormalizeHCount(Atom *aptr) {
namespace {
std::string convertToSmilesWithCXFlags(
const RWMol &mol, bool doingCXSmiles,
const RWMol &mol, bool doingCXSmiles, unsigned cxFlagsToSkip,
SmilesWriteParams ps = SmilesWriteParams()) {
return SmilesWrite::detail::MolToSmiles(mol, ps, doingCXSmiles);
bool skipStereoGroups =
cxFlagsToSkip & SmilesWrite::CXSmilesFields::CX_ENHANCEDSTEREO;
return SmilesWrite::detail::MolToSmiles(mol, ps, doingCXSmiles,
!skipStereoGroups);
}
} // namespace
@@ -335,7 +338,7 @@ std::string AnonymousGraph(RWMol *mol, bool elem, bool useCXSmiles,
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (useCXSmiles) {
addCXExtensions(mol, result, cxFlagsToSkip | SmilesWrite::CX_RADICALS);
@@ -369,7 +372,7 @@ std::string MesomerHash(RWMol *mol, bool netq, bool useCXSmiles,
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (netq) {
sprintf(buffer, "_%d", charge);
result += buffer;
@@ -867,7 +870,7 @@ std::string TautomerHashv2(RWMol *mol, bool proto, bool useCXSmiles,
SmilesWriteParams ps;
ps.allBondsExplicit = true;
ps.allHsExplicit = true;
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, ps);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip, ps);
char buffer[32];
if (!proto) {
sprintf(buffer, "_%d_%d", hcount, charge);
@@ -917,7 +920,7 @@ std::string TautomerHash(RWMol *mol, bool proto, bool useCXSmiles,
bool cleanIt = true;
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (!proto) {
sprintf(buffer, "_%d_%d", hcount, charge);
} else {
@@ -1036,7 +1039,7 @@ std::string ExtendedMurckoScaffold(RWMol *mol, bool useCXSmiles,
MolOps::assignStereochemistry(*mol, cleanIt, force);
std::string result;
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (useCXSmiles) {
addCXExtensions(mol, result, cxFlagsToSkip | SmilesWrite::CX_RADICALS);
}
@@ -1080,7 +1083,7 @@ std::string MurckoScaffoldHash(RWMol *mol, bool useCXSmiles,
MolOps::assignStereochemistry(*mol, cleanIt, force);
std::string result;
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (useCXSmiles) {
addCXExtensions(mol, result, cxFlagsToSkip | SmilesWrite::CX_RADICALS);
}
@@ -1260,7 +1263,7 @@ std::string RegioisomerHash(RWMol *mol, bool useCXSmiles,
bool force = true;
MolOps::assignStereochemistry(*mol, cleanIt, force);
auto result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
auto result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (useCXSmiles) {
addCXExtensions(mol, result, cxFlagsToSkip);
}
@@ -1403,7 +1406,7 @@ std::string MolHash(RWMol *mol, HashFunction func, bool useCXSmiles,
result = AnonymousGraph(mol, true, useCXSmiles, cxFlagsToSkip);
break;
case HashFunction::CanonicalSmiles:
result = convertToSmilesWithCXFlags(*mol, useCXSmiles);
result = convertToSmilesWithCXFlags(*mol, useCXSmiles, cxFlagsToSkip);
if (useCXSmiles) {
addCXExtensions(mol, result, cxFlagsToSkip);
}

View File

@@ -491,7 +491,7 @@ static bool SortBasedOnFirstElement(
namespace SmilesWrite {
namespace detail {
std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
bool doingCXSmiles) {
bool doingCXSmiles, bool includeStereoGroups) {
if (!mol.getNumAtoms()) {
return "";
}
@@ -519,7 +519,7 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
rootedAtAtom = -1;
if (params.rootedAtAtom >= 0 && atsPresent[params.rootedAtAtom]) {
rootedAtAtom = params.rootedAtAtom - atsPresent.find_first();
rootedAtAtom = params.rootedAtAtom - atsPresent.find_first();
}
fragsRootedAtAtom.push_back(rootedAtAtom);
@@ -566,11 +566,13 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
MolOps::assignStereochemistry(*tmol, params.cleanStereo);
}
}
if (!doingCXSmiles) {
if (!doingCXSmiles || !includeStereoGroups) {
// remove any stereo groups that may be present. Otherwise they will be
// used in the canonicalization
std::vector<StereoGroup> noStereoGroups;
tmol->setStereoGroups(noStereoGroups);
}
if (!doingCXSmiles) {
// remove any wiggle bonds, unspecified double bond stereochemistry, or
// dative bonds (if we aren't doing dative bonds in the standard SMILES)
for (auto bond : tmol->bonds()) {
@@ -601,7 +603,6 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
}
}
// if we are doing CXSMILES, Hydrogen bonds are shown as single bonds
// in the smiles part, and are indicated with the H: block of the CX
// extensions
@@ -614,7 +615,6 @@ std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params,
}
}
rootedAtAtom = fragsRootedAtAtom[fragIdx];
if (params.doRandom && rootedAtAtom == -1) {
@@ -765,6 +765,8 @@ std::string MolToCXSmiles(const ROMol &romol,
RWMol trwmol(romol);
bool doingCXSmiles = true;
bool includeStereoGroups =
flags & SmilesWrite::CXSmilesFields::CX_ENHANCEDSTEREO;
SmilesWriteParams params = paramsInput;
// if kekule is to be done, and the bond attrs (wedging) is to be done, we
@@ -777,7 +779,8 @@ std::string MolToCXSmiles(const ROMol &romol,
params.doKekule = false;
}
auto res = SmilesWrite::detail::MolToSmiles(trwmol, params, doingCXSmiles);
auto res = SmilesWrite::detail::MolToSmiles(trwmol, params, doingCXSmiles,
includeStereoGroups);
if (res.empty()) {
return res;
}

View File

@@ -146,7 +146,7 @@ inline std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx = -1,
namespace detail {
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles);
const ROMol &mol, const SmilesWriteParams &params, bool doingCXSmiles, bool includeStereoGroups=true);
}
} // namespace SmilesWrite

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2021 Greg Landrum and other RDKit contributors
// Copyright (C) 2018-2025 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -3288,4 +3288,18 @@ TEST_CASE("github #8471: fail on bad characters in SMILES") {
REQUIRE(!m);
}
}
}
TEST_CASE(
"github #8654: stereogroups used in canonicalization even if not in output") {
SECTION("as reported") {
auto m =
"O=C(N[C@H]1C[C@@H](C(=O)O)[C@@H]2C[C@H]12)C1CC(=O)N(Cc2ccccn2)C1 |&1:3,5,9,11|"_smiles;
REQUIRE(m);
SmilesWriteParams ps;
unsigned int cxFlags = 0;
auto smi = MolToCXSmiles(*m, ps, cxFlags);
CHECK(smi ==
"O=C(N[C@H]1C[C@@H](C(=O)O)[C@@H]2C[C@@H]21)C1CC(=O)N(Cc2ccccn2)C1");
}
}

View File

@@ -37,5 +37,6 @@
%}
%include <RDGeneral/BetterEnums.h>
%ignore RDKit::SmilesWrite::detail::MolToSmiles;
%include <GraphMol/SmilesParse/SmilesWrite.h>
%include <GraphMol/SmilesParse/SmilesJSONParsers.h>

View File

@@ -130,7 +130,9 @@ def GetMolLayers(original_molecule: Chem.rdchem.Mol, data_field_names: Optional[
mol = _RemoveUnnecessaryHs(original_molecule, preserve_stereogenic_hs=True)
_StripAtomMapLabels(mol)
Chem.CanonicalizeEnhancedStereo(mol)
# we only canonicalize the stereo groups if we are actually outputting enhanced stereo:
if cxflag & Chem.CXSmilesFields.CX_ENHANCEDSTEREO:
Chem.CanonicalizeEnhancedStereo(mol)
formula = rdMolHash.MolHash(mol, rdMolHash.HashFunction.MolFormula)

View File

@@ -844,6 +844,15 @@ $$$$
self.assertEqual(smiExt2, '|wD:10.20|') # same atom, but "down" wedge on a different bond
self.assertEqual(smiExt2, tautExt2)
def testGithub8654(self):
m1 = Chem.MolFromSmiles(
'O=C(N[C@H]1C[C@@H](C(=O)O)[C@@H]2C[C@H]12)C1CC(=O)N(Cc2ccccn2)C1 |&1:3,5,9,11|')
m2 = Chem.MolFromSmiles(
'N(C(=O)C1CN(CC2=CC=CC=N2)C(=O)C1)[C@@H]3[C@@]4([C@@](C4)([C@H](C(O)=O)C3)[H])[H]')
layers1 = RegistrationHash.GetMolLayers(m1, cxflag=False)
layers2 = RegistrationHash.GetMolLayers(m2, cxflag=False)
self.assertEqual(layers1, layers2)
if __name__ == '__main__': # pragma: nocover
unittest.main()