From d9850596aa1db532a2220ccf1d20f887fcdf8717 Mon Sep 17 00:00:00 2001 From: Ricardo Rodriguez Date: Fri, 17 Oct 2025 10:51:22 -0400 Subject: [PATCH] Fixes #8726 (#8874) * do not remove hydrides by default * add a minimal test * add release note about behavior change * require Hydrides to have degree 1 * also allow hydrides with degree 0 (ionic bond) * suggested changes --------- Co-authored-by: greg landrum --- Code/GraphMol/AddHs.cpp | 1 - Code/GraphMol/MolOps.h | 2 +- Code/GraphMol/Wrap/rough_test.py | 5 +++++ Code/GraphMol/catch_graphmol.cpp | 17 ++++++++++++++++- Code/GraphMol/catch_molops.cpp | 9 +++++++++ ReleaseNotes.md | 9 ++++++++- 6 files changed, 39 insertions(+), 4 deletions(-) diff --git a/Code/GraphMol/AddHs.cpp b/Code/GraphMol/AddHs.cpp index 528aaa8c9..64a045afc 100644 --- a/Code/GraphMol/AddHs.cpp +++ b/Code/GraphMol/AddHs.cpp @@ -901,7 +901,6 @@ bool shouldRemoveH(const RWMol &mol, const Atom *atom, } } } - if (!ps.removeHydrides && atom->getFormalCharge() == -1) { return false; } diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index 24dc1b286..d19fb14b7 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -332,7 +332,7 @@ struct RDKIT_GRAPHMOL_EXPORT RemoveHsParameters { bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */ bool updateExplicitCount = false; /**< DEPRECATED equivalent of updateExplicitCount */ - bool removeHydrides = true; /**< Removing Hydrides */ + bool removeHydrides = false; /**< Removing Hydrides */ bool removeNontetrahedralNeighbors = false; /**< remove Hs which are bonded to atoms with specified non-tetrahedral stereochemistry */ diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 9bced0806..738963090 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -6397,6 +6397,11 @@ M END m = Chem.MolFromSmiles('F[H-]F', smips) ps.removeHigherDegrees = True m = Chem.RemoveHs(m, ps) + self.assertEqual(m.GetNumAtoms(), 3) + m = Chem.MolFromSmiles('F[H-]F', smips) + ps.removeHigherDegrees = True + ps.removeHydrides = True + m = Chem.RemoveHs(m, ps) self.assertEqual(m.GetNumAtoms(), 2) m = Chem.MolFromSmiles('[H][H]', smips) diff --git a/Code/GraphMol/catch_graphmol.cpp b/Code/GraphMol/catch_graphmol.cpp index 1143cd8a7..b0ca4a95c 100644 --- a/Code/GraphMol/catch_graphmol.cpp +++ b/Code/GraphMol/catch_graphmol.cpp @@ -1107,6 +1107,14 @@ TEST_CASE("RemoveHsParameters", "[molops]") { ps.removeHigherDegrees = true; RWMol cp(*m); MolOps::removeHs(cp, ps); + CHECK(cp.getNumAtoms() == 3); // b/c removeHydrides is false by default + } + { + MolOps::RemoveHsParameters ps; + ps.removeHigherDegrees = true; + ps.removeHydrides = true; + RWMol cp(*m); + MolOps::removeHs(cp, ps); CHECK(cp.getNumAtoms() == 2); } } @@ -4925,4 +4933,11 @@ M END)CTAB"_ctab; REQUIRE(m2); } } -} \ No newline at end of file +} + +TEST_CASE("large smiles benchmark") { + std::string smiles(1000, 'C'); + auto m = v2::SmilesParse::MolFromSmiles(smiles); + REQUIRE(m); + CHECK(m->getNumAtoms() == 1000); +} diff --git a/Code/GraphMol/catch_molops.cpp b/Code/GraphMol/catch_molops.cpp index 295a30133..4f37a5c29 100644 --- a/Code/GraphMol/catch_molops.cpp +++ b/Code/GraphMol/catch_molops.cpp @@ -415,4 +415,13 @@ M END)CTAB"; CHECK(mol->getBondBetweenAtoms(10, 8)->getBondType() == Bond::BondType::SINGLE); } +} + +TEST_CASE("GitHub #8726: Do not remove hydrides by default") { + auto m = "[OH+][H-]"_smiles; + REQUIRE(m); + CHECK(m->getNumAtoms() == 2); + auto h_atom = m->getAtomWithIdx(1); + CHECK(h_atom->getAtomicNum() == 1); + CHECK(h_atom->getFormalCharge() == -1); } \ No newline at end of file diff --git a/ReleaseNotes.md b/ReleaseNotes.md index c80d7a91a..353b1e63b 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -14,7 +14,14 @@ GitHub) but some C++ class might have to be updated. - Simple AND queries are now merged into atoms. E.g. `[C&+]` now produces the the same result as `[C+]` when parsed as SMARTS. -- Molecules which do not have potential chiral centers or stereobonds will no longer have the "_CIPRank" atom property set by default. If you want to force the calculation of pseudo-CIP ranks, you can call `Chem.ComputeAtomCIPRanks()`. Note that if you just want a symmetry-aware canonical ranking of the atoms in a molecule, it is more efficient to use `Chem.CanonicalRankAtoms(mol, breakTies=False)`. +- Molecules which do not have potential chiral centers or stereobonds will no + longer have the "_CIPRank" atom property set by default. If you want to + force the calculation of pseudo-CIP ranks, you can call + `Chem.ComputeAtomCIPRanks()`. Note that if you just want a symmetry-aware + canonical ranking of the atoms in a molecule, it is more efficient to use + `Chem.CanonicalRankAtoms(mol, breakTies=False)`. +- The behavior of H removal has changed slightly: hydrides will no longer removed + by default, as this changes the global charge of the mol. ## New Features and Enhancements: