Allow abbreviations without XBonds (#8933)

* coding error checking on extraAttachAtoms

* Allow abbreviations that do not include XBonds

Fixes #8902
This commit is contained in:
Greg Landrum
2025-11-06 16:33:25 +01:00
committed by GitHub
parent c948297d98
commit 84bef7a66e
4 changed files with 81 additions and 5 deletions

View File

@@ -30,11 +30,14 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
prevAtomMapping) &&
mol.getPropIfPresent(common_properties::origBondMapping, prevBondMapping);
for (const auto &amatch : matches) {
// throughout this remember that atom 0 in the match is the dummy
// if we have bonds, then atom 0 in the match will be the dummy atom
// and atom 1 will end up being the abbreviation.
// If there are no bonds, atom 0 in the match will be the abbreviation.
// convert atom 1 to be the abbreviation so that we don't have to
// worry about messing up chirality, etc.
auto connectIdx = amatch.match.at(1).second;
unsigned int whichAtom = amatch.abbrev.includesXBonds ? 1 : 0;
auto connectIdx = amatch.match.at(whichAtom).second;
auto connectingAtom = mol.getAtomWithIdx(connectIdx);
connectingAtom->setProp(RDKit::common_properties::atomLabel,
amatch.abbrev.label);
@@ -55,7 +58,7 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
// set the hybridization so these are drawn linearly
connectingAtom->setHybridization(Atom::HybridizationType::SP);
for (unsigned int i = 2; i < amatch.match.size(); ++i) {
for (unsigned int i = whichAtom + 1; i < amatch.match.size(); ++i) {
const auto &pr = amatch.match.at(i);
CHECK_INVARIANT(!atomsToRemove[pr.second], "overlapping matches");
atomsToRemove.set(pr.second);
@@ -84,8 +87,10 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
}
// make connections between any extraAttachAtoms and the connection point
for (auto oaidx : amatch.abbrev.extraAttachAtoms) {
auto oatom = mol.getAtomWithIdx(oaidx);
CHECK_INVARIANT(oatom, "bad extra attachment atom index");
int bondIdx = -1;
for (const auto bond : mol.atomBonds(mol.getAtomWithIdx(oaidx))) {
for (const auto bond : mol.atomBonds(oatom)) {
if (bondsToRemove.test(bond->getIdx())) {
CHECK_INVARIANT(bondIdx == -1, "bondIdx must be unique");
bondIdx = bond->getIdx();
@@ -253,7 +258,9 @@ RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(
auto bnds = sg.getBonds();
if (bnds.empty()) {
BOOST_LOG(rdWarningLog) << "SUP group without any bonds" << std::endl;
abbrevMatch.abbrev.includesXBonds = false;
} else {
abbrevMatch.abbrev.includesXBonds = true;
bool firstAttachFound = false;
for (unsigned int i = 0; i < bnds.size(); ++i) {
auto bnd = mol.getBondWithIdx(bnds[i]);

View File

@@ -28,6 +28,8 @@ struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationDefinition {
std::string smarts;
std::shared_ptr<ROMol> mol; //!< optional
std::vector<unsigned int> extraAttachAtoms; //!< optional
bool includesXBonds = true; //! whether or not the abbreviation definition
//! includes bonds to non-abbreviation atoms
bool operator==(const AbbreviationDefinition &other) const {
return label == other.label && displayLabel == other.displayLabel &&
displayLabelW == other.displayLabelW && smarts == other.smarts;

View File

@@ -69,7 +69,11 @@ BOOST_PYTHON_MODULE(rdAbbreviations) {
"the label in a drawing when the bond comes from the west")
.def_readwrite(
"mol", &Abbreviations::AbbreviationDefinition::mol,
"the query molecule (should have a dummy as the first atom)");
"the query molecule (should have a dummy as the first atom if includesXBonds is true)")
.def_readwrite("includesXBonds",
&Abbreviations::AbbreviationDefinition::includesXBonds,
"whether or not the abbreviation definition includes "
"bonds to non-abbreviation atoms");
python::def("GetDefaultAbbreviations",
&Abbreviations::Utils::getDefaultAbbreviations,

View File

@@ -654,4 +654,67 @@ TEST_CASE("comparison") {
CHECK(cp == abbrevs[0]);
CHECK(cp != abbrevs[1]);
CHECK(abbrevs[1] == abbrevs[1]);
}
TEST_CASE("abbreviations without xbonds, Github #8902") {
SECTION("as reported") {
auto m = R"CTAB(ACID.mol
ChemDraw10242518152D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 4 3 1 0 0
M V30 BEGIN ATOM
M V30 1 N 0.000000 -0.206250 0.000000 0 CHG=1
M V30 2 O 0.714471 -0.618750 0.000000 0 CHG=-1
M V30 3 O 0.000000 0.618750 0.000000 0
M V30 4 O -0.714471 -0.618750 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 1 3
M V30 3 1 1 4
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 SUP 1 ATOMS=(4 1 2 3 4) LABEL=HNO3
M V30 END SGROUP
M V30 END CTAB
M END)CTAB"_ctab;
REQUIRE(m);
CHECK(m->getNumAtoms() == 4);
Abbreviations::condenseAbbreviationSubstanceGroups(*m);
CHECK(m->getNumAtoms() == 1);
CHECK(MolToCXSmiles(*m) == "* |(0,-0.20625,),$HNO3$|");
}
SECTION("no xbonds, but still connected") {
auto m = R"CTAB(test
ChemDraw10242518152D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 5 4 1 0 0
M V30 BEGIN ATOM
M V30 1 N 0.000000 -0.206250 0.000000 0 CHG=1
M V30 2 O 0.714471 -0.618750 0.000000 0 CHG=-1
M V30 3 O 0.000000 0.618750 0.000000 0
M V30 4 O -0.714471 -0.618750 0.000000 0
M V30 5 C 1.000000 0.206250 0.000000 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 1 3
M V30 3 1 1 4
M V30 4 1 1 5
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 SUP 1 ATOMS=(4 1 2 3 4) LABEL=H2NO3
M V30 END SGROUP
M V30 END CTAB
M END)CTAB"_ctab;
REQUIRE(m);
CHECK(m->getNumAtoms() == 5);
Abbreviations::condenseAbbreviationSubstanceGroups(*m);
CHECK(m->getNumAtoms() == 2);
CHECK(MolToCXSmiles(*m) == "*C |(0,-0.20625,;1,0.20625,),$H2NO3;$|");
}
}