mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Allow abbreviations without XBonds (#8933)
* coding error checking on extraAttachAtoms * Allow abbreviations that do not include XBonds Fixes #8902
This commit is contained in:
@@ -30,11 +30,14 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
|
||||
prevAtomMapping) &&
|
||||
mol.getPropIfPresent(common_properties::origBondMapping, prevBondMapping);
|
||||
for (const auto &amatch : matches) {
|
||||
// throughout this remember that atom 0 in the match is the dummy
|
||||
// if we have bonds, then atom 0 in the match will be the dummy atom
|
||||
// and atom 1 will end up being the abbreviation.
|
||||
// If there are no bonds, atom 0 in the match will be the abbreviation.
|
||||
|
||||
// convert atom 1 to be the abbreviation so that we don't have to
|
||||
// worry about messing up chirality, etc.
|
||||
auto connectIdx = amatch.match.at(1).second;
|
||||
unsigned int whichAtom = amatch.abbrev.includesXBonds ? 1 : 0;
|
||||
auto connectIdx = amatch.match.at(whichAtom).second;
|
||||
auto connectingAtom = mol.getAtomWithIdx(connectIdx);
|
||||
connectingAtom->setProp(RDKit::common_properties::atomLabel,
|
||||
amatch.abbrev.label);
|
||||
@@ -55,7 +58,7 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
|
||||
// set the hybridization so these are drawn linearly
|
||||
connectingAtom->setHybridization(Atom::HybridizationType::SP);
|
||||
|
||||
for (unsigned int i = 2; i < amatch.match.size(); ++i) {
|
||||
for (unsigned int i = whichAtom + 1; i < amatch.match.size(); ++i) {
|
||||
const auto &pr = amatch.match.at(i);
|
||||
CHECK_INVARIANT(!atomsToRemove[pr.second], "overlapping matches");
|
||||
atomsToRemove.set(pr.second);
|
||||
@@ -84,8 +87,10 @@ void applyMatches(RWMol &mol, const std::vector<AbbreviationMatch> &matches) {
|
||||
}
|
||||
// make connections between any extraAttachAtoms and the connection point
|
||||
for (auto oaidx : amatch.abbrev.extraAttachAtoms) {
|
||||
auto oatom = mol.getAtomWithIdx(oaidx);
|
||||
CHECK_INVARIANT(oatom, "bad extra attachment atom index");
|
||||
int bondIdx = -1;
|
||||
for (const auto bond : mol.atomBonds(mol.getAtomWithIdx(oaidx))) {
|
||||
for (const auto bond : mol.atomBonds(oatom)) {
|
||||
if (bondsToRemove.test(bond->getIdx())) {
|
||||
CHECK_INVARIANT(bondIdx == -1, "bondIdx must be unique");
|
||||
bondIdx = bond->getIdx();
|
||||
@@ -253,7 +258,9 @@ RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(
|
||||
auto bnds = sg.getBonds();
|
||||
if (bnds.empty()) {
|
||||
BOOST_LOG(rdWarningLog) << "SUP group without any bonds" << std::endl;
|
||||
abbrevMatch.abbrev.includesXBonds = false;
|
||||
} else {
|
||||
abbrevMatch.abbrev.includesXBonds = true;
|
||||
bool firstAttachFound = false;
|
||||
for (unsigned int i = 0; i < bnds.size(); ++i) {
|
||||
auto bnd = mol.getBondWithIdx(bnds[i]);
|
||||
|
||||
@@ -28,6 +28,8 @@ struct RDKIT_ABBREVIATIONS_EXPORT AbbreviationDefinition {
|
||||
std::string smarts;
|
||||
std::shared_ptr<ROMol> mol; //!< optional
|
||||
std::vector<unsigned int> extraAttachAtoms; //!< optional
|
||||
bool includesXBonds = true; //! whether or not the abbreviation definition
|
||||
//! includes bonds to non-abbreviation atoms
|
||||
bool operator==(const AbbreviationDefinition &other) const {
|
||||
return label == other.label && displayLabel == other.displayLabel &&
|
||||
displayLabelW == other.displayLabelW && smarts == other.smarts;
|
||||
|
||||
@@ -69,7 +69,11 @@ BOOST_PYTHON_MODULE(rdAbbreviations) {
|
||||
"the label in a drawing when the bond comes from the west")
|
||||
.def_readwrite(
|
||||
"mol", &Abbreviations::AbbreviationDefinition::mol,
|
||||
"the query molecule (should have a dummy as the first atom)");
|
||||
"the query molecule (should have a dummy as the first atom if includesXBonds is true)")
|
||||
.def_readwrite("includesXBonds",
|
||||
&Abbreviations::AbbreviationDefinition::includesXBonds,
|
||||
"whether or not the abbreviation definition includes "
|
||||
"bonds to non-abbreviation atoms");
|
||||
|
||||
python::def("GetDefaultAbbreviations",
|
||||
&Abbreviations::Utils::getDefaultAbbreviations,
|
||||
|
||||
@@ -654,4 +654,67 @@ TEST_CASE("comparison") {
|
||||
CHECK(cp == abbrevs[0]);
|
||||
CHECK(cp != abbrevs[1]);
|
||||
CHECK(abbrevs[1] == abbrevs[1]);
|
||||
}
|
||||
|
||||
TEST_CASE("abbreviations without xbonds, Github #8902") {
|
||||
SECTION("as reported") {
|
||||
auto m = R"CTAB(ACID.mol
|
||||
ChemDraw10242518152D
|
||||
|
||||
0 0 0 0 0 999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 4 3 1 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 N 0.000000 -0.206250 0.000000 0 CHG=1
|
||||
M V30 2 O 0.714471 -0.618750 0.000000 0 CHG=-1
|
||||
M V30 3 O 0.000000 0.618750 0.000000 0
|
||||
M V30 4 O -0.714471 -0.618750 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 2 1 3
|
||||
M V30 3 1 1 4
|
||||
M V30 END BOND
|
||||
M V30 BEGIN SGROUP
|
||||
M V30 1 SUP 1 ATOMS=(4 1 2 3 4) LABEL=HNO3
|
||||
M V30 END SGROUP
|
||||
M V30 END CTAB
|
||||
M END)CTAB"_ctab;
|
||||
REQUIRE(m);
|
||||
CHECK(m->getNumAtoms() == 4);
|
||||
Abbreviations::condenseAbbreviationSubstanceGroups(*m);
|
||||
CHECK(m->getNumAtoms() == 1);
|
||||
CHECK(MolToCXSmiles(*m) == "* |(0,-0.20625,),$HNO3$|");
|
||||
}
|
||||
SECTION("no xbonds, but still connected") {
|
||||
auto m = R"CTAB(test
|
||||
ChemDraw10242518152D
|
||||
|
||||
0 0 0 0 0 999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 5 4 1 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 N 0.000000 -0.206250 0.000000 0 CHG=1
|
||||
M V30 2 O 0.714471 -0.618750 0.000000 0 CHG=-1
|
||||
M V30 3 O 0.000000 0.618750 0.000000 0
|
||||
M V30 4 O -0.714471 -0.618750 0.000000 0
|
||||
M V30 5 C 1.000000 0.206250 0.000000 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 1 3
|
||||
M V30 3 1 1 4
|
||||
M V30 4 1 1 5
|
||||
M V30 END BOND
|
||||
M V30 BEGIN SGROUP
|
||||
M V30 1 SUP 1 ATOMS=(4 1 2 3 4) LABEL=H2NO3
|
||||
M V30 END SGROUP
|
||||
M V30 END CTAB
|
||||
M END)CTAB"_ctab;
|
||||
REQUIRE(m);
|
||||
CHECK(m->getNumAtoms() == 5);
|
||||
Abbreviations::condenseAbbreviationSubstanceGroups(*m);
|
||||
CHECK(m->getNumAtoms() == 2);
|
||||
CHECK(MolToCXSmiles(*m) == "*C |(0,-0.20625,;1,0.20625,),$H2NO3;$|");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user