Ensure that StereoGroups don't have duplicate atoms or bonds (#9258)

* check for duplicate atoms/bonds in StereoGroups

* explicit handling of duplicate stereogroup atoms in CTAB and CXSMILES parsers

---------

Co-authored-by: = <=>
This commit is contained in:
Greg Landrum
2026-04-29 16:54:00 +02:00
committed by GitHub
parent cb251343b9
commit 251353a217
6 changed files with 123 additions and 8 deletions

View File

@@ -245,7 +245,21 @@ std::string parseEnhancedStereo(std::istream *inStream, unsigned int &line,
for (size_t i = 0; i < count; ++i) {
ss >> index;
// atoms are 1 indexed in molfiles
atoms.push_back(mol->getAtomWithIdx(index - 1));
auto atom = mol->getAtomWithIdx(index - 1);
if (std::ranges::find(atoms, atom) != atoms.end()) {
std::string message =
(boost::format(
"Atom %1% appears more than once in stereo group specification on line %2%!") %
index % line)
.str();
if (strictParsing) {
throw FileParseException(message);
} else {
BOOST_LOG(rdWarningLog) << message << std::endl;
}
} else {
atoms.push_back(atom);
}
}
std::vector<Bond *> newBonds;
groups.emplace_back(grouptype, std::move(atoms), std::move(newBonds),

View File

@@ -4107,7 +4107,9 @@ M V30 1 1 1 2
M V30 END BOND
M V30 END CTAB
M END)CTAB";
{ REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException); }
{
REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException);
}
{
bool sanitize = true;
bool removeHs = true;
@@ -8059,4 +8061,48 @@ M END
}
CHECK(foundPyrroleN);
}
}
TEST_CASE("duplicates in stereo groups") {
auto ctab = R"CTAB(
Mrv1642508181718102D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 8 7 0 0 0
M V30 BEGIN ATOM
M V30 1 C -5.2222 4.7778 0 0 CFG=2
M V30 2 Br -6.9685 5.5478 0 0
M V30 3 C -5.0557 3.2468 0 0
M V30 4 C -3.9796 5.6874 0 0 CFG=2
M V30 5 C -2.5705 5.0661 0 0 CFG=1
M V30 6 F -1.3279 5.9758 0 0
M V30 7 C -4.1461 7.2184 0 0
M V30 8 C -2.404 3.5352 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 1 4
M V30 3 1 4 5
M V30 4 1 5 6
M V30 5 1 4 7 CFG=1
M V30 6 1 5 8 CFG=1
M V30 7 1 1 3 CFG=1
M V30 END BOND
M V30 BEGIN COLLECTION
M V30 MDLV30/STEREL1 ATOMS=(2 4 4)
M V30 END COLLECTION
M V30 END CTAB
M END
)CTAB";
// CHECK_THROWS_AS(v2::FileParsers::MolFromMolBlock(ctab),
// FileParseException);
v2::FileParsers::MolFileParserParams params;
params.strictParsing = false;
auto m = v2::FileParsers::MolFromMolBlock(ctab, params);
REQUIRE(m);
const auto &stgs = m->getStereoGroups();
REQUIRE(stgs.size() == 1);
CHECK(stgs.front().getGroupType() == StereoGroupType::STEREO_OR);
CHECK(stgs.front().getAtoms().size() == 1);
}

View File

@@ -1364,6 +1364,13 @@ bool parse_enhanced_stereo(Iterator &first, Iterator last, RDKit::RWMol &mol,
<< "Atom " << aidx << " not found!" << std::endl;
return false;
}
if (std::ranges::find(atoms, atom) != atoms.end()) {
BOOST_LOG(rdWarningLog)
<< "Atom " << aidx
<< " appears more than once in stereo group specification!"
<< std::endl;
return false;
}
atoms.push_back(atom);
}
} else {

View File

@@ -1731,4 +1731,12 @@ TEST_CASE("atom maps and dummy labels in CXSMILES") {
m->getAtomWithIdx(2)->setProp(common_properties::dummyLabel, "R1");
CHECK(MolToCXSmiles(*m) == "CC[*:1] |atomProp:2.dummyLabel.R1|");
}
}
TEST_CASE("duplicate atoms in StereoGroup") {
SECTION("as reported") {
std::string smiles = "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:3,3|";
CHECK_THROWS_AS(SmilesToMol(smiles), SmilesParseException);
}
}

View File

@@ -42,14 +42,24 @@ void assignMissingIds(const boost::dynamic_bitset<> &ids, unsigned &nextId,
sg.setWriteId(nextId);
}
};
template <typename T>
void checkForDupes(const std::vector<T *> &vec, const std::string &typeName) {
for (auto it = vec.cbegin(); it != vec.cend(); ++it) {
if (std::find(vec.cbegin(), it, *it) != it) {
throw ValueErrorException("Duplicate " + typeName + " in StereoGroup");
}
}
}
} // namespace
StereoGroup::StereoGroup(StereoGroupType grouptype, std::vector<Atom *> &&atoms,
std::vector<Bond *> &&bonds, unsigned readId)
: d_grouptype(grouptype),
d_atoms(atoms),
d_bonds(bonds),
d_readId{readId} {}
: d_grouptype(grouptype), d_atoms(atoms), d_bonds(bonds), d_readId{readId} {
checkForDupes(d_atoms, "atom");
checkForDupes(d_bonds, "bond");
}
StereoGroup::StereoGroup(StereoGroupType grouptype,
const std::vector<Atom *> &atoms,
@@ -57,7 +67,10 @@ StereoGroup::StereoGroup(StereoGroupType grouptype,
: d_grouptype(grouptype),
d_atoms(std::move(atoms)),
d_bonds(std::move(bonds)),
d_readId{readId} {}
d_readId{readId} {
checkForDupes(d_atoms, "atom");
checkForDupes(d_bonds, "bond");
}
StereoGroupType StereoGroup::getGroupType() const { return d_grouptype; }

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2024 Greg Landrum and other RDKit contributors
// Copyright (C) 2018-2026 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -5017,4 +5017,31 @@ TEST_CASE("canonical re-kekulization after sanitization preserves stereo",
auto smi = MolToSmiles(*rwmol);
CHECK(smi == refSmi);
}
}
TEST_CASE("duplicate atoms/bonds in StereoGroups") {
SECTION("atoms") {
auto m = "C[C@H](O)C[C@H](F)Cl"_smiles;
REQUIRE(m);
std::unique_ptr<StereoGroup> stg;
CHECK_THROWS_AS(
stg = std::make_unique<StereoGroup>(
StereoGroupType::STEREO_ABSOLUTE,
std::vector<Atom *>{m->getAtomWithIdx(1), m->getAtomWithIdx(4),
m->getAtomWithIdx(1)},
std::vector<Bond *>{}),
ValueErrorException);
}
SECTION("bonds") {
auto m = "C/C=C/c1ccc2ncccc2c1"_smiles;
REQUIRE(m);
std::unique_ptr<StereoGroup> stg;
CHECK_THROWS_AS(
stg = std::make_unique<StereoGroup>(
StereoGroupType::STEREO_ABSOLUTE, std::vector<Atom *>{},
std::vector<Bond *>{m->getBondWithIdx(1), m->getBondWithIdx(1)}),
ValueErrorException);
}
}