mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Ensure that StereoGroups don't have duplicate atoms or bonds (#9258)
* check for duplicate atoms/bonds in StereoGroups * explicit handling of duplicate stereogroup atoms in CTAB and CXSMILES parsers --------- Co-authored-by: = <=>
This commit is contained in:
@@ -245,7 +245,21 @@ std::string parseEnhancedStereo(std::istream *inStream, unsigned int &line,
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
ss >> index;
|
||||
// atoms are 1 indexed in molfiles
|
||||
atoms.push_back(mol->getAtomWithIdx(index - 1));
|
||||
auto atom = mol->getAtomWithIdx(index - 1);
|
||||
if (std::ranges::find(atoms, atom) != atoms.end()) {
|
||||
std::string message =
|
||||
(boost::format(
|
||||
"Atom %1% appears more than once in stereo group specification on line %2%!") %
|
||||
index % line)
|
||||
.str();
|
||||
if (strictParsing) {
|
||||
throw FileParseException(message);
|
||||
} else {
|
||||
BOOST_LOG(rdWarningLog) << message << std::endl;
|
||||
}
|
||||
} else {
|
||||
atoms.push_back(atom);
|
||||
}
|
||||
}
|
||||
std::vector<Bond *> newBonds;
|
||||
groups.emplace_back(grouptype, std::move(atoms), std::move(newBonds),
|
||||
|
||||
@@ -4107,7 +4107,9 @@ M V30 1 1 1 2
|
||||
M V30 END BOND
|
||||
M V30 END CTAB
|
||||
M END)CTAB";
|
||||
{ REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException); }
|
||||
{
|
||||
REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException);
|
||||
}
|
||||
{
|
||||
bool sanitize = true;
|
||||
bool removeHs = true;
|
||||
@@ -8059,4 +8061,48 @@ M END
|
||||
}
|
||||
CHECK(foundPyrroleN);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("duplicates in stereo groups") {
|
||||
auto ctab = R"CTAB(
|
||||
Mrv1642508181718102D
|
||||
|
||||
0 0 0 0 0 999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 8 7 0 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C -5.2222 4.7778 0 0 CFG=2
|
||||
M V30 2 Br -6.9685 5.5478 0 0
|
||||
M V30 3 C -5.0557 3.2468 0 0
|
||||
M V30 4 C -3.9796 5.6874 0 0 CFG=2
|
||||
M V30 5 C -2.5705 5.0661 0 0 CFG=1
|
||||
M V30 6 F -1.3279 5.9758 0 0
|
||||
M V30 7 C -4.1461 7.2184 0 0
|
||||
M V30 8 C -2.404 3.5352 0 0
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 1 4
|
||||
M V30 3 1 4 5
|
||||
M V30 4 1 5 6
|
||||
M V30 5 1 4 7 CFG=1
|
||||
M V30 6 1 5 8 CFG=1
|
||||
M V30 7 1 1 3 CFG=1
|
||||
M V30 END BOND
|
||||
M V30 BEGIN COLLECTION
|
||||
M V30 MDLV30/STEREL1 ATOMS=(2 4 4)
|
||||
M V30 END COLLECTION
|
||||
M V30 END CTAB
|
||||
M END
|
||||
)CTAB";
|
||||
// CHECK_THROWS_AS(v2::FileParsers::MolFromMolBlock(ctab),
|
||||
// FileParseException);
|
||||
v2::FileParsers::MolFileParserParams params;
|
||||
params.strictParsing = false;
|
||||
auto m = v2::FileParsers::MolFromMolBlock(ctab, params);
|
||||
REQUIRE(m);
|
||||
const auto &stgs = m->getStereoGroups();
|
||||
REQUIRE(stgs.size() == 1);
|
||||
CHECK(stgs.front().getGroupType() == StereoGroupType::STEREO_OR);
|
||||
CHECK(stgs.front().getAtoms().size() == 1);
|
||||
}
|
||||
@@ -1364,6 +1364,13 @@ bool parse_enhanced_stereo(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
<< "Atom " << aidx << " not found!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (std::ranges::find(atoms, atom) != atoms.end()) {
|
||||
BOOST_LOG(rdWarningLog)
|
||||
<< "Atom " << aidx
|
||||
<< " appears more than once in stereo group specification!"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
atoms.push_back(atom);
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -1731,4 +1731,12 @@ TEST_CASE("atom maps and dummy labels in CXSMILES") {
|
||||
m->getAtomWithIdx(2)->setProp(common_properties::dummyLabel, "R1");
|
||||
CHECK(MolToCXSmiles(*m) == "CC[*:1] |atomProp:2.dummyLabel.R1|");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("duplicate atoms in StereoGroup") {
|
||||
SECTION("as reported") {
|
||||
std::string smiles = "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:3,3|";
|
||||
|
||||
CHECK_THROWS_AS(SmilesToMol(smiles), SmilesParseException);
|
||||
}
|
||||
}
|
||||
@@ -42,14 +42,24 @@ void assignMissingIds(const boost::dynamic_bitset<> &ids, unsigned &nextId,
|
||||
sg.setWriteId(nextId);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void checkForDupes(const std::vector<T *> &vec, const std::string &typeName) {
|
||||
for (auto it = vec.cbegin(); it != vec.cend(); ++it) {
|
||||
if (std::find(vec.cbegin(), it, *it) != it) {
|
||||
throw ValueErrorException("Duplicate " + typeName + " in StereoGroup");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
StereoGroup::StereoGroup(StereoGroupType grouptype, std::vector<Atom *> &&atoms,
|
||||
std::vector<Bond *> &&bonds, unsigned readId)
|
||||
: d_grouptype(grouptype),
|
||||
d_atoms(atoms),
|
||||
d_bonds(bonds),
|
||||
d_readId{readId} {}
|
||||
: d_grouptype(grouptype), d_atoms(atoms), d_bonds(bonds), d_readId{readId} {
|
||||
checkForDupes(d_atoms, "atom");
|
||||
checkForDupes(d_bonds, "bond");
|
||||
}
|
||||
|
||||
StereoGroup::StereoGroup(StereoGroupType grouptype,
|
||||
const std::vector<Atom *> &atoms,
|
||||
@@ -57,7 +67,10 @@ StereoGroup::StereoGroup(StereoGroupType grouptype,
|
||||
: d_grouptype(grouptype),
|
||||
d_atoms(std::move(atoms)),
|
||||
d_bonds(std::move(bonds)),
|
||||
d_readId{readId} {}
|
||||
d_readId{readId} {
|
||||
checkForDupes(d_atoms, "atom");
|
||||
checkForDupes(d_bonds, "bond");
|
||||
}
|
||||
|
||||
StereoGroupType StereoGroup::getGroupType() const { return d_grouptype; }
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2018-2024 Greg Landrum and other RDKit contributors
|
||||
// Copyright (C) 2018-2026 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -5017,4 +5017,31 @@ TEST_CASE("canonical re-kekulization after sanitization preserves stereo",
|
||||
auto smi = MolToSmiles(*rwmol);
|
||||
CHECK(smi == refSmi);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("duplicate atoms/bonds in StereoGroups") {
|
||||
SECTION("atoms") {
|
||||
auto m = "C[C@H](O)C[C@H](F)Cl"_smiles;
|
||||
REQUIRE(m);
|
||||
|
||||
std::unique_ptr<StereoGroup> stg;
|
||||
CHECK_THROWS_AS(
|
||||
stg = std::make_unique<StereoGroup>(
|
||||
StereoGroupType::STEREO_ABSOLUTE,
|
||||
std::vector<Atom *>{m->getAtomWithIdx(1), m->getAtomWithIdx(4),
|
||||
m->getAtomWithIdx(1)},
|
||||
std::vector<Bond *>{}),
|
||||
ValueErrorException);
|
||||
}
|
||||
SECTION("bonds") {
|
||||
auto m = "C/C=C/c1ccc2ncccc2c1"_smiles;
|
||||
REQUIRE(m);
|
||||
|
||||
std::unique_ptr<StereoGroup> stg;
|
||||
CHECK_THROWS_AS(
|
||||
stg = std::make_unique<StereoGroup>(
|
||||
StereoGroupType::STEREO_ABSOLUTE, std::vector<Atom *>{},
|
||||
std::vector<Bond *>{m->getBondWithIdx(1), m->getBondWithIdx(1)}),
|
||||
ValueErrorException);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user