diff --git a/Code/GraphMol/FileParsers/MolFileParser.cpp b/Code/GraphMol/FileParsers/MolFileParser.cpp index d5642c578..8bd021a4b 100644 --- a/Code/GraphMol/FileParsers/MolFileParser.cpp +++ b/Code/GraphMol/FileParsers/MolFileParser.cpp @@ -245,7 +245,21 @@ std::string parseEnhancedStereo(std::istream *inStream, unsigned int &line, for (size_t i = 0; i < count; ++i) { ss >> index; // atoms are 1 indexed in molfiles - atoms.push_back(mol->getAtomWithIdx(index - 1)); + auto atom = mol->getAtomWithIdx(index - 1); + if (std::ranges::find(atoms, atom) != atoms.end()) { + std::string message = + (boost::format( + "Atom %1% appears more than once in stereo group specification on line %2%!") % + index % line) + .str(); + if (strictParsing) { + throw FileParseException(message); + } else { + BOOST_LOG(rdWarningLog) << message << std::endl; + } + } else { + atoms.push_back(atom); + } } std::vector newBonds; groups.emplace_back(grouptype, std::move(atoms), std::move(newBonds), diff --git a/Code/GraphMol/FileParsers/file_parsers_catch.cpp b/Code/GraphMol/FileParsers/file_parsers_catch.cpp index df94c4bf8..adce444f2 100644 --- a/Code/GraphMol/FileParsers/file_parsers_catch.cpp +++ b/Code/GraphMol/FileParsers/file_parsers_catch.cpp @@ -4107,7 +4107,9 @@ M V30 1 1 1 2 M V30 END BOND M V30 END CTAB M END)CTAB"; - { REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException); } + { + REQUIRE_THROWS_AS(MolBlockToMol(ctab), FileParseException); + } { bool sanitize = true; bool removeHs = true; @@ -8059,4 +8061,48 @@ M END } CHECK(foundPyrroleN); } +} + +TEST_CASE("duplicates in stereo groups") { + auto ctab = R"CTAB( + Mrv1642508181718102D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 8 7 0 0 0 +M V30 BEGIN ATOM +M V30 1 C -5.2222 4.7778 0 0 CFG=2 +M V30 2 Br -6.9685 5.5478 0 0 +M V30 3 C -5.0557 3.2468 0 0 +M V30 4 C -3.9796 5.6874 0 0 CFG=2 +M V30 5 C -2.5705 5.0661 0 0 CFG=1 +M V30 6 F -1.3279 5.9758 0 0 +M V30 7 C -4.1461 7.2184 0 0 +M V30 8 C -2.404 3.5352 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 1 4 +M V30 3 1 4 5 +M V30 4 1 5 6 +M V30 5 1 4 7 CFG=1 +M V30 6 1 5 8 CFG=1 +M V30 7 1 1 3 CFG=1 +M V30 END BOND +M V30 BEGIN COLLECTION +M V30 MDLV30/STEREL1 ATOMS=(2 4 4) +M V30 END COLLECTION +M V30 END CTAB +M END +)CTAB"; + // CHECK_THROWS_AS(v2::FileParsers::MolFromMolBlock(ctab), + // FileParseException); + v2::FileParsers::MolFileParserParams params; + params.strictParsing = false; + auto m = v2::FileParsers::MolFromMolBlock(ctab, params); + REQUIRE(m); + const auto &stgs = m->getStereoGroups(); + REQUIRE(stgs.size() == 1); + CHECK(stgs.front().getGroupType() == StereoGroupType::STEREO_OR); + CHECK(stgs.front().getAtoms().size() == 1); } \ No newline at end of file diff --git a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp index ed460ce3c..143a3b40e 100644 --- a/Code/GraphMol/SmilesParse/CXSmilesOps.cpp +++ b/Code/GraphMol/SmilesParse/CXSmilesOps.cpp @@ -1364,6 +1364,13 @@ bool parse_enhanced_stereo(Iterator &first, Iterator last, RDKit::RWMol &mol, << "Atom " << aidx << " not found!" << std::endl; return false; } + if (std::ranges::find(atoms, atom) != atoms.end()) { + BOOST_LOG(rdWarningLog) + << "Atom " << aidx + << " appears more than once in stereo group specification!" + << std::endl; + return false; + } atoms.push_back(atom); } } else { diff --git a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp index 635e41ea8..8819b23ba 100644 --- a/Code/GraphMol/SmilesParse/cxsmiles_test.cpp +++ b/Code/GraphMol/SmilesParse/cxsmiles_test.cpp @@ -1731,4 +1731,12 @@ TEST_CASE("atom maps and dummy labels in CXSMILES") { m->getAtomWithIdx(2)->setProp(common_properties::dummyLabel, "R1"); CHECK(MolToCXSmiles(*m) == "CC[*:1] |atomProp:2.dummyLabel.R1|"); } +} + +TEST_CASE("duplicate atoms in StereoGroup") { + SECTION("as reported") { + std::string smiles = "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:3,3|"; + + CHECK_THROWS_AS(SmilesToMol(smiles), SmilesParseException); + } } \ No newline at end of file diff --git a/Code/GraphMol/StereoGroup.cpp b/Code/GraphMol/StereoGroup.cpp index 28038568e..5e04632e4 100644 --- a/Code/GraphMol/StereoGroup.cpp +++ b/Code/GraphMol/StereoGroup.cpp @@ -42,14 +42,24 @@ void assignMissingIds(const boost::dynamic_bitset<> &ids, unsigned &nextId, sg.setWriteId(nextId); } }; + +template +void checkForDupes(const std::vector &vec, const std::string &typeName) { + for (auto it = vec.cbegin(); it != vec.cend(); ++it) { + if (std::find(vec.cbegin(), it, *it) != it) { + throw ValueErrorException("Duplicate " + typeName + " in StereoGroup"); + } + } +} + } // namespace StereoGroup::StereoGroup(StereoGroupType grouptype, std::vector &&atoms, std::vector &&bonds, unsigned readId) - : d_grouptype(grouptype), - d_atoms(atoms), - d_bonds(bonds), - d_readId{readId} {} + : d_grouptype(grouptype), d_atoms(atoms), d_bonds(bonds), d_readId{readId} { + checkForDupes(d_atoms, "atom"); + checkForDupes(d_bonds, "bond"); +} StereoGroup::StereoGroup(StereoGroupType grouptype, const std::vector &atoms, @@ -57,7 +67,10 @@ StereoGroup::StereoGroup(StereoGroupType grouptype, : d_grouptype(grouptype), d_atoms(std::move(atoms)), d_bonds(std::move(bonds)), - d_readId{readId} {} + d_readId{readId} { + checkForDupes(d_atoms, "atom"); + checkForDupes(d_bonds, "bond"); +} StereoGroupType StereoGroup::getGroupType() const { return d_grouptype; } diff --git a/Code/GraphMol/catch_graphmol.cpp b/Code/GraphMol/catch_graphmol.cpp index ce4ac0269..524cfa9ab 100644 --- a/Code/GraphMol/catch_graphmol.cpp +++ b/Code/GraphMol/catch_graphmol.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2018-2024 Greg Landrum and other RDKit contributors +// Copyright (C) 2018-2026 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -5017,4 +5017,31 @@ TEST_CASE("canonical re-kekulization after sanitization preserves stereo", auto smi = MolToSmiles(*rwmol); CHECK(smi == refSmi); } +} + +TEST_CASE("duplicate atoms/bonds in StereoGroups") { + SECTION("atoms") { + auto m = "C[C@H](O)C[C@H](F)Cl"_smiles; + REQUIRE(m); + + std::unique_ptr stg; + CHECK_THROWS_AS( + stg = std::make_unique( + StereoGroupType::STEREO_ABSOLUTE, + std::vector{m->getAtomWithIdx(1), m->getAtomWithIdx(4), + m->getAtomWithIdx(1)}, + std::vector{}), + ValueErrorException); + } + SECTION("bonds") { + auto m = "C/C=C/c1ccc2ncccc2c1"_smiles; + REQUIRE(m); + + std::unique_ptr stg; + CHECK_THROWS_AS( + stg = std::make_unique( + StereoGroupType::STEREO_ABSOLUTE, std::vector{}, + std::vector{m->getBondWithIdx(1), m->getBondWithIdx(1)}), + ValueErrorException); + } } \ No newline at end of file