From fbf4990709bfa050f71d179ae90e941b77c49c9c Mon Sep 17 00:00:00 2001 From: Ricardo Rodriguez Date: Thu, 2 Oct 2025 06:04:24 +0200 Subject: [PATCH] Fixes a leak in ChemDraw code (#8828) * clang format * refactor code * fix typos * do not leak bonds when deleting in batch mode * check sizes --- Code/GraphMol/MolOps.cpp | 2 +- Code/GraphMol/RWMol.cpp | 18 +++++- External/ChemDraw/chemdraw.cpp | 92 ++++++++++++--------------- External/ChemDraw/test.cpp | 49 +++++++-------- External/ChemDraw/utils.cpp | 110 +++++++++++++++++---------------- 5 files changed, 137 insertions(+), 134 deletions(-) diff --git a/Code/GraphMol/MolOps.cpp b/Code/GraphMol/MolOps.cpp index e22619f6a..c5130accf 100644 --- a/Code/GraphMol/MolOps.cpp +++ b/Code/GraphMol/MolOps.cpp @@ -1098,7 +1098,7 @@ std::vector> contiguousAtoms( // add to the molecule a dummy atom centred on the // atoms passed in, with a dative bond from it to the metal atom. void addHapticBond(RWMol &mol, unsigned int metalIdx, - std::vector hapticAtoms) { + const std::vector &hapticAtoms) { // So there is a * in the V3000 file as the symbol for the atom. auto dummyAt = new QueryAtom(0); dummyAt->setQuery(makeAtomNullQuery()); diff --git a/Code/GraphMol/RWMol.cpp b/Code/GraphMol/RWMol.cpp index d1d0babda..042e0db24 100644 --- a/Code/GraphMol/RWMol.cpp +++ b/Code/GraphMol/RWMol.cpp @@ -527,6 +527,17 @@ unsigned int RWMol::addBond(unsigned int atomIdx1, unsigned int atomIdx2, b->setBeginAtomIdx(atomIdx1); b->setEndAtomIdx(atomIdx2); + // we're in a batch edit, and at least one of the bond ends is scheduled + // for deletion, so mark the new bond for deletion too: + if (dp_delAtoms && + ((atomIdx1 < dp_delAtoms->size() && dp_delAtoms->test(atomIdx1)) || + (atomIdx2 < dp_delAtoms->size() && dp_delAtoms->test(atomIdx2)))) { + if (dp_delBonds->size() < numBonds) { + dp_delBonds->resize(numBonds); + } + dp_delBonds->set(numBonds - 1); + } + // if both atoms have a degree>1, reset our ring info structure, // because there's a non-trivial chance that it's now wrong. if (dp_ringInfo && dp_ringInfo->isInitialized() && @@ -800,10 +811,11 @@ void RWMol::batchRemoveAtoms() { Bond *bond = d_graph[*beg++]; if (bond->getPropIfPresent(RDKit::common_properties::_MolFileBondEndPts, sprop)) { - // This would ideally use ParseV3000Array but I'm buggered if I can get - // the linker to find it. + // This would ideally use ParseV3000Array but I'm buggered if I can + // get the linker to find it. // std::vector oats = - // RDKit::SGroupParsing::ParseV3000Array(sprop); + // RDKit::SGroupParsing::ParseV3000Array(sprop); if ('(' == sprop.front() && ')' == sprop.back()) { sprop = sprop.substr(1, sprop.length() - 2); diff --git a/External/ChemDraw/chemdraw.cpp b/External/ChemDraw/chemdraw.cpp index 7ca496a50..ac43620a9 100644 --- a/External/ChemDraw/chemdraw.cpp +++ b/External/ChemDraw/chemdraw.cpp @@ -91,7 +91,7 @@ void visit_children( if (id == kCDXObj_Fragment) { std::unique_ptr mol = std::make_unique(); if (!parseFragment(*mol, (CDXFragment &)(*frag.second), pagedata, - missing_frag_id)) { + missing_frag_id)) { continue; } unsigned int frag_id = mol->getProp(CDX_FRAG_ID); @@ -238,40 +238,41 @@ void visit_children( } CDXFormat sniff_format(std::istream &is) { - // Remember the current read position - std::streampos start_pos = is.tellg(); - if (start_pos == -1) { - // Some streams (like std::cin) may not support tellg - return CDXFormat::AUTO; // here it simply means we failed - } + // Remember the current read position + std::streampos start_pos = is.tellg(); + if (start_pos == -1) { + // Some streams (like std::cin) may not support tellg + return CDXFormat::AUTO; // here it simply means we failed + } - // CDX header consists of: - // 8 bytes with the value "VjCD0100" (hex: 56 6A 43 44 30 31 30 30). - CDXFormat format = CDXFormat::CDXML; - const std::vector header{86, 106, 67, 68, 48, 49, 48, 48}; - std::vector buf(8); - is.read(buf.data(), 8); - if (buf == header) { - format = CDXFormat::CDX; - } - - // Reset the stream position - is.clear(); // clear EOF flag if we hit it - is.seekg(start_pos); - return format; + // CDX header consists of: + // 8 bytes with the value "VjCD0100" (hex: 56 6A 43 44 30 31 30 30). + CDXFormat format = CDXFormat::CDXML; + const std::vector header{86, 106, 67, 68, 48, 49, 48, 48}; + std::vector buf(8); + is.read(buf.data(), 8); + if (buf == header) { + format = CDXFormat::CDX; + } + + // Reset the stream position + is.clear(); // clear EOF flag if we hit it + is.seekg(start_pos); + return format; } - + std::unique_ptr streamToCDXDocument(std::istream &inStream, CDXFormat format) { - if(format == CDXFormat::AUTO) { + if (format == CDXFormat::AUTO) { format = sniff_format(inStream); - if(format == CDXFormat::AUTO) { - const std::string msg = " Failed deducing whether the input stream is CDXML or CDX"; + if (format == CDXFormat::AUTO) { + const std::string msg = + " Failed deducing whether the input stream is CDXML or CDX"; BOOST_LOG(rdErrorLog) << msg << std::endl; throw FileParseException(msg); } } - + if (format == CDXFormat::CDXML) { CDXMLParser parser; // populate tree structure pt @@ -292,7 +293,8 @@ std::unique_ptr streamToCDXDocument(std::istream &inStream, } else { CDXistream input(inStream); const bool doThrow = true; - // if we aren't opened in binary mode on windows, we are going to have a bad time... + // if we aren't opened in binary mode on windows, we are going to have a bad + // time... try { std::unique_ptr doc(CDXReadDocFromStorage(input, doThrow)); return doc; @@ -303,12 +305,12 @@ std::unique_ptr streamToCDXDocument(std::istream &inStream, } } } - + // may raise FileParseException std::vector> molsFromCDXMLDataStream( std::istream &inStream, const ChemDrawParserParams ¶ms) { std::unique_ptr document = - streamToCDXDocument(inStream, params.format); + streamToCDXDocument(inStream, params.format); if (!document) { // error return std::vector>(); @@ -358,19 +360,12 @@ std::unique_ptr ChemDrawToDocument(const std::string &filename) { (std::string)std::filesystem::path(filename).extension().string(); throw FileParseException(msg.c_str()); } -} +} // namespace ChemDraw namespace v2 { std::vector> MolsFromChemDrawDataStream( std::istream &inStream, const ChemDrawParserParams ¶ms) { - auto chemdrawmols = molsFromCDXMLDataStream(inStream, params); - std::vector> mols; - mols.reserve(chemdrawmols.size()); - for (auto &mol : chemdrawmols) { - RWMol *m = (RWMol *)mol.release(); - mols.push_back(std::unique_ptr(m)); - } - return mols; + return molsFromCDXMLDataStream(inStream, params); } std::vector> MolsFromChemDrawBlock( @@ -383,28 +378,21 @@ std::vector> MolsFromChemDrawBlock( std::vector> MolsFromChemDrawFile( const std::string &filename, const ChemDrawParserParams ¶ms) { ChemDrawParserParams realparams{params}; - - std::vector> mols; - std::fstream chemdrawfile(filename, std::ios::in | std::ios::binary); // Always open in Binary mode + // Always open in Binary mode + std::fstream chemdrawfile(filename, std::ios::in | std::ios::binary); if (!chemdrawfile) { throw BadFileException(filename + " does not exist"); - return mols; } - if (params.format == CDXFormat::AUTO && sniff_format(chemdrawfile) == CDXFormat::CDX) { // need to reopen in binary mode + // need to reopen in binary mode + if (params.format == CDXFormat::AUTO && + sniff_format(chemdrawfile) == CDXFormat::CDX) { realparams.format = CDXFormat::CDX; } - auto chemdrawmols = molsFromCDXMLDataStream(chemdrawfile, realparams); - - mols.reserve(chemdrawmols.size()); - for (auto &mol : chemdrawmols) { - RWMol *m = (RWMol *)mol.release(); - mols.push_back(std::unique_ptr(m)); - } - return mols; -} + return molsFromCDXMLDataStream(chemdrawfile, realparams); } +} // namespace v2 } // namespace RDKit diff --git a/External/ChemDraw/test.cpp b/External/ChemDraw/test.cpp index 6e5549f8b..7824970c1 100644 --- a/External/ChemDraw/test.cpp +++ b/External/ChemDraw/test.cpp @@ -469,7 +469,7 @@ TEST_CASE("CDXML Advanced") { int i = 0; SmilesWriteParams wp; for (auto &mol : mols) { - auto tomol = std::unique_ptr((ROMol*)mol.release()); + auto tomol = std::unique_ptr((ROMol *)mol.release()); tomol.get()->clearConformers(); RDKit::canonicalizeStereoGroups(tomol); @@ -546,10 +546,11 @@ TEST_CASE("CDXML Advanced") { // there were so many stereo warnings in chemdraw, I'm just going to // assume the rdkit is correct here... auto fname = cdxmlbase + "chemdraw_template2.cdxml"; - std::string talatisamine = "CCN1C[C@]2(COC)CCC(OC)[C@@]34[C@@H]5C[C@@H]6C(OC)C[C@@](O)([C@H]5[C@H]6O)[C@@H](C[C@H]23)[C@H]14"; + std::string talatisamine = + "CCN1C[C@]2(COC)CCC(OC)[C@@]34[C@@H]5C[C@@H]6C(OC)C[C@@](O)([C@H]5[C@H]6O)[C@@H](C[C@H]23)[C@H]14"; auto mols = MolsFromChemDrawFile(fname); std::vector expected = { - talatisamine, //0 + talatisamine, // 0 "*", "C", "[F]", @@ -559,17 +560,17 @@ TEST_CASE("CDXML Advanced") { talatisamine, "*", "C", - "[F]", // 10 + "[F]", // 10 "[B]", "[C]", "[2H]", - talatisamine, + talatisamine, "*", "C", "[F]", "[B]", "[C]", - "[2H]", // 20 + "[2H]", // 20 talatisamine, "*", "C", @@ -578,8 +579,8 @@ TEST_CASE("CDXML Advanced") { "[C]", "[2H]", talatisamine, - "CCN1C[C@]2(COC)CC[C@H](OC)[C@]34C1C(C[C@H]23)[C@@]1(O)CC(OC)[C@H]2C[C@@H]4[C@@H]1[C@H]2O", - "*", // 30 + "CCN1C[C@]2(COC)CC[C@H](OC)[C@]34C1C(C[C@H]23)[C@@]1(O)CC(OC)[C@H]2C[C@@H]4[C@@H]1[C@H]2O", + "*", // 30 "[B]", "[C]", "[2H]", @@ -589,7 +590,7 @@ TEST_CASE("CDXML Advanced") { "C", "[F]", "[B]", - "[C]", // 40 + "[C]", // 40 "[2H]", talatisamine, "*", @@ -599,7 +600,7 @@ TEST_CASE("CDXML Advanced") { "[C]", "[2H]", talatisamine, - "*", // 50 + "*", // 50 "C", "[F]", "[B]", @@ -609,7 +610,7 @@ TEST_CASE("CDXML Advanced") { "CC1=C(C(C)C)[C@@H](O)[C@@]2(O)[C@@]3(C)CC(=O)O[C@@]4([C@H](O)C(C)CC[C@]34O)[C@@]12O", "CC1=C[C@@]23OC(=O)C[C@@](C)([C@@]2(O)CC1)[C@]1(O)[C@H](O)C2(C(C)C)OC2(C)[C@@]31O", "*", - "[B]", // 60 + "[B]", // 60 "[C]", "CC1CC[C@@H]2[C@]3(C)C[C@@H]4O[C@@]2(C1)C1[C@@H]3CC(C(C)C)C14C", "[2H]", @@ -1210,7 +1211,7 @@ TEST_CASE("Github #7501 - dative bonds") { ChemDrawParserParams params; auto mols = MolsFromChemDrawFile(fname, params); CHECK(MolToSmiles(*mols[0]) == - "C[CH2](C)->[Os]12<-[CH3]CC[NH]->1CC=[NH]->2"); // All datives to the + "C[CH2](C)->[Os]12<-[CH3]CC[NH]->1CC=[NH]->2"); // All datives to the // Osmium } } @@ -1224,18 +1225,14 @@ TEST_CASE("Synthesis-workshop") { auto mols = MolsFromChemDrawFile(fname); std::vector expected = { "CCC/C=C/C=C/C(=O)O[C@H]1/C(=C/C(=O)OC)C[C@H]2C[C@H]([C@@H](C)O)OC(=O)C[C@H](O)C[C@@H]3C[C@H](OC(C)=O)C(C)(C)[C@](O)(C[C@@H]4C/C(=C/C(=O)OC)C[C@H](/C=C/C(C)(C)[C@]1(O)O2)O4)O3", - "[B]", - "*", - "[C]", + "[B]", "*", "[C]", "Cc1ccc2n1[C@@H]1[C@@H]3O[C@]([C@H](C)O)(C=C2)[C@H]1c1ccc(C)n1[C@@H]3C", // this is may or may not be correct, but the structure is drawn // incorrectly. // There's a test below which fixes this "Cc1ccc2n1[C@H](C)C(=O)[C@@H]1[C@H]2C(=O)C=Cc2ccc(C)n21", - "Cc1ccc2ccc(=O)ccn12", - "Cc1cccn1[C@H](C)C=O", - "Cc1ccc2ccc([O-])cc[n+]1-2", - "Cc1ccc2ccc(=O)ccn12", + "Cc1ccc2ccc(=O)ccn12", "Cc1cccn1[C@H](C)C=O", + "Cc1ccc2ccc([O-])cc[n+]1-2", "Cc1ccc2ccc(=O)ccn12", "Cc1cccn1[C@H](C)C(C#N)O[Si](C)(C)C", "CC1CC[C@]2(O)[C@]3(C)C[C@]4(O)O[C@@]2([C@@H]1O)C1(O)C4(C)C(O)(C(C)C)[C@@H](OC(=O)c2ccc[nH]2)[C@]13O", "C=C(C)[C@H]1CC(=O)CC2=C(C1)[C@H]1C(=O)O[C@H]3C[C@@](C)(O)[C@@H](C2=O)[C@@H]13"}; @@ -1414,8 +1411,9 @@ TEST_CASE("Geometry") { } TEST_CASE("Test Reading Wrong File Format") { - std::string path = std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/CDXML/"; - SECTION("CDX as CDXML") { + std::string path = + std::string(getenv("RDBASE")) + "/Code/GraphMol/test_data/CDXML/"; + SECTION("CDX as CDXML") { auto fname = path + "ring-stereo1.cdx"; ChemDrawParserParams params; params.format = CDXFormat::CDXML; @@ -1427,7 +1425,7 @@ TEST_CASE("Test Reading Wrong File Format") { } REQUIRE(caught); } - } +} TEST_CASE("github8761") { std::string path = @@ -1436,11 +1434,11 @@ TEST_CASE("github8761") { auto fname = path + "US12404367-20250902-C00017.CDX"; auto mols = MolsFromChemDrawFile(fname); REQUIRE(mols.size()); - + fname = path + "US12404367-20250902-C00025.CDX"; mols = MolsFromChemDrawFile(fname); REQUIRE(mols.size()); - } + } SECTION("Failing Patents") { std::filesystem::path patents(path + "patents"); @@ -1450,8 +1448,7 @@ TEST_CASE("github8761") { auto mols = MolsFromChemDrawFile(entry.path().generic_string(), params); // we just don't want a crash - std:: - string block; + std::string block; std::fstream chemdrawfile(entry.path(), std::ios::in | std::ios::binary); std::stringstream buffer; buffer << chemdrawfile.rdbuf(); diff --git a/External/ChemDraw/utils.cpp b/External/ChemDraw/utils.cpp index da4612642..4379c443e 100644 --- a/External/ChemDraw/utils.cpp +++ b/External/ChemDraw/utils.cpp @@ -22,7 +22,7 @@ std::string NodeType(CDXNodeType nodetype) { case kCDXNodeType_Fragment: return "Fragment"; case kCDXNodeType_Formula: - return "Forumla"; + return "Formula"; case kCDXNodeType_GenericNickname: return "GenericNickname"; case kCDXNodeType_AnonymousAlternativeGroup: @@ -48,7 +48,7 @@ void scaleBonds(const ROMol &mol, Conformer &conf, double targetBondLength, double bondLength) { double avg_bond_length = 0.0; if (bondLength < 0) { - // If we don't have a bond length for any reason, just scale the avgerage + // If we don't have a bond length for any reason, just scale the average // bond length for (auto &bond : mol.bonds()) { avg_bond_length += (conf.getAtomPos(bond->getBeginAtomIdx()) - @@ -87,7 +87,7 @@ void set_fuse_label(Atom *atm, unsigned int idx) { struct FragmentReplacement { // R = Replacement // F = Fragment - // C = Conneciton + // C = Connection // C R C F F // N=*=C.*=CCC=* // label 1 1 1 @@ -101,14 +101,16 @@ struct FragmentReplacement { std::vector fragment_atoms; bool replace(RWMol &mol) { - if (!replacement_atom) return true; + if (!replacement_atom) { + return true; + } auto bond_ordering = replacement_atom->getProp>(CDX_BOND_ORDERING); // Find the connecting atoms and and do the replacement for (auto bond : mol.atomBonds(replacement_atom)) { - // find the position of the attachement bonds in the bond ordering + // find the position of the attachment bonds in the bond ordering unsigned bond_id = 0; if (!bond->getPropIfPresent(CDX_BOND_ID, bond_id)) { BOOST_LOG(rdWarningLog) @@ -117,18 +119,19 @@ struct FragmentReplacement { return false; } auto it = std::find(bond_ordering.begin(), bond_ordering.end(), bond_id); - if (it == bond_ordering.end()) return false; - - auto pos = std::distance(bond_ordering.begin(), it); - - if(pos < 0 || (size_t)pos >= fragment_atoms.size()) { - BOOST_LOG(rdWarningLog) << "bond ordering and number of atoms in fragment mismatch, can't attach fragment at bond:" - << bond_id - << std::endl; - - return false; + if (it == bond_ordering.end()) { + return false; } - + + auto pos = std::distance(bond_ordering.begin(), it); + if (pos < 0 || (size_t)pos >= fragment_atoms.size()) { + BOOST_LOG(rdWarningLog) + << "bond ordering and number of atoms in fragment mismatch, can't attach fragment at bond:" + << bond_id << std::endl; + + return false; + } + auto &xatom = fragment_atoms[pos]; for (auto &xbond : mol.atomBonds(xatom)) { @@ -224,11 +227,11 @@ Atom::ChiralType getChirality(ROMol &mol, Atom *center_atom, Conformer &conf) { for (auto &angle : angles) { bonds.push_back(angle.second); } - - if(bonds.size() < 3) { + + if (bonds.size() < 3) { return Atom::ChiralType::CHI_UNSPECIFIED; } - + auto nswaps = center_atom->getPerturbationOrder(bonds); if (bonds.size() == 3 && center_atom->getTotalNumHs() == 1) { ++nswaps; @@ -244,7 +247,7 @@ Atom::ChiralType getChirality(ROMol &mol, Atom *center_atom, Conformer &conf) { } return Atom::ChiralType::CHI_TETRAHEDRAL_CW; } - + return Atom::ChiralType::CHI_UNSPECIFIED; } } // namespace @@ -269,33 +272,36 @@ void checkChemDrawTetrahedralGeometries(RWMol &mol) { } } // If we have a cip code, might as well check it too - CDXAtomCIPType cip; - if (atom->getPropIfPresent(CDX_CIP, cip)) { - // assign, possibly wrong, initial stereo. - // note: we can probably deduce this through CDX_BOND_ORDERING, but - // I currenlty don't understand that well enough. - switch (cip) { - case kCDXCIPAtom_R: - if(!chiralityChanged) atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); - unsetTetrahedralAtoms.push_back(std::make_pair('R', atom)); - break; - case kCDXCIPAtom_r: - if(!chiralityChanged) atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); - unsetTetrahedralAtoms.push_back(std::make_pair('r', atom)); - break; - case kCDXCIPAtom_S: - if(!chiralityChanged) atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); - unsetTetrahedralAtoms.push_back(std::make_pair('S', atom)); - break; - case kCDXCIPAtom_s: - if(!chiralityChanged) atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CCW); - unsetTetrahedralAtoms.push_back(std::make_pair('s', atom)); - break; - default: - break; - } + CDXAtomCIPType cip; + if (atom->getPropIfPresent(CDX_CIP, cip)) { + // assign, possibly wrong, initial stereo. + // note: we can probably deduce this through CDX_BOND_ORDERING, but + // I currently don't understand that well enough. + switch (cip) { + case kCDXCIPAtom_R: + if (!chiralityChanged) + atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); + unsetTetrahedralAtoms.push_back(std::make_pair('R', atom)); + break; + case kCDXCIPAtom_r: + if (!chiralityChanged) + atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); + unsetTetrahedralAtoms.push_back(std::make_pair('r', atom)); + break; + case kCDXCIPAtom_S: + if (!chiralityChanged) + atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CW); + unsetTetrahedralAtoms.push_back(std::make_pair('S', atom)); + break; + case kCDXCIPAtom_s: + if (!chiralityChanged) + atom->setChiralTag(Atom::ChiralType::CHI_TETRAHEDRAL_CCW); + unsetTetrahedralAtoms.push_back(std::make_pair('s', atom)); + break; + default: + break; } - + } } // Now that we have missing chiralities, let's check the CIP codes and reset @@ -305,11 +311,11 @@ void checkChemDrawTetrahedralGeometries(RWMol &mol) { for (auto cipatom : unsetTetrahedralAtoms) { try { - CIPLabeler::assignCIPLabels(mol); - } catch (...) { - // can throw std::runtime error? - break; - } + CIPLabeler::assignCIPLabels(mol); + } catch (...) { + // can throw std::runtime error? + break; + } std::string cipcode; if (cipatom.second->getPropIfPresent( common_properties::_CIPCode, cipcode)) { @@ -335,5 +341,5 @@ void checkChemDrawTetrahedralGeometries(RWMol &mol) { MolOps::assignStereochemistry(mol, cleanIt, force); } } -} +} // namespace ChemDraw } // namespace RDKit