diff --git a/Code/GraphMol/FileParsers/MolSGroupParsing.cpp b/Code/GraphMol/FileParsers/MolSGroupParsing.cpp index 9e4eb2fed..41f8c4887 100644 --- a/Code/GraphMol/FileParsers/MolSGroupParsing.cpp +++ b/Code/GraphMol/FileParsers/MolSGroupParsing.cpp @@ -1286,6 +1286,9 @@ std::string ParseV3000SGroupsBlock(std::istream *inStream, unsigned int &line, } std::getline(lineStream, label, '='); + if (label.empty()) { + continue; + } ParseV3000ParseLabel(label, lineStream, dataFields, line, sgroup, nSgroups, mol, strictParsing); parsedLabels.insert(label); @@ -1316,6 +1319,9 @@ std::string ParseV3000SGroupsBlock(std::istream *inStream, unsigned int &line, } std::getline(lineStream, label, '='); + if (label.empty()) { + continue; + } if (std::find(parsedLabels.begin(), parsedLabels.end(), label) == parsedLabels.end()) { ParseV3000ParseLabel(label, lineStream, dataFields, defaultLineNum, diff --git a/Code/GraphMol/FileParsers/SmilesMolSupplier.cpp b/Code/GraphMol/FileParsers/SmilesMolSupplier.cpp index 4e2147e33..024f981e7 100644 --- a/Code/GraphMol/FileParsers/SmilesMolSupplier.cpp +++ b/Code/GraphMol/FileParsers/SmilesMolSupplier.cpp @@ -174,11 +174,10 @@ std::unique_ptr SmilesMolSupplier::processLine(std::string inLine) { std::string pname, pval; if (d_props.size() > col) { pname = d_props[col]; - } else { + } + if(pname.empty()){ pname = "Column_"; - std::stringstream ss; - ss << col; - pname += ss.str(); + pname += std::to_string(col); } pval = recs[col]; diff --git a/Code/GraphMol/FileParsers/test_data/s1p_chembldoc89753.txt b/Code/GraphMol/FileParsers/test_data/s1p_chembldoc89753.txt new file mode 100644 index 000000000..378534aa9 --- /dev/null +++ b/Code/GraphMol/FileParsers/test_data/s1p_chembldoc89753.txt @@ -0,0 +1,41 @@ +,doc_id,molregno,standard_relation,standard_value,standard_units,standard_flag,standard_type,pchembl_value,canonical_smiles,compound_chembl_id +1285,89753,1824026,=,10.0,nM,1,EC50,8.0,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4C(CCCC(=O)O)NCCOc34,CHEMBL3359853 +1286,89753,1824025,=,5.012,nM,1,EC50,8.3,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4C(CCC(=O)O)NCCOc34,CHEMBL3359852 +1287,89753,1824024,=,2.512,nM,1,EC50,8.6,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4OCC(CCC(=O)O)NCc4c3,CHEMBL3359851 +1288,89753,1824027,=,50.12,nM,1,EC50,7.3,CC(C)Oc1ncc(cc1Cl)c2onc(n2)c3cccc4C(CCCC(=O)O)NCCOc34,CHEMBL3359854 +1289,89753,1824027,=,39.81,nM,1,EC50,7.4,CC(C)Oc1ncc(cc1Cl)c2onc(n2)c3cccc4C(CCCC(=O)O)NCCOc34,CHEMBL3359854 +1290,89753,1824023,=,5.012,nM,1,EC50,8.3,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4OCC(NCc4c3)C(=O)O,CHEMBL3359850 +1291,89753,1824022,=,5.012,nM,1,EC50,8.3,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3cccc4C(CC(=O)O)NCCc34,CHEMBL3359849 +1292,89753,1824021,=,19.95,nM,1,EC50,7.7,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4C(CC(=O)O)NCCc34,CHEMBL3359848 +1293,89753,1824020,=,7.943,nM,1,EC50,8.1,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CCN(CCCC(=O)O)CCc4c3,CHEMBL3359847 +1294,89753,1824019,=,7.943,nM,1,EC50,8.1,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4CCN(CCC(=O)O)CCc4c3,CHEMBL3359846 +1295,89753,1824018,=,3.9810000000000003,nM,1,EC50,8.4,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CCN(CC(=O)O)CCc4c3,CHEMBL3359845 +1296,89753,1824017,=,39.81,nM,1,EC50,7.4,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CN(CCCC(=O)O)CCOc34,CHEMBL3359844 +1297,89753,1824016,=,7.943,nM,1,EC50,8.1,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3cccc4CN(CC(=O)O)CCc34,CHEMBL3359843 +1298,89753,1824015,=,6.31,nM,1,EC50,8.2,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CN(CCC(=O)O)CCc4c3,CHEMBL3359842 +1299,89753,1824014,=,63.1,nM,1,EC50,7.2,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4OCCN(CCCC(=O)O)Cc4c3,CHEMBL3359841 +1300,89753,1824013,=,50.12,nM,1,EC50,7.3,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4OCCN(CCC(=O)O)Cc4c3,CHEMBL3359840 +1301,89753,1824012,=,125.89,nM,1,EC50,6.9,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4OCCN(CCC(=O)O)Cc4c3,CHEMBL3359839 +1302,89753,1824011,=,2511.89,nM,1,EC50,5.6,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CCN(CC(=O)O)CCc34,CHEMBL3359838 +1303,89753,1824557,=,158.49,nM,1,EC50,6.8,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CCN(CCC(=O)O)CCc34,CHEMBL3360379 +1304,89753,1824556,=,398.11,nM,1,EC50,6.4,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3cccc4CN(CCCC(=O)O)Cc34,CHEMBL3360378 +1305,89753,1824555,=,501.19,nM,1,EC50,6.3,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CN(CCCC(=O)O)Cc34,CHEMBL3360377 +1306,89753,1824554,=,316.23,nM,1,EC50,6.5,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CN(CCC(=O)O)Cc34,CHEMBL3360376 +1307,89753,1824553,=,630.96,nM,1,EC50,6.2,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CNCCCc34,CHEMBL3360375 +1308,89753,1824552,=,125.89,nM,1,EC50,6.9,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CNCCCc4c3,CHEMBL3360374 +1309,89753,1824551,=,398.11,nM,1,EC50,6.4,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4CNCCCc4c3,CHEMBL3360373 +1310,89753,1824550,=,79.43,nM,1,EC50,7.1,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CNCCOc4c3,CHEMBL3360372 +1311,89753,1824549,=,316.23,nM,1,EC50,6.5,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4CNCCOc4c3,CHEMBL3360371 +1312,89753,1824548,=,25.12,nM,1,EC50,7.6,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CCNCCc4c3,CHEMBL3360370 +1313,89753,1824547,=,158.49,nM,1,EC50,6.8,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4CCNCCc4c3,CHEMBL3360369 +1314,89753,1824546,=,25.12,nM,1,EC50,7.6,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4OCCNCc4c3,CHEMBL3360368 +1315,89753,1824545,=,125.89,nM,1,EC50,6.9,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4OCCNCc4c3,CHEMBL3360367 +1316,89753,1824544,=,79.43,nM,1,EC50,7.1,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CNCCOc34,CHEMBL3360366 +1317,89753,1824543,=,39.81,nM,1,EC50,7.4,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CNCCc34,CHEMBL3360365 +1318,89753,1824542,=,3.9810000000000003,nM,1,EC50,8.4,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3ccc4CNCCc4c3,CHEMBL3360364 +1319,89753,1824541,=,1.995,nM,1,EC50,8.7,CC(C)Oc1ccc(cc1C#N)c2onc(n2)c3cccc4CCNCCc34,CHEMBL3360363 +1320,89753,1824540,=,15.85,nM,1,EC50,7.8,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CCNCCc34,CHEMBL3360362 +1321,89753,1824539,=,7.943,nM,1,EC50,8.1,CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4CNCc34,CHEMBL3360361 +1322,89753,1824538,=,19.95,nM,1,EC50,7.7,[Na+].CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3cccc4c3cnn4CCC(=O)[O-],CHEMBL3360360 +1323,89753,1824537,=,31.62,nM,1,EC50,7.5,[Na+].CC(C)Oc1ccc(cc1Cl)c2onc(n2)c3ccc4c(cnn4CCC(=O)[O-])c3,CHEMBL3360359 +1324,89753,1824536,=,251.19,nM,1,EC50,6.6,[Na+].[O-]C(=O)c1cnn(Cc2ccc(cc2)c3noc(n3)c4cc(c5ccccc5)c(s4)C(F)(F)F)c1,CHEMBL3360358 diff --git a/Code/GraphMol/FileParsers/v2_file_parsers_catch.cpp b/Code/GraphMol/FileParsers/v2_file_parsers_catch.cpp index 3454e236c..e1935ba9e 100644 --- a/Code/GraphMol/FileParsers/v2_file_parsers_catch.cpp +++ b/Code/GraphMol/FileParsers/v2_file_parsers_catch.cpp @@ -57,4 +57,20 @@ M END CHECK(MolToCXSmiles(*mol2) == "*OC |(-2.50869,4.52002,;-3.3747,4.02,;-4.7083,3.25,),$_AP1;;$|"); } +} + +TEST_CASE("empty column names in SmilesMolSupplier") { + std::string rdbase = getenv("RDBASE"); + + std::string fName = + rdbase + "/Code/GraphMol/FileParsers/test_data/s1p_chembldoc89753.txt"; + SmilesMolSupplierParams params; + params.delimiter = ","; + params.smilesColumn = 9; + params.nameColumn = 10; + v2::FileParsers::SmilesMolSupplier suppl(fName, params); + auto mol = suppl.next(); + REQUIRE(mol); + CHECK(mol->hasProp("_Name")); + CHECK(mol->hasProp("Column_0")); } \ No newline at end of file diff --git a/Code/GraphMol/catch_graphmol.cpp b/Code/GraphMol/catch_graphmol.cpp index c9786ba1a..8d75f5ccf 100644 --- a/Code/GraphMol/catch_graphmol.cpp +++ b/Code/GraphMol/catch_graphmol.cpp @@ -4964,3 +4964,14 @@ TEST_CASE( CHECK(stg.getGroupType() == StereoGroupType::STEREO_ABSOLUTE); CHECK(stg.getAtoms().size() == 2); } + +TEST_CASE("github #9068: properties with empty names") { + SECTION("basics") { + auto m = "CCO"_smiles; + REQUIRE(m); + CHECK_THROWS_AS(m->setProp("", "some value"), ValueErrorException); + CHECK_THROWS_AS(m->getProp(""), KeyErrorException); + CHECK(!m->hasProp("")); + CHECK_NOTHROW(m->clearProp("")); + } +} \ No newline at end of file diff --git a/Code/RDGeneral/Dict.h b/Code/RDGeneral/Dict.h index 90e9bb5ae..3b4b1d6c4 100644 --- a/Code/RDGeneral/Dict.h +++ b/Code/RDGeneral/Dict.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2003-2021 Greg Landrum and other RDKit contributors +// Copyright (C) 2003-2026 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -265,6 +265,9 @@ class RDKIT_RDGENERAL_EXPORT Dict { void setVal(const std::string_view what, T &val) { static_assert(!std::is_same_v, "T cannot be string_view"); + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } _hasNonPodData = true; for (auto &&data : _data) { if (data.key == what) { @@ -280,6 +283,9 @@ class RDKIT_RDGENERAL_EXPORT Dict { void setPODVal(const std::string_view what, T val) { static_assert(!std::is_same_v, "T cannot be string_view"); + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } // don't change the hasNonPodData status for (auto &&data : _data) { if (data.key == what) { @@ -291,20 +297,45 @@ class RDKIT_RDGENERAL_EXPORT Dict { _data.push_back(Pair(what, val)); } - void setVal(const std::string_view what, bool val) { setPODVal(what, val); } + void setVal(const std::string_view what, bool val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } + setPODVal(what, val); + } - void setVal(const std::string_view what, double val) { setPODVal(what, val); } + void setVal(const std::string_view what, double val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } + setPODVal(what, val); + } - void setVal(const std::string_view what, float val) { setPODVal(what, val); } - - void setVal(const std::string_view what, int val) { setPODVal(what, val); } + void setVal(const std::string_view what, float val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } + setPODVal(what, val); + } + void setVal(const std::string_view what, int val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } + setPODVal(what, val); + } void setVal(const std::string_view what, unsigned int val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } setPODVal(what, val); } //! \overload void setVal(const std::string_view what, const char *val) { + if (what.empty()) { + throw ValueErrorException("Cannot set value with empty key"); + } std::string h(val); setVal(what, h); } diff --git a/Code/RDGeneral/RDProps.h b/Code/RDGeneral/RDProps.h index 2eec9699e..d7efff136 100644 --- a/Code/RDGeneral/RDProps.h +++ b/Code/RDGeneral/RDProps.h @@ -1,3 +1,6 @@ +// Copyright (C) 2016-2026 Brian Kelley and other RDKit contributors +// @@ All Rights Reserved @@ +// // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root @@ -75,6 +78,9 @@ class RDProps { //! \overload template void setProp(const std::string_view key, T val, bool computed = false) const { + if(key.empty()) { + throw ValueErrorException("Cannot set property with empty key"); + } if (computed) { STR_VECT compLst; getPropIfPresent(RDKit::detail::computedPropName, compLst); diff --git a/Code/RDGeneral/catch_dict.cpp b/Code/RDGeneral/catch_dict.cpp index ab2a08933..952fad912 100644 --- a/Code/RDGeneral/catch_dict.cpp +++ b/Code/RDGeneral/catch_dict.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2021 Greg Landrum +// Copyright (C) 2021-2026 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -11,6 +11,7 @@ #include #include "Dict.h" #include "RDProps.h" +#include "Exceptions.h" using namespace std::string_literals; TEST_CASE("Dict move semantics") { @@ -70,3 +71,25 @@ TEST_CASE("RDProps move semantics") { CHECK(!d1.hasProp("bar"s)); } } +TEST_CASE("github #9068: properties with empty names") { + RDKit::RDProps props; + SECTION("setProp with empty key") { + CHECK_THROWS_AS(props.setProp("", 1), ValueErrorException); + } + SECTION("getProp with empty key") { + CHECK_THROWS_AS(props.getProp(""), KeyErrorException); + } + SECTION("hasProp with empty key") { CHECK(!props.hasProp("")); } + SECTION("clearProp with empty key") { CHECK_NOTHROW(props.clearProp("")); } +} +TEST_CASE("github #9068: dicts with empty keys") { + RDKit::Dict dict; + SECTION("setVal with empty key") { + CHECK_THROWS_AS(dict.setVal("", 1), ValueErrorException); + } + SECTION("getVal with empty key") { + CHECK_THROWS_AS(dict.getVal(""), KeyErrorException); + } + SECTION("hasVal with empty key") { CHECK(!dict.hasVal("")); } + SECTION("clearVal with empty key") { CHECK_NOTHROW(dict.clearVal("")); } +} \ No newline at end of file