mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* this is a fix, but it breaks other tests * all tests pass. This undoes the changes made as part of the "fix" for #5466 * update js tests * response to review --------- Co-authored-by: = <=>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2016-2021 Greg Landrum and other RDKit contributors
|
||||
// Copyright (C) 2016-2026 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -9,7 +9,6 @@
|
||||
//
|
||||
#include <RDGeneral/BoostStartInclude.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/format.hpp>
|
||||
#include <RDGeneral/BoostEndInclude.h>
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
@@ -28,6 +27,7 @@
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <string_view>
|
||||
#include <string>
|
||||
|
||||
namespace SmilesParseOps {
|
||||
using namespace RDKit;
|
||||
@@ -124,7 +124,7 @@ bool read_int(Iterator &first, Iterator last, unsigned int &res) {
|
||||
if (num.empty()) {
|
||||
return false;
|
||||
}
|
||||
res = boost::lexical_cast<unsigned int>(num);
|
||||
res = std::atoi(num.c_str());
|
||||
return true;
|
||||
}
|
||||
template <typename Iterator>
|
||||
@@ -137,7 +137,7 @@ bool read_int_list(Iterator &first, Iterator last,
|
||||
++first;
|
||||
}
|
||||
if (!num.empty()) {
|
||||
res.push_back(boost::lexical_cast<unsigned int>(num));
|
||||
res.push_back(std::atoi(num.c_str()));
|
||||
}
|
||||
if (first >= last || *first != sep) {
|
||||
break;
|
||||
@@ -182,7 +182,7 @@ std::string read_text_to(Iterator &first, Iterator last, std::string delims) {
|
||||
}
|
||||
if (next > first + 2) {
|
||||
std::string blk = std::string(first + 2, next);
|
||||
res += (char)(boost::lexical_cast<int>(blk));
|
||||
res += (char)(std::atoi(blk.c_str()));
|
||||
}
|
||||
first = next + 1;
|
||||
start = first;
|
||||
@@ -441,13 +441,13 @@ bool parse_coords(Iterator &first, Iterator last, RDKit::RWMol &mol,
|
||||
std::vector<std::string> tokens;
|
||||
boost::split(tokens, tkn, boost::is_any_of(std::string(",")));
|
||||
if (tokens.size() >= 1 && tokens[0].size()) {
|
||||
pt.x = boost::lexical_cast<double>(tokens[0]);
|
||||
pt.x = std::atof(tokens[0].c_str());
|
||||
}
|
||||
if (tokens.size() >= 2 && tokens[1].size()) {
|
||||
pt.y = boost::lexical_cast<double>(tokens[1]);
|
||||
pt.y = std::atof(tokens[1].c_str());
|
||||
}
|
||||
if (tokens.size() >= 3 && tokens[2].size()) {
|
||||
pt.z = boost::lexical_cast<double>(tokens[2]);
|
||||
pt.z = std::atof(tokens[2].c_str());
|
||||
is3D = true;
|
||||
}
|
||||
}
|
||||
@@ -1621,17 +1621,23 @@ getSortedStereoGroupsAndIndices(
|
||||
return {std::move(sgs), std::move(sgAtomIdxs)};
|
||||
}
|
||||
|
||||
std::string quote_string(const std::string &txt) {
|
||||
// FIX
|
||||
return txt;
|
||||
bool is_alphanumeric(char c) {
|
||||
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
|
||||
(c >= 'a' && c <= 'z');
|
||||
}
|
||||
|
||||
std::string quote_atomprop_string(const std::string &txt) {
|
||||
// at a bare minimum, . needs to be escaped
|
||||
// from:
|
||||
// https://docs.chemaxon.com/latest/formats_chemaxon-extended-smiles-and-smarts-cxsmiles-and-cxsmarts.html#escaping
|
||||
const std::string sgroupAllowedSpecialChars = "><\"!@#$%()[]./\\?-+*^_~= ";
|
||||
const std::string atomPropAllowedSpecialChars = "><\"!@#$%()[]./\\?-+*^_~= ";
|
||||
const std::string labelAllowedSpecialChars = "><\"!@#%()[]./\\?-+*^_~=,: ";
|
||||
std::string quote_string(const std::string &txt,
|
||||
std::string allowedSpecialChars) {
|
||||
std::string res;
|
||||
for (auto c : txt) {
|
||||
if (c == '.') {
|
||||
res += ".";
|
||||
if (!allowedSpecialChars.empty() && !is_alphanumeric(c) &&
|
||||
allowedSpecialChars.find(c) == std::string::npos) {
|
||||
res += "&#" + std::to_string(static_cast<unsigned int>(c)) + ";";
|
||||
} else {
|
||||
res += c;
|
||||
}
|
||||
@@ -1801,7 +1807,7 @@ std::string get_sgroup_polymer_block(
|
||||
res << ":";
|
||||
std::string label;
|
||||
if (sg.getPropIfPresent("LABEL", label)) {
|
||||
res << label;
|
||||
res << quote_string(label, sgroupAllowedSpecialChars);
|
||||
}
|
||||
res << ":";
|
||||
std::string connect;
|
||||
@@ -1876,13 +1882,13 @@ std::string get_sgroup_data_block(const ROMol &mol,
|
||||
res << ":";
|
||||
std::string prop;
|
||||
if (sg.getPropIfPresent("FIELDNAME", prop) && !prop.empty()) {
|
||||
res << prop;
|
||||
res << quote_string(prop, sgroupAllowedSpecialChars);
|
||||
}
|
||||
res << ":";
|
||||
std::vector<std::string> vprop;
|
||||
if (sg.getPropIfPresent("DATAFIELDS", vprop) && !vprop.empty()) {
|
||||
for (const auto &pv : vprop) {
|
||||
res << pv << ",";
|
||||
res << quote_string(pv, sgroupAllowedSpecialChars) << ",";
|
||||
}
|
||||
// remove the extra ",":
|
||||
res.seekp(-1, res.cur);
|
||||
@@ -1893,11 +1899,11 @@ std::string get_sgroup_data_block(const ROMol &mol,
|
||||
}
|
||||
res << ":";
|
||||
if (sg.getPropIfPresent("FIELDINFO", prop) && !prop.empty()) {
|
||||
res << prop;
|
||||
res << quote_string(prop, sgroupAllowedSpecialChars);
|
||||
}
|
||||
res << ":";
|
||||
if (sg.getPropIfPresent("FIELDTAG", prop) && !prop.empty()) {
|
||||
res << prop;
|
||||
res << quote_string(prop, sgroupAllowedSpecialChars);
|
||||
}
|
||||
res << ":";
|
||||
// FIX: do something about the coordinates
|
||||
@@ -1926,20 +1932,21 @@ std::string get_atomlabel_block(const ROMol &mol,
|
||||
const auto atom = mol.getAtomWithIdx(idx);
|
||||
if (atom->getPropIfPresent(common_properties::_QueryAtomGenericLabel,
|
||||
lbl)) {
|
||||
res += quote_string(lbl + "_p");
|
||||
res += quote_string(lbl + "_p", labelAllowedSpecialChars);
|
||||
} else if (!atom->getAtomicNum() &&
|
||||
atom->getPropIfPresent(common_properties::dummyLabel, lbl) &&
|
||||
std::find(SmilesParseOps::pseudoatoms.begin(),
|
||||
SmilesParseOps::pseudoatoms.end(),
|
||||
lbl) != SmilesParseOps::pseudoatoms.end()) {
|
||||
res += quote_string(lbl + "_p");
|
||||
res += quote_string(lbl + "_p", labelAllowedSpecialChars);
|
||||
} else if (!atom->getAtomicNum() &&
|
||||
atom->getPropIfPresent(common_properties::_fromAttachPoint,
|
||||
val) &&
|
||||
(val == 1 || val == 2)) {
|
||||
res += quote_string("_AP" + std::to_string(val));
|
||||
res +=
|
||||
quote_string("_AP" + std::to_string(val), labelAllowedSpecialChars);
|
||||
} else if (atom->getPropIfPresent(common_properties::atomLabel, lbl)) {
|
||||
res += quote_string(lbl);
|
||||
res += quote_string(lbl, labelAllowedSpecialChars);
|
||||
}
|
||||
}
|
||||
// if we didn't find anything return an empty string
|
||||
@@ -1963,7 +1970,7 @@ std::string get_value_block(const ROMol &mol,
|
||||
}
|
||||
std::string lbl;
|
||||
if (mol.getAtomWithIdx(idx)->getPropIfPresent(prop, lbl)) {
|
||||
res += quote_string(lbl);
|
||||
res += quote_string(lbl, atomPropAllowedSpecialChars);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@@ -2060,9 +2067,9 @@ std::string get_atom_props_block(const ROMol &mol,
|
||||
if (res.empty()) {
|
||||
res += "atomProp";
|
||||
}
|
||||
res +=
|
||||
boost::str(boost::format(":%d.%s.%s") % which %
|
||||
quote_atomprop_string(pn) % quote_atomprop_string(pv));
|
||||
res += boost::str(boost::format(":%d.%s.%s") % which %
|
||||
quote_string(pn, atomPropAllowedSpecialChars) %
|
||||
quote_string(pv, atomPropAllowedSpecialChars));
|
||||
}
|
||||
}
|
||||
++which;
|
||||
|
||||
@@ -256,7 +256,8 @@ TEST_CASE("github #2257: writing cxsmiles", "[smiles][cxsmiles]") {
|
||||
CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p1") == "v1;2;3");
|
||||
|
||||
auto smi = MolToCXSmiles(*mol);
|
||||
CHECK(smi == "CC1CN1 |atomProp:2.p2.v2:2.p1.v1;2;3:3.p2.v2:3.p1.v1|");
|
||||
CHECK(smi ==
|
||||
"CC1CN1 |atomProp:2.p2.v2:2.p1.v1;2;3:3.p2.v2:3.p1.v1|");
|
||||
}
|
||||
SECTION("atom props and values") {
|
||||
//"CN |$_AV:atomv0;atomv1$,atomProp:0.p2.v2:1.p2.v1|";
|
||||
@@ -2005,12 +2006,12 @@ TEST_CASE("github #5466 writing floating point atom props cxsmiles",
|
||||
|
||||
mol->getAtomWithIdx(0)->setProp<std::string>("foo", "7.6");
|
||||
auto smi = MolToCXSmiles(*mol);
|
||||
CHECK(smi == "C |atomProp:0.foo.7.6|");
|
||||
CHECK(smi == "C |atomProp:0.foo.7.6|");
|
||||
|
||||
// 7.5 is exactly representable in IEEE so this helps :)
|
||||
mol->getAtomWithIdx(0)->setProp<double>("foo", 7.5);
|
||||
smi = MolToCXSmiles(*mol);
|
||||
CHECK(smi == "C |atomProp:0.foo.7.5|");
|
||||
CHECK(smi == "C |atomProp:0.foo.7.5|");
|
||||
}
|
||||
SECTION("label with .") {
|
||||
auto mol = "C"_smiles;
|
||||
@@ -2018,7 +2019,7 @@ TEST_CASE("github #5466 writing floating point atom props cxsmiles",
|
||||
|
||||
mol->getAtomWithIdx(0)->setProp<int>("foo.foo", 7);
|
||||
auto smi = MolToCXSmiles(*mol);
|
||||
CHECK(smi == "C |atomProp:0.foo.foo.7|");
|
||||
CHECK(smi == "C |atomProp:0.foo.foo.7|");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (C) 2016-2023 Greg Landrum
|
||||
// Copyright (C) 2016-2026 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
@@ -477,15 +477,51 @@ TEST_CASE("enhanced stereo") {
|
||||
TEST_CASE("HTML char codes") {
|
||||
{
|
||||
std::string smiles = R"(CCCC* |$;;;;_AP1$,Sg:n:2:2,6-7:ht|)";
|
||||
SmilesParserParams params;
|
||||
v2::SmilesParse::SmilesParserParams params;
|
||||
params.allowCXSMILES = true;
|
||||
|
||||
ROMol *m = SmilesToMol(smiles, params);
|
||||
auto m = v2::SmilesParse::MolFromSmiles(smiles, params);
|
||||
REQUIRE(m);
|
||||
|
||||
CHECK(m->getNumAtoms() == 5);
|
||||
auto sgs = getSubstanceGroups(*m);
|
||||
REQUIRE(sgs.size() == 1);
|
||||
CHECK(sgs[0].getProp<std::string>("TYPE") == "SRU");
|
||||
}
|
||||
{
|
||||
auto m = R"CTAB(
|
||||
Mrv2222 05072606252D
|
||||
|
||||
delete m;
|
||||
0 0 0 0 0 999 V3000
|
||||
M V30 BEGIN CTAB
|
||||
M V30 COUNTS 4 3 1 0 0
|
||||
M V30 BEGIN ATOM
|
||||
M V30 1 C 2.31 -1.3337 0 0
|
||||
M V30 2 C 1.54 -0 0 0
|
||||
M V30 3 C -0 -0 0 0
|
||||
M V30 4 C -0.77 1.3337 0 0 ATTCHPT=1
|
||||
M V30 END ATOM
|
||||
M V30 BEGIN BOND
|
||||
M V30 1 1 1 2
|
||||
M V30 2 1 2 3
|
||||
M V30 3 1 3 4
|
||||
M V30 END BOND
|
||||
M V30 BEGIN SGROUP
|
||||
M V30 1 SRU 0 ATOMS=(1 3) XBONDS=(2 2 3) BRKXYZ=(9 -1.1173 0.0872 0 0.4832 -
|
||||
M V30 1.0112 0 0 0 0) BRKXYZ=(9 0.6341 0.924 0 0.6341 -0.924 0 0 0 0) -
|
||||
M V30 CONNECT=HT LABEL="2,6-7"
|
||||
M V30 END SGROUP
|
||||
M V30 END CTAB
|
||||
M END
|
||||
)CTAB"_ctab;
|
||||
REQUIRE(m);
|
||||
CHECK(m->getNumAtoms() == 4);
|
||||
m->clearConformers();
|
||||
auto sgs = getSubstanceGroups(*m);
|
||||
REQUIRE(sgs.size() == 1);
|
||||
CHECK(sgs[0].getProp<std::string>("TYPE") == "SRU");
|
||||
auto smi = MolToCXSmiles(*m);
|
||||
CHECK(smi == "CCCC |atomProp:3.molAttchpt.1,Sg:n:2:2,6-7:ht:::|");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1094,8 +1130,7 @@ TEST_CASE("testAtropisomersInCXSmiles") {
|
||||
};
|
||||
|
||||
for (auto smiTest : smiTests) {
|
||||
printf("Test\n\n %s\n\n", smiTest.fileName.c_str());
|
||||
// RDDepict::preferCoordGen = true;
|
||||
INFO(smiTest.fileName);
|
||||
testOneAtropisomers(&smiTest);
|
||||
}
|
||||
}
|
||||
@@ -1112,7 +1147,7 @@ TEST_CASE("testAtropisomersInCXSmilesCanon") {
|
||||
};
|
||||
|
||||
for (auto smiTest : smiTests) {
|
||||
printf("Test\n\n %s\n\n", smiTest.fileName.c_str());
|
||||
INFO(smiTest.fileName);
|
||||
testOneAtropisomersCanon(&smiTest);
|
||||
}
|
||||
}
|
||||
@@ -1128,7 +1163,7 @@ TEST_CASE("test3DChiral") {
|
||||
};
|
||||
|
||||
for (auto smiTest : smiTests) {
|
||||
printf("Test\n\n %s\n\n", smiTest.fileName.c_str());
|
||||
INFO(smiTest.fileName);
|
||||
// RDDepict::preferCoordGen = true;
|
||||
testOne3dChiral(&smiTest);
|
||||
}
|
||||
@@ -1739,4 +1774,35 @@ TEST_CASE("duplicate atoms in StereoGroup") {
|
||||
|
||||
CHECK_THROWS_AS(SmilesToMol(smiles), SmilesParseException);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Github #9231: quoting in CXSMILES") {
|
||||
auto m = "CCCCC"_smiles;
|
||||
REQUIRE(m);
|
||||
SECTION("as reported") {
|
||||
m->getAtomWithIdx(0)->setProp("p1", "1,2");
|
||||
m->getAtomWithIdx(0)->setProp("p2", "0");
|
||||
m->getAtomWithIdx(0)->setProp("p3", "|");
|
||||
m->getAtomWithIdx(0)->setProp("p4,5", "foo");
|
||||
auto smi = MolToCXSmiles(*m);
|
||||
CHECK(smi ==
|
||||
"CCCCC |atomProp:0.p1.1,2:0.p2.0:0.p3.|:0.p4,5.foo|");
|
||||
auto nmol = v2::SmilesParse::MolFromSmiles(smi);
|
||||
REQUIRE(nmol);
|
||||
CHECK(m->getAtomWithIdx(0)->getProp<std::string>("p1") == "1,2");
|
||||
CHECK(m->getAtomWithIdx(0)->getProp<std::string>("p2") == "0");
|
||||
CHECK(m->getAtomWithIdx(0)->getProp<std::string>("p3") == "|");
|
||||
CHECK(m->getAtomWithIdx(0)->getProp<std::string>("p4,5") == "foo");
|
||||
}
|
||||
SECTION("perverse example") {
|
||||
// bp-kelley came up with this beauty. :-)
|
||||
m->getAtomWithIdx(0)->setProp("cxsmiles", "C |atomProp:0.apKa.7.54|");
|
||||
auto smi = MolToCXSmiles(*m);
|
||||
CHECK(smi ==
|
||||
"CCCCC |atomProp:0.cxsmiles.C |atomProp:0.apKa.7.54||");
|
||||
auto nmol = v2::SmilesParse::MolFromSmiles(smi);
|
||||
REQUIRE(nmol);
|
||||
CHECK(m->getAtomWithIdx(0)->getProp<std::string>("cxsmiles") ==
|
||||
"C |atomProp:0.apKa.7.54|");
|
||||
}
|
||||
}
|
||||
@@ -2897,7 +2897,7 @@ M END\n\
|
||||
"{\"CX_ALL_BUT_COORDS\":true}");
|
||||
assert(cxsmiles_with_atom_prop);
|
||||
assert(!strcmp(cxsmiles_with_atom_prop,
|
||||
"NC1CC2CC(O)C1C2 |atomProp:5.atomProp.1.234|"));
|
||||
"NC1CC2CC(O)C1C2 |atomProp:5.atomProp.1.234|"));
|
||||
free(cxsmiles_with_atom_prop);
|
||||
free(mpkl_atom_prop);
|
||||
free(mpkl);
|
||||
@@ -2920,13 +2920,13 @@ M END\n\
|
||||
smarts = get_cxsmarts(mpkl, mpkl_size, empty_json[i]);
|
||||
assert(smarts);
|
||||
assert(!strcmp(
|
||||
smarts, "N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|"));
|
||||
smarts, "N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|"));
|
||||
free(smarts);
|
||||
}
|
||||
smarts = get_cxsmarts(mpkl, mpkl_size, "{\"doIsomericSmiles\":false}");
|
||||
assert(smarts);
|
||||
assert(!strcmp(smarts,
|
||||
"N-[C&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|"));
|
||||
"N-[C&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|"));
|
||||
free(smarts);
|
||||
free(mpkl);
|
||||
}
|
||||
|
||||
@@ -3176,11 +3176,11 @@ M END
|
||||
{
|
||||
const nonCanonicalCXSmilesNoStereo = mol.get_cxsmiles(JSON.stringify({doIsomericSmiles: false, canonical: false, CX_ALL_BUT_COORDS: true}));
|
||||
assert(nonCanonicalCXSmilesNoStereo === 'OC1CC2CC(N)C1C2');
|
||||
const nonCanonicalCXSmilesNoStereoAtomProp = `${nonCanonicalCXSmilesNoStereo} |atomProp:1.atomProp.1.234|`;
|
||||
const nonCanonicalCXSmilesNoStereoAtomProp = `${nonCanonicalCXSmilesNoStereo} |atomProp:1.atomProp.1.234|`;
|
||||
const molWithAtomProp = RDKitModule.get_mol(nonCanonicalCXSmilesNoStereoAtomProp);
|
||||
assert(molWithAtomProp);
|
||||
const cxSmilesWithAtomProp = molWithAtomProp.get_cxsmiles(JSON.stringify({CX_ALL_BUT_COORDS: true}));
|
||||
assert(cxSmilesWithAtomProp === 'NC1CC2CC(O)C1C2 |atomProp:5.atomProp.1.234|');
|
||||
assert(cxSmilesWithAtomProp === 'NC1CC2CC(O)C1C2 |atomProp:5.atomProp.1.234|');
|
||||
molWithAtomProp.delete();
|
||||
}
|
||||
mol.delete();
|
||||
@@ -3194,12 +3194,12 @@ M END
|
||||
assert(chiralQuery.get_smarts(JSON.stringify({doIsomericSmiles: false})) === 'N-[C&H1](-C(-O)=O)-C(-C)-C');
|
||||
}
|
||||
{
|
||||
const chiralQuery = RDKitModule.get_qmol('N-[C@H](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
assert(chiralQuery.get_cxsmarts() === 'N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
const chiralQuery = RDKitModule.get_qmol('N-[C@H](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
assert(chiralQuery.get_cxsmarts() === 'N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
['', '{}'].forEach((emptyJson) => {
|
||||
assert(chiralQuery.get_cxsmarts(emptyJson) === 'N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
assert(chiralQuery.get_cxsmarts(emptyJson) === 'N-[C@&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
});
|
||||
assert(chiralQuery.get_cxsmarts(JSON.stringify({doIsomericSmiles: false})) === 'N-[C&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
assert(chiralQuery.get_cxsmarts(JSON.stringify({doIsomericSmiles: false})) === 'N-[C&H1](-C(-O)=O)-C(-C)-C |atomProp:1.atomProp.1.234|');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user