mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
[v2 API] SMILES/SMARTS parsers (#6203)
* basic approach to v2api * does not work, backup commit * well, that now compiles * cleaner * more cleanup and testing * get the SWIG wrappers to build * swig wrappers now build * switch back to using references to default values * adjust to new catch version * move the implementation to v2 and call it from v1 * update the parameter object too * move debugParse down * a couple of review changes * make v2 naming consistent with Python
This commit is contained in:
@@ -77,3 +77,4 @@ rdkit_catch_test(cxsmilesTest cxsmiles_test.cpp LINK_LIBRARIES FileParsers Smile
|
||||
rdkit_test(smaTest1 smatest.cpp LINK_LIBRARIES SmilesParse SubstructMatch )
|
||||
rdkit_catch_test(smiTestCatch catch_tests.cpp LINK_LIBRARIES FileParsers SmilesParse )
|
||||
rdkit_catch_test(nontetrahedralCatch nontetrahedral_tests.cpp LINK_LIBRARIES FileParsers SmilesParse )
|
||||
rdkit_catch_test(v2smiTestCatch v2catch_tests.cpp LINK_LIBRARIES SmilesParse )
|
||||
|
||||
@@ -57,6 +57,8 @@ int yysmarts_lex_destroy(void *);
|
||||
size_t setup_smarts_string(const std::string &text, void *);
|
||||
extern int yysmarts_debug;
|
||||
namespace RDKit {
|
||||
namespace v2 {
|
||||
namespace SmilesParse {
|
||||
namespace {
|
||||
|
||||
int smarts_parse_helper(const std::string &inp,
|
||||
@@ -222,23 +224,24 @@ std::string labelRecursivePatterns(const std::string &sma) {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
RWMol *toMol(const std::string &inp,
|
||||
int func(const std::string &, std::vector<RDKit::RWMol *> &),
|
||||
const std::string &origInp) {
|
||||
std::unique_ptr<RWMol> toMol(const std::string &inp,
|
||||
int func(const std::string &,
|
||||
std::vector<RDKit::RWMol *> &),
|
||||
const std::string &origInp) {
|
||||
// empty strings produce empty molecules:
|
||||
if (inp.empty()) {
|
||||
return new RWMol();
|
||||
return std::make_unique<RWMol>();
|
||||
}
|
||||
RWMol *res = nullptr;
|
||||
std::unique_ptr<RWMol> res;
|
||||
std::vector<RDKit::RWMol *> molVect;
|
||||
try {
|
||||
func(inp, molVect);
|
||||
if (!molVect.empty()) {
|
||||
res = molVect[0];
|
||||
SmilesParseOps::CloseMolRings(res, false);
|
||||
SmilesParseOps::CheckChiralitySpecifications(res, true);
|
||||
SmilesParseOps::SetUnspecifiedBondTypes(res);
|
||||
SmilesParseOps::AdjustAtomChiralityFlags(res);
|
||||
res.reset(molVect[0]);
|
||||
SmilesParseOps::CloseMolRings(res.get(), false);
|
||||
SmilesParseOps::CheckChiralitySpecifications(res.get(), true);
|
||||
SmilesParseOps::SetUnspecifiedBondTypes(res.get());
|
||||
SmilesParseOps::AdjustAtomChiralityFlags(res.get());
|
||||
// No sense leaving this bookmark intact:
|
||||
if (res->hasAtomBookmark(ci_RIGHTMOST_ATOM)) {
|
||||
res->clearAtomBookmark(ci_RIGHTMOST_ATOM);
|
||||
@@ -253,7 +256,10 @@ RWMol *toMol(const std::string &inp,
|
||||
}
|
||||
BOOST_LOG(rdErrorLog) << nm << " Parse Error: " << e.what()
|
||||
<< " for input: '" << origInp << "'" << std::endl;
|
||||
res = nullptr;
|
||||
res.reset();
|
||||
if (!molVect.empty()) {
|
||||
molVect[0] = nullptr;
|
||||
}
|
||||
}
|
||||
for (auto *molPtr : molVect) {
|
||||
if (molPtr) {
|
||||
@@ -266,8 +272,9 @@ RWMol *toMol(const std::string &inp,
|
||||
return res;
|
||||
}
|
||||
|
||||
Atom *toAtom(const std::string &inp, int func(const std::string &, Atom *&)) {
|
||||
// empty strings produce empty molecules:
|
||||
std::unique_ptr<Atom> toAtom(const std::string &inp,
|
||||
int func(const std::string &, Atom *&)) {
|
||||
// empty strings produce nullptrs:
|
||||
if (inp.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -283,11 +290,12 @@ Atom *toAtom(const std::string &inp, int func(const std::string &, Atom *&)) {
|
||||
<< " for input: '" << inp << "'" << std::endl;
|
||||
res = nullptr;
|
||||
}
|
||||
return res;
|
||||
return std::unique_ptr<Atom>(res);
|
||||
}
|
||||
|
||||
Bond *toBond(const std::string &inp, int func(const std::string &, Bond *&)) {
|
||||
// empty strings produce empty molecules:
|
||||
std::unique_ptr<Bond> toBond(const std::string &inp,
|
||||
int func(const std::string &, Bond *&)) {
|
||||
// empty strings produce nullptrs:
|
||||
if (inp.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -303,7 +311,7 @@ Bond *toBond(const std::string &inp, int func(const std::string &, Bond *&)) {
|
||||
<< " for input: '" << inp << "'" << std::endl;
|
||||
res = nullptr;
|
||||
}
|
||||
return res;
|
||||
return std::unique_ptr<Bond>(res);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -332,12 +340,11 @@ void preprocessSmiles(const std::string &smiles, const T &params,
|
||||
lsmiles = smiles;
|
||||
}
|
||||
|
||||
if (params.replacements) {
|
||||
if (!params.replacements.empty()) {
|
||||
std::string smi = lsmiles;
|
||||
bool loopAgain = true;
|
||||
while (loopAgain) {
|
||||
for (auto loopAgain = true; loopAgain;) {
|
||||
loopAgain = false;
|
||||
for (const auto &pr : *(params.replacements)) {
|
||||
for (const auto &pr : params.replacements) {
|
||||
if (smi.find(pr.first) != std::string::npos) {
|
||||
loopAgain = true;
|
||||
boost::replace_all(smi, pr.first, pr.second);
|
||||
@@ -349,21 +356,17 @@ void preprocessSmiles(const std::string &smiles, const T &params,
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Atom *SmilesToAtom(const std::string &smiles) {
|
||||
std::unique_ptr<Atom> AtomFromSmiles(const std::string &smiles) {
|
||||
yysmiles_debug = false;
|
||||
|
||||
Atom *res = nullptr;
|
||||
res = toAtom(smiles, smiles_atom_parse);
|
||||
return res;
|
||||
};
|
||||
return toAtom(smiles, smiles_atom_parse);
|
||||
}
|
||||
|
||||
Bond *SmilesToBond(const std::string &smiles) {
|
||||
std::unique_ptr<Bond> BondFromSmiles(const std::string &smiles) {
|
||||
yysmiles_debug = false;
|
||||
|
||||
Bond *res = nullptr;
|
||||
res = toBond(smiles, smiles_bond_parse);
|
||||
return res;
|
||||
};
|
||||
return toBond(smiles, smiles_bond_parse);
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
@@ -381,14 +384,12 @@ void handleCXPartAndName(RWMol *res, const T &params, const std::string &cxPart,
|
||||
} catch (...) {
|
||||
cxfailed = true;
|
||||
if (params.strictCXSMILES) {
|
||||
delete res;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
res->setProp("_CXSMILES_Data", std::string(cxPart.cbegin(), pos));
|
||||
} else if (params.strictCXSMILES && !params.parseName &&
|
||||
pos != cxPart.cend()) {
|
||||
delete res;
|
||||
throw RDKit::SmilesParseException(
|
||||
"CXSMILES extension does not start with | and parseName=false");
|
||||
}
|
||||
@@ -400,9 +401,9 @@ void handleCXPartAndName(RWMol *res, const T &params, const std::string &cxPart,
|
||||
}
|
||||
} // namespace
|
||||
|
||||
RWMol *SmilesToMol(const std::string &smiles,
|
||||
const SmilesParserParams &params) {
|
||||
// Calling SmilesToMol in a multithreaded context is generally safe *unless*
|
||||
std::unique_ptr<RWMol> MolFromSmiles(const std::string &smiles,
|
||||
const SmilesParserParams &params) {
|
||||
// Calling MolFromSmiles in a multithreaded context is generally safe *unless*
|
||||
// the value of debugParse is different for different threads. The if
|
||||
// statement below avoids a TSAN warning in the case where multiple threads
|
||||
// all use the same value for debugParse.
|
||||
@@ -414,20 +415,17 @@ RWMol *SmilesToMol(const std::string &smiles,
|
||||
preprocessSmiles(smiles, params, lsmiles, name, cxPart);
|
||||
// strip any leading/trailing whitespace:
|
||||
// boost::trim_if(smi,boost::is_any_of(" \t\r\n"));
|
||||
RWMol *res = nullptr;
|
||||
res = toMol(lsmiles, smiles_parse, lsmiles);
|
||||
|
||||
auto res = toMol(lsmiles, smiles_parse, lsmiles);
|
||||
if (!res) {
|
||||
return nullptr;
|
||||
return res;
|
||||
}
|
||||
handleCXPartAndName(res, params, cxPart, name);
|
||||
handleCXPartAndName(res.get(), params, cxPart, name);
|
||||
|
||||
const Conformer *conf = nullptr, *conf3d = nullptr;
|
||||
// get a conformer
|
||||
|
||||
const Conformer *conf = nullptr, *conf3d = nullptr;
|
||||
if (res && res->getNumConformers() > 0) {
|
||||
for (unsigned int confId = 0; confId < res->getNumConformers(); ++confId) {
|
||||
Conformer *testConf = &res->getConformer(confId);
|
||||
auto *testConf = &res->getConformer(confId);
|
||||
if (!testConf->is3D()) {
|
||||
if (conf == nullptr) { // only take the first 2d conf
|
||||
conf = testConf;
|
||||
@@ -460,26 +458,16 @@ RWMol *SmilesToMol(const std::string &smiles,
|
||||
MolOps::assignChiralTypesFrom3D(*res, conf3d->getId(), true);
|
||||
}
|
||||
if (conf || conf3d) {
|
||||
try {
|
||||
Atropisomers::detectAtropisomerChirality(*res, conf ? conf : conf3d);
|
||||
} catch (...) {
|
||||
delete res;
|
||||
throw;
|
||||
}
|
||||
Atropisomers::detectAtropisomerChirality(*res, conf ? conf : conf3d);
|
||||
}
|
||||
|
||||
if (res && (params.sanitize || params.removeHs)) {
|
||||
try {
|
||||
if (params.removeHs) {
|
||||
bool implicitOnly = false, updateExplicitCount = true;
|
||||
MolOps::removeHs(*res, implicitOnly, updateExplicitCount,
|
||||
params.sanitize);
|
||||
} else if (params.sanitize) {
|
||||
MolOps::sanitizeMol(*res);
|
||||
}
|
||||
} catch (...) {
|
||||
delete res;
|
||||
throw;
|
||||
if (params.removeHs) {
|
||||
bool implicitOnly = false, updateExplicitCount = true;
|
||||
MolOps::removeHs(*res, implicitOnly, updateExplicitCount,
|
||||
params.sanitize);
|
||||
} else if (params.sanitize) {
|
||||
MolOps::sanitizeMol(*res);
|
||||
}
|
||||
|
||||
if (res->hasProp(SmilesParseOps::detail::_needsDetectBondStereo)) {
|
||||
@@ -510,12 +498,12 @@ RWMol *SmilesToMol(const std::string &smiles,
|
||||
// need to:
|
||||
MolOps::fastFindRings(*res);
|
||||
}
|
||||
QueryOps::completeMolQueries(res, 0xDEADBEEF);
|
||||
QueryOps::completeMolQueries(res.get(), 0xDEADBEEF);
|
||||
}
|
||||
|
||||
if (res) {
|
||||
if (!params.skipCleanup) {
|
||||
SmilesParseOps::CleanupAfterParsing(res);
|
||||
SmilesParseOps::CleanupAfterParsing(res.get());
|
||||
}
|
||||
if (!name.empty()) {
|
||||
res->setProp(common_properties::_Name, name);
|
||||
@@ -524,25 +512,21 @@ RWMol *SmilesToMol(const std::string &smiles,
|
||||
return res;
|
||||
};
|
||||
|
||||
Atom *SmartsToAtom(const std::string &smiles) {
|
||||
std::unique_ptr<Atom> AtomFromSmarts(const std::string &smiles) {
|
||||
yysmarts_debug = false;
|
||||
|
||||
Atom *res = nullptr;
|
||||
res = toAtom(smiles, smarts_atom_parse);
|
||||
return res;
|
||||
return toAtom(smiles, smarts_atom_parse);
|
||||
};
|
||||
|
||||
Bond *SmartsToBond(const std::string &smiles) {
|
||||
std::unique_ptr<Bond> BondFromSmarts(const std::string &smiles) {
|
||||
yysmarts_debug = false;
|
||||
|
||||
Bond *res = nullptr;
|
||||
res = toBond(smiles, smarts_bond_parse);
|
||||
return res;
|
||||
return toBond(smiles, smarts_bond_parse);
|
||||
};
|
||||
|
||||
RWMol *SmartsToMol(const std::string &smarts,
|
||||
const SmartsParserParams &params) {
|
||||
// Calling SmartsToMol in a multithreaded context is generally safe *unless*
|
||||
std::unique_ptr<RWMol> MolFromSmarts(const std::string &smarts,
|
||||
const SmartsParserParams &params) {
|
||||
// Calling MolFromSmarts in a multithreaded context is generally safe *unless*
|
||||
// the value of debugParse is different for different threads. The if
|
||||
// statement below avoids a TSAN warning in the case where multiple threads
|
||||
// all use the same value for debugParse.
|
||||
@@ -553,21 +537,15 @@ RWMol *SmartsToMol(const std::string &smarts,
|
||||
std::string lsmarts, name, cxPart;
|
||||
preprocessSmiles(smarts, params, lsmarts, name, cxPart);
|
||||
|
||||
RWMol *res = nullptr;
|
||||
res = toMol(labelRecursivePatterns(lsmarts), smarts_parse, lsmarts);
|
||||
handleCXPartAndName(res, params, cxPart, name);
|
||||
auto res = toMol(labelRecursivePatterns(lsmarts), smarts_parse, lsmarts);
|
||||
handleCXPartAndName(res.get(), params, cxPart, name);
|
||||
if (res) {
|
||||
if (params.mergeHs) {
|
||||
try {
|
||||
MolOps::mergeQueryHs(*res);
|
||||
} catch (...) {
|
||||
delete res;
|
||||
throw;
|
||||
}
|
||||
MolOps::mergeQueryHs(*res);
|
||||
}
|
||||
MolOps::setBondStereoFromDirections(*res);
|
||||
if (!params.skipCleanup) {
|
||||
SmilesParseOps::CleanupAfterParsing(res);
|
||||
SmilesParseOps::CleanupAfterParsing(res.get());
|
||||
}
|
||||
if (!name.empty()) {
|
||||
res->setProp(common_properties::_Name, name);
|
||||
@@ -575,4 +553,6 @@ RWMol *SmartsToMol(const std::string &smarts,
|
||||
}
|
||||
return res;
|
||||
};
|
||||
} // namespace SmilesParse
|
||||
} // namespace v2
|
||||
} // namespace RDKit
|
||||
|
||||
@@ -11,13 +11,85 @@
|
||||
#ifndef RD_SMILESPARSE_H
|
||||
#define RD_SMILESPARSE_H
|
||||
|
||||
#include <GraphMol/RWMol.h>
|
||||
#include <GraphMol/SanitException.h>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
namespace RDKit {
|
||||
class RWMol;
|
||||
class Atom;
|
||||
class Bond;
|
||||
|
||||
namespace SmilesParse {
|
||||
//! Exception type carrying a parser error message.
//! The message is copied into the object at construction and handed back
//! unchanged from what().
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
|
||||
public:
|
||||
//! construct from a C string; the text is copied into _msg
SmilesParseException(const char *msg) : _msg(msg) {}
|
||||
//! construct from a std::string (received by value, then copied into _msg)
SmilesParseException(const std::string msg) : _msg(msg) {}
|
||||
//! \return the stored message; the pointer refers to _msg's own buffer
const char *what() const noexcept override { return _msg.c_str(); }
|
||||
~SmilesParseException() noexcept override = default;
|
||||
|
||||
private:
|
||||
// owned copy of the message text returned by what()
std::string _msg;
|
||||
};
|
||||
|
||||
} // namespace SmilesParse
|
||||
|
||||
namespace v2 {
|
||||
namespace SmilesParse {
|
||||
using RDKit::SmilesParse::SmilesParseException;
|
||||
|
||||
//! Options for the v2 SMILES parser (the type accepted by
//! v2::SmilesParse::MolFromSmiles).
//! In this version the replacements map is held by value (an empty map is the
//! default) and debugParse is a bool.
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
|
||||
bool sanitize = true; /**< sanitize the molecule after building it */
|
||||
std::map<std::string, std::string>
|
||||
replacements; /**< allows SMILES "macros" */
|
||||
bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
|
||||
bool strictCXSMILES =
|
||||
true; /**< throw an exception if the CXSMILES parsing fails */
|
||||
bool parseName = true; /**< parse (and set) the molecule name as well */
|
||||
bool removeHs = true; /**< remove Hs after constructing the molecule */
|
||||
bool skipCleanup =
|
||||
false; /**< skip the final cleanup stage */
|
||||
bool debugParse = false; /**< enable debugging in the SMILES parser*/
|
||||
};
|
||||
|
||||
//! Options for the v2 SMARTS parser (the type accepted by
//! v2::SmilesParse::MolFromSmarts).
//! In this version the replacements map is held by value (an empty map is the
//! default) and debugParse is a bool.
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
|
||||
std::map<std::string, std::string>
|
||||
replacements; /**< allows SMARTS "macros" */
|
||||
bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
|
||||
bool strictCXSMILES =
|
||||
true; /**< throw an exception if the CXSMILES parsing fails */
|
||||
bool parseName = true; /**< parse (and set) the molecule name as well */
|
||||
bool mergeHs =
|
||||
false; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
|
||||
bool skipCleanup =
|
||||
false; /**< skip the final cleanup stage */
|
||||
bool debugParse = false; /**< enable debugging in the SMARTS parser*/
|
||||
};
|
||||
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmiles(
|
||||
const std::string &smi,
|
||||
const SmilesParserParams &params = SmilesParserParams());
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSmarts(
|
||||
const std::string &sma,
|
||||
const SmartsParserParams &params = SmartsParserParams());
|
||||
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmiles(
|
||||
const std::string &smi);
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmiles(
|
||||
const std::string &smi);
|
||||
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Atom> AtomFromSmarts(
|
||||
const std::string &sma);
|
||||
RDKIT_SMILESPARSE_EXPORT std::unique_ptr<RDKit::Bond> BondFromSmarts(
|
||||
const std::string &sma);
|
||||
|
||||
} // namespace SmilesParse
|
||||
} // namespace v2
|
||||
|
||||
inline namespace v1 {
|
||||
using RDKit::SmilesParse::SmilesParseException;
|
||||
|
||||
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
|
||||
int debugParse = 0; /**< enable debugging in the SMILES parser*/
|
||||
@@ -30,13 +102,48 @@ struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
|
||||
bool parseName = true; /**< parse (and set) the molecule name as well */
|
||||
bool removeHs = true; /**< remove Hs after constructing the molecule */
|
||||
bool skipCleanup =
|
||||
false; /**< skip the final cleanup stage (for internal use) */
|
||||
false; /**< skip the final cleanup stage */
|
||||
};
|
||||
RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
|
||||
const SmilesParserParams &params);
|
||||
|
||||
RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
|
||||
RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
|
||||
//! Options for the v1 (legacy) SMARTS parser.
//! Here debugParse is an int and replacements is a raw pointer that defaults
//! to nullptr.
// NOTE(review): ownership of the replacements map is not expressed by the
// type; presumably it stays with the caller - confirm.
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
|
||||
int debugParse = 0; /**< enable debugging in the SMARTS parser*/
|
||||
std::map<std::string, std::string> *replacements =
|
||||
nullptr; /**< allows SMARTS "macros" */
|
||||
bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
|
||||
bool strictCXSMILES =
|
||||
true; /**< throw an exception if the CXSMILES parsing fails */
|
||||
bool parseName = true; /**< parse (and set) the molecule name as well */
|
||||
bool mergeHs =
|
||||
false; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
|
||||
bool skipCleanup =
|
||||
false; /**< skip the final cleanup stage */
|
||||
};
|
||||
|
||||
//! v1 entry point: builds a molecule from SMILES using a v1 parameter object.
/*!
  Translates \c ps field by field into a v2 SmilesParserParams, forwards to
  v2::SmilesParse::MolFromSmiles, and releases the returned unique_ptr.

  \param smi  the SMILES text to parse
  \param ps   v1 parser options
  \return a raw pointer released from the v2 result; the caller is
          responsible for deleting it. The v2 call may also hand back an
          empty pointer, in which case nullptr is returned.
*/
inline RDKit::RWMol *SmilesToMol(const std::string &smi,
|
||||
const SmilesParserParams &ps) {
|
||||
RDKit::v2::SmilesParse::SmilesParserParams v2ps;
|
||||
// v1 stores debugParse as an int; the v2 field is a bool
v2ps.debugParse = ps.debugParse;
|
||||
v2ps.sanitize = ps.sanitize;
|
||||
|
||||
// v1 holds the replacements map by pointer; copy it only when one was given,
// otherwise the v2 map stays empty
if (ps.replacements) {
|
||||
v2ps.replacements = *ps.replacements;
|
||||
}
|
||||
v2ps.allowCXSMILES = ps.allowCXSMILES;
|
||||
v2ps.strictCXSMILES = ps.strictCXSMILES;
|
||||
v2ps.parseName = ps.parseName;
|
||||
v2ps.removeHs = ps.removeHs;
|
||||
v2ps.skipCleanup = ps.skipCleanup;
|
||||
// release(): ownership passes from the unique_ptr to the caller
return RDKit::v2::SmilesParse::MolFromSmiles(smi, v2ps).release();
|
||||
}
|
||||
|
||||
//! v1 entry point: forwards to v2::SmilesParse::AtomFromSmiles and releases
//! the resulting unique_ptr.
//! \return a raw pointer the caller must delete (nullptr if the v2 call
//!         returned an empty pointer)
inline Atom *SmilesToAtom(const std::string &smi) {
|
||||
auto res = RDKit::v2::SmilesParse::AtomFromSmiles(smi).release();
|
||||
return res;
|
||||
}
|
||||
|
||||
//! v1 entry point: forwards to v2::SmilesParse::BondFromSmiles and releases
//! the resulting unique_ptr.
//! \return a raw pointer the caller must delete (nullptr if the v2 call
//!         returned an empty pointer)
inline Bond *SmilesToBond(const std::string &smi) {
|
||||
return RDKit::v2::SmilesParse::BondFromSmiles(smi).release();
|
||||
}
|
||||
|
||||
//! Construct a molecule from a SMILES string
|
||||
/*!
|
||||
@@ -68,9 +175,11 @@ RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
|
||||
inline RWMol *SmilesToMol(
|
||||
const std::string &smi, int debugParse = 0, bool sanitize = true,
|
||||
std::map<std::string, std::string> *replacements = nullptr) {
|
||||
SmilesParserParams params;
|
||||
RDKit::v2::SmilesParse::SmilesParserParams params;
|
||||
params.debugParse = debugParse;
|
||||
params.replacements = replacements;
|
||||
if (replacements) {
|
||||
params.replacements = *replacements;
|
||||
}
|
||||
if (sanitize) {
|
||||
params.sanitize = true;
|
||||
params.removeHs = true;
|
||||
@@ -78,24 +187,24 @@ inline RWMol *SmilesToMol(
|
||||
params.sanitize = false;
|
||||
params.removeHs = false;
|
||||
}
|
||||
return SmilesToMol(smi, params);
|
||||
return RDKit::v2::SmilesParse::MolFromSmiles(smi, params).release();
|
||||
};
|
||||
|
||||
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
|
||||
int debugParse = 0; /**< enable debugging in the SMARTS parser*/
|
||||
std::map<std::string, std::string> *replacements =
|
||||
nullptr; /**< allows SMARTS "macros" */
|
||||
bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
|
||||
bool strictCXSMILES =
|
||||
true; /**< throw an exception if the CXSMILES parsing fails */
|
||||
bool parseName = true; /**< parse (and set) the molecule name as well */
|
||||
bool mergeHs =
|
||||
true; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
|
||||
bool skipCleanup =
|
||||
false; /**< skip the final cleanup stage (for internal use) */
|
||||
};
|
||||
RDKIT_SMILESPARSE_EXPORT RWMol *SmartsToMol(const std::string &sma,
|
||||
const SmartsParserParams &ps);
|
||||
//! v1 entry point: builds a query molecule from SMARTS using a v1 parameter
//! object.
/*!
  Translates \c ps field by field into a v2 SmartsParserParams, forwards to
  v2::SmilesParse::MolFromSmarts, and releases the returned unique_ptr.

  \param sma  the SMARTS text to parse
  \param ps   v1 parser options
  \return a raw pointer released from the v2 result; the caller is
          responsible for deleting it. The v2 call may also hand back an
          empty pointer, in which case nullptr is returned.
*/
inline RWMol *SmartsToMol(const std::string &sma,
|
||||
const SmartsParserParams &ps) {
|
||||
RDKit::v2::SmilesParse::SmartsParserParams v2ps;
|
||||
// v1 stores debugParse as an int; the v2 field is a bool
v2ps.debugParse = ps.debugParse;
|
||||
// v1 holds the replacements map by pointer; copy it only when one was given
if (ps.replacements) {
|
||||
v2ps.replacements = *ps.replacements;
|
||||
}
|
||||
v2ps.allowCXSMILES = ps.allowCXSMILES;
|
||||
v2ps.strictCXSMILES = ps.strictCXSMILES;
|
||||
v2ps.parseName = ps.parseName;
|
||||
v2ps.mergeHs = ps.mergeHs;
|
||||
v2ps.skipCleanup = ps.skipCleanup;
|
||||
|
||||
// release(): ownership passes from the unique_ptr to the caller
return RDKit::v2::SmilesParse::MolFromSmarts(sma, v2ps).release();
|
||||
}
|
||||
|
||||
//! Construct a molecule from a SMARTS string
|
||||
/*!
|
||||
@@ -112,45 +221,36 @@ RDKIT_SMILESPARSE_EXPORT RWMol *SmartsToMol(const std::string &sma,
|
||||
inline RWMol *SmartsToMol(
|
||||
const std::string &sma, int debugParse = 0, bool mergeHs = false,
|
||||
std::map<std::string, std::string> *replacements = nullptr) {
|
||||
SmartsParserParams ps;
|
||||
RDKit::v2::SmilesParse::SmartsParserParams ps;
|
||||
ps.debugParse = debugParse;
|
||||
ps.mergeHs = mergeHs;
|
||||
ps.replacements = replacements;
|
||||
return SmartsToMol(sma, ps);
|
||||
if (replacements) {
|
||||
ps.replacements = *replacements;
|
||||
}
|
||||
return RDKit::v2::SmilesParse::MolFromSmarts(sma, ps).release();
|
||||
};
|
||||
|
||||
RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
|
||||
RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
|
||||
|
||||
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
|
||||
public:
|
||||
SmilesParseException(const char *msg) : _msg(msg) {}
|
||||
SmilesParseException(const std::string msg) : _msg(msg) {}
|
||||
const char *what() const noexcept override { return _msg.c_str(); }
|
||||
~SmilesParseException() noexcept override = default;
|
||||
|
||||
private:
|
||||
std::string _msg;
|
||||
};
|
||||
//! v1 entry point: forwards to v2::SmilesParse::AtomFromSmarts and releases
//! the resulting unique_ptr.
//! \return a raw pointer the caller must delete (nullptr if the v2 call
//!         returned an empty pointer)
inline Atom *SmartsToAtom(const std::string &sma) {
|
||||
return RDKit::v2::SmilesParse::AtomFromSmarts(sma).release();
|
||||
}
|
||||
//! v1 entry point: forwards to v2::SmilesParse::BondFromSmarts and releases
//! the resulting unique_ptr.
//! \return a raw pointer the caller must delete (nullptr if the v2 call
//!         returned an empty pointer)
inline Bond *SmartsToBond(const std::string &sma) {
|
||||
return RDKit::v2::SmilesParse::BondFromSmarts(sma).release();
|
||||
}
|
||||
} // namespace v1
|
||||
|
||||
inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
|
||||
size_t len) {
|
||||
std::string smi(text, len);
|
||||
RWMol *ptr = nullptr;
|
||||
try {
|
||||
ptr = SmilesToMol(smi);
|
||||
return v2::SmilesParse::MolFromSmiles(smi);
|
||||
} catch (const RDKit::MolSanitizeException &) {
|
||||
ptr = nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
return std::unique_ptr<RWMol>(ptr);
|
||||
}
|
||||
inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
|
||||
size_t len) {
|
||||
std::string smi(text, len);
|
||||
// no need for exception handling here: SmartsToMol() doesn't do
|
||||
// sanitization
|
||||
RWMol *ptr = SmartsToMol(smi);
|
||||
return std::unique_ptr<RWMol>(ptr);
|
||||
return v2::SmilesParse::MolFromSmarts(smi);
|
||||
}
|
||||
|
||||
} // namespace RDKit
|
||||
|
||||
69
Code/GraphMol/SmilesParse/v2catch_tests.cpp
Normal file
69
Code/GraphMol/SmilesParse/v2catch_tests.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// Copyright (C) 2023 Greg Landrum and other RDKit contributors
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
// which is included in the file license.txt, found at the root
|
||||
// of the RDKit source tree.
|
||||
//
|
||||
|
||||
#include <catch2/catch_all.hpp>
|
||||
#ifdef RDK_BUILD_THREADSAFE_SSS
|
||||
#include <future>
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#include <GraphMol/RDKitBase.h>
|
||||
#include <GraphMol/MolPickler.h>
|
||||
#include <GraphMol/QueryAtom.h>
|
||||
#include <GraphMol/QueryBond.h>
|
||||
#include <GraphMol/Chirality.h>
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
#include <GraphMol/SmilesParse/SmilesWrite.h>
|
||||
#include <GraphMol/SmilesParse/SmartsWrite.h>
|
||||
|
||||
using namespace RDKit::v2;
|
||||
|
||||
// Smoke test for the v2 parser entry points: each scoped section calls one
// function and checks that a non-null result comes back (plus the atom count
// for the molecule parsers).
TEST_CASE("v2 basics") {
|
||||
// SMILES with default parameters: the explicit [H] is expected to be removed,
// leaving 3 atoms
{
|
||||
auto mol = SmilesParse::MolFromSmiles("CCO[H]");
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getNumAtoms() == 3);
|
||||
}
|
||||
// SMILES with removeHs switched off: the explicit [H] is kept, so 4 atoms
{
|
||||
SmilesParse::SmilesParserParams ps;
|
||||
ps.removeHs = false;
|
||||
auto mol = SmilesParse::MolFromSmiles("CCO[H]", ps);
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getNumAtoms() == 4);
|
||||
}
|
||||
// SMARTS with default parameters: the [H] stays a separate atom, so 4 atoms
{
|
||||
auto mol = SmilesParse::MolFromSmarts("[H]CC[R]");
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getNumAtoms() == 4);
|
||||
}
|
||||
// SMARTS with mergeHs switched on: the [H] is merged away, leaving 3 atoms
{
|
||||
SmilesParse::SmartsParserParams ps;
|
||||
ps.mergeHs = true;
|
||||
auto mol = SmilesParse::MolFromSmarts("[H]CC[R]", ps);
|
||||
REQUIRE(mol);
|
||||
CHECK(mol->getNumAtoms() == 3);
|
||||
}
|
||||
// single-atom SMILES
{
|
||||
auto atm = SmilesParse::AtomFromSmiles("C");
|
||||
REQUIRE(atm);
|
||||
}
|
||||
// single-bond SMILES
{
|
||||
auto bnd = SmilesParse::BondFromSmiles("-");
|
||||
REQUIRE(bnd);
|
||||
}
|
||||
// single-atom SMARTS
{
|
||||
auto atm = SmilesParse::AtomFromSmarts("[R]");
|
||||
REQUIRE(atm);
|
||||
}
|
||||
// single-bond SMARTS
{
|
||||
auto bnd = SmilesParse::BondFromSmarts("@");
|
||||
REQUIRE(bnd);
|
||||
}
|
||||
}
|
||||
@@ -110,14 +110,14 @@
|
||||
%}
|
||||
|
||||
// ===== SmilesParseException =====
|
||||
%typemap(javabase) RDKit::SmilesParseException "java.lang.RuntimeException";
|
||||
%typemap(throws, throws="org.RDKit.SmilesParseException") RDKit::SmilesParseException {
|
||||
%typemap(javabase) RDKit::SmilesParse::SmilesParseException "java.lang.RuntimeException";
|
||||
%typemap(throws, throws="org.RDKit.SmilesParseException") RDKit::SmilesParse::SmilesParseException {
|
||||
jclass excep = jenv->FindClass("org/RDKit/SmilesParseException");
|
||||
if (excep)
|
||||
jenv->ThrowNew(excep, $1.what());
|
||||
return $null;
|
||||
}
|
||||
%typemap(javacode) RDKit::SmilesParseException %{
|
||||
%typemap(javacode) RDKit::SmilesParse::SmilesParseException %{
|
||||
public String getMessage() {
|
||||
return what();
|
||||
}
|
||||
|
||||
@@ -68,6 +68,8 @@
|
||||
%javaconst(1);
|
||||
#endif
|
||||
%include <GraphMol/FileParsers/FileParsers.h>
|
||||
%ignore RDKit::v2;
|
||||
%ignore RDKit::v2::SmilesParse;
|
||||
%include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
%include <GraphMol/RWMol.h>
|
||||
|
||||
@@ -76,7 +78,7 @@
|
||||
std::map<std::string,std::string> *replacements=0){
|
||||
return RDKit::RWMOL_SPTR(RDKit::SmilesToMol(smi, debugParse, sanitize,replacements));
|
||||
}
|
||||
static RDKit::RWMOL_SPTR MolFromSmiles(const std::string &smi, const RDKit::SmilesParserParams &params){
|
||||
static RDKit::RWMOL_SPTR MolFromSmiles(const std::string &smi, const RDKit::v1::SmilesParserParams &params){
|
||||
return RDKit::RWMOL_SPTR(RDKit::SmilesToMol(smi, params));
|
||||
}
|
||||
static RDKit::RWMOL_SPTR MolFromSmarts(const std::string &sma,int debugParse=0,bool mergeHs=false,
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
%{
|
||||
#include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
%}
|
||||
%ignore RDKit::v2;
|
||||
%ignore SmilesToMol;
|
||||
%ignore SmartsToMol;
|
||||
%include <GraphMol/SmilesParse/SmilesParse.h>
|
||||
|
||||
@@ -166,7 +166,7 @@ typedef unsigned long long int uintmax_t;
|
||||
%shared_ptr(RDKit::AtomValenceException)
|
||||
%shared_ptr(RDKit::AtomKekulizeException)
|
||||
%shared_ptr(RDKit::KekulizeException)
|
||||
%shared_ptr(RDKit::SmilesParseException)
|
||||
%shared_ptr(RDKit::SmilesParse::SmilesParseException)
|
||||
%shared_ptr(RDKit::RingInfo)
|
||||
%shared_ptr(RDKit::ChemicalReaction)
|
||||
%shared_ptr(ForceFields::ForceFieldContrib);
|
||||
|
||||
Reference in New Issue
Block a user