From 7d1e662bc70fdf6db4d04d42fda9995422a5b779 Mon Sep 17 00:00:00 2001 From: Ricardo Rodriguez Date: Thu, 9 Oct 2025 16:14:48 +0200 Subject: [PATCH] Use std::string_view for property keys (#8844) * string_view props API * wip * fix leak * add string_view to swig * fix comment * add backwards incompatibilty note * fix rebase issue --------- Co-authored-by: Greg Landrum --- .../FilterCatalog/FilterCatalogEntry.h | 44 ++--------- .../FilterCatalog/Wrap/FilterCatalog.cpp | 7 +- Code/GraphMol/Wrap/SubstanceGroup.cpp | 49 ++++++------ Code/JavaWrappers/Dict.i | 3 + Code/RDBoost/Wrap.h | 44 +---------- Code/RDBoost/Wrap/RDBase.cpp | 76 +++++++++++++++++++ Code/RDGeneral/Dict.h | 51 +++++++------ Code/RDGeneral/Exceptions.h | 5 ++ Code/RDGeneral/RDProps.h | 14 ++-- ReleaseNotes.md | 3 + 10 files changed, 158 insertions(+), 138 deletions(-) diff --git a/Code/GraphMol/FilterCatalog/FilterCatalogEntry.h b/Code/GraphMol/FilterCatalog/FilterCatalogEntry.h index c234f8805..87cc5e7a4 100644 --- a/Code/GraphMol/FilterCatalog/FilterCatalogEntry.h +++ b/Code/GraphMol/FilterCatalog/FilterCatalogEntry.h @@ -32,6 +32,7 @@ #include #ifndef __RD_FILTER_CATALOG_H__ #define __RD_FILTER_CATALOG_H__ +#include #include #include // For Dict @@ -110,13 +111,7 @@ class RDKIT_FILTERCATALOG_EXPORT FilterCatalogEntry \param val the value to be stored */ template - void setProp(const char *key, T val) { - std::string what(key); - setProp(what, val); - } - //! \overload - template - void setProp(const std::string &key, T val) { + void setProp(const std::string_view key, T val) { d_props.setVal(key, val); } @@ -137,52 +132,27 @@ class RDKIT_FILTERCATALOG_EXPORT FilterCatalogEntry */ template - void getProp(const char *key, T &res) const { + void getProp(const std::string_view key, T &res) const { d_props.getVal(key, res); } //! \overload template - void getProp(const std::string &key, T &res) const { - d_props.getVal(key, res); - } - //! \overload - template - T getProp(const char *key) const { - return d_props.getVal(key); - } - //! \overload - template - T getProp(const std::string &key) const { + T getProp(const std::string_view key) const { return d_props.getVal(key); } //! returns whether or not we have a \c property with name \c key //! and assigns the value if we do template - bool getPropIfPresent(const char *key, T &res) const { - return d_props.getValIfPresent(key, res); - } - //! \overload - template - bool getPropIfPresent(const std::string &key, T &res) const { + bool getPropIfPresent(const std::string_view key, T &res) const { return d_props.getValIfPresent(key, res); } //! returns whether or not we have a \c property with name \c key - bool hasProp(const char *key) const { return d_props.hasVal(key); } - //! \overload - bool hasProp(const std::string &key) const { - return d_props.hasVal(key); - // return hasProp(key.c_str()); - } + bool hasProp(const std::string_view key) const { return d_props.hasVal(key); } //! clears the value of a \c property - void clearProp(const char *key) { - std::string what(key); - clearProp(what); - } - //! \overload - void clearProp(const std::string &key) { d_props.clearVal(key); } + void clearProp(const std::string_view key) { d_props.clearVal(key); } // ------------------------------------------- //! Properties usually contain the reference and source diff --git a/Code/GraphMol/FilterCatalog/Wrap/FilterCatalog.cpp b/Code/GraphMol/FilterCatalog/Wrap/FilterCatalog.cpp index 0d46b3501..d3bd52c26 100644 --- a/Code/GraphMol/FilterCatalog/Wrap/FilterCatalog.cpp +++ b/Code/GraphMol/FilterCatalog/Wrap/FilterCatalog.cpp @@ -455,16 +455,17 @@ struct filtercat_wrapper { python::args("self")) .def("SetProp", (void (FilterCatalogEntry::*)( - const std::string &, + const std::string_view, std::string))&FilterCatalogEntry::setProp, python::args("self", "key", "val")) .def("GetProp", - (std::string (FilterCatalogEntry::*)(const std::string &) const) & + (std::string (FilterCatalogEntry::*)(const std::string_view) + const) & FilterCatalogEntry::getProp, python::args("self", "key")) .def("ClearProp", (void (FilterCatalogEntry::*)( - const std::string &))&FilterCatalogEntry::clearProp, + const std::string_view))&FilterCatalogEntry::clearProp, python::args("self", "key")); python::def( diff --git a/Code/GraphMol/Wrap/SubstanceGroup.cpp b/Code/GraphMol/Wrap/SubstanceGroup.cpp index 62f013e34..85ba45854 100644 --- a/Code/GraphMol/Wrap/SubstanceGroup.cpp +++ b/Code/GraphMol/Wrap/SubstanceGroup.cpp @@ -211,38 +211,39 @@ struct sgroup_wrap { python::args("self")) .def("SetProp", - (void (RDProps::*)(const std::string &, std::string, bool) const) & + (void (RDProps::*)(const std::string_view, std::string, bool) + const) & SubstanceGroup::setProp, (python::arg("self"), python::arg("key"), python::arg("val"), python::arg("computed") = false), "sets the value of a particular property") .def("SetDoubleProp", - (void (RDProps::*)(const std::string &, double, bool) const) & + (void (RDProps::*)(const std::string_view, double, bool) const) & SubstanceGroup::setProp, (python::arg("self"), python::arg("key"), python::arg("val"), python::arg("computed") = false), "sets the value of a particular property") .def("SetIntProp", - (void (RDProps::*)(const std::string &, int, bool) const) & + (void (RDProps::*)(const std::string_view, int, bool) const) & SubstanceGroup::setProp, (python::arg("self"), python::arg("key"), python::arg("val"), python::arg("computed") = false), "sets the value of a particular property") - .def( - "SetUnsignedProp", - (void (RDProps::*)(const std::string &, unsigned int, bool) const) & - SubstanceGroup::setProp, - (python::arg("self"), python::arg("key"), python::arg("val"), - python::arg("computed") = false), - "sets the value of a particular property") + .def("SetUnsignedProp", + (void (RDProps::*)(const std::string_view, unsigned int, bool) + const) & + SubstanceGroup::setProp, + (python::arg("self"), python::arg("key"), python::arg("val"), + python::arg("computed") = false), + "sets the value of a particular property") .def("SetBoolProp", - (void (RDProps::*)(const std::string &, bool, bool) const) & + (void (RDProps::*)(const std::string_view, bool, bool) const) & SubstanceGroup::setProp, (python::arg("self"), python::arg("key"), python::arg("val"), python::arg("computed") = false), "sets the value of a particular property") .def("HasProp", - (bool (RDProps::*)(const std::string &) const) & + (bool (RDProps::*)(const std::string_view) const) & SubstanceGroup::hasProp, python::args("self", "key"), "returns whether or not a particular property exists") @@ -260,37 +261,37 @@ struct sgroup_wrap { "will be raised.\n", boost::python::return_value_policy()) .def("GetIntProp", - (int (RDProps::*)(const std::string &) const) & + (int (RDProps::*)(const std::string_view) const) & SubstanceGroup::getProp, python::args("self", "key"), "returns the value of a particular property") .def("GetUnsignedProp", - (unsigned int (RDProps::*)(const std::string &) const) & + (unsigned int (RDProps::*)(const std::string_view) const) & SubstanceGroup::getProp, python::args("self", "key"), "returns the value of a particular property") .def("GetDoubleProp", - (double (RDProps::*)(const std::string &) const) & + (double (RDProps::*)(const std::string_view) const) & SubstanceGroup::getProp, python::args("self", "key"), "returns the value of a particular property") .def("GetBoolProp", - (bool (RDProps::*)(const std::string &) const) & + (bool (RDProps::*)(const std::string_view) const) & SubstanceGroup::getProp, python::args("self", "key"), "returns the value of a particular property") .def("GetUnsignedVectProp", - (std::vector (RDProps::*)(const std::string &) + (std::vector (RDProps::*)(const std::string_view) const) & SubstanceGroup::getProp>, python::args("self", "key"), "returns the value of a particular property") - .def( - "GetStringVectProp", - (std::vector (RDProps::*)(const std::string &) const) & - SubstanceGroup::getProp>, - python::args("self", "key"), - "returns the value of a particular property") + .def("GetStringVectProp", + (std::vector (RDProps::*)(const std::string_view) + const) & + SubstanceGroup::getProp>, + python::args("self", "key"), + "returns the value of a particular property") .def("GetPropNames", &SubstanceGroup::getPropList, (python::arg("self"), python::arg("includePrivate") = false, python::arg("includeComputed") = false), @@ -304,7 +305,7 @@ struct sgroup_wrap { "SubstanceGroup.\n" " n.b. some properties cannot be converted to python types.\n") .def("ClearProp", - (void (RDProps::*)(const std::string &) const) & + (void (RDProps::*)(const std::string_view) const) & SubstanceGroup::clearProp, python::args("self", "key"), "Removes a particular property (does nothing if not set).\n\n"); diff --git a/Code/JavaWrappers/Dict.i b/Code/JavaWrappers/Dict.i index d94d3889b..912e115ae 100644 --- a/Code/JavaWrappers/Dict.i +++ b/Code/JavaWrappers/Dict.i @@ -35,6 +35,9 @@ #include %} +// This requires SWIG 4.2 or higher +%include "std_string_view.i" + %ignore RDKit::Dict::Pair; %ignore RDKit::PairHolder; %include diff --git a/Code/RDBoost/Wrap.h b/Code/RDBoost/Wrap.h index b77913eb7..4cf0eead9 100644 --- a/Code/RDBoost/Wrap.h +++ b/Code/RDBoost/Wrap.h @@ -28,8 +28,9 @@ #include "list_indexing_suite.hpp" #include -#include #include +#include +#include #include namespace python = boost::python; @@ -322,47 +323,6 @@ struct iterable_converter { } }; -/// Simple Boost.Python custom converter from pathlib.Path to std::string -template -struct path_converter { - path_converter() { - python::converter::registry::push_back(&path_converter::convertible, - &path_converter::construct, - boost::python::type_id()); - } - - /// Check PyObject is a pathlib.Path - static void *convertible(PyObject *object) { - // paranoia - if (object == nullptr) { - return nullptr; - } - python::object boost_object(python::handle<>(python::borrowed(object))); - - std::string object_classname = boost::python::extract( - boost_object.attr("__class__").attr("__name__")); - // pathlib.Path is always specialized to the below derived classes - if (object_classname == "WindowsPath" || object_classname == "PosixPath") { - return object; - } - - return nullptr; - } - - /// Construct a std::string from pathlib.Path using its own __str__ attribute - static void construct( - PyObject *object, - boost::python::converter::rvalue_from_python_stage1_data *data) { - void *storage = - ((boost::python::converter::rvalue_from_python_storage *)data) - ->storage.bytes; - python::object boost_object{python::handle<>{python::borrowed(object)}}; - new (storage) - T{boost::python::extract{boost_object.attr("__str__")()}}; - data->convertible = storage; - } -}; - // // allows rdkit objects to be pickled. struct rdkit_pickle_suite : python::pickle_suite { diff --git a/Code/RDBoost/Wrap/RDBase.cpp b/Code/RDBoost/Wrap/RDBase.cpp index 723ce3eea..69031b106 100644 --- a/Code/RDBoost/Wrap/RDBase.cpp +++ b/Code/RDBoost/Wrap/RDBase.cpp @@ -234,6 +234,81 @@ struct python_ostream_wrapper { }; void seedRNG(unsigned int seed) { std::srand(seed); } + +/// Simple Boost.Python custom converter from pathlib.Path to std::string +template +struct path_converter { + path_converter() { + python::converter::registry::push_back(&path_converter::convertible, + &path_converter::construct, + boost::python::type_id()); + } + + /// Check PyObject is a pathlib.Path + static void *convertible(PyObject *object) { + // paranoia + if (object == nullptr) { + return nullptr; + } + python::object boost_object(python::handle<>(python::borrowed(object))); + + std::string object_classname = boost::python::extract( + boost_object.attr("__class__").attr("__name__")); + // pathlib.Path is always specialized to the below derived classes + if (object_classname == "WindowsPath" || object_classname == "PosixPath") { + return object; + } + + return nullptr; + } + + /// Construct a std::string from pathlib.Path using its own __str__ attribute + static void construct( + PyObject *object, + boost::python::converter::rvalue_from_python_stage1_data *data) { + void *storage = + ((boost::python::converter::rvalue_from_python_storage *)data) + ->storage.bytes; + python::object boost_object{python::handle<>{python::borrowed(object)}}; + new (storage) + T{boost::python::extract{boost_object.attr("__str__")()}}; + data->convertible = storage; + } +}; + +/// Convert a Python str to a std::string_view +template +struct string_view_converter { + string_view_converter() { + python::converter::registry::push_back(&string_view_converter::convertible, + &string_view_converter::construct, + boost::python::type_id()); + } + + /// Check PyObject is a str + static void *convertible(PyObject *object) { + if (object != nullptr && PyUnicode_Check(object)) { + return object; + } + return nullptr; + } + + /// Construct a std::string_view from the internal string of the PyObject. + /// This shouldn´t fail, as we block the Python thread until the C++ code + /// returns, so the PyObject and the internal string should remain valid. + static void construct( + PyObject *object, + boost::python::converter::rvalue_from_python_stage1_data *data) { + const char *tmp = PyUnicode_AsUTF8(object); + + void *storage = + ((boost::python::converter::rvalue_from_python_storage *)data) + ->storage.bytes; + new (storage) T{tmp}; + data->convertible = storage; + } +}; + } // namespace BOOST_PYTHON_MODULE(rdBase) { @@ -256,6 +331,7 @@ BOOST_PYTHON_MODULE(rdBase) { RegisterVectorConverter>("MatchTypeVect"); path_converter(); + string_view_converter(); RegisterListConverter(); RegisterListConverter>(); diff --git a/Code/RDGeneral/Dict.h b/Code/RDGeneral/Dict.h index fd3d41c35..90e9bb5ae 100644 --- a/Code/RDGeneral/Dict.h +++ b/Code/RDGeneral/Dict.h @@ -18,6 +18,7 @@ #include #include +#include #include #include "RDValue.h" #include "Exceptions.h" @@ -41,7 +42,9 @@ class RDKIT_RDGENERAL_EXPORT Dict { Pair() : key(), val() {} explicit Pair(std::string s) : key(std::move(s)), val() {} + explicit Pair(std::string_view s) : key(std::string(s)), val() {} Pair(std::string s, const RDValue &v) : key(std::move(s)), val(v) {} + Pair(std::string_view s, const RDValue &v) : key(std::string(s)), val(v) {} // In the case you are holding onto an rdvalue outside of a dictionary // or other container, you kust call cleanup to release non POD memory. void cleanup() { RDValue::cleanup_rdvalue(val); } @@ -146,7 +149,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { //! \brief Returns whether or not the dictionary contains a particular //! key. - bool hasVal(const std::string &what) const { + bool hasVal(const std::string_view what) const { for (const auto &data : _data) { if (data.key == what) { return true; @@ -183,13 +186,13 @@ class RDKIT_RDGENERAL_EXPORT Dict { a KeyErrorException will be thrown. */ template - void getVal(const std::string &what, T &res) const { + void getVal(const std::string_view what, T &res) const { res = getVal(what); } //! \overload template - T getVal(const std::string &what) const { + T getVal(const std::string_view what) const { for (auto &data : _data) { if (data.key == what) { return from_rdvalue(data.val); @@ -199,7 +202,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { } //! \overload - void getVal(const std::string &what, std::string &res) const { + void getVal(const std::string_view what, std::string &res) const { for (const auto &i : _data) { if (i.key == what) { rdvalue_tostring(i.val, res); @@ -224,7 +227,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { a KeyErrorException will be thrown. */ template - bool getValIfPresent(const std::string &what, T &res) const { + bool getValIfPresent(const std::string_view what, T &res) const { for (const auto &data : _data) { if (data.key == what) { res = from_rdvalue(data.val); @@ -235,7 +238,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { } //! \overload - bool getValIfPresent(const std::string &what, std::string &res) const { + bool getValIfPresent(const std::string_view what, std::string &res) const { for (const auto &i : _data) { if (i.key == what) { rdvalue_tostring(i.val, res); @@ -259,7 +262,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { the value will be replaced. */ template - void setVal(const std::string &what, T &val) { + void setVal(const std::string_view what, T &val) { static_assert(!std::is_same_v, "T cannot be string_view"); _hasNonPodData = true; @@ -274,7 +277,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { } template - void setPODVal(const std::string &what, T val) { + void setPODVal(const std::string_view what, T val) { static_assert(!std::is_same_v, "T cannot be string_view"); // don't change the hasNonPodData status @@ -288,20 +291,20 @@ class RDKIT_RDGENERAL_EXPORT Dict { _data.push_back(Pair(what, val)); } - void setVal(const std::string &what, bool val) { setPODVal(what, val); } + void setVal(const std::string_view what, bool val) { setPODVal(what, val); } - void setVal(const std::string &what, double val) { setPODVal(what, val); } + void setVal(const std::string_view what, double val) { setPODVal(what, val); } - void setVal(const std::string &what, float val) { setPODVal(what, val); } + void setVal(const std::string_view what, float val) { setPODVal(what, val); } - void setVal(const std::string &what, int val) { setPODVal(what, val); } + void setVal(const std::string_view what, int val) { setPODVal(what, val); } - void setVal(const std::string &what, unsigned int val) { + void setVal(const std::string_view what, unsigned int val) { setPODVal(what, val); } //! \overload - void setVal(const std::string &what, const char *val) { + void setVal(const std::string_view what, const char *val) { std::string h(val); setVal(what, h); } @@ -314,7 +317,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { \param what the key to clear */ - void clearVal(const std::string &what) { + void clearVal(const std::string_view what) { for (DataType::iterator it = _data.begin(); it < _data.end(); ++it) { if (it->key == what) { if (_hasNonPodData) { @@ -346,7 +349,8 @@ class RDKIT_RDGENERAL_EXPORT Dict { }; template <> -inline std::string Dict::getVal(const std::string &what) const { +inline std::string Dict::getVal( + const std::string_view what) const { std::string res; getVal(what, res); return res; @@ -356,27 +360,24 @@ inline std::string Dict::getVal(const std::string &what) const { // Dict::Pairs require containers for memory management // This utility class covers cleanup and copying class PairHolder : public Dict::Pair { -public: - PairHolder() : Pair() {} - + public: + PairHolder() : Pair() {} + explicit PairHolder(const PairHolder &p) : Pair(p.key) { copy_rdvalue(this->val, p.val); } - explicit PairHolder(PairHolder&&p) : Pair(p.key) { + explicit PairHolder(PairHolder &&p) : Pair(p.key) { this->val = p.val; p.val.type = RDTypeTag::EmptyTag; } - explicit PairHolder(Dict::Pair&&p) : Pair(p.key) { + explicit PairHolder(Dict::Pair &&p) : Pair(p.key) { this->val = p.val; p.val.type = RDTypeTag::EmptyTag; } - ~PairHolder() { - RDValue::cleanup_rdvalue(this->val); - } - + ~PairHolder() { RDValue::cleanup_rdvalue(this->val); } }; } // namespace RDKit #endif diff --git a/Code/RDGeneral/Exceptions.h b/Code/RDGeneral/Exceptions.h index a2034b977..3a3aff14d 100644 --- a/Code/RDGeneral/Exceptions.h +++ b/Code/RDGeneral/Exceptions.h @@ -12,6 +12,7 @@ #define _RD_EXCEPTIONS_H #include #include +#include #include //! \brief Class to allow us to throw an \c IndexError from C++ and have @@ -59,6 +60,10 @@ class RDKIT_RDGENERAL_EXPORT KeyErrorException : public std::runtime_error { : std::runtime_error("KeyErrorException"), _key(key), _msg("Key Error: " + key) {} + KeyErrorException(std::string_view key) + : std::runtime_error("KeyErrorException"), + _key(key), + _msg("Key Error: " + _key) {} std::string key() const { return _key; } const char *what() const noexcept override { return _msg.c_str(); } diff --git a/Code/RDGeneral/RDProps.h b/Code/RDGeneral/RDProps.h index c17f5d212..07b3af740 100644 --- a/Code/RDGeneral/RDProps.h +++ b/Code/RDGeneral/RDProps.h @@ -74,12 +74,12 @@ class RDProps { //! \overload template - void setProp(const std::string &key, T val, bool computed = false) const { + void setProp(const std::string_view key, T val, bool computed = false) const { if (computed) { STR_VECT compLst; getPropIfPresent(RDKit::detail::computedPropName, compLst); if (std::find(compLst.begin(), compLst.end(), key) == compLst.end()) { - compLst.push_back(key); + compLst.emplace_back(key); d_props.setVal(RDKit::detail::computedPropName, compLst); } } @@ -104,13 +104,13 @@ class RDProps { */ //! \overload template - void getProp(const std::string &key, T &res) const { + void getProp(const std::string_view key, T &res) const { d_props.getVal(key, res); } //! \overload template - T getProp(const std::string &key) const { + T getProp(const std::string_view key) const { return d_props.getVal(key); } @@ -118,12 +118,12 @@ class RDProps { //! and assigns the value if we do //! \overload template - bool getPropIfPresent(const std::string &key, T &res) const { + bool getPropIfPresent(const std::string_view key, T &res) const { return d_props.getValIfPresent(key, res); } //! \overload - bool hasProp(const std::string &key) const { return d_props.hasVal(key); } + bool hasProp(const std::string_view key) const { return d_props.hasVal(key); } //! clears the value of a \c property /*! @@ -133,7 +133,7 @@ class RDProps { from our list of \c computedProperties */ //! \overload - void clearProp(const std::string &key) const { + void clearProp(const std::string_view key) const { STR_VECT compLst; if (getPropIfPresent(RDKit::detail::computedPropName, compLst)) { auto svi = std::find(compLst.begin(), compLst.end(), key); diff --git a/ReleaseNotes.md b/ReleaseNotes.md index cdb0263f5..f9b73756a 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -9,6 +9,9 @@ GitHub) ## Highlights ## Backwards incompatible changes: +- The `Dict` class (and therefore all the properties interfaces) has been updated + to `std::string_view` keys. This is transparent to the Python interfaces, + but some C++ class might have to be updated. - Simple AND queries are now merged into atoms. E.g. `[C&+]` now produces the the same result as `[C+]` when parsed as SMARTS.