mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Hide data representation inside RDKit::Dict (#9113)
* Remove Dict::getData() for a strict abstraction boundary Replace direct access to Dict's internal std::vector<Pair> with encapsulated methods: size(), empty(), const iteration via begin()/end(), appendPair(), markNonPOD(), and getRawVal(). This enables future changes to Dict's internal representation without breaking callers. Ref: rdkit/rdkit#9112 * Harden Dict::appendPair to take a populated Pair by move appendPair(Pair&&) now auto-detects non-POD status via RDValue::needsCleanup(), eliminating markNonPOD() and the risk of dangling references or uninitialized entries. needsCleanup() is placed next to destroy() on RDValue to keep the POD/non-POD distinction in one place. * Remove vestigial dictHasNonPOD param from streamReadProp Both callers ignored the output. Non-POD detection is now handled by Dict::appendPair via RDValue::needsCleanup(). * unbork java build * Address PR review: bulk append, rename getRawVal, add custom data test - Add Dict::append(vector<Pair>&&) for bulk insertion with reserve - Use bulk append in streamReadProps to restore pre-allocation - Rename getRawVal -> getRDValue per reviewer preference - Add test verifying custom AnyTag data is destroyed through Dict lifecycle * heed self-review * don't manually implement vec.insert * Add test: ExplicitBitVect round-trip through Dict serialization Exercises the full streamWriteProps/streamReadProps path with an ExplicitBitVect in an RDProps Dict, confirming the custom handler is invoked and no memory is leaked (verified under valgrind). * in anyTag test, assert destructors ran a specific number of times. --------- Co-authored-by: bddap (Coding Agent) <andrew+bot@dirksen.com>
This commit is contained in:
@@ -1374,7 +1374,7 @@ TEST_CASE("test16BitVectProps") {
|
||||
|
||||
Dict d;
|
||||
d.setVal<ExplicitBitVect>("exp", bv);
|
||||
RDValue &value = d.getData()[0].val;
|
||||
const RDValue &value = d.getRDValue("exp");
|
||||
|
||||
DataStructsExplicitBitVecPropHandler bv_handler;
|
||||
std::vector<CustomPropHandler *> handlers = {&bv_handler, bv_handler.clone()};
|
||||
@@ -1392,6 +1392,30 @@ TEST_CASE("test16BitVectProps") {
|
||||
delete handlers[1];
|
||||
}
|
||||
|
||||
TEST_CASE("ExplicitBitVect round-trips through streamWriteProps/streamReadProps") {
|
||||
ExplicitBitVect bv(64);
|
||||
for (unsigned int i = 0; i < 64; i += 3) {
|
||||
bv.setBit(i);
|
||||
}
|
||||
|
||||
CustomPropHandlerVec handlers;
|
||||
handlers.push_back(std::shared_ptr<const CustomPropHandler>(
|
||||
new DataStructsExplicitBitVecPropHandler));
|
||||
|
||||
RDProps src;
|
||||
src.setProp<ExplicitBitVect>("bv", bv);
|
||||
src.setProp<int>("num", 42);
|
||||
|
||||
std::stringstream ss;
|
||||
streamWriteProps(ss, src, false, false, handlers);
|
||||
|
||||
RDProps dst;
|
||||
streamReadProps(ss, dst, handlers);
|
||||
|
||||
REQUIRE(dst.getProp<int>("num") == 42);
|
||||
REQUIRE(dst.getProp<ExplicitBitVect>("bv") == bv);
|
||||
}
|
||||
|
||||
TEST_CASE("test17Github3994") {
|
||||
SparseIntVect<std::uint32_t> siv1(128);
|
||||
siv1.setVal(0, 3);
|
||||
|
||||
@@ -137,7 +137,7 @@ void copyProperties(
|
||||
// since we don't want to export these.
|
||||
origin.clearComputedProps();
|
||||
|
||||
for (const auto &prop : origin.getDict().getData()) {
|
||||
for (const auto &prop : origin.getDict()) {
|
||||
// Skip the property holding the names of the computed properties
|
||||
if (prop.key == detail::computedPropName) {
|
||||
continue;
|
||||
|
||||
@@ -6204,7 +6204,7 @@ void check_roundtripped_properties(RDProps &original, RDProps &roundtrip) {
|
||||
REQUIRE(std::includes(roundtripPropNames.begin(), roundtripPropNames.end(),
|
||||
originalPropNames.begin(), originalPropNames.end()));
|
||||
|
||||
for (const auto &o : original.getDict().getData()) {
|
||||
for (const auto &o : original.getDict()) {
|
||||
if (o.key == detail::computedPropName) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -245,9 +245,9 @@ void addBond(const Bond &bond, bj::object &bjBond, const bj::object &bjDefaults,
|
||||
template <typename T>
|
||||
void addProperties(const T &obj, const std::vector<std::string> &propNames,
|
||||
bj::object &properties) {
|
||||
const auto &data = obj.getDict().getData();
|
||||
const auto &rd_dict = obj.getDict();
|
||||
|
||||
for (auto &rdvalue : data) {
|
||||
for (const auto &rdvalue : rd_dict) {
|
||||
if (std::find(propNames.begin(), propNames.end(), rdvalue.key) ==
|
||||
propNames.end()) {
|
||||
continue;
|
||||
|
||||
@@ -632,8 +632,7 @@ Query<int, T const *, true> *buildBaseQuery(std::istream &ss, T const *owner,
|
||||
double tolerance{0.0};
|
||||
streamRead(ss, tolerance, version);
|
||||
PairHolder pair;
|
||||
bool hasNonPod = false;
|
||||
streamReadProp(ss, pair, hasNonPod, MolPickler::getCustomPropHandlers());
|
||||
streamReadProp(ss, pair, MolPickler::getCustomPropHandlers());
|
||||
switch (pair.val.getTag()) {
|
||||
case RDTypeTag::IntTag:
|
||||
res = makePropQuery<T, int>(pair.key, rdvalue_cast<int>(pair.val),
|
||||
|
||||
@@ -99,11 +99,10 @@ boost::python::dict GetPropsAsDict(const T &obj, bool includePrivate,
|
||||
bool includeComputed,
|
||||
bool autoConvertStrings = true) {
|
||||
boost::python::dict dict;
|
||||
auto &rd_dict = obj.getDict();
|
||||
auto &data = rd_dict.getData();
|
||||
const auto &rd_dict = obj.getDict();
|
||||
|
||||
STR_VECT keys = obj.getPropList(includePrivate, includeComputed);
|
||||
for (auto &rdvalue : data) {
|
||||
for (const auto &rdvalue : rd_dict) {
|
||||
if (std::find(keys.begin(), keys.end(), rdvalue.key) == keys.end()) {
|
||||
continue;
|
||||
}
|
||||
@@ -242,8 +241,8 @@ PyObject *GetPyProp(const RDOb *obj, const std::string &key, bool autoConvert) {
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
const auto &data = obj->getDict().getData();
|
||||
for (auto &rdvalue : data) {
|
||||
const auto &rd_dict = obj->getDict();
|
||||
for (const auto &rdvalue : rd_dict) {
|
||||
if (rdvalue.key == key) {
|
||||
try {
|
||||
const auto tag = rdvalue.val.getTag();
|
||||
|
||||
@@ -2571,14 +2571,14 @@ void check_dest(RWMol *m1, const ROMol &m2) {
|
||||
CHECK(m1->getNumAtoms() == 0);
|
||||
CHECK(m1->getNumBonds() == 0);
|
||||
CHECK(m1->getPropList().empty());
|
||||
CHECK(m1->getDict().getData().empty());
|
||||
CHECK(m1->getDict().empty());
|
||||
CHECK(m1->getStereoGroups().empty());
|
||||
CHECK(getSubstanceGroups(*m1).empty());
|
||||
CHECK(m1->getRingInfo() == nullptr);
|
||||
|
||||
// make sure we can still do something with m1:
|
||||
*m1 = m2;
|
||||
CHECK(!m1->getDict().getData().empty());
|
||||
CHECK(!m1->getDict().empty());
|
||||
CHECK(m1->getNumAtoms() == 8);
|
||||
CHECK(m1->getNumBonds() == 7);
|
||||
CHECK(m1->getRingInfo() != nullptr);
|
||||
|
||||
@@ -40,6 +40,11 @@
|
||||
|
||||
%ignore RDKit::Dict::Pair;
|
||||
%ignore RDKit::PairHolder;
|
||||
%ignore RDKit::Dict::extend;
|
||||
%ignore RDKit::Dict::getRDValue;
|
||||
%ignore RDKit::Dict::begin;
|
||||
%ignore RDKit::Dict::end;
|
||||
%ignore RDKit::Dict::const_iterator;
|
||||
%include <RDGeneral/Dict.h>
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#ifndef RD_DICT_H_012020
|
||||
#define RD_DICT_H_012020
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
@@ -136,14 +135,41 @@ class RDKIT_RDGENERAL_EXPORT Dict {
|
||||
}
|
||||
|
||||
//----------------------------------------------------------
|
||||
//! \brief Access to the underlying non-POD containment flag
|
||||
//! This is meant to be used only in bulk updates of _data.
|
||||
bool &getNonPODStatus() { return _hasNonPodData; }
|
||||
//! \brief Returns the number of entries in the dictionary
|
||||
std::size_t size() const { return _data.size(); }
|
||||
|
||||
//----------------------------------------------------------
|
||||
//! \brief Access to the underlying data.
|
||||
const DataType &getData() const { return _data; }
|
||||
DataType &getData() { return _data; }
|
||||
//! \brief Returns whether the dictionary is empty
|
||||
bool empty() const { return _data.empty(); }
|
||||
|
||||
using const_iterator = DataType::const_iterator;
|
||||
const_iterator begin() const { return _data.begin(); }
|
||||
const_iterator end() const { return _data.end(); }
|
||||
|
||||
//! \brief Appends a populated Pair to the dictionary.
|
||||
void insert(Pair &&pair) {
|
||||
_hasNonPodData |= pair.val.needsCleanup();
|
||||
_data.push_back(std::move(pair));
|
||||
}
|
||||
|
||||
//! \brief Bulk-appends a vector of Pairs, moving them into the dictionary.
|
||||
void extend(std::vector<Pair> &&pairs) {
|
||||
for (auto &p : pairs) {
|
||||
_hasNonPodData |= p.val.needsCleanup();
|
||||
}
|
||||
_data.insert(_data.end(), std::make_move_iterator(pairs.begin()),
|
||||
std::make_move_iterator(pairs.end()));
|
||||
}
|
||||
|
||||
//! \brief Returns a const reference to the RDValue for a key.
|
||||
//! Throws KeyErrorException if the key is not found.
|
||||
const RDValue &getRDValue(const std::string_view what) const {
|
||||
for (const auto &data : _data) {
|
||||
if (data.key == what) {
|
||||
return data.val;
|
||||
}
|
||||
}
|
||||
throw KeyErrorException(what);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------
|
||||
|
||||
@@ -349,7 +375,7 @@ class RDKIT_RDGENERAL_EXPORT Dict {
|
||||
|
||||
*/
|
||||
void clearVal(const std::string_view what) {
|
||||
for (DataType::iterator it = _data.begin(); it < _data.end(); ++it) {
|
||||
for (auto it = _data.begin(); it < _data.end(); ++it) {
|
||||
if (it->key == what) {
|
||||
if (_hasNonPodData) {
|
||||
RDValue::cleanup_rdvalue(it->val);
|
||||
|
||||
@@ -262,6 +262,22 @@ struct RDValue {
|
||||
// be wrapped in a container.
|
||||
// The idea is that POD types don't need to be destroyed
|
||||
// and this allows the container optimization possibilities.
|
||||
bool needsCleanup() const {
|
||||
switch (type) {
|
||||
case RDTypeTag::StringTag:
|
||||
case RDTypeTag::AnyTag:
|
||||
case RDTypeTag::VecDoubleTag:
|
||||
case RDTypeTag::VecFloatTag:
|
||||
case RDTypeTag::VecIntTag:
|
||||
case RDTypeTag::VecUnsignedIntTag:
|
||||
case RDTypeTag::VecStringTag:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep in sync with needsCleanup() above.
|
||||
void destroy() {
|
||||
switch (type) {
|
||||
case RDTypeTag::StringTag:
|
||||
|
||||
@@ -501,7 +501,7 @@ inline bool streamWriteProps(
|
||||
|
||||
const Dict &dict = props.getDict();
|
||||
COUNT_TYPE count = 0;
|
||||
for (const auto &elem : dict.getData()) {
|
||||
for (const auto &elem : dict) {
|
||||
if (propnames.find(elem.key) != propnames.end()) {
|
||||
if (isSerializable(elem, handlers)) {
|
||||
count++;
|
||||
@@ -514,7 +514,7 @@ inline bool streamWriteProps(
|
||||
}
|
||||
|
||||
COUNT_TYPE writtenCount = 0;
|
||||
for (const auto &elem : dict.getData()) {
|
||||
for (const auto &elem : dict) {
|
||||
if (propnames.find(elem.key) != propnames.end()) {
|
||||
if (isSerializable(elem, handlers)) {
|
||||
// note - not all properties are serializable, this may be
|
||||
@@ -559,7 +559,6 @@ inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
|
||||
}
|
||||
|
||||
inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
|
||||
bool &dictHasNonPOD,
|
||||
const CustomPropHandlerVec &handlers = {}) {
|
||||
int version = 0;
|
||||
streamRead(ss, pair.key, version);
|
||||
@@ -585,27 +584,21 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
|
||||
|
||||
case DTags::StringTag:
|
||||
readRDValueString(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::VecStringTag:
|
||||
readRDStringVecValue(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::VecIntTag:
|
||||
readRDVecValue<int>(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::VecUIntTag:
|
||||
readRDVecValue<unsigned int>(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::VecFloatTag:
|
||||
readRDVecValue<float>(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::VecDoubleTag:
|
||||
readRDVecValue<double>(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
break;
|
||||
case DTags::CustomTag: {
|
||||
std::string propType;
|
||||
@@ -614,7 +607,6 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
|
||||
for (auto &handler : handlers) {
|
||||
if (propType == handler->getPropName()) {
|
||||
handler->read(ss, pair.val);
|
||||
dictHasNonPOD = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -638,13 +630,12 @@ inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
|
||||
if (reset) {
|
||||
dict.reset(); // Clear data before repopulating
|
||||
}
|
||||
auto startSz = dict.getData().size();
|
||||
dict.getData().resize(startSz + count);
|
||||
std::vector<Dict::Pair> pairs(count);
|
||||
for (unsigned index = 0; index < count; ++index) {
|
||||
CHECK_INVARIANT(streamReadProp(ss, dict.getData()[startSz + index],
|
||||
dict.getNonPODStatus(), handlers),
|
||||
CHECK_INVARIANT(streamReadProp(ss, pairs[index], handlers),
|
||||
"Corrupted property serialization detected");
|
||||
}
|
||||
dict.extend(std::move(pairs));
|
||||
|
||||
return static_cast<unsigned int>(count);
|
||||
}
|
||||
|
||||
@@ -633,7 +633,7 @@ TEST_CASE("testCustomProps") {
|
||||
Foo f(1, 2.f);
|
||||
Dict d;
|
||||
d.setVal<Foo>("foo", f);
|
||||
RDValue &value = d.getData()[0].val;
|
||||
const RDValue &value = d.getRDValue("foo");
|
||||
FooHandler foo_handler;
|
||||
std::vector<CustomPropHandler *> handlers = {&foo_handler,
|
||||
foo_handler.clone()};
|
||||
@@ -652,6 +652,41 @@ TEST_CASE("testCustomProps") {
|
||||
delete handlers[1];
|
||||
}
|
||||
|
||||
struct Bar {
|
||||
int x{0};
|
||||
int *dtor_count{nullptr};
|
||||
Bar() = default;
|
||||
Bar(int x, int *count) : x(x), dtor_count(count) {}
|
||||
Bar(const Bar &o) = default;
|
||||
Bar &operator=(const Bar &) = default;
|
||||
~Bar() {
|
||||
if (dtor_count) {
|
||||
++(*dtor_count);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_CASE("custom AnyTag data is destroyed through Dict lifecycle") {
|
||||
int count = 0;
|
||||
{
|
||||
Dict d;
|
||||
Bar b(42, &count);
|
||||
d.setVal<Bar>("mybar", b);
|
||||
REQUIRE(d.getVal<Bar>("mybar").x == 42);
|
||||
}
|
||||
REQUIRE(count == 3);
|
||||
|
||||
count = 0;
|
||||
{
|
||||
Dict d;
|
||||
Bar b(7, &count);
|
||||
d.setVal<Bar>("mybar", b);
|
||||
Dict d2(d);
|
||||
REQUIRE(d2.getVal<Bar>("mybar").x == 7);
|
||||
}
|
||||
REQUIRE(count == 4);
|
||||
}
|
||||
|
||||
TEST_CASE("testGithub2910") {
|
||||
Dict d;
|
||||
d.setVal("foo", 1.0);
|
||||
|
||||
Reference in New Issue
Block a user