Hide data representation inside RDKit::Dict (#9113)

* Remove Dict::getData() for a strict abstraction boundary

Replace direct access to Dict's internal std::vector<Pair> with
encapsulated methods: size(), empty(), const iteration via
begin()/end(), appendPair(), markNonPOD(), and getRawVal().

This enables future changes to Dict's internal representation
without breaking callers.

Ref: rdkit/rdkit#9112

* Harden Dict::appendPair to take a populated Pair by move

appendPair(Pair&&) now auto-detects non-POD status via
RDValue::needsCleanup(), eliminating markNonPOD() and the
risk of dangling references or uninitialized entries.

needsCleanup() is placed next to destroy() on RDValue to
keep the POD/non-POD distinction in one place.

* Remove vestigial dictHasNonPOD param from streamReadProp

Both callers ignored the output. Non-POD detection is now handled
by Dict::appendPair via RDValue::needsCleanup().

* unbork java build

* Address PR review: bulk append, rename getRawVal, add custom data test

- Add Dict::append(vector<Pair>&&) for bulk insertion with reserve
- Use bulk append in streamReadProps to restore pre-allocation
- Rename getRawVal -> getRDValue per reviewer preference
- Add test verifying custom AnyTag data is destroyed through Dict lifecycle

* heed self-review

* don't manually implement vec.insert

* Add test: ExplicitBitVect round-trip through Dict serialization

Exercises the full streamWriteProps/streamReadProps path with an
ExplicitBitVect in an RDProps Dict, confirming the custom handler
is invoked and no memory is leaked (verified under valgrind).

* in anyTag test, assert destructors ran a specific number of times.

---------

Co-authored-by: bddap (Coding Agent) <andrew+bot@dirksen.com>
This commit is contained in:
Andrew Dirksen
2026-03-19 22:58:36 -07:00
committed by GitHub
parent f7a3f044ff
commit cbedbb7819
12 changed files with 133 additions and 38 deletions

View File

@@ -1374,7 +1374,7 @@ TEST_CASE("test16BitVectProps") {
Dict d;
d.setVal<ExplicitBitVect>("exp", bv);
RDValue &value = d.getData()[0].val;
const RDValue &value = d.getRDValue("exp");
DataStructsExplicitBitVecPropHandler bv_handler;
std::vector<CustomPropHandler *> handlers = {&bv_handler, bv_handler.clone()};
@@ -1392,6 +1392,30 @@ TEST_CASE("test16BitVectProps") {
delete handlers[1];
}
TEST_CASE("ExplicitBitVect round-trips through streamWriteProps/streamReadProps") {
ExplicitBitVect bv(64);
for (unsigned int i = 0; i < 64; i += 3) {
bv.setBit(i);
}
CustomPropHandlerVec handlers;
handlers.push_back(std::shared_ptr<const CustomPropHandler>(
new DataStructsExplicitBitVecPropHandler));
RDProps src;
src.setProp<ExplicitBitVect>("bv", bv);
src.setProp<int>("num", 42);
std::stringstream ss;
streamWriteProps(ss, src, false, false, handlers);
RDProps dst;
streamReadProps(ss, dst, handlers);
REQUIRE(dst.getProp<int>("num") == 42);
REQUIRE(dst.getProp<ExplicitBitVect>("bv") == bv);
}
TEST_CASE("test17Github3994") {
SparseIntVect<std::uint32_t> siv1(128);
siv1.setVal(0, 3);

View File

@@ -137,7 +137,7 @@ void copyProperties(
// since we don't want to export these.
origin.clearComputedProps();
for (const auto &prop : origin.getDict().getData()) {
for (const auto &prop : origin.getDict()) {
// Skip the property holding the names of the computed properties
if (prop.key == detail::computedPropName) {
continue;

View File

@@ -6204,7 +6204,7 @@ void check_roundtripped_properties(RDProps &original, RDProps &roundtrip) {
REQUIRE(std::includes(roundtripPropNames.begin(), roundtripPropNames.end(),
originalPropNames.begin(), originalPropNames.end()));
for (const auto &o : original.getDict().getData()) {
for (const auto &o : original.getDict()) {
if (o.key == detail::computedPropName) {
continue;
}

View File

@@ -245,9 +245,9 @@ void addBond(const Bond &bond, bj::object &bjBond, const bj::object &bjDefaults,
template <typename T>
void addProperties(const T &obj, const std::vector<std::string> &propNames,
bj::object &properties) {
const auto &data = obj.getDict().getData();
const auto &rd_dict = obj.getDict();
for (auto &rdvalue : data) {
for (const auto &rdvalue : rd_dict) {
if (std::find(propNames.begin(), propNames.end(), rdvalue.key) ==
propNames.end()) {
continue;

View File

@@ -632,8 +632,7 @@ Query<int, T const *, true> *buildBaseQuery(std::istream &ss, T const *owner,
double tolerance{0.0};
streamRead(ss, tolerance, version);
PairHolder pair;
bool hasNonPod = false;
streamReadProp(ss, pair, hasNonPod, MolPickler::getCustomPropHandlers());
streamReadProp(ss, pair, MolPickler::getCustomPropHandlers());
switch (pair.val.getTag()) {
case RDTypeTag::IntTag:
res = makePropQuery<T, int>(pair.key, rdvalue_cast<int>(pair.val),

View File

@@ -99,11 +99,10 @@ boost::python::dict GetPropsAsDict(const T &obj, bool includePrivate,
bool includeComputed,
bool autoConvertStrings = true) {
boost::python::dict dict;
auto &rd_dict = obj.getDict();
auto &data = rd_dict.getData();
const auto &rd_dict = obj.getDict();
STR_VECT keys = obj.getPropList(includePrivate, includeComputed);
for (auto &rdvalue : data) {
for (const auto &rdvalue : rd_dict) {
if (std::find(keys.begin(), keys.end(), rdvalue.key) == keys.end()) {
continue;
}
@@ -242,8 +241,8 @@ PyObject *GetPyProp(const RDOb *obj, const std::string &key, bool autoConvert) {
return nullptr;
}
} else {
const auto &data = obj->getDict().getData();
for (auto &rdvalue : data) {
const auto &rd_dict = obj->getDict();
for (const auto &rdvalue : rd_dict) {
if (rdvalue.key == key) {
try {
const auto tag = rdvalue.val.getTag();

View File

@@ -2571,14 +2571,14 @@ void check_dest(RWMol *m1, const ROMol &m2) {
CHECK(m1->getNumAtoms() == 0);
CHECK(m1->getNumBonds() == 0);
CHECK(m1->getPropList().empty());
CHECK(m1->getDict().getData().empty());
CHECK(m1->getDict().empty());
CHECK(m1->getStereoGroups().empty());
CHECK(getSubstanceGroups(*m1).empty());
CHECK(m1->getRingInfo() == nullptr);
// make sure we can still do something with m1:
*m1 = m2;
CHECK(!m1->getDict().getData().empty());
CHECK(!m1->getDict().empty());
CHECK(m1->getNumAtoms() == 8);
CHECK(m1->getNumBonds() == 7);
CHECK(m1->getRingInfo() != nullptr);

View File

@@ -40,6 +40,11 @@
%ignore RDKit::Dict::Pair;
%ignore RDKit::PairHolder;
%ignore RDKit::Dict::extend;
%ignore RDKit::Dict::getRDValue;
%ignore RDKit::Dict::begin;
%ignore RDKit::Dict::end;
%ignore RDKit::Dict::const_iterator;
%include <RDGeneral/Dict.h>

View File

@@ -16,7 +16,6 @@
#ifndef RD_DICT_H_012020
#define RD_DICT_H_012020
#include <map>
#include <string>
#include <string_view>
#include <vector>
@@ -136,14 +135,41 @@ class RDKIT_RDGENERAL_EXPORT Dict {
}
//----------------------------------------------------------
//! \brief Access to the underlying non-POD containment flag
//! This is meant to be used only in bulk updates of _data.
bool &getNonPODStatus() { return _hasNonPodData; }
//! \brief Returns the number of entries in the dictionary
std::size_t size() const { return _data.size(); }
//----------------------------------------------------------
//! \brief Access to the underlying data.
const DataType &getData() const { return _data; }
DataType &getData() { return _data; }
//! \brief Returns whether the dictionary is empty
bool empty() const { return _data.empty(); }
using const_iterator = DataType::const_iterator;
const_iterator begin() const { return _data.begin(); }
const_iterator end() const { return _data.end(); }
//! \brief Appends a populated Pair to the dictionary.
void insert(Pair &&pair) {
_hasNonPodData |= pair.val.needsCleanup();
_data.push_back(std::move(pair));
}
//! \brief Bulk-appends a vector of Pairs, moving them into the dictionary.
void extend(std::vector<Pair> &&pairs) {
for (auto &p : pairs) {
_hasNonPodData |= p.val.needsCleanup();
}
_data.insert(_data.end(), std::make_move_iterator(pairs.begin()),
std::make_move_iterator(pairs.end()));
}
//! \brief Returns a const reference to the RDValue for a key.
//! Throws KeyErrorException if the key is not found.
const RDValue &getRDValue(const std::string_view what) const {
for (const auto &data : _data) {
if (data.key == what) {
return data.val;
}
}
throw KeyErrorException(what);
}
//----------------------------------------------------------
@@ -349,7 +375,7 @@ class RDKIT_RDGENERAL_EXPORT Dict {
*/
void clearVal(const std::string_view what) {
for (DataType::iterator it = _data.begin(); it < _data.end(); ++it) {
for (auto it = _data.begin(); it < _data.end(); ++it) {
if (it->key == what) {
if (_hasNonPodData) {
RDValue::cleanup_rdvalue(it->val);

View File

@@ -262,6 +262,22 @@ struct RDValue {
// be wrapped in a container.
// The idea is that POD types don't need to be destroyed
// and this allows the container optimization possibilities.
bool needsCleanup() const {
switch (type) {
case RDTypeTag::StringTag:
case RDTypeTag::AnyTag:
case RDTypeTag::VecDoubleTag:
case RDTypeTag::VecFloatTag:
case RDTypeTag::VecIntTag:
case RDTypeTag::VecUnsignedIntTag:
case RDTypeTag::VecStringTag:
return true;
default:
return false;
}
}
// Keep in sync with needsCleanup() above.
void destroy() {
switch (type) {
case RDTypeTag::StringTag:

View File

@@ -501,7 +501,7 @@ inline bool streamWriteProps(
const Dict &dict = props.getDict();
COUNT_TYPE count = 0;
for (const auto &elem : dict.getData()) {
for (const auto &elem : dict) {
if (propnames.find(elem.key) != propnames.end()) {
if (isSerializable(elem, handlers)) {
count++;
@@ -514,7 +514,7 @@ inline bool streamWriteProps(
}
COUNT_TYPE writtenCount = 0;
for (const auto &elem : dict.getData()) {
for (const auto &elem : dict) {
if (propnames.find(elem.key) != propnames.end()) {
if (isSerializable(elem, handlers)) {
// note - not all properties are serializable, this may be
@@ -559,7 +559,6 @@ inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
}
inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
bool &dictHasNonPOD,
const CustomPropHandlerVec &handlers = {}) {
int version = 0;
streamRead(ss, pair.key, version);
@@ -585,27 +584,21 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
case DTags::StringTag:
readRDValueString(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::VecStringTag:
readRDStringVecValue(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::VecIntTag:
readRDVecValue<int>(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::VecUIntTag:
readRDVecValue<unsigned int>(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::VecFloatTag:
readRDVecValue<float>(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::VecDoubleTag:
readRDVecValue<double>(ss, pair.val);
dictHasNonPOD = true;
break;
case DTags::CustomTag: {
std::string propType;
@@ -614,7 +607,6 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
for (auto &handler : handlers) {
if (propType == handler->getPropName()) {
handler->read(ss, pair.val);
dictHasNonPOD = true;
return true;
}
}
@@ -638,13 +630,12 @@ inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
if (reset) {
dict.reset(); // Clear data before repopulating
}
auto startSz = dict.getData().size();
dict.getData().resize(startSz + count);
std::vector<Dict::Pair> pairs(count);
for (unsigned index = 0; index < count; ++index) {
CHECK_INVARIANT(streamReadProp(ss, dict.getData()[startSz + index],
dict.getNonPODStatus(), handlers),
CHECK_INVARIANT(streamReadProp(ss, pairs[index], handlers),
"Corrupted property serialization detected");
}
dict.extend(std::move(pairs));
return static_cast<unsigned int>(count);
}

View File

@@ -633,7 +633,7 @@ TEST_CASE("testCustomProps") {
Foo f(1, 2.f);
Dict d;
d.setVal<Foo>("foo", f);
RDValue &value = d.getData()[0].val;
const RDValue &value = d.getRDValue("foo");
FooHandler foo_handler;
std::vector<CustomPropHandler *> handlers = {&foo_handler,
foo_handler.clone()};
@@ -652,6 +652,41 @@ TEST_CASE("testCustomProps") {
delete handlers[1];
}
struct Bar {
int x{0};
int *dtor_count{nullptr};
Bar() = default;
Bar(int x, int *count) : x(x), dtor_count(count) {}
Bar(const Bar &o) = default;
Bar &operator=(const Bar &) = default;
~Bar() {
if (dtor_count) {
++(*dtor_count);
}
}
};
TEST_CASE("custom AnyTag data is destroyed through Dict lifecycle") {
int count = 0;
{
Dict d;
Bar b(42, &count);
d.setVal<Bar>("mybar", b);
REQUIRE(d.getVal<Bar>("mybar").x == 42);
}
REQUIRE(count == 3);
count = 0;
{
Dict d;
Bar b(7, &count);
d.setVal<Bar>("mybar", b);
Dict d2(d);
REQUIRE(d2.getVal<Bar>("mybar").x == 7);
}
REQUIRE(count == 4);
}
TEST_CASE("testGithub2910") {
Dict d;
d.setVal("foo", 1.0);