GitHub 7865 haspropwithvaluequery leaks (#7872)

* Properly cleanup Dict::Pair when serializing HasPropWithQueryValue

* Make sure pickling doesn't change original molecule

* Fix bad cut and paste

* Add PairHolder utility class for memory management of non Dict Dict::Pairs, fix mem leak in pickler

* Edit comment to force a rebuild

* Ignore PairHolder from Java/Swig builds

* Ignore PairHolder API from swig

* Reponses to review

* Add backward incompatible change

* Make release note a bullet point
This commit is contained in:
Brian Kelley
2024-10-30 01:12:40 -04:00
committed by GitHub
parent 08bf105e5d
commit eacc365b27
8 changed files with 64 additions and 13 deletions

View File

@@ -387,8 +387,8 @@ QueryDetails getQueryDetails(const Query<int, T const *, true> *query) {
return QueryDetails(
std::make_tuple(MolPickler::QUERY_SET, std::move(tset)));
} else if (auto q = dynamic_cast<const HasPropWithValueQueryBase *>(query)) {
return std::make_tuple(MolPickler::QUERY_PROPERTY_WITH_VALUE, q->getPair(),
q->getTolerance());
return QueryDetails(std::make_tuple(MolPickler::QUERY_PROPERTY_WITH_VALUE,
q->getPair(), q->getTolerance()));
} else {
throw MolPicklerException("do not know how to pickle part of the query.");
}
@@ -465,10 +465,10 @@ void pickleQuery(std::ostream &ss, const Query<int, T const *, true> *query) {
streamWrite(ss, MolPickler::QUERY_VALUE, pval);
} break;
case 6: {
auto v = boost::get<std::tuple<MolPickler::Tags, Dict::Pair, double>>(
auto &v = boost::get<std::tuple<MolPickler::Tags, PairHolder, double>>(
qdetails);
streamWrite(ss, std::get<0>(v));
// The tolerance is pickled first as we can't pickle the Dict::Pair with
// The tolerance is pickled first as we can't pickle a PairHolder with
// the QUERY_VALUE tag
streamWrite(ss, MolPickler::QUERY_VALUE, std::get<2>(v));
streamWriteProp(ss, std::get<1>(v),
@@ -630,7 +630,7 @@ Query<int, T const *, true> *buildBaseQuery(std::istream &ss, T const *owner,
}
double tolerance{0.0};
streamRead(ss, tolerance, version);
Dict::Pair pair;
PairHolder pair;
bool hasNonPod = false;
streamReadProp(ss, pair, hasNonPod, MolPickler::getCustomPropHandlers());
switch (pair.val.getTag()) {
@@ -667,8 +667,6 @@ Query<int, T const *, true> *buildBaseQuery(std::istream &ss, T const *owner,
}
} break;
}
// hasNonPod should be false for now...
} break;
default:
throw MolPicklerException("unknown query-type tag encountered");

View File

@@ -314,7 +314,7 @@ using QueryDetails = boost::variant<
std::tuple<MolPickler::Tags, int32_t, int32_t, int32_t, char>,
std::tuple<MolPickler::Tags, std::set<int32_t>>,
std::tuple<MolPickler::Tags, std::string>,
std::tuple<MolPickler::Tags, Dict::Pair, double>>;
std::tuple<MolPickler::Tags, PairHolder, double>>;
// clang-format on
template <class T>
QueryDetails getQueryDetails(const Queries::Query<int, T const *, true> *query);

View File

@@ -866,7 +866,7 @@ class HasPropWithValueQueryBase {
public:
HasPropWithValueQueryBase() = default;
virtual ~HasPropWithValueQueryBase() = default;
virtual Dict::Pair getPair() const = 0;
virtual PairHolder getPair() const = 0;
virtual double getTolerance() const = 0;
};
@@ -886,7 +886,7 @@ class HasPropWithValueQuery
this->setDataFunc(0);
}
Dict::Pair getPair() const override { return Dict::Pair(propname, val); }
PairHolder getPair() const override { return PairHolder(Dict::Pair(propname, val)); }
double getTolerance() const override { return tolerance; }
@@ -968,7 +968,7 @@ class HasPropWithValueQuery<TargetPtr, std::string>
this->setDataFunc(nullptr);
}
Dict::Pair getPair() const override { return Dict::Pair(propname, val); }
PairHolder getPair() const override { return PairHolder(Dict::Pair(propname, val)); }
double getTolerance() const override { return 0.0; }
@@ -1040,7 +1040,7 @@ class HasPropWithValueQuery<TargetPtr, ExplicitBitVect>
this->setDataFunc(nullptr);
}
Dict::Pair getPair() const override { return Dict::Pair(propname, val); }
PairHolder getPair() const override { return PairHolder(Dict::Pair(propname, val)); }
double getTolerance() const override { return tol; }

View File

@@ -579,6 +579,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 1);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, *mol, ps).size() == 1);
}
{
std::string pkl;
@@ -587,14 +589,18 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol2, ps).size() == 0);
}
{
std::string pkl;
MolPickler::pickleMol(mol2, pkl);
MolPickler::pickleMol(mol3, pkl);
RWMol pklmol(pkl);
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol3, ps).size() == 0);
}
}
SECTION("basics string") {
@@ -636,6 +642,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 1);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, *mol, ps).size() == 1);
}
{
std::string pkl;
@@ -644,6 +652,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol2, ps).size() == 0);
}
{
std::string pkl;
@@ -652,6 +662,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol3, ps).size() == 0);
}
}
SECTION("basics EBV") {
@@ -698,6 +710,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 1);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, *mol, ps).size() == 1);
}
{
std::string pkl;
@@ -706,6 +720,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol2, ps).size() == 0);
}
{
std::string pkl;
@@ -714,6 +730,8 @@ TEST_CASE("pickling HasPropWithValue queries") {
REQUIRE(pklmol.getAtomWithIdx(0)->hasQuery());
REQUIRE(pklmol.getBondWithIdx(0)->hasQuery());
CHECK(SubstructMatch(*target, pklmol, ps).size() == 0);
// make sure we are idempotent in pickling
CHECK(SubstructMatch(*target, mol3, ps).size() == 0);
}
}
}

View File

@@ -36,6 +36,7 @@
%}
%ignore RDKit::Dict::Pair;
%ignore RDKit::PairHolder;
%include <RDGeneral/Dict.h>

View File

@@ -36,6 +36,9 @@
%}
%ignore RDKit::RecursiveStructureQuery::d_mutex;
%ignore RDKit::HasPropWithValueQuery::getPair;
%ignore RDKit::HasPropWithValueQueryBase::getPair;
%include <Query/QueryObjects.h>
%include <Query/EqualityQuery.h>
%include <GraphMol/QueryOps.h>

View File

@@ -42,6 +42,9 @@ class RDKIT_RDGENERAL_EXPORT Dict {
Pair() : key(), val() {}
explicit Pair(std::string s) : key(std::move(s)), val() {}
Pair(std::string s, const RDValue &v) : key(std::move(s)), val(v) {}
// In the case you are holding onto an rdvalue outside of a dictionary
// or other container, you kust call cleanup to release non POD memory.
void cleanup() { RDValue::cleanup_rdvalue(val); }
};
typedef std::vector<Pair> DataType;
@@ -349,5 +352,31 @@ inline std::string Dict::getVal<std::string>(const std::string &what) const {
return res;
}
// Utility class for holding a Dict::Pair
// Dict::Pairs require containers for memory management
// This utility class covers cleanup and copying
class PairHolder : public Dict::Pair {
public:
PairHolder() : Pair() {}
explicit PairHolder(const PairHolder &p) : Pair(p.key) {
copy_rdvalue(this->val, p.val);
}
explicit PairHolder(PairHolder&&p) : Pair(p.key) {
this->val = p.val;
p.val.type = RDTypeTag::EmptyTag;
}
explicit PairHolder(Dict::Pair&&p) : Pair(p.key) {
this->val = p.val;
p.val.type = RDTypeTag::EmptyTag;
}
~PairHolder() {
RDValue::cleanup_rdvalue(this->val);
}
};
} // namespace RDKit
#endif

View File

@@ -18,6 +18,8 @@ GraphMol/RGroupDecomposition/RGroupDecompJSONParsers.h, respectively.
with the underlying types. This may require existing C++ code using those
functions to be updated accordingly.
- HasPropWithValueQueryBase used RDKit::Dict::Pair to return data used for serializing object in a molecule pickle. This has been changed to RDKit::PairHolder which automatically manages memory.
## New Features and Enhancements:
## Bug Fixes: