Support serializing/deserializing FP generators to JSON (#9000)

This commit is contained in:
Greg Landrum
2025-12-24 19:09:05 +01:00
committed by GitHub
parent 47f2b7c1b5
commit b4164ea2f2
14 changed files with 628 additions and 38 deletions

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018 Boran Adas, Google Summer of Code
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -14,6 +14,11 @@
#include <GraphMol/Fingerprints/FingerprintUtil.h>
#include <RDGeneral/hash/hash.hpp>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace RDKit {
namespace AtomPair {
using namespace AtomPairs;
@@ -42,6 +47,19 @@ std::string AtomPairAtomInvGenerator::infoString() const {
std::to_string(df_topologicalTorsionCorrection);
}
//! Serializes this invariant generator into \c pt.
/*!
  The "type" tag is written first, followed by the two boolean settings;
  the base class is then given the chance to append its own entries.
*/
void AtomPairAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const {
  const bool chirality = this->df_includeChirality;
  const bool torsionCorrection = this->df_topologicalTorsionCorrection;

  pt.put("type", "AtomPairAtomInvGenerator");
  pt.put("includeChirality", chirality);
  pt.put("topologicalTorsionCorrection", torsionCorrection);

  AtomInvariantsGenerator::toJSON(pt);
}
//! Restores the settings of this invariant generator from \c pt.
/*!
  Each lookup uses the current member value as its default, so an entry
  that is not present in \c pt leaves the corresponding setting unchanged.
*/
void AtomPairAtomInvGenerator::fromJSON(const boost::property_tree::ptree &pt) {
  const bool chirality =
      pt.get<bool>("includeChirality", this->df_includeChirality);
  const bool torsionCorrection = pt.get<bool>(
      "topologicalTorsionCorrection", this->df_topologicalTorsionCorrection);

  this->df_includeChirality = chirality;
  this->df_topologicalTorsionCorrection = torsionCorrection;

  AtomInvariantsGenerator::fromJSON(pt);
}
AtomPairAtomInvGenerator *AtomPairAtomInvGenerator::clone() const {
return new AtomPairAtomInvGenerator(df_includeChirality,
df_topologicalTorsionCorrection);
@@ -73,6 +91,19 @@ std::string AtomPairArguments::infoString() const {
" minDistance=" + std::to_string(d_minDistance) +
" maxDistance=" + std::to_string(d_maxDistance);
}
//! Serializes the atom-pair specific arguments into \c pt.
/*!
  Writes the "type" tag and the atom-pair settings, then delegates to
  FingerprintArguments::toJSON() for the arguments shared by all
  fingerprint types.
*/
void AtomPairArguments::toJSON(boost::property_tree::ptree &pt) const {
  const auto use2D = this->df_use2D;
  const auto minDist = this->d_minDistance;
  const auto maxDist = this->d_maxDistance;

  pt.put("type", "AtomPairArguments");
  pt.put("use2D", use2D);
  pt.put("minDistance", minDist);
  pt.put("maxDistance", maxDist);

  FingerprintArguments::toJSON(pt);
}
//! Restores the atom-pair specific arguments from \c pt.
/*!
  The current member values serve as defaults for the lookups, so entries
  missing from \c pt keep their present value. The shared arguments are
  read afterwards by FingerprintArguments::fromJSON().
*/
void AtomPairArguments::fromJSON(const boost::property_tree::ptree &pt) {
  const bool use2D = pt.get<bool>("use2D", this->df_use2D);
  const unsigned int minDist =
      pt.get<unsigned int>("minDistance", this->d_minDistance);
  const unsigned int maxDist =
      pt.get<unsigned int>("maxDistance", this->d_maxDistance);

  this->df_use2D = use2D;
  this->d_minDistance = minDist;
  this->d_maxDistance = maxDist;

  FingerprintArguments::fromJSON(pt);
}
template <typename OutputType>
void AtomPairAtomEnv<OutputType>::updateAdditionalOutput(
@@ -210,6 +241,13 @@ std::string AtomPairEnvGenerator<OutputType>::infoString() const {
return "AtomPairEnvironmentGenerator";
}
//! Writes the "type" tag identifying this environment generator into \c pt
//! and then lets the base class add its entries.
template <typename OutputType>
void AtomPairEnvGenerator<OutputType>::toJSON(
    boost::property_tree::ptree &pt) const {
  using Base = AtomEnvironmentGenerator<OutputType>;

  pt.put("type", "AtomPairEnvGenerator");
  Base::toJSON(pt);
}
template <typename OutputType>
FingerprintGenerator<OutputType> *getAtomPairGenerator(
const AtomPairArguments &args,

View File

@@ -21,8 +21,8 @@ using namespace AtomPairs;
class RDKIT_FINGERPRINTS_EXPORT AtomPairAtomInvGenerator
: public AtomInvariantsGenerator {
const bool df_includeChirality;
const bool df_topologicalTorsionCorrection;
bool df_includeChirality;
bool df_topologicalTorsionCorrection;
public:
/**
@@ -40,6 +40,9 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairAtomInvGenerator
const ROMol &mol) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
AtomPairAtomInvGenerator *clone() const override;
};
@@ -55,25 +58,24 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairArguments
unsigned int d_maxDistance = maxPathLen - 1;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
/*!
\brief construct a new AtomPairArguments object
\param countSimulation if set, use count simulation while generating the
fingerprint
\param includeChirality if set, chirality will be used in the atom
invariants, this is ignored if atomInvariantsGenerator is present for
the /c FingerprintGenerator that uses this
\param use2D if set, the 2D (topological) distance matrix will be
used
\param minDistance minimum distance between atoms to be considered in a
\param countSimulation if set, use count simulation while generating
the fingerprint \param includeChirality if set, chirality will be used
in the atom invariants, this is ignored if atomInvariantsGenerator is
present for the /c FingerprintGenerator that uses this \param use2D if
set, the 2D (topological) distance matrix will be used \param
minDistance minimum distance between atoms to be considered in a
pair, default is 1 bond
\param maxDistance maximum distance between atoms to be considered in a
pair, default is maxPathLen-1 bonds
\param countBounds boundaries for count simulation, corresponding bit
will be set if the count is higher than the number provided for that spot
\param fpSize size of the generated fingerprint, does not affect the sparse
versions
\param maxDistance maximum distance between atoms to be considered
in a pair, default is maxPathLen-1 bonds \param countBounds boundaries
for count simulation, corresponding bit will be set if the count is
higher than the number provided for that spot \param fpSize size of the
generated fingerprint, does not affect the sparse versions
*/
AtomPairArguments(const bool countSimulation = true,
@@ -137,6 +139,7 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairEnvGenerator
const bool hashResults = false) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
OutputType getResultSize() const override;
};

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -20,6 +20,11 @@
#include <GraphMol/Fingerprints/RDKitFPGenerator.h>
#include <GraphMol/Fingerprints/TopologicalTorsionGenerator.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
#include <RDGeneral/RDThreads.h>
#ifdef RDK_BUILD_THREADSAFE_SSS
#include <thread>
@@ -50,6 +55,37 @@ std::string FingerprintArguments::commonArgumentsString() const {
" includeChirality=" + std::to_string(df_includeChirality);
}
//! Serializes the arguments shared by all fingerprint types into \c pt.
/*!
  The scalar settings are written as plain entries. The count bounds are
  written under "countBounds" as a node whose children all have an empty
  key and carry one bound each.
*/
void FingerprintArguments::toJSON(boost::property_tree::ptree &pt) const {
  using boost::property_tree::ptree;

  pt.put("countSimulation", df_countSimulation);
  pt.put("fpSize", d_fpSize);
  pt.put("numBitsPerFeature", d_numBitsPerFeature);
  pt.put("includeChirality", df_includeChirality);

  ptree boundsArray;
  for (const auto &bound : d_countBounds) {
    // a child with an empty key whose own value is the bound
    ptree entry;
    entry.put_value(bound);
    boundsArray.push_back(ptree::value_type("", entry));
  }
  pt.add_child("countBounds", boundsArray);
}
//! Restores the arguments shared by all fingerprint types from \c pt.
/*!
  Every setting uses its current value as the default for the lookup, so
  entries that are missing from \c pt are left untouched. The same rule is
  applied to the count bounds: they are only replaced when a "countBounds"
  node is actually present (an empty node yields an empty set of bounds).

  \param pt  property tree holding the serialized arguments
*/
void FingerprintArguments::fromJSON(const boost::property_tree::ptree &pt) {
  df_countSimulation = pt.get<bool>("countSimulation", df_countSimulation);
  d_fpSize = pt.get<std::uint32_t>("fpSize", d_fpSize);
  d_numBitsPerFeature =
      pt.get<std::uint32_t>("numBitsPerFeature", d_numBitsPerFeature);
  df_includeChirality = pt.get<bool>("includeChirality", df_includeChirality);

  auto countBoundsNode = pt.get_child_optional("countBounds");
  if (countBoundsNode) {
    // only discard the existing bounds once we know there are replacements;
    // clearing unconditionally would wipe the bounds for input that simply
    // does not mention them
    d_countBounds.clear();
    for (const auto &boundNode : *countBoundsNode) {
      d_countBounds.push_back(boundNode.second.get_value<std::uint32_t>());
    }
  }
}
template <typename OutputType>
FingerprintGenerator<OutputType>::FingerprintGenerator(
AtomEnvironmentGenerator<OutputType> *atomEnvironmentGenerator,
@@ -136,6 +172,154 @@ std::string FingerprintGenerator<OutputType>::infoString() const {
: "No bond invariants generator");
}
template RDKIT_FINGERPRINTS_EXPORT void FingerprintGenerator<
    std::uint32_t>::toJSON(boost::property_tree::ptree &pt) const;
template RDKIT_FINGERPRINTS_EXPORT void FingerprintGenerator<
    std::uint64_t>::toJSON(boost::property_tree::ptree &pt) const;

//! Serializes the generator and its components into \c pt.
/*!
  Writes a "name" entry followed by one child node per component. The
  fingerprint arguments and the atom environment generator are always
  written; the atom and bond invariants generators only if they are set.
*/
template <typename OutputType>
void FingerprintGenerator<OutputType>::toJSON(
    boost::property_tree::ptree &pt) const {
  // serializes one component into its own subtree and attaches it under key
  auto attach = [&pt](const char *key, const auto &component) {
    boost::property_tree::ptree subtree;
    component.toJSON(subtree);
    pt.add_child(key, subtree);
  };

  pt.put("name", "FingerprintGenerator");
  attach("fingerprintArguments", *dp_fingerprintArguments);
  attach("atomEnvironmentGenerator", *dp_atomEnvironmentGenerator);
  if (dp_atomInvariantsGenerator) {
    attach("atomInvariantsGenerator", *dp_atomInvariantsGenerator);
  }
  if (dp_bondInvariantsGenerator) {
    attach("bondInvariantsGenerator", *dp_bondInvariantsGenerator);
  }
}
// explicit instantiations for the two supported output types
template void FingerprintGenerator<std::uint32_t>::fromJSON(
const boost::property_tree::ptree &pt);
template void FingerprintGenerator<std::uint64_t>::fromJSON(
const boost::property_tree::ptree &pt);
//! Currently a no-op: the property tree is ignored and the generator is
//! left unchanged.
// NOTE(review): callers that want a generator built from JSON presumably
// need generatorFromJSON() instead - confirm this stub is intentional.
template <typename OutputType>
void FingerprintGenerator<OutputType>::fromJSON(
const boost::property_tree::ptree &) {}
template RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON(
const FingerprintGenerator<std::uint32_t> &generator);
template RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON(
const FingerprintGenerator<std::uint64_t> &generator);
template <typename OutputType>
std::string generatorToJSON(const FingerprintGenerator<OutputType> &generator) {
boost::property_tree::ptree pt;
generator.toJSON(pt);
std::ostringstream buf;
boost::property_tree::write_json(buf, pt, false);
auto str = buf.str();
boost::algorithm::trim(str);
return str;
}
//! Builds a FingerprintGenerator from its JSON representation.
/*!
  The JSON must contain a "fingerprintArguments" node and an
  "atomEnvironmentGenerator" node; "atomInvariantsGenerator" and
  "bondInvariantsGenerator" nodes are optional. Each node present must have
  a "type" entry naming one of the component classes handled below; the
  component is default-constructed and then configured through its
  fromJSON() method.

  \param json  the JSON text to parse

  \return the newly created generator (always using std::uint64_t as
          OutputType)

  \throws ValueErrorException if a required node is missing, if a node has
          no "type" entry, or if the "type" is not recognized. Errors
          raised by the JSON parser itself are not caught here.
*/
std::unique_ptr<FingerprintGenerator<std::uint64_t>> generatorFromJSON(
    const std::string &json) {
  std::istringstream ss;
  ss.str(json);
  boost::property_tree::ptree pt;
  boost::property_tree::read_json(ss, pt);

  std::unique_ptr<AtomEnvironmentGenerator<std::uint64_t>> envGen;
  std::unique_ptr<FingerprintArguments> fpArgs;
  std::unique_ptr<AtomInvariantsGenerator> atomInvGen;
  std::unique_ptr<BondInvariantsGenerator> bondInvGen;

  // --- fingerprint arguments (required) ---
  auto fpArgsNode = pt.get_child_optional("fingerprintArguments");
  if (!fpArgsNode) {
    // without this check a null pointer would be handed to the generator
    throw ValueErrorException("fingerprintArguments not specified in JSON");
  }
  {
    auto typ = fpArgsNode->get_optional<std::string>("type");
    if (!typ) {
      throw ValueErrorException(
          "FingerprintArguments type not specified in JSON");
    }
    if (*typ == "MorganArguments") {
      fpArgs.reset(new MorganFingerprint::MorganArguments());
    } else if (*typ == "RDKitFPArguments") {
      fpArgs.reset(new RDKitFP::RDKitFPArguments());
    } else if (*typ == "AtomPairArguments") {
      fpArgs.reset(new AtomPair::AtomPairArguments());
    } else if (*typ == "TopologicalTorsionArguments") {
      fpArgs.reset(new TopologicalTorsion::TopologicalTorsionArguments());
    } else {
      throw ValueErrorException("Unknown FingerprintArguments type: " + *typ);
    }
    fpArgs->fromJSON(*fpArgsNode);
  }

  // --- atom environment generator (required) ---
  auto envGenNode = pt.get_child_optional("atomEnvironmentGenerator");
  if (!envGenNode) {
    // same reasoning as above: never construct a generator around nullptr
    throw ValueErrorException(
        "atomEnvironmentGenerator not specified in JSON");
  }
  {
    auto typ = envGenNode->get_optional<std::string>("type");
    if (!typ) {
      throw ValueErrorException(
          "AtomEnvironmentGenerator type not specified in JSON");
    }
    if (*typ == "MorganEnvGenerator") {
      envGen.reset(new MorganFingerprint::MorganEnvGenerator<std::uint64_t>());
    } else if (*typ == "RDKitFPEnvGenerator") {
      envGen.reset(new RDKitFP::RDKitFPEnvGenerator<std::uint64_t>());
    } else if (*typ == "AtomPairEnvGenerator") {
      envGen.reset(new AtomPair::AtomPairEnvGenerator<std::uint64_t>());
    } else if (*typ == "TopologicalTorsionEnvGenerator") {
      envGen.reset(new TopologicalTorsion::TopologicalTorsionEnvGenerator<
                   std::uint64_t>());
    } else {
      throw ValueErrorException("Unknown AtomEnvGenerator type: " + *typ);
    }
    envGen->fromJSON(*envGenNode);
  }

  // --- atom invariants generator (optional) ---
  auto atomInvGenNode = pt.get_child_optional("atomInvariantsGenerator");
  if (atomInvGenNode) {
    auto typ = atomInvGenNode->get_optional<std::string>("type");
    if (!typ) {
      throw ValueErrorException(
          "AtomInvariantsGenerator type not specified in JSON");
    }
    if (*typ == "MorganAtomInvGenerator") {
      atomInvGen.reset(new MorganFingerprint::MorganAtomInvGenerator());
    } else if (*typ == "MorganFeatureAtomInvGenerator") {
      atomInvGen.reset(new MorganFingerprint::MorganFeatureAtomInvGenerator());
    } else if (*typ == "RDKitFPAtomInvGenerator") {
      atomInvGen.reset(new RDKitFP::RDKitFPAtomInvGenerator());
    } else if (*typ == "AtomPairAtomInvGenerator") {
      atomInvGen.reset(new AtomPair::AtomPairAtomInvGenerator());
    } else {
      throw ValueErrorException("Unknown AtomInvariantsGenerator type: " +
                                *typ);
    }
    atomInvGen->fromJSON(*atomInvGenNode);
  }

  // --- bond invariants generator (optional) ---
  auto bondInvGenNode = pt.get_child_optional("bondInvariantsGenerator");
  if (bondInvGenNode) {
    auto typ = bondInvGenNode->get_optional<std::string>("type");
    if (!typ) {
      throw ValueErrorException(
          "BondInvariantsGenerator type not specified in JSON");
    }
    if (*typ == "MorganBondInvGenerator") {
      bondInvGen.reset(new MorganFingerprint::MorganBondInvGenerator());
    } else {
      throw ValueErrorException("Unknown BondInvariantsGenerator type: " +
                                *typ);
    }
    bondInvGen->fromJSON(*bondInvGenNode);
  }

  // release() already yields nullptr for the optional components that were
  // not present, so no extra null test is needed.
  // NOTE(review): the FingerprintGenerator constructor is not visible here;
  // confirm that it takes ownership of all four pointers (in particular the
  // invariants generators), otherwise the released objects leak.
  return std::make_unique<FingerprintGenerator<std::uint64_t>>(
      envGen.release(), fpArgs.release(), atomInvGen.release(),
      bondInvGen.release());
}
template <typename OutputType>
std::unique_ptr<SparseIntVect<OutputType>>
FingerprintGenerator<OutputType>::getFingerprintHelper(
@@ -175,8 +359,8 @@ FingerprintGenerator<OutputType>::getFingerprintHelper(
bondInvariants.reset(dp_bondInvariantsGenerator->getBondInvariants(mol));
}
// create all atom environments that will generate the bit-ids that will make
// up the fingerprint
// create all atom environments that will generate the bit-ids that will
// make up the fingerprint
auto atomEnvironments = dp_atomEnvironmentGenerator->getEnvironments(
*lmol, dp_fingerprintArguments, args.fromAtoms, args.ignoreAtoms,
args.confId, args.additionalOutput, atomInvariants.get(),
@@ -213,8 +397,8 @@ FingerprintGenerator<OutputType>::getFingerprintHelper(
randomSource.reset(new source_type(*generator, *dist));
}
// iterate over every atom environment and generate bit-ids that will make up
// the fingerprint
// iterate over every atom environment and generate bit-ids that will make
// up the fingerprint
for (const auto env : atomEnvironments) {
OutputType seed = env->getBitId(dp_fingerprintArguments,
atomInvariants.get(), bondInvariants.get(),
@@ -326,7 +510,8 @@ FingerprintGenerator<OutputType>::getSparseCountFingerprint(
// todo getSparseFingerprint does not completely produce the same output as
// getSparseCountFingerprint. Count simulation and potential 64 bit outputs
// makes size limiting necessary for getSparseFingerprint. This can be
// changed if there is another way to avoid the size limitation of SparseBitVect
// changed if there is another way to avoid the size limitation of
// SparseBitVect
template <typename OutputType>
std::unique_ptr<SparseBitVect>
FingerprintGenerator<OutputType>::getSparseFingerprint(
@@ -359,9 +544,9 @@ FingerprintGenerator<OutputType>::getSparseFingerprint(
if (dp_fingerprintArguments->df_countSimulation) {
for (unsigned int i = 0;
i < dp_fingerprintArguments->d_countBounds.size(); ++i) {
// for every bound in the d_countBounds in dp_fingerprintArguments, set
// a bit if the occurrence count is equal or higher than the bound for
// that bit
// for every bound in the d_countBounds in dp_fingerprintArguments,
// set a bit if the occurrence count is equal or higher than the bound
// for that bit
const auto &bounds_count = dp_fingerprintArguments->d_countBounds;
if (val.second >= static_cast<int>(bounds_count[i])) {
OutputType nBitId = val.first * bounds_count.size() + i;

View File

@@ -19,6 +19,7 @@
#include <vector>
#include <memory>
#include <cstdint>
#include <boost/property_tree/ptree_fwd.hpp>
namespace RDKit {
class ROMol;
@@ -106,6 +107,8 @@ class RDKIT_FINGERPRINTS_EXPORT FingerprintArguments {
\return std::string information string
*/
virtual std::string infoString() const = 0;
virtual void toJSON(boost::property_tree::ptree &pt) const;
virtual void fromJSON(const boost::property_tree::ptree &pt);
/**
\brief method that returns information string about common fingerprinting
@@ -201,6 +204,9 @@ class RDKIT_FINGERPRINTS_EXPORT AtomEnvironmentGenerator
\return std::string information string
*/
virtual std::string infoString() const = 0;
virtual void toJSON(boost::property_tree::ptree &) const {};
virtual void fromJSON(const boost::property_tree::ptree &) {};
/*!
\brief Returns the size of the fingerprint based on arguments
@@ -238,6 +244,8 @@ class RDKIT_FINGERPRINTS_EXPORT AtomInvariantsGenerator
\return std::string information string
*/
virtual std::string infoString() const = 0;
virtual void toJSON(boost::property_tree::ptree &) const {};
virtual void fromJSON(const boost::property_tree::ptree &) {};
virtual ~AtomInvariantsGenerator() {}
virtual AtomInvariantsGenerator *clone() const = 0;
@@ -268,6 +276,8 @@ class RDKIT_FINGERPRINTS_EXPORT BondInvariantsGenerator
\return std::string information string
*/
virtual std::string infoString() const = 0;
virtual void toJSON(boost::property_tree::ptree &) const {};
virtual void fromJSON(const boost::property_tree::ptree &) {};
virtual ~BondInvariantsGenerator() {}
virtual BondInvariantsGenerator *clone() const = 0;
@@ -410,7 +420,18 @@ class RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator
};
std::string infoString() const;
void toJSON(boost::property_tree::ptree &pt) const;
void fromJSON(const boost::property_tree::ptree &pt);
};
//! generate JSON representation of a FingerprintGenerator
template <typename OutputType>
RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON(
const FingerprintGenerator<OutputType> &generator);
//! create a FingerprintGenerator from its JSON representation
/// note that the returned generator always uses std::uint64_t as OutputType.
RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<FingerprintGenerator<std::uint64_t>>
generatorFromJSON(const std::string &jsonStr);
template RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *
FingerprintGenerator<std::uint32_t>::getFingerprint(

View File

@@ -14,6 +14,7 @@
#include <GraphMol/Fingerprints/MorganGenerator.h>
#include <RDGeneral/hash/hash.hpp>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <RDGeneral/BoostStartInclude.h>
@@ -25,6 +26,11 @@
#include <GraphMol/Chirality.h>
#include <GraphMol/CIPLabeler/CIPLabeler.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace RDKit {
namespace MorganFingerprint {
@@ -46,6 +52,16 @@ std::string MorganAtomInvGenerator::infoString() const {
return "MorganInvariantGenerator includeRingMembership=" +
std::to_string(df_includeRingMembership);
}
//! Serializes this invariant generator into \c pt: the "type" tag, the
//! ring-membership flag and then whatever the base class adds.
void MorganAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const {
  const bool ringMembership = this->df_includeRingMembership;

  pt.put("type", "MorganAtomInvGenerator");
  pt.put("includeRingMembership", ringMembership);

  AtomInvariantsGenerator::toJSON(pt);
}
//! Restores the ring-membership flag from \c pt; the flag keeps its current
//! value when \c pt has no "includeRingMembership" entry.
void MorganAtomInvGenerator::fromJSON(const boost::property_tree::ptree &pt) {
  const bool ringMembership =
      pt.get<bool>("includeRingMembership", this->df_includeRingMembership);
  this->df_includeRingMembership = ringMembership;

  AtomInvariantsGenerator::fromJSON(pt);
}
MorganAtomInvGenerator *MorganAtomInvGenerator::clone() const {
return new MorganAtomInvGenerator(df_includeRingMembership);
@@ -59,6 +75,36 @@ MorganFeatureAtomInvGenerator::MorganFeatureAtomInvGenerator(
std::string MorganFeatureAtomInvGenerator::infoString() const {
return "MorganFeatureInvariantGenerator";
}
//! Serializes this invariant generator into \c pt.
/*!
  Writes the "type" tag and, if a pattern list is set, a "patternSMARTS"
  node with one empty-keyed child per pattern holding its SMARTS string.
*/
void MorganFeatureAtomInvGenerator::toJSON(
    boost::property_tree::ptree &pt) const {
  using boost::property_tree::ptree;

  pt.put("type", "MorganFeatureAtomInvGenerator");
  if (dp_patterns) {
    ptree smartsArray;
    for (const auto &pattern : *dp_patterns) {
      ptree entry;
      entry.put_value(MolToSmarts(*pattern));
      smartsArray.push_back(ptree::value_type("", entry));
    }
    pt.add_child("patternSMARTS", smartsArray);
  }
  AtomInvariantsGenerator::toJSON(pt);
}
//! Restores the feature patterns from \c pt.
/*!
  If \c pt has a "patternSMARTS" node, a new pattern list is allocated and
  filled with the molecules parsed from the SMARTS strings stored in its
  children. Strings for which SmartsToMol() returns a null pointer are
  skipped. Without such a node the current pattern list is kept.
*/
void MorganFeatureAtomInvGenerator::fromJSON(
    const boost::property_tree::ptree &pt) {
  const auto smartsArray = pt.get_child_optional("patternSMARTS");
  if (smartsArray) {
    // NOTE(review): the previous dp_patterns value is overwritten without
    // being freed and nothing visible here frees the new list or its
    // molecules - confirm who owns them.
    dp_patterns = new std::vector<const ROMol *>();
    for (const auto &entry : *smartsArray) {
      const auto smarts = entry.second.get_value<std::string>();
      ROMol *parsed = SmartsToMol(smarts);
      if (!parsed) {
        // NOTE(review): dropping a pattern shifts the position of all later
        // ones - confirm that silently skipping is the intended behavior.
        continue;
      }
      dp_patterns->push_back(parsed);
    }
  }
  AtomInvariantsGenerator::fromJSON(pt);
}
MorganFeatureAtomInvGenerator *MorganFeatureAtomInvGenerator::clone() const {
return new MorganFeatureAtomInvGenerator(dp_patterns);
@@ -91,13 +137,14 @@ std::vector<std::uint32_t> *MorganBondInvGenerator::getBondInvariants(
} else {
auto bondStereo = static_cast<int32_t>(bond->getStereo());
if (!Chirality::getUseLegacyStereoPerception()) {
// if we aren't using legacy stereo, we need to compute the CIP codes
// if we aren't using legacy stereo, we need to compute the CIP
// codes
if (!mol.hasProp(common_properties::_CIPComputed)) {
CIPLabeler::assignCIPLabels(const_cast<ROMol &>(mol));
}
// for backwards compatibility, if we are E or Z, set those, otherwise
// just use whatever the bondStereo is set to.
// for backwards compatibility, if we are E or Z, set those,
// otherwise just use whatever the bondStereo is set to.
std::string cipCode;
if (bond->getPropIfPresent(common_properties::_CIPCode, cipCode)) {
if (cipCode == "E") {
@@ -125,6 +172,17 @@ std::string MorganBondInvGenerator::infoString() const {
std::to_string(df_useBondTypes) +
" useChirality=" + std::to_string(df_useChirality);
}
//! Serializes this bond invariant generator into \c pt: the "type" tag, the
//! two boolean settings and then whatever the base class adds.
void MorganBondInvGenerator::toJSON(boost::property_tree::ptree &pt) const {
  const bool bondTypes = this->df_useBondTypes;
  const bool chirality = this->df_useChirality;

  pt.put("type", "MorganBondInvGenerator");
  pt.put("useBondTypes", bondTypes);
  pt.put("useChirality", chirality);

  BondInvariantsGenerator::toJSON(pt);
}
//! Restores the two boolean settings from \c pt; a setting without an entry
//! in \c pt keeps its current value.
void MorganBondInvGenerator::fromJSON(const boost::property_tree::ptree &pt) {
  const bool bondTypes = pt.get<bool>("useBondTypes", this->df_useBondTypes);
  const bool chirality = pt.get<bool>("useChirality", this->df_useChirality);

  this->df_useBondTypes = bondTypes;
  this->df_useChirality = chirality;

  BondInvariantsGenerator::fromJSON(pt);
}
MorganBondInvGenerator *MorganBondInvGenerator::clone() const {
return new MorganBondInvGenerator(df_useBondTypes, df_useChirality);
@@ -140,6 +198,18 @@ std::string MorganArguments::infoString() const {
std::to_string(df_onlyNonzeroInvariants) +
" radius=" + std::to_string(d_radius);
}
//! Serializes the Morgan specific arguments into \c pt.
/*!
  Writes the "type" tag and the Morgan settings, then delegates to
  FingerprintArguments::toJSON() for the arguments shared by all
  fingerprint types.

  df_useBondTypes is written as well so that it is not lost when the
  arguments are serialized and read back.

  \param pt  property tree that receives the entries
*/
void MorganArguments::toJSON(boost::property_tree::ptree &pt) const {
  pt.put("type", "MorganArguments");
  pt.put("onlyNonzeroInvariants", df_onlyNonzeroInvariants);
  pt.put("radius", d_radius);
  pt.put("useBondTypes", df_useBondTypes);
  FingerprintArguments::toJSON(pt);
}
//! Restores the Morgan specific arguments from \c pt.
/*!
  The current member values are used as defaults for the lookups, so an
  entry that is missing from \c pt leaves the corresponding setting
  unchanged. This includes "useBondTypes", which is read when present so
  that df_useBondTypes can be restored from serialized arguments.

  \param pt  property tree holding the serialized arguments
*/
void MorganArguments::fromJSON(const boost::property_tree::ptree &pt) {
  d_radius = pt.get<std::uint32_t>("radius", d_radius);
  df_onlyNonzeroInvariants =
      pt.get<bool>("onlyNonzeroInvariants", df_onlyNonzeroInvariants);
  df_useBondTypes = pt.get<bool>("useBondTypes", df_useBondTypes);
  FingerprintArguments::fromJSON(pt);
}
template <typename OutputType>
void MorganAtomEnv<OutputType>::updateAdditionalOutput(
@@ -415,6 +485,18 @@ template <typename OutputType>
std::string MorganEnvGenerator<OutputType>::infoString() const {
return "MorganEnvironmentGenerator";
}
//! Writes the "type" tag identifying this environment generator into \c pt
//! and then lets the base class add its entries.
template <typename OutputType>
void MorganEnvGenerator<OutputType>::toJSON(
    boost::property_tree::ptree &pt) const {
  using Base = AtomEnvironmentGenerator<OutputType>;

  pt.put("type", "MorganEnvGenerator");
  Base::toJSON(pt);
}
//! This class reads no entries of its own; the tree is simply forwarded to
//! the base class.
template <typename OutputType>
void MorganEnvGenerator<OutputType>::fromJSON(
    const boost::property_tree::ptree &pt) {
  using Base = AtomEnvironmentGenerator<OutputType>;
  Base::fromJSON(pt);
}
template <typename OutputType>
FingerprintGenerator<OutputType> *getMorganGenerator(
@@ -462,6 +544,9 @@ FingerprintGenerator<OutputType> *getMorganGenerator(
}
template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator<std::uint32_t> *
getMorganGenerator(const MorganArguments &, AtomInvariantsGenerator *,
BondInvariantsGenerator *, bool, bool);
template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator<std::uint64_t> *
getMorganGenerator(const MorganArguments &, AtomInvariantsGenerator *,
BondInvariantsGenerator *, bool, bool);

View File

@@ -26,7 +26,7 @@ namespace MorganFingerprint {
*/
class RDKIT_FINGERPRINTS_EXPORT MorganAtomInvGenerator
: public AtomInvariantsGenerator {
const bool df_includeRingMembership;
bool df_includeRingMembership;
public:
/**
@@ -41,6 +41,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganAtomInvGenerator
const ROMol &mol) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &) override;
MorganAtomInvGenerator *clone() const override;
};
@@ -68,6 +70,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganFeatureAtomInvGenerator
const ROMol &mol) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &) override;
MorganFeatureAtomInvGenerator *clone() const override;
};
@@ -77,8 +81,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganFeatureAtomInvGenerator
*/
class RDKIT_FINGERPRINTS_EXPORT MorganBondInvGenerator
: public BondInvariantsGenerator {
const bool df_useBondTypes;
const bool df_useChirality;
bool df_useBondTypes;
bool df_useChirality;
public:
/**
@@ -96,6 +100,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganBondInvGenerator
const ROMol &mol) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
MorganBondInvGenerator *clone() const override;
~MorganBondInvGenerator() override = default;
};
@@ -112,6 +118,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganArguments : public FingerprintArguments {
bool df_useBondTypes = true;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
/**
\brief Construct a new MorganArguments object
@@ -201,6 +209,9 @@ class RDKIT_FINGERPRINTS_EXPORT MorganEnvGenerator
const bool hashResults = false) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
OutputType getResultSize() const override;
};

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -33,6 +33,11 @@
#include <GraphMol/Fingerprints/FingerprintUtil.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace RDKit {
namespace RDKitFP {
@@ -53,6 +58,13 @@ std::string RDKitFPAtomInvGenerator::infoString() const {
return "RDKitFPAtomInvGenerator";
}
//! Serializes this invariant generator into \c pt.
/*!
  The generator has no settings of its own, so only the "type" tag is
  written. The base class is called afterwards, as the other invariant
  generators do, so that anything it serializes is included here too.

  \param pt  property tree that receives the entries
*/
void RDKitFPAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const {
  pt.put("type", "RDKitFPAtomInvGenerator");
  AtomInvariantsGenerator::toJSON(pt);
}
//! Restores this invariant generator from \c pt.
/*!
  The generator has no parameters of its own to set; the tree is forwarded
  to the base class, as the other invariant generators do.

  \param pt  property tree holding the serialized generator
*/
void RDKitFPAtomInvGenerator::fromJSON(const boost::property_tree::ptree &pt) {
  AtomInvariantsGenerator::fromJSON(pt);
}
RDKitFPAtomInvGenerator *RDKitFPAtomInvGenerator::clone() const {
return new RDKitFPAtomInvGenerator();
}
@@ -70,6 +82,24 @@ std::string RDKitFPArguments::infoString() const {
" useBondOrder=" + std::to_string(df_useBondOrder);
}
//! Serializes the RDKit-fingerprint specific arguments into \c pt.
/*!
  Writes the "type" tag and the path settings, then delegates to
  FingerprintArguments::toJSON() for the arguments shared by all
  fingerprint types.
*/
void RDKitFPArguments::toJSON(boost::property_tree::ptree &pt) const {
  const auto minPath = this->d_minPath;
  const auto maxPath = this->d_maxPath;
  const auto useHs = this->df_useHs;
  const auto branchedPaths = this->df_branchedPaths;
  const auto useBondOrder = this->df_useBondOrder;

  pt.put("type", "RDKitFPArguments");
  pt.put("minPath", minPath);
  pt.put("maxPath", maxPath);
  pt.put("useHs", useHs);
  pt.put("branchedPaths", branchedPaths);
  pt.put("useBondOrder", useBondOrder);

  FingerprintArguments::toJSON(pt);
}
//! Restores the RDKit-fingerprint specific arguments from \c pt.
/*!
  The current member values serve as defaults for the lookups, so entries
  missing from \c pt keep their present value. The shared arguments are
  read afterwards by FingerprintArguments::fromJSON().
*/
void RDKitFPArguments::fromJSON(const boost::property_tree::ptree &pt) {
  const unsigned int minPath = pt.get<unsigned int>("minPath", d_minPath);
  const unsigned int maxPath = pt.get<unsigned int>("maxPath", d_maxPath);
  const bool useHs = pt.get<bool>("useHs", df_useHs);
  const bool branchedPaths = pt.get<bool>("branchedPaths", df_branchedPaths);
  const bool useBondOrder = pt.get<bool>("useBondOrder", df_useBondOrder);

  d_minPath = minPath;
  d_maxPath = maxPath;
  df_useHs = useHs;
  df_branchedPaths = branchedPaths;
  df_useBondOrder = useBondOrder;

  FingerprintArguments::fromJSON(pt);
}
RDKitFPArguments::RDKitFPArguments(unsigned int minPath, unsigned int maxPath,
bool useHs, bool branchedPaths,
bool useBondOrder, bool countSimulation,
@@ -135,6 +165,18 @@ std::string RDKitFPEnvGenerator<OutputType>::infoString() const {
return "RDKitFPEnvGenerator";
}
//! Writes the "type" tag identifying this environment generator into \c pt
//! and then lets the base class add its entries.
template <typename OutputType>
void RDKitFPEnvGenerator<OutputType>::toJSON(
    boost::property_tree::ptree &pt) const {
  using Base = AtomEnvironmentGenerator<OutputType>;

  pt.put("type", "RDKitFPEnvGenerator");
  Base::toJSON(pt);
}
//! This class reads no entries of its own; the tree is simply forwarded to
//! the base class.
template <typename OutputType>
void RDKitFPEnvGenerator<OutputType>::fromJSON(
    const boost::property_tree::ptree &pt) {
  using Base = AtomEnvironmentGenerator<OutputType>;
  Base::fromJSON(pt);
}
template <typename OutputType>
std::vector<AtomEnvironment<OutputType> *>
RDKitFPEnvGenerator<OutputType>::getEnvironments(

View File

@@ -26,6 +26,8 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPArguments : public FingerprintArguments {
bool df_useBondOrder = true;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
/**
\brief Construct a new RDKitFPArguments object
@@ -62,6 +64,9 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPAtomInvGenerator
const ROMol &mol) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
RDKitFPAtomInvGenerator *clone() const override;
};
@@ -113,6 +118,9 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPEnvGenerator
bool hashResults = false) const override;
std::string infoString() const override;
void toJSON(boost::property_tree::ptree &pt) const override;
void fromJSON(const boost::property_tree::ptree &pt) override;
OutputType getResultSize() const override;
}; // namespace RDKitFP

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -12,6 +12,11 @@
#include <GraphMol/Fingerprints/FingerprintUtil.h>
#include <GraphMol/Fingerprints/AtomPairGenerator.h>
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>
namespace RDKit {
namespace TopologicalTorsion {
@@ -44,6 +49,20 @@ std::string TopologicalTorsionArguments::infoString() const {
std::to_string(d_torsionAtomCount) +
" onlyShortestPaths=" + std::to_string(df_onlyShortestPaths);
};
//! Serializes the topological-torsion specific arguments into \c pt.
/*!
  Writes the "type" tag and the torsion settings, then delegates to
  FingerprintArguments::toJSON() for the arguments shared by all
  fingerprint types.
*/
void TopologicalTorsionArguments::toJSON(
    boost::property_tree::ptree &pt) const {
  const auto atomCount = this->d_torsionAtomCount;
  const auto shortestOnly = this->df_onlyShortestPaths;

  pt.put("type", "TopologicalTorsionArguments");
  pt.put("torsionAtomCount", atomCount);
  pt.put("onlyShortestPaths", shortestOnly);

  FingerprintArguments::toJSON(pt);
}
//! Restores the topological-torsion specific arguments from \c pt.
/*!
  The current member values serve as defaults for the lookups, so entries
  missing from \c pt keep their present value. The shared arguments are
  read afterwards by FingerprintArguments::fromJSON().
*/
void TopologicalTorsionArguments::fromJSON(
    const boost::property_tree::ptree &pt) {
  const uint32_t atomCount =
      pt.get<uint32_t>("torsionAtomCount", d_torsionAtomCount);
  const bool shortestOnly =
      pt.get<bool>("onlyShortestPaths", df_onlyShortestPaths);

  d_torsionAtomCount = atomCount;
  df_onlyShortestPaths = shortestOnly;

  FingerprintArguments::fromJSON(pt);
}
template <typename OutputType>
void TopologicalTorsionAtomEnv<OutputType>::updateAdditionalOutput(
@@ -178,6 +197,17 @@ template <typename OutputType>
std::string TopologicalTorsionEnvGenerator<OutputType>::infoString() const {
return "TopologicalTorsionEnvGenerator";
};
//! Writes the "type" tag identifying this environment generator into \c pt
//! and then lets the base class add its entries.
template <typename OutputType>
void TopologicalTorsionEnvGenerator<OutputType>::toJSON(
    boost::property_tree::ptree &pt) const {
  using Base = AtomEnvironmentGenerator<OutputType>;

  pt.put("type", "TopologicalTorsionEnvGenerator");
  Base::toJSON(pt);
}
//! This class reads no entries of its own; the tree is simply forwarded to
//! the base class.
template <typename OutputType>
void TopologicalTorsionEnvGenerator<OutputType>::fromJSON(
    const boost::property_tree::ptree &pt) {
  using Base = AtomEnvironmentGenerator<OutputType>;
  Base::fromJSON(pt);
}
template <typename OutputType>
FingerprintGenerator<OutputType> *getTopologicalTorsionGenerator(

View File

@@ -25,6 +25,8 @@ class RDKIT_FINGERPRINTS_EXPORT TopologicalTorsionArguments
bool df_onlyShortestPaths = false;
std::string infoString() const override;
//! writes the "type" tag, torsionAtomCount and onlyShortestPaths to \c pt,
//! followed by whatever FingerprintArguments::toJSON() adds
void toJSON(boost::property_tree::ptree &pt) const override;
//! reads torsionAtomCount and onlyShortestPaths from \c pt (the current
//! values are used as the fallback) and then calls
//! FingerprintArguments::fromJSON()
void fromJSON(const boost::property_tree::ptree &pt) override;
/**
\brief Construct a new Topological Torsion Arguments object
@@ -85,6 +87,9 @@ class RDKIT_FINGERPRINTS_EXPORT TopologicalTorsionEnvGenerator
const bool hashResults = false) const override;
std::string infoString() const override;
//! writes the "type" tag for this generator to \c pt and then calls
//! AtomEnvironmentGenerator<OutputType>::toJSON()
void toJSON(boost::property_tree::ptree &pt) const override;
//! forwards \c pt to AtomEnvironmentGenerator<OutputType>::fromJSON(); no
//! generator-specific keys are read
void fromJSON(const boost::property_tree::ptree &pt) override;
OutputType getResultSize() const override;
};

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -35,6 +35,11 @@ namespace np = boost::python::numpy;
namespace RDKit {
namespace FingerprintWrapper {
// Adapter used when exposing generatorFromJSON() to Python: builds the
// generator from the JSON text and gives up ownership so that a raw pointer
// can be returned to the caller.
FingerprintGenerator<std::uint64_t> *generatorFromJSONHelper(
    const std::string &jsonStr) {
  auto generator = generatorFromJSON(jsonStr);
  // the caller becomes responsible for the object's lifetime
  return generator.release();
}
void convertPyArguments(
python::object py_fromAtoms, python::object py_ignoreAtoms,
python::object py_atomInvs, python::object py_bondInvs,
@@ -659,7 +664,9 @@ void wrapGenerator(const std::string &nm) {
.def("GetOptions", getOptions<T>,
python::return_internal_reference<
1, python::with_custodian_and_ward_postcall<0, 1>>(),
python::args("self"), "return the fingerprint options object");
python::args("self"), "return the fingerprint options object")
.def("ToJSON", &generatorToJSON<T>, (python::arg("self")),
"Serialize a FingerprintGenerator to JSON");
}
void setCountBoundsHelper(FingerprintArguments &opts, python::object bounds) {
@@ -759,6 +766,11 @@ BOOST_PYTHON_MODULE(rdFingerprintGenerator) {
python::arg("fpType") = FPType::MorganFP),
"");
python::def("FingerprintGeneratorFromJSON", &generatorFromJSONHelper,
(python::arg("jsonString")),
"Deserialize a FingerprintGenerator from a JSON string",
python::return_value_policy<python::manage_new_object>());
AtomPairWrapper::exportAtompair();
MorganWrapper::exportMorgan();
RDKitFPWrapper::exportRDKit();

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.

View File

@@ -405,6 +405,15 @@ class TestCase(unittest.TestCase):
_ = fpg.GetFingerprint(m,additionalOutput=ao)
self.assertIsNone(ao.GetAtomsPerBit())
def testJSONSerialization(self):
  # A generator rebuilt from its own JSON must give the same fingerprint as
  # the generator it was serialized from.
  mol = Chem.MolFromSmiles('CCC')
  original = rdFingerprintGenerator.GetAtomPairGenerator()
  expected = original.GetFingerprint(mol)
  serialized = original.ToJSON()
  restored = rdFingerprintGenerator.FingerprintGeneratorFromJSON(serialized)
  actual = restored.GetFingerprint(mol)
  self.assertEqual(expected, actual)
if __name__ == '__main__':

View File

@@ -1,5 +1,5 @@
//
// Copyright (C) 2019-2021 Greg Landrum
// Copyright (C) 2019-2025 Greg Landrum
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
@@ -802,4 +802,145 @@ TEST_CASE("github #7533: IndexError while computing fingerprint") {
MorganFingerprints::getFingerprint(*mol, 2));
REQUIRE(fp);
CHECK(fp->getLength() == std::numeric_limits<unsigned>::max());
}
// Round-trips every supported fingerprint generator through JSON.
// Each section:
//   1. builds a generator and computes a reference fingerprint (fp1),
//   2. serializes the generator and checks for the expected "type" tag,
//   3. rebuilds a generator from the JSON and computes fp2,
//   4. checks that re-serializing gives the same JSON AND that fp1 == fp2.
// The fingerprint comparison matters: comparing only the two JSON strings
// cannot detect an option that is silently dropped during serialization.
TEST_CASE("toJSON") {
  auto m1 = "C[C@H](F)Oc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1"_smiles;
  REQUIRE(m1);
  SECTION("morgan") {
    unsigned radius = 2;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        MorganFingerprint::getMorganGenerator<std::uint64_t>(radius));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"MorganArguments\"") != std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
    CHECK(*fp1 == *fp2);
  }
  SECTION("RDKit") {
    unsigned int minPath = 1;
    unsigned int maxPath = 3;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        RDKitFP::getRDKitFPGenerator<std::uint64_t>(minPath, maxPath));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"RDKitFPArguments\"") != std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    CHECK(*fp1 == *fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
  }
  SECTION("topological torsion") {
    bool includeChirality = true;
    std::uint32_t torsionAtomCount = 5;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>(
            includeChirality, torsionAtomCount));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"TopologicalTorsionArguments\"") !=
          std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
    // non-default options are used above, so make sure they survive the trip
    CHECK(*fp1 == *fp2);
  }
  SECTION("atom pair") {
    unsigned int minDistance = 2;
    unsigned int maxDistance = 6;
    bool includeChirality = true;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        AtomPair::getAtomPairGenerator<std::uint64_t>(minDistance, maxDistance,
                                                      includeChirality));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"AtomPairArguments\"") != std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
    // non-default options are used above, so make sure they survive the trip
    CHECK(*fp1 == *fp2);
  }
  SECTION("feature morgan") {
    MorganFingerprint::MorganArguments args;
    args.d_radius = 2;
    MorganFingerprint::MorganFeatureAtomInvGenerator atomInvGen;
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        MorganFingerprint::getMorganGenerator<std::uint64_t>(args,
                                                             &atomInvGen));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
          std::string::npos);
    // NO patterns there when we use the defaults:
    CHECK(jsonStr.find("\"patternSMARTS\"") == std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
    CHECK(*fp1 == *fp2);
  }
  SECTION("custom feature morgan") {
    // dumb feature definitions
    auto p1 = "OC"_smarts;
    REQUIRE(p1);
    auto p2 = "NC"_smarts;
    REQUIRE(p2);
    auto p3 = "FC"_smarts;
    REQUIRE(p3);
    std::vector<const ROMol *> patterns = {p1.get(), p2.get(), p3.get()};

    MorganFingerprint::MorganArguments args;
    args.d_radius = 2;
    MorganFingerprint::MorganFeatureAtomInvGenerator atomInvGen(&patterns);
    std::unique_ptr<FingerprintGenerator<std::uint64_t>> fpGenerator(
        MorganFingerprint::getMorganGenerator<std::uint64_t>(args,
                                                             &atomInvGen));
    REQUIRE(fpGenerator);
    std::unique_ptr<ExplicitBitVect> fp1{fpGenerator->getFingerprint(*m1)};
    REQUIRE(fp1);

    auto jsonStr = generatorToJSON(*fpGenerator);
    CHECK(!jsonStr.empty());
    CHECK(jsonStr.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
          std::string::npos);
    CHECK(jsonStr.find("\"patternSMARTS\"") != std::string::npos);

    auto fpGenerator2 = generatorFromJSON(jsonStr);
    REQUIRE(fpGenerator2);
    std::unique_ptr<ExplicitBitVect> fp2{fpGenerator2->getFingerprint(*m1)};
    REQUIRE(fp2);
    auto jsonStr2 = generatorToJSON(*fpGenerator2);
    CHECK(jsonStr == jsonStr2);
    CHECK(*fp1 == *fp2);
  }
}