diff --git a/Code/GraphMol/Fingerprints/AtomPairGenerator.cpp b/Code/GraphMol/Fingerprints/AtomPairGenerator.cpp index 069bf5af0..2951782cc 100644 --- a/Code/GraphMol/Fingerprints/AtomPairGenerator.cpp +++ b/Code/GraphMol/Fingerprints/AtomPairGenerator.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2018 Boran Adas, Google Summer of Code +// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -14,6 +14,11 @@ #include #include +#include +#include +#include +#include + namespace RDKit { namespace AtomPair { using namespace AtomPairs; @@ -42,6 +47,19 @@ std::string AtomPairAtomInvGenerator::infoString() const { std::to_string(df_topologicalTorsionCorrection); } +void AtomPairAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "AtomPairAtomInvGenerator"); + pt.put("includeChirality", df_includeChirality); + pt.put("topologicalTorsionCorrection", df_topologicalTorsionCorrection); + AtomInvariantsGenerator::toJSON(pt); +} +void AtomPairAtomInvGenerator::fromJSON(const boost::property_tree::ptree &pt) { + df_includeChirality = pt.get("includeChirality", df_includeChirality); + df_topologicalTorsionCorrection = pt.get( + "topologicalTorsionCorrection", df_topologicalTorsionCorrection); + AtomInvariantsGenerator::fromJSON(pt); +} + AtomPairAtomInvGenerator *AtomPairAtomInvGenerator::clone() const { return new AtomPairAtomInvGenerator(df_includeChirality, df_topologicalTorsionCorrection); @@ -73,6 +91,19 @@ std::string AtomPairArguments::infoString() const { " minDistance=" + std::to_string(d_minDistance) + " maxDistance=" + std::to_string(d_maxDistance); } +void AtomPairArguments::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "AtomPairArguments"); + pt.put("use2D", df_use2D); + pt.put("minDistance", d_minDistance); + pt.put("maxDistance", d_maxDistance); + FingerprintArguments::toJSON(pt); +} +void AtomPairArguments::fromJSON(const 
boost::property_tree::ptree &pt) { + df_use2D = pt.get("use2D", df_use2D); + d_minDistance = pt.get("minDistance", d_minDistance); + d_maxDistance = pt.get("maxDistance", d_maxDistance); + FingerprintArguments::fromJSON(pt); +} template void AtomPairAtomEnv::updateAdditionalOutput( @@ -210,6 +241,13 @@ std::string AtomPairEnvGenerator::infoString() const { return "AtomPairEnvironmentGenerator"; } +template +void AtomPairEnvGenerator::toJSON( + boost::property_tree::ptree &pt) const { + pt.put("type", "AtomPairEnvGenerator"); + AtomEnvironmentGenerator::toJSON(pt); +} + template FingerprintGenerator *getAtomPairGenerator( const AtomPairArguments &args, diff --git a/Code/GraphMol/Fingerprints/AtomPairGenerator.h b/Code/GraphMol/Fingerprints/AtomPairGenerator.h index b44b68b88..98448954a 100644 --- a/Code/GraphMol/Fingerprints/AtomPairGenerator.h +++ b/Code/GraphMol/Fingerprints/AtomPairGenerator.h @@ -21,8 +21,8 @@ using namespace AtomPairs; class RDKIT_FINGERPRINTS_EXPORT AtomPairAtomInvGenerator : public AtomInvariantsGenerator { - const bool df_includeChirality; - const bool df_topologicalTorsionCorrection; + bool df_includeChirality; + bool df_topologicalTorsionCorrection; public: /** @@ -40,6 +40,9 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairAtomInvGenerator const ROMol &mol) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; + AtomPairAtomInvGenerator *clone() const override; }; @@ -55,25 +58,24 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairArguments unsigned int d_maxDistance = maxPathLen - 1; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; /*! 
\brief construct a new AtomPairArguments object - \param countSimulation if set, use count simulation while generating the - fingerprint - \param includeChirality if set, chirality will be used in the atom - invariants, this is ignored if atomInvariantsGenerator is present for - the /c FingerprintGenerator that uses this - \param use2D if set, the 2D (topological) distance matrix will be - used - \param minDistance minimum distance between atoms to be considered in a + \param countSimulation if set, use count simulation while generating + the fingerprint \param includeChirality if set, chirality will be used + in the atom invariants, this is ignored if atomInvariantsGenerator is + present for the /c FingerprintGenerator that uses this \param use2D if + set, the 2D (topological) distance matrix will be used \param + minDistance minimum distance between atoms to be considered in a pair, default is 1 bond - \param maxDistance maximum distance between atoms to be considered in a - pair, default is maxPathLen-1 bonds - \param countBounds boundaries for count simulation, corresponding bit - will be set if the count is higher than the number provided for that spot - \param fpSize size of the generated fingerprint, does not affect the sparse - versions + \param maxDistance maximum distance between atoms to be considered + in a pair, default is maxPathLen-1 bonds \param countBounds boundaries + for count simulation, corresponding bit will be set if the count is + higher than the number provided for that spot \param fpSize size of the + generated fingerprint, does not affect the sparse versions */ AtomPairArguments(const bool countSimulation = true, @@ -137,6 +139,7 @@ class RDKIT_FINGERPRINTS_EXPORT AtomPairEnvGenerator const bool hashResults = false) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; OutputType getResultSize() const override; }; diff --git 
a/Code/GraphMol/Fingerprints/FingerprintGenerator.cpp b/Code/GraphMol/Fingerprints/FingerprintGenerator.cpp index b44af33fe..a9ab46e31 100644 --- a/Code/GraphMol/Fingerprints/FingerprintGenerator.cpp +++ b/Code/GraphMol/Fingerprints/FingerprintGenerator.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors +// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. @@ -20,6 +20,11 @@ #include #include +#include +#include +#include +#include + #include #ifdef RDK_BUILD_THREADSAFE_SSS #include @@ -50,6 +55,37 @@ std::string FingerprintArguments::commonArgumentsString() const { " includeChirality=" + std::to_string(df_includeChirality); } +void FingerprintArguments::toJSON(boost::property_tree::ptree &pt) const { + pt.put("countSimulation", df_countSimulation); + pt.put("fpSize", d_fpSize); + pt.put("numBitsPerFeature", d_numBitsPerFeature); + pt.put("includeChirality", df_includeChirality); + + boost::property_tree::ptree countBoundsNode; + for (const auto &bound : d_countBounds) { + boost::property_tree::ptree boundNode; + boundNode.put("", bound); + countBoundsNode.push_back(std::make_pair("", boundNode)); + } + pt.add_child("countBounds", countBoundsNode); +} + +void FingerprintArguments::fromJSON(const boost::property_tree::ptree &pt) { + df_countSimulation = pt.get("countSimulation", df_countSimulation); + d_fpSize = pt.get("fpSize", d_fpSize); + d_numBitsPerFeature = + pt.get("numBitsPerFeature", d_numBitsPerFeature); + df_includeChirality = pt.get("includeChirality", df_includeChirality); + + d_countBounds.clear(); + auto countBoundsNode = pt.get_child_optional("countBounds"); + if (countBoundsNode) { + for (const auto &boundNode : *countBoundsNode) { + d_countBounds.push_back(boundNode.second.get_value()); + } + } +} + template FingerprintGenerator::FingerprintGenerator( AtomEnvironmentGenerator *atomEnvironmentGenerator, @@ -136,6 +172,154 @@ 
std::string FingerprintGenerator::infoString() const { : "No bond invariants generator"); } +template RDKIT_FINGERPRINTS_EXPORT void FingerprintGenerator< + std::uint32_t>::toJSON(boost::property_tree::ptree &pt) const; + +template RDKIT_FINGERPRINTS_EXPORT void FingerprintGenerator< + std::uint64_t>::toJSON(boost::property_tree::ptree &pt) const; + +template +void FingerprintGenerator::toJSON( + boost::property_tree::ptree &pt) const { + pt.put("name", "FingerprintGenerator"); + boost::property_tree::ptree argsNode; + dp_fingerprintArguments->toJSON(argsNode); + pt.add_child("fingerprintArguments", argsNode); + boost::property_tree::ptree envGenNode; + dp_atomEnvironmentGenerator->toJSON(envGenNode); + pt.add_child("atomEnvironmentGenerator", envGenNode); + if (dp_atomInvariantsGenerator) { + boost::property_tree::ptree atomInvGenNode; + dp_atomInvariantsGenerator->toJSON(atomInvGenNode); + pt.add_child("atomInvariantsGenerator", atomInvGenNode); + } + if (dp_bondInvariantsGenerator) { + boost::property_tree::ptree bondInvGenNode; + dp_bondInvariantsGenerator->toJSON(bondInvGenNode); + pt.add_child("bondInvariantsGenerator", bondInvGenNode); + } +} + +template void FingerprintGenerator::fromJSON( + const boost::property_tree::ptree &pt); +template void FingerprintGenerator::fromJSON( + const boost::property_tree::ptree &pt); +template +void FingerprintGenerator::fromJSON( + const boost::property_tree::ptree &) {} + +template RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON( + const FingerprintGenerator &generator); +template RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON( + const FingerprintGenerator &generator); + +template +std::string generatorToJSON(const FingerprintGenerator &generator) { + boost::property_tree::ptree pt; + generator.toJSON(pt); + std::ostringstream buf; + boost::property_tree::write_json(buf, pt, false); + auto str = buf.str(); + boost::algorithm::trim(str); + return str; +} + +std::unique_ptr> generatorFromJSON( + const 
std::string &json) { + std::istringstream ss; + ss.str(json); + boost::property_tree::ptree pt; + boost::property_tree::read_json(ss, pt); + + std::unique_ptr> envGen; + std::unique_ptr fpArgs; + std::unique_ptr atomInvGen; + std::unique_ptr bondInvGen; + + auto fpArgsNode = pt.get_child_optional("fingerprintArguments"); + if (fpArgsNode) { + auto typ = fpArgsNode->get_optional("type"); + if (!typ) { + throw ValueErrorException( + "FingerprintArguments type not specified in JSON"); + } + if (*typ == "MorganArguments") { + fpArgs.reset(new MorganFingerprint::MorganArguments()); + } else if (*typ == "RDKitFPArguments") { + fpArgs.reset(new RDKitFP::RDKitFPArguments()); + } else if (*typ == "AtomPairArguments") { + fpArgs.reset(new AtomPair::AtomPairArguments()); + } else if (*typ == "TopologicalTorsionArguments") { + fpArgs.reset(new TopologicalTorsion::TopologicalTorsionArguments()); + } else { + throw ValueErrorException("Unknown FingerprintArguments type: " + *typ); + } + fpArgs->fromJSON(*fpArgsNode); + } + auto envGenNode = pt.get_child_optional("atomEnvironmentGenerator"); + if (envGenNode) { + auto typ = envGenNode->get_optional("type"); + if (!typ) { + throw ValueErrorException( + "AtomEnvironmentGenerator type not specified in JSON"); + } + if (*typ == "MorganEnvGenerator") { + envGen.reset(new MorganFingerprint::MorganEnvGenerator()); + } else if (*typ == "RDKitFPEnvGenerator") { + envGen.reset(new RDKitFP::RDKitFPEnvGenerator()); + } else if (*typ == "AtomPairEnvGenerator") { + envGen.reset(new AtomPair::AtomPairEnvGenerator()); + } else if (*typ == "TopologicalTorsionEnvGenerator") { + envGen.reset(new TopologicalTorsion::TopologicalTorsionEnvGenerator< + std::uint64_t>()); + } else { + throw ValueErrorException("Unknown AtomEnvGenerator type: " + *typ); + } + envGen->fromJSON(*envGenNode); + } + auto atomInvGenNode = pt.get_child_optional("atomInvariantsGenerator"); + if (atomInvGenNode) { + auto typ = atomInvGenNode->get_optional("type"); + if (!typ) { 
+ throw ValueErrorException( + "AtomInvariantsGenerator type not specified in JSON"); + } + if (*typ == "MorganAtomInvGenerator") { + atomInvGen.reset(new MorganFingerprint::MorganAtomInvGenerator()); + } else if (*typ == "MorganFeatureAtomInvGenerator") { + atomInvGen.reset(new MorganFingerprint::MorganFeatureAtomInvGenerator()); + } else if (*typ == "RDKitFPAtomInvGenerator") { + atomInvGen.reset(new RDKitFP::RDKitFPAtomInvGenerator()); + } else if (*typ == "AtomPairAtomInvGenerator") { + atomInvGen.reset(new AtomPair::AtomPairAtomInvGenerator()); + } else { + throw ValueErrorException("Unknown AtomInvariantsGenerator type: " + + *typ); + } + atomInvGen->fromJSON(*atomInvGenNode); + } + auto bondInvGenNode = pt.get_child_optional("bondInvariantsGenerator"); + if (bondInvGenNode) { + auto typ = bondInvGenNode->get_optional("type"); + if (!typ) { + throw ValueErrorException( + "BondInvariantsGenerator type not specified in JSON"); + } + if (*typ == "MorganBondInvGenerator") { + bondInvGen.reset(new MorganFingerprint::MorganBondInvGenerator()); + } else { + throw ValueErrorException("Unknown BondInvariantsGenerator type: " + + *typ); + } + bondInvGen->fromJSON(*bondInvGenNode); + } + + return std::make_unique>( + envGen.release(), fpArgs.release(), + atomInvGen ? atomInvGen.release() : nullptr, + bondInvGen ? 
bondInvGen.release() : nullptr); +} + template std::unique_ptr> FingerprintGenerator::getFingerprintHelper( @@ -175,8 +359,8 @@ FingerprintGenerator::getFingerprintHelper( bondInvariants.reset(dp_bondInvariantsGenerator->getBondInvariants(mol)); } - // create all atom environments that will generate the bit-ids that will make - // up the fingerprint + // create all atom environments that will generate the bit-ids that will + // make up the fingerprint auto atomEnvironments = dp_atomEnvironmentGenerator->getEnvironments( *lmol, dp_fingerprintArguments, args.fromAtoms, args.ignoreAtoms, args.confId, args.additionalOutput, atomInvariants.get(), @@ -213,8 +397,8 @@ FingerprintGenerator::getFingerprintHelper( randomSource.reset(new source_type(*generator, *dist)); } - // iterate over every atom environment and generate bit-ids that will make up - // the fingerprint + // iterate over every atom environment and generate bit-ids that will make + // up the fingerprint for (const auto env : atomEnvironments) { OutputType seed = env->getBitId(dp_fingerprintArguments, atomInvariants.get(), bondInvariants.get(), @@ -326,7 +510,8 @@ FingerprintGenerator::getSparseCountFingerprint( // todo getSparseFingerprint does not completely produce the same output as // getSparseCountFingerprint. Count simulation and potential 64 bit outputs // makes size limiting necessary for getSparseFingerprint. 
This can be -// changed if there is another way to avoid the size limitation of SparseBitVect +// changed if there is another way to avoid the size limitation of +// SparseBitVect template std::unique_ptr FingerprintGenerator::getSparseFingerprint( @@ -359,9 +544,9 @@ FingerprintGenerator::getSparseFingerprint( if (dp_fingerprintArguments->df_countSimulation) { for (unsigned int i = 0; i < dp_fingerprintArguments->d_countBounds.size(); ++i) { - // for every bound in the d_countBounds in dp_fingerprintArguments, set - // a bit if the occurrence count is equal or higher than the bound for - // that bit + // for every bound in the d_countBounds in dp_fingerprintArguments, + // set a bit if the occurrence count is equal or higher than the bound + // for that bit const auto &bounds_count = dp_fingerprintArguments->d_countBounds; if (val.second >= static_cast(bounds_count[i])) { OutputType nBitId = val.first * bounds_count.size() + i; diff --git a/Code/GraphMol/Fingerprints/FingerprintGenerator.h b/Code/GraphMol/Fingerprints/FingerprintGenerator.h index d0f0ba322..2b72f6d5c 100644 --- a/Code/GraphMol/Fingerprints/FingerprintGenerator.h +++ b/Code/GraphMol/Fingerprints/FingerprintGenerator.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace RDKit { class ROMol; @@ -106,6 +107,8 @@ class RDKIT_FINGERPRINTS_EXPORT FingerprintArguments { \return std::string information string */ virtual std::string infoString() const = 0; + virtual void toJSON(boost::property_tree::ptree &pt) const; + virtual void fromJSON(const boost::property_tree::ptree &pt); /** \brief method that returns information string about common fingerprinting @@ -201,6 +204,9 @@ class RDKIT_FINGERPRINTS_EXPORT AtomEnvironmentGenerator \return std::string information string */ virtual std::string infoString() const = 0; + virtual void toJSON(boost::property_tree::ptree &) const {}; + virtual void fromJSON(const boost::property_tree::ptree &) {}; + /*! 
\brief Returns the size of the fingerprint based on arguments @@ -238,6 +244,8 @@ class RDKIT_FINGERPRINTS_EXPORT AtomInvariantsGenerator \return std::string information string */ virtual std::string infoString() const = 0; + virtual void toJSON(boost::property_tree::ptree &) const {}; + virtual void fromJSON(const boost::property_tree::ptree &) {}; virtual ~AtomInvariantsGenerator() {} virtual AtomInvariantsGenerator *clone() const = 0; @@ -268,6 +276,8 @@ class RDKIT_FINGERPRINTS_EXPORT BondInvariantsGenerator \return std::string information string */ virtual std::string infoString() const = 0; + virtual void toJSON(boost::property_tree::ptree &) const {}; + virtual void fromJSON(const boost::property_tree::ptree &) {}; virtual ~BondInvariantsGenerator() {} virtual BondInvariantsGenerator *clone() const = 0; @@ -410,7 +420,18 @@ class RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator }; std::string infoString() const; + void toJSON(boost::property_tree::ptree &pt) const; + void fromJSON(const boost::property_tree::ptree &pt); }; +//! generate JSON representation of a FingerprintGenerator +template +RDKIT_FINGERPRINTS_EXPORT std::string generatorToJSON( + const FingerprintGenerator &generator); + +//! create a FingerprintGenerator from its JSON representation +/// note that the returned generator always uses std::uint64_t as OutputType. 
+RDKIT_FINGERPRINTS_EXPORT std::unique_ptr> +generatorFromJSON(const std::string &jsonStr); template RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * FingerprintGenerator::getFingerprint( diff --git a/Code/GraphMol/Fingerprints/MorganGenerator.cpp b/Code/GraphMol/Fingerprints/MorganGenerator.cpp index d9ec2a914..0a7b77b90 100644 --- a/Code/GraphMol/Fingerprints/MorganGenerator.cpp +++ b/Code/GraphMol/Fingerprints/MorganGenerator.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,11 @@ #include #include +#include +#include +#include +#include + namespace RDKit { namespace MorganFingerprint { @@ -46,6 +52,16 @@ std::string MorganAtomInvGenerator::infoString() const { return "MorganInvariantGenerator includeRingMembership=" + std::to_string(df_includeRingMembership); } +void MorganAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "MorganAtomInvGenerator"); + pt.put("includeRingMembership", df_includeRingMembership); + AtomInvariantsGenerator::toJSON(pt); +} +void MorganAtomInvGenerator::fromJSON(const boost::property_tree::ptree &pt) { + df_includeRingMembership = + pt.get("includeRingMembership", df_includeRingMembership); + AtomInvariantsGenerator::fromJSON(pt); +} MorganAtomInvGenerator *MorganAtomInvGenerator::clone() const { return new MorganAtomInvGenerator(df_includeRingMembership); @@ -59,6 +75,36 @@ MorganFeatureAtomInvGenerator::MorganFeatureAtomInvGenerator( std::string MorganFeatureAtomInvGenerator::infoString() const { return "MorganFeatureInvariantGenerator"; } +void MorganFeatureAtomInvGenerator::toJSON( + boost::property_tree::ptree &pt) const { + pt.put("type", "MorganFeatureAtomInvGenerator"); + if (dp_patterns) { + boost::property_tree::ptree patternsNode; + for (const auto &pattern : *dp_patterns) { + boost::property_tree::ptree patternNode; + std::string smarts = MolToSmarts(*pattern); + patternNode.put("", smarts); + patternsNode.push_back(std::make_pair("", patternNode)); + } + 
pt.add_child("patternSMARTS", patternsNode); + } + AtomInvariantsGenerator::toJSON(pt); +} +void MorganFeatureAtomInvGenerator::fromJSON( + const boost::property_tree::ptree &pt) { + if (pt.get_child_optional("patternSMARTS")) { + const auto &patternsNode = pt.get_child("patternSMARTS"); + dp_patterns = new std::vector(); + for (const auto &patternNode : patternsNode) { + std::string smarts = patternNode.second.get_value(); + ROMol *patternMol = SmartsToMol(smarts); + if (patternMol) { + dp_patterns->push_back(patternMol); + } + } + } + AtomInvariantsGenerator::fromJSON(pt); +} MorganFeatureAtomInvGenerator *MorganFeatureAtomInvGenerator::clone() const { return new MorganFeatureAtomInvGenerator(dp_patterns); @@ -91,13 +137,14 @@ std::vector *MorganBondInvGenerator::getBondInvariants( } else { auto bondStereo = static_cast(bond->getStereo()); if (!Chirality::getUseLegacyStereoPerception()) { - // if we aren't using legacy stereo, we need to compute the CIP codes + // if we aren't using legacy stereo, we need to compute the CIP + // codes if (!mol.hasProp(common_properties::_CIPComputed)) { CIPLabeler::assignCIPLabels(const_cast(mol)); } - // for backwards compatibility, if we are E or Z, set those, otherwise - // just use whatever the bondStereo is set to. + // for backwards compatibility, if we are E or Z, set those, + // otherwise just use whatever the bondStereo is set to. 
std::string cipCode; if (bond->getPropIfPresent(common_properties::_CIPCode, cipCode)) { if (cipCode == "E") { @@ -125,6 +172,17 @@ std::string MorganBondInvGenerator::infoString() const { std::to_string(df_useBondTypes) + " useChirality=" + std::to_string(df_useChirality); } +void MorganBondInvGenerator::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "MorganBondInvGenerator"); + pt.put("useBondTypes", df_useBondTypes); + pt.put("useChirality", df_useChirality); + BondInvariantsGenerator::toJSON(pt); +} +void MorganBondInvGenerator::fromJSON(const boost::property_tree::ptree &pt) { + df_useBondTypes = pt.get("useBondTypes", df_useBondTypes); + df_useChirality = pt.get("useChirality", df_useChirality); + BondInvariantsGenerator::fromJSON(pt); +} MorganBondInvGenerator *MorganBondInvGenerator::clone() const { return new MorganBondInvGenerator(df_useBondTypes, df_useChirality); @@ -140,6 +198,18 @@ std::string MorganArguments::infoString() const { std::to_string(df_onlyNonzeroInvariants) + " radius=" + std::to_string(d_radius); } +void MorganArguments::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "MorganArguments"); + pt.put("onlyNonzeroInvariants", df_onlyNonzeroInvariants); + pt.put("radius", d_radius); + FingerprintArguments::toJSON(pt); +} +void MorganArguments::fromJSON(const boost::property_tree::ptree &pt) { + d_radius = pt.get("radius", d_radius); + df_onlyNonzeroInvariants = + pt.get("onlyNonzeroInvariants", df_onlyNonzeroInvariants); + FingerprintArguments::fromJSON(pt); +} template void MorganAtomEnv::updateAdditionalOutput( @@ -415,6 +485,18 @@ template std::string MorganEnvGenerator::infoString() const { return "MorganEnvironmentGenerator"; } +template +void MorganEnvGenerator::toJSON( + boost::property_tree::ptree &pt) const { + pt.put("type", "MorganEnvGenerator"); + AtomEnvironmentGenerator::toJSON(pt); +} + +template +void MorganEnvGenerator::fromJSON( + const boost::property_tree::ptree &pt) { + 
AtomEnvironmentGenerator::fromJSON(pt); +} template FingerprintGenerator *getMorganGenerator( @@ -462,6 +544,9 @@ FingerprintGenerator *getMorganGenerator( } template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * +getMorganGenerator(const MorganArguments &, AtomInvariantsGenerator *, + BondInvariantsGenerator *, bool, bool); +template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * getMorganGenerator(const MorganArguments &, AtomInvariantsGenerator *, BondInvariantsGenerator *, bool, bool); diff --git a/Code/GraphMol/Fingerprints/MorganGenerator.h b/Code/GraphMol/Fingerprints/MorganGenerator.h index 3056ad0fc..0b3d2260b 100644 --- a/Code/GraphMol/Fingerprints/MorganGenerator.h +++ b/Code/GraphMol/Fingerprints/MorganGenerator.h @@ -26,7 +26,7 @@ namespace MorganFingerprint { */ class RDKIT_FINGERPRINTS_EXPORT MorganAtomInvGenerator : public AtomInvariantsGenerator { - const bool df_includeRingMembership; + bool df_includeRingMembership; public: /** @@ -41,6 +41,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganAtomInvGenerator const ROMol &mol) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &) override; MorganAtomInvGenerator *clone() const override; }; @@ -68,6 +70,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganFeatureAtomInvGenerator const ROMol &mol) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &) override; MorganFeatureAtomInvGenerator *clone() const override; }; @@ -77,8 +81,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganFeatureAtomInvGenerator */ class RDKIT_FINGERPRINTS_EXPORT MorganBondInvGenerator : public BondInvariantsGenerator { - const bool df_useBondTypes; - const bool df_useChirality; + bool df_useBondTypes; + bool df_useChirality; public: /** @@ -96,6 +100,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganBondInvGenerator 
const ROMol &mol) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; MorganBondInvGenerator *clone() const override; ~MorganBondInvGenerator() override = default; }; @@ -112,6 +118,8 @@ class RDKIT_FINGERPRINTS_EXPORT MorganArguments : public FingerprintArguments { bool df_useBondTypes = true; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; /** \brief Construct a new MorganArguments object @@ -201,6 +209,9 @@ class RDKIT_FINGERPRINTS_EXPORT MorganEnvGenerator const bool hashResults = false) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; + OutputType getResultSize() const override; }; diff --git a/Code/GraphMol/Fingerprints/RDKitFPGenerator.cpp b/Code/GraphMol/Fingerprints/RDKitFPGenerator.cpp index d3af8760b..0fd65d0a6 100644 --- a/Code/GraphMol/Fingerprints/RDKitFPGenerator.cpp +++ b/Code/GraphMol/Fingerprints/RDKitFPGenerator.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors +// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. 
@@ -33,6 +33,11 @@ #include +#include +#include +#include +#include + namespace RDKit { namespace RDKitFP { @@ -53,6 +58,13 @@ std::string RDKitFPAtomInvGenerator::infoString() const { return "RDKitFPAtomInvGenerator"; } +void RDKitFPAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "RDKitFPAtomInvGenerator"); +} +void RDKitFPAtomInvGenerator::fromJSON(const boost::property_tree::ptree &) { + // no parameters to set +} + RDKitFPAtomInvGenerator *RDKitFPAtomInvGenerator::clone() const { return new RDKitFPAtomInvGenerator(); } @@ -70,6 +82,24 @@ std::string RDKitFPArguments::infoString() const { " useBondOrder=" + std::to_string(df_useBondOrder); } +void RDKitFPArguments::toJSON(boost::property_tree::ptree &pt) const { + pt.put("type", "RDKitFPArguments"); + pt.put("minPath", d_minPath); + pt.put("maxPath", d_maxPath); + pt.put("useHs", df_useHs); + pt.put("branchedPaths", df_branchedPaths); + pt.put("useBondOrder", df_useBondOrder); + FingerprintArguments::toJSON(pt); +} +void RDKitFPArguments::fromJSON(const boost::property_tree::ptree &pt) { + d_minPath = pt.get("minPath", d_minPath); + d_maxPath = pt.get("maxPath", d_maxPath); + df_useHs = pt.get("useHs", df_useHs); + df_branchedPaths = pt.get("branchedPaths", df_branchedPaths); + df_useBondOrder = pt.get("useBondOrder", df_useBondOrder); + FingerprintArguments::fromJSON(pt); +} + RDKitFPArguments::RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, @@ -135,6 +165,18 @@ std::string RDKitFPEnvGenerator::infoString() const { return "RDKitFPEnvGenerator"; } +template +void RDKitFPEnvGenerator::toJSON( + boost::property_tree::ptree &pt) const { + pt.put("type", "RDKitFPEnvGenerator"); + AtomEnvironmentGenerator::toJSON(pt); +} +template +void RDKitFPEnvGenerator::fromJSON( + const boost::property_tree::ptree &pt) { + AtomEnvironmentGenerator::fromJSON(pt); +}; + template std::vector *> 
RDKitFPEnvGenerator::getEnvironments( diff --git a/Code/GraphMol/Fingerprints/RDKitFPGenerator.h b/Code/GraphMol/Fingerprints/RDKitFPGenerator.h index 9ad6787ce..d5590c89c 100644 --- a/Code/GraphMol/Fingerprints/RDKitFPGenerator.h +++ b/Code/GraphMol/Fingerprints/RDKitFPGenerator.h @@ -26,6 +26,8 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPArguments : public FingerprintArguments { bool df_useBondOrder = true; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; /** \brief Construct a new RDKitFPArguments object @@ -62,6 +64,9 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPAtomInvGenerator const ROMol &mol) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; + RDKitFPAtomInvGenerator *clone() const override; }; @@ -113,6 +118,9 @@ class RDKIT_FINGERPRINTS_EXPORT RDKitFPEnvGenerator bool hashResults = false) const override; std::string infoString() const override; + void toJSON(boost::property_tree::ptree &pt) const override; + void fromJSON(const boost::property_tree::ptree &pt) override; + OutputType getResultSize() const override; }; // namespace RDKitFP diff --git a/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.cpp b/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.cpp index ef632d00b..dabacf491 100644 --- a/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.cpp +++ b/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.cpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors +// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. 
@@ -12,6 +12,11 @@
 #include
 #include
 
+#include  // NOTE(review): the header names of these added includes are missing from this copy of the patch
+#include
+#include
+#include
+
 namespace RDKit {
 namespace TopologicalTorsion {
 
@@ -44,6 +49,20 @@ std::string TopologicalTorsionArguments::infoString() const {
          std::to_string(d_torsionAtomCount) + " onlyShortestPaths=" +
          std::to_string(df_onlyShortestPaths);
 };
+void TopologicalTorsionArguments::toJSON(  // writes a "type" tag plus the two torsion-specific options
+    boost::property_tree::ptree &pt) const {
+  pt.put("type", "TopologicalTorsionArguments");
+  pt.put("torsionAtomCount", d_torsionAtomCount);
+  pt.put("onlyShortestPaths", df_onlyShortestPaths);
+  FingerprintArguments::toJSON(pt);  // the base class is then handed the same ptree
+}
+void TopologicalTorsionArguments::fromJSON(  // reads the same keys back; each get() receives the current member value as its default
+    const boost::property_tree::ptree &pt) {
+  d_torsionAtomCount = pt.get("torsionAtomCount", d_torsionAtomCount);
+  df_onlyShortestPaths =
+      pt.get("onlyShortestPaths", df_onlyShortestPaths);
+  FingerprintArguments::fromJSON(pt);  // the base class is then handed the same ptree
+}
 
 template
 void TopologicalTorsionAtomEnv::updateAdditionalOutput(
@@ -178,6 +197,17 @@ template
 std::string TopologicalTorsionEnvGenerator::infoString() const {
   return "TopologicalTorsionEnvGenerator";
 };
+template  // NOTE(review): the template parameter list is missing from this copy of the patch
+void TopologicalTorsionEnvGenerator::toJSON(
+    boost::property_tree::ptree &pt) const {
+  pt.put("type", "TopologicalTorsionEnvGenerator");  // type tag first, then the base-class call
+  AtomEnvironmentGenerator::toJSON(pt);
+};  // the trailing ';' mirrors the infoString() definition in the context lines above
+template
+void TopologicalTorsionEnvGenerator::fromJSON(
+    const boost::property_tree::ptree &pt) {
+  AtomEnvironmentGenerator::fromJSON(pt);  // nothing class-specific is read; the call is forwarded to the base class
+};
 
 template
 FingerprintGenerator *getTopologicalTorsionGenerator(
diff --git a/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.h b/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.h
index 9696c1f49..e78731b6e 100644
--- a/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.h
+++ b/Code/GraphMol/Fingerprints/TopologicalTorsionGenerator.h
@@ -25,6 +25,8 @@ class RDKIT_FINGERPRINTS_EXPORT TopologicalTorsionArguments
   bool df_onlyShortestPaths = false;
 
   std::string infoString() const override;
+  void toJSON(boost::property_tree::ptree &pt) const override;  // writes the options of this object into pt
+  void fromJSON(const boost::property_tree::ptree &pt) override;  // reads the options of this object back from pt
 
   /**
    \brief Construct a new Topological Torsion Arguments object
@@ -85,6 +87,9 @@ class RDKIT_FINGERPRINTS_EXPORT TopologicalTorsionEnvGenerator
       const bool hashResults = false) const override;
 
   std::string infoString() const override;
+  void toJSON(boost::property_tree::ptree &pt) const override;  // type tag plus the base-class call
+  void fromJSON(const boost::property_tree::ptree &pt) override;  // forwards to the base class
+
   OutputType getResultSize() const override;
 };
 
diff --git a/Code/GraphMol/Fingerprints/Wrap/FingerprintGeneratorWrapper.cpp b/Code/GraphMol/Fingerprints/Wrap/FingerprintGeneratorWrapper.cpp
index 9bbdb9f86..f9438f7e3 100644
--- a/Code/GraphMol/Fingerprints/Wrap/FingerprintGeneratorWrapper.cpp
+++ b/Code/GraphMol/Fingerprints/Wrap/FingerprintGeneratorWrapper.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
+// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
 //
 // @@ All Rights Reserved @@
 // This file is part of the RDKit.
@@ -35,6 +35,11 @@ namespace np = boost::python::numpy;
 
 namespace RDKit {
 namespace FingerprintWrapper {
+FingerprintGenerator *generatorFromJSONHelper(  // helper bound to Python below as FingerprintGeneratorFromJSON
+    const std::string &jsonStr) {
+  return generatorFromJSON(jsonStr).release();  // release() hands the raw pointer to the caller; presumably the result is a std::unique_ptr - TODO confirm
+}
+
 void convertPyArguments(
     python::object py_fromAtoms, python::object py_ignoreAtoms,
     python::object py_atomInvs, python::object py_bondInvs,
@@ -659,7 +664,9 @@ void wrapGenerator(const std::string &nm) {
       .def("GetOptions", getOptions,
            python::return_internal_reference<
                1, python::with_custodian_and_ward_postcall<0, 1>>(),
-           python::args("self"), "return the fingerprint options object");
+           python::args("self"), "return the fingerprint options object")
+      .def("ToJSON", &generatorToJSON, (python::arg("self")),  // exposes generatorToJSON to Python as the ToJSON() method
+           "Serialize a FingerprintGenerator to JSON");
 }
 
 void setCountBoundsHelper(FingerprintArguments &opts, python::object bounds) {
@@ -759,6 +766,11 @@ BOOST_PYTHON_MODULE(rdFingerprintGenerator) {
                   python::arg("fpType") = FPType::MorganFP),
               "");
 
+  python::def("FingerprintGeneratorFromJSON", &generatorFromJSONHelper,  // module-level function taking one "jsonString" argument
+              (python::arg("jsonString")),
+              "Deserialize a FingerprintGenerator from a JSON string",
+              python::return_value_policy());  // NOTE(review): the policy's template argument is missing from this copy of the patch
+
   AtomPairWrapper::exportAtompair();
   MorganWrapper::exportMorgan();
   RDKitFPWrapper::exportRDKit();
diff --git a/Code/GraphMol/Fingerprints/Wrap/TopologicalTorsionWrapper.cpp b/Code/GraphMol/Fingerprints/Wrap/TopologicalTorsionWrapper.cpp
index b190aa40d..e3d529dbc 100644
--- a/Code/GraphMol/Fingerprints/Wrap/TopologicalTorsionWrapper.cpp
+++ b/Code/GraphMol/Fingerprints/Wrap/TopologicalTorsionWrapper.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
+// Copyright (C) 2018-2025 Boran Adas and other RDKit contributors
 //
 // @@ All Rights Reserved @@
 // This file is part of the RDKit.
diff --git a/Code/GraphMol/Fingerprints/Wrap/testGenerators.py b/Code/GraphMol/Fingerprints/Wrap/testGenerators.py
index 32a40ce16..75ddcf7f2 100644
--- a/Code/GraphMol/Fingerprints/Wrap/testGenerators.py
+++ b/Code/GraphMol/Fingerprints/Wrap/testGenerators.py
@@ -405,6 +405,15 @@ class TestCase(unittest.TestCase):
     _ = fpg.GetFingerprint(m,additionalOutput=ao)
     self.assertIsNone(ao.GetAtomsPerBit())
 
+  def testJSONSerialization(self):  # JSON round trip through the Python wrapper: generator -> ToJSON() -> FingerprintGeneratorFromJSON()
+    m = Chem.MolFromSmiles('CCC')
+    g1 = rdFingerprintGenerator.GetAtomPairGenerator()  # NOTE(review): only the atom-pair generator, with default arguments, is exercised here
+    fp1 = g1.GetFingerprint(m)
+    jsonStr = g1.ToJSON()
+    g2 = rdFingerprintGenerator.FingerprintGeneratorFromJSON(jsonStr)
+    fp2 = g2.GetFingerprint(m)
+    self.assertEqual(fp1, fp2)  # the rebuilt generator must give the same fingerprint for the same molecule
+
 
 
 if __name__ == '__main__':
diff --git a/Code/GraphMol/Fingerprints/catch_tests.cpp b/Code/GraphMol/Fingerprints/catch_tests.cpp
index 33f80b85e..2c000d592 100644
--- a/Code/GraphMol/Fingerprints/catch_tests.cpp
+++ b/Code/GraphMol/Fingerprints/catch_tests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (C) 2019-2021 Greg Landrum
+// Copyright (C) 2019-2025 Greg Landrum
 //
 // @@ All Rights Reserved @@
 // This file is part of the RDKit.
@@ -802,4 +802,145 @@ TEST_CASE("github #7533: IndexError while computing fingerprint") {
       MorganFingerprints::getFingerprint(*mol, 2));
   REQUIRE(fp);
   CHECK(fp->getLength() == std::numeric_limits::max());
+}
+
+TEST_CASE("toJSON") {  // round-trips several generator types through generatorToJSON() / generatorFromJSON()
+  auto m1 = "C[C@H](F)Oc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1"_smiles;
+  REQUIRE(m1);
+  SECTION("morgan") {
+    unsigned radius = 2;
+    std::unique_ptr> fpGenerator(  // NOTE(review): template arguments are missing throughout this copy of the patch
+        MorganFingerprint::getMorganGenerator(radius));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"MorganArguments\"") != std::string::npos);  // the arguments' type tag must appear in the output
+
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);  // re-serializing the rebuilt generator gives the same JSON
+    CHECK(*fp1 == *fp2);  // and the rebuilt generator gives the same fingerprint
+  }
+  SECTION("RDKit") {
+    unsigned int minPath = 1;
+    unsigned int maxPath = 3;
+    std::unique_ptr> fpGenerator(
+        RDKitFP::getRDKitFPGenerator(minPath, maxPath));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"RDKitFPArguments\"") != std::string::npos);
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);
+    CHECK(*fp1 == *fp2);  // same fingerprint from the original and the rebuilt generator
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);
+  }
+  SECTION("topological torsion") {
+    bool includeChirality = true;
+    std::uint32_t torsionAtomCount = 5;
+    std::unique_ptr> fpGenerator(
+        TopologicalTorsion::getTopologicalTorsionGenerator(
+            includeChirality, torsionAtomCount));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"TopologicalTorsionArguments\"") !=
+          std::string::npos);
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);  // NOTE(review): fp1 and fp2 are never compared in this section, unlike "morgan" and "RDKit" - confirm whether CHECK(*fp1 == *fp2) was intended
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);
+  }
+  SECTION("atom pair") {
+    unsigned int minDistance = 2;
+    unsigned int maxDistance = 6;
+    bool includeChirality = true;
+    std::unique_ptr> fpGenerator(
+        AtomPair::getAtomPairGenerator(minDistance, maxDistance,
+                                       includeChirality));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"AtomPairArguments\"") != std::string::npos);
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);  // NOTE(review): fp1 and fp2 are never compared in this section either - confirm whether CHECK(*fp1 == *fp2) was intended
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);
+  }
+  SECTION("feature morgan") {
+    MorganFingerprint::MorganArguments args;
+    args.d_radius = 2;
+    MorganFingerprint::MorganFeatureAtomInvGenerator atomInvGen;  // default-constructed: no custom patterns are supplied
+    std::unique_ptr> fpGenerator(
+        MorganFingerprint::getMorganGenerator(args,
+                                              &atomInvGen));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
+          std::string::npos);
+    // no "patternSMARTS" entry is expected when the defaults are used:
+    CHECK(jsonStr.find("\"patternSMARTS\"") == std::string::npos);
+
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);
+    CHECK(*fp1 == *fp2);
+  }
+  SECTION("custom feature morgan") {
+    // deliberately simplistic feature definitions
+    auto p1 = "OC"_smarts;
+    REQUIRE(p1);
+    auto p2 = "NC"_smarts;
+    REQUIRE(p2);
+    auto p3 = "FC"_smarts;
+    REQUIRE(p3);
+    std::vector patterns = {p1.get(), p2.get(), p3.get()};  // raw pointers; p1..p3 keep ownership of the queries
+    MorganFingerprint::MorganArguments args;
+    args.d_radius = 2;
+    MorganFingerprint::MorganFeatureAtomInvGenerator atomInvGen(&patterns);
+    std::unique_ptr> fpGenerator(
+        MorganFingerprint::getMorganGenerator(args,
+                                              &atomInvGen));
+    REQUIRE(fpGenerator);
+    std::unique_ptr fp1{fpGenerator->getFingerprint(*m1)};
+    REQUIRE(fp1);
+    auto jsonStr = generatorToJSON(*fpGenerator);
+    CHECK(!jsonStr.empty());
+    CHECK(jsonStr.find("\"type\":\"MorganFeatureAtomInvGenerator\"") !=
+          std::string::npos);
+    CHECK(jsonStr.find("\"patternSMARTS\"") != std::string::npos);  // with custom patterns the key must be present
+
+    auto fpGenerator2 = generatorFromJSON(jsonStr);
+    REQUIRE(fpGenerator2);
+    std::unique_ptr fp2{fpGenerator2->getFingerprint(*m1)};
+    REQUIRE(fp2);
+    auto jsonStr2 = generatorToJSON(*fpGenerator2);
+    CHECK(jsonStr == jsonStr2);
+    CHECK(*fp1 == *fp2);
+  }
 }
\ No newline at end of file