Expose reading/writing PNG metadata to CFFI and MinimalLib (#8381)

* get SynthonSpace.cpp to build also when RDK_USE_BOOST_SERIALIZATION is
not defined

* test should not fail when RDK_USE_BOOST_SERIALIZATION is not defined

* - expose reading/writing PNG metadata to CFFI and MinimalLib
- add relevant CFFI and MinimalLib unit tests
- add RDK_USE_BOOST_PROGRAM_OPTIONS CMake option
- enable using standalone zlib in the absence of boost::iostreams for parsing PNG files
- enable linking against maeparser in the absence of boost::iostreams also on Windows
- enable building RDKit in the absence of boost::program_options

* add check for boost::program_options

* change size_t into std::uint64_t in SearchResults for consistency with doTheSearch() which uses std::uint64_t

* change size_t into std::uint64_t in SearchResults for consistency with
SynthonSpaceSearcher::doTheSearch()

* set CMake policy to allow YAeHMOP to require a version which is not
actually supported

* reverted External/YAeHMOP/CMakeLists.txt to master version

* check if Windows build will work

* fix build

* configure zlib install location

* build zlib dependency

* include zlib header directory

* explicitly set PropertyFlags.AllProps so the test does not fail on
static builds

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
This commit is contained in:
Paolo Tosco
2025-07-23 17:10:38 +02:00
committed by GitHub
parent 3542b3ffbb
commit 9c7ffb33e9
19 changed files with 2133 additions and 87 deletions

View File

@@ -20,6 +20,7 @@
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
#include <GraphMol/FileParsers/PNGParser.h>
#include <RDGeneral/FileParseException.h>
#include <GraphMol/MolDraw2D/MolDraw2DSVG.h>
#include <GraphMol/Substruct/SubstructMatch.h>
@@ -51,6 +52,7 @@
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#include <GraphMol/ChemReactions/SanitizeRxn.h>
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/RGroupDecomposition/RGroupUtils.h>
#include <RDGeneral/RDLog.h>
#include "common_defs.h"
@@ -353,6 +355,7 @@ std::string parse_int_array(const rj::Document &doc, std::vector<int> &intVec,
if (!it->value.IsArray()) {
return "JSON contains '" + keyName + "' field, but it is not an array";
}
intVec.clear();
for (const auto &val : it->value.GetArray()) {
if (!val.IsInt()) {
return valueName + " should be integers";
@@ -363,6 +366,26 @@ std::string parse_int_array(const rj::Document &doc, std::vector<int> &intVec,
return "";
}
// Reads the optional JSON member `keyName` from `doc` into `doubleVec`.
//
// - If the member is absent, `doubleVec` is left untouched and an empty
//   string is returned.
// - If the member is present but is not an array, an error message is
//   returned and `doubleVec` is left untouched.
// - Otherwise `doubleVec` is cleared and filled with the array entries in
//   order. If a non-numeric entry is met, an error message built from
//   `valueName` is returned; `doubleVec` then holds the entries read so far.
//
// Returns an empty string on success, otherwise a description of the problem.
std::string parse_double_array(const rj::Document &doc,
                               std::vector<double> &doubleVec,
                               const std::string &keyName,
                               const std::string &valueName) {
  const auto member = doc.FindMember(keyName.c_str());
  if (member == doc.MemberEnd()) {
    // The key is optional: nothing to parse.
    return "";
  }
  const auto &jsonValue = member->value;
  if (!jsonValue.IsArray()) {
    return "JSON contains '" + keyName + "' field, but it is not an array";
  }
  doubleVec.clear();
  for (const auto &entry : jsonValue.GetArray()) {
    if (!entry.IsNumber()) {
      return valueName + " should be floats";
    }
    doubleVec.push_back(entry.GetDouble());
  }
  return "";
}
std::string parse_rgba_array(const rj::Value &val, DrawColour &color,
const std::string &keyName) {
if (!val.IsArray() || val.Size() < 3 || val.Size() > 4) {
@@ -1361,6 +1384,87 @@ class LogHandle {
std::stringstream d_stream;
};
std::vector<ROMOL_SPTR> get_mols_from_png_blob_internal(
const std::string &pngString, bool singleMol = false,
const char *details = nullptr) {
std::vector<ROMOL_SPTR> res;
if (pngString.empty()) {
return res;
}
PNGMetadataParams params;
params.includePkl = singleMol;
params.includeSmiles = singleMol;
params.includeMol = singleMol;
updatePNGMetadataParamsFromJSON(params, details);
std::string tagToUse;
unsigned int numTagsFound = 0;
if (params.includePkl) {
++numTagsFound;
tagToUse = PNGData::pklTag;
}
if (params.includeSmiles) {
++numTagsFound;
tagToUse = PNGData::smilesTag;
}
if (params.includeMol) {
++numTagsFound;
tagToUse = PNGData::molTag;
}
if (numTagsFound == 0 || (!singleMol && numTagsFound > 1)) {
return res;
}
auto metadata = PNGStringToMetadata(pngString);
for (const auto &[key, value] : metadata) {
if (!singleMol && key.rfind(tagToUse, 0) == std::string::npos) {
continue;
}
std::unique_ptr<RWMol> mol;
if (params.includePkl && key.rfind(PNGData::pklTag, 0) == 0) {
try {
mol.reset(new RWMol(value));
} catch (...) {
}
} else if ((params.includeSmiles &&
key.rfind(PNGData::smilesTag, 0) == 0) ||
(params.includeMol && key.rfind(PNGData::molTag, 0) == 0)) {
mol.reset(MinimalLib::mol_from_input(value, details));
}
if (mol) {
res.emplace_back(mol.release());
if (singleMol) {
break;
}
}
}
return res;
}
// Combines mol1 and mol2 into a single molecule stored in `combinedMol`.
//
// mol1, mol2:   the molecules to combine.
// combinedMol:  output; reset to null on entry and set to the combined
//               molecule on success. It stays null on any failure.
// details_json: optional JSON object. An "offset" member, if present, must be
//               an array of at least three numbers; the first three are used
//               as the x, y, z offset passed to combineMols(). Without it the
//               offset is (0, 0, 0).
//
// Returns an empty string on success, otherwise a description of the problem.
std::string combine_mols_internal(const ROMol &mol1, const ROMol &mol2,
                                  std::unique_ptr<ROMol> &combinedMol,
                                  const char *details_json = nullptr) {
  std::vector<double> offset(3, 0.0);
  combinedMol = nullptr;
  if (details_json) {
    rj::Document doc;
    doc.Parse(details_json);
    if (!doc.IsObject()) {
      return "Invalid JSON";
    }
    const std::string problems =
        parse_double_array(doc, offset, "offset", "offset coordinates");
    if (!problems.empty()) {
      return problems;
    }
    // parse_double_array() replaces the contents of `offset` with whatever
    // the JSON array holds, so a short array (e.g. [] or [1.0]) would make
    // the indexing below read out of bounds.
    if (offset.size() < 3) {
      return "offset coordinates should be an array of 3 floats";
    }
  }
  try {
    combinedMol.reset(combineMols(
        mol1, mol2, RDGeom::Point3D(offset[0], offset[1], offset[2])));
  } catch (...) {
    return "Failed to combine molecules";
  }
  return "";
}
} // namespace MinimalLib
} // namespace RDKit
#undef LPT_OPT_GET