From 9c7ffb33e916ec77b5248ae8597ec1ffba80d91f Mon Sep 17 00:00:00 2001 From: Paolo Tosco Date: Wed, 23 Jul 2025 17:10:38 +0200 Subject: [PATCH] Expose reading/writing PNG metadata to CFFI and MinimalLib (#8381) * get SynthonSpace.cpp to build also when RDK_USE_BOOST_SERIALIZATION is not defined * test should not fail when RDK_USE_BOOST_SERIALIZATION is not defined * - expose reading/writing PNG metadata to CFFI and MinimalLib - add relevant CFFI and MinimalLib unit tests - add RDK_USE_BOOST_PROGRAM_OPTIONS CMake option - enable using standalone zlib in the absence of boost::iostreams for parsing PNG files - enable linking against maeparser in the absence of boost::iostreams also on Windows - enable building RDKit in the absence of boost::program_options * add check for boost::program_options * change size_t into std::uint64_t in SearchResults for consistency with doTheSearch() which uses std::uint64_t * change size_t into std::uint64_t in SearchResults for consistency with SynthonSpaceSearcher::doTheSearch() * set CMake policy to allow YAeHMOP to require a version which is not actually supported * reverted External/YAeHMOP/CMakeLists.txt to master version * check if Windows build will work * fix build * configure zlib install location * build zlib dependency * include zlib header directory * explicitly set PropertyFlags.AllProps so the test does not fail on static builds --------- Co-authored-by: ptosco --- Code/GraphMol/FileParsers/CMakeLists.txt | 16 +- Code/GraphMol/FileParsers/PNGParser.cpp | 171 +++-- Code/GraphMol/FileParsers/PNGParser.h | 64 +- .../FileParsers/file_parsers_catch.cpp | 211 ++++++ .../RGroupDecomposition/CMakeLists.txt | 4 +- Code/GraphMol/Wrap/rdmolfiles.cpp | 142 +++- Code/GraphMol/Wrap/rough_test.py | 114 ++- Code/MinimalLib/JSONParsers.cpp | 19 + Code/MinimalLib/JSONParsers.h | 2 + Code/MinimalLib/cffi_test.c | 677 ++++++++++++++++++ Code/MinimalLib/cffiwrapper.cpp | 109 +++ Code/MinimalLib/cffiwrapper.h | 17 + Code/MinimalLib/common.h | 
104 +++ Code/MinimalLib/docker/Dockerfile_1_deps | 13 + .../docker/Dockerfile_3_rdkit_build | 2 + Code/MinimalLib/jswrapper.cpp | 70 ++ Code/MinimalLib/minilib.cpp | 49 ++ Code/MinimalLib/minilib.h | 24 + Code/MinimalLib/tests/tests.js | 412 ++++++++++- 19 files changed, 2133 insertions(+), 87 deletions(-) diff --git a/Code/GraphMol/FileParsers/CMakeLists.txt b/Code/GraphMol/FileParsers/CMakeLists.txt index e282e2c97..4fda68533 100644 --- a/Code/GraphMol/FileParsers/CMakeLists.txt +++ b/Code/GraphMol/FileParsers/CMakeLists.txt @@ -1,3 +1,12 @@ + + +if(NOT RDK_USE_BOOST_IOSTREAMS) + find_package(ZLIB) + set(STANDALONE_ZLIB_LIBRARY ${ZLIB_LIBRARIES}) + include_directories(${ZLIB_INCLUDE_DIRS}) + add_definitions("-DRDK_USE_STANDALONE_ZLIB") +endif(NOT RDK_USE_BOOST_IOSTREAMS) + if(RDK_BUILD_MAEPARSER_SUPPORT) set(MAEPARSER_LIB maeparser) set(maesupplier MaeMolSupplier.cpp MaeWriter.cpp) @@ -34,8 +43,11 @@ rdkit_library(FileParsers MultithreadedMolSupplier.cpp MultithreadedSmilesMolSupplier.cpp MultithreadedSDMolSupplier.cpp - LINK_LIBRARIES GenericGroups Depictor SmilesParse ChemTransforms GraphMol SubstructMatch ${MAEPARSER_LIB} ${RDK_CHEMDRAW_LIBS}) - + LINK_LIBRARIES GenericGroups Depictor SmilesParse ChemTransforms GraphMol SubstructMatch ${MAEPARSER_LIB} ${RDK_CHEMDRAW_LIBS} ${STANDALONE_ZLIB_LIBRARY} +) +if(STANDALONE_ZLIB_LIBRARY) + target_include_directories(FileParsers PRIVATE ${ZLIB_INCLUDE_DIRS}) +endif(STANDALONE_ZLIB_LIBRARY) target_compile_definitions(FileParsers PRIVATE RDKIT_FILEPARSERS_BUILD) rdkit_headers(CDXMLParser.h diff --git a/Code/GraphMol/FileParsers/PNGParser.cpp b/Code/GraphMol/FileParsers/PNGParser.cpp index 4019f15ca..b700d4c87 100644 --- a/Code/GraphMol/FileParsers/PNGParser.cpp +++ b/Code/GraphMol/FileParsers/PNGParser.cpp @@ -13,19 +13,29 @@ // https://github.com/openbabel/openbabel/blob/master/src/formats/pngformat.cpp #include "PNGParser.h" -#include #include #include +#include #include -#include -#include #include "FileParsers.h" 
+#include +#include +#include +#include +#include #ifdef RDK_USE_BOOST_IOSTREAMS #include #include #include #endif +#include +#if !defined(RDK_USE_BOOST_IOSTREAMS) && defined(RDK_USE_STANDALONE_ZLIB) +#ifndef ZLIB_CONST +#define ZLIB_CONST +#endif +#include +#endif namespace RDKit { @@ -48,7 +58,7 @@ bool checkPNGHeader(std::istream &inStream) { return true; } -#ifdef RDK_USE_BOOST_IOSTREAMS +#if defined(RDK_USE_BOOST_IOSTREAMS) std::string uncompressString(const std::string &ztext) { std::stringstream compressed(ztext); std::stringstream uncompressed; @@ -67,7 +77,75 @@ std::string compressString(const std::string &text) { boost::iostreams::copy(bioOutstream, compressed); return compressed.str(); } +#elif defined(RDK_USE_STANDALONE_ZLIB) +std::string zlibActOnString(const std::string &inText, bool compress) { + static const char *deflatePrefix = "de"; + static const char *inflatePrefix = "in"; + const char *zlibActionPrefix; + int (*zlibAction)(z_streamp, int); + int (*zlibEnd)(z_streamp); + int zRetCode; + std::string res; + constexpr uInt BUF_SIZE = 65536; + std::vector outBuf(BUF_SIZE); + z_stream zs{}; + zs.next_in = reinterpret_cast(inText.c_str()); + zs.avail_in = static_cast(inText.size()); + zs.next_out = reinterpret_cast(outBuf.data()); + zs.avail_out = static_cast(outBuf.size()); + if (compress) { + zlibActionPrefix = deflatePrefix; + zlibAction = deflate; + zlibEnd = deflateEnd; + zRetCode = deflateInit(&zs, Z_DEFAULT_COMPRESSION); + } else { + zlibActionPrefix = inflatePrefix; + zlibAction = inflate; + zlibEnd = inflateEnd; + zRetCode = inflateInit(&zs); + } + if (zRetCode != Z_OK) { + BOOST_LOG(rdWarningLog) + << "Failed to initialize zlib stream (" << zRetCode << ")"; + if (zs.msg) { + BOOST_LOG(rdWarningLog) << ": " << zs.msg; + } + BOOST_LOG(rdWarningLog) << "." << std::endl; + zlibEnd(&zs); + return ""; + } + while (zRetCode == Z_OK) { + zRetCode = zlibAction(&zs, zs.avail_in ? 
Z_NO_FLUSH : Z_FINISH); + if (zRetCode != Z_OK && zRetCode != Z_STREAM_END) { + BOOST_LOG(rdWarningLog) << "Failed to " << zlibActionPrefix + << "flate zlib stream (" << zRetCode << ")"; + if (zs.msg) { + BOOST_LOG(rdWarningLog) << ": " << zs.msg; + } + BOOST_LOG(rdWarningLog) << "." << std::endl; + zlibEnd(&zs); + return ""; + } + if (!zs.avail_out) { + res += std::string(outBuf.data(), BUF_SIZE); + zs.next_out = reinterpret_cast(outBuf.data()); + zs.avail_out = BUF_SIZE; + } + } + auto residual = zs.total_out - res.size(); + if (residual) { + res += std::string(outBuf.data(), residual); + } + zlibEnd(&zs); + return res; +} +std::string uncompressString(const std::string &ztext) { + return zlibActOnString(ztext, false); +} +std::string compressString(const std::string &text) { + return zlibActOnString(text, true); +} #endif } // namespace @@ -99,7 +177,7 @@ std::vector> PNGStreamToMetadata( bytes[3] == 'D') { break; } -#ifndef RDK_USE_BOOST_IOSTREAMS +#if !defined(RDK_USE_BOOST_IOSTREAMS) && !defined(RDK_USE_STANDALONE_ZLIB) bool alreadyWarned = false; #endif if (blockLen > 0 && @@ -120,8 +198,8 @@ std::vector> PNGStreamToMetadata( if (inStream.fail()) { throw FileParseException("error when reading from PNG"); } - } else if (bytes[0] == 'z') { -#ifdef RDK_USE_BOOST_IOSTREAMS + } else { +#if defined(RDK_USE_BOOST_IOSTREAMS) || defined(RDK_USE_STANDALONE_ZLIB) value.resize(dataLen); inStream.read(&value.front(), dataLen); if (inStream.fail()) { @@ -138,11 +216,9 @@ std::vector> PNGStreamToMetadata( alreadyWarned = true; } #endif - } else { - CHECK_INVARIANT(0, "impossible value"); } if (!value.empty()) { - res.push_back(std::make_pair(key, value)); + res.emplace_back(key, value); } } inStream.seekg(beginBlock); @@ -156,7 +232,7 @@ std::string addMetadataToPNGStream( std::istream &inStream, const std::vector> &metadata, bool compressed) { -#ifndef RDK_USE_BOOST_IOSTREAMS +#if !defined(RDK_USE_BOOST_IOSTREAMS) && !defined(RDK_USE_STANDALONE_ZLIB) compressed = false; 
#endif // confirm that it's a PNG file: @@ -197,22 +273,22 @@ std::string addMetadataToPNGStream( } // write out the metadata: - for (const auto &pr : metadata) { + for (const auto &[key, value] : metadata) { std::stringstream blk; if (!compressed) { blk.write("tEXt", 4); // write the name along with a zero - blk.write(pr.first.c_str(), pr.first.size() + 1); - blk.write(pr.second.c_str(), pr.second.size()); + blk.write(key.c_str(), key.size() + 1); + blk.write(value.c_str(), value.size()); } else { -#ifdef RDK_USE_BOOST_IOSTREAMS +#if defined(RDK_USE_BOOST_IOSTREAMS) || defined(RDK_USE_STANDALONE_ZLIB) blk.write("zTXt", 4); // write the name along with a zero - blk.write(pr.first.c_str(), pr.first.size() + 1); + blk.write(key.c_str(), key.size() + 1); // write the compressed data // first a zero for the "compression method": blk.write("\0", 1); - auto dest = compressString(pr.second); + auto dest = compressString(value); blk.write((const char *)dest.c_str(), dest.size()); #else // we shouldn't get here since we disabled compressed at the beginning of @@ -247,24 +323,32 @@ std::string addMetadataToPNGStream( } std::string addMolToPNGStream(const ROMol &mol, std::istream &iStream, - bool includePkl, bool includeSmiles, - bool includeMol) { + const PNGMetadataParams ¶ms) { std::vector> metadata; - if (includePkl) { + if (params.includePkl) { std::string pkl; - MolPickler::pickleMol(mol, pkl); - metadata.push_back(std::make_pair(augmentTagName(PNGData::pklTag), pkl)); + MolPickler::pickleMol(mol, pkl, params.propertyFlags); + metadata.emplace_back(augmentTagName(PNGData::pklTag), pkl); } - if (includeSmiles) { - std::string smi = MolToCXSmiles(mol); - metadata.push_back(std::make_pair(augmentTagName(PNGData::smilesTag), smi)); + if (params.includeSmiles) { + std::string smi = + MolToCXSmiles(mol, params.smilesWriteParams, params.cxSmilesFlags, + params.restoreBondDirs); + metadata.emplace_back(augmentTagName(PNGData::smilesTag), smi); } - if (includeMol) { + if 
(params.includeMol) { + std::unique_ptr molOrigWedging; + const ROMol *molRef = &mol; bool includeStereo = true; int confId = -1; bool kekulize = false; - std::string mb = MolToMolBlock(mol, includeStereo, confId, kekulize); - metadata.push_back(std::make_pair(augmentTagName(PNGData::molTag), mb)); + if (params.restoreBondDirs == RestoreBondDirOptionTrue) { + molOrigWedging.reset(new ROMol(mol)); + Chirality::reapplyMolBlockWedging(*molOrigWedging); + molRef = molOrigWedging.get(); + } + std::string mb = MolToMolBlock(*molRef, includeStereo, confId, kekulize); + metadata.emplace_back(augmentTagName(PNGData::molTag), mb); } return addMetadataToPNGStream(iStream, metadata); }; @@ -274,15 +358,15 @@ ROMol *PNGStreamToMol(std::istream &inStream, ROMol *res = nullptr; auto metadata = PNGStreamToMetadata(inStream); bool formatFound = false; - for (const auto &pr : metadata) { - if (boost::starts_with(pr.first, PNGData::pklTag)) { - res = new ROMol(pr.second); + for (const auto &[key, value] : metadata) { + if (boost::starts_with(key, PNGData::pklTag)) { + res = new ROMol(value); formatFound = true; - } else if (boost::starts_with(pr.first, PNGData::smilesTag)) { - res = SmilesToMol(pr.second, params); + } else if (boost::starts_with(key, PNGData::smilesTag)) { + res = SmilesToMol(value, params); formatFound = true; - } else if (boost::starts_with(pr.first, PNGData::molTag)) { - res = MolBlockToMol(pr.second, params.sanitize, params.removeHs); + } else if (boost::starts_with(key, PNGData::molTag)) { + res = MolBlockToMol(value, params.sanitize, params.removeHs); formatFound = true; } if (formatFound) { @@ -300,17 +384,16 @@ std::vector> PNGStreamToMols( const SmilesParserParams ¶ms) { std::vector> res; auto metadata = PNGStreamToMetadata(inStream); - for (const auto &pr : metadata) { - if (!boost::starts_with(pr.first, tagToUse)) { + for (const auto &[key, value] : metadata) { + if (!boost::starts_with(key, tagToUse)) { continue; } - if (boost::starts_with(pr.first, 
PNGData::pklTag)) { - res.emplace_back(new ROMol(pr.second)); - } else if (boost::starts_with(pr.first, PNGData::smilesTag)) { - res.emplace_back(SmilesToMol(pr.second, params)); - } else if (boost::starts_with(pr.first, PNGData::molTag)) { - res.emplace_back( - MolBlockToMol(pr.second, params.sanitize, params.removeHs)); + if (boost::starts_with(key, PNGData::pklTag)) { + res.emplace_back(new ROMol(value)); + } else if (boost::starts_with(key, PNGData::smilesTag)) { + res.emplace_back(SmilesToMol(value, params)); + } else if (boost::starts_with(key, PNGData::molTag)) { + res.emplace_back(MolBlockToMol(value, params.sanitize, params.removeHs)); } } return res; diff --git a/Code/GraphMol/FileParsers/PNGParser.h b/Code/GraphMol/FileParsers/PNGParser.h index f0bf69d6e..93f519bba 100644 --- a/Code/GraphMol/FileParsers/PNGParser.h +++ b/Code/GraphMol/FileParsers/PNGParser.h @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -32,6 +33,24 @@ RDKIT_FILEPARSERS_EXPORT extern const std::string molTag; RDKIT_FILEPARSERS_EXPORT extern const std::string pklTag; } // namespace PNGData +struct RDKIT_FILEPARSERS_EXPORT PNGMetadataParams { + //! include molecule pickle + bool includePkl = true; + //! include CXSMILES for the molecule + bool includeSmiles = true; + //! include molblock for the molecule + bool includeMol = false; + //! choose properties to be included in the pickle + unsigned int propertyFlags = MolPickler::getDefaultPickleProperties(); + //! choose SmilesWriteParams for the CXSMILES string + SmilesWriteParams smilesWriteParams = SmilesWriteParams(); + //! choose CXSMILES fields to be included in the CXSMILES string + std::uint32_t cxSmilesFlags = SmilesWrite::CXSmilesFields::CX_ALL; + //! choose what to do with bond dirs in the CXSMILES string + RestoreBondDirOption restoreBondDirs = + RestoreBondDirOption::RestoreBondDirOptionClear; +}; + //! \name metadata to/from PNG //! 
@{ @@ -158,6 +177,18 @@ inline std::vector> PNGStringToMols( return PNGStreamToMols(inStream, tagToUse, params); } +//! \brief adds metadata for an ROMol to the data from a PNG stream. +//! The modified PNG data is returned. +/*! + + \param mol the molecule to add + \param iStream the stream to read from + \param params instance of PNGMetadataParams + +*/ +RDKIT_FILEPARSERS_EXPORT std::string addMolToPNGStream( + const ROMol &mol, std::istream &iStream, const PNGMetadataParams ¶ms); + //! \brief adds metadata for an ROMol to the data from a PNG stream. //! The modified PNG data is returned. /*! @@ -169,9 +200,16 @@ inline std::vector> PNGStringToMols( \param includeMol include a mol block for the molecule */ -RDKIT_FILEPARSERS_EXPORT std::string addMolToPNGStream( - const ROMol &mol, std::istream &iStream, bool includePkl = true, - bool includeSmiles = true, bool includeMol = false); +inline std::string addMolToPNGStream(const ROMol &mol, std::istream &iStream, + bool includePkl = true, + bool includeSmiles = true, + bool includeMol = false) { + PNGMetadataParams params; + params.includePkl = includePkl; + params.includeSmiles = includeSmiles; + params.includeMol = includeMol; + return addMolToPNGStream(mol, iStream, params); +} //! \brief adds metadata for an ROMol to a PNG string. //! The modified PNG data is returned. @@ -185,6 +223,17 @@ inline std::string addMolToPNGString(const ROMol &mol, return addMolToPNGStream(mol, inStream, includePkl, includeSmiles, includeMol); } + +//! \brief adds metadata for an ROMol to a PNG string. +//! The modified PNG data is returned. +//! See \c addMolToPNGStream() for more details. +inline std::string addMolToPNGString(const ROMol &mol, + const std::string &pngString, + const PNGMetadataParams ¶ms) { + std::stringstream inStream(pngString); + return addMolToPNGStream(mol, inStream, params); +} + //! \brief adds metadata for an ROMol to the data from a PNG file. //! The modified PNG data is returned. //! 
See \c addMolToPNGStream() for more details. @@ -196,6 +245,15 @@ inline std::string addMolToPNGFile(const ROMol &mol, const std::string &fname, return addMolToPNGStream(mol, inStream, includePkl, includeSmiles, includeMol); } + +//! \brief adds metadata for an ROMol to the data from a PNG file. +//! The modified PNG data is returned. +//! See \c addMolToPNGStream() for more details. +inline std::string addMolToPNGFile(const ROMol &mol, const std::string &fname, + const PNGMetadataParams ¶ms) { + std::ifstream inStream(fname.c_str(), std::ios::binary); + return addMolToPNGStream(mol, inStream, params); +} //! @} } // namespace RDKit diff --git a/Code/GraphMol/FileParsers/file_parsers_catch.cpp b/Code/GraphMol/FileParsers/file_parsers_catch.cpp index 1116290d9..4424467f8 100644 --- a/Code/GraphMol/FileParsers/file_parsers_catch.cpp +++ b/Code/GraphMol/FileParsers/file_parsers_catch.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -2753,6 +2754,216 @@ TEST_CASE("write molecule to PNG", "[writer][PNG]") { CHECK(mol->getNumAtoms() == 29); CHECK(mol->getNumConformers() == 1); } + SECTION("use PKL") { + std::string fname = + rdbase + + "/Code/GraphMol/FileParsers/test_data/colchicine.no_metadata.png"; + auto colchicine = + "COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2"_smiles; + REQUIRE(colchicine); + RDDepict::compute2DCoords(*colchicine); + CHECK(colchicine->getNumConformers() == 1); + static const std::string propertyName("property"); + static const std::string propertyValue("value"); + colchicine->setProp(propertyName, propertyValue); + PNGMetadataParams params; + params.includePkl = true; + params.includeSmiles = false; + params.includeMol = false; + { + std::ifstream strm(fname, std::ios::in | std::ios::binary); + params.propertyFlags = PicklerOps::PropertyPickleOptions::NoProps; + auto pngString = addMolToPNGStream(*colchicine, strm, params); + // read it back out + std::unique_ptr mol(PNGStringToMol(pngString)); 
+ REQUIRE(mol); + CHECK(mol->getNumAtoms() == 29); + CHECK(mol->getNumConformers() == 1); + CHECK(!mol->hasProp(propertyName)); + } + { + std::ifstream strm(fname, std::ios::in | std::ios::binary); + params.propertyFlags = PicklerOps::PropertyPickleOptions::AllProps; + auto pngString = addMolToPNGStream(*colchicine, strm, params); + // read it back out + std::unique_ptr mol(PNGStringToMol(pngString)); + REQUIRE(mol); + CHECK(mol->getNumAtoms() == 29); + CHECK(mol->getNumConformers() == 1); + CHECK(mol->hasProp(propertyName)); + CHECK(mol->getProp(propertyName) == propertyValue); + } + { + std::ifstream strm(fname, std::ios::in | std::ios::binary); + params.includePkl = false; + params.includeSmiles = true; + params.cxSmilesFlags = SmilesWrite::CXSmilesFields::CX_ALL_BUT_COORDS; + auto pngString = addMolToPNGStream(*colchicine, strm, params); + // read it back out + std::unique_ptr mol(PNGStringToMol(pngString)); + REQUIRE(mol); + CHECK(mol->getNumAtoms() == 29); + CHECK(mol->getNumConformers() == 0); + CHECK(!mol->hasProp(propertyName)); + } + } + SECTION("use original wedging") { + std::string fname = + rdbase + + "/Code/GraphMol/FileParsers/test_data/colchicine.no_metadata.png"; + auto colchicineUnusualWedging = + R"CTAB( + RDKit 2D + + 29 31 0 0 0 0 0 0 0 0999 V2000 + 6.4602 1.0300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3062 1.9883 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8993 1.4680 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7453 2.4262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3384 1.9059 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0856 0.4273 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2396 -0.5309 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.9868 -2.0094 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1408 -2.9677 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6465 -0.0106 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8005 -0.9688 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5477 -2.4474 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2280 -0.2968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1857 -1.7387 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6836 
-2.9611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1813 -3.0436 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7569 -4.4288 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2442 -4.6230 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.1797 -1.9240 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.6215 -2.3378 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.9268 -0.4455 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6132 0.2787 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0269 1.7205 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5055 1.9733 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0258 3.3802 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.5043 3.6330 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0675 4.5342 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1576 2.9429 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3401 3.0254 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 1 0 + 8 9 1 0 + 7 10 2 0 + 10 11 1 0 + 11 12 1 0 + 6 13 1 0 + 13 14 2 0 + 14 15 1 0 + 15 16 2 0 + 16 17 1 0 + 17 18 1 0 + 16 19 1 0 + 19 20 2 0 + 19 21 1 0 + 21 22 2 0 + 23 22 1 1 + 23 24 1 0 + 24 25 1 0 + 25 26 1 0 + 25 27 2 0 + 23 28 1 0 + 28 29 1 0 + 10 3 1 0 + 22 13 1 0 + 29 5 1 0 +M END +)CTAB"_ctab; + REQUIRE(colchicineUnusualWedging); + CHECK(colchicineUnusualWedging->getNumConformers() == 1); + SmilesWriteParams ps; + CHECK(MolToCXSmiles(*colchicineUnusualWedging).find("wU:22.24|") != + std::string::npos); + CHECK(MolToCXSmiles(*colchicineUnusualWedging, ps, SmilesWrite::CX_ALL, + RestoreBondDirOptionTrue) + .find("wU:22.23|") != std::string::npos); + PNGMetadataParams params; + params.includePkl = true; + params.includeSmiles = true; + params.includeMol = true; + params.propertyFlags = PicklerOps::PropertyPickleOptions::AtomProps | + PicklerOps::PropertyPickleOptions::BondProps; + { + std::ifstream strm(fname, std::ios::in | std::ios::binary); + auto pngString = + addMolToPNGStream(*colchicineUnusualWedging, strm, params); + // read it back out + std::unique_ptr mol(PNGStringToMol(pngString)); + REQUIRE(mol); + CHECK(mol->getNumAtoms() == 29); + 
CHECK(mol->getNumConformers() == 1); + CHECK( + MolToCXSmiles(*mol, ps, SmilesWrite::CX_ALL, RestoreBondDirOptionTrue) + .find("wU:22.23|") != std::string::npos); + auto metadata = PNGStringToMetadata(pngString); + auto smilesFound = false; + auto ctabFound = false; + auto pklFound = false; + for (const auto &[key, value] : metadata) { + if (key.substr(0, 6) == "SMILES") { + smilesFound = true; + CHECK(value.find("wU:22.24|") != std::string::npos); + } else if (key.substr(0, 3) == "MOL") { + ctabFound = true; + CHECK(value.find(" 23 24 1 1") != std::string::npos); + } else if (key.substr(0, 8) == "rdkitPKL") { + pklFound = true; + RWMol molFromPkl(value); + CHECK(MolToMolBlock(molFromPkl).find(" 23 24 1 1") != + std::string::npos); + Chirality::reapplyMolBlockWedging(molFromPkl); + CHECK(MolToMolBlock(molFromPkl).find(" 23 22 1 1") != + std::string::npos); + } + } + CHECK((smilesFound && ctabFound && pklFound)); + } + { + params.restoreBondDirs = RestoreBondDirOptionTrue; + std::ifstream strm(fname, std::ios::in | std::ios::binary); + auto pngString = + addMolToPNGStream(*colchicineUnusualWedging, strm, params); + // read it back out + std::unique_ptr mol(PNGStringToMol(pngString)); + REQUIRE(mol); + CHECK(mol->getNumAtoms() == 29); + CHECK(mol->getNumConformers() == 1); + CHECK( + MolToCXSmiles(*mol, ps, SmilesWrite::CX_ALL, RestoreBondDirOptionTrue) + .find("wU:22.23|") != std::string::npos); + auto metadata = PNGStringToMetadata(pngString); + auto smilesFound = false; + auto ctabFound = false; + auto pklFound = false; + for (const auto &[key, value] : metadata) { + if (key.substr(0, 6) == "SMILES") { + smilesFound = true; + CHECK(value.find("wU:22.23|") != std::string::npos); + } else if (key.substr(0, 3) == "MOL") { + ctabFound = true; + CHECK(value.find(" 23 22 1 1") != std::string::npos); + } else if (key.substr(0, 8) == "rdkitPKL") { + pklFound = true; + RWMol molFromPkl(value); + CHECK(MolToMolBlock(molFromPkl).find(" 23 24 1 1") != + std::string::npos); + 
Chirality::reapplyMolBlockWedging(molFromPkl); + CHECK(MolToMolBlock(molFromPkl).find(" 23 22 1 1") != + std::string::npos); + } + } + CHECK((smilesFound && ctabFound && pklFound)); + } + } } TEST_CASE("multiple molecules in the PNG", "[writer][PNG]") { std::string rdbase = getenv("RDBASE"); diff --git a/Code/GraphMol/RGroupDecomposition/CMakeLists.txt b/Code/GraphMol/RGroupDecomposition/CMakeLists.txt index 1fcf233c8..099b97f22 100644 --- a/Code/GraphMol/RGroupDecomposition/CMakeLists.txt +++ b/Code/GraphMol/RGroupDecomposition/CMakeLists.txt @@ -25,8 +25,8 @@ rdkit_test(testRGroupDecompInternals testRGroupInternals.cpp rdkit_catch_test(rgroupCatchTests catch_rgd.cpp LINK_LIBRARIES RGroupDecomposition) -find_package(Boost ${RDK_BOOST_VERSION} COMPONENTS program_options CONFIG) -if(RDK_BUILD_CPP_TESTS AND Boost_FOUND) +if (RDK_BUILD_CPP_TESTS AND NOT RDK_BUILD_CFFI_LIB AND NOT RDK_BUILD_MINIMAL_LIB) + find_package(Boost ${RDK_BOOST_VERSION} COMPONENTS program_options CONFIG) add_executable(gaExample GaExample.cpp) if(NOT Boost_USE_STATIC_LIBS) target_compile_definitions(gaExample PUBLIC -DBOOST_PROGRAM_OPTIONS_DYN_LINK) diff --git a/Code/GraphMol/Wrap/rdmolfiles.cpp b/Code/GraphMol/Wrap/rdmolfiles.cpp index 9304d929d..7145fb01a 100644 --- a/Code/GraphMol/Wrap/rdmolfiles.cpp +++ b/Code/GraphMol/Wrap/rdmolfiles.cpp @@ -593,12 +593,34 @@ ROMol *MolFromPNGString(python::object png, python::object pyParams) { return newM; } +python::object addMolToPNGFileHelperParams(const ROMol &mol, + python::object fname, + const PNGMetadataParams ¶ms) { + std::string cstr = python::extract(fname); + + auto res = addMolToPNGFile(mol, cstr, params); + + python::object retval = python::object( + python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length()))); + return retval; +} + python::object addMolToPNGFileHelper(const ROMol &mol, python::object fname, bool includePkl, bool includeSmiles, bool includeMol) { - std::string cstr = python::extract(fname); + PNGMetadataParams 
params; + params.includePkl = includePkl; + params.includeSmiles = includeSmiles; + params.includeMol = includeMol; + return addMolToPNGFileHelperParams(mol, fname, params); +} - auto res = addMolToPNGFile(mol, cstr, includePkl, includeSmiles, includeMol); +python::object addMolToPNGStringHelperParams(const ROMol &mol, + python::object png, + const PNGMetadataParams ¶ms) { + std::string cstr = python::extract(png); + + auto res = addMolToPNGString(mol, cstr, params); python::object retval = python::object( python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length()))); @@ -608,14 +630,11 @@ python::object addMolToPNGFileHelper(const ROMol &mol, python::object fname, python::object addMolToPNGStringHelper(const ROMol &mol, python::object png, bool includePkl, bool includeSmiles, bool includeMol) { - std::string cstr = python::extract(png); - - auto res = - addMolToPNGString(mol, cstr, includePkl, includeSmiles, includeMol); - - python::object retval = python::object( - python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length()))); - return retval; + PNGMetadataParams params; + params.includePkl = includePkl; + params.includeSmiles = includeSmiles; + params.includeMol = includeMol; + return addMolToPNGStringHelperParams(mol, png, params); } python::object addMetadataToPNGFileHelper(python::dict pymetadata, @@ -734,31 +753,41 @@ python::tuple MolsFromCDXML(python::object cdxml, bool sanitize, } namespace { -python::dict translateMetadata( - const std::vector> &metadata) { - python::dict res; - for (const auto &pr : metadata) { +PyObject *translateMetadata( + const std::vector> &metadata, + bool asList) { + std::unique_ptr resAsDict; + std::unique_ptr resAsList; + if (asList) { + resAsList.reset(new python::list()); + } else { + resAsDict.reset(new python::dict()); + } + for (const auto &[key, value] : metadata) { // keys are safe to extract: - std::string key = pr.first; // but values may include binary, so we convert them directly to bytes: 
python::object val = python::object(python::handle<>( - PyBytes_FromStringAndSize(pr.second.c_str(), pr.second.length()))); - res[key] = val; + PyBytes_FromStringAndSize(value.c_str(), value.length()))); + if (asList) { + resAsList->append(python::make_tuple(key, val)); + } else { + (*resAsDict)[key] = val; + } } - return res; + return (asList ? resAsList.release()->ptr() : resAsDict.release()->ptr()); } } // namespace -python::dict MetadataFromPNGFile(python::object fname) { +PyObject *MetadataFromPNGFile(python::object fname, bool asList) { std::string cstr = python::extract(fname); auto metadata = PNGFileToMetadata(cstr); - return translateMetadata(metadata); + return translateMetadata(metadata, asList); } -python::dict MetadataFromPNGString(python::object png) { +PyObject *MetadataFromPNGString(python::object png, bool asList) { std::string cstr = python::extract(png); auto metadata = PNGStringToMetadata(cstr); - return translateMetadata(metadata); + return translateMetadata(metadata, asList); } void CanonicalizeEnhancedStereo(ROMol &mol) { @@ -2455,6 +2484,28 @@ BOOST_PYTHON_MODULE(rdmolfiles) { "returns a list of SMILES generated using the randomSmiles algorithm"); #ifdef RDK_USE_BOOST_IOSTREAMS + python::class_( + "PNGMetadataParams", + "Parameters controlling metadata included in PNG images") + .def_readwrite("includePkl", &RDKit::PNGMetadataParams::includePkl, + "toggles inclusion of molecule pickle (default=True)") + .def_readwrite("includeSmiles", &RDKit::PNGMetadataParams::includeSmiles, + "toggles inclusion of molecule CXSMILES (default=True)") + .def_readwrite("includeMol", &RDKit::PNGMetadataParams::includeMol, + "toggles inclusion of molecule molblock (default=False)") + .def_readwrite( + "propertyFlags", &RDKit::PNGMetadataParams::propertyFlags, + "choose properties to be included in the pickle (default=rdkit.Chem.rdchem.PropertyPickleOptions.NoProps)") + .def_readwrite( + "smilesWriteParams", &RDKit::PNGMetadataParams::smilesWriteParams, + "choose 
SmilesWriteParams for the CXSMILES string (default=rdkit.Chem.rdmolfiles.SmilesWriteParams())") + .def_readwrite( + "cxSmilesFlags", &RDKit::PNGMetadataParams::cxSmilesFlags, + "choose CXSMILES fields to be included in the CXSMILES string (default=rdkit.Chem.rdmolfiles.CXSmilesFields.CX_ALL)") + .def_readwrite( + "restoreBondDirs", &RDKit::PNGMetadataParams::restoreBondDirs, + "choose what to do with bond dirs in the CXSMILES string (default=rdkit.Chem.rdmolfiles.RestoreBondDirOption.RestoreBondDirOptionClear)"); + docString = R"DOC(Construct a molecule from metadata in a PNG string. @@ -2573,6 +2624,24 @@ BOOST_PYTHON_MODULE(rdmolfiles) { python::arg("includeMol") = false), docString.c_str()); + docString = + R"DOC(Adds molecular metadata to PNG data read from a file. + + ARGUMENTS: + + - mol: the molecule + + - filename: the PNG filename + + - params: an instance of PNGMetadataParams + + RETURNS: + the updated PNG data)DOC"; + python::def( + "MolMetadataToPNGFile", addMolToPNGFileHelperParams, + (python::arg("mol"), python::arg("filename"), python::arg("params")), + docString.c_str()); + docString = R"DOC(Adds molecular metadata to a PNG string. @@ -2596,6 +2665,23 @@ BOOST_PYTHON_MODULE(rdmolfiles) { python::arg("includeSmiles") = true, python::arg("includeMol") = false), docString.c_str()); + docString = + R"DOC(Adds molecular metadata to a PNG string. + + ARGUMENTS: + + - mol: the molecule + + - png: the PNG string + + - params: an instance of PNGMetadataParams + + RETURNS: + the updated PNG data)DOC"; + python::def("MolMetadataToPNGString", addMolToPNGStringHelperParams, + (python::arg("mol"), python::arg("png"), python::arg("params")), + docString.c_str()); + docString = R"DOC(Adds metadata to PNG data read from a file. 
@@ -2628,14 +2714,18 @@ BOOST_PYTHON_MODULE(rdmolfiles) { (python::arg("metadata"), python::arg("png")), docString.c_str()); python::def("MetadataFromPNGFile", MetadataFromPNGFile, - (python::arg("filename")), + (python::arg("filename"), python::arg("asList") = false), "Returns a dict with all metadata from the PNG file. Keys are " - "strings, values are bytes."); + "strings, values are bytes. " + "If asList is True, a list of (key, value) tuples is returned; " + "this enables retrieving multiple values sharing the same key."); python::def("MetadataFromPNGString", MetadataFromPNGString, - (python::arg("png")), + (python::arg("png"), python::arg("asList") = false), "Returns a dict with all metadata from the PNG string. Keys are " - "strings, values are bytes."); + "strings, values are bytes. " + "If asList is True, a list of (key, value) tuples is returned; " + "this enables retrieving multiple values sharing the same key."); #endif /******************************************************** * MolSupplier stuff diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index e82dc8a27..1b018e388 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -6722,19 +6722,100 @@ M END with open(fileN, 'rb') as inf: d = inf.read() - mol = Chem.MolFromSmiles("COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2") + mol = Chem.MolFromSmiles('COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2') + mol.SetProp('property', 'value') self.assertIsNotNone(mol) self.assertEqual(mol.GetNumAtoms(), 29) - nd = Chem.MolMetadataToPNGString(mol, d) - mol = Chem.MolFromPNGString(nd) - self.assertIsNotNone(mol) - self.assertEqual(mol.GetNumAtoms(), 29) + params = Chem.PNGMetadataParams() + params.propertyFlags = Chem.PropertyPickleOptions.AllProps + nd = Chem.MolMetadataToPNGString(mol, d, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + 
self.assertTrue(molFromPNG.HasProp('property')) + rdkit.Chem.rdDepictor.Compute2DCoords(mol) + self.assertEqual(mol.GetNumConformers(), 1) - nd = Chem.MolMetadataToPNGFile(mol, fileN) - mol = Chem.MolFromPNGString(nd) - self.assertIsNotNone(mol) - self.assertEqual(mol.GetNumAtoms(), 29) + nd = Chem.MolMetadataToPNGString(mol, d, includePkl=False) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertFalse(molFromPNG.HasProp('property')) + params = Chem.PNGMetadataParams() + params.includePkl = True + params.propertyFlags = Chem.PropertyPickleOptions.AllProps + nd = Chem.MolMetadataToPNGString(mol, d, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertTrue(molFromPNG.HasProp('property')) + params = Chem.PNGMetadataParams() + params.includePkl = False + params.cxSmilesFlags = Chem.CXSmilesFields.CX_ALL_BUT_COORDS + nd = Chem.MolMetadataToPNGString(mol, d, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 0) + self.assertFalse(molFromPNG.HasProp('property')) + params.includePkl = True + params.propertyFlags = Chem.PropertyPickleOptions.NoProps + nd = Chem.MolMetadataToPNGString(mol, d, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertFalse(molFromPNG.HasProp('property')) + params.propertyFlags = Chem.PropertyPickleOptions.AllProps + nd = Chem.MolMetadataToPNGString(mol, d, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + 
self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertTrue(molFromPNG.HasProp('property')) + + nd = Chem.MolMetadataToPNGFile(mol, fileN, includePkl=False) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertFalse(molFromPNG.HasProp('property')) + params = Chem.PNGMetadataParams() + params.includePkl = True + params.propertyFlags = Chem.PropertyPickleOptions.AllProps + nd = Chem.MolMetadataToPNGFile(mol, fileN, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertTrue(molFromPNG.HasProp('property')) + params = Chem.PNGMetadataParams() + params.includePkl = False + params.cxSmilesFlags = Chem.CXSmilesFields.CX_ALL_BUT_COORDS + nd = Chem.MolMetadataToPNGFile(mol, fileN, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 0) + self.assertFalse(molFromPNG.HasProp('property')) + params.includePkl = True + params.propertyFlags = Chem.PropertyPickleOptions.NoProps + nd = Chem.MolMetadataToPNGFile(mol, fileN, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertFalse(molFromPNG.HasProp('property')) + params.propertyFlags = Chem.PropertyPickleOptions.AllProps + nd = Chem.MolMetadataToPNGFile(mol, fileN, params) + molFromPNG = Chem.MolFromPNGString(nd) + self.assertIsNotNone(molFromPNG) + self.assertEqual(molFromPNG.GetNumAtoms(), 29) + self.assertEqual(molFromPNG.GetNumConformers(), 1) + self.assertTrue(molFromPNG.HasProp('property')) @unittest.skipUnless(hasattr(Chem, 'MolFromPNGFile'), "RDKit not built with 
iostreams support") def testMolsFromPNG(self): @@ -6754,6 +6835,7 @@ M END with open(fileN, 'rb') as inf: d = inf.read() mol = Chem.MolFromPNGString(d) + self.assertIsNotNone(mol) nd = Chem.MolMetadataToPNGString(mol, d) vals = {'foo': '1', 'bar': '2'} nd = Chem.AddMetadataToPNGString(vals, nd) @@ -6763,6 +6845,20 @@ M END self.assertTrue('bar' in nvals) self.assertEqual(nvals['bar'], b'2') + with open(fileN, 'rb') as inf: + d = inf.read() + mol = Chem.MolFromPNGString(d) + self.assertIsNotNone(mol) + nd = Chem.MolMetadataToPNGString(mol, d) + nd = Chem.AddMetadataToPNGString(vals, nd) + vals2 = {'foo': '3', 'bar': '4'} + nd = Chem.AddMetadataToPNGString(vals2, nd) + nvals = Chem.MetadataFromPNGString(nd, asList=True) + self.assertEqual(len(nvals), 7) + self.assertEqual([k.split()[0] for k, _ in nvals], ['SMILES', 'rdkitPKL', 'SMILES', 'foo', 'bar', 'foo', 'bar']) + self.assertEqual([v.decode() for k, v in nvals if k == 'foo'], ['1', '3']) + self.assertEqual([v.decode() for k, v in nvals if k == 'bar'], ['2', '4']) + nd = Chem.AddMetadataToPNGFile(vals, fileN) nvals = Chem.MetadataFromPNGString(nd) self.assertTrue('foo' in nvals) diff --git a/Code/MinimalLib/JSONParsers.cpp b/Code/MinimalLib/JSONParsers.cpp index 93eb4a66d..4b310cc60 100644 --- a/Code/MinimalLib/JSONParsers.cpp +++ b/Code/MinimalLib/JSONParsers.cpp @@ -99,5 +99,24 @@ void updateRemoveHsParametersFromJSON(MolOps::RemoveHsParameters &ps, } } +void updatePNGMetadataParamsFromJSON(PNGMetadataParams &params, + const char *details_json) { + if (details_json && strlen(details_json)) { + boost::property_tree::ptree pt; + std::istringstream ss; + ss.str(details_json); + boost::property_tree::read_json(ss, pt); + params.includePkl = pt.get("includePkl", params.includePkl); + params.includeSmiles = pt.get("includeSmiles", params.includeSmiles); + params.includeMol = pt.get("includeMol", params.includeMol); + updatePropertyPickleOptionsFromJSON(params.propertyFlags, details_json); +
updateSmilesWriteParamsFromJSON(params.smilesWriteParams, details_json); + unsigned int restoreBondDirs = params.restoreBondDirs; + updateCXSmilesFieldsFromJSON(params.cxSmilesFlags, restoreBondDirs, + details_json); + params.restoreBondDirs = + RestoreBondDirOption::_from_integral(restoreBondDirs); + } +} } // end namespace MinimalLib } // end namespace RDKit diff --git a/Code/MinimalLib/JSONParsers.h b/Code/MinimalLib/JSONParsers.h index d8105f319..51e179ab7 100644 --- a/Code/MinimalLib/JSONParsers.h +++ b/Code/MinimalLib/JSONParsers.h @@ -25,5 +25,7 @@ void updateSanitizeFlagsFromJSON(unsigned int &sanitizeFlags, void updateRemoveHsParametersFromJSON(MolOps::RemoveHsParameters &ps, bool &sanitize, const char *details_json); +void updatePNGMetadataParamsFromJSON(PNGMetadataParams &params, + const char *details_json); } // end namespace MinimalLib } // end namespace RDKit diff --git a/Code/MinimalLib/cffi_test.c b/Code/MinimalLib/cffi_test.c index 44933354d..48f84e8db 100644 --- a/Code/MinimalLib/cffi_test.c +++ b/Code/MinimalLib/cffi_test.c @@ -3222,6 +3222,682 @@ M END\n\ free(mpkl); } +size_t _read_png_blob(FILE *hnd, char **png_blob) { + assert(hnd && png_blob); + static const size_t PNG_BUF_LEN = 65536; + size_t read_count; + size_t png_blob_sz; + *png_blob = NULL; + png_blob_sz = 0; + read_count = PNG_BUF_LEN; + while (read_count == PNG_BUF_LEN) { + *png_blob = (char *)realloc(*png_blob, png_blob_sz + PNG_BUF_LEN); /* grow by one chunk per pass so the fread below never writes past the allocation */ + assert(*png_blob); + read_count = fread(&(*png_blob)[png_blob_sz], 1, PNG_BUF_LEN, hnd); + png_blob_sz += read_count; + } + return png_blob_sz; +} + +size_t _write_png_blob(FILE *hnd, char *png_blob, size_t png_blob_sz) { + assert(hnd && png_blob); + return fwrite(png_blob, 1, png_blob_sz, hnd); +} + +void test_png_metadata() { + printf("--------------------------\n"); + printf(" test_png_metadata\n"); +#ifdef WIN32 +#define char_type_len wcslen + typedef wchar_t char_type; + const char_type *PNG_COLCHICINE_NO_METADATA = +
L"\\Code\\GraphMol\\FileParsers\\test_data\\colchicine.no_metadata.png"; + const char_type *PNG_COLCHICINE_WITH_METADATA = + L"\\Code\\GraphMol\\FileParsers\\test_data\\colchicine.png"; + const char *PNG_PENICILLIN_METADATA = "penicillin_metadata.png"; /* narrow on purpose: only passed to strlen()/fopen() */ +#else +#define char_type_len strlen + typedef char char_type; + const char_type *PNG_COLCHICINE_NO_METADATA = + "/Code/GraphMol/FileParsers/test_data/colchicine.no_metadata.png"; + const char_type *PNG_COLCHICINE_WITH_METADATA = + "/Code/GraphMol/FileParsers/test_data/colchicine.png"; + const char *PNG_PENICILLIN_METADATA = "penicillin_metadata.png"; +#endif + const char *BENZYLPENICILLIN_SMI = + "CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)Cc3ccccc3)C(=O)O)C"; + const char *BENZYLPENICILLIN_CAN_SMI = + "CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@H]1C(=O)O"; + const char *AMOXICILLIN_SMI = + "O=C(O)[C@@H]2N3C(=O)[C@@H](NC(=O)[C@@H](c1ccc(O)cc1)N)[C@H]3SC2(C)C"; + const char *AMOXICILLIN_CAN_SMI = + "CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(=O)O"; + const char *PNG_PENICILLIN_AMOXICILLIN_METADATA = + "penicillin_amoxicillin_metadata.png"; + const char *PNG_COLCHICINE_AMOXICILLIN_METADATA = + "colchicine_amoxicillin_metadata.png"; + const char *COLCHICINE = "COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2"; + const char *COLCHICINE_UNUSUAL_WEDGING = "\n\ + RDKit 2D\n\ +\n\ + 29 31 0 0 0 0 0 0 0 0999 V2000\n\ + 6.4602 1.0300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 5.3062 1.9883 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 3.8993 1.4680 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 2.7453 2.4262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 1.3384 1.9059 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 1.0856 0.4273 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 2.2396 -0.5309 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 1.9868 -2.0094 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 3.1408 -2.9677 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 3.6465 -0.0106 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 4.8005 -0.9688 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ +
4.5477 -2.4474 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -0.2280 -0.2968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 0.1857 -1.7387 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -0.6836 -2.9611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -2.1813 -3.0436 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -2.7569 -4.4288 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -4.2442 -4.6230 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -3.1797 -1.9240 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -4.6215 -2.3378 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -2.9268 -0.4455 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -1.6132 0.2787 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -2.0269 1.7205 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -3.5055 1.9733 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -4.0258 3.3802 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -5.5043 3.6330 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -3.0675 4.5342 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n\ + -1.1576 2.9429 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 0.3401 3.0254 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n\ + 1 2 1 0\n\ + 2 3 1 0\n\ + 3 4 2 0\n\ + 4 5 1 0\n\ + 5 6 2 0\n\ + 6 7 1 0\n\ + 7 8 1 0\n\ + 8 9 1 0\n\ + 7 10 2 0\n\ + 10 11 1 0\n\ + 11 12 1 0\n\ + 6 13 1 0\n\ + 13 14 2 0\n\ + 14 15 1 0\n\ + 15 16 2 0\n\ + 16 17 1 0\n\ + 17 18 1 0\n\ + 16 19 1 0\n\ + 19 20 2 0\n\ + 19 21 1 0\n\ + 21 22 2 0\n\ + 23 22 1 1\n\ + 23 24 1 0\n\ + 24 25 1 0\n\ + 25 26 1 0\n\ + 25 27 2 0\n\ + 23 28 1 0\n\ + 28 29 1 0\n\ + 10 3 1 0\n\ + 22 13 1 0\n\ + 29 5 1 0\n\ +M END\n"; + char *penicillin_pkl; + size_t penicillin_pkl_sz; + char *amoxicillin_pkl; + size_t amoxicillin_pkl_sz; + char *colchicine_pkl; + size_t colchicine_pkl_sz; + const char *PROPERTY_NAME = "property"; + const char *PROPERTY_VALUE = "value"; + char_type *rdbase; + char_type *png_no_metadata_abspath; + char_type *png_with_metadata_abspath; + char *png_no_metadata_blob; + char *png_no_metadata_blob2; + char *png_with_metadata_blob; + char *prop; + char *smi; + char *molblock; + FILE *hnd_no_metadata; + FILE *hnd_with_metadata; + size_t rdbase_len; + size_t png_no_metadata_len; + 
size_t png_with_metadata_len; + size_t png_penicillin_metadata_len; + size_t png_penicillin_amoxicillin_metadata_len; + size_t png_colchicine_amoxicillin_metadata_len; + size_t png_no_metadata_abspath_maxlen; + size_t png_with_metadata_abspath_maxlen; + size_t png_no_metadata_blob_sz; + size_t png_no_metadata_blob2_sz; + size_t png_with_metadata_blob_sz; + size_t _read_png_blob(FILE * hnd, char **png_blob); + size_t _write_png_blob(FILE * hnd, char *png_blob, size_t png_blob_sz); + short res; + void *null_ptr = NULL; +#ifdef WIN32 + rdbase = _wgetenv(L"RDBASE"); +#else + rdbase = getenv("RDBASE"); +#endif + assert(rdbase); + rdbase_len = char_type_len(rdbase); + png_no_metadata_len = char_type_len(PNG_COLCHICINE_NO_METADATA); + png_with_metadata_len = char_type_len(PNG_COLCHICINE_WITH_METADATA); + png_penicillin_metadata_len = strlen(PNG_PENICILLIN_METADATA); + png_penicillin_amoxicillin_metadata_len = + strlen(PNG_PENICILLIN_AMOXICILLIN_METADATA); + png_colchicine_amoxicillin_metadata_len = + strlen(PNG_COLCHICINE_AMOXICILLIN_METADATA); + char *mpkl; + size_t mpkl_sz; + char *mpkl_san; + size_t mpkl_san_sz; + char **mpkl_array; + char *fp; + size_t *mpkl_sz_array; + size_t i; + png_no_metadata_abspath_maxlen = rdbase_len + png_no_metadata_len + 1; + png_no_metadata_abspath = + (char_type *)malloc(png_no_metadata_abspath_maxlen * sizeof(char_type)); + assert(png_no_metadata_abspath); + png_with_metadata_abspath_maxlen = rdbase_len + png_with_metadata_len + 1; + png_with_metadata_abspath = + (char_type *)malloc(png_with_metadata_abspath_maxlen * sizeof(char_type)); + assert(png_with_metadata_abspath); +#ifdef WIN32 + _snwprintf(png_no_metadata_abspath, png_no_metadata_abspath_maxlen, + L"%s%s", rdbase, PNG_COLCHICINE_NO_METADATA); + _snwprintf(png_with_metadata_abspath, png_with_metadata_abspath_maxlen, + L"%s%s", rdbase, PNG_COLCHICINE_WITH_METADATA); + hnd_no_metadata = _wfopen(png_no_metadata_abspath, L"rb"); + hnd_with_metadata = 
_wfopen(png_with_metadata_abspath, L"rb"); +#else + snprintf(png_no_metadata_abspath, png_no_metadata_abspath_maxlen, "%s%s", + rdbase, PNG_COLCHICINE_NO_METADATA); + snprintf(png_with_metadata_abspath, png_with_metadata_abspath_maxlen, "%s%s", + rdbase, PNG_COLCHICINE_WITH_METADATA); + hnd_no_metadata = fopen(png_no_metadata_abspath, "rb"); + hnd_with_metadata = fopen(png_with_metadata_abspath, "rb"); +#endif + assert(hnd_no_metadata); + png_no_metadata_blob_sz = + _read_png_blob(hnd_no_metadata, &png_no_metadata_blob); + fclose(hnd_no_metadata); + free(png_no_metadata_abspath); + assert(png_no_metadata_blob_sz); + png_no_metadata_blob2 = (char *)malloc(png_no_metadata_blob_sz); + assert(png_no_metadata_blob2); + memcpy(png_no_metadata_blob2, png_no_metadata_blob, png_no_metadata_blob_sz); + png_no_metadata_blob2_sz = png_no_metadata_blob_sz; + assert(hnd_with_metadata); + png_with_metadata_blob_sz = + _read_png_blob(hnd_with_metadata, &png_with_metadata_blob); + fclose(hnd_with_metadata); + free(png_with_metadata_abspath); + assert(png_with_metadata_blob_sz); + assert(!get_mol_from_png_blob(NULL, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + NULL)); + assert( + !get_mol_from_png_blob(png_no_metadata_blob, 0, &mpkl, &mpkl_sz, NULL)); + assert(!get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + NULL, &mpkl_sz, NULL)); + assert(!get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl, NULL, NULL)); + assert(!get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl, &mpkl_sz, NULL)); + assert(!get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl, &mpkl_sz, "")); + assert(!get_mols_from_png_blob(NULL, png_no_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, NULL)); + assert(!get_mols_from_png_blob(png_no_metadata_blob, 0, &mpkl_array, + &mpkl_sz_array, NULL)); + assert(!get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + NULL, &mpkl_sz_array, NULL)); + 
assert(!get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl_array, NULL, NULL)); + assert(!get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl_array, &mpkl_sz_array, NULL)); + assert(!get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl_array, &mpkl_sz_array, "")); + penicillin_pkl = NULL; + penicillin_pkl_sz = 0; + penicillin_pkl = get_mol(BENZYLPENICILLIN_SMI, &penicillin_pkl_sz, ""); + assert(penicillin_pkl && penicillin_pkl_sz); + assert(set_2d_coords(&penicillin_pkl, &penicillin_pkl_sz)); + assert(penicillin_pkl && penicillin_pkl_sz); + assert(!add_mol_to_png_blob(NULL, &png_no_metadata_blob_sz, penicillin_pkl, + penicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob((char **)&null_ptr, &png_no_metadata_blob_sz, + penicillin_pkl, penicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_no_metadata_blob, NULL, penicillin_pkl, + penicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_no_metadata_blob, &png_no_metadata_blob_sz, + NULL, penicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_no_metadata_blob, &png_no_metadata_blob_sz, + penicillin_pkl, 0, NULL)); + assert(add_mol_to_png_blob(&png_no_metadata_blob, &png_no_metadata_blob_sz, + penicillin_pkl, penicillin_pkl_sz, "{\"includePkl\":false,\"includeSmiles\":true,\"includeMol\":true}")); + hnd_with_metadata = fopen(PNG_PENICILLIN_METADATA, "wb"); + assert(hnd_with_metadata); + assert(_write_png_blob(hnd_with_metadata, png_no_metadata_blob, + png_no_metadata_blob_sz) == png_no_metadata_blob_sz); + fclose(hnd_with_metadata); + mpkl = NULL; + mpkl_sz = 0; + assert(get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl, &mpkl_sz, "")); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + free(mpkl); + mpkl = NULL; + mpkl_sz = 0; + assert(get_mol_from_png_blob(png_with_metadata_blob, + png_with_metadata_blob_sz, &mpkl, &mpkl_sz, "")); + assert(mpkl && mpkl_sz); + 
assert(has_coords(mpkl, mpkl_sz) == 2); + free(mpkl); + mpkl_array = NULL; + mpkl_sz_array = NULL; + assert(!get_mols_from_png_blob(png_with_metadata_blob, + png_with_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, "")); + assert(!get_mols_from_png_blob(png_with_metadata_blob, + png_with_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, "{\"includePkl\":true,\"includeSmiles\":true}")); + assert(get_mols_from_png_blob(png_with_metadata_blob, + png_with_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, "{\"includeSmiles\":true}") == 1); + i = 0; + while (mpkl_array[i]) { + ++i; + } + assert(i == 1); + i = 0; + while (mpkl_sz_array[i]) { + ++i; + } + assert(i == 1); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + mpkl = NULL; + mpkl_sz = 0; + assert(!get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":false}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + "{\"includePkl\":false,\"includeSmiles\":false,\"includeMol\":true,\"sanitize\":false,\"removeHs\":false,\"assignStereo\":false,\"fastFindRings\":false}")); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + smi = get_smiles(mpkl, mpkl_sz, ""); + free(mpkl); + assert(smi); + assert(strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + mpkl_san = get_mol(smi, &mpkl_san_sz, ""); + free(smi); + assert(mpkl_san && mpkl_san_sz); + smi = get_smiles(mpkl_san, mpkl_san_sz, ""); + assert(!strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + free(smi); + free(mpkl_san); + mpkl = NULL; + mpkl_sz = 0; + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + png_no_metadata_blob_sz = png_no_metadata_blob2_sz; + assert(!get_mol_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl, &mpkl_sz, "")); + 
free(penicillin_pkl); + penicillin_pkl = NULL; + penicillin_pkl_sz = 0; + penicillin_pkl = get_mol(BENZYLPENICILLIN_SMI, &penicillin_pkl_sz, ""); + assert(penicillin_pkl && penicillin_pkl_sz); + amoxicillin_pkl = NULL; + amoxicillin_pkl_sz = 0; + amoxicillin_pkl = get_mol(AMOXICILLIN_SMI, &amoxicillin_pkl_sz, ""); + assert(amoxicillin_pkl && amoxicillin_pkl_sz); + assert(set_2d_coords(&amoxicillin_pkl, &amoxicillin_pkl_sz)); + assert(amoxicillin_pkl && amoxicillin_pkl_sz); + assert(add_mol_to_png_blob(&png_no_metadata_blob, &png_no_metadata_blob_sz, + penicillin_pkl, penicillin_pkl_sz, + "{\"includePkl\":false,\"includeMol\":true,\"CX_ALL_BUT_COORDS\":true}")); + assert(add_mol_to_png_blob(&png_no_metadata_blob, &png_no_metadata_blob_sz, + amoxicillin_pkl, amoxicillin_pkl_sz, + "{\"includePkl\":false,\"includeMol\":true,\"CX_ALL_BUT_COORDS\":true}")); + hnd_with_metadata = fopen(PNG_PENICILLIN_AMOXICILLIN_METADATA, "wb"); + assert(hnd_with_metadata); + assert(_write_png_blob(hnd_with_metadata, png_no_metadata_blob, + png_no_metadata_blob_sz) == png_no_metadata_blob_sz); + fclose(hnd_with_metadata); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + "{\"sanitize\":false,\"removeHs\":false,\"assignStereo\":false,\"fastFindRings\":false}")); + assert(mpkl && mpkl_sz); + assert(!has_coords(mpkl, mpkl_sz)); + smi = get_smiles(mpkl, mpkl_sz, ""); + assert(smi); + assert(!strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + free(smi); + free(mpkl); + mpkl = NULL; + mpkl_sz = 0; + assert(!get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + "{\"includePkl\":false,\"includeSmiles\":false,\"includeMol\":false}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, + "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":true}")); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + smi = get_smiles(mpkl, mpkl_sz, ""); + 
assert(smi); + assert(!strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + free(smi); + free(mpkl); + mpkl_array = NULL; + mpkl_sz_array = NULL; + assert(!get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl_array, &mpkl_sz_array, "")); + assert(get_mols_from_png_blob(png_no_metadata_blob, png_no_metadata_blob_sz, + &mpkl_array, &mpkl_sz_array, "{\"includeSmiles\":true}") == 2); + i = 0; + while (mpkl_array[i]) { + ++i; + } + assert(i == 2); + i = 0; + while (mpkl_sz_array[i]) { + ++i; + } + assert(i == 2); + assert(!has_coords(mpkl_array[0], mpkl_sz_array[0])); + smi = get_smiles(mpkl_array[0], mpkl_sz_array[0], ""); + assert(smi); + assert(!strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + free(smi); + fp = get_morgan_fp(mpkl_array[0], mpkl_sz_array[0], ""); + assert(fp); + free(fp); + assert(!has_coords(mpkl_array[1], mpkl_sz_array[1])); + smi = get_smiles(mpkl_array[1], mpkl_sz_array[1], ""); + assert(smi); + assert(!strcmp(smi, AMOXICILLIN_CAN_SMI)); + free(smi); + fp = get_morgan_fp(mpkl_array[1], mpkl_sz_array[1], ""); + assert(fp); + free(fp); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + assert( + get_mols_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, + "{\"includePkl\":false,\"includeMol\":true,\"sanitize\":false,\"removeHs\":false,\"assignStereo\":false,\"fastFindRings\":false}") == + 2); + i = 0; + while (mpkl_array[i]) { + ++i; + } + assert(i == 2); + i = 0; + while (mpkl_sz_array[i]) { + ++i; + } + assert(i == 2); + assert(has_coords(mpkl_array[0], mpkl_sz_array[0]) == 2); + smi = get_smiles(mpkl_array[0], mpkl_sz_array[0], ""); + assert(smi); + assert(strcmp(smi, BENZYLPENICILLIN_CAN_SMI)); + mpkl_san = get_mol(smi, &mpkl_san_sz, ""); + free(smi); + assert(mpkl_san && mpkl_san_sz); + smi = get_smiles(mpkl_san, mpkl_san_sz, ""); + assert(!strcmp(smi, 
BENZYLPENICILLIN_CAN_SMI)); + free(smi); + free(mpkl_san); + assert(has_coords(mpkl_array[1], mpkl_sz_array[1]) == 2); + smi = get_smiles(mpkl_array[1], mpkl_sz_array[1], ""); + assert(smi); + assert(strcmp(smi, AMOXICILLIN_CAN_SMI)); + mpkl_san = get_mol(smi, &mpkl_san_sz, ""); + free(smi); + assert(mpkl_san && mpkl_san_sz); + smi = get_smiles(mpkl_san, mpkl_san_sz, ""); + assert(!strcmp(smi, AMOXICILLIN_CAN_SMI)); + free(smi); + free(mpkl_san); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + assert(!add_mol_to_png_blob(NULL, &png_with_metadata_blob_sz, amoxicillin_pkl, + amoxicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob((char **)&null_ptr, &png_with_metadata_blob_sz, + amoxicillin_pkl, amoxicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_with_metadata_blob, NULL, amoxicillin_pkl, + amoxicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_with_metadata_blob, + &png_with_metadata_blob_sz, NULL, + amoxicillin_pkl_sz, NULL)); + assert(!add_mol_to_png_blob(&png_with_metadata_blob, + &png_with_metadata_blob_sz, amoxicillin_pkl, 0, + NULL)); + assert(add_mol_to_png_blob( + &png_with_metadata_blob, &png_with_metadata_blob_sz, amoxicillin_pkl, + amoxicillin_pkl_sz, "{\"includeMol\":true,\"CX_ALL_BUT_COORDS\":true}")); + hnd_with_metadata = fopen(PNG_COLCHICINE_AMOXICILLIN_METADATA, "wb"); + assert(hnd_with_metadata); + assert(_write_png_blob(hnd_with_metadata, png_with_metadata_blob, + png_with_metadata_blob_sz) == + png_with_metadata_blob_sz); + fclose(hnd_with_metadata); + assert(get_mols_from_png_blob( + png_with_metadata_blob, png_with_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, + "{\"includePkl\":false,\"includeMol\":true}") == 1); + i = 0; + while (mpkl_array[i]) { + ++i; + } + assert(i == 1); + i = 0; + while (mpkl_sz_array[i]) { + ++i; + } + assert(i == 1); + assert(has_coords(mpkl_array[0], mpkl_sz_array[0]) == 2); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && 
!mpkl_sz_array); + assert(get_mols_from_png_blob( + png_with_metadata_blob, png_with_metadata_blob_sz, &mpkl_array, + &mpkl_sz_array, "{\"includeSmiles\":true}") == 2); + i = 0; + while (mpkl_array[i]) { + ++i; + } + assert(i == 2); + i = 0; + while (mpkl_sz_array[i]) { + ++i; + } + assert(i == 2); + assert(has_coords(mpkl_array[0], mpkl_sz_array[0]) == 2); + assert(!has_coords(mpkl_array[1], mpkl_sz_array[1])); + free_mol_array(&mpkl_array, &mpkl_sz_array); + assert(!mpkl_array && !mpkl_sz_array); + free(penicillin_pkl); + free(amoxicillin_pkl); + free(png_with_metadata_blob); + + colchicine_pkl = get_mol(COLCHICINE, &colchicine_pkl_sz, ""); + assert(colchicine_pkl && colchicine_pkl_sz); + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, NULL)); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(!has_coords(mpkl, mpkl_sz)); + free(mpkl); + // use SMILES + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":false}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(!has_coords(mpkl, mpkl_sz)); + free(mpkl); + // use MOL + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":false,\"includeSmiles\":false,\"includeMol\":true}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + free(mpkl); + // use 
PKL + set_2d_coords(&colchicine_pkl, &colchicine_pkl_sz); + assert(has_coords(colchicine_pkl, colchicine_pkl_sz) == 2); + set_prop(&colchicine_pkl, &colchicine_pkl_sz, PROPERTY_NAME, PROPERTY_VALUE, 0); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":false,\"propertyFlags\":{\"NoProps\":true}}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + assert(!has_prop(mpkl, mpkl_sz, PROPERTY_NAME)); + free(mpkl); + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":false,\"propertyFlags\":{\"AllProps\":true}}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + assert(has_prop(mpkl, mpkl_sz, PROPERTY_NAME)); + prop = get_prop(mpkl, mpkl_sz, PROPERTY_NAME); + assert(prop); + assert(!strcmp(prop, PROPERTY_VALUE)); + free(prop); + free(mpkl); + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":false,\"includeSmiles\":true,\"includeMol\":false,\"propertyFlags\":{\"NoProps\":true},\"CX_ALL_BUT_COORDS\":true}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(!has_coords(mpkl, mpkl_sz)); + assert(!has_prop(mpkl, mpkl_sz, PROPERTY_NAME)); + free(mpkl); + free(colchicine_pkl); + // use original wedging + colchicine_pkl = 
get_mol(COLCHICINE_UNUSUAL_WEDGING, &colchicine_pkl_sz, ""); + assert(colchicine_pkl); + assert(has_coords(colchicine_pkl, colchicine_pkl_sz) == 2); + smi = get_cxsmiles(colchicine_pkl, colchicine_pkl_sz, ""); + assert(smi); + assert(strstr(smi, "wU:22.24|")); + free(smi); + smi = get_cxsmiles(colchicine_pkl, colchicine_pkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionTrue\"}"); + assert(smi); + assert(strstr(smi, "wU:22.23|")); + free(smi); + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":true,\"includeSmiles\":true,\"includeMol\":true,\"propertyFlags\":{\"AtomProps\":true,\"BondProps\":true}}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + smi = get_cxsmiles(mpkl, mpkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionTrue\"}"); + assert(smi); + assert(strstr(smi, "wU:22.23|")); + free(smi); + smi = get_cxsmiles(mpkl, mpkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionClear\"}"); + assert(smi); + assert(strstr(smi, "wU:22.24|")); + free(smi); + molblock = get_molblock(mpkl, mpkl_sz, ""); + assert(molblock); + assert(strstr(molblock, " 23 24 1 1")); + free(molblock); + molblock = get_molblock(mpkl, mpkl_sz, "{\"useMolBlockWedging\":true}"); + assert(molblock); + assert(strstr(molblock, " 23 22 1 1")); + free(molblock); + free(mpkl); + // the mol is restored from CXSMILES, so it will not retain + // original molblock wedging, as it was not stored in the CXSMILES string + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, 
"{\"includePkl\":false,\"includeSmiles\":true,\"includeMol\":true,\"propertyFlags\":{\"AtomProps\":true,\"BondProps\":true}}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + smi = get_cxsmiles(mpkl, mpkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionTrue\"}"); + assert(smi); + assert(strstr(smi, "wU:22.24|")); + free(smi); + smi = get_cxsmiles(mpkl, mpkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionClear\"}"); + assert(smi); + assert(strstr(smi, "wU:22.24|")); + free(smi); + molblock = get_molblock(mpkl, mpkl_sz, ""); + assert(molblock); + assert(strstr(molblock, " 23 24 1 1")); + free(molblock); + molblock = get_molblock(mpkl, mpkl_sz, "{\"useMolBlockWedging\":true}"); + assert(molblock); + assert(strstr(molblock, " 23 24 1 1")); + free(molblock); + free(mpkl); + // the mol is restored from CXSMILES, but restoreBondDirOption was set + // to 'RestoreBondDirOptionTrue', so it will not retain + // original molblock wedging, as it was stored in the CXSMILES string + memcpy(png_no_metadata_blob, png_no_metadata_blob2, png_no_metadata_blob2_sz); + assert(add_mol_to_png_blob( + &png_no_metadata_blob, &png_no_metadata_blob_sz, colchicine_pkl, + colchicine_pkl_sz, "{\"includePkl\":false,\"includeSmiles\":true,\"includeMol\":true,\"propertyFlags\":{\"AtomProps\":true,\"BondProps\":true},\"restoreBondDirOption\":\"RestoreBondDirOptionTrue\"}")); + assert(get_mol_from_png_blob( + png_no_metadata_blob, png_no_metadata_blob_sz, &mpkl, &mpkl_sz, NULL)); + assert(mpkl && mpkl_sz); + assert(has_coords(mpkl, mpkl_sz) == 2); + smi = get_cxsmiles(mpkl, mpkl_sz, "{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionTrue\"}"); + assert(smi); + assert(strstr(smi, "wU:22.23|")); + free(smi); + smi = get_cxsmiles(mpkl, mpkl_sz, 
"{\"CX_ALL\":true,\"restoreBondDirOption\":\"RestoreBondDirOptionClear\"}"); + assert(smi); + assert(strstr(smi, "wU:22.24|")); + free(smi); + molblock = get_molblock(mpkl, mpkl_sz, ""); + assert(molblock); + assert(strstr(molblock, " 23 22 1 1")); + free(molblock); + molblock = get_molblock(mpkl, mpkl_sz, "{\"useMolBlockWedging\":true}"); + assert(molblock); + assert(strstr(molblock, " 23 22 1 1")); + free(molblock); + free(mpkl); + free(colchicine_pkl); + free(png_no_metadata_blob); + free(png_no_metadata_blob2); +} + int main() { enable_logging(); char *vers = version(); @@ -3261,5 +3937,6 @@ int main() { test_custom_palette(); test_props(); test_get_mol_remove_hs(); + test_png_metadata(); return 0; } diff --git a/Code/MinimalLib/cffiwrapper.cpp b/Code/MinimalLib/cffiwrapper.cpp index ca2cdc286..b08df1787 100644 --- a/Code/MinimalLib/cffiwrapper.cpp +++ b/Code/MinimalLib/cffiwrapper.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -978,6 +979,114 @@ extern "C" void keep_props(char **mol_pkl, size_t *mol_pkl_sz, mol_to_pkl(mol, mol_pkl, mol_pkl_sz, propFlags); } +extern "C" short add_mol_to_png_blob(char **png_blob, size_t *png_blob_sz, + const char *pkl, size_t pkl_sz, + const char *details_json) { + if (!png_blob || !*png_blob || !png_blob_sz || !*png_blob_sz || !pkl || + !pkl_sz) { + return 0; + } + PNGMetadataParams params; + MinimalLib::updatePNGMetadataParamsFromJSON(params, details_json); + std::string pngString(*png_blob, *png_blob_sz); + try { + auto mol = mol_from_pkl(pkl, pkl_sz); + auto updatedPngString = addMolToPNGString(mol, pngString, params); + auto updated_png_blob = + static_cast(malloc(updatedPngString.size())); + if (!updated_png_blob) { + return 0; + } + memcpy(updated_png_blob, updatedPngString.data(), updatedPngString.size()); + free(*png_blob); + *png_blob = updated_png_blob; + *png_blob_sz = updatedPngString.size(); + } catch (...) 
{ + return 0; + } + return 1; +} + +extern "C" short get_mol_from_png_blob(const char *png_blob, size_t png_blob_sz, + char **pkl, size_t *pkl_sz, + const char *details_json) { + if (!png_blob || !png_blob_sz || !pkl || !pkl_sz) { + return 0; + } + std::string pngString(png_blob, png_blob_sz); + auto mols = MinimalLib::get_mols_from_png_blob_internal(pngString, true, + details_json); + if (mols.empty()) { + return 0; + } + char *pkl_local = nullptr; + size_t pkl_sz_local = 0; + mol_to_pkl(*mols.front(), &pkl_local, &pkl_sz_local); + if (pkl_local && pkl_sz_local) { + *pkl = pkl_local; + *pkl_sz = pkl_sz_local; + return 1; + } + return 0; +} + +extern "C" short get_mols_from_png_blob(const char *png_blob, + size_t png_blob_sz, char ***pkl_array, + size_t **pkl_sz_array, + const char *details_json) { + if (!png_blob || !png_blob_sz || !pkl_array || !pkl_sz_array) { + return 0; + } + std::string pngString(png_blob, png_blob_sz); + auto mols = MinimalLib::get_mols_from_png_blob_internal(pngString, false, + details_json); + if (mols.empty()) { + return 0; + } + char **pkl_array_local = nullptr; + size_t *pkl_sz_array_local = nullptr; + size_t mol_array_len = mols.size() + 1; + pkl_array_local = (char **)malloc(mol_array_len * sizeof(char *)); + if (pkl_array_local) { + memset(pkl_array_local, 0, mol_array_len * sizeof(char *)); + pkl_sz_array_local = (size_t *)malloc(mol_array_len * sizeof(size_t)); + } + if (pkl_sz_array_local) { + memset(pkl_sz_array_local, 0, mol_array_len * sizeof(size_t)); + short i = 0; + for (const auto &mol : mols) { + mol_to_pkl(*mol, &pkl_array_local[i], &pkl_sz_array_local[i]); + if (pkl_array_local[i] && pkl_sz_array_local[i]) { + ++i; + } else { + break; + } + } + if (i == static_cast(mols.size())) { + *pkl_array = pkl_array_local; + *pkl_sz_array = pkl_sz_array_local; + return i; + } + } + free_mol_array(&pkl_array_local, &pkl_sz_array_local); + return 0; +} + +extern "C" void free_mol_array(char ***pkl_array, size_t **pkl_sz_array) { + if 
(pkl_array && *pkl_array) { + for (size_t i = 0; (*pkl_array)[i]; ++i) { + free((*pkl_array)[i]); + (*pkl_array)[i] = NULL; + } + free(*pkl_array); + *pkl_array = NULL; + } + if (pkl_sz_array && *pkl_sz_array) { + free(*pkl_sz_array); + *pkl_sz_array = NULL; + } +} + #if (defined(__GNUC__) || defined(__GNUG__)) #pragma GCC diagnostic pop #endif diff --git a/Code/MinimalLib/cffiwrapper.h b/Code/MinimalLib/cffiwrapper.h index f7c6da6ef..5e5222ebb 100644 --- a/Code/MinimalLib/cffiwrapper.h +++ b/Code/MinimalLib/cffiwrapper.h @@ -161,6 +161,23 @@ RDKIT_RDKITCFFI_EXPORT short disable_logger(const char *log_name); RDKIT_RDKITCFFI_EXPORT short use_legacy_stereo_perception(short value); RDKIT_RDKITCFFI_EXPORT short allow_non_tetrahedral_chirality(short value); +// PNG metadata +RDKIT_RDKITCFFI_EXPORT short add_mol_to_png_blob(char **png_blob, + size_t *png_blob_sz, + const char *mpkl, + size_t mpkl_size, + const char *details_json); +RDKIT_RDKITCFFI_EXPORT short get_mol_from_png_blob(const char *png_blob, + size_t png_blob_sz, + char **mpkl, size_t *mpkl_sz, + const char *details_json); +RDKIT_RDKITCFFI_EXPORT short get_mols_from_png_blob(const char *png_blob, + size_t png_blob_sz, + char ***mpkl_array, + size_t **mpkl_sz_array, + const char *details_json); +RDKIT_RDKITCFFI_EXPORT void free_mol_array(char ***pkl_array, + size_t **pkl_sz_array); // logging RDKIT_RDKITCFFI_EXPORT void *set_log_tee(const char *log_name); RDKIT_RDKITCFFI_EXPORT void *set_log_capture(const char *log_name); diff --git a/Code/MinimalLib/common.h b/Code/MinimalLib/common.h index 4118a3412..17ac1d3a6 100644 --- a/Code/MinimalLib/common.h +++ b/Code/MinimalLib/common.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ #include #include #include +#include #include #include #include "common_defs.h" @@ -353,6 +355,7 @@ std::string parse_int_array(const rj::Document &doc, std::vector &intVec, if (!it->value.IsArray()) { return "JSON contains '" + keyName 
+ "' field, but it is not an array"; } + intVec.clear(); for (const auto &val : it->value.GetArray()) { if (!val.IsInt()) { return valueName + " should be integers"; @@ -363,6 +366,26 @@ std::string parse_int_array(const rj::Document &doc, std::vector &intVec, return ""; } +std::string parse_double_array(const rj::Document &doc, + std::vector &doubleVec, + const std::string &keyName, + const std::string &valueName) { + const auto it = doc.FindMember(keyName.c_str()); + if (it != doc.MemberEnd()) { + if (!it->value.IsArray()) { + return "JSON contains '" + keyName + "' field, but it is not an array"; + } + doubleVec.clear(); + for (const auto &val : it->value.GetArray()) { + if (!val.IsNumber()) { + return valueName + " should be floats"; + } + doubleVec.push_back(val.GetDouble()); + } + } + return ""; +} + std::string parse_rgba_array(const rj::Value &val, DrawColour &color, const std::string &keyName) { if (!val.IsArray() || val.Size() < 3 || val.Size() > 4) { @@ -1361,6 +1384,87 @@ class LogHandle { std::stringstream d_stream; }; +std::vector get_mols_from_png_blob_internal( + const std::string &pngString, bool singleMol = false, + const char *details = nullptr) { + std::vector res; + if (pngString.empty()) { + return res; + } + PNGMetadataParams params; + params.includePkl = singleMol; + params.includeSmiles = singleMol; + params.includeMol = singleMol; + updatePNGMetadataParamsFromJSON(params, details); + std::string tagToUse; + unsigned int numTagsFound = 0; + if (params.includePkl) { + ++numTagsFound; + tagToUse = PNGData::pklTag; + } + if (params.includeSmiles) { + ++numTagsFound; + tagToUse = PNGData::smilesTag; + } + if (params.includeMol) { + ++numTagsFound; + tagToUse = PNGData::molTag; + } + if (numTagsFound == 0 || (!singleMol && numTagsFound > 1)) { + return res; + } + auto metadata = PNGStringToMetadata(pngString); + for (const auto &[key, value] : metadata) { + if (!singleMol && key.rfind(tagToUse, 0) == std::string::npos) { + continue; + } + 
std::unique_ptr mol;
+    if (params.includePkl && key.rfind(PNGData::pklTag, 0) == 0) {
+      try {
+        mol.reset(new RWMol(value));
+      } catch (...) {
+      }
+    } else if ((params.includeSmiles &&
+                key.rfind(PNGData::smilesTag, 0) == 0) ||
+               (params.includeMol && key.rfind(PNGData::molTag, 0) == 0)) {
+      mol.reset(MinimalLib::mol_from_input(value, details));
+    }
+    if (mol) {
+      res.emplace_back(mol.release());
+      if (singleMol) {
+        break;
+      }
+    }
+  }
+  return res;
+}
+
+std::string combine_mols_internal(const ROMol &mol1, const ROMol &mol2,
+                                  std::unique_ptr &combinedMol,
+                                  const char *details_json = nullptr) {
+  std::vector offset(3, 0.0);
+  combinedMol = nullptr;
+  if (details_json) {
+    rj::Document doc;
+    doc.Parse(details_json);
+    if (!doc.IsObject()) {
+      return "Invalid JSON";
+    }
+    std::string problems;
+    problems = parse_double_array(doc, offset, "offset", "offset coordinates");
+    if (!problems.empty()) {
+      return problems;
+    }
+  }
+  try {  // at() throws (caught below) if "offset" held fewer than 3 values
+    combinedMol.reset(combineMols(
+        mol1, mol2, RDGeom::Point3D(offset.at(0), offset.at(1), offset.at(2))));
+  } catch (...) {
+    return "Failed to combine molecules";
+  }
+  return "";
+}
+
 }  // namespace MinimalLib
 }  // namespace RDKit
 #undef LPT_OPT_GET
diff --git a/Code/MinimalLib/docker/Dockerfile_1_deps b/Code/MinimalLib/docker/Dockerfile_1_deps
index b71665f9b..3b155393d 100644
--- a/Code/MinimalLib/docker/Dockerfile_1_deps
+++ b/Code/MinimalLib/docker/Dockerfile_1_deps
@@ -20,6 +20,7 @@ ARG BOOST_PATCH_VERSION="0"
 ARG BOOST_DOT_VERSION
 ARG BOOST_UNDERSCORE_VERSION
 ARG FREETYPE_VERSION="2.13.3"
+ARG ZLIB_VERSION="1.3.1"
 ARG http_proxy
 ARG https_proxy
 
@@ -32,6 +33,7 @@ ARG BOOST_MAJOR_VERSION
 ARG BOOST_MINOR_VERSION
 ARG BOOST_PATCH_VERSION
 ARG FREETYPE_VERSION
+ARG ZLIB_VERSION
 ARG http_proxy
 ARG https_proxy
 
@@ -92,5 +94,16 @@ RUN emcmake cmake -DCMAKE_BUILD_TYPE=Release \
   -DCMAKE_C_FLAGS="${EXCEPTION_HANDLING}" -DCMAKE_EXE_LINKER_FLAGS="${EXCEPTION_HANDLING}" \
   -DCMAKE_INSTALL_PREFIX=/opt/freetype ..
RUN make -j2 && make -j2 install + +WORKDIR /src +RUN wget -q https://zlib.net/zlib-${ZLIB_VERSION}.tar.gz && \ + tar xzf zlib-${ZLIB_VERSION}.tar.gz +WORKDIR /src/zlib-${ZLIB_VERSION} +RUN mkdir build +WORKDIR /src/zlib-${ZLIB_VERSION}/build +RUN emcmake cmake -DCMAKE_BUILD_TYPE=Release -DZLIB_BUILD_EXAMPLES=OFF \ + -DCMAKE_C_FLAGS="${EXCEPTION_HANDLING}" -DCMAKE_EXE_LINKER_FLAGS="${EXCEPTION_HANDLING}" \ + -DCMAKE_INSTALL_PREFIX=/opt/zlib .. +RUN make && make install RUN echo "export BOOST_DOT_VERSION=${BOOST_DOT_VERSION}" >> ~/.bashrc RUN echo "export BOOST_UNDERSCORE_VERSION=${BOOST_UNDERSCORE_VERSION}" >> ~/.bashrc diff --git a/Code/MinimalLib/docker/Dockerfile_3_rdkit_build b/Code/MinimalLib/docker/Dockerfile_3_rdkit_build index 3b9ca6ddd..772e7fc42 100644 --- a/Code/MinimalLib/docker/Dockerfile_3_rdkit_build +++ b/Code/MinimalLib/docker/Dockerfile_3_rdkit_build @@ -73,6 +73,8 @@ RUN emcmake cmake -DRDK_BUILD_FREETYPE_SUPPORT=ON -DRDK_BUILD_MINIMAL_LIB=ON \ -DRDK_BUILD_SLN_SUPPORT=OFF -DRDK_USE_BOOST_IOSTREAMS=OFF \ -DFREETYPE_INCLUDE_DIRS=/opt/freetype/include/freetype2 \ -DFREETYPE_LIBRARY=/opt/freetype/lib/libfreetype.a \ + -DZLIB_INCLUDE_DIR=/opt/zlib/include \ + -DZLIB_LIBRARY=/opt/zlib/lib/libz.a \ -DCMAKE_CXX_FLAGS="${EXCEPTION_HANDLING} -O3 -DNDEBUG" \ -DCMAKE_C_FLAGS="${EXCEPTION_HANDLING} -O3 -DNDEBUG -DCOMPILE_ANSI_ONLY" \ -DCMAKE_EXE_LINKER_FLAGS="${EXCEPTION_HANDLING} -s STACK_OVERFLOW_CHECK=1 -s USE_PTHREADS=0 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4GB -s MODULARIZE=1 -s EXPORT_NAME=\"'initRDKitModule'\"" .. 
diff --git a/Code/MinimalLib/jswrapper.cpp b/Code/MinimalLib/jswrapper.cpp index 5fbfe0d27..777a6d21e 100644 --- a/Code/MinimalLib/jswrapper.cpp +++ b/Code/MinimalLib/jswrapper.cpp @@ -400,6 +400,51 @@ emscripten::val get_avalon_fp_as_uint8array(const JSMolBase &self) { } #endif +emscripten::val add_to_png_blob_helper(const JSMolBase &self, + const std::string &pngString, + const std::string &details) { + auto updatedPngString = self.add_to_png_blob(pngString, details); + return binary_string_to_uint8array(updatedPngString); +} + +emscripten::val add_to_png_blob_helper(const JSMolBase &self, + const std::string &pngString) { + return add_to_png_blob_helper(self, pngString, ""); +} + +JSMolBase *get_mol_from_png_blob_helper(const emscripten::val &pngAsUInt8Array, + const std::string &details) { + auto pngString = pngAsUInt8Array.as(); + return get_mol_from_png_blob(pngString, details); +} + +JSMolBase *get_mol_from_png_blob_no_details_helper( + const emscripten::val &pngAsUInt8Array) { + return get_mol_from_png_blob_helper(pngAsUInt8Array, ""); +} + +JSMolList *get_mols_from_png_blob_helper(const emscripten::val &pngAsUInt8Array, + const std::string &details) { + return get_mols_from_png_blob(pngAsUInt8Array.as(), details); +} + +JSMolList *get_mols_from_png_blob_no_details_helper( + const emscripten::val &pngAsUInt8Array) { + return get_mols_from_png_blob_helper(pngAsUInt8Array, ""); +} + +emscripten::val get_coords_helper(const JSMolBase &self) { + static const char *PUSH = "push"; + auto res = emscripten::val::array(); + for (const auto &pt : self.get_coords()) { + auto xyz = emscripten::val::array(); + xyz.call(PUSH, pt.x); + xyz.call(PUSH, pt.y); + xyz.call(PUSH, pt.z); + res.call(PUSH, xyz); + } + return res; +} #ifdef RDK_BUILD_MINIMAL_LIB_MMPA emscripten::val get_mmpa_frags_helper(const JSMolBase &self, unsigned int minCuts, @@ -530,6 +575,12 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) { &JSMolBase::get_svg)) .function("get_svg_with_highlights", 
&JSMolBase::get_svg_with_highlights) + .function("combine_with", select_overload( + &JSMolBase::combine_with)) + .function( + "combine_with", + select_overload( + &JSMolBase::combine_with)) #ifdef __EMSCRIPTEN__ .function("draw_to_canvas_with_offset", &draw_to_canvas_with_offset) .function("draw_to_canvas", &draw_to_canvas) @@ -580,6 +631,15 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) { .function("get_frags", select_overload(get_frags_helper), allow_raw_pointers()) + .function( + "add_to_png_blob", + select_overload( + add_to_png_blob_helper)) + .function("add_to_png_blob", select_overload( + add_to_png_blob_helper)) + .function("get_coords", get_coords_helper) #ifdef RDK_BUILD_AVALON_SUPPORT .function( "get_avalon_fp_as_uint8array", @@ -848,4 +908,14 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) { function("molzip", &molzip_no_details_helper, allow_raw_pointers()); #endif #endif +#ifdef __EMSCRIPTEN__ + function("get_mol_from_png_blob", &get_mol_from_png_blob_helper, + allow_raw_pointers()); + function("get_mol_from_png_blob", &get_mol_from_png_blob_no_details_helper, + allow_raw_pointers()); + function("get_mols_from_png_blob", &get_mols_from_png_blob_helper, + allow_raw_pointers()); + function("get_mols_from_png_blob", &get_mols_from_png_blob_no_details_helper, + allow_raw_pointers()); +#endif } diff --git a/Code/MinimalLib/minilib.cpp b/Code/MinimalLib/minilib.cpp index ac65f3ceb..f94bf2c63 100644 --- a/Code/MinimalLib/minilib.cpp +++ b/Code/MinimalLib/minilib.cpp @@ -531,6 +531,12 @@ int JSMolBase::has_coords() const { return (get().getConformer().is3D() ? 3 : 2); } +const RDGeom::POINT3D_VECT &JSMolBase::get_coords() const { + static const RDGeom::POINT3D_VECT empty; + return (get().getNumConformers() ? 
get().getConformer().getPositions()
+                                   : empty);
+}
+
 double JSMolBase::normalize_depiction(int canonicalize, double scaleFactor) {
   if (!get().getNumConformers()) {
     return -1.;
@@ -606,6 +612,29 @@ std::pair JSMolBase::get_mmpa_frags(
 }
 #endif
 
+std::string JSMolBase::add_to_png_blob(const std::string &pngString,
+                                       const std::string &details) const {
+  PNGMetadataParams params;
+  std::string res;
+  try {
+    MinimalLib::updatePNGMetadataParamsFromJSON(params, details.c_str());
+    res = addMolToPNGString(get(), pngString, params);
+  } catch (...) {
+  }
+  return res;
+}
+
+std::string JSMolBase::combine_with(const JSMolBase &other,
+                                    const std::string &details) {
+  std::unique_ptr combinedMol;
+  auto res = MinimalLib::combine_mols_internal(get(), other.get(), combinedMol,
+                                               details.c_str());
+  if (res.empty() && combinedMol) {
+    reset(static_cast(combinedMol.release()));
+  }
+  return res;  // forward combine_mols_internal's error text ("" if none)
+}
+
 #ifdef RDK_BUILD_MINIMAL_LIB_RXN
 std::string JSReaction::get_svg(int w, int h) const {
   assert(d_rxn);
@@ -956,6 +985,26 @@ bool disable_logging(const std::string &logName) {
 
 void disable_logging() { RDKit::MinimalLib::LogHandle::disableLogging(); }
 
+JSMolBase *get_mol_from_png_blob(const std::string &pngString,
+                                 const std::string &details) {
+  auto mols = MinimalLib::get_mols_from_png_blob_internal(pngString, true,
+                                                          details.c_str());
+  if (mols.empty()) {
+    return nullptr;
+  }
+  return new JSMol(new RWMol(*mols.front()));
+}
+
+JSMolList *get_mols_from_png_blob(const std::string &pngString,
+                                  const std::string &details) {
+  auto mols = MinimalLib::get_mols_from_png_blob_internal(pngString, false,
+                                                          details.c_str());
+  if (mols.empty()) {
+    return nullptr;
+  }
+  return new JSMolList(mols);
+}
+
 #ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP
 JSRGroupDecomposition::JSRGroupDecomposition(const JSMolBase &core,
                                              const std::string &details_json) {
diff --git a/Code/MinimalLib/minilib.h b/Code/MinimalLib/minilib.h
index b9fc7a2ab..9f12b1361 100644
--- a/Code/MinimalLib/minilib.h
+++ 
b/Code/MinimalLib/minilib.h @@ -30,6 +30,7 @@ class JSMolBase { virtual ~JSMolBase(){}; virtual const RDKit::RWMol &get() const = 0; virtual RDKit::RWMol &get() = 0; + virtual void reset(RDKit::RWMol *other) = 0; std::string get_smiles() const; std::string get_smiles(const std::string &details) const; std::string get_cxsmiles() const; @@ -117,6 +118,7 @@ class JSMolBase { "instead")]] bool is_valid() const; int has_coords() const; + const RDGeom::POINT3D_VECT &get_coords() const; std::string get_stereo_tags(); std::string get_aromatic_form() const; @@ -163,6 +165,12 @@ class JSMolBase { unsigned int get_num_atoms(bool heavyOnly) const; unsigned int get_num_atoms() const { return get_num_atoms(false); }; unsigned int get_num_bonds() const; + std::string add_to_png_blob(const std::string &pngString, + const std::string &details) const; + std::string combine_with(const JSMolBase &other, const std::string &details); + std::string combine_with(const JSMolBase &other) { + return combine_with(other, "{}"); + } #ifdef RDK_BUILD_MINIMAL_LIB_MMPA std::pair get_mmpa_frags( unsigned int minCuts, unsigned int maxCuts, @@ -193,6 +201,10 @@ class JSMol : public JSMolBase { checkNotNull(); return *d_mol.get(); } + void reset(RDKit::RWMol *other) { + PRECONDITION(other, "other cannot be null"); + d_mol.reset(other); + } private: void checkNotNull() const { CHECK_INVARIANT(d_mol, "d_mol cannot be null"); } @@ -218,6 +230,14 @@ class JSMolShared : public JSMolBase { } const RDKit::ROMOL_SPTR &get_sptr() const { return d_mol; } RDKit::ROMOL_SPTR &get_sptr() { return d_mol; } + void reset(RDKit::RWMol *other) { + PRECONDITION(other, "other cannot be null"); + d_mol.reset(other); + } + void reset_sptr(const RDKit::ROMOL_SPTR &other) { + PRECONDITION(other, "other cannot be null"); + d_mol = other; + } private: void checkNotNull() const { CHECK_INVARIANT(d_mol, "d_mol cannot be null"); } @@ -351,6 +371,10 @@ bool enable_logging(const std::string &logName); bool disable_logging(const 
std::string &logName); JSLog *set_log_tee(const std::string &log_name); JSLog *set_log_capture(const std::string &log_name); +JSMolBase *get_mol_from_png_blob(const std::string &pngString, + const std::string &details); +JSMolList *get_mols_from_png_blob(const std::string &pngString, + const std::string &details); #ifdef RDK_BUILD_MINIMAL_LIB_MCS std::string get_mcs_as_json(const JSMolList &mols, const std::string &details_json); diff --git a/Code/MinimalLib/tests/tests.js b/Code/MinimalLib/tests/tests.js index 2c2b8d5a5..a27c51551 100644 --- a/Code/MinimalLib/tests/tests.js +++ b/Code/MinimalLib/tests/tests.js @@ -2809,6 +2809,21 @@ function captureStdoutStderr(stdoutCallback, optStderrCallback) { }; } +function captureStdoutStderr(stdoutCallback, optStderrCallback) { + if (!stdoutCallback) { + return null; + } + const stderrCallback = optStderrCallback || stdoutCallback; + const origStdoutWrite = process.stdout.write; + const origStderrWrite = process.stderr.write; + process.stdout.write = (chunk) => stdoutCallback(chunk); + process.stderr.write = (chunk) => stderrCallback(chunk); + return () => { + process.stdout.write = origStdoutWrite; + process.stderr.write = origStderrWrite; + }; +} + function test_capture_logs() { const PENTAVALENT_CARBON = 'CC(C)(C)(C)C'; const PENTAVALENT_CARBON_VALENCE_ERROR = 'Explicit valence for atom # 1 C, 5, is greater than permitted'; @@ -3756,6 +3771,398 @@ M END mol.delete(); } +function test_png_metadata() { + const PNG_COLCHICINE_NO_METADATA = "/../../GraphMol/FileParsers/test_data/colchicine.no_metadata.png"; + const PNG_COLCHICINE_WITH_METADATA = "/../../GraphMol/FileParsers/test_data/colchicine.png"; + const PNG_PENICILLIN_METADATA = "penicillin_metadata.png"; + const PNG_PENICILLIN_AMOXICILLIN_METADATA = "penicillin_amoxicillin_metadata.png"; + const PNG_COLCHICINE_AMOXICILLIN_METADATA = "colchicine_amoxicillin_metadata.png"; + const BENZYLPENICILLIN_SMI = "CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)Cc3ccccc3)C(=O)O)C"; + 
const BENZYLPENICILLIN_CAN_SMI = "CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@H]1C(=O)O"; + const AMOXICILLIN_SMI = "O=C(O)[C@@H]2N3C(=O)[C@@H](NC(=O)[C@@H](c1ccc(O)cc1)N)[C@H]3SC2(C)C"; + const AMOXICILLIN_CAN_SMI = "CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(=O)O"; + let mol; + let mols; + let molSan; + const png_no_metadata_buf = fs.readFileSync(__dirname + PNG_COLCHICINE_NO_METADATA); + const png_no_metadata_blob = new Uint8Array(png_no_metadata_buf.length); + const png_no_metadata_blob2 = new Uint8Array(png_no_metadata_buf.length); + png_no_metadata_buf.copy(png_no_metadata_blob); + png_no_metadata_buf.copy(png_no_metadata_blob2); + const png_with_metadata_buf = fs.readFileSync(__dirname + PNG_COLCHICINE_WITH_METADATA); + const png_with_metadata_blob = new Uint8Array(png_with_metadata_buf.length); + png_with_metadata_buf.copy(png_with_metadata_blob); + assert(!RDKitModule.get_mol_from_png_blob(png_no_metadata_blob)); + assert(!RDKitModule.get_mols_from_png_blob(png_no_metadata_blob)); + let penicillin = RDKitModule.get_mol(BENZYLPENICILLIN_SMI); + assert(penicillin); + assert(penicillin.set_new_coords()); + let png_penicillin_metadata_blob = penicillin.add_to_png_blob(png_no_metadata_blob, + "{\"includePkl\":false,\"includeSmiles\":true,\"includeMol\":true}"); + penicillin.delete(); + assert(png_penicillin_metadata_blob); + fs.writeFileSync(PNG_PENICILLIN_METADATA, png_penicillin_metadata_blob); + penicillin = RDKitModule.get_mol_from_png_blob(png_penicillin_metadata_blob); + assert(penicillin); + assert.equal(penicillin.has_coords(), 2); + penicillin.delete(); + RDKitModule.enable_logging() + mol = RDKitModule.get_mol_from_png_blob(png_with_metadata_blob); + assert(mol); + assert.equal(mol.has_coords(), 2); + mol.delete(); + mols = RDKitModule.get_mols_from_png_blob(png_with_metadata_blob); + assert(!mols); + mols = RDKitModule.get_mols_from_png_blob(png_with_metadata_blob, + JSON.stringify({ includePkl: true, includeSmiles: true })); + 
assert(!mols); + mols = RDKitModule.get_mols_from_png_blob( + png_with_metadata_blob, JSON.stringify({ includeSmiles: true })); + assert(mols); + assert.equal(mols.size(), 1); + mol = mols.next(); + assert.equal(mol.has_coords(), 2); + mol.delete(); + mols.delete(); + assert(!RDKitModule.get_mol_from_png_blob(png_penicillin_metadata_blob, + "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":false}")); + penicillin = RDKitModule.get_mol_from_png_blob(png_penicillin_metadata_blob, + "{\"includePkl\":false,\"includeSmiles\":false,\"includeMol\":true,\"sanitize\":false,\"removeHs\":false,\"assignStereo\":false,\"fastFindRings\":false}"); + assert(penicillin); + assert.equal(penicillin.has_coords(), 2); + smi = penicillin.get_smiles(); + assert.notEqual(smi, BENZYLPENICILLIN_CAN_SMI); + molSan = RDKitModule.get_mol(smi) + assert.equal(molSan.get_smiles(), BENZYLPENICILLIN_CAN_SMI); + molSan.delete(); + penicillin.delete(); + assert(!RDKitModule.get_mol_from_png_blob(png_no_metadata_blob2)); + penicillin = RDKitModule.get_mol(BENZYLPENICILLIN_SMI); + assert(penicillin); + let amoxicillin = RDKitModule.get_mol(AMOXICILLIN_SMI); + assert(amoxicillin); + assert(amoxicillin.set_new_coords()); + png_penicillin_metadata_blob = penicillin.add_to_png_blob(png_no_metadata_blob2, + "{\"includePkl\":false,\"includeMol\":true,\"CX_ALL_BUT_COORDS\":true}"); + assert(png_penicillin_metadata_blob); + let png_penicillin_amoxicillin_metadata_blob = amoxicillin.add_to_png_blob(png_penicillin_metadata_blob, + "{\"includePkl\":false,\"includeMol\":true,\"CX_ALL_BUT_COORDS\":true}"); + assert(png_penicillin_amoxicillin_metadata_blob); + fs.writeFileSync(PNG_PENICILLIN_AMOXICILLIN_METADATA, png_penicillin_amoxicillin_metadata_blob); + mol = RDKitModule.get_mol_from_png_blob(png_penicillin_amoxicillin_metadata_blob, + "{\"sanitize\":false,\"removeHs\":false,\"assignStereo\":false,\"fastFindRings\":false}"); + assert(mol); + assert(!mol.has_coords()); + smi = mol.get_smiles(); + 
assert.equal(smi, BENZYLPENICILLIN_CAN_SMI); + mol.delete(); + assert(!RDKitModule.get_mol_from_png_blob(png_penicillin_amoxicillin_metadata_blob, + "{\"includePkl\":false,\"includeSmiles\":false,\"includeMol\":false}")); + mol = RDKitModule.get_mol_from_png_blob(png_penicillin_amoxicillin_metadata_blob, + "{\"includePkl\":true,\"includeSmiles\":false,\"includeMol\":true}"); + assert(mol); + assert.equal(mol.has_coords(), 2); + smi = mol.get_smiles(); + assert.equal(smi, BENZYLPENICILLIN_CAN_SMI); + mol.delete(); + mols = RDKitModule.get_mols_from_png_blob(png_penicillin_amoxicillin_metadata_blob); + assert(!mols); + mols = RDKitModule.get_mols_from_png_blob( + png_penicillin_amoxicillin_metadata_blob, JSON.stringify({ includeSmiles: true })); + assert(mols); + assert.equal(mols.size(), 2); + mol = mols.at(0); + assert(!mol.has_coords()); + smi = mol.get_smiles(); + assert.equal(smi, BENZYLPENICILLIN_CAN_SMI); + assert(mol.get_morgan_fp()); + mol.delete(); + mol = mols.at(1); + assert(!mol.has_coords()); + smi = mol.get_smiles(); + assert.equal(smi, AMOXICILLIN_CAN_SMI); + assert(mol.get_morgan_fp()); + mol.delete(); + mols.delete(); + mols = RDKitModule.get_mols_from_png_blob(png_penicillin_amoxicillin_metadata_blob, JSON.stringify({ + includePkl: false, includeMol: true, sanitize: false, removeHs: false, assignStereo: false, fastFindRings: false + })); + assert(mols); + assert.equal(mols.size(), 2); + mol = mols.at(0); + assert(mol.has_coords()); + smi = mol.get_smiles(); + assert.notEqual(smi, BENZYLPENICILLIN_CAN_SMI); + molSan = RDKitModule.get_mol(smi) + assert.equal(molSan.get_smiles(), BENZYLPENICILLIN_CAN_SMI); + molSan.delete(); + mol.delete(); + mol = mols.at(1); + assert(mol.has_coords()); + smi = mol.get_smiles(); + assert.notEqual(smi, AMOXICILLIN_CAN_SMI); + molSan = RDKitModule.get_mol(smi) + assert.equal(molSan.get_smiles(), AMOXICILLIN_CAN_SMI); + molSan.delete(); + mol.delete(); + mols.delete(); + let png_colchicine_amoxicillin_metadata_blob = 
amoxicillin.add_to_png_blob( + png_with_metadata_blob, JSON.stringify({ includeMol: true, CX_ALL_BUT_COORDS:true })); + assert(png_colchicine_amoxicillin_metadata_blob); + fs.writeFileSync(PNG_COLCHICINE_AMOXICILLIN_METADATA, png_colchicine_amoxicillin_metadata_blob); + mols = RDKitModule.get_mols_from_png_blob(png_colchicine_amoxicillin_metadata_blob, + "{\"includePkl\":false,\"includeMol\":true}"); + assert.equal(mols.size(), 1); + mol = mols.at(0); + assert.equal(mol.has_coords(), 2); + mol.delete(); + mols.delete(); + mols = RDKitModule.get_mols_from_png_blob(png_colchicine_amoxicillin_metadata_blob, + "{\"includeSmiles\":true}"); + assert.equal(mols.size(), 2); + assert.equal(mols.at(0).has_coords(), 2); + assert(!mols.at(1).has_coords()); + mols.delete(); + penicillin.delete(); + amoxicillin.delete(); + + const colchicine = RDKitModule.get_mol('COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2'); + assert(colchicine); + png_no_metadata_buf.copy(png_no_metadata_blob); + let png_colchicine_metadata_blob; + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert(!mol.has_coords()); + mol.delete(); + // use SMILES + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob, + JSON.stringify({ includePkl: false })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert(!mol.has_coords()); + mol.delete(); + // use MOL + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob, + JSON.stringify({ includePkl: false, includeSmiles: false, includeMol: true })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert.equal(mol.has_coords(), 2); + mol.delete(); + // use PKL + 
colchicine.set_new_coords(); + assert.equal(colchicine.has_coords(), 2); + const PROPERTY_NAME = 'property'; + const PROPERTY_VALUE = 'value'; + colchicine.set_prop(PROPERTY_NAME, PROPERTY_VALUE); + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob, JSON.stringify({ + includePkl: true, includeSmiles: false, includeMol: false, propertyFlags: { NoProps: true } + })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert.equal(mol.has_coords(), 2); + assert(!mol.has_prop(PROPERTY_NAME)); + mol.delete(); + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob, JSON.stringify({ + includePkl: true, includeSmiles: false, includeMol: false, propertyFlags: { AllProps: true } + })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert.equal(mol.has_coords(), 2); + assert(mol.has_prop(PROPERTY_NAME)); + assert(mol.get_prop(PROPERTY_NAME) == PROPERTY_VALUE); + mol.delete(); + png_colchicine_metadata_blob = colchicine.add_to_png_blob(png_no_metadata_blob, JSON.stringify({ + includePkl: false, includeSmiles: true, includeMol: false, + propertyFlags: { NoProps: true }, CX_ALL_BUT_COORDS: true + })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert(!mol.has_coords()); + assert(!mol.has_prop(PROPERTY_NAME)); + mol.delete(); + // use original wedging + const colchicineUnusualWedging = RDKitModule.get_mol(` + RDKit 2D + + 29 31 0 0 0 0 0 0 0 0999 V2000 + 6.4602 1.0300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3062 1.9883 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8993 1.4680 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7453 2.4262 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3384 1.9059 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0856 0.4273 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2396 -0.5309 0.0000 C 0 0 0 0 0 0 
0 0 0 0 0 0 + 1.9868 -2.0094 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1408 -2.9677 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6465 -0.0106 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8005 -0.9688 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5477 -2.4474 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2280 -0.2968 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1857 -1.7387 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6836 -2.9611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1813 -3.0436 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7569 -4.4288 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2442 -4.6230 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.1797 -1.9240 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.6215 -2.3378 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.9268 -0.4455 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6132 0.2787 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0269 1.7205 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5055 1.9733 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0258 3.3802 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.5043 3.6330 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0675 4.5342 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1576 2.9429 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3401 3.0254 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 1 0 + 8 9 1 0 + 7 10 2 0 + 10 11 1 0 + 11 12 1 0 + 6 13 1 0 + 13 14 2 0 + 14 15 1 0 + 15 16 2 0 + 16 17 1 0 + 17 18 1 0 + 16 19 1 0 + 19 20 2 0 + 19 21 1 0 + 21 22 2 0 + 23 22 1 1 + 23 24 1 0 + 24 25 1 0 + 25 26 1 0 + 25 27 2 0 + 23 28 1 0 + 28 29 1 0 + 10 3 1 0 + 22 13 1 0 + 29 5 1 0 +M END +`); + assert(colchicineUnusualWedging); + assert.equal(colchicineUnusualWedging.has_coords(), 2); + assert(colchicineUnusualWedging.get_cxsmiles().includes('wU:22.24|')); + assert(colchicineUnusualWedging.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionTrue' })).includes('wU:22.23|')); + png_colchicine_metadata_blob = colchicineUnusualWedging.add_to_png_blob(png_no_metadata_blob, JSON.stringify({ + includePkl: true, includeSmiles: true, includeMol: true, + propertyFlags: { 
AtomProps: true, BondProps: true } + })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + // the mol is restored from pickle, so it will retain + // original molblock wedging + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert.equal(mol.has_coords(), 2); + assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionTrue' })).includes('wU:22.23|')); + assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionClear' })).includes('wU:22.24|')); + assert(mol.get_molblock().includes(' 23 24 1 1')); + assert(mol.get_molblock(JSON.stringify({ useMolBlockWedging: true })).includes(' 23 22 1 1')); + mol.delete(); + // the mol is restored from CXSMILES, so it will not retain + // original molblock wedging, as it was not stored in the CXSMILES string + png_colchicine_metadata_blob = colchicineUnusualWedging.add_to_png_blob(png_no_metadata_blob, JSON.stringify({ + includePkl: false, includeSmiles: true, includeMol: true, + propertyFlags: { AtomProps: true, BondProps: true } + })); + mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob); + assert(mol); + assert.equal(mol.get_num_atoms(), 29); + assert.equal(mol.has_coords(), 2); + assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionTrue' })).includes('wU:22.24|')); + assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionClear' })).includes('wU:22.24|')); + assert(mol.get_molblock().includes(' 23 24 1 1')); + assert(mol.get_molblock(JSON.stringify({ useMolBlockWedging: true })).includes(' 23 24 1 1')); + mol.delete(); + // the mol is restored from CXSMILES, but restoreBondDirOption was set + // to 'RestoreBondDirOptionTrue', so it will retain + // original molblock wedging, as it was stored in the CXSMILES string + png_colchicine_metadata_blob = 
colchicineUnusualWedging.add_to_png_blob(png_no_metadata_blob, JSON.stringify({
+        includePkl: false, includeSmiles: true, includeMol: true,
+        propertyFlags: { AtomProps: true, BondProps: true },
+        restoreBondDirOption: 'RestoreBondDirOptionTrue'
+    }));
+    mol = RDKitModule.get_mol_from_png_blob(png_colchicine_metadata_blob);
+    assert(mol);
+    assert.equal(mol.get_num_atoms(), 29);
+    assert.equal(mol.has_coords(), 2);
+    assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionTrue' })).includes('wU:22.23|'));
+    assert(mol.get_cxsmiles(JSON.stringify({ CX_ALL: true, restoreBondDirOption: 'RestoreBondDirOptionClear' })).includes('wU:22.24|'));
+    assert(mol.get_molblock().includes(' 23 22 1 1'));
+    assert(mol.get_molblock(JSON.stringify({ useMolBlockWedging: true })).includes(' 23 22 1 1'));
+    mol.delete();
+    colchicineUnusualWedging.delete();
+}
+
+function test_combine_with() { // exercises combine_with(), first without options and then with an "offset" option
+    {
+        var mol = RDKitModule.get_mol("CC");
+        assert(mol);
+        var other = RDKitModule.get_mol("NCC");
+        assert(other);
+        assert(!mol.combine_with(other)); // a falsy return value is the outcome this test expects
+        assert.equal(mol.get_num_atoms(), 5); // mol itself now reports the atoms of both inputs (2 + 3)
+        assert(mol.get_smiles() === "CC.CCN");
+        mol.delete();
+        other.delete();
+    }
+    { // same scenario with inputs built from CXSMILES that carry coordinates
+        var mol = RDKitModule.get_mol("C1CC1 |(0.866025,0,;-0.433013,0.75,;-0.433013,-0.75,)|");
+        var mol_copy = RDKitModule.get_mol_copy(mol); // NOTE(review): mol is used here before assert(mol) runs, and mol_copy is never asserted
+        assert(mol);
+        var other = RDKitModule.get_mol("C1CNC1 |(-1.06066,0,;0,-1.06066,;1.06066,0,;0,1.06066,)|");
+        assert(other);
+        assert(!mol.combine_with(other, JSON.stringify({offset: [4.0, 0.0, 0.0]}))); // options are handed over as a JSON string
+        assert.equal(mol.get_num_atoms(), 7);
+        assert(mol.get_smiles() === "C1CC1.C1CNC1");
+        assert(mol.get_molblock().includes(" 4.0000")); // presumably the offset surfaces in the added atoms' coordinates - confirm against combine_with()
+        mol.delete();
+        assert(!mol_copy.combine_with(other, JSON.stringify({offset: [9.0, 0.0, 0.0]}))); // mol_copy is combined after mol.delete(); "other" is reused a second time
+        assert.equal(mol_copy.get_num_atoms(), 7);
+        assert(mol_copy.get_smiles() === "C1CC1.C1CNC1");
+        assert(mol_copy.get_molblock().includes(" 9.0000"));
+        mol_copy.delete();
+        other.delete();
+    }
+}
+
+function test_get_coords() { // exercises get_coords() on a molecule with coordinates and on one without
+    {
+        var mol = RDKitModule.get_mol("C1CC1 |(0.866025,0,;-0.433013,0.75,;-0.433013,-0.75,)|");
+        assert(mol);
+        assert.equal(mol.has_coords(), 2);
+        var pos = mol.get_coords();
+        assert(Array.isArray(pos));
+        assert(pos.length === mol.get_num_atoms()); // one entry per atom
+        assert(pos.every((xyz) => Array.isArray(xyz) && xyz.length === 3 && xyz.every((c) => typeof c === "number"))); // every entry is an array of three numbers
+        assert(JSON.stringify(pos) === "[[0.866025,0,0],[-0.433013,0.75,0],[-0.433013,-0.75,0]]"); // third component, left empty in the CXSMILES above, is expected to be 0
+        mol.delete();
+    }
+    { // input without coordinates: get_coords() is expected to return an empty array
+        var mol = RDKitModule.get_mol("C1CC1");
+        assert(mol);
+        assert(!mol.has_coords());
+        var pos = mol.get_coords();
+        assert(Array.isArray(pos));
+        assert(!pos.length);
+        mol.delete();
+    }
+}
+
 initRDKitModule().then(function(instance) {
     var done = {};
     const waitAllTestsFinished = () => {
@@ -3850,8 +4257,11 @@ initRDKitModule().then(function(instance) {
         test_pickle();
         test_remove_hs_details();
         test_get_mol_remove_hs();
+        test_png_metadata();
+        test_combine_with();
+        test_get_coords();
 
         waitAllTestsFinished().then(() =>
             console.log("Tests finished successfully")
         );
-});
\ No newline at end of file
+});