diff --git a/.clang-format b/.clang-format index 999632639..d96ef06ec 100644 --- a/.clang-format +++ b/.clang-format @@ -12,6 +12,7 @@ AllowShortCaseLabelsOnASingleLine: false AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true AllowShortFunctionsOnASingleLine: All +AllowShortEnumsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakTemplateDeclarations: true AlwaysBreakBeforeMultilineStrings: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 705e292b2..99b9853c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ option(RDK_BUILD_FUZZ_TARGETS "build the fuzz targets" OFF) option(RDK_BUILD_MINIMAL_LIB_RXN "build support for reactions into MinimalLib" ON ) option(RDK_BUILD_MINIMAL_LIB_SUBSTRUCTLIBRARY "build support for SubstructLibrary into MinimalLib" ON ) option(RDK_BUILD_MINIMAL_LIB_MCS "build support for MCS into MinimalLib" OFF ) +option(RDK_BUILD_MINIMAL_LIB_MOLZIP "build support for molzip into MinimalLib" OFF ) set(RDK_BOOST_VERSION "1.70.0") diff --git a/Code/GraphMol/ChemTransforms/CMakeLists.txt b/Code/GraphMol/ChemTransforms/CMakeLists.txt index 3445d0d35..d50038f23 100644 --- a/Code/GraphMol/ChemTransforms/CMakeLists.txt +++ b/Code/GraphMol/ChemTransforms/CMakeLists.txt @@ -1,10 +1,11 @@ -rdkit_library(ChemTransforms ChemTransforms.cpp MolFragmenter.cpp LINK_LIBRARIES +rdkit_library(ChemTransforms ChemTransforms.cpp + MolFragmenter.cpp MolFragmenterJSONParser.cpp LINK_LIBRARIES GraphMol SubstructMatch SmilesParse Depictor) target_compile_definitions(ChemTransforms PRIVATE -DRDKIT_CHEMTRANSFORMS_BUILD) rdkit_headers(ChemTransforms.h - MolFragmenter.h + MolFragmenter.h MolFragmenterJSONParser.h DEST GraphMol/ChemTransforms) # there's no Wrap subdirectory on the main trunk (but in "minimal" there is).. diff --git a/Code/GraphMol/ChemTransforms/MolFragmenter.h b/Code/GraphMol/ChemTransforms/MolFragmenter.h index f776f1566..cd7c876dc 100644 --- a/Code/GraphMol/ChemTransforms/MolFragmenter.h +++ b/Code/GraphMol/ChemTransforms/MolFragmenter.h @@ -13,6 +13,7 @@ #include #include +#include namespace RDKit { namespace MolFragmenter { @@ -103,13 +104,15 @@ RDKIT_CHEMTRANSFORMS_EXPORT void constructBRICSBondTypes( // n.b. AtomProperty must resolve to an unsigned integer value on an atom // property -enum class MolzipLabel { - AtomMapNumber, - Isotope, - FragmentOnBonds, - AtomType, - AtomProperty -}; +// clang-format off +BETTER_ENUM_CLASS(MolzipLabel, unsigned int, + AtomMapNumber, + Isotope, + FragmentOnBonds, + AtomType, + AtomProperty +); +// clang-format on struct RDKIT_CHEMTRANSFORMS_EXPORT MolzipParams { MolzipLabel label = MolzipLabel::AtomMapNumber; @@ -157,5 +160,6 @@ RDKIT_CHEMTRANSFORMS_EXPORT std::unique_ptr molzip( RDKIT_CHEMTRANSFORMS_EXPORT std::unique_ptr molzip( const std::map &row, const MolzipParams ¶ms = MolzipParams()); + } // namespace RDKit #endif diff --git a/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.cpp b/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.cpp new file mode 100644 index 000000000..c87d2cfde --- /dev/null +++ b/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.cpp @@ -0,0 +1,52 @@ +// +// Copyright (C) 2024 Novartis Biomedical Research and other RDKit contributors +// +// @@ All Rights Reserved @@ +// This file is part of the RDKit. +// The contents are covered by the terms of the BSD license +// which is included in the file license.txt, found at the root +// of the RDKit source tree. +// + +#define USE_BETTER_ENUMS +#include "MolFragmenterJSONParser.h" +#include +#include +#include +#include + +namespace RDKit { + +void parseMolzipParametersJSON(MolzipParams ¶ms, const char *details_json) { + if (!details_json || !strlen(details_json)) { + return; + } + boost::property_tree::ptree pt; + std::istringstream ss; + ss.str(details_json); + boost::property_tree::read_json(ss, pt); + std::string label; + label = pt.get("Label", label); + if (MolzipLabel::_is_valid(label.c_str())) { + params.label = MolzipLabel::_from_string(label.c_str()); + } + auto atomSymbolsIt = pt.find("AtomSymbols"); + if (atomSymbolsIt != pt.not_found()) { + const auto &jsonVect = atomSymbolsIt->second; + params.atomSymbols.resize(jsonVect.size()); + std::transform( + jsonVect.begin(), jsonVect.end(), params.atomSymbols.begin(), + [](const auto &atomSymbolNode) { + return atomSymbolNode.second.template get_value(); + }); + } + + params.atomProperty = + pt.get("AtomProperty", params.atomProperty); + params.enforceValenceRules = + pt.get("EnforceValenceRules", params.enforceValenceRules); + params.generateCoordinates = + pt.get("GenerateCoordinates", params.generateCoordinates); +} + +} // end namespace RDKit diff --git a/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.h b/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.h new file mode 100644 index 000000000..ad5a38777 --- /dev/null +++ b/Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.h @@ -0,0 +1,27 @@ +// +// Copyright (C) 2024 Novartis Biomedical Research and other RDKit contributors +// +// @@ All Rights Reserved @@ +// This file is part of the RDKit. +// The contents are covered by the terms of the BSD license +// which is included in the file license.txt, found at the root +// of the RDKit source tree. +// + +#pragma once + +#include "MolFragmenter.h" + +namespace RDKit { + +//! \brief Parse MolzipParams from JSON. +/*! The passed MolzipParams instance is updated from + * the JSON-parsed content. + * + * @param params - molzip parameters + * @param details_json - JSON string + */ +RDKIT_CHEMTRANSFORMS_EXPORT void parseMolzipParametersJSON( + MolzipParams ¶ms, const char *details_json); + +} // end namespace RDKit diff --git a/Code/JavaWrappers/ChemTransforms.i b/Code/JavaWrappers/ChemTransforms.i index 3afce76a1..b66a5ea4a 100644 --- a/Code/JavaWrappers/ChemTransforms.i +++ b/Code/JavaWrappers/ChemTransforms.i @@ -34,6 +34,7 @@ %{ #include #include +#include #include // Fixes annoying compilation namespace issue typedef RDKit::MatchVectType MatchVectType; @@ -120,4 +121,6 @@ RDKit::ROMol * new_molzip( %newobject fragmentOnBRICSBonds; %template(UIntMolMap) std::map >; +%include %include +%include diff --git a/Code/MinimalLib/CMakeLists.txt b/Code/MinimalLib/CMakeLists.txt index 05a0d82a0..627e95e28 100644 --- a/Code/MinimalLib/CMakeLists.txt +++ b/Code/MinimalLib/CMakeLists.txt @@ -26,7 +26,11 @@ if(RDK_BUILD_MINIMAL_LIB) if(RDK_BUILD_MINIMAL_LIB_MMPA) add_definitions(-DRDK_BUILD_MINIMAL_LIB_MMPA) set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};MMPA_static") - endif() + endif() + if(RDK_BUILD_MINIMAL_LIB_MOLZIP) + add_definitions(-DRDK_BUILD_MINIMAL_LIB_MOLZIP) + set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};ChemTransforms_static") + endif() if(RDK_BUILD_FREETYPE_SUPPORT) if( ${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") set(USE_FLAGS "-s USE_FREETYPE=1") diff --git a/Code/MinimalLib/jswrapper.cpp b/Code/MinimalLib/jswrapper.cpp index 700bf23e1..f2fff0b98 100644 --- a/Code/MinimalLib/jswrapper.cpp +++ b/Code/MinimalLib/jswrapper.cpp @@ -16,10 +16,29 @@ #include #include #include - +#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && \ + defined(RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP) +#include +#include +#endif using namespace RDKit; namespace { +const emscripten::val JSMolObj() { + static const auto JSMOL = emscripten::val::module_property("Mol"); + return JSMOL; +} + +const emscripten::val JSMolListObj() { + static const auto JSMOLLIST = emscripten::val::module_property("MolList"); + return JSMOLLIST; +} + +const emscripten::val ObjectObj() { + static const auto OBJECT = emscripten::val::global("Object"); + return OBJECT; +} + class JSDrawerFromDetails : public MinimalLib::DrawerFromDetails { public: JSDrawerFromDetails(const emscripten::val &ctx, int w = -1, int h = -1, @@ -134,6 +153,66 @@ std::string get_mcs_as_smarts_no_details(const JSMolList &mols) { } #endif +#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP +JSMolBase *molzip_no_details_helper(const JSMolBase &a, const JSMolBase &b) { + return molzip(a, b, std::string()); +} + +#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP +JSMolBase *molzip_rgd_row_helper(const emscripten::val &rgdRow, + const std::string &details_json) { + auto rlabelsAsVal = ObjectObj().call("keys", rgdRow); + auto rlabels = emscripten::vecFromJSArray(rlabelsAsVal); + if (rlabels.empty()) { + return nullptr; + } + bool dynamicCastOk = true; + std::map molzipRow; + for (const auto &rlabel : rlabels) { + auto jsMolAsVal = rgdRow[rlabel]; + if (!jsMolAsVal.instanceof (JSMolObj())) { + return nullptr; + } + auto jsMolShared = dynamic_cast( + jsMolAsVal.as(emscripten::allow_raw_pointers())); + if (!jsMolShared) { + dynamicCastOk = false; + return nullptr; + } + molzipRow.emplace(rlabel, jsMolShared->get_sptr()); + } + MolzipParams params; + parseMolzipParametersJSON(params, details_json.c_str()); + auto res = RDKit::molzip(molzipRow, params); + return new JSMol(new RWMol(*res)); +} + +JSMolBase *molzip_no_details_rgd_row_helper(const emscripten::val &rgdRow) { + return molzip_rgd_row_helper(rgdRow, std::string()); +} + +JSMolBase *molzip_2params_helper(const emscripten::val ¶m1, + const emscripten::val ¶m2) { + JSMolBase *res = nullptr; + static const auto JSMOL = emscripten::val::module_property("Mol"); + if (param1.instanceof (JSMolObj()) && param2.instanceof (JSMolObj())) { + const auto aJsMolPtr = + param1.as(emscripten::allow_raw_pointers()); + const auto bJsMolPtr = + param2.as(emscripten::allow_raw_pointers()); + if (aJsMolPtr && bJsMolPtr) { + res = molzip_no_details_helper(*aJsMolPtr, *bJsMolPtr); + } + } else if (!param1.instanceof + (JSMolObj()) && param2.typeOf().as() == "string") { + auto details_json = param2.as(); + res = molzip_rgd_row_helper(param1, details_json); + } + return res; +} +#endif +#endif + emscripten::val binary_string_to_uint8array(const std::string &pkl) { emscripten::val view(emscripten::typed_memory_view( pkl.size(), reinterpret_cast(pkl.c_str()))); @@ -340,16 +419,14 @@ emscripten::val get_mmpa_frags_helper(const JSMolBase &self, JSRGroupDecomposition *get_rgd_helper( const emscripten::val &singleOrMultipleCores, const std::string &details_json) { - static const auto JSMOL = emscripten::val::module_property("Mol"); - static const auto JSMOLLIST = emscripten::val::module_property("MolList"); JSRGroupDecomposition *res = nullptr; - if (singleOrMultipleCores.instanceof (JSMOL)) { + if (singleOrMultipleCores.instanceof (JSMolObj())) { const auto jsMolPtr = singleOrMultipleCores.as(emscripten::allow_raw_pointers()); if (jsMolPtr) { res = new JSRGroupDecomposition(*jsMolPtr, details_json); } - } else if (singleOrMultipleCores.instanceof (JSMOLLIST)) { + } else if (singleOrMultipleCores.instanceof (JSMolListObj())) { const auto jsMolListPtr = singleOrMultipleCores.as(emscripten::allow_raw_pointers()); if (jsMolListPtr) { @@ -747,4 +824,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) { function("get_rgd", &get_rgd_helper, allow_raw_pointers()); function("get_rgd", &get_rgd_no_details_helper, allow_raw_pointers()); #endif +#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && defined(__EMSCRIPTEN__) + function("molzip", &::molzip, allow_raw_pointers()); +#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP + function("molzip", &molzip_2params_helper, allow_raw_pointers()); + function("molzip", &molzip_no_details_rgd_row_helper, allow_raw_pointers()); +#else + function("molzip", &molzip_no_details_helper, allow_raw_pointers()); +#endif +#endif } diff --git a/Code/MinimalLib/minilib.cpp b/Code/MinimalLib/minilib.cpp index 469e23c8d..119567e28 100644 --- a/Code/MinimalLib/minilib.cpp +++ b/Code/MinimalLib/minilib.cpp @@ -31,6 +31,10 @@ #ifdef RDK_BUILD_MINIMAL_LIB_MCS #include #endif +#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP +#include +#endif + #include #include #include @@ -992,3 +996,12 @@ JSRGroupDecomposition::getRGroupsAsRows() const { return res; } #endif +#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP +JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b, + const std::string &details_json) { + MolzipParams params; + parseMolzipParametersJSON(params, details_json.c_str()); + auto out = molzip(a.get(), b.get(), params); + return new JSMol(new RDKit::RWMol(*out)); +} +#endif diff --git a/Code/MinimalLib/minilib.h b/Code/MinimalLib/minilib.h index 0a6d90ec3..f64550351 100644 --- a/Code/MinimalLib/minilib.h +++ b/Code/MinimalLib/minilib.h @@ -27,7 +27,7 @@ class JSMolBase { public: JSMolBase(const JSMolBase &) = delete; JSMolBase &operator=(const JSMolBase &) = delete; - virtual ~JSMolBase() {}; + virtual ~JSMolBase(){}; virtual const RDKit::RWMol &get() const = 0; virtual RDKit::RWMol &get() = 0; std::string get_smiles() const; @@ -225,8 +225,8 @@ class JSMolShared : public JSMolBase { class JSMolList { public: JSMolList(const std::vector &mols) - : d_mols(mols), d_idx(0) {}; - JSMolList() : d_idx(0) {}; + : d_mols(mols), d_idx(0){}; + JSMolList() : d_idx(0){}; JSMolBase *next(); size_t append(const JSMolBase &mol); size_t insert(size_t idx, const JSMolBase &mol); @@ -363,11 +363,11 @@ class JSRGroupDecomposition { public: JSRGroupDecomposition(const JSMolBase &core, const std::string &details_json); JSRGroupDecomposition(const JSMolBase &core) - : JSRGroupDecomposition(core, "") {}; + : JSRGroupDecomposition(core, ""){}; JSRGroupDecomposition(const JSMolList &cores, const std::string &details_json); JSRGroupDecomposition(const JSMolList &cores) - : JSRGroupDecomposition(cores, "") {}; + : JSRGroupDecomposition(cores, ""){}; int add(const JSMolBase &mol); bool process(); std::map> getRGroupsAsColumns() const; @@ -379,3 +379,8 @@ class JSRGroupDecomposition { std::vector d_unmatched; }; #endif + +#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP +JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b, + const std::string &details_json); +#endif diff --git a/Code/MinimalLib/tests/tests.js b/Code/MinimalLib/tests/tests.js index d41bc8d4e..e2e19cf69 100644 --- a/Code/MinimalLib/tests/tests.js +++ b/Code/MinimalLib/tests/tests.js @@ -1965,6 +1965,88 @@ function test_get_mmpa_frags() { } } +function test_molzip() { + { + var mol1 = RDKitModule.get_mol("F/C=C/[*:1]"); + assert(mol1); + var mol2 = RDKitModule.get_mol("[*:1]F"); + assert(mol2); + var expectedLinkage = "F/C=C/F"; + var mol = RDKitModule.molzip(mol1, mol2); + assert(mol); + assert(mol.get_smiles() === expectedLinkage); + mol1.delete(); + mol2.delete(); + mol.delete(); + } + { + var mol1 = RDKitModule.get_mol("[C@H]([Xe])(F)([V])"); + assert(mol1); + var mol2 = RDKitModule.get_mol("[Xe]N.[V]I"); + assert(mol2); + var expectedLinkage = "N[C@@H](F)I"; + var details = JSON.stringify({ Label: 'AtomType', AtomSymbols: ['Xe', 'V'] }); + var mol = RDKitModule.molzip(mol1, mol2, details); + assert(mol); + assert(mol.get_smiles() === expectedLinkage); + mol1.delete(); + mol2.delete(); + mol.delete(); + } + if (RDKitModule.RGroupDecomposition) { + const smis = ['C1CN[C@H]1F', 'C1CN[C@]1(O)F', 'C1CN[C@@H]1F', 'C1CN[CH]1F']; + const core = RDKitModule.get_qmol('C1CNC1[*:1]'); + const params = { + rgroupLabelling: 'Isotope', + allowMultipleRGroupsOnUnlabelled: true, + }; + const rgd = RDKitModule.get_rgd(core, JSON.stringify(params)); + const smisCanon = smis.map((smi, i) => { + const mol = RDKitModule.get_mol(smi); + assert(mol); + try { + const res = rgd.add(mol); + assert(res === i); + return mol.get_smiles(); + } finally { + mol.delete(); + } + }); + rgd.process(); + const rows = rgd.get_rgroups_as_rows(); + const expectedRowMapping = [ + { Core: '[1*][C@@]1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' }, + { Core: '[1*][C@]1([2*])CCN1', R1: '[1*]F', R2: '[2*]O' }, + { Core: '[1*][C@]1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' }, + { Core: '[1*]C1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' }, + ]; + const details = JSON.stringify({ Label: 'Isotope' }); + rows.forEach((row, i) => { + const foundMapping = getFoundRgdRowAsMap(row, true); + assert(Object.keys(foundMapping).length === Object.keys(expectedRowMapping[i]).length); + Object.entries(foundMapping).forEach(([rlabel, smi]) => { + assert(expectedRowMapping[i][rlabel] && expectedRowMapping[i][rlabel] === smi); + }); + let mol; + try { + mol = RDKitModule.molzip(row, details); + assert(mol); + mol.remove_hs_in_place(); + assert(mol.get_smiles() === smisCanon[i]); + } finally { + if (mol) { + mol.delete(); + } + Object.values(row).forEach((rgroup) => { + if (rgroup) { + rgroup.delete(); + } + }); + } + }); + } +} + function test_hs_in_place() { { var mol = RDKitModule.get_mol("CC"); @@ -3189,14 +3271,14 @@ function test_multi_highlights() { mol.delete(); } -const getFoundRgdRowAsMap = (row) => Object.fromEntries(Object.entries(row).map(([rlabel, mol]) => { +const getFoundRgdRowAsMap = (row, keep) => Object.fromEntries(Object.entries(row).map(([rlabel, mol]) => { try { assert(mol); assert(mol instanceof RDKitModule.Mol); const smi = mol.get_smiles(); return [rlabel, smi]; } finally { - if (mol) { + if (!keep && mol) { mol.delete(); } } @@ -3509,6 +3591,9 @@ initRDKitModule().then(function(instance) { } test_bw_palette(); test_custom_palette(); + if (RDKitModule.molzip) { + test_molzip(); + } test_pickle(); waitAllTestsFinished().then(() =>