From 4ab18ae0eaa5cd960ee2bdd3f8001fc4d4ee418a Mon Sep 17 00:00:00 2001 From: Leonid Stolbov <57249456+StLeonidas@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:31:49 +0300 Subject: [PATCH] minilib functions exposure: mmpa (#6902) * MMPA functions exposure * Minilib exposure: mmp * minilib: mmpa - test added * minilib: mmpa - failing test added * - MolList should return null when the corresponding ROMOL_SPTR is a nullptr - changed first into cores - changed second into sidechains - renamed some functions and constants * - made JSMolList::at() and JSMolList::pop() robust against attempting to dereference null ROMOL_SPTR - added tests for the above * Update Code/MinimalLib/minilib.h Co-authored-by: Paolo Tosco * changes after review * Update Code/MinimalLib/minilib.cpp Co-authored-by: Greg Landrum --------- Co-authored-by: ptosco Co-authored-by: Paolo Tosco Co-authored-by: Greg Landrum --- Code/MinimalLib/CMakeLists.txt | 4 ++ Code/MinimalLib/jswrapper.cpp | 19 ++++++++- Code/MinimalLib/minilib.cpp | 58 +++++++++++++++++++++++--- Code/MinimalLib/minilib.h | 9 +++++ Code/MinimalLib/tests/tests.js | 74 ++++++++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 7 deletions(-) diff --git a/Code/MinimalLib/CMakeLists.txt b/Code/MinimalLib/CMakeLists.txt index 5b26d38df..84808a971 100644 --- a/Code/MinimalLib/CMakeLists.txt +++ b/Code/MinimalLib/CMakeLists.txt @@ -19,6 +19,10 @@ if(RDK_BUILD_MINIMAL_LIB) add_definitions(-DRDK_BUILD_MINIMAL_LIB_MCS) set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};FMCS_static") endif() + if(RDK_BUILD_MINIMAL_LIB_MMPA) + add_definitions(-DRDK_BUILD_MINIMAL_LIB_MMPA) + set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};MMPA_static") + endif() if(RDK_BUILD_FREETYPE_SUPPORT) if( ${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") set(USE_FLAGS "-s USE_FREETYPE=1") diff --git a/Code/MinimalLib/jswrapper.cpp b/Code/MinimalLib/jswrapper.cpp index 6b233ecf7..ae56923fb 100644 --- a/Code/MinimalLib/jswrapper.cpp +++ b/Code/MinimalLib/jswrapper.cpp @@ -372,6 +372,17 @@ emscripten::val get_avalon_fp_as_uint8array(const JSMol &self) { } #endif +#ifdef RDK_BUILD_MINIMAL_LIB_MMPA +emscripten::val get_mmpa_frags_helper(const JSMol &self, unsigned int minCuts, + unsigned int maxCuts, + unsigned int maxCutBonds) { + auto obj = emscripten::val::object(); + auto pairs = self.get_mmpa_frags(minCuts, maxCuts, maxCutBonds); + obj.set("cores", pairs.first); + obj.set("sidechains", pairs.second); + return obj; +} +#endif } // namespace using namespace emscripten; @@ -564,7 +575,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) { .function("get_num_atoms", select_overload(&JSMol::get_num_atoms)) .function("get_num_bonds", &JSMol::get_num_bonds) - ; +#ifdef RDK_BUILD_MINIMAL_LIB_MMPA + .function("get_mmpa_frags", + select_overload( + get_mmpa_frags_helper)) +#endif + ; class_("MolList") .constructor<>() diff --git a/Code/MinimalLib/minilib.cpp b/Code/MinimalLib/minilib.cpp index 1b9929913..142fcbbc5 100644 --- a/Code/MinimalLib/minilib.cpp +++ b/Code/MinimalLib/minilib.cpp @@ -591,6 +591,44 @@ unsigned int JSMol::get_num_bonds() const { return d_mol->getNumBonds(); } +#ifdef RDK_BUILD_MINIMAL_LIB_MMPA +namespace { +bool mmpaFragmentMol(const ROMol &mol, std::vector &cores, + std::vector &sidechains, + unsigned int minCuts, unsigned int maxCuts, + unsigned int maxCutBonds) { + std::vector> mmpaFrags; + if (!RDKit::MMPA::fragmentMol(mol, mmpaFrags, minCuts, maxCuts, + maxCutBonds)) { + return false; + } + auto numEntries = mmpaFrags.size(); + cores.clear(); + cores.reserve(numEntries); + sidechains.clear(); + sidechains.reserve(numEntries); + for (const auto &mmpaFrag : mmpaFrags) { + cores.push_back(mmpaFrag.first); + sidechains.push_back(mmpaFrag.second); + } + return true; +} +} // end of anonymous namespace + +std::pair JSMol::get_mmpa_frags( + unsigned int minCuts, unsigned int maxCuts, + unsigned int maxCutBonds) const { + std::vector cores; + std::vector sidechains; + if (!mmpaFragmentMol(*d_mol, cores, sidechains, minCuts, maxCuts, + maxCutBonds)) { + return std::make_pair(nullptr, nullptr); + } + return std::make_pair(new JSMolList(std::move(cores)), + new JSMolList(std::move(sidechains))); +} +#endif + #ifdef RDK_BUILD_MINIMAL_LIB_RXN std::string JSReaction::get_svg(int w, int h) const { assert(d_rxn); @@ -611,20 +649,28 @@ bool JSReaction::is_valid() const { #endif JSMol *JSMolList::next() { - return (d_idx < d_mols.size() - ? new JSMol(new RDKit::RWMol(*d_mols.at(d_idx++))) - : nullptr); + JSMol *res = nullptr; + if (d_idx < d_mols.size()) { + res = at(d_idx++); + } + return res; } JSMol *JSMolList::at(size_t idx) const { - return (idx < d_mols.size() ? new JSMol(new RDKit::RWMol(*d_mols.at(idx))) - : nullptr); + JSMol *res = nullptr; + if (idx < d_mols.size()) { + const auto &molSptr = d_mols.at(idx); + if (molSptr) { + res = new JSMol(new RDKit::RWMol(*molSptr)); + } + } + return res; } JSMol *JSMolList::pop(size_t idx) { JSMol *res = nullptr; if (idx < d_mols.size()) { - res = new JSMol(new RDKit::RWMol(*d_mols.at(idx))); + res = at(idx); d_mols.erase(d_mols.begin() + idx); if (d_idx > idx) { --d_idx; diff --git a/Code/MinimalLib/minilib.h b/Code/MinimalLib/minilib.h index 54fb9541f..737d9588f 100644 --- a/Code/MinimalLib/minilib.h +++ b/Code/MinimalLib/minilib.h @@ -14,6 +14,10 @@ #include #include +#ifdef RDK_BUILD_MINIMAL_LIB_MMPA +#include +#endif + class JSMolList; class JSMol { @@ -143,6 +147,11 @@ class JSMol { unsigned int get_num_atoms(bool heavyOnly) const; unsigned int get_num_atoms() const { return get_num_atoms(false); }; unsigned int get_num_bonds() const; +#ifdef RDK_BUILD_MINIMAL_LIB_MMPA + std::pair get_mmpa_frags( + unsigned int minCuts, unsigned int maxCuts, + unsigned int maxCutBonds) const; +#endif std::unique_ptr d_mol; static constexpr int d_defaultWidth = 250; diff --git a/Code/MinimalLib/tests/tests.js b/Code/MinimalLib/tests/tests.js index 715212545..eb18efe61 100644 --- a/Code/MinimalLib/tests/tests.js +++ b/Code/MinimalLib/tests/tests.js @@ -1845,6 +1845,77 @@ function test_get_frags() { } } +function test_get_mmpa_frags() { + { + var mol = RDKitModule.get_mol("CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3"); + var expectedCores = ["O=C1CN=C(C2CCCCC2)c2ccccc2N1CCC([*:1])[*:2]", "CC([*:1])[*:2]", "CC(C[*:2])[*:1]", "CC(CC[*:2])[*:1]", + "CC(CCN1C(=O)CN=C([*:2])c2ccccc21)[*:1]", "C([*:1])[*:2]", "C(C[*:2])[*:1]", "O=C1CN=C([*:2])c2ccccc2N1CC[*:1]", + "C([*:1])[*:2]", "O=C1CN=C([*:1])c2ccccc2N1C[*:2]", "O=C1CN=C([*:1])c2ccccc2N1[*:2]"]; + var expectedSidechains = ["C[*:1].C[*:2]", "C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1CC[*:2]", "C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:2]", + "C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]", "C1CCC([*:2])CC1.C[*:1]", "CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:2]", + "CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]", "C1CCC([*:2])CC1.CC(C)[*:1]", "CC(C)C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]", + "C1CCC([*:1])CC1.CC(C)C[*:2]", "C1CCC([*:1])CC1.CC(C)CC[*:2]"]; + var pairs = mol.get_mmpa_frags(2, 2, 20); + assert(pairs.cores); + assert(pairs.cores.size() === 11); + assert(pairs.sidechains); + assert(pairs.sidechains.size() === 11); + var i = 0; + while (!pairs.cores.at_end()) { + var m = pairs.cores.next(); + assert(m.get_smiles() === expectedCores[i++]); + m.delete(); + } + i = 0; + while (!pairs.sidechains.at_end()) { + var m = pairs.sidechains.next(); + assert(m.get_smiles() === expectedSidechains[i++]); + m.delete(); + } + assert(!pairs.cores.next()); + assert(!pairs.sidechains.next()); + pairs.cores.delete(); + pairs.sidechains.delete(); + mol.delete(); + } + { + var mol = RDKitModule.get_mol("CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3"); + var expectedSidechains = ["CC(CCN1C(=O)CN=C(C2CCCCC2)c2ccccc21)[*:1].C[*:1]", + "CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1CC[*:1]", "CC(C)C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:1]", + "CC(C)CC[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:1]", "C1CCC([*:1])CC1.CC(C)CCN1C(=O)CN=C([*:1])c2ccccc21"]; + + var pairs = mol.get_mmpa_frags(1, 1, 20); + assert(pairs.cores); + assert(pairs.cores.size() === 5); + assert(pairs.sidechains); + assert(pairs.sidechains.size() === 5); + while (!pairs.cores.at_end()) { + var m = pairs.cores.next(); + assert(m === null); + } + var i = 0; + while (!pairs.sidechains.at_end()) { + var m = pairs.sidechains.next(); + assert(m.get_smiles() === expectedSidechains[i++]); + m.delete(); + } + assert(!pairs.cores.next()); + assert(!pairs.sidechains.next()); + var numCores = pairs.cores.size(); + for (i = 0; i < numCores; ++i) { + assert(pairs.cores.at(i) === null); + } + for (i = 0; i < numCores; ++i) { + assert(pairs.cores.pop(0) === null); + } + assert(pairs.cores.size() === 0); + assert(pairs.cores.next() === null); + pairs.cores.delete(); + pairs.sidechains.delete(); + mol.delete(); + } +} + function test_hs_in_place() { { var mol = RDKitModule.get_mol("CC"); @@ -2754,6 +2825,9 @@ initRDKitModule().then(function(instance) { test_wedging_outside_scaffold(); test_wedging_if_no_match(); test_get_frags(); + if (RDKitModule.Mol.prototype.get_mmpa_frags) { + test_get_mmpa_frags(); + } test_hs_in_place(); test_query_colour(); test_alignment_r_groups_aromatic_ring();