minilib functions exposure: mmpa (#6902)

* MMPA functions exposure

* Minilib exposure: mmp

* minilib: mmpa - test added

* minilib: mmpa - failing test added

* - MolList should return null when the corresponding ROMOL_SPTR is a nullptr
- renamed `first` to `cores`
- renamed `second` to `sidechains`
- renamed some functions and constants

* - made JSMolList::at() and JSMolList::pop() robust against dereferencing a null ROMOL_SPTR
- added tests for the above

* Update Code/MinimalLib/minilib.h

Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>

* changes after review

* Update Code/MinimalLib/minilib.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
Co-authored-by: Paolo Tosco <paolo.tosco.mail@gmail.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Leonid Stolbov
2023-12-04 22:31:49 +03:00
committed by GitHub
parent 9e2bfca876
commit 4ab18ae0ea
5 changed files with 157 additions and 7 deletions

View File

@@ -19,6 +19,10 @@ if(RDK_BUILD_MINIMAL_LIB)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MCS)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};FMCS_static")
endif()
# When RDK_BUILD_MINIMAL_LIB_MMPA is set, define the matching preprocessor
# symbol (it guards the MMPA code in the MinimalLib sources) and append
# MMPA_static to the MINIMAL_LIB_LIBRARIES list.
if(RDK_BUILD_MINIMAL_LIB_MMPA)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MMPA)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};MMPA_static")
endif()
if(RDK_BUILD_FREETYPE_SUPPORT)
if( ${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
set(USE_FLAGS "-s USE_FREETYPE=1")

View File

@@ -372,6 +372,17 @@ emscripten::val get_avalon_fp_as_uint8array(const JSMol &self) {
}
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MMPA
// Binding helper around JSMol::get_mmpa_frags(): forwards the three cut
// parameters unchanged and packs the returned pair of JSMolList pointers into
// a JS object, storing .first under "cores" and .second under "sidechains".
emscripten::val get_mmpa_frags_helper(const JSMol &self, unsigned int minCuts,
                                      unsigned int maxCuts,
                                      unsigned int maxCutBonds) {
  auto fragLists = self.get_mmpa_frags(minCuts, maxCuts, maxCutBonds);
  auto result = emscripten::val::object();
  result.set("cores", fragLists.first);
  result.set("sidechains", fragLists.second);
  return result;
}
#endif
} // namespace
using namespace emscripten;
@@ -564,7 +575,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
.function("get_num_atoms",
select_overload<unsigned int() const>(&JSMol::get_num_atoms))
.function("get_num_bonds", &JSMol::get_num_bonds)
;
#ifdef RDK_BUILD_MINIMAL_LIB_MMPA
.function("get_mmpa_frags",
select_overload<emscripten::val(const JSMol &, unsigned int,
unsigned int, unsigned int)>(
get_mmpa_frags_helper))
#endif
;
class_<JSMolList>("MolList")
.constructor<>()

View File

@@ -591,6 +591,44 @@ unsigned int JSMol::get_num_bonds() const {
return d_mol->getNumBonds();
}
#ifdef RDK_BUILD_MINIMAL_LIB_MMPA
namespace {
// Calls RDKit::MMPA::fragmentMol() on mol and splits the resulting pairs into
// two parallel vectors: cores receives each pair's first member and
// sidechains each pair's second member, in the same order.
// Returns false, leaving cores and sidechains untouched, when fragmentMol()
// returns false; otherwise both vectors are replaced and true is returned.
bool mmpaFragmentMol(const ROMol &mol, std::vector<RDKit::ROMOL_SPTR> &cores,
                     std::vector<RDKit::ROMOL_SPTR> &sidechains,
                     unsigned int minCuts, unsigned int maxCuts,
                     unsigned int maxCutBonds) {
  std::vector<std::pair<RDKit::ROMOL_SPTR, RDKit::ROMOL_SPTR>> fragPairs;
  if (!RDKit::MMPA::fragmentMol(mol, fragPairs, minCuts, maxCuts,
                                maxCutBonds)) {
    return false;
  }
  // Drop any previous contents and size both outputs up front.
  cores.clear();
  sidechains.clear();
  const auto pairCount = fragPairs.size();
  cores.reserve(pairCount);
  sidechains.reserve(pairCount);
  for (auto it = fragPairs.cbegin(); it != fragPairs.cend(); ++it) {
    cores.push_back(it->first);
    sidechains.push_back(it->second);
  }
  return true;
}
} // end of anonymous namespace
std::pair<JSMolList *, JSMolList *> JSMol::get_mmpa_frags(
unsigned int minCuts, unsigned int maxCuts,
unsigned int maxCutBonds) const {
std::vector<RDKit::ROMOL_SPTR> cores;
std::vector<RDKit::ROMOL_SPTR> sidechains;
if (!mmpaFragmentMol(*d_mol, cores, sidechains, minCuts, maxCuts,
maxCutBonds)) {
return std::make_pair(nullptr, nullptr);
}
return std::make_pair(new JSMolList(std::move(cores)),
new JSMolList(std::move(sidechains)));
}
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_RXN
std::string JSReaction::get_svg(int w, int h) const {
assert(d_rxn);
@@ -611,20 +649,28 @@ bool JSReaction::is_valid() const {
#endif
// Returns a newly allocated JSMol for the entry at the iteration cursor
// (d_idx) and advances the cursor, or nullptr once d_idx has reached
// d_mols.size().
// NOTE(review): this hunk appears to list both the previous body (the single
// return expression) and its replacement (the at()-based statements below
// it). As written here, everything after the first return statement is
// unreachable - confirm against the actual source file.
JSMol *JSMolList::next() {
return (d_idx < d_mols.size()
? new JSMol(new RDKit::RWMol(*d_mols.at(d_idx++)))
: nullptr);
JSMol *res = nullptr;
if (d_idx < d_mols.size()) {
res = at(d_idx++);
}
return res;
}
// Returns a newly allocated JSMol copied from the entry at idx, or nullptr
// when idx is out of range.
// NOTE(review): this hunk appears to list both the previous body (the single
// return expression, which dereferences the stored pointer without checking
// it) and its replacement (the statements below it, which also yield nullptr
// when the stored pointer is null). As written here, everything after the
// first return statement is unreachable - confirm against the actual source
// file.
JSMol *JSMolList::at(size_t idx) const {
return (idx < d_mols.size() ? new JSMol(new RDKit::RWMol(*d_mols.at(idx)))
: nullptr);
JSMol *res = nullptr;
if (idx < d_mols.size()) {
const auto &molSptr = d_mols.at(idx);
if (molSptr) {
res = new JSMol(new RDKit::RWMol(*molSptr));
}
}
return res;
}
JSMol *JSMolList::pop(size_t idx) {
JSMol *res = nullptr;
if (idx < d_mols.size()) {
res = new JSMol(new RDKit::RWMol(*d_mols.at(idx)));
res = at(idx);
d_mols.erase(d_mols.begin() + idx);
if (d_idx > idx) {
--d_idx;

View File

@@ -14,6 +14,10 @@
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
#ifdef RDK_BUILD_MINIMAL_LIB_MMPA
#include <GraphMol/MMPA/MMPA.h>
#endif
class JSMolList;
class JSMol {
@@ -143,6 +147,11 @@ class JSMol {
unsigned int get_num_atoms(bool heavyOnly) const;
unsigned int get_num_atoms() const { return get_num_atoms(false); };
unsigned int get_num_bonds() const;
#ifdef RDK_BUILD_MINIMAL_LIB_MMPA
// Fragments this molecule for MMPA; minCuts, maxCuts and maxCutBonds are
// passed through to RDKit::MMPA::fragmentMol().
// Returns a pair of lists allocated with new: .first holds the cores and
// .second the sidechains. Both members are nullptr when fragmentation fails.
std::pair<JSMolList *, JSMolList *> get_mmpa_frags(
unsigned int minCuts, unsigned int maxCuts,
unsigned int maxCutBonds) const;
#endif
std::unique_ptr<RDKit::RWMol> d_mol;
static constexpr int d_defaultWidth = 250;

View File

@@ -1845,6 +1845,77 @@ function test_get_frags() {
}
}
// Exercises Mol.get_mmpa_frags(): checks the returned { cores, sidechains }
// lists against expected SMILES, and checks that MolList.next(), at() and
// pop() return null for list entries that hold no molecule.
function test_get_mmpa_frags() {
// Case 1: minCuts = maxCuts = 2. Both lists are expected to hold 11 entries
// whose SMILES match the tables below, in order.
{
var mol = RDKitModule.get_mol("CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3");
var expectedCores = ["O=C1CN=C(C2CCCCC2)c2ccccc2N1CCC([*:1])[*:2]", "CC([*:1])[*:2]", "CC(C[*:2])[*:1]", "CC(CC[*:2])[*:1]",
"CC(CCN1C(=O)CN=C([*:2])c2ccccc21)[*:1]", "C([*:1])[*:2]", "C(C[*:2])[*:1]", "O=C1CN=C([*:2])c2ccccc2N1CC[*:1]",
"C([*:1])[*:2]", "O=C1CN=C([*:1])c2ccccc2N1C[*:2]", "O=C1CN=C([*:1])c2ccccc2N1[*:2]"];
var expectedSidechains = ["C[*:1].C[*:2]", "C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1CC[*:2]", "C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:2]",
"C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]", "C1CCC([*:2])CC1.C[*:1]", "CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:2]",
"CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]", "C1CCC([*:2])CC1.CC(C)[*:1]", "CC(C)C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:2]",
"C1CCC([*:1])CC1.CC(C)C[*:2]", "C1CCC([*:1])CC1.CC(C)CC[*:2]"];
var pairs = mol.get_mmpa_frags(2, 2, 20);
assert(pairs.cores);
assert(pairs.cores.size() === 11);
assert(pairs.sidechains);
assert(pairs.sidechains.size() === 11);
var i = 0;
// Walk each list with its iterator; every returned Mol is deleted after use.
while (!pairs.cores.at_end()) {
var m = pairs.cores.next();
assert(m.get_smiles() === expectedCores[i++]);
m.delete();
}
i = 0;
while (!pairs.sidechains.at_end()) {
var m = pairs.sidechains.next();
assert(m.get_smiles() === expectedSidechains[i++]);
m.delete();
}
// Once the iterator is exhausted, next() must return a falsy value.
assert(!pairs.cores.next());
assert(!pairs.sidechains.next());
pairs.cores.delete();
pairs.sidechains.delete();
mol.delete();
}
// Case 2: minCuts = maxCuts = 1. Five entries are expected in each list;
// every core entry is expected to be null while the sidechains carry SMILES.
{
var mol = RDKitModule.get_mol("CC(C)CCN1C(=O)CN=C(c2ccccc12)C3CCCCC3");
var expectedSidechains = ["CC(CCN1C(=O)CN=C(C2CCCCC2)c2ccccc21)[*:1].C[*:1]",
"CC(C)[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1CC[*:1]", "CC(C)C[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1C[*:1]",
"CC(C)CC[*:1].O=C1CN=C(C2CCCCC2)c2ccccc2N1[*:1]", "C1CCC([*:1])CC1.CC(C)CCN1C(=O)CN=C([*:1])c2ccccc21"];
var pairs = mol.get_mmpa_frags(1, 1, 20);
assert(pairs.cores);
assert(pairs.cores.size() === 5);
assert(pairs.sidechains);
assert(pairs.sidechains.size() === 5);
// next() on an entry without a molecule must return null.
while (!pairs.cores.at_end()) {
var m = pairs.cores.next();
assert(m === null);
}
var i = 0;
while (!pairs.sidechains.at_end()) {
var m = pairs.sidechains.next();
assert(m.get_smiles() === expectedSidechains[i++]);
m.delete();
}
assert(!pairs.cores.next());
assert(!pairs.sidechains.next());
// at() must also return null for those entries.
var numCores = pairs.cores.size();
for (i = 0; i < numCores; ++i) {
assert(pairs.cores.at(i) === null);
}
// pop(0) returns null for each such entry and still removes it, so the list
// ends up empty.
for (i = 0; i < numCores; ++i) {
assert(pairs.cores.pop(0) === null);
}
assert(pairs.cores.size() === 0);
assert(pairs.cores.next() === null);
pairs.cores.delete();
pairs.sidechains.delete();
mol.delete();
}
}
function test_hs_in_place() {
{
var mol = RDKitModule.get_mol("CC");
@@ -2754,6 +2825,9 @@ initRDKitModule().then(function(instance) {
test_wedging_outside_scaffold();
test_wedging_if_no_match();
test_get_frags();
if (RDKitModule.Mol.prototype.get_mmpa_frags) {
test_get_mmpa_frags();
}
test_hs_in_place();
test_query_colour();
test_alignment_r_groups_aromatic_ring();