Expose molzip functionality to MinimalLib (#7959)

* Expose molzip functionality to MinimalLib

* changes from code review

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
This commit is contained in:
Paolo Tosco
2024-11-12 17:16:14 +01:00
committed by GitHub
parent 9db22a9178
commit 9c63cf6c54
12 changed files with 304 additions and 22 deletions

View File

@@ -26,7 +26,11 @@ if(RDK_BUILD_MINIMAL_LIB)
if(RDK_BUILD_MINIMAL_LIB_MMPA)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MMPA)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};MMPA_static")
endif()
endif()
# Optional molzip support: when RDK_BUILD_MINIMAL_LIB_MOLZIP is set, pass the
# matching -D definition to the compiler and append ChemTransforms_static to
# the MINIMAL_LIB_LIBRARIES list.
if(RDK_BUILD_MINIMAL_LIB_MOLZIP)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MOLZIP)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};ChemTransforms_static")
endif()
if(RDK_BUILD_FREETYPE_SUPPORT)
if( ${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
set(USE_FLAGS "-s USE_FREETYPE=1")

View File

@@ -16,10 +16,29 @@
#include <GraphMol/MolDraw2D/MolDraw2D.h>
#include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
#include <GraphMol/MolDraw2D/MolDraw2DJS.h>
#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && \
defined(RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP)
#include <GraphMol/ChemTransforms/MolFragmenter.h>
#include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
#endif
using namespace RDKit;
namespace {
// Returns the module's "Mol" property. The lookup runs once, on the first
// call, and the resulting handle is reused afterwards. Callers use it as the
// right-hand side of instanceof checks.
const emscripten::val JSMolObj() {
  static const emscripten::val molClass =
      emscripten::val::module_property("Mol");
  return molClass;
}
// Returns the module's "MolList" property. The lookup runs once, on the first
// call, and the resulting handle is reused afterwards.
const emscripten::val JSMolListObj() {
  static const emscripten::val molListClass =
      emscripten::val::module_property("MolList");
  return molListClass;
}
// Returns the JavaScript global "Object". The lookup runs once, on the first
// call, and the resulting handle is reused afterwards (e.g. to invoke
// Object.keys on a plain JS object).
const emscripten::val ObjectObj() {
  static const emscripten::val objectGlobal =
      emscripten::val::global("Object");
  return objectGlobal;
}
class JSDrawerFromDetails : public MinimalLib::DrawerFromDetails {
public:
JSDrawerFromDetails(const emscripten::val &ctx, int w = -1, int h = -1,
@@ -134,6 +153,66 @@ std::string get_mcs_as_smarts_no_details(const JSMolList &mols) {
}
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Two-molecule molzip entry point for callers that pass no JSON options:
// forwards to molzip() with an empty details string.
JSMolBase *molzip_no_details_helper(const JSMolBase &a, const JSMolBase &b) {
  const std::string noDetails;
  return molzip(a, b, noDetails);
}
#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP
// Zips together the molecules held in one R-group decomposition row.
//
// rgdRow       JS object; every key returned by Object.keys(rgdRow) is used
//              as a label and its value must be an instance of Mol.
// details_json JSON text handed to parseMolzipParametersJSON to fill in the
//              MolzipParams.
//
// Returns a newly allocated JSMol holding a copy of the zipped molecule, or
// nullptr when the row has no keys, when any value is not a Mol instance,
// when a value is not backed by a JSMolShared, or when molzip produces no
// molecule.
JSMolBase *molzip_rgd_row_helper(const emscripten::val &rgdRow,
                                 const std::string &details_json) {
  const auto rlabelsAsVal = ObjectObj().call<emscripten::val>("keys", rgdRow);
  const auto rlabels = emscripten::vecFromJSArray<std::string>(rlabelsAsVal);
  if (rlabels.empty()) {
    return nullptr;
  }
  std::map<std::string, ROMOL_SPTR> molzipRow;
  for (const auto &rlabel : rlabels) {
    const auto jsMolAsVal = rgdRow[rlabel];
    if (!jsMolAsVal.instanceof(JSMolObj())) {
      return nullptr;
    }
    // Only JSMolShared exposes the shared pointer that the row map needs.
    auto jsMolShared = dynamic_cast<JSMolShared *>(
        jsMolAsVal.as<JSMolBase *>(emscripten::allow_raw_pointers()));
    if (!jsMolShared) {
      return nullptr;
    }
    molzipRow.emplace(rlabel, jsMolShared->get_sptr());
  }
  MolzipParams params;
  parseMolzipParametersJSON(params, details_json.c_str());
  const auto res = RDKit::molzip(molzipRow, params);
  // Defensive: never dereference an empty result.
  if (!res) {
    return nullptr;
  }
  return new JSMol(new RWMol(*res));
}
// Row-based molzip entry point for callers that pass no JSON options:
// delegates to molzip_rgd_row_helper() with an empty details string.
JSMolBase *molzip_no_details_rgd_row_helper(const emscripten::val &rgdRow) {
  const std::string noDetails;
  return molzip_rgd_row_helper(rgdRow, noDetails);
}
// Dispatches the two-argument JS call molzip(x, y) on the runtime types of
// its arguments:
//   - (Mol, Mol)             -> zip the two molecules with default options;
//   - (non-Mol, string)      -> treat param1 as an R-group decomposition row
//                               and param2 as the JSON options string.
// Any other combination, or a null molecule pointer, yields nullptr.
JSMolBase *molzip_2params_helper(const emscripten::val &param1,
                                 const emscripten::val &param2) {
  const bool firstIsMol = param1.instanceof(JSMolObj());
  if (firstIsMol && param2.instanceof(JSMolObj())) {
    const auto aJsMolPtr =
        param1.as<JSMolBase *>(emscripten::allow_raw_pointers());
    const auto bJsMolPtr =
        param2.as<JSMolBase *>(emscripten::allow_raw_pointers());
    if (aJsMolPtr && bJsMolPtr) {
      return molzip_no_details_helper(*aJsMolPtr, *bJsMolPtr);
    }
    return nullptr;
  }
  if (!firstIsMol && param2.typeOf().as<std::string>() == "string") {
    const auto details_json = param2.as<std::string>();
    return molzip_rgd_row_helper(param1, details_json);
  }
  return nullptr;
}
#endif
#endif
emscripten::val binary_string_to_uint8array(const std::string &pkl) {
emscripten::val view(emscripten::typed_memory_view(
pkl.size(), reinterpret_cast<const unsigned char *>(pkl.c_str())));
@@ -340,16 +419,14 @@ emscripten::val get_mmpa_frags_helper(const JSMolBase &self,
JSRGroupDecomposition *get_rgd_helper(
const emscripten::val &singleOrMultipleCores,
const std::string &details_json) {
static const auto JSMOL = emscripten::val::module_property("Mol");
static const auto JSMOLLIST = emscripten::val::module_property("MolList");
JSRGroupDecomposition *res = nullptr;
if (singleOrMultipleCores.instanceof (JSMOL)) {
if (singleOrMultipleCores.instanceof (JSMolObj())) {
const auto jsMolPtr =
singleOrMultipleCores.as<JSMolBase *>(emscripten::allow_raw_pointers());
if (jsMolPtr) {
res = new JSRGroupDecomposition(*jsMolPtr, details_json);
}
} else if (singleOrMultipleCores.instanceof (JSMOLLIST)) {
} else if (singleOrMultipleCores.instanceof (JSMolListObj())) {
const auto jsMolListPtr =
singleOrMultipleCores.as<JSMolList *>(emscripten::allow_raw_pointers());
if (jsMolListPtr) {
@@ -747,4 +824,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
function("get_rgd", &get_rgd_helper, allow_raw_pointers());
function("get_rgd", &get_rgd_no_details_helper, allow_raw_pointers());
#endif
#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && defined(__EMSCRIPTEN__)
function("molzip", &::molzip, allow_raw_pointers());
#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP
function("molzip", &molzip_2params_helper, allow_raw_pointers());
function("molzip", &molzip_no_details_rgd_row_helper, allow_raw_pointers());
#else
function("molzip", &molzip_no_details_helper, allow_raw_pointers());
#endif
#endif
}

View File

@@ -31,6 +31,10 @@
#ifdef RDK_BUILD_MINIMAL_LIB_MCS
#include <GraphMol/FMCS/FMCS.h>
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
#include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
#endif
#include <GraphMol/Descriptors/Property.h>
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
@@ -992,3 +996,12 @@ JSRGroupDecomposition::getRGroupsAsRows() const {
return res;
}
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Zips molecules a and b together.
//
// details_json is handed to parseMolzipParametersJSON to fill in the
// MolzipParams used for the operation.
//
// Returns a newly allocated JSMol holding a copy of the zipped molecule, or
// nullptr if the underlying molzip call produces no molecule.
JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b,
                  const std::string &details_json) {
  MolzipParams params;
  parseMolzipParametersJSON(params, details_json.c_str());
  const auto out = molzip(a.get(), b.get(), params);
  // Defensive: never dereference an empty result.
  if (!out) {
    return nullptr;
  }
  return new JSMol(new RDKit::RWMol(*out));
}
#endif

View File

@@ -27,7 +27,7 @@ class JSMolBase {
public:
JSMolBase(const JSMolBase &) = delete;
JSMolBase &operator=(const JSMolBase &) = delete;
virtual ~JSMolBase() {};
virtual ~JSMolBase(){};
virtual const RDKit::RWMol &get() const = 0;
virtual RDKit::RWMol &get() = 0;
std::string get_smiles() const;
@@ -225,8 +225,8 @@ class JSMolShared : public JSMolBase {
class JSMolList {
public:
JSMolList(const std::vector<RDKit::ROMOL_SPTR> &mols)
: d_mols(mols), d_idx(0) {};
JSMolList() : d_idx(0) {};
: d_mols(mols), d_idx(0){};
JSMolList() : d_idx(0){};
JSMolBase *next();
size_t append(const JSMolBase &mol);
size_t insert(size_t idx, const JSMolBase &mol);
@@ -363,11 +363,11 @@ class JSRGroupDecomposition {
public:
JSRGroupDecomposition(const JSMolBase &core, const std::string &details_json);
JSRGroupDecomposition(const JSMolBase &core)
: JSRGroupDecomposition(core, "") {};
: JSRGroupDecomposition(core, ""){};
JSRGroupDecomposition(const JSMolList &cores,
const std::string &details_json);
JSRGroupDecomposition(const JSMolList &cores)
: JSRGroupDecomposition(cores, "") {};
: JSRGroupDecomposition(cores, ""){};
int add(const JSMolBase &mol);
bool process();
std::map<std::string, std::unique_ptr<JSMolList>> getRGroupsAsColumns() const;
@@ -379,3 +379,8 @@ class JSRGroupDecomposition {
std::vector<unsigned int> d_unmatched;
};
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Zips molecules a and b together. details_json is a JSON string of molzip
// options (it may be empty). The result is returned as a raw pointer to a
// newly allocated object.
JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b,
const std::string &details_json);
#endif

View File

@@ -1965,6 +1965,88 @@ function test_get_mmpa_frags() {
}
}
// Exercises RDKitModule.molzip in its three call shapes:
//   1. molzip(mol, mol)            - atom-map based linkage, default options;
//   2. molzip(mol, mol, details)   - linkage driven by JSON options;
//   3. molzip(rgdRow, details)     - re-assembly of an R-group decomposition
//                                    row (only when RGroupDecomposition is
//                                    available in this build).
// Every embind object created here is explicitly deleted.
function test_molzip() {
    {
        var mol1 = RDKitModule.get_mol("F/C=C/[*:1]");
        assert(mol1);
        var mol2 = RDKitModule.get_mol("[*:1]F");
        assert(mol2);
        var expectedLinkage = "F/C=C/F";
        var mol = RDKitModule.molzip(mol1, mol2);
        assert(mol);
        assert(mol.get_smiles() === expectedLinkage);
        mol1.delete();
        mol2.delete();
        mol.delete();
    }
    {
        var mol1 = RDKitModule.get_mol("[C@H]([Xe])(F)([V])");
        assert(mol1);
        var mol2 = RDKitModule.get_mol("[Xe]N.[V]I");
        assert(mol2);
        var expectedLinkage = "N[C@@H](F)I";
        var details = JSON.stringify({ Label: 'AtomType', AtomSymbols: ['Xe', 'V'] });
        var mol = RDKitModule.molzip(mol1, mol2, details);
        assert(mol);
        assert(mol.get_smiles() === expectedLinkage);
        mol1.delete();
        mol2.delete();
        mol.delete();
    }
    if (RDKitModule.RGroupDecomposition) {
        const smis = ['C1CN[C@H]1F', 'C1CN[C@]1(O)F', 'C1CN[C@@H]1F', 'C1CN[CH]1F'];
        const params = {
            rgroupLabelling: 'Isotope',
            allowMultipleRGroupsOnUnlabelled: true,
        };
        const expectedRowMapping = [
            { Core: '[1*][C@@]1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' },
            { Core: '[1*][C@]1([2*])CCN1', R1: '[1*]F', R2: '[2*]O' },
            { Core: '[1*][C@]1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' },
            { Core: '[1*]C1([2*])CCN1', R1: '[1*]F', R2: '[2*][H]' },
        ];
        const details = JSON.stringify({ Label: 'Isotope' });
        let core;
        let rgd;
        try {
            core = RDKitModule.get_qmol('C1CNC1[*:1]');
            rgd = RDKitModule.get_rgd(core, JSON.stringify(params));
            const smisCanon = smis.map((smi, i) => {
                const mol = RDKitModule.get_mol(smi);
                assert(mol);
                try {
                    const res = rgd.add(mol);
                    assert(res === i);
                    return mol.get_smiles();
                } finally {
                    mol.delete();
                }
            });
            rgd.process();
            const rows = rgd.get_rgroups_as_rows();
            rows.forEach((row, i) => {
                // keep=true: the row molecules are still needed by molzip below
                const foundMapping = getFoundRgdRowAsMap(row, true);
                assert(Object.keys(foundMapping).length === Object.keys(expectedRowMapping[i]).length);
                Object.entries(foundMapping).forEach(([rlabel, smi]) => {
                    assert(expectedRowMapping[i][rlabel] && expectedRowMapping[i][rlabel] === smi);
                });
                let mol;
                try {
                    mol = RDKitModule.molzip(row, details);
                    assert(mol);
                    mol.remove_hs_in_place();
                    assert(mol.get_smiles() === smisCanon[i]);
                } finally {
                    if (mol) {
                        mol.delete();
                    }
                    Object.values(row).forEach((rgroup) => {
                        if (rgroup) {
                            rgroup.delete();
                        }
                    });
                }
            });
        } finally {
            // Release the decomposition before the core it was built from.
            if (rgd) {
                rgd.delete();
            }
            if (core) {
                core.delete();
            }
        }
    }
}
function test_hs_in_place() {
{
var mol = RDKitModule.get_mol("CC");
@@ -3189,14 +3271,14 @@ function test_multi_highlights() {
mol.delete();
}
const getFoundRgdRowAsMap = (row) => Object.fromEntries(Object.entries(row).map(([rlabel, mol]) => {
const getFoundRgdRowAsMap = (row, keep) => Object.fromEntries(Object.entries(row).map(([rlabel, mol]) => {
try {
assert(mol);
assert(mol instanceof RDKitModule.Mol);
const smi = mol.get_smiles();
return [rlabel, smi];
} finally {
if (mol) {
if (!keep && mol) {
mol.delete();
}
}
@@ -3509,6 +3591,9 @@ initRDKitModule().then(function(instance) {
}
test_bw_palette();
test_custom_palette();
if (RDKitModule.molzip) {
test_molzip();
}
test_pickle();
waitAllTestsFinished().then(() =>