Implement MinimalLib get_mcs() version that returns JSON (#6812)

* - added StoreAll to parseMCSParametersJSON()
- added get_mcs() to MinimalLib which returns results as JSON
- added tests for get_mcs()

* added test

* changes in response to review

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
This commit is contained in:
Paolo Tosco
2023-10-22 05:22:14 +02:00
committed by GitHub
parent d9d1fe2838
commit 30d57ebcaa
5 changed files with 142 additions and 0 deletions

View File

@@ -135,6 +135,7 @@ void parseMCSParametersJSON(const char* json, MCSParameters* params) {
"MatchFusedRingsStrict", p.BondCompareParameters.MatchFusedRingsStrict);
p.BondCompareParameters.MatchStereo =
pt.get<bool>("MatchStereo", p.BondCompareParameters.MatchStereo);
p.StoreAll = pt.get<bool>("StoreAll", p.StoreAll);
p.setMCSAtomTyperFromConstChar(
pt.get<std::string>("AtomCompare", "def").c_str());

View File

@@ -180,6 +180,10 @@ JSMol *get_mol_no_details(const std::string &input) {
}
#ifdef RDK_BUILD_MINIMAL_LIB_MCS
// Convenience overload of get_mcs_as_json() for callers that pass no JSON
// details: forwards the molecule list together with an empty details string.
std::string get_mcs_as_json_no_details(const JSMolList &mols) {
  const std::string emptyDetails;
  return get_mcs_as_json(mols, emptyDetails);
}
// Convenience overload of get_mcs_as_mol() for callers that pass no JSON
// details: forwards the molecule list together with an empty details string
// and hands back whatever pointer get_mcs_as_mol() returns.
JSMol *get_mcs_as_mol_no_details(const JSMolList &mols) {
  const std::string emptyDetails;
  return get_mcs_as_mol(mols, emptyDetails);
}
@@ -668,6 +672,8 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
function("get_rxn", &get_rxn_no_details, allow_raw_pointers());
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MCS
function("get_mcs_as_json", &get_mcs_as_json);
function("get_mcs_as_json", &get_mcs_as_json_no_details);
function("get_mcs_as_mol", &get_mcs_as_mol, allow_raw_pointers());
function("get_mcs_as_mol", &get_mcs_as_mol_no_details, allow_raw_pointers());
function("get_mcs_as_smarts", &get_mcs_as_smarts);

View File

@@ -835,6 +835,39 @@ MCSResult getMcsResult(const JSMolList &molList,
}
} // namespace
std::string get_mcs_as_json(const JSMolList &molList, const std::string &details_json) {
auto mcsResult = getMcsResult(molList, details_json);
rj::Document doc;
doc.SetObject();
auto &alloc = doc.GetAllocator();
rj::Value rjSmarts;
if (!mcsResult.DegenerateSmartsQueryMolDict.empty()) {
rjSmarts.SetArray();
for (const auto &pair : mcsResult.DegenerateSmartsQueryMolDict) {
rjSmarts.PushBack(rj::Value(pair.first.c_str(), pair.first.size()),
alloc);
}
} else {
rjSmarts.SetString(mcsResult.SmartsString.c_str(),
mcsResult.SmartsString.size());
}
doc.AddMember("smarts", rjSmarts, alloc);
rj::Value rjCanceled;
rjCanceled.SetBool(mcsResult.Canceled);
doc.AddMember("canceled", rjCanceled, alloc);
rj::Value rjNumAtoms;
rjNumAtoms.SetInt(mcsResult.NumAtoms);
doc.AddMember("numAtoms", rjNumAtoms, alloc);
rj::Value rjNumBonds;
rjNumBonds.SetInt(mcsResult.NumBonds);
doc.AddMember("numBonds", rjNumBonds, alloc);
rj::StringBuffer buffer;
rj::Writer<rj::StringBuffer> writer(buffer);
doc.Accept(writer);
std::string res = buffer.GetString();
return res;
}
std::string get_mcs_as_smarts(const JSMolList &molList,
const std::string &details_json) {
auto res = getMcsResult(molList, details_json);

View File

@@ -272,6 +272,7 @@ void disable_logging();
JSLog *set_log_tee(const std::string &log_name);
JSLog *set_log_capture(const std::string &log_name);
// MCS entry points; only declared when RDK_BUILD_MINIMAL_LIB_MCS is defined.
// Each takes the list of molecules plus a JSON string of MCS details.
#ifdef RDK_BUILD_MINIMAL_LIB_MCS
// Returns the MCS result serialized as a JSON object string with the members
// "smarts", "canceled", "numAtoms" and "numBonds".
std::string get_mcs_as_json(const JSMolList &mols, const std::string &details_json);
// Presumably returns the MCS as a SMARTS string - confirm against the
// implementation.
std::string get_mcs_as_smarts(const JSMolList &mols, const std::string &details_json);
// Presumably returns the MCS as a molecule; ownership of the returned pointer
// is not visible here - confirm against the implementation.
JSMol *get_mcs_as_mol(const JSMolList &mols, const std::string &details_json);
#endif

View File

@@ -2363,6 +2363,107 @@ function test_mcs() {
}
assert(res.size === 4);
}
// get_mcs_as_json() with a Timeout of 1: the JSON result is expected to
// report the search as canceled.
{
const smiArray = [
"c1cccc(c12)ccc(c2)-c3n(CCC[NH3+])c(nn3)SCCc4c[nH]c(c45)cccc5",
"c1cccc(c12)sc(c2)-c3n(CCCC[NH3+])c(nn3)SCCc4c[nH]c(c45)cccc5",
];
let molList;
let mcs;
try {
molList = molListFromSmiArray(smiArray);
mcs = RDKitModule.get_mcs_as_json(molList, JSON.stringify({
RingMatchesRingOnly: true,
CompleteRingsOnly: true,
BondCompare: 'Any',
AtomCompare: 'Any',
// NOTE(review): timeout unit is presumably seconds - confirm.
Timeout: 1,
}));
} finally {
// Always call delete() on the list, even if the MCS call threw.
if (molList) {
molList.delete();
}
}
// A non-empty JSON string must come back even when the search is cut short.
assert(mcs);
mcs = JSON.parse(mcs);
assert(mcs.canceled);
}
// get_mcs_as_json() on two molecules whose SMILES differ only in the
// C-C / C=C linker: first with no details argument, then with StoreAll.
{
const smiArray = [
"Nc1ccc(cc1)C-Cc1c(N)cccc1",
"Nc1ccc(cc1)C=Cc1c(N)cccc1",
];
let molList;
let mcs;
try {
molList = molListFromSmiArray(smiArray);
// Single-argument call: no JSON details are passed.
mcs = RDKitModule.get_mcs_as_json(molList);
} finally {
if (molList) {
molList.delete();
}
}
assert(mcs);
mcs = JSON.parse(mcs);
assert(!mcs.canceled);
// Without StoreAll, "smarts" is expected to be a single string.
assert(!Array.isArray(mcs.smarts));
assert(mcs.numAtoms === 8);
assert(mcs.numBonds === 8);
assert(mcs.smarts === '[#7]-[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#6]');
// Second run on a freshly built list (the previous one was deleted above).
try {
molList = molListFromSmiArray(smiArray);
mcs = RDKitModule.get_mcs_as_json(molList, JSON.stringify({StoreAll: true}));
} finally {
if (molList) {
molList.delete();
}
}
assert(mcs);
mcs = JSON.parse(mcs);
assert(!mcs.canceled);
// With StoreAll, "smarts" is expected to be an array holding both
// equally sized solutions; their order is not relied upon.
assert(Array.isArray(mcs.smarts));
assert(mcs.numAtoms === 8);
assert(mcs.numBonds === 8);
assert(mcs.smarts.length === 2);
assert(mcs.smarts.includes('[#6]-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1-[#7]'));
assert(mcs.smarts.includes('[#7]-[#6]1:[#6]:[#6]:[#6](:[#6]:[#6]:1)-[#6]'));
}
// get_mcs_as_json() on a pair (C1CC1 vs c1ccccc1) for which no MCS is
// expected under CompleteRingsOnly: counts and SMARTS must be empty/zero,
// both without and with StoreAll.
{
const smiArray = [
"C1CC1",
"c1ccccc1",
];
let molList;
let mcs;
try {
molList = molListFromSmiArray(smiArray);
mcs = RDKitModule.get_mcs_as_json(molList, JSON.stringify({ CompleteRingsOnly: true }));
} finally {
if (molList) {
molList.delete();
}
}
assert(mcs);
mcs = JSON.parse(mcs);
assert(!mcs.canceled);
// Falsy checks: zero atoms, zero bonds and an empty SMARTS string.
assert(!mcs.numAtoms);
assert(!mcs.numBonds);
assert(!mcs.smarts);
// Same query with StoreAll, on a freshly built list.
try {
molList = molListFromSmiArray(smiArray);
mcs = RDKitModule.get_mcs_as_json(molList, JSON.stringify({ CompleteRingsOnly: true, StoreAll: true }));
} finally {
if (molList) {
molList.delete();
}
}
assert(mcs);
mcs = JSON.parse(mcs);
assert(!mcs.canceled);
assert(!mcs.numAtoms);
assert(!mcs.numBonds);
// Length check passes for an empty string as well as for an empty array.
assert(!mcs.smarts.length);
}
}
function test_get_num_atoms_bonds() {