Expose getMolFrags in CFFI and MinimalLib (#5774)

This commit is contained in:
Paolo Tosco
2022-11-25 18:59:09 +01:00
committed by GitHub
parent 6a3e1f0d12
commit af8c5a0e76
8 changed files with 298 additions and 2 deletions

View File

@@ -1010,6 +1010,97 @@ void test_standardize() {
printf("--------------------------\n");
}
/* Verifies that every pickled fragment converts to the expected SMILES and
 * then releases everything get_mol_frags() handed out: each fragment pickle,
 * the array of pickles and the array of pickle sizes.
 * The caller is responsible for resetting its own pointers afterwards. */
static void check_and_free_mol_frags(char **frags_mpkl_array,
                                     size_t *frags_pkl_sz_array,
                                     size_t num_frags,
                                     const char **expected_smiles) {
  size_t i;
  for (i = 0; i < num_frags; ++i) {
    char *smi;
    assert(frags_pkl_sz_array[i]);
    smi = get_smiles(frags_mpkl_array[i], frags_pkl_sz_array[i], NULL);
    assert(smi);
    assert(!strcmp(smi, expected_smiles[i]));
    free(smi);
    free(frags_mpkl_array[i]);
    frags_mpkl_array[i] = NULL;
  }
  free(frags_mpkl_array);
  free(frags_pkl_sz_array);
}

/* Exercises get_mol_frags():
 *  1. a sanitizable three-fragment molecule, without requesting mappings;
 *  2. the same molecule, this time also checking the JSON mappings;
 *  3. a non-sanitizable molecule, which must fail with default options and
 *     succeed when "sanitizeFrags" is switched off. */
void test_get_mol_frags() {
  char *mpkl;
  size_t mpkl_size;
  size_t *frags_pkl_sz_array = NULL;
  size_t num_frags = 0;
  char **frags_mpkl_array = NULL;
  char *mappings_json = NULL;
  const char *expected_frag_smiles[] = {"c1ccncc1", "CC(C)C", "NCCCO"};
  const char *expected_frag_smiles_non_sanitized[] = {"CN(C)(C)C", "c1ccc1"};
  const char *expected_mappings =
      "{\"frags\":[0,0,0,0,0,0,1,1,1,1,2,2,2,2,2],\"fragsMolAtomMapping\":[[0,1,2,3,4,5],[6,7,8,9],[10,11,12,13,14]]}";

  printf("--------------------------\n");
  printf(" test_get_mol_frags\n");

  mpkl = get_mol("n1ccccc1.CC(C)C.OCCCN", &mpkl_size, "");
  /* Fail right here, rather than in a later get_mol_frags() assertion, if
   * the input molecule could not be built. */
  assert(mpkl);
  assert(mpkl_size);

  /* 1. fragments only (mappings_json == NULL) */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags, "", NULL);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(num_frags == 3);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;

  /* 2. fragments plus JSON mappings */
  frags_mpkl_array = get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array,
                                   &num_frags, "", &mappings_json);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(mappings_json);
  assert(num_frags == 3);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;
  assert(!strcmp(mappings_json, expected_mappings));
  free(mappings_json);
  mappings_json = NULL;
  free(mpkl);
  mpkl = NULL;

  /* 3. molecule that cannot be sanitized */
  mpkl = get_mol("N(C)(C)(C)C.c1ccc1", &mpkl_size, "{\"sanitize\":false}");
  assert(mpkl);
  assert(mpkl_size);
  /* default options sanitize the fragments, so the call must fail cleanly */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags, "", NULL);
  assert(!frags_mpkl_array);
  assert(!frags_pkl_sz_array);
  assert(num_frags == 0);
  /* with sanitization disabled both fragments are returned */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags,
                    "{\"sanitizeFrags\":false}", NULL);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(num_frags == 2);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles_non_sanitized);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;
  free(mpkl);
  mpkl = NULL;
}
int main() {
enable_logging();
char *vers = version();
@@ -1026,5 +1117,6 @@ int main() {
test_modifications();
test_coords();
test_standardize();
test_get_mol_frags();
return 0;
}

View File

@@ -296,7 +296,7 @@ extern "C" char *get_mol(const char *input, size_t *pkl_sz,
std::unique_ptr<RWMol> mol{MinimalLib::mol_from_input(input, details_json)};
if (!mol) {
*pkl_sz = 0;
return NULL;
return nullptr;
}
unsigned int propFlags = PicklerOps::PropertyPickleOptions::AllProps ^
PicklerOps::PropertyPickleOptions::ComputedProps;
@@ -323,7 +323,7 @@ extern "C" char *get_rxn(const char *input, size_t *pkl_sz,
MinimalLib::rxn_from_input(input, details_json)};
if (!rxn) {
*pkl_sz = 0;
return NULL;
return nullptr;
}
unsigned int propFlags = PicklerOps::PropertyPickleOptions::AllProps ^
PicklerOps::PropertyPickleOptions::ComputedProps;
@@ -332,6 +332,54 @@ extern "C" char *get_rxn(const char *input, size_t *pkl_sz,
return str_to_c(pkl, pkl_sz);
}
// Splits a pickled molecule into its fragments.
//
// pkl / pkl_sz          pickled input molecule and its size in bytes
// frags_pkl_sz_array    out: malloc'ed array holding the size of each
//                       fragment pickle
// num_frags             out: number of fragments
// details_json          optional JSON options, parsed by
//                       MinimalLib::get_mol_frags_details(); may be NULL
// mappings_json         optional out: malloc'ed JSON string produced by
//                       MinimalLib::get_mol_frags_mappings(); may be NULL
//
// Returns a malloc'ed array of *num_frags malloc'ed fragment pickles.
// Returns NULL if a required argument is missing, if the molecule cannot be
// fragmented, or if any step fails; in that case *frags_pkl_sz_array and
// *mappings_json (when requested) are NULL and *num_frags is 0.
//
// No C++ exception is allowed to leave this function because it is called
// through a C ABI; every failure is reported as a NULL return instead.
extern "C" char **get_mol_frags(const char *pkl, size_t pkl_sz,
                                size_t **frags_pkl_sz_array, size_t *num_frags,
                                const char *details_json,
                                char **mappings_json) {
  if (!pkl || !pkl_sz || !frags_pkl_sz_array || !num_frags) {
    return nullptr;
  }
  // Put every out-parameter into its "failure" state up front.
  *frags_pkl_sz_array = nullptr;
  *num_frags = 0;
  if (mappings_json) {
    *mappings_json = nullptr;
  }

  std::vector<int> frags;
  std::vector<std::vector<int>> fragsMolAtomMapping;
  std::vector<ROMOL_SPTR> molFrags;
  try {
    // Unpickling and option parsing are inside the try block as well: a
    // corrupt pickle or malformed details_json must not propagate an
    // exception to the C caller.
    auto mol = mol_from_pkl(pkl, pkl_sz);
    bool sanitizeFrags = true;
    bool copyConformers = true;
    if (details_json) {
      std::string json = details_json;
      MinimalLib::get_mol_frags_details(json, sanitizeFrags, copyConformers);
    }
    molFrags = MolOps::getMolFrags(mol, sanitizeFrags, &frags,
                                   &fragsMolAtomMapping, copyConformers);
  } catch (...) {
    return nullptr;
  }
  if (molFrags.empty()) {
    return nullptr;
  }

  const size_t fragCount = molFrags.size();
  // calloc zero-fills both arrays (the original code memset the pointer
  // array). The pointer slots are handed to mol_to_pkl() by address, so they
  // have to start out as NULL -- presumably mol_to_pkl() looks at the
  // previous value; TODO confirm against its definition.
  char **molPklArray = static_cast<char **>(calloc(fragCount, sizeof(char *)));
  size_t *pklSzArray = static_cast<size_t *>(calloc(fragCount, sizeof(size_t)));
  if (!molPklArray || !pklSzArray) {
    free(molPklArray);
    free(pklSzArray);
    return nullptr;
  }

  char *mappings = nullptr;
  try {
    for (size_t i = 0; i < fragCount; ++i) {
      mol_to_pkl(*molFrags[i], &molPklArray[i], &pklSzArray[i]);
    }
    if (mappings_json) {
      auto res = MinimalLib::get_mol_frags_mappings(frags, fragsMolAtomMapping);
      mappings = str_to_c(res);
    }
  } catch (...) {
    // Release whatever was produced so far; unused slots are still NULL.
    for (size_t i = 0; i < fragCount; ++i) {
      free(molPklArray[i]);
    }
    free(molPklArray);
    free(pklSzArray);
    return nullptr;
  }

  // Publish the results only once everything has succeeded.
  *frags_pkl_sz_array = pklSzArray;
  *num_frags = fragCount;
  if (mappings_json) {
    *mappings_json = mappings;
  }
  return molPklArray;
}
// Returns rdkitVersion converted to a C string by str_to_c().
extern "C" char *version() {
  char *versionStr = str_to_c(rdkitVersion);
  return versionStr;
}
#ifdef RDK_BUILD_THREADSAFE_SSS
std::atomic_int logging_needs_init{1};

View File

@@ -44,6 +44,11 @@ RDKIT_RDKITCFFI_EXPORT char *get_inchi_for_molblock(const char *ctab,
RDKIT_RDKITCFFI_EXPORT char *get_inchikey_for_inchi(const char *inchi);
RDKIT_RDKITCFFI_EXPORT char *get_rxn(const char *input, size_t *mol_sz,
const char *details_json);
// Splits the pickled molecule pkl (pkl_sz bytes) into its fragments.
// Returns a malloc'ed array of *num_frags malloc'ed fragment pickles; the
// size of each pickle is stored in the malloc'ed array *frags_pkl_sz_array.
// Returns NULL (with *num_frags == 0 and *frags_pkl_sz_array == NULL) when
// fragmentation fails or yields nothing; also returns NULL, leaving the
// out-parameters untouched, when pkl, pkl_sz, frags_pkl_sz_array or
// num_frags is NULL/0.
// details_json may be NULL; it is parsed for the "sanitizeFrags" and
// "copyConformers" options (both default to true).
// mappings_json may be NULL; otherwise it receives a JSON string with the
// "frags" and "fragsMolAtomMapping" arrays.
RDKIT_RDKITCFFI_EXPORT char **get_mol_frags(const char *pkl, size_t pkl_sz,
size_t **frags_pkl_sz_array,
size_t *num_frags,
const char *details_json,
char **mappings_json);
// substructure
RDKIT_RDKITCFFI_EXPORT char *get_substruct_match(const char *mol_pkl,

View File

@@ -926,6 +926,44 @@ std::string generate_aligned_coords(ROMol &mol, const ROMol &templateMol,
return res;
}
// Parses the JSON options accepted by the fragment-splitting entry points.
// An empty details_json leaves sanitizeFrags and copyConformers untouched.
// Otherwise the string is parsed with boost::property_tree::read_json and
// both flags are passed through LPT_OPT_GET (defined elsewhere in this file;
// presumably it overwrites the variable when a key of the same name is
// present -- confirm against the macro definition).
void get_mol_frags_details(const std::string &details_json, bool &sanitizeFrags,
                           bool &copyConformers) {
  if (details_json.empty()) {
    return;
  }
  std::istringstream ss(details_json);
  // NOTE: the tree must be called "pt" -- LPT_OPT_GET is used on the next
  // lines and no other name is passed to it.
  boost::property_tree::ptree pt;
  boost::property_tree::read_json(ss, pt);
  LPT_OPT_GET(sanitizeFrags);
  LPT_OPT_GET(copyConformers);
}
std::string get_mol_frags_mappings(
const std::vector<int> &frags,
const std::vector<std::vector<int>> &fragsMolAtomMapping) {
rj::Document doc;
doc.SetObject();
rj::Value rjFrags(rj::kArrayType);
for (int fragIdx : frags) {
rjFrags.PushBack(fragIdx, doc.GetAllocator());
}
doc.AddMember("frags", rjFrags, doc.GetAllocator());
rj::Value rjFragsMolAtomMapping(rj::kArrayType);
for (const auto &atomIdxVec : fragsMolAtomMapping) {
rj::Value rjAtomIndices(rj::kArrayType);
for (int atomIdx : atomIdxVec) {
rjAtomIndices.PushBack(atomIdx, doc.GetAllocator());
}
rjFragsMolAtomMapping.PushBack(rjAtomIndices, doc.GetAllocator());
}
doc.AddMember("fragsMolAtomMapping", rjFragsMolAtomMapping,
doc.GetAllocator());
rj::StringBuffer buffer;
rj::Writer<rj::StringBuffer> writer(buffer);
doc.Accept(writer);
return buffer.GetString();
}
} // namespace MinimalLib
} // namespace RDKit
#undef LPT_OPT_GET

View File

@@ -297,6 +297,19 @@ emscripten::val get_maccs_fp_as_uint8array(const JSMol &self) {
return binary_string_to_uint8array(fp);
}
// Wraps JSMol::get_frags() for JavaScript: returns an object with two
// properties,
//   molIterator - the JSMolIterator* returned by get_frags() (nullptr when
//                 the JSMol holds no molecule)
//   mappings    - the JSON mappings string returned by get_frags()
emscripten::val get_frags_helper(JSMol &self, const std::string &details) {
  auto res = self.get_frags(details);
  auto obj = emscripten::val::object();
  obj.set("molIterator", res.first);
  obj.set("mappings", res.second);
  return obj;
}
// Convenience overload: same as get_frags_helper(self, "{}"), i.e. an empty
// JSON options object.
emscripten::val get_frags_helper(JSMol &self) {
  const std::string emptyDetails("{}");
  return get_frags_helper(self, emptyDetails);
}
#ifdef RDK_BUILD_AVALON_SUPPORT
emscripten::val get_avalon_fp_as_uint8array(const JSMol &self,
const std::string &details) {
@@ -382,6 +395,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
select_overload<emscripten::val(const JSMol &, const std::string &)>(
get_atom_pair_fp_as_uint8array))
.function("get_maccs_fp_as_uint8array", &get_maccs_fp_as_uint8array)
.function("get_frags",
select_overload<emscripten::val(JSMol &, const std::string &)>(
get_frags_helper),
allow_raw_pointers())
.function("get_frags",
select_overload<emscripten::val(JSMol &)>(get_frags_helper),
allow_raw_pointers())
#ifdef RDK_BUILD_AVALON_SUPPORT
.function("get_avalon_fp_as_uint8array",
select_overload<emscripten::val(const JSMol &)>(
@@ -478,6 +498,12 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
.function("straighten_depiction",
select_overload<void(bool)>(&JSMol::straighten_depiction));
// Expose JSMolIterator to JavaScript under the name "MolIterator".
// next() returns a raw JSMol pointer, hence allow_raw_pointers().
class_<JSMolIterator>("MolIterator")
.function("next", &JSMolIterator::next, allow_raw_pointers())
.function("reset", &JSMolIterator::reset)
.function("at_end", &JSMolIterator::at_end)
.function("size", &JSMolIterator::size);
class_<JSReaction>("Reaction")
#ifdef __EMSCRIPTEN__
.function("draw_to_canvas_with_offset", &draw_rxn_to_canvas_with_offset)

View File

@@ -571,6 +571,24 @@ void JSMol::straighten_depiction(bool minimizeRotation) {
RDDepict::straightenDepiction(*d_mol, -1, minimizeRotation);
}
// Splits the wrapped molecule into fragments.
//
// details_json is parsed by MinimalLib::get_mol_frags_details(); both
// sanitizeFrags and copyConformers default to true.
//
// Returns a pair of
//   - a heap-allocated JSMolIterator over the fragments (created with new;
//     nullptr when this JSMol holds no molecule), and
//   - the JSON mappings string from MinimalLib::get_mol_frags_mappings()
//     (empty when this JSMol holds no molecule).
// Exceptions raised while parsing the options or fragmenting are not caught
// here.
std::pair<JSMolIterator *, std::string> JSMol::get_frags(
    const std::string &details_json) {
  if (!d_mol) {
    return std::make_pair(nullptr, "");
  }
  std::vector<int> frags;
  std::vector<std::vector<int>> fragsMolAtomMapping;
  bool sanitizeFrags = true;
  bool copyConformers = true;
  MinimalLib::get_mol_frags_details(details_json, sanitizeFrags,
                                    copyConformers);
  auto molFrags = MolOps::getMolFrags(*d_mol, sanitizeFrags, &frags,
                                      &fragsMolAtomMapping, copyConformers);
  // Build the JSON before allocating the iterator. When both were arguments
  // of the same make_pair call their evaluation order was unspecified, so a
  // throw during serialization could leak an already allocated iterator.
  auto mappings =
      MinimalLib::get_mol_frags_mappings(frags, fragsMolAtomMapping);
  return std::make_pair(new JSMolIterator(molFrags), std::move(mappings));
}
std::string JSReaction::get_svg(int w, int h) const {
if (!d_rxn) {
return "";

View File

@@ -14,6 +14,8 @@
#include <GraphMol/ChemReactions/Reaction.h>
#include <GraphMol/ChemReactions/ReactionParser.h>
class JSMolIterator;
class JSMol {
public:
JSMol() : d_mol(nullptr) {}
@@ -125,12 +127,35 @@ class JSMol {
double normalize_depiction() { return normalize_depiction(1, -1.); }
void straighten_depiction(bool minimizeRotation);
void straighten_depiction() { straighten_depiction(false); }
// Splits the molecule into fragments according to the options in
// details_json. Returns a JSMolIterator over the fragments together with a
// JSON string describing the atom/fragment mappings.
std::pair<JSMolIterator *, std::string> get_frags(
const std::string &details_json);
// Overload without options: forwards an empty JSON object ("{}").
std::pair<JSMolIterator *, std::string> get_frags() {
  const std::string emptyDetails("{}");
  return get_frags(emptyDetails);
}
std::unique_ptr<RDKit::RWMol> d_mol;
static constexpr int d_defaultWidth = 250;
static constexpr int d_defaultHeight = 200;
};
class JSMolIterator {
public:
JSMolIterator(const std::vector<RDKit::ROMOL_SPTR> &mols)
: d_mols(mols), d_idx(0){};
JSMol *next() {
return (d_idx < d_mols.size()
? new JSMol(new RDKit::RWMol(*d_mols.at(d_idx++)))
: nullptr);
}
void reset() { d_idx = 0; }
bool at_end() { return d_idx == d_mols.size(); }
size_t size() { return d_mols.size(); }
private:
std::vector<RDKit::ROMOL_SPTR> d_mols;
size_t d_idx;
};
class JSReaction {
public:
JSReaction() : d_rxn(nullptr) {}

View File

@@ -1062,6 +1062,49 @@ M END
assert(!svg2.includes("atom-10"));
}
/**
 * Exercises JSMol.get_frags():
 *  - a sanitizable three-fragment molecule (fragment SMILES + mappings),
 *  - a non-sanitizable molecule, which must throw with default options and
 *    succeed when sanitizeFrags is false.
 *
 * Every object obtained from the module (parent molecules, fragments and
 * iterators) is delete()d. Fragments get their own block-scoped name so they
 * no longer overwrite the parent molecule handle, which previously made it
 * impossible to delete the parent.
 */
function test_get_frags() {
    {
        const mol = RDKitModule.get_mol("n1ccccc1.CC(C)C.OCCCN");
        const expectedFragSmiles = ["c1ccncc1", "CC(C)C", "NCCCO"];
        const expectedMappings = {
            frags: [0,0,0,0,0,0,1,1,1,1,2,2,2,2,2],
            fragsMolAtomMapping: [[0,1,2,3,4,5],[6,7,8,9],[10,11,12,13,14]],
        };
        const { molIterator, mappings } = mol.get_frags();
        assert(molIterator.size() === 3);
        assert(JSON.stringify(JSON.parse(mappings)) === JSON.stringify(expectedMappings));
        let i = 0;
        while (!molIterator.at_end()) {
            const frag = molIterator.next();
            assert(frag.get_smiles() === expectedFragSmiles[i++]);
            frag.delete();
        }
        // once exhausted, next() must return a falsy value
        assert(!molIterator.next());
        molIterator.delete();
        mol.delete();
    }
    {
        const mol = RDKitModule.get_mol("N(C)(C)(C)C.c1ccc1", JSON.stringify({sanitize: false}));
        const expectedFragSmilesNonSanitized = ["CN(C)(C)C", "c1ccc1"];
        // default options sanitize the fragments, which must fail here
        let exceptionThrown = false;
        try {
            mol.get_frags();
        } catch (e) {
            exceptionThrown = true;
        }
        assert(exceptionThrown);
        const { molIterator } = mol.get_frags(JSON.stringify({sanitizeFrags: false}));
        assert(molIterator.size() === 2);
        let i = 0;
        while (!molIterator.at_end()) {
            const frag = molIterator.next();
            assert(frag.get_smiles() === expectedFragSmilesNonSanitized[i++]);
            frag.delete();
        }
        assert(!molIterator.next());
        molIterator.delete();
        mol.delete();
    }
}
initRDKitModule().then(function(instance) {
var done = {};
const waitAllTestsFinished = () => {
@@ -1107,6 +1150,7 @@ initRDKitModule().then(function(instance) {
test_prop();
test_highlights();
test_add_chiral_hs();
test_get_frags();
waitAllTestsFinished().then(() =>
console.log("Tests finished successfully")
);