mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Expose getMolFrags in CFFI and MinimalLib (#5774)
This commit is contained in:
@@ -1010,6 +1010,97 @@ void test_standardize() {
|
||||
printf("--------------------------\n");
|
||||
}
|
||||
|
||||
/*
 * Checks every fragment pickle returned by get_mol_frags() against the
 * expected SMILES, then releases all memory owned by the result: each
 * fragment pickle, the array of pickles and the array of pickle sizes.
 * The caller must not use either array after this call.
 */
static void check_and_free_mol_frags(char **frags_mpkl_array,
                                     size_t *frags_pkl_sz_array,
                                     size_t num_frags,
                                     const char **expected_smiles) {
  size_t i;
  for (i = 0; i < num_frags; ++i) {
    char *smi;
    assert(frags_pkl_sz_array[i]);
    smi = get_smiles(frags_mpkl_array[i], frags_pkl_sz_array[i], NULL);
    assert(smi);
    assert(!strcmp(smi, expected_smiles[i]));
    free(smi);
    free(frags_mpkl_array[i]);
    frags_mpkl_array[i] = NULL;
  }
  free(frags_mpkl_array);
  free(frags_pkl_sz_array);
}

/*
 * Exercises get_mol_frags():
 *  1. a sanitizable three-fragment molecule, without requesting mappings;
 *  2. the same molecule, requesting the JSON atom/fragment mappings;
 *  3. an unsanitized molecule, where the call is expected to return NULL with
 *     default details and to succeed with {"sanitizeFrags":false}.
 */
void test_get_mol_frags() {
  const char *expected_frag_smiles[] = {"c1ccncc1", "CC(C)C", "NCCCO"};
  const char *expected_frag_smiles_non_sanitized[] = {"CN(C)(C)C", "c1ccc1"};
  const char *expected_mappings =
      "{\"frags\":[0,0,0,0,0,0,1,1,1,1,2,2,2,2,2],\"fragsMolAtomMapping\":[[0,1,2,3,4,5],[6,7,8,9],[10,11,12,13,14]]}";
  char *mpkl;
  size_t mpkl_size;
  size_t *frags_pkl_sz_array = NULL;
  size_t num_frags = 0;
  char **frags_mpkl_array = NULL;
  char *mappings_json = NULL;

  printf("--------------------------\n");
  printf(" test_get_mol_frags\n");

  mpkl = get_mol("n1ccccc1.CC(C)C.OCCCN", &mpkl_size, "");
  /* Fail here rather than in a later, less obvious assertion. */
  assert(mpkl);

  /* Case 1: fragments only, no mappings requested. */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags, "", NULL);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(num_frags == 3);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;

  /* Case 2: fragments plus the JSON mappings. */
  frags_mpkl_array = get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array,
                                   &num_frags, "", &mappings_json);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(mappings_json);
  assert(num_frags == 3);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;
  assert(!strcmp(mappings_json, expected_mappings));
  free(mappings_json);
  mappings_json = NULL;
  free(mpkl);
  mpkl = NULL;

  /* Case 3: molecule parsed without sanitization. */
  mpkl = get_mol("N(C)(C)(C)C.c1ccc1", &mpkl_size, "{\"sanitize\":false}");
  assert(mpkl);
  /* With default details the call is expected to fail cleanly. */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags, "", NULL);
  assert(!frags_mpkl_array);
  assert(!frags_pkl_sz_array);
  assert(num_frags == 0);
  /* Disabling fragment sanitization makes the same input succeed. */
  frags_mpkl_array =
      get_mol_frags(mpkl, mpkl_size, &frags_pkl_sz_array, &num_frags,
                    "{\"sanitizeFrags\":false}", NULL);
  assert(frags_mpkl_array);
  assert(frags_pkl_sz_array);
  assert(num_frags == 2);
  check_and_free_mol_frags(frags_mpkl_array, frags_pkl_sz_array, num_frags,
                           expected_frag_smiles_non_sanitized);
  frags_mpkl_array = NULL;
  frags_pkl_sz_array = NULL;
  free(mpkl);
  mpkl = NULL;
}
|
||||
|
||||
int main() {
|
||||
enable_logging();
|
||||
char *vers = version();
|
||||
@@ -1026,5 +1117,6 @@ int main() {
|
||||
test_modifications();
|
||||
test_coords();
|
||||
test_standardize();
|
||||
test_get_mol_frags();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -296,7 +296,7 @@ extern "C" char *get_mol(const char *input, size_t *pkl_sz,
|
||||
std::unique_ptr<RWMol> mol{MinimalLib::mol_from_input(input, details_json)};
|
||||
if (!mol) {
|
||||
*pkl_sz = 0;
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
unsigned int propFlags = PicklerOps::PropertyPickleOptions::AllProps ^
|
||||
PicklerOps::PropertyPickleOptions::ComputedProps;
|
||||
@@ -323,7 +323,7 @@ extern "C" char *get_rxn(const char *input, size_t *pkl_sz,
|
||||
MinimalLib::rxn_from_input(input, details_json)};
|
||||
if (!rxn) {
|
||||
*pkl_sz = 0;
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
unsigned int propFlags = PicklerOps::PropertyPickleOptions::AllProps ^
|
||||
PicklerOps::PropertyPickleOptions::ComputedProps;
|
||||
@@ -332,6 +332,54 @@ extern "C" char *get_rxn(const char *input, size_t *pkl_sz,
|
||||
return str_to_c(pkl, pkl_sz);
|
||||
}
|
||||
|
||||
extern "C" char **get_mol_frags(const char *pkl, size_t pkl_sz,
|
||||
size_t **frags_pkl_sz_array, size_t *num_frags,
|
||||
const char *details_json,
|
||||
char **mappings_json) {
|
||||
if (!pkl || !pkl_sz || !frags_pkl_sz_array || !num_frags) {
|
||||
return nullptr;
|
||||
}
|
||||
*frags_pkl_sz_array = nullptr;
|
||||
*num_frags = 0;
|
||||
auto mol = mol_from_pkl(pkl, pkl_sz);
|
||||
std::vector<int> frags;
|
||||
std::vector<std::vector<int>> fragsMolAtomMapping;
|
||||
bool sanitizeFrags = true;
|
||||
bool copyConformers = true;
|
||||
if (details_json) {
|
||||
std::string json = details_json;
|
||||
MinimalLib::get_mol_frags_details(json, sanitizeFrags, copyConformers);
|
||||
}
|
||||
std::vector<ROMOL_SPTR> molFrags;
|
||||
try {
|
||||
molFrags = MolOps::getMolFrags(mol, sanitizeFrags, &frags,
|
||||
&fragsMolAtomMapping, copyConformers);
|
||||
} catch (...) {
|
||||
}
|
||||
if (molFrags.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
char **molPklArray = (char **)malloc(sizeof(char *) * molFrags.size());
|
||||
if (!molPklArray) {
|
||||
return nullptr;
|
||||
}
|
||||
*frags_pkl_sz_array = (size_t *)malloc(sizeof(size_t) * molFrags.size());
|
||||
if (!*frags_pkl_sz_array) {
|
||||
free(molPklArray);
|
||||
return nullptr;
|
||||
}
|
||||
memset(molPklArray, 0, sizeof(char *) * molFrags.size());
|
||||
*num_frags = molFrags.size();
|
||||
for (size_t i = 0; i < molFrags.size(); ++i) {
|
||||
mol_to_pkl(*molFrags[i], &molPklArray[i], &(*frags_pkl_sz_array)[i]);
|
||||
}
|
||||
if (mappings_json) {
|
||||
auto res = MinimalLib::get_mol_frags_mappings(frags, fragsMolAtomMapping);
|
||||
*mappings_json = str_to_c(res);
|
||||
}
|
||||
return molPklArray;
|
||||
}
|
||||
|
||||
/// Returns the RDKit version string (rdkitVersion) as a C string produced by
/// str_to_c().
/// NOTE(review): presumably the caller owns the returned buffer and must
/// free() it, like the other str_to_c() results in this API - confirm.
extern "C" char *version() {
  char *versionCopy = str_to_c(rdkitVersion);
  return versionCopy;
}
|
||||
#ifdef RDK_BUILD_THREADSAFE_SSS
|
||||
std::atomic_int logging_needs_init{1};
|
||||
|
||||
@@ -44,6 +44,11 @@ RDKIT_RDKITCFFI_EXPORT char *get_inchi_for_molblock(const char *ctab,
|
||||
RDKIT_RDKITCFFI_EXPORT char *get_inchikey_for_inchi(const char *inchi);
|
||||
RDKIT_RDKITCFFI_EXPORT char *get_rxn(const char *input, size_t *mol_sz,
|
||||
const char *details_json);
|
||||
// Splits the pickled molecule `pkl` (of `pkl_sz` bytes) into its fragments.
// Returns a malloc'ed array of `*num_frags` malloc'ed fragment pickles, whose
// sizes are stored in the malloc'ed array `*frags_pkl_sz_array`; the caller
// releases each pickle and both arrays with free(). Returns NULL when `pkl`,
// `pkl_sz`, `frags_pkl_sz_array` or `num_frags` is NULL/zero, or when no
// fragments could be produced.
// `details_json` may be NULL or empty; the recognized keys are
// "sanitizeFrags" and "copyConformers" (both default to true).
// `mappings_json` may be NULL; otherwise on success it receives a JSON string
// with the "frags" and "fragsMolAtomMapping" arrays, which the caller must
// also free().
RDKIT_RDKITCFFI_EXPORT char **get_mol_frags(
    const char *pkl,
    size_t pkl_sz,
    size_t **frags_pkl_sz_array,
    size_t *num_frags,
    const char *details_json,
    char **mappings_json);
|
||||
|
||||
// substructure
|
||||
RDKIT_RDKITCFFI_EXPORT char *get_substruct_match(const char *mol_pkl,
|
||||
|
||||
@@ -926,6 +926,44 @@ std::string generate_aligned_coords(ROMol &mol, const ROMol &templateMol,
|
||||
return res;
|
||||
}
|
||||
|
||||
void get_mol_frags_details(const std::string &details_json, bool &sanitizeFrags,
|
||||
bool ©Conformers) {
|
||||
boost::property_tree::ptree pt;
|
||||
if (!details_json.empty()) {
|
||||
std::istringstream ss;
|
||||
ss.str(details_json);
|
||||
boost::property_tree::read_json(ss, pt);
|
||||
LPT_OPT_GET(sanitizeFrags);
|
||||
LPT_OPT_GET(copyConformers);
|
||||
}
|
||||
}
|
||||
|
||||
std::string get_mol_frags_mappings(
|
||||
const std::vector<int> &frags,
|
||||
const std::vector<std::vector<int>> &fragsMolAtomMapping) {
|
||||
rj::Document doc;
|
||||
doc.SetObject();
|
||||
rj::Value rjFrags(rj::kArrayType);
|
||||
for (int fragIdx : frags) {
|
||||
rjFrags.PushBack(fragIdx, doc.GetAllocator());
|
||||
}
|
||||
doc.AddMember("frags", rjFrags, doc.GetAllocator());
|
||||
rj::Value rjFragsMolAtomMapping(rj::kArrayType);
|
||||
for (const auto &atomIdxVec : fragsMolAtomMapping) {
|
||||
rj::Value rjAtomIndices(rj::kArrayType);
|
||||
for (int atomIdx : atomIdxVec) {
|
||||
rjAtomIndices.PushBack(atomIdx, doc.GetAllocator());
|
||||
}
|
||||
rjFragsMolAtomMapping.PushBack(rjAtomIndices, doc.GetAllocator());
|
||||
}
|
||||
doc.AddMember("fragsMolAtomMapping", rjFragsMolAtomMapping,
|
||||
doc.GetAllocator());
|
||||
rj::StringBuffer buffer;
|
||||
rj::Writer<rj::StringBuffer> writer(buffer);
|
||||
doc.Accept(writer);
|
||||
return buffer.GetString();
|
||||
}
|
||||
|
||||
} // namespace MinimalLib
|
||||
} // namespace RDKit
|
||||
#undef LPT_OPT_GET
|
||||
|
||||
@@ -297,6 +297,19 @@ emscripten::val get_maccs_fp_as_uint8array(const JSMol &self) {
|
||||
return binary_string_to_uint8array(fp);
|
||||
}
|
||||
|
||||
// Wraps JSMol::get_frags() for JavaScript: returns an object whose
// "molIterator" property is the fragment iterator and whose "mappings"
// property is the JSON mappings string.
emscripten::val get_frags_helper(JSMol &self, const std::string &details) {
  auto res = self.get_frags(details);
  auto obj = emscripten::val::object();
  obj.set("molIterator", res.first);
  obj.set("mappings", res.second);
  return obj;
}

// Same as above, using an empty JSON object as details.
emscripten::val get_frags_helper(JSMol &self) {
  return get_frags_helper(self, "{}");
}
|
||||
|
||||
#ifdef RDK_BUILD_AVALON_SUPPORT
|
||||
emscripten::val get_avalon_fp_as_uint8array(const JSMol &self,
|
||||
const std::string &details) {
|
||||
@@ -382,6 +395,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
|
||||
select_overload<emscripten::val(const JSMol &, const std::string &)>(
|
||||
get_atom_pair_fp_as_uint8array))
|
||||
.function("get_maccs_fp_as_uint8array", &get_maccs_fp_as_uint8array)
|
||||
.function("get_frags",
|
||||
select_overload<emscripten::val(JSMol &, const std::string &)>(
|
||||
get_frags_helper),
|
||||
allow_raw_pointers())
|
||||
.function("get_frags",
|
||||
select_overload<emscripten::val(JSMol &)>(get_frags_helper),
|
||||
allow_raw_pointers())
|
||||
#ifdef RDK_BUILD_AVALON_SUPPORT
|
||||
.function("get_avalon_fp_as_uint8array",
|
||||
select_overload<emscripten::val(const JSMol &)>(
|
||||
@@ -478,6 +498,12 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
|
||||
.function("straighten_depiction",
|
||||
select_overload<void(bool)>(&JSMol::straighten_depiction));
|
||||
|
||||
class_<JSMolIterator>("MolIterator")
|
||||
.function("next", &JSMolIterator::next, allow_raw_pointers())
|
||||
.function("reset", &JSMolIterator::reset)
|
||||
.function("at_end", &JSMolIterator::at_end)
|
||||
.function("size", &JSMolIterator::size);
|
||||
|
||||
class_<JSReaction>("Reaction")
|
||||
#ifdef __EMSCRIPTEN__
|
||||
.function("draw_to_canvas_with_offset", &draw_rxn_to_canvas_with_offset)
|
||||
|
||||
@@ -571,6 +571,24 @@ void JSMol::straighten_depiction(bool minimizeRotation) {
|
||||
RDDepict::straightenDepiction(*d_mol, -1, minimizeRotation);
|
||||
}
|
||||
|
||||
std::pair<JSMolIterator *, std::string> JSMol::get_frags(
|
||||
const std::string &details_json) {
|
||||
if (!d_mol) {
|
||||
return std::make_pair(nullptr, "");
|
||||
}
|
||||
std::vector<int> frags;
|
||||
std::vector<std::vector<int>> fragsMolAtomMapping;
|
||||
bool sanitizeFrags = true;
|
||||
bool copyConformers = true;
|
||||
MinimalLib::get_mol_frags_details(details_json, sanitizeFrags,
|
||||
copyConformers);
|
||||
auto molFrags = MolOps::getMolFrags(*d_mol, sanitizeFrags, &frags,
|
||||
&fragsMolAtomMapping, copyConformers);
|
||||
return std::make_pair(
|
||||
new JSMolIterator(molFrags),
|
||||
MinimalLib::get_mol_frags_mappings(frags, fragsMolAtomMapping));
|
||||
}
|
||||
|
||||
std::string JSReaction::get_svg(int w, int h) const {
|
||||
if (!d_rxn) {
|
||||
return "";
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include <GraphMol/ChemReactions/Reaction.h>
|
||||
#include <GraphMol/ChemReactions/ReactionParser.h>
|
||||
|
||||
class JSMolIterator;
|
||||
|
||||
class JSMol {
|
||||
public:
|
||||
JSMol() : d_mol(nullptr) {}
|
||||
@@ -125,12 +127,35 @@ class JSMol {
|
||||
double normalize_depiction() { return normalize_depiction(1, -1.); }
|
||||
void straighten_depiction(bool minimizeRotation);
|
||||
void straighten_depiction() { straighten_depiction(false); }
|
||||
// Returns an iterator over the fragments of this molecule paired with a
// string describing the mappings; details_json carries the options.
std::pair<JSMolIterator *, std::string> get_frags(
    const std::string &details_json);
// Convenience overload: forwards an empty JSON object as details.
std::pair<JSMolIterator *, std::string> get_frags() {
  return get_frags(std::string("{}"));
}
|
||||
|
||||
std::unique_ptr<RDKit::RWMol> d_mol;
|
||||
static constexpr int d_defaultWidth = 250;
|
||||
static constexpr int d_defaultHeight = 200;
|
||||
};
|
||||
|
||||
class JSMolIterator {
|
||||
public:
|
||||
JSMolIterator(const std::vector<RDKit::ROMOL_SPTR> &mols)
|
||||
: d_mols(mols), d_idx(0){};
|
||||
JSMol *next() {
|
||||
return (d_idx < d_mols.size()
|
||||
? new JSMol(new RDKit::RWMol(*d_mols.at(d_idx++)))
|
||||
: nullptr);
|
||||
}
|
||||
void reset() { d_idx = 0; }
|
||||
bool at_end() { return d_idx == d_mols.size(); }
|
||||
size_t size() { return d_mols.size(); }
|
||||
|
||||
private:
|
||||
std::vector<RDKit::ROMOL_SPTR> d_mols;
|
||||
size_t d_idx;
|
||||
};
|
||||
|
||||
class JSReaction {
|
||||
public:
|
||||
JSReaction() : d_rxn(nullptr) {}
|
||||
|
||||
@@ -1062,6 +1062,49 @@ M END
|
||||
assert(!svg2.includes("atom-10"));
|
||||
}
|
||||
|
||||
/**
 * Exercises JSMol.get_frags():
 *  - a sanitizable three-fragment molecule (fragment SMILES and mappings);
 *  - an unsanitized molecule, where get_frags() must throw with default
 *    details and succeed with {sanitizeFrags: false}.
 */
function test_get_frags() {
    const expectedFragSmiles = ["c1ccncc1", "CC(C)C", "NCCCO"];
    const expectedFragSmilesNonSanitized = ["CN(C)(C)C", "c1ccc1"];
    const expectedMappings = {
        frags: [0,0,0,0,0,0,1,1,1,1,2,2,2,2,2],
        fragsMolAtomMapping: [[0,1,2,3,4,5],[6,7,8,9],[10,11,12,13,14]],
    };

    // Walks the iterator, compares each fragment's SMILES with the expected
    // value, and deletes every fragment and finally the iterator itself.
    const checkAndDeleteFrags = (molIterator, expectedSmiles) => {
        let i = 0;
        while (!molIterator.at_end()) {
            // A distinct name keeps the parent molecule reachable so that
            // the caller can still delete it.
            const frag = molIterator.next();
            assert(frag.get_smiles() === expectedSmiles[i++]);
            frag.delete();
        }
        assert(!molIterator.next());
        molIterator.delete();
    };

    {
        const mol = RDKitModule.get_mol("n1ccccc1.CC(C)C.OCCCN");
        const { molIterator, mappings } = mol.get_frags();
        assert(molIterator.size() === 3);
        assert(JSON.stringify(JSON.parse(mappings)) === JSON.stringify(expectedMappings));
        checkAndDeleteFrags(molIterator, expectedFragSmiles);
        mol.delete();
    }
    {
        const mol = RDKitModule.get_mol("N(C)(C)(C)C.c1ccc1", JSON.stringify({sanitize: false}));
        let exceptionThrown = false;
        try {
            mol.get_frags();
        } catch (e) {
            exceptionThrown = true;
        }
        assert(exceptionThrown);
        const { molIterator } = mol.get_frags(JSON.stringify({sanitizeFrags: false}));
        assert(molIterator.size() === 2);
        checkAndDeleteFrags(molIterator, expectedFragSmilesNonSanitized);
        mol.delete();
    }
}
|
||||
|
||||
initRDKitModule().then(function(instance) {
|
||||
var done = {};
|
||||
const waitAllTestsFinished = () => {
|
||||
@@ -1107,6 +1150,7 @@ initRDKitModule().then(function(instance) {
|
||||
test_prop();
|
||||
test_highlights();
|
||||
test_add_chiral_hs();
|
||||
test_get_frags();
|
||||
waitAllTestsFinished().then(() =>
|
||||
console.log("Tests finished successfully")
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user