mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
458 lines
20 KiB
C++
458 lines
20 KiB
C++
//
|
|
// Copyright (C) David Cosgrove 2024.
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#include <csignal>
|
|
|
|
#include <RDBoost/python.h>
|
|
#include <RDBoost/Wrap.h>
|
|
|
|
#include <GraphMol/ROMol.h>
|
|
#include <GraphMol/RascalMCES/RascalOptions.h>
|
|
#include <GraphMol/SynthonSpaceSearch/SynthonSpace.h>
|
|
|
|
namespace python = boost::python;
|
|
|
|
namespace RDKit {
|
|
// Builds a Python list from the hit molecules held by a SearchResults
// object.  Each entry is a freshly constructed ROMol (copy-constructed from
// the dereferenced hit) owned by a boost::shared_ptr.
python::list hitMolecules_helper(const SynthonSpaceSearch::SearchResults &res) {
  python::list hits;
  const auto &hitMols = res.getHitMolecules();
  for (auto it = hitMols.begin(); it != hitMols.end(); ++it) {
    hits.append(boost::make_shared<ROMol>(**it));
  }
  return hits;
}
|
|
|
|
struct SearchResults_wrapper {
|
|
static void wrap() {
|
|
const std::string docString =
|
|
"Used to return results of SynthonSpace searches.";
|
|
python::class_<SynthonSpaceSearch::SearchResults>(
|
|
"SubstructureResult", docString.c_str(), python::no_init)
|
|
.def("GetHitMolecules", hitMolecules_helper, python::args("self"),
|
|
"A function returning hits from the search")
|
|
.def("GetMaxNumResults",
|
|
&SynthonSpaceSearch::SearchResults::getMaxNumResults,
|
|
"The upper bound on number of results possible. There"
|
|
" may be fewer than this in practice for several reasons"
|
|
" such as duplicate reagent sets being removed or the"
|
|
" final product not matching the query even though the"
|
|
" synthons suggested they would.")
|
|
.def("GetTimedOut", &SynthonSpaceSearch::SearchResults::getTimedOut,
|
|
"Returns whether the search timed out or not.")
|
|
.def("GetCancelled", &SynthonSpaceSearch::SearchResults::getCancelled,
|
|
"Returns whether the search was cancelled or not.");
|
|
}
|
|
};
|
|
|
|
// Wrapper around SynthonSpace::substructureSearch() for an ROMol query.
//
// py_smParams / py_params may be None, in which case default-constructed
// SubstructMatchParameters / SynthonSpaceSearchParams are used.  The search
// itself runs inside a NOGIL scope (presumably releasing the Python GIL for
// its duration).  If the returned results report cancellation,
// throw_runtime_error() is called instead of returning them.
SynthonSpaceSearch::SearchResults substructureSearch_helper1(
    SynthonSpaceSearch::SynthonSpace &self, const ROMol &query,
    const python::object &py_smParams, const python::object &py_params) {
  SubstructMatchParameters matchParams;
  if (!py_smParams.is_none()) {
    matchParams = python::extract<SubstructMatchParameters>(py_smParams);
  }

  SynthonSpaceSearch::SynthonSpaceSearchParams searchParams;
  if (!py_params.is_none()) {
    searchParams =
        python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
            py_params);
  }

  SynthonSpaceSearch::SearchResults hits;
  {
    NOGIL noGil;
    hits = self.substructureSearch(query, matchParams, searchParams);
  }

  const bool wasCancelled = hits.getCancelled();
  if (wasCancelled) {
    throw_runtime_error("SubstructureSearch cancelled");
  }
  return hits;
}
|
|
|
|
// Wrapper around SynthonSpace::substructureSearch() for an
// ExtendedQueryMol query.
//
// py_smParams / py_params may be None, in which case default-constructed
// parameter objects are used.  The search runs inside a NOGIL scope
// (presumably with the Python GIL released); a cancelled search is reported
// through throw_runtime_error() rather than by returning the results.
SynthonSpaceSearch::SearchResults substructureSearch_helper2(
    SynthonSpaceSearch::SynthonSpace &self,
    const GeneralizedSubstruct::ExtendedQueryMol &query,
    const python::object &py_smParams, const python::object &py_params) {
  SubstructMatchParameters matchParams;
  const bool haveMatchParams = !py_smParams.is_none();
  if (haveMatchParams) {
    matchParams = python::extract<SubstructMatchParameters>(py_smParams);
  }

  SynthonSpaceSearch::SynthonSpaceSearchParams searchParams;
  const bool haveSearchParams = !py_params.is_none();
  if (haveSearchParams) {
    searchParams =
        python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
            py_params);
  }

  SynthonSpaceSearch::SearchResults hits;
  {
    NOGIL noGil;
    hits = self.substructureSearch(query, matchParams, searchParams);
  }

  if (hits.getCancelled()) {
    throw_runtime_error("SubstructureSearch cancelled");
  }
  return hits;
}
|
|
|
|
struct CallbackAdapter {
|
|
python::object py_callable;
|
|
|
|
bool operator()(std::vector<std::unique_ptr<ROMol>> &results) const {
|
|
python::list pyres;
|
|
for (auto &mol : results) {
|
|
pyres.append(boost::shared_ptr<ROMol>(mol.release()));
|
|
}
|
|
return bool(py_callable(pyres));
|
|
}
|
|
};
|
|
|
|
// Incremental substructure search: results are delivered to `py_callable`
// (wrapped in a CallbackAdapter) instead of being returned.
//
// py_smParams / py_params may be None, in which case default-constructed
// parameter objects are used.  No NOGIL scope is opened here; presumably
// because the callback executes Python code.
static void substructureSearch_helper3(SynthonSpaceSearch::SynthonSpace &self,
                                       const ROMol &query,
                                       python::object py_callable,
                                       const python::object &py_smParams,
                                       const python::object &py_params) {
  SubstructMatchParameters matchParams;
  if (!py_smParams.is_none()) {
    matchParams = python::extract<SubstructMatchParameters>(py_smParams);
  }

  SynthonSpaceSearch::SynthonSpaceSearchParams searchParams;
  if (!py_params.is_none()) {
    searchParams =
        python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
            py_params);
  }

  CallbackAdapter onHits{py_callable};
  self.substructureSearch(query, onHits, matchParams, searchParams);
}
|
|
|
|
// Wrapper around SynthonSpace::fingerprintSearch().
//
//  self                  the SynthonSpace to search
//  query                 the query molecule
//  fingerprintGenerator  Python object wrapping a
//                        FingerprintGenerator<std::uint64_t>
//  py_params             a SynthonSpaceSearchParams object, or None to use
//                        default-constructed parameters
//
// Returns the SearchResults of the search.  throw_runtime_error() is called
// if no fingerprint generator is supplied (None) or if the search reports
// that it was cancelled.
SynthonSpaceSearch::SearchResults fingerprintSearch_helper(
    SynthonSpaceSearch::SynthonSpace &self, const ROMol &query,
    const python::object &fingerprintGenerator,
    const python::object &py_params) {
  SynthonSpaceSearch::SynthonSpaceSearchParams params;
  if (!py_params.is_none()) {
    params = python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
        py_params);
  }

  // All conversions from Python objects must happen before the NOGIL scope
  // is entered: python::extract inspects the Python object and may set a
  // Python error, neither of which is safe without the GIL.
  const FingerprintGenerator<std::uint64_t> *fpGen =
      python::extract<FingerprintGenerator<std::uint64_t> *>(
          fingerprintGenerator);
  // Extracting a pointer from None yields a null pointer; refuse it here
  // rather than dereferencing it below.
  if (fpGen == nullptr) {
    throw_runtime_error("FingerprintSearch requires a fingerprint generator");
  }

  SynthonSpaceSearch::SearchResults results;
  {
    // Only the pure C++ search runs inside the NOGIL scope.
    NOGIL gil;
    results = self.fingerprintSearch(query, *fpGen, params);
  }
  if (results.getCancelled()) {
    throw_runtime_error("FingerprintSearch cancelled");
  }
  return results;
}
|
|
|
|
// Incremental fingerprint search: results are delivered to `py_callable`
// (wrapped in a CallbackAdapter) instead of being returned.
//
//  self                  the SynthonSpace to search
//  query                 the query molecule
//  fingerprintGenerator  Python object wrapping a
//                        FingerprintGenerator<std::uint64_t>
//  py_callable           Python callable that receives the hits
//  py_params             a SynthonSpaceSearchParams object, or None to use
//                        default-constructed parameters
//
// throw_runtime_error() is called if no fingerprint generator is supplied.
static void fingerprintSearch_helper_2(
    SynthonSpaceSearch::SynthonSpace &self, const ROMol &query,
    const python::object &fingerprintGenerator, python::object py_callable,
    const python::object &py_params) {
  SynthonSpaceSearch::SynthonSpaceSearchParams params;
  if (!py_params.is_none()) {
    params = python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
        py_params);
  }
  const FingerprintGenerator<std::uint64_t> *fpGen =
      python::extract<FingerprintGenerator<std::uint64_t> *>(
          fingerprintGenerator);
  // Extracting a pointer from None yields a null pointer; refuse it here
  // rather than dereferencing it below.
  if (fpGen == nullptr) {
    throw_runtime_error(
        "FingerprintSearchIncremental requires a fingerprint generator");
  }

  CallbackAdapter callback{py_callable};
  self.fingerprintSearch(query, *fpGen, callback, params);
}
|
|
|
|
// Wrapper around SynthonSpace::rascalSearch().
//
//  self              the SynthonSpace to search
//  query             the query molecule
//  py_rascalOptions  a RascalOptions object (required; it is extracted
//                    unconditionally)
//  py_params         a SynthonSpaceSearchParams object, or None to use
//                    default-constructed parameters
//
// Returns the SearchResults of the search.  As with the substructure and
// fingerprint search helpers, throw_runtime_error() is called if the search
// reports that it was cancelled.
SynthonSpaceSearch::SearchResults rascalSearch_helper(
    SynthonSpaceSearch::SynthonSpace &self, const ROMol &query,
    const python::object &py_rascalOptions, const python::object &py_params) {
  RascalMCES::RascalOptions rascalOptions;
  rascalOptions = python::extract<RascalMCES::RascalOptions>(py_rascalOptions);
  SynthonSpaceSearch::SynthonSpaceSearchParams params;
  if (!py_params.is_none()) {
    params = python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
        py_params);
  }

  SynthonSpaceSearch::SearchResults results;
  {
    // Only the pure C++ search runs inside the NOGIL scope.
    NOGIL gil;
    results = self.rascalSearch(query, rascalOptions, params);
  }
  // Report a cancelled search as an error instead of silently handing back
  // whatever had been found so far.
  if (results.getCancelled()) {
    throw_runtime_error("RascalSearch cancelled");
  }
  return results;
}
|
|
|
|
// Incremental Rascal search: results are delivered to `py_callable`
// (wrapped in a CallbackAdapter) instead of being returned.
//
// py_rascalOptions is extracted unconditionally, so it must hold a
// RascalOptions object.  py_params may be None, in which case
// default-constructed search parameters are used.
static void rascalSearch_helper_2(SynthonSpaceSearch::SynthonSpace &self,
                                  const ROMol &query,
                                  const python::object &py_rascalOptions,
                                  python::object py_callable,
                                  const python::object &py_params) {
  RascalMCES::RascalOptions options;
  options = python::extract<RascalMCES::RascalOptions>(py_rascalOptions);

  SynthonSpaceSearch::SynthonSpaceSearchParams searchParams;
  const bool haveSearchParams = !py_params.is_none();
  if (haveSearchParams) {
    searchParams =
        python::extract<SynthonSpaceSearch::SynthonSpaceSearchParams>(
            py_params);
  }

  CallbackAdapter onHits{py_callable};
  self.rascalSearch(query, options, onHits, searchParams);
}
|
|
|
|
// Writes the SynthonSpace summary to the C++ standard output stream.
void summariseHelper(SynthonSpaceSearch::SynthonSpace &self) {
  std::ostream &out = std::cout;
  self.summarise(out);
}
|
|
|
|
void convertTextToDBFile_helper(const std::string &inFilename,
|
|
const std::string &outFilename,
|
|
python::object fpGen) {
|
|
const FingerprintGenerator<std::uint64_t> *fpGenCpp = nullptr;
|
|
if (fpGen) {
|
|
fpGenCpp = python::extract<FingerprintGenerator<std::uint64_t> *>(fpGen);
|
|
}
|
|
bool cancelled = false;
|
|
SynthonSpaceSearch::convertTextToDBFile(inFilename, outFilename, cancelled,
|
|
fpGenCpp);
|
|
if (cancelled) {
|
|
throw_runtime_error("Database conversion cancelled");
|
|
}
|
|
}
|
|
|
|
// Wrapper around SynthonSpace::readTextFile().  The read runs inside a
// NOGIL scope (presumably with the Python GIL released); if it sets its
// `cancelled` flag, throw_runtime_error() is called afterwards.
void readTextFile_helper(SynthonSpaceSearch::SynthonSpace &self,
                         const std::string &inFilename) {
  bool wasCancelled = false;
  {
    NOGIL noGil;
    self.readTextFile(inFilename, wasCancelled);
  }
  if (!wasCancelled) {
    return;
  }
  throw_runtime_error("Database read cancelled.");
}
|
|
|
|
// Definition of the rdSynthonSpaceSearch Python module.  Registers, in
// order: the search results class, the search parameters class, the
// SynthonSpace class with its search methods, and two free functions.
//
// Doc strings are built from adjacent string literals, which the compiler
// concatenates with nothing in between, so every continuation literal must
// carry its own leading space or newline.
BOOST_PYTHON_MODULE(rdSynthonSpaceSearch) {
  python::scope().attr("__doc__") =
      "Module containing implementation of SynthonSpace search of"
      " Synthon-based chemical libraries such as Enamine REAL."
      "  NOTE: This functionality is experimental and the API"
      " and/or results may change in future releases.";

  SearchResults_wrapper::wrap();

  // --- Search parameters -------------------------------------------------
  std::string docString = "SynthonSpaceSearch parameters.";
  python::class_<SynthonSpaceSearch::SynthonSpaceSearchParams,
                 boost::noncopyable>("SynthonSpaceSearchParams",
                                     docString.c_str())
      .def_readwrite("maxHits",
                     &SynthonSpaceSearch::SynthonSpaceSearchParams::maxHits,
                     "The maximum number of hits to return.  Default=1000."
                     "  Use -1 for no maximum.")
      .def_readwrite(
          "maxNumFrags",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::maxNumFragSets,
          "The maximum number of fragments the query can be broken into."
          "  Big molecules will create huge numbers of fragments that may cause"
          " excessive memory use.  If the number of fragments hits this number,"
          " fragmentation stops and the search results will likely be incomplete."
          "  Default=100000.")
      .def_readwrite(
          "hitStart", &SynthonSpaceSearch::SynthonSpaceSearchParams::hitStart,
          "The sequence number of the hit to start from.  So that you"
          " can return the next N hits of a search having already"
          " obtained N-1.  Default=0")
      .def_readwrite(
          "randomSample",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::randomSample,
          "If True, returns a random sample of the hits, up to maxHits"
          " in number.  Default=False.")
      .def_readwrite(
          "randomSeed",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::randomSeed,
          "If using randomSample, this seeds the random number"
          " generator so as to give reproducible results.  Default=-1"
          " means use a random seed.")
      .def_readwrite("buildHits",
                     &SynthonSpaceSearch::SynthonSpaceSearchParams::buildHits,
                     "If false, reports the maximum number of hits that"
                     " the search could produce, but doesn't return them.")
      .def_readwrite(
          "numRandomSweeps",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::numRandomSweeps,
          "The random sampling doesn't always produce the"
          " required number of hits in 1 go.  This parameter"
          " controls how many loops it makes to try and get"
          " the hits before giving up.  Default=10.")
      .def_readwrite(
          "similarityCutoff",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::similarityCutoff,
          "Similarity cutoff for returning hits by fingerprint similarity."
          "  At present the fp is hard-coded to be Morgan, bits, radius=2."
          "  Default=0.5.")
      .def_readwrite(
          "fragSimilarityAdjuster",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::fragSimilarityAdjuster,
          "Similarities of fragments are generally low due to low bit"
          " densities.  For the fragment matching, reduce the similarity cutoff"
          " by this amount.  Default=0.1.")
      .def_readwrite(
          "approxSimilarityAdjuster",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::
              approxSimilarityAdjuster,
          "The fingerprint search uses an approximate similarity method"
          " before building a product and doing a final check.  The"
          " similarityCutoff is reduced by this value for the approximate"
          " check.  A lower value will give faster run times at the"
          " risk of missing some hits.  The value you use should have a"
          " positive correlation with your FOMO.  The default of 0.1 is"
          " appropriate for Morgan fingerprints.  With RDKit fingerprints,"
          " 0.05 is adequate, and higher than that has been seen to"
          " produce long run times.")
      .def_readwrite(
          "minHitHeavyAtoms",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::minHitHeavyAtoms,
          "Minimum number of heavy atoms in a hit.  Default=0.")
      .def_readwrite(
          "maxHitHeavyAtoms",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::maxHitHeavyAtoms,
          "Maximum number of heavy atoms in a hit.  Default=-1 means no maximum.")
      .def_readwrite("minHitMolWt",
                     &SynthonSpaceSearch::SynthonSpaceSearchParams::minHitMolWt,
                     "Minimum molecular weight for a hit.  Default=0.0.")
      .def_readwrite(
          "maxHitMolWt",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::maxHitMolWt,
          "Maximum molecular weight for a hit.  Default=0.0 means no maximum.")
      .def_readwrite(
          "minHitChiralAtoms",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::minHitChiralAtoms,
          "Minimum number of chiral atoms in a hit.  Default=0.")
      .def_readwrite(
          "maxHitChiralAtoms",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::maxHitChiralAtoms,
          "Maximum number of chiral atoms in a hit.  Default=-1 means no maximum.")
      .def_readwrite(
          "timeOut", &SynthonSpaceSearch::SynthonSpaceSearchParams::timeOut,
          "Time limit for search, in seconds.  Default is 600s, 0 means no"
          " timeout.  Requires an integer")
      .def_readwrite(
          "toTryChunkSize",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::toTryChunkSize,
          "Process possible hits using the given chunk size")
      .def_readwrite(
          "numThreads",
          &SynthonSpaceSearch::SynthonSpaceSearchParams::numThreads,
          "The number of threads to use for search.  If > 0, will use that"
          " number.  If <= 0, will use the number of hardware"
          " threads plus this number.  So if the number of"
          " hardware threads is 8, and numThreads is -1, it will"
          " use 7 threads.  Default=1.")
      .def("__setattr__", &safeSetattr);

  // --- SynthonSpace ------------------------------------------------------
  docString = "SynthonSpaceSearch object.";
  python::class_<SynthonSpaceSearch::SynthonSpace, boost::noncopyable>(
      "SynthonSpace", docString.c_str(), python::init<>())
      .def("ReadTextFile", &readTextFile_helper,
           (python::arg("self"), python::arg("inFile")),
           "Reads text file of the sort used by ChemSpace/Enamine.")
      .def("ReadDBFile", &SynthonSpaceSearch::SynthonSpace::readDBFile,
           (python::arg("self"), python::arg("inFile"),
            python::arg("numThreads") = 1),
           "Reads binary database file.  Takes optional number of threads,"
           " default=1.")
      .def("WriteDBFile", &SynthonSpaceSearch::SynthonSpace::writeDBFile,
           (python::arg("self"), python::arg("outFile")),
           "Writes binary database file.")
      .def("WriteEnumeratedFile",
           &SynthonSpaceSearch::SynthonSpace::writeEnumeratedFile,
           (python::arg("self"), python::arg("outFile")),
           "Writes enumerated library to file.")
      .def("GetNumReactions",
           &SynthonSpaceSearch::SynthonSpace::getNumReactions,
           python::arg("self"),
           "Returns number of reactions in the SynthonSpace.")
      .def("GetNumProducts", &SynthonSpaceSearch::SynthonSpace::getNumProducts,
           python::arg("self"),
           "Returns number of products in the SynthonSpace, with multiple"
           " counting of any duplicates.")
      .def("Summarise", &summariseHelper, python::arg("self"),
           "Writes a summary of the SynthonSpace to stdout.")
      .def("GetSynthonFingerprintType",
           &SynthonSpaceSearch::SynthonSpace::getSynthonFingerprintType,
           python::arg("self"),
           "Returns the information string for the fingerprint generator"
           " used to create this space.")
      .def("SubstructureSearch", &substructureSearch_helper1,
           (python::arg("self"), python::arg("query"),
            python::arg("substructMatchParams") = python::object(),
            python::arg("params") = python::object()),
           "Does a substructure search in the SynthonSpace.")
      .def("SubstructureSearch", &substructureSearch_helper2,
           (python::arg("self"), python::arg("query"),
            python::arg("substructMatchParams") = python::object(),
            python::arg("params") = python::object()),
           "Does a substructure search in the SynthonSpace using an"
           " extended query.")
      .def("SubstructureSearchIncremental", &substructureSearch_helper3,
           (python::arg("self"), python::arg("query"), python::arg("callback"),
            python::arg("substructMatchParams") = python::object(),
            python::arg("params") = python::object()),
           "Does a substructure search in the SynthonSpace returning results"
           " in the callback.")
      .def("FingerprintSearch", &fingerprintSearch_helper,
           (python::arg("self"), python::arg("query"),
            python::arg("fingerprintGenerator"),
            python::arg("params") = python::object()),
           "Does a fingerprint search in the SynthonSpace using the"
           " FingerprintGenerator passed in.")
      .def("FingerprintSearchIncremental", &fingerprintSearch_helper_2,
           (python::arg("self"), python::arg("query"),
            python::arg("fingerprintGenerator"), python::arg("callback"),
            python::arg("params") = python::object()),
           "Does a fingerprint search in the SynthonSpace using the"
           " FingerprintGenerator passed in, returning results in the"
           " callback.")
      .def("RascalSearch", &rascalSearch_helper,
           (python::arg("self"), python::arg("query"),
            python::arg("rascalOptions"),
            python::arg("params") = python::object()),
           "Does a search using the Rascal similarity score.  The similarity"
           " threshold used is provided by rascalOptions, and the one in"
           " params is ignored.")
      .def("RascalSearchIncremental", &rascalSearch_helper_2,
           (python::arg("self"), python::arg("query"),
            python::arg("rascalOptions"), python::arg("callback"),
            python::arg("params") = python::object()),
           "Does a search using the Rascal similarity score.  The similarity"
           " threshold used is provided by rascalOptions, and the one in"
           " params is ignored.  Returns results iteratively in the callback.")
      .def(
          "BuildSynthonFingerprints",
          &SynthonSpaceSearch::SynthonSpace::buildSynthonFingerprints,
          (python::arg("self"), python::arg("fingerprintGenerator")),
          "Build the synthon fingerprints ready for similarity searching.  This"
          " is done automatically when the first similarity search is done, but if"
          " converting a text file to binary format it might need to be done"
          " explicitly.");

  // --- Free functions ----------------------------------------------------
  // The argument list is separated with explicit newlines so that it renders
  // as a list in Python's help().
  docString =
      "Convert the text file into the binary DB file in our format."
      "  Assumes that all synthons from a reaction are contiguous in the input file."
      "  This uses a lot less memory than using ReadTextFile() followed by"
      " WriteDBFile().\n"
      "- inFilename the name of the text file\n"
      "- outFilename the name of the binary file\n"
      "- optional fingerprint generator";
  python::def("ConvertTextToDBFile", &RDKit::convertTextToDBFile_helper,
              (python::arg("inFilename"), python::arg("outFilename"),
               python::arg("fpGen") = python::object()),
              docString.c_str());

  docString =
      "Format an integer with spaces every 3 digits for ease of reading";
  python::def("FormattedIntegerString",
              &RDKit::SynthonSpaceSearch::formattedIntegerString,
              python::arg("value"), docString.c_str());
}
|
|
|
|
} // namespace RDKit
|