// Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc. // and other RDKit contributors // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Novartis Institutes for BioMedical Research Inc. // nor the names of its contributors may be used to endorse or promote // products derived from this software without specific prior written // permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #include #include #include #include #include #include #include namespace python = boost::python; using boost_adaptbx::python::streambuf; namespace RDKit { using GeneralizedSubstruct::ExtendedQueryMol; // Because we need to release the GIL before we launch a thread, we need to make // a thin stub // for every function that does this. This stub exists Because I couldn't // quite figure out // how to make a release GIL call_guard to work with the complexity of the // functions here. // // We could have made helper functions, but this was easier in the end as all we // needed // to do was replace SubstructLibrary with SubstructLibraryWrap in the existing // boost::python wrap below. class SubstructLibraryWrap { public: SubstructLibrary ss; SubstructLibraryWrap() : ss() {} SubstructLibraryWrap(boost::shared_ptr molecules) : ss(molecules) {} SubstructLibraryWrap(boost::shared_ptr molecules, boost::shared_ptr fingerprints) : ss(molecules, fingerprints) {} SubstructLibraryWrap(boost::shared_ptr molecules, boost::shared_ptr keys) : ss(molecules, keys) {} SubstructLibraryWrap(boost::shared_ptr molecules, boost::shared_ptr fingerprints, boost::shared_ptr keys) : ss(molecules, fingerprints, keys) {} SubstructLibraryWrap(const std::string &pickle) : ss(pickle) {} boost::shared_ptr &getMolHolder() { return ss.getMolHolder(); } boost::shared_ptr &getFpHolder() { return ss.getFpHolder(); } boost::shared_ptr &getKeyHolder() { return ss.getKeyHolder(); } unsigned int addMol(const ROMol &mol) { return ss.addMol(mol); } template std::vector getMatches(const Query &query, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1, int maxResults = -1) const { NOGIL h; return ss.getMatches(query, recursionPossible, useChirality, useQueryQueryMatches, numThreads, maxResults); } template std::vector getMatches(const Query &query, const SubstructMatchParameters ¶ms, int numThreads = -1, int maxResults = -1) const { NOGIL h; return ss.getMatches(query, 0, size(), params, numThreads, maxResults); } template std::vector getMatches( const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1, int maxResults = -1) const { NOGIL h; return ss.getMatches(query, startIdx, endIdx, recursionPossible, useChirality, useQueryQueryMatches, numThreads, maxResults); }; template std::vector getMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1, int maxResults = -1) const { NOGIL h; return ss.getMatches(query, startIdx, endIdx, params, numThreads, maxResults); } template unsigned int countMatches(const Query &query, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1) const { NOGIL h; return ss.countMatches(query, 0, size(), recursionPossible, useChirality, useQueryQueryMatches, numThreads); } template unsigned int countMatches(const Query &query, const SubstructMatchParameters ¶ms, int numThreads = -1) const { NOGIL h; return ss.countMatches(query, 0, size(), params, numThreads); } template unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1) const { NOGIL h; return ss.countMatches(query, startIdx, endIdx, recursionPossible, useChirality, useQueryQueryMatches, numThreads); }; template unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const { NOGIL h; return ss.countMatches(query, startIdx, endIdx, params, numThreads); } template bool hasMatch(const Query &query, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1) const { NOGIL h; return ss.hasMatch(query, 0, size(), recursionPossible, useChirality, useQueryQueryMatches, numThreads); } template bool hasMatch(const Query &query, const SubstructMatchParameters ¶ms, int numThreads = -1) const { NOGIL h; return ss.hasMatch(query, 0, size(), params, numThreads); } template bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible = true, bool useChirality = true, bool useQueryQueryMatches = false, int numThreads = -1) const { NOGIL h; return ss.hasMatch(query, startIdx, endIdx, recursionPossible, useChirality, useQueryQueryMatches, numThreads); }; template bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters ¶ms, int numThreads = -1) const { NOGIL h; return ss.hasMatch(query, startIdx, endIdx, params, numThreads); } boost::shared_ptr getMol(unsigned int idx) const { return ss.getMol(idx); } unsigned int size() const { return ss.size(); } }; const char *MolHolderBaseDoc = "Base class for holding molecules used in the Substructure Library.\n" "Instantiations of this class are passed into the SubstructureLibrary.\n" "The API is quite simple: \n" " AddMol(mol) -> adds a molecule to the molecule holder, returns index of " "molecule\n" " GetMol(idx) -> return the molecule at index idx\n"; const char *MolHolderDoc = "Holds raw in-memory molecules\n" " AddMol(mol) -> adds a molecule to the molecule holder, returns index of " "molecule\n" " GetMol(idx,sanitize=True) -> return the molecule at index idx\n"; const char *CachedMolHolderDoc = "Holds molecules in their binary representation.\n" "This allows more molecules to be held in memory at a time\n" " AddMol(mol) -> adds a molecule to the molecule holder, returns index of " "molecule\n\n" " AddBinary(data) -> adds a picked molecule molecule to the molecule " "holder, returns index of molecule\n" " The data is stored as-is, no checking is done for " "validity.\n" " GetMol(idx) -> return the molecule at index idx\n"; const char *CachedSmilesMolHolderDoc = "Holds molecules as smiles string\n" "This allows more molecules to be held in memory at a time\n" " AddMol(mol) -> adds a molecule to the molecule holder, returns index of " "molecule\n\n" " AddSmiles(smiles) -> adds a smiles string to the molecule holder, " "returns index of molecule\n" " The smiles is stored as-is, no checking is done " "for validity.\n" " GetMol(idx) -> return the molecule at index idx\n"; const char *CachedTrustedSmilesMolHolderDoc = "Holds molecules as trusted smiles string\n" "This allows more molecules to be held in memory at a time and avoids " "RDKit sanitization\n" "overhead.\n" "See: " "http://rdkit.blogspot.com/2016/09/avoiding-unnecessary-work-and.html\n" " AddMol(mol) -> adds a molecule to the molecule holder, returns index of " "molecule\n\n" " AddSmiles(smiles) -> adds a smiles string to the molecule holder, " "returns index of molecule\n" " The smiles is stored as-is, no checking is done " "for validity.\n" " GetMol(idx,s) -> return the molecule at index idx, \n" " note, only light sanitization is done here, for instance\n" " the molecules RingInfo is not initialized\n"; const char *PatternHolderDoc = "Holds fingerprints with optional, user-defined number of bits (default: " "2048) used for filtering of molecules."; const char *TautomerPatternHolderDoc = "Holds tautomeric fingerprints with optional, user-defined number of bits " "(default: " "2048) used for filtering of molecules.\n" "These fingerprints are designed to be used with TautomerQueries."; const char *KeyHolderDoc = "Holds keys to return external references to the molecules in the " "molholder.\n" "By default use the _Name property but can be overridden to be any " "property"; const char *SubstructLibraryDoc = "SubstructLibrary: This provides a simple API for substructure searching " "large datasets\n" "The SubstructLibrary takes full advantage of available threads during the " "search operation.\n" "Basic operation is simple\n" "\n" ">>> from __future__ import print_function\n" ">>> import os\n" ">>> from rdkit import Chem, RDConfig\n" ">>> from rdkit.Chem import rdSubstructLibrary\n" ">>> library = rdSubstructLibrary.SubstructLibrary()\n" ">>> for mol in Chem.SDMolSupplier(os.path.join(RDConfig.RDDataDir, \n" "... 'NCI', 'first_200.props.sdf')):\n" "... idx = library.AddMol(mol)\n" ">>> core = Chem.MolFromSmarts('CCCCOC')\n" ">>> indices = library.GetMatches(core)\n" ">>> len(indices)\n" "11\n" "\n" "Substructure matching options can be sent into GetMatches:\n" "\n" ">>> indices = library.GetMatches(core, useChirality=False) \n" ">>> len(indices)\n" "11\n" "\n" "Controlling the number of threads or the maximum number of matches " "returned:\n" "is also available (the default is to run on all cores)\n" "\n" ">>> indices = library.GetMatches(core, numThreads=2, maxResults=10) \n" ">>> len(indices)\n" "10\n" "\n" "Working on larger datasets:\n" "\n" "Molecules are fairly large objects and will limit the number that can be " "kept in memory.\n" "To assist this we supply three other molecule holders:\n" " CachedMolHolder - stores molecules as their pickled representation\n" "\n" " CachedSmilesMolHolder - stores molecules internally as smiles strings\n" "\n" " CachedTrustedSmilesMolHolder = excepts (and stores) molecules as " "trusted smiles strings\n" "\n" "Using Pattern fingerprints as a pre-filter:" "\n" "Pattern fingerprints provide an easy way to indicate whether the " "substructure search should be\n" "be done at all. This is particularly useful with the Binary and Smiles " "based molecule holders\n" "as they have an expensive molecule creation step in addition to the " "substructure searching step\n " "\n" ">>> library = " "rdSubstructLibrary.SubstructLibrary(rdSubstructLibrary." "CachedSmilesMolHolder(), \n" "... " "rdSubstructLibrary.PatternHolder())\n" ">>> for mol in Chem.SDMolSupplier(os.path.join(RDConfig.RDDataDir, \n" "... 'NCI', 'first_200.props.sdf')):\n" "... idx = library.AddMol(mol)\n" ">>> indices = library.GetMatches(core)\n" ">>> len(indices)\n" "11\n" "\n" "This (obviously) takes longer to initialize. However, both the molecule " "and pattern\n" "holders can be populated with raw data, a simple example is below:\n" "\n" ">>> import csv\n" ">>> molholder = rdSubstructLibrary.CachedSmilesMolHolder()\n" ">>> pattern_holder = rdSubstructLibrary.PatternHolder()\n" ">>> with open(os.path.join(RDConfig.RDDataDir, 'NCI', " "'first_200.tpsa.csv')) as inf:\n" "... for i, row in enumerate(csv.reader(inf)):\n" "... if i:\n" "... idx = molholder.AddSmiles(row[0])\n" "... idx2 = pattern_holder.AddFingerprint(\n" "... " "pattern_holder.MakeFingerprint(Chem.MolFromSmiles(row[0])))\n" "... assert idx==idx2\n" ">>> library = " "rdSubstructLibrary.SubstructLibrary(molholder,pattern_holder)\n" ">>> indices = library.GetMatches(core)\n" ">>> len(indices)\n" "11\n" "\n" "Finally, the KeyFromPropHolder can be used to use external keys such as\n" "compound names. By default the holder uses the '_Name' property but can\n" "be changed to any property.\n" "\n" ">>> library = " "rdSubstructLibrary.SubstructLibrary(rdSubstructLibrary.MolHolder(), " "rdSubstructLibrary.KeyFromPropHolder())\n" ">>> m = Chem.MolFromSmiles('CCC')\n" ">>> m.SetProp('_Name', 'Z11234')\n" ">>> idx = library.AddMol(m)\n" ">>> indices = library.GetMatches(m)\n" ">>> list(library.GetKeyHolder().GetKeys(indices))\n" "['Z11234']\n" ""; python::object SubstructLibrary_Serialize(const SubstructLibraryWrap &cat) { std::string res = cat.ss.Serialize(); python::object retval = python::object( python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length()))); return retval; } struct substructlibrary_pickle_suite : rdkit_pickle_suite { static python::tuple getinitargs(const SubstructLibraryWrap &self) { std::string res; if (!SubstructLibraryCanSerialize()) { throw_runtime_error("Pickling of FilterCatalog instances is not enabled"); } res = self.ss.Serialize(); return python::make_tuple(python::object(python::handle<>( PyBytes_FromStringAndSize(res.c_str(), res.length())))); }; }; void toStream(const SubstructLibraryWrap &cat, python::object &fileobj) { streambuf ss(fileobj, 't'); streambuf::ostream ost(ss); cat.ss.toStream(ost); } void initFromStream(SubstructLibraryWrap &cat, python::object &fileobj) { streambuf ss(fileobj, 'b'); // python StringIO can't seek, so need binary data streambuf::istream is(ss); cat.ss.initFromStream(is); } boost::shared_ptr GetMolHolder(SubstructLibraryWrap &sslib) { // need to convert from a ref to a real shared_ptr return sslib.ss.getMolHolder(); } boost::shared_ptr GetFpHolder(SubstructLibraryWrap &sslib) { // need to convert from a ref to a real shared_ptr return sslib.ss.getFpHolder(); } boost::shared_ptr GetKeyHolder(SubstructLibraryWrap &sslib) { // need to convert from a ref to a real shared_ptr return sslib.ss.getKeyHolder(); } python::tuple getSearchOrderHelper(const SubstructLibraryWrap &sslib) { python::list res; for (const auto v : sslib.ss.getSearchOrder()) { res.append(v); } return python::tuple(res); } void setSearchOrderHelper(SubstructLibraryWrap &sslib, const python::object &seq) { std::unique_ptr> sorder = pythonObjectToVect(seq); if (sorder) { sslib.ss.setSearchOrder(*sorder); } else { sslib.ss.getSearchOrder().clear(); } } void addPatternsHelper(SubstructLibraryWrap &sslib, boost::shared_ptr patterns, int numThreads) { NOGIL gil; addPatterns(sslib.ss, patterns, numThreads); } void addPatternsHelper(SubstructLibraryWrap &sslib, int numThreads) { NOGIL gil; addPatterns(sslib.ss, numThreads); } #define LARGE_DEF(_tname_) \ .def("GetMatches", \ (std::vector(SubstructLibraryWrap::*)( \ const _tname_ &, bool, bool, bool, int, int) const) & \ SubstructLibraryWrap::getMatches, \ (python::arg("self"), python::arg("query"), \ python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1, python::arg("maxResults") = 1000), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n" \ " - maxResults: maximum number of results to return") \ .def( \ "GetMatches", \ (std::vector(SubstructLibraryWrap::*)( \ const _tname_ &, unsigned int, unsigned int, bool, bool, bool, \ int, int) const) & \ SubstructLibraryWrap::getMatches, \ (python::arg("self"), python::arg("query"), python::arg("startIdx"), \ python::arg("endIdx"), python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1, python::arg("maxResults") = 1000), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n" \ " - maxResults: maximum number of results to return") \ .def("CountMatches", \ (unsigned int (SubstructLibraryWrap::*)(const _tname_ &, bool, \ bool, bool, int) const) & \ SubstructLibraryWrap::countMatches, \ (python::arg("self"), python::arg("query"), \ python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def( \ "CountMatches", \ (unsigned int (SubstructLibraryWrap::*)( \ const _tname_ &, unsigned int, unsigned int, bool, bool, bool, \ int) const) & \ SubstructLibraryWrap::countMatches, \ (python::arg("self"), python::arg("query"), python::arg("startIdx"), \ python::arg("endIdx"), python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def("HasMatch", \ (bool(SubstructLibraryWrap::*)(const _tname_ &, bool, bool, bool, \ int) const) & \ SubstructLibraryWrap::hasMatch, \ (python::arg("self"), python::arg("query"), \ python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def( \ "HasMatch", \ (bool(SubstructLibraryWrap::*)(const _tname_ &, unsigned int, \ unsigned int, bool, bool, bool, int) \ const) & \ SubstructLibraryWrap::hasMatch, \ (python::arg("self"), python::arg("query"), python::arg("startIdx"), \ python::arg("endIdx"), python::arg("recursionPossible") = true, \ python::arg("useChirality") = true, \ python::arg("useQueryQueryMatches") = false, \ python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def("GetMatches", \ (std::vector(SubstructLibraryWrap::*)( \ const _tname_ &, const SubstructMatchParameters &, int, int) \ const) & \ SubstructLibraryWrap::getMatches, \ (python::arg("self"), python::arg("query"), \ python::arg("parameters"), python::arg("numThreads") = -1, \ python::arg("maxResults") = 1000), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n" \ " - maxResults: maximum number of results to return") \ .def( \ "GetMatches", \ (std::vector(SubstructLibraryWrap::*)( \ const _tname_ &, unsigned int, unsigned int, \ const SubstructMatchParameters &, int, int) const) & \ SubstructLibraryWrap::getMatches, \ (python::arg("self"), python::arg("query"), python::arg("startIdx"), \ python::arg("endIdx"), python::arg("parameters"), \ python::arg("numThreads") = -1, python::arg("maxResults") = 1000), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n" \ " - maxResults: maximum number of results to return") \ .def( \ "CountMatches", \ (unsigned int (SubstructLibraryWrap::*)( \ const _tname_ &, const SubstructMatchParameters &, int) const) & \ SubstructLibraryWrap::countMatches, \ (python::arg("self"), python::arg("query"), \ python::arg("parameters"), python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def("CountMatches", \ (unsigned int (SubstructLibraryWrap::*)( \ const _tname_ &, unsigned int, unsigned int, \ const SubstructMatchParameters &, int) const) & \ SubstructLibraryWrap::countMatches, \ (python::arg("self"), python::arg("query"), \ python::arg("startIdx"), python::arg("endIdx"), \ python::arg("parameters"), python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def( \ "HasMatch", \ (bool(SubstructLibraryWrap::*)( \ const _tname_ &, const SubstructMatchParameters &, int) const) & \ SubstructLibraryWrap::hasMatch, \ (python::arg("self"), python::arg("query"), \ python::arg("parameters"), python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - numThreads: number of threads to use, -1 means all threads\n") \ .def("HasMatch", \ (bool(SubstructLibraryWrap::*)( \ const _tname_ &, unsigned int, unsigned int, \ const SubstructMatchParameters &, int) const) & \ SubstructLibraryWrap::hasMatch, \ (python::arg("self"), python::arg("query"), \ python::arg("startIdx"), python::arg("endIdx"), \ python::arg("parameters"), python::arg("numThreads") = -1), \ "Get the matches for the query.\n\n" \ " Arguments:\n" \ " - query: substructure query\n" \ " - startIdx: index to search from\n" \ " - endIdx: index (non-inclusize) to search to\n" \ " - numThreads: number of threads to use, -1 means all threads\n") struct substructlibrary_wrapper { static void wrap() { python::class_, boost::noncopyable>("MolHolderBase", "", python::no_init) .def("__len__", &MolHolderBase::size, python::args("self")) .def("AddMol", &MolHolderBase::addMol, python::args("self", "m"), "Adds molecule to the molecule holder") .def("GetMol", &MolHolderBase::getMol, python::args("self", "arg1"), "Returns a particular molecule in the molecule holder\n\n" " ARGUMENTS:\n" " - idx: which molecule to return\n\n" " - sanitize: if sanitize is False, return the internal " "molecule state [default True]\n\n" " NOTE: molecule indices start at 0\n") .def("__len__", &MolHolderBase::size, python::args("self")); python::class_, python::bases>( "MolHolder", MolHolderDoc, python::init<>(python::args("self"))); python::class_, python::bases>( "CachedMolHolder", CachedMolHolderDoc, python::init<>(python::args("self"))) .def("AddBinary", &CachedMolHolder::addBinary, (python::args("self", "pickle")), "Add a binary pickle to the molecule holder, no checking is done " "on the input data"); python::class_, python::bases>( "CachedSmilesMolHolder", CachedSmilesMolHolderDoc, python::init<>(python::args("self"))) .def("AddSmiles", &CachedSmilesMolHolder::addSmiles, (python::args("self", "smiles")), "Add a trusted smiles string to the molecule holder, no checking " "is done on the input data"); python::class_, python::bases>( "CachedTrustedSmilesMolHolder", CachedTrustedSmilesMolHolderDoc, python::init<>(python::args("self"))) .def("AddSmiles", &CachedTrustedSmilesMolHolder::addSmiles, (python::args("self", "smiles")), "Add a trusted smiles string to the molecule holder, no checking " "is done on the input data"); python::class_, boost::noncopyable>("FPHolderBase", "", python::no_init) .def("__len__", &FPHolderBase::size, python::args("self")) .def("AddMol", &FPHolderBase::addMol, python::args("self", "m"), "Adds a molecule to the fingerprint database, returns the index " "of the new pattern") .def("AddFingerprint", (unsigned int (FPHolderBase::*)(const ExplicitBitVect &)) & FPHolderBase::addFingerprint, python::args("self", "v"), "Adds a raw bit vector to the fingerprint database, returns the " "index of the supplied pattern") .def("GetFingerprint", &FPHolderBase::getFingerprint, python::return_value_policy(), python::args("self", "idx"), "Return the bit vector at the specified index") .def("PassesFilter", &FPHolderBase::passesFilter, ((python::args("self"), python::args("idx")), python::args("query")), "Returns True if the specified index passes the filter supplied " "by the query bit vector") .def("MakeFingerprint", &FPHolderBase::makeFingerprint, ((python::arg("self"), python::arg("mol"))), python::return_value_policy(), "Compute the query bits for the holder"); python::class_, python::bases>( "PatternHolder", PatternHolderDoc, python::init<>(python::args("self"))) .def(python::init(python::args("self", "numBits"))); python::class_, boost::noncopyable>("KeyHolderBase", "", python::no_init) .def("__len__", &KeyHolderBase::size, python::args("self")) .def("AddMol", &KeyHolderBase::addMol, python::args("self", "m"), "Adds a molecule to the fingerprint database, returns the index " "of the new pattern") .def("AddKey", &KeyHolderBase::addKey, python::args("self", "arg1"), "Add a key to the key holder, must be manually synced") .def("GetKey", &KeyHolderBase::getKey, python::return_value_policy(), python::args("self", "arg1"), "Return the key at the specified index") .def("GetKeys", &KeyHolderBase::getKeys, python::args("self", "indices"), "Returns the keys for the given indices as return by GetMatches " "\n\n" " ARGUMENTS:\n" " - indices: The indices of the keys\n\n"); python::class_, python::bases>( "KeyFromPropHolder", KeyHolderDoc, python::init<>(python::args("self"))) .def( python::init(python::args("self", "propname"))) .def("GetPropName", (const std::string &(KeyFromPropHolder::*)() const) & KeyFromPropHolder::getPropName, python::return_value_policy(), python::args("self"), "Return the key for the given molecule index"); python::class_, python::bases>( "TautomerPatternHolder", TautomerPatternHolderDoc, python::init<>(python::args("self"))) .def(python::init(python::args("self", "numBits"))); python::class_( "SubstructLibrary", SubstructLibraryDoc, python::init<>(python::args("self"))) .def(python::init>( python::args("self", "molecules"))) .def(python::init, boost::shared_ptr>( python::args("self", "molecules", "fingerprints"))) .def(python::init, boost::shared_ptr>( python::args("self", "molecules", "keys"))) .def(python::init, boost::shared_ptr, boost::shared_ptr>( python::args("self", "molecules", "fingerprints", "keys"))) .def(python::init(python::args("self", "pickle"))) .def("GetMolHolder", &GetMolHolder, python::args("self")) .def("GetFpHolder", &GetFpHolder, python::args("self")) .def("GetKeyHolder", &GetKeyHolder, python::args("self")) .def("AddMol", &SubstructLibraryWrap::addMol, ((python::arg("self"), python::arg("mol"))), "Adds a molecule to the substruct library") // clang-format off LARGE_DEF(ROMol) LARGE_DEF(TautomerQuery) LARGE_DEF(MolBundle) LARGE_DEF(ExtendedQueryMol) // clang-format on .def("GetMol", &SubstructLibraryWrap::getMol, python::args("self", "idx"), "Returns a particular molecule in the molecule holder\n\n" " ARGUMENTS:\n" " - idx: which molecule to return\n\n" " NOTE: molecule indices start at 0\n") .def("SetSearchOrder", setSearchOrderHelper, python::args("self", "seq"), "Sets the search order for the library\n\n" " ARGUMENTS:\n" " - order: sequence of molecule indices\n\n" " NOTE: molecule indices start at 0\n") .def("GetSearchOrder", getSearchOrderHelper, python::args("self"), "Returns the search order for the library\n\n" " NOTE: molecule indices start at 0\n") .def("__len__", &SubstructLibraryWrap::size, python::args("self")) .def("ToStream", &toStream, (python::arg("self"), python::arg("stream")), "Serialize a substructure library to a python text stream.\n" "The stream can be a file in text mode or an io.StringIO type " "object\n\n" " ARGUMENTS:\n" " - stream: a text or text stream like object\n\n" " >>> from rdkit.Chem import rdSubstructLibrary\n" " >>> import io\n" " >>> lib = rdSubstructLibrary.SubstructLibrary()\n" " >>> stream = io.StringIO()\n" " >>> lib.ToStream(stream)\n\n" " or\n" " >>> with open('rdkit.sslib', 'w') as stream:\n" " ... lib.ToStream(stream)\n") .def("InitFromStream", &initFromStream, (python::arg("self"), python::arg("stream")), "Deserialize a substructure library from a python bytes stream.\n" "Python doesn't allow seeking operations inside a unicode or " "string stream anymore\n" "so this requires opening a file in binary mode or using an " "io.ByteIO type object\n\n" " ARGUMENTS:\n" " - stream: a binary stream like object\n\n" " SubstructLibrary.Serialize already writes a binary stream\n\n" " >>> from rdkit.Chem import rdSubstructLibrary\n" " >>> import io\n" " >>> lib = rdSubstructLibrary.SubstructLibrary()\n" " >>> stream = io.BytesIO( lib.Serialize() )\n" " >>> lib.InitFromStream(stream)\n\n" " remember to write to text and read from a binary stream\n" " >>> with open('rdkit.sslib', 'w') as f: lib.ToStream(f)\n" " >>> with open('rdkit.sslib', 'rb') as f: " "lib.InitFromStream(f)\n") .def("Serialize", &SubstructLibrary_Serialize, python::args("self")) // enable pickle support .def_pickle(substructlibrary_pickle_suite()); python::def("SubstructLibraryCanSerialize", SubstructLibraryCanSerialize, "Returns True if the SubstructLibrary is serializable " "(requires boost serialization"); python::def("AddPatterns", (void (*)(SubstructLibraryWrap &, int)) & addPatternsHelper, "Add pattern fingerprints to the given library, use " "numThreads=-1 to use all available cores", (python::arg("sslib"), python::arg("numThreads") = 1)); python::def( "AddPatterns", (void (*)(SubstructLibraryWrap &, boost::shared_ptr, int)) & addPatternsHelper, "Add pattern fingerprints to the given library, use numThreads=-1 to " "use all available cores", (python::arg("sslib"), python::arg("patterns"), python::arg("numThreads") = 1)); } }; } // namespace RDKit void wrap_substructlibrary() { RDKit::substructlibrary_wrapper::wrap(); }