mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
* - added gen_rdkit_stubs Python module to generate rdkit-stubs - added patch_rdkit_docstrings Python module to patch existing C++ sources to fix docstrings missing self parameter and add named parameters taken from C++ signatures where possible - added rdkit-stubs/CMakeLists.txt to build rdkit-stubs as part of the RDKit build - added an option to CMakeLists.txt to enable building rdkit-stubs as part of the RDKit build (defaults to OFF) * fixed CMakeLists.txt, rdkit-stubs/CMakeLists.txt and a doctest * - added missing cmp_func parameter - fixed case with overloads with optional parameters - do not trim params if expected_param_count == -1 - add dummy parameter names if we could not find any - take into account member functions when making up parameter names - address __init__ and make_constructor __init__ functions - fix incorrectly assigned staticmethods * patched sources * address residual few remarks --------- Co-authored-by: ptosco <paolo.tosco@novartis.com>
195 lines
7.4 KiB
C++
195 lines
7.4 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
|
|
#define NO_IMPORT_ARRAY
|
|
#include <RDBoost/python.h>
|
|
|
|
#define PY_ARRAY_UNIQUE_SYMBOL rdinfotheory_array_API
|
|
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
|
|
#include <numpy/arrayobject.h>
|
|
|
|
#include <RDBoost/Wrap.h>
|
|
#include <ML/InfoTheory/InfoBitRanker.h>
|
|
#include <DataStructs/BitVects.h>
|
|
#include <RDBoost/PySequenceHolder.h>
|
|
|
|
namespace python = boost::python;
|
|
|
|
namespace RDInfoTheory {
|
|
|
|
// Python-facing helper behind InfoBitRanker.GetTopN().
//
// Calls ranker->getTopN(num) and copies the returned buffer into a newly
// created 2D NumPy array of doubles with `num` rows and
// (ranker->getNumClasses() + 2) columns.
//
// ARGUMENTS:
//   - ranker : the ranker to query
//   - num    : the number of rows to request from the ranker
//
// RETURNS: the new NumPy array (passed through PyArray_Return).
//
// If the NumPy array cannot be created, the pending Python error is
// propagated via boost::python::throw_error_already_set() instead of copying
// into a null array.
PyObject *getTopNbits(InfoBitRanker *ranker, int num) {
  double *dres = ranker->getTopN(num);

  npy_intp dims[2];
  dims[0] = num;
  dims[1] = ranker->getNumClasses() + 2;

  auto *res = reinterpret_cast<PyArrayObject *>(
      PyArray_SimpleNew(2, dims, NPY_DOUBLE));
  if (!res) {
    // PyArray_SimpleNew signals failure by returning NULL; PyArray_DATA on a
    // null array would dereference it, so bail out before the copy.
    python::throw_error_already_set();
  }

  // The data is copied; this function does not take ownership of `dres`.
  memcpy(static_cast<void *>(PyArray_DATA(res)), static_cast<void *>(dres),
         dims[0] * dims[1] * sizeof(double));
  return PyArray_Return(res);
}
|
|
|
|
// Python-facing helper behind InfoBitRanker.AccumulateVotes().
//
// Dispatches on the C++ type held by `bitVect`: an ExplicitBitVect is tried
// first, then a SparseBitVect. The matching vector is copied out of the
// Python object and handed to ranker->accumulateVotes() together with
// `label`. Any other argument type results in a call to throw_value_error().
void AccumulateVotes(InfoBitRanker *ranker, python::object bitVect, int label) {
  python::extract<ExplicitBitVect> asExplicit(bitVect);
  python::extract<SparseBitVect> asSparse(bitVect);

  if (asExplicit.check()) {
    ExplicitBitVect explicitVect = asExplicit();
    ranker->accumulateVotes(explicitVect, label);
    return;
  }

  if (asSparse.check()) {
    SparseBitVect sparseVect = asSparse();
    ranker->accumulateVotes(sparseVect, label);
    return;
  }

  throw_value_error(
      "Accumulate Vote can only take a explicitBitVects or SparseBitvects");
}
|
|
|
|
// Python-facing helper behind InfoBitRanker.SetBiasList().
//
// Reads the integers of the Python sequence `classList` one by one into an
// RDKit::INT_VECT and passes that vector to ranker->setBiasList().
void SetBiasList(InfoBitRanker *ranker, python::object classList) {
  PySequenceHolder<int> classIds(classList);

  RDKit::INT_VECT biasClasses;
  biasClasses.reserve(classIds.size());

  unsigned int pos = 0;
  while (pos < classIds.size()) {
    biasClasses.push_back(classIds[pos]);
    ++pos;
  }

  ranker->setBiasList(biasClasses);
}
|
|
|
|
// Python-facing helper behind InfoBitRanker.SetMaskBits().
//
// Reads the integers of the Python sequence `maskBits` one by one into an
// RDKit::INT_VECT and passes that vector to ranker->setMaskBits().
void SetMaskBits(InfoBitRanker *ranker, python::object maskBits) {
  PySequenceHolder<int> bitIds(maskBits);

  RDKit::INT_VECT maskList;
  maskList.reserve(bitIds.size());

  unsigned int pos = 0;
  while (pos < bitIds.size()) {
    maskList.push_back(bitIds[pos]);
    ++pos;
  }

  ranker->setMaskBits(maskList);
}
|
|
|
|
// Python-facing helper behind InfoBitRanker.Tester().
//
// If `bitVect` holds a SparseBitVect, prints its number of on bits to
// std::cout; any other argument is silently ignored. The ranker argument is
// unused.
void tester(InfoBitRanker *, python::object bitVect) {
  python::extract<SparseBitVect> asSparse(bitVect);
  if (!asSparse.check()) {
    return;
  }

  SparseBitVect sparseVect = asSparse();
  std::cout << "Num of on bits: " << sparseVect.getNumOnBits() << "\n";
}
|
|
|
|
struct ranker_wrap {
|
|
static void wrap() {
|
|
std::string docString =
|
|
"A class to rank the bits from a series of labelled fingerprints\n"
|
|
"A simple demonstration may help clarify what this class does. \n"
|
|
"Here's a small set of vectors:\n\n"
|
|
">>> for i,bv in enumerate(bvs): print(bv.ToBitString(),acts[i])\n"
|
|
"... \n"
|
|
"0001 0\n"
|
|
"0101 0\n"
|
|
"0010 1\n"
|
|
"1110 1\n"
|
|
"\n"
|
|
"Default ranker, using infogain:\n\n"
|
|
">>> ranker = InfoBitRanker(4,2) \n"
|
|
">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
|
|
"... \n"
|
|
">>> for bit,gain,n0,n1 in ranker.GetTopN(3): "
|
|
"print(int(bit),'%.3f'%gain,int(n0),int(n1))\n"
|
|
"... \n"
|
|
"3 1.000 2 0\n"
|
|
"2 1.000 0 2\n"
|
|
"0 0.311 0 1\n"
|
|
"\n"
|
|
"Using the biased infogain:\n\n"
|
|
">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASENTROPY)\n"
|
|
">>> ranker.SetBiasList((1,))\n"
|
|
">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
|
|
"... \n"
|
|
">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print("
|
|
"int(bit),'%.3f'%gain,int(n0),int(n1))\n"
|
|
"... \n"
|
|
"2 1.000 0 2\n"
|
|
"0 0.311 0 1\n"
|
|
"1 0.000 1 1\n"
|
|
"\n"
|
|
"A chi squared ranker is also available:\n\n"
|
|
">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.CHISQUARE)\n"
|
|
">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
|
|
"... \n"
|
|
">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print("
|
|
"int(bit),'%.3f'%gain,int(n0),int(n1))\n"
|
|
"... \n"
|
|
"3 4.000 2 0\n"
|
|
"2 4.000 0 2\n"
|
|
"0 1.333 0 1\n"
|
|
"\n"
|
|
"As is a biased chi squared:\n\n"
|
|
">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASCHISQUARE)\n"
|
|
">>> ranker.SetBiasList((1,))\n"
|
|
">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
|
|
"... \n"
|
|
">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print("
|
|
"int(bit),'%.3f'%gain,int(n0),int(n1))\n"
|
|
"... \n"
|
|
"2 4.000 0 2\n"
|
|
"0 1.333 0 1\n"
|
|
"1 0.000 1 1\n";
|
|
|
|
python::class_<InfoBitRanker>(
|
|
"InfoBitRanker", docString.c_str(),
|
|
python::init<int, int>(python::args("self", "nBits", "nClasses")))
|
|
.def(python::init<int, int, InfoBitRanker::InfoType>(
|
|
python::args("self", "nBits", "nClasses", "infoType")))
|
|
.def("AccumulateVotes", AccumulateVotes,
|
|
python::args("self", "bitVect", "label"),
|
|
"Accumulate the votes for all the bits turned on in a bit "
|
|
"vector\n\n"
|
|
"ARGUMENTS:\n\n"
|
|
" - bv : bit vector either ExplicitBitVect or SparseBitVect "
|
|
"operator\n"
|
|
" - label : the class label for the bit vector. It is assumed "
|
|
"that 0 <= class < nClasses \n")
|
|
.def("SetBiasList", SetBiasList, python::args("self", "classList"),
|
|
"Set the classes to which the entropy calculation should be "
|
|
"biased\n\n"
|
|
"This list contains a set of class ids used when in the "
|
|
"BIASENTROPY mode of ranking bits. \n"
|
|
"In this mode, a bit must be correlated higher with one of the "
|
|
"biased classes than all the \n"
|
|
"other classes. For example, in a two class problem with actives "
|
|
"and inactives, the fraction of \n"
|
|
"actives that hit the bit has to be greater than the fraction of "
|
|
"inactives that hit the bit\n\n"
|
|
"ARGUMENTS: \n\n"
|
|
" - classList : list of class ids that we want a bias towards\n")
|
|
.def("SetMaskBits", SetMaskBits, python::args("self", "maskBits"),
|
|
"Set the mask bits for the calculation\n\n"
|
|
"ARGUMENTS: \n\n"
|
|
" - maskBits : list of mask bits to use\n")
|
|
.def("GetTopN", getTopNbits, python::args("self", "num"),
|
|
"Returns the top n bits ranked by the information metric\n"
|
|
"This is actually the function where most of the work of ranking "
|
|
"is happening\n\n"
|
|
"ARGUMENTS:\n\n"
|
|
" - num : the number of top ranked bits that are required\n")
|
|
.def("WriteTopBitsToFile", &InfoBitRanker::writeTopBitsToFile,
|
|
python::args("self", "fileName"),
|
|
"Write the bits that have been ranked to a file")
|
|
.def("Tester", tester, python::args("self", "bitVect"));
|
|
|
|
python::enum_<InfoBitRanker::InfoType>("InfoType")
|
|
.value("ENTROPY", InfoBitRanker::ENTROPY)
|
|
.value("BIASENTROPY", InfoBitRanker::BIASENTROPY)
|
|
.value("CHISQUARE", InfoBitRanker::CHISQUARE)
|
|
.value("BIASCHISQUARE", InfoBitRanker::BIASCHISQUARE)
|
|
.export_values();
|
|
;
|
|
};
|
|
};
|
|
} // namespace RDInfoTheory
|
|
|
|
// Free-function entry point that forwards to
// RDInfoTheory::ranker_wrap::wrap().
void wrap_ranker() {
  RDInfoTheory::ranker_wrap::wrap();
}
|