Files
rdkit/Code/ML/InfoTheory/Wrap/rdInfoTheory.cpp
Paolo Tosco 2b4202867e Add Python modules to generate stubs and automatically patch docstrings (#6919)
* - added gen_rdkit_stubs Python module to generate rdkit-stubs
- added patch_rdkit_docstrings Python module to patch existing C++ sources to fix docstrings missing self parameter and add named parameters taken from C++ signatures where possible
- added rdkit-stubs/CMakeLists.txt to build rdkit-stubs as part of the RDKit build
- added an option to CMakeLists.txt to enable building rdkit-stubs as part of the RDKit build (defaults to OFF)

* fixed CMakeLists.txt, rdkit-stubs/CMakeLists.txt and a doctest

* - added missing cmp_func parameter
- fixed case with overloads with optional parameters
- do not trim params if expected_param_count == -1
- add dummy parameter names if we could not find any
- take member functions into account when making up parameter names
- address __init__ and make_constructor __init__ functions
- fix incorrectly assigned staticmethods

* patched sources

* address residual few remarks

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
2023-11-30 04:54:18 +01:00

164 lines
6.1 KiB
C++

// $Id$
//
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#define PY_ARRAY_UNIQUE_SYMBOL rdinfotheory_array_API
#include <RDBoost/Wrap.h>
#include <RDBoost/import_array.h>
#include <ML/InfoTheory/InfoBitRanker.h>
#include <ML/InfoTheory/InfoGainFuncs.h>
namespace python = boost::python;
using namespace RDInfoTheory;
namespace RDInfoTheory {
//! Python-facing wrapper around InfoEntropy() for a 1-dimensional numpy array.
/*!
  \param resArr  Python object; must be a numpy array that numpy can present
                 as a contiguous 1-dimensional array of type int, long, float
                 or double.

  \return the value computed by InfoEntropy() over the array contents.

  Error handling:
   - a non-array argument or an unsupported element type is reported through
     throw_value_error() (declared outside this file; the code relies on it
     not returning)
   - if numpy cannot build the contiguous 1-D array, the Python error that
     numpy set is propagated
   - an empty array trips the CHECK_INVARIANT below
*/
double infoEntropy(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // handle<> adopts the new reference returned by numpy and drops it on every
  // exit path, including the exceptions raised further down. Its constructor
  // throws error_already_set when numpy returns NULL (e.g. wrong number of
  // dimensions), so nothing below ever sees a null array.
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 1, 1));
  auto *copy = reinterpret_cast<PyArrayObject *>(owner.get());

  // we are expecting a 1 dimensional array; read the length from the
  // validated copy rather than from the unchecked input object
  const auto ncols = static_cast<long int>(PyArray_DIM(copy, 0));
  CHECK_INVARIANT(ncols > 0, "");

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = InfoEntropy(static_cast<double *>(PyArray_DATA(copy)), ncols);
      break;
    case NPY_FLOAT:
      res = InfoEntropy(static_cast<float *>(PyArray_DATA(copy)), ncols);
      break;
    case NPY_INT:
      res = InfoEntropy(static_cast<int *>(PyArray_DATA(copy)), ncols);
      break;
    case NPY_LONG:
      res = InfoEntropy(static_cast<long int *>(PyArray_DATA(copy)), ncols);
      break;
    default:
      // same behavior as infoGain()/chiSquare(): refuse element types we
      // cannot process instead of silently returning 0.0
      throw_value_error(
          "Numeric array object of type int or long or float or double");
  }
  return res;
}
//! Python-facing wrapper around InfoEntropyGain() for a 2-dimensional numpy
//! array.
/*!
  \param resArr  Python object; must be a numpy array that numpy can present
                 as a contiguous 2-dimensional array of type int, long, float
                 or double.

  \return the value computed by InfoEntropyGain() for the rows x cols matrix.

  Error handling:
   - a non-array argument or an unsupported element type is reported through
     throw_value_error() (declared outside this file; the code relies on it
     not returning)
   - if numpy cannot build the contiguous 2-D array, the Python error that
     numpy set is propagated
*/
double infoGain(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // handle<> adopts the new reference returned by numpy and drops it on every
  // exit path, including the exception raised for unsupported types. Its
  // constructor throws error_already_set when numpy returns NULL (e.g. the
  // input is not 2-dimensional).
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 2, 2));
  auto *copy = reinterpret_cast<PyArrayObject *>(owner.get());

  // dimensions come from the validated 2-D copy, so index 1 is always valid
  const auto rows = static_cast<long int>(PyArray_DIM(copy, 0));
  const auto cols = static_cast<long int>(PyArray_DIM(copy, 1));

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = InfoEntropyGain(static_cast<double *>(PyArray_DATA(copy)), rows,
                            cols);
      break;
    case NPY_FLOAT:
      res = InfoEntropyGain(static_cast<float *>(PyArray_DATA(copy)), rows,
                            cols);
      break;
    case NPY_INT:
      res = InfoEntropyGain(static_cast<int *>(PyArray_DATA(copy)), rows, cols);
      break;
    case NPY_LONG:
      res = InfoEntropyGain(static_cast<long int *>(PyArray_DATA(copy)), rows,
                            cols);
      break;
    default:
      throw_value_error(
          "Numeric array object of type int or long or float or double");
  }
  return res;
}
//! Python-facing wrapper around ChiSquare() for a 2-dimensional numpy array.
/*!
  \param resArr  Python object; must be a numpy array that numpy can present
                 as a contiguous 2-dimensional array of type int, long, float
                 or double.

  \return the value computed by ChiSquare() for the rows x cols matrix.

  Error handling:
   - a non-array argument or an unsupported element type is reported through
     throw_value_error() (declared outside this file; the code relies on it
     not returning)
   - if numpy cannot build the contiguous 2-D array, the Python error that
     numpy set is propagated
*/
double chiSquare(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // handle<> adopts the new reference returned by numpy and drops it on every
  // exit path, including the exception raised for unsupported types. Its
  // constructor throws error_already_set when numpy returns NULL (e.g. the
  // input is not 2-dimensional).
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 2, 2));
  auto *copy = reinterpret_cast<PyArrayObject *>(owner.get());

  // dimensions come from the validated 2-D copy, so index 1 is always valid
  const auto rows = static_cast<long int>(PyArray_DIM(copy, 0));
  const auto cols = static_cast<long int>(PyArray_DIM(copy, 1));

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = ChiSquare(static_cast<double *>(PyArray_DATA(copy)), rows, cols);
      break;
    case NPY_FLOAT:
      res = ChiSquare(static_cast<float *>(PyArray_DATA(copy)), rows, cols);
      break;
    case NPY_INT:
      res = ChiSquare(static_cast<int *>(PyArray_DATA(copy)), rows, cols);
      break;
    case NPY_LONG:
      res = ChiSquare(static_cast<long int *>(PyArray_DATA(copy)), rows, cols);
      break;
    default:
      throw_value_error(
          "Numeric array object of type int or long or float or double");
  }
  return res;
}
} // namespace RDInfoTheory
// Registration hooks called from the module init function in this file.
// Their definitions are not visible here (presumably they live in sibling
// wrapper sources for the bit ranker and the correlation-matrix generator --
// confirm against the build files).
void wrap_ranker();
void wrap_corrmatgen();
// Module init for rdInfoTheory: sets the module docstring, runs the numpy and
// sub-wrapper initialization hooks, then exposes InfoEntropy, InfoGain and
// ChiSquare. Each function takes a single keyword-capable argument "resArr",
// and the docstrings below describe that Python-level argument.
BOOST_PYTHON_MODULE(rdInfoTheory) {
  python::scope().attr("__doc__") =
      "Module containing bunch of functions for information metrics and a "
      "ranker to rank bits";

  // NOTE(review): presumably initializes the numpy C API for this extension;
  // kept ahead of everything that may touch arrays -- confirm in
  // RDBoost/import_array.h
  rdkit_import_array();
  wrap_ranker();
  wrap_corrmatgen();

  std::string docString =
      "calculates the informational entropy of the values in an array\n\n"
      "ARGUMENTS:\n\n"
      "  - resArr: a 1-dimensional Numeric array object containing the data\n\n"
      "RETURNS:\n\n"
      "  a double\n";
  python::def("InfoEntropy", RDInfoTheory::infoEntropy, docString.c_str(),
              python::args("resArr"));

  docString =
      "Calculates the information gain for a variable\n\n"
      "ARGUMENTS:\n\n"
      "  - resArr: a Numeric Array object\n"
      "    resArr is a Numeric array with the number of possible occurrences\n"
      "    of each result for each possible value of the given variable.\n\n"
      "    So, for a variable which adopts 4 possible values and a result "
      "which\n"
      "    has 3 possible values, resArr would be 4x3\n\n"
      "RETURNS:\n\n"
      "  - a Python float object\n\n"
      "NOTES\n\n"
      "  - this is a dropin replacement for _PyInfoGain()_ in entropy.py\n";
  python::def("InfoGain", RDInfoTheory::infoGain, docString.c_str(),
              python::args("resArr"));

  docString =
      "Calculates the chi squared value for a variable\n\n"
      "ARGUMENTS:\n\n"
      "  - resArr: a Numeric Array object\n"
      "    resArr is a Numeric array with the number of possible occurrences\n"
      "    of each result for each possible value of the given variable.\n\n"
      "    So, for a variable which adopts 4 possible values and a result "
      "which\n"
      "    has 3 possible values, resArr would be 4x3\n\n"
      "RETURNS:\n\n"
      "  - a Python float object\n";
  python::def("ChiSquare", RDInfoTheory::chiSquare, docString.c_str(),
              python::args("resArr"));
}