mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-05 22:04:27 +08:00
* - added gen_rdkit_stubs Python module to generate rdkit-stubs - added patch_rdkit_docstrings Python module to patch existing C++ sources to fix docstrings missing self parameter and add named parameters taken from C++ signatures where possible - added rdkit-stubs/CMakeLists.txt to build rdkit-stubs as part of the RDKit build - added an option to CMakeLists.txt to enable building rdkit-stubs as part of the RDKit build (defaults to OFF) * fixed CMakeLists.txt, rdkit-stubs/CMakeLists.txt and a doctest * - added missing cmp_func parameter - fixed case with overloads with optional parameters - do not trim params if expected_param_count == -1 - add dummy parameter names if we could not find any - keep into account member functions when making up parameter names - address __init__ and make_constructor __init__ functions - fix incorrectly assigned staticmethods * patched sources * address residual few remarks --------- Co-authored-by: ptosco <paolo.tosco@novartis.com>
164 lines
6.1 KiB
C++
164 lines
6.1 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#define PY_ARRAY_UNIQUE_SYMBOL rdinfotheory_array_API
|
|
#include <RDBoost/Wrap.h>
|
|
#include <RDBoost/import_array.h>
|
|
#include <ML/InfoTheory/InfoBitRanker.h>
|
|
#include <ML/InfoTheory/InfoGainFuncs.h>
|
|
|
|
namespace python = boost::python;
|
|
using namespace RDInfoTheory;
|
|
|
|
namespace RDInfoTheory {
|
|
//! Python-facing wrapper that computes the informational entropy of an array.
/*!
  \param resArr  a NumPy array object; it is converted to a contiguous
                 1-dimensional array of its own element type before use.

  \return the value computed by InfoEntropy() when the element type is
          double, float, int or long; 0.0 for any other element type.

  Raises a Python ValueError if \c resArr is not a NumPy array, propagates the
  Python error raised by NumPy if the array cannot be converted to a
  contiguous 1-dimensional array, and trips an invariant check if the array
  is empty.
*/
double infoEntropy(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // python::handle<> takes ownership of the new reference and throws
  // error_already_set if the conversion returned NULL, so the copy is never
  // dereferenced when invalid and is released on every exit path (including
  // the invariant failure below).
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 1, 1));
  auto *contig = reinterpret_cast<PyArrayObject *>(owner.get());

  // we are expecting a 1 dimensional array; read its length from the
  // validated copy rather than from the raw input
  const auto ncols = static_cast<long int>(PyArray_DIM(contig, 0));
  CHECK_INVARIANT(ncols > 0, "");

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = InfoEntropy(static_cast<double *>(PyArray_DATA(contig)), ncols);
      break;
    case NPY_FLOAT:
      res = InfoEntropy(static_cast<float *>(PyArray_DATA(contig)), ncols);
      break;
    case NPY_INT:
      res = InfoEntropy(static_cast<int *>(PyArray_DATA(contig)), ncols);
      break;
    case NPY_LONG:
      res = InfoEntropy(static_cast<long int *>(PyArray_DATA(contig)), ncols);
      break;
    default:
      // NOTE(review): unlike infoGain()/chiSquare(), unsupported element
      // types are not rejected here; the historical result of 0.0 is kept so
      // existing callers are unaffected.
      break;
  }
  return res;
}
|
|
|
|
//! Python-facing wrapper that computes the information gain for a variable.
/*!
  \param resArr  a NumPy array object; it is converted to a contiguous
                 2-dimensional array of its own element type before use.

  \return the value computed by InfoEntropyGain() on the rows x cols data.

  Raises a Python ValueError if \c resArr is not a NumPy array or if its
  element type is not int, long, float or double, and propagates the Python
  error raised by NumPy if the array cannot be converted to a contiguous
  2-dimensional array.
*/
double infoGain(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // python::handle<> takes ownership of the new reference and throws
  // error_already_set if the conversion returned NULL; it also releases the
  // copy when the unsupported-type branch below throws.
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 2, 2));
  auto *contig = reinterpret_cast<PyArrayObject *>(owner.get());

  // Dimensions come from the validated 2-D copy, so the second dimension is
  // guaranteed to exist.
  const auto rows = static_cast<long int>(PyArray_DIM(contig, 0));
  const auto cols = static_cast<long int>(PyArray_DIM(contig, 1));

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = InfoEntropyGain(static_cast<double *>(PyArray_DATA(contig)), rows,
                            cols);
      break;
    case NPY_FLOAT:
      res = InfoEntropyGain(static_cast<float *>(PyArray_DATA(contig)), rows,
                            cols);
      break;
    case NPY_INT:
      res =
          InfoEntropyGain(static_cast<int *>(PyArray_DATA(contig)), rows, cols);
      break;
    case NPY_LONG:
      res = InfoEntropyGain(static_cast<long int *>(PyArray_DATA(contig)),
                            rows, cols);
      break;
    default:
      throw_value_error(
          "Numeric array object of type int or long or float or double");
  }
  return res;
}
|
|
|
|
//! Python-facing wrapper that computes the chi squared value for a variable.
/*!
  \param resArr  a NumPy array object; it is converted to a contiguous
                 2-dimensional array of its own element type before use.

  \return the value computed by ChiSquare() on the rows x cols data.

  Raises a Python ValueError if \c resArr is not a NumPy array or if its
  element type is not int, long, float or double, and propagates the Python
  error raised by NumPy if the array cannot be converted to a contiguous
  2-dimensional array.
*/
double chiSquare(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  const int typeNum =
      PyArray_DESCR(reinterpret_cast<PyArrayObject *>(matObj))->type_num;

  // python::handle<> takes ownership of the new reference and throws
  // error_already_set if the conversion returned NULL; it also releases the
  // copy when the unsupported-type branch below throws.
  python::handle<> owner(PyArray_ContiguousFromObject(matObj, typeNum, 2, 2));
  auto *contig = reinterpret_cast<PyArrayObject *>(owner.get());

  // Dimensions come from the validated 2-D copy, so the second dimension is
  // guaranteed to exist.
  const auto rows = static_cast<long int>(PyArray_DIM(contig, 0));
  const auto cols = static_cast<long int>(PyArray_DIM(contig, 1));

  double res = 0.0;
  switch (typeNum) {
    case NPY_DOUBLE:
      res = ChiSquare(static_cast<double *>(PyArray_DATA(contig)), rows, cols);
      break;
    case NPY_FLOAT:
      res = ChiSquare(static_cast<float *>(PyArray_DATA(contig)), rows, cols);
      break;
    case NPY_INT:
      res = ChiSquare(static_cast<int *>(PyArray_DATA(contig)), rows, cols);
      break;
    case NPY_LONG:
      res =
          ChiSquare(static_cast<long int *>(PyArray_DATA(contig)), rows, cols);
      break;
    default:
      throw_value_error(
          "Numeric array object of type int or long or float or double");
  }
  return res;
}
|
|
} // namespace RDInfoTheory
|
|
|
|
void wrap_ranker();
|
|
void wrap_corrmatgen();
|
|
|
|
BOOST_PYTHON_MODULE(rdInfoTheory) {
|
|
python::scope().attr("__doc__") =
|
|
"Module containing bunch of functions for information metrics and a "
|
|
"ranker to rank bits";
|
|
|
|
rdkit_import_array();
|
|
|
|
wrap_ranker();
|
|
wrap_corrmatgen();
|
|
|
|
std::string docString =
|
|
"calculates the informational entropy of the values in an array\n\n\
|
|
ARGUMENTS:\n\
|
|
\n\
|
|
- resMat: pointer to a long int array containing the data\n\
|
|
- dim: long int containing the length of the _tPtr_ array.\n\n\
|
|
RETURNS:\n\n\
|
|
a double\n";
|
|
python::def("InfoEntropy", RDInfoTheory::infoEntropy, docString.c_str(),
|
|
python::args("resArr"));
|
|
|
|
docString =
|
|
"Calculates the information gain for a variable\n\n\
|
|
ARGUMENTS:\n\n\
|
|
- varMat: a Numeric Array object\n\
|
|
varMat is a Numeric array with the number of possible occurrences\n\
|
|
of each result for reach possible value of the given variable.\n\n\
|
|
So, for a variable which adopts 4 possible values and a result which\n\
|
|
has 3 possible values, varMat would be 4x3\n\n\
|
|
RETURNS:\n\n\
|
|
- a Python float object\n\n\
|
|
NOTES\n\n\
|
|
- this is a dropin replacement for _PyInfoGain()_ in entropy.py\n";
|
|
python::def("InfoGain", RDInfoTheory::infoGain, docString.c_str(),
|
|
python::args("resArr"));
|
|
|
|
docString =
|
|
"Calculates the chi squared value for a variable\n\n\
|
|
ARGUMENTS:\n\n\
|
|
- varMat: a Numeric Array object\n\
|
|
varMat is a Numeric array with the number of possible occurrences\n\
|
|
of each result for reach possible value of the given variable.\n\n\
|
|
So, for a variable which adopts 4 possible values and a result which\n\
|
|
has 3 possible values, varMat would be 4x3\n\n\
|
|
RETURNS:\n\n\
|
|
- a Python float object\n";
|
|
python::def("ChiSquare", RDInfoTheory::chiSquare, docString.c_str(),
|
|
python::args("resArr"));
|
|
}
|