Files
rdkit/Code/SimDivPickers/Wrap/HierarchicalClusterPicker.cpp
Paolo Tosco 2b4202867e Add Python modules to generate stubs and automatically patch docstrings (#6919)
* - added gen_rdkit_stubs Python module to generate rdkit-stubs
- added patch_rdkit_docstrings Python module to patch existing C++ sources to fix docstrings missing self parameter and add named parameters taken from C++ signatures where possible
- added rdkit-stubs/CMakeLists.txt to build rdkit-stubs as part of the RDKit build
- added an option to CMakeLists.txt to enable building rdkit-stubs as part of the RDKit build (defaults to OFF)

* fixed CMakeLists.txt, rdkit-stubs/CMakeLists.txt and a doctest

* - added missing cmp_func parameter
- fixed case with overloads with optional parameters
- do not trim params if expected_param_count == -1
- add dummy parameter names if we could not find any
- take into account member functions when making up parameter names
- address __init__ and make_constructor __init__ functions
- fix incorrectly assigned staticmethods

* patched sources

* address residual few remarks

---------

Co-authored-by: ptosco <paolo.tosco@novartis.com>
2023-11-30 04:54:18 +01:00

115 lines
4.4 KiB
C++

// $Id$
//
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL rdpicker_array_API
#include <RDBoost/python.h>
#include <RDBoost/boost_numpy.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
#include <RDBoost/Wrap.h>
#include <SimDivPickers/DistPicker.h>
#include <SimDivPickers/HierarchicalClusterPicker.h>
namespace python = boost::python;
namespace RDPickers {
// REVIEW: the poolSize can be pulled from the numeric array
RDKit::INT_VECT HierarchicalPicks(HierarchicalClusterPicker *picker,
python::object &distMat, int poolSize,
int pickSize) {
if (pickSize >= poolSize) {
throw ValueErrorException("pickSize must be less than poolSize");
}
if (!PyArray_Check(distMat.ptr())) {
throw ValueErrorException("distance mat argument must be a numpy matrix");
}
PyArrayObject *copy;
// it's painful to have to copy the input matrix, but the
// picker itself will step on the distance matrix, so use
// CopyFromObject here instead of ContiguousFromObject
copy =
(PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1);
auto *dMat = (double *)PyArray_DATA(copy);
RDKit::INT_VECT res = picker->pick(dMat, poolSize, pickSize);
Py_DECREF(copy);
return res;
}
// REVIEW: the poolSize can be pulled from the numeric array
RDKit::VECT_INT_VECT HierarchicalClusters(HierarchicalClusterPicker *picker,
python::object &distMat, int poolSize,
int pickSize) {
if (!PyArray_Check(distMat.ptr())) {
throw ValueErrorException("distance mat argument must be a numpy matrix");
}
// REVIEW: check pickSize < poolSize, otherwise throw_value_error()
PyArrayObject *copy;
// it's painful to have to copy the input matrix, but the
// picker itself will step on the distance matrix, so use
// CopyFromObject here instead of ContiguousFromObject
copy =
(PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1);
auto *dMat = (double *)PyArray_DATA(copy);
RDKit::VECT_INT_VECT res = picker->cluster(dMat, poolSize, pickSize);
Py_DECREF(copy);
return res;
}
struct HierarchCP_wrap {
static void wrap() {
std::string docString =
"A class for diversity picking of items using Hierarchical "
"Clustering\n";
python::class_<HierarchicalClusterPicker>(
"HierarchicalClusterPicker", docString.c_str(),
python::init<HierarchicalClusterPicker::ClusterMethod>(
python::args("self", "clusterMethod")))
.def("Pick", HierarchicalPicks,
python::args("self", "distMat", "poolSize", "pickSize"),
"Pick a diverse subset of items from a pool of items using "
"hierarchical clustering\n"
"\n"
"ARGUMENTS: \n"
" - distMat: 1D distance matrix (only the lower triangle "
"elements)\n"
" - poolSize: number of items in the pool\n"
" - pickSize: number of items to pick from the pool\n")
.def("Cluster", HierarchicalClusters,
python::args("self", "distMat", "poolSize", "pickSize"),
"Return a list of clusters of item from the pool using "
"hierarchical clustering\n"
"\n"
"ARGUMENTS: \n"
" - distMat: 1D distance matrix (only the lower triangle "
"elements)\n"
" - poolSize: number of items in the pool\n"
" - pickSize: number of items to pick from the pool\n");
python::enum_<HierarchicalClusterPicker::ClusterMethod>("ClusterMethod")
.value("WARD", HierarchicalClusterPicker::WARD)
.value("SLINK", HierarchicalClusterPicker::SLINK)
.value("CLINK", HierarchicalClusterPicker::CLINK)
.value("UPGMA", HierarchicalClusterPicker::UPGMA)
.value("MCQUITTY", HierarchicalClusterPicker::MCQUITTY)
.value("GOWER", HierarchicalClusterPicker::GOWER)
.value("CENTROID", HierarchicalClusterPicker::CENTROID)
.export_values();
};
};
} // namespace RDPickers
void wrap_HierarchCP() { RDPickers::HierarchCP_wrap::wrap(); }