Files
rdkit/Code/SimDivPickers/Wrap/MaxMinPicker.cpp
Greg Landrum d2fdb3d075 Fixes #294
2014-08-03 12:03:35 +02:00

204 lines
8.1 KiB
C++

// $Id$
//
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL rdpicker_array_API
#include <boost/python.hpp>
#include <RDBoost/Wrap.h>
#include <boost/python/numeric.hpp>
#include "numpy/oldnumeric.h"
#include <map>
#include <DataStructs/BitVects.h>
#include <DataStructs/BitOps.h>
#include <SimDivPickers/DistPicker.h>
#include <SimDivPickers/MaxMinPicker.h>
#include <SimDivPickers/HierarchicalClusterPicker.h>
#include <iostream>
namespace python = boost::python;
namespace RDPickers {
// REVIEW: the poolSize can be pulled from the numeric array
RDKit::INT_VECT MaxMinPicks(MaxMinPicker *picker,
python::object distMat,
int poolSize,
int pickSize,
python::object firstPicks,
int seed) {
if(pickSize>=poolSize){
throw ValueErrorException("pickSize must be less than poolSize");
}
if (!PyArray_Check(distMat.ptr())){
throw ValueErrorException("distance mat argument must be a numpy matrix");
}
PyArrayObject *copy;
copy = (PyArrayObject *)PyArray_ContiguousFromObject(distMat.ptr(),
PyArray_DOUBLE, 1,1);
double *dMat = (double *)copy->data;
RDKit::INT_VECT firstPickVect;
for(unsigned int i=0;i<python::extract<unsigned int>(firstPicks.attr("__len__")());++i){
firstPickVect.push_back(python::extract<int>(firstPicks[i]));
}
RDKit::INT_VECT res=picker->pick(dMat, poolSize, pickSize,firstPickVect,seed);
Py_DECREF(copy);
return res;
}
class pyobjFunctor {
public:
pyobjFunctor(python::object obj) : dp_obj(obj) {}
double operator()(unsigned int i,unsigned int j) {
double res;
std::pair<unsigned int ,unsigned int> idxPair(i,j);
if(this->d_cache.count(idxPair)>0){
res = this->d_cache[idxPair];
} else {
res=python::extract<double>(dp_obj(i,j));
this->d_cache[idxPair]=res;
}
return res;
}
private:
python::object dp_obj;
std::map<std::pair<unsigned int,unsigned int>,double> d_cache;
};
RDKit::INT_VECT LazyMaxMinPicks(MaxMinPicker *picker,
python::object distFunc,
int poolSize,
int pickSize,
python::object firstPicks,
int seed) {
pyobjFunctor functor(distFunc);
RDKit::INT_VECT firstPickVect;
for(unsigned int i=0;i<python::extract<unsigned int>(firstPicks.attr("__len__")());++i){
firstPickVect.push_back(python::extract<int>(firstPicks[i]));
}
RDKit::INT_VECT res=picker->lazyPick(functor, poolSize, pickSize,firstPickVect,seed);
return res;
}
typedef enum {
TANIMOTO=1,
DICE
} DistanceMethod;
class pyBVFunctor {
public:
pyBVFunctor(python::object obj,DistanceMethod method) : dp_obj(obj), d_method(method) {}
double operator()(unsigned int i,unsigned int j) {
const ExplicitBitVect *bvi,*bvj;
try{
bvi=python::extract<const ExplicitBitVect *>(dp_obj[i]);
bvj=python::extract<const ExplicitBitVect *>(dp_obj[j]);
} catch (...) {
throw_value_error("unable to extract ExplicitBitVect from sequence value");
}
double res=0.0;
switch(d_method){
case TANIMOTO:
res = 1.-TanimotoSimilarity(*bvi,*bvj);
break;
case DICE:
res = 1.-DiceSimilarity(*bvi,*bvj);
break;
default:
throw_value_error("unsupported similarity value");
}
return res;
}
private:
python::object dp_obj;
DistanceMethod d_method;
};
RDKit::INT_VECT LazyVectorMaxMinPicks(MaxMinPicker *picker,
python::object objs,
int poolSize,
int pickSize,
python::object firstPicks,
int seed,
DistanceMethod method
) {
pyBVFunctor functor(objs,method);
RDKit::INT_VECT firstPickVect;
for(unsigned int i=0;i<python::extract<unsigned int>(firstPicks.attr("__len__")());++i){
firstPickVect.push_back(python::extract<int>(firstPicks[i]));
}
RDKit::INT_VECT res=picker->lazyPick(functor, poolSize, pickSize,firstPickVect,seed);
return res;
}
} // end of namespace RDPickers
struct MaxMin_wrap {
static void wrap() {
python::enum_<RDPickers::DistanceMethod>("DistanceMethod")
.value("TANIMOTO", RDPickers::TANIMOTO)
.value("DICE", RDPickers::DICE);
python::class_<RDPickers::MaxMinPicker>("MaxMinPicker",
"A class for diversity picking of items using the MaxMin Algorithm\n")
.def("Pick", RDPickers::MaxMinPicks,
(python::arg("self"),python::arg("distMat"),python::arg("poolSize"),
python::arg("pickSize"),python::arg("firstPicks")=python::tuple(),
python::arg("seed")=-1),
"Pick a subset of items from a pool of items using the MaxMin Algorithm\n"
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n\n"
"ARGUMENTS:\n"
" - distMat: 1D distance matrix (only the lower triangle elements)\n"
" - poolSize: number of items in the pool\n"
" - pickSize: number of items to pick from the pool\n"
" - firstPicks: (optional) the first items to be picked (seeds the list)\n"
" - seed: (optional) seed for the random number genrator\n"
)
.def("LazyPick", RDPickers::LazyMaxMinPicks,
(python::arg("self"),python::arg("distFunc"),python::arg("poolSize"),
python::arg("pickSize"),python::arg("firstPicks")=python::tuple(),
python::arg("seed")=-1),
"Pick a subset of items from a pool of items using the MaxMin Algorithm\n"
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n"
"ARGUMENTS:\n\n"
" - distFunc: a function that should take two indices and return the\n"
" distance between those two points.\n"
" NOTE: the implementation caches distance values, so the\n"
" client code does not need to do so; indeed, it should not.\n"
" - poolSize: number of items in the pool\n"
" - pickSize: number of items to pick from the pool\n"
" - firstPicks: (optional) the first items to be picked (seeds the list)\n"
" - seed: (optional) seed for the random number genrator\n"
)
.def("LazyBitVectorPick", RDPickers::LazyVectorMaxMinPicks,
(python::arg("self"),python::arg("objects"),python::arg("poolSize"),
python::arg("pickSize"),python::arg("firstPicks")=python::tuple(),
python::arg("seed")=-1,python::arg("method")=RDPickers::TANIMOTO),
"Pick a subset of items from a pool of bit vectors using the MaxMin Algorithm\n"
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), 598-604 \n"
"ARGUMENTS:\n\n"
" - vectors: a sequence of the bit vectors that should be picked from.\n"
" - poolSize: number of items in the pool\n"
" - pickSize: number of items to pick from the pool\n"
" - firstPicks: (optional) the first items to be picked (seeds the list)\n"
" - seed: (optional) seed for the random number genrator\n"
" - method: (optional) the distance measure to use\n"
)
;
};
};
void wrap_maxminpick() {
MaxMin_wrap::wrap();
}