mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Merge branch 'master' into modern_cxx
# Conflicts: # .travis.yml # Code/GraphMol/FileParsers/MolFileParser.cpp # Code/GraphMol/FileParsers/MolFileStereochem.cpp # Code/GraphMol/ForceFieldHelpers/UFF/testUFFHelpers.cpp # Code/GraphMol/MolAlign/testMolAlign.cpp # Code/GraphMol/MolDraw2D/MolDraw2D.cpp # Code/GraphMol/MolDraw2D/Wrap/rdMolDraw2D.cpp # Code/GraphMol/QueryOps.cpp # Code/GraphMol/ROMol.cpp # Code/GraphMol/SmilesParse/test.cpp # Code/GraphMol/Trajectory/Trajectory.cpp # Code/GraphMol/Wrap/Atom.cpp # Code/GraphMol/Wrap/Bond.cpp # Code/GraphMol/new_canon.cpp # Code/RDGeneral/testDict.cpp # Code/SimDivPickers/Wrap/MaxMinPicker.cpp
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
// $Id$
|
||||
//
|
||||
// Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
|
||||
// Copyright (C) 2003-2017 Greg Landrum and Rational Discovery LLC
|
||||
// @@ All Rights Reserved @@
|
||||
// This file is part of the RDKit.
|
||||
// The contents are covered by the terms of the BSD license
|
||||
@@ -58,79 +57,78 @@ RDKit::INT_VECT MaxMinPicks(MaxMinPicker *picker, python::object distMat,
|
||||
|
||||
class pyobjFunctor {
|
||||
public:
|
||||
pyobjFunctor(python::object obj, bool useCache)
|
||||
: dp_obj(std::move(obj)), dp_cache(nullptr) {
|
||||
if (useCache)
|
||||
dp_cache = new std::map<std::pair<unsigned int, unsigned int>, double>();
|
||||
}
|
||||
~pyobjFunctor() { delete dp_cache; }
|
||||
pyobjFunctor(python::object obj) : dp_obj(obj) {}
|
||||
~pyobjFunctor() {}
|
||||
double operator()(unsigned int i, unsigned int j) {
|
||||
double res;
|
||||
std::pair<unsigned int, unsigned int> idxPair(i, j);
|
||||
if (dp_cache && dp_cache->count(idxPair) > 0) {
|
||||
res = (*dp_cache)[idxPair];
|
||||
} else {
|
||||
res = python::extract<double>(dp_obj(i, j));
|
||||
if (dp_cache) (*dp_cache)[idxPair] = res;
|
||||
}
|
||||
return res;
|
||||
return python::extract<double>(dp_obj(i, j));
|
||||
}
|
||||
|
||||
private:
|
||||
python::object dp_obj;
|
||||
std::map<std::pair<unsigned int, unsigned int>, double> *dp_cache;
|
||||
};
|
||||
|
||||
RDKit::INT_VECT LazyMaxMinPicks(MaxMinPicker *picker, python::object distFunc,
|
||||
int poolSize, int pickSize,
|
||||
python::object firstPicks, int seed,
|
||||
bool useCache) {
|
||||
namespace {
|
||||
template <typename T>
|
||||
void LazyMaxMinHelper(MaxMinPicker *picker, T functor, unsigned int poolSize,
|
||||
unsigned int pickSize, python::object firstPicks,
|
||||
int seed, RDKit::INT_VECT &res, double &threshold) {
|
||||
RDKit::INT_VECT firstPickVect;
|
||||
for (unsigned int i = 0;
|
||||
i < python::extract<unsigned int>(firstPicks.attr("__len__")()); ++i) {
|
||||
firstPickVect.push_back(python::extract<int>(firstPicks[i]));
|
||||
}
|
||||
res = picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed,
|
||||
threshold);
|
||||
}
|
||||
} // end of anonymous namespace
|
||||
|
||||
RDKit::INT_VECT LazyMaxMinPicks(MaxMinPicker *picker, python::object distFunc,
|
||||
int poolSize, int pickSize,
|
||||
python::object firstPicks, int seed,
|
||||
python::object useCache) {
|
||||
if (useCache != python::object()) {
|
||||
BOOST_LOG(rdWarningLog) << "the useCache argument is deprecated and ignored"
|
||||
<< std::endl;
|
||||
}
|
||||
pyobjFunctor functor(distFunc);
|
||||
RDKit::INT_VECT res;
|
||||
pyobjFunctor functor(distFunc, useCache);
|
||||
res = picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed);
|
||||
double threshold = -1.;
|
||||
LazyMaxMinHelper(picker, functor, poolSize, pickSize, firstPicks, seed, res,
|
||||
threshold);
|
||||
return res;
|
||||
}
|
||||
// Python entry point behind MaxMinPicker.LazyPickWithThreshold.
//
// Wraps the python callable `distFunc` in a pyobjFunctor, runs the shared
// LazyMaxMinHelper(), and returns a (picks, threshold) tuple. `threshold` is
// handed to the helper by reference, so the value placed in the tuple is
// whatever it holds once the helper has returned.
python::tuple LazyMaxMinPicksWithThreshold(
    MaxMinPicker *picker, python::object distFunc, int poolSize, int pickSize,
    double threshold, python::object firstPicks, int seed) {
  RDKit::INT_VECT picks;
  pyobjFunctor distFunctor(distFunc);
  LazyMaxMinHelper(picker, distFunctor, poolSize, pickSize, firstPicks, seed,
                   picks, threshold);
  return python::make_tuple(picks, threshold);
}
|
||||
|
||||
// NOTE: TANIMOTO and DICE provably return the same results for the diversity
|
||||
// picking
|
||||
// this is still here just in case we ever later want to support other
|
||||
// picking. This is still here just in case we ever later want to support other
|
||||
// methods.
|
||||
typedef enum { TANIMOTO = 1, DICE } DistanceMethod;
|
||||
|
||||
template <typename BV>
|
||||
class pyBVFunctor {
|
||||
public:
|
||||
pyBVFunctor(const std::vector<const BV *> &obj, DistanceMethod method,
|
||||
bool useCache)
|
||||
: d_obj(obj), d_method(method), dp_cache(nullptr) {
|
||||
if (useCache)
|
||||
dp_cache = new std::map<std::pair<unsigned int, unsigned int>, double>();
|
||||
}
|
||||
~pyBVFunctor() { delete dp_cache; }
|
||||
pyBVFunctor(const std::vector<const BV *> &obj, DistanceMethod method)
|
||||
: d_obj(obj), d_method(method) {}
|
||||
~pyBVFunctor() {}
|
||||
double operator()(unsigned int i, unsigned int j) {
|
||||
double res = 0.0;
|
||||
std::pair<unsigned int, unsigned int> idxPair(i, j);
|
||||
if (dp_cache && dp_cache->count(idxPair) > 0) {
|
||||
res = (*dp_cache)[idxPair];
|
||||
} else {
|
||||
switch (d_method) {
|
||||
case TANIMOTO:
|
||||
res = 1. - TanimotoSimilarity(*d_obj[i], *d_obj[j]);
|
||||
break;
|
||||
case DICE:
|
||||
res = 1. - DiceSimilarity(*d_obj[i], *d_obj[j]);
|
||||
break;
|
||||
default:
|
||||
throw_value_error("unsupported similarity value");
|
||||
}
|
||||
if (dp_cache) {
|
||||
(*dp_cache)[idxPair] = res;
|
||||
}
|
||||
switch (d_method) {
|
||||
case TANIMOTO:
|
||||
res = 1. - TanimotoSimilarity(*d_obj[i], *d_obj[j]);
|
||||
break;
|
||||
case DICE:
|
||||
res = 1. - DiceSimilarity(*d_obj[i], *d_obj[j]);
|
||||
break;
|
||||
default:
|
||||
throw_value_error("unsupported similarity value");
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -138,27 +136,44 @@ class pyBVFunctor {
|
||||
private:
|
||||
const std::vector<const BV *> &d_obj;
|
||||
DistanceMethod d_method;
|
||||
std::map<std::pair<unsigned int, unsigned int>, double> *dp_cache;
|
||||
};
|
||||
|
||||
RDKit::INT_VECT LazyVectorMaxMinPicks(MaxMinPicker *picker, python::object objs,
|
||||
int poolSize, int pickSize,
|
||||
python::object firstPicks, int seed,
|
||||
bool useCache) {
|
||||
python::object useCache) {
|
||||
if (useCache != python::object()) {
|
||||
BOOST_LOG(rdWarningLog) << "the useCache argument is deprecated and ignored"
|
||||
<< std::endl;
|
||||
}
|
||||
std::vector<const ExplicitBitVect *> bvs(poolSize);
|
||||
for (int i = 0; i < poolSize; ++i) {
|
||||
bvs[i] = python::extract<const ExplicitBitVect *>(objs[i]);
|
||||
}
|
||||
pyBVFunctor<ExplicitBitVect> functor(bvs, TANIMOTO, useCache);
|
||||
RDKit::INT_VECT firstPickVect;
|
||||
for (unsigned int i = 0;
|
||||
i < python::extract<unsigned int>(firstPicks.attr("__len__")()); ++i) {
|
||||
firstPickVect.push_back(python::extract<int>(firstPicks[i]));
|
||||
}
|
||||
RDKit::INT_VECT res =
|
||||
picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed);
|
||||
pyBVFunctor<ExplicitBitVect> functor(bvs, TANIMOTO);
|
||||
|
||||
RDKit::INT_VECT res;
|
||||
double threshold = -1.;
|
||||
LazyMaxMinHelper(picker, functor, poolSize, pickSize, firstPicks, seed, res,
|
||||
threshold);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Python entry point behind MaxMinPicker.LazyBitVectorPickWithThreshold.
//
// Pulls `poolSize` ExplicitBitVect pointers out of the python sequence
// `objs`, builds a TANIMOTO pyBVFunctor over them, runs the shared
// LazyMaxMinHelper(), and returns a (picks, threshold) tuple.
python::tuple LazyVectorMaxMinPicksWithThreshold(
    MaxMinPicker *picker, python::object objs, int poolSize, int pickSize,
    double threshold, python::object firstPicks, int seed) {
  // The functor only stores a reference to this vector, so the vector has to
  // stay alive until the helper call below has finished.
  std::vector<const ExplicitBitVect *> fingerprints(poolSize);
  int idx = 0;
  for (auto &fp : fingerprints) {
    fp = python::extract<const ExplicitBitVect *>(objs[idx]);
    ++idx;
  }
  pyBVFunctor<ExplicitBitVect> distFunctor(fingerprints, TANIMOTO);

  RDKit::INT_VECT picks;
  LazyMaxMinHelper(picker, distFunctor, poolSize, pickSize, firstPicks, seed,
                   picks, threshold);
  return python::make_tuple(picks, threshold);
}
|
||||
|
||||
} // end of namespace RDPickers
|
||||
|
||||
struct MaxMin_wrap {
|
||||
@@ -188,7 +203,8 @@ struct MaxMin_wrap {
|
||||
(python::arg("self"), python::arg("distFunc"),
|
||||
python::arg("poolSize"), python::arg("pickSize"),
|
||||
python::arg("firstPicks") = python::tuple(),
|
||||
python::arg("seed") = -1, python::arg("useCache") = true),
|
||||
python::arg("seed") = -1,
|
||||
python::arg("useCache") = python::object()),
|
||||
"Pick a subset of items from a pool of items using the MaxMin "
|
||||
"Algorithm\n"
|
||||
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), "
|
||||
@@ -206,14 +222,13 @@ struct MaxMin_wrap {
|
||||
" - firstPicks: (optional) the first items to be picked (seeds "
|
||||
"the list)\n"
|
||||
" - seed: (optional) seed for the random number generator\n"
|
||||
" - useCache: (optional) toggles use of a cache for the distance "
|
||||
"calculation\n"
|
||||
" This trades memory usage for speed.\n")
|
||||
" - useCache: IGNORED\n")
|
||||
.def("LazyBitVectorPick", RDPickers::LazyVectorMaxMinPicks,
|
||||
(python::arg("self"), python::arg("objects"),
|
||||
python::arg("poolSize"), python::arg("pickSize"),
|
||||
python::arg("firstPicks") = python::tuple(),
|
||||
python::arg("seed") = -1, python::arg("useCache") = true),
|
||||
python::arg("seed") = -1,
|
||||
python::arg("useCache") = python::object()),
|
||||
"Pick a subset of items from a pool of bit vectors using the "
|
||||
"MaxMin Algorithm\n"
|
||||
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), "
|
||||
@@ -226,11 +241,54 @@ struct MaxMin_wrap {
|
||||
" - firstPicks: (optional) the first items to be picked (seeds "
|
||||
"the list)\n"
|
||||
" - seed: (optional) seed for the random number generator\n"
|
||||
" - useCache: (optional) toggles use of a cache for the distance "
|
||||
"calculation\n"
|
||||
" This trades memory usage for speed.\n"
|
||||
" - useCache: IGNORED.\n")
|
||||
|
||||
);
|
||||
.def("LazyPickWithThreshold", RDPickers::LazyMaxMinPicksWithThreshold,
|
||||
(python::arg("self"), python::arg("distFunc"),
|
||||
python::arg("poolSize"), python::arg("pickSize"),
|
||||
python::arg("threshold"),
|
||||
python::arg("firstPicks") = python::tuple(),
|
||||
python::arg("seed") = -1),
|
||||
"Pick a subset of items from a pool of items using the MaxMin "
|
||||
"Algorithm\n"
|
||||
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), "
|
||||
"598-604 \n"
|
||||
"ARGUMENTS:\n\n"
|
||||
" - distFunc: a function that should take two indices and return "
|
||||
"the\n"
|
||||
" distance between those two points.\n"
|
||||
" NOTE: the implementation caches distance values, "
|
||||
"so the\n"
|
||||
" client code does not need to do so; indeed, it "
|
||||
"should not.\n"
|
||||
" - poolSize: number of items in the pool\n"
|
||||
" - pickSize: number of items to pick from the pool\n"
|
||||
" - threshold: stop picking when the distance goes below this "
|
||||
"value\n"
|
||||
" - firstPicks: (optional) the first items to be picked (seeds "
|
||||
"the list)\n"
|
||||
" - seed: (optional) seed for the random number generator\n")
|
||||
.def("LazyBitVectorPickWithThreshold",
|
||||
RDPickers::LazyVectorMaxMinPicksWithThreshold,
|
||||
(python::arg("self"), python::arg("objects"),
|
||||
python::arg("poolSize"), python::arg("pickSize"),
|
||||
python::arg("threshold"),
|
||||
python::arg("firstPicks") = python::tuple(),
|
||||
python::arg("seed") = -1),
|
||||
"Pick a subset of items from a pool of bit vectors using the "
|
||||
"MaxMin Algorithm\n"
|
||||
"Ashton, M. et. al., Quant. Struct.-Act. Relat., 21 (2002), "
|
||||
"598-604 \n"
|
||||
"ARGUMENTS:\n\n"
|
||||
" - vectors: a sequence of the bit vectors that should be picked "
|
||||
"from.\n"
|
||||
" - poolSize: number of items in the pool\n"
|
||||
" - pickSize: number of items to pick from the pool\n"
|
||||
" - threshold: stop picking when the distance goes below this "
|
||||
"value\n"
|
||||
" - firstPicks: (optional) the first items to be picked (seeds "
|
||||
"the list)\n"
|
||||
" - seed: (optional) seed for the random number generator\n");
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -222,6 +222,43 @@ class TestCase(unittest.TestCase):
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(list(mm1), list(mm2))
|
||||
|
||||
def testBitVectorMaxMin3(self):
  # Picking 20 fingerprints from the bundled ChEMBL sample must give a fixed
  # list, and seeding the picker with the first five of those picks must
  # reproduce exactly the same list.
  fname = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers', 'Wrap', 'test_data',
                       'chembl_cyps.head.fps')
  with open(fname) as infil:
    fps = [DataStructs.CreateFromFPSText(line.strip()) for line in infil]

  expected = [374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630,
              881, 516, 497, 412, 718, 869, 407]

  mmp = rdSimDivPickers.MaxMinPicker()
  ids = list(mmp.LazyBitVectorPick(fps, len(fps), 20))
  self.assertEqual(ids, expected)

  ids = list(mmp.LazyBitVectorPick(fps, len(fps), 20, firstPicks=expected[:5]))
  self.assertEqual(ids, expected)
|
||||
|
||||
def testBitVectorMaxMin4(self):
  # Threshold tests: a negative threshold must not cut the pick short, while
  # a threshold of 0.91 must stop the pick after 13 items.
  fname = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers', 'Wrap', 'test_data',
                       'chembl_cyps.head.fps')
  with open(fname) as infil:
    fps = [DataStructs.CreateFromFPSText(line.strip()) for line in infil]

  expected = [374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630,
              881, 516, 497, 412, 718, 869, 407]

  mmp = rdSimDivPickers.MaxMinPicker()

  # Unrestricted run: all 20 picks come back along with the final threshold.
  ids, threshold = mmp.LazyBitVectorPickWithThreshold(fps, len(fps), 20, -1.0)
  self.assertEqual(list(ids), expected)
  self.assertAlmostEqual(threshold, 0.8977, 4)

  # Restricted run: only the first 13 picks survive the 0.91 cutoff.
  ids, threshold = mmp.LazyBitVectorPickWithThreshold(fps, len(fps), 20, 0.91)
  self.assertEqual(list(ids), expected[:13])
  self.assertTrue(threshold >= 0.91)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
1000
Code/SimDivPickers/Wrap/test_data/chembl_cyps.head.fps
Normal file
1000
Code/SimDivPickers/Wrap/test_data/chembl_cyps.head.fps
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user