mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
* not yet done * update docs, python tests, and the release notes * updates in response to review
This commit is contained in:
committed by
Brian Kelley
parent
fe3096cffa
commit
4d14a819e6
@@ -20,6 +20,7 @@
|
||||
#include <cstdlib>
|
||||
#include "DistPicker.h"
|
||||
#include <boost/random.hpp>
|
||||
#include <random>
|
||||
|
||||
namespace RDPickers {
|
||||
|
||||
@@ -34,7 +35,7 @@ class RDKIT_SIMDIVPICKERS_EXPORT distmatFunctor {
|
||||
private:
|
||||
const double *dp_distMat;
|
||||
};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/*! \brief Implements the MaxMin algorithm for picking a subset of items from a
|
||||
*pool
|
||||
@@ -66,7 +67,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker {
|
||||
* poolSize*(poolSize-1)
|
||||
* \param pickSize - the number of items to pick from pool (<= poolSize)
|
||||
* \param firstPicks - (optional) the first items in the pick list
|
||||
* \param seed - (optional) seed for the random number generator
|
||||
* \param seed - (optional) seed for the random number generator.
|
||||
* If this is <0 the generator will be seeded with a
|
||||
* random number.
|
||||
*/
|
||||
template <typename T>
|
||||
RDKit::INT_VECT lazyPick(T &func, unsigned int poolSize,
|
||||
@@ -117,7 +120,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker {
|
||||
* \param pickSize - the number of items to pick from pool (<= poolSize)
|
||||
* \param firstPicks - indices of the items used to seed the pick set.
|
||||
* \param seed - (optional) seed for the random number generator
|
||||
*/
|
||||
* If this is <0 the generator will be seeded with a
|
||||
* random number.
|
||||
*/
|
||||
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize,
|
||||
unsigned int pickSize, RDKit::INT_VECT firstPicks,
|
||||
int seed = -1) const {
|
||||
@@ -175,11 +180,14 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
|
||||
typedef boost::mt19937 rng_type;
|
||||
typedef boost::uniform_int<> distrib_type;
|
||||
typedef boost::variate_generator<rng_type &, distrib_type> source_type;
|
||||
rng_type generator(42u);
|
||||
rng_type generator;
|
||||
distrib_type dist(0, poolSize - 1);
|
||||
if (seed >= 0) {
|
||||
generator.seed(static_cast<rng_type::result_type>(seed));
|
||||
} else {
|
||||
generator.seed(std::random_device()());
|
||||
}
|
||||
source_type randomSource(generator, dist);
|
||||
if (seed > 0) generator.seed(static_cast<rng_type::result_type>(seed));
|
||||
|
||||
pick = randomSource();
|
||||
// add the pick to the picks
|
||||
picks.push_back(pick);
|
||||
@@ -290,11 +298,12 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
|
||||
template <typename T>
|
||||
RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
|
||||
unsigned int pickSize) const {
|
||||
RDKit::INT_LIST firstPicks;
|
||||
RDKit::INT_VECT firstPicks;
|
||||
double threshold = -1.0;
|
||||
return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, -1,
|
||||
int seed = -1;
|
||||
return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, seed,
|
||||
threshold);
|
||||
}
|
||||
};
|
||||
}; // namespace RDPickers
|
||||
|
||||
#endif
|
||||
|
||||
@@ -129,7 +129,7 @@ class TestCase(unittest.TestCase):
|
||||
picker = rdSimDivPickers.MaxMinPicker()
|
||||
mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(tuple(mm2), tuple(mm1))
|
||||
self.assertNotEqual(tuple(mm2), tuple(mm1))
|
||||
picker = None
|
||||
|
||||
ds = []
|
||||
@@ -161,18 +161,18 @@ class TestCase(unittest.TestCase):
|
||||
return d
|
||||
|
||||
picker = rdSimDivPickers.MaxMinPicker()
|
||||
mm1 = picker.LazyPick(func, len(vs), N)
|
||||
mm1 = picker.LazyPick(func, len(vs), N, seed=42)
|
||||
self.assertEqual(len(mm1), N)
|
||||
|
||||
mm2 = picker.LazyPick(func, len(vs), N, useCache=False)
|
||||
mm2 = picker.LazyPick(func, len(vs), N, useCache=False, seed=42)
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(list(mm1), list(mm2))
|
||||
|
||||
mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
|
||||
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, seed=42)
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(list(mm1), list(mm2))
|
||||
|
||||
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False)
|
||||
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False, seed=42)
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(list(mm1), list(mm2))
|
||||
|
||||
@@ -214,11 +214,11 @@ class TestCase(unittest.TestCase):
|
||||
N = 5
|
||||
fps = [DataStructs.CreateFromBitString(x) for x in fps]
|
||||
picker = rdSimDivPickers.MaxMinPicker()
|
||||
mm1 = picker.LazyBitVectorPick(fps, len(fps), N)
|
||||
mm1 = picker.LazyBitVectorPick(fps, len(fps), N, seed=42)
|
||||
self.assertEqual(len(mm1), N)
|
||||
self.assertEqual(list(mm1), [37, 1, 43, 38, 16])
|
||||
|
||||
mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False)
|
||||
mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False, seed=42)
|
||||
self.assertEqual(len(mm2), N)
|
||||
self.assertEqual(list(mm1), list(mm2))
|
||||
|
||||
@@ -231,11 +231,11 @@ class TestCase(unittest.TestCase):
|
||||
fp = DataStructs.CreateFromFPSText(line.strip())
|
||||
fps.append(fp)
|
||||
mmp =rdSimDivPickers.MaxMinPicker()
|
||||
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20))
|
||||
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,seed=42))
|
||||
self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\
|
||||
881,516,497,412,718,869,407])
|
||||
|
||||
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875]))
|
||||
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875],seed=42))
|
||||
self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\
|
||||
881,516,497,412,718,869,407])
|
||||
|
||||
@@ -249,13 +249,13 @@ class TestCase(unittest.TestCase):
|
||||
fp = DataStructs.CreateFromFPSText(line.strip())
|
||||
fps.append(fp)
|
||||
mmp =rdSimDivPickers.MaxMinPicker()
|
||||
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0)
|
||||
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0,seed=42)
|
||||
self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630,\
|
||||
881,516,497,412,718,869,407])
|
||||
|
||||
self.assertAlmostEqual(threshold,0.8977,4)
|
||||
|
||||
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91)
|
||||
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91,seed=42)
|
||||
self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630])
|
||||
self.assertTrue(threshold>=0.91)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ namespace {
|
||||
//! Distance between two integer positions on a line, i.e. |i - j|.
/*!
 *  Both indices are converted to double before subtracting so that the
 *  unsigned subtraction cannot wrap around when j > i.
 *
 *  \param i - first position
 *  \param j - second position
 *  \return the non-negative distance between the two positions
 */
double dist_on_line(unsigned int i, unsigned int j) {
  return std::fabs(static_cast<double>(i) - static_cast<double>(j));
}
|
||||
}
|
||||
} // namespace
|
||||
void testGithub1421() {
|
||||
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdErrorLog)
|
||||
@@ -32,8 +32,40 @@ void testGithub1421() {
|
||||
BOOST_LOG(rdErrorLog) << "Done" << std::endl;
|
||||
}
|
||||
|
||||
void testGithub2245() {
|
||||
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
|
||||
BOOST_LOG(rdErrorLog) << "Testing github issue 2245: MinMax Diversity picker "
|
||||
"seeding shows deterministic / non-random behaviour."
|
||||
<< std::endl;
|
||||
{
|
||||
RDPickers::MaxMinPicker pkr;
|
||||
int poolSz = 1000;
|
||||
auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1);
|
||||
auto picks2 = pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1);
|
||||
TEST_ASSERT(picks1 != picks2);
|
||||
}
|
||||
{ // make sure the default is also random
|
||||
RDPickers::MaxMinPicker pkr;
|
||||
int poolSz = 1000;
|
||||
auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10);
|
||||
auto picks2 = pkr.lazyPick(dist_on_line, poolSz, 10);
|
||||
TEST_ASSERT(picks1 != picks2);
|
||||
}
|
||||
{ // and we're still reproducible when we want to be
|
||||
RDPickers::MaxMinPicker pkr;
|
||||
int poolSz = 1000;
|
||||
auto picks1 =
|
||||
pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d);
|
||||
auto picks2 =
|
||||
pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d);
|
||||
TEST_ASSERT(picks1 == picks2);
|
||||
}
|
||||
BOOST_LOG(rdErrorLog) << "Done" << std::endl;
|
||||
}
|
||||
|
||||
// Test driver: calls RDLog::InitLogs() and then runs each regression test in
// the order listed below.
int main() {
  RDLog::InitLogs();
  void (*const regressionTests[])() = {testGithub1421, testGithub2245};
  for (auto runTest : regressionTests) {
    runTest();
  }
  return 0;
}
|
||||
|
||||
Reference in New Issue
Block a user