* not yet done

* update docs, python tests, and the release notes

* updates in response to review
This commit is contained in:
Greg Landrum
2019-01-31 06:03:39 +01:00
committed by Brian Kelley
parent fe3096cffa
commit 4d14a819e6
6 changed files with 82 additions and 24 deletions

View File

@@ -20,6 +20,7 @@
#include <cstdlib>
#include "DistPicker.h"
#include <boost/random.hpp>
#include <random>
namespace RDPickers {
@@ -34,7 +35,7 @@ class RDKIT_SIMDIVPICKERS_EXPORT distmatFunctor {
private:
const double *dp_distMat;
};
}
} // namespace
/*! \brief Implements the MaxMin algorithm for picking a subset of items from a
*pool
@@ -66,7 +67,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker {
* poolSize*(poolSize-1)
* \param pickSize - the number of items to pick from pool (<= poolSize)
* \param firstPicks - (optional) the first items in the pick list
* \param seed - (optional) seed for the random number generator
* \param seed - (optional) seed for the random number generator.
* If this is <0 the generator will be seeded with a
* random number.
*/
template <typename T>
RDKit::INT_VECT lazyPick(T &func, unsigned int poolSize,
@@ -117,7 +120,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker {
* \param pickSize - the number of items to pick from pool (<= poolSize)
* \param firstPicks - indices of the items used to seed the pick set.
* \param seed - (optional) seed for the random number generator
*/
* If this is <0 the generator will be seeded with a
* random number.
*/
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize,
unsigned int pickSize, RDKit::INT_VECT firstPicks,
int seed = -1) const {
@@ -175,11 +180,14 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
typedef boost::mt19937 rng_type;
typedef boost::uniform_int<> distrib_type;
typedef boost::variate_generator<rng_type &, distrib_type> source_type;
rng_type generator(42u);
rng_type generator;
distrib_type dist(0, poolSize - 1);
if (seed >= 0) {
generator.seed(static_cast<rng_type::result_type>(seed));
} else {
generator.seed(std::random_device()());
}
source_type randomSource(generator, dist);
if (seed > 0) generator.seed(static_cast<rng_type::result_type>(seed));
pick = randomSource();
// add the pick to the picks
picks.push_back(pick);
@@ -290,11 +298,12 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
template <typename T>
RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize,
unsigned int pickSize) const {
RDKit::INT_LIST firstPicks;
RDKit::INT_VECT firstPicks;
double threshold = -1.0;
return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, -1,
int seed = -1;
return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, seed,
threshold);
}
};
}; // namespace RDPickers
#endif

View File

@@ -129,7 +129,7 @@ class TestCase(unittest.TestCase):
picker = rdSimDivPickers.MaxMinPicker()
mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
self.assertEqual(len(mm2), N)
self.assertEqual(tuple(mm2), tuple(mm1))
self.assertNotEqual(tuple(mm2), tuple(mm1))
picker = None
ds = []
@@ -161,18 +161,18 @@ class TestCase(unittest.TestCase):
return d
picker = rdSimDivPickers.MaxMinPicker()
mm1 = picker.LazyPick(func, len(vs), N)
mm1 = picker.LazyPick(func, len(vs), N, seed=42)
self.assertEqual(len(mm1), N)
mm2 = picker.LazyPick(func, len(vs), N, useCache=False)
mm2 = picker.LazyPick(func, len(vs), N, useCache=False, seed=42)
self.assertEqual(len(mm2), N)
self.assertEqual(list(mm1), list(mm2))
mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, seed=42)
self.assertEqual(len(mm2), N)
self.assertEqual(list(mm1), list(mm2))
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False)
mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False, seed=42)
self.assertEqual(len(mm2), N)
self.assertEqual(list(mm1), list(mm2))
@@ -214,11 +214,11 @@ class TestCase(unittest.TestCase):
N = 5
fps = [DataStructs.CreateFromBitString(x) for x in fps]
picker = rdSimDivPickers.MaxMinPicker()
mm1 = picker.LazyBitVectorPick(fps, len(fps), N)
mm1 = picker.LazyBitVectorPick(fps, len(fps), N, seed=42)
self.assertEqual(len(mm1), N)
self.assertEqual(list(mm1), [37, 1, 43, 38, 16])
mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False)
mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False, seed=42)
self.assertEqual(len(mm2), N)
self.assertEqual(list(mm1), list(mm2))
@@ -231,11 +231,11 @@ class TestCase(unittest.TestCase):
fp = DataStructs.CreateFromFPSText(line.strip())
fps.append(fp)
mmp =rdSimDivPickers.MaxMinPicker()
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20))
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,seed=42))
self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\
881,516,497,412,718,869,407])
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875]))
ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875],seed=42))
self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\
881,516,497,412,718,869,407])
@@ -249,13 +249,13 @@ class TestCase(unittest.TestCase):
fp = DataStructs.CreateFromFPSText(line.strip())
fps.append(fp)
mmp =rdSimDivPickers.MaxMinPicker()
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0)
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0,seed=42)
self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630,\
881,516,497,412,718,869,407])
self.assertAlmostEqual(threshold,0.8977,4)
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91)
ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91,seed=42)
self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630])
self.assertTrue(threshold>=0.91)

View File

@@ -18,7 +18,7 @@ namespace {
// Distance functor for the picker tests: treats the two indices as points on
// the number line and returns the absolute gap between them.
double dist_on_line(unsigned int i, unsigned int j) {
  const double lhs = static_cast<double>(i);
  const double rhs = static_cast<double>(j);
  return std::fabs(lhs - rhs);
}
}
} // namespace
void testGithub1421() {
BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdErrorLog)
@@ -32,8 +32,40 @@ void testGithub1421() {
BOOST_LOG(rdErrorLog) << "Done" << std::endl;
}
// Regression test for GitHub issue 2245 (picker seeding was deterministic).
//
// Verifies three things about MaxMinPicker::lazyPick:
//   1. an explicit seed of -1 gives picks that vary between calls,
//   2. the overload without a seed argument also varies between calls,
//   3. an explicit non-negative seed gives reproducible picks.
void testGithub2245() {
  BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdErrorLog) << "Testing github issue 2245: MinMax Diversity picker "
                           "seeding shows deterministic / non-random behaviour."
                        << std::endl;
  // Two randomly seeded runs can legitimately agree by chance (the starting
  // pick is drawn from only poolSz values), so a single comparison would make
  // this test fail intermittently. Compare against several fresh runs and
  // require that at least one differs: a picker that is still deterministic
  // fails every attempt, while a chance collision is absorbed by the retries.
  const int maxAttempts = 5;
  {
    RDPickers::MaxMinPicker pkr;
    int poolSz = 1000;
    auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1);
    bool differs = false;
    for (int attempt = 0; attempt < maxAttempts && !differs; ++attempt) {
      auto picks2 =
          pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1);
      differs = (picks1 != picks2);
    }
    TEST_ASSERT(differs);
  }
  {  // make sure the default is also random
    RDPickers::MaxMinPicker pkr;
    int poolSz = 1000;
    auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10);
    bool differs = false;
    for (int attempt = 0; attempt < maxAttempts && !differs; ++attempt) {
      auto picks2 = pkr.lazyPick(dist_on_line, poolSz, 10);
      differs = (picks1 != picks2);
    }
    TEST_ASSERT(differs);
  }
  {  // and we're still reproducible when we want to be
    RDPickers::MaxMinPicker pkr;
    int poolSz = 1000;
    auto picks1 =
        pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d);
    auto picks2 =
        pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d);
    TEST_ASSERT(picks1 == picks2);
  }
  BOOST_LOG(rdErrorLog) << "Done" << std::endl;
}
// Test driver entry point: calls RDLog::InitLogs() first so the BOOST_LOG
// output used by the tests has been set up, then runs each regression test in
// order and returns 0 once they have all returned.
int main() {
RDLog::InitLogs();
testGithub1421();
testGithub2245();
return 0;
}