From 4d14a819e605a341be256902f3112aba9228651d Mon Sep 17 00:00:00 2001 From: Greg Landrum Date: Thu, 31 Jan 2019 06:03:39 +0100 Subject: [PATCH] Fixes #2245 (#2250) * not yet done * update docs, python tests, and the release notes * updates in response to review --- Code/GraphMol/FileParsers/FileParsers.h | 2 +- .../org/RDKit/DiversityPickerTests.java | 16 +++++++-- Code/SimDivPickers/MaxMinPicker.h | 27 ++++++++++----- Code/SimDivPickers/Wrap/testPickers.py | 22 ++++++------ Code/SimDivPickers/testPickers.cpp | 34 ++++++++++++++++++- ReleaseNotes.md | 5 +++ 6 files changed, 82 insertions(+), 24 deletions(-) diff --git a/Code/GraphMol/FileParsers/FileParsers.h b/Code/GraphMol/FileParsers/FileParsers.h index ca5d4dbe3..576575b03 100644 --- a/Code/GraphMol/FileParsers/FileParsers.h +++ b/Code/GraphMol/FileParsers/FileParsers.h @@ -34,7 +34,7 @@ class MolFileUnhandledFeatureException : public std::exception { : _msg(msg){}; //! get the error message const char *message() const { return _msg.c_str(); }; - ~MolFileUnhandledFeatureException() throw() override{}; + ~MolFileUnhandledFeatureException() noexcept override{}; private: std::string _msg; diff --git a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/DiversityPickerTests.java b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/DiversityPickerTests.java index 3ae245f62..7d046343d 100644 --- a/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/DiversityPickerTests.java +++ b/Code/JavaWrappers/gmwrapper/src-test/org/RDKit/DiversityPickerTests.java @@ -131,7 +131,7 @@ public class DiversityPickerTests extends GraphMolTest { @Test public void test1() { - Int_Vect picks1 = RDKFuncs.pickUsingFingerprints(fps,5); + Int_Vect picks1 = RDKFuncs.pickUsingFingerprints(fps,5,42); assertEquals(picks1.size(),5); assertEquals(picks1.get(0),37); assertEquals(picks1.get(1),1); @@ -146,10 +146,22 @@ public class DiversityPickerTests extends GraphMolTest { Int_Vect picks2 = RDKFuncs.pickUsingFingerprints(fps,5,0,avoid,true); assertEquals(picks1.size(),5); assertEquals(picks2.size(),5); - for(int i=0;i #include "DistPicker.h" #include +#include namespace RDPickers { @@ -34,7 +35,7 @@ class RDKIT_SIMDIVPICKERS_EXPORT distmatFunctor { private: const double *dp_distMat; }; -} +} // namespace /*! \brief Implements the MaxMin algorithm for picking a subset of item from a *pool @@ -66,7 +67,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker { * poolSize*(poolSize-1) * \param pickSize - the number items to pick from pool (<= poolSize) * \param firstPicks - (optional)the first items in the pick list - * \param seed - (optional) seed for the random number generator + * \param seed - (optional) seed for the random number generator. + * If this is <0 the generator will be seeded with a + * random number. */ template RDKit::INT_VECT lazyPick(T &func, unsigned int poolSize, @@ -117,7 +120,9 @@ class RDKIT_SIMDIVPICKERS_EXPORT MaxMinPicker : public DistPicker { * \param pickSize - the number items to pick from pool (<= poolSize) * \param firstPicks - indices of the items used to seed the pick set. * \param seed - (optional) seed for the random number generator - */ + * If this is <0 the generator will be seeded with a + * random number. + */ RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize, RDKit::INT_VECT firstPicks, int seed = -1) const { @@ -175,11 +180,14 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize, typedef boost::mt19937 rng_type; typedef boost::uniform_int<> distrib_type; typedef boost::variate_generator source_type; - rng_type generator(42u); + rng_type generator; distrib_type dist(0, poolSize - 1); + if (seed >= 0) { + generator.seed(static_cast(seed)); + } else { + generator.seed(std::random_device()()); + } source_type randomSource(generator, dist); - if (seed > 0) generator.seed(static_cast(seed)); - pick = randomSource(); // add the pick to the picks picks.push_back(pick); @@ -290,11 +298,12 @@ RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize, template RDKit::INT_VECT MaxMinPicker::lazyPick(T &func, unsigned int poolSize, unsigned int pickSize) const { - RDKit::INT_LIST firstPicks; + RDKit::INT_VECT firstPicks; double threshold = -1.0; - return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, -1, + int seed = -1; + return MaxMinPicker::lazyPick(func, poolSize, pickSize, firstPicks, seed, threshold); } -}; +}; // namespace RDPickers #endif diff --git a/Code/SimDivPickers/Wrap/testPickers.py b/Code/SimDivPickers/Wrap/testPickers.py index 8fffb67b2..dfef1924e 100755 --- a/Code/SimDivPickers/Wrap/testPickers.py +++ b/Code/SimDivPickers/Wrap/testPickers.py @@ -129,7 +129,7 @@ class TestCase(unittest.TestCase): picker = rdSimDivPickers.MaxMinPicker() mm2 = picker.LazyBitVectorPick(vs, len(vs), N) self.assertEqual(len(mm2), N) - self.assertEqual(tuple(mm2), tuple(mm1)) + self.assertNotEqual(tuple(mm2), tuple(mm1)) picker = None ds = [] @@ -161,18 +161,18 @@ class TestCase(unittest.TestCase): return d picker = rdSimDivPickers.MaxMinPicker() - mm1 = picker.LazyPick(func, len(vs), N) + mm1 = picker.LazyPick(func, len(vs), N, seed=42) self.assertEqual(len(mm1), N) - mm2 = picker.LazyPick(func, len(vs), N, useCache=False) + mm2 = picker.LazyPick(func, len(vs), N, useCache=False, seed=42) self.assertEqual(len(mm2), N) self.assertEqual(list(mm1), list(mm2)) - mm2 = picker.LazyBitVectorPick(vs, len(vs), N) + mm2 = picker.LazyBitVectorPick(vs, len(vs), N, seed=42) self.assertEqual(len(mm2), N) self.assertEqual(list(mm1), list(mm2)) - mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False) + mm2 = picker.LazyBitVectorPick(vs, len(vs), N, useCache=False, seed=42) self.assertEqual(len(mm2), N) self.assertEqual(list(mm1), list(mm2)) @@ -214,11 +214,11 @@ class TestCase(unittest.TestCase): N = 5 fps = [DataStructs.CreateFromBitString(x) for x in fps] picker = rdSimDivPickers.MaxMinPicker() - mm1 = picker.LazyBitVectorPick(fps, len(fps), N) + mm1 = picker.LazyBitVectorPick(fps, len(fps), N, seed=42) self.assertEqual(len(mm1), N) self.assertEqual(list(mm1), [37, 1, 43, 38, 16]) - mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False) + mm2 = picker.LazyBitVectorPick(fps, len(fps), N, useCache=False, seed=42) self.assertEqual(len(mm2), N) self.assertEqual(list(mm1), list(mm2)) @@ -231,11 +231,11 @@ class TestCase(unittest.TestCase): fp = DataStructs.CreateFromFPSText(line.strip()) fps.append(fp) mmp =rdSimDivPickers.MaxMinPicker() - ids=list(mmp.LazyBitVectorPick(fps,len(fps),20)) + ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,seed=42)) self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\ 881,516,497,412,718,869,407]) - ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875])) + ids=list(mmp.LazyBitVectorPick(fps,len(fps),20,firstPicks=[374,720,690,339,875],seed=42)) self.assertEqual(ids,[374,720,690,339,875,842,404,725,120,385,115,868,630,\ 881,516,497,412,718,869,407]) @@ -249,13 +249,13 @@ class TestCase(unittest.TestCase): fp = DataStructs.CreateFromFPSText(line.strip()) fps.append(fp) mmp =rdSimDivPickers.MaxMinPicker() - ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0) + ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,-1.0,seed=42) self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630,\ 881,516,497,412,718,869,407]) self.assertAlmostEqual(threshold,0.8977,4) - ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91) + ids,threshold=mmp.LazyBitVectorPickWithThreshold(fps,len(fps),20,0.91,seed=42) self.assertEqual(list(ids),[374,720,690,339,875,842,404,725,120,385,115,868,630]) self.assertTrue(threshold>=0.91) diff --git a/Code/SimDivPickers/testPickers.cpp b/Code/SimDivPickers/testPickers.cpp index 63b1a2bc4..a6e5e2cb5 100644 --- a/Code/SimDivPickers/testPickers.cpp +++ b/Code/SimDivPickers/testPickers.cpp @@ -18,7 +18,7 @@ namespace { double dist_on_line(unsigned int i, unsigned int j) { return std::fabs((double)i - (double)j); } -} +} // namespace void testGithub1421() { BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl; BOOST_LOG(rdErrorLog) @@ -32,8 +32,40 @@ void testGithub1421() { BOOST_LOG(rdErrorLog) << "Done" << std::endl; } +void testGithub2245() { + BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl; + BOOST_LOG(rdErrorLog) << "Testing github issue 2245: MinMax Diversity picker " + "seeding shows deterministic / non-random behaviour." + << std::endl; + { + RDPickers::MaxMinPicker pkr; + int poolSz = 1000; + auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1); + auto picks2 = pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), -1); + TEST_ASSERT(picks1 != picks2); + } + { // make sure the default is also random + RDPickers::MaxMinPicker pkr; + int poolSz = 1000; + auto picks1 = pkr.lazyPick(dist_on_line, poolSz, 10); + auto picks2 = pkr.lazyPick(dist_on_line, poolSz, 10); + TEST_ASSERT(picks1 != picks2); + } + { // and we're still reproducible when we want to be + RDPickers::MaxMinPicker pkr; + int poolSz = 1000; + auto picks1 = + pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d); + auto picks2 = + pkr.lazyPick(dist_on_line, poolSz, 10, RDKit::INT_VECT(), 0xf00d); + TEST_ASSERT(picks1 == picks2); + } + BOOST_LOG(rdErrorLog) << "Done" << std::endl; +} + int main() { RDLog::InitLogs(); testGithub1421(); + testGithub2245(); return 0; } diff --git a/ReleaseNotes.md b/ReleaseNotes.md index b5522c004..907774c14 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -6,6 +6,11 @@ this rdkit-discuss post to learn what your options are if you need to keep using Python 2: https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg08354.html +## Backwards incompatible changes +- The fix for github #2245 means that the default behavior of the MaxMinPicker is now truly random. + If you would like to reproduce the previous behavior, provide a seed value of 42. + + # Release_2018.09.1 (Changes relative to Release_2018.03.1)