Files
rdkit/Code/ML/InfoTheory/Wrap/testRanker.py
Greg Landrum 5df8f93e27 Get things working with numpy 2.4 and pandas 3.0 (#9072)
* get BertzCT working with numpy 2.4

* tests pass with Pandas 3.0
(on windows at least)

* update testRanker too

* update nb test

* run win32 CI tests with different pandas versions
also updates boost version

* works with pandas 2.0?

* update linux_build_py311 -> linux_build_py312
test both old and new pandas

can't go higher with the python version yet because the older pandas and numpy are not available.

* doctest fix?

---------

Co-authored-by: = <=>
2026-02-04 12:06:21 +01:00

161 lines
4.3 KiB
Python

import io
import os
import pickle
import unittest
import numpy
from rdkit import DataStructs, RDConfig, RDRandom
from rdkit.ML.InfoTheory import rdInfoTheory as rdit
def feq(a, b, tol=1e-4):
    """Return True when `a` and `b` differ by strictly less than `tol`."""
    difference = abs(a - b)
    return difference < tol
class TestCase(unittest.TestCase):
    """Tests for the rdInfoTheory wrappers: gain functions and InfoBitRanker."""

    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def _accumulate_random_votes(self, ranker, nbits, ninst, dm, nact):
        """Feed `ninst` randomly generated, randomly labelled fingerprints to `ranker`.

        For every fingerprint, `dm` bit positions are drawn from
        ``RDRandom.randrange(0, nbits)`` and set (draws may repeat), then one
        more draw from ``RDRandom.randrange(0, ninst)`` labels it 1 when the
        value is below `nact` and 0 otherwise.  The order of RDRandom calls is
        fixed so that a seeded caller always gets the same data.

        Returns:
            A ``(fps, na, ni)`` tuple: the list of ``(fingerprint, label)``
            pairs, the number labelled 1 and the number labelled 0.
        """
        fps = []
        na = 0
        ni = 0
        for _ in range(ninst):
            v = DataStructs.SparseBitVect(nbits)
            for _bit in range(dm):
                v.SetBit(RDRandom.randrange(0, nbits))
            if RDRandom.randrange(0, ninst) < nact:
                label = 1
                na += 1
            else:
                label = 0
                ni += 1
            ranker.AccumulateVotes(v, label)
            fps.append((v, label))
        return fps, na, ni

    def test0GainFuns(self):
        """Check InfoEntropy, InfoGain and ChiSquare against reference values."""
        # (class counts, expected entropy)
        entropy_cases = (
            ([9, 5], 0.9403),
            ([9, 9], 1.0000),
            ([5, 5], 1.0000),
            ([5, 0], 0.0000),
            ([5, 5, 5], 1.5850),
            ([2, 5, 5], 1.4834),
        )
        for counts, expected in entropy_cases:
            with self.subTest(func='InfoEntropy', counts=counts):
                # The arrays are built with an explicit float dtype, as in the
                # original checks.
                arr = numpy.array(counts, float)
                value = rdit.InfoEntropy(arr)
                self.assertTrue(
                    feq(value, expected),
                    msg=f'InfoEntropy({counts}) = {value}, expected {expected}')

        # (contingency table, expected info gain, expected chi-square or None)
        contingency_cases = (
            ([[6, 2], [3, 3]], 0.0481, 0.9333),
            ([[1, 1], [2, 1]], 0.0200, None),
            ([[2, 0], [1, 2]], 0.4200, None),
            ([[0, 0], [0, 0]], 0.0000, None),
            ([[1, 0], [1, 0]], 0.0000, None),
        )
        for table, expected_gain, expected_chi2 in contingency_cases:
            with self.subTest(table=table):
                mat = numpy.array(table, float)
                gain = rdit.InfoGain(mat)
                self.assertTrue(
                    feq(gain, expected_gain),
                    msg=f'InfoGain({table}) = {gain}, expected {expected_gain}')
                if expected_chi2 is not None:
                    chi2 = rdit.ChiSquare(mat)
                    self.assertTrue(
                        feq(chi2, expected_chi2),
                        msg=f'ChiSquare({table}) = {chi2}, expected {expected_chi2}')

    def test1ranker(self):
        """Compare rankers built with ENTROPY, the default type and BIASENTROPY."""
        nbits = 100
        ninst = 100
        dm = 50
        nact = 10
        nc = 2
        topn = 50

        # NOTE(review): unlike test2ranker, this test does not seed RDRandom, so
        # the generated fingerprints may differ between runs unless the
        # generator is seeded elsewhere - confirm that this is intended.
        rn = rdit.InfoBitRanker(nbits, nc, rdit.InfoType.ENTROPY)
        fps, na, ni = self._accumulate_random_votes(rn, nbits, ninst, dm, nact)
        res = rn.GetTopN(topn)

        # A ranker created without an explicit info type must produce the same
        # ranking from the same votes.
        rn2 = rdit.InfoBitRanker(nbits, nc)
        for fp, label in fps:
            rn2.AccumulateVotes(fp, label)
        res2 = rn2.GetTopN(topn)
        self.assertTrue((res == res2).all())

        rn3 = rdit.InfoBitRanker(nbits, nc, rdit.InfoType.BIASENTROPY)
        # NOTE(review): a `rn3.SetBiasList([0])` call was left commented out
        # here; the ranking below runs without an explicit bias list.
        for fp, label in fps:
            rn3.AccumulateVotes(fp, label)
        res3 = rn3.GetTopN(topn)
        for i in range(topn):
            fan = res3[i, 2] / na
            fin = res3[i, 3] / ni
            self.assertGreater(fan, fin)

    def test2ranker(self):
        """Check that mask bits limit which bits are ranked, and how many."""
        nbits = 100
        ninst = 100
        dm = 50
        nact = 10
        nc = 2

        RDRandom.seed(23)
        rn = rdit.InfoBitRanker(nbits, nc, rdit.InfoType.ENTROPY)
        rn.SetMaskBits([63, 70, 15, 25, 10])
        self._accumulate_random_votes(rn, nbits, ninst, dm, nact)

        res = rn.GetTopN(5)
        ids = sorted(int(row[0]) for row in res)
        self.assertEqual(ids, [10, 15, 25, 63, 70])

        # Asking for more bits than were masked in must raise.
        with self.assertRaises(Exception):
            rn.GetTopN(10)

    def test3Issue140(self):
        """Regression test for issue 140: GetTopN(1) must succeed on this data."""
        nbits = 2
        nc = 2
        # Each example is the bit values followed by the class label.
        examples = [[0, 0, 0], [1, 1, 0], [0, 0, 1], [1, 1, 1]]
        rn = rdit.InfoBitRanker(nbits, nc, rdit.InfoType.ENTROPY)
        for *bits, act in examples:
            bv = DataStructs.ExplicitBitVect(nbits)
            for i, bit in enumerate(bits):
                bv[i] = bit
            rn.AccumulateVotes(bv, act)
        try:
            res = rn.GetTopN(1)
        except Exception as exc:
            # Report the underlying error instead of silently discarding it.
            self.fail(f'GetTopN(1) raised {exc!r}')
        self.assertIsNotNone(res)

    def test4Issue237(self):
        """Regression test for issue 237 using the pickled data in testData."""
        pkl_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'ML', 'InfoTheory', 'Wrap',
                                'testData', 'Issue237.pkl')
        # The pickle is read as text so that Windows line endings can be
        # normalised before unpickling; the `with` block closes the file.
        with open(pkl_path, 'r') as inTF:
            buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
        # The pickle is a fixture shipped under RDBaseDir, not external input.
        examples, avail, bias, nB, nPoss = pickle.loads(buf, encoding='bytes')

        ranker = rdit.InfoBitRanker(nB, nPoss, rdit.InfoType.BIASENTROPY)
        ranker.SetMaskBits(avail)
        for ex in examples:
            ranker.AccumulateVotes(ex[1], ex[-1])
        # this dumps core on linux if the bug isn't fixed:
        v = ranker.GetTopN(1)
        self.assertEqual(int(v[0][0]), 12)
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()