diff --git a/Code/ChemicalFeatures/Wrap/testFeatures.py b/Code/ChemicalFeatures/Wrap/testFeatures.py index bb45f918d..c58d8e850 100644 --- a/Code/ChemicalFeatures/Wrap/testFeatures.py +++ b/Code/ChemicalFeatures/Wrap/testFeatures.py @@ -4,11 +4,11 @@ # # @@ All Rights Reserved @@ # -from __future__ import print_function + import os, sys import io import unittest -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import Chem from rdkit.Chem import ChemicalFeatures @@ -77,8 +77,8 @@ class TestCase(unittest.TestCase): def testPickle(self): ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0), 123) - pkl = cPickle.dumps(ffeat) - ffeat2 = cPickle.loads(pkl, encoding='bytes') + pkl = pickle.dumps(ffeat) + ffeat2 = pickle.loads(pkl, encoding='bytes') self.assertTrue(ffeat2.GetId() == ffeat.GetId()) self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily()) self.assertTrue(ffeat2.GetType() == ffeat.GetType()) @@ -90,7 +90,7 @@ class TestCase(unittest.TestCase): buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() inF = io.BytesIO(buf) - ffeat2 = cPickle.load(inF, encoding='bytes') + ffeat2 = pickle.load(inF, encoding='bytes') # this version (1.0) does not have an id in the byte stream self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily()) self.assertTrue(ffeat2.GetType() == ffeat.GetType()) @@ -100,13 +100,13 @@ class TestCase(unittest.TestCase): # uncomment the following to generate (overrwrite) new version of pickled # data file - #cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+')) + #pickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+')) inTF = open( os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'), 'r') buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() inF = io.BytesIO(buf) - ffeat2 = cPickle.load(inF, encoding='bytes') + ffeat2 = pickle.load(inF, encoding='bytes') self.assertTrue(ffeat2.GetId() == ffeat.GetId()) self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily()) self.assertTrue(ffeat2.GetType() == ffeat.GetType()) diff --git a/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py b/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py index e79353115..c7108df39 100755 --- a/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py +++ b/Code/DataManip/MetricMatrixCalc/Wrap/testMatricCalc.py @@ -1,4 +1,4 @@ -from __future__ import division + from rdkit import RDConfig import unittest from rdkit.DataManip.Metric import rdMetricMatrixCalc as rdmmc @@ -8,145 +8,145 @@ from rdkit import DataStructs def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test0DistsArray(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def test0DistsArray(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - # initialize a double array and check if get back the expected distances - desc = numpy.zeros((3, 2), 'd') - desc[1, 0] = 1.0 - desc[2, 0] = 1.0 - desc[2, 1] = 1.0 + # initialize a double array and check if get back the expected distances + desc = numpy.zeros((3, 2), 'd') + desc[1, 0] = 1.0 + desc[2, 0] = 1.0 + desc[2, 1] = 1.0 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat with an flaot array - desc = numpy.zeros((3, 2), 'f') - desc[1, 0] = 1.0 - desc[2, 0] = 1.0 - desc[2, 1] = 1.0 + # repeat with an flaot array + desc = numpy.zeros((3, 2), 'f') + desc[1, 0] = 1.0 + desc[2, 0] = 1.0 + desc[2, 1] = 1.0 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # finally with an interger array - desc = numpy.zeros((3, 2), 'i') - desc[1, 0] = 1 - desc[2, 0] = 1 - desc[2, 1] = 1 + # finally with an interger array + desc = numpy.zeros((3, 2), 'i') + desc[1, 0] = 1 + desc[2, 0] = 1 + desc[2, 1] = 1 - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def ctest1DistsListArray(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def ctest1DistsListArray(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - desc = [numpy.array([0.0, 0.0], 'd'), numpy.array([1.0, 0.0], 'd'), - numpy.array([1.0, 1.0], 'd')] - dmat = rdmmc.GetEuclideanDistMat(desc) + desc = [numpy.array([0.0, 0.0], 'd'), numpy.array([1.0, 0.0], 'd'), + numpy.array([1.0, 1.0], 'd')] + dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat the test with a list of numpy.arrays of floats - desc = [numpy.array([0.0, 0.0], 'f'), numpy.array([1.0, 0.0], 'f'), - numpy.array([1.0, 1.0], 'f')] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # repeat the test with a list of numpy.arrays of floats + desc = [numpy.array([0.0, 0.0], 'f'), numpy.array([1.0, 0.0], 'f'), + numpy.array([1.0, 1.0], 'f')] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - # repeat the test with a list of numpy.arrays of ints - desc = [numpy.array([0, 0], 'i'), numpy.array([1, 0], 'i'), numpy.array([1, 1], 'i')] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # repeat the test with a list of numpy.arrays of ints + desc = [numpy.array([0, 0], 'i'), numpy.array([1, 0], 'i'), numpy.array([1, 1], 'i')] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def test2DistListList(self): - exp = numpy.array([1., 1.414213, 1.0], 'd') + def test2DistListList(self): + exp = numpy.array([1., 1.414213, 1.0], 'd') - desc = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + desc = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - #test with ints - desc = [[0, 0], [1, 0], [1, 1]] - dmat = rdmmc.GetEuclideanDistMat(desc) - for i in range(numpy.shape(dmat)[0]): - assert feq(dmat[i], exp[i]) + # test with ints + desc = [[0, 0], [1, 0], [1, 1]] + dmat = rdmmc.GetEuclideanDistMat(desc) + for i in range(numpy.shape(dmat)[0]): + assert feq(dmat[i], exp[i]) - def test3Compare(self): - n = 30 - m = 5 + def test3Compare(self): + n = 30 + m = 5 - dscArr = numpy.zeros((n, m), 'd') - for i in range(n): - for j in range(m): - dscArr[i, j] = random.random() - dmatArr = rdmmc.GetEuclideanDistMat(dscArr) + dscArr = numpy.zeros((n, m), 'd') + for i in range(n): + for j in range(m): + dscArr[i, j] = random.random() + dmatArr = rdmmc.GetEuclideanDistMat(dscArr) - dscLL = [] - for i in range(n): - row = [] - for j in range(m): - row.append(dscArr[i, j]) - dscLL.append(row) - dmatLL = rdmmc.GetEuclideanDistMat(dscLL) + dscLL = [] + for i in range(n): + row = [] + for j in range(m): + row.append(dscArr[i, j]) + dscLL.append(row) + dmatLL = rdmmc.GetEuclideanDistMat(dscLL) - assert numpy.shape(dmatArr) == numpy.shape(dmatLL) + assert numpy.shape(dmatArr) == numpy.shape(dmatLL) - for i in range(n * (n - 1) // 2): - assert feq(dmatArr[i], dmatLL[i]) + for i in range(n * (n - 1) // 2): + assert feq(dmatArr[i], dmatLL[i]) - def test4ebv(self): + def test4ebv(self): - n = 30 - m = 2048 - dm = 800 - lst = [] - for i in range(n): - v = DataStructs.ExplicitBitVect(m) - for j in range(dm): - v.SetBit(random.randrange(0, m)) - lst.append(v) + n = 30 + m = 2048 + dm = 800 + lst = [] + for i in range(n): + v = DataStructs.ExplicitBitVect(m) + for j in range(dm): + v.SetBit(random.randrange(0, m)) + lst.append(v) - dMat = rdmmc.GetTanimotoDistMat(lst) + dMat = rdmmc.GetTanimotoDistMat(lst) - sMat = rdmmc.GetTanimotoSimMat(lst) + sMat = rdmmc.GetTanimotoSimMat(lst) - for i in range(n * (n - 1) // 2): - assert feq(sMat[i] + dMat[i], 1.0) + for i in range(n * (n - 1) // 2): + assert feq(sMat[i] + dMat[i], 1.0) - def test5sbv(self): + def test5sbv(self): - n = 30 - m = 2048 - dm = 800 - lst = [] - for i in range(n): - v = DataStructs.SparseBitVect(m) - for j in range(dm): - v.SetBit(random.randrange(0, m)) - lst.append(v) + n = 30 + m = 2048 + dm = 800 + lst = [] + for i in range(n): + v = DataStructs.SparseBitVect(m) + for j in range(dm): + v.SetBit(random.randrange(0, m)) + lst.append(v) - dMat = rdmmc.GetTanimotoDistMat(lst) + dMat = rdmmc.GetTanimotoDistMat(lst) - sMat = rdmmc.GetTanimotoSimMat(lst) + sMat = rdmmc.GetTanimotoSimMat(lst) - for i in range(n * (n - 1) // 2): - assert feq(sMat[i] + dMat[i], 1.0) + for i in range(n * (n - 1) // 2): + assert feq(sMat[i] + dMat[i], 1.0) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/DataStructs/Wrap/testBV.py b/Code/DataStructs/Wrap/testBV.py index a2d71c978..17f11c067 100755 --- a/Code/DataStructs/Wrap/testBV.py +++ b/Code/DataStructs/Wrap/testBV.py @@ -1,318 +1,318 @@ from rdkit import DataStructs from rdkit import RDConfig import unittest -from rdkit.six.moves import cPickle as pickle +import pickle import random import numpy def feq(a, b, tol=1e-4): - return abs(a - b) < tol + return abs(a - b) < tol class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test0FromList(self): - bv1 = DataStructs.SparseBitVect(1000) - bv2 = DataStructs.SparseBitVect(1000) - obits = range(0, 1000, 3) + def test0FromList(self): + bv1 = DataStructs.SparseBitVect(1000) + bv2 = DataStructs.SparseBitVect(1000) + obits = range(0, 1000, 3) - for bit in obits: - bv1.SetBit(bit) + for bit in obits: + bv1.SetBit(bit) - bv2.SetBitsFromList(obits) + bv2.SetBitsFromList(obits) - for i in range(1000): - assert bv1.GetBit(i) == bv2.GetBit(i) + for i in range(1000): + assert bv1.GetBit(i) == bv2.GetBit(i) - self.assertTrue(bv1 == bv2) - bv2.SetBit(1) - self.assertTrue(bv1 != bv2) - bv2.UnSetBit(1) - self.assertTrue(bv1 == bv2) + self.assertTrue(bv1 == bv2) + bv2.SetBit(1) + self.assertTrue(bv1 != bv2) + bv2.UnSetBit(1) + self.assertTrue(bv1 == bv2) - bv2.UnSetBitsFromList(obits) - for i in range(1000): - assert bv2.GetBit(i) == 0 + bv2.UnSetBitsFromList(obits) + for i in range(1000): + assert bv2.GetBit(i) == 0 - bv1 = DataStructs.ExplicitBitVect(1000) - bv2 = DataStructs.ExplicitBitVect(1000) - obits = range(0, 1000, 3) + bv1 = DataStructs.ExplicitBitVect(1000) + bv2 = DataStructs.ExplicitBitVect(1000) + obits = range(0, 1000, 3) - for bit in obits: - bv1.SetBit(bit) + for bit in obits: + bv1.SetBit(bit) - bv2.SetBitsFromList(obits) + bv2.SetBitsFromList(obits) - for i in range(1000): - assert bv1.GetBit(i) == bv2.GetBit(i) + for i in range(1000): + assert bv1.GetBit(i) == bv2.GetBit(i) - bv2.UnSetBitsFromList(obits) - for i in range(1000): - assert bv2.GetBit(i) == 0 + bv2.UnSetBitsFromList(obits) + for i in range(1000): + assert bv2.GetBit(i) == 0 - def test01BVWithAllOnes(self): - bv1 = DataStructs.ExplicitBitVect(10, True) - for i in range(10): - assert bv1.GetBit(i) == 1 + def test01BVWithAllOnes(self): + bv1 = DataStructs.ExplicitBitVect(10, True) + for i in range(10): + assert bv1.GetBit(i) == 1 - def test1SparsePickle(self): - nbits = 10000 - bv1 = DataStructs.SparseBitVect(nbits) - for i in range(1000): - x = random.randrange(0, nbits) - bv1.SetBit(x) + def test1SparsePickle(self): + nbits = 10000 + bv1 = DataStructs.SparseBitVect(nbits) + for i in range(1000): + x = random.randrange(0, nbits) + bv1.SetBit(x) - pkl = pickle.dumps(bv1, 1) - bv2 = pickle.loads(pkl) - for i in range(nbits): - assert bv1[i] == bv2[i] + pkl = pickle.dumps(bv1, 1) + bv2 = pickle.loads(pkl) + for i in range(nbits): + assert bv1[i] == bv2[i] - def test2ExplicitPickle(self): - nbits = 10000 - bv1 = DataStructs.ExplicitBitVect(nbits) - for i in range(1000): - x = random.randrange(0, nbits) - bv1.SetBit(x) + def test2ExplicitPickle(self): + nbits = 10000 + bv1 = DataStructs.ExplicitBitVect(nbits) + for i in range(1000): + x = random.randrange(0, nbits) + bv1.SetBit(x) - pkl = pickle.dumps(bv1, 1) - bv2 = pickle.loads(pkl) - for i in range(nbits): - assert bv1[i] == bv2[i] + pkl = pickle.dumps(bv1, 1) + bv2 = pickle.loads(pkl) + for i in range(nbits): + assert bv1[i] == bv2[i] - def test3Bounds(self): - nbits = 10 - bv1 = DataStructs.ExplicitBitVect(nbits) - bv1[0] - with self.assertRaisesRegexp(IndexError, ""): - bv1[11] + def test3Bounds(self): + nbits = 10 + bv1 = DataStructs.ExplicitBitVect(nbits) + bv1[0] + with self.assertRaisesRegexp(IndexError, ""): + bv1[11] - def test4OnBitsInCommon(self): - sz = 100 - bv1 = DataStructs.ExplicitBitVect(sz) - bv2 = DataStructs.ExplicitBitVect(sz) - for i in range(0, sz, 2): - bv1.SetBit(i) - if i < 3 * sz / 4: - bv2.SetBit(i) - self.assertTrue(DataStructs.AllProbeBitsMatch(bv1, bv1.ToBinary())) - self.assertTrue(DataStructs.AllProbeBitsMatch(bv2, bv1.ToBinary())) - self.assertFalse(DataStructs.AllProbeBitsMatch(bv1, bv2.ToBinary())) - self.assertTrue(DataStructs.AllProbeBitsMatch(bv2, bv2.ToBinary())) + def test4OnBitsInCommon(self): + sz = 100 + bv1 = DataStructs.ExplicitBitVect(sz) + bv2 = DataStructs.ExplicitBitVect(sz) + for i in range(0, sz, 2): + bv1.SetBit(i) + if i < 3 * sz / 4: + bv2.SetBit(i) + self.assertTrue(DataStructs.AllProbeBitsMatch(bv1, bv1.ToBinary())) + self.assertTrue(DataStructs.AllProbeBitsMatch(bv2, bv1.ToBinary())) + self.assertFalse(DataStructs.AllProbeBitsMatch(bv1, bv2.ToBinary())) + self.assertTrue(DataStructs.AllProbeBitsMatch(bv2, bv2.ToBinary())) - def test5FromBitString(self): - s1 = '1010' - bv = DataStructs.CreateFromBitString(s1) - self.assertTrue(len(bv) == 4) - self.assertTrue(list(bv.GetOnBits()) == [0, 2]) + def test5FromBitString(self): + s1 = '1010' + bv = DataStructs.CreateFromBitString(s1) + self.assertTrue(len(bv) == 4) + self.assertTrue(list(bv.GetOnBits()) == [0, 2]) - def test6BulkOps(self): - nbits = 10000 - bvs = [] - for bvi in range(10): - bv = DataStructs.ExplicitBitVect(nbits) - for j in range(nbits): - x = random.randrange(0, nbits) - bv.SetBit(x) - bvs.append(bv) - sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + def test6BulkOps(self): + nbits = 10000 + bvs = [] + for bvi in range(10): + bv = DataStructs.ExplicitBitVect(nbits) + for j in range(nbits): + x = random.randrange(0, nbits) + bv.SetBit(x) + bvs.append(bv) + sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - def test7FPS(self): - bv = DataStructs.ExplicitBitVect(32) - bv.SetBit(0) - bv.SetBit(1) - bv.SetBit(17) - bv.SetBit(23) - bv.SetBit(31) - - self.assertEqual(DataStructs.BitVectToFPSText(bv), "03008280") - bv2 = DataStructs.CreateFromFPSText("03008280") - self.assertEqual(bv, bv2) - - self.assertRaises(ValueError, lambda: DataStructs.CreateFromFPSText("030082801")) - - bv2 = DataStructs.CreateFromFPSText("") - self.assertEqual(bv2.GetNumBits(), 0) - - def test8BinText(self): - bv = DataStructs.ExplicitBitVect(32) - bv.SetBit(0) - bv.SetBit(1) - bv.SetBit(17) - bv.SetBit(23) - bv.SetBit(31) - - bv2 = DataStructs.CreateFromBinaryText(DataStructs.BitVectToBinaryText(bv)) - self.assertEqual(bv, bv2) - - bv2 = DataStructs.CreateFromBinaryText("") - self.assertEqual(bv2.GetNumBits(), 0) - - def test9ToNumpy(self): - import numpy - for typ in (DataStructs.ExplicitBitVect,): - bv = typ(32) + def test7FPS(self): + bv = DataStructs.ExplicitBitVect(32) bv.SetBit(0) bv.SetBit(1) bv.SetBit(17) bv.SetBit(23) bv.SetBit(31) - arr = numpy.zeros((32, ), 'i') - DataStructs.ConvertToNumpyArray(bv, arr) - for i in range(bv.GetNumBits()): - self.assertEqual(bv[i], arr[i]) - for typ in (DataStructs.IntSparseIntVect, - DataStructs.LongSparseIntVect, DataStructs.UIntSparseIntVect, - DataStructs.ULongSparseIntVect): - iv = typ(32) - iv[0] = 1 - iv[1] = 1 - iv[17] = 1 - iv[23] = 1 - iv[31] = 1 - arr = numpy.zeros((32, ), 'i') - DataStructs.ConvertToNumpyArray(iv, arr) - for i in range(iv.GetLength()): - self.assertEqual(iv[i], arr[i]) + self.assertEqual(DataStructs.BitVectToFPSText(bv), "03008280") + bv2 = DataStructs.CreateFromFPSText("03008280") + self.assertEqual(bv, bv2) - def test10BulkOps2(self): - nbits = 10000 - bvs = [] - for bvi in range(10): - bv = DataStructs.ExplicitBitVect(nbits) - for j in range(nbits): - x = random.randrange(0, nbits) - bv.SetBit(x) - bvs.append(bv) - bvs = tuple(bvs) - sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + self.assertRaises(ValueError, lambda: DataStructs.CreateFromFPSText("030082801")) - sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + bv2 = DataStructs.CreateFromFPSText("") + self.assertEqual(bv2.GetNumBits(), 0) - sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + def test8BinText(self): + bv = DataStructs.ExplicitBitVect(32) + bv.SetBit(0) + bv.SetBit(1) + bv.SetBit(17) + bv.SetBit(23) + bv.SetBit(31) - sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + bv2 = DataStructs.CreateFromBinaryText(DataStructs.BitVectToBinaryText(bv)) + self.assertEqual(bv, bv2) - sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + bv2 = DataStructs.CreateFromBinaryText("") + self.assertEqual(bv2.GetNumBits(), 0) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + def test9ToNumpy(self): + import numpy + for typ in (DataStructs.ExplicitBitVect,): + bv = typ(32) + bv.SetBit(0) + bv.SetBit(1) + bv.SetBit(17) + bv.SetBit(23) + bv.SetBit(31) + arr = numpy.zeros((32, ), 'i') + DataStructs.ConvertToNumpyArray(bv, arr) + for i in range(bv.GetNumBits()): + self.assertEqual(bv[i], arr[i]) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + for typ in (DataStructs.IntSparseIntVect, + DataStructs.LongSparseIntVect, DataStructs.UIntSparseIntVect, + DataStructs.ULongSparseIntVect): + iv = typ(32) + iv[0] = 1 + iv[1] = 1 + iv[17] = 1 + iv[23] = 1 + iv[31] = 1 + arr = numpy.zeros((32, ), 'i') + DataStructs.ConvertToNumpyArray(iv, arr) + for i in range(iv.GetLength()): + self.assertEqual(iv[i], arr[i]) - def test10BulkOps3(self): - nbits = 10000 - bvs = numpy.empty((10, ), DataStructs.ExplicitBitVect) - for bvi in range(10): - bv = DataStructs.ExplicitBitVect(nbits) - for j in range(nbits): - x = random.randrange(0, nbits) - bv.SetBit(x) - bvs[bvi] = bv - sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + def test10BulkOps2(self): + nbits = 10000 + bvs = [] + for bvi in range(10): + bv = DataStructs.ExplicitBitVect(nbits) + for j in range(nbits): + x = random.randrange(0, nbits) + bv.SetBit(x) + bvs.append(bv) + bvs = tuple(bvs) + sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) - for i in range(len(bvs)): - sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) - sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) - for i in range(len(bvs)): - sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) - self.assertTrue(feq(sim, sims[i])) - sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) - self.assertTrue(feq(sim, sims[i])) + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + def test10BulkOps3(self): + nbits = 10000 + bvs = numpy.empty((10, ), DataStructs.ExplicitBitVect) + for bvi in range(10): + bv = DataStructs.ExplicitBitVect(nbits) + for j in range(nbits): + x = random.randrange(0, nbits) + bv.SetBit(x) + bvs[bvi] = bv + sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs) + for i in range(len(bvs)): + sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) + + sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5) + for i in range(len(bvs)): + sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5) + self.assertTrue(feq(sim, sims[i])) + sim = DataStructs.DiceSimilarity(bvs[0], bvs[i]) + self.assertTrue(feq(sim, sims[i])) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/DataStructs/Wrap/testDiscreteValueVect.py b/Code/DataStructs/Wrap/testDiscreteValueVect.py index a79facc99..47cf8011c 100644 --- a/Code/DataStructs/Wrap/testDiscreteValueVect.py +++ b/Code/DataStructs/Wrap/testDiscreteValueVect.py @@ -11,251 +11,251 @@ import unittest from rdkit import RDConfig #import pickle -from rdkit.six.moves import cPickle as pickle +import pickle from rdkit import DataStructs as ds class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test1Discrete(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 + def test1Discrete(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 - self.assertTrue(len(v1) == 30) - self.assertTrue(v1.GetTotalVal() == 15) + self.assertTrue(len(v1) == 30) + self.assertTrue(v1.GetTotalVal() == 15) - for i in range(len(v1)): - self.assertTrue(v1[i] == (i + 1) % 2) + for i in range(len(v1)): + self.assertTrue(v1[i] == (i + 1) % 2) - self.assertRaises(ValueError, lambda: v1.__setitem__(5, 2)) + self.assertRaises(ValueError, lambda: v1.__setitem__(5, 2)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(len(v1)): - v1[i] = i % 4 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + for i in range(len(v1)): + v1[i] = i % 4 - self.assertTrue(len(v1) == 30) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 4) + self.assertTrue(len(v1) == 30) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 4) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 6)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 6)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 30) - for i in range(len(v1)): - v1[i] = i % 16 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 30) + for i in range(len(v1)): + v1[i] = i % 16 - self.assertTrue(len(v1) == 30) - self.assertTrue(v1.GetTotalVal() == 211) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 16) + self.assertTrue(len(v1) == 30) + self.assertTrue(v1.GetTotalVal() == 211) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 16) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 16)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 16)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 32) - for i in range(len(v1)): - v1[i] = i % 256 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 32) + for i in range(len(v1)): + v1[i] = i % 256 - self.assertTrue(len(v1) == 32) - self.assertTrue(v1.GetTotalVal() == 496) - for i in range(len(v1)): - self.assertTrue(v1[i] == i % 256) + self.assertTrue(len(v1) == 32) + self.assertTrue(v1.GetTotalVal() == 496) + for i in range(len(v1)): + self.assertTrue(v1[i] == i % 256) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 256)) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 256)) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 300) - for i in range(len(v1)): - v1[i] = i % 300 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 300) + for i in range(len(v1)): + v1[i] = i % 300 - self.assertTrue(len(v1) == 300) - self.assertTrue(v1.GetTotalVal() == 44850) - self.assertRaises(ValueError, lambda: v1.__setitem__(10, 65536)) + self.assertTrue(len(v1) == 300) + self.assertTrue(v1.GetTotalVal() == 44850) + self.assertRaises(ValueError, lambda: v1.__setitem__(10, 65536)) - def test2VectDistances(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - v2[2 * i] = 1 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - for i in range(30): - if (i % 3 == 0): - v2[i] = 1 - else: - v2[i] = 0 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 15) + def test2VectDistances(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + v2[2 * i] = 1 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + for i in range(30): + if (i % 3 == 0): + v2[i] = 1 + else: + v2[i] = 0 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 15) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(30): - v1[i] = i % 4 - v2[i] = (i + 1) % 4 + for i in range(30): + v1[i] = i % 4 + v2[i] = (i + 1) % 4 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 44) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 44) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - for i in range(16): - v1[i] = i % 16 - v2[i] = i % 5 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 90) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + for i in range(16): + v1[i] = i % 16 + v2[i] = i % 5 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 90) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v1[0] = 34 - v1[1] = 167 - v1[2] = 3 - v1[3] = 56 - v1[4] = 128 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v1[0] = 34 + v1[1] = 167 + v1[2] = 3 + v1[3] = 56 + v1[4] = 128 - v2[0] = 14 - v2[1] = 67 - v2[2] = 103 - v2[3] = 6 - v2[4] = 228 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 370) + v2[0] = 14 + v2[1] = 67 + v2[2] = 103 + v2[3] = 6 + v2[4] = 228 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 370) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v1[0] = 2345 - v1[1] = 64578 - v1[2] = 34 + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v1[0] = 2345 + v1[1] = 64578 + v1[2] = 34 - v2[0] = 1345 - v2[1] = 54578 - v2[2] = 10034 - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 21000) + v2[0] = 1345 + v2[1] = 54578 + v2[2] = 10034 + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 21000) - def test3Pickles(self): - #outF = file('dvvs.pkl','wb+') - with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/dvvs.pkl'), - 'r') as inTF: - buf = inTF.read().replace('\r\n', '\n').encode('utf-8') - inTF.close() - with io.BytesIO(buf) as inF: - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - #cPickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + def test3Pickles(self): + #outF = file('dvvs.pkl','wb+') + with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/dvvs.pkl'), + 'r') as inTF: + buf = inTF.read().replace('\r\n', '\n').encode('utf-8') + inTF.close() + with io.BytesIO(buf) as inF: + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) - for i in range(30): - v1[i] = i % 4 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - #pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30) + for i in range(30): + v1[i] = i % 4 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) - for i in range(16): - v1[i] = i % 16 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - #pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16) + for i in range(16): + v1[i] = i % 16 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) - v1[0] = 34 - v1[1] = 167 - v1[2] = 3 - v1[3] = 56 - v1[4] = 128 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - #pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5) + v1[0] = 34 + v1[1] = 167 + v1[2] = 3 + v1[3] = 56 + v1[4] = 128 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) - v1[0] = 2345 - v1[1] = 64578 - v1[2] = 34 - v2 = pickle.loads(pickle.dumps(v1)) - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - #pickle.dump(v1,outF) - v2 = pickle.load(inF, encoding='bytes') - self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) - self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) - self.assertTrue(v2.GetTotalVal() != 0) + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3) + v1[0] = 2345 + v1[1] = 64578 + v1[2] = 34 + v2 = pickle.loads(pickle.dumps(v1)) + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + # pickle.dump(v1,outF) + v2 = pickle.load(inF, encoding='bytes') + self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0) + self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal()) + self.assertTrue(v2.GetTotalVal() != 0) - def test4DiscreteVectOps(self): - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) - for i in range(4): - v1[2 * i] = 2 - self.assertTrue(v1.GetTotalVal() == 8) - v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) - for i in range(4): - v2[2 * i + 1] = 2 - v2[2 * i] = 1 - self.assertTrue(v2.GetTotalVal() == 12) + def test4DiscreteVectOps(self): + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) + for i in range(4): + v1[2 * i] = 2 + self.assertTrue(v1.GetTotalVal() == 8) + v2 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 8) + for i in range(4): + v2[2 * i + 1] = 2 + v2[2 * i] = 1 + self.assertTrue(v2.GetTotalVal() == 12) - v3 = v1 | v2 - self.assertTrue(len(v3) == len(v2)) - self.assertTrue(v3.GetTotalVal() == 16) + v3 = v1 | v2 + self.assertTrue(len(v3) == len(v2)) + self.assertTrue(v3.GetTotalVal() == 16) - v3 = v1 & v2 - self.assertTrue(len(v3) == len(v2)) - self.assertTrue(v3.GetTotalVal() == 4) + v3 = v1 & v2 + self.assertTrue(len(v3) == len(v2)) + self.assertTrue(v3.GetTotalVal() == 4) - v4 = v1 + v2 - self.assertTrue(len(v4) == len(v2)) - self.assertTrue(v4.GetTotalVal() == 20) + v4 = v1 + v2 + self.assertTrue(len(v4) == len(v2)) + self.assertTrue(v4.GetTotalVal() == 20) - v4 = v1 - v2 - self.assertTrue(v4.GetTotalVal() == 4) - v4 = v2 - v1 - self.assertTrue(v4.GetTotalVal() == 8) + v4 = v1 - v2 + self.assertTrue(v4.GetTotalVal() == 4) + v4 = v2 - v1 + self.assertTrue(v4.GetTotalVal() == 8) - v4 = v2 - v4 -= v1 - self.assertTrue(v4.GetTotalVal() == 8) - v4 -= v4 - self.assertTrue(v4.GetTotalVal() == 0) + v4 = v2 + v4 -= v1 + self.assertTrue(v4.GetTotalVal() == 8) + v4 -= v4 + self.assertTrue(v4.GetTotalVal() == 0) - def testIterator(self): - """ - connected to sf.net issue 1719831: - http://sourceforge.net/tracker/index.php?func=detail&aid=1719831&group_id=160139&atid=814650 - """ - v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) - for i in range(15): - v1[2 * i] = 1 - l1 = list(v1) - self.assertTrue(len(l1) == len(v1)) - for i, v in enumerate(v1): - self.assertTrue(l1[i] == v) - self.assertRaises(IndexError, lambda: v1[40]) + def testIterator(self): + """ + connected to sf.net issue 1719831: + http://sourceforge.net/tracker/index.php?func=detail&aid=1719831&group_id=160139&atid=814650 + """ + v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30) + for i in range(15): + v1[2 * i] = 1 + l1 = list(v1) + self.assertTrue(len(l1) == len(v1)) + for i, v in enumerate(v1): + self.assertTrue(l1[i] == v) + self.assertRaises(IndexError, lambda: v1[40]) - def test9ToNumpy(self): - import numpy - bv = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 32) - bv[0] = 1 - bv[1] = 4 - bv[17] = 1 - bv[23] = 8 - bv[31] = 12 - arr = numpy.zeros((3, ), 'i') - ds.ConvertToNumpyArray(bv, arr) - for i in range(len(bv)): - self.assertEqual(bv[i], arr[i]) + def test9ToNumpy(self): + import numpy + bv = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 32) + bv[0] = 1 + bv[1] = 4 + bv[17] = 1 + bv[23] = 8 + bv[31] = 12 + arr = numpy.zeros((3, ), 'i') + ds.ConvertToNumpyArray(bv, arr) + for i in range(len(bv)): + self.assertEqual(bv[i], arr[i]) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/DataStructs/Wrap/testSparseIntVect.py b/Code/DataStructs/Wrap/testSparseIntVect.py index 147e4e6f7..5a2417291 100644 --- a/Code/DataStructs/Wrap/testSparseIntVect.py +++ b/Code/DataStructs/Wrap/testSparseIntVect.py @@ -7,7 +7,7 @@ import os, sys import io import unittest -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import DataStructs as ds @@ -80,20 +80,20 @@ class TestCase(unittest.TestCase): v1[1 << 35] = 3 self.assertTrue(v1 == v1) - v2 = cPickle.loads(cPickle.dumps(v1)) + v2 = pickle.loads(pickle.dumps(v1)) self.assertTrue(v2 == v1) v3 = ds.LongSparseIntVect(v2.ToBinary()) self.assertTrue(v2 == v3) self.assertTrue(v1 == v3) - #cPickle.dump(v1,file('lsiv.pkl','wb+')) + #pickle.dump(v1,file('lsiv.pkl','wb+')) with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/lsiv.pkl'), 'r') as tf: buf = tf.read().replace('\r\n', '\n').encode('utf-8') tf.close() with io.BytesIO(buf) as f: - v3 = cPickle.load(f) + v3 = pickle.load(f) self.assertTrue(v3 == v1) def test3Pickle2(self): @@ -108,20 +108,20 @@ class TestCase(unittest.TestCase): v1[1 << 12] = 3 self.assertTrue(v1 == v1) - v2 = cPickle.loads(cPickle.dumps(v1)) + v2 = pickle.loads(pickle.dumps(v1)) self.assertTrue(v2 == v1) v3 = ds.IntSparseIntVect(v2.ToBinary()) self.assertTrue(v2 == v3) self.assertTrue(v1 == v3) - #cPickle.dump(v1,file('isiv.pkl','wb+')) + #pickle.dump(v1,file('isiv.pkl','wb+')) with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl'), 'r') as tf: buf = tf.read().replace('\r\n', '\n').encode('utf-8') tf.close() with io.BytesIO(buf) as f: - v3 = cPickle.load(f) + v3 = pickle.load(f) self.assertTrue(v3 == v1) def test4Update(self): diff --git a/Code/Demos/RDKit/MPI/rdkpympi.py b/Code/Demos/RDKit/MPI/rdkpympi.py index 7a8237635..1f3d1108f 100644 --- a/Code/Demos/RDKit/MPI/rdkpympi.py +++ b/Code/Demos/RDKit/MPI/rdkpympi.py @@ -7,7 +7,7 @@ # # run this with : mpirun -n 4 python rdkpympi.py # -from __future__ import print_function + from boost import mpi from rdkit import Chem from rdkit.Chem import AllChem diff --git a/Code/Demos/boost/EBV_err/setup.py b/Code/Demos/boost/EBV_err/setup.py index a7b8cf62e..fbeb47c0a 100755 --- a/Code/Demos/boost/EBV_err/setup.py +++ b/Code/Demos/boost/EBV_err/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension import RDConfig diff --git a/Code/Demos/boost/EBV_err/test.py b/Code/Demos/boost/EBV_err/test.py index 0ac3bca5c..00371b241 100755 --- a/Code/Demos/boost/EBV_err/test.py +++ b/Code/Demos/boost/EBV_err/test.py @@ -1,4 +1,4 @@ -from __future__ import print_function + #from DataStructs import cDataStructs from DataStructs import cDataStructs import moduleA diff --git a/Code/Demos/boost/cross_mod_err/setup.py b/Code/Demos/boost/cross_mod_err/setup.py index 95ffb15ed..63cd6a8b7 100755 --- a/Code/Demos/boost/cross_mod_err/setup.py +++ b/Code/Demos/boost/cross_mod_err/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension import RDConfig diff --git a/Code/Demos/boost/cross_mod_err/test.py b/Code/Demos/boost/cross_mod_err/test.py index df2778059..7e5687583 100755 --- a/Code/Demos/boost/cross_mod_err/test.py +++ b/Code/Demos/boost/cross_mod_err/test.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import moduleA import moduleB import moduleC diff --git a/Code/Demos/boost/cross_module/setup.py b/Code/Demos/boost/cross_module/setup.py index 47ca07efc..87aa83d07 100755 --- a/Code/Demos/boost/cross_module/setup.py +++ b/Code/Demos/boost/cross_module/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension import RDConfig diff --git a/Code/Demos/boost/cross_module/test.py b/Code/Demos/boost/cross_module/test.py index e80d7786f..0813b2d08 100755 --- a/Code/Demos/boost/cross_module/test.py +++ b/Code/Demos/boost/cross_module/test.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import moda import modb diff --git a/Code/Demos/boost/numpy/setup.py b/Code/Demos/boost/numpy/setup.py index 6258f5b45..44c964f20 100755 --- a/Code/Demos/boost/numpy/setup.py +++ b/Code/Demos/boost/numpy/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension import RDConfig diff --git a/Code/Demos/boost/numpy/test.py b/Code/Demos/boost/numpy/test.py index d09880d62..bc3ed6379 100755 --- a/Code/Demos/boost/numpy/test.py +++ b/Code/Demos/boost/numpy/test.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from Numeric import * import linalg diff --git a/Code/Demos/boost/overloads/setup.py b/Code/Demos/boost/overloads/setup.py index 0440bb552..a5734ff3a 100755 --- a/Code/Demos/boost/overloads/setup.py +++ b/Code/Demos/boost/overloads/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension import RDConfig diff --git a/Code/Demos/boost/python_objs/setup.py b/Code/Demos/boost/python_objs/setup.py index 65a3b51af..bd4de86c1 100755 --- a/Code/Demos/boost/python_objs/setup.py +++ b/Code/Demos/boost/python_objs/setup.py @@ -1,6 +1,6 @@ # Run this with: # python setup.py install --install-lib=. -from __future__ import print_function + from distutils.core import setup, Extension from rdkit import RDConfig diff --git a/Code/Geometry/Wrap/testGeometry.py b/Code/Geometry/Wrap/testGeometry.py index d303e0d5f..d368ab602 100644 --- a/Code/Geometry/Wrap/testGeometry.py +++ b/Code/Geometry/Wrap/testGeometry.py @@ -1,10 +1,10 @@ -from __future__ import print_function + import os, sys import unittest import copy import math -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import DataStructs @@ -204,8 +204,8 @@ class TestCase(unittest.TestCase): pt.Normalize() self.assertTrue(feq(pt.Length(), 1.0)) - pkl = cPickle.dumps(pt) - pt2 = cPickle.loads(pkl) + pkl = pickle.dumps(pt) + pt2 = pickle.loads(pkl) self.assertTrue(len(pt) == len(pt2)) for i in range(len(pt)): self.assertTrue(feq(pt2[i], pt[i])) @@ -276,13 +276,13 @@ class TestCase(unittest.TestCase): def testPointPickles(self): pt = geom.Point3D(2.0, -3.0, 1.0) - pt2 = cPickle.loads(cPickle.dumps(pt)) + pt2 = pickle.loads(pickle.dumps(pt)) self.assertTrue(feq(pt.x, pt2.x, 1e-6)) self.assertTrue(feq(pt.y, pt2.y, 1e-6)) self.assertTrue(feq(pt.z, pt2.z, 1e-6)) pt = geom.Point2D(2.0, -4.0) - pt2 = cPickle.loads(cPickle.dumps(pt)) + pt2 = pickle.loads(pickle.dumps(pt)) self.assertTrue(feq(pt.x, pt2.x, 1e-6)) self.assertTrue(feq(pt.y, pt2.y, 1e-6)) @@ -298,7 +298,7 @@ class TestCase(unittest.TestCase): self.assertTrue(geom.TanimotoDistance(grd, grd) == 0.0) - grd2 = cPickle.loads(cPickle.dumps(grd)) + grd2 = pickle.loads(pickle.dumps(grd)) self.assertTrue(grd2.GetNumX() == 20) self.assertTrue(grd2.GetNumY() == 18) self.assertTrue(grd2.GetNumZ() == 16) diff --git a/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py b/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py index dcd15a48e..d6447a077 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py +++ b/Code/GraphMol/ChemReactions/Wrap/testEnumerations.py @@ -28,12 +28,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + import unittest import os,sys, copy -from rdkit.six.moves import cPickle +import pickle from rdkit import rdBase from rdkit import Chem diff --git a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py index 55c9f3229..13afaea7c 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py +++ b/Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py @@ -29,12 +29,14 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function -import unittest, doctest -import os, sys -from rdkit.six import exec_ -from rdkit.six.moves import cPickle + +import importlib.util +import unittest +import doctest +import os +import sys +import pickle from rdkit import rdBase from rdkit import Chem @@ -45,16 +47,17 @@ from rdkit.Chem.SimpleEnum import Enumerator def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 def ptEq(pt1, pt2, tol=1e-4): - return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) + return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) + # Boost functions are NOT found by doctest, this "fixes" them # by adding the doctests to a fake module -import imp -TestPreprocess = imp.new_module("TestPreprocess") +spec = importlib.util.spec_from_loader("TestPreprocess", loader=None) +TestPreprocess = importlib.util.module_from_spec(spec) code = """ from rdkit.Chem import rdChemReactions def PreprocessReaction(*a, **kw): @@ -62,115 +65,116 @@ def PreprocessReaction(*a, **kw): ''' return rdChemReactions.PreprocessReaction(*a, **kw) """ % "\n".join([x.lstrip() for x in rdChemReactions.PreprocessReaction.__doc__.split("\n")]) -exec_(code, TestPreprocess.__dict__) +exec(code, TestPreprocess.__dict__) def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(Enumerator)) - tests.addTests(doctest.DocTestSuite(TestPreprocess)) - return tests + tests.addTests(doctest.DocTestSuite(Enumerator)) + tests.addTests(doctest.DocTestSuite(TestPreprocess)) + return tests class TestCase(unittest.TestCase): - def setUp(self): - self.dataDir = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ChemReactions', 'testData') + def setUp(self): + self.dataDir = os.path.join(RDConfig.RDBaseDir, 'Code', + 'GraphMol', 'ChemReactions', 'testData') - def test1Basics(self): - rxna = rdChemReactions.ChemicalReaction() - # also tests empty copy constructor - for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: - self.assertTrue(rxn.GetNumReactantTemplates() == 0) - self.assertTrue(rxn.GetNumProductTemplates() == 0) + def test1Basics(self): + rxna = rdChemReactions.ChemicalReaction() + # also tests empty copy constructor + for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: + self.assertTrue(rxn.GetNumReactantTemplates() == 0) + self.assertTrue(rxn.GetNumProductTemplates() == 0) - r1 = Chem.MolFromSmarts('[C:1](=[O:2])O') - rxn.AddReactantTemplate(r1) - self.assertTrue(rxn.GetNumReactantTemplates() == 1) + r1 = Chem.MolFromSmarts('[C:1](=[O:2])O') + rxn.AddReactantTemplate(r1) + self.assertTrue(rxn.GetNumReactantTemplates() == 1) - r1 = Chem.MolFromSmarts('[N:3]') - rxn.AddReactantTemplate(r1) - self.assertTrue(rxn.GetNumReactantTemplates() == 2) + r1 = Chem.MolFromSmarts('[N:3]') + rxn.AddReactantTemplate(r1) + self.assertTrue(rxn.GetNumReactantTemplates() == 2) - r1 = Chem.MolFromSmarts('[C:1](=[O:2])[N:3]') - rxn.AddProductTemplate(r1) - self.assertTrue(rxn.GetNumProductTemplates() == 1) + r1 = Chem.MolFromSmarts('[C:1](=[O:2])[N:3]') + rxn.AddProductTemplate(r1) + self.assertTrue(rxn.GetNumProductTemplates() == 1) - reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) - ps = rxn.RunReactants(reacts) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 3) + reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) + ps = rxn.RunReactants(reacts) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 3) - ps = rxn.RunReactants(list(reacts)) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 3) + ps = rxn.RunReactants(list(reacts)) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 3) - def test2DaylightParser(self): - rxna = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]') - for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: - self.assertTrue(rxn) - self.assertTrue(rxn.GetNumReactantTemplates() == 2) - self.assertTrue(rxn.GetNumProductTemplates() == 1) - self.assertTrue(rxn._getImplicitPropertiesFlag()) + def test2DaylightParser(self): + rxna = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]') + for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: + self.assertTrue(rxn) + self.assertTrue(rxn.GetNumReactantTemplates() == 2) + self.assertTrue(rxn.GetNumProductTemplates() == 1) + self.assertTrue(rxn._getImplicitPropertiesFlag()) - reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) - ps = rxn.RunReactants(reacts) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 3) + reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) + ps = rxn.RunReactants(reacts) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 3) - reacts = (Chem.MolFromSmiles('CC(=O)OC'), Chem.MolFromSmiles('CN')) - ps = rxn.RunReactants(reacts) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 5) + reacts = (Chem.MolFromSmiles('CC(=O)OC'), Chem.MolFromSmiles('CN')) + ps = rxn.RunReactants(reacts) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 5) - def test3MDLParsers(self): - fileN = os.path.join(self.dataDir, 'AmideBond.rxn') - rxna = rdChemReactions.ReactionFromRxnFile(fileN) - print("*" * 44) - print(fileN) - print(rxna) - for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: - self.assertTrue(rxn) - self.assertFalse(rxn._getImplicitPropertiesFlag()) + def test3MDLParsers(self): + fileN = os.path.join(self.dataDir, 'AmideBond.rxn') + rxna = rdChemReactions.ReactionFromRxnFile(fileN) + print("*" * 44) + print(fileN) + print(rxna) + for rxn in [rxna, rdChemReactions.ChemicalReaction(rxna)]: + self.assertTrue(rxn) + self.assertFalse(rxn._getImplicitPropertiesFlag()) - self.assertTrue(rxn.GetNumReactantTemplates() == 2) - self.assertTrue(rxn.GetNumProductTemplates() == 1) + self.assertTrue(rxn.GetNumReactantTemplates() == 2) + self.assertTrue(rxn.GetNumProductTemplates() == 1) - reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) - ps = rxn.RunReactants(reacts) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 3) + reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) + ps = rxn.RunReactants(reacts) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 3) - with open(fileN, 'r') as rxnF: - rxnBlock = rxnF.read() - rxn = rdChemReactions.ReactionFromRxnBlock(rxnBlock) - self.assertTrue(rxn) + with open(fileN, 'r') as rxnF: + rxnBlock = rxnF.read() + rxn = rdChemReactions.ReactionFromRxnBlock(rxnBlock) + self.assertTrue(rxn) - self.assertTrue(rxn.GetNumReactantTemplates() == 2) - self.assertTrue(rxn.GetNumProductTemplates() == 1) + self.assertTrue(rxn.GetNumReactantTemplates() == 2) + self.assertTrue(rxn.GetNumProductTemplates() == 1) - reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) - ps = rxn.RunReactants(reacts) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - self.assertTrue(ps[0][0].GetNumAtoms() == 3) + reacts = (Chem.MolFromSmiles('C(=O)O'), Chem.MolFromSmiles('N')) + ps = rxn.RunReactants(reacts) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + self.assertTrue(ps[0][0].GetNumAtoms() == 3) - def test4ErrorHandling(self): - self.assertRaises( - ValueError, - lambda x='[C:1](=[O:2])Q.[N:3]>>[C:1](=[O:2])[N:3]': rdChemReactions.ReactionFromSmarts(x)) - self.assertRaises( - ValueError, - lambda x='[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]Q': rdChemReactions.ReactionFromSmarts(x)) - self.assertRaises( - ValueError, - lambda x='[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]>>CC': rdChemReactions.ReactionFromSmarts(x)) + def test4ErrorHandling(self): + self.assertRaises( + ValueError, + lambda x='[C:1](=[O:2])Q.[N:3]>>[C:1](=[O:2])[N:3]': rdChemReactions.ReactionFromSmarts(x)) + self.assertRaises( + ValueError, + lambda x='[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]Q': rdChemReactions.ReactionFromSmarts(x)) + self.assertRaises( + ValueError, + lambda x='[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]>>CC': rdChemReactions.ReactionFromSmarts(x)) - block = """$RXN + block = """$RXN ISIS 082120061354 @@ -205,9 +209,9 @@ $MOL 2 3 2 0 0 0 0 M END """ - self.assertRaises(ValueError, lambda x=block: rdChemReactions.ReactionFromRxnBlock(x)) + self.assertRaises(ValueError, lambda x=block: rdChemReactions.ReactionFromRxnBlock(x)) - block = """$RXN + block = """$RXN ISIS 082120061354 @@ -242,9 +246,9 @@ $MOL 2 3 2 0 0 0 0 M END """ - #self.assertRaises(ValueError,lambda x=block:rdChemReactions.ReactionFromRxnBlock(x)) + #self.assertRaises(ValueError,lambda x=block:rdChemReactions.ReactionFromRxnBlock(x)) - block = """$RXN + block = """$RXN ISIS 082120061354 @@ -279,436 +283,439 @@ $MOL 2 3 2 0 0 0 0 M END """ - #self.assertRaises(ValueError,lambda x=block:rdChemReactions.ReactionFromRxnBlock(x)) + #self.assertRaises(ValueError,lambda x=block:rdChemReactions.ReactionFromRxnBlock(x)) - def test5Validation(self): - rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]') - self.assertTrue(rxn) - self.assertTrue(rxn.Validate() == (0, 0)) + def test5Validation(self): + rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]') + self.assertTrue(rxn) + self.assertTrue(rxn.Validate() == (0, 0)) - rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:1])O.[N:3]>>[C:1](=[O:2])[N:3]') - self.assertTrue(rxn) - self.assertTrue(rxn.Validate() == (1, 1)) + rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:1])O.[N:3]>>[C:1](=[O:2])[N:3]') + self.assertTrue(rxn) + self.assertTrue(rxn.Validate() == (1, 1)) - rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])[O:4].[N:3]>>[C:1](=[O:2])[N:3]') - self.assertTrue(rxn) - self.assertTrue(rxn.Validate() == (1, 0)) + rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])[O:4].[N:3]>>[C:1](=[O:2])[N:3]') + self.assertTrue(rxn) + self.assertTrue(rxn.Validate() == (1, 0)) - rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3][C:5]') - self.assertTrue(rxn) - self.assertTrue(rxn.Validate() == (1, 0)) + rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3][C:5]') + self.assertTrue(rxn) + self.assertTrue(rxn.Validate() == (1, 0)) - def test6Exceptions(self): - rxn = rdChemReactions.ReactionFromSmarts('[C:1]Cl>>[C:1]') - self.assertTrue(rxn) - self.assertRaises(ValueError, lambda x=rxn: x.RunReactants(())) - self.assertRaises( - ValueError, lambda x=rxn: x.RunReactants((Chem.MolFromSmiles('CC'), Chem.MolFromSmiles('C')))) - ps = rxn.RunReactants((Chem.MolFromSmiles('CCCl'), )) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) + def test6Exceptions(self): + rxn = rdChemReactions.ReactionFromSmarts('[C:1]Cl>>[C:1]') + self.assertTrue(rxn) + self.assertRaises(ValueError, lambda x=rxn: x.RunReactants(())) + self.assertRaises( + ValueError, lambda x=rxn: x.RunReactants((Chem.MolFromSmiles('CC'), Chem.MolFromSmiles('C')))) + ps = rxn.RunReactants((Chem.MolFromSmiles('CCCl'), )) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) - def _test7Leak(self): - rxn = rdChemReactions.ReactionFromSmarts('[C:1]Cl>>[C:1]') - self.assertTrue(rxn) - print('running: ') - for i in range(1e5): - ps = rxn.RunReactants((Chem.MolFromSmiles('CCCl'), )) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - if not i % 1000: - print(i) + def _test7Leak(self): + rxn = rdChemReactions.ReactionFromSmarts('[C:1]Cl>>[C:1]') + self.assertTrue(rxn) + print('running: ') + for i in range(1e5): + ps = rxn.RunReactants((Chem.MolFromSmiles('CCCl'), )) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + if not i % 1000: + print(i) - def test8Properties(self): - rxn = rdChemReactions.ReactionFromSmarts('[O:1]>>[O:1][3#0]') - self.assertTrue(rxn) - ps = rxn.RunReactants((Chem.MolFromSmiles('CO'), )) - self.assertTrue(len(ps) == 1) - self.assertTrue(len(ps[0]) == 1) - Chem.SanitizeMol(ps[0][0]) - self.assertEqual(ps[0][0].GetAtomWithIdx(1).GetIsotope(), 3) + def test8Properties(self): + rxn = rdChemReactions.ReactionFromSmarts('[O:1]>>[O:1][3#0]') + self.assertTrue(rxn) + ps = rxn.RunReactants((Chem.MolFromSmiles('CO'), )) + self.assertTrue(len(ps) == 1) + self.assertTrue(len(ps[0]) == 1) + Chem.SanitizeMol(ps[0][0]) + self.assertEqual(ps[0][0].GetAtomWithIdx(1).GetIsotope(), 3) - def test9AromaticityTransfer(self): - # this was issue 2664121 - mol = Chem.MolFromSmiles('c1ccc(C2C3(Cc4c(cccc4)C2)CCCC3)cc1') - rxn = rdChemReactions.ReactionFromSmarts( - '[A:1]1~[*:2]~[*:3]~[*:4]~[*:5]~[A:6]-;@1>>[*:1]~[*:2]~[*:3]~[*:4]~[*:5]~[*:6]') - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 6) - for p in products: - self.assertEqual(len(p), 1) - Chem.SanitizeMol(p[0]) + def test9AromaticityTransfer(self): + # this was issue 2664121 + mol = Chem.MolFromSmiles('c1ccc(C2C3(Cc4c(cccc4)C2)CCCC3)cc1') + rxn = rdChemReactions.ReactionFromSmarts( + '[A:1]1~[*:2]~[*:3]~[*:4]~[*:5]~[A:6]-;@1>>[*:1]~[*:2]~[*:3]~[*:4]~[*:5]~[*:6]') + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 6) + for p in products: + self.assertEqual(len(p), 1) + Chem.SanitizeMol(p[0]) - def test10DotSeparation(self): - # 08/05/14 - # This test is changed due to a new behavior of the smarts - # reaction parser which now allows using parenthesis in products - # as well. original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1' - rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)') - mol = Chem.MolFromSmiles('C1ON1') - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(p[0].GetNumAtoms(), 3) - self.assertEqual(p[0].GetNumBonds(), 2) + def test10DotSeparation(self): + # 08/05/14 + # This test is changed due to a new behavior of the smarts + # reaction parser which now allows using parenthesis in products + # as well. original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1' + rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)') + mol = Chem.MolFromSmiles('C1ON1') + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(p[0].GetNumAtoms(), 3) + self.assertEqual(p[0].GetNumBonds(), 2) - def test11ImplicitProperties(self): - rxn = rdChemReactions.ReactionFromSmarts('[C:1]O>>[C:1]') - mol = Chem.MolFromSmiles('CCO') - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') - mol2 = Chem.MolFromSmiles('C[CH-]O') - products = rxn.RunReactants([mol2]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(Chem.MolToSmiles(p[0]), '[CH2-]C') + def test11ImplicitProperties(self): + rxn = rdChemReactions.ReactionFromSmarts('[C:1]O>>[C:1]') + mol = Chem.MolFromSmiles('CCO') + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') + mol2 = Chem.MolFromSmiles('C[CH-]O') + products = rxn.RunReactants([mol2]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(Chem.MolToSmiles(p[0]), '[CH2-]C') - rxn._setImplicitPropertiesFlag(False) - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') - products = rxn.RunReactants([mol2]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') + rxn._setImplicitPropertiesFlag(False) + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') + products = rxn.RunReactants([mol2]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(Chem.MolToSmiles(p[0]), 'CC') - def test12Pickles(self): - # 08/05/14 - # This test is changed due to a new behavior of the smarts - # reaction parser which now allows using parenthesis in products - # as well. original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1' - rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)') - pkl = cPickle.dumps(rxn) - rxn = cPickle.loads(pkl) - mol = Chem.MolFromSmiles('C1ON1') - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(p[0].GetNumAtoms(), 3) - self.assertEqual(p[0].GetNumBonds(), 2) + def test12Pickles(self): + # 08/05/14 + # This test is changed due to a new behavior of the smarts + # reaction parser which now allows using parenthesis in products + # as well. original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1' + rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)') + pkl = pickle.dumps(rxn) + rxn = pickle.loads(pkl) + mol = Chem.MolFromSmiles('C1ON1') + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(p[0].GetNumAtoms(), 3) + self.assertEqual(p[0].GetNumBonds(), 2) - rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary()) - products = rxn.RunReactants([mol]) - self.assertEqual(len(products), 1) - for p in products: - self.assertEqual(len(p), 1) - self.assertEqual(p[0].GetNumAtoms(), 3) - self.assertEqual(p[0].GetNumBonds(), 2) + rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary()) + products = rxn.RunReactants([mol]) + self.assertEqual(len(products), 1) + for p in products: + self.assertEqual(len(p), 1) + self.assertEqual(p[0].GetNumAtoms(), 3) + self.assertEqual(p[0].GetNumBonds(), 2) - def test13GetTemplates(self): - rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>[C:1][O:2].[N:3]') - r1 = rxn.GetReactantTemplate(0) - sma = Chem.MolToSmarts(r1) - self.assertEqual(sma, '[C:1]1[O:2][N:3]1') - p1 = rxn.GetProductTemplate(0) - sma = Chem.MolToSmarts(p1) - self.assertEqual(sma, '[C:1][O:2]') + def test13GetTemplates(self): + rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>[C:1][O:2].[N:3]') + r1 = rxn.GetReactantTemplate(0) + sma = Chem.MolToSmarts(r1) + self.assertEqual(sma, '[C:1]1[O:2][N:3]1') + p1 = rxn.GetProductTemplate(0) + sma = Chem.MolToSmarts(p1) + self.assertEqual(sma, '[C:1][O:2]') - p2 = rxn.GetProductTemplate(1) - sma = Chem.MolToSmarts(p2) - self.assertEqual(sma, '[N:3]') + p2 = rxn.GetProductTemplate(1) + sma = Chem.MolToSmarts(p2) + self.assertEqual(sma, '[N:3]') - self.assertRaises(ValueError, lambda: rxn.GetProductTemplate(2)) - self.assertRaises(ValueError, lambda: rxn.GetReactantTemplate(1)) + self.assertRaises(ValueError, lambda: rxn.GetProductTemplate(2)) + self.assertRaises(ValueError, lambda: rxn.GetReactantTemplate(1)) - def test14Matchers(self): - rxn = rdChemReactions.ReactionFromSmarts( - '[C;!$(C(-O)-O):1](=[O:2])[O;H,-1].[N;!H0:3]>>[C:1](=[O:2])[N:3]') - self.assertTrue(rxn) - rxn.Initialize() - self.assertTrue(rxn.IsMoleculeReactant(Chem.MolFromSmiles('OC(=O)C'))) - self.assertFalse(rxn.IsMoleculeReactant(Chem.MolFromSmiles('OC(=O)O'))) - self.assertTrue(rxn.IsMoleculeReactant(Chem.MolFromSmiles('CNC'))) - self.assertFalse(rxn.IsMoleculeReactant(Chem.MolFromSmiles('CN(C)C'))) - self.assertTrue(rxn.IsMoleculeProduct(Chem.MolFromSmiles('NC(=O)C'))) - self.assertTrue(rxn.IsMoleculeProduct(Chem.MolFromSmiles('CNC(=O)C'))) - self.assertFalse(rxn.IsMoleculeProduct(Chem.MolFromSmiles('COC(=O)C'))) + def test14Matchers(self): + rxn = rdChemReactions.ReactionFromSmarts( + '[C;!$(C(-O)-O):1](=[O:2])[O;H,-1].[N;!H0:3]>>[C:1](=[O:2])[N:3]') + self.assertTrue(rxn) + rxn.Initialize() + self.assertTrue(rxn.IsMoleculeReactant(Chem.MolFromSmiles('OC(=O)C'))) + self.assertFalse(rxn.IsMoleculeReactant(Chem.MolFromSmiles('OC(=O)O'))) + self.assertTrue(rxn.IsMoleculeReactant(Chem.MolFromSmiles('CNC'))) + self.assertFalse(rxn.IsMoleculeReactant(Chem.MolFromSmiles('CN(C)C'))) + self.assertTrue(rxn.IsMoleculeProduct(Chem.MolFromSmiles('NC(=O)C'))) + self.assertTrue(rxn.IsMoleculeProduct(Chem.MolFromSmiles('CNC(=O)C'))) + self.assertFalse(rxn.IsMoleculeProduct(Chem.MolFromSmiles('COC(=O)C'))) - def test15Replacements(self): - rxn = rdChemReactions.ReactionFromSmarts( - '[{amine}:1]>>[*:1]-C', - replacements={'{amine}': '$([N;!H0;$(N-[#6]);!$(N-[!#6;!#1]);!$(N-C=[O,N,S])])'}) - self.assertTrue(rxn) - rxn.Initialize() - reactants = (Chem.MolFromSmiles('CCN'), ) - ps = rxn.RunReactants(reactants) - self.assertEqual(len(ps), 1) - self.assertEqual(len(ps[0]), 1) - self.assertEqual(ps[0][0].GetNumAtoms(), 4) + def test15Replacements(self): + rxn = rdChemReactions.ReactionFromSmarts( + '[{amine}:1]>>[*:1]-C', + replacements={'{amine}': '$([N;!H0;$(N-[#6]);!$(N-[!#6;!#1]);!$(N-C=[O,N,S])])'}) + self.assertTrue(rxn) + rxn.Initialize() + reactants = (Chem.MolFromSmiles('CCN'), ) + ps = rxn.RunReactants(reactants) + self.assertEqual(len(ps), 1) + self.assertEqual(len(ps[0]), 1) + self.assertEqual(ps[0][0].GetNumAtoms(), 4) - def test16GetReactingAtoms(self): - rxn = rdChemReactions.ReactionFromSmarts("[O:1][C:2].[N:3]>>[N:1][C:2].[N:3]") - self.assertTrue(rxn) - rxn.Initialize() - rAs = rxn.GetReactingAtoms() - self.assertEqual(len(rAs), 2) - self.assertEqual(len(rAs[0]), 1) - self.assertEqual(len(rAs[1]), 0) + def test16GetReactingAtoms(self): + rxn = rdChemReactions.ReactionFromSmarts("[O:1][C:2].[N:3]>>[N:1][C:2].[N:3]") + self.assertTrue(rxn) + rxn.Initialize() + rAs = rxn.GetReactingAtoms() + self.assertEqual(len(rAs), 2) + self.assertEqual(len(rAs[0]), 1) + self.assertEqual(len(rAs[1]), 0) - rxn = rdChemReactions.ReactionFromSmarts("[O:1]C>>[O:1]C") - self.assertTrue(rxn) - rxn.Initialize() - rAs = rxn.GetReactingAtoms() - self.assertEqual(len(rAs), 1) - self.assertEqual(len(rAs[0]), 2) - rAs = rxn.GetReactingAtoms(True) - self.assertEqual(len(rAs), 1) - self.assertEqual(len(rAs[0]), 1) + rxn = rdChemReactions.ReactionFromSmarts("[O:1]C>>[O:1]C") + self.assertTrue(rxn) + rxn.Initialize() + rAs = rxn.GetReactingAtoms() + self.assertEqual(len(rAs), 1) + self.assertEqual(len(rAs[0]), 2) + rAs = rxn.GetReactingAtoms(True) + self.assertEqual(len(rAs), 1) + self.assertEqual(len(rAs[0]), 1) - def test17AddRecursiveQueriesToReaction(self): - rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") - self.assertTrue(rxn) - rxn.Initialize() - qs = {'aliphatic': Chem.MolFromSmiles('CC')} - rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'aliphatic') - rxn.AddRecursiveQueriesToReaction(qs, 'query') - q = rxn.GetReactantTemplate(0) - m = Chem.MolFromSmiles('CCOC') - self.assertTrue(m.HasSubstructMatch(q)) - m = Chem.MolFromSmiles('CO') - self.assertFalse(m.HasSubstructMatch(q)) + def test17AddRecursiveQueriesToReaction(self): + rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") + self.assertTrue(rxn) + rxn.Initialize() + qs = {'aliphatic': Chem.MolFromSmiles('CC')} + rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'aliphatic') + rxn.AddRecursiveQueriesToReaction(qs, 'query') + q = rxn.GetReactantTemplate(0) + m = Chem.MolFromSmiles('CCOC') + self.assertTrue(m.HasSubstructMatch(q)) + m = Chem.MolFromSmiles('CO') + self.assertFalse(m.HasSubstructMatch(q)) - rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") - rxn.Initialize() - rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'aliphatic') - labels = rxn.AddRecursiveQueriesToReaction(qs, 'query', getLabels=True) - self.assertTrue(len(labels), 1) + rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") + rxn.Initialize() + rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'aliphatic') + labels = rxn.AddRecursiveQueriesToReaction(qs, 'query', getLabels=True) + self.assertTrue(len(labels), 1) - def test17bAddRecursiveQueriesToReaction(self): - from rdkit.Chem import FilterCatalog - rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") - self.assertTrue(rxn) - rxn.Initialize() - rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'carboxylicacid') - querydefs = {k.lower(): v - for k, v in FilterCatalog.GetFlattenedFunctionalGroupHierarchy().items()} + def test17bAddRecursiveQueriesToReaction(self): + from rdkit.Chem import FilterCatalog + rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]") + self.assertTrue(rxn) + rxn.Initialize() + rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'carboxylicacid') + querydefs = {k.lower(): v + for k, v in FilterCatalog.GetFlattenedFunctionalGroupHierarchy().items()} - self.assertTrue('CarboxylicAcid' in FilterCatalog.GetFlattenedFunctionalGroupHierarchy()) - rxn.AddRecursiveQueriesToReaction(querydefs, 'query') - q = rxn.GetReactantTemplate(0) - m = Chem.MolFromSmiles('C(=O)[O-].N') - self.assertTrue(m.HasSubstructMatch(q)) - m = Chem.MolFromSmiles('C.N') - self.assertFalse(m.HasSubstructMatch(q)) + self.assertTrue('CarboxylicAcid' in FilterCatalog.GetFlattenedFunctionalGroupHierarchy()) + rxn.AddRecursiveQueriesToReaction(querydefs, 'query') + q = rxn.GetReactantTemplate(0) + m = Chem.MolFromSmiles('C(=O)[O-].N') + self.assertTrue(m.HasSubstructMatch(q)) + m = Chem.MolFromSmiles('C.N') + self.assertFalse(m.HasSubstructMatch(q)) - def test18GithubIssue16(self): - rxn = rdChemReactions.ReactionFromSmarts("[F:1]>>[Cl:1]") - self.assertTrue(rxn) - rxn.Initialize() - self.assertRaises(ValueError, lambda: rxn.RunReactants((None, ))) + def test18GithubIssue16(self): + rxn = rdChemReactions.ReactionFromSmarts("[F:1]>>[Cl:1]") + self.assertTrue(rxn) + rxn.Initialize() + self.assertRaises(ValueError, lambda: rxn.RunReactants((None, ))) - def test19RemoveUnmappedMoleculesToAgents(self): - rxn = rdChemReactions.ReactionFromSmarts( - "[C:1]=[O:2].[N:3].C(=O)O>[OH2].[Na].[Cl]>[N:3]~[C:1]=[O:2]") - self.failUnless(rxn) - rxn.Initialize() - self.failUnless(rxn.GetNumReactantTemplates() == 3) - self.failUnless(rxn.GetNumProductTemplates() == 1) - self.failUnless(rxn.GetNumAgentTemplates() == 3) + def test19RemoveUnmappedMoleculesToAgents(self): + rxn = rdChemReactions.ReactionFromSmarts( + "[C:1]=[O:2].[N:3].C(=O)O>[OH2].[Na].[Cl]>[N:3]~[C:1]=[O:2]") + self.failUnless(rxn) + rxn.Initialize() + self.failUnless(rxn.GetNumReactantTemplates() == 3) + self.failUnless(rxn.GetNumProductTemplates() == 1) + self.failUnless(rxn.GetNumAgentTemplates() == 3) - rxn.RemoveUnmappedReactantTemplates() - rxn.RemoveUnmappedProductTemplates() + rxn.RemoveUnmappedReactantTemplates() + rxn.RemoveUnmappedProductTemplates() - self.failUnless(rxn.GetNumReactantTemplates() == 2) - self.failUnless(rxn.GetNumProductTemplates() == 1) - self.failUnless(rxn.GetNumAgentTemplates() == 4) + self.failUnless(rxn.GetNumReactantTemplates() == 2) + self.failUnless(rxn.GetNumProductTemplates() == 1) + self.failUnless(rxn.GetNumAgentTemplates() == 4) - rxn = rdChemReactions.ReactionFromSmarts("[C:1]=[O:2].[N:3].C(=O)O>>[N:3]~[C:1]=[O:2].[OH2]") - self.failUnless(rxn) - rxn.Initialize() - self.failUnless(rxn.GetNumReactantTemplates() == 3) - self.failUnless(rxn.GetNumProductTemplates() == 2) - self.failUnless(rxn.GetNumAgentTemplates() == 0) + rxn = rdChemReactions.ReactionFromSmarts( + "[C:1]=[O:2].[N:3].C(=O)O>>[N:3]~[C:1]=[O:2].[OH2]") + self.failUnless(rxn) + rxn.Initialize() + self.failUnless(rxn.GetNumReactantTemplates() == 3) + self.failUnless(rxn.GetNumProductTemplates() == 2) + self.failUnless(rxn.GetNumAgentTemplates() == 0) - agentList = [] - rxn.RemoveUnmappedReactantTemplates(moveToAgentTemplates=False, targetList=agentList) - rxn.RemoveUnmappedProductTemplates(targetList=agentList) + agentList = [] + rxn.RemoveUnmappedReactantTemplates(moveToAgentTemplates=False, targetList=agentList) + rxn.RemoveUnmappedProductTemplates(targetList=agentList) - self.failUnless(rxn.GetNumReactantTemplates() == 2) - self.failUnless(rxn.GetNumProductTemplates() == 1) - self.failUnless(rxn.GetNumAgentTemplates() == 1) - self.failUnless(len(agentList) == 2) + self.failUnless(rxn.GetNumReactantTemplates() == 2) + self.failUnless(rxn.GetNumProductTemplates() == 1) + self.failUnless(rxn.GetNumAgentTemplates() == 1) + self.failUnless(len(agentList) == 2) - def test20CheckCopyConstructedReactionAtomProps(self): - RLABEL = "_MolFileRLabel" - amine_rxn = '$RXN\n\n ISIS 090220091541\n\n 2 1\n$MOL\n\n -ISIS- 09020915412D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n -2.9083 -0.4708 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n -2.3995 -0.1771 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n -2.4042 0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 2 3 2 0 0 0 0\nV 2 aldehyde\nM RGP 1 1 1\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 2.8375 -0.2500 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 3.3463 0.0438 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0 0 0\nV 2 amine\nM RGP 1 1 2\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 13.3088 0.9436 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n 13.8206 1.2321 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n 13.3028 0.3561 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 12.7911 0.0676 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 1 3 1 0 0 0 0\n 1 2 1 0 0 0 0\n 3 4 1 0 0 0 0\nM RGP 2 2 1 4 2\nM END\n' - rxn = rdChemReactions.ReactionFromRxnBlock(amine_rxn) - res = [] - for atom in rxn.GetReactantTemplate(0).GetAtoms(): - if atom.HasProp(RLABEL): - res.append((atom.GetIdx(), atom.GetProp(RLABEL))) - rxn2 = rdChemReactions.ChemicalReaction(rxn) - res2 = [] + def test20CheckCopyConstructedReactionAtomProps(self): + RLABEL = "_MolFileRLabel" + amine_rxn = '$RXN\n\n ISIS 090220091541\n\n 2 1\n$MOL\n\n -ISIS- 09020915412D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n -2.9083 -0.4708 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n -2.3995 -0.1771 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n -2.4042 0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 2 3 2 0 0 0 0\nV 2 aldehyde\nM RGP 1 1 1\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 2.8375 -0.2500 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 3.3463 0.0438 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0 0 0\nV 2 amine\nM RGP 1 1 2\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 13.3088 0.9436 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n 13.8206 1.2321 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n 13.3028 0.3561 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 12.7911 0.0676 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 1 3 1 0 0 0 0\n 1 2 1 0 0 0 0\n 3 4 1 0 0 0 0\nM RGP 2 2 1 4 2\nM END\n' + rxn = rdChemReactions.ReactionFromRxnBlock(amine_rxn) + res = [] + for atom in rxn.GetReactantTemplate(0).GetAtoms(): + if atom.HasProp(RLABEL): + res.append((atom.GetIdx(), atom.GetProp(RLABEL))) + rxn2 = rdChemReactions.ChemicalReaction(rxn) + res2 = [] - for atom in rxn2.GetReactantTemplate(0).GetAtoms(): - if atom.HasProp(RLABEL): - res2.append((atom.GetIdx(), atom.GetProp(RLABEL))) - self.assertEquals(res, res2) + for atom in rxn2.GetReactantTemplate(0).GetAtoms(): + if atom.HasProp(RLABEL): + res2.append((atom.GetIdx(), atom.GetProp(RLABEL))) + self.assertEquals(res, res2) - # currently ToBinary does not save atom props - # rxn2 = rdChemReactions.ChemicalReaction(rxn.ToBinary()) + # currently ToBinary does not save atom props + # rxn2 = rdChemReactions.ChemicalReaction(rxn.ToBinary()) - def test21CheckRawIters(self): - RLABEL = "_MolFileRLabel" - amine_rxn = '$RXN\n\n ISIS 090220091541\n\n 2 1\n$MOL\n\n -ISIS- 09020915412D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n -2.9083 -0.4708 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n -2.3995 -0.1771 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n -2.4042 0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 2 3 2 0 0 0 0\nV 2 aldehyde\nM RGP 1 1 1\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 2.8375 -0.2500 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 3.3463 0.0438 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0 0 0\nV 2 amine\nM RGP 1 1 2\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 13.3088 0.9436 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n 13.8206 1.2321 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n 13.3028 0.3561 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 12.7911 0.0676 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 1 3 1 0 0 0 0\n 1 2 1 0 0 0 0\n 3 4 1 0 0 0 0\nM RGP 2 2 1 4 2\nM END\n' - rxn = rdChemReactions.ReactionFromRxnBlock(amine_rxn) - reactants = rxn.GetReactants() - self.assertEquals(len(reactants), rxn.GetNumReactantTemplates()) - products = rxn.GetProducts() - self.assertEquals(len(products), rxn.GetNumProductTemplates()) - agents = rxn.GetAgents() - self.assertEquals(len(agents), rxn.GetNumAgentTemplates()) + def test21CheckRawIters(self): + RLABEL = "_MolFileRLabel" + amine_rxn = '$RXN\n\n ISIS 090220091541\n\n 2 1\n$MOL\n\n -ISIS- 09020915412D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n -2.9083 -0.4708 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n -2.3995 -0.1771 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n -2.4042 0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 2 3 2 0 0 0 0\nV 2 aldehyde\nM RGP 1 1 1\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 2.8375 -0.2500 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 3.3463 0.0438 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 1 2 1 0 0 0 0\nV 2 amine\nM RGP 1 1 2\nM END\n$MOL\n\n -ISIS- 09020915412D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 13.3088 0.9436 0.0000 C 0 0 0 0 0 0 0 0 0 2 0 0\n 13.8206 1.2321 0.0000 R# 0 0 0 0 0 0 0 0 0 1 0 0\n 13.3028 0.3561 0.0000 N 0 0 0 0 0 0 0 0 0 4 0 0\n 12.7911 0.0676 0.0000 R# 0 0 0 0 0 0 0 0 0 3 0 0\n 1 3 1 0 0 0 0\n 1 2 1 0 0 0 0\n 3 4 1 0 0 0 0\nM RGP 2 2 1 4 2\nM END\n' + rxn = rdChemReactions.ReactionFromRxnBlock(amine_rxn) + reactants = rxn.GetReactants() + self.assertEquals(len(reactants), rxn.GetNumReactantTemplates()) + products = rxn.GetProducts() + self.assertEquals(len(products), rxn.GetNumProductTemplates()) + agents = rxn.GetAgents() + self.assertEquals(len(agents), rxn.GetNumAgentTemplates()) - for i in range(rxn.GetNumReactantTemplates()): - p = rxn.GetReactantTemplate(i) - mb1 = Chem.MolToMolBlock(p) - mb2 = Chem.MolToMolBlock(reactants[i]) - self.assertEquals(mb1, mb2) + for i in range(rxn.GetNumReactantTemplates()): + p = rxn.GetReactantTemplate(i) + mb1 = Chem.MolToMolBlock(p) + mb2 = Chem.MolToMolBlock(reactants[i]) + self.assertEquals(mb1, mb2) - def test22RunSingleReactant(self): - # from - # A Collection of Robust Organic Synthesis Reactions for In Silico Molecule Design - # Markus Hartenfeller,*, Martin Eberle, Peter Meier, Cristina Nieto-Oberhuber, - # Karl-Heinz Altmann, Gisbert Schneider, Edgar Jacoby, and Steffen Renner - # Novartis Institutes for BioMedical Research, Novartis Pharma AG, Forum 1, - # Novartis Campus, CH-4056 Basel, Switzerland Swiss Federal Institute of Technology (ETH) - # Zurich, Switzerland - smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" - rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - reagents = [Chem.MolFromSmiles(x) for x in ['C=CCN=C=S', 'NCc1ncc(Cl)cc1Br']] - res = rxn.RunReactants(reagents) - self.assertTrue(res) - expected_result = [Chem.MolToSmiles(Chem.MolFromSmiles("C=CCNC(N)=S"))] - expected_result.sort() - sidechains_expected_result = [Chem.MolToSmiles( - Chem.MolFromSmiles("[*:1]=S.[*:3]CC=C"), isomericSmiles=True)] - sidechains_nodummy_expected_result = [[0, [3, ], [1, ]], [3, [1, ], [2, ]]] - sidechains_nodummy = [] + def test22RunSingleReactant(self): + # from + # A Collection of Robust Organic Synthesis Reactions for In Silico Molecule Design + # Markus Hartenfeller,*, Martin Eberle, Peter Meier, Cristina Nieto-Oberhuber, + # Karl-Heinz Altmann, Gisbert Schneider, Edgar Jacoby, and Steffen Renner + # Novartis Institutes for BioMedical Research, Novartis Pharma AG, Forum 1, + # Novartis Campus, CH-4056 Basel, Switzerland Swiss Federal Institute of Technology (ETH) + # Zurich, Switzerland + smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" + rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) + reagents = [Chem.MolFromSmiles(x) for x in ['C=CCN=C=S', 'NCc1ncc(Cl)cc1Br']] + res = rxn.RunReactants(reagents) + self.assertTrue(res) + expected_result = [Chem.MolToSmiles(Chem.MolFromSmiles("C=CCNC(N)=S"))] + expected_result.sort() + sidechains_expected_result = [Chem.MolToSmiles( + Chem.MolFromSmiles("[*:1]=S.[*:3]CC=C"), isomericSmiles=True)] + sidechains_nodummy_expected_result = [[0, [3, ], [1, ]], [3, [1, ], [2, ]]] + sidechains_nodummy = [] - sidechains_expected_result.sort() + sidechains_expected_result.sort() - for addDummy in [True, False]: - res = rxn.RunReactant(reagents[0], 0) - assert res - result = [] - sidechains = [] - for match in res: - for mol in match: - result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) - sidechain = rdChemReactions.ReduceProductToSideChains(mol, addDummy) - sidechains.append(Chem.MolToSmiles(sidechain, isomericSmiles=True)) - if not addDummy: - for atom in sidechain.GetAtoms(): - if atom.HasProp("_rgroupAtomMaps"): - sidechains_nodummy.append([atom.GetIdx(), - eval(atom.GetProp("_rgroupAtomMaps")), - eval(atom.GetProp("_rgroupBonds")), ]) - result.sort() - sidechains.sort() + for addDummy in [True, False]: + res = rxn.RunReactant(reagents[0], 0) + assert res + result = [] + sidechains = [] + for match in res: + for mol in match: + result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) + sidechain = rdChemReactions.ReduceProductToSideChains(mol, addDummy) + sidechains.append(Chem.MolToSmiles(sidechain, isomericSmiles=True)) + if not addDummy: + for atom in sidechain.GetAtoms(): + if atom.HasProp("_rgroupAtomMaps"): + sidechains_nodummy.append([atom.GetIdx(), + eval(atom.GetProp("_rgroupAtomMaps")), + eval(atom.GetProp("_rgroupBonds")), ]) + result.sort() + sidechains.sort() - if addDummy: + if addDummy: + self.assertEquals(result, expected_result) + self.assertEquals(sidechains, sidechains_expected_result) + else: + self.assertEquals(sidechains_nodummy, sidechains_nodummy_expected_result) + + expected_result = [Chem.MolToSmiles(Chem.MolFromSmiles("NCNCc1ncc(Cl)cc1Br"))] + expected_result.sort() + sidechains_expected_result = [Chem.MolToSmiles( + Chem.MolFromSmiles("[*:2]Cc1ncc(Cl)cc1Br"), isomericSmiles=True)] + sidechains_expected_result.sort() + + res = rxn.RunReactant(reagents[1], 1) + result = [] + sidechains = [] + for match in res: + for mol in match: + result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) + sidechains.append( + Chem.MolToSmiles(rdChemReactions.ReduceProductToSideChains(mol), isomericSmiles=True)) + + result.sort() self.assertEquals(result, expected_result) self.assertEquals(sidechains, sidechains_expected_result) - else: - self.assertEquals(sidechains_nodummy, sidechains_nodummy_expected_result) - expected_result = [Chem.MolToSmiles(Chem.MolFromSmiles("NCNCc1ncc(Cl)cc1Br"))] - expected_result.sort() - sidechains_expected_result = [Chem.MolToSmiles( - Chem.MolFromSmiles("[*:2]Cc1ncc(Cl)cc1Br"), isomericSmiles=True)] - sidechains_expected_result.sort() + self.assertFalse(rxn.RunReactant(reagents[0], 1)) + self.assertFalse(rxn.RunReactant(reagents[1], 0)) - res = rxn.RunReactant(reagents[1], 1) - result = [] - sidechains = [] - for match in res: - for mol in match: - result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) - sidechains.append( - Chem.MolToSmiles(rdChemReactions.ReduceProductToSideChains(mol), isomericSmiles=True)) + # try a broken ring based side-chain + sidechains_expected_result = ['c1ccc2c(c1)nc1n2CC[*:2]1'] + reactant = Chem.MolFromSmiles('c1ccc2c(c1)nc1n2CCN1') + res = rxn.RunReactant(reactant, 1) + result = [] + sidechains = [] + for match in res: + for mol in match: + result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) + sidechains.append( + Chem.MolToSmiles(rdChemReactions.ReduceProductToSideChains(mol), isomericSmiles=True)) + sidechain = rdChemReactions.ReduceProductToSideChains(mol, addDummyAtoms=False) - result.sort() - self.assertEquals(result, expected_result) - self.assertEquals(sidechains, sidechains_expected_result) + self.assertEquals(sidechains, sidechains_expected_result) - self.assertFalse(rxn.RunReactant(reagents[0], 1)) - self.assertFalse(rxn.RunReactant(reagents[1], 0)) + def test23CheckNonProduct(self): + smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" + rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) + mol = Chem.MolFromSmiles("CCCCCCCC") + m = rdChemReactions.ReduceProductToSideChains(mol) + self.assertTrue(m.GetNumAtoms() == 0) + mol = Chem.AddHs(mol) + m = rdChemReactions.ReduceProductToSideChains(mol) + self.assertTrue(m.GetNumAtoms() == 0) - # try a broken ring based side-chain - sidechains_expected_result = ['c1ccc2c(c1)nc1n2CC[*:2]1'] - reactant = Chem.MolFromSmiles('c1ccc2c(c1)nc1n2CCN1') - res = rxn.RunReactant(reactant, 1) - result = [] - sidechains = [] - for match in res: - for mol in match: - result.append(Chem.MolToSmiles(mol, isomericSmiles=True)) - sidechains.append( - Chem.MolToSmiles(rdChemReactions.ReduceProductToSideChains(mol), isomericSmiles=True)) - sidechain = rdChemReactions.ReduceProductToSideChains(mol, addDummyAtoms=False) + def testPreprocess(self): + testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', + 'test_data', 'boronic1.rxn') + rxn = rdChemReactions.ReactionFromRxnFile(testFile) + rxn.Initialize() + res = rdChemReactions.PreprocessReaction(rxn) + self.assertEquals(res, (0, 0, 2, 1, (((0, 'halogen.bromine.aromatic'), ), ( + (1, 'boronicacid'), )))) - self.assertEquals(sidechains, sidechains_expected_result) + def testProperties(self): + smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" + rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) + self.assertFalse(rxn.HasProp("fooprop")) + rxn.SetProp("fooprop", "bar", computed=True) + rxn.SetIntProp("intprop", 3) + self.assertTrue(rxn.HasProp("fooprop")) + self.assertTrue(rxn.HasProp("intprop")) + self.assertEquals(rxn.GetIntProp("intprop"), 3) + nrxn = rdChemReactions.ChemicalReaction(rxn.ToBinary()) + self.assertFalse(nrxn.HasProp("fooprop")) + nrxn = rdChemReactions.ChemicalReaction(rxn.ToBinary(Chem.PropertyPickleOptions.AllProps)) + self.assertTrue(nrxn.HasProp("fooprop")) + nrxn.ClearComputedProps() + self.assertFalse(nrxn.HasProp("fooprop")) + self.assertTrue(nrxn.HasProp("intprop")) + self.assertEquals(nrxn.GetIntProp("intprop"), 3) - def test23CheckNonProduct(self): - smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" - rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - mol = Chem.MolFromSmiles("CCCCCCCC") - m = rdChemReactions.ReduceProductToSideChains(mol) - self.assertTrue(m.GetNumAtoms() == 0) - mol = Chem.AddHs(mol) - m = rdChemReactions.ReduceProductToSideChains(mol) - self.assertTrue(m.GetNumAtoms() == 0) + def testRoundTripException(self): + smarts = '[C:1]([C@:3]1([OH:24])[CH2:8][CH2:7][C@H:6]2[C@H:9]3[C@H:19]([C@@H:20]([F:22])[CH2:21][C@:4]12[CH3:5])[C@:17]1([CH3:18])[C:12](=[CH:13][C:14](=[O:23])[CH2:15][CH2:16]1)[CH:11]=[CH:10]3)#[CH:2].C(Cl)CCl.ClC1C=CC=C(C(OO)=[O:37])C=1.C(O)(C)(C)C>C(OCC)(=O)C>[C:1]([C@:3]1([OH:24])[CH2:8][CH2:7][C@H:6]2[C@H:9]3[C@H:19]([C@@H:20]([F:22])[CH2:21][C@:4]12[CH3:5])[C@:17]1([CH3:18])[C:12](=[CH:13][C:14](=[O:23])[CH2:15][CH2:16]1)[C@H:11]1[O:37][C@@H:10]31)#[CH:2]' + rxn = rdChemReactions.ReactionFromSmarts(smarts) + # this shouldn't throw an exception + smarts = rdChemReactions.ReactionToSmarts(rxn) - def testPreprocess(self): - testFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'SimpleEnum', 'test_data', 'boronic1.rxn') - rxn = rdChemReactions.ReactionFromRxnFile(testFile) - rxn.Initialize() - res = rdChemReactions.PreprocessReaction(rxn) - self.assertEquals(res, (0, 0, 2, 1, (((0, 'halogen.bromine.aromatic'), ), ( - (1, 'boronicacid'), )))) + def testMaxProducts(self): + smarts = "[c:1]1[c:2][c:3][c:4][c:5][c:6]1>>[c:1]1[c:2][c:3][c:4][c:5][c:6]1" + rxn = rdChemReactions.ReactionFromSmarts(smarts) + m = Chem.MolFromSmiles("c1ccccc1") + prods = rxn.RunReactants([m]) + self.assertEqual(len(prods), 12) - def testProperties(self): - smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]" - rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea) - self.assertFalse(rxn.HasProp("fooprop")) - rxn.SetProp("fooprop","bar",computed=True) - rxn.SetIntProp("intprop",3) - self.assertTrue(rxn.HasProp("fooprop")) - self.assertTrue(rxn.HasProp("intprop")) - self.assertEquals(rxn.GetIntProp("intprop"),3) - nrxn = rdChemReactions.ChemicalReaction(rxn.ToBinary()) - self.assertFalse(nrxn.HasProp("fooprop")) - nrxn = rdChemReactions.ChemicalReaction(rxn.ToBinary(Chem.PropertyPickleOptions.AllProps)) - self.assertTrue(nrxn.HasProp("fooprop")) - nrxn.ClearComputedProps() - self.assertFalse(nrxn.HasProp("fooprop")) - self.assertTrue(nrxn.HasProp("intprop")) - self.assertEquals(nrxn.GetIntProp("intprop"),3) + prods = rxn.RunReactants([m], 1) + self.assertEqual(len(prods), 1) - def testRoundTripException(self): - smarts = '[C:1]([C@:3]1([OH:24])[CH2:8][CH2:7][C@H:6]2[C@H:9]3[C@H:19]([C@@H:20]([F:22])[CH2:21][C@:4]12[CH3:5])[C@:17]1([CH3:18])[C:12](=[CH:13][C:14](=[O:23])[CH2:15][CH2:16]1)[CH:11]=[CH:10]3)#[CH:2].C(Cl)CCl.ClC1C=CC=C(C(OO)=[O:37])C=1.C(O)(C)(C)C>C(OCC)(=O)C>[C:1]([C@:3]1([OH:24])[CH2:8][CH2:7][C@H:6]2[C@H:9]3[C@H:19]([C@@H:20]([F:22])[CH2:21][C@:4]12[CH3:5])[C@:17]1([CH3:18])[C:12](=[CH:13][C:14](=[O:23])[CH2:15][CH2:16]1)[C@H:11]1[O:37][C@@H:10]31)#[CH:2]' - rxn = rdChemReactions.ReactionFromSmarts(smarts) - # this shouldn't throw an exception - smarts = rdChemReactions.ReactionToSmarts(rxn) - def testMaxProducts(self): - smarts = "[c:1]1[c:2][c:3][c:4][c:5][c:6]1>>[c:1]1[c:2][c:3][c:4][c:5][c:6]1" - rxn = rdChemReactions.ReactionFromSmarts(smarts) - m = Chem.MolFromSmiles("c1ccccc1") - prods = rxn.RunReactants([m]) - self.assertEqual(len(prods), 12) - - prods = rxn.RunReactants([m],1) - self.assertEqual(len(prods), 1) - if __name__ == '__main__': - unittest.main(verbosity=True) + unittest.main(verbosity=True) diff --git a/Code/GraphMol/ChemReactions/Wrap/testSanitize.py b/Code/GraphMol/ChemReactions/Wrap/testSanitize.py index b37c27bac..917b05147 100644 --- a/Code/GraphMol/ChemReactions/Wrap/testSanitize.py +++ b/Code/GraphMol/ChemReactions/Wrap/testSanitize.py @@ -28,12 +28,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + import unittest import os,sys -from rdkit.six.moves import cPickle +import pickle from rdkit import rdBase from rdkit import Chem diff --git a/Code/GraphMol/Depictor/Wrap/testDepictor.py b/Code/GraphMol/Depictor/Wrap/testDepictor.py index a545f1b3d..361e64e4b 100755 --- a/Code/GraphMol/Depictor/Wrap/testDepictor.py +++ b/Code/GraphMol/Depictor/Wrap/testDepictor.py @@ -1,12 +1,12 @@ -## Automatically adapted for numpy.oldnumeric Jun 27, 2008 by -c +# Automatically adapted for numpy.oldnumeric Jun 27, 2008 by -c # # $Id: testDepictor.py 2112 2012-07-02 09:47:45Z glandrum $ # -#pylint:disable=E1101,C0111,C0103,R0904 -from __future__ import division, print_function +# pylint:disable=E1101,C0111,C0103,R0904 import unittest -import os, sys +import os +import sys import numpy as np from rdkit import Chem @@ -17,224 +17,225 @@ from rdkit.Chem.ChemUtils import AlignDepict def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 def ptEq(pt1, pt2, tol=1e-4): - return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) + return feq(pt1.x, pt2.x, tol) and feq(pt1.y, pt2.y, tol) and feq(pt1.z, pt2.z, tol) def getDistMat(mol): - conf = mol.GetConformer() - nat = mol.GetNumAtoms() - nl = nat * (nat - 1) // 2 - res = np.zeros(nl, np.float) + conf = mol.GetConformer() + nat = mol.GetNumAtoms() + nl = nat * (nat - 1) // 2 + res = np.zeros(nl, np.float) - for i in range(1, nat): - pi = conf.GetAtomPosition(i) - idx = i * (i - 1) // 2 - for j in range(i): - pj = conf.GetAtomPosition(j) - pj -= pi - res[idx + j] = pj.Length() + for i in range(1, nat): + pi = conf.GetAtomPosition(i) + idx = i * (i - 1) // 2 + for j in range(i): + pj = conf.GetAtomPosition(j) + pj -= pi + res[idx + j] = pj.Length() - return res + return res def compareCoords(m, molFile): - mo = Chem.MolFromMolFile(molFile) - co = mo.GetConformer() + mo = Chem.MolFromMolFile(molFile) + co = mo.GetConformer() - ci = m.GetConformer() - nat = m.GetNumAtoms() - if (nat != mo.GetNumAtoms()): - return 0 + ci = m.GetConformer() + nat = m.GetNumAtoms() + if (nat != mo.GetNumAtoms()): + return 0 - for i in range(nat): - pos = ci.GetAtomPosition(i) - opos = co.GetAtomPosition(i) - if not ptEq(pos, opos): - return 0 - return 1 + for i in range(nat): + pos = ci.GetAtomPosition(i) + opos = co.GetAtomPosition(i) + if not ptEq(pos, opos): + return 0 + return 1 def compareWithOld(smilesFile, sdFile): - smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) - sdsup = Chem.SDMolSupplier(sdFile) - im = 0 - for mol in smiSup: - omol = sdsup[im] - rdDepictor.Compute2DCoords(mol, canonOrient=False) - conf = mol.GetConformer() - oconf = omol.GetConformer() - nat = mol.GetNumAtoms() - for i in range(nat): - pos = conf.GetAtomPosition(i) - opos = oconf.GetAtomPosition(i) - if not ptEq(pos, opos): - print(Chem.MolToMolBlock(omol), file=sys.stderr) - print('> \n%d\n' % i, file=sys.stderr) - print("$$$$", file=sys.stderr) - print(Chem.MolToMolBlock(mol), file=sys.stderr) - print('> \n%d\n' % i, file=sys.stderr) - print("$$$$", file=sys.stderr) - return 0 - im += 1 - return 1 + smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) + sdsup = Chem.SDMolSupplier(sdFile) + im = 0 + for mol in smiSup: + omol = sdsup[im] + rdDepictor.Compute2DCoords(mol, canonOrient=False) + conf = mol.GetConformer() + oconf = omol.GetConformer() + nat = mol.GetNumAtoms() + for i in range(nat): + pos = conf.GetAtomPosition(i) + opos = oconf.GetAtomPosition(i) + if not ptEq(pos, opos): + print(Chem.MolToMolBlock(omol), file=sys.stderr) + print('> \n%d\n' % i, file=sys.stderr) + print("$$$$", file=sys.stderr) + print(Chem.MolToMolBlock(mol), file=sys.stderr) + print('> \n%d\n' % i, file=sys.stderr) + print("$$$$", file=sys.stderr) + return 0 + im += 1 + return 1 def stereoCompare(smilesFile): - smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) - for mol in smiSup: - rdDepictor.Compute2DCoords(mol, canonOrient=False) - mb = Chem.MolToMolBlock(mol) - nmol = Chem.MolFromMolBlock(mb) - matches = nmol.GetSubstructMatches(mol, False) - dbnds = [x for x in mol.GetBonds() if (x.GetBondType() == Chem.BondType.DOUBLE and \ - x.GetStereo() > Chem.BondStereo.STEREOANY) ] - ok = True - for match in matches: - for bnd in dbnds: - obnd = nmol.GetBondBetweenAtoms(match[bnd.GetBeginAtomIdx()], match[bnd.GetEndAtomIdx()]) - assert (obnd.GetBondType() == Chem.BondType.DOUBLE) - if ok: - break - if not ok: - print(Chem.MolToMolBlock(mol), file=sys.stderr) - print("$$$$", file=sys.stderr) - return 0 - return 1 + smiSup = Chem.SmilesMolSupplier(smilesFile, ",", 0, -1) + for mol in smiSup: + rdDepictor.Compute2DCoords(mol, canonOrient=False) + mb = Chem.MolToMolBlock(mol) + nmol = Chem.MolFromMolBlock(mb) + matches = nmol.GetSubstructMatches(mol, False) + dbnds = [x for x in mol.GetBonds() if (x.GetBondType() == Chem.BondType.DOUBLE and + x.GetStereo() > Chem.BondStereo.STEREOANY) ] + ok = True + for match in matches: + for bnd in dbnds: + obnd = nmol.GetBondBetweenAtoms( + match[bnd.GetBeginAtomIdx()], match[bnd.GetEndAtomIdx()]) + assert (obnd.GetBondType() == Chem.BondType.DOUBLE) + if ok: + break + if not ok: + print(Chem.MolToMolBlock(mol), file=sys.stderr) + print("$$$$", file=sys.stderr) + return 0 + return 1 class TestCase(unittest.TestCase): - def _test0First200(self): - # this test is disabled because it's not particularly useful and - # causes problems every time anything changes. - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - 'first_200.tpsa.csv') - #smiSup = Chem.SmilesMolSupplier(fileN, ",", 0, -1) + def _test0First200(self): + # this test is disabled because it's not particularly useful and + # causes problems every time anything changes. + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + 'first_200.tpsa.csv') + #smiSup = Chem.SmilesMolSupplier(fileN, ",", 0, -1) - ofile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - 'first_200.python.sdf') - self.assertTrue(compareWithOld(fileN, ofile)) + ofile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + 'first_200.python.sdf') + self.assertTrue(compareWithOld(fileN, ofile)) - def test1CisTrans(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - "cis_trans_cases.csv") - self.assertTrue(stereoCompare(fileN)) + def test1CisTrans(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + "cis_trans_cases.csv") + self.assertTrue(stereoCompare(fileN)) - def test2Coords(self): - m1 = Chem.MolFromSmiles('C1CCC1CC') - coordMap = {0: Geometry.Point2D(0, 0), - 1: Geometry.Point2D(1.5, 0), - 2: Geometry.Point2D(1.5, 1.5), - 3: Geometry.Point2D(0, 1.5)} - rdDepictor.Compute2DCoords(m1, coordMap=coordMap) - conf = m1.GetConformer(0) - for i in range(4): - self.assertTrue( - ptEq(conf.GetAtomPosition(i), Geometry.Point3D(coordMap[i].x, coordMap[i].y, 0.0))) + def test2Coords(self): + m1 = Chem.MolFromSmiles('C1CCC1CC') + coordMap = {0: Geometry.Point2D(0, 0), + 1: Geometry.Point2D(1.5, 0), + 2: Geometry.Point2D(1.5, 1.5), + 3: Geometry.Point2D(0, 1.5)} + rdDepictor.Compute2DCoords(m1, coordMap=coordMap) + conf = m1.GetConformer(0) + for i in range(4): + self.assertTrue( + ptEq(conf.GetAtomPosition(i), Geometry.Point3D(coordMap[i].x, coordMap[i].y, 0.0))) - m1 = Chem.MolFromSmiles('CCC') - try: - rdDepictor.Compute2DCoords(m1, coordMap=coordMap) - ok = 0 - except ValueError: - ok = 1 - self.assertTrue(ok) + m1 = Chem.MolFromSmiles('CCC') + try: + rdDepictor.Compute2DCoords(m1, coordMap=coordMap) + ok = 0 + except ValueError: + ok = 1 + self.assertTrue(ok) - def test3IssueSF1526844(self): - t = Chem.MolFromSmiles('c1nc(N)ccc1') - rdDepictor.Compute2DCoords(t, canonOrient=False) + def test3IssueSF1526844(self): + t = Chem.MolFromSmiles('c1nc(N)ccc1') + rdDepictor.Compute2DCoords(t, canonOrient=False) - m2 = Chem.MolFromSmiles('c1nc(NC=O)ccc1') - AlignDepict.AlignDepict(m2, t) - expected = [Geometry.Point3D(1.5, 0.0, 0.0), Geometry.Point3D(0.75, -1.299, 0.0), - Geometry.Point3D(-0.75, -1.299, 0.0), Geometry.Point3D(-1.5, -2.5981, 0.0), - Geometry.Point3D(-3.0, -2.5981, 0.0), Geometry.Point3D(-3.75, -3.8971, 0.0), - Geometry.Point3D(-1.5, 0.0, 0.0), Geometry.Point3D(-0.75, 1.2990, 0.0), - Geometry.Point3D(0.75, 1.2990, 0.0)] + m2 = Chem.MolFromSmiles('c1nc(NC=O)ccc1') + AlignDepict.AlignDepict(m2, t) + expected = [Geometry.Point3D(1.5, 0.0, 0.0), Geometry.Point3D(0.75, -1.299, 0.0), + Geometry.Point3D(-0.75, -1.299, 0.0), Geometry.Point3D(-1.5, -2.5981, 0.0), + Geometry.Point3D(-3.0, -2.5981, 0.0), Geometry.Point3D(-3.75, -3.8971, 0.0), + Geometry.Point3D(-1.5, 0.0, 0.0), Geometry.Point3D(-0.75, 1.2990, 0.0), + Geometry.Point3D(0.75, 1.2990, 0.0)] - nat = m2.GetNumAtoms() - conf = m2.GetConformer() - for i in range(nat): - pos = conf.GetAtomPosition(i) - self.assertTrue(ptEq(pos, expected[i], 0.001)) + nat = m2.GetNumAtoms() + conf = m2.GetConformer() + for i in range(nat): + pos = conf.GetAtomPosition(i) + self.assertTrue(ptEq(pos, expected[i], 0.001)) - def test4SamplingSpread(self): - mol = Chem.MolFromMolFile( - os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) + def test4SamplingSpread(self): + mol = Chem.MolFromMolFile( + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) - # default mode - rdDepictor.Compute2DCoords(mol, canonOrient=False) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_default.mol'))) + # default mode + rdDepictor.Compute2DCoords(mol, canonOrient=False) + self.assertTrue( + compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_default.mol'))) - # spread the structure as much as possible by sampling - rdDepictor.Compute2DCoords(mol, canonOrient=False, nFlipsPerSample=3, nSample=100, - sampleSeed=100, permuteDeg4Nodes=1) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_spread.mol'))) + # spread the structure as much as possible by sampling + rdDepictor.Compute2DCoords(mol, canonOrient=False, nFlipsPerSample=3, nSample=100, + sampleSeed=100, permuteDeg4Nodes=1) + self.assertTrue( + compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_spread.mol'))) - def test5SamplingMimic3D(self): - mol = Chem.MolFromMolFile( - os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) - dmat3D = getDistMat(mol) + def test5SamplingMimic3D(self): + mol = Chem.MolFromMolFile( + os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', 'test_data/7UPJ_xtal.mol')) + dmat3D = getDistMat(mol) - # now mimic the coordinate with a very small weight - rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.001) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_mimic3D_1.mol'))) + # now mimic the coordinate with a very small weight + rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.001) + self.assertTrue( + compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_mimic3D_1.mol'))) - # now mimic the coordinate with a very small weight - rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.003) - self.assertTrue( - compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', - 'test_data/7UPJ_mimic3D_2.mol'))) + # now mimic the coordinate with a very small weight + rdDepictor.Compute2DCoordsMimicDistmat(mol, dmat3D, weightDistMat=0.003) + self.assertTrue( + compareCoords(mol, os.path.join(RDConfig.RDBaseDir, 'Code/GraphMol/Depictor', + 'test_data/7UPJ_mimic3D_2.mol'))) - #mb = Chem.MolToMolBlock(mol) - #ofile = open('../test_data/7UPJ_mimic3D_2.mol', 'w') - #ofile.write(mb) - #ofile.close() + #mb = Chem.MolToMolBlock(mol) + #ofile = open('../test_data/7UPJ_mimic3D_2.mol', 'w') + # ofile.write(mb) + # ofile.close() - def test6ChangeBondLength(self): - m = Chem.MolFromSmiles('CC') - rdDepictor.Compute2DCoords(m) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) - rdDepictor.Compute2DCoords(m, bondLength=1.0) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.500, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.500, 3) - rdDepictor.Compute2DCoords(m) - conf = m.GetConformer() - self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) - self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) + def test6ChangeBondLength(self): + m = Chem.MolFromSmiles('CC') + rdDepictor.Compute2DCoords(m) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) + rdDepictor.Compute2DCoords(m, bondLength=1.0) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.500, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.500, 3) + rdDepictor.Compute2DCoords(m) + conf = m.GetConformer() + self.assertAlmostEqual(conf.GetAtomPosition(0).x, -0.750, 3) + self.assertAlmostEqual(conf.GetAtomPosition(1).x, 0.750, 3) - def testConstrainedCoords(self) : - templ = Chem.MolFromSmiles( 'c1nccc2n1ccc2' ) - rdDepictor.Compute2DCoords( templ ) - m1 = Chem.MolFromSmiles( 'c1cccc2ncn3cccc3c21' ) - rdDepictor.GenerateDepictionMatching2DStructure(m1,templ) - m2 = Chem.MolFromSmiles( 'c1cc(Cl)cc2ncn3cccc3c21' ) - rdDepictor.Compute2DCoords(m2) - refPatt1 = Chem.MolFromSmarts( '*1****2*1***2' ) - rdDepictor.GenerateDepictionMatching2DStructure( m2 , templ , -1 , refPatt1 ) - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', - '1XP0_ligand.sdf') + def testConstrainedCoords(self): + templ = Chem.MolFromSmiles('c1nccc2n1ccc2') + rdDepictor.Compute2DCoords(templ) + m1 = Chem.MolFromSmiles('c1cccc2ncn3cccc3c21') + rdDepictor.GenerateDepictionMatching2DStructure(m1, templ) + m2 = Chem.MolFromSmiles('c1cc(Cl)cc2ncn3cccc3c21') + rdDepictor.Compute2DCoords(m2) + refPatt1 = Chem.MolFromSmarts('*1****2*1***2') + rdDepictor.GenerateDepictionMatching2DStructure(m2, templ, -1, refPatt1) + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'Depictor', 'test_data', + '1XP0_ligand.sdf') - xp0_lig = Chem.MolFromMolFile( fileN ) - xp0_lig_2d = Chem.Mol( xp0_lig ) - rdDepictor.GenerateDepictionMatching3DStructure( xp0_lig_2d , xp0_lig ) - xp0_ref = Chem.MolFromSmarts( '[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12' ) - rdDepictor.GenerateDepictionMatching3DStructure( xp0_lig_2d , xp0_lig , -1 , xp0_ref ) + xp0_lig = Chem.MolFromMolFile(fileN) + xp0_lig_2d = Chem.Mol(xp0_lig) + rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig) + xp0_ref = Chem.MolFromSmarts('[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12') + rdDepictor.GenerateDepictionMatching3DStructure(xp0_lig_2d, xp0_lig, -1, xp0_ref) if __name__ == '__main__': - rdDepictor.SetPreferCoordGen(False); - unittest.main() + rdDepictor.SetPreferCoordGen(False) + unittest.main() diff --git a/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py b/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py index 569bce407..eecc1cfc4 100644 --- a/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py +++ b/Code/GraphMol/Descriptors/Wrap/testMolDescriptors.py @@ -1,6 +1,6 @@ # $Id$ # -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdMolDescriptors as rdMD, Descriptors from rdkit.Chem import AllChem diff --git a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py index b5ca37fc5..886c99ed3 100644 --- a/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py +++ b/Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py @@ -1,11 +1,11 @@ -from __future__ import print_function + import unittest import os, copy import math import numpy -from rdkit.six.moves import cPickle as pickle -from rdkit.six import next +import pickle + from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import rdDistGeom, ChemicalForceFields, rdMolAlign diff --git a/Code/GraphMol/FileParsers/mol.py b/Code/GraphMol/FileParsers/mol.py index 51b036c47..1178ae526 100755 --- a/Code/GraphMol/FileParsers/mol.py +++ b/Code/GraphMol/FileParsers/mol.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from Chem import rdmol from Chem.rdmol import Atom, Bond, Mol diff --git a/Code/GraphMol/FilterCatalog/Wrap/rough_test.py b/Code/GraphMol/FilterCatalog/Wrap/rough_test.py index 7f1241e2b..6afed68c6 100644 --- a/Code/GraphMol/FilterCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/FilterCatalog/Wrap/rough_test.py @@ -34,7 +34,7 @@ it is intended to be shallow but broad. """ -from __future__ import print_function + import doctest, unittest, os import pickle from rdkit import RDConfig diff --git a/Code/GraphMol/FilterCatalog/update_pains.py b/Code/GraphMol/FilterCatalog/update_pains.py index 681b38c60..c5a9e258e 100644 --- a/Code/GraphMol/FilterCatalog/update_pains.py +++ b/Code/GraphMol/FilterCatalog/update_pains.py @@ -1,5 +1,5 @@ # must be run from this directory -from __future__ import print_function + import csv, os, sys py3 = sys.version_info[0] == 3 diff --git a/Code/GraphMol/FragCatalog/Wrap/rough_test.py b/Code/GraphMol/FragCatalog/Wrap/rough_test.py index a7ba2c859..5b3bfc10b 100755 --- a/Code/GraphMol/FragCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/FragCatalog/Wrap/rough_test.py @@ -9,7 +9,7 @@ it's intended to be shallow, but broad """ import unittest, os -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit.RDLogger import logger logger = logger() @@ -105,8 +105,8 @@ class TestCase(unittest.TestCase): smiles.append(Chem.MolToSmiles(mol)) self.assertEqual(fcat.GetNumEntries(), 21) self.assertEqual(fcat.GetFPLength(), 21) - pkl = cPickle.dumps(fcat) - fcat2 = cPickle.loads(pkl) + pkl = pickle.dumps(fcat) + fcat2 = pickle.loads(pkl) self.assertEqual(fcat2.GetNumEntries(), 21) self.assertEqual(fcat2.GetFPLength(), 21) fpgen = FragmentCatalog.FragFPGenerator() diff --git a/Code/GraphMol/MMPA/Wrap/testMMPA.py b/Code/GraphMol/MMPA/Wrap/testMMPA.py index bab8af67b..40c58b25d 100644 --- a/Code/GraphMol/MMPA/Wrap/testMMPA.py +++ b/Code/GraphMol/MMPA/Wrap/testMMPA.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from rdkit import RDConfig import unittest from rdkit import Chem diff --git a/Code/GraphMol/MolAlign/Wrap/testMolAlign.py b/Code/GraphMol/MolAlign/Wrap/testMolAlign.py index 1cfe816d7..d6cc2fa3d 100644 --- a/Code/GraphMol/MolAlign/Wrap/testMolAlign.py +++ b/Code/GraphMol/MolAlign/Wrap/testMolAlign.py @@ -4,7 +4,7 @@ # # @@ All Rights Reserved @@ # -from __future__ import print_function + from rdkit import RDConfig import os, sys, copy import unittest diff --git a/Code/GraphMol/MolCatalog/Wrap/rough_test.py b/Code/GraphMol/MolCatalog/Wrap/rough_test.py index ca6a5c552..6e6a2d090 100755 --- a/Code/GraphMol/MolCatalog/Wrap/rough_test.py +++ b/Code/GraphMol/MolCatalog/Wrap/rough_test.py @@ -3,7 +3,7 @@ # Copyright (C) 2006 Greg Landrum # import unittest, os, sys -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import Chem from rdkit import DataStructs @@ -42,12 +42,12 @@ class TestCase(unittest.TestCase): cat.AddEdge(0, 2) cat.AddEdge(1, 2) - d = cPickle.dumps(cat) + d = pickle.dumps(cat) es = None entry = None cat = None - cat = cPickle.loads(d) + cat = pickle.loads(d) self.assertTrue(cat.GetNumEntries() == 3) cat = None diff --git a/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py b/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py index 57892daf1..a36abca69 100755 --- a/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py +++ b/Code/GraphMol/MolDraw2D/test_dir/test_rdkit_draw.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from __future__ import print_function + from rdkit import RDConfig import os, sys from rdkit import Chem diff --git a/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py b/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py index fd431c1a1..996335bf6 100644 --- a/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py +++ b/Code/GraphMol/PartialCharges/Wrap/testPartialCharges.py @@ -1,9 +1,9 @@ -from __future__ import print_function + import unittest import os import io -from rdkit.six.moves import cPickle as pickle +import pickle from rdkit import Chem from rdkit.Chem import rdPartialCharges @@ -11,126 +11,126 @@ from rdkit import RDConfig def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 class TestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test0HalgrenSet(self): - smiSup = Chem.SmilesMolSupplier( - os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', - 'halgren.smi'), delimiter='\t') + def test0HalgrenSet(self): + smiSup = Chem.SmilesMolSupplier( + os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', + 'halgren.smi'), delimiter='\t') - #parse the original file - with open( - os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', - 'halgren_out.txt'), 'r') as infil: - lines = infil.readlines() + # parse the original file + with open( + os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', 'test_data', + 'halgren_out.txt'), 'r') as infil: + lines = infil.readlines() - tab = Chem.GetPeriodicTable() + tab = Chem.GetPeriodicTable() - olst = [] - for mol in smiSup: - rdPartialCharges.ComputeGasteigerCharges(mol) - tstr = "Molecule: " - tstr += mol.GetProp("_Name") - olst.append(tstr) - for i in range(mol.GetNumAtoms()): - at = mol.GetAtomWithIdx(i) - en = tab.GetElementSymbol(at.GetAtomicNum()) - chg = float(at.GetProp("_GasteigerCharge")) - tstr = "%i %s %6.4f" % (i, en, chg) - olst.append(tstr) + olst = [] + for mol in smiSup: + rdPartialCharges.ComputeGasteigerCharges(mol) + tstr = "Molecule: " + tstr += mol.GetProp("_Name") + olst.append(tstr) + for i in range(mol.GetNumAtoms()): + at = mol.GetAtomWithIdx(i) + en = tab.GetElementSymbol(at.GetAtomicNum()) + chg = float(at.GetProp("_GasteigerCharge")) + tstr = "%i %s %6.4f" % (i, en, chg) + olst.append(tstr) - i = 0 - for line in lines: - self.assertTrue(line.strip() == olst[i]) - i += 1 + i = 0 + for line in lines: + self.assertTrue(line.strip() == olst[i]) + i += 1 - def test1PPDataset(self): - fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', - 'test_data', 'PP_descrs_regress.2.csv') - infil = open(fileN, 'r') - lines = infil.readlines() - infil.close() + def test1PPDataset(self): + fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', + 'test_data', 'PP_descrs_regress.2.csv') + infil = open(fileN, 'r') + lines = infil.readlines() + infil.close() - infile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', - 'test_data', 'PP_combi_charges.pkl') - with open(infile, 'r') as cchtFile: - buf = cchtFile.read().replace('\r\n', '\n').encode('utf-8') - cchtFile.close() - with io.BytesIO(buf) as cchFile: - combiCharges = pickle.load(cchFile) + infile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap', + 'test_data', 'PP_combi_charges.pkl') + with open(infile, 'r') as cchtFile: + buf = cchtFile.read().replace('\r\n', '\n').encode('utf-8') + cchtFile.close() + with io.BytesIO(buf) as cchFile: + combiCharges = pickle.load(cchFile) - for lin in lines: - if (lin[0] == '#'): - continue - tlst = lin.strip().split(',') - smi = tlst[0] - rdmol = Chem.MolFromSmiles(smi) - rdPartialCharges.ComputeGasteigerCharges(rdmol) + for lin in lines: + if (lin[0] == '#'): + continue + tlst = lin.strip().split(',') + smi = tlst[0] + rdmol = Chem.MolFromSmiles(smi) + rdPartialCharges.ComputeGasteigerCharges(rdmol) - nat = rdmol.GetNumAtoms() - failed = False - for ai in range(nat): - rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge')) - if not feq(rdch, combiCharges[smi][ai], 1.e-2): - failed = True - print(smi, ai, rdch, combiCharges[smi][ai]) - if failed: - rdmol.Debug() - self.assertFalse(failed) + nat = rdmol.GetNumAtoms() + failed = False + for ai in range(nat): + rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge')) + if not feq(rdch, combiCharges[smi][ai], 1.e-2): + failed = True + print(smi, ai, rdch, combiCharges[smi][ai]) + if failed: + rdmol.Debug() + self.assertFalse(failed) - def test2Params(self): - """ tests handling of Issue187 """ - m1 = Chem.MolFromSmiles('C(=O)[O-]') - rdPartialCharges.ComputeGasteigerCharges(m1) + def test2Params(self): + """ tests handling of Issue187 """ + m1 = Chem.MolFromSmiles('C(=O)[O-]') + rdPartialCharges.ComputeGasteigerCharges(m1) - m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') - rdPartialCharges.ComputeGasteigerCharges(m2) + m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') + rdPartialCharges.ComputeGasteigerCharges(m2) - for i in range(m1.GetNumAtoms()): - c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - c2 = float(m2.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - self.assertTrue(feq(c1, c2, 1e-4)) + for i in range(m1.GetNumAtoms()): + c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + c2 = float(m2.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + self.assertTrue(feq(c1, c2, 1e-4)) - def test3Params(self): - """ tests handling of Issue187 """ - m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') - with self.assertRaisesRegexp(Exception, ""): - rdPartialCharges.ComputeGasteigerCharges(m2, 12, 1) + def test3Params(self): + """ tests handling of Issue187 """ + m2 = Chem.MolFromSmiles('C(=O)[O-].[Na+]') + with self.assertRaisesRegexp(Exception, ""): + rdPartialCharges.ComputeGasteigerCharges(m2, 12, 1) - def testGithubIssue20(self): - """ tests handling of Github issue 20 """ - m1 = Chem.MolFromSmiles('CB(O)O') - rdPartialCharges.ComputeGasteigerCharges(m1) - chgs = [-0.030, 0.448, -0.427, -0.427] - for i in range(m1.GetNumAtoms()): - c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) - self.assertAlmostEqual(c1, chgs[i], 3) + def testGithubIssue20(self): + """ tests handling of Github issue 20 """ + m1 = Chem.MolFromSmiles('CB(O)O') + rdPartialCharges.ComputeGasteigerCharges(m1) + chgs = [-0.030, 0.448, -0.427, -0.427] + for i in range(m1.GetNumAtoms()): + c1 = float(m1.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) + self.assertAlmostEqual(c1, chgs[i], 3) - def testGithubIssue577(self): - """ tests handling of Github issue 577 """ - m1 = Chem.MolFromSmiles('CCO') - from locale import setlocale, LC_NUMERIC - try: - setlocale(LC_NUMERIC, "de_DE") - except Exception: - # can't set the required locale, might as well just return - return - try: - rdPartialCharges.ComputeGasteigerCharges(m1) - for at in m1.GetAtoms(): - float(at.GetProp('_GasteigerCharge')) - finally: - setlocale(LC_NUMERIC, "C") - rdPartialCharges.ComputeGasteigerCharges(m1) - for at in m1.GetAtoms(): - float(at.GetProp('_GasteigerCharge')) + def testGithubIssue577(self): + """ tests handling of Github issue 577 """ + m1 = Chem.MolFromSmiles('CCO') + from locale import setlocale, LC_NUMERIC + try: + setlocale(LC_NUMERIC, "de_DE") + except Exception: + # can't set the required locale, might as well just return + return + try: + rdPartialCharges.ComputeGasteigerCharges(m1) + for at in m1.GetAtoms(): + float(at.GetProp('_GasteigerCharge')) + finally: + setlocale(LC_NUMERIC, "C") + rdPartialCharges.ComputeGasteigerCharges(m1) + for at in m1.GetAtoms(): + float(at.GetProp('_GasteigerCharge')) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py index 135b345fa..77bdfd024 100644 --- a/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py +++ b/Code/GraphMol/RGroupDecomposition/Wrap/test_rgroups.py @@ -28,12 +28,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + import unittest import os,sys, copy -from rdkit.six.moves import cPickle +import pickle from rdkit import rdBase from rdkit import Chem diff --git a/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py b/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py index 2be73f404..4cbcec0c2 100644 --- a/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py +++ b/Code/GraphMol/ShapeHelpers/Wrap/testShapeHelpers.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import os, sys import unittest import math diff --git a/Code/GraphMol/StructChecker/Wrap/rough_test.py b/Code/GraphMol/StructChecker/Wrap/rough_test.py index b5f7fe88d..f2cbc6d1b 100644 --- a/Code/GraphMol/StructChecker/Wrap/rough_test.py +++ b/Code/GraphMol/StructChecker/Wrap/rough_test.py @@ -30,7 +30,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdStructChecker diff --git a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py index 87fe89016..7e5e2fcac 100644 --- a/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py +++ b/Code/GraphMol/SubstructLibrary/Wrap/rough_test.py @@ -32,7 +32,7 @@ it is intended to be shallow but broad. """ -from __future__ import print_function + import doctest, unittest, os, sys from rdkit import RDConfig diff --git a/Code/GraphMol/UnitTestQueryMol.py b/Code/GraphMol/UnitTestQueryMol.py index b138112df..5a3e7b15c 100755 --- a/Code/GraphMol/UnitTestQueryMol.py +++ b/Code/GraphMol/UnitTestQueryMol.py @@ -2,7 +2,7 @@ """basic unit testing code for query mols """ -from __future__ import print_function + from rdkit import RDConfig import unittest, os, sys diff --git a/Code/GraphMol/Wrap/rough_test.py b/Code/GraphMol/Wrap/rough_test.py index 0ad201a82..f617f404c 100644 --- a/Code/GraphMol/Wrap/rough_test.py +++ b/Code/GraphMol/Wrap/rough_test.py @@ -7,21 +7,20 @@ it's intended to be shallow, but broad """ -from __future__ import print_function + import os, sys, tempfile, gzip, gc import unittest, doctest from rdkit import RDConfig, rdBase from rdkit import DataStructs from rdkit import Chem -from rdkit import six -from rdkit.six import exec_ from rdkit import __version__ # Boost functions are NOT found by doctest, this "fixes" them # by adding the doctests to a fake module -import imp -TestReplaceCore = imp.new_module("TestReplaceCore") +import importlib.util +spec = importlib.util.spec_from_loader("TestReplaceCore", loader=None) +TestReplaceCore = importlib.util.module_from_spec(spec) code = """ from rdkit.Chem import ReplaceCore def ReplaceCore(*a, **kw): @@ -29,7 +28,7 @@ def ReplaceCore(*a, **kw): ''' return Chem.ReplaceCore(*a, **kw) """ % "\n".join([x.lstrip() for x in Chem.ReplaceCore.__doc__.split("\n")]) -exec_(code, TestReplaceCore.__dict__) +exec(code, TestReplaceCore.__dict__) def load_tests(loader, tests, ignore): @@ -437,17 +436,17 @@ class TestCase(unittest.TestCase): self.assertTrue(len(bs) == 3) def test16Pickle(self): - from rdkit.six.moves import cPickle + import pickle m = Chem.MolFromSmiles('C1=CN=CC=C1') - pkl = cPickle.dumps(m) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(m) + m2 = pickle.loads(pkl) self.assertTrue(type(m2) == Chem.Mol) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) - pkl = cPickle.dumps(Chem.RWMol(m)) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(Chem.RWMol(m)) + m2 = pickle.loads(pkl) self.assertTrue(type(m2) == Chem.RWMol) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) @@ -887,7 +886,7 @@ class TestCase(unittest.TestCase): # test parsed charges on one of the molecules for id in chgs192.keys(): self.assertTrue(mol.GetAtomWithIdx(id).GetFormalCharge() == chgs192[id]) - self.assertRaises(StopIteration, lambda: six.next(sdSup)) + self.assertRaises(StopIteration, lambda: next(sdSup)) sdSup.reset() ns = [mol.GetProp("_Name") for mol in sdSup] @@ -906,36 +905,36 @@ class TestCase(unittest.TestCase): fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'withHs.sdf') sdSup = Chem.SDMolSupplier(fileN) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 23) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 28) sdSup = Chem.SDMolSupplier(fileN, removeHs=False) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 39) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 30) with open(fileN, 'rb') as dFile: d = dFile.read() sdSup.SetData(d) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 23) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 28) sdSup.SetData(d, removeHs=False) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 39) - m = six.next(sdSup) + m = next(sdSup) self.assertTrue(m) self.assertTrue(m.GetNumAtoms() == 30) @@ -1155,7 +1154,7 @@ mol-4,CCOC smis = ['CC', 'CCC', 'CCOC', 'CCCCOC'] inD = '\n'.join(smis) smiSup.SetData(inD, delimiter=",", smilesColumn=0, nameColumn=-1, titleLine=0) - m = six.next(smiSup) + m = next(smiSup) m = smiSup[3] self.assertTrue(len(smiSup) == 4) @@ -2722,7 +2721,7 @@ CAS<~> i = 0 while not suppl.atEnd(): - mol = six.next(suppl) + mol = next(suppl) self.assertTrue(mol) self.assertTrue(mol.GetProp("_Name") == molNames[i]) i += 1 @@ -2734,7 +2733,7 @@ CAS<~> inf = None i = 0 while not suppl.atEnd(): - mol = six.next(suppl) + mol = next(suppl) self.assertTrue(mol) self.assertTrue(mol.GetProp("_Name") == molNames[i]) i += 1 @@ -2757,7 +2756,7 @@ CAS<~> i = 0 while not suppl.atEnd(): - mol = six.next(suppl) + mol = next(suppl) self.assertTrue(mol) self.assertTrue(mol.GetProp("_Name") == molNames[i]) i += 1 @@ -2769,7 +2768,7 @@ CAS<~> inf = None i = 0 while not suppl.atEnd(): - mol = six.next(suppl) + mol = next(suppl) self.assertTrue(mol) self.assertTrue(mol.GetProp("_Name") == molNames[i]) i += 1 @@ -2791,7 +2790,7 @@ CAS<~> i = 0 while not suppl.atEnd(): - mol = six.next(suppl) + mol = next(suppl) self.assertTrue(mol) self.assertTrue(mol.GetProp("_Name") == molNames[i]) i += 1 @@ -2821,14 +2820,9 @@ CAS<~> def test67StreamSupplierStringIO(self): fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'NCI_aids_few.sdf.gz') - if six.PY3: - from io import BytesIO - sio = BytesIO(gzip.open(fileN).read()) - else: - import StringIO - sio = StringIO.StringIO(gzip.open(fileN).read()) + from io import BytesIO + sio = BytesIO(gzip.open(fileN).read()) suppl = Chem.ForwardSDMolSupplier(sio) - molNames = [ "48", "78", "128", "163", "164", "170", "180", "186", "192", "203", "210", "211", "213", "220", "229", "256" @@ -2875,10 +2869,7 @@ CAS<~> self.assertEqual(i, 16) def test70StreamSDWriter(self): - if six.PY3: - from io import BytesIO, StringIO - else: - from StringIO import StringIO + from io import BytesIO, StringIO fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'NCI_aids_few.sdf.gz') @@ -2899,11 +2890,8 @@ CAS<~> self.assertEqual(i, 16) w.flush() w = None - if six.PY3: - txt = osio.getvalue().encode() - isio = BytesIO(txt) - else: - isio = StringIO(osio.getvalue()) + txt = osio.getvalue().encode() + isio = BytesIO(txt) suppl = Chem.ForwardSDMolSupplier(isio) i = 0 for mol in suppl: @@ -2913,7 +2901,7 @@ CAS<~> self.assertEqual(i, 16) def test71StreamSmilesWriter(self): - from rdkit.six.moves import StringIO + from io import StringIO fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'esters.sdf') suppl = Chem.ForwardSDMolSupplier(fileN) @@ -2934,7 +2922,7 @@ CAS<~> self.assertEqual(txt.count('\n'), 7) def test72StreamTDTWriter(self): - from rdkit.six.moves import StringIO + from io import StringIO fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'esters.sdf') suppl = Chem.ForwardSDMolSupplier(fileN) @@ -2997,7 +2985,7 @@ CAS<~> fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'FileParsers', 'test_data', 'NCI_aids_few.sdf') sdSup = Chem.SDMolSupplier(fileN) - mol = six.next(sdSup) + mol = next(sdSup) nats = mol.GetNumAtoms() conf = mol.GetConformer() mol = None @@ -3837,10 +3825,10 @@ CAS<~> resMol = resMolSuppl[i] self.assertEqual(getTotalFormalCharge(resMol), totalFormalCharge) while (not resMolSuppl.atEnd()): - resMol = six.next(resMolSuppl) + resMol = next(resMolSuppl) self.assertEqual(getTotalFormalCharge(resMol), totalFormalCharge) resMolSuppl.reset() - cmpFormalChargeBondOrder(self, resMolSuppl[0], six.next(resMolSuppl)) + cmpFormalChargeBondOrder(self, resMolSuppl[0], next(resMolSuppl)) resMolSuppl = Chem.ResonanceMolSupplier(mol, Chem.ALLOW_INCOMPLETE_OCTETS \ @@ -4238,17 +4226,18 @@ CAS<~> # this test should probably always be last since it wraps # the logging stream def testLogging(self): + from io import StringIO err = sys.stderr try: loggers = [("RDKit ERROR", "1", Chem.LogErrorMsg), ("RDKit WARNING", "2", Chem.LogWarningMsg)] for msg, v, log in loggers: - sys.stderr = six.StringIO() + sys.stderr = StringIO() log(v) self.assertEqual(sys.stderr.getvalue(), "") Chem.WrapLogs() for msg, v, log in loggers: - sys.stderr = six.StringIO() + sys.stderr = StringIO() log(v) s = sys.stderr.getvalue() self.assertTrue(msg in s) @@ -4460,7 +4449,7 @@ CAS<~> self.assertEqual(Chem.MolFragmentToCXSmiles(m,atomsToUse=(1,2,3)), 'CCC |$foo;;bar$|') def testPickleProps(self): - from rdkit.six.moves import cPickle + import pickle m = Chem.MolFromSmiles('C1=CN=CC=C1') m.SetProp("_Name", "Name") for atom in m.GetAtoms(): @@ -4468,8 +4457,8 @@ CAS<~> atom.SetProp("foo", "baz" + str(atom.GetIdx())) Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps) - pkl = cPickle.dumps(m) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(m) + m2 = pickle.loads(pkl) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) @@ -4479,8 +4468,8 @@ CAS<~> self.assertEqual(atom.GetProp("foo"), "baz" + str(atom.GetIdx())) Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AtomProps) - pkl = cPickle.dumps(m) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(m) + m2 = pickle.loads(pkl) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) @@ -4490,8 +4479,8 @@ CAS<~> self.assertEqual(atom.GetProp("foo"), "baz" + str(atom.GetIdx())) Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.NoProps) - pkl = cPickle.dumps(m) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(m) + m2 = pickle.loads(pkl) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) @@ -4502,8 +4491,8 @@ CAS<~> Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) - pkl = cPickle.dumps(m) - m2 = cPickle.loads(pkl) + pkl = pickle.dumps(m) + m2 = pickle.loads(pkl) smi1 = Chem.MolToSmiles(m) smi2 = Chem.MolToSmiles(m2) self.assertTrue(smi1 == smi2) @@ -4603,9 +4592,9 @@ M END def testOldPropPickles(self): data = 'crdkit.Chem.rdchem\nMol\np0\n(S\'\\xef\\xbe\\xad\\xde\\x00\\x00\\x00\\x00\\x08\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00)\\x00\\x00\\x00-\\x00\\x00\\x00\\x80\\x01\\x06\\x00`\\x00\\x00\\x00\\x01\\x03\\x07\\x00`\\x00\\x00\\x00\\x02\\x01\\x06 4\\x00\\x00\\x00\\x01\\x01\\x04\\x06\\x00`\\x00\\x00\\x00\\x01\\x03\\x06\\x00(\\x00\\x00\\x00\\x03\\x04\\x08\\x00(\\x00\\x00\\x00\\x03\\x02\\x07\\x00h\\x00\\x00\\x00\\x03\\x02\\x01\\x06 4\\x00\\x00\\x00\\x02\\x01\\x04\\x06\\x00(\\x00\\x00\\x00\\x03\\x04\\x08\\x00(\\x00\\x00\\x00\\x03\\x02\\x07\\x00(\\x00\\x00\\x00\\x03\\x03\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06 4\\x00\\x00\\x00\\x01\\x01\\x04\\x08\\x00(\\x00\\x00\\x00\\x03\\x02\\x06@(\\x00\\x00\\x00\\x03\\x04\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06 4\\x00\\x00\\x00\\x01\\x01\\x04\\x06\\x00(\\x00\\x00\\x00\\x03\\x04\\x08\\x00(\\x00\\x00\\x00\\x03\\x02\\x07\\x00h\\x00\\x00\\x00\\x03\\x02\\x01\\x06 4\\x00\\x00\\x00\\x02\\x01\\x04\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06@(\\x00\\x00\\x00\\x03\\x04\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@h\\x00\\x00\\x00\\x03\\x03\\x01\\x06@(\\x00\\x00\\x00\\x03\\x04\\x06\\x00`\\x00\\x00\\x00\\x03\\x01\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x06\\x00`\\x00\\x00\\x00\\x02\\x02\\x0b\\x00\\x01\\x00\\x01\\x02\\x00\\x02\\x03\\x00\\x02\\x04\\x00\\x04\\x05(\\x02\\x04\\x06 \\x06\\x07\\x00\\x07\\x08\\x00\\x08\\t(\\x02\\x08\\n \\n\\x0b\\x00\\x0b\\x0c\\x00\\x0c\\r\\x00\\r\\x0e \\x0e\\x0fh\\x0c\\x0f\\x10h\\x0c\\x10\\x11h\\x0c\\x11\\x12h\\x0c\\x12\\x13h\\x0c\\x0c\\x14\\x00\\x14\\x15\\x00\\x15\\x16\\x00\\x16\\x17(\\x02\\x16\\x18 \\x18\\x19\\x00\\x19\\x1a\\x00\\x1a\\x1b\\x00\\x1b\\x1c\\x00\\x1c\\x1d\\x00\\x1d\\x1eh\\x0c\\x1e\\x1fh\\x0c\\x1f h\\x0c !h\\x0c!"h\\x0c\\x07#\\x00#$\\x00$%\\x00%&\\x00&\\\'\\x00\\\'(\\x00\\x15\\n\\x00"\\x19\\x00(#\\x00\\x13\\x0eh\\x0c"\\x1dh\\x0c\\x14\\x05\\x05\\x0b\\n\\x15\\x14\\x0c\\x06\\x0f\\x10\\x11\\x12\\x13\\x0e\\x06\\x1a\\x1b\\x1c\\x1d"\\x19\\x06\\x1e\\x1f !"\\x1d\\x06$%&\\\'(#\\x17\\x00\\x00\\x00\\x00\\x12\\x03\\x00\\x00\\x00\\x07\\x00\\x00\\x00numArom\\x01\\x02\\x00\\x00\\x00\\x0f\\x00\\x00\\x00_StereochemDone\\x01\\x01\\x00\\x00\\x00\\x03\\x00\\x00\\x00foo\\x00\\x03\\x00\\x00\\x00bar\\x13:\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x12\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x000\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1d\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x001\\x04\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x15\\x00\\x00\\x00\\x12\\x00\\x00\\x00_ChiralityPossible\\x01\\x01\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPCode\\x00\\x01\\x00\\x00\\x00S\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x002\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x00\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x003\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1a\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x004\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02"\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x005\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1f\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x006\\x04\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x16\\x00\\x00\\x00\\x12\\x00\\x00\\x00_ChiralityPossible\\x01\\x01\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPCode\\x00\\x01\\x00\\x00\\x00S\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x007\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1c\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x008\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02$\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x01\\x00\\x00\\x009\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02 \\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0010\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x13\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0011\\x04\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x18\\x00\\x00\\x00\\x12\\x00\\x00\\x00_ChiralityPossible\\x01\\x01\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPCode\\x00\\x01\\x00\\x00\\x00S\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0012\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02!\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0013\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x19\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0014\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0f\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0015\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0b\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0016\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x08\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0017\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0b\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0018\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0f\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0019\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x07\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0020\\x04\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x17\\x00\\x00\\x00\\x12\\x00\\x00\\x00_ChiralityPossible\\x01\\x01\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPCode\\x00\\x01\\x00\\x00\\x00S\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0021\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1b\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0022\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02#\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0023\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x1e\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0024\\x04\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x14\\x00\\x00\\x00\\x12\\x00\\x00\\x00_ChiralityPossible\\x01\\x01\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPCode\\x00\\x01\\x00\\x00\\x00R\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0025\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x06\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0026\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x03\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0027\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x05\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0028\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x10\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0029\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0c\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0030\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\t\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0031\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\n\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0032\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\r\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0033\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x11\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0034\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x0e\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0035\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x04\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0036\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x02\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0037\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x01\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0038\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x02\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0039\\x02\\x00\\x00\\x00\\x08\\x00\\x00\\x00_CIPRank\\x02\\x04\\x00\\x00\\x00\\x05\\x00\\x00\\x00myidx\\x00\\x02\\x00\\x00\\x0040\\x13\\x16\'\np1\ntp2\nRp3\n.' - from rdkit.six.moves import cPickle + import pickle # bonds were broken in v1 - m2 = cPickle.loads(data.encode("utf-8"), encoding='bytes') + m2 = pickle.loads(data.encode("utf-8"), encoding='bytes') self.assertEqual(m2.GetProp("foo"), "bar") for atom in m2.GetAtoms(): diff --git a/Code/GraphMol/Wrap/testThreads.py b/Code/GraphMol/Wrap/testThreads.py index 68574265e..1d3ff65f3 100644 --- a/Code/GraphMol/Wrap/testThreads.py +++ b/Code/GraphMol/Wrap/testThreads.py @@ -1,6 +1,6 @@ import sys from rdkit import Chem -from rdkit import six +from io import StringIO import threading import multiprocessing @@ -11,108 +11,108 @@ ref_mol = Chem.MolFromMolBlock(ref_sdf) core_smarts = '[#6]-!@[#6]-!@[#8]-!@[#6]:1:[#6](-!@[#6]#!@[#7]):[#6](-!@[#7]):[#6]:[#6](-!@[#7]-!@[#6](-!@[#6]-!@[#6]:2:[#6]:[#6]:[#6]:[#6]:[#6]:2)=!@[#8]):[#7]:1' if ref_mol is None: - raise ValueError('Bad ref structure') + raise ValueError('Bad ref structure') core_mol = Chem.MolFromSmarts(core_smarts) if core_mol is None: - raise ValueError('Bad core structure') + raise ValueError('Bad core structure') expected = {} def runner(func, args): - if args: - res = getattr(ref_mol, func)(args) - else: - res = getattr(ref_mol, func)() - if func in expected: - assert res == expected[func], "Got %r expected %r" % (ers, expected[func]) - return res + if args: + res = getattr(ref_mol, func)(args) + else: + res = getattr(ref_mol, func)() + if func in expected: + assert res == expected[func], "Got %r expected %r" % (ers, expected[func]) + return res funcs = ["GetSubstructMatch", "GetSubstructMatches", "HasSubstructMatch"] # get the expected results from the non-thread version for func in funcs: - expected[func] = runner(func, core_mol) + expected[func] = runner(func, core_mol) nthreads = int(multiprocessing.cpu_count() * 100 / 4) # 100 threads per cpu threads = [] for i in range(0, nthreads): - for func in funcs: - t = threading.Thread(target=runner, args=(func, core_mol)) + for func in funcs: + t = threading.Thread(target=runner, args=(func, core_mol)) + t.start() + threads.append(t) + t = threading.Thread(target=runner, args=("ToBinary", None)) t.start() threads.append(t) - t = threading.Thread(target=runner, args=("ToBinary", None)) - t.start() - threads.append(t) for t in threads: - t.join() + t.join() def LogError(): - i = 0 - while 1: - if i == 10: - break - i += 1 - Chem.LogErrorMsg(str(i) + ":: My dog has fleas") + i = 0 + while 1: + if i == 10: + break + i += 1 + Chem.LogErrorMsg(str(i) + ":: My dog has fleas") def LogWarning(): - i = 0 - while 1: - if i == 10: - break - i += 1 - Chem.LogWarningMsg(str(i) + ":: All good boys to fine") + i = 0 + while 1: + if i == 10: + break + i += 1 + Chem.LogWarningMsg(str(i) + ":: All good boys to fine") - # this spews a ton of logging info... - # that is all intermingled... + # this spews a ton of logging info... + # that is all intermingled... if 0: - nthreads = int(multiprocessing.cpu_count()) - threads = [] - for i in range(0, nthreads): - for func in funcs: - if i % 2 == 0: - t = threading.Thread(target=LogError) - else: + nthreads = int(multiprocessing.cpu_count()) + threads = [] + for i in range(0, nthreads): + for func in funcs: + if i % 2 == 0: + t = threading.Thread(target=LogError) + else: + t = threading.Thread(target=LogWarning) + t.start() + threads.append(t) t = threading.Thread(target=LogWarning) - t.start() - threads.append(t) - t = threading.Thread(target=LogWarning) - t.start() - threads.append(t) + t.start() + threads.append(t) - for t in threads: - t.join() + for t in threads: + t.join() Chem.WrapLogs() err = sys.stderr -stringio = sys.stderr = six.StringIO() +stringio = sys.stderr = StringIO() # now the errors should be synchronized... nthreads = int(multiprocessing.cpu_count()) threads = [] for i in range(0, nthreads): - for func in funcs: - if i % 2 == 0: - t = threading.Thread(target=LogError) - else: - t = threading.Thread(target=LogWarning) + for func in funcs: + if i % 2 == 0: + t = threading.Thread(target=LogError) + else: + t = threading.Thread(target=LogWarning) + t.start() + threads.append(t) + t = threading.Thread(target=LogWarning) t.start() threads.append(t) - t = threading.Thread(target=LogWarning) - t.start() - threads.append(t) for t in threads: - t.join() + t.join() sys.stderr = err -stringio = sys.stderr = six.StringIO() +stringio = sys.stderr = StringIO() LogWarning() LogError() sys.stderr = err diff --git a/Code/GraphMol/Wrap/testTrajectory.py b/Code/GraphMol/Wrap/testTrajectory.py index 591938055..d380b9f1d 100644 --- a/Code/GraphMol/Wrap/testTrajectory.py +++ b/Code/GraphMol/Wrap/testTrajectory.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import ChemicalForceFields, rdtrajectory from rdkit.Chem.rdtrajectory import Snapshot, \ diff --git a/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py b/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py index a8bd1644e..4e1ba8333 100755 --- a/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py +++ b/Code/GraphMol/Wrap/test_data/do_smiles.bomb.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import re splitExpr = re.compile('[\t\ ]') diff --git a/Code/GraphMol/Wrap/test_data/do_smiles.py b/Code/GraphMol/Wrap/test_data/do_smiles.py index 925b7e000..82ba30a21 100755 --- a/Code/GraphMol/Wrap/test_data/do_smiles.py +++ b/Code/GraphMol/Wrap/test_data/do_smiles.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import re splitExpr = re.compile('[\t\ ]') diff --git a/Code/JavaWrappers/make_templates.py b/Code/JavaWrappers/make_templates.py index d4fa26593..44db1e2dc 100644 --- a/Code/JavaWrappers/make_templates.py +++ b/Code/JavaWrappers/make_templates.py @@ -6,7 +6,7 @@ becomes this %template(OnBitProjSimilarityEBV) OnBitProjSimilarity; """ -from __future__ import print_function + import re template_match = re.compile(r"""template\s*\<(.+)\>\s*.*\s+(\w+)\s*\(.*""") diff --git a/Code/JavaWrappers/parse_doxy_html.py b/Code/JavaWrappers/parse_doxy_html.py index e885b4c64..45f5162fd 100644 --- a/Code/JavaWrappers/parse_doxy_html.py +++ b/Code/JavaWrappers/parse_doxy_html.py @@ -8,7 +8,7 @@ processing the C++ code. That html is viewed with Firefox and the appropriate p files don't have that section and for now this program can't handle them without some additional annotation by hand. """ -from __future__ import print_function + from BeautifulSoup import * import os diff --git a/Code/ML/InfoTheory/Wrap/testRanker.py b/Code/ML/InfoTheory/Wrap/testRanker.py index 4b6c583cd..aacb8c580 100644 --- a/Code/ML/InfoTheory/Wrap/testRanker.py +++ b/Code/ML/InfoTheory/Wrap/testRanker.py @@ -3,7 +3,7 @@ import numpy import os import io -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig, RDRandom from rdkit.ML.InfoTheory import rdInfoTheory as rdit @@ -147,7 +147,7 @@ class TestCase(unittest.TestCase): buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() with io.BytesIO(buf) as inF: - examples, avail, bias, nB, nPoss = cPickle.load(inF, encoding='bytes') + examples, avail, bias, nB, nPoss = pickle.load(inF, encoding='bytes') ranker = rdit.InfoBitRanker(nB, nPoss, rdit.InfoType.BIASENTROPY) ranker.SetMaskBits(avail) for ex in examples: diff --git a/Code/Numerics/Alignment/Wrap/testAlignment.py b/Code/Numerics/Alignment/Wrap/testAlignment.py index 2888bcdc0..98b852256 100644 --- a/Code/Numerics/Alignment/Wrap/testAlignment.py +++ b/Code/Numerics/Alignment/Wrap/testAlignment.py @@ -3,7 +3,7 @@ Replaced numpy.oldnumeric with numpy methods - Jan 2015, PGedeck """ #pylint: disable=E1101,C0111,R0904 -from __future__ import print_function + import rdkit.Numerics.rdAlignment as rdAlg from rdkit import Geometry diff --git a/Code/SimDivPickers/Wrap/testMaxMin.py b/Code/SimDivPickers/Wrap/testMaxMin.py index d65c7087a..7507321e5 100755 --- a/Code/SimDivPickers/Wrap/testMaxMin.py +++ b/Code/SimDivPickers/Wrap/testMaxMin.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv import numpy from rdkit import RDRandom diff --git a/Code/cmake/Modules/fixup_coverage.py b/Code/cmake/Modules/fixup_coverage.py index 472c0c5c9..a10dd8bef 100644 --- a/Code/cmake/Modules/fixup_coverage.py +++ b/Code/cmake/Modules/fixup_coverage.py @@ -3,7 +3,7 @@ the coverage tool mistakenly finds in the build tree. It replaces the paths with the ones from the source tree n.b. if a file with the same name (i.e. sln.yy) is found twice in the source tree, this will break""" -from __future__ import print_function + import os, sys source_dir, info_file = sys.argv[1:3] print(source_dir, info_file) diff --git a/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py b/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py index 418f2e848..0e696df98 100644 --- a/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py +++ b/Contrib/AtomAtomSimilarity/AtomAtomPathSimilarity.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import numpy import time import unittest diff --git a/Contrib/Glare/glare.py b/Contrib/Glare/glare.py index d05844102..452c416e6 100755 --- a/Contrib/Glare/glare.py +++ b/Contrib/Glare/glare.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import random, operator, itertools, math """ diff --git a/Contrib/LEF/AddLabels.py b/Contrib/LEF/AddLabels.py index 286ba84d6..148efdb67 100644 --- a/Contrib/LEF/AddLabels.py +++ b/Contrib/LEF/AddLabels.py @@ -29,10 +29,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import BRICS -import sys, cPickle, re +import sys, pickle, re inF = file(sys.argv[1], 'r') inLs = inF.readlines() diff --git a/Contrib/LEF/ClusterFps.py b/Contrib/LEF/ClusterFps.py index 2a49fded6..f72b01644 100644 --- a/Contrib/LEF/ClusterFps.py +++ b/Contrib/LEF/ClusterFps.py @@ -29,10 +29,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from __future__ import print_function + from rdkit.ML.Cluster import Butina from rdkit import DataStructs -import sys, cPickle +import sys, pickle # sims is the list of similarity thresholds used to generate clusters sims = [.9, .8, .7, .6] @@ -42,8 +42,8 @@ uFps = [] for fileN in sys.argv[1:]: inF = file(sys.argv[1], 'r') - cols = cPickle.load(inF) - fps = cPickle.load(inF) + cols = pickle.load(inF) + fps = pickle.load(inF) for row in fps: nm, smi, fp = row[:3] diff --git a/Contrib/LEF/CreateFps.py b/Contrib/LEF/CreateFps.py index 3e2679248..d5c43781d 100644 --- a/Contrib/LEF/CreateFps.py +++ b/Contrib/LEF/CreateFps.py @@ -29,12 +29,12 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum and Anna Vulpetti, March 2009 -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem.AtomPairs import Pairs, Torsions -import sys, cPickle +import sys, pickle # maxPathLength is the maximum path length in atoms # maxPathLength=6 corresponds to F-FP-5 @@ -92,8 +92,8 @@ if __name__ == '__main__': nm = mol.GetProp(nameField) fps.append([nm, smi, fp] + queryMatches) colNames = ['name', 'smiles', 'fp'] + [x for x, y in extraQueries] - cPickle.dump(colNames, outF) - cPickle.dump(fps, outF) + pickle.dump(colNames, outF) + pickle.dump(fps, outF) print('name1 smiles1 name2 smiles2 name12 smiles12 environment_id ' + ' '.join( [x for x, y in extraQueries])) diff --git a/Contrib/M_Kossner/Frames.py b/Contrib/M_Kossner/Frames.py index bd99f5f92..7dc16576a 100644 --- a/Contrib/M_Kossner/Frames.py +++ b/Contrib/M_Kossner/Frames.py @@ -17,7 +17,7 @@ # - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc) # - frames.smi containing the (caninical) smiles and count of occurrence # -from __future__ import print_function + import os, sys from Chem import AllChem as Chem diff --git a/Contrib/NP_Score/npscorer.py b/Contrib/NP_Score/npscorer.py index 5e0033350..737c40205 100644 --- a/Contrib/NP_Score/npscorer.py +++ b/Contrib/NP_Score/npscorer.py @@ -13,7 +13,7 @@ # peter ertl, august 2015 # -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdMolDescriptors import sys, math, gzip, pickle diff --git a/Contrib/RxnRoleAssignment/identifyReactants.py b/Contrib/RxnRoleAssignment/identifyReactants.py index 30fd81968..d767e1092 100644 --- a/Contrib/RxnRoleAssignment/identifyReactants.py +++ b/Contrib/RxnRoleAssignment/identifyReactants.py @@ -32,7 +32,7 @@ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import rdqueries diff --git a/Contrib/SA_Score/UnitTestSAScore.py b/Contrib/SA_Score/UnitTestSAScore.py index e51f07a48..335c688dd 100644 --- a/Contrib/SA_Score/UnitTestSAScore.py +++ b/Contrib/SA_Score/UnitTestSAScore.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from rdkit import RDConfig from rdkit import Chem import unittest, os.path diff --git a/Contrib/SA_Score/sascorer.py b/Contrib/SA_Score/sascorer.py index 568194c68..aa0044eed 100644 --- a/Contrib/SA_Score/sascorer.py +++ b/Contrib/SA_Score/sascorer.py @@ -15,12 +15,11 @@ # # peter ertl & greg landrum, september 2013 # -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdMolDescriptors -from rdkit.six.moves import cPickle -from rdkit.six import iteritems +import pickle import math from collections import defaultdict @@ -31,132 +30,133 @@ _fscores = None def readFragmentScores(name='fpscores'): - import gzip - global _fscores - # generate the full path filename: - if name == "fpscores": - name = op.join(op.dirname(__file__), name) - _fscores = cPickle.load(gzip.open('%s.pkl.gz' % name)) - outDict = {} - for i in _fscores: - for j in range(1, len(i)): - outDict[i[j]] = float(i[0]) - _fscores = outDict + import gzip + global _fscores + # generate the full path filename: + if name == "fpscores": + name = op.join(op.dirname(__file__), name) + _fscores = pickle.load(gzip.open('%s.pkl.gz' % name)) + outDict = {} + for i in _fscores: + for j in range(1, len(i)): + outDict[i[j]] = float(i[0]) + _fscores = outDict def numBridgeheadsAndSpiro(mol, ri=None): - nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol) - nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol) - return nBridgehead, nSpiro + nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol) + nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol) + return nBridgehead, nSpiro def calculateScore(m): - if _fscores is None: - readFragmentScores() + if _fscores is None: + readFragmentScores() - # fragment score - fp = rdMolDescriptors.GetMorganFingerprint(m, - 2) #<- 2 is the *radius* of the circular fingerprint - fps = fp.GetNonzeroElements() - score1 = 0. - nf = 0 - for bitId, v in iteritems(fps): - nf += v - sfp = bitId - score1 += _fscores.get(sfp, -4) * v - score1 /= nf + # fragment score + fp = rdMolDescriptors.GetMorganFingerprint(m, + 2) # <- 2 is the *radius* of the circular fingerprint + fps = fp.GetNonzeroElements() + score1 = 0. + nf = 0 + for bitId, v in fps.items(): + nf += v + sfp = bitId + score1 += _fscores.get(sfp, -4) * v + score1 /= nf - # features score - nAtoms = m.GetNumAtoms() - nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True)) - ri = m.GetRingInfo() - nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri) - nMacrocycles = 0 - for x in ri.AtomRings(): - if len(x) > 8: - nMacrocycles += 1 + # features score + nAtoms = m.GetNumAtoms() + nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True)) + ri = m.GetRingInfo() + nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri) + nMacrocycles = 0 + for x in ri.AtomRings(): + if len(x) > 8: + nMacrocycles += 1 - sizePenalty = nAtoms**1.005 - nAtoms - stereoPenalty = math.log10(nChiralCenters + 1) - spiroPenalty = math.log10(nSpiro + 1) - bridgePenalty = math.log10(nBridgeheads + 1) - macrocyclePenalty = 0. - # --------------------------------------- - # This differs from the paper, which defines: - # macrocyclePenalty = math.log10(nMacrocycles+1) - # This form generates better results when 2 or more macrocycles are present - if nMacrocycles > 0: - macrocyclePenalty = math.log10(2) + sizePenalty = nAtoms**1.005 - nAtoms + stereoPenalty = math.log10(nChiralCenters + 1) + spiroPenalty = math.log10(nSpiro + 1) + bridgePenalty = math.log10(nBridgeheads + 1) + macrocyclePenalty = 0. + # --------------------------------------- + # This differs from the paper, which defines: + # macrocyclePenalty = math.log10(nMacrocycles+1) + # This form generates better results when 2 or more macrocycles are present + if nMacrocycles > 0: + macrocyclePenalty = math.log10(2) - score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty + score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty - # correction for the fingerprint density - # not in the original publication, added in version 1.1 - # to make highly symmetrical molecules easier to synthetise - score3 = 0. - if nAtoms > len(fps): - score3 = math.log(float(nAtoms) / len(fps)) * .5 + # correction for the fingerprint density + # not in the original publication, added in version 1.1 + # to make highly symmetrical molecules easier to synthetise + score3 = 0. + if nAtoms > len(fps): + score3 = math.log(float(nAtoms) / len(fps)) * .5 - sascore = score1 + score2 + score3 + sascore = score1 + score2 + score3 - # need to transform "raw" value into scale between 1 and 10 - min = -4.0 - max = 2.5 - sascore = 11. - (sascore - min + 1) / (max - min) * 9. - # smooth the 10-end - if sascore > 8.: - sascore = 8. + math.log(sascore + 1. - 9.) - if sascore > 10.: - sascore = 10.0 - elif sascore < 1.: - sascore = 1.0 + # need to transform "raw" value into scale between 1 and 10 + min = -4.0 + max = 2.5 + sascore = 11. - (sascore - min + 1) / (max - min) * 9. + # smooth the 10-end + if sascore > 8.: + sascore = 8. + math.log(sascore + 1. - 9.) + if sascore > 10.: + sascore = 10.0 + elif sascore < 1.: + sascore = 1.0 - return sascore + return sascore def processMols(mols): - print('smiles\tName\tsa_score') - for i, m in enumerate(mols): - if m is None: - continue + print('smiles\tName\tsa_score') + for i, m in enumerate(mols): + if m is None: + continue - s = calculateScore(m) + s = calculateScore(m) - smiles = Chem.MolToSmiles(m) - print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s) + smiles = Chem.MolToSmiles(m) + print(smiles + "\t" + m.GetProp('_Name') + "\t%3f" % s) if __name__ == '__main__': - import sys, time + import sys + import time - t1 = time.time() - readFragmentScores("fpscores") - t2 = time.time() + t1 = time.time() + readFragmentScores("fpscores") + t2 = time.time() - suppl = Chem.SmilesMolSupplier(sys.argv[1]) - t3 = time.time() - processMols(suppl) - t4 = time.time() + suppl = Chem.SmilesMolSupplier(sys.argv[1]) + t3 = time.time() + processMols(suppl) + t4 = time.time() - print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)), - file=sys.stderr) + print('Reading took %.2f seconds. Calculating took %.2f seconds' % ((t2 - t1), (t4 - t3)), + file=sys.stderr) # # Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are -# met: +# met: # -# * Redistributions of source code must retain the above copyright +# * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided # with the distribution. -# * Neither the name of Novartis Institutes for BioMedical Research Inc. -# nor the names of its contributors may be used to endorse or promote +# * Neither the name of Novartis Institutes for BioMedical Research Inc. +# nor the names of its contributors may be used to endorse or promote # products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS diff --git a/Contrib/fraggle/atomcontrib.py b/Contrib/fraggle/atomcontrib.py index 3ceea4b62..35dcdadfb 100644 --- a/Contrib/fraggle/atomcontrib.py +++ b/Contrib/fraggle/atomcontrib.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, May 2013 -from __future__ import print_function + import sys from optparse import OptionParser from rdkit import Chem diff --git a/Contrib/fraggle/cxn_tversky.py b/Contrib/fraggle/cxn_tversky.py index e125a06ed..dc3c8a523 100644 --- a/Contrib/fraggle/cxn_tversky.py +++ b/Contrib/fraggle/cxn_tversky.py @@ -31,7 +31,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, July 2013 -from __future__ import print_function + import sys import re diff --git a/Contrib/fraggle/fraggle.py b/Contrib/fraggle/fraggle.py index 1c30bb1b2..fb7e56ab6 100644 --- a/Contrib/fraggle/fraggle.py +++ b/Contrib/fraggle/fraggle.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, May 2013 -from __future__ import print_function + from rdkit import Chem from rdkit.Chem.Fraggle import FraggleSim diff --git a/Contrib/fraggle/rdkit_tversky.py b/Contrib/fraggle/rdkit_tversky.py index ef4b4f891..933268b64 100644 --- a/Contrib/fraggle/rdkit_tversky.py +++ b/Contrib/fraggle/rdkit_tversky.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, October 2013 -from __future__ import print_function + import sys import re from optparse import OptionParser diff --git a/Contrib/mmpa/cansmirk.py b/Contrib/mmpa/cansmirk.py index 50190d8b6..95febd132 100644 --- a/Contrib/mmpa/cansmirk.py +++ b/Contrib/mmpa/cansmirk.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, September 2012 -from __future__ import print_function + import sys import re from rdkit import Chem diff --git a/Contrib/mmpa/indexing.py b/Contrib/mmpa/indexing.py index 8851504b3..db112ea37 100644 --- a/Contrib/mmpa/indexing.py +++ b/Contrib/mmpa/indexing.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, September 2012 -from __future__ import print_function + import sys import re from rdkit import Chem diff --git a/Contrib/mmpa/mol_transform.py b/Contrib/mmpa/mol_transform.py index 43f36c45f..f79f6ff7d 100644 --- a/Contrib/mmpa/mol_transform.py +++ b/Contrib/mmpa/mol_transform.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, July 2013 -from __future__ import print_function + import sys import re from optparse import OptionParser diff --git a/Contrib/mmpa/rfrag.py b/Contrib/mmpa/rfrag.py index 87ec3b75c..1e3e65d51 100644 --- a/Contrib/mmpa/rfrag.py +++ b/Contrib/mmpa/rfrag.py @@ -32,7 +32,7 @@ # # Modifications and optimizations by Greg Landrum, July 2015 # -from __future__ import print_function + import sys import re from rdkit import Chem diff --git a/Contrib/mmpa/search_mmp_db.py b/Contrib/mmpa/search_mmp_db.py index d5525bec1..e7bbe42e4 100644 --- a/Contrib/mmpa/search_mmp_db.py +++ b/Contrib/mmpa/search_mmp_db.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Jameed Hussain, July 2013 -from __future__ import print_function + import sys import os import re diff --git a/Contrib/pzc/p_con.py b/Contrib/pzc/p_con.py index dee81a60a..ec2e22a67 100644 --- a/Contrib/pzc/p_con.py +++ b/Contrib/pzc/p_con.py @@ -1,6 +1,6 @@ # coding=utf-8 # Copyright (c) 2014 Merck KGaA -from __future__ import print_function + import os, re, gzip, json, requests, sys, optparse, csv from rdkit import Chem from rdkit.Chem import AllChem @@ -16,7 +16,7 @@ from sklearn.cross_validation import train_test_split from sklearn.metrics import roc_curve, auc from sklearn.metrics import precision_score, recall_score from sklearn import preprocessing -import cPickle +import pickle from pickle import Unpickler import numpy as np import math @@ -1207,8 +1207,8 @@ table th[class*="col-"] { return def save_model(self, outfile, model_number=0): - """save Model to file using cPickle.dump""" - cPickle.dump(self.model[model_number], file(outfile, "wb+")) + """save Model to file using pickle.dump""" + pickle.dump(self.model[model_number], file(outfile, "wb+")) return def load_models(self, model_files): diff --git a/Data/DTDs/validate.py b/Data/DTDs/validate.py index 37a7947a1..f2fb8c6ee 100755 --- a/Data/DTDs/validate.py +++ b/Data/DTDs/validate.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import pyRXP import sys diff --git a/Data/Pains/test_data/run_tests.py b/Data/Pains/test_data/run_tests.py index 7c1d6479a..e676a002b 100644 --- a/Data/Pains/test_data/run_tests.py +++ b/Data/Pains/test_data/run_tests.py @@ -5,7 +5,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import unittest, os, csv from rdkit import Chem, RDConfig diff --git a/Data/SmartsLib/tests/bench2.py b/Data/SmartsLib/tests/bench2.py index 9b0242e17..104a2c068 100644 --- a/Data/SmartsLib/tests/bench2.py +++ b/Data/SmartsLib/tests/bench2.py @@ -1,8 +1,8 @@ -from __future__ import print_function + from rdkit import Chem from rdkit import RDConfig import time, sys, gzip -from rdkit.six.moves import cPickle +import pickle from rdkit.RDLogger import logger logger = logger() @@ -21,11 +21,11 @@ for line in file(RDConfig.RDDataDir + '/SmartsLib/RLewis_smarts.txt', 'r').readl qs.append(p) logger.info('reading target counts') -refFps = cPickle.loads(gzip.open('fps.1000.counts.pkl.gz', 'rb').read()) +refFps = pickle.loads(gzip.open('fps.1000.counts.pkl.gz', 'rb').read()) fps = [] logger.info('reading mols:') -ms = cPickle.loads(gzip.open('mols.1000.pkl.gz', 'rb').read()) +ms = pickle.loads(gzip.open('mols.1000.pkl.gz', 'rb').read()) t1 = time.time() nFail = 0 for i, m in enumerate(ms): @@ -44,7 +44,7 @@ for i, m in enumerate(ms): t2 = time.time() print('%.2f' % (t2 - t1)) -#cPickle.dump(fps,file('fps.1000.counts.pkl','wb+')) +#pickle.dump(fps,file('fps.1000.counts.pkl','wb+')) nFail = 0 for i, fp in enumerate(fps): if fp != refFps[i]: diff --git a/External/AvalonTools/Wrap/testAvalonTools.py b/External/AvalonTools/Wrap/testAvalonTools.py index 448a48924..8b7b01544 100755 --- a/External/AvalonTools/Wrap/testAvalonTools.py +++ b/External/AvalonTools/Wrap/testAvalonTools.py @@ -2,7 +2,7 @@ # # Created by Greg Landrum, July 2008 # -from __future__ import print_function + from rdkit import RDConfig import os, sys import unittest diff --git a/External/CoordGen/Wrap/testCoordGen.py b/External/CoordGen/Wrap/testCoordGen.py index 3260c84af..16122c7c6 100644 --- a/External/CoordGen/Wrap/testCoordGen.py +++ b/External/CoordGen/Wrap/testCoordGen.py @@ -5,7 +5,7 @@ # The contents are covered by the terms of the BSD license # which is included in the file license.txt, found at the root # of the RDKit source tree. -from __future__ import print_function + import unittest import os,sys, copy diff --git a/External/FreeSASA/Wrap/testFreeSASA.py b/External/FreeSASA/Wrap/testFreeSASA.py index facd454f6..5bb4958ca 100644 --- a/External/FreeSASA/Wrap/testFreeSASA.py +++ b/External/FreeSASA/Wrap/testFreeSASA.py @@ -28,7 +28,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + import unittest import os,sys, copy diff --git a/External/pymol/modules/pymol/rpc.py b/External/pymol/modules/pymol/rpc.py index f606ed421..63d38cee5 100644 --- a/External/pymol/modules/pymol/rpc.py +++ b/External/pymol/modules/pymol/rpc.py @@ -13,7 +13,7 @@ RD Version: $Rev$ """ -from __future__ import print_function + import SimpleXMLRPCServer import threading, sys, time, types, os, tempfile from pymol import cmd, cgo diff --git a/Projects/DbCLI/CreateDb.py b/Projects/DbCLI/CreateDb.py index 6038c0ada..f62f855df 100644 --- a/Projects/DbCLI/CreateDb.py +++ b/Projects/DbCLI/CreateDb.py @@ -64,7 +64,7 @@ from rdkit.Chem.MolDb import Loader logger = logger() import sys, os import io -from rdkit.six.moves import cPickle +import pickle from rdkit.Chem.MolDb.FingerprintUtils import BuildSigFactory, LayeredOptions from rdkit.Chem.MolDb import FingerprintUtils @@ -286,7 +286,7 @@ def CreateDb(options, dataFilename='', supplier=None): with open(options.descriptorCalcFilename, 'r') as inTF: buf = inTF.read().replace('\r\n', '\n').encode('utf-8') inTF.close() - calc = cPickle.load(io.BytesIO(buf)) + calc = pickle.load(io.BytesIO(buf)) nms = [x for x in calc.GetDescriptorNames()] descrCurs = descrConn.GetCursor() descrs = ['guid integer not null primary key', '%s varchar not null unique' % options.molIdName] diff --git a/Projects/DbCLI/SearchDb.py b/Projects/DbCLI/SearchDb.py index 10a27a801..fcb2c88a2 100644 --- a/Projects/DbCLI/SearchDb.py +++ b/Projects/DbCLI/SearchDb.py @@ -31,7 +31,7 @@ # # Created by Greg Landrum, July 2007 # -from __future__ import print_function + _version = "0.14.0" _description = """ The sd filename argument can be either an SD file or an MDL mol diff --git a/Regress/Scripts/chiral_embed.py b/Regress/Scripts/chiral_embed.py index 5e0ac8a99..615f2921f 100644 --- a/Regress/Scripts/chiral_embed.py +++ b/Regress/Scripts/chiral_embed.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem.PyMol import MolViewer from rdkit.Chem import AllChem diff --git a/Regress/Scripts/new_timings.py b/Regress/Scripts/new_timings.py index 9b3bb5007..4d22b6d63 100644 --- a/Regress/Scripts/new_timings.py +++ b/Regress/Scripts/new_timings.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import time, gzip, random, os, sys from rdkit import Chem from rdkit.Chem import AllChem diff --git a/Regress/Scripts/timings.py b/Regress/Scripts/timings.py index d4963cc8f..52c5ac706 100644 --- a/Regress/Scripts/timings.py +++ b/Regress/Scripts/timings.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import time, gzip, random, os, sys from rdkit import Chem from rdkit.Chem import AllChem diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 907774c14..06181b4d0 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,8 +1,8 @@ # Release_2019.03.1 (Changes relative to Release_2018.09.1) -## Deprecations -- As of the 2019.03 release the RDKit no longer supports Python 2. Please read +## REALLY IMPORTANT ANNOUNCEMENT +- As of this realease (2019.03.1) the RDKit no longer supports Python 2. Please read this rdkit-discuss post to learn what your options are if you need to keep using Python 2: https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg08354.html diff --git a/Scripts/PythonFormat.py b/Scripts/PythonFormat.py index 2305256d6..3b186c0b1 100644 --- a/Scripts/PythonFormat.py +++ b/Scripts/PythonFormat.py @@ -11,7 +11,7 @@ all non-conformant files are listed and the required yapf command(s) printed. If changes are found, the script will exit with error code 1, otherwise 0. ''' -from __future__ import print_function + import os from yapf.yapflib.yapf_api import FormatCode import sys diff --git a/rdkit/Chem/AtomPairs/UnitTestDescriptors.py b/rdkit/Chem/AtomPairs/UnitTestDescriptors.py index 1873c504b..ae10e09be 100755 --- a/rdkit/Chem/AtomPairs/UnitTestDescriptors.py +++ b/rdkit/Chem/AtomPairs/UnitTestDescriptors.py @@ -8,12 +8,12 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import unittest import doctest import os import gzip -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit import RDConfig from rdkit.Chem.AtomPairs import Pairs, Torsions, Utils, Sheridan @@ -33,11 +33,11 @@ class TestCase(unittest.TestCase): def setUp(self): self.testDataPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'AtomPairs', 'test_data') inF = gzip.open(os.path.join(self.testDataPath, 'mols1000.pkl.gz'), 'rb') - self.mols = cPickle.load(inF, encoding='bytes') + self.mols = pickle.load(inF, encoding='bytes') def testPairsRegression(self): inF = gzip.open(os.path.join(self.testDataPath, 'mols1000.aps.pkl.gz'), 'rb') - atomPairs = cPickle.load(inF, encoding='bytes') + atomPairs = pickle.load(inF, encoding='bytes') for i, m in enumerate(self.mols): ap = Pairs.GetAtomPairFingerprint(m) if ap != atomPairs[i]: # pragma: nocover @@ -47,7 +47,7 @@ class TestCase(unittest.TestCase): def testTorsionsRegression(self): inF = gzip.open(os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz'), 'rb') - torsions = cPickle.load(inF, encoding='bytes') + torsions = pickle.load(inF, encoding='bytes') for i, m in enumerate(self.mols): tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m) if tt != torsions[i]: # pragma: nocover diff --git a/rdkit/Chem/AtomPairs/Utils.py b/rdkit/Chem/AtomPairs/Utils.py index e31acdcc6..9c6b2667d 100755 --- a/rdkit/Chem/AtomPairs/Utils.py +++ b/rdkit/Chem/AtomPairs/Utils.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdMolDescriptors import math diff --git a/rdkit/Chem/BRICS.py b/rdkit/Chem/BRICS.py index 95e979485..104d190db 100644 --- a/rdkit/Chem/BRICS.py +++ b/rdkit/Chem/BRICS.py @@ -32,12 +32,10 @@ """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) """ -from __future__ import print_function + import sys, re, random from rdkit import Chem from rdkit.Chem import rdChemReactions as Reactions -from rdkit.six import iteritems, iterkeys, next -from rdkit.six.moves import range # These are the definitions that will be applied to fragment molecules: environs = { @@ -208,7 +206,7 @@ for gp in smartsGps: raise environMatchers = {} -for env, sma in iteritems(environs): +for env, sma in environs.items(): environMatchers[env] = Chem.MolFromSmarts(sma) bondMatchers = [] @@ -286,7 +284,7 @@ def FindBRICSBonds(mol, randomizeOrder=False, silent=True): random.shuffle(indices, random=random.random) envMatches = {} - for env, patt in iteritems(environMatchers): + for env, patt in environMatchers.items(): envMatches[env] = mol.HasSubstructMatch(patt) for gpIdx in indices: if randomizeOrder: @@ -454,7 +452,7 @@ def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None, newPool = {} while activePool: matched = False - nSmi = next(iterkeys(activePool)) + nSmi = next(iter(activePool)) mol = activePool.pop(nSmi) for rxnIdx, reaction in enumerate(reactionGp): if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions: diff --git a/rdkit/Chem/BuildFragmentCatalog.py b/rdkit/Chem/BuildFragmentCatalog.py index e29d8d000..b1925d932 100755 --- a/rdkit/Chem/BuildFragmentCatalog.py +++ b/rdkit/Chem/BuildFragmentCatalog.py @@ -62,7 +62,7 @@ - --nBits=-1: specify the maximum number of bits to show details for """ -from __future__ import print_function + import os import sys @@ -73,590 +73,590 @@ from rdkit import RDConfig from rdkit.Chem import FragmentCatalog from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import InfoTheory -from rdkit.six import next -from rdkit.six.moves import cPickle + +import pickle def message(msg, dest=sys.stdout): - dest.write(msg) + dest.write(msg) def BuildCatalog(suppl, maxPts=-1, groupFileName=None, minPath=2, maxPath=6, reportFreq=10): - """ builds a fragment catalog from a set of molecules in a delimited text block + """ builds a fragment catalog from a set of molecules in a delimited text block - **Arguments** + **Arguments** - - suppl: a mol supplier + - suppl: a mol supplier - - maxPts: (optional) if provided, this will set an upper bound on the - number of points to be considered + - maxPts: (optional) if provided, this will set an upper bound on the + number of points to be considered - - groupFileName: (optional) name of the file containing functional group - information + - groupFileName: (optional) name of the file containing functional group + information - - minPath, maxPath: (optional) names of the minimum and maximum path lengths - to be considered + - minPath, maxPath: (optional) names of the minimum and maximum path lengths + to be considered - - reportFreq: (optional) how often to display status information + - reportFreq: (optional) how often to display status information - **Returns** + **Returns** - a FragmentCatalog + a FragmentCatalog - """ - if groupFileName is None: - groupFileName = os.path.join(RDConfig.RDDataDir, "FunctionalGroups.txt") + """ + if groupFileName is None: + groupFileName = os.path.join(RDConfig.RDDataDir, "FunctionalGroups.txt") - fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName) - catalog = FragmentCatalog.FragCatalog(fpParams) - fgen = FragmentCatalog.FragCatGenerator() - if maxPts > 0: - nPts = maxPts - else: - if hasattr(suppl, '__len__'): - nPts = len(suppl) + fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName) + catalog = FragmentCatalog.FragCatalog(fpParams) + fgen = FragmentCatalog.FragCatGenerator() + if maxPts > 0: + nPts = maxPts else: - nPts = -1 - for i, mol in enumerate(suppl): - if i == nPts: - break - if i and not i % reportFreq: - if nPts > -1: - message('Done %d of %d, %d paths\n' % (i, nPts, catalog.GetFPLength())) - else: - message('Done %d, %d paths\n' % (i, catalog.GetFPLength())) - fgen.AddFragsFromMol(mol, catalog) - return catalog + if hasattr(suppl, '__len__'): + nPts = len(suppl) + else: + nPts = -1 + for i, mol in enumerate(suppl): + if i == nPts: + break + if i and not i % reportFreq: + if nPts > -1: + message('Done %d of %d, %d paths\n' % (i, nPts, catalog.GetFPLength())) + else: + message('Done %d, %d paths\n' % (i, catalog.GetFPLength())) + fgen.AddFragsFromMol(mol, catalog) + return catalog def ScoreMolecules(suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, reportFreq=10): - """ scores the compounds in a supplier using a catalog + """ scores the compounds in a supplier using a catalog - **Arguments** + **Arguments** - - suppl: a mol supplier + - suppl: a mol supplier - - catalog: the FragmentCatalog + - catalog: the FragmentCatalog - - maxPts: (optional) the maximum number of molecules to be - considered + - maxPts: (optional) the maximum number of molecules to be + considered - - actName: (optional) the name of the molecule's activity property. - If this is not provided, the molecule's last property will be used. + - actName: (optional) the name of the molecule's activity property. + If this is not provided, the molecule's last property will be used. - - acts: (optional) a sequence of activity values (integers). - If not provided, the activities will be read from the molecules. + - acts: (optional) a sequence of activity values (integers). + If not provided, the activities will be read from the molecules. - - nActs: (optional) number of possible activity values + - nActs: (optional) number of possible activity values - - reportFreq: (optional) how often to display status information + - reportFreq: (optional) how often to display status information - **Returns** + **Returns** - a 2-tuple: + a 2-tuple: - 1) the results table (a 3D array of ints nBits x 2 x nActs) + 1) the results table (a 3D array of ints nBits x 2 x nActs) - 2) a list containing the on bit lists for each molecule + 2) a list containing the on bit lists for each molecule - """ - nBits = catalog.GetFPLength() - resTbl = numpy.zeros((nBits, 2, nActs), numpy.int) - obls = [] + """ + nBits = catalog.GetFPLength() + resTbl = numpy.zeros((nBits, 2, nActs), numpy.int) + obls = [] - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - fpgen = FragmentCatalog.FragFPGenerator() - suppl.reset() - i = 1 - for mol in suppl: - if i and not i % reportFreq: - message('Done %d.\n' % (i)) - if mol: - if not acts: - act = int(mol.GetProp(actName)) - else: - act = acts[i - 1] - fp = fpgen.GetFPForMol(mol, catalog) - obls.append([x for x in fp.GetOnBits()]) - for j in range(nBits): - resTbl[j, 0, act] += 1 - for id_ in obls[i - 1]: - resTbl[id_ - 1, 0, act] -= 1 - resTbl[id_ - 1, 1, act] += 1 - else: - obls.append([]) - i += 1 - return resTbl, obls + fpgen = FragmentCatalog.FragFPGenerator() + suppl.reset() + i = 1 + for mol in suppl: + if i and not i % reportFreq: + message('Done %d.\n' % (i)) + if mol: + if not acts: + act = int(mol.GetProp(actName)) + else: + act = acts[i - 1] + fp = fpgen.GetFPForMol(mol, catalog) + obls.append([x for x in fp.GetOnBits()]) + for j in range(nBits): + resTbl[j, 0, act] += 1 + for id_ in obls[i - 1]: + resTbl[id_ - 1, 0, act] -= 1 + resTbl[id_ - 1, 1, act] += 1 + else: + obls.append([]) + i += 1 + return resTbl, obls def ScoreFromLists(bitLists, suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, reportFreq=10): - """ similar to _ScoreMolecules()_, but uses pre-calculated bit lists - for the molecules (this speeds things up a lot) + """ similar to _ScoreMolecules()_, but uses pre-calculated bit lists + for the molecules (this speeds things up a lot) - **Arguments** + **Arguments** - - bitLists: sequence of on bit sequences for the input molecules + - bitLists: sequence of on bit sequences for the input molecules - - suppl: the input supplier (we read activities from here) + - suppl: the input supplier (we read activities from here) - - catalog: the FragmentCatalog + - catalog: the FragmentCatalog - - maxPts: (optional) the maximum number of molecules to be - considered + - maxPts: (optional) the maximum number of molecules to be + considered - - actName: (optional) the name of the molecule's activity property. - If this is not provided, the molecule's last property will be used. + - actName: (optional) the name of the molecule's activity property. + If this is not provided, the molecule's last property will be used. - - nActs: (optional) number of possible activity values + - nActs: (optional) number of possible activity values - - reportFreq: (optional) how often to display status information + - reportFreq: (optional) how often to display status information - **Returns** + **Returns** - the results table (a 3D array of ints nBits x 2 x nActs) + the results table (a 3D array of ints nBits x 2 x nActs) - """ - nBits = catalog.GetFPLength() - if maxPts > 0: - nPts = maxPts - else: - nPts = len(bitLists) - resTbl = numpy.zeros((nBits, 2, nActs), numpy.int) - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] - suppl.reset() - for i in range(1, nPts + 1): - mol = next(suppl) - if not acts: - act = int(mol.GetProp(actName)) + """ + nBits = catalog.GetFPLength() + if maxPts > 0: + nPts = maxPts else: - act = acts[i - 1] - if i and not i % reportFreq: - message('Done %d of %d\n' % (i, nPts)) - ids = set() - for id_ in bitLists[i - 1]: - ids.add(id_ - 1) - for j in range(nBits): - resTbl[j, 0, act] += 1 - for id_ in ids: - resTbl[id_, 0, act] -= 1 - resTbl[id_, 1, act] += 1 - return resTbl + nPts = len(bitLists) + resTbl = numpy.zeros((nBits, 2, nActs), numpy.int) + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] + suppl.reset() + for i in range(1, nPts + 1): + mol = next(suppl) + if not acts: + act = int(mol.GetProp(actName)) + else: + act = acts[i - 1] + if i and not i % reportFreq: + message('Done %d of %d\n' % (i, nPts)) + ids = set() + for id_ in bitLists[i - 1]: + ids.add(id_ - 1) + for j in range(nBits): + resTbl[j, 0, act] += 1 + for id_ in ids: + resTbl[id_, 0, act] -= 1 + resTbl[id_, 1, act] += 1 + return resTbl def CalcGains(suppl, catalog, topN=-1, actName='', acts=None, nActs=2, reportFreq=10, biasList=None, collectFps=0): - """ calculates info gains by constructing fingerprints - *DOC* + """ calculates info gains by constructing fingerprints + *DOC* - Returns a 2-tuple: - 1) gains matrix - 2) list of fingerprints + Returns a 2-tuple: + 1) gains matrix + 2) list of fingerprints - """ - nBits = catalog.GetFPLength() - if topN < 0: - topN = nBits - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + """ + nBits = catalog.GetFPLength() + if topN < 0: + topN = nBits + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - if hasattr(suppl, '__len__'): - nMols = len(suppl) - else: - nMols = -1 - fpgen = FragmentCatalog.FragFPGenerator() - # ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.ENTROPY) - if biasList: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) - ranker.SetBiasList(biasList) - else: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) - i = 0 - fps = [] - for mol in suppl: - if not acts: - try: - act = int(mol.GetProp(actName)) - except KeyError: - message('ERROR: Molecule has no property: %s\n' % (actName)) - message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) - raise KeyError(actName) + if hasattr(suppl, '__len__'): + nMols = len(suppl) else: - act = acts[i] - if i and not i % reportFreq: - if nMols > 0: - message('Done %d of %d.\n' % (i, nMols)) - else: - message('Done %d.\n' % (i)) - fp = fpgen.GetFPForMol(mol, catalog) - ranker.AccumulateVotes(fp, act) - i += 1 - if collectFps: - fps.append(fp) - gains = ranker.GetTopN(topN) - return gains, fps + nMols = -1 + fpgen = FragmentCatalog.FragFPGenerator() + # ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.ENTROPY) + if biasList: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) + ranker.SetBiasList(biasList) + else: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) + i = 0 + fps = [] + for mol in suppl: + if not acts: + try: + act = int(mol.GetProp(actName)) + except KeyError: + message('ERROR: Molecule has no property: %s\n' % (actName)) + message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) + raise KeyError(actName) + else: + act = acts[i] + if i and not i % reportFreq: + if nMols > 0: + message('Done %d of %d.\n' % (i, nMols)) + else: + message('Done %d.\n' % (i)) + fp = fpgen.GetFPForMol(mol, catalog) + ranker.AccumulateVotes(fp, act) + i += 1 + if collectFps: + fps.append(fp) + gains = ranker.GetTopN(topN) + return gains, fps def CalcGainsFromFps(suppl, fps, topN=-1, actName='', acts=None, nActs=2, reportFreq=10, biasList=None): - """ calculates info gains from a set of fingerprints + """ calculates info gains from a set of fingerprints - *DOC* + *DOC* - """ - nBits = len(fps[0]) - if topN < 0: - topN = nBits - if not actName and not acts: - actName = suppl[0].GetPropNames()[-1] + """ + nBits = len(fps[0]) + if topN < 0: + topN = nBits + if not actName and not acts: + actName = suppl[0].GetPropNames()[-1] - if hasattr(suppl, '__len__'): - nMols = len(suppl) - else: - nMols = -1 - if biasList: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) - ranker.SetBiasList(biasList) - else: - ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) - for i, mol in enumerate(suppl): - if not acts: - try: - act = int(mol.GetProp(actName)) - except KeyError: - message('ERROR: Molecule has no property: %s\n' % (actName)) - message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) - raise KeyError(actName) + if hasattr(suppl, '__len__'): + nMols = len(suppl) else: - act = acts[i] - if i and not i % reportFreq: - if nMols > 0: - message('Done %d of %d.\n' % (i, nMols)) - else: - message('Done %d.\n' % (i)) - fp = fps[i] - ranker.AccumulateVotes(fp, act) - gains = ranker.GetTopN(topN) - return gains + nMols = -1 + if biasList: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.BIASENTROPY) + ranker.SetBiasList(biasList) + else: + ranker = InfoTheory.InfoBitRanker(nBits, nActs, InfoTheory.InfoType.ENTROPY) + for i, mol in enumerate(suppl): + if not acts: + try: + act = int(mol.GetProp(actName)) + except KeyError: + message('ERROR: Molecule has no property: %s\n' % (actName)) + message('\tAvailable properties are: %s\n' % (str(mol.GetPropNames()))) + raise KeyError(actName) + else: + act = acts[i] + if i and not i % reportFreq: + if nMols > 0: + message('Done %d of %d.\n' % (i, nMols)) + else: + message('Done %d.\n' % (i)) + fp = fps[i] + ranker.AccumulateVotes(fp, act) + gains = ranker.GetTopN(topN) + return gains def OutputGainsData(outF, gains, cat, nActs=2): - actHeaders = ['Act-%d' % (x) for x in range(nActs)] - if cat: - outF.write('id,Description,Gain,%s\n' % (','.join(actHeaders))) - else: - outF.write('id,Gain,%s\n' % (','.join(actHeaders))) - for entry in gains: - id_ = int(entry[0]) - outL = [str(id_)] + actHeaders = ['Act-%d' % (x) for x in range(nActs)] if cat: - descr = cat.GetBitDescription(id_) - outL.append(descr) - outL.append('%.6f' % entry[1]) - outL += ['%d' % x for x in entry[2:]] - outF.write(','.join(outL)) - outF.write('\n') + outF.write('id,Description,Gain,%s\n' % (','.join(actHeaders))) + else: + outF.write('id,Gain,%s\n' % (','.join(actHeaders))) + for entry in gains: + id_ = int(entry[0]) + outL = [str(id_)] + if cat: + descr = cat.GetBitDescription(id_) + outL.append(descr) + outL.append('%.6f' % entry[1]) + outL += ['%d' % x for x in entry[2:]] + outF.write(','.join(outL)) + outF.write('\n') def ProcessGainsData(inF, delim=',', idCol=0, gainCol=1): - """ reads a list of ids and info gains out of an input file + """ reads a list of ids and info gains out of an input file - """ - res = [] - _ = inF.readline() - for line in inF: - splitL = line.strip().split(delim) - res.append((splitL[idCol], float(splitL[gainCol]))) - return res + """ + res = [] + _ = inF.readline() + for line in inF: + splitL = line.strip().split(delim) + res.append((splitL[idCol], float(splitL[gainCol]))) + return res def ShowDetails(catalog, gains, nToDo=-1, outF=sys.stdout, idCol=0, gainCol=1, outDelim=','): - """ - gains should be a sequence of sequences. The idCol entry of each - sub-sequence should be a catalog ID. _ProcessGainsData()_ provides - suitable input. + """ + gains should be a sequence of sequences. The idCol entry of each + sub-sequence should be a catalog ID. _ProcessGainsData()_ provides + suitable input. - """ - if nToDo < 0: - nToDo = len(gains) - for i in range(nToDo): - id_ = int(gains[i][idCol]) - gain = float(gains[i][gainCol]) - descr = catalog.GetFragDescription(id_) - if descr: - outF.write('%s\n' % (outDelim.join((str(id_), descr, str(gain))))) + """ + if nToDo < 0: + nToDo = len(gains) + for i in range(nToDo): + id_ = int(gains[i][idCol]) + gain = float(gains[i][gainCol]) + descr = catalog.GetFragDescription(id_) + if descr: + outF.write('%s\n' % (outDelim.join((str(id_), descr, str(gain))))) def SupplierFromDetails(details): - from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode - from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode + from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode + from rdkit.VLib.NodeLib.SmilesSupply import SmilesSupplyNode - if details.dbName: - conn = DbConnect(details.dbName, details.tableName) - suppl = DbMolSupplyNode(conn.GetData()) - else: - suppl = SmilesSupplyNode(details.inFileName, delim=details.delim, nameColumn=details.nameCol, - smilesColumn=details.smiCol, titleLine=details.hasTitle) + if details.dbName: + conn = DbConnect(details.dbName, details.tableName) + suppl = DbMolSupplyNode(conn.GetData()) + else: + suppl = SmilesSupplyNode(details.inFileName, delim=details.delim, nameColumn=details.nameCol, + smilesColumn=details.smiCol, titleLine=details.hasTitle) + if isinstance(details.actCol, int): + suppl.reset() + m = next(suppl) + actName = m.GetPropNames()[details.actCol] + details.actCol = actName + if isinstance(details.nameCol, int): + suppl.reset() + m = next(suppl) + nameName = m.GetPropNames()[details.nameCol] + details.nameCol = nameName + suppl.reset() if isinstance(details.actCol, int): - suppl.reset() - m = next(suppl) - actName = m.GetPropNames()[details.actCol] - details.actCol = actName + suppl.reset() + m = next(suppl) + actName = m.GetPropNames()[details.actCol] + details.actCol = actName if isinstance(details.nameCol, int): - suppl.reset() - m = next(suppl) - nameName = m.GetPropNames()[details.nameCol] - details.nameCol = nameName - suppl.reset() - if isinstance(details.actCol, int): - suppl.reset() - m = next(suppl) - actName = m.GetPropNames()[details.actCol] - details.actCol = actName - if isinstance(details.nameCol, int): - suppl.reset() - m = next(suppl) - nameName = m.GetPropNames()[details.nameCol] - details.nameCol = nameName - suppl.reset() - return suppl + suppl.reset() + m = next(suppl) + nameName = m.GetPropNames()[details.nameCol] + details.nameCol = nameName + suppl.reset() + return suppl def Usage(): - print("This is BuildFragmentCatalog") - print('usage error') - # print(__doc__) - sys.exit(-1) + print("This is BuildFragmentCatalog") + print('usage error') + # print(__doc__) + sys.exit(-1) class RunDetails(object): - numMols = -1 - doBuild = 0 - doSigs = 0 - doScore = 0 - doGains = 0 - doDetails = 0 - catalogName = None - onBitsName = None - scoresName = None - gainsName = None - dbName = '' - tableName = None - detailsName = None - inFileName = None - fpName = None - minPath = 2 - maxPath = 6 - smiCol = 1 - actCol = -1 - nameCol = -1 - hasTitle = 1 - nActs = 2 - nBits = -1 - delim = ',' - biasList = None - topN = -1 + numMols = -1 + doBuild = 0 + doSigs = 0 + doScore = 0 + doGains = 0 + doDetails = 0 + catalogName = None + onBitsName = None + scoresName = None + gainsName = None + dbName = '' + tableName = None + detailsName = None + inFileName = None + fpName = None + minPath = 2 + maxPath = 6 + smiCol = 1 + actCol = -1 + nameCol = -1 + hasTitle = 1 + nActs = 2 + nBits = -1 + delim = ',' + biasList = None + topN = -1 def ParseArgs(details): - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'n:d:cst', - ['catalog=', 'onbits=', 'scoresFile=', 'gainsFile=', - 'detailsFile=', 'fpFile=', 'minPath=', 'maxPath=', 'smiCol=', - 'actCol=', 'nameCol=', 'nActs=', 'nBits=', 'biasList=', 'topN=', - 'build', 'sigs', 'gains', 'details', 'score', 'noTitle']) - except Exception: - sys.stderr.write('Error parsing command line:\n') - import traceback - traceback.print_exc() - Usage() - for arg, val in args: - if arg == '-n': - details.numMols = int(val) - elif arg == '-c': - details.delim = ',' - elif arg == '-s': - details.delim = ' ' - elif arg == '-t': - details.delim = '\t' - elif arg == '-d': - details.dbName = val - elif arg == '--build': - details.doBuild = 1 - elif arg == '--score': - details.doScore = 1 - elif arg == '--gains': - details.doGains = 1 - elif arg == '--sigs': - details.doSigs = 1 - elif arg == '-details': - details.doDetails = 1 - elif arg == '--catalog': - details.catalogName = val - elif arg == '--onbits': - details.onBitsName = val - elif arg == '--scoresFile': - details.scoresName = val - elif arg == '--gainsFile': - details.gainsName = val - elif arg == '--detailsFile': - details.detailsName = val - elif arg == '--fpFile': - details.fpName = val - elif arg == '--minPath': - details.minPath = int(val) - elif arg == '--maxPath': - details.maxPath = int(val) - elif arg == '--smiCol': - try: - details.smiCol = int(val) - except ValueError: - details.smiCol = val - elif arg == '--actCol': - try: - details.actCol = int(val) - except ValueError: - details.actCol = val - elif arg == '--nameCol': - try: - details.nameCol = int(val) - except ValueError: - details.nameCol = val - elif arg == '--nActs': - details.nActs = int(val) - elif arg == '--nBits': - details.nBits = int(val) - elif arg == '--noTitle': - details.hasTitle = 0 - elif arg == '--biasList': - details.biasList = tuple(eval(val)) - elif arg == '--topN': - details.topN = int(val) - elif arg == '-h': - Usage() - sys.exit(0) + import getopt + try: + args, extras = getopt.getopt(sys.argv[1:], 'n:d:cst', + ['catalog=', 'onbits=', 'scoresFile=', 'gainsFile=', + 'detailsFile=', 'fpFile=', 'minPath=', 'maxPath=', 'smiCol=', + 'actCol=', 'nameCol=', 'nActs=', 'nBits=', 'biasList=', 'topN=', + 'build', 'sigs', 'gains', 'details', 'score', 'noTitle']) + except Exception: + sys.stderr.write('Error parsing command line:\n') + import traceback + traceback.print_exc() + Usage() + for arg, val in args: + if arg == '-n': + details.numMols = int(val) + elif arg == '-c': + details.delim = ',' + elif arg == '-s': + details.delim = ' ' + elif arg == '-t': + details.delim = '\t' + elif arg == '-d': + details.dbName = val + elif arg == '--build': + details.doBuild = 1 + elif arg == '--score': + details.doScore = 1 + elif arg == '--gains': + details.doGains = 1 + elif arg == '--sigs': + details.doSigs = 1 + elif arg == '-details': + details.doDetails = 1 + elif arg == '--catalog': + details.catalogName = val + elif arg == '--onbits': + details.onBitsName = val + elif arg == '--scoresFile': + details.scoresName = val + elif arg == '--gainsFile': + details.gainsName = val + elif arg == '--detailsFile': + details.detailsName = val + elif arg == '--fpFile': + details.fpName = val + elif arg == '--minPath': + details.minPath = int(val) + elif arg == '--maxPath': + details.maxPath = int(val) + elif arg == '--smiCol': + try: + details.smiCol = int(val) + except ValueError: + details.smiCol = val + elif arg == '--actCol': + try: + details.actCol = int(val) + except ValueError: + details.actCol = val + elif arg == '--nameCol': + try: + details.nameCol = int(val) + except ValueError: + details.nameCol = val + elif arg == '--nActs': + details.nActs = int(val) + elif arg == '--nBits': + details.nBits = int(val) + elif arg == '--noTitle': + details.hasTitle = 0 + elif arg == '--biasList': + details.biasList = tuple(eval(val)) + elif arg == '--topN': + details.topN = int(val) + elif arg == '-h': + Usage() + sys.exit(0) + else: + Usage() + if len(extras): + if details.dbName: + details.tableName = extras[0] + else: + details.inFileName = extras[0] else: - Usage() - if len(extras): - if details.dbName: - details.tableName = extras[0] - else: - details.inFileName = extras[0] - else: - Usage() + Usage() if __name__ == '__main__': - import time - details = RunDetails() - ParseArgs(details) - from io import StringIO - suppl = SupplierFromDetails(details) + import time + details = RunDetails() + ParseArgs(details) + from io import StringIO + suppl = SupplierFromDetails(details) - cat = None - obls = None - if details.doBuild: - if not suppl: - message("We require inData to generate a catalog\n") - sys.exit(-2) - message("Building catalog\n") - t1 = time.time() - cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, - maxPath=details.maxPath) - t2 = time.time() - message("\tThat took %.2f seconds.\n" % (t2 - t1)) - if details.catalogName: - message("Dumping catalog data\n") - cPickle.dump(cat, open(details.catalogName, 'wb+')) - elif details.catalogName: - message("Loading catalog\n") - cat = cPickle.load(open(details.catalogName, 'rb')) - if details.onBitsName: - try: - obls = cPickle.load(open(details.onBitsName, 'rb')) - except Exception: - obls = None - else: - if len(obls) < (inD.count('\n') - 1): - obls = None - scores = None - if details.doScore: - if not suppl: - message("We require inData to score molecules\n") - sys.exit(-2) - if not cat: - message("We require a catalog to score molecules\n") - sys.exit(-2) - message("Scoring compounds\n") - if not obls or len(obls) < details.numMols: - scores, obls = ScoreMolecules(suppl, cat, maxPts=details.numMols, actName=details.actCol, + cat = None + obls = None + if details.doBuild: + if not suppl: + message("We require inData to generate a catalog\n") + sys.exit(-2) + message("Building catalog\n") + t1 = time.time() + cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, + maxPath=details.maxPath) + t2 = time.time() + message("\tThat took %.2f seconds.\n" % (t2 - t1)) + if details.catalogName: + message("Dumping catalog data\n") + pickle.dump(cat, open(details.catalogName, 'wb+')) + elif details.catalogName: + message("Loading catalog\n") + cat = pickle.load(open(details.catalogName, 'rb')) + if details.onBitsName: + try: + obls = pickle.load(open(details.onBitsName, 'rb')) + except Exception: + obls = None + else: + if len(obls) < (inD.count('\n') - 1): + obls = None + scores = None + if details.doScore: + if not suppl: + message("We require inData to score molecules\n") + sys.exit(-2) + if not cat: + message("We require a catalog to score molecules\n") + sys.exit(-2) + message("Scoring compounds\n") + if not obls or len(obls) < details.numMols: + scores, obls = ScoreMolecules(suppl, cat, maxPts=details.numMols, actName=details.actCol, + nActs=details.nActs) + if details.scoresName: + pickle.dump(scores, open(details.scoresName, 'wb+')) + if details.onBitsName: + pickle.dump(obls, open(details.onBitsName, 'wb+')) + else: + scores = ScoreFromLists(obls, suppl, cat, maxPts=details.numMols, actName=details.actCol, nActs=details.nActs) - if details.scoresName: - cPickle.dump(scores, open(details.scoresName, 'wb+')) - if details.onBitsName: - cPickle.dump(obls, open(details.onBitsName, 'wb+')) - else: - scores = ScoreFromLists(obls, suppl, cat, maxPts=details.numMols, actName=details.actCol, - nActs=details.nActs) - elif details.scoresName: - scores = cPickle.load(open(details.scoresName, 'rb')) + elif details.scoresName: + scores = pickle.load(open(details.scoresName, 'rb')) - if details.fpName and os.path.exists(details.fpName) and not details.doSigs: - message("Reading fingerprints from file.\n") - fps = cPickle.load(open(details.fpName, 'rb')) - else: - fps = [] - gains = None - if details.doGains: - if not suppl: - message("We require inData to calculate gains\n") - sys.exit(-2) - if not (cat or fps): - message("We require either a catalog or fingerprints to calculate gains\n") - sys.exit(-2) - message("Calculating Gains\n") - t1 = time.time() - if details.fpName: - collectFps = 1 + if details.fpName and os.path.exists(details.fpName) and not details.doSigs: + message("Reading fingerprints from file.\n") + fps = pickle.load(open(details.fpName, 'rb')) else: - collectFps = 0 - if not fps: - gains, fps = CalcGains(suppl, cat, topN=details.topN, actName=details.actCol, - nActs=details.nActs, biasList=details.biasList, collectFps=collectFps) - if details.fpName: - message("Writing fingerprint file.\n") - tmpF = open(details.fpName, 'wb+') - cPickle.dump(fps, tmpF, 1) - tmpF.close() + fps = [] + gains = None + if details.doGains: + if not suppl: + message("We require inData to calculate gains\n") + sys.exit(-2) + if not (cat or fps): + message("We require either a catalog or fingerprints to calculate gains\n") + sys.exit(-2) + message("Calculating Gains\n") + t1 = time.time() + if details.fpName: + collectFps = 1 + else: + collectFps = 0 + if not fps: + gains, fps = CalcGains(suppl, cat, topN=details.topN, actName=details.actCol, + nActs=details.nActs, biasList=details.biasList, collectFps=collectFps) + if details.fpName: + message("Writing fingerprint file.\n") + tmpF = open(details.fpName, 'wb+') + pickle.dump(fps, tmpF, 1) + tmpF.close() + else: + gains = CalcGainsFromFps(suppl, fps, topN=details.topN, actName=details.actCol, + nActs=details.nActs, biasList=details.biasList) + t2 = time.time() + message("\tThat took %.2f seconds.\n" % (t2 - t1)) + if details.gainsName: + outF = open(details.gainsName, 'w+') + OutputGainsData(outF, gains, cat, nActs=details.nActs) else: - gains = CalcGainsFromFps(suppl, fps, topN=details.topN, actName=details.actCol, - nActs=details.nActs, biasList=details.biasList) - t2 = time.time() - message("\tThat took %.2f seconds.\n" % (t2 - t1)) - if details.gainsName: - outF = open(details.gainsName, 'w+') - OutputGainsData(outF, gains, cat, nActs=details.nActs) - else: - if details.gainsName: - inF = open(details.gainsName, 'r') - gains = ProcessGainsData(inF) + if details.gainsName: + inF = open(details.gainsName, 'r') + gains = ProcessGainsData(inF) - if details.doDetails: - if not cat: - message("We require a catalog to get details\n") - sys.exit(-2) - if not gains: - message("We require gains data to get details\n") - sys.exit(-2) - io = StringIO() - io.write('id,SMILES,gain\n') - ShowDetails(cat, gains, nToDo=details.nBits, outF=io) - if details.detailsName: - open(details.detailsName, 'w+').write(io.getvalue()) - else: - sys.stderr.write(io.getvalue()) + if details.doDetails: + if not cat: + message("We require a catalog to get details\n") + sys.exit(-2) + if not gains: + message("We require gains data to get details\n") + sys.exit(-2) + io = StringIO() + io.write('id,SMILES,gain\n') + ShowDetails(cat, gains, nToDo=details.nBits, outF=io) + if details.detailsName: + open(details.detailsName, 'w+').write(io.getvalue()) + else: + sys.stderr.write(io.getvalue()) diff --git a/rdkit/Chem/ChemUtils/AlignDepict.py b/rdkit/Chem/ChemUtils/AlignDepict.py index d8c976a42..e2f82c7f7 100644 --- a/rdkit/Chem/ChemUtils/AlignDepict.py +++ b/rdkit/Chem/ChemUtils/AlignDepict.py @@ -2,7 +2,7 @@ # Copyright (C) 2006 Greg Landrum # This file is part of RDKit and covered by $RDBASE/license.txt # -from __future__ import print_function + import argparse import sys diff --git a/rdkit/Chem/ChemUtils/BulkTester.py b/rdkit/Chem/ChemUtils/BulkTester.py index 9364e3ef5..c2a3fcd7b 100644 --- a/rdkit/Chem/ChemUtils/BulkTester.py +++ b/rdkit/Chem/ChemUtils/BulkTester.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + from rdkit import Chem import sys diff --git a/rdkit/Chem/ChemUtils/TemplateExpand.py b/rdkit/Chem/ChemUtils/TemplateExpand.py index 51c9488ee..9c56b36cb 100644 --- a/rdkit/Chem/ChemUtils/TemplateExpand.py +++ b/rdkit/Chem/ChemUtils/TemplateExpand.py @@ -3,7 +3,7 @@ # Created by Greg Landrum August, 2006 # # -from __future__ import print_function + from rdkit import RDLogger as logging logger = logging.logger() logger.setLevel(logging.INFO) diff --git a/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py b/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py index ae922bd31..4f15b4954 100644 --- a/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py +++ b/rdkit/Chem/ChemUtils/UnitTestAlignDepict.py @@ -4,55 +4,55 @@ import unittest from rdkit import Chem from rdkit.Chem.ChemUtils.AlignDepict import initParser, processArgs, AlignDepict -from rdkit.six.moves import cStringIO as StringIO +from io import StringIO class TestCase(unittest.TestCase): - def test1(self): - parser = initParser() - with outputRedirect() as (out, err): - args = parser.parse_args('--smiles CC CCC'.split()) - args.outF = out - processArgs(args) - self.assertIn('RDKit', out.getvalue()) - self.assertIn('2D', out.getvalue()) - self.assertEqual(err.getvalue(), '') + def test1(self): + parser = initParser() + with outputRedirect() as (out, err): + args = parser.parse_args('--smiles CC CCC'.split()) + args.outF = out + processArgs(args) + self.assertIn('RDKit', out.getvalue()) + self.assertIn('2D', out.getvalue()) + self.assertEqual(err.getvalue(), '') - def test_AlignDepict(self): - mol = Chem.MolFromSmiles('CNC') - core = Chem.MolFromSmiles('CC') - pattern = Chem.MolFromSmarts('CCC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + def test_AlignDepict(self): + mol = Chem.MolFromSmiles('CNC') + core = Chem.MolFromSmiles('CC') + pattern = Chem.MolFromSmarts('CCC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CN') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CN') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - pattern = Chem.MolFromSmarts('CC') - self.assertRaises(ValueError, AlignDepict, mol, core, pattern) + pattern = Chem.MolFromSmarts('CC') + self.assertRaises(ValueError, AlignDepict, mol, core, pattern) - mol = Chem.MolFromSmiles('CCC') - Chem.rdDepictor.Compute2DCoords(core) - AlignDepict(mol, core, pattern) + mol = Chem.MolFromSmiles('CCC') + Chem.rdDepictor.Compute2DCoords(core) + AlignDepict(mol, core, pattern) - mol = Chem.MolFromSmiles('CNC') - AlignDepict(mol, core, pattern, acceptFailure=True) + mol = Chem.MolFromSmiles('CNC') + AlignDepict(mol, core, pattern, acceptFailure=True) @contextmanager def outputRedirect(): - """ Redirect standard output and error to String IO and return """ - try: - _stdout, _stderr = sys.stdout, sys.stderr - sys.stdout = sStdout = StringIO() - sys.stderr = sStderr = StringIO() - yield (sStdout, sStderr) - finally: - sys.stdout, sys.stderr = _stdout, _stderr + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py b/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py index 396751929..70438ce16 100644 --- a/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py +++ b/rdkit/Chem/ChemUtils/UnitTestSDFToCSV.py @@ -6,80 +6,80 @@ import unittest from rdkit import Chem from rdkit import RDConfig from rdkit.Chem.ChemUtils.SDFToCSV import Convert, initParser -from rdkit.six.moves import cStringIO as StringIO +from io import StringIO class TestCase(unittest.TestCase): - def test1(self): - fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') - suppl = Chem.SDMolSupplier(fName) - io = StringIO() - try: - Convert(suppl, io) - except Exception: - import traceback - traceback.print_exc() - self.fail('conversion failed') - txt = io.getvalue() - lines = txt.split('\n') - if not lines[-1]: - del lines[-1] - self.assertTrue(len(lines) == 201, 'bad num lines: %d' % len(lines)) - line0 = lines[0].split(',') - self.assertEqual(len(line0), 20) - self.assertTrue(line0[0] == 'SMILES') + def test1(self): + fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') + suppl = Chem.SDMolSupplier(fName) + io = StringIO() + try: + Convert(suppl, io) + except Exception: + import traceback + traceback.print_exc() + self.fail('conversion failed') + txt = io.getvalue() + lines = txt.split('\n') + if not lines[-1]: + del lines[-1] + self.assertTrue(len(lines) == 201, 'bad num lines: %d' % len(lines)) + line0 = lines[0].split(',') + self.assertEqual(len(line0), 20) + self.assertTrue(line0[0] == 'SMILES') - def test2(self): - fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') - suppl = Chem.SDMolSupplier(fName) - io = StringIO() - try: - Convert(suppl, io, keyCol='AMW', stopAfter=5) - except Exception: - import traceback - traceback.print_exc() - self.fail('conversion failed') - txt = io.getvalue() - lines = [line for line in txt.split('\n') if line.strip() != ''] - self.assertTrue(len(lines) == 6, 'bad num lines: %d' % len(lines)) - line0 = lines[0].split(',') - self.assertEqual(len(line0), 20) - self.assertTrue(line0[0] == 'AMW') - self.assertTrue(line0[1] == 'SMILES') + def test2(self): + fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') + suppl = Chem.SDMolSupplier(fName) + io = StringIO() + try: + Convert(suppl, io, keyCol='AMW', stopAfter=5) + except Exception: + import traceback + traceback.print_exc() + self.fail('conversion failed') + txt = io.getvalue() + lines = [line for line in txt.split('\n') if line.strip() != ''] + self.assertTrue(len(lines) == 6, 'bad num lines: %d' % len(lines)) + line0 = lines[0].split(',') + self.assertEqual(len(line0), 20) + self.assertTrue(line0[0] == 'AMW') + self.assertTrue(line0[1] == 'SMILES') - def test_parser(self): - parser = initParser() - # User want's help - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args(['-h']) - self.assertNotEqual(out.getvalue(), '') - self.assertEqual(err.getvalue(), '') + def test_parser(self): + parser = initParser() + # User want's help + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args(['-h']) + self.assertNotEqual(out.getvalue(), '') + self.assertEqual(err.getvalue(), '') - # Missing input file - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args([]) - self.assertEqual(out.getvalue(), '') - self.assertNotEqual(err.getvalue(), '') + # Missing input file + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args([]) + self.assertEqual(out.getvalue(), '') + self.assertNotEqual(err.getvalue(), '') - # Input file doesn't exist - with self.assertRaises(SystemExit), outputRedirect() as (out, err): - parser.parse_args(['incorrectFilename']) - self.assertEqual(out.getvalue(), '') - self.assertNotEqual(err.getvalue(), '') + # Input file doesn't exist + with self.assertRaises(SystemExit), outputRedirect() as (out, err): + parser.parse_args(['incorrectFilename']) + self.assertEqual(out.getvalue(), '') + self.assertNotEqual(err.getvalue(), '') @contextmanager def outputRedirect(): - """ Redirect standard output and error to String IO and return """ - try: - _stdout, _stderr = sys.stdout, sys.stderr - sys.stdout = sStdout = StringIO() - sys.stderr = sStderr = StringIO() - yield (sStdout, sStderr) - finally: - sys.stdout, sys.stderr = _stdout, _stderr + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Crippen.py b/rdkit/Chem/Crippen.py index 926f089c5..31ba8c118 100755 --- a/rdkit/Chem/Crippen.py +++ b/rdkit/Chem/Crippen.py @@ -16,7 +16,7 @@ """ -from __future__ import print_function + import os from rdkit import RDConfig from rdkit import Chem diff --git a/rdkit/Chem/Descriptors3D.py b/rdkit/Chem/Descriptors3D.py index 4c113fe01..d1f7bfb41 100644 --- a/rdkit/Chem/Descriptors3D.py +++ b/rdkit/Chem/Descriptors3D.py @@ -10,7 +10,7 @@ """ Descriptors derived from a molecule's 3D structure """ -from __future__ import print_function + from rdkit.Chem import rdMolDescriptors diff --git a/rdkit/Chem/Draw/IPythonConsole.py b/rdkit/Chem/Draw/IPythonConsole.py index c5ce32db0..448ad2cb8 100644 --- a/rdkit/Chem/Draw/IPythonConsole.py +++ b/rdkit/Chem/Draw/IPythonConsole.py @@ -7,34 +7,34 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # +from IPython.display import SVG +import numpy +import warnings +import uuid +import json +import os +import copy +from io import BytesIO, StringIO +from rdkit.Chem.Draw import rdMolDraw2D +from rdkit.Chem import Draw +from rdkit.Chem import rdchem, rdChemReactions +from rdkit import Chem import sys import IPython if IPython.release.version < '0.11': - raise ImportError('this module requires at least v0.11 of IPython') + raise ImportError('this module requires at least v0.11 of IPython') try: - import py3Dmol - _canUse3D = True + import py3Dmol + _canUse3D = True except ImportError: - _canUse3D = False + _canUse3D = False -from rdkit import Chem -from rdkit.Chem import rdchem, rdChemReactions -from rdkit.Chem import Draw -from rdkit.Chem.Draw import rdMolDraw2D -from rdkit.six import BytesIO, StringIO -import copy -import os -import json -import uuid -import warnings -import numpy try: - import Image + import Image except ImportError: - from PIL import Image + from PIL import Image -from IPython.display import SVG molSize = (450, 150) highlightSubstructs = True @@ -51,108 +51,108 @@ Chem.WrapLogs() def addMolToView(mol, view, confId=-1, drawAs=None): - if mol.GetNumAtoms() >= 999 or drawAs == 'cartoon': - # py3DMol is happier with TER and MASTER records present - pdb = Chem.MolToPDBBlock(mol, flavor=0x20 | 0x10) - view.addModel(pdb, 'pdb') - else: - # py3Dmol does not currently support v3k mol files, so - # we can only provide those with "smaller" molecules - mb = Chem.MolToMolBlock(mol, confId=confId) - view.addModel(mb, 'sdf') - if drawAs is None: - drawAs = drawing_type_3d - view.setStyle({drawAs: {}}) + if mol.GetNumAtoms() >= 999 or drawAs == 'cartoon': + # py3DMol is happier with TER and MASTER records present + pdb = Chem.MolToPDBBlock(mol, flavor=0x20 | 0x10) + view.addModel(pdb, 'pdb') + else: + # py3Dmol does not currently support v3k mol files, so + # we can only provide those with "smaller" molecules + mb = Chem.MolToMolBlock(mol, confId=confId) + view.addModel(mb, 'sdf') + if drawAs is None: + drawAs = drawing_type_3d + view.setStyle({drawAs: {}}) def drawMol3D(m, view=None, confId=-1, drawAs=None, bgColor=None, size=None): - if bgColor is None: - bgColor = bgcolor_3d - if size is None: - size = molSize_3d - if view is None: - view = py3Dmol.view(width=size[0], height=size[1]) - view.removeAllModels() - try: - iter(m) - except TypeError: - addMolToView(m, view, confId, drawAs) - else: - ms = m - for m in ms: - addMolToView(m, view, confId, drawAs) + if bgColor is None: + bgColor = bgcolor_3d + if size is None: + size = molSize_3d + if view is None: + view = py3Dmol.view(width=size[0], height=size[1]) + view.removeAllModels() + try: + iter(m) + except TypeError: + addMolToView(m, view, confId, drawAs) + else: + ms = m + for m in ms: + addMolToView(m, view, confId, drawAs) - view.setBackgroundColor(bgColor) - view.zoomTo() - return view.show() + view.setBackgroundColor(bgColor) + view.zoomTo() + return view.show() def _toJSON(mol): - """For IPython notebook, renders 3D webGL objects.""" - if not ipython_3d or not mol.GetNumConformers(): - return None - conf = mol.GetConformer() - if not conf.Is3D(): - return None - return drawMol3D(mol).data + """For IPython notebook, renders 3D webGL objects.""" + if not ipython_3d or not mol.GetNumConformers(): + return None + conf = mol.GetConformer() + if not conf.Is3D(): + return None + return drawMol3D(mol).data def _toPNG(mol): - if hasattr(mol, '__sssAtoms'): - highlightAtoms = mol.__sssAtoms - else: - highlightAtoms = [] - kekulize = kekulizeStructures - return Draw._moltoimg(mol, molSize, highlightAtoms, "", returnPNG=True, kekulize=kekulize) + if hasattr(mol, '__sssAtoms'): + highlightAtoms = mol.__sssAtoms + else: + highlightAtoms = [] + kekulize = kekulizeStructures + return Draw._moltoimg(mol, molSize, highlightAtoms, "", returnPNG=True, kekulize=kekulize) def _toSVG(mol): - if not ipython_useSVG: - return None - if hasattr(mol, '__sssAtoms'): - highlightAtoms = mol.__sssAtoms - else: - highlightAtoms = [] - kekulize = kekulizeStructures - return Draw._moltoSVG(mol, molSize, highlightAtoms, "", kekulize) + if not ipython_useSVG: + return None + if hasattr(mol, '__sssAtoms'): + highlightAtoms = mol.__sssAtoms + else: + highlightAtoms = [] + kekulize = kekulizeStructures + return Draw._moltoSVG(mol, molSize, highlightAtoms, "", kekulize) def _toReactionPNG(rxn): - rc = copy.deepcopy(rxn) - img = Draw.ReactionToImage(rc, subImgSize=(int(molSize[0] / 3), molSize[1]), - highlightByReactant=highlightByReactant) - bio = BytesIO() - img.save(bio, format='PNG') - return bio.getvalue() + rc = copy.deepcopy(rxn) + img = Draw.ReactionToImage(rc, subImgSize=(int(molSize[0] / 3), molSize[1]), + highlightByReactant=highlightByReactant) + bio = BytesIO() + img.save(bio, format='PNG') + return bio.getvalue() def _toReactionSVG(rxn): - if not ipython_useSVG: - return None - rc = copy.deepcopy(rxn) - return Draw.ReactionToImage(rc, subImgSize=(int(molSize[0] / 3), molSize[1]), useSVG=True, - highlightByReactant=highlightByReactant) + if not ipython_useSVG: + return None + rc = copy.deepcopy(rxn) + return Draw.ReactionToImage(rc, subImgSize=(int(molSize[0] / 3), molSize[1]), useSVG=True, + highlightByReactant=highlightByReactant) def _GetSubstructMatch(mol, query, **kwargs): - res = mol.__GetSubstructMatch(query, **kwargs) - if highlightSubstructs: - mol.__sssAtoms = list(res) - else: - mol.__sssAtoms = [] - return res + res = mol.__GetSubstructMatch(query, **kwargs) + if highlightSubstructs: + mol.__sssAtoms = list(res) + else: + mol.__sssAtoms = [] + return res _GetSubstructMatch.__doc__ = rdchem.Mol.GetSubstructMatch.__doc__ def _GetSubstructMatches(mol, query, **kwargs): - res = mol.__GetSubstructMatches(query, **kwargs) - mol.__sssAtoms = [] - if highlightSubstructs: - for entry in res: - mol.__sssAtoms.extend(list(entry)) - return res + res = mol.__GetSubstructMatches(query, **kwargs) + mol.__sssAtoms = [] + if highlightSubstructs: + for entry in res: + mol.__sssAtoms.extend(list(entry)) + return res _GetSubstructMatches.__doc__ = rdchem.Mol.GetSubstructMatches.__doc__ @@ -160,171 +160,187 @@ _GetSubstructMatches.__doc__ = rdchem.Mol.GetSubstructMatches.__doc__ # code for displaying PIL images directly, def display_pil_image(img): - """displayhook function for PIL Images, rendered as PNG""" - bio = BytesIO() - img.save(bio, format='PNG') - return bio.getvalue() + """displayhook function for PIL Images, rendered as PNG""" + bio = BytesIO() + img.save(bio, format='PNG') + return bio.getvalue() _MolsToGridImageSaved = None def ShowMols(mols, maxMols=50, **kwargs): - global _MolsToGridImageSaved - if 'useSVG' not in kwargs: - kwargs['useSVG'] = ipython_useSVG - if _MolsToGridImageSaved is not None: - fn = _MolsToGridImageSaved - else: - fn = Draw.MolsToGridImage - if len(mols) > maxMols: - warnings.warn( - "Truncating the list of molecules to be displayed to %d. Change the maxMols value to display more." - % (maxMols)) - mols = mols[:maxMols] - for prop in ('legends', 'highlightAtoms', 'highlightBonds'): - if prop in kwargs: - kwargs[prop] = kwargs[prop][:maxMols] + global _MolsToGridImageSaved + if 'useSVG' not in kwargs: + kwargs['useSVG'] = ipython_useSVG + if _MolsToGridImageSaved is not None: + fn = _MolsToGridImageSaved + else: + fn = Draw.MolsToGridImage + if len(mols) > maxMols: + warnings.warn( + "Truncating the list of molecules to be displayed to %d. Change the maxMols value to display more." + % (maxMols)) + mols = mols[:maxMols] + for prop in ('legends', 'highlightAtoms', 'highlightBonds'): + if prop in kwargs: + kwargs[prop] = kwargs[prop][:maxMols] - res = fn(mols, **kwargs) - if kwargs['useSVG']: - return SVG(res) - else: - return res + res = fn(mols, **kwargs) + if kwargs['useSVG']: + return SVG(res) + else: + return res ShowMols.__doc__ = Draw.MolsToGridImage.__doc__ def _DrawBit(fn, *args, **kwargs): - if 'useSVG' not in kwargs: - kwargs['useSVG'] = ipython_useSVG - res = fn(*args, **kwargs) - if kwargs['useSVG']: - return SVG(res) - else: - sio = BytesIO(res) - return Image.open(sio) + if 'useSVG' not in kwargs: + kwargs['useSVG'] = ipython_useSVG + res = fn(*args, **kwargs) + if kwargs['useSVG']: + return SVG(res) + else: + sio = BytesIO(res) + return Image.open(sio) def _DrawBits(fn, *args, **kwargs): - if 'useSVG' not in kwargs: - kwargs['useSVG'] = ipython_useSVG - res = fn(*args, **kwargs) - if kwargs['useSVG']: - return SVG(res) - else: - sio = BytesIO(res) - return Image.open(sio) + if 'useSVG' not in kwargs: + kwargs['useSVG'] = ipython_useSVG + res = fn(*args, **kwargs) + if kwargs['useSVG']: + return SVG(res) + else: + sio = BytesIO(res) + return Image.open(sio) _DrawMorganBitSaved = None + + def DrawMorganBit(mol, bitId, bitInfo, **kwargs): - global _DrawMorganBitSaved - if _DrawMorganBitSaved is not None: - fn = _DrawMorganBitSaved - else: - fn = Draw.DrawMorganBit - return _DrawBit(fn, mol, bitId, bitInfo, **kwargs) + global _DrawMorganBitSaved + if _DrawMorganBitSaved is not None: + fn = _DrawMorganBitSaved + else: + fn = Draw.DrawMorganBit + return _DrawBit(fn, mol, bitId, bitInfo, **kwargs) + + DrawMorganBit.__doc__ = Draw.DrawMorganBit.__doc__ _DrawMorganBitsSaved = None + + def DrawMorganBits(*args, **kwargs): - global _DrawMorganBitsSaved - if _DrawMorganBitsSaved is not None: - fn = _DrawMorganBitsSaved - else: - fn = Draw.DrawMorganBits - return _DrawBit(fn, *args, **kwargs) + global _DrawMorganBitsSaved + if _DrawMorganBitsSaved is not None: + fn = _DrawMorganBitsSaved + else: + fn = Draw.DrawMorganBits + return _DrawBit(fn, *args, **kwargs) + + DrawMorganBits.__doc__ = Draw.DrawMorganBits.__doc__ _DrawRDKitBitSaved = None + + def DrawRDKitBit(mol, bitId, bitInfo, **kwargs): - global _DrawRDKitBitSaved - if _DrawRDKitBitSaved is not None: - fn = _DrawRDKitBitSaved - else: - fn = Draw.DrawRDKitBit - return _DrawBit(fn, mol, bitId, bitInfo, **kwargs) + global _DrawRDKitBitSaved + if _DrawRDKitBitSaved is not None: + fn = _DrawRDKitBitSaved + else: + fn = Draw.DrawRDKitBit + return _DrawBit(fn, mol, bitId, bitInfo, **kwargs) + + DrawRDKitBit.__doc__ = Draw.DrawRDKitBit.__doc__ _DrawRDKitBitsSaved = None + + def DrawRDKitBits(*args, **kwargs): - global _DrawRDKitBitsSaved - if _DrawRDKitBitsSaved is not None: - fn = _DrawRDKitBitsSaved - else: - fn = Draw.DrawRDKitBits - return _DrawBit(fn, *args, **kwargs) + global _DrawRDKitBitsSaved + if _DrawRDKitBitsSaved is not None: + fn = _DrawRDKitBitsSaved + else: + fn = Draw.DrawRDKitBits + return _DrawBit(fn, *args, **kwargs) + + DrawRDKitBits.__doc__ = Draw.DrawRDKitBits.__doc__ _rendererInstalled = False def InstallIPythonRenderer(): - global _MolsToGridImageSaved, _DrawRDKitBitSaved, _DrawRDKitBitsSaved, _DrawMorganBitSaved, _DrawMorganBitsSaved - global _rendererInstalled - if _rendererInstalled: - return - rdchem.Mol._repr_png_ = _toPNG - rdchem.Mol._repr_svg_ = _toSVG - if _canUse3D: - rdchem.Mol._repr_html_ = _toJSON - rdChemReactions.ChemicalReaction._repr_png_ = _toReactionPNG - rdChemReactions.ChemicalReaction._repr_svg_ = _toReactionSVG - if not hasattr(rdchem.Mol, '__GetSubstructMatch'): - rdchem.Mol.__GetSubstructMatch = rdchem.Mol.GetSubstructMatch - rdchem.Mol.GetSubstructMatch = _GetSubstructMatch - if not hasattr(rdchem.Mol, '__GetSubstructMatches'): - rdchem.Mol.__GetSubstructMatches = rdchem.Mol.GetSubstructMatches - rdchem.Mol.GetSubstructMatches = _GetSubstructMatches - Image.Image._repr_png_ = display_pil_image - _MolsToGridImageSaved = Draw.MolsToGridImage - Draw.MolsToGridImage = ShowMols - _DrawRDKitBitSaved = Draw.DrawRDKitBit - Draw.DrawRDKitBit = DrawRDKitBit - _DrawRDKitBitsSaved = Draw.DrawRDKitBits - Draw.DrawRDKitBits = DrawRDKitBits - _DrawMorganBitSaved = Draw.DrawMorganBit - Draw.DrawMorganBit = DrawMorganBit - _DrawMorganBitsSaved = Draw.DrawMorganBits - Draw.DrawMorganBits = DrawMorganBits - rdchem.Mol.__DebugMol = rdchem.Mol.Debug - rdchem.Mol.Debug = lambda self, useStdout=False: self.__DebugMol(useStdout=useStdout) - _rendererInstalled = True + global _MolsToGridImageSaved, _DrawRDKitBitSaved, _DrawRDKitBitsSaved, _DrawMorganBitSaved, _DrawMorganBitsSaved + global _rendererInstalled + if _rendererInstalled: + return + rdchem.Mol._repr_png_ = _toPNG + rdchem.Mol._repr_svg_ = _toSVG + if _canUse3D: + rdchem.Mol._repr_html_ = _toJSON + rdChemReactions.ChemicalReaction._repr_png_ = _toReactionPNG + rdChemReactions.ChemicalReaction._repr_svg_ = _toReactionSVG + if not hasattr(rdchem.Mol, '__GetSubstructMatch'): + rdchem.Mol.__GetSubstructMatch = rdchem.Mol.GetSubstructMatch + rdchem.Mol.GetSubstructMatch = _GetSubstructMatch + if not hasattr(rdchem.Mol, '__GetSubstructMatches'): + rdchem.Mol.__GetSubstructMatches = rdchem.Mol.GetSubstructMatches + rdchem.Mol.GetSubstructMatches = _GetSubstructMatches + Image.Image._repr_png_ = display_pil_image + _MolsToGridImageSaved = Draw.MolsToGridImage + Draw.MolsToGridImage = ShowMols + _DrawRDKitBitSaved = Draw.DrawRDKitBit + Draw.DrawRDKitBit = DrawRDKitBit + _DrawRDKitBitsSaved = Draw.DrawRDKitBits + Draw.DrawRDKitBits = DrawRDKitBits + _DrawMorganBitSaved = Draw.DrawMorganBit + Draw.DrawMorganBit = DrawMorganBit + _DrawMorganBitsSaved = Draw.DrawMorganBits + Draw.DrawMorganBits = DrawMorganBits + rdchem.Mol.__DebugMol = rdchem.Mol.Debug + rdchem.Mol.Debug = lambda self, useStdout=False: self.__DebugMol(useStdout=useStdout) + _rendererInstalled = True InstallIPythonRenderer() def UninstallIPythonRenderer(): - global _MolsToGridImageSaved, _DrawRDKitBitSaved, _DrawMorganBitSaved, _DrawMorganBitsSaved - global _rendererInstalled - if not _rendererInstalled: - return - del rdchem.Mol._repr_svg_ - del rdchem.Mol._repr_png_ - if _canUse3D: - del rdchem.Mol._repr_html_ - del rdChemReactions.ChemicalReaction._repr_png_ - if hasattr(rdchem.Mol, '__GetSubstructMatch'): - rdchem.Mol.GetSubstructMatch = rdchem.Mol.__GetSubstructMatch - del rdchem.Mol.__GetSubstructMatch - if hasattr(rdchem.Mol, '__GetSubstructMatches'): - rdchem.Mol.GetSubstructMatches = rdchem.Mol.__GetSubstructMatches - del rdchem.Mol.__GetSubstructMatches - del Image.Image._repr_png_ - if _MolsToGridImageSaved is not None: - Draw.MolsToGridImage = _MolsToGridImageSaved - if _DrawRDKitBitSaved is not None: - Draw.DrawRDKitBit = _DrawRDKitBitSaved - if _DrawRDKitBitsSaved is not None: - Draw.DrawRDKitBits = _DrawRDKitBitsSaved - if _DrawMorganBitSaved is not None: - Draw.DrawMorganBit = _DrawMorganBitSaved - if _DrawMorganBitsSaved is not None: - Draw.DrawMorganBits = _DrawMorganBitsSaved - if hasattr(rdchem.Mol, '__DebugMol'): - rdchem.Mol.Debug = rdchem.Mol.__DebugMol - del rdchem.Mol.__DebugMol - _rendererInstalled = False + global _MolsToGridImageSaved, _DrawRDKitBitSaved, _DrawMorganBitSaved, _DrawMorganBitsSaved + global _rendererInstalled + if not _rendererInstalled: + return + del rdchem.Mol._repr_svg_ + del rdchem.Mol._repr_png_ + if _canUse3D: + del rdchem.Mol._repr_html_ + del rdChemReactions.ChemicalReaction._repr_png_ + if hasattr(rdchem.Mol, '__GetSubstructMatch'): + rdchem.Mol.GetSubstructMatch = rdchem.Mol.__GetSubstructMatch + del rdchem.Mol.__GetSubstructMatch + if hasattr(rdchem.Mol, '__GetSubstructMatches'): + rdchem.Mol.GetSubstructMatches = rdchem.Mol.__GetSubstructMatches + del rdchem.Mol.__GetSubstructMatches + del Image.Image._repr_png_ + if _MolsToGridImageSaved is not None: + Draw.MolsToGridImage = _MolsToGridImageSaved + if _DrawRDKitBitSaved is not None: + Draw.DrawRDKitBit = _DrawRDKitBitSaved + if _DrawRDKitBitsSaved is not None: + Draw.DrawRDKitBits = _DrawRDKitBitsSaved + if _DrawMorganBitSaved is not None: + Draw.DrawMorganBit = _DrawMorganBitSaved + if _DrawMorganBitsSaved is not None: + Draw.DrawMorganBits = _DrawMorganBitsSaved + if hasattr(rdchem.Mol, '__DebugMol'): + rdchem.Mol.Debug = rdchem.Mol.__DebugMol + del rdchem.Mol.__DebugMol + _rendererInstalled = False diff --git a/rdkit/Chem/Draw/MolDrawing.py b/rdkit/Chem/Draw/MolDrawing.py index 57698109a..fb301bc33 100644 --- a/rdkit/Chem/Draw/MolDrawing.py +++ b/rdkit/Chem/Draw/MolDrawing.py @@ -12,8 +12,9 @@ from rdkit import Chem import numpy import math import copy -from rdkit.six import cmp import functools +def cmp(t1, t2): + return (t1 < t2) * -1 or (t1 > t2) * 1 periodicTable = Chem.GetPeriodicTable() diff --git a/rdkit/Chem/Draw/SimilarityMaps.py b/rdkit/Chem/Draw/SimilarityMaps.py index 306ed5ac1..f9e0538e8 100644 --- a/rdkit/Chem/Draw/SimilarityMaps.py +++ b/rdkit/Chem/Draw/SimilarityMaps.py @@ -41,178 +41,180 @@ from rdkit import Chem from rdkit import DataStructs from rdkit.Chem import Draw from rdkit.Chem import rdMolDescriptors as rdMD -from rdkit.six import iteritems def GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity): - """ - Calculates the atomic weights for the probe molecule - based on a fingerprint function and a metric. + """ + Calculates the atomic weights for the probe molecule + based on a fingerprint function and a metric. - Parameters: - refMol -- the reference molecule - probeMol -- the probe molecule - fpFunction -- the fingerprint function - metric -- the similarity metric + Parameters: + refMol -- the reference molecule + probeMol -- the probe molecule + fpFunction -- the fingerprint function + metric -- the similarity metric - Note: - If fpFunction needs additional parameters, use a lambda construct - """ - if hasattr(probeMol, '_fpInfo'): - delattr(probeMol, '_fpInfo') - if hasattr(refMol, '_fpInfo'): - delattr(refMol, '_fpInfo') - refFP = fpFunction(refMol, -1) - probeFP = fpFunction(probeMol, -1) - baseSimilarity = metric(refFP, probeFP) - # loop over atoms - weights = [] - for atomId in range(probeMol.GetNumAtoms()): - newFP = fpFunction(probeMol, atomId) - newSimilarity = metric(refFP, newFP) - weights.append(baseSimilarity - newSimilarity) - if hasattr(probeMol, '_fpInfo'): - delattr(probeMol, '_fpInfo') - if hasattr(refMol, '_fpInfo'): - delattr(refMol, '_fpInfo') - return weights + Note: + If fpFunction needs additional parameters, use a lambda construct + """ + if hasattr(probeMol, '_fpInfo'): + delattr(probeMol, '_fpInfo') + if hasattr(refMol, '_fpInfo'): + delattr(refMol, '_fpInfo') + refFP = fpFunction(refMol, -1) + probeFP = fpFunction(probeMol, -1) + baseSimilarity = metric(refFP, probeFP) + # loop over atoms + weights = [] + for atomId in range(probeMol.GetNumAtoms()): + newFP = fpFunction(probeMol, atomId) + newSimilarity = metric(refFP, newFP) + weights.append(baseSimilarity - newSimilarity) + if hasattr(probeMol, '_fpInfo'): + delattr(probeMol, '_fpInfo') + if hasattr(refMol, '_fpInfo'): + delattr(refMol, '_fpInfo') + return weights def GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction): - """ - Calculates the atomic weights for the probe molecule based on - a fingerprint function and the prediction function of a ML model. + """ + Calculates the atomic weights for the probe molecule based on + a fingerprint function and the prediction function of a ML model. - Parameters: - probeMol -- the probe molecule - fpFunction -- the fingerprint function - predictionFunction -- the prediction function of the ML model - """ - if hasattr(probeMol, '_fpInfo'): - delattr(probeMol, '_fpInfo') - probeFP = fpFunction(probeMol, -1) - baseProba = predictionFunction(probeFP) - # loop over atoms - weights = [] - for atomId in range(probeMol.GetNumAtoms()): - newFP = fpFunction(probeMol, atomId) - newProba = predictionFunction(newFP) - weights.append(baseProba - newProba) - if hasattr(probeMol, '_fpInfo'): - delattr(probeMol, '_fpInfo') - return weights + Parameters: + probeMol -- the probe molecule + fpFunction -- the fingerprint function + predictionFunction -- the prediction function of the ML model + """ + if hasattr(probeMol, '_fpInfo'): + delattr(probeMol, '_fpInfo') + probeFP = fpFunction(probeMol, -1) + baseProba = predictionFunction(probeFP) + # loop over atoms + weights = [] + for atomId in range(probeMol.GetNumAtoms()): + newFP = fpFunction(probeMol, atomId) + newProba = predictionFunction(newFP) + weights.append(baseProba - newProba) + if hasattr(probeMol, '_fpInfo'): + delattr(probeMol, '_fpInfo') + return weights def GetStandardizedWeights(weights): - """ - Normalizes the weights, - such that the absolute maximum weight equals 1.0. + """ + Normalizes the weights, + such that the absolute maximum weight equals 1.0. - Parameters: - weights -- the list with the atomic weights - """ - tmp = [math.fabs(w) for w in weights] - currentMax = max(tmp) - if currentMax > 0: - return [w / currentMax for w in weights], currentMax - else: - return weights, currentMax + Parameters: + weights -- the list with the atomic weights + """ + tmp = [math.fabs(w) for w in weights] + currentMax = max(tmp) + if currentMax > 0: + return [w / currentMax for w in weights], currentMax + else: + return weights, currentMax def GetSimilarityMapFromWeights(mol, weights, colorMap=None, scale=-1, size=(250, 250), sigma=None, coordScale=1.5, step=0.01, colors='k', contourLines=10, alpha=0.5, **kwargs): - """ - Generates the similarity map for a molecule given the atomic weights. + """ + Generates the similarity map for a molecule given the atomic weights. - Parameters: - mol -- the molecule of interest - colorMap -- the matplotlib color map scheme, default is custom PiWG color map - scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale - scale = double -> this is the maximum scale - size -- the size of the figure - sigma -- the sigma for the Gaussians - coordScale -- scaling factor for the coordinates - step -- the step for calcAtomGaussian - colors -- color of the contour lines - contourLines -- if integer number N: N contour lines are drawn - if list(numbers): contour lines at these numbers are drawn - alpha -- the alpha blending value for the contour lines - kwargs -- additional arguments for drawing - """ - if mol.GetNumAtoms() < 2: - raise ValueError("too few atoms") - fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs) - if sigma is None: - if mol.GetNumBonds() > 0: - bond = mol.GetBondWithIdx(0) - idx1 = bond.GetBeginAtomIdx() - idx2 = bond.GetEndAtomIdx() - sigma = 0.3 * math.sqrt( - sum([(mol._atomPs[idx1][i] - mol._atomPs[idx2][i])**2 for i in range(2)])) + Parameters: + mol -- the molecule of interest + colorMap -- the matplotlib color map scheme, default is custom PiWG color map + scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale + scale = double -> this is the maximum scale + size -- the size of the figure + sigma -- the sigma for the Gaussians + coordScale -- scaling factor for the coordinates + step -- the step for calcAtomGaussian + colors -- color of the contour lines + contourLines -- if integer number N: N contour lines are drawn + if list(numbers): contour lines at these numbers are drawn + alpha -- the alpha blending value for the contour lines + kwargs -- additional arguments for drawing + """ + if mol.GetNumAtoms() < 2: + raise ValueError("too few atoms") + fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs) + if sigma is None: + if mol.GetNumBonds() > 0: + bond = mol.GetBondWithIdx(0) + idx1 = bond.GetBeginAtomIdx() + idx2 = bond.GetEndAtomIdx() + sigma = 0.3 * math.sqrt( + sum([(mol._atomPs[idx1][i] - mol._atomPs[idx2][i])**2 for i in range(2)])) + else: + sigma = 0.3 * \ + math.sqrt(sum([(mol._atomPs[0][i] - mol._atomPs[1][i])**2 for i in range(2)])) + sigma = round(sigma, 2) + x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step) + # scaling + if scale <= 0.0: + maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) else: - sigma = 0.3 * math.sqrt(sum([(mol._atomPs[0][i] - mol._atomPs[1][i])**2 for i in range(2)])) - sigma = round(sigma, 2) - x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step) - # scaling - if scale <= 0.0: - maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) - else: - maxScale = scale - # coloring - if colorMap is None: - PiYG_cmap = cm.get_cmap('PiYG',2) - colorMap = LinearSegmentedColormap.from_list('PiWG', [PiYG_cmap(0), (1.0, 1.0, 1.0), PiYG_cmap(1)], N=255) + maxScale = scale + # coloring + if colorMap is None: + PiYG_cmap = cm.get_cmap('PiYG', 2) + colorMap = LinearSegmentedColormap.from_list( + 'PiWG', [PiYG_cmap(0), (1.0, 1.0, 1.0), PiYG_cmap(1)], N=255) - fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower', - extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale) - # contour lines - # only draw them when at least one weight is not zero - if len([w for w in weights if w != 0.0]): - contourset = fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs) - for j, c in enumerate(contourset.collections): - if contourset.levels[j] == 0.0: - c.set_linewidth(0.0) - elif contourset.levels[j] < 0: - c.set_dashes([(0, (3.0, 3.0))]) - fig.axes[0].set_axis_off() - return fig + fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower', + extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale) + # contour lines + # only draw them when at least one weight is not zero + if len([w for w in weights if w != 0.0]): + contourset = fig.axes[0].contour( + x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs) + for j, c in enumerate(contourset.collections): + if contourset.levels[j] == 0.0: + c.set_linewidth(0.0) + elif contourset.levels[j] < 0: + c.set_dashes([(0, (3.0, 3.0))]) + fig.axes[0].set_axis_off() + return fig def GetSimilarityMapForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity, **kwargs): - """ - Generates the similarity map for a given reference and probe molecule, - fingerprint function and similarity metric. + """ + Generates the similarity map for a given reference and probe molecule, + fingerprint function and similarity metric. - Parameters: - refMol -- the reference molecule - probeMol -- the probe molecule - fpFunction -- the fingerprint function - metric -- the similarity metric. - kwargs -- additional arguments for drawing - """ - weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric) - weights, maxWeight = GetStandardizedWeights(weights) - fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) - return fig, maxWeight + Parameters: + refMol -- the reference molecule + probeMol -- the probe molecule + fpFunction -- the fingerprint function + metric -- the similarity metric. + kwargs -- additional arguments for drawing + """ + weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric) + weights, maxWeight = GetStandardizedWeights(weights) + fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) + return fig, maxWeight def GetSimilarityMapForModel(probeMol, fpFunction, predictionFunction, **kwargs): - """ - Generates the similarity map for a given ML model and probe molecule, - and fingerprint function. + """ + Generates the similarity map for a given ML model and probe molecule, + and fingerprint function. - Parameters: - probeMol -- the probe molecule - fpFunction -- the fingerprint function - predictionFunction -- the prediction function of the ML model - kwargs -- additional arguments for drawing - """ - weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) - weights, maxWeight = GetStandardizedWeights(weights) - fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) - return fig, maxWeight + Parameters: + probeMol -- the probe molecule + fpFunction -- the fingerprint function + predictionFunction -- the prediction function of the ML model + kwargs -- additional arguments for drawing + """ + weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) + weights, maxWeight = GetStandardizedWeights(weights) + fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) + return fig, maxWeight apDict = {} @@ -227,25 +229,25 @@ apDict[ # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry) def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4, **kwargs): - """ - Calculates the atom pairs fingerprint with the torsions of atomId removed. + """ + Calculates the atom pairs fingerprint with the torsions of atomId removed. - Parameters: - mol -- the molecule of interest - atomId -- the atom to remove the pairs for (if -1, no pair is removed) - fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') - nBits -- the size of the bit vector (only for fpType='bv') - minLength -- the minimum path length for an atom pair - maxLength -- the maxmimum path length for an atom pair - nBitsPerEntry -- the number of bits available for each pair - """ - if fpType not in ['normal', 'hashed', 'bv']: - raise ValueError("Unknown Atom pairs fingerprint type") - if atomId < 0: - return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0, **kwargs) - if atomId >= mol.GetNumAtoms(): - raise ValueError("atom index greater than number of atoms") - return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId], **kwargs) + Parameters: + mol -- the molecule of interest + atomId -- the atom to remove the pairs for (if -1, no pair is removed) + fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') + nBits -- the size of the bit vector (only for fpType='bv') + minLength -- the minimum path length for an atom pair + maxLength -- the maxmimum path length for an atom pair + nBitsPerEntry -- the number of bits available for each pair + """ + if fpType not in ['normal', 'hashed', 'bv']: + raise ValueError("Unknown Atom pairs fingerprint type") + if atomId < 0: + return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0, **kwargs) + if atomId >= mol.GetNumAtoms(): + raise ValueError("atom index greater than number of atoms") + return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId], **kwargs) ttDict = {} @@ -260,129 +262,129 @@ ttDict[ # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize) def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4, **kwargs): - """ - Calculates the topological torsion fingerprint with the pairs of atomId removed. + """ + Calculates the topological torsion fingerprint with the pairs of atomId removed. - Parameters: - mol -- the molecule of interest - atomId -- the atom to remove the torsions for (if -1, no torsion is removed) - fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') - nBits -- the size of the bit vector (only for fpType='bv') - minLength -- the minimum path length for an atom pair - maxLength -- the maxmimum path length for an atom pair - nBitsPerEntry -- the number of bits available for each torsion + Parameters: + mol -- the molecule of interest + atomId -- the atom to remove the torsions for (if -1, no torsion is removed) + fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') + nBits -- the size of the bit vector (only for fpType='bv') + minLength -- the minimum path length for an atom pair + maxLength -- the maxmimum path length for an atom pair + nBitsPerEntry -- the number of bits available for each torsion - any additional keyword arguments will be passed to the fingerprinting function. + any additional keyword arguments will be passed to the fingerprinting function. - """ - if fpType not in ['normal', 'hashed', 'bv']: - raise ValueError("Unknown Topological torsion fingerprint type") - if atomId < 0: - return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0, **kwargs) - if atomId >= mol.GetNumAtoms(): - raise ValueError("atom index greater than number of atoms") - return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId], **kwargs) + """ + if fpType not in ['normal', 'hashed', 'bv']: + raise ValueError("Unknown Topological torsion fingerprint type") + if atomId < 0: + return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0, **kwargs) + if atomId >= mol.GetNumAtoms(): + raise ValueError("atom index greater than number of atoms") + return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId], **kwargs) # usage: lambda m,i: GetMorganFingerprint(m, i, radius, fpType, nBits, useFeatures) def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False, **kwargs): - """ - Calculates the Morgan fingerprint with the environments of atomId removed. + """ + Calculates the Morgan fingerprint with the environments of atomId removed. - Parameters: - mol -- the molecule of interest - radius -- the maximum radius - fpType -- the type of Morgan fingerprint: 'count' or 'bv' - atomId -- the atom to remove the environments for (if -1, no environments is removed) - nBits -- the size of the bit vector (only for fpType = 'bv') - useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan + Parameters: + mol -- the molecule of interest + radius -- the maximum radius + fpType -- the type of Morgan fingerprint: 'count' or 'bv' + atomId -- the atom to remove the environments for (if -1, no environments is removed) + nBits -- the size of the bit vector (only for fpType = 'bv') + useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan - any additional keyword arguments will be passed to the fingerprinting function. - """ - if fpType not in ['bv', 'count']: - raise ValueError("Unknown Morgan fingerprint type") - if not hasattr(mol, '_fpInfo'): - info = {} - # get the fingerprint - if fpType == 'bv': - molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, - bitInfo=info, **kwargs) - else: - molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info, - **kwargs) - # construct the bit map - if fpType == 'bv': - bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())] - else: - bitmap = [[] for _ in range(mol.GetNumAtoms())] - for bit, es in iteritems(info): - for at1, rad in es: - if rad == 0: # for radius 0 - if fpType == 'bv': - bitmap[at1][bit] = 1 - else: - bitmap[at1].append(bit) - else: # for radii > 0 - env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1) - amap = {} - Chem.PathToSubmol(mol, env, atomMap=amap) - for at2 in amap.keys(): - if fpType == 'bv': - bitmap[at2][bit] = 1 - else: - bitmap[at2].append(bit) - mol._fpInfo = (molFp, bitmap) + any additional keyword arguments will be passed to the fingerprinting function. + """ + if fpType not in ['bv', 'count']: + raise ValueError("Unknown Morgan fingerprint type") + if not hasattr(mol, '_fpInfo'): + info = {} + # get the fingerprint + if fpType == 'bv': + molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, + bitInfo=info, **kwargs) + else: + molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info, + **kwargs) + # construct the bit map + if fpType == 'bv': + bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())] + else: + bitmap = [[] for _ in range(mol.GetNumAtoms())] + for bit, es in info.items(): + for at1, rad in es: + if rad == 0: # for radius 0 + if fpType == 'bv': + bitmap[at1][bit] = 1 + else: + bitmap[at1].append(bit) + else: # for radii > 0 + env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1) + amap = {} + Chem.PathToSubmol(mol, env, atomMap=amap) + for at2 in amap.keys(): + if fpType == 'bv': + bitmap[at2][bit] = 1 + else: + bitmap[at2].append(bit) + mol._fpInfo = (molFp, bitmap) - if atomId < 0: - return mol._fpInfo[0] - else: # remove the bits of atomId - if atomId >= mol.GetNumAtoms(): - raise ValueError("atom index greater than number of atoms") - if len(mol._fpInfo) != 2: - raise ValueError("_fpInfo not set") - if fpType == 'bv': - molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId] # xor - else: # count - molFp = copy.deepcopy(mol._fpInfo[0]) - # delete the bits with atomId - for bit in mol._fpInfo[1][atomId]: - molFp[bit] -= 1 - return molFp + if atomId < 0: + return mol._fpInfo[0] + else: # remove the bits of atomId + if atomId >= mol.GetNumAtoms(): + raise ValueError("atom index greater than number of atoms") + if len(mol._fpInfo) != 2: + raise ValueError("_fpInfo not set") + if fpType == 'bv': + molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId] # xor + else: # count + molFp = copy.deepcopy(mol._fpInfo[0]) + # delete the bits with atomId + for bit in mol._fpInfo[1][atomId]: + molFp[bit] -= 1 + return molFp # usage: lambda m,i: GetRDKFingerprint(m, i, fpType, nBits, minPath, maxPath, nBitsPerHash) def GetRDKFingerprint(mol, atomId=-1, fpType='bv', nBits=2048, minPath=1, maxPath=5, nBitsPerHash=2, **kwargs): - """ - Calculates the RDKit fingerprint with the paths of atomId removed. + """ + Calculates the RDKit fingerprint with the paths of atomId removed. - Parameters: - mol -- the molecule of interest - atomId -- the atom to remove the paths for (if -1, no path is removed) - fpType -- the type of RDKit fingerprint: 'bv' - nBits -- the size of the bit vector - minPath -- minimum path length - maxPath -- maximum path length - nBitsPerHash -- number of to set per path - """ - if fpType not in ['bv', '']: - raise ValueError("Unknown RDKit fingerprint type") - fpType = 'bv' - if not hasattr(mol, '_fpInfo'): - info = [] # list with bits for each atom - # get the fingerprint - molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath, - nBitsPerHash=nBitsPerHash, atomBits=info, **kwargs) - mol._fpInfo = (molFp, info) + Parameters: + mol -- the molecule of interest + atomId -- the atom to remove the paths for (if -1, no path is removed) + fpType -- the type of RDKit fingerprint: 'bv' + nBits -- the size of the bit vector + minPath -- minimum path length + maxPath -- maximum path length + nBitsPerHash -- number of to set per path + """ + if fpType not in ['bv', '']: + raise ValueError("Unknown RDKit fingerprint type") + fpType = 'bv' + if not hasattr(mol, '_fpInfo'): + info = [] # list with bits for each atom + # get the fingerprint + molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath, + nBitsPerHash=nBitsPerHash, atomBits=info, **kwargs) + mol._fpInfo = (molFp, info) - if atomId < 0: - return mol._fpInfo[0] - else: # remove the bits of atomId - if atomId >= mol.GetNumAtoms(): - raise ValueError("atom index greater than number of atoms") - if len(mol._fpInfo) != 2: - raise ValueError("_fpInfo not set") - molFp = copy.deepcopy(mol._fpInfo[0]) - molFp.UnSetBitsFromList(mol._fpInfo[1][atomId]) - return molFp + if atomId < 0: + return mol._fpInfo[0] + else: # remove the bits of atomId + if atomId >= mol.GetNumAtoms(): + raise ValueError("atom index greater than number of atoms") + if len(mol._fpInfo) != 2: + raise ValueError("_fpInfo not set") + molFp = copy.deepcopy(mol._fpInfo[0]) + molFp.UnSetBitsFromList(mol._fpInfo[1][atomId]) + return molFp diff --git a/rdkit/Chem/Draw/UnitTestSimilarityMaps.py b/rdkit/Chem/Draw/UnitTestSimilarityMaps.py index c21ecb182..8e9791f00 100755 --- a/rdkit/Chem/Draw/UnitTestSimilarityMaps.py +++ b/rdkit/Chem/Draw/UnitTestSimilarityMaps.py @@ -31,7 +31,7 @@ # Created by Sereina Riniker, Aug 2013 """ unit testing code for molecule drawing """ -from __future__ import print_function + import sys import unittest import os diff --git a/rdkit/Chem/Draw/__init__.py b/rdkit/Chem/Draw/__init__.py index dd14e004c..64ec05c80 100644 --- a/rdkit/Chem/Draw/__init__.py +++ b/rdkit/Chem/Draw/__init__.py @@ -10,7 +10,6 @@ from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions from rdkit.Chem.Draw.rdMolDraw2D import * from rdkit.Chem import rdDepictor from rdkit import Chem -from rdkit.six import iteritems def _getCanvas(): @@ -255,7 +254,7 @@ def MolToMPL(mol, size=(300, 300), kekulize=True, wedgeBonds=True, imageType=Non drawer.AddMol(mol, **kwargs) omol._atomPs = drawer.atomPs[mol] - for k, v in iteritems(omol._atomPs): + for k, v in omol._atomPs.items(): omol._atomPs[k] = canvas.rescalePt(v) canvas._figure.set_size_inches(float(size[0]) / 100, float(size[1]) / 100) return canvas._figure diff --git a/rdkit/Chem/Draw/cairoCanvas.py b/rdkit/Chem/Draw/cairoCanvas.py index 6dadaa68d..7503fad4b 100644 --- a/rdkit/Chem/Draw/cairoCanvas.py +++ b/rdkit/Chem/Draw/cairoCanvas.py @@ -15,38 +15,24 @@ import re from PIL import Image -from rdkit import six from rdkit.Chem.Draw.canvasbase import CanvasBase -if not six.PY3: - bytes = buffer have_cairocffi = False # for Python3, import cairocffi preferably -if six.PY3: - try: +try: import cairocffi as cairo - except ImportError: +except ImportError: import cairo - else: - have_cairocffi = True else: - try: - import cairo - except ImportError: - try: - import cairocffi as cairo - except: - raise - else: - have_cairocffi = True + have_cairocffi = True have_pango = False if 'RDK_NOPANGO' not in os.environ: - if have_cairocffi: - import cffi - import platform - ffi = cffi.FFI() - ffi.cdef(''' + if have_cairocffi: + import cffi + import platform + ffi = cffi.FFI() + ffi.cdef(''' /* GLib */ typedef void* gpointer; typedef void cairo_t; @@ -85,346 +71,346 @@ if 'RDK_NOPANGO' not in os.environ: void pango_layout_set_font_description (PangoLayout *layout, const PangoFontDescription *desc); ''') - if platform.system() == 'Windows': - defaultLibs = { - 'pango_default_lib': 'libpango-1.0-0.dll', - 'pangocairo_default_lib': 'libpangocairo-1.0-0.dll', - 'gobject_default_lib': 'libgobject-2.0-0.dll' - } + if platform.system() == 'Windows': + defaultLibs = { + 'pango_default_lib': 'libpango-1.0-0.dll', + 'pangocairo_default_lib': 'libpangocairo-1.0-0.dll', + 'gobject_default_lib': 'libgobject-2.0-0.dll' + } + else: + defaultLibs = { + 'pango_default_lib': 'pango-1.0', + 'pangocairo_default_lib': 'pangocairo-1.0', + 'gobject_default_lib': 'gobject-2.0' + } + import ctypes.util + for libType in ['pango', 'pangocairo', 'gobject']: + envVar = 'RDK_' + libType.upper() + '_LIB' + envVarSet = False + if envVar in os.environ: + envVarSet = True + libName = os.environ[envVar] + else: + libName = defaultLibs[libType + '_default_lib'] + libPath = ctypes.util.find_library(libName) + exec(libType + ' = None') + importError = False + if libPath: + try: + exec(libType + ' = ffi.dlopen("' + libPath.replace('\\', '\\\\') + '")') + except: + if envVarSet: + importError = True + else: + pass + else: + importError = True + if importError: + raise ImportError(envVar + ' set to ' + libName + ' but ' + libType.upper() + + ' library cannot be loaded.') + have_pango = (pango and pangocairo and gobject) else: - defaultLibs = { - 'pango_default_lib': 'pango-1.0', - 'pangocairo_default_lib': 'pangocairo-1.0', - 'gobject_default_lib': 'gobject-2.0' - } - import ctypes.util - for libType in ['pango', 'pangocairo', 'gobject']: - envVar = 'RDK_' + libType.upper() + '_LIB' - envVarSet = False - if envVar in os.environ: - envVarSet = True - libName = os.environ[envVar] - else: - libName = defaultLibs[libType + '_default_lib'] - libPath = ctypes.util.find_library(libName) - exec(libType + ' = None') - importError = False - if libPath: - try: - exec(libType + ' = ffi.dlopen("' + libPath.replace('\\', '\\\\') + '")') - except: - if envVarSet: - importError = True - else: - pass - else: - importError = True - if importError: - raise ImportError(envVar + ' set to ' + libName + ' but ' + libType.upper() + - ' library cannot be loaded.') - have_pango = (pango and pangocairo and gobject) - else: - for libType in ['pango', 'pangocairo']: - try: - exec('import ' + libType) - except ImportError: - exec(libType + ' = None') - have_pango = (pango and pangocairo) + for libType in ['pango', 'pangocairo']: + try: + exec('import ' + libType) + except ImportError: + exec(libType + ' = None') + have_pango = (pango and pangocairo) -if (not hasattr(cairo.ImageSurface, 'get_data') and - not hasattr(cairo.ImageSurface, 'get_data_as_rgba')): - raise ImportError('cairo version too old') +if (not hasattr(cairo.ImageSurface, 'get_data') + and not hasattr(cairo.ImageSurface, 'get_data_as_rgba')): + raise ImportError('cairo version too old') scriptPattern = re.compile(r'\<.+?\>') class Canvas(CanvasBase): - def __init__(self, - image=None, # PIL image - size=None, - ctx=None, - imageType=None, # determines file type - fileName=None, # if set determines output file name - ): - """ - Canvas can be used in four modes: - 1) using the supplied PIL image - 2) using the supplied cairo context ctx - 3) writing to a file fileName with image type imageType - 4) creating a cairo surface and context within the constructor - """ - self.image = None - self.imageType = imageType - if image is not None: - try: - imgd = getattr(image, 'tobytes', image.tostring)("raw", "BGRA") - except SystemError: - r, g, b, a = image.split() - mrg = Image.merge("RGBA", (b, g, r, a)) - imgd = getattr(mrg, 'tobytes', mrg.tostring)("raw", "RGBA") + def __init__(self, + image=None, # PIL image + size=None, + ctx=None, + imageType=None, # determines file type + fileName=None, # if set determines output file name + ): + """ + Canvas can be used in four modes: + 1) using the supplied PIL image + 2) using the supplied cairo context ctx + 3) writing to a file fileName with image type imageType + 4) creating a cairo surface and context within the constructor + """ + self.image = None + self.imageType = imageType + if image is not None: + try: + imgd = getattr(image, 'tobytes', image.tostring)("raw", "BGRA") + except SystemError: + r, g, b, a = image.split() + mrg = Image.merge("RGBA", (b, g, r, a)) + imgd = getattr(mrg, 'tobytes', mrg.tostring)("raw", "RGBA") - a = array.array('B', imgd) - stride = image.size[0] * 4 - surface = cairo.ImageSurface.create_for_data(a, cairo.FORMAT_ARGB32, image.size[0], - image.size[1], stride) - ctx = cairo.Context(surface) - size = image.size[0], image.size[1] - self.image = image - elif ctx is None and size is not None: - if hasattr(cairo, "PDFSurface") and imageType == "pdf": - surface = cairo.PDFSurface(fileName, size[0], size[1]) - elif hasattr(cairo, "SVGSurface") and imageType == "svg": - surface = cairo.SVGSurface(fileName, size[0], size[1]) - elif hasattr(cairo, "PSSurface") and imageType == "ps": - surface = cairo.PSSurface(fileName, size[0], size[1]) - elif imageType == "png": - surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, size[0], size[1]) - else: - raise ValueError("Unrecognized file type. Valid choices are pdf, svg, ps, and png") - ctx = cairo.Context(surface) - ctx.set_source_rgb(1, 1, 1) - ctx.paint() - else: - surface = ctx.get_target() - if size is None: - try: - size = surface.get_width(), surface.get_height() - except AttributeError: - size = None - self.ctx = ctx - self.size = size - self.surface = surface - self.fileName = fileName + a = array.array('B', imgd) + stride = image.size[0] * 4 + surface = cairo.ImageSurface.create_for_data(a, cairo.FORMAT_ARGB32, image.size[0], + image.size[1], stride) + ctx = cairo.Context(surface) + size = image.size[0], image.size[1] + self.image = image + elif ctx is None and size is not None: + if hasattr(cairo, "PDFSurface") and imageType == "pdf": + surface = cairo.PDFSurface(fileName, size[0], size[1]) + elif hasattr(cairo, "SVGSurface") and imageType == "svg": + surface = cairo.SVGSurface(fileName, size[0], size[1]) + elif hasattr(cairo, "PSSurface") and imageType == "ps": + surface = cairo.PSSurface(fileName, size[0], size[1]) + elif imageType == "png": + surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, size[0], size[1]) + else: + raise ValueError("Unrecognized file type. Valid choices are pdf, svg, ps, and png") + ctx = cairo.Context(surface) + ctx.set_source_rgb(1, 1, 1) + ctx.paint() + else: + surface = ctx.get_target() + if size is None: + try: + size = surface.get_width(), surface.get_height() + except AttributeError: + size = None + self.ctx = ctx + self.size = size + self.surface = surface + self.fileName = fileName - def flush(self): - """temporary interface, must be splitted to different methods, - """ - if self.fileName and self.imageType == 'png': - self.surface.write_to_png(self.fileName) - elif self.image is not None: - # on linux at least it seems like the PIL images are BGRA, not RGBA: - if hasattr(self.surface, 'get_data'): - getattr(self.image, 'frombytes', self.image.fromstring)(bytes(self.surface.get_data()), - "raw", "BGRA", 0, 1) - else: - getattr(self.image, 'frombytes', self.image.fromstring)( - bytes(self.surface.get_data_as_rgba()), "raw", "RGBA", 0, 1) - self.surface.finish() - elif self.imageType == "png": - if hasattr(self.surface, 'get_data'): - buffer = self.surface.get_data() - else: - buffer = self.surface.get_data_as_rgba() - return buffer + def flush(self): + """temporary interface, must be splitted to different methods, + """ + if self.fileName and self.imageType == 'png': + self.surface.write_to_png(self.fileName) + elif self.image is not None: + # on linux at least it seems like the PIL images are BGRA, not RGBA: + if hasattr(self.surface, 'get_data'): + getattr(self.image, 'frombytes', self.image.fromstring)(bytes(self.surface.get_data()), + "raw", "BGRA", 0, 1) + else: + getattr(self.image, 'frombytes', self.image.fromstring)( + bytes(self.surface.get_data_as_rgba()), "raw", "RGBA", 0, 1) + self.surface.finish() + elif self.imageType == "png": + if hasattr(self.surface, 'get_data'): + buffer = self.surface.get_data() + else: + buffer = self.surface.get_data_as_rgba() + return buffer - def _doLine(self, p1, p2, **kwargs): - if kwargs.get('dash', (0, 0)) == (0, 0): - self.ctx.move_to(p1[0], p1[1]) - self.ctx.line_to(p2[0], p2[1]) - else: - dash = kwargs['dash'] - pts = self._getLinePoints(p1, p2, dash) + def _doLine(self, p1, p2, **kwargs): + if kwargs.get('dash', (0, 0)) == (0, 0): + self.ctx.move_to(p1[0], p1[1]) + self.ctx.line_to(p2[0], p2[1]) + else: + dash = kwargs['dash'] + pts = self._getLinePoints(p1, p2, dash) - currDash = 0 - dashOn = True - while currDash < (len(pts) - 1): - if dashOn: - p1 = pts[currDash] - p2 = pts[currDash + 1] - self.ctx.move_to(p1[0], p1[1]) - self.ctx.line_to(p2[0], p2[1]) - currDash += 1 - dashOn = not dashOn + currDash = 0 + dashOn = True + while currDash < (len(pts) - 1): + if dashOn: + p1 = pts[currDash] + p2 = pts[currDash + 1] + self.ctx.move_to(p1[0], p1[1]) + self.ctx.line_to(p2[0], p2[1]) + currDash += 1 + dashOn = not dashOn - def addCanvasLine(self, p1, p2, color=(0, 0, 0), color2=None, **kwargs): - self.ctx.set_line_width(kwargs.get('linewidth', 1)) - if color2 and color2 != color: - mp = (p1[0] + p2[0]) / 2., (p1[1] + p2[1]) / 2. - self.ctx.set_source_rgb(*color) - self._doLine(p1, mp, **kwargs) - self.ctx.stroke() - self.ctx.set_source_rgb(*color2) - self._doLine(mp, p2, **kwargs) - self.ctx.stroke() - else: - self.ctx.set_source_rgb(*color) - self._doLine(p1, p2, **kwargs) - self.ctx.stroke() + def addCanvasLine(self, p1, p2, color=(0, 0, 0), color2=None, **kwargs): + self.ctx.set_line_width(kwargs.get('linewidth', 1)) + if color2 and color2 != color: + mp = (p1[0] + p2[0]) / 2., (p1[1] + p2[1]) / 2. + self.ctx.set_source_rgb(*color) + self._doLine(p1, mp, **kwargs) + self.ctx.stroke() + self.ctx.set_source_rgb(*color2) + self._doLine(mp, p2, **kwargs) + self.ctx.stroke() + else: + self.ctx.set_source_rgb(*color) + self._doLine(p1, p2, **kwargs) + self.ctx.stroke() - def _addCanvasText1(self, text, pos, font, color=(0, 0, 0), **kwargs): - if font.weight == 'bold': - weight = cairo.FONT_WEIGHT_BOLD - else: - weight = cairo.FONT_WEIGHT_NORMAL - self.ctx.select_font_face(font.face, cairo.FONT_SLANT_NORMAL, weight) - text = scriptPattern.sub('', text) - self.ctx.set_font_size(font.size) - w, h = self.ctx.text_extents(text)[2:4] - bw, bh = w + h * 0.4, h * 1.4 - offset = w * pos[2] - dPos = pos[0] - w / 2. + offset, pos[1] + h / 2. - self.ctx.set_source_rgb(*color) - self.ctx.move_to(*dPos) - self.ctx.show_text(text) + def _addCanvasText1(self, text, pos, font, color=(0, 0, 0), **kwargs): + if font.weight == 'bold': + weight = cairo.FONT_WEIGHT_BOLD + else: + weight = cairo.FONT_WEIGHT_NORMAL + self.ctx.select_font_face(font.face, cairo.FONT_SLANT_NORMAL, weight) + text = scriptPattern.sub('', text) + self.ctx.set_font_size(font.size) + w, h = self.ctx.text_extents(text)[2:4] + bw, bh = w + h * 0.4, h * 1.4 + offset = w * pos[2] + dPos = pos[0] - w / 2. + offset, pos[1] + h / 2. + self.ctx.set_source_rgb(*color) + self.ctx.move_to(*dPos) + self.ctx.show_text(text) - if 0: - self.ctx.move_to(dPos[0], dPos[1]) - self.ctx.line_to(dPos[0] + bw, dPos[1]) - self.ctx.line_to(dPos[0] + bw, dPos[1] - bh) - self.ctx.line_to(dPos[0], dPos[1] - bh) - self.ctx.line_to(dPos[0], dPos[1]) - self.ctx.close_path() - self.ctx.stroke() + if 0: + self.ctx.move_to(dPos[0], dPos[1]) + self.ctx.line_to(dPos[0] + bw, dPos[1]) + self.ctx.line_to(dPos[0] + bw, dPos[1] - bh) + self.ctx.line_to(dPos[0], dPos[1] - bh) + self.ctx.line_to(dPos[0], dPos[1]) + self.ctx.close_path() + self.ctx.stroke() - return (bw, bh, offset) + return (bw, bh, offset) - def _addCanvasText2(self, text, pos, font, color=(0, 0, 0), **kwargs): - if font.weight == 'bold': - weight = cairo.FONT_WEIGHT_BOLD - else: - weight = cairo.FONT_WEIGHT_NORMAL - self.ctx.select_font_face(font.face, cairo.FONT_SLANT_NORMAL, weight) - orientation = kwargs.get('orientation', 'E') + def _addCanvasText2(self, text, pos, font, color=(0, 0, 0), **kwargs): + if font.weight == 'bold': + weight = cairo.FONT_WEIGHT_BOLD + else: + weight = cairo.FONT_WEIGHT_NORMAL + self.ctx.select_font_face(font.face, cairo.FONT_SLANT_NORMAL, weight) + orientation = kwargs.get('orientation', 'E') - plainText = scriptPattern.sub('', text) + plainText = scriptPattern.sub('', text) - # for whatever reason, the font size using pango is larger - # than that w/ default cairo (at least for me) - pangoCoeff = 0.8 + # for whatever reason, the font size using pango is larger + # than that w/ default cairo (at least for me) + pangoCoeff = 0.8 - if have_cairocffi: - measureLout = pangocairo.pango_cairo_create_layout(self.ctx._pointer) - pango.pango_layout_set_alignment(measureLout, pango.PANGO_ALIGN_LEFT) - pango.pango_layout_set_markup(measureLout, plainText.encode('latin1'), -1) - lout = pangocairo.pango_cairo_create_layout(self.ctx._pointer) - pango.pango_layout_set_alignment(lout, pango.PANGO_ALIGN_LEFT) - pango.pango_layout_set_markup(lout, text.encode('latin1'), -1) - fnt = pango.pango_font_description_new() - pango.pango_font_description_set_family(fnt, font.face.encode('latin1')) - pango.pango_font_description_set_size(fnt, - int(round(font.size * pango.PANGO_SCALE * pangoCoeff))) - pango.pango_layout_set_font_description(lout, fnt) - pango.pango_layout_set_font_description(measureLout, fnt) - pango.pango_font_description_free(fnt) - else: - cctx = pangocairo.CairoContext(self.ctx) - measureLout = cctx.create_layout() - measureLout.set_alignment(pango.ALIGN_LEFT) - measureLout.set_markup(plainText) - lout = cctx.create_layout() - lout.set_alignment(pango.ALIGN_LEFT) - lout.set_markup(text) - fnt = pango.FontDescription('%s %d' % (font.face, font.size * pangoCoeff)) - lout.set_font_description(fnt) - measureLout.set_font_description(fnt) + if have_cairocffi: + measureLout = pangocairo.pango_cairo_create_layout(self.ctx._pointer) + pango.pango_layout_set_alignment(measureLout, pango.PANGO_ALIGN_LEFT) + pango.pango_layout_set_markup(measureLout, plainText.encode('latin1'), -1) + lout = pangocairo.pango_cairo_create_layout(self.ctx._pointer) + pango.pango_layout_set_alignment(lout, pango.PANGO_ALIGN_LEFT) + pango.pango_layout_set_markup(lout, text.encode('latin1'), -1) + fnt = pango.pango_font_description_new() + pango.pango_font_description_set_family(fnt, font.face.encode('latin1')) + pango.pango_font_description_set_size(fnt, + int(round(font.size * pango.PANGO_SCALE * pangoCoeff))) + pango.pango_layout_set_font_description(lout, fnt) + pango.pango_layout_set_font_description(measureLout, fnt) + pango.pango_font_description_free(fnt) + else: + cctx = pangocairo.CairoContext(self.ctx) + measureLout = cctx.create_layout() + measureLout.set_alignment(pango.ALIGN_LEFT) + measureLout.set_markup(plainText) + lout = cctx.create_layout() + lout.set_alignment(pango.ALIGN_LEFT) + lout.set_markup(text) + fnt = pango.FontDescription('%s %d' % (font.face, font.size * pangoCoeff)) + lout.set_font_description(fnt) + measureLout.set_font_description(fnt) - # this is a bit kludgy, but empirically we end up with too much - # vertical padding if we use the text box with super and subscripts - # for the measurement. - if have_cairocffi: - iext = ffi.new('PangoRectangle *') - lext = ffi.new('PangoRectangle *') - iext2 = ffi.new('PangoRectangle *') - lext2 = ffi.new('PangoRectangle *') - pango.pango_layout_get_pixel_extents(measureLout, iext, lext) - pango.pango_layout_get_pixel_extents(lout, iext2, lext2) - w = lext2.width - lext2.x - h = lext.height - lext.y - else: - iext, lext = measureLout.get_pixel_extents() - iext2, lext2 = lout.get_pixel_extents() - w = lext2[2] - lext2[0] - h = lext[3] - lext[1] - pad = [h * .2, h * .3] - # another empirical correction: labels draw at the bottom - # of bonds have too much vertical padding - if orientation == 'S': - pad[1] *= 0.5 - bw, bh = w + pad[0], h + pad[1] - offset = w * pos[2] - if 0: - if orientation == 'W': - dPos = pos[0] - w + offset, pos[1] - h / 2. - elif orientation == 'E': - dPos = pos[0] - w / 2 + offset, pos[1] - h / 2. - else: - dPos = pos[0] - w / 2 + offset, pos[1] - h / 2. - self.ctx.move_to(dPos[0], dPos[1]) - else: - dPos = pos[0] - w / 2. + offset, pos[1] - h / 2. - self.ctx.move_to(dPos[0], dPos[1]) + # this is a bit kludgy, but empirically we end up with too much + # vertical padding if we use the text box with super and subscripts + # for the measurement. + if have_cairocffi: + iext = ffi.new('PangoRectangle *') + lext = ffi.new('PangoRectangle *') + iext2 = ffi.new('PangoRectangle *') + lext2 = ffi.new('PangoRectangle *') + pango.pango_layout_get_pixel_extents(measureLout, iext, lext) + pango.pango_layout_get_pixel_extents(lout, iext2, lext2) + w = lext2.width - lext2.x + h = lext.height - lext.y + else: + iext, lext = measureLout.get_pixel_extents() + iext2, lext2 = lout.get_pixel_extents() + w = lext2[2] - lext2[0] + h = lext[3] - lext[1] + pad = [h * .2, h * .3] + # another empirical correction: labels draw at the bottom + # of bonds have too much vertical padding + if orientation == 'S': + pad[1] *= 0.5 + bw, bh = w + pad[0], h + pad[1] + offset = w * pos[2] + if 0: + if orientation == 'W': + dPos = pos[0] - w + offset, pos[1] - h / 2. + elif orientation == 'E': + dPos = pos[0] - w / 2 + offset, pos[1] - h / 2. + else: + dPos = pos[0] - w / 2 + offset, pos[1] - h / 2. + self.ctx.move_to(dPos[0], dPos[1]) + else: + dPos = pos[0] - w / 2. + offset, pos[1] - h / 2. + self.ctx.move_to(dPos[0], dPos[1]) - self.ctx.set_source_rgb(*color) - if have_cairocffi: - pangocairo.pango_cairo_update_layout(self.ctx._pointer, lout) - pangocairo.pango_cairo_show_layout(self.ctx._pointer, lout) - gobject.g_object_unref(lout) - gobject.g_object_unref(measureLout) - else: - cctx.update_layout(lout) - cctx.show_layout(lout) + self.ctx.set_source_rgb(*color) + if have_cairocffi: + pangocairo.pango_cairo_update_layout(self.ctx._pointer, lout) + pangocairo.pango_cairo_show_layout(self.ctx._pointer, lout) + gobject.g_object_unref(lout) + gobject.g_object_unref(measureLout) + else: + cctx.update_layout(lout) + cctx.show_layout(lout) - if 0: - self.ctx.move_to(dPos[0], dPos[1]) - self.ctx.line_to(dPos[0] + bw, dPos[1]) - self.ctx.line_to(dPos[0] + bw, dPos[1] + bh) - self.ctx.line_to(dPos[0], dPos[1] + bh) - self.ctx.line_to(dPos[0], dPos[1]) - self.ctx.close_path() - self.ctx.stroke() + if 0: + self.ctx.move_to(dPos[0], dPos[1]) + self.ctx.line_to(dPos[0] + bw, dPos[1]) + self.ctx.line_to(dPos[0] + bw, dPos[1] + bh) + self.ctx.line_to(dPos[0], dPos[1] + bh) + self.ctx.line_to(dPos[0], dPos[1]) + self.ctx.close_path() + self.ctx.stroke() - return (bw, bh, offset) + return (bw, bh, offset) - def addCanvasText(self, text, pos, font, color=(0, 0, 0), **kwargs): - if have_pango: - textSize = self._addCanvasText2(text, pos, font, color, **kwargs) - else: - textSize = self._addCanvasText1(text, pos, font, color, **kwargs) - return textSize + def addCanvasText(self, text, pos, font, color=(0, 0, 0), **kwargs): + if have_pango: + textSize = self._addCanvasText2(text, pos, font, color, **kwargs) + else: + textSize = self._addCanvasText1(text, pos, font, color, **kwargs) + return textSize - def addCanvasPolygon(self, ps, color=(0, 0, 0), fill=True, stroke=False, **kwargs): - if not fill and not stroke: - return - self.ctx.set_source_rgb(*color) - self.ctx.move_to(ps[0][0], ps[0][1]) - for p in ps[1:]: - self.ctx.line_to(p[0], p[1]) - self.ctx.close_path() - if stroke: - if fill: - self.ctx.stroke_preserve() - else: + def addCanvasPolygon(self, ps, color=(0, 0, 0), fill=True, stroke=False, **kwargs): + if not fill and not stroke: + return + self.ctx.set_source_rgb(*color) + self.ctx.move_to(ps[0][0], ps[0][1]) + for p in ps[1:]: + self.ctx.line_to(p[0], p[1]) + self.ctx.close_path() + if stroke: + if fill: + self.ctx.stroke_preserve() + else: + self.ctx.stroke() + if fill: + self.ctx.fill() + + def addCanvasDashedWedge(self, p1, p2, p3, dash=(2, 2), color=(0, 0, 0), color2=None, **kwargs): + self.ctx.set_line_width(kwargs.get('linewidth', 1)) + self.ctx.set_source_rgb(*color) + dash = (3, 3) + pts1 = self._getLinePoints(p1, p2, dash) + pts2 = self._getLinePoints(p1, p3, dash) + + if len(pts2) < len(pts1): + pts2, pts1 = pts1, pts2 + + for i in range(len(pts1)): + self.ctx.move_to(pts1[i][0], pts1[i][1]) + self.ctx.line_to(pts2[i][0], pts2[i][1]) self.ctx.stroke() - if fill: - self.ctx.fill() - def addCanvasDashedWedge(self, p1, p2, p3, dash=(2, 2), color=(0, 0, 0), color2=None, **kwargs): - self.ctx.set_line_width(kwargs.get('linewidth', 1)) - self.ctx.set_source_rgb(*color) - dash = (3, 3) - pts1 = self._getLinePoints(p1, p2, dash) - pts2 = self._getLinePoints(p1, p3, dash) - - if len(pts2) < len(pts1): - pts2, pts1 = pts1, pts2 - - for i in range(len(pts1)): - self.ctx.move_to(pts1[i][0], pts1[i][1]) - self.ctx.line_to(pts2[i][0], pts2[i][1]) - self.ctx.stroke() - - def addCircle(self, center, radius, color=(0, 0, 0), fill=True, stroke=False, alpha=1.0, - **kwargs): - if not fill and not stroke: - return - self.ctx.set_source_rgba(color[0], color[1], color[2], alpha) - self.ctx.arc(center[0], center[1], radius, 0, 2. * math.pi) - self.ctx.close_path() - if stroke: - if fill: - self.ctx.stroke_preserve() - else: - self.ctx.stroke() - if fill: - self.ctx.fill() + def addCircle(self, center, radius, color=(0, 0, 0), fill=True, stroke=False, alpha=1.0, + **kwargs): + if not fill and not stroke: + return + self.ctx.set_source_rgba(color[0], color[1], color[2], alpha) + self.ctx.arc(center[0], center[1], radius, 0, 2. * math.pi) + self.ctx.close_path() + if stroke: + if fill: + self.ctx.stroke_preserve() + else: + self.ctx.stroke() + if fill: + self.ctx.fill() diff --git a/rdkit/Chem/EState/EState.py b/rdkit/Chem/EState/EState.py index d7fe356f3..16f8c1868 100755 --- a/rdkit/Chem/EState/EState.py +++ b/rdkit/Chem/EState/EState.py @@ -11,7 +11,7 @@ """ Basic EState definitions """ -from __future__ import print_function + import numpy from rdkit import Chem diff --git a/rdkit/Chem/EState/Fingerprinter.py b/rdkit/Chem/EState/Fingerprinter.py index 344cab7cc..c7587cbdd 100755 --- a/rdkit/Chem/EState/Fingerprinter.py +++ b/rdkit/Chem/EState/Fingerprinter.py @@ -13,7 +13,7 @@ """ EState fingerprinting """ -from __future__ import print_function + import numpy from rdkit.Chem.EState import EStateIndices from rdkit.Chem.EState import AtomTypes diff --git a/rdkit/Chem/EState/UnitTestEState.py b/rdkit/Chem/EState/UnitTestEState.py index 437160e56..97662920c 100755 --- a/rdkit/Chem/EState/UnitTestEState.py +++ b/rdkit/Chem/EState/UnitTestEState.py @@ -13,11 +13,11 @@ validation values are from the paper (JCICS _31_ 76-81 (1991)) """ -from __future__ import print_function + import unittest -from rdkit.six import StringIO +from io import StringIO import numpy as np from rdkit import Chem @@ -26,116 +26,117 @@ from rdkit.Chem import EState class TestCase(unittest.TestCase): - def _compareEstates(self, val1, val2, msg, tol=1e-2): - maxV = max(abs(val1 - val2)) - self.assertLess(maxV, tol, msg) + def _compareEstates(self, val1, val2, msg, tol=1e-2): + maxV = max(abs(val1 - val2)) + self.assertLess(maxV, tol, msg) - def _validate(self, vals, places=2, tol=1e-2, debug=False): - for smi, ans in vals: - ans = np.array(ans) - mol = Chem.MolFromSmiles(smi) - inds = EState.EStateIndices(mol) - if debug: # pragma: nocover - print(inds) - self._compareEstates(ans, inds, 'bad EStates for smiles: {0}'.format(smi), tol=tol) + def _validate(self, vals, places=2, tol=1e-2, debug=False): + for smi, ans in vals: + ans = np.array(ans) + mol = Chem.MolFromSmiles(smi) + inds = EState.EStateIndices(mol) + if debug: # pragma: nocover + print(inds) + self._compareEstates(ans, inds, 'bad EStates for smiles: {0}'.format(smi), tol=tol) - self.assertLess(abs(EState.MaxEStateIndex(mol) - max(ans)), tol) - self.assertLess(abs(EState.MinEStateIndex(mol) - min(ans)), tol) - self.assertLess(abs(EState.MaxAbsEStateIndex(mol) - max(abs(ans))), tol) - self.assertLess(abs(EState.MinAbsEStateIndex(mol) - min(abs(ans))), tol) + self.assertLess(abs(EState.MaxEStateIndex(mol) - max(ans)), tol) + self.assertLess(abs(EState.MinEStateIndex(mol) - min(ans)), tol) + self.assertLess(abs(EState.MaxAbsEStateIndex(mol) - max(abs(ans))), tol) + self.assertLess(abs(EState.MinAbsEStateIndex(mol) - min(abs(ans))), tol) - def test_simpleMolecules(self): - data = [ - ('CCCC', [2.18, 1.32, 1.32, 2.18]), - ('CCCCC', [2.21, 1.34, 1.39, 1.34, 2.21]), - ('CCCCCCC', [2.24, 1.36, 1.42, 1.44, 1.42, 1.36, 2.24]), - ('CCCCCCCCCC', [2.27, 1.37, 1.44, 1.46, 1.47, 1.47, 1.46, 1.44, 1.37, 2.27]), - ] - self._validate(data) + def test_simpleMolecules(self): + data = [ + ('CCCC', [2.18, 1.32, 1.32, 2.18]), + ('CCCCC', [2.21, 1.34, 1.39, 1.34, 2.21]), + ('CCCCCCC', [2.24, 1.36, 1.42, 1.44, 1.42, 1.36, 2.24]), + ('CCCCCCCCCC', [2.27, 1.37, 1.44, 1.46, 1.47, 1.47, 1.46, 1.44, 1.37, 2.27]), + ] + self._validate(data) - def test_isomers(self): - data = [ - ('CCCCCC', [2.23, 1.36, 1.41, 1.41, 1.36, 2.23]), - ('CCC(C)CC', [2.23, 1.33, 0.94, 2.28, 1.33, 2.23]), - ('CC(C)CCC', [2.25, 0.90, 2.25, 1.38, 1.33, 2.22]), - ('CC(C)(C)CC', [2.24, 0.54, 2.24, 2.24, 1.27, 2.20]), - ] - self._validate(data) + def test_isomers(self): + data = [ + ('CCCCCC', [2.23, 1.36, 1.41, 1.41, 1.36, 2.23]), + ('CCC(C)CC', [2.23, 1.33, 0.94, 2.28, 1.33, 2.23]), + ('CC(C)CCC', [2.25, 0.90, 2.25, 1.38, 1.33, 2.22]), + ('CC(C)(C)CC', [2.24, 0.54, 2.24, 2.24, 1.27, 2.20]), + ] + self._validate(data) - def test_heteroatoms1(self): - data = [ - ('CCCCOCCCC', [2.18, 1.24, 1.21, 0.95, 5.31, 0.95, 1.21, 1.24, 2.18]), - ('CCC(C)OC(C)CC', [2.15, 1.12, 0.43, 2.12, 5.54, 0.43, 2.12, 1.12, 2.15]), - ('CC(C)(C)OC(C)(C)C', [2.07, -0.02, 2.07, 2.07, 5.63, -0.02, 2.07, 2.07, 2.07]), - ('CC(C)CC', [2.22, 0.88, 2.22, 1.31, 2.20]), - ('CC(C)CN', [2.10, 0.66, 2.10, 0.81, 5.17]), - ('CC(C)CO', [1.97, 0.44, 1.97, 0.31, 8.14]), - ('CC(C)CF', [1.85, 0.22, 1.85, -0.19, 11.11]), - ('CC(C)CCl', [2.09, 0.65, 2.09, 0.78, 5.34]), - ('CC(C)CBr', [2.17, 0.80, 2.17, 1.11, 3.31]), - ('CC(C)CI', [2.21, 0.87, 2.21, 1.28, 2.38]), - ] - self._validate(data, debug=False) + def test_heteroatoms1(self): + data = [ + ('CCCCOCCCC', [2.18, 1.24, 1.21, 0.95, 5.31, 0.95, 1.21, 1.24, 2.18]), + ('CCC(C)OC(C)CC', [2.15, 1.12, 0.43, 2.12, 5.54, 0.43, 2.12, 1.12, 2.15]), + ('CC(C)(C)OC(C)(C)C', [2.07, -0.02, 2.07, 2.07, 5.63, -0.02, 2.07, 2.07, 2.07]), + ('CC(C)CC', [2.22, 0.88, 2.22, 1.31, 2.20]), + ('CC(C)CN', [2.10, 0.66, 2.10, 0.81, 5.17]), + ('CC(C)CO', [1.97, 0.44, 1.97, 0.31, 8.14]), + ('CC(C)CF', [1.85, 0.22, 1.85, -0.19, 11.11]), + ('CC(C)CCl', [2.09, 0.65, 2.09, 0.78, 5.34]), + ('CC(C)CBr', [2.17, 0.80, 2.17, 1.11, 3.31]), + ('CC(C)CI', [2.21, 0.87, 2.21, 1.28, 2.38]), + ] + self._validate(data, debug=False) - def test_heteroatoms2(self): - data = [ - ('CC(N)C(=O)O', [1.42, -0.73, 4.84, -0.96, 9.57, 7.86]), - ('CCOCC', [1.99, 0.84, 4.83, 0.84, 1.99]), - ('CCSCC', [2.17, 1.26, 1.96, 1.26, 2.17]), # NOTE: this doesn't match the values in the paper - ('CC(=O)OC', [1.36, -0.24, 9.59, 4.11, 1.35]), - ('CC(=S)OC', [1.73, 0.59, 4.47, 4.48, 1.56]), - ] - self._validate(data, debug=False) + def test_heteroatoms2(self): + data = [ + ('CC(N)C(=O)O', [1.42, -0.73, 4.84, -0.96, 9.57, 7.86]), + ('CCOCC', [1.99, 0.84, 4.83, 0.84, 1.99]), + # NOTE: this doesn't match the values in the paper + ('CCSCC', [2.17, 1.26, 1.96, 1.26, 2.17]), + ('CC(=O)OC', [1.36, -0.24, 9.59, 4.11, 1.35]), + ('CC(=S)OC', [1.73, 0.59, 4.47, 4.48, 1.56]), + ] + self._validate(data, debug=False) - def test_aromatics(self): - # aromatics with heteroatoms - data = [ - ('Fc1ccc(C)cc1', [12.09, -0.17, 1.45, 1.75, 1.09, 1.93, 1.75, 1.45]), - ('Clc1ccc(C)cc1', [5.61, 0.80, 1.89, 1.99, 1.24, 2.04, 1.99, 1.89]), - ('Brc1ccc(C)cc1', [3.35, 1.14, 2.04, 2.07, 1.30, 2.08, 2.07, 2.04]), - ('Ic1ccc(C)cc1', [2.30, 1.30, 2.10, 2.11, 1.32, 2.09, 2.11, 2.10]), - ] - self._validate(data, debug=False) + def test_aromatics(self): + # aromatics with heteroatoms + data = [ + ('Fc1ccc(C)cc1', [12.09, -0.17, 1.45, 1.75, 1.09, 1.93, 1.75, 1.45]), + ('Clc1ccc(C)cc1', [5.61, 0.80, 1.89, 1.99, 1.24, 2.04, 1.99, 1.89]), + ('Brc1ccc(C)cc1', [3.35, 1.14, 2.04, 2.07, 1.30, 2.08, 2.07, 2.04]), + ('Ic1ccc(C)cc1', [2.30, 1.30, 2.10, 2.11, 1.32, 2.09, 2.11, 2.10]), + ] + self._validate(data, debug=False) - def test_GetPrincipleQuantumNumber(self): - for principalQN, (nmin, nmax) in enumerate( - [(1, 2), (3, 10), (11, 18), (19, 36), (37, 54), (55, 86), (87, 120)], 1): - for n in range(nmin, nmax + 1): - self.assertEqual(EState.GetPrincipleQuantumNumber(n), principalQN) + def test_GetPrincipleQuantumNumber(self): + for principalQN, (nmin, nmax) in enumerate( + [(1, 2), (3, 10), (11, 18), (19, 36), (37, 54), (55, 86), (87, 120)], 1): + for n in range(nmin, nmax + 1): + self.assertEqual(EState.GetPrincipleQuantumNumber(n), principalQN) - def test_cacheEstate(self): - mol = Chem.MolFromSmiles('CCCC') - expected = [2.18, 1.32, 1.32, 2.18] + def test_cacheEstate(self): + mol = Chem.MolFromSmiles('CCCC') + expected = [2.18, 1.32, 1.32, 2.18] - # The mol object has no information about E-states - self.assertFalse(hasattr(mol, '_eStateIndices')) - inds = EState.EStateIndices(mol) - self._compareEstates(inds, expected, 'cacheTest') + # The mol object has no information about E-states + self.assertFalse(hasattr(mol, '_eStateIndices')) + inds = EState.EStateIndices(mol) + self._compareEstates(inds, expected, 'cacheTest') - # We now have E-states stored with the molecule - self.assertTrue(hasattr(mol, '_eStateIndices')) + # We now have E-states stored with the molecule + self.assertTrue(hasattr(mol, '_eStateIndices')) - # Let's make sure that we skip the calculation next time if force is False - mol._eStateIndices = 'cached' - self.assertTrue(hasattr(mol, '_eStateIndices')) + # Let's make sure that we skip the calculation next time if force is False + mol._eStateIndices = 'cached' + self.assertTrue(hasattr(mol, '_eStateIndices')) - inds = EState.EStateIndices(mol, force=False) - self.assertEqual(inds, 'cached') + inds = EState.EStateIndices(mol, force=False) + self.assertEqual(inds, 'cached') - # But with force (default) we calculate again - inds = EState.EStateIndices(mol) - self._compareEstates(inds, expected, 'cacheTest') - self._compareEstates(mol._eStateIndices, expected, 'cacheTest') + # But with force (default) we calculate again + inds = EState.EStateIndices(mol) + self._compareEstates(inds, expected, 'cacheTest') + self._compareEstates(mol._eStateIndices, expected, 'cacheTest') - def test_exampleCode(self): - # We make sure that the example code runs - from rdkit.TestRunner import redirect_stdout - f = StringIO() - with redirect_stdout(f): - EState.EState._exampleCode() - s = f.getvalue() - self.assertIn('CC(N)C(=O)O', s) + def test_exampleCode(self): + # We make sure that the example code runs + from rdkit.TestRunner import redirect_stdout + f = StringIO() + with redirect_stdout(f): + EState.EState._exampleCode() + s = f.getvalue() + self.assertIn('CC(N)C(=O)O', s) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/EState/UnitTestFingerprints.py b/rdkit/Chem/EState/UnitTestFingerprints.py index 7d6ee8e49..4fd332385 100755 --- a/rdkit/Chem/EState/UnitTestFingerprints.py +++ b/rdkit/Chem/EState/UnitTestFingerprints.py @@ -13,12 +13,12 @@ validation values are from the paper (JCICS _35_ 1039-1045 (1995)) """ -from __future__ import print_function + import unittest import numpy -from rdkit.six import StringIO +from io import StringIO from rdkit import Chem from rdkit.Chem.EState import Fingerprinter @@ -26,39 +26,39 @@ from rdkit.Chem.EState import Fingerprinter class TestCase(unittest.TestCase): - def _validate(self, vals, tol=1e-2, show=False): - for smi, c, v in vals: - mol = Chem.MolFromSmiles(smi) - counts, vals = Fingerprinter.FingerprintMol(mol) - counts = counts[numpy.nonzero(counts)] - vals = vals[numpy.nonzero(vals)] - if show: - print(counts) - print(vals) - assert len(c) == len(counts), 'bad count len for smiles: %s' % (smi) - assert len(v) == len(vals), 'bad val len for smiles: %s' % (smi) - c = numpy.array(c) - assert max(abs(c - counts)) < tol, 'bad count for SMILES: %s' % (smi) - v = numpy.array(v) - assert max(abs(v - vals)) < tol, 'bad val for SMILES: %s' % (smi) + def _validate(self, vals, tol=1e-2, show=False): + for smi, c, v in vals: + mol = Chem.MolFromSmiles(smi) + counts, vals = Fingerprinter.FingerprintMol(mol) + counts = counts[numpy.nonzero(counts)] + vals = vals[numpy.nonzero(vals)] + if show: + print(counts) + print(vals) + assert len(c) == len(counts), 'bad count len for smiles: %s' % (smi) + assert len(v) == len(vals), 'bad val len for smiles: %s' % (smi) + c = numpy.array(c) + assert max(abs(c - counts)) < tol, 'bad count for SMILES: %s' % (smi) + v = numpy.array(v) + assert max(abs(v - vals)) < tol, 'bad val for SMILES: %s' % (smi) - def test1_molecules(self): - data = [ - ('c1[nH]cnc1CC(N)C(O)=O', [1, 2, 1, 1, 1, 1, 1, 1, 1, 1], - [0.26, 3.12, -0.86, -1.01, 0.67, 5.25, 2.71, 3.84, 8.42, 10.26]), - ('NCCc1ccc(O)c(O)c1', [2, 3, 3, 1, 2], [1.26, 4.71, 0.75, 5.30, 17.97]), - ] - self._validate(data, show=False) + def test1_molecules(self): + data = [ + ('c1[nH]cnc1CC(N)C(O)=O', [1, 2, 1, 1, 1, 1, 1, 1, 1, 1], + [0.26, 3.12, -0.86, -1.01, 0.67, 5.25, 2.71, 3.84, 8.42, 10.26]), + ('NCCc1ccc(O)c(O)c1', [2, 3, 3, 1, 2], [1.26, 4.71, 0.75, 5.30, 17.97]), + ] + self._validate(data, show=False) - def test_exampleCode(self): - # We make sure that the example code runs - from rdkit.TestRunner import redirect_stdout - f = StringIO() - with redirect_stdout(f): - Fingerprinter._exampleCode() - s = f.getvalue() - self.assertIn('NCCc1ccc(O)c(O)c1', s) + def test_exampleCode(self): + # We make sure that the example code runs + from rdkit.TestRunner import redirect_stdout + f = StringIO() + with redirect_stdout(f): + Fingerprinter._exampleCode() + s = f.getvalue() + self.assertIn('NCCc1ccc(O)c(O)c1', s) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/rdkit/Chem/EState/UnitTestTypes.py b/rdkit/Chem/EState/UnitTestTypes.py index 0f1f13272..342a9cf02 100755 --- a/rdkit/Chem/EState/UnitTestTypes.py +++ b/rdkit/Chem/EState/UnitTestTypes.py @@ -13,7 +13,7 @@ validation values are from the paper (JCICS _35_ 1039-1045 (1995)) """ -from __future__ import print_function + import unittest from rdkit import Chem from rdkit.Chem.EState import AtomTypes diff --git a/rdkit/Chem/EState/UnitTestVSA.py b/rdkit/Chem/EState/UnitTestVSA.py index 445d3126d..650dad563 100644 --- a/rdkit/Chem/EState/UnitTestVSA.py +++ b/rdkit/Chem/EState/UnitTestVSA.py @@ -11,7 +11,7 @@ """ unit testing code for MOE-type descriptors with EStates """ -from __future__ import print_function + import os import unittest diff --git a/rdkit/Chem/EnumerateStereoisomers.py b/rdkit/Chem/EnumerateStereoisomers.py index c793305cd..e19d66e72 100644 --- a/rdkit/Chem/EnumerateStereoisomers.py +++ b/rdkit/Chem/EnumerateStereoisomers.py @@ -1,8 +1,8 @@ -from rdkit import six import random from rdkit import Chem from rdkit.Chem.rdDistGeom import EmbedMolecule + class StereoEnumerationOptions(object): """ - tryEmbedding: if set the process attempts to generate a standard RDKit distance geometry @@ -24,9 +24,11 @@ class StereoEnumerationOptions(object): - onlyStereoGroups: Only find stereoisomers that differ at the StereoGroups associated with the molecule. """ - __slots__ = ('tryEmbedding', 'onlyUnassigned', 'onlyStereoGroups', 'maxIsomers', 'rand', 'unique') - def __init__(self, tryEmbedding = False, onlyUnassigned = True, - maxIsomers = 1024, rand = None, unique = True, + __slots__ = ('tryEmbedding', 'onlyUnassigned', + 'onlyStereoGroups', 'maxIsomers', 'rand', 'unique') + + def __init__(self, tryEmbedding=False, onlyUnassigned=True, + maxIsomers=1024, rand=None, unique=True, onlyStereoGroups=False): self.tryEmbedding = tryEmbedding self.onlyUnassigned = onlyUnassigned @@ -35,6 +37,7 @@ class StereoEnumerationOptions(object): self.rand = rand self.unique = unique + class _BondFlipper(object): def __init__(self, bond): self.bond = bond @@ -45,6 +48,7 @@ class _BondFlipper(object): else: self.bond.SetStereo(Chem.BondStereo.STEREOTRANS) + class _AtomFlipper(object): def __init__(self, atom): self.atom = atom @@ -80,32 +84,34 @@ def _getFlippers(mol, options): for atom in mol.GetAtoms(): if atom.HasProp("_ChiralityPossible"): if (not options.onlyUnassigned or - atom.GetChiralTag() == Chem.ChiralType.CHI_UNSPECIFIED): + atom.GetChiralTag() == Chem.ChiralType.CHI_UNSPECIFIED): flippers.append(_AtomFlipper(atom)) for bond in mol.GetBonds(): bstereo = bond.GetStereo() if bstereo != Chem.BondStereo.STEREONONE: if (not options.onlyUnassigned or - bstereo == Chem.BondStereo.STEREOANY): + bstereo == Chem.BondStereo.STEREOANY): flippers.append(_BondFlipper(bond)) if options.onlyUnassigned: # otherwise these will be counted twice for group in mol.GetStereoGroups(): - if group.GetGroupType() != Chem.StereoGroupType.STEREO_ABSOLUTE: + if group.GetGroupType() != Chem.StereoGroupType.STEREO_ABSOLUTE: flippers.append(_StereoGroupFlipper(group)) return flippers + class _RangeBitsGenerator(object): def __init__(self, nCenters): self.nCenters = nCenters def __iter__(self): - for val in six.moves.range(2**self.nCenters): + for val in range(2**self.nCenters): yield val + class _UniqueRandomBitsGenerator(object): def __init__(self, nCenters, maxIsomers, rand): self.nCenters = nCenters @@ -125,6 +131,7 @@ class _UniqueRandomBitsGenerator(object): self.already_seen.add(bits) yield bits + def EnumerateStereoisomers(m, options=StereoEnumerationOptions(), verbose=False): """ returns a generator that yields possible stereoisomers for a molecule diff --git a/rdkit/Chem/FeatFinderCLI.py b/rdkit/Chem/FeatFinderCLI.py index 94d052f7d..bbecff41e 100644 --- a/rdkit/Chem/FeatFinderCLI.py +++ b/rdkit/Chem/FeatFinderCLI.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import argparse import re diff --git a/rdkit/Chem/FeatMaps/UnitTestFeatMapParser.py b/rdkit/Chem/FeatMaps/UnitTestFeatMapParser.py index c71b76576..07d0cf4a6 100644 --- a/rdkit/Chem/FeatMaps/UnitTestFeatMapParser.py +++ b/rdkit/Chem/FeatMaps/UnitTestFeatMapParser.py @@ -11,17 +11,17 @@ from contextlib import closing import unittest -from rdkit.six import StringIO +from io import StringIO from rdkit.Chem.FeatMaps import FeatMaps, FeatMapParser def feq(n1, n2, tol=1e-5): - return abs(n1 - n2) <= tol + return abs(n1 - n2) <= tol class TestCase(unittest.TestCase): - data = """ + data = """ ScoreMode=Best DirScoreMode=DotFullRange @@ -40,89 +40,89 @@ EndPoints """ - def test1Basics(self): - p = FeatMapParser.FeatMapParser() - p.SetData(self.data) - fm = p.Parse() - self.assertTrue(fm.scoreMode == FeatMaps.FeatMapScoreMode.Best) - self.assertTrue(fm.dirScoreMode == FeatMaps.FeatDirScoreMode.DotFullRange) - self.assertTrue(fm.GetNumFeatures() == 3) + def test1Basics(self): + p = FeatMapParser.FeatMapParser() + p.SetData(self.data) + fm = p.Parse() + self.assertTrue(fm.scoreMode == FeatMaps.FeatMapScoreMode.Best) + self.assertTrue(fm.dirScoreMode == FeatMaps.FeatDirScoreMode.DotFullRange) + self.assertTrue(fm.GetNumFeatures() == 3) - feats = fm.GetFeatures() - self.assertTrue(feq(feats[0].weight, 1.25)) - self.assertTrue(feq(feats[1].weight, 2.0)) - self.assertTrue(feq(feats[2].weight, 1.25)) + feats = fm.GetFeatures() + self.assertTrue(feq(feats[0].weight, 1.25)) + self.assertTrue(feq(feats[1].weight, 2.0)) + self.assertTrue(feq(feats[2].weight, 1.25)) - self.assertTrue(len(feats[0].featDirs) == 1) - self.assertTrue(len(feats[1].featDirs) == 2) - self.assertTrue(len(feats[2].featDirs) == 0) + self.assertTrue(len(feats[0].featDirs) == 1) + self.assertTrue(len(feats[1].featDirs) == 2) + self.assertTrue(len(feats[2].featDirs) == 0) - fams = [x.GetFamily() for x in feats] - self.assertTrue(fams == ['Acceptor', 'Aromatic', 'Acceptor']) + fams = [x.GetFamily() for x in feats] + self.assertTrue(fams == ['Acceptor', 'Aromatic', 'Acceptor']) - def test_FeatMapParser(self): - # We can use a string - p = FeatMapParser.FeatMapParser(data=self.data) - fm = p.Parse() - self.assertEqual(fm.GetNumFeatures(), 3) - self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], - ['Acceptor', 'Aromatic', 'Acceptor']) + def test_FeatMapParser(self): + # We can use a string + p = FeatMapParser.FeatMapParser(data=self.data) + fm = p.Parse() + self.assertEqual(fm.GetNumFeatures(), 3) + self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], + ['Acceptor', 'Aromatic', 'Acceptor']) - # We can use a list of strings - p = FeatMapParser.FeatMapParser(data=self.data.split('\n')) - fm = p.Parse() - self.assertEqual(fm.GetNumFeatures(), 3) - self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], - ['Acceptor', 'Aromatic', 'Acceptor']) + # We can use a list of strings + p = FeatMapParser.FeatMapParser(data=self.data.split('\n')) + fm = p.Parse() + self.assertEqual(fm.GetNumFeatures(), 3) + self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], + ['Acceptor', 'Aromatic', 'Acceptor']) - # and a stream - with closing(StringIO(self.data)) as file: - p = FeatMapParser.FeatMapParser(file=file) - fm = p.Parse() - self.assertEqual(fm.GetNumFeatures(), 3) - self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], - ['Acceptor', 'Aromatic', 'Acceptor']) + # and a stream + with closing(StringIO(self.data)) as file: + p = FeatMapParser.FeatMapParser(file=file) + fm = p.Parse() + self.assertEqual(fm.GetNumFeatures(), 3) + self.assertEqual([x.GetFamily() for x in fm.GetFeatures()], + ['Acceptor', 'Aromatic', 'Acceptor']) - def test_ParseErrors(self): - # Typos in scoreMode or dirscoreMode section - data = "scoreMode = typo\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + def test_ParseErrors(self): + # Typos in scoreMode or dirscoreMode section + data = "scoreMode = typo\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - data = "dirscoremode = typo\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "dirscoremode = typo\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - data = "typo = All\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "typo = All\nbeginParams\nfamily=Acceptor radius=1.5\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - # Typos in paramBlock - data = "beginTypo\nfamily=Acceptor radius=1.5\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + # Typos in paramBlock + data = "beginTypo\nfamily=Acceptor radius=1.5\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - data = "beginParams\nfamily=Acceptor radius=1.5\nEndTypo" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "beginParams\nfamily=Acceptor radius=1.5\nEndTypo" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - data = "beginParams\ntypo=Acceptor radius=1.5\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "beginParams\ntypo=Acceptor radius=1.5\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - data = "beginParams\nprofile=Typo\nEndParams" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "beginParams\nprofile=Typo\nEndParams" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) - # Typos in points block - data = "BeginPoints\npos=(1.0, 0.0, 5.0, 4.0)\nEndPoints" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(ValueError, p.Parse) + # Typos in points block + data = "BeginPoints\npos=(1.0, 0.0, 5.0, 4.0)\nEndPoints" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(ValueError, p.Parse) - data = "BeginPoints\npos=(1.0, 0.0, 5.0) typo=Acceptor\nEndPoints" - p = FeatMapParser.FeatMapParser(data=data) - self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) + data = "BeginPoints\npos=(1.0, 0.0, 5.0) typo=Acceptor\nEndPoints" + p = FeatMapParser.FeatMapParser(data=data) + self.assertRaises(FeatMapParser.FeatMapParseError, p.Parse) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Features/ShowFeats.py b/rdkit/Chem/Features/ShowFeats.py index 4ad1f6bc8..fb5d71cdf 100644 --- a/rdkit/Chem/Features/ShowFeats.py +++ b/rdkit/Chem/Features/ShowFeats.py @@ -3,7 +3,7 @@ # Created by Greg Landrum Aug 2006 # # -from __future__ import print_function + _version = "0.3.2" diff --git a/rdkit/Chem/Fingerprints/ClusterMols.py b/rdkit/Chem/Fingerprints/ClusterMols.py index 4e6a17282..498daffd5 100755 --- a/rdkit/Chem/Fingerprints/ClusterMols.py +++ b/rdkit/Chem/Fingerprints/ClusterMols.py @@ -17,14 +17,14 @@ Sample Usage: --actTable="dop_test" --actName="moa_quant" """ -from __future__ import print_function + import numpy from rdkit import DataStructs from rdkit.Chem.Fingerprints import FingerprintMols, MolSimilarity from rdkit.ML.Cluster import Murtagh -from rdkit.six.moves import cPickle +import pickle message = FingerprintMols.message error = FingerprintMols.error @@ -110,7 +110,7 @@ def ClusterFromDetails(details): clustTree = ClusterPoints(data, details.metric, details.clusterAlgo, haveLabels=0, haveActs=1) if outF: - cPickle.dump(clustTree, outF) + pickle.dump(clustTree, outF) return clustTree diff --git a/rdkit/Chem/Fingerprints/DbFpSupplier.py b/rdkit/Chem/Fingerprints/DbFpSupplier.py index 29fced930..0b291c9c4 100755 --- a/rdkit/Chem/Fingerprints/DbFpSupplier.py +++ b/rdkit/Chem/Fingerprints/DbFpSupplier.py @@ -12,167 +12,163 @@ """ from rdkit import DataStructs -from rdkit import six from rdkit.VLib.Node import VLibNode -from rdkit.six.moves import cPickle +import pickle class DbFpSupplier(VLibNode): - """ - new fps come back with all additional fields from the - database set in a "_fieldsFromDb" data member + """ + new fps come back with all additional fields from the + database set in a "_fieldsFromDb" data member - """ - - def __init__(self, dbResults, fpColName='AutoFragmentFp', usePickles=True): """ - DbResults should be a subclass of Dbase.DbResultSet.DbResultBase + def __init__(self, dbResults, fpColName='AutoFragmentFp', usePickles=True): + """ - """ - VLibNode.__init__(self) - self._usePickles = usePickles - self._data = dbResults - self._fpColName = fpColName.upper() - self._colNames = [x.upper() for x in self._data.GetColumnNames()] - if self._fpColName not in self._colNames: - raise ValueError('fp column name "%s" not found in result set: %s' % - (self._fpColName, str(self._colNames))) - self.fpCol = self._colNames.index(self._fpColName) - del self._colNames[self.fpCol] - self._colNames = tuple(self._colNames) - self._numProcessed = 0 + DbResults should be a subclass of Dbase.DbResultSet.DbResultBase - def GetColumnNames(self): - return self._colNames + """ + VLibNode.__init__(self) + self._usePickles = usePickles + self._data = dbResults + self._fpColName = fpColName.upper() + self._colNames = [x.upper() for x in self._data.GetColumnNames()] + if self._fpColName not in self._colNames: + raise ValueError('fp column name "%s" not found in result set: %s' % + (self._fpColName, str(self._colNames))) + self.fpCol = self._colNames.index(self._fpColName) + del self._colNames[self.fpCol] + self._colNames = tuple(self._colNames) + self._numProcessed = 0 - def _BuildFp(self, data): - data = list(data) - if six.PY3: - pkl = bytes(data[self.fpCol], encoding='Latin1') - else: - pkl = str(data[self.fpCol]) - del data[self.fpCol] - self._numProcessed += 1 - try: - if self._usePickles: - newFp = cPickle.loads(pkl, encoding='bytes') - else: - newFp = DataStructs.ExplicitBitVect(pkl) - except Exception: - import traceback - traceback.print_exc() - newFp = None - if newFp: - newFp._fieldsFromDb = data - return newFp + def GetColumnNames(self): + return self._colNames - def next(self): - itm = self.NextItem() - if itm is None: - raise StopIteration - return itm + def _BuildFp(self, data): + data = list(data) + pkl = bytes(data[self.fpCol], encoding='Latin1') + del data[self.fpCol] + self._numProcessed += 1 + try: + if self._usePickles: + newFp = pickle.loads(pkl, encoding='bytes') + else: + newFp = DataStructs.ExplicitBitVect(pkl) + except Exception: + import traceback + traceback.print_exc() + newFp = None + if newFp: + newFp._fieldsFromDb = data + return newFp - __next__ = next # py3 + def next(self): + itm = self.NextItem() + if itm is None: + raise StopIteration + return itm + + __next__ = next # py3 class ForwardDbFpSupplier(DbFpSupplier): - """ DbFp supplier supporting only forward iteration + """ DbFp supplier supporting only forward iteration - >>> from rdkit import RDConfig - >>> from rdkit.Dbase.DbConnection import DbConnect - >>> fName = RDConfig.RDTestDatabase - >>> conn = DbConnect(fName,'simple_combined') - >>> suppl = ForwardDbFpSupplier(conn.GetData()) + >>> from rdkit import RDConfig + >>> from rdkit.Dbase.DbConnection import DbConnect + >>> fName = RDConfig.RDTestDatabase + >>> conn = DbConnect(fName,'simple_combined') + >>> suppl = ForwardDbFpSupplier(conn.GetData()) - we can loop over the supplied fingerprints: - >>> fps = [] - >>> for fp in suppl: - ... fps.append(fp) - >>> len(fps) - 12 - - """ - - def __init__(self, *args, **kwargs): - DbFpSupplier.__init__(self, *args, **kwargs) - self.reset() - - def reset(self): - DbFpSupplier.reset(self) - self._dataIter = iter(self._data) - - def NextItem(self): - """ - - NOTE: this has side effects + we can loop over the supplied fingerprints: + >>> fps = [] + >>> for fp in suppl: + ... fps.append(fp) + >>> len(fps) + 12 """ - try: - d = next(self._dataIter) - except StopIteration: - d = None - if d is not None: - newFp = self._BuildFp(d) - else: - newFp = None - return newFp + + def __init__(self, *args, **kwargs): + DbFpSupplier.__init__(self, *args, **kwargs) + self.reset() + + def reset(self): + DbFpSupplier.reset(self) + self._dataIter = iter(self._data) + + def NextItem(self): + """ + + NOTE: this has side effects + + """ + try: + d = next(self._dataIter) + except StopIteration: + d = None + if d is not None: + newFp = self._BuildFp(d) + else: + newFp = None + return newFp class RandomAccessDbFpSupplier(DbFpSupplier): - """ DbFp supplier supporting random access: - >>> import os.path - >>> from rdkit import RDConfig - >>> from rdkit.Dbase.DbConnection import DbConnect - >>> fName = RDConfig.RDTestDatabase - >>> conn = DbConnect(fName,'simple_combined') - >>> suppl = RandomAccessDbFpSupplier(conn.GetData()) - >>> len(suppl) - 12 + """ DbFp supplier supporting random access: + >>> import os.path + >>> from rdkit import RDConfig + >>> from rdkit.Dbase.DbConnection import DbConnect + >>> fName = RDConfig.RDTestDatabase + >>> conn = DbConnect(fName,'simple_combined') + >>> suppl = RandomAccessDbFpSupplier(conn.GetData()) + >>> len(suppl) + 12 - we can pull individual fingerprints: - >>> fp = suppl[5] - >>> fp.GetNumBits() - 128 - >>> fp.GetNumOnBits() - 54 + we can pull individual fingerprints: + >>> fp = suppl[5] + >>> fp.GetNumBits() + 128 + >>> fp.GetNumOnBits() + 54 - a standard loop over the fingerprints: - >>> fps = [] - >>> for fp in suppl: - ... fps.append(fp) - >>> len(fps) - 12 + a standard loop over the fingerprints: + >>> fps = [] + >>> for fp in suppl: + ... fps.append(fp) + >>> len(fps) + 12 - or we can use an indexed loop: - >>> fps = [None]*len(suppl) - >>> for i in range(len(suppl)): - ... fps[i] = suppl[i] - >>> len(fps) - 12 + or we can use an indexed loop: + >>> fps = [None]*len(suppl) + >>> for i in range(len(suppl)): + ... fps[i] = suppl[i] + >>> len(fps) + 12 - """ + """ - def __init__(self, *args, **kwargs): - DbFpSupplier.__init__(self, *args, **kwargs) - self.reset() + def __init__(self, *args, **kwargs): + DbFpSupplier.__init__(self, *args, **kwargs) + self.reset() - def __len__(self): - return len(self._data) + def __len__(self): + return len(self._data) - def __getitem__(self, idx): - newD = self._data[idx] - return self._BuildFp(newD) + def __getitem__(self, idx): + newD = self._data[idx] + return self._BuildFp(newD) - def reset(self): - self._pos = -1 + def reset(self): + self._pos = -1 - def NextItem(self): - self._pos += 1 - res = None - if self._pos < len(self): - res = self[self._pos] - return res + def NextItem(self): + self._pos += 1 + res = None + if self._pos < len(self): + res = self[self._pos] + return res # ------------------------------------ @@ -180,11 +176,11 @@ class RandomAccessDbFpSupplier(DbFpSupplier): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/Chem/Fingerprints/FingerprintMols.py b/rdkit/Chem/Fingerprints/FingerprintMols.py index 9b518c6e0..f19bc6074 100755 --- a/rdkit/Chem/Fingerprints/FingerprintMols.py +++ b/rdkit/Chem/Fingerprints/FingerprintMols.py @@ -21,7 +21,7 @@ Sample Usage: """ -from __future__ import print_function + import getopt import sys @@ -30,7 +30,7 @@ from rdkit import Chem from rdkit import DataStructs from rdkit.Chem import MACCSkeys from rdkit.ML.Cluster import Murtagh -from rdkit.six.moves import cPickle +import pickle def error(msg): @@ -230,7 +230,7 @@ def FingerprintsFromDetails(details, reportFreq=10): if details.outFileName: outF = open(details.outFileName, 'wb+') for i in range(len(fps)): - cPickle.dump(fps[i], outF) + pickle.dump(fps[i], outF) outF.close() dbName = details.outDbName or details.dbName if details.outTableName and dbName: diff --git a/rdkit/Chem/Fingerprints/MolSimilarity.py b/rdkit/Chem/Fingerprints/MolSimilarity.py index b1616adba..66e119b27 100755 --- a/rdkit/Chem/Fingerprints/MolSimilarity.py +++ b/rdkit/Chem/Fingerprints/MolSimilarity.py @@ -26,7 +26,7 @@ from rdkit.Chem.Fingerprints import FingerprintMols, DbFpSupplier from rdkit.DataStructs.TopNContainer import TopNContainer from rdkit.Dbase import DbModule from rdkit.Dbase.DbConnection import DbConnect -from rdkit.six.moves import cPickle +import pickle try: from rdkit.VLib.NodeLib.DbPickleSupplier import _lazyDataSeq as _dataSeq @@ -146,7 +146,7 @@ def GetFingerprints(details): done = 0 while not done: try: - ID, fp = cPickle.load(inF) + ID, fp = pickle.load(inF) except Exception: done = 1 else: diff --git a/rdkit/Chem/Fingerprints/SimilarityScreener.py b/rdkit/Chem/Fingerprints/SimilarityScreener.py index c9baf3d12..932132b69 100755 --- a/rdkit/Chem/Fingerprints/SimilarityScreener.py +++ b/rdkit/Chem/Fingerprints/SimilarityScreener.py @@ -14,165 +14,164 @@ See _SimilarityScreener_ for overview of required API """ from rdkit import DataStructs -from rdkit import six from rdkit.DataStructs import TopNContainer class SimilarityScreener(object): - """ base class + """ base class - important attributes: - probe: the probe fingerprint against which we screen. + important attributes: + probe: the probe fingerprint against which we screen. - metric: a function that takes two arguments and returns a similarity - measure between them + metric: a function that takes two arguments and returns a similarity + measure between them - dataSource: the source pool from which to draw, needs to support - a next() method + dataSource: the source pool from which to draw, needs to support + a next() method - fingerprinter: a function that takes a molecule and returns a - fingerprint of the appropriate format + fingerprinter: a function that takes a molecule and returns a + fingerprint of the appropriate format - **Notes** - subclasses must support either an iterator interface - or __len__ and __getitem__ - """ - - def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None): - self.metric = metric - self.dataSource = dataSource - self.fingerprinter = fingerprinter - self.probe = probe - - def Reset(self): - """ used to reset screeners that behave as iterators """ - pass - - # FIX: add setters/getters for attributes - def SetProbe(self, probeFingerprint): - """ sets our probe fingerprint """ - self.probe = probeFingerprint - - def GetSingleFingerprint(self, probe): - """ returns a fingerprint for a single probe object - - This is potentially useful in initializing our internal - probe object. - + **Notes** + subclasses must support either an iterator interface + or __len__ and __getitem__ """ - return self.fingerprinter(probe) + + def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None): + self.metric = metric + self.dataSource = dataSource + self.fingerprinter = fingerprinter + self.probe = probe + + def Reset(self): + """ used to reset screeners that behave as iterators """ + pass + + # FIX: add setters/getters for attributes + def SetProbe(self, probeFingerprint): + """ sets our probe fingerprint """ + self.probe = probeFingerprint + + def GetSingleFingerprint(self, probe): + """ returns a fingerprint for a single probe object + + This is potentially useful in initializing our internal + probe object. + + """ + return self.fingerprinter(probe) class ThresholdScreener(SimilarityScreener): - """ Used to return all compounds that have a similarity - to the probe beyond a threshold value + """ Used to return all compounds that have a similarity + to the probe beyond a threshold value - **Notes**: + **Notes**: - - This is as lazy as possible, so the data source isn't - queried until the client asks for a hit. + - This is as lazy as possible, so the data source isn't + queried until the client asks for a hit. - - In addition to being lazy, this class is as thin as possible. - (Who'd have thought it was possible!) - Hits are *not* stored locally, so if a client resets - the iteration and starts over, the same amount of work must - be done to retrieve the hits. + - In addition to being lazy, this class is as thin as possible. + (Who'd have thought it was possible!) + Hits are *not* stored locally, so if a client resets + the iteration and starts over, the same amount of work must + be done to retrieve the hits. - - The thinness and laziness forces us to support only forward - iteration (not random access) + - The thinness and laziness forces us to support only forward + iteration (not random access) - """ - - def __init__(self, threshold, **kwargs): - SimilarityScreener.__init__(self, **kwargs) - self.threshold = threshold - self.dataIter = iter(self.dataSource) - # FIX: add setters/getters for attributes - - def _nextMatch(self): - """ *Internal use only* """ - done = 0 - res = None - sim = 0 - while not done: - # this is going to crap out when the data source iterator finishes, - # that's how we stop when no match is found - obj = six.next(self.dataIter) - fp = self.fingerprinter(obj) - sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric) - if sim >= self.threshold: - res = obj - done = 1 - return sim, res - - def Reset(self): - """ used to reset our internal state so that iteration - starts again from the beginning """ - self.dataSource.reset() - self.dataIter = iter(self.dataSource) - def __iter__(self): - """ returns an iterator for this screener - """ - self.Reset() - return self + def __init__(self, threshold, **kwargs): + SimilarityScreener.__init__(self, **kwargs) + self.threshold = threshold + self.dataIter = iter(self.dataSource) + # FIX: add setters/getters for attributes - def next(self): - """ required part of iterator interface """ - return self._nextMatch() + def _nextMatch(self): + """ *Internal use only* """ + done = 0 + res = None + sim = 0 + while not done: + # this is going to crap out when the data source iterator finishes, + # that's how we stop when no match is found + obj = next(self.dataIter) + fp = self.fingerprinter(obj) + sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric) + if sim >= self.threshold: + res = obj + done = 1 + return sim, res - __next__ = next + def Reset(self): + """ used to reset our internal state so that iteration + starts again from the beginning + """ + self.dataSource.reset() + self.dataIter = iter(self.dataSource) + + def __iter__(self): + """ returns an iterator for this screener + """ + self.Reset() + return self + + def next(self): + """ required part of iterator interface """ + return self._nextMatch() + + __next__ = next class TopNScreener(SimilarityScreener): - """ A screener that only returns the top N hits found + """ A screener that only returns the top N hits found - **Notes** + **Notes** - - supports forward iteration and getitem + - supports forward iteration and getitem - """ + """ - def __init__(self, num, **kwargs): - SimilarityScreener.__init__(self, **kwargs) - self.numToGet = num - self.topN = None - self._pos = 0 + def __init__(self, num, **kwargs): + SimilarityScreener.__init__(self, **kwargs) + self.numToGet = num + self.topN = None + self._pos = 0 - def Reset(self): - self._pos = 0 + def Reset(self): + self._pos = 0 - def __iter__(self): - if self.topN is None: - self._initTopN() - self.Reset() - return self + def __iter__(self): + if self.topN is None: + self._initTopN() + self.Reset() + return self - def next(self): - if self._pos >= self.numToGet: - raise StopIteration - else: - res = self.topN[self._pos] - self._pos += 1 - return res + def next(self): + if self._pos >= self.numToGet: + raise StopIteration + else: + res = self.topN[self._pos] + self._pos += 1 + return res - __next__ = next + __next__ = next - def _initTopN(self): - self.topN = TopNContainer.TopNContainer(self.numToGet) - for obj in self.dataSource: - fp = self.fingerprinter(obj) - sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric) - self.topN.Insert(sim, obj) + def _initTopN(self): + self.topN = TopNContainer.TopNContainer(self.numToGet) + for obj in self.dataSource: + fp = self.fingerprinter(obj) + sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric) + self.topN.Insert(sim, obj) - def __len__(self): - if self.topN is None: - self._initTopN() - return self.numToGet + def __len__(self): + if self.topN is None: + self._initTopN() + return self.numToGet - def __getitem__(self, idx): - if self.topN is None: - self._initTopN() - return self.topN[idx] + def __getitem__(self, idx): + if self.topN is None: + self._initTopN() + return self.topN[idx] diff --git a/rdkit/Chem/Fingerprints/UnitTestDbFpSupplier.py b/rdkit/Chem/Fingerprints/UnitTestDbFpSupplier.py index 63b9832e0..51876fb8d 100644 --- a/rdkit/Chem/Fingerprints/UnitTestDbFpSupplier.py +++ b/rdkit/Chem/Fingerprints/UnitTestDbFpSupplier.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import doctest import unittest diff --git a/rdkit/Chem/FunctionalGroups.py b/rdkit/Chem/FunctionalGroups.py index 582aa150d..953a7f241 100644 --- a/rdkit/Chem/FunctionalGroups.py +++ b/rdkit/Chem/FunctionalGroups.py @@ -36,38 +36,38 @@ import weakref from rdkit import Chem from rdkit import RDConfig -from rdkit.six.moves import cStringIO as StringIO +from io import StringIO class FGHierarchyNode(object): - children = None - name = "" - label = "" - pattern = None - smarts = "" - rxnSmarts = "" - parent = None - removalReaction = None + children = None + name = "" + label = "" + pattern = None + smarts = "" + rxnSmarts = "" + parent = None + removalReaction = None - def __init__(self, name, patt, smarts="", label="", rxnSmarts="", parent=None): - self.name = name - self.pattern = patt - if parent: - self.parent = weakref.ref(parent) - self.label = label - self.smarts = smarts - self.children = [] - self.rxnSmarts = rxnSmarts + def __init__(self, name, patt, smarts="", label="", rxnSmarts="", parent=None): + self.name = name + self.pattern = patt + if parent: + self.parent = weakref.ref(parent) + self.label = label + self.smarts = smarts + self.children = [] + self.rxnSmarts = rxnSmarts - def __len__(self): - res = 1 - for child in self.children: - res += len(child) - return res + def __len__(self): + res = 1 + for child in self.children: + res += len(child) + return res class FuncGroupFileParseError(ValueError): - pass + pass groupDefns = {} @@ -77,92 +77,95 @@ lastFilename = None def BuildFuncGroupHierarchy(fileNm=None, data=None, force=False): - global groupDefns, hierarchy, lastData, lastFilename - if (not force and hierarchy and (not data or data == lastData) and - (not fileNm or fileNm == lastFilename)): - return hierarchy[:] - lastData = data - splitter = re.compile('\t+') + global groupDefns, hierarchy, lastData, lastFilename + if (not force and hierarchy and (not data or data == lastData) + and (not fileNm or fileNm == lastFilename)): + return hierarchy[:] + lastData = data + splitter = re.compile('\t+') - if not fileNm and not data: - fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt') + if not fileNm and not data: + fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt') - if fileNm: - inF = open(fileNm, 'r') - lastFilename = fileNm - elif data: - inF = StringIO(data) - else: - raise ValueError("need data or filename") - - groupDefns = {} - res = [] - for lineNo, line in enumerate(inF.readlines(), 1): - line = line.strip() - line = line.split('//')[0] - if not line: - continue - splitL = splitter.split(line) - if len(splitL) < 3: - raise FuncGroupFileParseError("Input line %d (%s) is not long enough." % (lineNo, repr(line))) - label = splitL[0].strip() - if label in groupDefns: - raise FuncGroupFileParseError("Duplicate label on line %d." % lineNo) - labelHierarchy = label.split('.') - if len(labelHierarchy) > 1: - for i in range(len(labelHierarchy) - 1): - tmp = '.'.join(labelHierarchy[:i + 1]) - if tmp not in groupDefns: - raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found." % (tmp, lineNo)) - parent = groupDefns['.'.join(labelHierarchy[:-1])] + if fileNm: + inF = open(fileNm, 'r') + lastFilename = fileNm + elif data: + inF = StringIO(data) else: - parent = None - smarts = splitL[1] - patt = Chem.MolFromSmarts(smarts) - if not patt: - raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.' % (smarts, lineNo)) + raise ValueError("need data or filename") - name = splitL[2].strip() + groupDefns = {} + res = [] + for lineNo, line in enumerate(inF.readlines(), 1): + line = line.strip() + line = line.split('//')[0] + if not line: + continue + splitL = splitter.split(line) + if len(splitL) < 3: + raise FuncGroupFileParseError( + "Input line %d (%s) is not long enough." % (lineNo, repr(line))) + label = splitL[0].strip() + if label in groupDefns: + raise FuncGroupFileParseError("Duplicate label on line %d." % lineNo) + labelHierarchy = label.split('.') + if len(labelHierarchy) > 1: + for i in range(len(labelHierarchy) - 1): + tmp = '.'.join(labelHierarchy[:i + 1]) + if tmp not in groupDefns: + raise FuncGroupFileParseError( + "Hierarchy member %s (line %d) not found." % (tmp, lineNo)) + parent = groupDefns['.'.join(labelHierarchy[:-1])] + else: + parent = None + smarts = splitL[1] + patt = Chem.MolFromSmarts(smarts) + if not patt: + raise FuncGroupFileParseError( + 'Smarts "%s" (line %d) could not be parsed.' % (smarts, lineNo)) - rxnSmarts = '' - if len(splitL) > 3: - rxnSmarts = splitL[3] + name = splitL[2].strip() - node = FGHierarchyNode(name, patt, smarts=smarts, label=label, parent=parent, - rxnSmarts=rxnSmarts) - if parent: - parent.children.append(node) - else: - res.append(node) - groupDefns[label] = node - hierarchy = res[:] - return res + rxnSmarts = '' + if len(splitL) > 3: + rxnSmarts = splitL[3] + + node = FGHierarchyNode(name, patt, smarts=smarts, label=label, parent=parent, + rxnSmarts=rxnSmarts) + if parent: + parent.children.append(node) + else: + res.append(node) + groupDefns[label] = node + hierarchy = res[:] + return res def _SetNodeBits(mol, node, res, idx): - ms = mol.GetSubstructMatches(node.pattern) - count = 0 - seen = {} - for m in ms: - if m[0] not in seen: - count += 1 - seen[m[0]] = 1 - if count: - res[idx] = count - idx += 1 - for child in node.children: - idx = _SetNodeBits(mol, child, res, idx) - else: - idx += len(node) - return idx + ms = mol.GetSubstructMatches(node.pattern) + count = 0 + seen = {} + for m in ms: + if m[0] not in seen: + count += 1 + seen[m[0]] = 1 + if count: + res[idx] = count + idx += 1 + for child in node.children: + idx = _SetNodeBits(mol, child, res, idx) + else: + idx += len(node) + return idx def CreateMolFingerprint(mol, hierarchy): - totL = 0 - for entry in hierarchy: - totL += len(entry) - res = [0] * totL - idx = 0 - for entry in hierarchy: - idx = _SetNodeBits(mol, entry, res, idx) - return res + totL = 0 + for entry in hierarchy: + totL += len(entry) + res = [0] * totL + idx = 0 + for entry in hierarchy: + idx = _SetNodeBits(mol, entry, res, idx) + return res diff --git a/rdkit/Chem/GraphDescriptors.py b/rdkit/Chem/GraphDescriptors.py index 49e825386..997dbff1e 100755 --- a/rdkit/Chem/GraphDescriptors.py +++ b/rdkit/Chem/GraphDescriptors.py @@ -13,7 +13,7 @@ """ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import Graphs from rdkit.Chem import rdchem diff --git a/rdkit/Chem/Graphs.py b/rdkit/Chem/Graphs.py index 61d3e822b..e4566f7f5 100755 --- a/rdkit/Chem/Graphs.py +++ b/rdkit/Chem/Graphs.py @@ -17,36 +17,35 @@ C/C++ codebase. import numpy from rdkit import Chem from rdkit import DataStructs -from rdkit.six.moves import xrange import types def CharacteristicPolynomial(mol, mat=None): - """ calculates the characteristic polynomial for a molecular graph + """ calculates the characteristic polynomial for a molecular graph - if mat is not passed in, the molecule's Weighted Adjacency Matrix will - be used. + if mat is not passed in, the molecule's Weighted Adjacency Matrix will + be used. - The approach used is the Le Verrier-Faddeev-Frame method described - in _Chemical Graph Theory, 2nd Edition_ by Nenad Trinajstic (CRC Press, - 1992), pg 76. - - """ - nAtoms = mol.GetNumAtoms() - if mat is None: - # FIX: complete this: - #A = mol.GetWeightedAdjacencyMatrix() - pass - else: - A = mat - I = 1. * numpy.identity(nAtoms) - An = A - res = numpy.zeros(nAtoms + 1, numpy.float) - res[0] = 1.0 - for n in xrange(1, nAtoms + 1): - res[n] = 1. / n * numpy.trace(An) - Bn = An - res[n] * I - An = numpy.dot(A, Bn) + The approach used is the Le Verrier-Faddeev-Frame method described + in _Chemical Graph Theory, 2nd Edition_ by Nenad Trinajstic (CRC Press, + 1992), pg 76. - res[1:] *= -1 - return res + """ + nAtoms = mol.GetNumAtoms() + if mat is None: + # FIX: complete this: + #A = mol.GetWeightedAdjacencyMatrix() + pass + else: + A = mat + I = 1. * numpy.identity(nAtoms) + An = A + res = numpy.zeros(nAtoms + 1, numpy.float) + res[0] = 1.0 + for n in range(1, nAtoms + 1): + res[n] = 1. / n * numpy.trace(An) + Bn = An - res[n] * I + An = numpy.dot(A, Bn) + + res[1:] *= -1 + return res diff --git a/rdkit/Chem/MACCSkeys.py b/rdkit/Chem/MACCSkeys.py index e662e7ed9..e65b15f92 100755 --- a/rdkit/Chem/MACCSkeys.py +++ b/rdkit/Chem/MACCSkeys.py @@ -30,7 +30,7 @@ Rev history: May 2011 (gl): Update some definitions based on feedback from Andrew Dalke """ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import rdMolDescriptors from rdkit import DataStructs diff --git a/rdkit/Chem/MolDb/FingerprintUtils.py b/rdkit/Chem/MolDb/FingerprintUtils.py index 4e3c097d4..6392ec55a 100644 --- a/rdkit/Chem/MolDb/FingerprintUtils.py +++ b/rdkit/Chem/MolDb/FingerprintUtils.py @@ -3,9 +3,8 @@ # Copyright (C) 2009 Greg Landrum # All Rights Reserved # -from __future__ import print_function -from rdkit.six.moves import cPickle -from rdkit.six import iterkeys + +import pickle from rdkit import DataStructs, Chem from rdkit import Chem @@ -18,114 +17,114 @@ similarityMethods = { 'Morgan': DataStructs.UIntSparseIntVect, 'Avalon': DataStructs.ExplicitBitVect, } -supportedSimilarityMethods = list(iterkeys(similarityMethods)) +supportedSimilarityMethods = list(iter(similarityMethods)) class LayeredOptions: - loadLayerFlags = 0xFFFFFFFF - searchLayerFlags = 0x7 - minPath = 1 - maxPath = 6 - fpSize = 1024 - wordSize = 32 - nWords = fpSize // wordSize + loadLayerFlags = 0xFFFFFFFF + searchLayerFlags = 0x7 + minPath = 1 + maxPath = 6 + fpSize = 1024 + wordSize = 32 + nWords = fpSize // wordSize - @staticmethod - def GetFingerprint(mol, query=True): - if query: - flags = LayeredOptions.searchLayerFlags - else: - flags = LayeredOptions.loadLayerFlags - return Chem.LayeredFingerprint(mol, layerFlags=flags, minPath=LayeredOptions.minPath, - maxPath=LayeredOptions.maxPath, fpSize=LayeredOptions.fpSize) + @staticmethod + def GetFingerprint(mol, query=True): + if query: + flags = LayeredOptions.searchLayerFlags + else: + flags = LayeredOptions.loadLayerFlags + return Chem.LayeredFingerprint(mol, layerFlags=flags, minPath=LayeredOptions.minPath, + maxPath=LayeredOptions.maxPath, fpSize=LayeredOptions.fpSize) - @staticmethod - def GetWords(mol, query=True): - txt = LayeredOptions.GetFingerprint(mol, query=query).ToBitString() - words = [int(txt[x:x + 32], 2) for x in range(0, len(txt), 32)] - return words + @staticmethod + def GetWords(mol, query=True): + txt = LayeredOptions.GetFingerprint(mol, query=query).ToBitString() + words = [int(txt[x:x + 32], 2) for x in range(0, len(txt), 32)] + return words - @staticmethod - def GetQueryText(mol, query=True): - words = LayeredOptions.GetWords(mol, query=query) - colqs = [] - for idx, word in enumerate(words): - if not word: - continue - idx = idx + 1 - colqs.append('%(word)d&Col_%(idx)d=%(word)d' % locals()) - return ' and '.join(colqs) + @staticmethod + def GetQueryText(mol, query=True): + words = LayeredOptions.GetWords(mol, query=query) + colqs = [] + for idx, word in enumerate(words): + if not word: + continue + idx = idx + 1 + colqs.append('%(word)d&Col_%(idx)d=%(word)d' % locals()) + return ' and '.join(colqs) def BuildSigFactory(options=None, fdefFile=None, bins=[(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)], skipFeats=('LumpedHydrophobe', 'ZnBinder')): - if options: - fdefFile = options.fdefFile - if not fdefFile: - raise ValueError('bad fdef file') - from rdkit.Chem import ChemicalFeatures - from rdkit.Chem.Pharm2D import SigFactory - featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) - sigFactory = SigFactory.SigFactory(featFactory, skipFeats=skipFeats, trianglePruneBins=False) - sigFactory.SetBins(bins) - return sigFactory + if options: + fdefFile = options.fdefFile + if not fdefFile: + raise ValueError('bad fdef file') + from rdkit.Chem import ChemicalFeatures + from rdkit.Chem.Pharm2D import SigFactory + featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) + sigFactory = SigFactory.SigFactory(featFactory, skipFeats=skipFeats, trianglePruneBins=False) + sigFactory.SetBins(bins) + return sigFactory def BuildAtomPairFP(mol): - from rdkit.Chem.AtomPairs import Pairs - fp = Pairs.GetAtomPairFingerprintAsIntVect(mol) - fp._sumCache = fp.GetTotalVal() - return fp + from rdkit.Chem.AtomPairs import Pairs + fp = Pairs.GetAtomPairFingerprintAsIntVect(mol) + fp._sumCache = fp.GetTotalVal() + return fp def BuildTorsionsFP(mol): - from rdkit.Chem.AtomPairs import Torsions - fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol) - fp._sumCache = fp.GetTotalVal() - return fp + from rdkit.Chem.AtomPairs import Torsions + fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol) + fp._sumCache = fp.GetTotalVal() + return fp def BuildRDKitFP(mol): - fp = Chem.RDKFingerprint(mol, nBitsPerHash=1) - return fp + fp = Chem.RDKFingerprint(mol, nBitsPerHash=1) + return fp def BuildPharm2DFP(mol): - global sigFactory - from rdkit.Chem.Pharm2D import Generate - try: - fp = Generate.Gen2DFingerprint(mol, sigFactory) - except IndexError: - print('FAIL:', Chem.MolToSmiles(mol, True)) - raise - return fp + global sigFactory + from rdkit.Chem.Pharm2D import Generate + try: + fp = Generate.Gen2DFingerprint(mol, sigFactory) + except IndexError: + print('FAIL:', Chem.MolToSmiles(mol, True)) + raise + return fp def BuildMorganFP(mol): - from rdkit.Chem import rdMolDescriptors - fp = rdMolDescriptors.GetMorganFingerprint(mol, 2) - fp._sumCache = fp.GetTotalVal() - return fp + from rdkit.Chem import rdMolDescriptors + fp = rdMolDescriptors.GetMorganFingerprint(mol, 2) + fp._sumCache = fp.GetTotalVal() + return fp def BuildAvalonFP(mol, smiles=None): - from rdkit.Avalon import pyAvalonTools - if smiles is None: - fp = pyAvalonTools.GetAvalonFP(mol) - else: - fp = pyAvalonTools.GetAvalonFP(smiles, True) - return fp + from rdkit.Avalon import pyAvalonTools + if smiles is None: + fp = pyAvalonTools.GetAvalonFP(mol) + else: + fp = pyAvalonTools.GetAvalonFP(smiles, True) + return fp def DepickleFP(pkl, similarityMethod): - if not isinstance(pkl, (bytes, str)): - pkl = str(pkl) - try: - klass = similarityMethods[similarityMethod] - fp = klass(pkl) - except Exception: - import traceback - traceback.print_exc() - fp = cPickle.loads(pkl) - return fp + if not isinstance(pkl, (bytes, str)): + pkl = str(pkl) + try: + klass = similarityMethods[similarityMethod] + fp = klass(pkl) + except Exception: + import traceback + traceback.print_exc() + fp = pickle.loads(pkl) + return fp diff --git a/rdkit/Chem/MolStandardize/__init__.py b/rdkit/Chem/MolStandardize/__init__.py index 9b7348602..526876707 100644 --- a/rdkit/Chem/MolStandardize/__init__.py +++ b/rdkit/Chem/MolStandardize/__init__.py @@ -11,9 +11,7 @@ Note that the C++ reimplementation of this is available in the module rdkit.Chem :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import logging from .standardize import Standardizer, standardize_smiles, enumerate_tautomers_smiles, canonicalize_tautomer_smiles diff --git a/rdkit/Chem/MolStandardize/charge.py b/rdkit/Chem/MolStandardize/charge.py index a0a8dc27f..837cac6eb 100644 --- a/rdkit/Chem/MolStandardize/charge.py +++ b/rdkit/Chem/MolStandardize/charge.py @@ -11,9 +11,7 @@ which attempts to neutralize ionized acids and bases on a molecule. :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import copy import logging @@ -177,7 +175,8 @@ class Reionizer(object): for cc in self.charge_corrections: for match in mol.GetSubstructMatches(cc.smarts): atom = mol.GetAtomWithIdx(match[0]) - log.info('Applying charge correction %s (%s %+d)', cc.name, atom.GetSymbol(), cc.charge) + log.info('Applying charge correction %s (%s %+d)', + cc.name, atom.GetSymbol(), cc.charge) atom.SetFormalCharge(cc.charge) current_charge = Chem.GetFormalCharge(mol) @@ -189,7 +188,8 @@ class Reionizer(object): ppos, poccur = self._strongest_protonated(mol) if ppos is None: break - log.info('Ionizing %s to balance previous charge corrections', self.acid_base_pairs[ppos].name) + log.info('Ionizing %s to balance previous charge corrections', + self.acid_base_pairs[ppos].name) patom = mol.GetAtomWithIdx(poccur[-1]) patom.SetFormalCharge(patom.GetFormalCharge() - 1) if patom.GetNumExplicitHs() > 0: @@ -210,11 +210,13 @@ class Reionizer(object): key = tuple(sorted([poccur[-1], ioccur[-1]])) if key in already_moved: - log.warning('Aborting reionization to avoid infinite loop due to it being ambiguous where to put a Hydrogen') + log.warning( + 'Aborting reionization to avoid infinite loop due to it being ambiguous where to put a Hydrogen') break already_moved.add(key) - log.info('Moved proton from %s to %s', self.acid_base_pairs[ppos].name, self.acid_base_pairs[ipos].name) + log.info('Moved proton from %s to %s', + self.acid_base_pairs[ppos].name, self.acid_base_pairs[ipos].name) # Remove hydrogen from strongest protonated patom = mol.GetAtomWithIdx(poccur[-1]) diff --git a/rdkit/Chem/MolStandardize/errors.py b/rdkit/Chem/MolStandardize/errors.py index b9c927da4..060ee8dfc 100644 --- a/rdkit/Chem/MolStandardize/errors.py +++ b/rdkit/Chem/MolStandardize/errors.py @@ -9,10 +9,6 @@ This module contains exceptions that are raised by MolVS. :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division - class MolVSError(Exception): pass diff --git a/rdkit/Chem/MolStandardize/fragment.py b/rdkit/Chem/MolStandardize/fragment.py index 2a180437d..4ae6eab0d 100644 --- a/rdkit/Chem/MolStandardize/fragment.py +++ b/rdkit/Chem/MolStandardize/fragment.py @@ -11,9 +11,7 @@ This module contains tools for dealing with molecules with more than one covalen :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import logging from rdkit import Chem @@ -51,7 +49,7 @@ class FragmentPattern(object): #: The default list of :class:`FragmentPatterns ` to be used by #: :class:`~molvs.fragment.FragmentRemover`. REMOVE_FRAGMENTS = ( - FragmentPattern('hydrogen', '[H]'), + FragmentPattern('hydrogen', '[H]'), FragmentPattern('fluorine', '[F]'), FragmentPattern('chlorine', '[Cl]'), FragmentPattern('bromine', '[Br]'), @@ -76,7 +74,8 @@ REMOVE_FRAGMENTS = ( FragmentPattern('hexafluorophosphate', '[P](-[#9])(-[#9])(-[#9])(-[#9])(-[#9])-[#9]'), FragmentPattern('sulfate', '[S](=[#8])(=[#8])(-[#8])-[#8]'), FragmentPattern('methyl sulfonate', '[#6]-[S](=[#8])(=[#8])(-[#8])'), - FragmentPattern('trifluoromethanesulfonic acid', '[#8]-[S](=[#8])(=[#8])-[#6](-[#9])(-[#9])-[#9]'), + FragmentPattern('trifluoromethanesulfonic acid', + '[#8]-[S](=[#8])(=[#8])-[#6](-[#9])(-[#9])-[#9]'), FragmentPattern('trifluoroacetic acid', '[#9]-[#6](-[#9])(-[#9])-[#6](=[#8])-[#8]'), FragmentPattern('1,2-dichloroethane', '[Cl]-[#6]-[#6]-[Cl]'), FragmentPattern('1,2-dimethoxyethane', '[#6]-[#8]-[#6]-[#6]-[#8]-[#6]'), @@ -241,5 +240,6 @@ class LargestFragmentChooser(object): continue # Otherwise this is the largest so far log.debug('New largest fragment: %s (%s)', smiles, atoms) - largest = {'smiles': smiles, 'fragment': f, 'atoms': atoms, 'weight': weight, 'organic': organic} + largest = {'smiles': smiles, 'fragment': f, + 'atoms': atoms, 'weight': weight, 'organic': organic} return largest['fragment'] diff --git a/rdkit/Chem/MolStandardize/metal.py b/rdkit/Chem/MolStandardize/metal.py index 8b6b3ee03..1846792a8 100644 --- a/rdkit/Chem/MolStandardize/metal.py +++ b/rdkit/Chem/MolStandardize/metal.py @@ -9,9 +9,7 @@ This module contains tools for disconnecting metal atoms that are defined as cov :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import logging from rdkit import Chem @@ -30,8 +28,10 @@ class MetalDisconnector(object): log.debug('Initializing MetalDisconnector') # Initialize SMARTS to identify relevant substructures # TODO: Use atomic numbers instead of element symbols in SMARTS to allow for isotopes? - self._metal_nof = Chem.MolFromSmarts('[Li,Na,K,Rb,Cs,Fr,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]') - self._metal_non = Chem.MolFromSmarts('[Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,W,Re,Os,Ir,Pt,Au]~[B,C,Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]') + self._metal_nof = Chem.MolFromSmarts( + '[Li,Na,K,Rb,Cs,Fr,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]') + self._metal_non = Chem.MolFromSmarts( + '[Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,W,Re,Os,Ir,Pt,Au]~[B,C,Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]') def __call__(self, mol): """Calling a MetalDisconnector instance like a function is the same as calling its disconnect(mol) method.""" @@ -69,6 +69,7 @@ class MetalDisconnector(object): atom1.SetFormalCharge(atom1.GetFormalCharge() + chg) atom2 = mol.GetAtomWithIdx(j) atom2.SetFormalCharge(atom2.GetFormalCharge() - chg) - log.info('Removed covalent bond between %s and %s', atom1.GetSymbol(), atom2.GetSymbol()) + log.info('Removed covalent bond between %s and %s', + atom1.GetSymbol(), atom2.GetSymbol()) Chem.SanitizeMol(mol) return mol diff --git a/rdkit/Chem/MolStandardize/normalize.py b/rdkit/Chem/MolStandardize/normalize.py index 7eb40712a..10c8ef4af 100644 --- a/rdkit/Chem/MolStandardize/normalize.py +++ b/rdkit/Chem/MolStandardize/normalize.py @@ -9,14 +9,11 @@ This module contains tools for normalizing molecules using reaction SMARTS patte :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import logging from rdkit import Chem from rdkit.Chem import AllChem -import six from .utils import memoized_property @@ -51,32 +48,46 @@ class Normalization(object): #: The default list of Normalization transforms. NORMALIZATIONS = ( # Opposite of #2.1 in InChI technical manual? Covered by RDKit Sanitization. - Normalization('Nitro to N+(O-)=O', '[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3]'), + Normalization('Nitro to N+(O-)=O', + '[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3]'), Normalization('Sulfone to S(=O)(=O)', '[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])'), Normalization('Pyridine oxide to n+O-', '[n:1]=[O:2]>>[n+:1][O-:2]'), Normalization('Azide to N=N+=N-', '[*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4]'), Normalization('Diazo/azo to =N+=N-', '[*:1]=[N:2]#[N:3]>>[*:1]=[N+:2]=[N-:3]'), - Normalization('Sulfoxide to -S+(O-)-', '[!O:1][S+0;X3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]'), + Normalization('Sulfoxide to -S+(O-)-', + '[!O:1][S+0;X3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]'), # Equivalent to #1.5 in InChI technical manual - Normalization('Phosphate to P(O-)=O', '[O,S,Se,Te;-1:1][P+;D4:2][O,S,Se,Te;-1:3]>>[*+0:1]=[P+0;D5:2][*-1:3]'), + Normalization('Phosphate to P(O-)=O', + '[O,S,Se,Te;-1:1][P+;D4:2][O,S,Se,Te;-1:3]>>[*+0:1]=[P+0;D5:2][*-1:3]'), # Equivalent to #1.8 in InChI technical manual Normalization('C/S+N to C/S=N+', '[C,S;X3+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]'), # Equivalent to #1.8 in InChI technical manual Normalization('P+N to P=N+', '[P;X4+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]'), - Normalization('Normalize hydrazine-diazonium', '[CX4:1][NX3H:2]-[NX3H:3][CX4:4][NX2+:5]#[NX1:6]>>[CX4:1][NH0:2]=[NH+:3][C:4][N+0:5]=[NH:6]'), + Normalization('Normalize hydrazine-diazonium', + '[CX4:1][NX3H:2]-[NX3H:3][CX4:4][NX2+:5]#[NX1:6]>>[CX4:1][NH0:2]=[NH+:3][C:4][N+0:5]=[NH:6]'), # Equivalent to #1.3 in InChI technical manual - Normalization('Recombine 1,3-separated charges', '[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]>>[*-0:1]=[*:2]-[*+0:3]'), - Normalization('Recombine 1,3-separated charges', '[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]>>[*-0:1]:[*:2]-[*+0:3]'), - Normalization('Recombine 1,3-separated charges', '[N,O,P,S;-1:1]-[a:2]:[n,o,p,s;+1:3]>>[*-0:1]=[*:2]:[*+0:3]'), - Normalization('Recombine 1,5-separated charges', '[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]>>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]'), - Normalization('Recombine 1,5-separated charges', '[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]>>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]'), - Normalization('Recombine 1,5-separated charges', '[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]>>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]'), + Normalization('Recombine 1,3-separated charges', + '[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]>>[*-0:1]=[*:2]-[*+0:3]'), + Normalization('Recombine 1,3-separated charges', + '[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]>>[*-0:1]:[*:2]-[*+0:3]'), + Normalization('Recombine 1,3-separated charges', + '[N,O,P,S;-1:1]-[a:2]:[n,o,p,s;+1:3]>>[*-0:1]=[*:2]:[*+0:3]'), + Normalization('Recombine 1,5-separated charges', + '[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]>>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]'), + Normalization('Recombine 1,5-separated charges', + '[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]>>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]'), + Normalization('Recombine 1,5-separated charges', + '[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]>>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]'), # Conjugated cation rules taken from Francis Atkinson's standardiser. Those that can reduce aromaticity aren't included - Normalization('Normalize 1,3 conjugated cation', '[N,O;+0!H0:1]-[A:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]'), - Normalization('Normalize 1,3 conjugated cation', '[n;+0!H0:1]:[c:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]:[*:2]-[*+0:3]'), + Normalization('Normalize 1,3 conjugated cation', + '[N,O;+0!H0:1]-[A:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]'), + Normalization('Normalize 1,3 conjugated cation', + '[n;+0!H0:1]:[c:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]:[*:2]-[*+0:3]'), #Normalization('Normalize 1,3 conjugated cation', '[N,O;+0!H0:1]-[c:2]:[n!$(*[O-]),o;+1H0:3]>>[*+1:1]=[*:2]:[*+0:3]'), - Normalization('Normalize 1,5 conjugated cation', '[N,O;+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*[O-]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]'), - Normalization('Normalize 1,5 conjugated cation', '[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*[O-]),O;+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]'), + Normalization('Normalize 1,5 conjugated cation', + '[N,O;+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*[O-]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]'), + Normalization('Normalize 1,5 conjugated cation', + '[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*[O-]),O;+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]'), # Normalization('Normalize 1,5 conjugated cation', '[N,O;+0!H0:1]-[c:2]:[a:3]:[a:4]:[n!$(*[O-]),o;+1H0:5]>>[*+1:1]=[c:2]:[*:3]:[*:4]:[*+0:5]'), # Normalization('Normalize 1,5 conjugated cation', '[n;+0!H0:1]1:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]1>>[n+1:1]1:[*:2]:[*:3]:[*:4]:[n+0:5]1'), # Normalization('Normalize 1,5 conjugated cation', '[n;+0!H0:1]:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]:[n+0:5]'), @@ -139,7 +150,7 @@ class Normalizer(object): return outmol def _normalize_fragment(self, mol): - for n in six.moves.range(self.max_restarts): + for n in range(self.max_restarts): # Iterate through Normalization transforms and apply each in order for normalization in self.normalizations: product = self._apply_transform(mol, normalization.transform) @@ -163,7 +174,7 @@ class Normalizer(object): after the final application, the first product (sorted alphabetically by SMILES) is chosen. """ mols = [mol] - for n in six.moves.range(20): + for n in range(20): products = {} for mol in mols: for product in [x[0] for x in rule.RunReactants((mol,))]: diff --git a/rdkit/Chem/MolStandardize/resonance.py b/rdkit/Chem/MolStandardize/resonance.py index f8ffa0404..a825018dc 100644 --- a/rdkit/Chem/MolStandardize/resonance.py +++ b/rdkit/Chem/MolStandardize/resonance.py @@ -9,10 +9,6 @@ Resonance (mesomeric) transformations. :license: MIT, see LICENSE file for more details. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals import logging from rdkit import Chem @@ -88,6 +84,6 @@ def enumerate_resonance_smiles(smiles): :rtype: set of strings. """ mol = Chem.MolFromSmiles(smiles) - #Chem.SanitizeMol(mol) # MolFromSmiles does Sanitize by default + # Chem.SanitizeMol(mol) # MolFromSmiles does Sanitize by default mesomers = ResonanceEnumerator().enumerate(mol) return {Chem.MolToSmiles(m, isomericSmiles=True) for m in mesomers} diff --git a/rdkit/Chem/MolStandardize/standardize.py b/rdkit/Chem/MolStandardize/standardize.py index eee0a72af..0ea51773a 100644 --- a/rdkit/Chem/MolStandardize/standardize.py +++ b/rdkit/Chem/MolStandardize/standardize.py @@ -11,9 +11,7 @@ standardization tasks. :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import copy import logging diff --git a/rdkit/Chem/MolStandardize/tautomer.py b/rdkit/Chem/MolStandardize/tautomer.py index 4fd9fb074..71ac96f7d 100644 --- a/rdkit/Chem/MolStandardize/tautomer.py +++ b/rdkit/Chem/MolStandardize/tautomer.py @@ -9,9 +9,7 @@ This module contains tools for enumerating tautomers and determining a canonical :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import copy import logging @@ -32,7 +30,8 @@ class TautomerTransform(object): custom resulting bond orders and also resulting atom charges. """ - BONDMAP = {'-': BondType.SINGLE, '=': BondType.DOUBLE, '#': BondType.TRIPLE, ':': BondType.AROMATIC} + BONDMAP = {'-': BondType.SINGLE, '=': BondType.DOUBLE, + '#': BondType.TRIPLE, ':': BondType.AROMATIC} CHARGEMAP = {'+': 1, '0': 0, '-': -1} def __init__(self, name, smarts, bonds=(), charges=(), radicals=()): @@ -103,14 +102,22 @@ TAUTOMER_TRANSFORMS = ( TautomerTransform('special imine r', '[CX4!H0]-[c]=[n]'), TautomerTransform('1,3 aromatic heteroatom H shift f', '[#7!H0]-[#6R1]=[O,#7X2]'), TautomerTransform('1,3 aromatic heteroatom H shift r', '[O,#7;!H0]-[#6R1]=[#7X2]'), - TautomerTransform('1,3 heteroatom H shift', '[#7,S,O,Se,Te;!H0]-[#7X2,#6,#15]=[#7,#16,#8,Se,Te]'), - TautomerTransform('1,5 aromatic heteroatom H shift', '[#7,#16,#8;!H0]-[#6,#7]=[#6]-[#6,#7]=[#7,#16,#8;H0]'), - TautomerTransform('1,5 aromatic heteroatom H shift f', '[#7,#16,#8,Se,Te;!H0]-[#6,nX2]=[#6,nX2]-[#6,#7X2]=[#7X2,S,O,Se,Te]'), - TautomerTransform('1,5 aromatic heteroatom H shift r', '[#7,S,O,Se,Te;!H0]-[#6,#7X2]=[#6,nX2]-[#6,nX2]=[#7,#16,#8,Se,Te]'), - TautomerTransform('1,7 aromatic heteroatom H shift f', '[#7,#8,#16,Se,Te;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6]-[#6,#7X2]=[#7X2,S,O,Se,Te,CX3]'), - TautomerTransform('1,7 aromatic heteroatom H shift r', '[#7,S,O,Se,Te,CX4;!H0]-[#6,#7X2]=[#6]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[NX2,S,O,Se,Te]'), - TautomerTransform('1,9 aromatic heteroatom H shift f', '[#7,O;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#7,O]'), - TautomerTransform('1,11 aromatic heteroatom H shift f', '[#7,O;!H0]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#7X2,O]'), + TautomerTransform('1,3 heteroatom H shift', + '[#7,S,O,Se,Te;!H0]-[#7X2,#6,#15]=[#7,#16,#8,Se,Te]'), + TautomerTransform('1,5 aromatic heteroatom H shift', + '[#7,#16,#8;!H0]-[#6,#7]=[#6]-[#6,#7]=[#7,#16,#8;H0]'), + TautomerTransform('1,5 aromatic heteroatom H shift f', + '[#7,#16,#8,Se,Te;!H0]-[#6,nX2]=[#6,nX2]-[#6,#7X2]=[#7X2,S,O,Se,Te]'), + TautomerTransform('1,5 aromatic heteroatom H shift r', + '[#7,S,O,Se,Te;!H0]-[#6,#7X2]=[#6,nX2]-[#6,nX2]=[#7,#16,#8,Se,Te]'), + TautomerTransform('1,7 aromatic heteroatom H shift f', + '[#7,#8,#16,Se,Te;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6]-[#6,#7X2]=[#7X2,S,O,Se,Te,CX3]'), + TautomerTransform('1,7 aromatic heteroatom H shift r', + '[#7,S,O,Se,Te,CX4;!H0]-[#6,#7X2]=[#6]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[NX2,S,O,Se,Te]'), + TautomerTransform('1,9 aromatic heteroatom H shift f', + '[#7,O;!H0]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#6,#7X2]-[#6,#7X2]=[#7,O]'), + TautomerTransform('1,11 aromatic heteroatom H shift f', + '[#7,O;!H0]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#6,nX2]-[#6,nX2]=[#7X2,O]'), TautomerTransform('furanone f', '[O,S,N;!H0]-[#6r5]=[#6X3r5;$([#6]([#6r5])=[#6r5])]'), TautomerTransform('furanone r', '[#6r5!H0;$([#6]([#6r5])[#6r5])]-[#6r5]=[O,S,N]'), TautomerTransform('keten/ynol f', '[C!H0]=[C]=[O,S,Se,Te;X1]', bonds='#-'), @@ -283,7 +290,8 @@ class TautomerEnumerator(object): else: # If no manually specified bond types, just swap single and double bonds current_bond_type = product.GetBondBetweenAtoms(*pair).GetBondType() - product.GetBondBetweenAtoms(*pair).SetBondType(BondType.DOUBLE if current_bond_type == BondType.SINGLE else BondType.SINGLE) + product.GetBondBetweenAtoms( + *pair).SetBondType(BondType.DOUBLE if current_bond_type == BondType.SINGLE else BondType.SINGLE) # log.debug('%s-%s: %s -> %s' % (product.GetAtomWithIdx(pair[0]).GetSymbol(), product.GetAtomWithIdx(pair[1]).GetSymbol(), current_bond_type, product.GetBondBetweenAtoms(*pair).GetBondType())) # Adjust charges if transform.charges: @@ -319,7 +327,8 @@ class TautomerEnumerator(object): end = bond.GetEndAtomIdx() for othertautomer in tautomers.values(): if not othertautomer.GetBondBetweenAtoms(begin, end).GetBondType() == BondType.DOUBLE: - neighbours = tautomer.GetAtomWithIdx(begin).GetBonds() + tautomer.GetAtomWithIdx(end).GetBonds() + neighbours = tautomer.GetAtomWithIdx( + begin).GetBonds() + tautomer.GetAtomWithIdx(end).GetBonds() for otherbond in neighbours: if otherbond.GetBondDir() in {BondDir.ENDUPRIGHT, BondDir.ENDDOWNRIGHT}: otherbond.SetBondDir(BondDir.NONE) diff --git a/rdkit/Chem/MolStandardize/utils.py b/rdkit/Chem/MolStandardize/utils.py index 879620428..abda18219 100644 --- a/rdkit/Chem/MolStandardize/utils.py +++ b/rdkit/Chem/MolStandardize/utils.py @@ -9,14 +9,10 @@ This module contains miscellaneous utility functions. :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import functools from itertools import tee -import six - def memoized_property(fget): """Decorator to create memoized properties.""" @@ -34,4 +30,4 @@ def pairwise(iterable): """Utility function to iterate in a pairwise fashion.""" a, b = tee(iterable) next(b, None) - return six.moves.zip(a, b) + return zip(a, b) diff --git a/rdkit/Chem/MolStandardize/validate.py b/rdkit/Chem/MolStandardize/validate.py index a91db65e8..9c22ee9eb 100644 --- a/rdkit/Chem/MolStandardize/validate.py +++ b/rdkit/Chem/MolStandardize/validate.py @@ -11,9 +11,7 @@ convenience function. :license: MIT, see LICENSE file for more details. """ -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division + import logging import sys diff --git a/rdkit/Chem/MolStandardize/validations.py b/rdkit/Chem/MolStandardize/validations.py index 728771cb7..9f0f34931 100644 --- a/rdkit/Chem/MolStandardize/validations.py +++ b/rdkit/Chem/MolStandardize/validations.py @@ -9,9 +9,7 @@ This module contains all the built-in :class:`Validations ''' -from __future__ import print_function + from base64 import b64encode import sys @@ -112,50 +112,50 @@ from rdkit.Chem.Draw import rdMolDraw2D from rdkit.Chem import SDWriter from rdkit.Chem import rdchem from rdkit.Chem.Scaffolds import MurckoScaffold -from rdkit.six import BytesIO, string_types, PY3 +from io import BytesIO log = logging.getLogger(__name__) try: - import pandas as pd + import pandas as pd - def _getPandasVersion(): - """ Get the pandas version as a tuple """ - import re - try: - v = pd.__version__ - except AttributeError: - v = pd.version.version - v = re.split(r'[^0-9,.]', v)[0].split('.') - return tuple(int(vi) for vi in v) + def _getPandasVersion(): + """ Get the pandas version as a tuple """ + import re + try: + v = pd.__version__ + except AttributeError: + v = pd.version.version + v = re.split(r'[^0-9,.]', v)[0].split('.') + return tuple(int(vi) for vi in v) - if _getPandasVersion() < (0, 10): - print("Pandas version {0} not compatible with tests".format(_getPandasVersion()), - file=sys.stderr) - pd = None - else: - # saves the default pandas rendering to allow restoration - defPandasRendering = pd.core.frame.DataFrame.to_html + if _getPandasVersion() < (0, 10): + print("Pandas version {0} not compatible with tests".format(_getPandasVersion()), + file=sys.stderr) + pd = None + else: + # saves the default pandas rendering to allow restoration + defPandasRendering = pd.core.frame.DataFrame.to_html except ImportError: - import traceback - traceback.print_exc() - pd = None + import traceback + traceback.print_exc() + pd = None except Exception as e: - import traceback - traceback.print_exc() - pd = None + import traceback + traceback.print_exc() + pd = None if pd: - try: - from pandas.io.formats import format as fmt - except: try: - from pandas.formats import format as fmt - except ImportError: - from pandas.core import format as fmt # older versions + from pandas.io.formats import format as fmt + except: + try: + from pandas.formats import format as fmt + except ImportError: + from pandas.core import format as fmt # older versions else: - fmt = 'Pandas not available' + fmt = 'Pandas not available' highlightSubstructures = True molRepresentation = 'png' # supports also SVG @@ -163,409 +163,409 @@ molSize = (200, 200) def patchPandasHTMLrepr(self, **kwargs): - ''' - Patched default escaping of HTML control characters to allow molecule image rendering dataframes - ''' - # set escape to False if not set - kwargs['escape'] = kwargs.get('escape') or False + ''' + Patched default escaping of HTML control characters to allow molecule image rendering dataframes + ''' + # set escape to False if not set + kwargs['escape'] = kwargs.get('escape') or False - # do not allow pandas to truncate text since PNG byte strings are lengthy - with pd.option_context('display.max_colwidth', -1): - return defPandasRendering(self, **kwargs) + # do not allow pandas to truncate text since PNG byte strings are lengthy + with pd.option_context('display.max_colwidth', -1): + return defPandasRendering(self, **kwargs) def patchPandasHeadMethod(self, n=5): - '''Ensure inheritance of patched to_html in "head" subframe - ''' - df = self[:n] - df.to_html = types.MethodType(patchPandasHTMLrepr, df) - df.head = types.MethodType(patchPandasHeadMethod, df) - return df + '''Ensure inheritance of patched to_html in "head" subframe + ''' + df = self[:n] + df.to_html = types.MethodType(patchPandasHTMLrepr, df) + df.head = types.MethodType(patchPandasHeadMethod, df) + return df def _get_image(x): - """displayhook function for PNG data""" - return b64encode(x).decode('ascii') + """displayhook function for PNG data""" + return b64encode(x).decode('ascii') def _get_svg_image(mol, size=(200, 200), highlightAtoms=[]): - """ mol rendered as SVG """ - from IPython.display import SVG - from rdkit.Chem import rdDepictor - from rdkit.Chem.Draw import rdMolDraw2D - try: - # If no coordinates, calculate 2D - mol.GetConformer(-1) - except ValueError: - rdDepictor.Compute2DCoords(mol) - drawer = rdMolDraw2D.MolDraw2DSVG(*size) - drawer.DrawMolecule(mol, highlightAtoms=highlightAtoms) - drawer.FinishDrawing() - svg = drawer.GetDrawingText() - return SVG(svg).data # IPython's SVG clears the svg text + """ mol rendered as SVG """ + from IPython.display import SVG + from rdkit.Chem import rdDepictor + from rdkit.Chem.Draw import rdMolDraw2D + try: + # If no coordinates, calculate 2D + mol.GetConformer(-1) + except ValueError: + rdDepictor.Compute2DCoords(mol) + drawer = rdMolDraw2D.MolDraw2DSVG(*size) + drawer.DrawMolecule(mol, highlightAtoms=highlightAtoms) + drawer.FinishDrawing() + svg = drawer.GetDrawingText() + return SVG(svg).data # IPython's SVG clears the svg text try: - from rdkit.Avalon import pyAvalonTools as pyAvalonTools - # Calculate the Avalon fingerprint - _fingerprinter = lambda x, y: pyAvalonTools.GetAvalonFP(x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits) + from rdkit.Avalon import pyAvalonTools as pyAvalonTools + # Calculate the Avalon fingerprint + + def _fingerprinter(x, y): return pyAvalonTools.GetAvalonFP( + x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits) except ImportError: - # Calculate fingerprint using SMARTS patterns - _fingerprinter = lambda x, y: Chem.PatternFingerprint(x, fpSize=2048) + # Calculate fingerprint using SMARTS patterns + def _fingerprinter(x, y): return Chem.PatternFingerprint(x, fpSize=2048) def _molge(x, y): - """Allows for substructure check using the >= operator (X has substructure Y -> X >= Y) by - monkey-patching the __ge__ function - This has the effect that the pandas/numpy rowfilter can be used for substructure filtering - (filtered = dframe[dframe['RDKitColumn'] >= SubstructureMolecule]) - """ - if x is None or y is None: - return False - if hasattr(x, '_substructfp'): - if not hasattr(y, '_substructfp'): - y._substructfp = _fingerprinter(y, True) - if not DataStructs.AllProbeBitsMatch(y._substructfp, x._substructfp): - return False - match = x.GetSubstructMatch(y) - x.__sssAtoms = [] - if match: - if highlightSubstructures: - x.__sssAtoms = list(match) - return True - else: - return False + """Allows for substructure check using the >= operator (X has substructure Y -> X >= Y) by + monkey-patching the __ge__ function + This has the effect that the pandas/numpy rowfilter can be used for substructure filtering + (filtered = dframe[dframe['RDKitColumn'] >= SubstructureMolecule]) + """ + if x is None or y is None: + return False + if hasattr(x, '_substructfp'): + if not hasattr(y, '_substructfp'): + y._substructfp = _fingerprinter(y, True) + if not DataStructs.AllProbeBitsMatch(y._substructfp, x._substructfp): + return False + match = x.GetSubstructMatch(y) + x.__sssAtoms = [] + if match: + if highlightSubstructures: + x.__sssAtoms = list(match) + return True + else: + return False def PrintAsBase64PNGString(x, renderer=None): - '''returns the molecules as base64 encoded PNG image - ''' - if highlightSubstructures and hasattr(x, '__sssAtoms'): - highlightAtoms = x.__sssAtoms - else: - highlightAtoms = [] - if molRepresentation.lower() == 'svg': - from IPython.display import SVG - svg = Draw._moltoSVG(x, molSize, highlightAtoms, "", True) - return SVG(svg).data - else: - data = Draw._moltoimg(x, molSize, highlightAtoms, "", returnPNG=True, kekulize=True) - return 'Mol' % _get_image(data) + '''returns the molecules as base64 encoded PNG image + ''' + if highlightSubstructures and hasattr(x, '__sssAtoms'): + highlightAtoms = x.__sssAtoms + else: + highlightAtoms = [] + if molRepresentation.lower() == 'svg': + from IPython.display import SVG + svg = Draw._moltoSVG(x, molSize, highlightAtoms, "", True) + return SVG(svg).data + else: + data = Draw._moltoimg(x, molSize, highlightAtoms, "", returnPNG=True, kekulize=True) + return 'Mol' % _get_image(data) def PrintDefaultMolRep(x): - return str(x.__repr__()) + return str(x.__repr__()) def _MolPlusFingerprint(m): - '''Precomputes fingerprints and stores results in molecule objects to accelerate - substructure matching - ''' - if m is not None: - m._substructfp = _fingerprinter(m, False) - return m + '''Precomputes fingerprints and stores results in molecule objects to accelerate + substructure matching + ''' + if m is not None: + m._substructfp = _fingerprinter(m, False) + return m def RenderImagesInAllDataFrames(images=True): - '''Changes the default dataframe rendering to not escape HTML characters, thus allowing - rendered images in all dataframes. - IMPORTANT: THIS IS A GLOBAL CHANGE THAT WILL AFFECT TO COMPLETE PYTHON SESSION. If you want - to change the rendering only for a single dataframe use the "ChangeMoleculeRendering" method - instead. - ''' - if images: - pd.core.frame.DataFrame.to_html = patchPandasHTMLrepr - else: - pd.core.frame.DataFrame.to_html = defPandasRendering + '''Changes the default dataframe rendering to not escape HTML characters, thus allowing + rendered images in all dataframes. + IMPORTANT: THIS IS A GLOBAL CHANGE THAT WILL AFFECT TO COMPLETE PYTHON SESSION. If you want + to change the rendering only for a single dataframe use the "ChangeMoleculeRendering" method + instead. + ''' + if images: + pd.core.frame.DataFrame.to_html = patchPandasHTMLrepr + else: + pd.core.frame.DataFrame.to_html = defPandasRendering def AddMoleculeColumnToFrame(frame, smilesCol='Smiles', molCol='ROMol', includeFingerprints=False): - '''Converts the molecules contains in "smilesCol" to RDKit molecules and appends them to the - dataframe "frame" using the specified column name. - If desired, a fingerprint can be computed and stored with the molecule objects to accelerate - substructure matching - ''' - if not includeFingerprints: - frame[molCol] = frame[smilesCol].map(Chem.MolFromSmiles) - else: - frame[molCol] = frame[smilesCol].map( - lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles))) - RenderImagesInAllDataFrames(images=True) + '''Converts the molecules contains in "smilesCol" to RDKit molecules and appends them to the + dataframe "frame" using the specified column name. + If desired, a fingerprint can be computed and stored with the molecule objects to accelerate + substructure matching + ''' + if not includeFingerprints: + frame[molCol] = frame[smilesCol].map(Chem.MolFromSmiles) + else: + frame[molCol] = frame[smilesCol].map( + lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles))) + RenderImagesInAllDataFrames(images=True) def ChangeMoleculeRendering(frame=None, renderer='PNG'): - '''Allows to change the rendering of the molecules between base64 PNG images and string - representations. - This serves two purposes: First it allows to avoid the generation of images if this is - not desired and, secondly, it allows to enable image rendering for newly created dataframe - that already contains molecules, without having to rerun the time-consuming - AddMoleculeColumnToFrame. Note: this behaviour is, because some pandas methods, e.g. head() - returns a new dataframe instance that uses the default pandas rendering (thus not drawing - images for molecules) instead of the monkey-patched one. - ''' - if renderer == 'String': - Chem.Mol.__str__ = PrintDefaultMolRep - else: - Chem.Mol.__str__ = PrintAsBase64PNGString - if frame is not None: - frame.to_html = types.MethodType(patchPandasHTMLrepr, frame) + '''Allows to change the rendering of the molecules between base64 PNG images and string + representations. + This serves two purposes: First it allows to avoid the generation of images if this is + not desired and, secondly, it allows to enable image rendering for newly created dataframe + that already contains molecules, without having to rerun the time-consuming + AddMoleculeColumnToFrame. Note: this behaviour is, because some pandas methods, e.g. head() + returns a new dataframe instance that uses the default pandas rendering (thus not drawing + images for molecules) instead of the monkey-patched one. + ''' + if renderer == 'String': + Chem.Mol.__str__ = PrintDefaultMolRep + else: + Chem.Mol.__str__ = PrintAsBase64PNGString + if frame is not None: + frame.to_html = types.MethodType(patchPandasHTMLrepr, frame) def LoadSDF(filename, idName='ID', molColName='ROMol', includeFingerprints=False, isomericSmiles=False, smilesName=None, embedProps=False): - '''Read file in SDF format and return as Pandas data frame. - If embedProps=True all properties also get embedded in Mol objects in the molecule column. - If molColName=None molecules would not be present in resulting DataFrame (only properties - would be read). - ''' - if isinstance(filename, string_types): - if filename.lower()[-3:] == ".gz": - import gzip - f = gzip.open(filename, "rb") + '''Read file in SDF format and return as Pandas data frame. + If embedProps=True all properties also get embedded in Mol objects in the molecule column. + If molColName=None molecules would not be present in resulting DataFrame (only properties + would be read). + ''' + if isinstance(filename, str): + if filename.lower()[-3:] == ".gz": + import gzip + f = gzip.open(filename, "rb") + else: + f = open(filename, 'rb') + close = f.close else: - f = open(filename, 'rb') - close = f.close - else: - f = filename - close = None # don't close an open file that was passed in - records = [] - indices = [] - for i, mol in enumerate(Chem.ForwardSDMolSupplier(f, sanitize=(molColName is not None))): - if mol is None: - continue - row = dict((k, mol.GetProp(k)) for k in mol.GetPropNames()) - if molColName is not None and not embedProps: - for prop in mol.GetPropNames(): - mol.ClearProp(prop) - if mol.HasProp('_Name'): - row[idName] = mol.GetProp('_Name') - if smilesName is not None: - try: - row[smilesName] = Chem.MolToSmiles(mol, isomericSmiles=isomericSmiles) - except: - log.warning('No valid smiles could be generated for molecule %s', i) - row[smilesName] = None - if molColName is not None and not includeFingerprints: - row[molColName] = mol - elif molColName is not None: - row[molColName] = _MolPlusFingerprint(mol) - records.append(row) - indices.append(i) + f = filename + close = None # don't close an open file that was passed in + records = [] + indices = [] + for i, mol in enumerate(Chem.ForwardSDMolSupplier(f, sanitize=(molColName is not None))): + if mol is None: + continue + row = dict((k, mol.GetProp(k)) for k in mol.GetPropNames()) + if molColName is not None and not embedProps: + for prop in mol.GetPropNames(): + mol.ClearProp(prop) + if mol.HasProp('_Name'): + row[idName] = mol.GetProp('_Name') + if smilesName is not None: + try: + row[smilesName] = Chem.MolToSmiles(mol, isomericSmiles=isomericSmiles) + except: + log.warning('No valid smiles could be generated for molecule %s', i) + row[smilesName] = None + if molColName is not None and not includeFingerprints: + row[molColName] = mol + elif molColName is not None: + row[molColName] = _MolPlusFingerprint(mol) + records.append(row) + indices.append(i) - if close is not None: - close() - RenderImagesInAllDataFrames(images=True) - return pd.DataFrame(records, index=indices) + if close is not None: + close() + RenderImagesInAllDataFrames(images=True) + return pd.DataFrame(records, index=indices) def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumeric=False): - '''Write an SD file for the molecules in the dataframe. Dataframe columns can be exported as - SDF tags if specified in the "properties" list. "properties=list(df.columns)" would export - all columns. - The "allNumeric" flag allows to automatically include all numeric columns in the output. - User has to make sure that correct data type is assigned to column. - "idName" can be used to select a column to serve as molecule title. It can be set to - "RowID" to use the dataframe row key as title. - ''' - close = None - if isinstance(out, string_types): - if out.lower()[-3:] == ".gz": - import gzip - if PY3: - out = gzip.open(out, "wt") - else: - out = gzip.open(out, "wb") - close = out.close + '''Write an SD file for the molecules in the dataframe. Dataframe columns can be exported as + SDF tags if specified in the "properties" list. "properties=list(df.columns)" would export + all columns. + The "allNumeric" flag allows to automatically include all numeric columns in the output. + User has to make sure that correct data type is assigned to column. + "idName" can be used to select a column to serve as molecule title. It can be set to + "RowID" to use the dataframe row key as title. + ''' + close = None + if isinstance(out, str): + if out.lower()[-3:] == ".gz": + import gzip + out = gzip.open(out, "wt") + close = out.close - writer = SDWriter(out) - if properties is None: - properties = [] - else: - properties = list(properties) - if allNumeric: - properties.extend([ - dt for dt in df.dtypes.keys() - if (np.issubdtype(df.dtypes[dt], float) or np.issubdtype(df.dtypes[dt], int)) - ]) + writer = SDWriter(out) + if properties is None: + properties = [] + else: + properties = list(properties) + if allNumeric: + properties.extend([ + dt for dt in df.dtypes.keys() + if (np.issubdtype(df.dtypes[dt], float) or np.issubdtype(df.dtypes[dt], int)) + ]) - if molColName in properties: - properties.remove(molColName) - if idName in properties: - properties.remove(idName) - writer.SetProps(properties) - for row in df.iterrows(): - # make a local copy I can modify - mol = Chem.Mol(row[1][molColName]) + if molColName in properties: + properties.remove(molColName) + if idName in properties: + properties.remove(idName) + writer.SetProps(properties) + for row in df.iterrows(): + # make a local copy I can modify + mol = Chem.Mol(row[1][molColName]) - if idName is not None: - if idName == 'RowID': - mol.SetProp('_Name', str(row[0])) - else: - mol.SetProp('_Name', str(row[1][idName])) - for p in properties: - cell_value = row[1][p] - # Make sure float does not get formatted in E notation - if np.issubdtype(type(cell_value), float): - s = '{:f}'.format(cell_value).rstrip("0") # "f" will show 7.0 as 7.00000 - if s[-1] == ".": - s += "0" # put the "0" back on if it's something like "7." - mol.SetProp(p, s) - else: - mol.SetProp(p, str(cell_value)) - writer.write(mol) - writer.close() - if close is not None: - close() + if idName is not None: + if idName == 'RowID': + mol.SetProp('_Name', str(row[0])) + else: + mol.SetProp('_Name', str(row[1][idName])) + for p in properties: + cell_value = row[1][p] + # Make sure float does not get formatted in E notation + if np.issubdtype(type(cell_value), float): + s = '{:f}'.format(cell_value).rstrip("0") # "f" will show 7.0 as 7.00000 + if s[-1] == ".": + s += "0" # put the "0" back on if it's something like "7." + mol.SetProp(p, s) + else: + mol.SetProp(p, str(cell_value)) + writer.write(mol) + writer.close() + if close is not None: + close() _saltRemover = None def RemoveSaltsFromFrame(frame, molCol='ROMol'): - ''' - Removes salts from mols in pandas DataFrame's ROMol column - ''' - global _saltRemover - if _saltRemover is None: - from rdkit.Chem import SaltRemover - _saltRemover = SaltRemover.SaltRemover() - frame[molCol] = frame.apply(lambda x: _saltRemover.StripMol(x[molCol]), axis=1) + ''' + Removes salts from mols in pandas DataFrame's ROMol column + ''' + global _saltRemover + if _saltRemover is None: + from rdkit.Chem import SaltRemover + _saltRemover = SaltRemover.SaltRemover() + frame[molCol] = frame.apply(lambda x: _saltRemover.StripMol(x[molCol]), axis=1) def SaveSMILESFromFrame(frame, outFile, molCol='ROMol', NamesCol='', isomericSmiles=False): - ''' - Saves smi file. SMILES are generated from column with RDKit molecules. Column - with names is optional. - ''' - w = Chem.SmilesWriter(outFile, isomericSmiles=isomericSmiles) - if NamesCol != '': - for m, n in zip(frame[molCol], (str(c) for c in frame[NamesCol])): - m.SetProp('_Name', n) - w.write(m) - w.close() - else: - for m in frame[molCol]: - w.write(m) - w.close() + ''' + Saves smi file. SMILES are generated from column with RDKit molecules. Column + with names is optional. + ''' + w = Chem.SmilesWriter(outFile, isomericSmiles=isomericSmiles) + if NamesCol != '': + for m, n in zip(frame[molCol], (str(c) for c in frame[NamesCol])): + m.SetProp('_Name', n) + w.write(m) + w.close() + else: + for m in frame[molCol]: + w.write(m) + w.close() def SaveXlsxFromFrame(frame, outFile, molCol='ROMol', size=(300, 300)): - """ - Saves pandas DataFrame as a xlsx file with embedded images. - It maps numpy data types to excel cell types: - int, float -> number - datetime -> datetime - object -> string (limited to 32k character - xlsx limitations) - - Cells with compound images are a bit larger than images due to excel. - Column width weirdness explained (from xlsxwriter docs): - The width corresponds to the column width value that is specified in Excel. - It is approximately equal to the length of a string in the default font of Calibri 11. - Unfortunately, there is no way to specify "AutoFit" for a column in the Excel file format. - This feature is only available at runtime from within Excel. """ + Saves pandas DataFrame as a xlsx file with embedded images. + It maps numpy data types to excel cell types: + int, float -> number + datetime -> datetime + object -> string (limited to 32k character - xlsx limitations) - import xlsxwriter # don't want to make this a RDKit dependency + Cells with compound images are a bit larger than images due to excel. + Column width weirdness explained (from xlsxwriter docs): + The width corresponds to the column width value that is specified in Excel. + It is approximately equal to the length of a string in the default font of Calibri 11. + Unfortunately, there is no way to specify "AutoFit" for a column in the Excel file format. + This feature is only available at runtime from within Excel. + """ - cols = list(frame.columns) - cols.remove(molCol) - dataTypes = dict(frame.dtypes) + import xlsxwriter # don't want to make this a RDKit dependency - workbook = xlsxwriter.Workbook(outFile) # New workbook - worksheet = workbook.add_worksheet() # New work sheet - worksheet.set_column('A:A', size[0] / 6.) # column width + cols = list(frame.columns) + cols.remove(molCol) + dataTypes = dict(frame.dtypes) - # Write first row with column names - c2 = 1 - for x in cols: - worksheet.write_string(0, c2, x) - c2 += 1 - - c = 1 - for _, row in frame.iterrows(): - image_data = BytesIO() - img = Draw.MolToImage(row[molCol], size=size) - img.save(image_data, format='PNG') - - worksheet.set_row(c, height=size[1]) # looks like height is not in px? - worksheet.insert_image(c, 0, "f", {'image_data': image_data}) + workbook = xlsxwriter.Workbook(outFile) # New workbook + worksheet = workbook.add_worksheet() # New work sheet + worksheet.set_column('A:A', size[0] / 6.) # column width + # Write first row with column names c2 = 1 for x in cols: - if str(dataTypes[x]) == "object": - worksheet.write_string(c, c2, str(row[x])[:32000]) # string length is limited in xlsx - elif ('float' in str(dataTypes[x])) or ('int' in str(dataTypes[x])): - if (row[x] != np.nan) or (row[x] != np.inf): - worksheet.write_number(c, c2, row[x]) - elif 'datetime' in str(dataTypes[x]): - worksheet.write_datetime(c, c2, row[x]) - c2 += 1 - c += 1 + worksheet.write_string(0, c2, x) + c2 += 1 - workbook.close() - image_data.close() + c = 1 + for _, row in frame.iterrows(): + image_data = BytesIO() + img = Draw.MolToImage(row[molCol], size=size) + img.save(image_data, format='PNG') + + worksheet.set_row(c, height=size[1]) # looks like height is not in px? + worksheet.insert_image(c, 0, "f", {'image_data': image_data}) + + c2 = 1 + for x in cols: + if str(dataTypes[x]) == "object": + # string length is limited in xlsx + worksheet.write_string(c, c2, str(row[x])[:32000]) + elif ('float' in str(dataTypes[x])) or ('int' in str(dataTypes[x])): + if (row[x] != np.nan) or (row[x] != np.inf): + worksheet.write_number(c, c2, row[x]) + elif 'datetime' in str(dataTypes[x]): + worksheet.write_datetime(c, c2, row[x]) + c2 += 1 + c += 1 + + workbook.close() + image_data.close() def FrameToGridImage(frame, column='ROMol', legendsCol=None, **kwargs): - ''' - Draw grid image of mols in pandas DataFrame. - ''' - if legendsCol: - if legendsCol == frame.index.name: - kwargs['legends'] = [str(c) for c in frame.index] - else: - kwargs['legends'] = [str(c) for c in frame[legendsCol]] - return Draw.MolsToGridImage(frame[column], **kwargs) + ''' + Draw grid image of mols in pandas DataFrame. + ''' + if legendsCol: + if legendsCol == frame.index.name: + kwargs['legends'] = [str(c) for c in frame.index] + else: + kwargs['legends'] = [str(c) for c in frame[legendsCol]] + return Draw.MolsToGridImage(frame[column], **kwargs) def AddMurckoToFrame(frame, molCol='ROMol', MurckoCol='Murcko_SMILES', Generic=False): - ''' - Adds column with SMILES of Murcko scaffolds to pandas DataFrame. + ''' + Adds column with SMILES of Murcko scaffolds to pandas DataFrame. - Generic set to true results in SMILES of generic framework. - ''' - if Generic: - func = lambda x: Chem.MolToSmiles(MurckoScaffold.MakeScaffoldGeneric( - MurckoScaffold.GetScaffoldForMol(x[molCol]))) - else: - func = lambda x: Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(x[molCol])) - frame[MurckoCol] = frame.apply(func, axis=1) + Generic set to true results in SMILES of generic framework. + ''' + if Generic: + def func(x): return Chem.MolToSmiles(MurckoScaffold.MakeScaffoldGeneric( + MurckoScaffold.GetScaffoldForMol(x[molCol]))) + else: + def func(x): return Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(x[molCol])) + frame[MurckoCol] = frame.apply(func, axis=1) def AlignMol(mol, scaffold): - """ - Aligns mol (RDKit mol object) to scaffold (SMILES string) - """ - scaffold = Chem.MolFromSmiles(scaffold) - AllChem.Compute2DCoords(scaffold) - AllChem.GenerateDepictionMatching2DStructure(mol, scaffold) - return mol + """ + Aligns mol (RDKit mol object) to scaffold (SMILES string) + """ + scaffold = Chem.MolFromSmiles(scaffold) + AllChem.Compute2DCoords(scaffold) + AllChem.GenerateDepictionMatching2DStructure(mol, scaffold) + return mol def AlignToScaffold(frame, molCol='ROMol', scaffoldCol='Murcko_SMILES'): - ''' - Aligns molecules in molCol to scaffolds in scaffoldCol - ''' - frame[molCol] = frame.apply(lambda x: AlignMol(x[molCol], x[scaffoldCol]), axis=1) + ''' + Aligns molecules in molCol to scaffolds in scaffoldCol + ''' + frame[molCol] = frame.apply(lambda x: AlignMol(x[molCol], x[scaffoldCol]), axis=1) # ========================================================================================== # Monkey patching RDkit functionality def InstallPandasTools(): - """ Monkey patch a few RDkit methods of Chem.Mol """ - global _originalSettings - if len(_originalSettings) == 0: - _originalSettings['Chem.Mol.__ge__'] = Chem.Mol.__ge__ - _originalSettings['Chem.Mol.__str__'] = Chem.Mol.__str__ - rdchem.Mol.__ge__ = _molge - rdchem.Mol.__str__ = PrintAsBase64PNGString + """ Monkey patch a few RDkit methods of Chem.Mol """ + global _originalSettings + if len(_originalSettings) == 0: + _originalSettings['Chem.Mol.__ge__'] = Chem.Mol.__ge__ + _originalSettings['Chem.Mol.__str__'] = Chem.Mol.__str__ + rdchem.Mol.__ge__ = _molge + rdchem.Mol.__str__ = PrintAsBase64PNGString def UninstallPandasTools(): - """ Monkey patch a few RDkit methods of Chem.Mol """ - global _originalSettings - Chem.Mol.__ge__ = _originalSettings['Chem.Mol.__ge__'] - Chem.Mol.__str__ = _originalSettings['Chem.Mol.__str__'] + """ Monkey patch a few RDkit methods of Chem.Mol """ + global _originalSettings + Chem.Mol.__ge__ = _originalSettings['Chem.Mol.__ge__'] + Chem.Mol.__str__ = _originalSettings['Chem.Mol.__str__'] _originalSettings = {} @@ -577,37 +577,37 @@ InstallPandasTools() # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE, - verbose=verbose) - if (failed): - sys.exit(failed) + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE, + verbose=verbose) + if (failed): + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - import unittest - try: - import xlsxwriter - except ImportError: - xlsxwriter = None + import unittest + try: + import xlsxwriter + except ImportError: + xlsxwriter = None - class TestCase(unittest.TestCase): + class TestCase(unittest.TestCase): - @unittest.skipIf(xlsxwriter is None, 'xlsxwriter not installed') - def testGithub1507(self): - import os - from rdkit import RDConfig - sdfFile = os.path.join(RDConfig.RDDataDir, 'NCI/first_200.props.sdf') - frame = LoadSDF(sdfFile) - SaveXlsxFromFrame(frame, 'foo.xlsx') + @unittest.skipIf(xlsxwriter is None, 'xlsxwriter not installed') + def testGithub1507(self): + import os + from rdkit import RDConfig + sdfFile = os.path.join(RDConfig.RDDataDir, 'NCI/first_200.props.sdf') + frame = LoadSDF(sdfFile) + SaveXlsxFromFrame(frame, 'foo.xlsx') - if pd is None: - print("pandas installation not found, skipping tests", file=sys.stderr) - elif _getPandasVersion() < (0, 10): - print("pandas installation >=0.10 not found, skipping tests", file=sys.stderr) - else: - _runDoctests() - unittest.main() + if pd is None: + print("pandas installation not found, skipping tests", file=sys.stderr) + elif _getPandasVersion() < (0, 10): + print("pandas installation >=0.10 not found, skipping tests", file=sys.stderr) + else: + _runDoctests() + unittest.main() # $Id$ # diff --git a/rdkit/Chem/Pharm2D/Generate.py b/rdkit/Chem/Pharm2D/Generate.py index dc90d9dd0..be6a98937 100755 --- a/rdkit/Chem/Pharm2D/Generate.py +++ b/rdkit/Chem/Pharm2D/Generate.py @@ -32,7 +32,7 @@ numbering """ -from __future__ import print_function + from rdkit.Chem.Pharm2D import Utils, SigFactory from rdkit.RDLogger import logger diff --git a/rdkit/Chem/Pharm2D/LazyGenerator.py b/rdkit/Chem/Pharm2D/LazyGenerator.py index fc1be0b5b..cfeeafc70 100755 --- a/rdkit/Chem/Pharm2D/LazyGenerator.py +++ b/rdkit/Chem/Pharm2D/LazyGenerator.py @@ -10,7 +10,7 @@ """ lazy generator of 2D pharmacophore signature data """ -from __future__ import print_function + from rdkit.Chem.Pharm2D import SigFactory, Matcher diff --git a/rdkit/Chem/Pharm2D/SigFactory.py b/rdkit/Chem/Pharm2D/SigFactory.py index b8533976c..af89a0b81 100755 --- a/rdkit/Chem/Pharm2D/SigFactory.py +++ b/rdkit/Chem/Pharm2D/SigFactory.py @@ -11,8 +11,6 @@ """ -from __future__ import print_function, division - import copy import numpy @@ -24,327 +22,328 @@ _verbose = False class SigFactory(object): - """ + """ - SigFactory's are used by creating one, setting the relevant - parameters, then calling the GetSignature() method each time a - signature is required. - - """ - - def __init__(self, featFactory, useCounts=False, minPointCount=2, maxPointCount=3, - shortestPathsOnly=True, includeBondOrder=False, skipFeats=None, - trianglePruneBins=True): - self.featFactory = featFactory - self.useCounts = useCounts - self.minPointCount = minPointCount - self.maxPointCount = maxPointCount - self.shortestPathsOnly = shortestPathsOnly - self.includeBondOrder = includeBondOrder - self.trianglePruneBins = trianglePruneBins - if skipFeats is None: - self.skipFeats = [] - else: - self.skipFeats = skipFeats - self._bins = None - self.sigKlass = None - - def SetBins(self, bins): - """ bins should be a list of 2-tuples """ - self._bins = copy.copy(bins) - self.Init() - - def GetBins(self): - return self._bins - - def GetNumBins(self): - return len(self._bins) - - def GetSignature(self): - return self.sigKlass(self._sigSize) - - def _GetBitSummaryData(self, bitIdx): - nPts, combo, scaffold = self.GetBitInfo(bitIdx) - fams = self.GetFeatFamilies() - labels = [fams[x] for x in combo] - dMat = numpy.zeros((nPts, nPts), numpy.int) - dVect = Utils.nPointDistDict[nPts] - for idx in range(len(dVect)): - i, j = dVect[idx] - dMat[i, j] = scaffold[idx] - dMat[j, i] = scaffold[idx] - - return nPts, combo, scaffold, labels, dMat - - def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1): - """ returns text with a description of the bit - - **Arguments** - - - bitIdx: an integer bit index - - - includeBins: (optional) if nonzero, information about the bins will be - included as well - - - fullPage: (optional) if nonzero, html headers and footers will - be included (so as to make the output a complete page) - - **Returns** - - a string with the HTML + SigFactory's are used by creating one, setting the relevant + parameters, then calling the GetSignature() method each time a + signature is required. """ - raise NotImplementedError('Missing implementation') - def GetBitDescription(self, bitIdx): - """ returns a text description of the bit - - **Arguments** - - - bitIdx: an integer bit index - - **Returns** - - a string - - """ - nPts, combo, scaffold, labels, dMat = self._GetBitSummaryData(bitIdx) - res = " ".join(labels) + " " - for row in dMat: - res += "|" + " ".join([str(x) for x in row]) - res += "|" - return res - - def _findBinIdx(self, dists, bins, scaffolds): - """ OBSOLETE: this has been rewritten in C++ - Internal use only - Returns the index of a bin defined by a set of distances. - - **Arguments** - - - dists: a sequence of distances (not binned) - - - bins: a sorted sequence of distance bins (2-tuples) - - - scaffolds: a list of possible scaffolds (bin combinations) - - **Returns** - - an integer bin index - - **Note** - - the value returned here is not an index in the overall - signature. It is, rather, an offset of a scaffold in the - possible combinations of distance bins for a given - proto-pharmacophore. - - """ - nDists = len(dists) - whichBins = [0] * nDists - - # This would be a ton easier if we had contiguous bins - # i.e. if we could maintain the bins as a list of bounds) - # because then we could use Python's bisect module. - # Since we can't do that, we've got to do our own binary - # search here. - for i in range(nDists): - dist = dists[i] - where = -1 - - # do a simple binary search: - startP, endP = 0, len(bins) - while startP < endP: - midP = (startP + endP) // 2 - begBin, endBin = bins[midP] - if dist < begBin: - endP = midP - elif dist >= endBin: - startP = midP + 1 + def __init__(self, featFactory, useCounts=False, minPointCount=2, maxPointCount=3, + shortestPathsOnly=True, includeBondOrder=False, skipFeats=None, + trianglePruneBins=True): + self.featFactory = featFactory + self.useCounts = useCounts + self.minPointCount = minPointCount + self.maxPointCount = maxPointCount + self.shortestPathsOnly = shortestPathsOnly + self.includeBondOrder = includeBondOrder + self.trianglePruneBins = trianglePruneBins + if skipFeats is None: + self.skipFeats = [] else: - where = midP - break - if where < 0: - return None - whichBins[i] = where - res = scaffolds.index(tuple(whichBins)) - if _verbose: - print('----- _fBI -----------') - print(' scaffolds:', scaffolds) - print(' bins:', whichBins) - print(' res:', res) - return res + self.skipFeats = skipFeats + self._bins = None + self.sigKlass = None - def GetFeatFamilies(self): - fams = [fam for fam in self.featFactory.GetFeatureFamilies() if fam not in self.skipFeats] - fams.sort() - return fams + def SetBins(self, bins): + """ bins should be a list of 2-tuples """ + self._bins = copy.copy(bins) + self.Init() - def GetMolFeats(self, mol): - featFamilies = self.GetFeatFamilies() - featMatches = {} - for fam in featFamilies: - featMatches[fam] = [] - feats = self.featFactory.GetFeaturesForMol(mol, includeOnly=fam) - for feat in feats: - featMatches[fam].append(feat.GetAtomIds()) - return [featMatches[x] for x in featFamilies] + def GetBins(self): + return self._bins - def GetBitIdx(self, featIndices, dists, sortIndices=True): - """ returns the index for a pharmacophore described using a set of - feature indices and distances + def GetNumBins(self): + return len(self._bins) - **Arguments*** + def GetSignature(self): + return self.sigKlass(self._sigSize) - - featIndices: a sequence of feature indices + def _GetBitSummaryData(self, bitIdx): + nPts, combo, scaffold = self.GetBitInfo(bitIdx) + fams = self.GetFeatFamilies() + labels = [fams[x] for x in combo] + dMat = numpy.zeros((nPts, nPts), numpy.int) + dVect = Utils.nPointDistDict[nPts] + for idx in range(len(dVect)): + i, j = dVect[idx] + dMat[i, j] = scaffold[idx] + dMat[j, i] = scaffold[idx] - - dists: a sequence of distance between the features, only the - unique distances should be included, and they should be in the - order defined in Utils. + return nPts, combo, scaffold, labels, dMat - - sortIndices : sort the indices + def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1): + """ returns text with a description of the bit - **Returns** + **Arguments** - the integer bit index + - bitIdx: an integer bit index - """ - nPoints = len(featIndices) - if nPoints > 3: - raise NotImplementedError('>3 points not supported') - if nPoints < self.minPointCount: - raise IndexError('bad number of points') - if nPoints > self.maxPointCount: - raise IndexError('bad number of points') + - includeBins: (optional) if nonzero, information about the bins will be + included as well - # this is the start of the nPoint-point pharmacophores - startIdx = self._starts[nPoints] + - fullPage: (optional) if nonzero, html headers and footers will + be included (so as to make the output a complete page) - # - # now we need to map the pattern indices to an offset from startIdx - # - if sortIndices: - tmp = list(featIndices) - tmp.sort() - featIndices = tmp + **Returns** - if featIndices[0] < 0: - raise IndexError('bad feature index') - if max(featIndices) >= self._nFeats: - raise IndexError('bad feature index') + a string with the HTML - if nPoints == 3: - featIndices, dists = Utils.OrderTriangle(featIndices, dists) + """ + raise NotImplementedError('Missing implementation') - offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices) - if _verbose: - print('offset for feature %s: %d' % (str(featIndices), offset)) - offset *= len(self._scaffolds[len(dists)]) + def GetBitDescription(self, bitIdx): + """ returns a text description of the bit - try: - if _verbose: - print('>>>>>>>>>>>>>>>>>>>>>>>') - print('\tScaffolds:', repr(self._scaffolds[len(dists)]), type(self._scaffolds[len(dists)])) - print('\tDists:', repr(dists), type(dists)) - print('\tbins:', repr(self._bins), type(self._bins)) - bin_ = self._findBinIdx(dists, self._bins, self._scaffolds[len(dists)]) - except ValueError: - fams = self.GetFeatFamilies() - fams = [fams[x] for x in featIndices] - raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s' % - (fams, dists, self._bins, self._scaffolds)) + **Arguments** - return startIdx + offset + bin_ + - bitIdx: an integer bit index - def GetBitInfo(self, idx): - """ returns information about the given bit + **Returns** - **Arguments** + a string - - idx: the bit index to be considered + """ + nPts, combo, scaffold, labels, dMat = self._GetBitSummaryData(bitIdx) + res = " ".join(labels) + " " + for row in dMat: + res += "|" + " ".join([str(x) for x in row]) + res += "|" + return res - **Returns** + def _findBinIdx(self, dists, bins, scaffolds): + """ OBSOLETE: this has been rewritten in C++ + Internal use only + Returns the index of a bin defined by a set of distances. - a 3-tuple: + **Arguments** - 1) the number of points in the pharmacophore + - dists: a sequence of distances (not binned) - 2) the proto-pharmacophore (tuple of pattern indices) + - bins: a sorted sequence of distance bins (2-tuples) - 3) the scaffold (tuple of distance indices) + - scaffolds: a list of possible scaffolds (bin combinations) - """ - if idx >= self._sigSize: - raise IndexError('bad index (%d) queried. %d is the max' % (idx, self._sigSize)) - # first figure out how many points are in the p'cophore - nPts = self.minPointCount - while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx: - nPts += 1 + **Returns** - # how far are we in from the start point? - offsetFromStart = idx - self._starts[nPts] - if _verbose: - print('\t %d Points, %d offset' % (nPts, offsetFromStart)) + an integer bin index - # lookup the number of scaffolds - nDists = len(Utils.nPointDistDict[nPts]) - scaffolds = self._scaffolds[nDists] + **Note** - nScaffolds = len(scaffolds) + the value returned here is not an index in the overall + signature. It is, rather, an offset of a scaffold in the + possible combinations of distance bins for a given + proto-pharmacophore. - # figure out to which proto-pharmacophore we belong: - protoIdx = offsetFromStart // nScaffolds - indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts) - combo = tuple(indexCombos[protoIdx]) - if _verbose: - print('\t combo: %s' % (str(combo))) + """ + nDists = len(dists) + whichBins = [0] * nDists - # and which scaffold: - scaffoldIdx = offsetFromStart % nScaffolds - scaffold = scaffolds[scaffoldIdx] - if _verbose: - print('\t scaffold: %s' % (str(scaffold))) - return nPts, combo, scaffold + # This would be a ton easier if we had contiguous bins + # i.e. if we could maintain the bins as a list of bounds) + # because then we could use Python's bisect module. + # Since we can't do that, we've got to do our own binary + # search here. + for i in range(nDists): + dist = dists[i] + where = -1 - def Init(self): - """ Initializes internal parameters. This **must** be called after - making any changes to the signature parameters + # do a simple binary search: + startP, endP = 0, len(bins) + while startP < endP: + midP = (startP + endP) // 2 + begBin, endBin = bins[midP] + if dist < begBin: + endP = midP + elif dist >= endBin: + startP = midP + 1 + else: + where = midP + break + if where < 0: + return None + whichBins[i] = where + res = scaffolds.index(tuple(whichBins)) + if _verbose: + print('----- _fBI -----------') + print(' scaffolds:', scaffolds) + print(' bins:', whichBins) + print(' res:', res) + return res - """ - accum = 0 - self._scaffolds = [0] * (len(Utils.nPointDistDict[self.maxPointCount + 1])) - self._starts = {} - if not self.skipFeats: - self._nFeats = len(self.featFactory.GetFeatureFamilies()) - else: - self._nFeats = 0 - for fam in self.featFactory.GetFeatureFamilies(): - if fam not in self.skipFeats: - self._nFeats += 1 - for i in range(self.minPointCount, self.maxPointCount + 1): - self._starts[i] = accum - nDistsHere = len(Utils.nPointDistDict[i]) - scaffoldsHere = Utils.GetPossibleScaffolds(i, self._bins, - useTriangleInequality=self.trianglePruneBins) - nBitsHere = len(scaffoldsHere) - self._scaffolds[nDistsHere] = scaffoldsHere - pointsHere = Utils.NumCombinations(self._nFeats, i) * nBitsHere - accum += pointsHere - self._sigSize = accum - if not self.useCounts: - self.sigKlass = SparseBitVect - elif self._sigSize < 2**31: - self.sigKlass = IntSparseIntVect - else: - self.sigKlass = LongSparseIntVect + def GetFeatFamilies(self): + fams = [fam for fam in self.featFactory.GetFeatureFamilies() if fam not in self.skipFeats] + fams.sort() + return fams - def GetSigSize(self): - return self._sigSize + def GetMolFeats(self, mol): + featFamilies = self.GetFeatFamilies() + featMatches = {} + for fam in featFamilies: + featMatches[fam] = [] + feats = self.featFactory.GetFeaturesForMol(mol, includeOnly=fam) + for feat in feats: + featMatches[fam].append(feat.GetAtomIds()) + return [featMatches[x] for x in featFamilies] + + def GetBitIdx(self, featIndices, dists, sortIndices=True): + """ returns the index for a pharmacophore described using a set of + feature indices and distances + + **Arguments*** + + - featIndices: a sequence of feature indices + + - dists: a sequence of distance between the features, only the + unique distances should be included, and they should be in the + order defined in Utils. + + - sortIndices : sort the indices + + **Returns** + + the integer bit index + + """ + nPoints = len(featIndices) + if nPoints > 3: + raise NotImplementedError('>3 points not supported') + if nPoints < self.minPointCount: + raise IndexError('bad number of points') + if nPoints > self.maxPointCount: + raise IndexError('bad number of points') + + # this is the start of the nPoint-point pharmacophores + startIdx = self._starts[nPoints] + + # + # now we need to map the pattern indices to an offset from startIdx + # + if sortIndices: + tmp = list(featIndices) + tmp.sort() + featIndices = tmp + + if featIndices[0] < 0: + raise IndexError('bad feature index') + if max(featIndices) >= self._nFeats: + raise IndexError('bad feature index') + + if nPoints == 3: + featIndices, dists = Utils.OrderTriangle(featIndices, dists) + + offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices) + if _verbose: + print('offset for feature %s: %d' % (str(featIndices), offset)) + offset *= len(self._scaffolds[len(dists)]) + + try: + if _verbose: + print('>>>>>>>>>>>>>>>>>>>>>>>') + print('\tScaffolds:', repr(self._scaffolds[len(dists)]), type( + self._scaffolds[len(dists)])) + print('\tDists:', repr(dists), type(dists)) + print('\tbins:', repr(self._bins), type(self._bins)) + bin_ = self._findBinIdx(dists, self._bins, self._scaffolds[len(dists)]) + except ValueError: + fams = self.GetFeatFamilies() + fams = [fams[x] for x in featIndices] + raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s' % + (fams, dists, self._bins, self._scaffolds)) + + return startIdx + offset + bin_ + + def GetBitInfo(self, idx): + """ returns information about the given bit + + **Arguments** + + - idx: the bit index to be considered + + **Returns** + + a 3-tuple: + + 1) the number of points in the pharmacophore + + 2) the proto-pharmacophore (tuple of pattern indices) + + 3) the scaffold (tuple of distance indices) + + """ + if idx >= self._sigSize: + raise IndexError('bad index (%d) queried. %d is the max' % (idx, self._sigSize)) + # first figure out how many points are in the p'cophore + nPts = self.minPointCount + while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx: + nPts += 1 + + # how far are we in from the start point? + offsetFromStart = idx - self._starts[nPts] + if _verbose: + print('\t %d Points, %d offset' % (nPts, offsetFromStart)) + + # lookup the number of scaffolds + nDists = len(Utils.nPointDistDict[nPts]) + scaffolds = self._scaffolds[nDists] + + nScaffolds = len(scaffolds) + + # figure out to which proto-pharmacophore we belong: + protoIdx = offsetFromStart // nScaffolds + indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts) + combo = tuple(indexCombos[protoIdx]) + if _verbose: + print('\t combo: %s' % (str(combo))) + + # and which scaffold: + scaffoldIdx = offsetFromStart % nScaffolds + scaffold = scaffolds[scaffoldIdx] + if _verbose: + print('\t scaffold: %s' % (str(scaffold))) + return nPts, combo, scaffold + + def Init(self): + """ Initializes internal parameters. This **must** be called after + making any changes to the signature parameters + + """ + accum = 0 + self._scaffolds = [0] * (len(Utils.nPointDistDict[self.maxPointCount + 1])) + self._starts = {} + if not self.skipFeats: + self._nFeats = len(self.featFactory.GetFeatureFamilies()) + else: + self._nFeats = 0 + for fam in self.featFactory.GetFeatureFamilies(): + if fam not in self.skipFeats: + self._nFeats += 1 + for i in range(self.minPointCount, self.maxPointCount + 1): + self._starts[i] = accum + nDistsHere = len(Utils.nPointDistDict[i]) + scaffoldsHere = Utils.GetPossibleScaffolds(i, self._bins, + useTriangleInequality=self.trianglePruneBins) + nBitsHere = len(scaffoldsHere) + self._scaffolds[nDistsHere] = scaffoldsHere + pointsHere = Utils.NumCombinations(self._nFeats, i) * nBitsHere + accum += pointsHere + self._sigSize = accum + if not self.useCounts: + self.sigKlass = SparseBitVect + elif self._sigSize < 2**31: + self.sigKlass = IntSparseIntVect + else: + self.sigKlass = LongSparseIntVect + + def GetSigSize(self): + return self._sigSize try: - from rdkit.Chem.Pharmacophores import cUtils + from rdkit.Chem.Pharmacophores import cUtils except ImportError: - pass + pass else: - SigFactory._findBinIdx = cUtils.FindBinIdx + SigFactory._findBinIdx = cUtils.FindBinIdx diff --git a/rdkit/Chem/Pharm2D/UnitTestGobbi.py b/rdkit/Chem/Pharm2D/UnitTestGobbi.py index 21f4945d7..3ddea0286 100755 --- a/rdkit/Chem/Pharm2D/UnitTestGobbi.py +++ b/rdkit/Chem/Pharm2D/UnitTestGobbi.py @@ -10,7 +10,7 @@ """unit testing code for the signatures """ -from __future__ import print_function + import os import unittest @@ -18,164 +18,163 @@ import unittest from rdkit import Chem from rdkit import RDConfig from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate -from rdkit.six import next class TestCase(unittest.TestCase): - def setUp(self): - self.factory = Gobbi_Pharm2D.factory + def setUp(self): + self.factory = Gobbi_Pharm2D.factory - def test1Sigs(self): - probes = [ - ('OCCC=O', { - 'HA': (1, ((0, ), (4, ))), - 'HD': (1, ((0, ), )), - 'LH': (0, None), - 'AR': (0, None), - 'RR': (0, None), - 'X': (0, None), - 'BG': (0, None), - 'AG': (0, None), - }), - ('OCCC(=O)O', { - 'HA': (1, ((0, ), (4, ))), - 'HD': (1, ((0, ), (5, ))), - 'LH': (0, None), - 'AR': (0, None), - 'RR': (0, None), - 'X': (0, None), - 'BG': (0, None), - 'AG': (1, ((3, ), )), - }), - ('CCCN', { - 'HA': (1, ((3, ), )), - 'HD': (1, ((3, ), )), - 'LH': (0, None), - 'AR': (0, None), - 'RR': (0, None), - 'X': (0, None), - 'BG': (1, ((3, ), )), - 'AG': (0, None), - }), - ('CCCCC', { - 'HA': (0, None), - 'HD': (0, None), - 'LH': (1, ((1, ), (3, ))), - 'AR': (0, None), - 'RR': (0, None), - 'X': (0, None), - 'BG': (0, None), - 'AG': (0, None), - }), - ('CC1CCC1', { - 'HA': (0, None), - 'HD': (0, None), - 'LH': (1, ((1, ), (3, ))), - 'AR': (0, None), - 'RR': (1, ((1, ), )), - 'X': (0, None), - 'BG': (0, None), - 'AG': (0, None), - }), - ('[SiH3]C1CCC1', { - 'HA': (0, None), - 'HD': (0, None), - 'LH': (1, ((1, ), )), - 'AR': (0, None), - 'RR': (1, ((1, ), )), - 'X': (1, ((0, ), )), - 'BG': (0, None), - 'AG': (0, None), - }), - ('[SiH3]c1ccccc1', { - 'HA': (0, None), - 'HD': (0, None), - 'LH': (0, None), - 'AR': (1, ((1, ), )), - 'RR': (0, None), - 'X': (1, ((0, ), )), - 'BG': (0, None), - 'AG': (0, None), - }), - ] - for smi, d in probes: - mol = Chem.MolFromSmiles(smi) - feats = self.factory.featFactory.GetFeaturesForMol(mol) - for k in d.keys(): - shouldMatch, mapList = d[k] - feats = self.factory.featFactory.GetFeaturesForMol(mol, includeOnly=k) - if shouldMatch: - self.assertTrue(feats) - self.assertEqual(len(feats), len(mapList)) - aids = [(x.GetAtomIds()[0], ) for x in feats] - aids.sort() - self.assertEqual(tuple(aids), mapList) + def test1Sigs(self): + probes = [ + ('OCCC=O', { + 'HA': (1, ((0, ), (4, ))), + 'HD': (1, ((0, ), )), + 'LH': (0, None), + 'AR': (0, None), + 'RR': (0, None), + 'X': (0, None), + 'BG': (0, None), + 'AG': (0, None), + }), + ('OCCC(=O)O', { + 'HA': (1, ((0, ), (4, ))), + 'HD': (1, ((0, ), (5, ))), + 'LH': (0, None), + 'AR': (0, None), + 'RR': (0, None), + 'X': (0, None), + 'BG': (0, None), + 'AG': (1, ((3, ), )), + }), + ('CCCN', { + 'HA': (1, ((3, ), )), + 'HD': (1, ((3, ), )), + 'LH': (0, None), + 'AR': (0, None), + 'RR': (0, None), + 'X': (0, None), + 'BG': (1, ((3, ), )), + 'AG': (0, None), + }), + ('CCCCC', { + 'HA': (0, None), + 'HD': (0, None), + 'LH': (1, ((1, ), (3, ))), + 'AR': (0, None), + 'RR': (0, None), + 'X': (0, None), + 'BG': (0, None), + 'AG': (0, None), + }), + ('CC1CCC1', { + 'HA': (0, None), + 'HD': (0, None), + 'LH': (1, ((1, ), (3, ))), + 'AR': (0, None), + 'RR': (1, ((1, ), )), + 'X': (0, None), + 'BG': (0, None), + 'AG': (0, None), + }), + ('[SiH3]C1CCC1', { + 'HA': (0, None), + 'HD': (0, None), + 'LH': (1, ((1, ), )), + 'AR': (0, None), + 'RR': (1, ((1, ), )), + 'X': (1, ((0, ), )), + 'BG': (0, None), + 'AG': (0, None), + }), + ('[SiH3]c1ccccc1', { + 'HA': (0, None), + 'HD': (0, None), + 'LH': (0, None), + 'AR': (1, ((1, ), )), + 'RR': (0, None), + 'X': (1, ((0, ), )), + 'BG': (0, None), + 'AG': (0, None), + }), + ] + for smi, d in probes: + mol = Chem.MolFromSmiles(smi) + feats = self.factory.featFactory.GetFeaturesForMol(mol) + for k in d.keys(): + shouldMatch, mapList = d[k] + feats = self.factory.featFactory.GetFeaturesForMol(mol, includeOnly=k) + if shouldMatch: + self.assertTrue(feats) + self.assertEqual(len(feats), len(mapList)) + aids = [(x.GetAtomIds()[0], ) for x in feats] + aids.sort() + self.assertEqual(tuple(aids), mapList) - def test2Sigs(self): - probes = [('O=CCC=O', (149, )), - ('OCCC=O', (149, 156)), - ('OCCC(=O)O', (22, 29, 149, 154, 156, 184, 28822, 30134)), ] - for smi, tgt in probes: - sig = Generate.Gen2DFingerprint(Chem.MolFromSmiles(smi), self.factory) - self.assertEqual(len(sig), 39972) - bs = tuple(sig.GetOnBits()) - self.assertEqual(len(bs), len(tgt)) - self.assertEqual(bs, tgt) + def test2Sigs(self): + probes = [('O=CCC=O', (149, )), + ('OCCC=O', (149, 156)), + ('OCCC(=O)O', (22, 29, 149, 154, 156, 184, 28822, 30134)), ] + for smi, tgt in probes: + sig = Generate.Gen2DFingerprint(Chem.MolFromSmiles(smi), self.factory) + self.assertEqual(len(sig), 39972) + bs = tuple(sig.GetOnBits()) + self.assertEqual(len(bs), len(tgt)) + self.assertEqual(bs, tgt) - def testOrderBug(self): - sdFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'orderBug.sdf') - suppl = Chem.SDMolSupplier(sdFile) - m1 = next(suppl) - m2 = next(suppl) - sig1 = Generate.Gen2DFingerprint(m1, self.factory) - sig2 = Generate.Gen2DFingerprint(m2, self.factory) - self.assertEqual(sig1, sig2) - - def testOrderBug2(self): - from rdkit.Chem import Randomize - from rdkit import DataStructs - probes = ['Oc1nc(Oc2ncccc2)ccc1'] - for smi in probes: - m1 = Chem.MolFromSmiles(smi) - # m1.Debug() - sig1 = Generate.Gen2DFingerprint(m1, self.factory) - csmi = Chem.MolToSmiles(m1) - m2 = Chem.MolFromSmiles(csmi) - # m2.Debug() - sig2 = Generate.Gen2DFingerprint(m2, self.factory) - self.assertTrue(list(sig1.GetOnBits()) == list(sig2.GetOnBits()), '%s %s' % (smi, csmi)) - self.assertEqual(DataStructs.DiceSimilarity(sig1, sig2), 1.0) - self.assertEqual(sig1, sig2) - for _ in range(10): - m2 = Randomize.RandomizeMol(m1) + def testOrderBug(self): + sdFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'orderBug.sdf') + suppl = Chem.SDMolSupplier(sdFile) + m1 = next(suppl) + m2 = next(suppl) + sig1 = Generate.Gen2DFingerprint(m1, self.factory) sig2 = Generate.Gen2DFingerprint(m2, self.factory) - if sig2 != sig1: - Generate._verbose = True - print('----------------') - sig1 = Generate.Gen2DFingerprint(m1, self.factory) - print('----------------') - sig2 = Generate.Gen2DFingerprint(m2, self.factory) - print('----------------') - print(Chem.MolToMolBlock(m1)) - print('----------------') - print(Chem.MolToMolBlock(m2)) - print('----------------') - s1 = set(sig1.GetOnBits()) - s2 = set(sig2.GetOnBits()) - print(s1.difference(s2)) self.assertEqual(sig1, sig2) - def testBitInfo(self): - m = Chem.MolFromSmiles('OCC=CC(=O)O') - bi = {} - sig = Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory, bitInfo=bi) - self.assertEqual(sig.GetNumOnBits(), len(bi)) - self.assertEqual(list(sig.GetOnBits()), sorted(bi.keys())) - self.assertEqual(sorted(bi.keys()), [23, 30, 150, 154, 157, 185, 28878, 30184]) - self.assertEqual(sorted(bi[28878]), [[(0, ), (5, ), (6, )]]) - self.assertEqual(sorted(bi[157]), [[(0, ), (6, )], [(5, ), (0, )]]) + def testOrderBug2(self): + from rdkit.Chem import Randomize + from rdkit import DataStructs + probes = ['Oc1nc(Oc2ncccc2)ccc1'] + for smi in probes: + m1 = Chem.MolFromSmiles(smi) + # m1.Debug() + sig1 = Generate.Gen2DFingerprint(m1, self.factory) + csmi = Chem.MolToSmiles(m1) + m2 = Chem.MolFromSmiles(csmi) + # m2.Debug() + sig2 = Generate.Gen2DFingerprint(m2, self.factory) + self.assertTrue(list(sig1.GetOnBits()) == list(sig2.GetOnBits()), '%s %s' % (smi, csmi)) + self.assertEqual(DataStructs.DiceSimilarity(sig1, sig2), 1.0) + self.assertEqual(sig1, sig2) + for _ in range(10): + m2 = Randomize.RandomizeMol(m1) + sig2 = Generate.Gen2DFingerprint(m2, self.factory) + if sig2 != sig1: + Generate._verbose = True + print('----------------') + sig1 = Generate.Gen2DFingerprint(m1, self.factory) + print('----------------') + sig2 = Generate.Gen2DFingerprint(m2, self.factory) + print('----------------') + print(Chem.MolToMolBlock(m1)) + print('----------------') + print(Chem.MolToMolBlock(m2)) + print('----------------') + s1 = set(sig1.GetOnBits()) + s2 = set(sig2.GetOnBits()) + print(s1.difference(s2)) + self.assertEqual(sig1, sig2) + + def testBitInfo(self): + m = Chem.MolFromSmiles('OCC=CC(=O)O') + bi = {} + sig = Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory, bitInfo=bi) + self.assertEqual(sig.GetNumOnBits(), len(bi)) + self.assertEqual(list(sig.GetOnBits()), sorted(bi.keys())) + self.assertEqual(sorted(bi.keys()), [23, 30, 150, 154, 157, 185, 28878, 30184]) + self.assertEqual(sorted(bi[28878]), [[(0, ), (5, ), (6, )]]) + self.assertEqual(sorted(bi[157]), [[(0, ), (6, )], [(5, ), (0, )]]) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Pharm2D/UnitTestMatcher.py b/rdkit/Chem/Pharm2D/UnitTestMatcher.py index 932ab3fe6..fe59f01c1 100755 --- a/rdkit/Chem/Pharm2D/UnitTestMatcher.py +++ b/rdkit/Chem/Pharm2D/UnitTestMatcher.py @@ -15,74 +15,75 @@ from rdkit import RDConfig from rdkit.Chem import ChemicalFeatures from rdkit.Chem.Pharm2D import Generate, SigFactory, Matcher, Gobbi_Pharm2D from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO class TestCase(unittest.TestCase): - def setUp(self): - fdefFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'BaseFeatures.fdef') - featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) - self.factory = SigFactory.SigFactory(featFactory, minPointCount=2, maxPointCount=3) - self.factory.SetBins([(0, 2), (2, 5), (5, 8)]) - self.factory.Init() + def setUp(self): + fdefFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', + 'test_data', 'BaseFeatures.fdef') + featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) + self.factory = SigFactory.SigFactory(featFactory, minPointCount=2, maxPointCount=3) + self.factory.SetBins([(0, 2), (2, 5), (5, 8)]) + self.factory.Init() - def test1_simple(self): - mol = Chem.MolFromSmiles('OCC(=O)CCCN') - self.factory.skipFeats = ['Donor'] - self.factory.Init() - self.assertEqual(self.factory.GetSigSize(), 510) - Generate._verbose = False - sig = Generate.Gen2DFingerprint(mol, self.factory) - Generate._verbose = False - tgt = (1, 2, 11, 52, 117) - onBits = sig.GetOnBits() - self.assertEqual(tuple(onBits), tgt) - self.assertEqual(len(onBits), len(tgt)) + def test1_simple(self): + mol = Chem.MolFromSmiles('OCC(=O)CCCN') + self.factory.skipFeats = ['Donor'] + self.factory.Init() + self.assertEqual(self.factory.GetSigSize(), 510) + Generate._verbose = False + sig = Generate.Gen2DFingerprint(mol, self.factory) + Generate._verbose = False + tgt = (1, 2, 11, 52, 117) + onBits = sig.GetOnBits() + self.assertEqual(tuple(onBits), tgt) + self.assertEqual(len(onBits), len(tgt)) - bitMatches = ([((0, ), (3, ))], - [((0, ), (7, )), ((3, ), (7, ))], - [((0, ), (3, ), (7, ))], ) - for i, bit in enumerate(onBits): - matches = Matcher.GetAtomsMatchingBit(self.factory, bit, mol) - # print bit,matches - # tgt = bitMatches[i] - # self.assertEqual(matches,tgt) + bitMatches = ([((0, ), (3, ))], + [((0, ), (7, )), ((3, ), (7, ))], + [((0, ), (3, ), (7, ))], ) + for i, bit in enumerate(onBits): + matches = Matcher.GetAtomsMatchingBit(self.factory, bit, mol) + # print bit,matches + # tgt = bitMatches[i] + # self.assertEqual(matches,tgt) - def test2Bug28(self): - smi = 'Cc([s]1)nnc1SCC(\CS2)=C(/C([O-])=O)N3C(=O)[C@H]([C@@H]23)NC(=O)C[n]4cnnn4' - mol = Chem.MolFromSmiles(smi) - factory = Gobbi_Pharm2D.factory - factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)]) - sig = Generate.Gen2DFingerprint(mol, factory) - onBits = sig.GetOnBits() - for bit in onBits: - atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1) - self.assertTrue(len(atoms)) + def test2Bug28(self): + smi = 'Cc([s]1)nnc1SCC(\CS2)=C(/C([O-])=O)N3C(=O)[C@H]([C@@H]23)NC(=O)C[n]4cnnn4' + mol = Chem.MolFromSmiles(smi) + factory = Gobbi_Pharm2D.factory + factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)]) + sig = Generate.Gen2DFingerprint(mol, factory) + onBits = sig.GetOnBits() + for bit in onBits: + atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1) + self.assertTrue(len(atoms)) - def test3Roundtrip(self): - # longer-running Bug 28 test - nToDo = 20 - with open(os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi'), 'r') as inF: - inD = inF.readlines()[:nToDo] - factory = Gobbi_Pharm2D.factory - factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)]) - for line in inD: - smi = line.split('\t')[0] - mol = Chem.MolFromSmiles(smi) - sig = Generate.Gen2DFingerprint(mol, factory) - onBits = sig.GetOnBits() - for bit in onBits: - atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1) - assert len(atoms), 'bit %d failed to match for smi %s' % (bit, smi) + def test3Roundtrip(self): + # longer-running Bug 28 test + nToDo = 20 + with open(os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi'), 'r') as inF: + inD = inF.readlines()[:nToDo] + factory = Gobbi_Pharm2D.factory + factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)]) + for line in inD: + smi = line.split('\t')[0] + mol = Chem.MolFromSmiles(smi) + sig = Generate.Gen2DFingerprint(mol, factory) + onBits = sig.GetOnBits() + for bit in onBits: + atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1) + assert len(atoms), 'bit %d failed to match for smi %s' % (bit, smi) - def test_exampleCode(self): - # We make sure that the example code runs - f = StringIO() - with redirect_stdout(f): - Matcher._exampleCode() - self.assertIn('finished', f.getvalue()) + def test_exampleCode(self): + # We make sure that the example code runs + f = StringIO() + with redirect_stdout(f): + Matcher._exampleCode() + self.assertIn('finished', f.getvalue()) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Pharm2D/Utils.py b/rdkit/Chem/Pharm2D/Utils.py index f7083e7be..b9a5000db 100755 --- a/rdkit/Chem/Pharm2D/Utils.py +++ b/rdkit/Chem/Pharm2D/Utils.py @@ -16,8 +16,6 @@ numbering """ -from __future__ import print_function, division - import itertools # @@ -58,92 +56,92 @@ _trianglesInPharmacophore = {} def GetTriangles(nPts): - """ returns a tuple with the distance indices for - triangles composing an nPts-pharmacophore + """ returns a tuple with the distance indices for + triangles composing an nPts-pharmacophore - """ - global _trianglesInPharmacophore - if nPts < 3: - return [] - res = _trianglesInPharmacophore.get(nPts, []) - if not res: - idx1, idx2, idx3 = (0, 1, nPts - 1) - while idx1 < nPts - 2: - res.append((idx1, idx2, idx3)) - idx1 += 1 - idx2 += 1 - idx3 += 1 - res = tuple(res) - _trianglesInPharmacophore[nPts] = res - return res + """ + global _trianglesInPharmacophore + if nPts < 3: + return [] + res = _trianglesInPharmacophore.get(nPts, []) + if not res: + idx1, idx2, idx3 = (0, 1, nPts - 1) + while idx1 < nPts - 2: + res.append((idx1, idx2, idx3)) + idx1 += 1 + idx2 += 1 + idx3 += 1 + res = tuple(res) + _trianglesInPharmacophore[nPts] = res + return res def _fact(x): - if x <= 1: - return 1 + if x <= 1: + return 1 - accum = 1 - for i in range(x): - accum *= i + 1 - return accum + accum = 1 + for i in range(x): + accum *= i + 1 + return accum def BinsTriangleInequality(d1, d2, d3): - """ checks the triangle inequality for combinations - of distance bins. + """ checks the triangle inequality for combinations + of distance bins. - the general triangle inequality is: - d1 + d2 >= d3 - the conservative binned form of this is: - d1(upper) + d2(upper) >= d3(lower) + the general triangle inequality is: + d1 + d2 >= d3 + the conservative binned form of this is: + d1(upper) + d2(upper) >= d3(lower) - """ - if d1[1] + d2[1] < d3[0]: - return False - if d2[1] + d3[1] < d1[0]: - return False - if d3[1] + d1[1] < d2[0]: - return False + """ + if d1[1] + d2[1] < d3[0]: + return False + if d2[1] + d3[1] < d1[0]: + return False + if d3[1] + d1[1] < d2[0]: + return False - return True + return True def ScaffoldPasses(combo, bins=None): - """ checks the scaffold passed in to see if all - contributing triangles can satisfy the triangle inequality + """ checks the scaffold passed in to see if all + contributing triangles can satisfy the triangle inequality - the scaffold itself (encoded in combo) is a list of binned distances + the scaffold itself (encoded in combo) is a list of binned distances - """ - # this is the number of points in the pharmacophore - nPts = nDistPointDict[len(combo)] - tris = GetTriangles(nPts) - for tri in tris: - ds = [bins[combo[x]] for x in tri] - if not BinsTriangleInequality(ds[0], ds[1], ds[2]): - return False - return True + """ + # this is the number of points in the pharmacophore + nPts = nDistPointDict[len(combo)] + tris = GetTriangles(nPts) + for tri in tris: + ds = [bins[combo[x]] for x in tri] + if not BinsTriangleInequality(ds[0], ds[1], ds[2]): + return False + return True _numCombDict = {} def NumCombinations(nItems, nSlots): - """ returns the number of ways to fit nItems into nSlots + """ returns the number of ways to fit nItems into nSlots - We assume that (x,y) and (y,x) are equivalent, and - (x,x) is allowed. + We assume that (x,y) and (y,x) are equivalent, and + (x,x) is allowed. - General formula is, for N items and S slots: - res = (N+S-1)! / ( (N-1)! * S! ) + General formula is, for N items and S slots: + res = (N+S-1)! / ( (N-1)! * S! ) - """ - global _numCombDict - res = _numCombDict.get((nItems, nSlots), -1) - if res == -1: - res = _fact(nItems + nSlots - 1) // (_fact(nItems - 1) * _fact(nSlots)) - _numCombDict[(nItems, nSlots)] = res - return res + """ + global _numCombDict + res = _numCombDict.get((nItems, nSlots), -1) + if res == -1: + res = _fact(nItems + nSlots - 1) // (_fact(nItems - 1) * _fact(nSlots)) + _numCombDict[(nItems, nSlots)] = res + return res _verbose = 0 @@ -152,307 +150,308 @@ _countCache = {} def CountUpTo(nItems, nSlots, vs, idx=0, startAt=0): - """ Figures out where a given combination of indices would - occur in the combinatorial explosion generated by _GetIndexCombinations_ + """ Figures out where a given combination of indices would + occur in the combinatorial explosion generated by _GetIndexCombinations_ - **Arguments** + **Arguments** - - nItems: the number of items to distribute + - nItems: the number of items to distribute - - nSlots: the number of slots in which to distribute them + - nSlots: the number of slots in which to distribute them - - vs: a sequence containing the values to find + - vs: a sequence containing the values to find - - idx: used in the recursion + - idx: used in the recursion - - startAt: used in the recursion + - startAt: used in the recursion - **Returns** + **Returns** - an integer + an integer - """ - global _countCache - if _verbose: - print(' ' * idx, 'CountUpTo(%d)' % idx, vs[idx], startAt) - if idx == 0 and (nItems, nSlots, tuple(vs)) in _countCache: - return _countCache[(nItems, nSlots, tuple(vs))] - elif idx >= nSlots: - accum = 0 - elif idx == nSlots - 1: - accum = vs[idx] - startAt - else: - accum = 0 - # get the digit at idx correct - for i in range(startAt, vs[idx]): - nLevsUnder = nSlots - idx - 1 - nValsOver = nItems - i - if _verbose: - print(' ' * idx, ' ', i, nValsOver, nLevsUnder, NumCombinations(nValsOver, nLevsUnder)) - accum += NumCombinations(nValsOver, nLevsUnder) - accum += CountUpTo(nItems, nSlots, vs, idx + 1, vs[idx]) - if _verbose: - print(' ' * idx, '>', accum) - if idx == 0: - _countCache[(nItems, nSlots, tuple(vs))] = accum - return accum + """ + global _countCache + if _verbose: + print(' ' * idx, 'CountUpTo(%d)' % idx, vs[idx], startAt) + if idx == 0 and (nItems, nSlots, tuple(vs)) in _countCache: + return _countCache[(nItems, nSlots, tuple(vs))] + elif idx >= nSlots: + accum = 0 + elif idx == nSlots - 1: + accum = vs[idx] - startAt + else: + accum = 0 + # get the digit at idx correct + for i in range(startAt, vs[idx]): + nLevsUnder = nSlots - idx - 1 + nValsOver = nItems - i + if _verbose: + print(' ' * idx, ' ', i, nValsOver, nLevsUnder, + NumCombinations(nValsOver, nLevsUnder)) + accum += NumCombinations(nValsOver, nLevsUnder) + accum += CountUpTo(nItems, nSlots, vs, idx + 1, vs[idx]) + if _verbose: + print(' ' * idx, '>', accum) + if idx == 0: + _countCache[(nItems, nSlots, tuple(vs))] = accum + return accum _indexCombinations = {} def GetIndexCombinations(nItems, nSlots, slot=0, lastItemVal=0): - """ Generates all combinations of nItems in nSlots without including - duplicates + """ Generates all combinations of nItems in nSlots without including + duplicates - **Arguments** + **Arguments** - - nItems: the number of items to distribute + - nItems: the number of items to distribute - - nSlots: the number of slots in which to distribute them + - nSlots: the number of slots in which to distribute them - - slot: used in recursion + - slot: used in recursion - - lastItemVal: used in recursion - - **Returns** - - a list of lists - - """ - global _indexCombinations - if not slot and (nItems, nSlots) in _indexCombinations: - res = _indexCombinations[(nItems, nSlots)] - elif slot >= nSlots: - res = [] - elif slot == nSlots - 1: - res = [[x] for x in range(lastItemVal, nItems)] - else: - res = [] - for x in range(lastItemVal, nItems): - tmp = GetIndexCombinations(nItems, nSlots, slot + 1, x) - for entry in tmp: - res.append([x] + entry) - if not slot: - _indexCombinations[(nItems, nSlots)] = res - return res - - -def GetAllCombinations(choices, noDups=1, which=0): - """ Does the combinatorial explosion of the possible combinations - of the elements of _choices_. - - **Arguments** - - - choices: sequence of sequences with the elements to be enumerated - - - noDups: (optional) if this is nonzero, results with duplicates, - e.g. (1,1,0), will not be generated - - - which: used in recursion - - **Returns** - - a list of lists - - >>> GetAllCombinations([(0,),(1,),(2,)]) - [[0, 1, 2]] - >>> GetAllCombinations([(0,),(1,3),(2,)]) - [[0, 1, 2], [0, 3, 2]] - - >>> GetAllCombinations([(0,1),(1,3),(2,)]) - [[0, 1, 2], [0, 3, 2], [1, 3, 2]] - - """ - if which >= len(choices): - res = [] - elif which == len(choices) - 1: - res = [[x] for x in choices[which]] - else: - res = [] - tmp = GetAllCombinations(choices, noDups=noDups, which=which + 1) - for thing in choices[which]: - for other in tmp: - if not noDups or thing not in other: - res.append([thing] + other) - return res - - -def GetUniqueCombinations(choices, classes, which=0): - """ Does the combinatorial explosion of the possible combinations - of the elements of _choices_. - - """ - # print(choices, classes) - assert len(choices) == len(classes) - if which >= len(choices): - res = [] - elif which == len(choices) - 1: - res = [[(classes[which], x)] for x in choices[which]] - else: - res = [] - tmp = GetUniqueCombinations(choices, classes, which=which + 1) - for thing in choices[which]: - for other in tmp: - idxThere = 0 - for x in other: - if x[1] == thing: - idxThere += 1 - if not idxThere: - newL = [(classes[which], thing)] + other - newL.sort() - if newL not in res: - res.append(newL) - return res - - -def GetUniqueCombinations_new(choices, classes, which=0): - """ Does the combinatorial explosion of the possible combinations - of the elements of _choices_. - - """ - # print(choices, classes) - assert len(choices) == len(classes) - combos = set() - for choice in itertools.product(*choices): - # If a choice occurs in more than one of the fields, we ignore this case - if len(set(choice)) != len(choice): - continue - combos.add(tuple(sorted((cls, ch) for cls, ch in zip(classes, choice)))) - return [list(combo) for combo in sorted(combos)] - - -def UniquifyCombinations(combos): - """ uniquifies the combinations in the argument - - **Arguments**: - - - combos: a sequence of sequences + - lastItemVal: used in recursion **Returns** - - a list of tuples containing the unique combos + a list of lists - """ - resD = {} - for combo in combos: - k = combo[:] - k.sort() - resD[tuple(k)] = tuple(combo) - return list(resD.values()) + """ + global _indexCombinations + if not slot and (nItems, nSlots) in _indexCombinations: + res = _indexCombinations[(nItems, nSlots)] + elif slot >= nSlots: + res = [] + elif slot == nSlots - 1: + res = [[x] for x in range(lastItemVal, nItems)] + else: + res = [] + for x in range(lastItemVal, nItems): + tmp = GetIndexCombinations(nItems, nSlots, slot + 1, x) + for entry in tmp: + res.append([x] + entry) + if not slot: + _indexCombinations[(nItems, nSlots)] = res + return res + + +def GetAllCombinations(choices, noDups=1, which=0): + """ Does the combinatorial explosion of the possible combinations + of the elements of _choices_. + + **Arguments** + + - choices: sequence of sequences with the elements to be enumerated + + - noDups: (optional) if this is nonzero, results with duplicates, + e.g. (1,1,0), will not be generated + + - which: used in recursion + + **Returns** + + a list of lists + + >>> GetAllCombinations([(0,),(1,),(2,)]) + [[0, 1, 2]] + >>> GetAllCombinations([(0,),(1,3),(2,)]) + [[0, 1, 2], [0, 3, 2]] + + >>> GetAllCombinations([(0,1),(1,3),(2,)]) + [[0, 1, 2], [0, 3, 2], [1, 3, 2]] + + """ + if which >= len(choices): + res = [] + elif which == len(choices) - 1: + res = [[x] for x in choices[which]] + else: + res = [] + tmp = GetAllCombinations(choices, noDups=noDups, which=which + 1) + for thing in choices[which]: + for other in tmp: + if not noDups or thing not in other: + res.append([thing] + other) + return res + + +def GetUniqueCombinations(choices, classes, which=0): + """ Does the combinatorial explosion of the possible combinations + of the elements of _choices_. + + """ + # print(choices, classes) + assert len(choices) == len(classes) + if which >= len(choices): + res = [] + elif which == len(choices) - 1: + res = [[(classes[which], x)] for x in choices[which]] + else: + res = [] + tmp = GetUniqueCombinations(choices, classes, which=which + 1) + for thing in choices[which]: + for other in tmp: + idxThere = 0 + for x in other: + if x[1] == thing: + idxThere += 1 + if not idxThere: + newL = [(classes[which], thing)] + other + newL.sort() + if newL not in res: + res.append(newL) + return res + + +def GetUniqueCombinations_new(choices, classes, which=0): + """ Does the combinatorial explosion of the possible combinations + of the elements of _choices_. + + """ + # print(choices, classes) + assert len(choices) == len(classes) + combos = set() + for choice in itertools.product(*choices): + # If a choice occurs in more than one of the fields, we ignore this case + if len(set(choice)) != len(choice): + continue + combos.add(tuple(sorted((cls, ch) for cls, ch in zip(classes, choice)))) + return [list(combo) for combo in sorted(combos)] + + +def UniquifyCombinations(combos): + """ uniquifies the combinations in the argument + + **Arguments**: + + - combos: a sequence of sequences + + **Returns** + + - a list of tuples containing the unique combos + + """ + resD = {} + for combo in combos: + k = combo[:] + k.sort() + resD[tuple(k)] = tuple(combo) + return list(resD.values()) def GetPossibleScaffolds(nPts, bins, useTriangleInequality=True): - """ gets all realizable scaffolds (passing the triangle inequality) with the - given number of points and returns them as a list of tuples + """ gets all realizable scaffolds (passing the triangle inequality) with the + given number of points and returns them as a list of tuples - """ - if nPts < 2: - res = 0 - elif nPts == 2: - res = [(x, ) for x in range(len(bins))] - else: - nDists = len(nPointDistDict[nPts]) - combos = GetAllCombinations([range(len(bins))] * nDists, noDups=0) - res = [] - for combo in combos: - if not useTriangleInequality or ScaffoldPasses(combo, bins): - res.append(tuple(combo)) - return res + """ + if nPts < 2: + res = 0 + elif nPts == 2: + res = [(x, ) for x in range(len(bins))] + else: + nDists = len(nPointDistDict[nPts]) + combos = GetAllCombinations([range(len(bins))] * nDists, noDups=0) + res = [] + for combo in combos: + if not useTriangleInequality or ScaffoldPasses(combo, bins): + res.append(tuple(combo)) + return res def OrderTriangle(featIndices, dists): - """ - put the distances for a triangle into canonical order + """ + put the distances for a triangle into canonical order - It's easy if the features are all different: - >>> OrderTriangle([0,2,4],[1,2,3]) - ([0, 2, 4], [1, 2, 3]) + It's easy if the features are all different: + >>> OrderTriangle([0,2,4],[1,2,3]) + ([0, 2, 4], [1, 2, 3]) - It's trickiest if they are all the same: - >>> OrderTriangle([0,0,0],[1,2,3]) - ([0, 0, 0], [3, 2, 1]) - >>> OrderTriangle([0,0,0],[2,1,3]) - ([0, 0, 0], [3, 2, 1]) - >>> OrderTriangle([0,0,0],[1,3,2]) - ([0, 0, 0], [3, 2, 1]) - >>> OrderTriangle([0,0,0],[3,1,2]) - ([0, 0, 0], [3, 2, 1]) - >>> OrderTriangle([0,0,0],[3,2,1]) - ([0, 0, 0], [3, 2, 1]) + It's trickiest if they are all the same: + >>> OrderTriangle([0,0,0],[1,2,3]) + ([0, 0, 0], [3, 2, 1]) + >>> OrderTriangle([0,0,0],[2,1,3]) + ([0, 0, 0], [3, 2, 1]) + >>> OrderTriangle([0,0,0],[1,3,2]) + ([0, 0, 0], [3, 2, 1]) + >>> OrderTriangle([0,0,0],[3,1,2]) + ([0, 0, 0], [3, 2, 1]) + >>> OrderTriangle([0,0,0],[3,2,1]) + ([0, 0, 0], [3, 2, 1]) - >>> OrderTriangle([0,0,1],[3,2,1]) - ([0, 0, 1], [3, 2, 1]) - >>> OrderTriangle([0,0,1],[1,3,2]) - ([0, 0, 1], [1, 3, 2]) - >>> OrderTriangle([0,0,1],[1,2,3]) - ([0, 0, 1], [1, 3, 2]) - >>> OrderTriangle([0,0,1],[1,3,2]) - ([0, 0, 1], [1, 3, 2]) + >>> OrderTriangle([0,0,1],[3,2,1]) + ([0, 0, 1], [3, 2, 1]) + >>> OrderTriangle([0,0,1],[1,3,2]) + ([0, 0, 1], [1, 3, 2]) + >>> OrderTriangle([0,0,1],[1,2,3]) + ([0, 0, 1], [1, 3, 2]) + >>> OrderTriangle([0,0,1],[1,3,2]) + ([0, 0, 1], [1, 3, 2]) - """ - if len(featIndices) != 3: - raise ValueError('bad indices') - if len(dists) != 3: - raise ValueError('bad dists') + """ + if len(featIndices) != 3: + raise ValueError('bad indices') + if len(dists) != 3: + raise ValueError('bad dists') - fs = set(featIndices) - if len(fs) == 3: - return featIndices, dists + fs = set(featIndices) + if len(fs) == 3: + return featIndices, dists - dSums = [0] * 3 - dSums[0] = dists[0] + dists[1] - dSums[1] = dists[0] + dists[2] - dSums[2] = dists[1] + dists[2] - mD = max(dSums) - if len(fs) == 1: - if dSums[0] == mD: - if dists[0] > dists[1]: - ireorder = (0, 1, 2) - dreorder = (0, 1, 2) - else: - ireorder = (0, 2, 1) - dreorder = (1, 0, 2) - elif dSums[1] == mD: - if dists[0] > dists[2]: - ireorder = (1, 0, 2) - dreorder = (0, 2, 1) - else: - ireorder = (1, 2, 0) - dreorder = (2, 0, 1) + dSums = [0] * 3 + dSums[0] = dists[0] + dists[1] + dSums[1] = dists[0] + dists[2] + dSums[2] = dists[1] + dists[2] + mD = max(dSums) + if len(fs) == 1: + if dSums[0] == mD: + if dists[0] > dists[1]: + ireorder = (0, 1, 2) + dreorder = (0, 1, 2) + else: + ireorder = (0, 2, 1) + dreorder = (1, 0, 2) + elif dSums[1] == mD: + if dists[0] > dists[2]: + ireorder = (1, 0, 2) + dreorder = (0, 2, 1) + else: + ireorder = (1, 2, 0) + dreorder = (2, 0, 1) + else: + if dists[1] > dists[2]: + ireorder = (2, 0, 1) + dreorder = (1, 2, 0) + else: + ireorder = (2, 1, 0) + dreorder = (2, 1, 0) else: - if dists[1] > dists[2]: - ireorder = (2, 0, 1) - dreorder = (1, 2, 0) - else: - ireorder = (2, 1, 0) - dreorder = (2, 1, 0) - else: - # two classes - if featIndices[0] == featIndices[1]: - if dists[1] > dists[2]: - ireorder = (0, 1, 2) - dreorder = (0, 1, 2) - else: - ireorder = (1, 0, 2) - dreorder = (0, 2, 1) - elif featIndices[0] == featIndices[2]: - if dists[0] > dists[2]: - ireorder = (0, 1, 2) - dreorder = (0, 1, 2) - else: - ireorder = (2, 1, 0) - dreorder = (2, 1, 0) - else: # featIndices[1]==featIndices[2]: - if dists[0] > dists[1]: - ireorder = (0, 1, 2) - dreorder = (0, 1, 2) - else: - ireorder = (0, 2, 1) - dreorder = (1, 0, 2) - dists = [dists[x] for x in dreorder] - featIndices = [featIndices[x] for x in ireorder] - return featIndices, dists + # two classes + if featIndices[0] == featIndices[1]: + if dists[1] > dists[2]: + ireorder = (0, 1, 2) + dreorder = (0, 1, 2) + else: + ireorder = (1, 0, 2) + dreorder = (0, 2, 1) + elif featIndices[0] == featIndices[2]: + if dists[0] > dists[2]: + ireorder = (0, 1, 2) + dreorder = (0, 1, 2) + else: + ireorder = (2, 1, 0) + dreorder = (2, 1, 0) + else: # featIndices[1]==featIndices[2]: + if dists[0] > dists[1]: + ireorder = (0, 1, 2) + dreorder = (0, 1, 2) + else: + ireorder = (0, 2, 1) + dreorder = (1, 0, 2) + dists = [dists[x] for x in dreorder] + featIndices = [featIndices[x] for x in ireorder] + return featIndices, dists # ------------------------------------ @@ -460,11 +459,11 @@ def OrderTriangle(featIndices, dists): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/Chem/Pharm3D/EmbedLib.py b/rdkit/Chem/Pharm3D/EmbedLib.py index 828d85ed2..bcd2db96d 100644 --- a/rdkit/Chem/Pharm3D/EmbedLib.py +++ b/rdkit/Chem/Pharm3D/EmbedLib.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import math import sys diff --git a/rdkit/Chem/Pharm3D/UnitTestEmbed.py b/rdkit/Chem/Pharm3D/UnitTestEmbed.py index c485237e2..19c7f3d98 100644 --- a/rdkit/Chem/Pharm3D/UnitTestEmbed.py +++ b/rdkit/Chem/Pharm3D/UnitTestEmbed.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import doctest import gzip @@ -22,26 +22,25 @@ from rdkit import RDConfig from rdkit.Chem import ChemicalFeatures, rdDistGeom from rdkit.Chem.Pharm3D import EmbedLib from rdkit.Chem.Pharm3D import Pharmacophore -from rdkit.six import PY3 -from rdkit.six.moves import cPickle +import pickle def feq(n1, n2, tol=1e-5): - return abs(n1 - n2) <= tol + return abs(n1 - n2) <= tol def load_tests(loader, tests, ignore): - """ Add the Doctests from the module """ - tests.addTests( - doctest.DocTestSuite(EmbedLib, optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE)) - return tests + """ Add the Doctests from the module """ + tests.addTests( + doctest.DocTestSuite(EmbedLib, optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE)) + return tests class TestCase(unittest.TestCase): - def setUp(self): - self.dataDir = os.path.join(RDConfig.RDCodeDir, 'Chem/Pharm3D/test_data') - self.fdefBlock = """ + def setUp(self): + self.dataDir = os.path.join(RDConfig.RDCodeDir, 'Chem/Pharm3D/test_data') + self.fdefBlock = """ DefineFeature HAcceptor1 [N,O;H0] Family HBondAcceptor Weights 1.0 @@ -55,210 +54,210 @@ class TestCase(unittest.TestCase): Weights 1.,1.,1.,1.,1.,1. EndFeature\n""" - self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock) - self.feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', - Geometry.Point3D(0.0, 0.0, 0.0)), - ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', - Geometry.Point3D(2.65, 0.0, 0.0)), - ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', - Geometry.Point3D(5.12, 0.908, 0.0)), ] - self.pcophore = Pharmacophore.Pharmacophore(self.feats) - self.pcophore.setLowerBound(0, 1, 2.0) - self.pcophore.setUpperBound(0, 1, 3.3) - self.pcophore.setLowerBound(0, 2, 5.0) - self.pcophore.setUpperBound(0, 2, 5.4) - self.pcophore.setLowerBound(1, 2, 2.6) - self.pcophore.setUpperBound(1, 2, 3.0) + self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock) + self.feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', + Geometry.Point3D(0.0, 0.0, 0.0)), + ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', + Geometry.Point3D(2.65, 0.0, 0.0)), + ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', + Geometry.Point3D(5.12, 0.908, 0.0)), ] + self.pcophore = Pharmacophore.Pharmacophore(self.feats) + self.pcophore.setLowerBound(0, 1, 2.0) + self.pcophore.setUpperBound(0, 1, 3.3) + self.pcophore.setLowerBound(0, 2, 5.0) + self.pcophore.setUpperBound(0, 2, 5.4) + self.pcophore.setLowerBound(1, 2, 2.6) + self.pcophore.setUpperBound(1, 2, 3.0) - def _matchMol(self, tpl, pcophore, featFactory, downSample): - _, molPkl, boundsMat = tpl - mol = Chem.Mol(molPkl) - matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore) - if matched: - r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore, useDownsampling=downSample) - if r[0]: - return 0 - else: - return 1 - else: - return 0 + def _matchMol(self, tpl, pcophore, featFactory, downSample): + _, molPkl, boundsMat = tpl + mol = Chem.Mol(molPkl) + matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore) + if matched: + r = EmbedLib.MatchPharmacophore( + matches, boundsMat, pcophore, useDownsampling=downSample) + if r[0]: + return 0 + else: + return 1 + else: + return 0 - def test1SearchFullMat(self): - inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') - # outF = gzip.open(os.path.join(self.dataDir,'cdk2-syn-clip100.pkl.new.gz'),'wb+') - nDone = 0 - nHits = 0 - while 1: - try: - tpl = cPickle.load(inF, encoding='latin1') - if PY3: - tpl = tpl[0], tpl[1].encode('latin1'), tpl[2] - # tpl=tpl[0],tpl[1],numpy.array(tpl[2]) - # cPickle.dump(tpl,outF) - except Exception: - break - if self._matchMol(tpl, self.pcophore, self.featFactory, 0): - nHits += 1 - nDone += 1 - self.assertEqual(nDone, 100) - # print 'nHits:',nHits - self.assertEqual(nHits, 47) + def test1SearchFullMat(self): + inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') + # outF = gzip.open(os.path.join(self.dataDir,'cdk2-syn-clip100.pkl.new.gz'),'wb+') + nDone = 0 + nHits = 0 + while 1: + try: + tpl = pickle.load(inF, encoding='latin1') + tpl = tpl[0], tpl[1].encode('latin1'), tpl[2] + # tpl=tpl[0],tpl[1],numpy.array(tpl[2]) + # pickle.dump(tpl,outF) + except Exception: + break + if self._matchMol(tpl, self.pcophore, self.featFactory, 0): + nHits += 1 + nDone += 1 + self.assertEqual(nDone, 100) + # print 'nHits:',nHits + self.assertEqual(nHits, 47) - def test2SearchDownsample(self): - inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') - nDone = 0 - nHits = 0 - while 1: - try: - tpl = cPickle.load(inF, encoding='latin1') - if PY3: - tpl = tpl[0], tpl[1].encode('latin1'), tpl[2] - except Exception: - break - if self._matchMol(tpl, self.pcophore, self.featFactory, 1): - nHits += 1 - nDone += 1 - self.assertEqual(nDone, 100) - # print 'nHits:',nHits - self.assertEqual(nHits, 47) + def test2SearchDownsample(self): + inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') + nDone = 0 + nHits = 0 + while 1: + try: + tpl = pickle.load(inF, encoding='latin1') + tpl = tpl[0], tpl[1].encode('latin1'), tpl[2] + except Exception: + break + if self._matchMol(tpl, self.pcophore, self.featFactory, 1): + nHits += 1 + nDone += 1 + self.assertEqual(nDone, 100) + # print 'nHits:',nHits + self.assertEqual(nHits, 47) - def test3Embed(self): - testResults = { - 'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00), - 'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00), - 'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00), - } - inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') - nDone = 0 - nHits = 0 - while 1: - try: - name, molPkl, _ = cPickle.load(inF, encoding='latin1') - if PY3: - molPkl = bytes(molPkl, encoding='latin1') - except Exception: - break + def test3Embed(self): + testResults = { + 'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00), + 'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00), + 'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00), + } + inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') + nDone = 0 + nHits = 0 + while 1: + try: + name, molPkl, _ = pickle.load(inF, encoding='latin1') + molPkl = bytes(molPkl, encoding='latin1') + except Exception: + break - nDone += 1 + nDone += 1 - mol = Chem.Mol(molPkl) - nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol) - DG.DoTriangleSmoothing(nboundsMat) - matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, self.featFactory, self.pcophore) - if matched: - failed, _, match, stats = EmbedLib.MatchPharmacophore(matches, nboundsMat, self.pcophore, - useDownsampling=1) - if not failed: - nHits += 1 + mol = Chem.Mol(molPkl) + nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol) + DG.DoTriangleSmoothing(nboundsMat) + matched, matches = EmbedLib.MatchPharmacophoreToMol( + mol, self.featFactory, self.pcophore) + if matched: + failed, _, match, stats = EmbedLib.MatchPharmacophore(matches, nboundsMat, self.pcophore, + useDownsampling=1) + if not failed: + nHits += 1 - if name in testResults: - stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10, silent=1, - randomSeed=23) - tgt = testResults[name] - self.assertEqual(len(tgt), len(stats)) - # print(name) - # print(','.join(['%.2f' % x for x in stats])) - # we'll use different tolerances for the different values: - self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0])) - for i in range(2, len(tgt)): - self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i])) + if name in testResults: + stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10, silent=1, + randomSeed=23) + tgt = testResults[name] + self.assertEqual(len(tgt), len(stats)) + # print(name) + # print(','.join(['%.2f' % x for x in stats])) + # we'll use different tolerances for the different values: + self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0])) + for i in range(2, len(tgt)): + self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i])) - self.assertEqual(nDone, 100) - # print 'nHits:',nHits - self.assertEqual(nHits, 50) + self.assertEqual(nDone, 100) + # print 'nHits:',nHits + self.assertEqual(nHits, 50) - def test4Search(self): - featFactory = ChemicalFeatures.BuildFeatureFactory( - os.path.join(self.dataDir, 'BaseFeatures.fdef')) + def test4Search(self): + featFactory = ChemicalFeatures.BuildFeatureFactory( + os.path.join(self.dataDir, 'BaseFeatures.fdef')) - activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', - Geometry.Point3D(0.0, 0.0, 0.0)), - ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)), - ChemicalFeatures.FreeChemicalFeature('Aromatic', - Geometry.Point3D(0.0, 0.0, 0.0))] - pcophore = Pharmacophore.Pharmacophore(activeFeats) - pcophore.setLowerBound(0, 1, 2.251) - pcophore.setUpperBound(0, 1, 2.451) - pcophore.setUpperBound2D(0, 1, 3) + activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', + Geometry.Point3D(0.0, 0.0, 0.0)), + ChemicalFeatures.FreeChemicalFeature( + 'Donor', Geometry.Point3D(0.0, 0.0, 0.0)), + ChemicalFeatures.FreeChemicalFeature('Aromatic', + Geometry.Point3D(0.0, 0.0, 0.0))] + pcophore = Pharmacophore.Pharmacophore(activeFeats) + pcophore.setLowerBound(0, 1, 2.251) + pcophore.setUpperBound(0, 1, 2.451) + pcophore.setUpperBound2D(0, 1, 3) - pcophore.setLowerBound(0, 2, 4.970) - pcophore.setUpperBound(0, 2, 5.170) - pcophore.setUpperBound2D(0, 2, 6) + pcophore.setLowerBound(0, 2, 4.970) + pcophore.setUpperBound(0, 2, 5.170) + pcophore.setUpperBound2D(0, 2, 6) - pcophore.setLowerBound(1, 2, 2.681) - pcophore.setUpperBound(1, 2, 2.881) - pcophore.setUpperBound2D(1, 2, 6) + pcophore.setLowerBound(1, 2, 2.681) + pcophore.setUpperBound(1, 2, 2.881) + pcophore.setUpperBound2D(1, 2, 6) - inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') - nDone = 0 - nMatches = 0 - nHits = 0 + inF = gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') + nDone = 0 + nMatches = 0 + nHits = 0 - while 1: - try: - _, molPkl, boundsMat = cPickle.load(inF, encoding='latin1') - if PY3: - molPkl = bytes(molPkl, encoding='latin1') - except Exception: - break + while 1: + try: + _, molPkl, boundsMat = pickle.load(inF, encoding='latin1') + molPkl = bytes(molPkl, encoding='latin1') + except Exception: + break - nDone += 1 + nDone += 1 - mol = Chem.Mol(molPkl) - boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol) - DG.DoTriangleSmoothing(boundsMat) + mol = Chem.Mol(molPkl) + boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol) + DG.DoTriangleSmoothing(boundsMat) - canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore) - if canMatch: - nMatches += 1 - r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore, useDownsampling=True, - use2DLimits=True, mol=mol) - failed = r[0] - if not failed: - nHits += 1 + canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore) + if canMatch: + nMatches += 1 + r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore, useDownsampling=True, + use2DLimits=True, mol=mol) + failed = r[0] + if not failed: + nHits += 1 - self.assertEqual(nDone, 100) - self.assertEqual(nMatches, 93) - # print 'nhits:',nHits - self.assertEqual(nHits, 67) + self.assertEqual(nDone, 100) + self.assertEqual(nMatches, 93) + # print 'nhits:',nHits + self.assertEqual(nHits, 67) - def testIssue268(self): - featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(self.dataDir, 'Issue268.fdef')) - m1 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol1.mol')) - m2 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol2.mol')) - with open(os.path.join(self.dataDir, 'Issue268_Pcop.pkl'), 'r') as inTF: - buf = inTF.read().replace('\r\n', '\n').encode('utf-8') - inTF.close() - with io.BytesIO(buf) as inF: - pcop = cPickle.load(inF, encoding='latin1') - # pcop._boundsMat=numpy.array(pcop._boundsMat) - # pcop._boundsMat2D=numpy.array(pcop._boundsMat2D) - # cPickle.dump(pcop,file(os.path.join(self.dataDir, - # 'Issue268_Pcop.new.pkl'),'wb+')) - _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures()) - _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures()) - b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1) - b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2) + def testIssue268(self): + featFactory = ChemicalFeatures.BuildFeatureFactory( + os.path.join(self.dataDir, 'Issue268.fdef')) + m1 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol1.mol')) + m2 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol2.mol')) + with open(os.path.join(self.dataDir, 'Issue268_Pcop.pkl'), 'r') as inTF: + buf = inTF.read().replace('\r\n', '\n').encode('utf-8') + inTF.close() + with io.BytesIO(buf) as inF: + pcop = pickle.load(inF, encoding='latin1') + # pcop._boundsMat=numpy.array(pcop._boundsMat) + # pcop._boundsMat2D=numpy.array(pcop._boundsMat2D) + # pickle.dump(pcop,file(os.path.join(self.dataDir, + # 'Issue268_Pcop.new.pkl'),'wb+')) + _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures()) + _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures()) + b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1) + b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2) - self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4) - self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4) + self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4) + self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4) - self.assertEqual( - len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4) - self.assertEqual( - len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4) + self.assertEqual( + len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4) + self.assertEqual( + len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4) - self.assertTrue(DG.DoTriangleSmoothing(b1)) - self.assertTrue(DG.DoTriangleSmoothing(b2)) + self.assertTrue(DG.DoTriangleSmoothing(b1)) + self.assertTrue(DG.DoTriangleSmoothing(b2)) - self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4) - self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4) + self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4) + self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4) - self.assertEqual( - len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4) - self.assertEqual( - len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4) + self.assertEqual( + len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4) + self.assertEqual( + len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Pharm3D/UnitTestExcludedVolume.py b/rdkit/Chem/Pharm3D/UnitTestExcludedVolume.py index a8394c9e4..99f8c319d 100644 --- a/rdkit/Chem/Pharm3D/UnitTestExcludedVolume.py +++ b/rdkit/Chem/Pharm3D/UnitTestExcludedVolume.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import unittest diff --git a/rdkit/Chem/PropertyMol.py b/rdkit/Chem/PropertyMol.py index 143430825..6fbcf8cfb 100644 --- a/rdkit/Chem/PropertyMol.py +++ b/rdkit/Chem/PropertyMol.py @@ -9,14 +9,14 @@ class PropertyMol(Chem.Mol): """ allows rdkit molecules to be pickled with their properties saved. >>> import os - >>> from rdkit.six.moves import cPickle + >>> import pickle >>> from rdkit import RDConfig >>> m = Chem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data/benzene.mol')) >>> m.GetProp('_Name') 'benzene.mol' by default pickling removes properties: - >>> m2 = cPickle.loads(cPickle.dumps(m)) + >>> m2 = pickle.loads(pickle.dumps(m)) >>> m2.HasProp('_Name') 0 @@ -28,7 +28,7 @@ class PropertyMol(Chem.Mol): >>> pm.HasProp('MyProp') 1 - >>> pm2 = cPickle.loads(cPickle.dumps(pm)) + >>> pm2 = pickle.loads(pickle.dumps(pm)) >>> Chem.MolToSmiles(pm2) 'c1ccccc1' >>> pm2.GetProp('_Name') @@ -68,7 +68,7 @@ class PropertyMol(Chem.Mol): to an SD file include properties: >>> fn = tempfile.mktemp('.sdf') >>> w = Chem.SDWriter(fn) - >>> pm = cPickle.loads(cPickle.dumps(pm)) + >>> pm = pickle.loads(pickle.dumps(pm)) >>> w.write(pm) >>> w=None >>> txt = open(fn,'r').read() diff --git a/rdkit/Chem/Randomize.py b/rdkit/Chem/Randomize.py index 8fa47e758..ef7459821 100644 --- a/rdkit/Chem/Randomize.py +++ b/rdkit/Chem/Randomize.py @@ -9,64 +9,63 @@ # of the RDKit source tree. # import random -from rdkit.six.moves import range from rdkit import Chem def RandomizeMolBlock(molB): - splitB = molB.split('\n') - res = [] - res.extend(splitB[0:3]) - idx = 3 - inL = splitB[idx] - res.append(inL) - nAts = int(inL[0:3]) - nBonds = int(inL[3:6]) - - idx += 1 - atLines = splitB[idx:idx + nAts] - - order = list(range(nAts)) - random.shuffle(order, random=random.random) - - for i in order: - res.append(atLines[i]) - - #print 'ORDER:',order - idx += nAts - for i in range(nBonds): + splitB = molB.split('\n') + res = [] + res.extend(splitB[0:3]) + idx = 3 inL = splitB[idx] - idx1 = int(inL[0:3]) - 1 - idx2 = int(inL[3:6]) - 1 - idx1 = order.index(idx1) - idx2 = order.index(idx2) - inL = '% 3d% 3d' % (idx1 + 1, idx2 + 1) + inL[6:] res.append(inL) + nAts = int(inL[0:3]) + nBonds = int(inL[3:6]) + idx += 1 - res.append('M END') - return '\n'.join(res) + atLines = splitB[idx:idx + nAts] + + order = list(range(nAts)) + random.shuffle(order, random=random.random) + + for i in order: + res.append(atLines[i]) + + # print 'ORDER:',order + idx += nAts + for i in range(nBonds): + inL = splitB[idx] + idx1 = int(inL[0:3]) - 1 + idx2 = int(inL[3:6]) - 1 + idx1 = order.index(idx1) + idx2 = order.index(idx2) + inL = '% 3d% 3d' % (idx1 + 1, idx2 + 1) + inL[6:] + res.append(inL) + idx += 1 + res.append('M END') + return '\n'.join(res) def RandomizeMol(mol): - mb = Chem.MolToMolBlock(mol) - #print '-----------------' - #print mb - mb = RandomizeMolBlock(mb) - #print mb - return Chem.MolFromMolBlock(mb) + mb = Chem.MolToMolBlock(mol) + # print '-----------------' + # print mb + mb = RandomizeMolBlock(mb) + # print mb + return Chem.MolFromMolBlock(mb) def CheckCanonicalization(mol, nReps=10): - refSmi = Chem.MolToSmiles(mol, False) - for i in range(nReps): - m2 = RandomizeMol(mol) - smi = Chem.MolToSmiles(m2, False) - if smi != refSmi: - raise ValueError('\nRef: %s\n : %s' % (refSmi, smi)) + refSmi = Chem.MolToSmiles(mol, False) + for i in range(nReps): + m2 = RandomizeMol(mol) + smi = Chem.MolToSmiles(m2, False) + if smi != refSmi: + raise ValueError('\nRef: %s\n : %s' % (refSmi, smi)) if __name__ == '__main__': - from rdkit.Chem import Randomize - CheckCanonicalization(Chem.MolFromSmiles('CON')) - CheckCanonicalization(Chem.MolFromSmiles('c1ccccn1')) - CheckCanonicalization(Chem.MolFromSmiles('C/C=C/F')) + from rdkit.Chem import Randomize + CheckCanonicalization(Chem.MolFromSmiles('CON')) + CheckCanonicalization(Chem.MolFromSmiles('c1ccccn1')) + CheckCanonicalization(Chem.MolFromSmiles('C/C=C/F')) diff --git a/rdkit/Chem/Recap.py b/rdkit/Chem/Recap.py index a435932f2..c124b9565 100755 --- a/rdkit/Chem/Recap.py +++ b/rdkit/Chem/Recap.py @@ -61,7 +61,6 @@ import sys import weakref from rdkit import Chem from rdkit.Chem import rdChemReactions as Reactions -from rdkit.six import iterkeys, iteritems, next # These are the definitions that will be applied to fragment molecules: reactionDefs = ( @@ -69,7 +68,7 @@ reactionDefs = ( "[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]>>*[C:1]=[O:2].*[#7:3]", # amide "[C:1](=!@[O:2])!@[O;+0:3]>>*[C:1]=[O:2].[O:3]*", # ester "[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]>>*[*:1].[*:2]*", # amines - #"[N;!D1](!@[*:1])!@[*:2]>>*[*:1].[*:2]*", # amines + # "[N;!D1](!@[*:1])!@[*:2]>>*[*:1].[*:2]*", # amines # again: what about aromatics? "[#7;R;D3;+0:1]-!@[*:2]>>*[#7:1].[*:2]*", # cyclic amines @@ -78,7 +77,8 @@ reactionDefs = ( "[n;+0:1]-!@[C:2]>>[n:1]*.[C:2]*", # aromatic nitrogen - aliphatic carbon "[O:3]=[C:4]-@[N;+0:1]-!@[C:2]>>[O:3]=[C:4]-[N:1]*.[C:2]*", # lactam nitrogen - aliphatic carbon "[c:1]-!@[c:2]>>[c:1]*.*[c:2]", # aromatic carbon - aromatic carbon - "[n;+0:1]-!@[c:2]>>[n:1]*.*[c:2]", # aromatic nitrogen - aromatic carbon *NOTE* this is not part of the standard recap set. + # aromatic nitrogen - aromatic carbon *NOTE* this is not part of the standard recap set. + "[n;+0:1]-!@[c:2]>>[n:1]*.*[c:2]", "[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]>>[#7:1]*.*[S:2](=[O:3])=[O:4]", # sulphonamide ) @@ -86,521 +86,523 @@ reactions = tuple([Reactions.ReactionFromSmarts(x) for x in reactionDefs]) class RecapHierarchyNode(object): - """ This class is used to hold the Recap hiearchy - """ - mol = None - children = None - parents = None - smiles = None - - def __init__(self, mol): - self.mol = mol - self.children = {} - self.parents = {} - - def GetAllChildren(self): - " returns a dictionary, keyed by SMILES, of children " - res = {} - for smi, child in iteritems(self.children): - res[smi] = child - child._gacRecurse(res, terminalOnly=False) - return res - - def GetLeaves(self): - " returns a dictionary, keyed by SMILES, of leaf (terminal) nodes " - res = {} - for smi, child in iteritems(self.children): - if not len(child.children): - res[smi] = child - else: - child._gacRecurse(res, terminalOnly=True) - return res - - def getUltimateParents(self): - """ returns all the nodes in the hierarchy tree that contain this - node as a child + """ This class is used to hold the Recap hiearchy """ - if not self.parents: - res = [self] - else: - res = [] - for p in self.parents.values(): - for uP in p.getUltimateParents(): - if uP not in res: - res.append(uP) - return res + mol = None + children = None + parents = None + smiles = None - def _gacRecurse(self, res, terminalOnly=False): - for smi, child in iteritems(self.children): - if not terminalOnly or not len(child.children): - res[smi] = child - child._gacRecurse(res, terminalOnly=terminalOnly) + def __init__(self, mol): + self.mol = mol + self.children = {} + self.parents = {} - def __del__(self): - self.children = {} - self.parents = {} - self.mol = None + def GetAllChildren(self): + " returns a dictionary, keyed by SMILES, of children " + res = {} + for smi, child in self.children.items(): + res[smi] = child + child._gacRecurse(res, terminalOnly=False) + return res + + def GetLeaves(self): + " returns a dictionary, keyed by SMILES, of leaf (terminal) nodes " + res = {} + for smi, child in self.children.items(): + if not len(child.children): + res[smi] = child + else: + child._gacRecurse(res, terminalOnly=True) + return res + + def getUltimateParents(self): + """ returns all the nodes in the hierarchy tree that contain this + node as a child + """ + if not self.parents: + res = [self] + else: + res = [] + for p in self.parents.values(): + for uP in p.getUltimateParents(): + if uP not in res: + res.append(uP) + return res + + def _gacRecurse(self, res, terminalOnly=False): + for smi, child in self.children.items(): + if not terminalOnly or not len(child.children): + res[smi] = child + child._gacRecurse(res, terminalOnly=terminalOnly) + + def __del__(self): + self.children = {} + self.parents = {} + self.mol = None def RecapDecompose(mol, allNodes=None, minFragmentSize=0, onlyUseReactions=None): - """ returns the recap decomposition for a molecule """ - mSmi = Chem.MolToSmiles(mol, 1) + """ returns the recap decomposition for a molecule """ + mSmi = Chem.MolToSmiles(mol, 1) - if allNodes is None: - allNodes = {} - if mSmi in allNodes: - return allNodes[mSmi] + if allNodes is None: + allNodes = {} + if mSmi in allNodes: + return allNodes[mSmi] - res = RecapHierarchyNode(mol) - res.smiles = mSmi - activePool = {mSmi: res} - allNodes[mSmi] = res - while activePool: - nSmi = next(iterkeys(activePool)) - node = activePool.pop(nSmi) - if not node.mol: - continue - for rxnIdx, reaction in enumerate(reactions): - if onlyUseReactions and rxnIdx not in onlyUseReactions: - continue - #print ' .',nSmi - #print ' !!!!',rxnIdx,nSmi,reactionDefs[rxnIdx] - ps = reaction.RunReactants((node.mol, )) - #print ' ',len(ps) - if ps: - for prodSeq in ps: - seqOk = True - # we want to disqualify small fragments, so sort the product sequence by size - # and then look for "forbidden" fragments - tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx) for idx, prod in enumerate(prodSeq)] - tSeq.sort() - ts = [(x, prodSeq[y]) for x, y in tSeq] - prodSeq = ts - for nats, prod in prodSeq: - try: - Chem.SanitizeMol(prod) - except Exception: - continue - pSmi = Chem.MolToSmiles(prod, 1) - if minFragmentSize > 0: - nDummies = pSmi.count('*') - if nats - nDummies < minFragmentSize: - seqOk = False - break - # don't forget after replacing dummy atoms to remove any empty - # branches: - elif pSmi.replace('*', '').replace('()', '') in ('', 'C', 'CC', 'CCC'): - seqOk = False - break - prod.pSmi = pSmi - if seqOk: - for nats, prod in prodSeq: - pSmi = prod.pSmi - #print '\t',nats,pSmi - if not pSmi in allNodes: - pNode = RecapHierarchyNode(prod) - pNode.smiles = pSmi - pNode.parents[nSmi] = weakref.proxy(node) - node.children[pSmi] = pNode - activePool[pSmi] = pNode - allNodes[pSmi] = pNode - else: - pNode = allNodes[pSmi] - pNode.parents[nSmi] = weakref.proxy(node) - node.children[pSmi] = pNode - #print ' >>an:',allNodes.keys() - return res + res = RecapHierarchyNode(mol) + res.smiles = mSmi + activePool = {mSmi: res} + allNodes[mSmi] = res + while activePool: + nSmi = next(iter(activePool)) + node = activePool.pop(nSmi) + if not node.mol: + continue + for rxnIdx, reaction in enumerate(reactions): + if onlyUseReactions and rxnIdx not in onlyUseReactions: + continue + # print ' .',nSmi + # print ' !!!!',rxnIdx,nSmi,reactionDefs[rxnIdx] + ps = reaction.RunReactants((node.mol, )) + # print ' ',len(ps) + if ps: + for prodSeq in ps: + seqOk = True + # we want to disqualify small fragments, so sort the product sequence by size + # and then look for "forbidden" fragments + tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx) + for idx, prod in enumerate(prodSeq)] + tSeq.sort() + ts = [(x, prodSeq[y]) for x, y in tSeq] + prodSeq = ts + for nats, prod in prodSeq: + try: + Chem.SanitizeMol(prod) + except Exception: + continue + pSmi = Chem.MolToSmiles(prod, 1) + if minFragmentSize > 0: + nDummies = pSmi.count('*') + if nats - nDummies < minFragmentSize: + seqOk = False + break + # don't forget after replacing dummy atoms to remove any empty + # branches: + elif pSmi.replace('*', '').replace('()', '') in ('', 'C', 'CC', 'CCC'): + seqOk = False + break + prod.pSmi = pSmi + if seqOk: + for nats, prod in prodSeq: + pSmi = prod.pSmi + # print '\t',nats,pSmi + if not pSmi in allNodes: + pNode = RecapHierarchyNode(prod) + pNode.smiles = pSmi + pNode.parents[nSmi] = weakref.proxy(node) + node.children[pSmi] = pNode + activePool[pSmi] = pNode + allNodes[pSmi] = pNode + else: + pNode = allNodes[pSmi] + pNode.parents[nSmi] = weakref.proxy(node) + node.children[pSmi] = pNode + # print ' >>an:',allNodes.keys() + return res # ------- ------- ------- ------- ------- ------- ------- ------- # Begin testing code + if __name__ == '__main__': - import unittest + import unittest - class TestCase(unittest.TestCase): + class TestCase(unittest.TestCase): - def test1(self): - m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncc(OC)cc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.children.keys()) == 4) - self.assertTrue(len(res.GetAllChildren().keys()) == 5) - self.assertTrue(len(res.GetLeaves().keys()) == 3) + def test1(self): + m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncc(OC)cc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.children.keys()) == 4) + self.assertTrue(len(res.GetAllChildren().keys()) == 5) + self.assertTrue(len(res.GetLeaves().keys()) == 3) - def test2(self): - m = Chem.MolFromSmiles('CCCOCCC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(res.children == {}) + def test2(self): + m = Chem.MolFromSmiles('CCCOCCC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(res.children == {}) - def test3(self): - allNodes = {} - m = Chem.MolFromSmiles('c1ccccc1-c1ncccc1') - res = RecapDecompose(m, allNodes=allNodes) - self.assertTrue(res) - self.assertTrue(len(res.children.keys()) == 2) - self.assertTrue(len(allNodes.keys()) == 3) + def test3(self): + allNodes = {} + m = Chem.MolFromSmiles('c1ccccc1-c1ncccc1') + res = RecapDecompose(m, allNodes=allNodes) + self.assertTrue(res) + self.assertTrue(len(res.children.keys()) == 2) + self.assertTrue(len(allNodes.keys()) == 3) - m = Chem.MolFromSmiles('COc1ccccc1-c1ncccc1') - res = RecapDecompose(m, allNodes=allNodes) - self.assertTrue(res) - self.assertTrue(len(res.children.keys()) == 2) - # we get two more nodes from that: - self.assertTrue(len(allNodes.keys()) == 5) - self.assertTrue('*c1ccccc1OC' in allNodes) - self.assertTrue('*c1ccccc1' in allNodes) + m = Chem.MolFromSmiles('COc1ccccc1-c1ncccc1') + res = RecapDecompose(m, allNodes=allNodes) + self.assertTrue(res) + self.assertTrue(len(res.children.keys()) == 2) + # we get two more nodes from that: + self.assertTrue(len(allNodes.keys()) == 5) + self.assertTrue('*c1ccccc1OC' in allNodes) + self.assertTrue('*c1ccccc1' in allNodes) - m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncccc1') - res = RecapDecompose(m, allNodes=allNodes) - self.assertTrue(res) - self.assertTrue(len(res.children.keys()) == 4) - self.assertTrue(len(allNodes.keys()) == 10) + m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncccc1') + res = RecapDecompose(m, allNodes=allNodes) + self.assertTrue(res) + self.assertTrue(len(res.children.keys()) == 4) + self.assertTrue(len(allNodes.keys()) == 10) - def testSFNetIssue1801871(self): - m = Chem.MolFromSmiles('c1ccccc1OC(Oc1ccccc1)Oc1ccccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertFalse('*C(*)*' in ks) - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*C(*)Oc1ccccc1' in ks) + def testSFNetIssue1801871(self): + m = Chem.MolFromSmiles('c1ccccc1OC(Oc1ccccc1)Oc1ccccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertFalse('*C(*)*' in ks) + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*C(*)Oc1ccccc1' in ks) - def testSFNetIssue1804418(self): - m = Chem.MolFromSmiles('C1CCCCN1CCCC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*N1CCCCC1' in ks) - self.assertTrue('*CCCC' in ks) + def testSFNetIssue1804418(self): + m = Chem.MolFromSmiles('C1CCCCN1CCCC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*N1CCCCC1' in ks) + self.assertTrue('*CCCC' in ks) - def testMinFragmentSize(self): - m = Chem.MolFromSmiles('CCCOCCC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(res.children == {}) - res = RecapDecompose(m, minFragmentSize=3) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 1) - ks = res.GetLeaves().keys() - self.assertTrue('*CCC' in ks) + def testMinFragmentSize(self): + m = Chem.MolFromSmiles('CCCOCCC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(res.children == {}) + res = RecapDecompose(m, minFragmentSize=3) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 1) + ks = res.GetLeaves().keys() + self.assertTrue('*CCC' in ks) - m = Chem.MolFromSmiles('CCCOCC') - res = RecapDecompose(m, minFragmentSize=3) - self.assertTrue(res) - self.assertTrue(res.children == {}) + m = Chem.MolFromSmiles('CCCOCC') + res = RecapDecompose(m, minFragmentSize=3) + self.assertTrue(res) + self.assertTrue(res.children == {}) - m = Chem.MolFromSmiles('CCCOCCOC') - res = RecapDecompose(m, minFragmentSize=2) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*CCC' in ks) - ks = res.GetLeaves().keys() - self.assertTrue('*CCOC' in ks) + m = Chem.MolFromSmiles('CCCOCCOC') + res = RecapDecompose(m, minFragmentSize=2) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*CCC' in ks) + ks = res.GetLeaves().keys() + self.assertTrue('*CCOC' in ks) - def testAmideRxn(self): - m = Chem.MolFromSmiles('C1CC1C(=O)NC1OC1') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*C(=O)C1CC1' in ks) - self.assertTrue('*NC1CO1' in ks) + def testAmideRxn(self): + m = Chem.MolFromSmiles('C1CC1C(=O)NC1OC1') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*C(=O)C1CC1' in ks) + self.assertTrue('*NC1CO1' in ks) - m = Chem.MolFromSmiles('C1CC1C(=O)N(C)C1OC1') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*C(=O)C1CC1' in ks) - self.assertTrue('*N(C)C1CO1' in ks) + m = Chem.MolFromSmiles('C1CC1C(=O)N(C)C1OC1') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*C(=O)C1CC1' in ks) + self.assertTrue('*N(C)C1CO1' in ks) - m = Chem.MolFromSmiles('C1CC1C(=O)n1cccc1') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*C(=O)C1CC1' in ks) - self.assertTrue('*n1cccc1' in ks) + m = Chem.MolFromSmiles('C1CC1C(=O)n1cccc1') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*C(=O)C1CC1' in ks) + self.assertTrue('*n1cccc1' in ks) - m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('C1CCC(=O)NC1') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CCC(=O)NC1') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('CC(=O)NC') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() + m = Chem.MolFromSmiles('CC(=O)NC') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() - m = Chem.MolFromSmiles('CC(=O)N') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('CC(=O)N') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('C(=O)NCCNC(=O)CC') - res = RecapDecompose(m, onlyUseReactions=[1]) - self.assertTrue(res) - self.assertTrue(len(res.children) == 4) - self.assertTrue(len(res.GetLeaves()) == 3) + m = Chem.MolFromSmiles('C(=O)NCCNC(=O)CC') + res = RecapDecompose(m, onlyUseReactions=[1]) + self.assertTrue(res) + self.assertTrue(len(res.children) == 4) + self.assertTrue(len(res.GetLeaves()) == 3) - def testEsterRxn(self): - m = Chem.MolFromSmiles('C1CC1C(=O)OC1OC1') - res = RecapDecompose(m, onlyUseReactions=[2]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*C(=O)C1CC1' in ks) - self.assertTrue('*OC1CO1' in ks) + def testEsterRxn(self): + m = Chem.MolFromSmiles('C1CC1C(=O)OC1OC1') + res = RecapDecompose(m, onlyUseReactions=[2]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*C(=O)C1CC1' in ks) + self.assertTrue('*OC1CO1' in ks) - m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1') - res = RecapDecompose(m, onlyUseReactions=[2]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1') + res = RecapDecompose(m, onlyUseReactions=[2]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('C1CCC(=O)OC1') - res = RecapDecompose(m, onlyUseReactions=[2]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CCC(=O)OC1') + res = RecapDecompose(m, onlyUseReactions=[2]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testUreaRxn(self): - m = Chem.MolFromSmiles('C1CC1NC(=O)NC1OC1') - res = RecapDecompose(m, onlyUseReactions=[0]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*NC1CC1' in ks) - self.assertTrue('*NC1CO1' in ks) + def testUreaRxn(self): + m = Chem.MolFromSmiles('C1CC1NC(=O)NC1OC1') + res = RecapDecompose(m, onlyUseReactions=[0]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*NC1CC1' in ks) + self.assertTrue('*NC1CO1' in ks) - m = Chem.MolFromSmiles('C1CC1NC(=O)N(C)C1OC1') - res = RecapDecompose(m, onlyUseReactions=[0]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*NC1CC1' in ks) - self.assertTrue('*N(C)C1CO1' in ks) + m = Chem.MolFromSmiles('C1CC1NC(=O)N(C)C1OC1') + res = RecapDecompose(m, onlyUseReactions=[0]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*NC1CC1' in ks) + self.assertTrue('*N(C)C1CO1' in ks) - m = Chem.MolFromSmiles('C1CCNC(=O)NC1C') - res = RecapDecompose(m, onlyUseReactions=[0]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CCNC(=O)NC1C') + res = RecapDecompose(m, onlyUseReactions=[0]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('c1cccn1C(=O)NC1OC1') - res = RecapDecompose(m, onlyUseReactions=[0]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*n1cccc1' in ks) - self.assertTrue('*NC1CO1' in ks) + m = Chem.MolFromSmiles('c1cccn1C(=O)NC1OC1') + res = RecapDecompose(m, onlyUseReactions=[0]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*n1cccc1' in ks) + self.assertTrue('*NC1CO1' in ks) - m = Chem.MolFromSmiles('c1cccn1C(=O)n1c(C)ccc1') - res = RecapDecompose(m, onlyUseReactions=[0]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*n1cccc1C' in ks) + m = Chem.MolFromSmiles('c1cccn1C(=O)n1c(C)ccc1') + res = RecapDecompose(m, onlyUseReactions=[0]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*n1cccc1C' in ks) - def testAmineRxn(self): - m = Chem.MolFromSmiles('C1CC1N(C1NC1)C1OC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 3) - ks = res.GetLeaves().keys() - self.assertTrue('*C1CC1' in ks) - self.assertTrue('*C1CO1' in ks) - self.assertTrue('*C1CN1' in ks) + def testAmineRxn(self): + m = Chem.MolFromSmiles('C1CC1N(C1NC1)C1OC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 3) + ks = res.GetLeaves().keys() + self.assertTrue('*C1CC1' in ks) + self.assertTrue('*C1CO1' in ks) + self.assertTrue('*C1CN1' in ks) - m = Chem.MolFromSmiles('c1ccccc1N(C1NC1)C1OC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 3) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*C1CO1' in ks) - self.assertTrue('*C1CN1' in ks) + m = Chem.MolFromSmiles('c1ccccc1N(C1NC1)C1OC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 3) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*C1CO1' in ks) + self.assertTrue('*C1CN1' in ks) - m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)C1OC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 3) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*c1ccccn1' in ks) - self.assertTrue('*C1CO1' in ks) + m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)C1OC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 3) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*c1ccccn1' in ks) + self.assertTrue('*C1CO1' in ks) - m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)c1ccco1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 3) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*c1ccccn1' in ks) - self.assertTrue('*c1ccco1' in ks) + m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)c1ccco1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 3) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*c1ccccn1' in ks) + self.assertTrue('*c1ccco1' in ks) - m = Chem.MolFromSmiles('C1CCCCN1C1CC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*N1CCCCC1' in ks) - self.assertTrue('*C1CC1' in ks) + m = Chem.MolFromSmiles('C1CCCCN1C1CC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*N1CCCCC1' in ks) + self.assertTrue('*C1CC1' in ks) - m = Chem.MolFromSmiles('C1CCC2N1CC2') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CCC2N1CC2') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testEtherRxn(self): - m = Chem.MolFromSmiles('C1CC1OC1OC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*C1CC1' in ks) - self.assertTrue('*C1CO1' in ks) + def testEtherRxn(self): + m = Chem.MolFromSmiles('C1CC1OC1OC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*C1CC1' in ks) + self.assertTrue('*C1CO1' in ks) - m = Chem.MolFromSmiles('C1CCCCO1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CCCCO1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('c1ccccc1OC1OC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*C1CO1' in ks) + m = Chem.MolFromSmiles('c1ccccc1OC1OC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*C1CO1' in ks) - m = Chem.MolFromSmiles('c1ccccc1Oc1ncccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*c1ccccn1' in ks) + m = Chem.MolFromSmiles('c1ccccc1Oc1ncccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*c1ccccn1' in ks) - def testOlefinRxn(self): - m = Chem.MolFromSmiles('ClC=CBr') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*CCl' in ks) - self.assertTrue('*CBr' in ks) + def testOlefinRxn(self): + m = Chem.MolFromSmiles('ClC=CBr') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*CCl' in ks) + self.assertTrue('*CBr' in ks) - m = Chem.MolFromSmiles('C1CC=CC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CC=CC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testAromNAliphCRxn(self): - m = Chem.MolFromSmiles('c1cccn1CCCC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*n1cccc1' in ks) - self.assertTrue('*CCCC' in ks) + def testAromNAliphCRxn(self): + m = Chem.MolFromSmiles('c1cccn1CCCC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*n1cccc1' in ks) + self.assertTrue('*CCCC' in ks) - m = Chem.MolFromSmiles('c1ccc2n1CCCC2') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('c1ccc2n1CCCC2') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testLactamNAliphCRxn(self): - m = Chem.MolFromSmiles('C1CC(=O)N1CCCC') - res = RecapDecompose(m, onlyUseReactions=[8]) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*N1CCC1=O' in ks) - self.assertTrue('*CCCC' in ks) + def testLactamNAliphCRxn(self): + m = Chem.MolFromSmiles('C1CC(=O)N1CCCC') + res = RecapDecompose(m, onlyUseReactions=[8]) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*N1CCC1=O' in ks) + self.assertTrue('*CCCC' in ks) - m = Chem.MolFromSmiles('O=C1CC2N1CCCC2') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('O=C1CC2N1CCCC2') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testAromCAromCRxn(self): - m = Chem.MolFromSmiles('c1ccccc1c1ncccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*c1ccccc1' in ks) - self.assertTrue('*c1ccccn1' in ks) + def testAromCAromCRxn(self): + m = Chem.MolFromSmiles('c1ccccc1c1ncccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*c1ccccc1' in ks) + self.assertTrue('*c1ccccn1' in ks) - m = Chem.MolFromSmiles('c1ccccc1C1CC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('c1ccccc1C1CC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testAromNAromCRxn(self): - m = Chem.MolFromSmiles('c1cccn1c1ccccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*n1cccc1' in ks) - self.assertTrue('*c1ccccc1' in ks) + def testAromNAromCRxn(self): + m = Chem.MolFromSmiles('c1cccn1c1ccccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*n1cccc1' in ks) + self.assertTrue('*c1ccccc1' in ks) - def testSulfonamideRxn(self): - m = Chem.MolFromSmiles('CCCNS(=O)(=O)CC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*NCCC' in ks) - self.assertTrue('*S(=O)(=O)CC' in ks) + def testSulfonamideRxn(self): + m = Chem.MolFromSmiles('CCCNS(=O)(=O)CC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*NCCC' in ks) + self.assertTrue('*S(=O)(=O)CC' in ks) - m = Chem.MolFromSmiles('c1cccn1S(=O)(=O)CC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - ks = res.GetLeaves().keys() - self.assertTrue('*n1cccc1' in ks) - self.assertTrue('*S(=O)(=O)CC' in ks) + m = Chem.MolFromSmiles('c1cccn1S(=O)(=O)CC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + ks = res.GetLeaves().keys() + self.assertTrue('*n1cccc1' in ks) + self.assertTrue('*S(=O)(=O)CC' in ks) - m = Chem.MolFromSmiles('C1CNS(=O)(=O)CC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CNS(=O)(=O)CC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - def testSFNetIssue1881803(self): - m = Chem.MolFromSmiles('c1ccccc1n1cccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - m = Chem.MolFromSmiles('c1ccccc1[n+]1ccccc1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + def testSFNetIssue1881803(self): + m = Chem.MolFromSmiles('c1ccccc1n1cccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + m = Chem.MolFromSmiles('c1ccccc1[n+]1ccccc1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('C1CC1NC(=O)CC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)CC') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CC1NC(=O)CC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)CC') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - m = Chem.MolFromSmiles('C1CC1NC(=O)NC1CCC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 2) - m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)[NH+]C1CCC1') - res = RecapDecompose(m) - self.assertTrue(res) - self.assertTrue(len(res.GetLeaves()) == 0) + m = Chem.MolFromSmiles('C1CC1NC(=O)NC1CCC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 2) + m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)[NH+]C1CCC1') + res = RecapDecompose(m) + self.assertTrue(res) + self.assertTrue(len(res.GetLeaves()) == 0) - unittest.main() + unittest.main() diff --git a/rdkit/Chem/SATIS.py b/rdkit/Chem/SATIS.py index 70b54a6e5..75a838fd2 100755 --- a/rdkit/Chem/SATIS.py +++ b/rdkit/Chem/SATIS.py @@ -10,7 +10,7 @@ """ Functionality for SATIS typing atoms """ -from __future__ import print_function + import itertools diff --git a/rdkit/Chem/SaltRemover.py b/rdkit/Chem/SaltRemover.py index f9faf0674..5be103707 100644 --- a/rdkit/Chem/SaltRemover.py +++ b/rdkit/Chem/SaltRemover.py @@ -38,265 +38,270 @@ from contextlib import closing from rdkit import Chem, RDConfig from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier + class InputFormat: - SMARTS = 'smarts' - MOL = 'mol' - SMILES = 'smiles' + SMARTS = 'smarts' + MOL = 'mol' + SMILES = 'smiles' + def _smartsFromSmartsLine(line): - """ - Converts given line into a molecule using 'Chem.MolFromSmarts'. - """ - # Name the regular expression (better than inlining it) - whitespace = re.compile(r'[\t ]+') - # Reflects the specialisation of this method to read the rather unusual - # SMARTS files with the // comments. - line = line.strip().split('//')[0] - if line: - smarts = whitespace.split(line) - salt = Chem.MolFromSmarts(smarts[0]) - if salt is None: - raise ValueError(line) - return salt + """ + Converts given line into a molecule using 'Chem.MolFromSmarts'. + """ + # Name the regular expression (better than inlining it) + whitespace = re.compile(r'[\t ]+') + # Reflects the specialisation of this method to read the rather unusual + # SMARTS files with the // comments. + line = line.strip().split('//')[0] + if line: + smarts = whitespace.split(line) + salt = Chem.MolFromSmarts(smarts[0]) + if salt is None: + raise ValueError(line) + return salt + def _getSmartsSaltsFromStream(stream): - """ - Yields extracted SMARTS salts from given stream. - """ - with closing(stream) as lines: - for line in lines: - smarts = _smartsFromSmartsLine(line) - if smarts: - yield smarts + """ + Yields extracted SMARTS salts from given stream. + """ + with closing(stream) as lines: + for line in lines: + smarts = _smartsFromSmartsLine(line) + if smarts: + yield smarts + def _getSmartsSaltsFromFile(filename): - """ - Extracts SMARTS salts from given file object. - """ - return _getSmartsSaltsFromStream(open(filename, 'r')) + """ + Extracts SMARTS salts from given file object. + """ + return _getSmartsSaltsFromStream(open(filename, 'r')) + class SaltRemover(object): - defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt') + defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt') - def __init__(self, defnFilename=None, defnData=None, defnFormat=InputFormat.SMARTS): - if defnFilename: - self.defnFilename = defnFilename - self.defnData = defnData - self.salts = None - self.defnFormat = defnFormat - self._initPatterns() + def __init__(self, defnFilename=None, defnData=None, defnFormat=InputFormat.SMARTS): + if defnFilename: + self.defnFilename = defnFilename + self.defnData = defnData + self.salts = None + self.defnFormat = defnFormat + self._initPatterns() - def _initPatterns(self): - """ + def _initPatterns(self): + """ - >>> remover = SaltRemover() - >>> len(remover.salts)>0 - True + >>> remover = SaltRemover() + >>> len(remover.salts)>0 + True - Default input format is SMARTS - >>> remover = SaltRemover(defnData="[Cl,Br]") - >>> len(remover.salts) - 1 + Default input format is SMARTS + >>> remover = SaltRemover(defnData="[Cl,Br]") + >>> len(remover.salts) + 1 - >>> remover = SaltRemover(defnData="[Na+]\\nCC(=O)O", defnFormat=InputFormat.SMILES) - >>> len(remover.salts) - 2 + >>> remover = SaltRemover(defnData="[Na+]\\nCC(=O)O", defnFormat=InputFormat.SMILES) + >>> len(remover.salts) + 2 - >>> from rdkit import RDLogger - >>> RDLogger.DisableLog('rdApp.error') - >>> remover = SaltRemover(defnData="[Cl,fail]") - Traceback (most recent call last): - ... - ValueError: [Cl,fail] + >>> from rdkit import RDLogger + >>> RDLogger.DisableLog('rdApp.error') + >>> remover = SaltRemover(defnData="[Cl,fail]") + Traceback (most recent call last): + ... + ValueError: [Cl,fail] - >>> RDLogger.EnableLog('rdApp.error') - """ - if self.defnData: - from rdkit.six.moves import cStringIO as StringIO - inF = StringIO(self.defnData) - with closing(inF): - self.salts = [] - for line in inF: - if line: + >>> RDLogger.EnableLog('rdApp.error') + """ + if self.defnData: + from io import StringIO + inF = StringIO(self.defnData) + with closing(inF): + self.salts = [] + for line in inF: + if line: + if self.defnFormat == InputFormat.SMARTS: + salt = _smartsFromSmartsLine(line) + elif self.defnFormat == InputFormat.SMILES: + salt = Chem.MolFromSmiles(line) + else: + raise ValueError('Unsupported format for supplier.') + if salt is None: + raise ValueError(line) + self.salts.append(salt) + else: if self.defnFormat == InputFormat.SMARTS: - salt = _smartsFromSmartsLine(line) + self.salts = [mol for mol in _getSmartsSaltsFromFile(self.defnFilename)] + elif self.defnFormat == InputFormat.MOL: + self.salts = [mol for mol in SDMolSupplier(self.defnFilename)] elif self.defnFormat == InputFormat.SMILES: - salt = Chem.MolFromSmiles(line) + self.salts = [mol for mol in SmilesMolSupplier(self.defnFilename)] else: - raise ValueError('Unsupported format for supplier.') - if salt is None: - raise ValueError(line) - self.salts.append(salt) - else: - if self.defnFormat == InputFormat.SMARTS: - self.salts = [mol for mol in _getSmartsSaltsFromFile(self.defnFilename)] - elif self.defnFormat == InputFormat.MOL: - self.salts = [mol for mol in SDMolSupplier(self.defnFilename)] - elif self.defnFormat == InputFormat.SMILES: - self.salts = [mol for mol in SmilesMolSupplier(self.defnFilename)] - else: - raise ValueError('Unsupported format for supplier.') + raise ValueError('Unsupported format for supplier.') - def StripMol(self, mol, dontRemoveEverything=False): - """ + def StripMol(self, mol, dontRemoveEverything=False): + """ - >>> remover = SaltRemover(defnData="[Cl,Br]") - >>> len(remover.salts) - 1 + >>> remover = SaltRemover(defnData="[Cl,Br]") + >>> len(remover.salts) + 1 - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') - >>> res = remover.StripMol(mol) - >>> res is not None - True - >>> res.GetNumAtoms() - 4 + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') + >>> res = remover.StripMol(mol) + >>> res is not None + True + >>> res.GetNumAtoms() + 4 - Notice that all salts are removed: - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br') - >>> res = remover.StripMol(mol) - >>> res.GetNumAtoms() - 4 + Notice that all salts are removed: + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br') + >>> res = remover.StripMol(mol) + >>> res.GetNumAtoms() + 4 - Matching (e.g. "salt-like") atoms in the molecule are unchanged: - >>> mol = Chem.MolFromSmiles('CN(Br)Cl') - >>> res = remover.StripMol(mol) - >>> res.GetNumAtoms() - 4 + Matching (e.g. "salt-like") atoms in the molecule are unchanged: + >>> mol = Chem.MolFromSmiles('CN(Br)Cl') + >>> res = remover.StripMol(mol) + >>> res.GetNumAtoms() + 4 - >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl') - >>> res = remover.StripMol(mol) - >>> res.GetNumAtoms() - 4 + >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl') + >>> res = remover.StripMol(mol) + >>> res.GetNumAtoms() + 4 - Charged salts are handled reasonably: - >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]') - >>> res = remover.StripMol(mol) - >>> res.GetNumAtoms() - 4 + Charged salts are handled reasonably: + >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]') + >>> res = remover.StripMol(mol) + >>> res.GetNumAtoms() + 4 - Watch out for this case (everything removed): - >>> remover = SaltRemover() - >>> len(remover.salts)>1 - True - >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]') - >>> res = remover.StripMol(mol) - >>> res.GetNumAtoms() - 0 + Watch out for this case (everything removed): + >>> remover = SaltRemover() + >>> len(remover.salts)>1 + True + >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]') + >>> res = remover.StripMol(mol) + >>> res.GetNumAtoms() + 0 - dontRemoveEverything helps with this by leaving the last salt: - >>> res = remover.StripMol(mol,dontRemoveEverything=True) - >>> res.GetNumAtoms() - 4 + dontRemoveEverything helps with this by leaving the last salt: + >>> res = remover.StripMol(mol,dontRemoveEverything=True) + >>> res.GetNumAtoms() + 4 - but in cases where the last salts are the same, it can't choose - between them, so it returns all of them: - >>> mol = Chem.MolFromSmiles('Cl.Cl') - >>> res = remover.StripMol(mol,dontRemoveEverything=True) - >>> res.GetNumAtoms() - 2 + but in cases where the last salts are the same, it can't choose + between them, so it returns all of them: + >>> mol = Chem.MolFromSmiles('Cl.Cl') + >>> res = remover.StripMol(mol,dontRemoveEverything=True) + >>> res.GetNumAtoms() + 2 - """ - strippedMol = self._StripMol(mol, dontRemoveEverything) - return strippedMol.mol + """ + strippedMol = self._StripMol(mol, dontRemoveEverything) + return strippedMol.mol - def StripMolWithDeleted(self, mol, dontRemoveEverything=False): - """ - Strips given molecule and returns it, with the fragments which have been deleted. + def StripMolWithDeleted(self, mol, dontRemoveEverything=False): + """ + Strips given molecule and returns it, with the fragments which have been deleted. - >>> remover = SaltRemover(defnData="[Cl,Br]") - >>> len(remover.salts) - 1 + >>> remover = SaltRemover(defnData="[Cl,Br]") + >>> len(remover.salts) + 1 - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Br') - >>> res, deleted = remover.StripMolWithDeleted(mol) - >>> Chem.MolToSmiles(res) - 'CN(C)C' - >>> [Chem.MolToSmarts(m) for m in deleted] - ['[Cl,Br]'] + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Br') + >>> res, deleted = remover.StripMolWithDeleted(mol) + >>> Chem.MolToSmiles(res) + 'CN(C)C' + >>> [Chem.MolToSmarts(m) for m in deleted] + ['[Cl,Br]'] - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') - >>> res, deleted = remover.StripMolWithDeleted(mol) - >>> res.GetNumAtoms() - 4 - >>> len(deleted) - 1 - >>> deleted[0].GetNumAtoms() - 1 - >>> Chem.MolToSmarts(deleted[0]) - '[Cl,Br]' + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') + >>> res, deleted = remover.StripMolWithDeleted(mol) + >>> res.GetNumAtoms() + 4 + >>> len(deleted) + 1 + >>> deleted[0].GetNumAtoms() + 1 + >>> Chem.MolToSmarts(deleted[0]) + '[Cl,Br]' - Multiple occurrences of 'Cl' and without tuple destructuring - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl') - >>> tup = remover.StripMolWithDeleted(mol) + Multiple occurrences of 'Cl' and without tuple destructuring + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl') + >>> tup = remover.StripMolWithDeleted(mol) - >>> tup.mol.GetNumAtoms() - 4 - >>> len(tup.deleted) - 1 - >>> tup.deleted[0].GetNumAtoms() - 1 - >>> Chem.MolToSmarts(deleted[0]) - '[Cl,Br]' - """ - return self._StripMol(mol, dontRemoveEverything) + >>> tup.mol.GetNumAtoms() + 4 + >>> len(tup.deleted) + 1 + >>> tup.deleted[0].GetNumAtoms() + 1 + >>> Chem.MolToSmarts(deleted[0]) + '[Cl,Br]' + """ + return self._StripMol(mol, dontRemoveEverything) - def _StripMol(self, mol, dontRemoveEverything=False): + def _StripMol(self, mol, dontRemoveEverything=False): - def _applyPattern(m, salt, notEverything): - nAts = m.GetNumAtoms() - if not nAts: - return m - res = m + def _applyPattern(m, salt, notEverything): + nAts = m.GetNumAtoms() + if not nAts: + return m + res = m - t = Chem.DeleteSubstructs(res, salt, True) - if not t or (notEverything and t.GetNumAtoms() == 0): - return res - res = t - while res.GetNumAtoms() and nAts > res.GetNumAtoms(): - nAts = res.GetNumAtoms() - t = Chem.DeleteSubstructs(res, salt, True) - if notEverything and t.GetNumAtoms() == 0: - break - res = t - return res + t = Chem.DeleteSubstructs(res, salt, True) + if not t or (notEverything and t.GetNumAtoms() == 0): + return res + res = t + while res.GetNumAtoms() and nAts > res.GetNumAtoms(): + nAts = res.GetNumAtoms() + t = Chem.DeleteSubstructs(res, salt, True) + if notEverything and t.GetNumAtoms() == 0: + break + res = t + return res - StrippedMol = namedtuple('StrippedMol', ['mol', 'deleted']) - deleted = [] - if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1: - return StrippedMol(mol, deleted) - modified = False - natoms = mol.GetNumAtoms() - for salt in self.salts: - mol = _applyPattern(mol, salt, dontRemoveEverything) - if natoms != mol.GetNumAtoms(): - natoms = mol.GetNumAtoms() - modified = True - deleted.append(salt) + StrippedMol = namedtuple('StrippedMol', ['mol', 'deleted']) + deleted = [] if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1: - break - if modified and mol.GetNumAtoms() > 0: - Chem.SanitizeMol(mol) - return StrippedMol(mol, deleted) + return StrippedMol(mol, deleted) + modified = False + natoms = mol.GetNumAtoms() + for salt in self.salts: + mol = _applyPattern(mol, salt, dontRemoveEverything) + if natoms != mol.GetNumAtoms(): + natoms = mol.GetNumAtoms() + modified = True + deleted.append(salt) + if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1: + break + if modified and mol.GetNumAtoms() > 0: + Chem.SanitizeMol(mol) + return StrippedMol(mol, deleted) - def __call__(self, mol, dontRemoveEverything=False): - """ + def __call__(self, mol, dontRemoveEverything=False): + """ - >>> remover = SaltRemover(defnData="[Cl,Br]") - >>> len(remover.salts) - 1 - >>> Chem.MolToSmarts(remover.salts[0]) - '[Cl,Br]' + >>> remover = SaltRemover(defnData="[Cl,Br]") + >>> len(remover.salts) + 1 + >>> Chem.MolToSmarts(remover.salts[0]) + '[Cl,Br]' - >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') - >>> res = remover(mol) - >>> res is not None - True - >>> res.GetNumAtoms() - 4 + >>> mol = Chem.MolFromSmiles('CN(C)C.Cl') + >>> res = remover(mol) + >>> res is not None + True + >>> res.GetNumAtoms() + 4 - """ - return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything) + """ + return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything) # ------------------------------------ @@ -304,11 +309,11 @@ class SaltRemover(object): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/Chem/SimpleEnum/Enumerator.py b/rdkit/Chem/SimpleEnum/Enumerator.py index e65043870..f33cb8b6f 100755 --- a/rdkit/Chem/SimpleEnum/Enumerator.py +++ b/rdkit/Chem/SimpleEnum/Enumerator.py @@ -29,7 +29,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Greg Landrum, May 2009 -from __future__ import print_function + import os diff --git a/rdkit/Chem/Subshape/BuilderUtils.py b/rdkit/Chem/Subshape/BuilderUtils.py index 77ac7c81e..d5045e33e 100755 --- a/rdkit/Chem/Subshape/BuilderUtils.py +++ b/rdkit/Chem/Subshape/BuilderUtils.py @@ -2,7 +2,7 @@ # Copyright (C) 2007-2017 by Greg Landrum # All rights reserved # -from __future__ import print_function + import math diff --git a/rdkit/Chem/Subshape/SubshapeAligner.py b/rdkit/Chem/Subshape/SubshapeAligner.py index 715fdc385..161a59fe6 100755 --- a/rdkit/Chem/Subshape/SubshapeAligner.py +++ b/rdkit/Chem/Subshape/SubshapeAligner.py @@ -2,7 +2,7 @@ # Copyright (C) 2007-2008 by Greg Landrum # All rights reserved # -from __future__ import print_function + import numpy @@ -334,14 +334,14 @@ class SubshapeAligner(object): if __name__ == '__main__': # pragma: nocover - from rdkit.six.moves import cPickle + import pickle from rdkit.Chem.PyMol import MolViewer with open('target.pkl', 'rb') as f: - tgtMol, tgtShape = cPickle.load(f) + tgtMol, tgtShape = pickle.load(f) with open('query.pkl', 'rb') as f: - queryMol, queryShape = cPickle.load(f) + queryMol, queryShape = pickle.load(f) with open('builder.pkl', 'rb') as f: - builder = cPickle.load(f) + builder = pickle.load(f) aligner = SubshapeAligner() algs = aligner.GetSubshapeAlignments(tgtMol, tgtShape, queryMol, queryShape, builder) print(len(algs)) diff --git a/rdkit/Chem/Subshape/SubshapeBuilder.py b/rdkit/Chem/Subshape/SubshapeBuilder.py index c5ff7ef73..b825298a3 100755 --- a/rdkit/Chem/Subshape/SubshapeBuilder.py +++ b/rdkit/Chem/Subshape/SubshapeBuilder.py @@ -2,7 +2,7 @@ # Copyright (C) 2007 by Greg Landrum # All rights reserved # -from __future__ import print_function + import copy import time @@ -11,7 +11,7 @@ from rdkit import Chem, Geometry from rdkit.Chem import AllChem from rdkit.Chem.Subshape import BuilderUtils from rdkit.Chem.Subshape import SubshapeObjects -from rdkit.six.moves import cPickle +import pickle class SubshapeCombineOperations(object): @@ -88,7 +88,7 @@ class SubshapeBuilder(object): if __name__ == '__main__': # pragma: nocover from rdkit.Chem.PyMol import MolViewer - from rdkit.six.moves import cPickle + import pickle import tempfile # cmpd = Chem.MolFromSmiles('CCCc1cc(C(=O)O)ccc1') @@ -116,7 +116,7 @@ if __name__ == '__main__': # pragma: nocover v.server.resetCGO('*') with open('subshape.pkl', 'w+') as f: - cPickle.dump(shape, f) + pickle.dump(shape, f) for i, pt in enumerate(shape.skelPts): v.server.sphere(tuple(pt.location), .5, (1, 0, 1), 'Pt-%d' % i) if not hasattr(pt, 'shapeDirs'): diff --git a/rdkit/Chem/Subshape/testCombined.py b/rdkit/Chem/Subshape/testCombined.py index 6f97b1b61..ad617313f 100644 --- a/rdkit/Chem/Subshape/testCombined.py +++ b/rdkit/Chem/Subshape/testCombined.py @@ -1,10 +1,10 @@ -from __future__ import print_function + from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem.PyMol import MolViewer from rdkit.Chem.Subshape import SubshapeBuilder, SubshapeObjects, SubshapeAligner -from rdkit.six.moves import cPickle +import pickle import copy m1 = Chem.MolFromMolFile('test_data/square1.mol') @@ -17,18 +17,18 @@ b.winRad = 2.0 if 1: print('m1:') s1 = b.GenerateSubshapeShape(m1) - cPickle.dump(s1, open('test_data/square1.shp.pkl', 'wb+')) + pickle.dump(s1, open('test_data/square1.shp.pkl', 'wb+')) print('m2:') s2 = b.GenerateSubshapeShape(m2) - cPickle.dump(s2, open('test_data/square2.shp.pkl', 'wb+')) + pickle.dump(s2, open('test_data/square2.shp.pkl', 'wb+')) ns1 = b.CombineSubshapes(s1, s2) b.GenerateSubshapeSkeleton(ns1) - cPickle.dump(ns1, open('test_data/combined.shp.pkl', 'wb+')) + pickle.dump(ns1, open('test_data/combined.shp.pkl', 'wb+')) else: - s1 = cPickle.load(open('test_data/square1.shp.pkl', 'rb')) - s2 = cPickle.load(open('test_data/square2.shp.pkl', 'rb')) - #ns1 = cPickle.load(file('test_data/combined.shp.pkl','rb')) - ns1 = cPickle.load(open('test_data/combined.shp.pkl', 'rb')) + s1 = pickle.load(open('test_data/square1.shp.pkl', 'rb')) + s2 = pickle.load(open('test_data/square2.shp.pkl', 'rb')) + #ns1 = pickle.load(file('test_data/combined.shp.pkl','rb')) + ns1 = pickle.load(open('test_data/combined.shp.pkl', 'rb')) v = MolViewer() SubshapeObjects.DisplaySubshape(v, s1, 'shape1') diff --git a/rdkit/Chem/Suppliers/UnitTestDbMolSupplier.py b/rdkit/Chem/Suppliers/UnitTestDbMolSupplier.py index a4f7d9d20..3564b9fb4 100644 --- a/rdkit/Chem/Suppliers/UnitTestDbMolSupplier.py +++ b/rdkit/Chem/Suppliers/UnitTestDbMolSupplier.py @@ -10,73 +10,72 @@ from rdkit.Chem.Suppliers.DbMolSupplier import ForwardDbMolSupplier, RandomAcces from rdkit.Chem.Suppliers.MolSupplier import MolSupplier from rdkit.Dbase.DbConnection import DbConnect from rdkit.Dbase.DbResultSet import DbResultSet, RandomAccessDbResultSet -from rdkit.six import next class TestCase(unittest.TestCase): - def setUp(self): - self.dbName = RDConfig.RDTestDatabase - self.conn = DbConnect(self.dbName) - self.curs = self.conn.GetCursor() + def setUp(self): + self.dbName = RDConfig.RDTestDatabase + self.conn = DbConnect(self.dbName) + self.curs = self.conn.GetCursor() - def test_MolSupplier(self): - self.assertRaises(ValueError, MolSupplier) + def test_MolSupplier(self): + self.assertRaises(ValueError, MolSupplier) - def test_general(self): - # Check for a molecule column - cmd = 'select * from ten_elements' - results = DbResultSet(self.curs, self.conn, cmd) - self.assertRaises(ValueError, ForwardDbMolSupplier, results) + def test_general(self): + # Check for a molecule column + cmd = 'select * from ten_elements' + results = DbResultSet(self.curs, self.conn, cmd) + self.assertRaises(ValueError, ForwardDbMolSupplier, results) - def test_ForwardDbMolSupplier(self): - cmd = 'select * from simple_mols order by ID' - results = DbResultSet(self.curs, self.conn, cmd) - expected = list(results) + def test_ForwardDbMolSupplier(self): + cmd = 'select * from simple_mols order by ID' + results = DbResultSet(self.curs, self.conn, cmd) + expected = list(results) - results = DbResultSet(self.curs, self.conn, cmd) - supp = ForwardDbMolSupplier(results) - self.assertEqual(supp.GetColumnNames(), ('ID',)) + results = DbResultSet(self.curs, self.conn, cmd) + supp = ForwardDbMolSupplier(results) + self.assertEqual(supp.GetColumnNames(), ('ID',)) - for smiles, mol in zip(expected, supp): - self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) - self.assertEqual(smiles[1], mol.GetProp('ID')) - self.assertRaises(StopIteration, next, supp) + for smiles, mol in zip(expected, supp): + self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) + self.assertEqual(smiles[1], mol.GetProp('ID')) + self.assertRaises(StopIteration, next, supp) - # We can not use an index for ForwardDbMolSupplier - with self.assertRaises(TypeError): - supp[0] + # We can not use an index for ForwardDbMolSupplier + with self.assertRaises(TypeError): + supp[0] - def test_RandomAccessDbMolSupplier(self): - cmd = 'select * from simple_mols order by ID' - results = RandomAccessDbResultSet(self.curs, self.conn, cmd) - expected = list(results) + def test_RandomAccessDbMolSupplier(self): + cmd = 'select * from simple_mols order by ID' + results = RandomAccessDbResultSet(self.curs, self.conn, cmd) + expected = list(results) - results = RandomAccessDbResultSet(self.curs, self.conn, cmd) - supp = RandomAccessDbMolSupplier(results) - self.assertEqual(len(supp), len(expected)) - self.assertEqual(supp.GetColumnNames(), ('ID',)) - for smiles, mol in zip(expected, supp): - self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) - self.assertEqual(smiles[1], mol.GetProp('ID')) + results = RandomAccessDbResultSet(self.curs, self.conn, cmd) + supp = RandomAccessDbMolSupplier(results) + self.assertEqual(len(supp), len(expected)) + self.assertEqual(supp.GetColumnNames(), ('ID',)) + for smiles, mol in zip(expected, supp): + self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) + self.assertEqual(smiles[1], mol.GetProp('ID')) - # Check that we can randomly access the data - indices = list(range(len(expected))) - random.shuffle(indices) - for idx in indices: - smiles = expected[idx] - mol = supp[idx] - self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) - self.assertEqual(smiles[1], mol.GetProp('ID')) + # Check that we can randomly access the data + indices = list(range(len(expected))) + random.shuffle(indices) + for idx in indices: + smiles = expected[idx] + mol = supp[idx] + self.assertEqual(Chem.MolToSmiles(Chem.MolFromSmiles(smiles[0])), Chem.MolToSmiles(mol)) + self.assertEqual(smiles[1], mol.GetProp('ID')) - # We get an error if we access outside of the permitted range - with self.assertRaises(IndexError): - supp[len(expected)] + # We get an error if we access outside of the permitted range + with self.assertRaises(IndexError): + supp[len(expected)] - # The DbMolSupplier doesn't support negative indices - with self.assertRaises(IndexError): - supp[-1] + # The DbMolSupplier doesn't support negative indices + with self.assertRaises(IndexError): + supp[-1] if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Suppliers/UnitTestSDMolSupplier.py b/rdkit/Chem/Suppliers/UnitTestSDMolSupplier.py index 782a31619..40d64c114 100755 --- a/rdkit/Chem/Suppliers/UnitTestSDMolSupplier.py +++ b/rdkit/Chem/Suppliers/UnitTestSDMolSupplier.py @@ -17,82 +17,81 @@ import unittest from rdkit import Chem from rdkit import RDConfig -from rdkit.six import next class TestCase(unittest.TestCase): - def setUp(self): - self.fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') - with open(self.fName, 'r') as inf: - inD = inf.read() - self.nMolecules = inD.count('$$$$') + def setUp(self): + self.fName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_200.props.sdf') + with open(self.fName, 'r') as inf: + inD = inf.read() + self.nMolecules = inD.count('$$$$') - def assertMolecule(self, mol, i): - """ Assert that we have a valid molecule """ - self.assertIsNotNone(mol, 'read %d failed' % i) - self.assertGreater(mol.GetNumAtoms(), 0, 'no atoms in mol %d' % i) + def assertMolecule(self, mol, i): + """ Assert that we have a valid molecule """ + self.assertIsNotNone(mol, 'read %d failed' % i) + self.assertGreater(mol.GetNumAtoms(), 0, 'no atoms in mol %d' % i) - def test_SDMolSupplier(self): - # tests reads using a file name (file contains 200 molecules) - supp = Chem.SDMolSupplier(self.fName) + def test_SDMolSupplier(self): + # tests reads using a file name (file contains 200 molecules) + supp = Chem.SDMolSupplier(self.fName) - # Can use as an iterator - for i in range(10): - mol = next(supp) - self.assertMolecule(mol, i) + # Can use as an iterator + for i in range(10): + mol = next(supp) + self.assertMolecule(mol, i) - # Can access directly - i = 100 - mol = supp[i - 1] - self.assertMolecule(mol, i) + # Can access directly + i = 100 + mol = supp[i - 1] + self.assertMolecule(mol, i) - # We can access the number of molecules - self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') + # We can access the number of molecules + self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') - # We know the number and can still access directly - i = 12 - mol = supp[i - 1] - self.assertMolecule(mol, i) + # We know the number and can still access directly + i = 12 + mol = supp[i - 1] + self.assertMolecule(mol, i) - # Get an exception if we access an invalid number - with self.assertRaises(IndexError): - _ = supp[self.nMolecules] # out of bound read must fail + # Get an exception if we access an invalid number + with self.assertRaises(IndexError): + _ = supp[self.nMolecules] # out of bound read must fail - # and we can access with negative numbers - mol1 = supp[len(supp) - 1] - mol2 = supp[-1] - self.assertEqual(Chem.MolToSmiles(mol1), Chem.MolToSmiles(mol2)) + # and we can access with negative numbers + mol1 = supp[len(supp) - 1] + mol2 = supp[-1] + self.assertEqual(Chem.MolToSmiles(mol1), Chem.MolToSmiles(mol2)) - def test_SDWriter(self): - # tests writes using a file name - supp = Chem.SDMolSupplier(self.fName) - _, outName = tempfile.mkstemp('.sdf') - writer = Chem.SDWriter(outName) - m1 = next(supp) - writer.SetProps(m1.GetPropNames()) - for m in supp: - writer.write(m) - writer.flush() - writer.close() + def test_SDWriter(self): + # tests writes using a file name + supp = Chem.SDMolSupplier(self.fName) + _, outName = tempfile.mkstemp('.sdf') + writer = Chem.SDWriter(outName) + m1 = next(supp) + writer.SetProps(m1.GetPropNames()) + for m in supp: + writer.write(m) + writer.flush() + writer.close() - # The writer does not have an explicit "close()" so need to - # let the garbage collector kick in to close the file. - writer = None - with open(outName, 'r') as inf: - outD = inf.read() - # The file should be closed, but if it isn't, and this - # is Windows, then the unlink() can fail. Wait and try again. - try: - os.unlink(outName) - except Exception: - import time - time.sleep(1) - try: - os.unlink(outName) - except Exception: - pass - self.assertEqual(self.nMolecules, outD.count('$$$$'), 'bad nMols in output') + # The writer does not have an explicit "close()" so need to + # let the garbage collector kick in to close the file. + writer = None + with open(outName, 'r') as inf: + outD = inf.read() + # The file should be closed, but if it isn't, and this + # is Windows, then the unlink() can fail. Wait and try again. + try: + os.unlink(outName) + except Exception: + import time + time.sleep(1) + try: + os.unlink(outName) + except Exception: + pass + self.assertEqual(self.nMolecules, outD.count('$$$$'), 'bad nMols in output') # def _testStreamRoundtrip(self): # inD = open(self.fName).read() @@ -176,5 +175,6 @@ class TestCase(unittest.TestCase): # fail = 0 # assert fail, 'out of bound read did not fail' + if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/Suppliers/UnitTestSmilesMolSupplier.py b/rdkit/Chem/Suppliers/UnitTestSmilesMolSupplier.py index e40ca6fd2..ae7b9da58 100755 --- a/rdkit/Chem/Suppliers/UnitTestSmilesMolSupplier.py +++ b/rdkit/Chem/Suppliers/UnitTestSmilesMolSupplier.py @@ -14,100 +14,100 @@ unit testing code for the Smiles file handling stuff import unittest from rdkit import Chem -from rdkit.six import next + from rdkit import RDLogger class TestCase(unittest.TestCase): - def setUp(self): - self.smis = ['CC', 'CCC', 'CCCCC', 'CCCCCC', 'CCCCCCC', 'CC', 'CCCCOC'] - self.nMolecules = len(self.smis) + def setUp(self): + self.smis = ['CC', 'CCC', 'CCCCC', 'CCCCCC', 'CCCCCCC', 'CC', 'CCCCOC'] + self.nMolecules = len(self.smis) - def tearDown(self): - RDLogger.EnableLog('rdApp.error') + def tearDown(self): + RDLogger.EnableLog('rdApp.error') - def assertMolecule(self, mol, i, msg=''): - """ Assert that we have a valid molecule """ - self.assertIsNotNone(mol, '{0}read {1} failed'.format(msg, i)) - self.assertGreater(mol.GetNumAtoms(), 0, '{0}no atoms in mol {1}'.format(msg, i)) + def assertMolecule(self, mol, i, msg=''): + """ Assert that we have a valid molecule """ + self.assertIsNotNone(mol, '{0}read {1} failed'.format(msg, i)) + self.assertGreater(mol.GetNumAtoms(), 0, '{0}no atoms in mol {1}'.format(msg, i)) - def test_SmilesReaderIndex(self): - # tests lazy reads - supp = Chem.SmilesMolSupplierFromText('\n'.join(self.smis), ',', 0, -1, 0) - for i in range(4): - self.assertMolecule(next(supp), i) + def test_SmilesReaderIndex(self): + # tests lazy reads + supp = Chem.SmilesMolSupplierFromText('\n'.join(self.smis), ',', 0, -1, 0) + for i in range(4): + self.assertMolecule(next(supp), i) - i = len(supp) - 1 - self.assertMolecule(supp[i], i) + i = len(supp) - 1 + self.assertMolecule(supp[i], i) - # Use in a list comprehension - ms = [Chem.MolToSmiles(mol) for mol in supp] - self.assertEqual(ms, self.smis) + # Use in a list comprehension + ms = [Chem.MolToSmiles(mol) for mol in supp] + self.assertEqual(ms, self.smis) - self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') + self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') - # Despite iterating through the whole supplier, we can still access by index - i = self.nMolecules - 3 - self.assertMolecule(supp[i - 1], i, msg='back index: ') + # Despite iterating through the whole supplier, we can still access by index + i = self.nMolecules - 3 + self.assertMolecule(supp[i - 1], i, msg='back index: ') - with self.assertRaises(IndexError): - _ = supp[self.nMolecules] # out of bound read must fail + with self.assertRaises(IndexError): + _ = supp[self.nMolecules] # out of bound read must fail - # and we can access with negative numbers - mol1 = supp[len(supp) - 1] - mol2 = supp[-1] - self.assertEqual(Chem.MolToSmiles(mol1), Chem.MolToSmiles(mol2)) + # and we can access with negative numbers + mol1 = supp[len(supp) - 1] + mol2 = supp[-1] + self.assertEqual(Chem.MolToSmiles(mol1), Chem.MolToSmiles(mol2)) - def test_SmilesReaderIterator(self): - # tests lazy reads using the iterator interface " - supp = Chem.SmilesMolSupplierFromText('\n'.join(self.smis), ',', 0, -1, 0) + def test_SmilesReaderIterator(self): + # tests lazy reads using the iterator interface " + supp = Chem.SmilesMolSupplierFromText('\n'.join(self.smis), ',', 0, -1, 0) - nDone = 0 - for mol in supp: - self.assertMolecule(mol, nDone) - nDone += 1 - self.assertEqual(nDone, self.nMolecules, 'bad number of molecules') + nDone = 0 + for mol in supp: + self.assertMolecule(mol, nDone) + nDone += 1 + self.assertEqual(nDone, self.nMolecules, 'bad number of molecules') - self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') + self.assertEqual(len(supp), self.nMolecules, 'bad supplier length') - # Despite iterating through the whole supplier, we can still access by index - i = self.nMolecules - 3 - self.assertMolecule(supp[i - 1], i, msg='back index: ') + # Despite iterating through the whole supplier, we can still access by index + i = self.nMolecules - 3 + self.assertMolecule(supp[i - 1], i, msg='back index: ') - with self.assertRaises(IndexError): - _ = supp[self.nMolecules] # out of bound read must not fail + with self.assertRaises(IndexError): + _ = supp[self.nMolecules] # out of bound read must not fail - def test_SmilesReaderBoundaryConditions(self): - # Suppress the error message due to the incorrect smiles - RDLogger.DisableLog('rdApp.error') + def test_SmilesReaderBoundaryConditions(self): + # Suppress the error message due to the incorrect smiles + RDLogger.DisableLog('rdApp.error') - smis = ['CC', 'CCOC', 'fail', 'CCO'] - supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) - self.assertEqual(len(supp), 4) - self.assertIsNone(supp[2]) - self.assertIsNotNone(supp[3]) + smis = ['CC', 'CCOC', 'fail', 'CCO'] + supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) + self.assertEqual(len(supp), 4) + self.assertIsNone(supp[2]) + self.assertIsNotNone(supp[3]) - supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) - self.assertIsNone(supp[2]) - self.assertIsNotNone(supp[3]) - self.assertEqual(len(supp), 4) - with self.assertRaises(IndexError): - supp[4] + supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) + self.assertIsNone(supp[2]) + self.assertIsNotNone(supp[3]) + self.assertEqual(len(supp), 4) + with self.assertRaises(IndexError): + supp[4] - supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) - self.assertEqual(len(supp), 4) - self.assertIsNotNone(supp[3]) - with self.assertRaises(IndexError): - supp[4] + supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) + self.assertEqual(len(supp), 4) + self.assertIsNotNone(supp[3]) + with self.assertRaises(IndexError): + supp[4] - supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) - with self.assertRaises(IndexError): - supp[4] + supp = Chem.SmilesMolSupplierFromText('\n'.join(smis), ',', 0, -1, 0) + with self.assertRaises(IndexError): + supp[4] - self.assertEqual(len(supp), 4) - self.assertIsNotNone(supp[3]) + self.assertEqual(len(supp), 4) + self.assertIsNotNone(supp[3]) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/UnitTestCatalog.py b/rdkit/Chem/UnitTestCatalog.py index 509ce8094..156cca82d 100644 --- a/rdkit/Chem/UnitTestCatalog.py +++ b/rdkit/Chem/UnitTestCatalog.py @@ -14,7 +14,7 @@ import unittest from rdkit import Chem from rdkit import RDConfig from rdkit.Chem import FragmentCatalog, BuildFragmentCatalog -from rdkit.six.moves import cPickle +import pickle def feq(n1, n2, tol=1e-4): @@ -87,13 +87,13 @@ class TestCase(unittest.TestCase): self._fillCat(self.smiList2) # test non-binary pickle: - cat2 = cPickle.loads(cPickle.dumps(self.fragCat)) + cat2 = pickle.loads(pickle.dumps(self.fragCat)) assert cat2.GetNumEntries() == 21 assert cat2.GetFPLength() == 21 self._testBits(cat2) # test binary pickle: - cat2 = cPickle.loads(cPickle.dumps(self.fragCat, 1)) + cat2 = pickle.loads(pickle.dumps(self.fragCat, 1)) assert cat2.GetNumEntries() == 21 assert cat2.GetFPLength() == 21 self._testBits(cat2) @@ -104,7 +104,7 @@ class TestCase(unittest.TestCase): buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8') pklTFile.close() with io.BytesIO(buf) as pklFile: - cat = cPickle.load(pklFile, encoding='bytes') + cat = pickle.load(pklFile, encoding='bytes') assert cat.GetNumEntries() == 21 assert cat.GetFPLength() == 21 self._testBits(cat) diff --git a/rdkit/Chem/UnitTestChem.py b/rdkit/Chem/UnitTestChem.py index 75d4a9b17..0996f612c 100755 --- a/rdkit/Chem/UnitTestChem.py +++ b/rdkit/Chem/UnitTestChem.py @@ -11,7 +11,7 @@ """ import unittest, os -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import Chem @@ -80,9 +80,9 @@ class TestCase(unittest.TestCase): smis.append(line.split('\t')[0]) for smi in smis: m = Chem.MolFromSmiles(smi) - newM1 = cPickle.loads(cPickle.dumps(m)) + newM1 = pickle.loads(pickle.dumps(m)) newSmi1 = Chem.MolToSmiles(newM1) - newM2 = cPickle.loads(cPickle.dumps(newM1)) + newM2 = pickle.loads(pickle.dumps(newM1)) newSmi2 = Chem.MolToSmiles(newM2) assert newM1.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed' assert newM2.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed' @@ -95,7 +95,7 @@ class TestCase(unittest.TestCase): " testing single molecule pickle " m = Chem.MolFromSmiles('CCOC') outS = Chem.MolToSmiles(m) - m2 = cPickle.loads(cPickle.dumps(m)) + m2 = pickle.loads(pickle.dumps(m)) outS2 = Chem.MolToSmiles(m2) assert outS == outS2, "bad pickle: %s != %s" % (outS, outS2) @@ -104,8 +104,8 @@ class TestCase(unittest.TestCase): smis = self.bigSmiList for smi in smis: m = Chem.MolFromSmiles(smi) - newM1 = cPickle.loads(cPickle.dumps(m)) - newM2 = cPickle.loads(cPickle.dumps(newM1)) + newM1 = pickle.loads(pickle.dumps(m)) + newM2 = pickle.loads(pickle.dumps(newM1)) oldSmi = Chem.MolToSmiles(newM1) newSmi = Chem.MolToSmiles(newM2) assert newM1.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed' @@ -119,10 +119,10 @@ class TestCase(unittest.TestCase): f = None self.m = Chem.MolFromSmiles('CC(=O)CC') outF = open(self.fName, 'wb+') - cPickle.dump(self.m, outF) + pickle.dump(self.m, outF) outF.close() inF = open(self.fName, 'rb') - m2 = cPickle.load(inF) + m2 = pickle.load(inF) inF.close() try: os.unlink(self.fName) diff --git a/rdkit/Chem/UnitTestChemv2.py b/rdkit/Chem/UnitTestChemv2.py index b8e33e5c0..4c90c5b5d 100755 --- a/rdkit/Chem/UnitTestChemv2.py +++ b/rdkit/Chem/UnitTestChemv2.py @@ -12,7 +12,7 @@ """ import unittest, os -from rdkit.six.moves import cPickle +import pickle from rdkit import RDConfig from rdkit import Chem from rdkit.Chem import AllChem @@ -132,8 +132,8 @@ class TestCase(unittest.TestCase): for smi in smis: m = Chem.MolFromSmiles(smi) - newM1 = cPickle.loads(cPickle.dumps(m)) - newM2 = cPickle.loads(cPickle.dumps(newM1)) + newM1 = pickle.loads(pickle.dumps(m)) + newM2 = pickle.loads(pickle.dumps(newM1)) oldSmi = Chem.MolToSmiles(newM1) newSmi = Chem.MolToSmiles(newM2) assert newM1.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed' diff --git a/rdkit/Chem/UnitTestCrippen.py b/rdkit/Chem/UnitTestCrippen.py index 57ab25b8d..0c281faa8 100644 --- a/rdkit/Chem/UnitTestCrippen.py +++ b/rdkit/Chem/UnitTestCrippen.py @@ -11,14 +11,14 @@ """unit testing code for the Crippen clogp and MR calculators """ -from __future__ import print_function + import unittest, sys, os import io import numpy from rdkit import RDConfig -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit.Chem import Crippen @@ -95,7 +95,7 @@ class TestCase(unittest.TestCase): def _writeDetailFile(self, inF, outF): while 1: try: - smi, refContribs = cPickle.load(inF) + smi, refContribs = pickle.load(inF) except EOFError: break else: @@ -104,7 +104,7 @@ class TestCase(unittest.TestCase): mol = Chem.AddHs(mol, 1) smi2 = Chem.MolToSmiles(mol) contribs = Crippen._GetAtomContribs(mol) - cPickle.dump((smi, contribs), outF) + pickle.dump((smi, contribs), outF) else: print('Problems with SMILES:', smi) @@ -116,7 +116,7 @@ class TestCase(unittest.TestCase): if verbose: print('---------------') try: - smi, refContribs = cPickle.load(inF) + smi, refContribs = pickle.load(inF) except EOFError: done = 1 else: diff --git a/rdkit/Chem/UnitTestDescriptors.py b/rdkit/Chem/UnitTestDescriptors.py index 3e24f91e9..b9fcccc49 100644 --- a/rdkit/Chem/UnitTestDescriptors.py +++ b/rdkit/Chem/UnitTestDescriptors.py @@ -10,7 +10,7 @@ """ General descriptor testing code """ -from __future__ import print_function + import io import os.path @@ -24,7 +24,7 @@ from rdkit.Chem import AllChem from rdkit.Chem import Descriptors from rdkit.Chem import Lipinski from rdkit.Chem import rdMolDescriptors -from rdkit.six.moves import cPickle +import pickle def load_tests(loader, tests, ignore): @@ -96,7 +96,7 @@ class TestCase(unittest.TestCase): intf.close() with io.BytesIO(buf) as inf: pkl = inf.read() - refData = cPickle.loads(pkl, encoding='bytes') + refData = pickle.loads(pkl, encoding='bytes') fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'aromat_regress.txt') ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')] refData2 = [] diff --git a/rdkit/Chem/UnitTestDocTestsChem.py b/rdkit/Chem/UnitTestDocTestsChem.py index d5721d9ac..567693dd7 100755 --- a/rdkit/Chem/UnitTestDocTestsChem.py +++ b/rdkit/Chem/UnitTestDocTestsChem.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import unittest import doctest from rdkit import Chem diff --git a/rdkit/Chem/UnitTestEnumerateHeterocycles.py b/rdkit/Chem/UnitTestEnumerateHeterocycles.py index ccfa5b728..62a8df20e 100644 --- a/rdkit/Chem/UnitTestEnumerateHeterocycles.py +++ b/rdkit/Chem/UnitTestEnumerateHeterocycles.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import doctest import unittest diff --git a/rdkit/Chem/UnitTestFeatFinderCLI.py b/rdkit/Chem/UnitTestFeatFinderCLI.py index cb1d61b1a..529ecc768 100644 --- a/rdkit/Chem/UnitTestFeatFinderCLI.py +++ b/rdkit/Chem/UnitTestFeatFinderCLI.py @@ -5,69 +5,69 @@ import unittest from rdkit import RDConfig from rdkit.Chem import FeatFinderCLI -from rdkit.six.moves import cStringIO as StringIO +from io import StringIO class TestCase(unittest.TestCase): - def test_FeatFinderCLI(self): - smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') - featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', - 'BaseFeatures.fdef') - parser = FeatFinderCLI.initParser() - cmd = '-n 10 {0} {1}'.format(featureFile, smilesFile) - with outputRedirect() as (out, err): - args = parser.parse_args(cmd.split()) - FeatFinderCLI.processArgs(args, parser) - out = out.getvalue() - err = err.getvalue() - self.assertIn('Mol-1', out) - self.assertIn('Acceptor-SingleAtomAcceptor', out) - self.assertIn('C(1)', out) - self.assertNotIn('Mol-11', out) - self.assertEqual(err, '') + def test_FeatFinderCLI(self): + smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') + featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', + 'BaseFeatures.fdef') + parser = FeatFinderCLI.initParser() + cmd = '-n 10 {0} {1}'.format(featureFile, smilesFile) + with outputRedirect() as (out, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + out = out.getvalue() + err = err.getvalue() + self.assertIn('Mol-1', out) + self.assertIn('Acceptor-SingleAtomAcceptor', out) + self.assertIn('C(1)', out) + self.assertNotIn('Mol-11', out) + self.assertEqual(err, '') - cmd = '-n 2 -r {0} {1}'.format(featureFile, smilesFile) - with outputRedirect() as (out, err): - args = parser.parse_args(cmd.split()) - FeatFinderCLI.processArgs(args, parser) - out = out.getvalue() - err = err.getvalue() - self.assertIn('Mol-1', out) - self.assertIn('Acceptor-SingleAtomAcceptor:', out) - self.assertIn('2, 3, 4', out) - self.assertNotIn('Mol-3', out) - self.assertEqual(err, '') + cmd = '-n 2 -r {0} {1}'.format(featureFile, smilesFile) + with outputRedirect() as (out, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + out = out.getvalue() + err = err.getvalue() + self.assertIn('Mol-1', out) + self.assertIn('Acceptor-SingleAtomAcceptor:', out) + self.assertIn('2, 3, 4', out) + self.assertNotIn('Mol-3', out) + self.assertEqual(err, '') - def test_FeatFinderCLIexceptions(self): - smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') - featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', - 'BaseFeatures.fdef') - parser = FeatFinderCLI.initParser() - cmd = '-n 10 {0} {1}'.format(smilesFile, smilesFile) - with self.assertRaises(SystemExit), outputRedirect() as (_, err): - args = parser.parse_args(cmd.split()) - FeatFinderCLI.processArgs(args, parser) - self.assertIn('error', err.getvalue()) + def test_FeatFinderCLIexceptions(self): + smilesFile = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi') + featureFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', + 'BaseFeatures.fdef') + parser = FeatFinderCLI.initParser() + cmd = '-n 10 {0} {1}'.format(smilesFile, smilesFile) + with self.assertRaises(SystemExit), outputRedirect() as (_, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + self.assertIn('error', err.getvalue()) - cmd = '-n 10 {0} {1}'.format(featureFile, 'incorrectFilename') - with self.assertRaises(SystemExit), outputRedirect() as (_, err): - args = parser.parse_args(cmd.split()) - FeatFinderCLI.processArgs(args, parser) - self.assertIn('error', err.getvalue()) + cmd = '-n 10 {0} {1}'.format(featureFile, 'incorrectFilename') + with self.assertRaises(SystemExit), outputRedirect() as (_, err): + args = parser.parse_args(cmd.split()) + FeatFinderCLI.processArgs(args, parser) + self.assertIn('error', err.getvalue()) @contextmanager def outputRedirect(): - """ Redirect standard output and error to String IO and return """ - try: - _stdout, _stderr = sys.stdout, sys.stderr - sys.stdout = sStdout = StringIO() - sys.stderr = sStderr = StringIO() - yield (sStdout, sStderr) - finally: - sys.stdout, sys.stderr = _stdout, _stderr + """ Redirect standard output and error to String IO and return """ + try: + _stdout, _stderr = sys.stdout, sys.stderr + sys.stdout = sStdout = StringIO() + sys.stderr = sStderr = StringIO() + yield (sStdout, sStderr) + finally: + sys.stdout, sys.stderr = _stdout, _stderr if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/UnitTestFunctionalGroups.py b/rdkit/Chem/UnitTestFunctionalGroups.py index d607ab7f3..70abe4390 100644 --- a/rdkit/Chem/UnitTestFunctionalGroups.py +++ b/rdkit/Chem/UnitTestFunctionalGroups.py @@ -30,7 +30,7 @@ # # Created by Greg Landrum October 2006 # -from __future__ import print_function + import os.path import unittest diff --git a/rdkit/Chem/UnitTestGraphDescriptors_2.py b/rdkit/Chem/UnitTestGraphDescriptors_2.py index abb38c1e5..699d18d6c 100755 --- a/rdkit/Chem/UnitTestGraphDescriptors_2.py +++ b/rdkit/Chem/UnitTestGraphDescriptors_2.py @@ -11,7 +11,7 @@ """unit testing code for graph-theoretical descriptors """ -from __future__ import print_function + import os.path import unittest diff --git a/rdkit/Chem/UnitTestInchi.py b/rdkit/Chem/UnitTestInchi.py index 36efddbb5..e5f0cc79e 100755 --- a/rdkit/Chem/UnitTestInchi.py +++ b/rdkit/Chem/UnitTestInchi.py @@ -28,7 +28,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from __future__ import print_function + import gzip import io @@ -39,14 +39,13 @@ import unittest from rdkit import RDConfig from rdkit.Chem import rdDepictor from rdkit import RDLogger -from rdkit.six.moves.cPickle import loads -from rdkit.Chem import ForwardSDMolSupplier, SanitizeMol -from rdkit.Chem import MolFromSmiles, MolToSmiles +import pickle +from rdkit.Chem import MolFromSmiles, MolToSmiles, ForwardSDMolSupplier, SanitizeMol from rdkit.Chem import MolFromMolBlock, MolToMolBlock from rdkit.Chem import INCHI_AVAILABLE if INCHI_AVAILABLE: - from rdkit.Chem import InchiReadWriteError - from rdkit.Chem import MolToInchi, MolBlockToInchi, MolFromInchi, InchiToInchiKey, MolToInchiKey + from rdkit.Chem import InchiReadWriteError + from rdkit.Chem import MolToInchi, MolBlockToInchi, MolFromInchi, InchiToInchiKey, MolToInchiKey COLOR_RED = '\033[31m' COLOR_GREEN = '\033[32m' @@ -54,227 +53,227 @@ COLOR_RESET = '\033[0m' def inchiDiffPrefix(inchi1, inchi2): - inchi1 = inchi1.split('/') - inchi2 = inchi2.split('/') - for i in range(len(inchi1) + 1): - if i == len(inchi1): - break - if i == len(inchi2) or inchi1[i] != inchi2[i]: - break - if len(inchi1) >= i: - return inchi1[i][0] - else: - return inchi2[i][0] + inchi1 = inchi1.split('/') + inchi2 = inchi2.split('/') + for i in range(len(inchi1) + 1): + if i == len(inchi1): + break + if i == len(inchi2) or inchi1[i] != inchi2[i]: + break + if len(inchi1) >= i: + return inchi1[i][0] + else: + return inchi2[i][0] def inchiDiff(inchi1, inchi2): - inchi1 = inchi1.split('/') - inchi2 = inchi2.split('/') - for i in range(len(inchi1) + 1): - if i == len(inchi1): - break - if i == len(inchi2) or inchi1[i] != inchi2[i]: - break - return ('/'.join(inchi1[:i]) + COLOR_RED + '/' + '/'.join(inchi1[i:]) + '\n' + COLOR_RESET + - '/'.join(inchi2[:i]) + COLOR_RED + '/' + '/'.join(inchi2[i:]) + COLOR_RESET) + inchi1 = inchi1.split('/') + inchi2 = inchi2.split('/') + for i in range(len(inchi1) + 1): + if i == len(inchi1): + break + if i == len(inchi2) or inchi1[i] != inchi2[i]: + break + return ('/'.join(inchi1[:i]) + COLOR_RED + '/' + '/'.join(inchi1[i:]) + '\n' + COLOR_RESET + + '/'.join(inchi2[:i]) + COLOR_RED + '/' + '/'.join(inchi2[i:]) + COLOR_RESET) @unittest.skipUnless(INCHI_AVAILABLE, 'Inchi support not available') class RegressionTest(unittest.TestCase): - def testPrechloricAcid(self): - examples = ( - ('OCl(=O)(=O)=O', 'InChI=1S/ClHO4/c2-1(3,4)5/h(H,2,3,4,5)'), - ('CC1=CC2=NCC(CN2C=C1)C(=O)c3ccc4cc(C)ccc4c3.OCl(=O)(=O)=O', - 'InChI=1S/C21H20N2O.ClHO4/c1-14-3-4-17-11-18(6-5-16(17)9-14)21(24)19-12-22-20-10-15(2)7-8-23(20)13-19;2-1(3,4)5/h3-11,19H,12-13H2,1-2H3;(H,2,3,4,5)' - ), - ('CNc1ccc2nc3ccccc3[n+](C)c2c1.[O-]Cl(=O)(=O)=O', - 'InChI=1S/C14H13N3.ClHO4/c1-15-10-7-8-12-14(9-10)17(2)13-6-4-3-5-11(13)16-12;2-1(3,4)5/h3-9H,1-2H3;(H,2,3,4,5)' - ), - ) - for smiles, expected in examples: - m = MolFromSmiles(smiles) - inchi = MolToInchi(m) - self.assertEqual(inchi, expected) + def testPrechloricAcid(self): + examples = ( + ('OCl(=O)(=O)=O', 'InChI=1S/ClHO4/c2-1(3,4)5/h(H,2,3,4,5)'), + ('CC1=CC2=NCC(CN2C=C1)C(=O)c3ccc4cc(C)ccc4c3.OCl(=O)(=O)=O', + 'InChI=1S/C21H20N2O.ClHO4/c1-14-3-4-17-11-18(6-5-16(17)9-14)21(24)19-12-22-20-10-15(2)7-8-23(20)13-19;2-1(3,4)5/h3-11,19H,12-13H2,1-2H3;(H,2,3,4,5)' + ), + ('CNc1ccc2nc3ccccc3[n+](C)c2c1.[O-]Cl(=O)(=O)=O', + 'InChI=1S/C14H13N3.ClHO4/c1-15-10-7-8-12-14(9-10)17(2)13-6-4-3-5-11(13)16-12;2-1(3,4)5/h3-9H,1-2H3;(H,2,3,4,5)' + ), + ) + for smiles, expected in examples: + m = MolFromSmiles(smiles) + inchi = MolToInchi(m) + self.assertEqual(inchi, expected) @unittest.skipUnless(INCHI_AVAILABLE, 'Inchi support not available') class TestCase(unittest.TestCase): - def setUp(self): - self.dataset = dict() - self.dataset_inchi = dict() - inf = gzip.open( - os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz'), 'r') - self.dataset['problematic'] = ForwardSDMolSupplier(inf, sanitize=False, removeHs=False) - with open(os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi'), - 'r') as intF: - buf = intF.read().replace('\r\n', '\n').encode('latin1') - intF.close() - with io.BytesIO(buf) as inF: - pkl = inF.read() - self.dataset_inchi['problematic'] = loads(pkl, encoding='latin1') - # disable logging - RDLogger.DisableLog('rdApp.warning') + def setUp(self): + self.dataset = dict() + self.dataset_inchi = dict() + inf = gzip.open( + os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz'), 'r') + self.dataset['problematic'] = ForwardSDMolSupplier(inf, sanitize=False, removeHs=False) + with open(os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi'), + 'r') as intF: + buf = intF.read().replace('\r\n', '\n').encode('latin1') + intF.close() + with io.BytesIO(buf) as inF: + pkl = inF.read() + self.dataset_inchi['problematic'] = pickle.loads(pkl, encoding='latin1') + # disable logging + RDLogger.DisableLog('rdApp.warning') - def tearDown(self): - RDLogger.EnableLog('rdApp.warning') - RDLogger.EnableLog('rdApp.error') - - def test0InchiWritePubChem(self): - for fp, f in self.dataset.items(): - inchi_db = self.dataset_inchi[fp] - same, diff, reasonable = 0, 0, 0 - for m in f: - if m is None: # pragma: nocover - continue - ref_inchi = inchi_db[m.GetProp('PUBCHEM_COMPOUND_CID')] - x, y = MolToInchi(m), ref_inchi - if x != y: - if re.search(r'.[1-9]?ClO4', x) is not None: - reasonable += 1 - continue - SanitizeMol(m) - if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]): - reasonable += 1 - continue - # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) - # if it is because RDKit does not think the bond is stereo - z = MolToInchi(MolFromMolBlock(MolToMolBlock(m))) - if y != z and inchiDiffPrefix(y, z) == 'b': - reasonable += 1 - continue - # some warning - try: - MolToInchi(m, treatWarningAsError=True) - except InchiReadWriteError as inst: - _, error = inst.args - if 'Metal' in error: - reasonable += 1 - continue - - diff += 1 - print('InChI mismatch for PubChem Compound ' + m.GetProp('PUBCHEM_COMPOUND_CID')) - print(MolToSmiles(m, True)) - print(inchiDiff(x, y)) - print() - - else: - same += 1 - - fmt = "\n{0}InChI write Summary: {1} identical, {2} suffix variance, {3} reasonable{4}" - print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET)) - self.assertEqual(same, 1164) - self.assertEqual(diff, 0) - self.assertEqual(reasonable, 17) - - def test1InchiReadPubChem(self): - for f in self.dataset.values(): - same, diff, reasonable = 0, 0, 0 - for m in f: - if m is None: # pragma: nocover - continue - x = MolToInchi(m) - y = None - RDLogger.DisableLog('rdApp.error') - mol = MolFromInchi(x) + def tearDown(self): + RDLogger.EnableLog('rdApp.warning') RDLogger.EnableLog('rdApp.error') - if mol is not None: - y = MolToInchi(MolFromSmiles(MolToSmiles(mol, isomericSmiles=True))) - if y is None: - # metal involved? - try: - MolToInchi(m, treatWarningAsError=True) - except InchiReadWriteError as inst: - _, error = inst.args - if 'Metal' in error or \ - 'Charges were rearranged' in error: - reasonable += 1 - continue - # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) - # RDKit does not like the SMILES? use MolBlock instead - inchiMol = MolFromInchi(x) - if inchiMol: - rdDepictor.Compute2DCoords(inchiMol) - z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol))) - if x == z: - reasonable += 1 - continue - # InChI messed up the radical? - unsanitizedInchiMol = MolFromInchi(x, sanitize=False) - if sum([ - a.GetNumRadicalElectrons() * a.GetAtomicNum() for a in m.GetAtoms() - if a.GetNumRadicalElectrons() != 0 - ]) != sum([ - a.GetNumRadicalElectrons() * a.GetAtomicNum() for a in unsanitizedInchiMol.GetAtoms() - if a.GetNumRadicalElectrons() != 0 - ]): - reasonable += 1 - continue - diff += 1 - cid = m.GetProp('PUBCHEM_COMPOUND_CID') - print(COLOR_GREEN + 'Empty mol for PubChem Compound ' + cid + '\n' + COLOR_RESET) - continue - if x != y: - # if there was warning in the first place, then this is - # tolerable - try: - MolToInchi(m, treatWarningAsError=True) - MolFromInchi(x, treatWarningAsError=True) - except InchiReadWriteError as inst: - reasonable += 1 - continue - # or if there are big rings - SanitizeMol(m) - if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]): - reasonable += 1 - continue - # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) - # or if RDKit loses bond stereo - s = MolToSmiles(m, True) - if MolToSmiles(MolFromSmiles(s), True) != s: - reasonable += 1 - continue - # or if it is RDKit SMILES writer unhappy about the mol - inchiMol = MolFromInchi(x) - rdDepictor.Compute2DCoords(inchiMol) - z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol))) - if x == z: - reasonable += 1 - continue + def test0InchiWritePubChem(self): + for fp, f in self.dataset.items(): + inchi_db = self.dataset_inchi[fp] + same, diff, reasonable = 0, 0, 0 + for m in f: + if m is None: # pragma: nocover + continue + ref_inchi = inchi_db[m.GetProp('PUBCHEM_COMPOUND_CID')] + x, y = MolToInchi(m), ref_inchi + if x != y: + if re.search(r'.[1-9]?ClO4', x) is not None: + reasonable += 1 + continue + SanitizeMol(m) + if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]): + reasonable += 1 + continue + # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) + # if it is because RDKit does not think the bond is stereo + z = MolToInchi(MolFromMolBlock(MolToMolBlock(m))) + if y != z and inchiDiffPrefix(y, z) == 'b': + reasonable += 1 + continue + # some warning + try: + MolToInchi(m, treatWarningAsError=True) + except InchiReadWriteError as inst: + _, error = inst.args + if 'Metal' in error: + reasonable += 1 + continue - diff += 1 - print(COLOR_GREEN + 'Molecule mismatch for PubChem Compound ' + cid + COLOR_RESET) - print(inchiDiff(x, y)) - print() - else: - same += 1 - fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}" - print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET)) - self.assertEqual(same, 627) - self.assertEqual(diff, 0) - self.assertEqual(reasonable, 554) + diff += 1 + print('InChI mismatch for PubChem Compound ' + m.GetProp('PUBCHEM_COMPOUND_CID')) + print(MolToSmiles(m, True)) + print(inchiDiff(x, y)) + print() - def test2InchiOptions(self): - m = MolFromSmiles("CC=C(N)C") - inchi1 = MolToInchi(m).split('/', 1)[1] - inchi2 = MolToInchi(m, "/SUU").split('/', 1)[1] - self.assertEqual(inchi1 + '/b4-3?', inchi2) + else: + same += 1 - def test3InchiKey(self): - inchi = 'InChI=1S/C9H12/c1-2-6-9-7-4-3-5-8-9/h3-5,7-8H,2,6H2,1H3' - self.assertEqual(InchiToInchiKey(inchi), 'ODLMAHJVESYWTB-UHFFFAOYSA-N') + fmt = "\n{0}InChI write Summary: {1} identical, {2} suffix variance, {3} reasonable{4}" + print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET)) + self.assertEqual(same, 1164) + self.assertEqual(diff, 0) + self.assertEqual(reasonable, 17) - def test4MolToInchiKey(self): - m = MolFromSmiles("CC=C(N)C") - inchi = MolToInchi(m) - k1 = InchiToInchiKey(inchi) - k2 = MolToInchiKey(m) - self.assertEqual(k1, k2) + def test1InchiReadPubChem(self): + for f in self.dataset.values(): + same, diff, reasonable = 0, 0, 0 + for m in f: + if m is None: # pragma: nocover + continue + x = MolToInchi(m) + y = None + RDLogger.DisableLog('rdApp.error') + mol = MolFromInchi(x) + RDLogger.EnableLog('rdApp.error') + if mol is not None: + y = MolToInchi(MolFromSmiles(MolToSmiles(mol, isomericSmiles=True))) + if y is None: + # metal involved? + try: + MolToInchi(m, treatWarningAsError=True) + except InchiReadWriteError as inst: + _, error = inst.args + if 'Metal' in error or \ + 'Charges were rearranged' in error: + reasonable += 1 + continue + # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) + # RDKit does not like the SMILES? use MolBlock instead + inchiMol = MolFromInchi(x) + if inchiMol: + rdDepictor.Compute2DCoords(inchiMol) + z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol))) + if x == z: + reasonable += 1 + continue + # InChI messed up the radical? + unsanitizedInchiMol = MolFromInchi(x, sanitize=False) + if sum([ + a.GetNumRadicalElectrons() * a.GetAtomicNum() for a in m.GetAtoms() + if a.GetNumRadicalElectrons() != 0 + ]) != sum([ + a.GetNumRadicalElectrons() * a.GetAtomicNum() for a in unsanitizedInchiMol.GetAtoms() + if a.GetNumRadicalElectrons() != 0 + ]): + reasonable += 1 + continue - def test5MolBlockToInchi(self): - mb = """ + diff += 1 + cid = m.GetProp('PUBCHEM_COMPOUND_CID') + print(COLOR_GREEN + 'Empty mol for PubChem Compound ' + cid + '\n' + COLOR_RESET) + continue + if x != y: + # if there was warning in the first place, then this is + # tolerable + try: + MolToInchi(m, treatWarningAsError=True) + MolFromInchi(x, treatWarningAsError=True) + except InchiReadWriteError as inst: + reasonable += 1 + continue + # or if there are big rings + SanitizeMol(m) + if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]): + reasonable += 1 + continue + # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage) + # or if RDKit loses bond stereo + s = MolToSmiles(m, True) + if MolToSmiles(MolFromSmiles(s), True) != s: + reasonable += 1 + continue + # or if it is RDKit SMILES writer unhappy about the mol + inchiMol = MolFromInchi(x) + rdDepictor.Compute2DCoords(inchiMol) + z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol))) + if x == z: + reasonable += 1 + continue + + diff += 1 + print(COLOR_GREEN + 'Molecule mismatch for PubChem Compound ' + cid + COLOR_RESET) + print(inchiDiff(x, y)) + print() + else: + same += 1 + fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}" + print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET)) + self.assertEqual(same, 627) + self.assertEqual(diff, 0) + self.assertEqual(reasonable, 554) + + def test2InchiOptions(self): + m = MolFromSmiles("CC=C(N)C") + inchi1 = MolToInchi(m).split('/', 1)[1] + inchi2 = MolToInchi(m, "/SUU").split('/', 1)[1] + self.assertEqual(inchi1 + '/b4-3?', inchi2) + + def test3InchiKey(self): + inchi = 'InChI=1S/C9H12/c1-2-6-9-7-4-3-5-8-9/h3-5,7-8H,2,6H2,1H3' + self.assertEqual(InchiToInchiKey(inchi), 'ODLMAHJVESYWTB-UHFFFAOYSA-N') + + def test4MolToInchiKey(self): + m = MolFromSmiles("CC=C(N)C") + inchi = MolToInchi(m) + k1 = InchiToInchiKey(inchi) + k2 = MolToInchiKey(m) + self.assertEqual(k1, k2) + + def test5MolBlockToInchi(self): + mb = """ Mrv1824 02111920092D 6 6 0 0 0 0 999 V2000 @@ -291,10 +290,10 @@ class TestCase(unittest.TestCase): 1 6 1 0 0 0 0 2 3 2 0 0 0 0 M END""" - inchi = MolBlockToInchi(mb) - self.assertEqual(inchi,"InChI=1S/C5H8O/c1-2-4-6-5-3-1/h1-2H,3-5H2") - # make sure that options work - mb2 = """ + inchi = MolBlockToInchi(mb) + self.assertEqual(inchi, "InChI=1S/C5H8O/c1-2-4-6-5-3-1/h1-2H,3-5H2") + # make sure that options work + mb2 = """ Mrv1824 02121905282D 10 11 0 0 0 0 999 V2000 @@ -320,12 +319,12 @@ M END""" 8 10 1 0 0 0 0 8 9 2 0 0 0 0 M END""" - inchi2 = MolBlockToInchi(mb2,options="/FixedH") - self.assertEqual(inchi2,"InChI=1/C8H8N2/c1-6-7-4-2-3-5-8(7)10-9-6/h2-5H,1H3,(H,9,10)/f/h10H") - + inchi2 = MolBlockToInchi(mb2, options="/FixedH") + self.assertEqual( + inchi2, "InChI=1/C8H8N2/c1-6-7-4-2-3-5-8(7)10-9-6/h2-5H,1H3,(H,9,10)/f/h10H") if __name__ == '__main__': # pragma: nocover - # only run the test if InChI is available - if INCHI_AVAILABLE: - unittest.main() + # only run the test if InChI is available + if INCHI_AVAILABLE: + unittest.main() diff --git a/rdkit/Chem/UnitTestLipinski.py b/rdkit/Chem/UnitTestLipinski.py index a5c1c6df7..e82edaa4f 100755 --- a/rdkit/Chem/UnitTestLipinski.py +++ b/rdkit/Chem/UnitTestLipinski.py @@ -12,7 +12,7 @@ This provides a workout for the SMARTS matcher """ -from __future__ import print_function + import os import unittest diff --git a/rdkit/Chem/UnitTestOldBugs.py b/rdkit/Chem/UnitTestOldBugs.py index 2c704ba45..7ca92e5d5 100755 --- a/rdkit/Chem/UnitTestOldBugs.py +++ b/rdkit/Chem/UnitTestOldBugs.py @@ -16,7 +16,7 @@ relevant... but tests are tests """ from rdkit import RDConfig import unittest, os -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit.Chem import AllChem diff --git a/rdkit/Chem/UnitTestPandasTools.py b/rdkit/Chem/UnitTestPandasTools.py index 10d7930bd..9dadfa2e2 100644 --- a/rdkit/Chem/UnitTestPandasTools.py +++ b/rdkit/Chem/UnitTestPandasTools.py @@ -1,24 +1,23 @@ -from __future__ import print_function + +from rdkit import RDConfig, rdBase, Chem +from io import StringIO, BytesIO +from rdkit.Chem import PandasTools +import numpy +import unittest +import tempfile +import shutil +import os +import gzip import doctest if (getattr(doctest, 'ELLIPSIS_MARKER')): - doctest.ELLIPSIS_MARKER = '*...*' -import gzip -import os -import shutil -import tempfile -import unittest + doctest.ELLIPSIS_MARKER = '*...*' -import numpy - -from rdkit.Chem import PandasTools -from rdkit.six import PY3, StringIO, BytesIO -from rdkit import RDConfig, rdBase, Chem try: - import IPython + import IPython except ImportError: - IPython = None + IPython = None # We make sure that we don't mess up the Mol methods for the rest of the tests PandasTools.UninstallPandasTools() @@ -27,326 +26,322 @@ PandasTools.UninstallPandasTools() @unittest.skipIf(PandasTools.pd is None, 'Pandas not installed, skipping') class TestPandasTools(unittest.TestCase): - def __init__(self, methodName='runTest'): - self.df = getTestFrame() - self.df.index.name = 'IndexName' - super(TestPandasTools, self).__init__(methodName=methodName) + def __init__(self, methodName='runTest'): + self.df = getTestFrame() + self.df.index.name = 'IndexName' + super(TestPandasTools, self).__init__(methodName=methodName) - def setUp(self): - PandasTools.InstallPandasTools() - PandasTools.ChangeMoleculeRendering(renderer='PNG') - PandasTools.pd.set_option('display.max_columns', None) - self._molRepresentation = PandasTools.molRepresentation - self._highlightSubstructures = PandasTools.highlightSubstructures + def setUp(self): + PandasTools.InstallPandasTools() + PandasTools.ChangeMoleculeRendering(renderer='PNG') + PandasTools.pd.set_option('display.max_columns', None) + self._molRepresentation = PandasTools.molRepresentation + self._highlightSubstructures = PandasTools.highlightSubstructures - def tearDown(self): - PandasTools.molRepresentation = self._molRepresentation - PandasTools.highlightSubstructures = self._highlightSubstructures - PandasTools.UninstallPandasTools() + def tearDown(self): + PandasTools.molRepresentation = self._molRepresentation + PandasTools.highlightSubstructures = self._highlightSubstructures + PandasTools.UninstallPandasTools() - def testDoctest(self): - # We need to do it like this to ensure that default RDkit functionality is restored - failed, _ = doctest.testmod(PandasTools, - optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE) - self.assertFalse(failed) + def testDoctest(self): + # We need to do it like this to ensure that default RDkit functionality is restored + failed, _ = doctest.testmod(PandasTools, + optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE) + self.assertFalse(failed) - def test_RestoreMonkeyPatch(self): - sio = getStreamIO(methane + peroxide) - df = PandasTools.LoadSDF(sio) - html = df.to_html() - self.assertIn('data:image/png;base64', html) - self.assertIn('table', html) + def test_RestoreMonkeyPatch(self): + sio = getStreamIO(methane + peroxide) + df = PandasTools.LoadSDF(sio) + html = df.to_html() + self.assertIn('data:image/png;base64', html) + self.assertIn('table', html) - PandasTools.UninstallPandasTools() - html = df.to_html() - self.assertNotIn('data:image/png;base64', html) - self.assertIn('rdkit.Chem.rdchem.Mol', html) - self.assertIn('table', html) + PandasTools.UninstallPandasTools() + html = df.to_html() + self.assertNotIn('data:image/png;base64', html) + self.assertIn('rdkit.Chem.rdchem.Mol', html) + self.assertIn('table', html) - PandasTools.InstallPandasTools() - html = df.to_html() - self.assertIn('data:image/png;base64', html) - self.assertIn('table', html) + PandasTools.InstallPandasTools() + html = df.to_html() + self.assertIn('data:image/png;base64', html) + self.assertIn('table', html) - PandasTools.UninstallPandasTools() - html = df.to_html() - self.assertNotIn('data:image/png;base64', html) - self.assertIn('rdkit.Chem.rdchem.Mol', html) - self.assertIn('table', html) + PandasTools.UninstallPandasTools() + html = df.to_html() + self.assertNotIn('data:image/png;base64', html) + self.assertIn('rdkit.Chem.rdchem.Mol', html) + self.assertIn('table', html) - def test_FrameToGridImage(self): - # This test only makes sure that we get no exception. To see the created images, set - # interactive to True - interactive = False - self.assertTrue(True) - df = self.df + def test_FrameToGridImage(self): + # This test only makes sure that we get no exception. To see the created images, set + # interactive to True + interactive = False + self.assertTrue(True) + df = self.df - result = PandasTools.FrameToGridImage(df) - if interactive: - result.show() + result = PandasTools.FrameToGridImage(df) + if interactive: + result.show() - result = PandasTools.FrameToGridImage(df, legendsCol='PUBCHEM_IUPAC_INCHIKEY') - if interactive: - result.show() + result = PandasTools.FrameToGridImage(df, legendsCol='PUBCHEM_IUPAC_INCHIKEY') + if interactive: + result.show() - result = PandasTools.FrameToGridImage(df, legendsCol=df.index.name) - if interactive: - result.show() + result = PandasTools.FrameToGridImage(df, legendsCol=df.index.name) + if interactive: + result.show() - def test_AddMurckoToFrame(self): - df = self.df.copy() - self.assertIn('ROMol', df.columns) - self.assertNotIn('Murcko_SMILES', df.columns) - PandasTools.AddMurckoToFrame(df) - self.assertIn('ROMol', df.columns) - self.assertIn('Murcko_SMILES', df.columns) - self.assertEqual(df['Murcko_SMILES'][10], 'O=C(CCn1c(-c2ccccc2)n[nH]c1=S)Nc1ccccn1') + def test_AddMurckoToFrame(self): + df = self.df.copy() + self.assertIn('ROMol', df.columns) + self.assertNotIn('Murcko_SMILES', df.columns) + PandasTools.AddMurckoToFrame(df) + self.assertIn('ROMol', df.columns) + self.assertIn('Murcko_SMILES', df.columns) + self.assertEqual(df['Murcko_SMILES'][10], 'O=C(CCn1c(-c2ccccc2)n[nH]c1=S)Nc1ccccn1') - PandasTools.AddMurckoToFrame(df, Generic=True) - self.assertIn('ROMol', df.columns) - self.assertIn('Murcko_SMILES', df.columns) - self.assertEqual(df['Murcko_SMILES'][10], 'CC(CCC1C(C)CCC1C1CCCCC1)CC1CCCCC1') + PandasTools.AddMurckoToFrame(df, Generic=True) + self.assertIn('ROMol', df.columns) + self.assertIn('Murcko_SMILES', df.columns) + self.assertEqual(df['Murcko_SMILES'][10], 'CC(CCC1C(C)CCC1C1CCCCC1)CC1CCCCC1') - def test_SaveSMILESFromFrame(self): - sio = StringIO() - PandasTools.SaveSMILESFromFrame(self.df, sio) - result = sio.getvalue() - self.assertIn(self.df['SMILES'][10], result) - self.assertIn(self.df['ID'][10], result) + def test_SaveSMILESFromFrame(self): + sio = StringIO() + PandasTools.SaveSMILESFromFrame(self.df, sio) + result = sio.getvalue() + self.assertIn(self.df['SMILES'][10], result) + self.assertIn(self.df['ID'][10], result) - sio = StringIO() - PandasTools.SaveSMILESFromFrame(self.df, sio, NamesCol='PUBCHEM_IUPAC_INCHIKEY') - result = sio.getvalue() - self.assertIn(self.df['SMILES'][10], result) - self.assertIn(self.df['PUBCHEM_IUPAC_INCHIKEY'][10], result) + sio = StringIO() + PandasTools.SaveSMILESFromFrame(self.df, sio, NamesCol='PUBCHEM_IUPAC_INCHIKEY') + result = sio.getvalue() + self.assertIn(self.df['SMILES'][10], result) + self.assertIn(self.df['PUBCHEM_IUPAC_INCHIKEY'][10], result) - @unittest.skipIf(IPython is None, 'Package IPython required for testing') - def test_svgRendering(self): - df = PandasTools.LoadSDF(getStreamIO(methane + peroxide)) - self.assertIn('image/png', str(df)) - self.assertNotIn('svg', str(df)) + @unittest.skipIf(IPython is None, 'Package IPython required for testing') + def test_svgRendering(self): + df = PandasTools.LoadSDF(getStreamIO(methane + peroxide)) + self.assertIn('image/png', str(df)) + self.assertNotIn('svg', str(df)) - PandasTools.molRepresentation = 'svg' - self.assertIn('svg', str(df)) - self.assertNotIn('image/png', str(df)) + PandasTools.molRepresentation = 'svg' + self.assertIn('svg', str(df)) + self.assertNotIn('image/png', str(df)) - # we can use upper case for the molRepresentation - PandasTools.molRepresentation = 'PNG' - self.assertNotIn('svg', str(df)) - self.assertIn('image/png', str(df)) + # we can use upper case for the molRepresentation + PandasTools.molRepresentation = 'PNG' + self.assertNotIn('svg', str(df)) + self.assertIn('image/png', str(df)) - def test_patchHeadFrame(self): - df = self.df.copy() - result = str(df.head()) - self.assertIn('35024984', result) - self.assertNotIn('35024985', result) + def test_patchHeadFrame(self): + df = self.df.copy() + result = str(df.head()) + self.assertIn('35024984', result) + self.assertNotIn('35024985', result) - def test_AddMoleculeColumnToFrame(self): - df = PandasTools.LoadSDF( - getStreamIO(methane + peroxide), isomericSmiles=True, smilesName='Smiles') - PandasTools.ChangeMoleculeRendering(frame=df, renderer='String') - del df['ROMol'] - self.assertNotIn('ROMol', str(df)) - PandasTools.AddMoleculeColumnToFrame(df, includeFingerprints=False) - self.assertIn('ROMol', str(df)) + def test_AddMoleculeColumnToFrame(self): + df = PandasTools.LoadSDF( + getStreamIO(methane + peroxide), isomericSmiles=True, smilesName='Smiles') + PandasTools.ChangeMoleculeRendering(frame=df, renderer='String') + del df['ROMol'] + self.assertNotIn('ROMol', str(df)) + PandasTools.AddMoleculeColumnToFrame(df, includeFingerprints=False) + self.assertIn('ROMol', str(df)) - def test_molge(self): - # We want to have the default RDkit functionality for testing - PandasTools.UninstallPandasTools() - molge = PandasTools._molge - mol1 = Chem.MolFromSmiles('CCC') - mol2 = Chem.MolFromSmiles('CC') - mol3 = Chem.MolFromSmiles('CN') + def test_molge(self): + # We want to have the default RDkit functionality for testing + PandasTools.UninstallPandasTools() + molge = PandasTools._molge + mol1 = Chem.MolFromSmiles('CCC') + mol2 = Chem.MolFromSmiles('CC') + mol3 = Chem.MolFromSmiles('CN') - self.assertFalse(molge(mol1, None)) - self.assertFalse(molge(None, mol1)) + self.assertFalse(molge(mol1, None)) + self.assertFalse(molge(None, mol1)) - self.assertFalse(hasattr(mol1, '_substructfp')) - self.assertFalse(hasattr(mol2, '_substructfp')) - self.assertFalse(hasattr(mol1, '__sssAtoms')) - self.assertFalse(hasattr(mol2, '__sssAtoms')) + self.assertFalse(hasattr(mol1, '_substructfp')) + self.assertFalse(hasattr(mol2, '_substructfp')) + self.assertFalse(hasattr(mol1, '__sssAtoms')) + self.assertFalse(hasattr(mol2, '__sssAtoms')) - self.assertTrue(molge(mol1, mol2)) - self.assertEqual(mol1.__dict__['__sssAtoms'], [0, 1]) - PandasTools.highlightSubstructures = False - self.assertTrue(molge(mol1, mol2)) - self.assertEqual(mol1.__dict__['__sssAtoms'], []) + self.assertTrue(molge(mol1, mol2)) + self.assertEqual(mol1.__dict__['__sssAtoms'], [0, 1]) + PandasTools.highlightSubstructures = False + self.assertTrue(molge(mol1, mol2)) + self.assertEqual(mol1.__dict__['__sssAtoms'], []) - PandasTools.highlightSubstructures = True - self.assertFalse(molge(mol2, mol1)) - self.assertEqual(mol2.__dict__['__sssAtoms'], []) + PandasTools.highlightSubstructures = True + self.assertFalse(molge(mol2, mol1)) + self.assertEqual(mol2.__dict__['__sssAtoms'], []) - self.assertFalse(molge(mol1, mol3)) - self.assertEqual(mol1.__dict__['__sssAtoms'], []) + self.assertFalse(molge(mol1, mol3)) + self.assertEqual(mol1.__dict__['__sssAtoms'], []) @unittest.skipIf(PandasTools.pd is None, 'Pandas not installed, skipping') class TestLoadSDF(unittest.TestCase): - gz_filename = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'pandas_load.sdf.gz') + gz_filename = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'pandas_load.sdf.gz') - # the doctest tests loading from a ".sdf" file so there's no need for that test here + # the doctest tests loading from a ".sdf" file so there's no need for that test here - def test_load_gzip_file(self): - rdBase.DisableLog('rdApp.error') - df = PandasTools.LoadSDF(self.gz_filename) - rdBase.EnableLog('rdApp.error') - self.assertEqual(len(df), 13) - # The molecule with index 1 is invalid, so it should be missing form the index - self.assertEqual(list(df.index), [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]) + def test_load_gzip_file(self): + rdBase.DisableLog('rdApp.error') + df = PandasTools.LoadSDF(self.gz_filename) + rdBase.EnableLog('rdApp.error') + self.assertEqual(len(df), 13) + # The molecule with index 1 is invalid, so it should be missing form the index + self.assertEqual(list(df.index), [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]) - def test_load_from_sio(self): - sio = getStreamIO(methane + peroxide) - df = PandasTools.LoadSDF(sio) - self.assertEqual(len(df), 2) - self.assertEqual(list(df["ID"]), ["Methane", "Peroxide"]) - atom_counts = [mol.GetNumAtoms() for mol in df["ROMol"]] - self.assertEqual(atom_counts, [1, 2]) + def test_load_from_sio(self): + sio = getStreamIO(methane + peroxide) + df = PandasTools.LoadSDF(sio) + self.assertEqual(len(df), 2) + self.assertEqual(list(df["ID"]), ["Methane", "Peroxide"]) + atom_counts = [mol.GetNumAtoms() for mol in df["ROMol"]] + self.assertEqual(atom_counts, [1, 2]) - def test_load_specify_column_names(self): - sio = getStreamIO(methane + peroxide) - df = PandasTools.LoadSDF(sio, idName="CorpID", molColName="_rdmol") - self.assertEqual(len(df), 2) - self.assertEqual(list(df["CorpID"]), ["Methane", "Peroxide"]) - atom_counts = [mol.GetNumAtoms() for mol in df["_rdmol"]] - self.assertEqual(atom_counts, [1, 2]) + def test_load_specify_column_names(self): + sio = getStreamIO(methane + peroxide) + df = PandasTools.LoadSDF(sio, idName="CorpID", molColName="_rdmol") + self.assertEqual(len(df), 2) + self.assertEqual(list(df["CorpID"]), ["Methane", "Peroxide"]) + atom_counts = [mol.GetNumAtoms() for mol in df["_rdmol"]] + self.assertEqual(atom_counts, [1, 2]) - def test_empty_file(self): - # Should return an empty data frame with no rows or columns - sio = getStreamIO(None) - df = PandasTools.LoadSDF(sio) - self.assertEqual(len(df), 0) - self.assertEqual(len(df.index), 0) + def test_empty_file(self): + # Should return an empty data frame with no rows or columns + sio = getStreamIO(None) + df = PandasTools.LoadSDF(sio) + self.assertEqual(len(df), 0) + self.assertEqual(len(df.index), 0) - def test_passed_in_file_is_not_closed(self): - sio = getStreamIO(methane) - df = PandasTools.LoadSDF(sio) - self.assertEqual(len(df), 1) - self.assertFalse(sio.closed) + def test_passed_in_file_is_not_closed(self): + sio = getStreamIO(methane) + df = PandasTools.LoadSDF(sio) + self.assertEqual(len(df), 1) + self.assertFalse(sio.closed) - def test_properties(self): - sio = getStreamIO(peroxide + methane) - df = PandasTools.LoadSDF(sio) - self.assertEqual(set(df.columns), set("ROMol ID prop1 prop2 prop3".split())) - prop1 = list(df["prop1"]) - self.assertTrue(numpy.isnan(prop1[0]), prop1[0]) - self.assertEqual(prop1[1], "12.34") + def test_properties(self): + sio = getStreamIO(peroxide + methane) + df = PandasTools.LoadSDF(sio) + self.assertEqual(set(df.columns), set("ROMol ID prop1 prop2 prop3".split())) + prop1 = list(df["prop1"]) + self.assertTrue(numpy.isnan(prop1[0]), prop1[0]) + self.assertEqual(prop1[1], "12.34") - self.assertEqual(list(df["prop2"]), ["rtz", "qwe"]) + self.assertEqual(list(df["prop2"]), ["rtz", "qwe"]) - prop3 = list(df["prop3"]) - self.assertEqual(prop3[0], "yxcv") - self.assertTrue(numpy.isnan(prop3[1]), prop3[1]) + prop3 = list(df["prop3"]) + self.assertEqual(prop3[0], "yxcv") + self.assertTrue(numpy.isnan(prop3[1]), prop3[1]) - def test_ignore_mol_column(self): - sio = getStreamIO(peroxide + methane) - df = PandasTools.LoadSDF(sio, molColName=None) - self.assertEqual(set(df.columns), set("ID prop1 prop2 prop3".split())) + def test_ignore_mol_column(self): + sio = getStreamIO(peroxide + methane) + df = PandasTools.LoadSDF(sio, molColName=None) + self.assertEqual(set(df.columns), set("ID prop1 prop2 prop3".split())) @unittest.skipIf(PandasTools.pd is None, 'Pandas not installed, skipping') class TestWriteSDF(unittest.TestCase): - def setUp(self): - self.df = PandasTools.LoadSDF(getStreamIO(methane + peroxide)) + def setUp(self): + self.df = PandasTools.LoadSDF(getStreamIO(methane + peroxide)) - def test_default_write_does_not_include_tags(self): - sio = StringIO() - PandasTools.WriteSDF(self.df, sio) - s = sio.getvalue() - self.assertNotIn(s, "prop2") + def test_default_write_does_not_include_tags(self): + sio = StringIO() + PandasTools.WriteSDF(self.df, sio) + s = sio.getvalue() + self.assertNotIn(s, "prop2") - def test_identifier_from_a_column(self): - sio = StringIO() - PandasTools.WriteSDF(self.df, sio, idName="prop2") - s = sio.getvalue() - first_line = s.split("\n", 1)[0] - self.assertEqual(first_line, "qwe") + def test_identifier_from_a_column(self): + sio = StringIO() + PandasTools.WriteSDF(self.df, sio, idName="prop2") + s = sio.getvalue() + first_line = s.split("\n", 1)[0] + self.assertEqual(first_line, "qwe") - def test_all_numeric_with_no_numeric_columns(self): - sio = StringIO() - PandasTools.WriteSDF(self.df, sio, allNumeric=True) - s = sio.getvalue() - self.assertFalse(">" in s, s) - self.assertNotIn("7\n\n", s) # double-check that the numeric tests don't pass by accident - self.assertNotIn("8\n\n", s) + def test_all_numeric_with_no_numeric_columns(self): + sio = StringIO() + PandasTools.WriteSDF(self.df, sio, allNumeric=True) + s = sio.getvalue() + self.assertFalse(">" in s, s) + self.assertNotIn("7\n\n", s) # double-check that the numeric tests don't pass by accident + self.assertNotIn("8\n\n", s) - def test_all_numeric_with_numeric_columns(self): - sio = StringIO() - df = self.df - df["len"] = df["ID"].map(len) - PandasTools.WriteSDF(df, sio, allNumeric=True) - s = sio.getvalue() - self.assertEqual(s.count(""), 2) - self.assertIn("7\n\n", s) - self.assertIn("8\n\n", s) + def test_all_numeric_with_numeric_columns(self): + sio = StringIO() + df = self.df + df["len"] = df["ID"].map(len) + PandasTools.WriteSDF(df, sio, allNumeric=True) + s = sio.getvalue() + self.assertEqual(s.count(""), 2) + self.assertIn("7\n\n", s) + self.assertIn("8\n\n", s) - def test_specify_numeric_column(self): - sio = StringIO() - df = self.df - df["len2"] = df["ID"].map(len) - PandasTools.WriteSDF(df, sio, properties=["len2"]) - s = sio.getvalue() - self.assertEqual(s.count(""), 2) - self.assertIn("7\n\n", s) - self.assertIn("8\n\n", s) + def test_specify_numeric_column(self): + sio = StringIO() + df = self.df + df["len2"] = df["ID"].map(len) + PandasTools.WriteSDF(df, sio, properties=["len2"]) + s = sio.getvalue() + self.assertEqual(s.count(""), 2) + self.assertIn("7\n\n", s) + self.assertIn("8\n\n", s) - def test_specify_numeric_column_2(self): - sio = StringIO() - df = self.df - df["len2"] = df["ID"].map(len) - df["len3"] = df["len2"].map(float) - PandasTools.WriteSDF(df, sio, properties=["len2", "len3"]) - s = sio.getvalue() - self.assertEqual(s.count(""), 2) - self.assertEqual(s.count(""), 2) - self.assertIn("7\n\n", s) - self.assertIn("7.0\n\n", s) - self.assertIn("8\n\n", s) - self.assertIn("8.0\n\n", s) + def test_specify_numeric_column_2(self): + sio = StringIO() + df = self.df + df["len2"] = df["ID"].map(len) + df["len3"] = df["len2"].map(float) + PandasTools.WriteSDF(df, sio, properties=["len2", "len3"]) + s = sio.getvalue() + self.assertEqual(s.count(""), 2) + self.assertEqual(s.count(""), 2) + self.assertIn("7\n\n", s) + self.assertIn("7.0\n\n", s) + self.assertIn("8\n\n", s) + self.assertIn("8.0\n\n", s) - def test_write_to_sdf(self): - dirname = tempfile.mkdtemp() - try: - filename = os.path.join(dirname, "test.sdf") - PandasTools.WriteSDF(self.df, filename) - with open(filename) as f: - s = f.read() - self.assertEqual(s.count("\n$$$$\n"), 2) - self.assertEqual(s.split("\n", 1)[0], "Methane") - finally: - shutil.rmtree(dirname) + def test_write_to_sdf(self): + dirname = tempfile.mkdtemp() + try: + filename = os.path.join(dirname, "test.sdf") + PandasTools.WriteSDF(self.df, filename) + with open(filename) as f: + s = f.read() + self.assertEqual(s.count("\n$$$$\n"), 2) + self.assertEqual(s.split("\n", 1)[0], "Methane") + finally: + shutil.rmtree(dirname) - def test_write_to_sdf_gz(self): - dirname = tempfile.mkdtemp() - try: - filename = os.path.join(dirname, "test.sdf.gz") - PandasTools.WriteSDF(self.df, filename) - with gzip.open(filename) as f: - s = f.read() - if PY3: - s = s.decode('utf-8') - s = s.replace(os.linesep, '\n') - self.assertEqual(s.count("\n$$$$\n"), 2) - self.assertEqual(s.split("\n", 1)[0], "Methane") - finally: - shutil.rmtree(dirname) + def test_write_to_sdf_gz(self): + dirname = tempfile.mkdtemp() + try: + filename = os.path.join(dirname, "test.sdf.gz") + PandasTools.WriteSDF(self.df, filename) + with gzip.open(filename) as f: + s = f.read() + s = s.decode('utf-8') + s = s.replace(os.linesep, '\n') + self.assertEqual(s.count("\n$$$$\n"), 2) + self.assertEqual(s.split("\n", 1)[0], "Methane") + finally: + shutil.rmtree(dirname) def getStreamIO(sdfString): - """ Return a StringIO/BytesIO for the string """ - if PY3: + """ Return a StringIO/BytesIO for the string """ sio = BytesIO() if sdfString is None else BytesIO(sdfString.encode('utf-8')) - else: # pragma: nocover - sio = StringIO() if sdfString is None else StringIO(sdfString) - return sio + return sio def getTestFrame(): - rdBase.DisableLog('rdApp.error') - sdfFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'pandas_load.sdf.gz') - df = PandasTools.LoadSDF(sdfFile, smilesName='SMILES') - rdBase.EnableLog('rdApp.error') - return df + rdBase.DisableLog('rdApp.error') + sdfFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'pandas_load.sdf.gz') + df = PandasTools.LoadSDF(sdfFile, smilesName='SMILES') + rdBase.EnableLog('rdApp.error') + return df methane = """\ @@ -384,4 +379,4 @@ $$$$ """ if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/Chem/UnitTestQED.py b/rdkit/Chem/UnitTestQED.py index 546f202a7..5f99471a1 100755 --- a/rdkit/Chem/UnitTestQED.py +++ b/rdkit/Chem/UnitTestQED.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from collections import namedtuple import doctest diff --git a/rdkit/Chem/UnitTestSATIS.py b/rdkit/Chem/UnitTestSATIS.py index 731d95f0b..0ed4239a4 100755 --- a/rdkit/Chem/UnitTestSATIS.py +++ b/rdkit/Chem/UnitTestSATIS.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import unittest diff --git a/rdkit/Chem/UnitTestSmiles.py b/rdkit/Chem/UnitTestSmiles.py index 2c9ed3e79..5150cd055 100755 --- a/rdkit/Chem/UnitTestSmiles.py +++ b/rdkit/Chem/UnitTestSmiles.py @@ -13,7 +13,7 @@ - stereo chemistry parsing consistency """ import unittest, os -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem diff --git a/rdkit/Chem/UnitTestSuppliers.py b/rdkit/Chem/UnitTestSuppliers.py index b122f53c7..c96348d1b 100755 --- a/rdkit/Chem/UnitTestSuppliers.py +++ b/rdkit/Chem/UnitTestSuppliers.py @@ -16,89 +16,89 @@ import unittest from rdkit import Chem, RDLogger from rdkit import RDConfig -from rdkit.six import next class TestCase(unittest.TestCase): - def tearDown(self): - RDLogger.EnableLog('rdApp.error') + def tearDown(self): + RDLogger.EnableLog('rdApp.error') - def test1SDSupplier(self): - fileN = os.path.join(RDConfig.RDCodeDir, 'VLib', 'NodeLib', 'test_data', 'NCI_aids.10.sdf') + def test1SDSupplier(self): + fileN = os.path.join(RDConfig.RDCodeDir, 'VLib', 'NodeLib', 'test_data', 'NCI_aids.10.sdf') - suppl = Chem.SDMolSupplier(fileN) - ms = [x for x in suppl] - self.assertEqual(len(ms), 10) + suppl = Chem.SDMolSupplier(fileN) + ms = [x for x in suppl] + self.assertEqual(len(ms), 10) - # test repeating: - ms = [x for x in suppl] - self.assertEqual(len(ms), 10) + # test repeating: + ms = [x for x in suppl] + self.assertEqual(len(ms), 10) - # test reset: - suppl.reset() - m = next(suppl) - self.assertEqual(m.GetProp('_Name'), '48') - self.assertEqual(m.GetProp('NSC'), '48') - self.assertEqual(m.GetProp('CAS_RN'), '15716-70-8') - m = next(suppl) - self.assertEqual(m.GetProp('_Name'), '78') - self.assertEqual(m.GetProp('NSC'), '78') - self.assertEqual(m.GetProp('CAS_RN'), '6290-84-2') + # test reset: + suppl.reset() + m = next(suppl) + self.assertEqual(m.GetProp('_Name'), '48') + self.assertEqual(m.GetProp('NSC'), '48') + self.assertEqual(m.GetProp('CAS_RN'), '15716-70-8') + m = next(suppl) + self.assertEqual(m.GetProp('_Name'), '78') + self.assertEqual(m.GetProp('NSC'), '78') + self.assertEqual(m.GetProp('CAS_RN'), '6290-84-2') - suppl.reset() - for _ in range(10): - m = next(suppl) + suppl.reset() + for _ in range(10): + m = next(suppl) - with self.assertRaises(StopIteration): - m = next(suppl) + with self.assertRaises(StopIteration): + m = next(suppl) - def test2SmilesSupplier(self): - fileN = os.path.join(RDConfig.RDCodeDir, 'VLib', 'NodeLib', 'test_data', 'pgp_20.txt') + def test2SmilesSupplier(self): + fileN = os.path.join(RDConfig.RDCodeDir, 'VLib', 'NodeLib', 'test_data', 'pgp_20.txt') - suppl = Chem.SmilesMolSupplier(fileN, delimiter='\t', smilesColumn=2, nameColumn=1, titleLine=1) - ms = [x for x in suppl] - self.assertEqual(len(ms), 20) + suppl = Chem.SmilesMolSupplier( + fileN, delimiter='\t', smilesColumn=2, nameColumn=1, titleLine=1) + ms = [x for x in suppl] + self.assertEqual(len(ms), 20) - # test repeating: - ms = [x for x in suppl] - self.assertEqual(len(ms), 20) - # test reset: - suppl.reset() - m = next(suppl) - self.assertEqual(m.GetProp('_Name'), 'ALDOSTERONE') - self.assertEqual(m.GetProp('ID'), 'RD-PGP-0001') - m = next(suppl) - self.assertEqual(m.GetProp('_Name'), 'AMIODARONE') - self.assertEqual(m.GetProp('ID'), 'RD-PGP-0002') - suppl.reset() - for _ in range(20): - m = next(suppl) - with self.assertRaises(StopIteration): - m = next(suppl) + # test repeating: + ms = [x for x in suppl] + self.assertEqual(len(ms), 20) + # test reset: + suppl.reset() + m = next(suppl) + self.assertEqual(m.GetProp('_Name'), 'ALDOSTERONE') + self.assertEqual(m.GetProp('ID'), 'RD-PGP-0001') + m = next(suppl) + self.assertEqual(m.GetProp('_Name'), 'AMIODARONE') + self.assertEqual(m.GetProp('ID'), 'RD-PGP-0002') + suppl.reset() + for _ in range(20): + m = next(suppl) + with self.assertRaises(StopIteration): + m = next(suppl) - def test3SmilesSupplier(self): - txt = """C1CC1,1 + def test3SmilesSupplier(self): + txt = """C1CC1,1 CC(=O)O,3 fail,4 CCOC,5 """ - RDLogger.DisableLog('rdApp.error') + RDLogger.DisableLog('rdApp.error') - fileN = tempfile.mktemp('.csv') - try: - with open(fileN, 'w+') as f: - f.write(txt) - suppl = Chem.SmilesMolSupplier(fileN, delimiter=',', smilesColumn=0, nameColumn=1, - titleLine=0) - ms = [x for x in suppl] - suppl = None - while ms.count(None): - ms.remove(None) - self.assertEqual(len(ms), 3) - finally: - os.unlink(fileN) + fileN = tempfile.mktemp('.csv') + try: + with open(fileN, 'w+') as f: + f.write(txt) + suppl = Chem.SmilesMolSupplier(fileN, delimiter=',', smilesColumn=0, nameColumn=1, + titleLine=0) + ms = [x for x in suppl] + suppl = None + while ms.count(None): + ms.remove(None) + self.assertEqual(len(ms), 3) + finally: + os.unlink(fileN) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/rdkit/Chem/UnitTestSurf.py b/rdkit/Chem/UnitTestSurf.py index 5f425723e..64312c6fb 100755 --- a/rdkit/Chem/UnitTestSurf.py +++ b/rdkit/Chem/UnitTestSurf.py @@ -10,7 +10,7 @@ """ unit testing code for calculations in rdkit.Chem.MolSurf """ -from __future__ import print_function + from collections import namedtuple import os.path diff --git a/rdkit/Chem/fmcs/fmcs.py b/rdkit/Chem/fmcs/fmcs.py index 873ca07f3..f3998d2d4 100644 --- a/rdkit/Chem/fmcs/fmcs.py +++ b/rdkit/Chem/fmcs/fmcs.py @@ -238,8 +238,7 @@ import sys try: from rdkit import Chem - from rdkit.six import next - from rdkit.six.moves import range + except ImportError: sys.stderr.write("Please install RDKit from http://www.rdkit.org/\n") raise @@ -2374,7 +2373,7 @@ def make_fragment_sdf(mcs, mol, subgraph, args): output_tag = args.save_atom_class_tag atom_classes = get_selected_atom_classes(mol, subgraph.atom_indices) if atom_classes is not None: - fragment.SetProp(output_tag, " ".join(map(str, atom_classes))) + fragment.SetProp(output_tag, " ".join(str(x) for x in atom_classes)) _save_other_tags(fragment, fragment, mcs, mol, subgraph, args) diff --git a/rdkit/Chem/test_data/BuildCrippenTestSet.py b/rdkit/Chem/test_data/BuildCrippenTestSet.py index 84c5ef7d0..3b07cc1d6 100755 --- a/rdkit/Chem/test_data/BuildCrippenTestSet.py +++ b/rdkit/Chem/test_data/BuildCrippenTestSet.py @@ -1,8 +1,8 @@ -from __future__ import print_function + from rdkit import RDConfig import gzip import os.path -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit.Chem import Crippen Crippen._Init() @@ -21,7 +21,7 @@ def runIt(inFileName, outFileName, smiCol=0, maxMols=-1, delim=','): mol = Chem.MolFromSmiles(smi) if mol: contribs = Crippen._GetAtomContribs(mol) - cPickle.dump((smi, contribs), outF) + pickle.dump((smi, contribs), outF) nDone += 1 if maxMols > 0 and nDone >= maxMols: break diff --git a/rdkit/Chem/test_data/BuildDescrsTestSet.Crippen.py b/rdkit/Chem/test_data/BuildDescrsTestSet.Crippen.py index a1ebbaa26..60fde5a2e 100644 --- a/rdkit/Chem/test_data/BuildDescrsTestSet.Crippen.py +++ b/rdkit/Chem/test_data/BuildDescrsTestSet.Crippen.py @@ -1,7 +1,7 @@ -from __future__ import print_function + from rdkit import RDConfig import os.path -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit.Chem import Descriptors diff --git a/rdkit/Chem/test_data/BuildDescrsTestSet.py b/rdkit/Chem/test_data/BuildDescrsTestSet.py index 0cb7cbb43..e7412efed 100755 --- a/rdkit/Chem/test_data/BuildDescrsTestSet.py +++ b/rdkit/Chem/test_data/BuildDescrsTestSet.py @@ -1,7 +1,7 @@ -from __future__ import print_function + from rdkit import RDConfig import os.path -from rdkit.six.moves import cPickle +import pickle from rdkit import Chem from rdkit.Chem import Descriptors diff --git a/rdkit/DataStructs/TopNContainer.py b/rdkit/DataStructs/TopNContainer.py index 6f28d99c5..0884ae886 100755 --- a/rdkit/DataStructs/TopNContainer.py +++ b/rdkit/DataStructs/TopNContainer.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import bisect diff --git a/rdkit/DataStructs/UnitTestDocTests.py b/rdkit/DataStructs/UnitTestDocTests.py index 61ad3feb0..4ff5d3f62 100644 --- a/rdkit/DataStructs/UnitTestDocTests.py +++ b/rdkit/DataStructs/UnitTestDocTests.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import unittest import doctest from rdkit.DataStructs import BitUtils, VectCollection, LazySignature, FingerprintSimilarity diff --git a/rdkit/DataStructs/UnitTestTopNContainer.py b/rdkit/DataStructs/UnitTestTopNContainer.py index 00ffd6bd5..416eb9d21 100755 --- a/rdkit/DataStructs/UnitTestTopNContainer.py +++ b/rdkit/DataStructs/UnitTestTopNContainer.py @@ -11,7 +11,7 @@ import random import unittest -from rdkit.six import StringIO +from io import StringIO from rdkit.DataStructs.TopNContainer import TopNContainer, _exampleCode from rdkit.TestRunner import redirect_stdout @@ -19,81 +19,81 @@ from rdkit.TestRunner import redirect_stdout class TestCase(unittest.TestCase): - def test1(self): - # simple test with a known answer - cont = TopNContainer(4) - for foo in range(10): - cont.Insert(foo, str(foo)) - assert cont.GetPts() == list(range(6, 10)) - assert cont.GetExtras() == [str(x) for x in range(6, 10)] + def test1(self): + # simple test with a known answer + cont = TopNContainer(4) + for foo in range(10): + cont.Insert(foo, str(foo)) + assert cont.GetPts() == list(range(6, 10)) + assert cont.GetExtras() == [str(x) for x in range(6, 10)] - def test2(self): - # larger scale random test - cont = TopNContainer(50) - for _ in range(1000): - cont.Insert(random.random()) - vs = cont.GetPts() - last = vs.pop(0) - while vs: - assert vs[0] >= last - last = vs.pop(0) + def test2(self): + # larger scale random test + cont = TopNContainer(50) + for _ in range(1000): + cont.Insert(random.random()) + vs = cont.GetPts() + last = vs.pop(0) + while vs: + assert vs[0] >= last + last = vs.pop(0) - def test3(self): - # random test with extras - cont = TopNContainer(10) - for _ in range(100): - v = random.random() - cont.Insert(v, v + 1) - vs = cont.GetExtras() - last = vs.pop(0) - while vs: - assert vs[0] >= last - last = vs.pop(0) + def test3(self): + # random test with extras + cont = TopNContainer(10) + for _ in range(100): + v = random.random() + cont.Insert(v, v + 1) + vs = cont.GetExtras() + last = vs.pop(0) + while vs: + assert vs[0] >= last + last = vs.pop(0) - def test4(self): - # random test with extras and getitem - cont = TopNContainer(10) - for i in range(100): - v = random.random() - cont.Insert(v, v + 1) - lastV, lastE = cont[0] - for i in range(1, len(cont)): - v, e = cont[i] - assert v >= lastV - assert e >= lastE - lastV, lastE = v, e + def test4(self): + # random test with extras and getitem + cont = TopNContainer(10) + for i in range(100): + v = random.random() + cont.Insert(v, v + 1) + lastV, lastE = cont[0] + for i in range(1, len(cont)): + v, e = cont[i] + assert v >= lastV + assert e >= lastE + lastV, lastE = v, e - def test5(self): - # random test with extras and getitem, include reverse - cont = TopNContainer(10) - for i in range(100): - v = random.random() - cont.Insert(v, v + 1) - cont.reverse() - lastV, lastE = cont[0] - for i in range(1, len(cont)): - v, e = cont[i] - assert v <= lastV - assert e <= lastE - lastV, lastE = v, e + def test5(self): + # random test with extras and getitem, include reverse + cont = TopNContainer(10) + for i in range(100): + v = random.random() + cont.Insert(v, v + 1) + cont.reverse() + lastV, lastE = cont[0] + for i in range(1, len(cont)): + v, e = cont[i] + assert v <= lastV + assert e <= lastE + lastV, lastE = v, e - def test_keepAll(self): - # simple test with a known answer where we keep all - cont = TopNContainer(-1) - for i in range(10): - cont.Insert(9 - i, str(9 - i)) - self.assertEqual(len(cont), i + 1) - assert cont.GetPts() == list(range(10)) - assert cont.GetExtras() == [str(x) for x in range(10)] + def test_keepAll(self): + # simple test with a known answer where we keep all + cont = TopNContainer(-1) + for i in range(10): + cont.Insert(9 - i, str(9 - i)) + self.assertEqual(len(cont), i + 1) + assert cont.GetPts() == list(range(10)) + assert cont.GetExtras() == [str(x) for x in range(10)] - def test_exampleCode(self): - # We make sure that the example code runs - f = StringIO() - with redirect_stdout(f): - _exampleCode() - s = f.getvalue() - self.assertIn('[58, 75, 78, 84]', s) + def test_exampleCode(self): + # We make sure that the example code runs + f = StringIO() + with redirect_stdout(f): + _exampleCode() + s = f.getvalue() + self.assertIn('[58, 75, 78, 84]', s) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/DataStructs/UnitTestcBitVect.py b/rdkit/DataStructs/UnitTestcBitVect.py index ca5699b22..086a2039f 100755 --- a/rdkit/DataStructs/UnitTestcBitVect.py +++ b/rdkit/DataStructs/UnitTestcBitVect.py @@ -10,13 +10,13 @@ # """ unit testing code for the C++ BitVects """ -from __future__ import print_function + import os import unittest from rdkit.DataStructs import cDataStructs -from rdkit.six.moves import cPickle +import pickle klass = cDataStructs.SparseBitVect @@ -248,10 +248,10 @@ class VectTests(object): v1[3] = 1 pklName = 'foo.pkl' outF = open(pklName, 'wb+') - cPickle.dump(v1, outF) + pickle.dump(v1, outF) outF.close() inF = open(pklName, 'rb') - v2 = cPickle.load(inF) + v2 = pickle.load(inF) inF.close() os.unlink(pklName) assert tuple(v1.GetOnBits()) == tuple(v2.GetOnBits()), 'pkl failed' diff --git a/rdkit/DataStructs/VectCollection.py b/rdkit/DataStructs/VectCollection.py index 7d7e9224d..71897dd72 100644 --- a/rdkit/DataStructs/VectCollection.py +++ b/rdkit/DataStructs/VectCollection.py @@ -8,261 +8,259 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import copy import struct -from rdkit.six import iterkeys -from rdkit import six from rdkit import DataStructs class VectCollection(object): - """ + """ - >>> vc = VectCollection() - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((1,3,5)) - >>> vc.AddVect(1,bv1) - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((6,8)) - >>> vc.AddVect(2,bv1) - >>> len(vc) - 10 - >>> vc.GetNumBits() - 10 - >>> vc[0] - 0 - >>> vc[1] - 1 - >>> vc[9] - 0 - >>> vc[6] - 1 - >>> vc.GetBit(6) - 1 - >>> list(vc.GetOnBits()) - [1, 3, 5, 6, 8] + >>> vc = VectCollection() + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((1,3,5)) + >>> vc.AddVect(1,bv1) + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((6,8)) + >>> vc.AddVect(2,bv1) + >>> len(vc) + 10 + >>> vc.GetNumBits() + 10 + >>> vc[0] + 0 + >>> vc[1] + 1 + >>> vc[9] + 0 + >>> vc[6] + 1 + >>> vc.GetBit(6) + 1 + >>> list(vc.GetOnBits()) + [1, 3, 5, 6, 8] - keys must be unique, so adding a duplicate replaces the - previous values: - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((7,9)) - >>> vc.AddVect(1,bv1) - >>> len(vc) - 10 - >>> vc[1] - 0 - >>> vc[9] - 1 - >>> vc[6] - 1 + keys must be unique, so adding a duplicate replaces the + previous values: + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((7,9)) + >>> vc.AddVect(1,bv1) + >>> len(vc) + 10 + >>> vc[1] + 0 + >>> vc[9] + 1 + >>> vc[6] + 1 - we can also query the children: - >>> vc.NumChildren() - 2 - >>> cs = vc.GetChildren() - >>> id,fp = cs[0] - >>> id - 1 - >>> list(fp.GetOnBits()) - [7, 9] - >>> id,fp = cs[1] - >>> id - 2 - >>> list(fp.GetOnBits()) - [6, 8] + we can also query the children: + >>> vc.NumChildren() + 2 + >>> cs = vc.GetChildren() + >>> id,fp = cs[0] + >>> id + 1 + >>> list(fp.GetOnBits()) + [7, 9] + >>> id,fp = cs[1] + >>> id + 2 + >>> list(fp.GetOnBits()) + [6, 8] - attach/detach operations: - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((5,6)) - >>> vc.AddVect(3,bv1) - >>> vc.NumChildren() - 3 - >>> list(vc.GetOnBits()) - [5, 6, 7, 8, 9] - >>> vc.DetachVectsNotMatchingBit(6) - >>> vc.NumChildren() - 2 - >>> list(vc.GetOnBits()) - [5, 6, 8] + attach/detach operations: + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((5,6)) + >>> vc.AddVect(3,bv1) + >>> vc.NumChildren() + 3 + >>> list(vc.GetOnBits()) + [5, 6, 7, 8, 9] + >>> vc.DetachVectsNotMatchingBit(6) + >>> vc.NumChildren() + 2 + >>> list(vc.GetOnBits()) + [5, 6, 8] - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((7,9)) - >>> vc.AddVect(1,bv1) - >>> vc.NumChildren() - 3 - >>> list(vc.GetOnBits()) - [5, 6, 7, 8, 9] - >>> vc.DetachVectsMatchingBit(6) - >>> vc.NumChildren() - 1 - >>> list(vc.GetOnBits()) - [7, 9] + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((7,9)) + >>> vc.AddVect(1,bv1) + >>> vc.NumChildren() + 3 + >>> list(vc.GetOnBits()) + [5, 6, 7, 8, 9] + >>> vc.DetachVectsMatchingBit(6) + >>> vc.NumChildren() + 1 + >>> list(vc.GetOnBits()) + [7, 9] - to copy VectCollections, use the copy module: - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((5,6)) - >>> vc.AddVect(3,bv1) - >>> list(vc.GetOnBits()) - [5, 6, 7, 9] - >>> vc2 = copy.copy(vc) - >>> vc.DetachVectsNotMatchingBit(6) - >>> list(vc.GetOnBits()) - [5, 6] - >>> list(vc2.GetOnBits()) - [5, 6, 7, 9] + to copy VectCollections, use the copy module: + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((5,6)) + >>> vc.AddVect(3,bv1) + >>> list(vc.GetOnBits()) + [5, 6, 7, 9] + >>> vc2 = copy.copy(vc) + >>> vc.DetachVectsNotMatchingBit(6) + >>> list(vc.GetOnBits()) + [5, 6] + >>> list(vc2.GetOnBits()) + [5, 6, 7, 9] - The Uniquify() method can be used to remove duplicate vectors: - >>> vc = VectCollection() - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((7,9)) - >>> vc.AddVect(1,bv1) - >>> vc.AddVect(2,bv1) - >>> bv1 = DataStructs.ExplicitBitVect(10) - >>> bv1.SetBitsFromList((2,3,5)) - >>> vc.AddVect(3,bv1) - >>> vc.NumChildren() - 3 - >>> vc.Uniquify() - >>> vc.NumChildren() - 2 + The Uniquify() method can be used to remove duplicate vectors: + >>> vc = VectCollection() + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((7,9)) + >>> vc.AddVect(1,bv1) + >>> vc.AddVect(2,bv1) + >>> bv1 = DataStructs.ExplicitBitVect(10) + >>> bv1.SetBitsFromList((2,3,5)) + >>> vc.AddVect(3,bv1) + >>> vc.NumChildren() + 3 + >>> vc.Uniquify() + >>> vc.NumChildren() + 2 - """ + """ - def __init__(self): - self.__vects = {} - self.__orVect = None - self.__numBits = -1 - self.__needReset = True - - def GetOrVect(self): - if self.__needReset: - self.Reset() - return self.__orVect - - orVect = property(GetOrVect) - - def AddVect(self, idx, vect): - self.__vects[idx] = vect - self.__needReset = True - - def Reset(self): - if not self.__needReset: - return - self.__orVect = None - if not self.__vects: - return - ks = list(iterkeys(self.__vects)) - self.__orVect = copy.copy(self.__vects[ks[0]]) - self.__numBits = self.__orVect.GetNumBits() - for i in range(1, len(ks)): - self.__orVect |= self.__vects[ks[i]] - self.__needReset = False - - def NumChildren(self): - return len(self.__vects.keys()) - - def GetChildren(self): - return tuple(self.__vects.items()) - - def __getitem__(self, idx): - if self.__needReset: - self.Reset() - return self.__orVect.GetBit(idx) - - GetBit = __getitem__ - - def __len__(self): - if self.__needReset: - self.Reset() - return self.__numBits - - GetNumBits = __len__ - - def GetOnBits(self): - if self.__needReset: - self.Reset() - return self.__orVect.GetOnBits() - - def DetachVectsNotMatchingBit(self, bit): - items = list(self.__vects.items()) - for k, v in items: - if not v.GetBit(bit): - del (self.__vects[k]) + def __init__(self): + self.__vects = {} + self.__orVect = None + self.__numBits = -1 self.__needReset = True - def DetachVectsMatchingBit(self, bit): - items = list(self.__vects.items()) - for k, v in items: - if v.GetBit(bit): - del (self.__vects[k]) + def GetOrVect(self): + if self.__needReset: + self.Reset() + return self.__orVect + + orVect = property(GetOrVect) + + def AddVect(self, idx, vect): + self.__vects[idx] = vect self.__needReset = True - def Uniquify(self, verbose=False): - obls = {} - for k, v in self.__vects.items(): - obls[k] = list(v.GetOnBits()) + def Reset(self): + if not self.__needReset: + return + self.__orVect = None + if not self.__vects: + return + ks = list(iter(self.__vects)) + self.__orVect = copy.copy(self.__vects[ks[0]]) + self.__numBits = self.__orVect.GetNumBits() + for i in range(1, len(ks)): + self.__orVect |= self.__vects[ks[i]] + self.__needReset = False - keys = list(self.__vects.keys()) - nKeys = len(keys) - keep = list(self.__vects.keys()) - for i in range(nKeys): - k1 = keys[i] - if k1 in keep: - obl1 = obls[k1] - idx = keys.index(k1) - for j in range(idx + 1, nKeys): - k2 = keys[j] - if k2 in keep: - obl2 = obls[k2] - if obl1 == obl2: - keep.remove(k2) + def NumChildren(self): + return len(self.__vects.keys()) - self.__needsReset = True - tmp = {} - for k in keep: - tmp[k] = self.__vects[k] - if verbose: - print('uniquify:', len(self.__vects), '->', len(tmp)) - self.__vects = tmp + def GetChildren(self): + return tuple(self.__vects.items()) - # - # set up our support for pickling: - # - def __getstate__(self): - pkl = struct.pack('', len(tmp)) + self.__vects = tmp + + # + # set up our support for pickling: + # + def __getstate__(self): + pkl = struct.pack(' density and len(fp) // 2 > minLength: - fp = FoldFingerprint(fp, 2) - return fp + while fp.GetNumOnBits() / len(fp) > density and len(fp) // 2 > minLength: + fp = FoldFingerprint(fp, 2) + return fp ExplicitBitVect.ToBitString = BitVectToText diff --git a/rdkit/Dbase/DbConnection.py b/rdkit/Dbase/DbConnection.py index a05bc4cd3..85f131fcb 100755 --- a/rdkit/Dbase/DbConnection.py +++ b/rdkit/Dbase/DbConnection.py @@ -10,7 +10,7 @@ """ defines class _DbConnect_, for abstracting connections to databases """ -from __future__ import print_function + from rdkit.Dbase import DbUtils, DbInfo, DbModule diff --git a/rdkit/Dbase/DbInfo.py b/rdkit/Dbase/DbInfo.py index e16c007d0..a62038d36 100755 --- a/rdkit/Dbase/DbInfo.py +++ b/rdkit/Dbase/DbInfo.py @@ -8,12 +8,11 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import sys from rdkit import RDConfig from rdkit.Dbase import DbModule -from rdkit import six sqlTextTypes = DbModule.sqlTextTypes sqlIntTypes = DbModule.sqlIntTypes @@ -22,189 +21,189 @@ sqlBinTypes = DbModule.sqlBinTypes def GetDbNames(user='sysdba', password='masterkey', dirName='.', dBase='::template1', cn=None): - """ returns a list of databases that are available + """ returns a list of databases that are available - **Arguments** + **Arguments** - - user: the username for DB access + - user: the username for DB access - - password: the password to be used for DB access + - password: the password to be used for DB access - **Returns** + **Returns** - - a list of db names (strings) + - a list of db names (strings) - """ - if DbModule.getDbSql: - if not cn: - try: - cn = DbModule.connect(dBase, user, password) - except Exception: - print('Problems opening database: %s' % (dBase)) - return [] - c = cn.cursor() - c.execute(DbModule.getDbSql) - if RDConfig.usePgSQL: - names = ['::' + str(x[0]) for x in c.fetchall()] + """ + if DbModule.getDbSql: + if not cn: + try: + cn = DbModule.connect(dBase, user, password) + except Exception: + print('Problems opening database: %s' % (dBase)) + return [] + c = cn.cursor() + c.execute(DbModule.getDbSql) + if RDConfig.usePgSQL: + names = ['::' + str(x[0]) for x in c.fetchall()] + else: + names = ['::' + str(x[0]) for x in c.fetchall()] + names.remove(dBase) + elif DbModule.fileWildcard: + import os.path + import glob + names = glob.glob(os.path.join(dirName, DbModule.fileWildcard)) else: - names = ['::' + str(x[0]) for x in c.fetchall()] - names.remove(dBase) - elif DbModule.fileWildcard: - import os.path - import glob - names = glob.glob(os.path.join(dirName, DbModule.fileWildcard)) - else: - names = [] - return names + names = [] + return names def GetTableNames(dBase, user='sysdba', password='masterkey', includeViews=0, cn=None): - """ returns a list of tables available in a database + """ returns a list of tables available in a database - **Arguments** + **Arguments** - - dBase: the name of the DB file to be used + - dBase: the name of the DB file to be used - - user: the username for DB access + - user: the username for DB access - - password: the password to be used for DB access + - password: the password to be used for DB access - - includeViews: if this is non-null, the views in the db will - also be returned + - includeViews: if this is non-null, the views in the db will + also be returned - **Returns** + **Returns** - - a list of table names (strings) + - a list of table names (strings) - """ - if not cn: - try: - cn = DbModule.connect(dBase, user, password) - except Exception: - print('Problems opening database: %s' % (dBase)) - return [] + """ + if not cn: + try: + cn = DbModule.connect(dBase, user, password) + except Exception: + print('Problems opening database: %s' % (dBase)) + return [] - c = cn.cursor() - if not includeViews: - comm = DbModule.getTablesSql - else: - comm = DbModule.getTablesAndViewsSql - c.execute(comm) - names = [str(x[0]).upper() for x in c.fetchall()] - if RDConfig.usePgSQL and 'PG_LOGDIR_LS' in names: - names.remove('PG_LOGDIR_LS') - return names + c = cn.cursor() + if not includeViews: + comm = DbModule.getTablesSql + else: + comm = DbModule.getTablesAndViewsSql + c.execute(comm) + names = [str(x[0]).upper() for x in c.fetchall()] + if RDConfig.usePgSQL and 'PG_LOGDIR_LS' in names: + names.remove('PG_LOGDIR_LS') + return names def GetColumnInfoFromCursor(cursor): - if cursor is None or cursor.description is None: - return [] - results = [] - if not RDConfig.useSqlLite: - for item in cursor.description: - cName = item[0] - cType = item[1] - if cType in sqlTextTypes: - typeStr = 'string' - elif cType in sqlIntTypes: - typeStr = 'integer' - elif cType in sqlFloatTypes: - typeStr = 'float' - elif cType in sqlBinTypes: - typeStr = 'binary' - else: - sys.stderr.write('odd type in col %s: %s\n' % (cName, str(cType))) - results.append((cName, typeStr)) - else: - r = cursor.fetchone() - if not r: - return results - for i, v in enumerate(r): - cName = cursor.description[i][0] - typ = type(v) - if isinstance(v, six.string_types): - typeStr = 'string' - elif typ == int: - typeStr = 'integer' - elif typ == float: - typeStr = 'float' - elif (six.PY2 and typ == buffer) or (six.PY3 and typ in (memoryview, bytes)): - typeStr = 'binary' - else: - sys.stderr.write('odd type in col %s: %s\n' % (cName, typ)) - results.append((cName, typeStr)) - return results + if cursor is None or cursor.description is None: + return [] + results = [] + if not RDConfig.useSqlLite: + for item in cursor.description: + cName = item[0] + cType = item[1] + if cType in sqlTextTypes: + typeStr = 'string' + elif cType in sqlIntTypes: + typeStr = 'integer' + elif cType in sqlFloatTypes: + typeStr = 'float' + elif cType in sqlBinTypes: + typeStr = 'binary' + else: + sys.stderr.write('odd type in col %s: %s\n' % (cName, str(cType))) + results.append((cName, typeStr)) + else: + r = cursor.fetchone() + if not r: + return results + for i, v in enumerate(r): + cName = cursor.description[i][0] + typ = type(v) + if isinstance(v, str): + typeStr = 'string' + elif typ == int: + typeStr = 'integer' + elif typ == float: + typeStr = 'float' + elif typ in (memoryview, bytes): + typeStr = 'binary' + else: + sys.stderr.write('odd type in col %s: %s\n' % (cName, typ)) + results.append((cName, typeStr)) + return results def GetColumnNamesAndTypes(dBase, table, user='sysdba', password='masterkey', join='', what='*', cn=None): - """ gets a list of columns available in a DB table along with their types + """ gets a list of columns available in a DB table along with their types - **Arguments** + **Arguments** - - dBase: the name of the DB file to be used + - dBase: the name of the DB file to be used - - table: the name of the table to query + - table: the name of the table to query - - user: the username for DB access + - user: the username for DB access - - password: the password to be used for DB access + - password: the password to be used for DB access - - join: an optional join clause (omit the verb 'join') + - join: an optional join clause (omit the verb 'join') - - what: an optional clause indicating what to select + - what: an optional clause indicating what to select - **Returns** + **Returns** - - a list of 2-tuples containing: + - a list of 2-tuples containing: - 1) column name + 1) column name - 2) column type + 2) column type - """ - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - cmd = 'select %s from %s' % (what, table) - if join: - cmd += ' join %s' % (join) - c.execute(cmd) - return GetColumnInfoFromCursor(c) + """ + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + cmd = 'select %s from %s' % (what, table) + if join: + cmd += ' join %s' % (join) + c.execute(cmd) + return GetColumnInfoFromCursor(c) def GetColumnNames(dBase, table, user='sysdba', password='masterkey', join='', what='*', cn=None): - """ gets a list of columns available in a DB table + """ gets a list of columns available in a DB table - **Arguments** + **Arguments** - - dBase: the name of the DB file to be used + - dBase: the name of the DB file to be used - - table: the name of the table to query + - table: the name of the table to query - - user: the username for DB access + - user: the username for DB access - - password: the password to be used for DB access + - password: the password to be used for DB access - - join: an optional join clause (omit the verb 'join') + - join: an optional join clause (omit the verb 'join') - - what: an optional clause indicating what to select + - what: an optional clause indicating what to select - **Returns** + **Returns** - - a list of column names + - a list of column names - """ - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - cmd = 'select %s from %s' % (what, table) - if join: - if join.strip().find('join') != 0: - join = 'join %s' % (join) - cmd += ' ' + join - c.execute(cmd) - c.fetchone() - desc = c.description - res = [str(x[0]) for x in desc] - return res + """ + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + cmd = 'select %s from %s' % (what, table) + if join: + if join.strip().find('join') != 0: + join = 'join %s' % (join) + cmd += ' ' + join + c.execute(cmd) + c.fetchone() + desc = c.description + res = [str(x[0]) for x in desc] + return res diff --git a/rdkit/Dbase/DbModule.py b/rdkit/Dbase/DbModule.py index efd8e9fe0..565a96355 100755 --- a/rdkit/Dbase/DbModule.py +++ b/rdkit/Dbase/DbModule.py @@ -8,22 +8,21 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from rdkit import six from rdkit import RDConfig if hasattr(RDConfig, "usePgSQL") and RDConfig.usePgSQL: - from pyPgSQL import PgSQL - # as of this writing (March 2004), this results in a speedup in - # getting results back from the wrapper: - PgSQL.fetchReturnsList = 1 + from pyPgSQL import PgSQL + # as of this writing (March 2004), this results in a speedup in + # getting results back from the wrapper: + PgSQL.fetchReturnsList = 1 - from pyPgSQL.PgSQL import * - sqlTextTypes = [PG_CHAR, PG_BPCHAR, PG_TEXT, PG_VARCHAR, PG_NAME] - sqlIntTypes = [PG_INT8, PG_INT2, PG_INT4] - sqlFloatTypes = [PG_FLOAT4, PG_FLOAT8] - sqlBinTypes = [PG_OID, PG_BLOB, PG_BYTEA] - getTablesSql = """select tablename from pg_tables where schemaname='public'""" - getTablesAndViewsSql = """SELECT c.relname as "Name" + from pyPgSQL.PgSQL import * + sqlTextTypes = [PG_CHAR, PG_BPCHAR, PG_TEXT, PG_VARCHAR, PG_NAME] + sqlIntTypes = [PG_INT8, PG_INT2, PG_INT4] + sqlFloatTypes = [PG_FLOAT4, PG_FLOAT8] + sqlBinTypes = [PG_OID, PG_BLOB, PG_BYTEA] + getTablesSql = """select tablename from pg_tables where schemaname='public'""" + getTablesAndViewsSql = """SELECT c.relname as "Name" FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace @@ -32,31 +31,31 @@ if hasattr(RDConfig, "usePgSQL") and RDConfig.usePgSQL: AND pg_catalog.pg_table_is_visible(c.oid) """ - getDbSql = """ select datname from pg_database where datallowconn """ - fileWildcard = None - placeHolder = '%s' - binaryTypeName = "bytea" - binaryHolder = PgBytea - RDTestDatabase = "::RDTests" + getDbSql = """ select datname from pg_database where datallowconn """ + fileWildcard = None + placeHolder = '%s' + binaryTypeName = "bytea" + binaryHolder = PgBytea + RDTestDatabase = "::RDTests" elif hasattr(RDConfig, "useSqlLite") and RDConfig.useSqlLite: - try: - import sqlite3 as sqlite - except ImportError: - from pysqlite2 import dbapi2 as sqlite - sqlTextTypes = [] - sqlIntTypes = [] - sqlFloatTypes = [] - sqlBinTypes = [] - getTablesSql = """select name from SQLite_Master where type='table'""" - getTablesAndViewsSql = """select name from SQLite_Master where type in ('table','view')""" - getDbSql = None - dbFileWildcard = '*.sqlt' - fileWildcard = dbFileWildcard - placeHolder = '?' - binaryTypeName = "blob" - binaryHolder = memoryview if six.PY3 else buffer + try: + import sqlite3 as sqlite + except ImportError: + from pysqlite2 import dbapi2 as sqlite + sqlTextTypes = [] + sqlIntTypes = [] + sqlFloatTypes = [] + sqlBinTypes = [] + getTablesSql = """select name from SQLite_Master where type='table'""" + getTablesAndViewsSql = """select name from SQLite_Master where type in ('table','view')""" + getDbSql = None + dbFileWildcard = '*.sqlt' + fileWildcard = dbFileWildcard + placeHolder = '?' + binaryTypeName = "blob" + binaryHolder = memoryview - def connect(x, *args): - return sqlite.connect(x) + def connect(x, *args): + return sqlite.connect(x) else: - raise ImportError("Neither sqlite nor PgSQL support found.") + raise ImportError("Neither sqlite nor PgSQL support found.") diff --git a/rdkit/Dbase/DbReport.py b/rdkit/Dbase/DbReport.py index 6068229af..2431d32b2 100755 --- a/rdkit/Dbase/DbReport.py +++ b/rdkit/Dbase/DbReport.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + try: from reportlab import platypus diff --git a/rdkit/Dbase/DbResultSet.py b/rdkit/Dbase/DbResultSet.py index d823a2d9c..6753a7dad 100755 --- a/rdkit/Dbase/DbResultSet.py +++ b/rdkit/Dbase/DbResultSet.py @@ -8,7 +8,7 @@ this uses the Python iterator interface, so you'll need python 2.2 or above. """ -from __future__ import print_function + import sys from rdkit.Dbase import DbInfo diff --git a/rdkit/Dbase/DbUtils.py b/rdkit/Dbase/DbUtils.py index 78221f30b..42db1f832 100755 --- a/rdkit/Dbase/DbUtils.py +++ b/rdkit/Dbase/DbUtils.py @@ -13,455 +13,453 @@ When possible, it's probably preferable to use a _DbConnection.DbConnect_ object """ -from __future__ import print_function + import sys from rdkit.Dbase import DbInfo from rdkit.Dbase import DbModule from rdkit.Dbase.DbResultSet import DbResultSet, RandomAccessDbResultSet -from rdkit.six import string_types, StringIO -from rdkit.six.moves import xrange +from io import StringIO def _take(fromL, what): - """ Given a list fromL, returns an iterator of the elements specified using their - indices in the list what """ - return map(lambda x, y=fromL: y[x], what) + """ Given a list fromL, returns an iterator of the elements specified using their + indices in the list what """ + return [fromL[x] for x in what] def GetColumns(dBase, table, fieldString, user='sysdba', password='masterkey', join='', cn=None): - """ gets a set of data from a table + """ gets a set of data from a table - **Arguments** + **Arguments** - - dBase: database name + - dBase: database name - - table: table name + - table: table name - - fieldString: a string with the names of the fields to be extracted, - this should be a comma delimited list + - fieldString: a string with the names of the fields to be extracted, + this should be a comma delimited list - - user and password: + - user and password: - - join: a join clause (omit the verb 'join') + - join: a join clause (omit the verb 'join') - **Returns** + **Returns** - - a list of the data + - a list of the data - """ - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - cmd = 'select %s from %s' % (fieldString, table) - if join: - if join.strip().find('join') != 0: - join = 'join %s' % (join) - cmd += ' ' + join - c.execute(cmd) - return c.fetchall() + """ + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + cmd = 'select %s from %s' % (fieldString, table) + if join: + if join.strip().find('join') != 0: + join = 'join %s' % (join) + cmd += ' ' + join + c.execute(cmd) + return c.fetchall() def GetData(dBase, table, fieldString='*', whereString='', user='sysdba', password='masterkey', removeDups=-1, join='', forceList=0, transform=None, randomAccess=1, extras=None, cn=None): - """ a more flexible method to get a set of data from a table + """ a more flexible method to get a set of data from a table - **Arguments** + **Arguments** - - fields: a string with the names of the fields to be extracted, - this should be a comma delimited list + - fields: a string with the names of the fields to be extracted, + this should be a comma delimited list - - where: the SQL where clause to be used with the DB query + - where: the SQL where clause to be used with the DB query - - removeDups indicates the column which should be used to screen - out duplicates. Only the first appearance of a duplicate will - be left in the dataset. + - removeDups indicates the column which should be used to screen + out duplicates. Only the first appearance of a duplicate will + be left in the dataset. - **Returns** + **Returns** - - a list of the data + - a list of the data - **Notes** + **Notes** - - EFF: this isn't particularly efficient + - EFF: this isn't particularly efficient - """ - if forceList and (transform is not None): - raise ValueError('forceList and transform arguments are not compatible') - if forceList and (not randomAccess): - raise ValueError('when forceList is set, randomAccess must also be used') - if removeDups > -1: - forceList = True + """ + if forceList and (transform is not None): + raise ValueError('forceList and transform arguments are not compatible') + if forceList and (not randomAccess): + raise ValueError('when forceList is set, randomAccess must also be used') + if removeDups > -1: + forceList = True - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - cmd = 'select %s from %s' % (fieldString, table) - if join: - if join.strip().find('join') != 0: - join = 'join %s' % (join) - cmd += ' ' + join - if whereString: - if whereString.strip().find('where') != 0: - whereString = 'where %s' % (whereString) - cmd += ' ' + whereString + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + cmd = 'select %s from %s' % (fieldString, table) + if join: + if join.strip().find('join') != 0: + join = 'join %s' % (join) + cmd += ' ' + join + if whereString: + if whereString.strip().find('where') != 0: + whereString = 'where %s' % (whereString) + cmd += ' ' + whereString - if forceList: - try: - if not extras: - c.execute(cmd) - else: - c.execute(cmd, extras) - except Exception: - sys.stderr.write('the command "%s" generated errors:\n' % (cmd)) - import traceback - traceback.print_exc() - return None - if transform is not None: - raise ValueError('forceList and transform arguments are not compatible') - if not randomAccess: - raise ValueError('when forceList is set, randomAccess must also be used') - data = c.fetchall() - if removeDups >= 0: - seen = set() - for entry in data[:]: - if entry[removeDups] in seen: - data.remove(entry) - else: - seen.add(entry[removeDups]) - else: - if randomAccess: - klass = RandomAccessDbResultSet + if forceList: + try: + if not extras: + c.execute(cmd) + else: + c.execute(cmd, extras) + except Exception: + sys.stderr.write('the command "%s" generated errors:\n' % (cmd)) + import traceback + traceback.print_exc() + return None + if transform is not None: + raise ValueError('forceList and transform arguments are not compatible') + if not randomAccess: + raise ValueError('when forceList is set, randomAccess must also be used') + data = c.fetchall() + if removeDups >= 0: + seen = set() + for entry in data[:]: + if entry[removeDups] in seen: + data.remove(entry) + else: + seen.add(entry[removeDups]) else: - klass = DbResultSet + if randomAccess: + klass = RandomAccessDbResultSet + else: + klass = DbResultSet - data = klass(c, cn, cmd, removeDups=removeDups, transform=transform, extras=extras) + data = klass(c, cn, cmd, removeDups=removeDups, transform=transform, extras=extras) - return data + return data def DatabaseToText(dBase, table, fields='*', join='', where='', user='sysdba', password='masterkey', delim=',', cn=None): - """ Pulls the contents of a database and makes a deliminted text file from them + """ Pulls the contents of a database and makes a deliminted text file from them - **Arguments** - - dBase: the name of the DB file to be used + **Arguments** + - dBase: the name of the DB file to be used - - table: the name of the table to query + - table: the name of the table to query - - fields: the fields to select with the SQL query + - fields: the fields to select with the SQL query - - join: the join clause of the SQL query - (e.g. 'join foo on foo.bar=base.bar') + - join: the join clause of the SQL query + (e.g. 'join foo on foo.bar=base.bar') - - where: the where clause of the SQL query - (e.g. 'where foo = 2' or 'where bar > 17.6') + - where: the where clause of the SQL query + (e.g. 'where foo = 2' or 'where bar > 17.6') - - user: the username for DB access + - user: the username for DB access - - password: the password to be used for DB access + - password: the password to be used for DB access - **Returns** + **Returns** - - the CSV data (as text) + - the CSV data (as text) - """ - if len(where) and where.strip().find('where') == -1: - where = 'where %s' % (where) - if len(join) and join.strip().find('join') == -1: - join = 'join %s' % (join) - sqlCommand = 'select %s from %s %s %s' % (fields, table, join, where) - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - c.execute(sqlCommand) - headers = [] - colsToTake = [] - # the description field of the cursor carries around info about the columns - # of the table - for i in range(len(c.description)): - item = c.description[i] - if item[1] not in DbInfo.sqlBinTypes: - colsToTake.append(i) - headers.append(item[0]) + """ + if len(where) and where.strip().find('where') == -1: + where = 'where %s' % (where) + if len(join) and join.strip().find('join') == -1: + join = 'join %s' % (join) + sqlCommand = 'select %s from %s %s %s' % (fields, table, join, where) + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + c.execute(sqlCommand) + headers = [] + colsToTake = [] + # the description field of the cursor carries around info about the columns + # of the table + for i in range(len(c.description)): + item = c.description[i] + if item[1] not in DbInfo.sqlBinTypes: + colsToTake.append(i) + headers.append(item[0]) - lines = [] - lines.append(delim.join(headers)) + lines = [] + lines.append(delim.join(headers)) - # grab the data - results = c.fetchall() - for res in results: - d = _take(res, colsToTake) - lines.append(delim.join(map(str, d))) + # grab the data + results = c.fetchall() + for res in results: + d = _take(res, colsToTake) + lines.append(delim.join([str(x) for x in d])) - return '\n'.join(lines) + return '\n'.join(lines) def TypeFinder(data, nRows, nCols, nullMarker=None): - """ + """ - finds the types of the columns in _data_ + finds the types of the columns in _data_ - if nullMarker is not None, elements of the data table which are - equal to nullMarker will not count towards setting the type of - their columns. + if nullMarker is not None, elements of the data table which are + equal to nullMarker will not count towards setting the type of + their columns. - """ - priorities = {float: 3, int: 2, str: 1, -1: -1} - res = [None] * nCols - for col in xrange(nCols): - typeHere = [-1, 1] - for row in xrange(nRows): - d = data[row][col] - if d is None: - continue - locType = type(d) - if locType != float and locType != int: - locType = str - try: - d = str(d) - except UnicodeError as msg: - print('cannot convert text from row %d col %d to a string' % (row + 2, col)) - print('\t>%s' % (repr(d))) - raise UnicodeError(msg) - else: - typeHere[1] = max(typeHere[1], len(str(d))) - if isinstance(d, string_types): - if nullMarker is None or d != nullMarker: - l = max(len(d), typeHere[1]) - typeHere = [str, l] - else: - try: - fD = float(int(d)) - except OverflowError: - locType = float - else: - if fD == d: - locType = int - if not isinstance(typeHere[0], string_types) and \ - priorities[locType] > priorities[typeHere[0]]: - typeHere[0] = locType - res[col] = typeHere - return res + """ + priorities = {float: 3, int: 2, str: 1, -1: -1} + res = [None] * nCols + for col in range(nCols): + typeHere = [-1, 1] + for row in range(nRows): + d = data[row][col] + if d is None: + continue + locType = type(d) + if locType != float and locType != int: + locType = str + try: + d = str(d) + except UnicodeError as msg: + print('cannot convert text from row %d col %d to a string' % (row + 2, col)) + print('\t>%s' % (repr(d))) + raise UnicodeError(msg) + else: + typeHere[1] = max(typeHere[1], len(str(d))) + if isinstance(d, str): + if nullMarker is None or d != nullMarker: + l = max(len(d), typeHere[1]) + typeHere = [str, l] + else: + try: + fD = float(int(d)) + except OverflowError: + locType = float + else: + if fD == d: + locType = int + if not isinstance(typeHere[0], str) and priorities[locType] > priorities[typeHere[0]]: + typeHere[0] = locType + res[col] = typeHere + return res def _AdjustColHeadings(colHeadings, maxColLabelLen): - """ *For Internal Use* + """ *For Internal Use* - removes illegal characters from column headings - and truncates those which are too long. + removes illegal characters from column headings + and truncates those which are too long. - """ - for i in xrange(len(colHeadings)): - # replace unallowed characters and strip extra white space - colHeadings[i] = colHeadings[i].strip() - colHeadings[i] = colHeadings[i].replace(' ', '_') - colHeadings[i] = colHeadings[i].replace('-', '_') - colHeadings[i] = colHeadings[i].replace('.', '_') + """ + for i in range(len(colHeadings)): + # replace unallowed characters and strip extra white space + colHeadings[i] = colHeadings[i].strip() + colHeadings[i] = colHeadings[i].replace(' ', '_') + colHeadings[i] = colHeadings[i].replace('-', '_') + colHeadings[i] = colHeadings[i].replace('.', '_') - if len(colHeadings[i]) > maxColLabelLen: - # interbase (at least) has a limit on the maximum length of a column name - newHead = colHeadings[i].replace('_', '') - newHead = newHead[:maxColLabelLen] - print('\tHeading %s too long, changed to %s' % (colHeadings[i], newHead)) - colHeadings[i] = newHead - return colHeadings + if len(colHeadings[i]) > maxColLabelLen: + # interbase (at least) has a limit on the maximum length of a column name + newHead = colHeadings[i].replace('_', '') + newHead = newHead[:maxColLabelLen] + print('\tHeading %s too long, changed to %s' % (colHeadings[i], newHead)) + colHeadings[i] = newHead + return colHeadings def GetTypeStrings(colHeadings, colTypes, keyCol=None): - """ returns a list of SQL type strings - """ - typeStrs = [] - for i in xrange(len(colTypes)): - typ = colTypes[i] - if typ[0] == float: - typeStrs.append('%s double precision' % colHeadings[i]) - elif typ[0] == int: - typeStrs.append('%s integer' % colHeadings[i]) - else: - typeStrs.append('%s varchar(%d)' % (colHeadings[i], typ[1])) - if colHeadings[i] == keyCol: - typeStrs[-1] = '%s not null primary key' % (typeStrs[-1]) - return typeStrs + """ returns a list of SQL type strings + """ + typeStrs = [] + for i in range(len(colTypes)): + typ = colTypes[i] + if typ[0] == float: + typeStrs.append('%s double precision' % colHeadings[i]) + elif typ[0] == int: + typeStrs.append('%s integer' % colHeadings[i]) + else: + typeStrs.append('%s varchar(%d)' % (colHeadings[i], typ[1])) + if colHeadings[i] == keyCol: + typeStrs[-1] = '%s not null primary key' % (typeStrs[-1]) + return typeStrs def _insertBlock(conn, sqlStr, block, silent=False): - try: - conn.cursor().executemany(sqlStr, block) - except Exception: - res = 0 - conn.commit() - for row in block: - try: - conn.cursor().execute(sqlStr, tuple(row)) - res += 1 - except Exception: - if not silent: - import traceback - traceback.print_exc() - print('insert failed:', sqlStr) - print('\t', repr(row)) - else: + try: + conn.cursor().executemany(sqlStr, block) + except Exception: + res = 0 conn.commit() - else: - res = len(block) - return res + for row in block: + try: + conn.cursor().execute(sqlStr, tuple(row)) + res += 1 + except Exception: + if not silent: + import traceback + traceback.print_exc() + print('insert failed:', sqlStr) + print('\t', repr(row)) + else: + conn.commit() + else: + res = len(block) + return res def _AddDataToDb(dBase, table, user, password, colDefs, colTypes, data, nullMarker=None, blockSize=100, cn=None): - """ *For Internal Use* + """ *For Internal Use* - (drops and) creates a table and then inserts the values + (drops and) creates a table and then inserts the values - """ - if not cn: - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - try: - c.execute('drop table %s' % (table)) - except Exception: - print('cannot drop table %s' % (table)) - try: - sqlStr = 'create table %s (%s)' % (table, colDefs) - c.execute(sqlStr) - except Exception: - print('create table failed: ', sqlStr) - print('here is the exception:') - import traceback - traceback.print_exc() - return - cn.commit() - c = None - - block = [] - entryTxt = [DbModule.placeHolder] * len(data[0]) - dStr = ','.join(entryTxt) - sqlStr = 'insert into %s values (%s)' % (table, dStr) - nDone = 0 - for row in data: - entries = [None] * len(row) - for col in xrange(len(row)): - if row[col] is not None and \ - (nullMarker is None or row[col] != nullMarker): - if colTypes[col][0] == float: - entries[col] = float(row[col]) - elif colTypes[col][0] == int: - entries[col] = int(row[col]) - else: - entries[col] = str(row[col]) - else: - entries[col] = None - block.append(tuple(entries)) - if len(block) >= blockSize: - nDone += _insertBlock(cn, sqlStr, block) - if not hasattr(cn, 'autocommit') or not cn.autocommit: - cn.commit() - block = [] - if len(block): - nDone += _insertBlock(cn, sqlStr, block) - if not hasattr(cn, 'autocommit') or not cn.autocommit: + """ + if not cn: + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + try: + c.execute('drop table %s' % (table)) + except Exception: + print('cannot drop table %s' % (table)) + try: + sqlStr = 'create table %s (%s)' % (table, colDefs) + c.execute(sqlStr) + except Exception: + print('create table failed: ', sqlStr) + print('here is the exception:') + import traceback + traceback.print_exc() + return cn.commit() + c = None + + block = [] + entryTxt = [DbModule.placeHolder] * len(data[0]) + dStr = ','.join(entryTxt) + sqlStr = 'insert into %s values (%s)' % (table, dStr) + nDone = 0 + for row in data: + entries = [None] * len(row) + for col in range(len(row)): + if row[col] is not None and \ + (nullMarker is None or row[col] != nullMarker): + if colTypes[col][0] == float: + entries[col] = float(row[col]) + elif colTypes[col][0] == int: + entries[col] = int(row[col]) + else: + entries[col] = str(row[col]) + else: + entries[col] = None + block.append(tuple(entries)) + if len(block) >= blockSize: + nDone += _insertBlock(cn, sqlStr, block) + if not hasattr(cn, 'autocommit') or not cn.autocommit: + cn.commit() + block = [] + if len(block): + nDone += _insertBlock(cn, sqlStr, block) + if not hasattr(cn, 'autocommit') or not cn.autocommit: + cn.commit() def TextFileToDatabase(dBase, table, inF, delim=',', user='sysdba', password='masterkey', maxColLabelLen=31, keyCol=None, nullMarker=None): - """loads the contents of the text file into a database. + """loads the contents of the text file into a database. - **Arguments** + **Arguments** - - dBase: the name of the DB to use + - dBase: the name of the DB to use - - table: the name of the table to create/overwrite + - table: the name of the table to create/overwrite - - inF: the file like object from which the data should - be pulled (must support readline()) + - inF: the file like object from which the data should + be pulled (must support readline()) - - delim: the delimiter used to separate fields + - delim: the delimiter used to separate fields - - user: the user name to use in connecting to the DB + - user: the user name to use in connecting to the DB - - password: the password to use in connecting to the DB + - password: the password to use in connecting to the DB - - maxColLabelLen: the maximum length a column label should be - allowed to have (truncation otherwise) + - maxColLabelLen: the maximum length a column label should be + allowed to have (truncation otherwise) - - keyCol: the column to be used as an index for the db + - keyCol: the column to be used as an index for the db - **Notes** + **Notes** - - if _table_ already exists, it is destroyed before we write - the new data + - if _table_ already exists, it is destroyed before we write + the new data - - we assume that the first row of the file contains the column names + - we assume that the first row of the file contains the column names - """ - table.replace('-', '_') - table.replace(' ', '_') + """ + table.replace('-', '_') + table.replace(' ', '_') - colHeadings = inF.readline().split(delim) - _AdjustColHeadings(colHeadings, maxColLabelLen) - nCols = len(colHeadings) - data = [] - inL = inF.readline() - while inL: - inL = inL.replace('\r', '') - inL = inL.replace('\n', '') - splitL = inL.split(delim) - if len(splitL) != nCols: - print('>>>', repr(inL)) - assert len(splitL) == nCols, 'unequal length' - tmpVect = [] - for entry in splitL: - try: - val = int(entry) - except ValueError: - try: - val = float(entry) - except ValueError: - val = entry - tmpVect.append(val) - data.append(tmpVect) + colHeadings = inF.readline().split(delim) + _AdjustColHeadings(colHeadings, maxColLabelLen) + nCols = len(colHeadings) + data = [] inL = inF.readline() - nRows = len(data) + while inL: + inL = inL.replace('\r', '') + inL = inL.replace('\n', '') + splitL = inL.split(delim) + if len(splitL) != nCols: + print('>>>', repr(inL)) + assert len(splitL) == nCols, 'unequal length' + tmpVect = [] + for entry in splitL: + try: + val = int(entry) + except ValueError: + try: + val = float(entry) + except ValueError: + val = entry + tmpVect.append(val) + data.append(tmpVect) + inL = inF.readline() + nRows = len(data) - # determine the types of each column - colTypes = TypeFinder(data, nRows, nCols, nullMarker=nullMarker) - typeStrs = GetTypeStrings(colHeadings, colTypes, keyCol=keyCol) - colDefs = ','.join(typeStrs) + # determine the types of each column + colTypes = TypeFinder(data, nRows, nCols, nullMarker=nullMarker) + typeStrs = GetTypeStrings(colHeadings, colTypes, keyCol=keyCol) + colDefs = ','.join(typeStrs) - _AddDataToDb(dBase, table, user, password, colDefs, colTypes, data, nullMarker=nullMarker) + _AddDataToDb(dBase, table, user, password, colDefs, colTypes, data, nullMarker=nullMarker) def DatabaseToDatabase(fromDb, fromTbl, toDb, toTbl, fields='*', join='', where='', user='sysdba', password='masterkey', keyCol=None, nullMarker='None'): - """ + """ - FIX: at the moment this is a hack + FIX: at the moment this is a hack - """ - sio = StringIO() - sio.write( - DatabaseToText(fromDb, fromTbl, fields=fields, join=join, where=where, user=user, - password=password)) - sio.seek(0) - TextFileToDatabase(toDb, toTbl, sio, user=user, password=password, keyCol=keyCol, - nullMarker=nullMarker) + """ + sio = StringIO() + sio.write( + DatabaseToText(fromDb, fromTbl, fields=fields, join=join, where=where, user=user, + password=password)) + sio.seek(0) + TextFileToDatabase(toDb, toTbl, sio, user=user, password=password, keyCol=keyCol, + nullMarker=nullMarker) if __name__ == '__main__': # pragma: nocover - sio = StringIO() - sio.write('foo,bar,baz\n') - sio.write('1,2,3\n') - sio.write('1.1,4,5\n') - sio.write('4,foo,6\n') - sio.seek(0) - from rdkit import RDConfig - import os - dirLoc = os.path.join(RDConfig.RDCodeDir, 'Dbase', 'TEST.GDB') + sio = StringIO() + sio.write('foo,bar,baz\n') + sio.write('1,2,3\n') + sio.write('1.1,4,5\n') + sio.write('4,foo,6\n') + sio.seek(0) + from rdkit import RDConfig + import os + dirLoc = os.path.join(RDConfig.RDCodeDir, 'Dbase', 'TEST.GDB') - TextFileToDatabase(dirLoc, 'fromtext', sio) + TextFileToDatabase(dirLoc, 'fromtext', sio) diff --git a/rdkit/ML/AnalyzeComposite.py b/rdkit/ML/AnalyzeComposite.py index 08c06bee4..c932a633e 100755 --- a/rdkit/ML/AnalyzeComposite.py +++ b/rdkit/ML/AnalyzeComposite.py @@ -26,7 +26,7 @@ Usage: AnalyzeComposite [optional args] -v: be verbose whilst screening """ -from __future__ import print_function + import sys @@ -36,7 +36,7 @@ from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import ScreenComposite from rdkit.ML.Data import Stats from rdkit.ML.DecTree import TreeUtils, Tree -from rdkit.six.moves import cPickle +import pickle __VERSION_STRING = "2.2.0" @@ -303,7 +303,7 @@ if __name__ == "__main__": composites = [] if db is None: for arg in extras: - composite = cPickle.load(open(arg, 'rb')) + composite = pickle.load(open(arg, 'rb')) composites.append(composite) else: tbl = extras[0] @@ -317,7 +317,7 @@ if __name__ == "__main__": composites = [] for pkl in pkls: pkl = str(pkl[0]) - comp = cPickle.loads(pkl) + comp = pickle.loads(pkl) composites.append(comp) if len(composites): diff --git a/rdkit/ML/BuildComposite.py b/rdkit/ML/BuildComposite.py index e3720bd3c..682428a8c 100755 --- a/rdkit/ML/BuildComposite.py +++ b/rdkit/ML/BuildComposite.py @@ -198,7 +198,7 @@ a QDAT file. """ -from __future__ import print_function + import sys import time @@ -212,7 +212,7 @@ from rdkit.ML import ScreenComposite from rdkit.ML.Composite import Composite, BayesComposite from rdkit.ML.Data import DataUtils, SplitData from rdkit.utils import listutils -from rdkit.six.moves import cPickle +import pickle # # from ML.SVM import SVMClassificationModel as SVM _runDetails = CompositeRun.CompositeRun() @@ -463,8 +463,8 @@ def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0): if details.pickleDataFileName != '': pickleDataFile = open(details.pickleDataFileName, 'wb+') - cPickle.dump(trainExamples, pickleDataFile) - cPickle.dump(testExamples, pickleDataFile) + pickle.dump(trainExamples, pickleDataFile) + pickle.dump(testExamples, pickleDataFile) pickleDataFile.close() if details.bayesModel: @@ -636,7 +636,7 @@ def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0): composite.ClearModelExamples() if saveIt: composite.Pickle(details.outName) - details.model = DbModule.binaryHolder(cPickle.dumps(composite)) + details.model = DbModule.binaryHolder(pickle.dumps(composite)) badExamples = [] if not details.detailedRes and (not hasattr(details, 'noScreen') or not details.noScreen): diff --git a/rdkit/ML/Cluster/Clusters.py b/rdkit/ML/Cluster/Clusters.py index 463b5fb05..7c78313de 100755 --- a/rdkit/ML/Cluster/Clusters.py +++ b/rdkit/ML/Cluster/Clusters.py @@ -11,303 +11,305 @@ """ contains the Cluster class for representing hierarchical cluster trees """ -from __future__ import print_function -from rdkit.six import cmp + +def cmp(t1, t2): + return (t1 < t2) * -1 or (t1 > t2) * 1 + CMPTOL = 1e-6 class Cluster(object): - """a class for storing clusters/data + """a class for storing clusters/data - **General Remarks** + **General Remarks** - - It is assumed that the bottom of any cluster hierarchy tree is composed of - the individual data points which were clustered. + - It is assumed that the bottom of any cluster hierarchy tree is composed of + the individual data points which were clustered. - - Clusters objects store the following pieces of data, most are - accessible via standard Setters/Getters: + - Clusters objects store the following pieces of data, most are + accessible via standard Setters/Getters: - - Children: *Not Settable*, the list of children. You can add children - with the _AddChild()_ and _AddChildren()_ methods. + - Children: *Not Settable*, the list of children. You can add children + with the _AddChild()_ and _AddChildren()_ methods. - **Note** this can be of arbitrary length, - but the current algorithms I have only produce trees with two children - per cluster + **Note** this can be of arbitrary length, + but the current algorithms I have only produce trees with two children + per cluster - - Metric: the metric for this cluster (i.e. how far apart its children are) + - Metric: the metric for this cluster (i.e. how far apart its children are) - - Index: the order in which this cluster was generated + - Index: the order in which this cluster was generated - - Points: *Not Settable*, the list of original points in this cluster - (calculated recursively from the children) + - Points: *Not Settable*, the list of original points in this cluster + (calculated recursively from the children) - - PointsPositions: *Not Settable*, the list of positions of the original - points in this cluster (calculated recursively from the children) + - PointsPositions: *Not Settable*, the list of positions of the original + points in this cluster (calculated recursively from the children) - - Position: the location of the cluster **Note** for a cluster this - probably means the location of the average of all the Points which are - its children. + - Position: the location of the cluster **Note** for a cluster this + probably means the location of the average of all the Points which are + its children. - - Data: a data field. This is used with the original points to store their - data value (i.e. the value we're using to classify) + - Data: a data field. This is used with the original points to store their + data value (i.e. the value we're using to classify) - - Name: the name of this cluster - - """ - - def __init__(self, metric=0.0, children=None, position=None, index=-1, name=None, data=None): - """Constructor - - **Arguments** - - see the class documentation for the meanings of these arguments - - *my wrists are tired* + - Name: the name of this cluster """ - if children is None: - children = [] - if position is None: - position = [] - self.metric = metric - self.children = children - self._UpdateLength() - self.pos = position - self.index = index - self.name = name - self._points = None - self._pointsPositions = None - self.data = data - def SetMetric(self, metric): - self.metric = metric + def __init__(self, metric=0.0, children=None, position=None, index=-1, name=None, data=None): + """Constructor - def GetMetric(self): - return self.metric + **Arguments** - def SetIndex(self, index): - self.index = index + see the class documentation for the meanings of these arguments - def GetIndex(self): - return self.index + *my wrists are tired* - def SetPosition(self, pos): - self.pos = pos + """ + if children is None: + children = [] + if position is None: + position = [] + self.metric = metric + self.children = children + self._UpdateLength() + self.pos = position + self.index = index + self.name = name + self._points = None + self._pointsPositions = None + self.data = data - def GetPosition(self): - return self.pos + def SetMetric(self, metric): + self.metric = metric - def GetPointsPositions(self): - if self._pointsPositions is not None: - return self._pointsPositions - else: - self._GenPoints() - return self._pointsPositions + def GetMetric(self): + return self.metric - def GetPoints(self): - if self._points is not None: - return self._points - else: - self._GenPoints() - return self._points + def SetIndex(self, index): + self.index = index - def FindSubtree(self, index): - """ finds and returns the subtree with a particular index - """ - res = None - if index == self.index: - res = self - else: - for child in self.children: - res = child.FindSubtree(index) - if res: - break - return res + def GetIndex(self): + return self.index - def _GenPoints(self): - """ Generates the _Points_ and _PointsPositions_ lists + def SetPosition(self, pos): + self.pos = pos - *intended for internal use* + def GetPosition(self): + return self.pos - """ - if len(self) == 1: - self._points = [self] - self._pointsPositions = [self.GetPosition()] - return self._points - else: - res = [] - children = self.GetChildren() - children.sort(key=lambda x: len(x), reverse=True) - for child in children: - res += child.GetPoints() - self._points = res - self._pointsPositions = [x.GetPosition() for x in res] + def GetPointsPositions(self): + if self._pointsPositions is not None: + return self._pointsPositions + else: + self._GenPoints() + return self._pointsPositions - def AddChild(self, child): - """Adds a child to our list + def GetPoints(self): + if self._points is not None: + return self._points + else: + self._GenPoints() + return self._points - **Arguments** + def FindSubtree(self, index): + """ finds and returns the subtree with a particular index + """ + res = None + if index == self.index: + res = self + else: + for child in self.children: + res = child.FindSubtree(index) + if res: + break + return res - - child: a Cluster + def _GenPoints(self): + """ Generates the _Points_ and _PointsPositions_ lists - """ - self.children.append(child) - self._GenPoints() - self._UpdateLength() + *intended for internal use* - def AddChildren(self, children): - """Adds a bunch of children to our list + """ + if len(self) == 1: + self._points = [self] + self._pointsPositions = [self.GetPosition()] + return self._points + else: + res = [] + children = self.GetChildren() + children.sort(key=lambda x: len(x), reverse=True) + for child in children: + res += child.GetPoints() + self._points = res + self._pointsPositions = [x.GetPosition() for x in res] - **Arguments** + def AddChild(self, child): + """Adds a child to our list - - children: a list of Clusters + **Arguments** - """ - self.children += children - self._GenPoints() - self._UpdateLength() + - child: a Cluster - def RemoveChild(self, child): - """Removes a child from our list + """ + self.children.append(child) + self._GenPoints() + self._UpdateLength() - **Arguments** + def AddChildren(self, children): + """Adds a bunch of children to our list - - child: a Cluster + **Arguments** - """ - self.children.remove(child) - self._UpdateLength() + - children: a list of Clusters - def GetChildren(self): - self.children.sort(key=lambda x: x.GetMetric()) - return self.children + """ + self.children += children + self._GenPoints() + self._UpdateLength() - def SetData(self, data): - self.data = data + def RemoveChild(self, child): + """Removes a child from our list - def GetData(self): - return self.data + **Arguments** - def SetName(self, name): - self.name = name + - child: a Cluster - def GetName(self): - if self.name is None: - return 'Cluster(%d)' % (self.GetIndex()) - else: - return self.name + """ + self.children.remove(child) + self._UpdateLength() - def Print(self, level=0, showData=0, offset='\t'): - if not showData or self.GetData() is None: - print('%s%s%s Metric: %f' % (' ' * level, self.GetName(), offset, self.GetMetric())) - else: - print('%s%s%s Data: %f\t Metric: %f' % - (' ' * level, self.GetName(), offset, self.GetData(), self.GetMetric())) + def GetChildren(self): + self.children.sort(key=lambda x: x.GetMetric()) + return self.children - for child in self.GetChildren(): - child.Print(level=level + 1, showData=showData, offset=offset) + def SetData(self, data): + self.data = data - def Compare(self, other, ignoreExtras=1): - """ not as choosy as self==other + def GetData(self): + return self.data - """ - tv1, tv2 = str(type(self)), str(type(other)) - tv = cmp(tv1, tv2) - if tv: - return tv - tv1, tv2 = len(self), len(other) - tv = cmp(tv1, tv2) - if tv: - return tv + def SetName(self, name): + self.name = name - if not ignoreExtras: - m1, m2 = self.GetMetric(), other.GetMetric() - if abs(m1 - m2) > CMPTOL: - return cmp(m1, m2) + def GetName(self): + if self.name is None: + return 'Cluster(%d)' % (self.GetIndex()) + else: + return self.name - if cmp(self.GetName(), other.GetName()): - return cmp(self.GetName(), other.GetName()) + def Print(self, level=0, showData=0, offset='\t'): + if not showData or self.GetData() is None: + print('%s%s%s Metric: %f' % (' ' * level, self.GetName(), offset, self.GetMetric())) + else: + print('%s%s%s Data: %f\t Metric: %f' % + (' ' * level, self.GetName(), offset, self.GetData(), self.GetMetric())) - sP = self.GetPosition() - oP = other.GetPosition() - try: - r = cmp(len(sP), len(oP)) - except Exception: - pass - else: - if r: - return r + for child in self.GetChildren(): + child.Print(level=level + 1, showData=showData, offset=offset) - try: - r = cmp(sP, oP) - except Exception: - r = sum(sP - oP) - if r: - return r + def Compare(self, other, ignoreExtras=1): + """ not as choosy as self==other - c1, c2 = self.GetChildren(), other.GetChildren() - if cmp(len(c1), len(c2)): - return cmp(len(c1), len(c2)) - for i in range(len(c1)): - t = c1[i].Compare(c2[i], ignoreExtras=ignoreExtras) - if t: - return t + """ + tv1, tv2 = str(type(self)), str(type(other)) + tv = cmp(tv1, tv2) + if tv: + return tv + tv1, tv2 = len(self), len(other) + tv = cmp(tv1, tv2) + if tv: + return tv - return 0 + if not ignoreExtras: + m1, m2 = self.GetMetric(), other.GetMetric() + if abs(m1 - m2) > CMPTOL: + return cmp(m1, m2) - def _UpdateLength(self): - """ updates our length + if cmp(self.GetName(), other.GetName()): + return cmp(self.GetName(), other.GetName()) - *intended for internal use* + sP = self.GetPosition() + oP = other.GetPosition() + try: + r = cmp(len(sP), len(oP)) + except Exception: + pass + else: + if r: + return r - """ - self._len = sum(len(c) for c in self.children) + 1 + try: + r = cmp(sP, oP) + except Exception: + r = sum(sP - oP) + if r: + return r - def IsTerminal(self): - return self._len <= 1 + c1, c2 = self.GetChildren(), other.GetChildren() + if cmp(len(c1), len(c2)): + return cmp(len(c1), len(c2)) + for i in range(len(c1)): + t = c1[i].Compare(c2[i], ignoreExtras=ignoreExtras) + if t: + return t - def __len__(self): - """ allows _len(cluster)_ to work + return 0 - """ - return self._len + def _UpdateLength(self): + """ updates our length - def __cmp__(self, other): - """ allows _cluster1 == cluster2_ to work + *intended for internal use* - """ - if cmp(type(self), type(other)): - return cmp(type(self), type(other)) + """ + self._len = sum(len(c) for c in self.children) + 1 - m1, m2 = self.GetMetric(), other.GetMetric() - if abs(m1 - m2) > CMPTOL: - return cmp(m1, m2) + def IsTerminal(self): + return self._len <= 1 - if cmp(self.GetName(), other.GetName()): - return cmp(self.GetName(), other.GetName()) + def __len__(self): + """ allows _len(cluster)_ to work - c1, c2 = self.GetChildren(), other.GetChildren() - return cmp(c1, c2) + """ + return self._len + + def __cmp__(self, other): + """ allows _cluster1 == cluster2_ to work + + """ + if cmp(type(self), type(other)): + return cmp(type(self), type(other)) + + m1, m2 = self.GetMetric(), other.GetMetric() + if abs(m1 - m2) > CMPTOL: + return cmp(m1, m2) + + if cmp(self.GetName(), other.GetName()): + return cmp(self.GetName(), other.GetName()) + + c1, c2 = self.GetChildren(), other.GetChildren() + return cmp(c1, c2) if __name__ == '__main__': # pragma: nocover - from rdkit.ML.Cluster import ClusterUtils - root = Cluster(index=1, metric=1000) - c1 = Cluster(index=10, metric=100) - c1.AddChild(Cluster(index=30, metric=10)) - c1.AddChild(Cluster(index=31, metric=10)) - c1.AddChild(Cluster(index=32, metric=10)) + from rdkit.ML.Cluster import ClusterUtils + root = Cluster(index=1, metric=1000) + c1 = Cluster(index=10, metric=100) + c1.AddChild(Cluster(index=30, metric=10)) + c1.AddChild(Cluster(index=31, metric=10)) + c1.AddChild(Cluster(index=32, metric=10)) - c2 = Cluster(index=11, metric=100) - c2.AddChild(Cluster(index=40, metric=10)) - c2.AddChild(Cluster(index=41, metric=10)) + c2 = Cluster(index=11, metric=100) + c2.AddChild(Cluster(index=40, metric=10)) + c2.AddChild(Cluster(index=41, metric=10)) - root.AddChild(c1) - root.AddChild(c2) + root.AddChild(c1) + root.AddChild(c2) - nodes = ClusterUtils.GetNodeList(root) + nodes = ClusterUtils.GetNodeList(root) - indices = [x.GetIndex() for x in nodes] - print('XXX:', indices) + indices = [x.GetIndex() for x in nodes] + print('XXX:', indices) diff --git a/rdkit/ML/Cluster/Resemblance.py b/rdkit/ML/Cluster/Resemblance.py index be374f477..690386892 100755 --- a/rdkit/ML/Cluster/Resemblance.py +++ b/rdkit/ML/Cluster/Resemblance.py @@ -25,7 +25,7 @@ '(col*(col-1))/2 + row' """ -from __future__ import print_function + import numpy diff --git a/rdkit/ML/Cluster/UnitTestCluster.py b/rdkit/ML/Cluster/UnitTestCluster.py index 0a516f63c..df109bd85 100755 --- a/rdkit/ML/Cluster/UnitTestCluster.py +++ b/rdkit/ML/Cluster/UnitTestCluster.py @@ -19,156 +19,157 @@ import numpy from rdkit.ML.Cluster import ClusterUtils from rdkit.ML.Cluster import Clusters from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO try: - from rdkit.ML.Cluster import Murtagh + from rdkit.ML.Cluster import Murtagh except ImportError: - Murtagh = None + Murtagh = None class TestCase(unittest.TestCase): - def setUp(self): - # this is the data set used by Romesburg in "Cluster Analysis for Researchers" - # to demonstrate the different clustering methods - # print '\n%s: '%self.shortDescription(), - self.d = numpy.array([[10., 5.], [20., 20.], [30., 10.], [30., 15.], [5., 10.]]) - self.names = ['p1', 'p2', 'p3', 'p4', 'p5'] + def setUp(self): + # this is the data set used by Romesburg in "Cluster Analysis for Researchers" + # to demonstrate the different clustering methods + # print '\n%s: '%self.shortDescription(), + self.d = numpy.array([[10., 5.], [20., 20.], [30., 10.], [30., 15.], [5., 10.]]) + self.names = ['p1', 'p2', 'p3', 'p4', 'p5'] - def testDivide(self): - " tests the cluster division algorithms " - ca = Clusters.Cluster(index=1) - cb = Clusters.Cluster(index=2) - cc = Clusters.Cluster(index=3) - cd = Clusters.Cluster(index=4) - ce = Clusters.Cluster(index=5) - cf = Clusters.Cluster(index=6) + def testDivide(self): + " tests the cluster division algorithms " + ca = Clusters.Cluster(index=1) + cb = Clusters.Cluster(index=2) + cc = Clusters.Cluster(index=3) + cd = Clusters.Cluster(index=4) + ce = Clusters.Cluster(index=5) + cf = Clusters.Cluster(index=6) - c1 = Clusters.Cluster(metric=10, children=[ca, cb], index=7) - c2 = Clusters.Cluster(metric=15, children=[cc, cd], index=8) - c3 = Clusters.Cluster(metric=20, children=[ce, cf], index=9) - c4 = Clusters.Cluster(metric=25, children=[c2, c3], index=10) - c5 = Clusters.Cluster(metric=30, children=[c4, c1], index=11) + c1 = Clusters.Cluster(metric=10, children=[ca, cb], index=7) + c2 = Clusters.Cluster(metric=15, children=[cc, cd], index=8) + c3 = Clusters.Cluster(metric=20, children=[ce, cf], index=9) + c4 = Clusters.Cluster(metric=25, children=[c2, c3], index=10) + c5 = Clusters.Cluster(metric=30, children=[c4, c1], index=11) - cs = ClusterUtils.SplitIntoNClusters(c5, 4, breadthFirst=True) - assert len(cs) == 4, 'bad split length' - indices = [x.GetIndex() for x in cs] - for index in [9, 8, 1, 2]: - assert index in indices, 'index %d not found in %s' % (index, str(indices)) - # we may not want to preserve order, but test it for now - assert indices == [9, 8, 1, 2], 'bad index order' + cs = ClusterUtils.SplitIntoNClusters(c5, 4, breadthFirst=True) + assert len(cs) == 4, 'bad split length' + indices = [x.GetIndex() for x in cs] + for index in [9, 8, 1, 2]: + assert index in indices, 'index %d not found in %s' % (index, str(indices)) + # we may not want to preserve order, but test it for now + assert indices == [9, 8, 1, 2], 'bad index order' - cs2 = ClusterUtils.SplitIntoNClusters(c5, 4, breadthFirst=False) - indices = [x.GetIndex() for x in cs2] - for index in [8, 7, 5, 6]: - assert index in indices, 'index %d not found in %s' % (index, str(indices)) - # we may not want to preserve order, but test it for now - assert indices == [8, 7, 5, 6], 'bad index order' + cs2 = ClusterUtils.SplitIntoNClusters(c5, 4, breadthFirst=False) + indices = [x.GetIndex() for x in cs2] + for index in [8, 7, 5, 6]: + assert index in indices, 'index %d not found in %s' % (index, str(indices)) + # we may not want to preserve order, but test it for now + assert indices == [8, 7, 5, 6], 'bad index order' - # Exceptions and edge cases - self.assertRaises(ValueError, ClusterUtils.SplitIntoNClusters, c5, len(c5) + 1) - self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, len(c5)), c5.GetPoints()) - self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, 0), [c5]) + # Exceptions and edge cases + self.assertRaises(ValueError, ClusterUtils.SplitIntoNClusters, c5, len(c5) + 1) + self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, len(c5)), c5.GetPoints()) + self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, 0), [c5]) - for n in range(len(c5)): - if n >= 7: # Code fails for n = 7 and above - self.assertRaises(AssertionError, ClusterUtils.SplitIntoNClusters, c5, n, breadthFirst=True) - else: - ClusterUtils.SplitIntoNClusters(c5, n, breadthFirst=True) + for n in range(len(c5)): + if n >= 7: # Code fails for n = 7 and above + self.assertRaises(AssertionError, ClusterUtils.SplitIntoNClusters, + c5, n, breadthFirst=True) + else: + ClusterUtils.SplitIntoNClusters(c5, n, breadthFirst=True) - self.assertRaises(ValueError, ClusterUtils.SplitIntoNClusters, c5, len(c5) + 1, - breadthFirst=False) - self.assertEqual( - ClusterUtils.SplitIntoNClusters(c5, len(c5), breadthFirst=False), c5.GetPoints()) - self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, 0, breadthFirst=False), [c5]) - - for n in range(len(c5)): - if n >= 7: # Code fails for n = 7 and above - self.assertRaises(AssertionError, ClusterUtils.SplitIntoNClusters, c5, n, + self.assertRaises(ValueError, ClusterUtils.SplitIntoNClusters, c5, len(c5) + 1, breadthFirst=False) - else: - ClusterUtils.SplitIntoNClusters(c5, n, breadthFirst=False) + self.assertEqual( + ClusterUtils.SplitIntoNClusters(c5, len(c5), breadthFirst=False), c5.GetPoints()) + self.assertEqual(ClusterUtils.SplitIntoNClusters(c5, 0, breadthFirst=False), [c5]) - def testMurtaghUPGMA(self): - if Murtagh is None: - return - nPts = 5 - sz = 5 - dataP = numpy.random.random((nPts, sz)) - newClust = Murtagh.ClusterData(dataP, nPts, Murtagh.UPGMA)[0] - ds = [] - for i in range(nPts): - for j in range(i): - d = dataP[i] - dataP[j] - ds.append(sum(d * d)) - ds = numpy.array(ds) - newClust2 = Murtagh.ClusterData(ds, nPts, Murtagh.UPGMA, isDistData=1)[0] + for n in range(len(c5)): + if n >= 7: # Code fails for n = 7 and above + self.assertRaises(AssertionError, ClusterUtils.SplitIntoNClusters, c5, n, + breadthFirst=False) + else: + ClusterUtils.SplitIntoNClusters(c5, n, breadthFirst=False) - assert len(newClust) == len(newClust2), 'length mismatch2' + def testMurtaghUPGMA(self): + if Murtagh is None: + return + nPts = 5 + sz = 5 + dataP = numpy.random.random((nPts, sz)) + newClust = Murtagh.ClusterData(dataP, nPts, Murtagh.UPGMA)[0] + ds = [] + for i in range(nPts): + for j in range(i): + d = dataP[i] - dataP[j] + ds.append(sum(d * d)) + ds = numpy.array(ds) + newClust2 = Murtagh.ClusterData(ds, nPts, Murtagh.UPGMA, isDistData=1)[0] - assert not newClust.Compare(newClust2, ignoreExtras=0), 'equality failed3' + assert len(newClust) == len(newClust2), 'length mismatch2' - newClust2 = Murtagh.ClusterData(dataP, nPts, Murtagh.UPGMA, isDistData=0)[0] - assert len(newClust) == len(newClust2), 'length mismatch2' + assert not newClust.Compare(newClust2, ignoreExtras=0), 'equality failed3' - assert not newClust.Compare(newClust2, ignoreExtras=0), 'equality failed3' + newClust2 = Murtagh.ClusterData(dataP, nPts, Murtagh.UPGMA, isDistData=0)[0] + assert len(newClust) == len(newClust2), 'length mismatch2' - def test_Cluster(self): - """ tests the Cluster class functionality """ - root = Clusters.Cluster(index=1, position=1) - c1 = Clusters.Cluster(index=10, position=10) - c1.AddChild(Clusters.Cluster(index=30, position=30)) - c1.AddChild(Clusters.Cluster(index=31, position=31)) - t32 = Clusters.Cluster(index=32, position=32) - c1.AddChild(t32) + assert not newClust.Compare(newClust2, ignoreExtras=0), 'equality failed3' - c2 = Clusters.Cluster(index=11) - # c2.AddChild(Clusters.Cluster(index=40)) - # c2.AddChild(Clusters.Cluster(index=41)) - c2.AddChildren([Clusters.Cluster(index=40), Clusters.Cluster(index=41)]) + def test_Cluster(self): + """ tests the Cluster class functionality """ + root = Clusters.Cluster(index=1, position=1) + c1 = Clusters.Cluster(index=10, position=10) + c1.AddChild(Clusters.Cluster(index=30, position=30)) + c1.AddChild(Clusters.Cluster(index=31, position=31)) + t32 = Clusters.Cluster(index=32, position=32) + c1.AddChild(t32) - root.AddChild(c1) - root.AddChild(c2) - nodes = ClusterUtils.GetNodeList(root) + c2 = Clusters.Cluster(index=11) + # c2.AddChild(Clusters.Cluster(index=40)) + # c2.AddChild(Clusters.Cluster(index=41)) + c2.AddChildren([Clusters.Cluster(index=40), Clusters.Cluster(index=41)]) - indices = [x.GetIndex() for x in nodes] - assert indices == [30, 31, 32, 10, 40, 41, 11, 1], 'bad indices' - subtree = root.FindSubtree(11) - self.assertEqual([x.GetIndex() for x in ClusterUtils.GetNodeList(subtree)], [40, 41, 11]) + root.AddChild(c1) + root.AddChild(c2) + nodes = ClusterUtils.GetNodeList(root) - self.assertFalse(root.IsTerminal()) - self.assertTrue(t32.IsTerminal()) + indices = [x.GetIndex() for x in nodes] + assert indices == [30, 31, 32, 10, 40, 41, 11, 1], 'bad indices' + subtree = root.FindSubtree(11) + self.assertEqual([x.GetIndex() for x in ClusterUtils.GetNodeList(subtree)], [40, 41, 11]) - self.assertEqual(root.GetData(), None) - root.SetData(3.14) - self.assertEqual(root.GetData(), 3.14) + self.assertFalse(root.IsTerminal()) + self.assertTrue(t32.IsTerminal()) - self.assertEqual(root.GetMetric(), 0.0) - root.SetMetric(0.1) - self.assertEqual(root.GetMetric(), 0.1) + self.assertEqual(root.GetData(), None) + root.SetData(3.14) + self.assertEqual(root.GetData(), 3.14) - self.assertEqual(root.GetIndex(), 1) - root.SetIndex(100) - self.assertEqual(root.GetIndex(), 100) + self.assertEqual(root.GetMetric(), 0.0) + root.SetMetric(0.1) + self.assertEqual(root.GetMetric(), 0.1) - self.assertEqual(root.GetPointsPositions(), [30, 31, 32, []]) + self.assertEqual(root.GetIndex(), 1) + root.SetIndex(100) + self.assertEqual(root.GetIndex(), 100) - root.RemoveChild(c1) - self.assertEqual([x.GetIndex() for x in ClusterUtils.GetNodeList(root)], [40, 41, 11, 100]) + self.assertEqual(root.GetPointsPositions(), [30, 31, 32, []]) - self.assertEqual(root.GetName(), 'Cluster(100)') - root.SetName('abc') - self.assertEqual(root.GetName(), 'abc') + root.RemoveChild(c1) + self.assertEqual([x.GetIndex() for x in ClusterUtils.GetNodeList(root)], [40, 41, 11, 100]) - f = StringIO() - with redirect_stdout(f): - root.Print(showData=True) - self.assertIn('abc', f.getvalue()) - self.assertIn('Cluster(41)', f.getvalue()) - self.assertIn('Metric', f.getvalue()) + self.assertEqual(root.GetName(), 'Cluster(100)') + root.SetName('abc') + self.assertEqual(root.GetName(), 'abc') + + f = StringIO() + with redirect_stdout(f): + root.Print(showData=True) + self.assertIn('abc', f.getvalue()) + self.assertIn('Cluster(41)', f.getvalue()) + self.assertIn('Metric', f.getvalue()) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/Cluster/murtagh_test.py b/rdkit/ML/Cluster/murtagh_test.py index b0a3d62aa..00424671a 100755 --- a/rdkit/ML/Cluster/murtagh_test.py +++ b/rdkit/ML/Cluster/murtagh_test.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import numpy diff --git a/rdkit/ML/Composite/AdjustComposite.py b/rdkit/ML/Composite/AdjustComposite.py index 9bd1ff155..3b8ce4107 100755 --- a/rdkit/ML/Composite/AdjustComposite.py +++ b/rdkit/ML/Composite/AdjustComposite.py @@ -6,7 +6,7 @@ """ functionality to allow adjusting composite model contents """ -from __future__ import print_function + import copy diff --git a/rdkit/ML/Composite/BayesComposite.py b/rdkit/ML/Composite/BayesComposite.py index fee71e942..c200536cc 100755 --- a/rdkit/ML/Composite/BayesComposite.py +++ b/rdkit/ML/Composite/BayesComposite.py @@ -22,7 +22,7 @@ Other compatibility notes: """ -from __future__ import print_function + import numpy diff --git a/rdkit/ML/Composite/Composite.py b/rdkit/ML/Composite/Composite.py index 828eefd79..6e22fa6a1 100755 --- a/rdkit/ML/Composite/Composite.py +++ b/rdkit/ML/Composite/Composite.py @@ -22,9 +22,9 @@ Other compatibility notes: """ -from __future__ import print_function + import numpy -from rdkit.six.moves import cPickle +import pickle from rdkit.ML.Data import DataUtils @@ -511,7 +511,7 @@ class Composite(object): self.ClearModelExamples() pFile = open(fileName, 'wb+') - cPickle.dump(self, pFile, 1) + pickle.dump(self, pFile, 1) pFile.close() def AddModel(self, model, error, needsQuantization=1): diff --git a/rdkit/ML/Composite/UnitTestCOMServer.py b/rdkit/ML/Composite/UnitTestCOMServer.py index cdf935c29..4c4a90669 100755 --- a/rdkit/ML/Composite/UnitTestCOMServer.py +++ b/rdkit/ML/Composite/UnitTestCOMServer.py @@ -3,7 +3,7 @@ # # unit testing code for the composite model COM server -from __future__ import print_function + from rdkit import RDConfig import unittest try: diff --git a/rdkit/ML/Composite/UnitTestComposite.py b/rdkit/ML/Composite/UnitTestComposite.py index b2e587680..82957d975 100644 --- a/rdkit/ML/Composite/UnitTestComposite.py +++ b/rdkit/ML/Composite/UnitTestComposite.py @@ -8,7 +8,7 @@ """ import unittest import io -from rdkit.six.moves import cPickle +import pickle from rdkit.ML.Composite import Composite from rdkit.ML.DecTree.DecTree import DecTreeNode as Node from rdkit import RDConfig @@ -21,7 +21,7 @@ class TestCase(unittest.TestCase): buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') pklTF.close() with io.BytesIO(buf) as pklF: - self.examples = cPickle.load(pklF) + self.examples = pickle.load(pklF) self.varNames = ['composition', 'max_atomic', 'has3d', 'has4d', 'has5d', 'elconc', 'atvol', 'isferro'] self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73], [11.81, 14.52], []] @@ -48,7 +48,7 @@ class TestCase(unittest.TestCase): buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') pklTF.close() with io.BytesIO(buf) as pklF: - self.refCompos = cPickle.load(pklF) + self.refCompos = pickle.load(pklF) composite = Composite.Composite() composite._varNames = self.varNames @@ -67,7 +67,7 @@ class TestCase(unittest.TestCase): self.assertEqual(composite.errList, sorted(composite.errList)) # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF: - # cPickle.dump(composite,pklF) + # pickle.dump(composite,pklF) self.treeComposite = composite self.assertEqual(len(composite), len(self.refCompos)) diff --git a/rdkit/ML/Data/DataUtils.py b/rdkit/ML/Data/DataUtils.py index 83ed4776f..506f5fb9d 100755 --- a/rdkit/ML/Data/DataUtils.py +++ b/rdkit/ML/Data/DataUtils.py @@ -49,7 +49,7 @@ 4) A python list of lists with the data points """ -from __future__ import print_function + import csv import random @@ -59,610 +59,609 @@ import numpy from rdkit.DataStructs import BitUtils from rdkit.ML.Data import MLData -from rdkit.six import integer_types -from rdkit.six.moves import cPickle +import pickle from rdkit.utils import fileutils def permutation(nToDo): - res = list(range(nToDo)) - random.shuffle(res, random=random.random) - return res + res = list(range(nToDo)) + random.shuffle(res, random=random.random) + return res def WriteData(outFile, varNames, qBounds, examples): - """ writes out a .qdat file + """ writes out a .qdat file - **Arguments** + **Arguments** - - outFile: a file object + - outFile: a file object - - varNames: a list of variable names + - varNames: a list of variable names - - qBounds: the list of quantization bounds (should be the same length - as _varNames_) + - qBounds: the list of quantization bounds (should be the same length + as _varNames_) - - examples: the data to be written + - examples: the data to be written - """ - outFile.write('# Quantized data from DataUtils\n') - outFile.write('# ----------\n') - outFile.write('# Variable Table\n') - for i in range(len(varNames)): - outFile.write('# %s %s\n' % (varNames[i], str(qBounds[i]))) - outFile.write('# ----------\n') - for example in examples: - outFile.write(' '.join([str(e) for e in example]) + '\n') + """ + outFile.write('# Quantized data from DataUtils\n') + outFile.write('# ----------\n') + outFile.write('# Variable Table\n') + for i in range(len(varNames)): + outFile.write('# %s %s\n' % (varNames[i], str(qBounds[i]))) + outFile.write('# ----------\n') + for example in examples: + outFile.write(' '.join([str(e) for e in example]) + '\n') def ReadVars(inFile): - """ reads the variables and quantization bounds from a .qdat or .dat file + """ reads the variables and quantization bounds from a .qdat or .dat file - **Arguments** + **Arguments** - - inFile: a file object + - inFile: a file object - **Returns** + **Returns** - a 2-tuple containing: + a 2-tuple containing: - 1) varNames: a list of the variable names + 1) varNames: a list of the variable names - 2) qbounds: the list of quantization bounds for each variable + 2) qbounds: the list of quantization bounds for each variable - """ - varNames = [] - qBounds = [] - fileutils.MoveToMatchingLine(inFile, 'Variable Table') - inLine = inFile.readline() - while inLine.find('# ----') == -1: - splitLine = inLine[2:].split('[') - varNames.append(splitLine[0].strip()) - qBounds.append(splitLine[1][:-2]) + """ + varNames = [] + qBounds = [] + fileutils.MoveToMatchingLine(inFile, 'Variable Table') inLine = inFile.readline() - for i in range(len(qBounds)): + while inLine.find('# ----') == -1: + splitLine = inLine[2:].split('[') + varNames.append(splitLine[0].strip()) + qBounds.append(splitLine[1][:-2]) + inLine = inFile.readline() + for i in range(len(qBounds)): - if qBounds[i] != '': - l = qBounds[i].split(',') - qBounds[i] = [] - for item in l: - qBounds[i].append(float(item)) - else: - qBounds[i] = [] - return varNames, qBounds + if qBounds[i] != '': + l = qBounds[i].split(',') + qBounds[i] = [] + for item in l: + qBounds[i].append(float(item)) + else: + qBounds[i] = [] + return varNames, qBounds def ReadQuantExamples(inFile): - """ reads the examples from a .qdat file + """ reads the examples from a .qdat file - **Arguments** + **Arguments** - - inFile: a file object + - inFile: a file object - **Returns** + **Returns** - a 2-tuple containing: + a 2-tuple containing: - 1) the names of the examples + 1) the names of the examples - 2) a list of lists containing the examples themselves + 2) a list of lists containing the examples themselves - **Note** + **Note** - because this is reading a .qdat file, it assumed that all variable values - are integers + because this is reading a .qdat file, it assumed that all variable values + are integers - """ - expr1 = re.compile(r'^#') - expr2 = re.compile(r'[\ ]+|[\t]+') - examples = [] - names = [] - inLine = inFile.readline() - while inLine: - if expr1.search(inLine) is None: - resArr = expr2.split(inLine) - if len(resArr) > 1: - examples.append([int(x) for x in resArr[1:]]) - names.append(resArr[0]) + """ + expr1 = re.compile(r'^#') + expr2 = re.compile(r'[\ ]+|[\t]+') + examples = [] + names = [] inLine = inFile.readline() - return names, examples + while inLine: + if expr1.search(inLine) is None: + resArr = expr2.split(inLine) + if len(resArr) > 1: + examples.append([int(x) for x in resArr[1:]]) + names.append(resArr[0]) + inLine = inFile.readline() + return names, examples def ReadGeneralExamples(inFile): - """ reads the examples from a .dat file + """ reads the examples from a .dat file - **Arguments** + **Arguments** - - inFile: a file object + - inFile: a file object - **Returns** + **Returns** - a 2-tuple containing: + a 2-tuple containing: - 1) the names of the examples + 1) the names of the examples - 2) a list of lists containing the examples themselves + 2) a list of lists containing the examples themselves - **Note** + **Note** - - this attempts to convert variable values to ints, then floats. - if those both fail, they are left as strings + - this attempts to convert variable values to ints, then floats. + if those both fail, they are left as strings - """ - expr1 = re.compile(r'^#') - expr2 = re.compile(r'[\ ]+|[\t]+') - examples = [] - names = [] - inLine = inFile.readline() - while inLine: - if expr1.search(inLine) is None: - resArr = expr2.split(inLine)[:-1] - if len(resArr) > 1: - for i in range(1, len(resArr)): - d = resArr[i] - try: - resArr[i] = int(d) - except ValueError: - try: - resArr[i] = float(d) - except ValueError: - pass - examples.append(resArr[1:]) - names.append(resArr[0]) + """ + expr1 = re.compile(r'^#') + expr2 = re.compile(r'[\ ]+|[\t]+') + examples = [] + names = [] inLine = inFile.readline() - return names, examples + while inLine: + if expr1.search(inLine) is None: + resArr = expr2.split(inLine)[:-1] + if len(resArr) > 1: + for i in range(1, len(resArr)): + d = resArr[i] + try: + resArr[i] = int(d) + except ValueError: + try: + resArr[i] = float(d) + except ValueError: + pass + examples.append(resArr[1:]) + names.append(resArr[0]) + inLine = inFile.readline() + return names, examples def BuildQuantDataSet(fileName): - """ builds a data set from a .qdat file + """ builds a data set from a .qdat file - **Arguments** + **Arguments** - - fileName: the name of the .qdat file + - fileName: the name of the .qdat file - **Returns** + **Returns** - an _MLData.MLQuantDataSet_ + an _MLData.MLQuantDataSet_ - """ - with open(fileName, 'r') as inFile: - varNames, qBounds = ReadVars(inFile) - ptNames, examples = ReadQuantExamples(inFile) - data = MLData.MLQuantDataSet(examples, qBounds=qBounds, varNames=varNames, ptNames=ptNames) - return data + """ + with open(fileName, 'r') as inFile: + varNames, qBounds = ReadVars(inFile) + ptNames, examples = ReadQuantExamples(inFile) + data = MLData.MLQuantDataSet(examples, qBounds=qBounds, varNames=varNames, ptNames=ptNames) + return data def BuildDataSet(fileName): - """ builds a data set from a .dat file + """ builds a data set from a .dat file - **Arguments** + **Arguments** - - fileName: the name of the .dat file + - fileName: the name of the .dat file - **Returns** + **Returns** - an _MLData.MLDataSet_ + an _MLData.MLDataSet_ - """ - with open(fileName, 'r') as inFile: - varNames, qBounds = ReadVars(inFile) - ptNames, examples = ReadGeneralExamples(inFile) - data = MLData.MLDataSet(examples, qBounds=qBounds, varNames=varNames, ptNames=ptNames) - return data + """ + with open(fileName, 'r') as inFile: + varNames, qBounds = ReadVars(inFile) + ptNames, examples = ReadGeneralExamples(inFile) + data = MLData.MLDataSet(examples, qBounds=qBounds, varNames=varNames, ptNames=ptNames) + return data def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None, silent=True): - """ calculates the number of possible values for each variable in a data set + """ calculates the number of possible values for each variable in a data set - **Arguments** + **Arguments** - - data: a list of examples + - data: a list of examples - - order: the ordering map between the variables in _data_ and _qBounds_ + - order: the ordering map between the variables in _data_ and _qBounds_ - - qBounds: the quantization bounds for the variables + - qBounds: the quantization bounds for the variables - **Returns** + **Returns** - a list with the number of possible values each variable takes on in the data set + a list with the number of possible values each variable takes on in the data set - **Notes** + **Notes** - - variables present in _qBounds_ will have their _nPossible_ number read - from _qbounds + - variables present in _qBounds_ will have their _nPossible_ number read + from _qbounds - - _nPossible_ for other numeric variables will be calculated + - _nPossible_ for other numeric variables will be calculated - """ - numericTypes = integer_types + (float, numpy.int64, numpy.int32, numpy.int16) + """ + numericTypes = (int, float, numpy.int64, numpy.int32, numpy.int16) - if not silent: - print('order:', order, len(order)) - print('qB:', qBounds) - # print('nQB:',nQBounds, len(nQBounds)) - assert (qBounds and len(order) == len(qBounds)) or (nQBounds and len(order) == len(nQBounds)), \ - 'order/qBounds mismatch' - nVars = len(order) - nPossible = [-1] * nVars - cols = list(range(nVars)) - for i in range(nVars): - if nQBounds and nQBounds[i] != 0: - nPossible[i] = -1 - cols.remove(i) - elif len(qBounds[i]) > 0: - nPossible[i] = len(qBounds[i]) - cols.remove(i) + if not silent: + print('order:', order, len(order)) + print('qB:', qBounds) + # print('nQB:',nQBounds, len(nQBounds)) + assert (qBounds and len(order) == len(qBounds)) or (nQBounds and len(order) == len(nQBounds)), \ + 'order/qBounds mismatch' + nVars = len(order) + nPossible = [-1] * nVars + cols = list(range(nVars)) + for i in range(nVars): + if nQBounds and nQBounds[i] != 0: + nPossible[i] = -1 + cols.remove(i) + elif len(qBounds[i]) > 0: + nPossible[i] = len(qBounds[i]) + cols.remove(i) - nPts = len(data) - for i in range(nPts): - for col in cols[:]: - d = data[i][order[col]] - if type(d) in numericTypes: - if int(d) == d: - nPossible[col] = max(int(d), nPossible[col]) - else: - nPossible[col] = -1 - cols.remove(col) - else: - if not silent: - print('bye bye col %d: %s' % (col, repr(d))) - nPossible[col] = -1 - cols.remove(col) - return [int(x) + 1 for x in nPossible] + nPts = len(data) + for i in range(nPts): + for col in cols[:]: + d = data[i][order[col]] + if type(d) in numericTypes: + if int(d) == d: + nPossible[col] = max(int(d), nPossible[col]) + else: + nPossible[col] = -1 + cols.remove(col) + else: + if not silent: + print('bye bye col %d: %s' % (col, repr(d))) + nPossible[col] = -1 + cols.remove(col) + return [int(x) + 1 for x in nPossible] def WritePickledData(outName, data): - """ writes either a .qdat.pkl or a .dat.pkl file + """ writes either a .qdat.pkl or a .dat.pkl file - **Arguments** + **Arguments** - - outName: the name of the file to be used + - outName: the name of the file to be used - - data: either an _MLData.MLDataSet_ or an _MLData.MLQuantDataSet_ + - data: either an _MLData.MLDataSet_ or an _MLData.MLQuantDataSet_ - """ - varNames = data.GetVarNames() - qBounds = data.GetQuantBounds() - ptNames = data.GetPtNames() - examples = data.GetAllData() - with open(outName, 'wb+') as outFile: - cPickle.dump(varNames, outFile) - cPickle.dump(qBounds, outFile) - cPickle.dump(ptNames, outFile) - cPickle.dump(examples, outFile) + """ + varNames = data.GetVarNames() + qBounds = data.GetQuantBounds() + ptNames = data.GetPtNames() + examples = data.GetAllData() + with open(outName, 'wb+') as outFile: + pickle.dump(varNames, outFile) + pickle.dump(qBounds, outFile) + pickle.dump(ptNames, outFile) + pickle.dump(examples, outFile) def TakeEnsemble(vect, ensembleIds, isDataVect=False): - """ + """ - >>> v = [10,20,30,40,50] - >>> TakeEnsemble(v,(1,2,3)) - [20, 30, 40] - >>> v = ['foo',10,20,30,40,50,1] - >>> TakeEnsemble(v,(1,2,3),isDataVect=True) - ['foo', 20, 30, 40, 1] + >>> v = [10,20,30,40,50] + >>> TakeEnsemble(v,(1,2,3)) + [20, 30, 40] + >>> v = ['foo',10,20,30,40,50,1] + >>> TakeEnsemble(v,(1,2,3),isDataVect=True) + ['foo', 20, 30, 40, 1] - """ - if isDataVect: - ensembleIds = [x + 1 for x in ensembleIds] - vect = [vect[0]] + [vect[x] for x in ensembleIds] + [vect[-1]] - else: - vect = [vect[x] for x in ensembleIds] - return vect + """ + if isDataVect: + ensembleIds = [x + 1 for x in ensembleIds] + vect = [vect[0]] + [vect[x] for x in ensembleIds] + [vect[-1]] + else: + vect = [vect[x] for x in ensembleIds] + return vect def DBToData(dbName, tableName, user='sysdba', password='masterkey', dupCol=-1, what='*', where='', join='', pickleCol=-1, pickleClass=None, ensembleIds=None): - """ constructs an _MLData.MLDataSet_ from a database + """ constructs an _MLData.MLDataSet_ from a database - **Arguments** + **Arguments** - - dbName: the name of the database to be opened + - dbName: the name of the database to be opened - - tableName: the table name containing the data in the database + - tableName: the table name containing the data in the database - - user: the user name to be used to connect to the database + - user: the user name to be used to connect to the database - - password: the password to be used to connect to the database + - password: the password to be used to connect to the database - - dupCol: if nonzero specifies which column should be used to recognize - duplicates. + - dupCol: if nonzero specifies which column should be used to recognize + duplicates. - **Returns** + **Returns** - an _MLData.MLDataSet_ + an _MLData.MLDataSet_ - **Notes** + **Notes** - - this uses Dbase.DataUtils functionality + - this uses Dbase.DataUtils functionality - """ - from rdkit.Dbase.DbConnection import DbConnect - conn = DbConnect(dbName, tableName, user, password) - res = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1) - nPts = len(res) - vals = [None] * nPts - ptNames = [None] * nPts - classWorks = True - for i in range(nPts): - tmp = list(res[i]) - ptNames[i] = tmp.pop(0) - if pickleCol >= 0: - if not pickleClass or not classWorks: - tmp[pickleCol] = cPickle.loads(str(tmp[pickleCol])) - else: - try: - tmp[pickleCol] = pickleClass(str(tmp[pickleCol])) - except Exception: - tmp[pickleCol] = cPickle.loads(str(tmp[pickleCol])) - classWorks = False - if ensembleIds: - tmp[pickleCol] = BitUtils.ConstructEnsembleBV(tmp[pickleCol], ensembleIds) - else: - if ensembleIds: - tmp = TakeEnsemble(tmp, ensembleIds, isDataVect=True) - vals[i] = tmp - varNames = conn.GetColumnNames(join=join, what=what) - data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames) - return data + """ + from rdkit.Dbase.DbConnection import DbConnect + conn = DbConnect(dbName, tableName, user, password) + res = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1) + nPts = len(res) + vals = [None] * nPts + ptNames = [None] * nPts + classWorks = True + for i in range(nPts): + tmp = list(res[i]) + ptNames[i] = tmp.pop(0) + if pickleCol >= 0: + if not pickleClass or not classWorks: + tmp[pickleCol] = pickle.loads(str(tmp[pickleCol])) + else: + try: + tmp[pickleCol] = pickleClass(str(tmp[pickleCol])) + except Exception: + tmp[pickleCol] = pickle.loads(str(tmp[pickleCol])) + classWorks = False + if ensembleIds: + tmp[pickleCol] = BitUtils.ConstructEnsembleBV(tmp[pickleCol], ensembleIds) + else: + if ensembleIds: + tmp = TakeEnsemble(tmp, ensembleIds, isDataVect=True) + vals[i] = tmp + varNames = conn.GetColumnNames(join=join, what=what) + data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames) + return data def TextToData(reader, ignoreCols=[], onlyCols=None): - """ constructs an _MLData.MLDataSet_ from a bunch of text -#DOC - **Arguments** - - reader needs to be iterable and return lists of elements - (like a csv.reader) + """ constructs an _MLData.MLDataSet_ from a bunch of text + #DOC + **Arguments** + - reader needs to be iterable and return lists of elements + (like a csv.reader) - **Returns** + **Returns** - an _MLData.MLDataSet_ + an _MLData.MLDataSet_ - """ + """ - varNames = next(reader) - if not onlyCols: - keepCols = [] - for i, name in enumerate(varNames): - if name not in ignoreCols: - keepCols.append(i) - else: - keepCols = [-1] * len(onlyCols) - for i, name in enumerate(varNames): - if name in onlyCols: - keepCols[onlyCols.index(name)] = i + varNames = next(reader) + if not onlyCols: + keepCols = [] + for i, name in enumerate(varNames): + if name not in ignoreCols: + keepCols.append(i) + else: + keepCols = [-1] * len(onlyCols) + for i, name in enumerate(varNames): + if name in onlyCols: + keepCols[onlyCols.index(name)] = i - nCols = len(varNames) - varNames = tuple([varNames[x] for x in keepCols]) - nVars = len(varNames) - vals = [] - ptNames = [] - for splitLine in reader: - if len(splitLine): - if len(splitLine) != nCols: - raise ValueError('unequal line lengths') - tmp = [splitLine[x] for x in keepCols] - ptNames.append(tmp[0]) - pt = [None] * (nVars - 1) - for j in range(nVars - 1): - try: - val = int(tmp[j + 1]) - except ValueError: - try: - val = float(tmp[j + 1]) - except ValueError: - val = str(tmp[j + 1]) - pt[j] = val - vals.append(pt) - data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames) - return data + nCols = len(varNames) + varNames = tuple([varNames[x] for x in keepCols]) + nVars = len(varNames) + vals = [] + ptNames = [] + for splitLine in reader: + if len(splitLine): + if len(splitLine) != nCols: + raise ValueError('unequal line lengths') + tmp = [splitLine[x] for x in keepCols] + ptNames.append(tmp[0]) + pt = [None] * (nVars - 1) + for j in range(nVars - 1): + try: + val = int(tmp[j + 1]) + except ValueError: + try: + val = float(tmp[j + 1]) + except ValueError: + val = str(tmp[j + 1]) + pt[j] = val + vals.append(pt) + data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames) + return data def TextFileToData(fName, onlyCols=None): - """ - #DOC + """ + #DOC - """ - ext = fName.split('.')[-1] - with open(fName, 'r') as inF: - if ext.upper() == 'CSV': - # CSV module distributed with python2.3 and later - splitter = csv.reader(inF) - else: - splitter = csv.reader(inF, delimiter='\t') - res = TextToData(splitter, onlyCols=onlyCols) - return res + """ + ext = fName.split('.')[-1] + with open(fName, 'r') as inF: + if ext.upper() == 'CSV': + # CSV module distributed with python2.3 and later + splitter = csv.reader(inF) + else: + splitter = csv.reader(inF, delimiter='\t') + res = TextToData(splitter, onlyCols=onlyCols) + return res def InitRandomNumbers(seed): - """ Seeds the random number generators + """ Seeds the random number generators - **Arguments** + **Arguments** - - seed: a 2-tuple containing integers to be used as the random number seeds + - seed: a 2-tuple containing integers to be used as the random number seeds - **Notes** + **Notes** - this seeds both the RDRandom generator and the one in the standard - Python _random_ module + this seeds both the RDRandom generator and the one in the standard + Python _random_ module - """ - from rdkit import RDRandom - RDRandom.seed(seed[0]) - random.seed(seed[0]) + """ + from rdkit import RDRandom + RDRandom.seed(seed[0]) + random.seed(seed[0]) def FilterData(inData, val, frac, col=-1, indicesToUse=None, indicesOnly=0): - """ -#DOC - """ - if frac < 0 or frac > 1: - raise ValueError('filter fraction out of bounds') - try: - inData[0][col] - except IndexError: - raise ValueError('target column index out of range') + """ + #DOC + """ + if frac < 0 or frac > 1: + raise ValueError('filter fraction out of bounds') + try: + inData[0][col] + except IndexError: + raise ValueError('target column index out of range') - # convert the input data to a list and sort them - if indicesToUse: - tmp = [inData[x] for x in indicesToUse] - else: - tmp = list(inData) - nOrig = len(tmp) - sortOrder = list(range(nOrig)) - sortOrder.sort(key=lambda x: tmp[x][col]) - tmp = [tmp[x] for x in sortOrder] + # convert the input data to a list and sort them + if indicesToUse: + tmp = [inData[x] for x in indicesToUse] + else: + tmp = list(inData) + nOrig = len(tmp) + sortOrder = list(range(nOrig)) + sortOrder.sort(key=lambda x: tmp[x][col]) + tmp = [tmp[x] for x in sortOrder] - # find the start of the entries with value val - start = 0 - while start < nOrig and tmp[start][col] != val: - start += 1 - if start >= nOrig: - raise ValueError('target value (%d) not found in data' % (val)) + # find the start of the entries with value val + start = 0 + while start < nOrig and tmp[start][col] != val: + start += 1 + if start >= nOrig: + raise ValueError('target value (%d) not found in data' % (val)) - # find the end of the entries with value val - finish = start + 1 - while finish < nOrig and tmp[finish][col] == val: - finish += 1 + # find the end of the entries with value val + finish = start + 1 + while finish < nOrig and tmp[finish][col] == val: + finish += 1 - # how many entries have the target value? - nWithVal = finish - start + # how many entries have the target value? + nWithVal = finish - start - # how many don't? - nOthers = len(tmp) - nWithVal + # how many don't? + nOthers = len(tmp) - nWithVal - currFrac = float(nWithVal) / nOrig - if currFrac < frac: - # - # We're going to keep most of (all) the points with the target value, - # We need to figure out how many of the other points we'll - # toss out - # - nTgtFinal = nWithVal - nFinal = int(round(nWithVal / frac)) - nOthersFinal = nFinal - nTgtFinal + currFrac = float(nWithVal) / nOrig + if currFrac < frac: + # + # We're going to keep most of (all) the points with the target value, + # We need to figure out how many of the other points we'll + # toss out + # + nTgtFinal = nWithVal + nFinal = int(round(nWithVal / frac)) + nOthersFinal = nFinal - nTgtFinal - # - # We may need to reduce the number of targets to keep - # because it may make it impossible to hit exactly the - # fraction we're trying for. Take care of that now - # - while float(nTgtFinal) / nFinal > frac: - nTgtFinal -= 1 - nFinal -= 1 + # + # We may need to reduce the number of targets to keep + # because it may make it impossible to hit exactly the + # fraction we're trying for. Take care of that now + # + while float(nTgtFinal) / nFinal > frac: + nTgtFinal -= 1 + nFinal -= 1 - else: - # - # There are too many points with the target value, - # we'll keep most of (all) the other points and toss a random - # selection of the target value points - # - nOthersFinal = nOthers - nFinal = int(round(nOthers / (1 - frac))) - nTgtFinal = nFinal - nOthersFinal + else: + # + # There are too many points with the target value, + # we'll keep most of (all) the other points and toss a random + # selection of the target value points + # + nOthersFinal = nOthers + nFinal = int(round(nOthers / (1 - frac))) + nTgtFinal = nFinal - nOthersFinal - # - # We may need to reduce the number of others to keep - # because it may make it impossible to hit exactly the - # fraction we're trying for. Take care of that now - # - while float(nTgtFinal) / nFinal < frac: - nOthersFinal -= 1 - nFinal -= 1 + # + # We may need to reduce the number of others to keep + # because it may make it impossible to hit exactly the + # fraction we're trying for. Take care of that now + # + while float(nTgtFinal) / nFinal < frac: + nOthersFinal -= 1 + nFinal -= 1 - others = list(range(start)) + list(range(finish, nOrig)) - othersTake = permutation(nOthers) - others = [others[x] for x in othersTake[:nOthersFinal]] + others = list(range(start)) + list(range(finish, nOrig)) + othersTake = permutation(nOthers) + others = [others[x] for x in othersTake[:nOthersFinal]] - targets = list(range(start, finish)) - targetsTake = permutation(nWithVal) - targets = [targets[x] for x in targetsTake[:nTgtFinal]] + targets = list(range(start, finish)) + targetsTake = permutation(nWithVal) + targets = [targets[x] for x in targetsTake[:nTgtFinal]] - # these are all the indices we'll be keeping - indicesToKeep = targets + others + # these are all the indices we'll be keeping + indicesToKeep = targets + others - res = [] - rej = [] - # now pull the points, but in random order - if not indicesOnly: - for i in permutation(nOrig): - if i in indicesToKeep: - res.append(tmp[i]) - else: - rej.append(tmp[i]) - else: - # EFF: this is slower than it needs to be - for i in permutation(nOrig): - if not indicesToUse: - idx = sortOrder[i] - else: - idx = indicesToUse[sortOrder[i]] - if i in indicesToKeep: - res.append(idx) - else: - rej.append(idx) - return res, rej + res = [] + rej = [] + # now pull the points, but in random order + if not indicesOnly: + for i in permutation(nOrig): + if i in indicesToKeep: + res.append(tmp[i]) + else: + rej.append(tmp[i]) + else: + # EFF: this is slower than it needs to be + for i in permutation(nOrig): + if not indicesToUse: + idx = sortOrder[i] + else: + idx = indicesToUse[sortOrder[i]] + if i in indicesToKeep: + res.append(idx) + else: + rej.append(idx) + return res, rej def CountResults(inData, col=-1, bounds=None): - """ #DOC - """ - counts = {} - for p in inData: - if not bounds: - r = p[col] - else: - act = p[col] - bound = 0 - placed = 0 - while not placed and bound < len(bounds): - if act < bounds[bound]: - r = bound - placed = 1 + """ #DOC + """ + counts = {} + for p in inData: + if not bounds: + r = p[col] else: - bound += 1 - if not placed: - r = bound + act = p[col] + bound = 0 + placed = 0 + while not placed and bound < len(bounds): + if act < bounds[bound]: + r = bound + placed = 1 + else: + bound += 1 + if not placed: + r = bound - counts[r] = counts.get(r, 0) + 1 - return counts + counts[r] = counts.get(r, 0) + 1 + return counts def RandomizeActivities(dataSet, shuffle=0, runDetails=None): - """ randomizes the activity values of a dataset + """ randomizes the activity values of a dataset - **Arguments** + **Arguments** - - dataSet: a _ML.Data.MLQuantDataSet_, the activities here will be randomized + - dataSet: a _ML.Data.MLQuantDataSet_, the activities here will be randomized - - shuffle: an optional toggle. If this is set, the activity values - will be shuffled (so the number in each class remains constant) + - shuffle: an optional toggle. If this is set, the activity values + will be shuffled (so the number in each class remains constant) - - runDetails: an optional CompositeRun object + - runDetails: an optional CompositeRun object - **Note** + **Note** - - _examples_ are randomized in place + - _examples_ are randomized in place - """ - nPts = dataSet.GetNPts() - if shuffle: - if runDetails: - runDetails.shuffled = 1 - acts = dataSet.GetResults()[:] - # While the random argument is the default, removing it will cause the shuffle - # tests in UnitTestScreenComposite to fail. - random.shuffle(acts, random=random.random) - else: # This part of the code isn't working as examples is not defined - if runDetails: - runDetails.randomized = 1 - nPossible = dataSet.GetNPossibleVals()[-1] - acts = [random.randint(0, nPossible) for _ in len(examples)] - for i in range(nPts): - tmp = dataSet[i] - tmp[-1] = acts[i] - dataSet[i] = tmp + """ + nPts = dataSet.GetNPts() + if shuffle: + if runDetails: + runDetails.shuffled = 1 + acts = dataSet.GetResults()[:] + # While the random argument is the default, removing it will cause the shuffle + # tests in UnitTestScreenComposite to fail. + random.shuffle(acts, random=random.random) + else: # This part of the code isn't working as examples is not defined + if runDetails: + runDetails.randomized = 1 + nPossible = dataSet.GetNPossibleVals()[-1] + acts = [random.randint(0, nPossible) for _ in len(examples)] + for i in range(nPts): + tmp = dataSet[i] + tmp[-1] = acts[i] + dataSet[i] = tmp # ------------------------------------ @@ -670,11 +669,11 @@ def RandomizeActivities(dataSet, shuffle=0, runDetails=None): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/ML/Data/FindQuantBounds.py b/rdkit/ML/Data/FindQuantBounds.py index aef83fe5d..f8eb1ef2b 100755 --- a/rdkit/ML/Data/FindQuantBounds.py +++ b/rdkit/ML/Data/FindQuantBounds.py @@ -2,7 +2,7 @@ # Copyright (C) 2001 greg Landrum # -from __future__ import print_function + from rdkit.Dbase import DbConnection from rdkit.ML.Data import Quantize diff --git a/rdkit/ML/Data/MLData.py b/rdkit/ML/Data/MLData.py index 7e2414ee7..385af8cff 100755 --- a/rdkit/ML/Data/MLData.py +++ b/rdkit/ML/Data/MLData.py @@ -5,364 +5,362 @@ """ classes to be used to help work with data sets """ -from __future__ import print_function + import copy import math import numpy -from rdkit.six import integer_types - -numericTypes = integer_types + (float, ) +numericTypes = (int, float) class MLDataSet(object): - """ A data set for holding general data (floats, ints, and strings) + """ A data set for holding general data (floats, ints, and strings) - **Note** - this is intended to be a read-only data structure - (i.e. after calling the constructor you cannot touch it) - """ - - def __init__(self, data, nVars=None, nPts=None, nPossibleVals=None, qBounds=None, varNames=None, - ptNames=None, nResults=1): - """ Constructor - - **Arguments** - - - data: a list of lists containing the data. The data are copied, so don't worry - about us overwriting them. - - - nVars: the number of variables - - - nPts: the number of points - - - nPossibleVals: an list containing the number of possible values - for each variable (should contain 0 when not relevant) - This is _nVars_ long - - - qBounds: a list of lists containing quantization bounds for variables - which are to be quantized (note, this class does not quantize - the variables itself, it merely stores quantization bounds. - an empty sublist indicates no quantization for a given variable - This is _nVars_ long - - - varNames: a list of the names of the variables. - This is _nVars_ long - - - ptNames: the names (labels) of the individual data points - This is _nPts_ long - - - nResults: the number of results columns in the data lists. This is usually - 1, but can be higher. + **Note** + this is intended to be a read-only data structure + (i.e. after calling the constructor you cannot touch it) """ - self.data = [x[:] for x in data] - self.nResults = nResults - if nVars is None: - nVars = len(self.data[0]) - self.nResults - self.nVars = nVars - if nPts is None: - nPts = len(data) - self.nPts = nPts - if qBounds is None: - qBounds = [[]] * len(self.data[0]) - self.qBounds = qBounds - if nPossibleVals is None: - nPossibleVals = self._CalcNPossible(self.data) - self.nPossibleVals = nPossibleVals - if varNames is None: - varNames = [''] * self.nVars - self.varNames = varNames - if ptNames is None: - ptNames = [''] * self.nPts - self.ptNames = ptNames - def _CalcNPossible(self, data): - """calculates the number of possible values of each variable (where possible) + def __init__(self, data, nVars=None, nPts=None, nPossibleVals=None, qBounds=None, varNames=None, + ptNames=None, nResults=1): + """ Constructor - **Arguments** + **Arguments** - -data: a list of examples to be used + - data: a list of lists containing the data. The data are copied, so don't worry + about us overwriting them. - **Returns** + - nVars: the number of variables - a list of nPossible values for each variable + - nPts: the number of points - """ - nVars = self.GetNVars() + self.nResults - nPossible = [-1] * nVars - cols = list(range(nVars)) - for i, bounds in enumerate(self.qBounds): - if len(bounds) > 0: - nPossible[i] = len(bounds) - cols.remove(i) + - nPossibleVals: an list containing the number of possible values + for each variable (should contain 0 when not relevant) + This is _nVars_ long - for i, pt in enumerate(self.data): - for col in cols[:]: - d = pt[col] - if type(d) in numericTypes: - if math.floor(d) == d: - nPossible[col] = max(math.floor(d), nPossible[col]) - else: - nPossible[col] = -1 - cols.remove(col) + - qBounds: a list of lists containing quantization bounds for variables + which are to be quantized (note, this class does not quantize + the variables itself, it merely stores quantization bounds. + an empty sublist indicates no quantization for a given variable + This is _nVars_ long + + - varNames: a list of the names of the variables. + This is _nVars_ long + + - ptNames: the names (labels) of the individual data points + This is _nPts_ long + + - nResults: the number of results columns in the data lists. This is usually + 1, but can be higher. + """ + self.data = [x[:] for x in data] + self.nResults = nResults + if nVars is None: + nVars = len(self.data[0]) - self.nResults + self.nVars = nVars + if nPts is None: + nPts = len(data) + self.nPts = nPts + if qBounds is None: + qBounds = [[]] * len(self.data[0]) + self.qBounds = qBounds + if nPossibleVals is None: + nPossibleVals = self._CalcNPossible(self.data) + self.nPossibleVals = nPossibleVals + if varNames is None: + varNames = [''] * self.nVars + self.varNames = varNames + if ptNames is None: + ptNames = [''] * self.nPts + self.ptNames = ptNames + + def _CalcNPossible(self, data): + """calculates the number of possible values of each variable (where possible) + + **Arguments** + + -data: a list of examples to be used + + **Returns** + + a list of nPossible values for each variable + + """ + nVars = self.GetNVars() + self.nResults + nPossible = [-1] * nVars + cols = list(range(nVars)) + for i, bounds in enumerate(self.qBounds): + if len(bounds) > 0: + nPossible[i] = len(bounds) + cols.remove(i) + + for i, pt in enumerate(self.data): + for col in cols[:]: + d = pt[col] + if type(d) in numericTypes: + if math.floor(d) == d: + nPossible[col] = max(math.floor(d), nPossible[col]) + else: + nPossible[col] = -1 + cols.remove(col) + else: + nPossible[col] = -1 + cols.remove(col) + return [int(x) + 1 for x in nPossible] + + def GetNResults(self): + return self.nResults + + def GetNVars(self): + return self.nVars + + def GetNPts(self): + return self.nPts + + def GetNPossibleVals(self): + return self.nPossibleVals + + def GetQuantBounds(self): + return self.qBounds + + def __getitem__(self, idx): + res = [self.ptNames[idx]] + self.data[idx][:] + return res + + def __setitem__(self, idx, val): + if len(val) != self.GetNVars() + self.GetNResults() + 1: + raise ValueError('bad value in assignment') + self.ptNames[idx] = val[0] + self.data[idx] = val[1:] + return val + + def GetNamedData(self): + """ returns a list of named examples + + **Note** + + a named example is the result of prepending the example + name to the data list + + """ + res = [None] * self.nPts + for i in range(self.nPts): + res[i] = [self.ptNames[i]] + self.data[i][:] + return res + + def GetAllData(self): + """ returns a *copy* of the data + + """ + return copy.deepcopy(self.data) + + def GetInputData(self): + """ returns the input data + + **Note** + + _inputData_ means the examples without their result fields + (the last _NResults_ entries) + + """ + v = self.GetNResults() + return [x[:-v] for x in self.data] + + def GetResults(self): + """ Returns the result fields from each example + + """ + if self.GetNResults() > 1: + v = self.GetNResults() + res = [x[-v:] for x in self.data] else: - nPossible[col] = -1 - cols.remove(col) - return [int(x) + 1 for x in nPossible] + res = [x[-1] for x in self.data] + return res - def GetNResults(self): - return self.nResults + def GetVarNames(self): + return self.varNames - def GetNVars(self): - return self.nVars + def GetPtNames(self): + return self.ptNames - def GetNPts(self): - return self.nPts + def AddPoint(self, pt): + self.data.append(pt[1:]) + self.ptNames.append(pt[0]) + self.nPts += 1 - def GetNPossibleVals(self): - return self.nPossibleVals - - def GetQuantBounds(self): - return self.qBounds - - def __getitem__(self, idx): - res = [self.ptNames[idx]] + self.data[idx][:] - return res - - def __setitem__(self, idx, val): - if len(val) != self.GetNVars() + self.GetNResults() + 1: - raise ValueError('bad value in assignment') - self.ptNames[idx] = val[0] - self.data[idx] = val[1:] - return val - - def GetNamedData(self): - """ returns a list of named examples - - **Note** - - a named example is the result of prepending the example - name to the data list - - """ - res = [None] * self.nPts - for i in range(self.nPts): - res[i] = [self.ptNames[i]] + self.data[i][:] - return res - - def GetAllData(self): - """ returns a *copy* of the data - - """ - return copy.deepcopy(self.data) - - def GetInputData(self): - """ returns the input data - - **Note** - - _inputData_ means the examples without their result fields - (the last _NResults_ entries) - - """ - v = self.GetNResults() - return [x[:-v] for x in self.data] - - def GetResults(self): - """ Returns the result fields from each example - - """ - if self.GetNResults() > 1: - v = self.GetNResults() - res = [x[-v:] for x in self.data] - else: - res = [x[-1] for x in self.data] - return res - - def GetVarNames(self): - return self.varNames - - def GetPtNames(self): - return self.ptNames - - def AddPoint(self, pt): - self.data.append(pt[1:]) - self.ptNames.append(pt[0]) - self.nPts += 1 - - def AddPoints(self, pts, names): - if len(pts) != len(names): - raise ValueError("input length mismatch") - self.data += pts - self.ptNames += names - self.nPts = len(self.data) + def AddPoints(self, pts, names): + if len(pts) != len(names): + raise ValueError("input length mismatch") + self.data += pts + self.ptNames += names + self.nPts = len(self.data) class MLQuantDataSet(MLDataSet): - """ a data set for holding quantized data + """ a data set for holding quantized data - **Note** + **Note** - this is intended to be a read-only data structure - (i.e. after calling the constructor you cannot touch it) + this is intended to be a read-only data structure + (i.e. after calling the constructor you cannot touch it) - **Big differences to MLDataSet** + **Big differences to MLDataSet** - 1) data are stored in a numpy array since they are homogenous + 1) data are stored in a numpy array since they are homogenous - 2) results are assumed to be quantized (i.e. no qBounds entry is required) - - """ - - def _CalcNPossible(self, data): - """calculates the number of possible values of each variable - - **Arguments** - - -data: a list of examples to be used - - **Returns** - - a list of nPossible values for each variable + 2) results are assumed to be quantized (i.e. no qBounds entry is required) """ - return [max(x) + 1 for x in numpy.transpose(data)] - def GetNamedData(self): - """ returns a list of named examples + def _CalcNPossible(self, data): + """calculates the number of possible values of each variable - **Note** + **Arguments** - a named example is the result of prepending the example - name to the data list + -data: a list of examples to be used - """ - res = [None] * self.nPts - for i in range(self.nPts): - res[i] = [self.ptNames[i]] + self.data[i].tolist() - return res + **Returns** - def GetAllData(self): - """ returns a *copy* of the data + a list of nPossible values for each variable - """ - return self.data.tolist() + """ + return [max(x) + 1 for x in numpy.transpose(data)] - def GetInputData(self): - """ returns the input data + def GetNamedData(self): + """ returns a list of named examples - **Note** + **Note** - _inputData_ means the examples without their result fields - (the last _NResults_ entries) + a named example is the result of prepending the example + name to the data list - """ - return (self.data[:, :-self.nResults]).tolist() + """ + res = [None] * self.nPts + for i in range(self.nPts): + res[i] = [self.ptNames[i]] + self.data[i].tolist() + return res - def GetResults(self): - """ Returns the result fields from each example + def GetAllData(self): + """ returns a *copy* of the data - """ - if self.GetNResults() > 1: - v = self.GetNResults() - res = [x[-v:] for x in self.data] - else: - res = [x[-1] for x in self.data] - return res + """ + return self.data.tolist() - def __init__(self, data, nVars=None, nPts=None, nPossibleVals=None, qBounds=None, varNames=None, - ptNames=None, nResults=1): - """ Constructor + def GetInputData(self): + """ returns the input data - **Arguments** + **Note** - - data: a list of lists containing the data. The data are copied, so don't worry - about us overwriting them. + _inputData_ means the examples without their result fields + (the last _NResults_ entries) - - nVars: the number of variables + """ + return (self.data[:, :-self.nResults]).tolist() - - nPts: the number of points + def GetResults(self): + """ Returns the result fields from each example - - nPossibleVals: an list containing the number of possible values - for each variable (should contain 0 when not relevant) - This is _nVars_ long + """ + if self.GetNResults() > 1: + v = self.GetNResults() + res = [x[-v:] for x in self.data] + else: + res = [x[-1] for x in self.data] + return res - - qBounds: a list of lists containing quantization bounds for variables - which are to be quantized (note, this class does not quantize - the variables itself, it merely stores quantization bounds. - an empty sublist indicates no quantization for a given variable - This is _nVars_ long + def __init__(self, data, nVars=None, nPts=None, nPossibleVals=None, qBounds=None, varNames=None, + ptNames=None, nResults=1): + """ Constructor - - varNames: a list of the names of the variables. - This is _nVars_ long + **Arguments** - - ptNames: the names (labels) of the individual data points - This is _nPts_ long + - data: a list of lists containing the data. The data are copied, so don't worry + about us overwriting them. - - nResults: the number of results columns in the data lists. This is usually - 1, but can be higher. - """ - self.data = numpy.array(data) - self.nResults = nResults - if nVars is None: - nVars = len(data[0]) - self.nResults - self.nVars = nVars - if nPts is None: - nPts = len(data) - self.nPts = nPts - if qBounds is None: - qBounds = [[]] * self.nVars - self.qBounds = qBounds - if nPossibleVals is None: - nPossibleVals = self._CalcNPossible(data) - self.nPossibleVals = nPossibleVals - if varNames is None: - varNames = [''] * self.nVars - self.varNames = varNames - if ptNames is None: - ptNames = [''] * self.nPts - self.ptNames = ptNames + - nVars: the number of variables + + - nPts: the number of points + + - nPossibleVals: an list containing the number of possible values + for each variable (should contain 0 when not relevant) + This is _nVars_ long + + - qBounds: a list of lists containing quantization bounds for variables + which are to be quantized (note, this class does not quantize + the variables itself, it merely stores quantization bounds. + an empty sublist indicates no quantization for a given variable + This is _nVars_ long + + - varNames: a list of the names of the variables. + This is _nVars_ long + + - ptNames: the names (labels) of the individual data points + This is _nPts_ long + + - nResults: the number of results columns in the data lists. This is usually + 1, but can be higher. + """ + self.data = numpy.array(data) + self.nResults = nResults + if nVars is None: + nVars = len(data[0]) - self.nResults + self.nVars = nVars + if nPts is None: + nPts = len(data) + self.nPts = nPts + if qBounds is None: + qBounds = [[]] * self.nVars + self.qBounds = qBounds + if nPossibleVals is None: + nPossibleVals = self._CalcNPossible(data) + self.nPossibleVals = nPossibleVals + if varNames is None: + varNames = [''] * self.nVars + self.varNames = varNames + if ptNames is None: + ptNames = [''] * self.nPts + self.ptNames = ptNames if __name__ == '__main__': - from . import DataUtils - examples = [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [1, 0, 0, 0, 1], [2, 1, 0, 0, 1], [2, 2, 1, 0, 1]] - varNames = ['foo1', 'foo2', 'foo3', 'foo4', 'res'] - ptNames = ['p1', 'p2', 'p3', 'p4', 'p5'] - dataset = MLQuantDataSet(examples, varNames=varNames, ptNames=ptNames) - DataUtils.WritePickledData('test_data/test.qdat.pkl', dataset) - print('nVars:', dataset.GetNVars()) - print('nPts:', dataset.GetNPts()) - print('nPoss:', dataset.GetNPossibleVals()) - print('qBounds:', dataset.GetQuantBounds()) - print('data:', dataset.GetAllData()) - print('Input data:', dataset.GetInputData()) - print('results:', dataset.GetResults()) + from . import DataUtils + examples = [[0, 0, 0, 0, 0], [0, 0, 0, 1, 0], [1, 0, 0, 0, 1], [2, 1, 0, 0, 1], [2, 2, 1, 0, 1]] + varNames = ['foo1', 'foo2', 'foo3', 'foo4', 'res'] + ptNames = ['p1', 'p2', 'p3', 'p4', 'p5'] + dataset = MLQuantDataSet(examples, varNames=varNames, ptNames=ptNames) + DataUtils.WritePickledData('test_data/test.qdat.pkl', dataset) + print('nVars:', dataset.GetNVars()) + print('nPts:', dataset.GetNPts()) + print('nPoss:', dataset.GetNPossibleVals()) + print('qBounds:', dataset.GetQuantBounds()) + print('data:', dataset.GetAllData()) + print('Input data:', dataset.GetInputData()) + print('results:', dataset.GetResults()) - print('nameddata:', dataset.GetNamedData()) + print('nameddata:', dataset.GetNamedData()) - examples = [ - ['foo', 1, 1.0, 1, 1.1], - ['foo', 2, 1.0, 1, 2.1], - ['foo', 3, 1.2, 1.1, 3.1], - ['foo', 4, 1.0, 1, 4.1], - ['foo', 5, 1.1, 1, 5.1], - ] - qBounds = [[], [], [], [], [2, 4]] - varNames = ['foo1', 'foo2', 'foo3', 'foo4', 'res'] - ptNames = ['p1', 'p2', 'p3', 'p4', 'p5'] - dataset = MLDataSet(examples, qBounds=qBounds) - DataUtils.WritePickledData('test_data/test.dat.pkl', dataset) - print('nVars:', dataset.GetNVars()) - print('nPts:', dataset.GetNPts()) - print('nPoss:', dataset.GetNPossibleVals()) - print('qBounds:', dataset.GetQuantBounds()) - print('data:', dataset.GetAllData()) - print('Input data:', dataset.GetInputData()) - print('results:', dataset.GetResults()) + examples = [ + ['foo', 1, 1.0, 1, 1.1], + ['foo', 2, 1.0, 1, 2.1], + ['foo', 3, 1.2, 1.1, 3.1], + ['foo', 4, 1.0, 1, 4.1], + ['foo', 5, 1.1, 1, 5.1], + ] + qBounds = [[], [], [], [], [2, 4]] + varNames = ['foo1', 'foo2', 'foo3', 'foo4', 'res'] + ptNames = ['p1', 'p2', 'p3', 'p4', 'p5'] + dataset = MLDataSet(examples, qBounds=qBounds) + DataUtils.WritePickledData('test_data/test.dat.pkl', dataset) + print('nVars:', dataset.GetNVars()) + print('nPts:', dataset.GetNPts()) + print('nPoss:', dataset.GetNPossibleVals()) + print('qBounds:', dataset.GetQuantBounds()) + print('data:', dataset.GetAllData()) + print('Input data:', dataset.GetInputData()) + print('results:', dataset.GetResults()) - print('nameddata:', dataset.GetNamedData()) + print('nameddata:', dataset.GetNamedData()) diff --git a/rdkit/ML/Data/Quantize.py b/rdkit/ML/Data/Quantize.py index d6a7fede6..0552e179a 100755 --- a/rdkit/ML/Data/Quantize.py +++ b/rdkit/ML/Data/Quantize.py @@ -14,366 +14,367 @@ lie. [0.9,1.,1.1,2.,2.2] -> [0,1,1,2,2] """ -from __future__ import print_function + import numpy from rdkit.ML.InfoTheory import entropy -from rdkit.six.moves import zip, map, range try: - from rdkit.ML.Data import cQuantize + from rdkit.ML.Data import cQuantize except ImportError: - hascQuantize = 0 + hascQuantize = 0 else: - hascQuantize = 1 + hascQuantize = 1 _float_tol = 1e-8 def feq(v1, v2, tol=_float_tol): - """ floating point equality with a tolerance factor + """ floating point equality with a tolerance factor - **Arguments** + **Arguments** - - v1: a float + - v1: a float - - v2: a float + - v2: a float - - tol: the tolerance for comparison + - tol: the tolerance for comparison - **Returns** + **Returns** - 0 or 1 + 0 or 1 - """ - return abs(v1 - v2) < tol + """ + return abs(v1 - v2) < tol def FindVarQuantBound(vals, results, nPossibleRes): - """ Uses FindVarMultQuantBounds, only here for historic reasons - """ - bounds, gain = FindVarMultQuantBounds(vals, 1, results, nPossibleRes) - return (bounds[0], gain) + """ Uses FindVarMultQuantBounds, only here for historic reasons + """ + bounds, gain = FindVarMultQuantBounds(vals, 1, results, nPossibleRes) + return (bounds[0], gain) def _GenVarTable(vals, cuts, starts, results, nPossibleRes): - """ Primarily intended for internal use + """ Primarily intended for internal use - constructs a variable table for the data passed in - The table for a given variable records the number of times each possible value - of that variable appears for each possible result of the function. + constructs a variable table for the data passed in + The table for a given variable records the number of times each possible value + of that variable appears for each possible result of the function. - **Arguments** + **Arguments** - - vals: a 1D Numeric array with the values of the variables + - vals: a 1D Numeric array with the values of the variables - - cuts: a list with the indices of the quantization bounds - (indices are into _starts_ ) + - cuts: a list with the indices of the quantization bounds + (indices are into _starts_ ) - - starts: a list of potential starting points for quantization bounds + - starts: a list of potential starting points for quantization bounds - - results: a 1D Numeric array of integer result codes + - results: a 1D Numeric array of integer result codes - - nPossibleRes: an integer with the number of possible result codes + - nPossibleRes: an integer with the number of possible result codes - **Returns** + **Returns** - the varTable, a 2D Numeric array which is nVarValues x nPossibleRes + the varTable, a 2D Numeric array which is nVarValues x nPossibleRes - **Notes** + **Notes** - - _vals_ should be sorted! + - _vals_ should be sorted! - """ - nVals = len(cuts) + 1 - varTable = numpy.zeros((nVals, nPossibleRes), 'i') - idx = 0 - for i in range(nVals - 1): - cut = cuts[i] - while idx < starts[cut]: - varTable[i, results[idx]] += 1 - idx += 1 - while idx < len(vals): - varTable[-1, results[idx]] += 1 - idx += 1 - return varTable + """ + nVals = len(cuts) + 1 + varTable = numpy.zeros((nVals, nPossibleRes), 'i') + idx = 0 + for i in range(nVals - 1): + cut = cuts[i] + while idx < starts[cut]: + varTable[i, results[idx]] += 1 + idx += 1 + while idx < len(vals): + varTable[-1, results[idx]] += 1 + idx += 1 + return varTable def _PyRecurseOnBounds(vals, cuts, which, starts, results, nPossibleRes, varTable=None): - """ Primarily intended for internal use + """ Primarily intended for internal use - Recursively finds the best quantization boundaries + Recursively finds the best quantization boundaries - **Arguments** + **Arguments** - - vals: a 1D Numeric array with the values of the variables, - this should be sorted + - vals: a 1D Numeric array with the values of the variables, + this should be sorted - - cuts: a list with the indices of the quantization bounds - (indices are into _starts_ ) + - cuts: a list with the indices of the quantization bounds + (indices are into _starts_ ) - - which: an integer indicating which bound is being adjusted here - (and index into _cuts_ ) + - which: an integer indicating which bound is being adjusted here + (and index into _cuts_ ) - - starts: a list of potential starting points for quantization bounds + - starts: a list of potential starting points for quantization bounds - - results: a 1D Numeric array of integer result codes + - results: a 1D Numeric array of integer result codes - - nPossibleRes: an integer with the number of possible result codes + - nPossibleRes: an integer with the number of possible result codes - **Returns** + **Returns** - - a 2-tuple containing: + - a 2-tuple containing: - 1) the best information gain found so far + 1) the best information gain found so far - 2) a list of the quantization bound indices ( _cuts_ for the best case) + 2) a list of the quantization bound indices ( _cuts_ for the best case) - **Notes** + **Notes** - - this is not even remotely efficient, which is why a C replacement - was written + - this is not even remotely efficient, which is why a C replacement + was written - """ - nBounds = len(cuts) - maxGain = -1e6 - bestCuts = None - highestCutHere = len(starts) - nBounds + which - if varTable is None: - varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) - while cuts[which] <= highestCutHere: - varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) - gainHere = entropy.InfoGain(varTable) - if gainHere > maxGain: - maxGain = gainHere - bestCuts = cuts[:] - # recurse on the next vars if needed - if which < nBounds - 1: - gainHere, cutsHere = _RecurseOnBounds(vals, cuts[:], which + 1, starts, results, nPossibleRes, - varTable=varTable) - if gainHere > maxGain: - maxGain = gainHere - bestCuts = cutsHere - # update this cut - cuts[which] += 1 - for i in range(which + 1, nBounds): - if cuts[i] == cuts[i - 1]: - cuts[i] += 1 + """ + nBounds = len(cuts) + maxGain = -1e6 + bestCuts = None + highestCutHere = len(starts) - nBounds + which + if varTable is None: + varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) + while cuts[which] <= highestCutHere: + varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) + gainHere = entropy.InfoGain(varTable) + if gainHere > maxGain: + maxGain = gainHere + bestCuts = cuts[:] + # recurse on the next vars if needed + if which < nBounds - 1: + gainHere, cutsHere = _RecurseOnBounds(vals, cuts[:], which + 1, starts, results, nPossibleRes, + varTable=varTable) + if gainHere > maxGain: + maxGain = gainHere + bestCuts = cutsHere + # update this cut + cuts[which] += 1 + for i in range(which + 1, nBounds): + if cuts[i] == cuts[i - 1]: + cuts[i] += 1 - return maxGain, bestCuts + return maxGain, bestCuts def _NewPyRecurseOnBounds(vals, cuts, which, starts, results, nPossibleRes, varTable=None): - """ Primarily intended for internal use + """ Primarily intended for internal use - Recursively finds the best quantization boundaries + Recursively finds the best quantization boundaries - **Arguments** + **Arguments** - - vals: a 1D Numeric array with the values of the variables, - this should be sorted + - vals: a 1D Numeric array with the values of the variables, + this should be sorted - - cuts: a list with the indices of the quantization bounds - (indices are into _starts_ ) + - cuts: a list with the indices of the quantization bounds + (indices are into _starts_ ) - - which: an integer indicating which bound is being adjusted here - (and index into _cuts_ ) + - which: an integer indicating which bound is being adjusted here + (and index into _cuts_ ) - - starts: a list of potential starting points for quantization bounds + - starts: a list of potential starting points for quantization bounds - - results: a 1D Numeric array of integer result codes + - results: a 1D Numeric array of integer result codes - - nPossibleRes: an integer with the number of possible result codes + - nPossibleRes: an integer with the number of possible result codes - **Returns** + **Returns** - - a 2-tuple containing: + - a 2-tuple containing: - 1) the best information gain found so far + 1) the best information gain found so far - 2) a list of the quantization bound indices ( _cuts_ for the best case) + 2) a list of the quantization bound indices ( _cuts_ for the best case) - **Notes** + **Notes** - - this is not even remotely efficient, which is why a C replacement - was written + - this is not even remotely efficient, which is why a C replacement + was written - """ - nBounds = len(cuts) - maxGain = -1e6 - bestCuts = None - highestCutHere = len(starts) - nBounds + which - if varTable is None: - varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) - while cuts[which] <= highestCutHere: - gainHere = entropy.InfoGain(varTable) - if gainHere > maxGain: - maxGain = gainHere - bestCuts = cuts[:] - # recurse on the next vars if needed - if which < nBounds - 1: - gainHere, cutsHere = _RecurseOnBounds(vals, cuts[:], which + 1, starts, results, nPossibleRes, - varTable=None) - if gainHere > maxGain: - maxGain = gainHere - bestCuts = cutsHere - # update this cut - oldCut = cuts[which] - cuts[which] += 1 - bot = starts[oldCut] - if oldCut + 1 < len(starts): - top = starts[oldCut + 1] - else: - top = starts[-1] - for i in range(bot, top): - v = results[i] - varTable[which, v] += 1 - varTable[which + 1, v] -= 1 - for i in range(which + 1, nBounds): - if cuts[i] == cuts[i - 1]: - cuts[i] += 1 + """ + nBounds = len(cuts) + maxGain = -1e6 + bestCuts = None + highestCutHere = len(starts) - nBounds + which + if varTable is None: + varTable = _GenVarTable(vals, cuts, starts, results, nPossibleRes) + while cuts[which] <= highestCutHere: + gainHere = entropy.InfoGain(varTable) + if gainHere > maxGain: + maxGain = gainHere + bestCuts = cuts[:] + # recurse on the next vars if needed + if which < nBounds - 1: + gainHere, cutsHere = _RecurseOnBounds(vals, cuts[:], which + 1, starts, results, nPossibleRes, + varTable=None) + if gainHere > maxGain: + maxGain = gainHere + bestCuts = cutsHere + # update this cut + oldCut = cuts[which] + cuts[which] += 1 + bot = starts[oldCut] + if oldCut + 1 < len(starts): + top = starts[oldCut + 1] + else: + top = starts[-1] + for i in range(bot, top): + v = results[i] + varTable[which, v] += 1 + varTable[which + 1, v] -= 1 + for i in range(which + 1, nBounds): + if cuts[i] == cuts[i - 1]: + cuts[i] += 1 - return maxGain, bestCuts + return maxGain, bestCuts def _NewPyFindStartPoints(sortVals, sortResults, nData): - # -------------------------------- - # - # find all possible dividing points - # - # There are a couple requirements for a dividing point: - # 1) the dependent variable (descriptor) must change across it, - # 2) the result score must change across it - # - # So, in the list [(0,0),(1,0),(1,1),(2,1)]: - # we should divide before (1,0) and (2,1) - # - # -------------------------------- - startNext = [] - tol = 1e-8 - blockAct = sortResults[0] - lastBlockAct = None - lastDiv = None - i = 1 - while i < nData: - # move to the end of this block: - while i < nData and sortVals[i] - sortVals[i - 1] <= tol: - if sortResults[i] != blockAct: - # this block is heterogeneous - blockAct = -1 - i += 1 - if lastBlockAct is None: - # first time through: - lastBlockAct = blockAct - lastDiv = i - else: - if blockAct == -1 or lastBlockAct == -1 or blockAct != lastBlockAct: + # -------------------------------- + # + # find all possible dividing points + # + # There are a couple requirements for a dividing point: + # 1) the dependent variable (descriptor) must change across it, + # 2) the result score must change across it + # + # So, in the list [(0,0),(1,0),(1,1),(2,1)]: + # we should divide before (1,0) and (2,1) + # + # -------------------------------- + startNext = [] + tol = 1e-8 + blockAct = sortResults[0] + lastBlockAct = None + lastDiv = None + i = 1 + while i < nData: + # move to the end of this block: + while i < nData and sortVals[i] - sortVals[i - 1] <= tol: + if sortResults[i] != blockAct: + # this block is heterogeneous + blockAct = -1 + i += 1 + if lastBlockAct is None: + # first time through: + lastBlockAct = blockAct + lastDiv = i + else: + if blockAct == -1 or lastBlockAct == -1 or blockAct != lastBlockAct: + startNext.append(lastDiv) + lastDiv = i + lastBlockAct = blockAct + else: + lastDiv = i + if i < nData: + blockAct = sortResults[i] + i += 1 + # catch the case that the last point also sets a bin: + if blockAct != lastBlockAct: startNext.append(lastDiv) - lastDiv = i - lastBlockAct = blockAct - else: - lastDiv = i - if i < nData: - blockAct = sortResults[i] - i += 1 - # catch the case that the last point also sets a bin: - if blockAct != lastBlockAct: - startNext.append(lastDiv) - return startNext + return startNext def FindVarMultQuantBounds(vals, nBounds, results, nPossibleRes): - """ finds multiple quantization bounds for a single variable + """ finds multiple quantization bounds for a single variable - **Arguments** + **Arguments** - - vals: sequence of variable values (assumed to be floats) + - vals: sequence of variable values (assumed to be floats) - - nBounds: the number of quantization bounds to find + - nBounds: the number of quantization bounds to find - - results: a list of result codes (should be integers) + - results: a list of result codes (should be integers) - - nPossibleRes: an integer with the number of possible values of the - result variable + - nPossibleRes: an integer with the number of possible values of the + result variable - **Returns** + **Returns** - - a 2-tuple containing: + - a 2-tuple containing: - 1) a list of the quantization bounds (floats) + 1) a list of the quantization bounds (floats) - 2) the information gain associated with this quantization + 2) the information gain associated with this quantization - """ - assert len(vals) == len(results), 'vals/results length mismatch' + """ + assert len(vals) == len(results), 'vals/results length mismatch' - nData = len(vals) - if nData == 0: - return [], -1e8 + nData = len(vals) + if nData == 0: + return [], -1e8 - # sort the variable values: - svs = list(zip(vals, results)) - svs.sort() - sortVals, sortResults = zip(*svs) - startNext = _FindStartPoints(sortVals, sortResults, nData) - if not len(startNext): - return [0], 0.0 - if len(startNext) < nBounds: - nBounds = len(startNext) - 1 - if nBounds == 0: - nBounds = 1 - initCuts = list(range(nBounds)) - maxGain, bestCuts = _RecurseOnBounds(sortVals, initCuts, 0, startNext, sortResults, nPossibleRes) - quantBounds = [] - nVs = len(sortVals) - for cut in bestCuts: - idx = startNext[cut] - if idx == nVs: - quantBounds.append(sortVals[-1]) - elif idx == 0: - quantBounds.append(sortVals[idx]) - else: - quantBounds.append((sortVals[idx] + sortVals[idx - 1]) / 2.) + # sort the variable values: + svs = list(zip(vals, results)) + svs.sort() + sortVals, sortResults = zip(*svs) + startNext = _FindStartPoints(sortVals, sortResults, nData) + if not len(startNext): + return [0], 0.0 + if len(startNext) < nBounds: + nBounds = len(startNext) - 1 + if nBounds == 0: + nBounds = 1 + initCuts = list(range(nBounds)) + maxGain, bestCuts = _RecurseOnBounds( + sortVals, initCuts, 0, startNext, sortResults, nPossibleRes) + quantBounds = [] + nVs = len(sortVals) + for cut in bestCuts: + idx = startNext[cut] + if idx == nVs: + quantBounds.append(sortVals[-1]) + elif idx == 0: + quantBounds.append(sortVals[idx]) + else: + quantBounds.append((sortVals[idx] + sortVals[idx - 1]) / 2.) + + return quantBounds, maxGain - return quantBounds, maxGain # hascQuantize=0 if hascQuantize: - _RecurseOnBounds = cQuantize._RecurseOnBounds - _FindStartPoints = cQuantize._FindStartPoints + _RecurseOnBounds = cQuantize._RecurseOnBounds + _FindStartPoints = cQuantize._FindStartPoints else: - _RecurseOnBounds = _NewPyRecurseOnBounds - _FindStartPoints = _NewPyFindStartPoints + _RecurseOnBounds = _NewPyRecurseOnBounds + _FindStartPoints = _NewPyFindStartPoints if __name__ == '__main__': - if 1: - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), - (2.1, 0), (2.2, 1), (2.3, 0)] - varValues = list(map(lambda x: x[0], d)) - resCodes = list(map(lambda x: x[1], d)) - nPossibleRes = 2 - res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - print('RES:', res) - target = ([1.3, 2.05], .34707) - else: - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), - (2.1, 0), (2.2, 1), (2.3, 0)] - varValues = list(map(lambda x: x[0], d)) - resCodes = list(map(lambda x: x[1], d)) - nPossibleRes = 2 - res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes) - print(res) - # sys.exit(1) - d = [(1.4, 1), (1.4, 0)] + if 1: + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), + (2.1, 0), (2.2, 1), (2.3, 0)] + varValues = list(map(lambda x: x[0], d)) + resCodes = list(map(lambda x: x[1], d)) + nPossibleRes = 2 + res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + print('RES:', res) + target = ([1.3, 2.05], .34707) + else: + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), + (2.1, 0), (2.2, 1), (2.3, 0)] + varValues = list(map(lambda x: x[0], d)) + resCodes = list(map(lambda x: x[1], d)) + nPossibleRes = 2 + res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes) + print(res) + # sys.exit(1) + d = [(1.4, 1), (1.4, 0)] - varValues = list(map(lambda x: x[0], d)) - resCodes = list(map(lambda x: x[1], d)) - nPossibleRes = 2 - res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes) - print(res) + varValues = list(map(lambda x: x[0], d)) + resCodes = list(map(lambda x: x[1], d)) + nPossibleRes = 2 + res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes) + print(res) - d = [(1.4, 0), (1.4, 0), (1.6, 1)] - varValues = list(map(lambda x: x[0], d)) - resCodes = list(map(lambda x: x[1], d)) - nPossibleRes = 2 - res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - print(res) + d = [(1.4, 0), (1.4, 0), (1.6, 1)] + varValues = list(map(lambda x: x[0], d)) + resCodes = list(map(lambda x: x[1], d)) + nPossibleRes = 2 + res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + print(res) diff --git a/rdkit/ML/Data/SplitData.py b/rdkit/ML/Data/SplitData.py index b69467a8b..ac12a9abc 100755 --- a/rdkit/ML/Data/SplitData.py +++ b/rdkit/ML/Data/SplitData.py @@ -2,7 +2,7 @@ # Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC # All Rights Reserved # -from __future__ import print_function + import random diff --git a/rdkit/ML/Data/UnitTestDoctests.py b/rdkit/ML/Data/UnitTestDoctests.py index 889172e49..308f0a1bd 100755 --- a/rdkit/ML/Data/UnitTestDoctests.py +++ b/rdkit/ML/Data/UnitTestDoctests.py @@ -9,44 +9,44 @@ import unittest from rdkit.ML.Data import SplitData, DataUtils from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO def load_tests(loader, tests, ignore): - """ Add the Doctests from the module """ - tests.addTests(doctest.DocTestSuite(SplitData, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(DataUtils, optionflags=doctest.ELLIPSIS)) - return tests + """ Add the Doctests from the module """ + tests.addTests(doctest.DocTestSuite(SplitData, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(DataUtils, optionflags=doctest.ELLIPSIS)) + return tests class TestCaseSplitData(unittest.TestCase): - def test_exceptions(self): - self.assertRaises(ValueError, SplitData.SplitIndices, 10, -0.1) - self.assertRaises(ValueError, SplitData.SplitIndices, 10, 1.1) + def test_exceptions(self): + self.assertRaises(ValueError, SplitData.SplitIndices, 10, -0.1) + self.assertRaises(ValueError, SplitData.SplitIndices, 10, 1.1) - f = StringIO() - with redirect_stdout(f): - SplitData.SplitIndices(10, 0.5, replacement=True, silent=False) - s = f.getvalue() - self.assertIn('Training', s) - self.assertIn('hold-out', s) + f = StringIO() + with redirect_stdout(f): + SplitData.SplitIndices(10, 0.5, replacement=True, silent=False) + s = f.getvalue() + self.assertIn('Training', s) + self.assertIn('hold-out', s) - def test_SplitData(self): - self.assertRaises(ValueError, SplitData.SplitDataSet, None, -1.1) - self.assertRaises(ValueError, SplitData.SplitDataSet, None, 1.1) + def test_SplitData(self): + self.assertRaises(ValueError, SplitData.SplitDataSet, None, -1.1) + self.assertRaises(ValueError, SplitData.SplitDataSet, None, 1.1) - data = list(range(10)) - DataUtils.InitRandomNumbers((23, 42)) - f = StringIO() - with redirect_stdout(f): - result = SplitData.SplitDataSet(data, 0.5) - self.assertEqual(set(result[0]).intersection(result[1]), set()) - self.assertEqual(len(result[0]), 5) - s = f.getvalue() - self.assertIn('Training', s) - self.assertIn('hold-out', s) + data = list(range(10)) + DataUtils.InitRandomNumbers((23, 42)) + f = StringIO() + with redirect_stdout(f): + result = SplitData.SplitDataSet(data, 0.5) + self.assertEqual(set(result[0]).intersection(result[1]), set()) + self.assertEqual(len(result[0]), 5) + s = f.getvalue() + self.assertIn('Training', s) + self.assertIn('hold-out', s) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/Data/UnitTestMLData.py b/rdkit/ML/Data/UnitTestMLData.py index 90573f277..daf296fdc 100755 --- a/rdkit/ML/Data/UnitTestMLData.py +++ b/rdkit/ML/Data/UnitTestMLData.py @@ -10,151 +10,152 @@ import unittest from rdkit import RDConfig from rdkit.ML.Data import MLData, DataUtils -from rdkit.six import StringIO -from rdkit.six.moves import cPickle +from io import StringIO +import pickle class TestCase(unittest.TestCase): - def setUpQuantLoad(self): - self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.qdat') + def setUpQuantLoad(self): + self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.qdat') - def setUpGeneralLoad(self): - self.d = DataUtils.BuildDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.dat') + def setUpGeneralLoad(self): + self.d = DataUtils.BuildDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.dat') - def testQuantLoad(self): - # " testing QuantDataSet load" - self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.qdat') + def testQuantLoad(self): + # " testing QuantDataSet load" + self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.qdat') - def testQuantProps(self): - # " testing QuantDataSet Properties" - self.setUpQuantLoad() - assert self.d.GetNPts() == 5, 'nPts wrong' - assert self.d.GetNVars() == 4, 'nVars wrong' - assert self.d.GetNResults() == 1, 'nResults wrong' - assert self.d.GetVarNames() == ['foo1', 'foo2', 'foo3', 'foo4', 'res'], 'varNames wrong' - assert self.d.GetPtNames() == ['p1', 'p2', 'p3', 'p4', 'p5'], 'ptNames wrong' - assert self.d.GetNPossibleVals() == [3, 3, 2, 2, 2], 'nPossible Wrong' - assert self.d.GetQuantBounds() == [[], [], [], [], []], 'quantBounds Wrong' - assert self.d.GetResults() == [0, 0, 1, 1, 1], 'GetResults wrong' - assert self.d.GetAllData()[1] == [0, 0, 0, 1, 0], 'GetAllData wrong' - assert self.d.GetInputData()[3] == [2, 1, 0, 0], 'GetInputData wrong' - assert self.d.GetNamedData()[2] == ['p3', 1, 0, 0, 0, 1], 'GetNamedData wrong' + def testQuantProps(self): + # " testing QuantDataSet Properties" + self.setUpQuantLoad() + assert self.d.GetNPts() == 5, 'nPts wrong' + assert self.d.GetNVars() == 4, 'nVars wrong' + assert self.d.GetNResults() == 1, 'nResults wrong' + assert self.d.GetVarNames() == ['foo1', 'foo2', 'foo3', 'foo4', 'res'], 'varNames wrong' + assert self.d.GetPtNames() == ['p1', 'p2', 'p3', 'p4', 'p5'], 'ptNames wrong' + assert self.d.GetNPossibleVals() == [3, 3, 2, 2, 2], 'nPossible Wrong' + assert self.d.GetQuantBounds() == [[], [], [], [], []], 'quantBounds Wrong' + assert self.d.GetResults() == [0, 0, 1, 1, 1], 'GetResults wrong' + assert self.d.GetAllData()[1] == [0, 0, 0, 1, 0], 'GetAllData wrong' + assert self.d.GetInputData()[3] == [2, 1, 0, 0], 'GetInputData wrong' + assert self.d.GetNamedData()[2] == ['p3', 1, 0, 0, 0, 1], 'GetNamedData wrong' - def testQuantPickle(self): - # " testing QuantDataSet pickling " - self.setUpQuantLoad() - DataUtils.WritePickledData(RDConfig.RDCodeDir + '/ML/Data/test_data/testquant.qdat.pkl', self.d) - with open(RDConfig.RDCodeDir + '/ML/Data/test_data/testquant.qdat.pkl', 'rb') as f: - vNames = cPickle.load(f) - qBounds = cPickle.load(f) - ptNames = cPickle.load(f) - examples = cPickle.load(f) - d = MLData.MLQuantDataSet(examples, varNames=vNames, qBounds=qBounds, ptNames=ptNames) - assert self.d.GetNPts() == d.GetNPts(), 'nPts wrong' - assert self.d.GetNVars() == d.GetNVars(), 'nVars wrong' - assert self.d.GetNResults() == d.GetNResults(), 'nResults wrong' - assert self.d.GetVarNames() == d.GetVarNames(), 'varNames wrong' - assert self.d.GetPtNames() == d.GetPtNames(), 'ptNames wrong' - assert self.d.GetNPossibleVals() == d.GetNPossibleVals(), 'nPossible Wrong' - assert self.d.GetQuantBounds() == d.GetQuantBounds(), 'quantBounds Wrong' - assert self.d.GetResults() == d.GetResults(), 'GetResults wrong' - assert self.d.GetAllData()[1] == d.GetAllData()[1], 'GetAllData wrong' - assert self.d.GetInputData()[3] == d.GetInputData()[3], 'GetInputData wrong' - assert self.d.GetNamedData()[2] == d.GetNamedData()[2], 'GetNamedData wrong' + def testQuantPickle(self): + # " testing QuantDataSet pickling " + self.setUpQuantLoad() + DataUtils.WritePickledData( + RDConfig.RDCodeDir + '/ML/Data/test_data/testquant.qdat.pkl', self.d) + with open(RDConfig.RDCodeDir + '/ML/Data/test_data/testquant.qdat.pkl', 'rb') as f: + vNames = pickle.load(f) + qBounds = pickle.load(f) + ptNames = pickle.load(f) + examples = pickle.load(f) + d = MLData.MLQuantDataSet(examples, varNames=vNames, qBounds=qBounds, ptNames=ptNames) + assert self.d.GetNPts() == d.GetNPts(), 'nPts wrong' + assert self.d.GetNVars() == d.GetNVars(), 'nVars wrong' + assert self.d.GetNResults() == d.GetNResults(), 'nResults wrong' + assert self.d.GetVarNames() == d.GetVarNames(), 'varNames wrong' + assert self.d.GetPtNames() == d.GetPtNames(), 'ptNames wrong' + assert self.d.GetNPossibleVals() == d.GetNPossibleVals(), 'nPossible Wrong' + assert self.d.GetQuantBounds() == d.GetQuantBounds(), 'quantBounds Wrong' + assert self.d.GetResults() == d.GetResults(), 'GetResults wrong' + assert self.d.GetAllData()[1] == d.GetAllData()[1], 'GetAllData wrong' + assert self.d.GetInputData()[3] == d.GetInputData()[3], 'GetInputData wrong' + assert self.d.GetNamedData()[2] == d.GetNamedData()[2], 'GetNamedData wrong' - def testGeneralLoad(self): - # " testing DataSet load" - self.d = DataUtils.BuildDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.dat') + def testGeneralLoad(self): + # " testing DataSet load" + self.d = DataUtils.BuildDataSet(RDConfig.RDCodeDir + '/ML/Data/test_data/test.dat') - def testGeneralProps(self): - # " testing DataSet properties" - self.setUpGeneralLoad() - assert self.d.GetNPts() == 5, 'nPts wrong' - assert self.d.GetNVars() == 4, 'nVars wrong' - assert self.d.GetNResults() == 1, 'nResults wrong' - assert self.d.GetVarNames() == ['foo1', 'foo2', 'foo3', 'foo4', 'res'], 'varNames wrong' - assert self.d.GetPtNames() == ['p1', 'p2', 'p3', 'p4', 'p5'], 'ptNames wrong' - assert self.d.GetNPossibleVals() == [0, 6, 0, 0, 3], 'nPossible Wrong' - assert self.d.GetQuantBounds() == [[], [], [], [], [2, 4]], 'quantBounds Wrong' - assert self.d.GetResults() == [1.1, 2.1, 3.1, 4.1, 5.1], 'GetResults wrong' - assert self.d.GetAllData()[1] == ['foo', 2, 1.0, 1, 2.1], 'GetAllData wrong' - assert self.d.GetInputData()[4] == ['foo', 5, 1.1, 1], 'GetInputData wrong' - assert self.d.GetNamedData()[3] == ['p4', 'foo', 4, 1.0, 1, 4.1], 'GetNamedData wrong' - assert self.d[1] == ['p2', 'foo', 2, 1.0, 1, 2.1] - assert self.d[3] == ['p4', 'foo', 4, 1.0, 1, 4.1] - self.d[3] = self.d[1] - assert self.d[3] == ['p2', 'foo', 2, 1.0, 1, 2.1] + def testGeneralProps(self): + # " testing DataSet properties" + self.setUpGeneralLoad() + assert self.d.GetNPts() == 5, 'nPts wrong' + assert self.d.GetNVars() == 4, 'nVars wrong' + assert self.d.GetNResults() == 1, 'nResults wrong' + assert self.d.GetVarNames() == ['foo1', 'foo2', 'foo3', 'foo4', 'res'], 'varNames wrong' + assert self.d.GetPtNames() == ['p1', 'p2', 'p3', 'p4', 'p5'], 'ptNames wrong' + assert self.d.GetNPossibleVals() == [0, 6, 0, 0, 3], 'nPossible Wrong' + assert self.d.GetQuantBounds() == [[], [], [], [], [2, 4]], 'quantBounds Wrong' + assert self.d.GetResults() == [1.1, 2.1, 3.1, 4.1, 5.1], 'GetResults wrong' + assert self.d.GetAllData()[1] == ['foo', 2, 1.0, 1, 2.1], 'GetAllData wrong' + assert self.d.GetInputData()[4] == ['foo', 5, 1.1, 1], 'GetInputData wrong' + assert self.d.GetNamedData()[3] == ['p4', 'foo', 4, 1.0, 1, 4.1], 'GetNamedData wrong' + assert self.d[1] == ['p2', 'foo', 2, 1.0, 1, 2.1] + assert self.d[3] == ['p4', 'foo', 4, 1.0, 1, 4.1] + self.d[3] = self.d[1] + assert self.d[3] == ['p2', 'foo', 2, 1.0, 1, 2.1] - def testGeneralPickle(self): - # " testing DataSet pickling" - self.setUpGeneralLoad() - DataUtils.WritePickledData(RDConfig.RDCodeDir + '/ML/Data/test_data/testgeneral.dat.pkl', - self.d) - with open(RDConfig.RDCodeDir + '/ML/Data/test_data/testgeneral.dat.pkl', 'rb') as f: - vNames = cPickle.load(f) - qBounds = cPickle.load(f) - ptNames = cPickle.load(f) - examples = cPickle.load(f) - d = MLData.MLDataSet(examples, varNames=vNames, qBounds=qBounds, ptNames=ptNames) - assert self.d.GetNPts() == d.GetNPts(), 'nPts wrong' - assert self.d.GetNVars() == d.GetNVars(), 'nVars wrong' - assert self.d.GetNResults() == d.GetNResults(), 'nResults wrong' - assert self.d.GetVarNames() == d.GetVarNames(), 'varNames wrong' - assert self.d.GetPtNames() == d.GetPtNames(), 'ptNames wrong' - assert self.d.GetNPossibleVals() == d.GetNPossibleVals(), 'nPossible Wrong' - assert self.d.GetQuantBounds() == d.GetQuantBounds(), 'quantBounds Wrong' - assert self.d.GetResults() == d.GetResults(), 'GetResults wrong' - assert self.d.GetAllData()[1] == d.GetAllData()[1], 'GetAllData wrong' - assert self.d.GetInputData()[3] == d.GetInputData()[3], 'GetInputData wrong' - assert self.d.GetNamedData()[2] == d.GetNamedData()[2], 'GetNamedData wrong' + def testGeneralPickle(self): + # " testing DataSet pickling" + self.setUpGeneralLoad() + DataUtils.WritePickledData(RDConfig.RDCodeDir + '/ML/Data/test_data/testgeneral.dat.pkl', + self.d) + with open(RDConfig.RDCodeDir + '/ML/Data/test_data/testgeneral.dat.pkl', 'rb') as f: + vNames = pickle.load(f) + qBounds = pickle.load(f) + ptNames = pickle.load(f) + examples = pickle.load(f) + d = MLData.MLDataSet(examples, varNames=vNames, qBounds=qBounds, ptNames=ptNames) + assert self.d.GetNPts() == d.GetNPts(), 'nPts wrong' + assert self.d.GetNVars() == d.GetNVars(), 'nVars wrong' + assert self.d.GetNResults() == d.GetNResults(), 'nResults wrong' + assert self.d.GetVarNames() == d.GetVarNames(), 'varNames wrong' + assert self.d.GetPtNames() == d.GetPtNames(), 'ptNames wrong' + assert self.d.GetNPossibleVals() == d.GetNPossibleVals(), 'nPossible Wrong' + assert self.d.GetQuantBounds() == d.GetQuantBounds(), 'quantBounds Wrong' + assert self.d.GetResults() == d.GetResults(), 'GetResults wrong' + assert self.d.GetAllData()[1] == d.GetAllData()[1], 'GetAllData wrong' + assert self.d.GetInputData()[3] == d.GetInputData()[3], 'GetInputData wrong' + assert self.d.GetNamedData()[2] == d.GetNamedData()[2], 'GetNamedData wrong' - def test_WriteData(self): - self.setUpQuantLoad() - with contextlib.closing(StringIO()) as f: - DataUtils.WriteData(f, self.d.GetVarNames(), self.d.GetQuantBounds(), self.d.data) - s = f.getvalue() - self.assertIn('DataUtils', s) - self.assertIn('foo1', s) - self.assertIn('2 2 1 0 1', s) + def test_WriteData(self): + self.setUpQuantLoad() + with contextlib.closing(StringIO()) as f: + DataUtils.WriteData(f, self.d.GetVarNames(), self.d.GetQuantBounds(), self.d.data) + s = f.getvalue() + self.assertIn('DataUtils', s) + self.assertIn('foo1', s) + self.assertIn('2 2 1 0 1', s) - def test_CalcNPossibleUsingMap(self): - self.setUpQuantLoad() - order = list(range(5)) - self.assertEqual( - DataUtils.CalcNPossibleUsingMap(self.d.data, order, self.d.GetQuantBounds()), [3, 3, 2, 2, 2]) + def test_CalcNPossibleUsingMap(self): + self.setUpQuantLoad() + order = list(range(5)) + self.assertEqual( + DataUtils.CalcNPossibleUsingMap(self.d.data, order, self.d.GetQuantBounds()), [3, 3, 2, 2, 2]) - def test_RandomizeActivities(self): + def test_RandomizeActivities(self): - class RunDetails(object): - shuffled = False - randomized = False + class RunDetails(object): + shuffled = False + randomized = False - random.seed(0) - details = RunDetails() - self.setUpGeneralLoad() - dataSet = self.d - orgActivities = [d[-1] for d in dataSet] - DataUtils.RandomizeActivities(dataSet, shuffle=True, runDetails=details) - self.assertNotEqual(orgActivities, [d[-1] for d in dataSet]) - self.assertEqual(sorted(orgActivities), sorted([d[-1] for d in dataSet])) - self.assertTrue(details.shuffled) - self.assertFalse(details.randomized) + random.seed(0) + details = RunDetails() + self.setUpGeneralLoad() + dataSet = self.d + orgActivities = [d[-1] for d in dataSet] + DataUtils.RandomizeActivities(dataSet, shuffle=True, runDetails=details) + self.assertNotEqual(orgActivities, [d[-1] for d in dataSet]) + self.assertEqual(sorted(orgActivities), sorted([d[-1] for d in dataSet])) + self.assertTrue(details.shuffled) + self.assertFalse(details.randomized) - try: - details = RunDetails() - self.setUpGeneralLoad() - dataSet = self.d - orgActivities = [d[-1] for d in dataSet] - DataUtils.RandomizeActivities(dataSet, shuffle=False, runDetails=details) - self.assertNotEqual(orgActivities, [d[-1] for d in dataSet]) - self.assertEqual(sorted(orgActivities), sorted([d[-1] for d in dataSet])) - self.assertFalse(details.randomized) - self.assertTrue(details.shuffled) - except NameError: - # This code branch is not working. - pass + try: + details = RunDetails() + self.setUpGeneralLoad() + dataSet = self.d + orgActivities = [d[-1] for d in dataSet] + DataUtils.RandomizeActivities(dataSet, shuffle=False, runDetails=details) + self.assertNotEqual(orgActivities, [d[-1] for d in dataSet]) + self.assertEqual(sorted(orgActivities), sorted([d[-1] for d in dataSet])) + self.assertFalse(details.randomized) + self.assertTrue(details.shuffled) + except NameError: + # This code branch is not working. + pass if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/Data/UnitTestQuantize.py b/rdkit/ML/Data/UnitTestQuantize.py index af59b2c84..c247bad83 100755 --- a/rdkit/ML/Data/UnitTestQuantize.py +++ b/rdkit/ML/Data/UnitTestQuantize.py @@ -7,284 +7,283 @@ import unittest from rdkit.ML.Data import Quantize -from rdkit.six.moves import map class TestCase(unittest.TestCase): - def testOneSplit1(self): - # """ simple case (clear division) """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), - (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) - target = (1.8, 0.97095) - self.assertEqual( - list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1], - 'result comparison failed: %s != %s' % (res, target)) + def testOneSplit1(self): + # """ simple case (clear division) """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), + (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) + target = (1.8, 0.97095) + self.assertEqual( + [Quantize.feq(x, y, 1e-4) for x, y in zip(res, target)], [1, 1], + 'result comparison failed: %s != %s' % (res, target)) - def testOneSplit2_noise(self): - # """ some noise """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 1), (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), - (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) - target = (1.8, 0.60999) - self.assertEqual( - list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1], - 'result comparison failed: %s != %s' % (res, target)) + def testOneSplit2_noise(self): + # """ some noise """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 1), (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), + (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) + target = (1.8, 0.60999) + self.assertEqual( + [Quantize.feq(x, y, 1e-4) for x, y in zip(res, target)], [1, 1], + 'result comparison failed: %s != %s' % (res, target)) - def testOneSplit3(self): - # """ optimal division not possibe """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.2, 1), - (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 3 - res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) - target = (1.3, 0.88129) - self.assertEqual( - list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1], - 'result comparison failed: %s != %s' % (res, target)) + def testOneSplit3(self): + # """ optimal division not possibe """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.2, 1), + (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 3 + res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) + target = (1.3, 0.88129) + self.assertEqual( + [Quantize.feq(x, y, 1e-4) for x, y in zip(res, target)], [1, 1], + 'result comparison failed: %s != %s' % (res, target)) - def testOneSplit4_duplicates(self): - # """ lots of duplicates """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.2, 1), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1), (2.1, 1), - (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) - target = (1.8, 0.68939) - self.assertEqual( - list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1], - 'result comparison failed: %s != %s' % (res, target)) + def testOneSplit4_duplicates(self): + # """ lots of duplicates """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.2, 1), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1), (2.1, 1), + (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) + target = (1.8, 0.68939) + self.assertEqual( + [Quantize.feq(x, y, 1e-4) for x, y in zip(res, target)], [1, 1], + 'result comparison failed: %s != %s' % (res, target)) - def testOneSplit5_outOfOrder(self): - # """ same as testOneSplit1 data, but out of order """ - d = [(1., 0), (1.1, 0), (2.2, 1), (1.2, 0), (1.6, 0), (1.4, 0), (2., 1), (2.1, 1), (1.4, 0), - (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) - target = (1.8, 0.97095) - self.assertEqual( - list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1], - 'result comparison failed: %s != %s' % (res, target)) + def testOneSplit5_outOfOrder(self): + # """ same as testOneSplit1 data, but out of order """ + d = [(1., 0), (1.1, 0), (2.2, 1), (1.2, 0), (1.6, 0), (1.4, 0), (2., 1), (2.1, 1), (1.4, 0), + (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes) + target = (1.8, 0.97095) + self.assertEqual( + [Quantize.feq(x, y, 1e-4) for x, y in zip(res, target)], [1, 1], + 'result comparison failed: %s != %s' % (res, target)) - def testMultSplit1_simple_dual(self): - # """ simple dual split """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.1, 1), - (2.1, 1), (2.2, 1), (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 3 - res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - target = ([1.3, 2.05], 1.55458) - self.assertEqual( - min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1, - 'split bound comparison failed: %s != %s' % (res[0], target[0])) - self.assertTrue( - Quantize.feq(res[1], target[1], 1e-4), - 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) + def testMultSplit1_simple_dual(self): + # """ simple dual split """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.1, 1), + (2.1, 1), (2.2, 1), (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 3 + res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + target = ([1.3, 2.05], 1.55458) + self.assertEqual( + min([Quantize.feq(x, y, 1e-4) for x, y in zip(res[0], target[0])]), 1, + 'split bound comparison failed: %s != %s' % (res[0], target[0])) + self.assertTrue( + Quantize.feq(res[1], target[1], 1e-4), + 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) - def testMultSplit2_outOfOrder(self): - # """ same test as testMultSplit1, but out of order """ - d = [(1., 0), (2.1, 1), (1.1, 0), (1.2, 0), (1.4, 2), (1.6, 2), (2., 2), (1.4, 2), (2.1, 1), - (2.2, 1), (2.1, 1), (2.3, 1)] - varValues, resCodes = zip(*d) - nPossibleRes = 3 - res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - target = ([1.3, 2.05], 1.55458) - self.assertTrue( - Quantize.feq(res[1], target[1], 1e-4), - 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) - self.assertEqual( - min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1, - 'split bound comparison failed: %s != %s' % (res[0], target[0])) + def testMultSplit2_outOfOrder(self): + # """ same test as testMultSplit1, but out of order """ + d = [(1., 0), (2.1, 1), (1.1, 0), (1.2, 0), (1.4, 2), (1.6, 2), (2., 2), (1.4, 2), (2.1, 1), + (2.2, 1), (2.1, 1), (2.3, 1)] + varValues, resCodes = zip(*d) + nPossibleRes = 3 + res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + target = ([1.3, 2.05], 1.55458) + self.assertTrue( + Quantize.feq(res[1], target[1], 1e-4), + 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) + self.assertEqual( + min([Quantize.feq(x, y, 1e-4) for x, y in zip(res[0], target[0])]), 1, + 'split bound comparison failed: %s != %s' % (res[0], target[0])) - def testMultSplit3_4results(self): - # """ 4 possible results """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.1, 1), - (2.1, 1), (2.2, 1), (2.3, 1), (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)] - varValues, resCodes = zip(*d) - nPossibleRes = 4 - res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes) - target = ([1.30, 2.05, 2.65], 1.97722) - self.assertTrue( - Quantize.feq(res[1], target[1], 1e-4), - 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) - self.assertEqual( - min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1, - 'split bound comparison failed: %s != %s' % (res[0], target[0])) + def testMultSplit3_4results(self): + # """ 4 possible results """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.1, 1), + (2.1, 1), (2.2, 1), (2.3, 1), (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)] + varValues, resCodes = zip(*d) + nPossibleRes = 4 + res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes) + target = ([1.30, 2.05, 2.65], 1.97722) + self.assertTrue( + Quantize.feq(res[1], target[1], 1e-4), + 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) + self.assertEqual( + min([Quantize.feq(x, y, 1e-4) for x, y in zip(res[0], target[0])]), 1, + 'split bound comparison failed: %s != %s' % (res[0], target[0])) - def testMultSplit4_dualValued_island(self): - # """ dual valued, with an island """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 1), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), - (2.1, 0), (2.2, 0), (2.3, 0)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - target = ([1.3, 2.05], .91830) - self.assertTrue( - Quantize.feq(res[1], target[1], 1e-4), - 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) - self.assertEqual( - min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1, - 'split bound comparison failed: %s != %s' % (res[0], target[0])) + def testMultSplit4_dualValued_island(self): + # """ dual valued, with an island """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 1), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), + (2.1, 0), (2.2, 0), (2.3, 0)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + target = ([1.3, 2.05], .91830) + self.assertTrue( + Quantize.feq(res[1], target[1], 1e-4), + 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) + self.assertEqual( + min([Quantize.feq(x, y, 1e-4) for x, y in zip(res[0], target[0])]), 1, + 'split bound comparison failed: %s != %s' % (res[0], target[0])) - def testMultSplit5_dualValued_island_noisy(self): - # """ dual valued, with an island, a bit noisy """ - d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), - (2.1, 0), (2.2, 1), (2.3, 0)] - varValues, resCodes = zip(*d) - nPossibleRes = 2 - res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) - target = ([1.3, 2.05], .34707) - self.assertTrue( - Quantize.feq(res[1], target[1], 1e-4), - 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) - self.assertEqual( - min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1, - 'split bound comparison failed: %s != %s' % (res[0], target[0])) + def testMultSplit5_dualValued_island_noisy(self): + # """ dual valued, with an island, a bit noisy """ + d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0), + (2.1, 0), (2.2, 1), (2.3, 0)] + varValues, resCodes = zip(*d) + nPossibleRes = 2 + res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes) + target = ([1.3, 2.05], .34707) + self.assertTrue( + Quantize.feq(res[1], target[1], 1e-4), + 'InfoGain comparison failed: %s != %s' % (res[1], target[1])) + self.assertEqual( + min([Quantize.feq(x, y, 1e-4) for x, y in zip(res[0], target[0])]), 1, + 'split bound comparison failed: %s != %s' % (res[0], target[0])) - def test9NewSplits(self): - d = [(0, 0), - (1, 1), - (2, 0), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 2], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 2], str(res)) + def test9NewSplits(self): + d = [(0, 0), + (1, 1), + (2, 0), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 2], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 2], str(res)) - d = [(0, 1), - (1, 0), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 2], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 2], str(res)) + d = [(0, 1), + (1, 0), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 2], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 2], str(res)) - d = [(0, 0), - (0, 0), - (1, 1), - (1, 1), - (2, 0), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) + d = [(0, 0), + (0, 0), + (1, 1), + (1, 1), + (2, 0), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) - d = [(0, 0), - (0, 1), - (1, 1), - (1, 1), - (2, 0), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) + d = [(0, 0), + (0, 1), + (1, 1), + (1, 1), + (2, 0), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) - d = [(0, 0), - (0, 0), - (1, 0), - (1, 1), - (2, 0), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) + d = [(0, 0), + (0, 0), + (1, 0), + (1, 1), + (2, 0), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) - d = [(0, 0), - (0, 0), - (1, 0), - (1, 0), - (2, 1), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [4], str(res)) + d = [(0, 0), + (0, 0), + (1, 0), + (1, 0), + (2, 1), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [4], str(res)) - d = [(0, 0), - (0, 0), - (1, 1), - (1, 1), - (2, 1), - (2, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2], str(res)) + d = [(0, 0), + (0, 0), + (1, 1), + (1, 1), + (2, 1), + (2, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2], str(res)) - d = [(0, 0), - (0, 0), - (1, 0), - (1, 0), - (2, 0), - (2, 0), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [], str(res)) + d = [(0, 0), + (0, 0), + (1, 0), + (1, 0), + (2, 0), + (2, 0), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [], str(res)) - d = [(0, 0), - (0, 1), - (1, 0), - (1, 1), - (2, 0), - (2, 0), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [2, 4], str(res)) + d = [(0, 0), + (0, 1), + (1, 0), + (1, 1), + (2, 0), + (2, 0), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [2, 4], str(res)) - d = [(1, 0), - (2, 1), - (2, 1), - (3, 1), - (3, 1), - (3, 1), - (4, 0), - (4, 1), - (4, 1), ] - varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 6], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 6], str(res)) + d = [(1, 0), + (2, 1), + (2, 1), + (3, 1), + (3, 1), + (3, 1), + (4, 0), + (4, 1), + (4, 1), ] + varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 6], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 6], str(res)) - d = [(1, 1.65175902843, 0), (2, 1.89935600758, 0), (3, 1.89935600758, 1), (4, 1.89935600758, 1), - (5, 2.7561609745, 1), (6, 2.7561609745, 1), (7, 2.7561609745, 1), (8, 2.7561609745, 1), - (9, 3.53454303741, 1), (10, 3.53454303741, 1), (11, 3.53454303741, 1), - (12, 3.53454303741, 1), (13, 3.53454303741, 1)] + d = [(1, 1.65175902843, 0), (2, 1.89935600758, 0), (3, 1.89935600758, 1), (4, 1.89935600758, 1), + (5, 2.7561609745, 1), (6, 2.7561609745, 1), (7, 2.7561609745, 1), (8, 2.7561609745, 1), + (9, 3.53454303741, 1), (10, 3.53454303741, 1), (11, 3.53454303741, 1), + (12, 3.53454303741, 1), (13, 3.53454303741, 1)] - _, varValues, resCodes = zip(*d) - res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 4], str(res)) - res = Quantize._FindStartPoints(varValues, resCodes, len(d)) - self.assertTrue(res == [1, 4], str(res)) + _, varValues, resCodes = zip(*d) + res = Quantize._NewPyFindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 4], str(res)) + res = Quantize._FindStartPoints(varValues, resCodes, len(d)) + self.assertTrue(res == [1, 4], str(res)) - def testGithubIssue18(self): - d = [0, 1, 2, 3, 4] - a = [0, 0, 1, 1, 1] - _ = Quantize.FindVarMultQuantBounds(d, 1, a, 2) + def testGithubIssue18(self): + d = [0, 1, 2, 3, 4] + a = [0, 0, 1, 1, 1] + _ = Quantize.FindVarMultQuantBounds(d, 1, a, 2) - d2 = [(x, ) for x in d] - self.assertRaises(ValueError, lambda: Quantize.FindVarMultQuantBounds(d2, 1, a, 2)) - self.assertRaises(ValueError, lambda: Quantize._FindStartPoints(d2, a, len(d2))) + d2 = [(x, ) for x in d] + self.assertRaises(ValueError, lambda: Quantize.FindVarMultQuantBounds(d2, 1, a, 2)) + self.assertRaises(ValueError, lambda: Quantize._FindStartPoints(d2, a, len(d2))) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/BuildQuantTree.py b/rdkit/ML/DecTree/BuildQuantTree.py index 856b1e435..6d583e794 100755 --- a/rdkit/ML/DecTree/BuildQuantTree.py +++ b/rdkit/ML/DecTree/BuildQuantTree.py @@ -6,357 +6,357 @@ """ """ -from __future__ import print_function + import numpy import random from rdkit.ML.DecTree import QuantTree, ID3 from rdkit.ML.InfoTheory import entropy from rdkit.ML.Data import Quantize -from rdkit.six.moves import range def FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, nPossibleVals, attrs, exIndices=None, **kwargs): - bestGain = -1e6 - best = -1 - bestBounds = [] + bestGain = -1e6 + best = -1 + bestBounds = [] - if exIndices is None: - exIndices = list(range(len(examples))) + if exIndices is None: + exIndices = list(range(len(examples))) - if not len(exIndices): + if not len(exIndices): + return best, bestGain, bestBounds + + nToTake = kwargs.get('randomDescriptors', 0) + if nToTake > 0: + nAttrs = len(attrs) + if nToTake < nAttrs: + ids = list(range(nAttrs)) + random.shuffle(ids, random=random.random) + tmp = [attrs[x] for x in ids[:nToTake]] + attrs = tmp + + for var in attrs: + nBounds = nBoundsPerVar[var] + if nBounds > 0: + # vTable = map(lambda x,z=var:x[z],examples) + try: + vTable = [examples[x][var] for x in exIndices] + except IndexError: + print('index error retrieving variable: %d' % var) + raise + qBounds, gainHere = Quantize.FindVarMultQuantBounds( + vTable, nBounds, resCodes, nPossibleRes) + # print('\tvar:',var,qBounds,gainHere) + elif nBounds == 0: + vTable = ID3.GenVarTable((examples[x] for x in exIndices), nPossibleVals, [var])[0] + gainHere = entropy.InfoGain(vTable) + qBounds = [] + else: + gainHere = -1e6 + qBounds = [] + if gainHere > bestGain: + bestGain = gainHere + bestBounds = qBounds + best = var + elif bestGain == gainHere: + if len(qBounds) < len(bestBounds): + best = var + bestBounds = qBounds + if best == -1: + print('best unaltered') + print('\tattrs:', attrs) + print('\tnBounds:', numpy.take(nBoundsPerVar, attrs)) + print('\texamples:') + for example in (examples[x] for x in exIndices): + print('\t\t', example) + + if 0: + print('BEST:', len(exIndices), best, bestGain, bestBounds) + if (len(exIndices) < 10): + print(len(exIndices), len(resCodes), len(examples)) + exs = [examples[x] for x in exIndices] + vals = [x[best] for x in exs] + sortIdx = numpy.argsort(vals) + sortVals = [exs[x] for x in sortIdx] + sortResults = [resCodes[x] for x in sortIdx] + for i in range(len(vals)): + print(' ', i, ['%.4f' % x for x in sortVals[i][1:-1]], sortResults[i]) return best, bestGain, bestBounds - nToTake = kwargs.get('randomDescriptors', 0) - if nToTake > 0: - nAttrs = len(attrs) - if nToTake < nAttrs: - ids = list(range(nAttrs)) - random.shuffle(ids, random=random.random) - tmp = [attrs[x] for x in ids[:nToTake]] - attrs = tmp - - for var in attrs: - nBounds = nBoundsPerVar[var] - if nBounds > 0: - # vTable = map(lambda x,z=var:x[z],examples) - try: - vTable = [examples[x][var] for x in exIndices] - except IndexError: - print('index error retrieving variable: %d' % var) - raise - qBounds, gainHere = Quantize.FindVarMultQuantBounds(vTable, nBounds, resCodes, nPossibleRes) - # print('\tvar:',var,qBounds,gainHere) - elif nBounds == 0: - vTable = ID3.GenVarTable((examples[x] for x in exIndices), nPossibleVals, [var])[0] - gainHere = entropy.InfoGain(vTable) - qBounds = [] - else: - gainHere = -1e6 - qBounds = [] - if gainHere > bestGain: - bestGain = gainHere - bestBounds = qBounds - best = var - elif bestGain == gainHere: - if len(qBounds) < len(bestBounds): - best = var - bestBounds = qBounds - if best == -1: - print('best unaltered') - print('\tattrs:', attrs) - print('\tnBounds:', numpy.take(nBoundsPerVar, attrs)) - print('\texamples:') - for example in (examples[x] for x in exIndices): - print('\t\t', example) - - if 0: - print('BEST:', len(exIndices), best, bestGain, bestBounds) - if (len(exIndices) < 10): - print(len(exIndices), len(resCodes), len(examples)) - exs = [examples[x] for x in exIndices] - vals = [x[best] for x in exs] - sortIdx = numpy.argsort(vals) - sortVals = [exs[x] for x in sortIdx] - sortResults = [resCodes[x] for x in sortIdx] - for i in range(len(vals)): - print(' ', i, ['%.4f' % x for x in sortVals[i][1:-1]], sortResults[i]) - return best, bestGain, bestBounds - def BuildQuantTree(examples, target, attrs, nPossibleVals, nBoundsPerVar, depth=0, maxDepth=-1, exIndices=None, **kwargs): - """ - **Arguments** + """ + **Arguments** - - examples: a list of lists (nInstances x nVariables+1) of variable - values + instance values + - examples: a list of lists (nInstances x nVariables+1) of variable + values + instance values - - target: an int + - target: an int - - attrs: a list of ints indicating which variables can be used in the tree + - attrs: a list of ints indicating which variables can be used in the tree - - nPossibleVals: a list containing the number of possible values of - every variable. + - nPossibleVals: a list containing the number of possible values of + every variable. - - nBoundsPerVar: the number of bounds to include for each variable + - nBoundsPerVar: the number of bounds to include for each variable - - depth: (optional) the current depth in the tree + - depth: (optional) the current depth in the tree - - maxDepth: (optional) the maximum depth to which the tree - will be grown - **Returns** + - maxDepth: (optional) the maximum depth to which the tree + will be grown + **Returns** - a QuantTree.QuantTreeNode with the decision tree + a QuantTree.QuantTreeNode with the decision tree - **NOTE:** This code cannot bootstrap (start from nothing...) - use _QuantTreeBoot_ (below) for that. - """ - tree = QuantTree.QuantTreeNode(None, 'node') - tree.SetData(-666) - nPossibleRes = nPossibleVals[-1] + **NOTE:** This code cannot bootstrap (start from nothing...) + use _QuantTreeBoot_ (below) for that. + """ + tree = QuantTree.QuantTreeNode(None, 'node') + tree.SetData(-666) + nPossibleRes = nPossibleVals[-1] - if exIndices is None: - exIndices = list(range(len(examples))) + if exIndices is None: + exIndices = list(range(len(examples))) - # counts of each result code: - resCodes = [int(x[-1]) for x in (examples[y] for y in exIndices)] - counts = [0] * nPossibleRes - for res in resCodes: - counts[res] += 1 - nzCounts = numpy.nonzero(counts)[0] + # counts of each result code: + resCodes = [int(x[-1]) for x in (examples[y] for y in exIndices)] + counts = [0] * nPossibleRes + for res in resCodes: + counts[res] += 1 + nzCounts = numpy.nonzero(counts)[0] - if len(nzCounts) == 1: - # bottomed out because there is only one result code left - # with any counts (i.e. there's only one type of example - # left... this is GOOD!). - res = nzCounts[0] - tree.SetLabel(res) - tree.SetName(str(res)) - tree.SetTerminal(1) - elif len(attrs) == 0 or (maxDepth >= 0 and depth > maxDepth): - # Bottomed out: no variables left or max depth hit - # We don't really know what to do here, so - # use the heuristic of picking the most prevalent - # result - v = numpy.argmax(counts) - tree.SetLabel(v) - tree.SetName('%d?' % v) - tree.SetTerminal(1) - else: - # find the variable which gives us the largest information gain - best, _, bestBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, nPossibleVals, - attrs, exIndices=exIndices, **kwargs) - # remove that variable from the lists of possible variables - nextAttrs = attrs[:] - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) - - # set some info at this node - tree.SetName('Var: %d' % (best)) - tree.SetLabel(best) - tree.SetQuantBounds(bestBounds) - tree.SetTerminal(0) - - # loop over possible values of the new variable and - # build a subtree for each one - indices = exIndices[:] - if len(bestBounds) > 0: - for bound in bestBounds: - nextExamples = [] - for index in indices[:]: - ex = examples[index] - if ex[best] < bound: - nextExamples.append(index) - indices.remove(index) - - if len(nextExamples) == 0: - # this particular value of the variable has no examples, - # so there's not much sense in recursing. - # This can (and does) happen. - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - # recurse - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) - # add the last points remaining - nextExamples = [] - for index in indices: - nextExamples.append(index) - if len(nextExamples) == 0: + if len(nzCounts) == 1: + # bottomed out because there is only one result code left + # with any counts (i.e. there's only one type of example + # left... this is GOOD!). + res = nzCounts[0] + tree.SetLabel(res) + tree.SetName(str(res)) + tree.SetTerminal(1) + elif len(attrs) == 0 or (maxDepth >= 0 and depth > maxDepth): + # Bottomed out: no variables left or max depth hit + # We don't really know what to do here, so + # use the heuristic of picking the most prevalent + # result v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) - else: - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) + tree.SetLabel(v) + tree.SetName('%d?' % v) + tree.SetTerminal(1) else: - for val in range(nPossibleVals[best]): - nextExamples = [] - for idx in exIndices: - if examples[idx][best] == val: - nextExamples.append(idx) - if len(nextExamples) == 0: - v = numpy.argmax(counts) - tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) + # find the variable which gives us the largest information gain + best, _, bestBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, nPossibleVals, + attrs, exIndices=exIndices, **kwargs) + # remove that variable from the lists of possible variables + nextAttrs = attrs[:] + if not kwargs.get('recycleVars', 0): + nextAttrs.remove(best) + + # set some info at this node + tree.SetName('Var: %d' % (best)) + tree.SetLabel(best) + tree.SetQuantBounds(bestBounds) + tree.SetTerminal(0) + + # loop over possible values of the new variable and + # build a subtree for each one + indices = exIndices[:] + if len(bestBounds) > 0: + for bound in bestBounds: + nextExamples = [] + for index in indices[:]: + ex = examples[index] + if ex[best] < bound: + nextExamples.append(index) + indices.remove(index) + + if len(nextExamples) == 0: + # this particular value of the variable has no examples, + # so there's not much sense in recursing. + # This can (and does) happen. + v = numpy.argmax(counts) + tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) + else: + # recurse + tree.AddChildNode( + BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, + maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) + # add the last points remaining + nextExamples = [] + for index in indices: + nextExamples.append(index) + if len(nextExamples) == 0: + v = numpy.argmax(counts) + tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) + else: + tree.AddChildNode( + BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, + maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) else: - tree.AddChildNode( - BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, - maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) - return tree + for val in range(nPossibleVals[best]): + nextExamples = [] + for idx in exIndices: + if examples[idx][best] == val: + nextExamples.append(idx) + if len(nextExamples) == 0: + v = numpy.argmax(counts) + tree.AddChild('%d' % v, label=v, data=0.0, isTerminal=1) + else: + tree.AddChildNode( + BuildQuantTree(examples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=depth + 1, + maxDepth=maxDepth, exIndices=nextExamples, **kwargs)) + return tree def QuantTreeBoot(examples, attrs, nPossibleVals, nBoundsPerVar, initialVar=None, maxDepth=-1, **kwargs): - """ Bootstrapping code for the QuantTree + """ Bootstrapping code for the QuantTree - If _initialVar_ is not set, the algorithm will automatically - choose the first variable in the tree (the standard greedy - approach). Otherwise, _initialVar_ will be used as the first - split. + If _initialVar_ is not set, the algorithm will automatically + choose the first variable in the tree (the standard greedy + approach). Otherwise, _initialVar_ will be used as the first + split. - """ - attrs = list(attrs) - for i in range(len(nBoundsPerVar)): - if nBoundsPerVar[i] == -1 and i in attrs: - attrs.remove(i) + """ + attrs = list(attrs) + for i in range(len(nBoundsPerVar)): + if nBoundsPerVar[i] == -1 and i in attrs: + attrs.remove(i) - tree = QuantTree.QuantTreeNode(None, 'node') - nPossibleRes = nPossibleVals[-1] - tree._nResultCodes = nPossibleRes + tree = QuantTree.QuantTreeNode(None, 'node') + nPossibleRes = nPossibleVals[-1] + tree._nResultCodes = nPossibleRes - resCodes = [int(x[-1]) for x in examples] - counts = [0] * nPossibleRes - for res in resCodes: - counts[res] += 1 - if initialVar is None: - best, gainHere, qBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, - nPossibleVals, attrs, **kwargs) - else: - best = initialVar - if nBoundsPerVar[best] > 0: - vTable = map(lambda x, z=best: x[z], examples) - qBounds, gainHere = Quantize.FindVarMultQuantBounds(vTable, nBoundsPerVar[best], resCodes, - nPossibleRes) - elif nBoundsPerVar[best] == 0: - vTable = ID3.GenVarTable(examples, nPossibleVals, [best])[0] - gainHere = entropy.InfoGain(vTable) - qBounds = [] + resCodes = [int(x[-1]) for x in examples] + counts = [0] * nPossibleRes + for res in resCodes: + counts[res] += 1 + if initialVar is None: + best, gainHere, qBounds = FindBest(resCodes, examples, nBoundsPerVar, nPossibleRes, + nPossibleVals, attrs, **kwargs) else: - gainHere = -1e6 - qBounds = [] + best = initialVar + if nBoundsPerVar[best] > 0: + vTable = map(lambda x, z=best: x[z], examples) + qBounds, gainHere = Quantize.FindVarMultQuantBounds(vTable, nBoundsPerVar[best], resCodes, + nPossibleRes) + elif nBoundsPerVar[best] == 0: + vTable = ID3.GenVarTable(examples, nPossibleVals, [best])[0] + gainHere = entropy.InfoGain(vTable) + qBounds = [] + else: + gainHere = -1e6 + qBounds = [] - tree.SetName('Var: %d' % (best)) - tree.SetData(gainHere) - tree.SetLabel(best) - tree.SetTerminal(0) - tree.SetQuantBounds(qBounds) - nextAttrs = list(attrs) - if not kwargs.get('recycleVars', 0): - nextAttrs.remove(best) + tree.SetName('Var: %d' % (best)) + tree.SetData(gainHere) + tree.SetLabel(best) + tree.SetTerminal(0) + tree.SetQuantBounds(qBounds) + nextAttrs = list(attrs) + if not kwargs.get('recycleVars', 0): + nextAttrs.remove(best) - indices = list(range(len(examples))) - if len(qBounds) > 0: - for bound in qBounds: - nextExamples = [] - for index in list(indices): - ex = examples[index] - if ex[best] < bound: - nextExamples.append(ex) - indices.remove(index) + indices = list(range(len(examples))) + if len(qBounds) > 0: + for bound in qBounds: + nextExamples = [] + for index in list(indices): + ex = examples[index] + if ex[best] < bound: + nextExamples.append(ex) + indices.remove(index) - if len(nextExamples): - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) - else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - # add the last points remaining - nextExamples = [] - for index in indices: - nextExamples.append(examples[index]) - if len(nextExamples) != 0: - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) + if len(nextExamples): + tree.AddChildNode( + BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, + maxDepth=maxDepth, **kwargs)) + else: + v = numpy.argmax(counts) + tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) + # add the last points remaining + nextExamples = [] + for index in indices: + nextExamples.append(examples[index]) + if len(nextExamples) != 0: + tree.AddChildNode( + BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, + maxDepth=maxDepth, **kwargs)) + else: + v = numpy.argmax(counts) + tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - else: - for val in range(nPossibleVals[best]): - nextExamples = [] - for example in examples: - if example[best] == val: - nextExamples.append(example) - if len(nextExamples) != 0: - tree.AddChildNode( - BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, - maxDepth=maxDepth, **kwargs)) - else: - v = numpy.argmax(counts) - tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) - return tree + for val in range(nPossibleVals[best]): + nextExamples = [] + for example in examples: + if example[best] == val: + nextExamples.append(example) + if len(nextExamples) != 0: + tree.AddChildNode( + BuildQuantTree(nextExamples, best, nextAttrs, nPossibleVals, nBoundsPerVar, depth=1, + maxDepth=maxDepth, **kwargs)) + else: + v = numpy.argmax(counts) + tree.AddChild('%d??' % (v), label=v, data=0.0, isTerminal=1) + return tree def TestTree(): - """ testing code for named trees + """ testing code for named trees - """ - examples1 = [['p1', 0, 1, 0, 0], ['p2', 0, 0, 0, 1], ['p3', 0, 0, 1, 2], ['p4', 0, 1, 1, 2], - ['p5', 1, 0, 0, 2], ['p6', 1, 0, 1, 2], ['p7', 1, 1, 0, 2], ['p8', 1, 1, 1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 2, 3] - t1 = ID3.ID3Boot(examples1, attrs, nPossibleVals, maxDepth=1) - t1.Print() + """ + examples1 = [['p1', 0, 1, 0, 0], ['p2', 0, 0, 0, 1], ['p3', 0, 0, 1, 2], ['p4', 0, 1, 1, 2], + ['p5', 1, 0, 0, 2], ['p6', 1, 0, 1, 2], ['p7', 1, 1, 0, 2], ['p8', 1, 1, 1, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 2, 2, 2, 3] + t1 = ID3.ID3Boot(examples1, attrs, nPossibleVals, maxDepth=1) + t1.Print() def TestQuantTree(): # pragma: nocover - """ Testing code for named trees + """ Testing code for named trees - The created pkl file is required by the unit test code. - """ - examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, 2], - ['p4', 0, 1, 1.1, 2], ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], - ['p7', 1, 1, 0.1, 2], ['p8', 1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 3] - boundsPerVar = [0, 0, 0, 1, 0] + The created pkl file is required by the unit test code. + """ + examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, 2], + ['p4', 0, 1, 1.1, 2], ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], + ['p7', 1, 1, 0.1, 2], ['p8', 1, 1, 1.1, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 2, 2, 0, 3] + boundsPerVar = [0, 0, 0, 1, 0] - print('base') - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - t1.Pickle('test_data/QuantTree1.pkl') - t1.Print() + print('base') + t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) + t1.Pickle('test_data/QuantTree1.pkl') + t1.Print() - print('depth limit') - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, maxDepth=1) - t1.Pickle('test_data/QuantTree1.pkl') - t1.Print() + print('depth limit') + t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, maxDepth=1) + t1.Pickle('test_data/QuantTree1.pkl') + t1.Print() def TestQuantTree2(): # pragma: nocover - """ testing code for named trees + """ testing code for named trees - The created pkl file is required by the unit test code. - """ - examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], - ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], - ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 0, 2, 0, 3] - boundsPerVar = [0, 1, 0, 1, 0] + The created pkl file is required by the unit test code. + """ + examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], + ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], + ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 0, 2, 0, 3] + boundsPerVar = [0, 1, 0, 1, 0] - t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - t1.Print() - t1.Pickle('test_data/QuantTree2.pkl') + t1 = QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) + t1.Print() + t1.Pickle('test_data/QuantTree2.pkl') - for example in examples1: - print(example, t1.ClassifyExample(example)) + for example in examples1: + print(example, t1.ClassifyExample(example)) if __name__ == "__main__": # pragma: nocover - TestTree() - TestQuantTree() - # TestQuantTree2() + TestTree() + TestQuantTree() + # TestQuantTree2() diff --git a/rdkit/ML/DecTree/BuildSigTree.py b/rdkit/ML/DecTree/BuildSigTree.py index fb84d2945..1a25c835c 100755 --- a/rdkit/ML/DecTree/BuildSigTree.py +++ b/rdkit/ML/DecTree/BuildSigTree.py @@ -6,7 +6,7 @@ """ """ -from __future__ import print_function + import copy import random diff --git a/rdkit/ML/DecTree/CrossValidate.py b/rdkit/ML/DecTree/CrossValidate.py index 5675615bc..334012477 100755 --- a/rdkit/ML/DecTree/CrossValidate.py +++ b/rdkit/ML/DecTree/CrossValidate.py @@ -8,7 +8,7 @@ cross validation == evaluating the accuracy of a tree. """ -from __future__ import print_function + import numpy diff --git a/rdkit/ML/DecTree/Forest.py b/rdkit/ML/DecTree/Forest.py index c242ae92f..5cf65802a 100755 --- a/rdkit/ML/DecTree/Forest.py +++ b/rdkit/ML/DecTree/Forest.py @@ -6,12 +6,12 @@ **NOTE** This code should be obsolete now that ML.Composite.Composite is up and running. """ -from __future__ import print_function + import numpy from rdkit.ML.DecTree import CrossValidate, PruneTree -from rdkit.six.moves import cPickle +import pickle class Forest(object): @@ -139,7 +139,7 @@ class Forest(object): """ pFile = open(fileName, 'wb+') - cPickle.dump(self, pFile, 1) + pickle.dump(self, pFile, 1) pFile.close() def AddTree(self, tree, error): diff --git a/rdkit/ML/DecTree/PruneTree.py b/rdkit/ML/DecTree/PruneTree.py index 6c47c229c..9d20e53a2 100755 --- a/rdkit/ML/DecTree/PruneTree.py +++ b/rdkit/ML/DecTree/PruneTree.py @@ -4,303 +4,303 @@ """ Contains functionality for doing tree pruning """ -from __future__ import print_function + import copy import numpy from rdkit.ML.DecTree import CrossValidate, DecTree -from rdkit.six.moves import range _verbose = 0 def MaxCount(examples): - """ given a set of examples, returns the most common result code - - **Arguments** - - examples: a list of examples to be counted - - **Returns** - - the most common result code - - """ - resList = [x[-1] for x in examples] - maxVal = max(resList) - counts = [None] * (maxVal + 1) - for i in range(maxVal + 1): - counts[i] = sum([x == i for x in resList]) - - return numpy.argmax(counts) - - -def _GetLocalError(node): - nWrong = 0 - for example in node.GetExamples(): - pred = node.ClassifyExample(example, appendExamples=0) - if pred != example[-1]: - nWrong += 1 - # if _verbose: print('------------------>MISS:',example,pred) - return nWrong - - -def _Pruner(node, level=0): - """Recursively finds and removes the nodes whose removals improve classification + """ given a set of examples, returns the most common result code **Arguments** - - node: the tree to be pruned. The pruning data should already be contained - within node (i.e. node.GetExamples() should return the pruning data) - - - level: (optional) the level of recursion, used only in _verbose printing - + examples: a list of examples to be counted **Returns** - the pruned version of node + the most common result code + + """ + resList = [x[-1] for x in examples] + maxVal = max(resList) + counts = [None] * (maxVal + 1) + for i in range(maxVal + 1): + counts[i] = sum([x == i for x in resList]) + + return numpy.argmax(counts) - **Notes** +def _GetLocalError(node): + nWrong = 0 + for example in node.GetExamples(): + pred = node.ClassifyExample(example, appendExamples=0) + if pred != example[-1]: + nWrong += 1 + # if _verbose: print('------------------>MISS:',example,pred) + return nWrong - - This uses a greedy algorithm which basically does a DFS traversal of the tree, - removing nodes whenever possible. - - If removing a node does not affect the accuracy, it *will be* removed. We - favor smaller trees. +def _Pruner(node, level=0): + """Recursively finds and removes the nodes whose removals improve classification - """ - if _verbose: - print(' ' * level, '<%d> ' % level, '>>> Pruner') - children = node.GetChildren()[:] + **Arguments** - bestTree = copy.deepcopy(node) - bestErr = 1e6 - # - # Loop over the children of this node, removing them when doing so - # either improves the local error or leaves it unchanged (we're - # introducing a bias for simpler trees). - # - for i in range(len(children)): - child = children[i] - examples = child.GetExamples() + - node: the tree to be pruned. The pruning data should already be contained + within node (i.e. node.GetExamples() should return the pruning data) + + - level: (optional) the level of recursion, used only in _verbose printing + + + **Returns** + + the pruned version of node + + + **Notes** + + - This uses a greedy algorithm which basically does a DFS traversal of the tree, + removing nodes whenever possible. + + - If removing a node does not affect the accuracy, it *will be* removed. We + favor smaller trees. + + """ if _verbose: - print(' ' * level, '<%d> ' % level, ' Child:', i, child.GetLabel()) - bestTree.Print() - print() - if len(examples): - if _verbose: - print(' ' * level, '<%d> ' % level, ' Examples', len(examples)) - if child.GetTerminal(): - if _verbose: - print(' ' * level, '<%d> ' % level, ' Terminal') - continue + print(' ' * level, '<%d> ' % level, '>>> Pruner') + children = node.GetChildren()[:] - if _verbose: - print(' ' * level, '<%d> ' % level, ' Nonterminal') + bestTree = copy.deepcopy(node) + bestErr = 1e6 + # + # Loop over the children of this node, removing them when doing so + # either improves the local error or leaves it unchanged (we're + # introducing a bias for simpler trees). + # + for i in range(len(children)): + child = children[i] + examples = child.GetExamples() + if _verbose: + print(' ' * level, '<%d> ' % level, ' Child:', i, child.GetLabel()) + bestTree.Print() + print() + if len(examples): + if _verbose: + print(' ' * level, '<%d> ' % level, ' Examples', len(examples)) + if child.GetTerminal(): + if _verbose: + print(' ' * level, '<%d> ' % level, ' Terminal') + continue - workTree = copy.deepcopy(bestTree) - # - # First recurse on the child (try removing things below it) - # - newNode = _Pruner(child, level=level + 1) - workTree.ReplaceChildIndex(i, newNode) - tempErr = _GetLocalError(workTree) - if tempErr <= bestErr: - bestErr = tempErr - bestTree = copy.deepcopy(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, '>->->->->->') - print(' ' * level, '<%d> ' % level, 'replacing:', i, child.GetLabel()) - child.Print() - print(' ' * level, '<%d> ' % level, 'with:') - newNode.Print() - print(' ' * level, '<%d> ' % level, '<-<-<-<-<-<') - else: - workTree.ReplaceChildIndex(i, child) - # - # Now try replacing the child entirely - # - bestGuess = MaxCount(child.GetExamples()) - newNode = DecTree.DecTreeNode(workTree, 'L:%d' % (bestGuess), label=bestGuess, isTerminal=1) - newNode.SetExamples(child.GetExamples()) - workTree.ReplaceChildIndex(i, newNode) - if _verbose: - print(' ' * level, '<%d> ' % level, 'ATTEMPT:') - workTree.Print() - newErr = _GetLocalError(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, '---> ', newErr, bestErr) - if newErr <= bestErr: - bestErr = newErr - bestTree = copy.deepcopy(workTree) - if _verbose: - print(' ' * level, '<%d> ' % level, 'PRUNING:') - workTree.Print() - else: - if _verbose: - print(' ' * level, '<%d> ' % level, 'FAIL') - # whoops... put the child back in: - workTree.ReplaceChildIndex(i, child) - else: - if _verbose: - print(' ' * level, '<%d> ' % level, ' No Examples', len(examples)) - # - # FIX: we need to figure out what to do here (nodes that contain - # no examples in the testing set). I can concoct arguments for - # leaving them in and for removing them. At the moment they are - # left intact. - # - pass + if _verbose: + print(' ' * level, '<%d> ' % level, ' Nonterminal') - if _verbose: - print(' ' * level, '<%d> ' % level, '<<< out') - return bestTree + workTree = copy.deepcopy(bestTree) + # + # First recurse on the child (try removing things below it) + # + newNode = _Pruner(child, level=level + 1) + workTree.ReplaceChildIndex(i, newNode) + tempErr = _GetLocalError(workTree) + if tempErr <= bestErr: + bestErr = tempErr + bestTree = copy.deepcopy(workTree) + if _verbose: + print(' ' * level, '<%d> ' % level, '>->->->->->') + print(' ' * level, '<%d> ' % level, 'replacing:', i, child.GetLabel()) + child.Print() + print(' ' * level, '<%d> ' % level, 'with:') + newNode.Print() + print(' ' * level, '<%d> ' % level, '<-<-<-<-<-<') + else: + workTree.ReplaceChildIndex(i, child) + # + # Now try replacing the child entirely + # + bestGuess = MaxCount(child.GetExamples()) + newNode = DecTree.DecTreeNode(workTree, 'L:%d' % ( + bestGuess), label=bestGuess, isTerminal=1) + newNode.SetExamples(child.GetExamples()) + workTree.ReplaceChildIndex(i, newNode) + if _verbose: + print(' ' * level, '<%d> ' % level, 'ATTEMPT:') + workTree.Print() + newErr = _GetLocalError(workTree) + if _verbose: + print(' ' * level, '<%d> ' % level, '---> ', newErr, bestErr) + if newErr <= bestErr: + bestErr = newErr + bestTree = copy.deepcopy(workTree) + if _verbose: + print(' ' * level, '<%d> ' % level, 'PRUNING:') + workTree.Print() + else: + if _verbose: + print(' ' * level, '<%d> ' % level, 'FAIL') + # whoops... put the child back in: + workTree.ReplaceChildIndex(i, child) + else: + if _verbose: + print(' ' * level, '<%d> ' % level, ' No Examples', len(examples)) + # + # FIX: we need to figure out what to do here (nodes that contain + # no examples in the testing set). I can concoct arguments for + # leaving them in and for removing them. At the moment they are + # left intact. + # + pass + + if _verbose: + print(' ' * level, '<%d> ' % level, '<<< out') + return bestTree def PruneTree(tree, trainExamples, testExamples, minimizeTestErrorOnly=1): - """ implements a reduced-error pruning of decision trees + """ implements a reduced-error pruning of decision trees - This algorithm is described on page 69 of Mitchell's book. + This algorithm is described on page 69 of Mitchell's book. - Pruning can be done using just the set of testExamples (the validation set) - or both the testExamples and the trainExamples by setting minimizeTestErrorOnly - to 0. + Pruning can be done using just the set of testExamples (the validation set) + or both the testExamples and the trainExamples by setting minimizeTestErrorOnly + to 0. - **Arguments** + **Arguments** - - tree: the initial tree to be pruned + - tree: the initial tree to be pruned - - trainExamples: the examples used to train the tree + - trainExamples: the examples used to train the tree - - testExamples: the examples held out for testing the tree + - testExamples: the examples held out for testing the tree - - minimizeTestErrorOnly: if this toggle is zero, all examples (i.e. - _trainExamples_ + _testExamples_ will be used to evaluate the error. + - minimizeTestErrorOnly: if this toggle is zero, all examples (i.e. + _trainExamples_ + _testExamples_ will be used to evaluate the error. - **Returns** + **Returns** - a 2-tuple containing: + a 2-tuple containing: - 1) the best tree + 1) the best tree - 2) the best error (the one which corresponds to that tree) + 2) the best error (the one which corresponds to that tree) - """ - if minimizeTestErrorOnly: - testSet = testExamples - else: - testSet = trainExamples + testExamples + """ + if minimizeTestErrorOnly: + testSet = testExamples + else: + testSet = trainExamples + testExamples - # remove any stored examples the tree may have - tree.ClearExamples() + # remove any stored examples the tree may have + tree.ClearExamples() - # - # screen the test data through the tree so that we end up with the - # appropriate points stored at each node of the tree. Results are ignored - # - totErr, badEx = CrossValidate.CrossValidate(tree, testSet, appendExamples=1) + # + # screen the test data through the tree so that we end up with the + # appropriate points stored at each node of the tree. Results are ignored + # + totErr, badEx = CrossValidate.CrossValidate(tree, testSet, appendExamples=1) - # - # Prune - # - newTree = _Pruner(tree) + # + # Prune + # + newTree = _Pruner(tree) - # - # And recalculate the errors - # - totErr, badEx = CrossValidate.CrossValidate(newTree, testSet) - newTree.SetBadExamples(badEx) + # + # And recalculate the errors + # + totErr, badEx = CrossValidate.CrossValidate(newTree, testSet) + newTree.SetBadExamples(badEx) - return newTree, totErr + return newTree, totErr # ------- # testing code # ------- def _testRandom(): - from rdkit.ML.DecTree import randomtest - # examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=20, randScale=0.25, - # nExamples=200) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=10, randScale=0.5, - nExamples=200) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals) - tree.Print() - tree.Pickle('orig.pkl') - print('original error is:', frac) + from rdkit.ML.DecTree import randomtest + # examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=20, randScale=0.25, + # nExamples=200) + examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nVars=10, randScale=0.5, + nExamples=200) + tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals) + tree.Print() + tree.Pickle('orig.pkl') + print('original error is:', frac) - print('----Pruning') - newTree, frac2 = PruneTree(tree, tree.GetTrainingExamples(), tree.GetTestExamples()) - newTree.Print() - print('pruned error is:', frac2) - newTree.Pickle('prune.pkl') + print('----Pruning') + newTree, frac2 = PruneTree(tree, tree.GetTrainingExamples(), tree.GetTestExamples()) + newTree.Print() + print('pruned error is:', frac2) + newTree.Pickle('prune.pkl') def _testSpecific(): - from rdkit.ML.DecTree import ID3 - oPts = [ - [0, 0, 1, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [1, 1, 0, 0], - [1, 1, 1, 1], - ] - tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] + from rdkit.ML.DecTree import ID3 + oPts = [ + [0, 0, 1, 0], + [0, 1, 1, 1], + [1, 0, 1, 1], + [1, 1, 0, 0], + [1, 1, 1, 1], + ] + tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] - tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) - tree.Print() - err, _ = CrossValidate.CrossValidate(tree, oPts) - print('original error:', err) + tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) + tree.Print() + err, _ = CrossValidate.CrossValidate(tree, oPts) + print('original error:', err) - err, _ = CrossValidate.CrossValidate(tree, tPts) - print('original holdout error:', err) - newTree, frac2 = PruneTree(tree, oPts, tPts) - newTree.Print() - print('best error of pruned tree:', frac2) - err, badEx = CrossValidate.CrossValidate(newTree, tPts) - print('pruned holdout error is:', err) - print(badEx) + err, _ = CrossValidate.CrossValidate(tree, tPts) + print('original holdout error:', err) + newTree, frac2 = PruneTree(tree, oPts, tPts) + newTree.Print() + print('best error of pruned tree:', frac2) + err, badEx = CrossValidate.CrossValidate(newTree, tPts) + print('pruned holdout error is:', err) + print(badEx) # print(len(tree), len(newTree)) def _testChain(): - from rdkit.ML.DecTree import ID3 - oPts = [ - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [1, 0, 0, 0, 1], - [0, 0, 1, 1, 0], - [0, 0, 1, 1, 0], - [0, 0, 1, 1, 1], - [0, 1, 0, 1, 0], - [0, 1, 0, 1, 0], - [0, 1, 0, 0, 1], - ] - tPts = oPts + from rdkit.ML.DecTree import ID3 + oPts = [ + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [0, 0, 1, 1, 0], + [0, 0, 1, 1, 0], + [0, 0, 1, 1, 1], + [0, 1, 0, 1, 0], + [0, 1, 0, 1, 0], + [0, 1, 0, 0, 1], + ] + tPts = oPts - tree = ID3.ID3Boot(oPts, attrs=range(len(oPts[0]) - 1), nPossibleVals=[2] * len(oPts[0])) - tree.Print() - err, _ = CrossValidate.CrossValidate(tree, oPts) - print('original error:', err) + tree = ID3.ID3Boot(oPts, attrs=range(len(oPts[0]) - 1), nPossibleVals=[2] * len(oPts[0])) + tree.Print() + err, _ = CrossValidate.CrossValidate(tree, oPts) + print('original error:', err) - err, _ = CrossValidate.CrossValidate(tree, tPts) - print('original holdout error:', err) - newTree, frac2 = PruneTree(tree, oPts, tPts) - newTree.Print() - print('best error of pruned tree:', frac2) - err, badEx = CrossValidate.CrossValidate(newTree, tPts) - print('pruned holdout error is:', err) - print(badEx) + err, _ = CrossValidate.CrossValidate(tree, tPts) + print('original holdout error:', err) + newTree, frac2 = PruneTree(tree, oPts, tPts) + newTree.Print() + print('best error of pruned tree:', frac2) + err, badEx = CrossValidate.CrossValidate(newTree, tPts) + print('pruned holdout error is:', err) + print(badEx) if __name__ == '__main__': # pragma: nocover - _verbose = 1 - # _testRandom() - _testChain() + _verbose = 1 + # _testRandom() + _testChain() diff --git a/rdkit/ML/DecTree/Tree.py b/rdkit/ML/DecTree/Tree.py index 666f2d1a2..380d123ca 100755 --- a/rdkit/ML/DecTree/Tree.py +++ b/rdkit/ML/DecTree/Tree.py @@ -4,9 +4,9 @@ """ Implements a class used to represent N-ary trees """ -from __future__ import print_function -from rdkit.six.moves import cPickle + +import pickle # FIX: the TreeNode class has not been updated to new-style classes @@ -247,7 +247,7 @@ class TreeNode: """ with open(fileName, 'wb+') as pFile: - cPickle.dump(self, pFile) + pickle.dump(self, pFile) def __str__(self): """ returns a string representation of the tree diff --git a/rdkit/ML/DecTree/UnitTestID3.py b/rdkit/ML/DecTree/UnitTestID3.py index 36ddd70fa..e7cc1e61a 100644 --- a/rdkit/ML/DecTree/UnitTestID3.py +++ b/rdkit/ML/DecTree/UnitTestID3.py @@ -2,7 +2,7 @@ # Copyright (C) 2000 greg Landrum # """ unit tests for the ID3 implementation """ -from __future__ import print_function + import io import unittest @@ -10,7 +10,7 @@ import unittest from rdkit import RDConfig from rdkit.ML.Data import MLData from rdkit.ML.DecTree import ID3 -from rdkit.six.moves import cPickle +import pickle class ID3TestCase(unittest.TestCase): @@ -37,7 +37,7 @@ class ID3TestCase(unittest.TestCase): buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') inTFile.close() with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) + t2 = pickle.load(inFile) assert self.t1 == t2, 'Incorrect tree generated.' def _setupMultiTree(self): @@ -57,7 +57,7 @@ class ID3TestCase(unittest.TestCase): buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') inTFile.close() with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) + t2 = pickle.load(inFile) assert self.t1 == t2, 'Incorrect tree generated.' def testClassify(self): @@ -106,7 +106,7 @@ class ID3TestCase(unittest.TestCase): buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') inTFile.close() with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) + t2 = pickle.load(inFile) assert self.t1 == t2, 'Incorrect tree generated.' def _setupPyMultiTree(self): @@ -130,7 +130,7 @@ class ID3TestCase(unittest.TestCase): buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') inTFile.close() with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) + t2 = pickle.load(inFile) assert self.t1 == t2, 'Incorrect tree generated.' def testPyClassify(self): diff --git a/rdkit/ML/DecTree/UnitTestPrune.py b/rdkit/ML/DecTree/UnitTestPrune.py index c362bbe64..d4c12d638 100755 --- a/rdkit/ML/DecTree/UnitTestPrune.py +++ b/rdkit/ML/DecTree/UnitTestPrune.py @@ -7,70 +7,70 @@ import unittest from rdkit.ML.DecTree import ID3, PruneTree, CrossValidate from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO def feq(a, b, tol=1e-4): - return abs(a - b) <= tol + return abs(a - b) <= tol class TreeTestCase(unittest.TestCase): - def setUp(self): - pass + def setUp(self): + pass - def test1(self): - # " testing pruning with known results " - oPts = [ - [0, 0, 1, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [1, 1, 0, 0], - [1, 1, 1, 1], - ] - tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] - tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) - err, badEx = CrossValidate.CrossValidate(tree, oPts) - assert err == 0.0, 'bad initial error' - assert len(badEx) == 0, 'bad initial error' + def test1(self): + # " testing pruning with known results " + oPts = [ + [0, 0, 1, 0], + [0, 1, 1, 1], + [1, 0, 1, 1], + [1, 1, 0, 0], + [1, 1, 1, 1], + ] + tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]] + tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4) + err, badEx = CrossValidate.CrossValidate(tree, oPts) + assert err == 0.0, 'bad initial error' + assert len(badEx) == 0, 'bad initial error' - # prune with original data, shouldn't do anything - f = StringIO() - with redirect_stdout(f): - PruneTree._verbose = True - newTree, err = PruneTree.PruneTree(tree, [], oPts) - PruneTree._verbose = False - self.assertIn('Pruner', f.getvalue()) - assert newTree == tree, 'improper pruning' + # prune with original data, shouldn't do anything + f = StringIO() + with redirect_stdout(f): + PruneTree._verbose = True + newTree, err = PruneTree.PruneTree(tree, [], oPts) + PruneTree._verbose = False + self.assertIn('Pruner', f.getvalue()) + assert newTree == tree, 'improper pruning' - # prune with train data - newTree, err = PruneTree.PruneTree(tree, [], tPts) - assert newTree != tree, 'bad pruning' - assert feq(err, 0.14286), 'bad error result' + # prune with train data + newTree, err = PruneTree.PruneTree(tree, [], tPts) + assert newTree != tree, 'bad pruning' + assert feq(err, 0.14286), 'bad error result' - def test_exampleCode(self): - f = StringIO() - with redirect_stdout(f): - try: - PruneTree._testRandom() - self.assertTrue(os.path.isfile('prune.pkl')) - finally: - if os.path.isfile('orig.pkl'): - os.remove('orig.pkl') - if os.path.isfile('prune.pkl'): - os.remove('prune.pkl') - self.assertIn('pruned error', f.getvalue()) + def test_exampleCode(self): + f = StringIO() + with redirect_stdout(f): + try: + PruneTree._testRandom() + self.assertTrue(os.path.isfile('prune.pkl')) + finally: + if os.path.isfile('orig.pkl'): + os.remove('orig.pkl') + if os.path.isfile('prune.pkl'): + os.remove('prune.pkl') + self.assertIn('pruned error', f.getvalue()) - f = StringIO() - with redirect_stdout(f): - PruneTree._testSpecific() - self.assertIn('pruned holdout error', f.getvalue()) + f = StringIO() + with redirect_stdout(f): + PruneTree._testSpecific() + self.assertIn('pruned holdout error', f.getvalue()) - f = StringIO() - with redirect_stdout(f): - PruneTree._testChain() - self.assertIn('pruned holdout error', f.getvalue()) + f = StringIO() + with redirect_stdout(f): + PruneTree._testChain() + self.assertIn('pruned holdout error', f.getvalue()) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestQuantTree.py b/rdkit/ML/DecTree/UnitTestQuantTree.py index b35ab453a..17ee5c6e8 100644 --- a/rdkit/ML/DecTree/UnitTestQuantTree.py +++ b/rdkit/ML/DecTree/UnitTestQuantTree.py @@ -2,7 +2,7 @@ # Copyright (C) 2001,2003 greg Landrum and Rational Discovery LLC # """ unit tests for the QuantTree implementation """ -from __future__ import print_function + import io import unittest @@ -11,205 +11,208 @@ from rdkit import RDConfig from rdkit.ML.DecTree import BuildQuantTree from rdkit.ML.DecTree.QuantTree import QuantTreeNode from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO -from rdkit.six import cmp -from rdkit.six.moves import cPickle +from io import StringIO +import pickle + + +def cmp(t1, t2): + return (t1 < t2) * -1 or (t1 > t2) * 1 class TestCase(unittest.TestCase): - def setUp(self): - self.qTree1Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree1.pkl' - self.qTree2Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree2.pkl' + def setUp(self): + self.qTree1Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree1.pkl' + self.qTree2Name = RDConfig.RDCodeDir + '/ML/DecTree/test_data/QuantTree2.pkl' - def _setupTree1(self): - examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, 2], - ['p4', 0, 1, 1.1, 2], ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], - ['p7', 1, 1, 0.1, 2], ['p8', 1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 3] - boundsPerVar = [0, 0, 0, 1, 0] + def _setupTree1(self): + examples1 = [['p1', 0, 1, 0.1, 0], ['p2', 0, 0, 0.1, 1], ['p3', 0, 0, 1.1, 2], + ['p4', 0, 1, 1.1, 2], ['p5', 1, 0, 0.1, 2], ['p6', 1, 0, 1.1, 2], + ['p7', 1, 1, 0.1, 2], ['p8', 1, 1, 1.1, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 2, 2, 0, 3] + boundsPerVar = [0, 0, 0, 1, 0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples1 = examples1 + self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) + self.examples1 = examples1 - def _setupTree2(self): - examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], - ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], - ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 0, 2, 0, 3] - boundsPerVar = [0, 1, 0, 1, 0] + def _setupTree2(self): + examples1 = [['p1', 0.1, 1, 0.1, 0], ['p2', 0.1, 0, 0.1, 1], ['p3', 0.1, 0, 1.1, 2], + ['p4', 0.1, 1, 1.1, 2], ['p5', 1.1, 0, 0.1, 2], ['p6', 1.1, 0, 1.1, 2], + ['p7', 1.1, 1, 0.1, 2], ['p8', 1.1, 1, 1.1, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 0, 2, 0, 3] + boundsPerVar = [0, 1, 0, 1, 0] - self.t2 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples2 = examples1 + self.t2 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) + self.examples2 = examples1 - def _setupTree1a(self): - examples1 = [['p1', 0, 1, 0.1, 4.0, 0], ['p2', 0, 0, 0.1, 4.1, 1], ['p3', 0, 0, 1.1, 4.2, 2], - ['p4', 0, 1, 1.1, 4.2, 2], ['p5', 1, 0, 0.1, 4.2, 2], ['p6', 1, 0, 1.1, 4.2, 2], - ['p7', 1, 1, 0.1, 4.2, 2], ['p8', 1, 1, 1.1, 4.0, 0]] - attrs = list(range(1, len(examples1[0]) - 1)) - nPossibleVals = [0, 2, 2, 0, 0, 3] - boundsPerVar = [0, 0, 0, 1, -1, 0] + def _setupTree1a(self): + examples1 = [['p1', 0, 1, 0.1, 4.0, 0], ['p2', 0, 0, 0.1, 4.1, 1], ['p3', 0, 0, 1.1, 4.2, 2], + ['p4', 0, 1, 1.1, 4.2, 2], ['p5', 1, 0, 0.1, 4.2, 2], ['p6', 1, 0, 1.1, 4.2, 2], + ['p7', 1, 1, 0.1, 4.2, 2], ['p8', 1, 1, 1.1, 4.0, 0]] + attrs = list(range(1, len(examples1[0]) - 1)) + nPossibleVals = [0, 2, 2, 0, 0, 3] + boundsPerVar = [0, 0, 0, 1, -1, 0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) - self.examples1 = examples1 + self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar) + self.examples1 = examples1 - def test0Cmp(self): - # " testing tree comparisons " - self._setupTree1() - self._setupTree2() - assert self.t1 == self.t1, 'self equals failed' - assert self.t2 == self.t2, 'self equals failed' - assert self.t1 != self.t2, 'not equals failed' + def test0Cmp(self): + # " testing tree comparisons " + self._setupTree1() + self._setupTree2() + assert self.t1 == self.t1, 'self equals failed' + assert self.t2 == self.t2, 'self equals failed' + assert self.t1 != self.t2, 'not equals failed' - def test1Tree(self): - # " testing tree1 " - self._setupTree1() - with open(self.qTree1Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated. ' + def test1Tree(self): + # " testing tree1 " + self._setupTree1() + with open(self.qTree1Name, 'r') as inTFile: + buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') + inTFile.close() + with io.BytesIO(buf) as inFile: + t2 = pickle.load(inFile) + assert self.t1 == t2, 'Incorrect tree generated. ' - self.assertIn('Var: 2 []', str(self.t1)) - self.assertEqual(self.t1.GetQuantBounds(), []) + self.assertIn('Var: 2 []', str(self.t1)) + self.assertEqual(self.t1.GetQuantBounds(), []) - def test2Tree(self): - # " testing tree2 " - self._setupTree2() - with open(self.qTree2Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) - assert self.t2 == t2, 'Incorrect tree generated.' + def test2Tree(self): + # " testing tree2 " + self._setupTree2() + with open(self.qTree2Name, 'r') as inTFile: + buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') + inTFile.close() + with io.BytesIO(buf) as inFile: + t2 = pickle.load(inFile) + assert self.t2 == t2, 'Incorrect tree generated.' - def test3Classify(self): - # " testing classification " - self._setupTree1() - self._setupTree2() - for i in range(len(self.examples1)): - self.assertEqual( - self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], - msg='examples1[%d] misclassified' % i) - for i in range(len(self.examples2)): - self.assertEqual( - self.t2.ClassifyExample(self.examples2[i]), self.examples2[i][-1], - msg='examples2[%d] misclassified' % i) + def test3Classify(self): + # " testing classification " + self._setupTree1() + self._setupTree2() + for i in range(len(self.examples1)): + self.assertEqual( + self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], + msg='examples1[%d] misclassified' % i) + for i in range(len(self.examples2)): + self.assertEqual( + self.t2.ClassifyExample(self.examples2[i]), self.examples2[i][-1], + msg='examples2[%d] misclassified' % i) - def test4UnusedVars(self): - # " testing unused variables " - self._setupTree1a() - with open(self.qTree1Name, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - with io.BytesIO(buf) as inFile: - t2 = cPickle.load(inFile) - assert self.t1 == t2, 'Incorrect tree generated.' - for i in range(len(self.examples1)): - self.assertEqual( - self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], - 'examples1[%d] misclassified' % i) + def test4UnusedVars(self): + # " testing unused variables " + self._setupTree1a() + with open(self.qTree1Name, 'r') as inTFile: + buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') + inTFile.close() + with io.BytesIO(buf) as inFile: + t2 = pickle.load(inFile) + assert self.t1 == t2, 'Incorrect tree generated.' + for i in range(len(self.examples1)): + self.assertEqual( + self.t1.ClassifyExample(self.examples1[i]), self.examples1[i][-1], + 'examples1[%d] misclassified' % i) - def test5Bug29(self): - # """ a more extensive test of the cmp stuff using hand-built trees """ - import copy + def test5Bug29(self): + # """ a more extensive test of the cmp stuff using hand-built trees """ + import copy - t1 = QuantTreeNode(None, 't1') - t1.SetQuantBounds([1.]) - c1 = QuantTreeNode(t1, 'c1') - c1.SetQuantBounds([2.]) - t1.AddChildNode(c1) - c2 = QuantTreeNode(t1, 'c2') - c2.SetQuantBounds([2.]) - t1.AddChildNode(c2) - c11 = QuantTreeNode(c1, 'c11') - c11.SetQuantBounds([3.]) - c1.AddChildNode(c11) - c12 = QuantTreeNode(c1, 'c12') - c12.SetQuantBounds([3.]) - c1.AddChildNode(c12) - assert not cmp(t1, copy.deepcopy(t1)), 'self equality failed' + t1 = QuantTreeNode(None, 't1') + t1.SetQuantBounds([1.]) + c1 = QuantTreeNode(t1, 'c1') + c1.SetQuantBounds([2.]) + t1.AddChildNode(c1) + c2 = QuantTreeNode(t1, 'c2') + c2.SetQuantBounds([2.]) + t1.AddChildNode(c2) + c11 = QuantTreeNode(c1, 'c11') + c11.SetQuantBounds([3.]) + c1.AddChildNode(c11) + c12 = QuantTreeNode(c1, 'c12') + c12.SetQuantBounds([3.]) + c1.AddChildNode(c12) + assert not cmp(t1, copy.deepcopy(t1)), 'self equality failed' - t2 = QuantTreeNode(None, 't1') - t2.SetQuantBounds([1.]) - c1 = QuantTreeNode(t2, 'c1') - c1.SetQuantBounds([2.]) - t2.AddChildNode(c1) - c2 = QuantTreeNode(t2, 'c2') - c2.SetQuantBounds([2.]) - t2.AddChildNode(c2) - c11 = QuantTreeNode(c1, 'c11') - c11.SetQuantBounds([3.]) - c1.AddChildNode(c11) - c12 = QuantTreeNode(c1, 'c12') - c12.SetQuantBounds([3.00003]) - c1.AddChildNode(c12) - assert cmp(t1, t2), 'inequality failed' + t2 = QuantTreeNode(None, 't1') + t2.SetQuantBounds([1.]) + c1 = QuantTreeNode(t2, 'c1') + c1.SetQuantBounds([2.]) + t2.AddChildNode(c1) + c2 = QuantTreeNode(t2, 'c2') + c2.SetQuantBounds([2.]) + t2.AddChildNode(c2) + c11 = QuantTreeNode(c1, 'c11') + c11.SetQuantBounds([3.]) + c1.AddChildNode(c11) + c12 = QuantTreeNode(c1, 'c12') + c12.SetQuantBounds([3.00003]) + c1.AddChildNode(c12) + assert cmp(t1, t2), 'inequality failed' - def test6Bug29_2(self): - # """ a more extensive test of the cmp stuff using pickled trees""" - import os - with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree1.pkl'), - 'r') as t1TFile: - buf = t1TFile.read().replace('\r\n', '\n').encode('utf-8') - t1TFile.close() - with io.BytesIO(buf) as t1File: - t1 = cPickle.load(t1File) - with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree2.pkl'), - 'r') as t2TFile: - buf = t2TFile.read().replace('\r\n', '\n').encode('utf-8') - t2TFile.close() - with io.BytesIO(buf) as t2File: - t2 = cPickle.load(t2File) - assert cmp(t1, t2), 'equality failed' + def test6Bug29_2(self): + # """ a more extensive test of the cmp stuff using pickled trees""" + import os + with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree1.pkl'), + 'r') as t1TFile: + buf = t1TFile.read().replace('\r\n', '\n').encode('utf-8') + t1TFile.close() + with io.BytesIO(buf) as t1File: + t1 = pickle.load(t1File) + with open(os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'CmpTree2.pkl'), + 'r') as t2TFile: + buf = t2TFile.read().replace('\r\n', '\n').encode('utf-8') + t2TFile.close() + with io.BytesIO(buf) as t2File: + t2 = pickle.load(t2File) + assert cmp(t1, t2), 'equality failed' - def test7Recycle(self): - # """ try recycling descriptors """ - examples1 = [[3, 0, 0], - [3, 1, 1], - [1, 0, 0], - [0, 0, 1], - [1, 1, 0], ] - attrs = list(range(2)) - nPossibleVals = [2, 2, 2] - boundsPerVar = [1, 0, 0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, - recycleVars=1) - assert self.t1.GetLabel() == 0, self.t1.GetLabel() - assert self.t1.GetChildren()[0].GetLabel() == 1 - assert self.t1.GetChildren()[1].GetLabel() == 1 - assert self.t1.GetChildren()[1].GetChildren()[0].GetLabel() == 0 - assert self.t1.GetChildren()[1].GetChildren()[1].GetLabel() == 0 + def test7Recycle(self): + # """ try recycling descriptors """ + examples1 = [[3, 0, 0], + [3, 1, 1], + [1, 0, 0], + [0, 0, 1], + [1, 1, 0], ] + attrs = list(range(2)) + nPossibleVals = [2, 2, 2] + boundsPerVar = [1, 0, 0] + self.t1 = BuildQuantTree.QuantTreeBoot(examples1, attrs, nPossibleVals, boundsPerVar, + recycleVars=1) + assert self.t1.GetLabel() == 0, self.t1.GetLabel() + assert self.t1.GetChildren()[0].GetLabel() == 1 + assert self.t1.GetChildren()[1].GetLabel() == 1 + assert self.t1.GetChildren()[1].GetChildren()[0].GetLabel() == 0 + assert self.t1.GetChildren()[1].GetChildren()[1].GetLabel() == 0 - def test8RandomForest(self): - # """ try random forests descriptors """ - import random - random.seed(23) - nAttrs = 100 - nPts = 10 - examples = [] - for _ in range(nPts): - descrs = [int(random.random() > 0.5) for _ in range(nAttrs)] - act = sum(descrs) > nAttrs / 2 - examples.append(descrs + [act]) - attrs = list(range(nAttrs)) - nPossibleVals = [2] * (nAttrs + 1) - boundsPerVar = [0] * nAttrs + [0] - self.t1 = BuildQuantTree.QuantTreeBoot(examples, attrs, nPossibleVals, boundsPerVar, maxDepth=1, - recycleVars=1, randomDescriptors=3) - self.assertEqual(self.t1.GetLabel(), 49) - self.assertEqual(self.t1.GetChildren()[0].GetLabel(), 3) - self.assertEqual(self.t1.GetChildren()[1].GetLabel(), 54) + def test8RandomForest(self): + # """ try random forests descriptors """ + import random + random.seed(23) + nAttrs = 100 + nPts = 10 + examples = [] + for _ in range(nPts): + descrs = [int(random.random() > 0.5) for _ in range(nAttrs)] + act = sum(descrs) > nAttrs / 2 + examples.append(descrs + [act]) + attrs = list(range(nAttrs)) + nPossibleVals = [2] * (nAttrs + 1) + boundsPerVar = [0] * nAttrs + [0] + self.t1 = BuildQuantTree.QuantTreeBoot(examples, attrs, nPossibleVals, boundsPerVar, maxDepth=1, + recycleVars=1, randomDescriptors=3) + self.assertEqual(self.t1.GetLabel(), 49) + self.assertEqual(self.t1.GetChildren()[0].GetLabel(), 3) + self.assertEqual(self.t1.GetChildren()[1].GetLabel(), 54) - def test_exampleCode(self): - f = StringIO() - with redirect_stdout(f): - BuildQuantTree.TestTree() - self.assertIn('Var: 2', f.getvalue()) + def test_exampleCode(self): + f = StringIO() + with redirect_stdout(f): + BuildQuantTree.TestTree() + self.assertIn('Var: 2', f.getvalue()) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestSigTree.py b/rdkit/ML/DecTree/UnitTestSigTree.py index 71e036ac5..a19b40a0f 100644 --- a/rdkit/ML/DecTree/UnitTestSigTree.py +++ b/rdkit/ML/DecTree/UnitTestSigTree.py @@ -14,161 +14,162 @@ from rdkit.ML import InfoTheory from rdkit.ML.DecTree.BuildSigTree import BuildSigTree, _GenerateRandomEnsemble from rdkit.ML.DecTree.SigTree import SigTreeNode from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO class TestCase(unittest.TestCase): - def setUp(self): - t1 = SigTreeNode(None, 'root', 0) + def setUp(self): + t1 = SigTreeNode(None, 'root', 0) - t2 = SigTreeNode(t1, 'nodeL1', 1) - t1.AddChildNode(t2) - t3 = SigTreeNode(t2, 'nodeLTerm0', 0, isTerminal=1) - t4 = SigTreeNode(t2, 'nodeLTerm1', 1, isTerminal=1) - t2.AddChildNode(t3) - t2.AddChildNode(t4) + t2 = SigTreeNode(t1, 'nodeL1', 1) + t1.AddChildNode(t2) + t3 = SigTreeNode(t2, 'nodeLTerm0', 0, isTerminal=1) + t4 = SigTreeNode(t2, 'nodeLTerm1', 1, isTerminal=1) + t2.AddChildNode(t3) + t2.AddChildNode(t4) - t2 = SigTreeNode(t1, 'nodeR1', 2) - t1.AddChildNode(t2) - t3 = SigTreeNode(t2, 'nodeRTerm0', 1, isTerminal=1) - t4 = SigTreeNode(t2, 'nodeRTerm1', 0, isTerminal=1) - t2.AddChildNode(t3) - t2.AddChildNode(t4) - self.tree = t1 + t2 = SigTreeNode(t1, 'nodeR1', 2) + t1.AddChildNode(t2) + t3 = SigTreeNode(t2, 'nodeRTerm0', 1, isTerminal=1) + t4 = SigTreeNode(t2, 'nodeRTerm1', 0, isTerminal=1) + t2.AddChildNode(t3) + t2.AddChildNode(t4) + self.tree = t1 - def test1(self): - t1 = self.tree - bv = ExplicitBitVect(5) + def test1(self): + t1 = self.tree + bv = ExplicitBitVect(5) - ex = ['nm', bv] - self.assertFalse(t1.ClassifyExample(ex)) - bv.SetBit(1) - self.assertTrue(t1.ClassifyExample(ex)) + ex = ['nm', bv] + self.assertFalse(t1.ClassifyExample(ex)) + bv.SetBit(1) + self.assertTrue(t1.ClassifyExample(ex)) - bv.SetBit(0) - self.assertTrue(t1.ClassifyExample(ex)) + bv.SetBit(0) + self.assertTrue(t1.ClassifyExample(ex)) - bv.SetBit(2) - self.assertFalse(t1.ClassifyExample(ex)) + bv.SetBit(2) + self.assertFalse(t1.ClassifyExample(ex)) - def test2(self): - t1 = self.tree - vc = VectCollection() + def test2(self): + t1 = self.tree + vc = VectCollection() - bv = ExplicitBitVect(5) - bv.SetBitsFromList([0]) - vc.AddVect(1, bv) + bv = ExplicitBitVect(5) + bv.SetBitsFromList([0]) + vc.AddVect(1, bv) - bv = ExplicitBitVect(5) - bv.SetBitsFromList([1, 2]) - vc.AddVect(2, bv) + bv = ExplicitBitVect(5) + bv.SetBitsFromList([1, 2]) + vc.AddVect(2, bv) - ex = ['nm', bv, 1] - self.assertTrue(t1.ClassifyExample(ex)) + ex = ['nm', bv, 1] + self.assertTrue(t1.ClassifyExample(ex)) - bv = ExplicitBitVect(5) - bv.SetBitsFromList([0, 2]) - vc.AddVect(1, bv) - ex = ['nm', bv, 1] - self.assertFalse(t1.ClassifyExample(ex)) + bv = ExplicitBitVect(5) + bv.SetBitsFromList([0, 2]) + vc.AddVect(1, bv) + ex = ['nm', bv, 1] + self.assertFalse(t1.ClassifyExample(ex)) - def test3(self): - examples = [] + def test3(self): + examples = [] - bv = ExplicitBitVect(2) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['a', vc, 1]) + bv = ExplicitBitVect(2) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['a', vc, 1]) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['c', vc, 0]) + bv = ExplicitBitVect(2) + bv.SetBit(1) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['c', vc, 0]) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['c2', vc, 0]) + bv = ExplicitBitVect(2) + bv.SetBit(1) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['c2', vc, 0]) - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['d', vc, 0]) + bv = ExplicitBitVect(2) + bv.SetBit(0) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['d', vc, 0]) - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc.AddVect(2, bv) - examples.append(['d2', vc, 0]) + bv = ExplicitBitVect(2) + bv.SetBit(0) + vc = VectCollection() + vc.AddVect(1, bv) + bv = ExplicitBitVect(2) + bv.SetBit(1) + vc.AddVect(2, bv) + examples.append(['d2', vc, 0]) - bv = ExplicitBitVect(2) - bv.SetBit(0) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['d', vc, 1]) + bv = ExplicitBitVect(2) + bv.SetBit(0) + bv.SetBit(1) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['d', vc, 1]) - bv = ExplicitBitVect(2) - bv.SetBit(0) - bv.SetBit(1) - vc = VectCollection() - vc.AddVect(1, bv) - examples.append(['e', vc, 1]) + bv = ExplicitBitVect(2) + bv.SetBit(0) + bv.SetBit(1) + vc = VectCollection() + vc.AddVect(1, bv) + examples.append(['e', vc, 1]) - f = StringIO() - with redirect_stdout(f): - t = BuildSigTree(examples, 2, metric=InfoTheory.InfoType.ENTROPY, maxDepth=2, verbose=True) - self.assertIn('Build', f.getvalue()) + f = StringIO() + with redirect_stdout(f): + t = BuildSigTree(examples, 2, metric=InfoTheory.InfoType.ENTROPY, + maxDepth=2, verbose=True) + self.assertIn('Build', f.getvalue()) - self.assertEqual(t.GetName(), 'Bit-0') - self.assertEqual(t.GetLabel(), 0) - c0 = t.GetChildren()[0] - self.assertEqual(c0.GetName(), 'Bit-1') - self.assertEqual(c0.GetLabel(), 1) - c1 = t.GetChildren()[1] - self.assertEqual(c1.GetName(), 'Bit-1') - self.assertEqual(c1.GetLabel(), 1) + self.assertEqual(t.GetName(), 'Bit-0') + self.assertEqual(t.GetLabel(), 0) + c0 = t.GetChildren()[0] + self.assertEqual(c0.GetName(), 'Bit-1') + self.assertEqual(c0.GetLabel(), 1) + c1 = t.GetChildren()[1] + self.assertEqual(c1.GetName(), 'Bit-1') + self.assertEqual(c1.GetLabel(), 1) - bv = ExplicitBitVect(2) - bv.SetBit(0) - vc = VectCollection() - vc.AddVect(1, bv) - bv = ExplicitBitVect(2) - bv.SetBit(1) - vc.AddVect(2, bv) - r = t.ClassifyExample(['t', vc, 0]) - self.assertEqual(r, 0) + bv = ExplicitBitVect(2) + bv.SetBit(0) + vc = VectCollection() + vc.AddVect(1, bv) + bv = ExplicitBitVect(2) + bv.SetBit(1) + vc.AddVect(2, bv) + r = t.ClassifyExample(['t', vc, 0]) + self.assertEqual(r, 0) - def test4(self): - from rdkit.six.moves import cPickle - gz = gzip.open( - os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'cdk2-few.pkl.gz'), 'rb') - examples = cPickle.load(gz, encoding='Latin1') - t = BuildSigTree(examples, 2, maxDepth=3) - self.assertEqual(t.GetLabel(), 2181) - self.assertEqual(t.GetChildren()[0].GetLabel(), 2861) - self.assertEqual(t.GetChildren()[1].GetLabel(), 8182) + def test4(self): + import pickle + gz = gzip.open( + os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'cdk2-few.pkl.gz'), 'rb') + examples = pickle.load(gz, encoding='Latin1') + t = BuildSigTree(examples, 2, maxDepth=3) + self.assertEqual(t.GetLabel(), 2181) + self.assertEqual(t.GetChildren()[0].GetLabel(), 2861) + self.assertEqual(t.GetChildren()[1].GetLabel(), 8182) - def test_GenerateRandomEnsemble(self): - ensemble = _GenerateRandomEnsemble(2, 4) - self.assertEqual(len(ensemble), 2) - self.assertTrue(all(r < 4 for r in ensemble)) + def test_GenerateRandomEnsemble(self): + ensemble = _GenerateRandomEnsemble(2, 4) + self.assertEqual(len(ensemble), 2) + self.assertTrue(all(r < 4 for r in ensemble)) - ensemble = _GenerateRandomEnsemble(4, 4) - self.assertEqual(len(ensemble), 4) - self.assertTrue(all(r < 4 for r in ensemble)) + ensemble = _GenerateRandomEnsemble(4, 4) + self.assertEqual(len(ensemble), 4) + self.assertTrue(all(r < 4 for r in ensemble)) - ensemble = _GenerateRandomEnsemble(4, 40) - self.assertEqual(len(ensemble), 4) - self.assertTrue(all(r < 40 for r in ensemble)) + ensemble = _GenerateRandomEnsemble(4, 40) + self.assertEqual(len(ensemble), 4) + self.assertTrue(all(r < 40 for r in ensemble)) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestTree.py b/rdkit/ML/DecTree/UnitTestTree.py index 8e89a7f2f..67625867b 100755 --- a/rdkit/ML/DecTree/UnitTestTree.py +++ b/rdkit/ML/DecTree/UnitTestTree.py @@ -2,7 +2,7 @@ # Copyright (C) 2000 greg Landrum # """ unit testing code for trees and decision trees (not learning/xvalidation) """ -from __future__ import print_function + import copy import os @@ -11,128 +11,128 @@ import unittest from rdkit import RDConfig from rdkit.ML.DecTree import Tree from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO -from rdkit.six.moves import cPickle +from io import StringIO +import pickle class TreeTestCase(unittest.TestCase): - def setUp(self): - self.baseTree = Tree.TreeNode(None, 'root') - self.pickleFileName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/treeunit.pkl' + def setUp(self): + self.baseTree = Tree.TreeNode(None, 'root') + self.pickleFileName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/treeunit.pkl' - def test_Tree(self): - tree = Tree.TreeNode(None, 'root', label=0) - self.assertEqual(tree.GetLevel(), 0) - self.assertEqual(tree.GetName(), 'root') - self.assertEqual(tree.GetData(), None) - self.assertEqual(tree.GetTerminal(), False) - self.assertEqual(tree.GetLabel(), 0) - self.assertEqual(tree.GetParent(), None) - self.assertEqual(tree.GetChildren(), []) + def test_Tree(self): + tree = Tree.TreeNode(None, 'root', label=0) + self.assertEqual(tree.GetLevel(), 0) + self.assertEqual(tree.GetName(), 'root') + self.assertEqual(tree.GetData(), None) + self.assertEqual(tree.GetTerminal(), False) + self.assertEqual(tree.GetLabel(), 0) + self.assertEqual(tree.GetParent(), None) + self.assertEqual(tree.GetChildren(), []) - for i in range(3): - child = tree.AddChild('child {0}'.format(i), i + 1, data={'key': 'value'}) - self.assertEqual(child.GetLevel(), 1) - self.assertEqual(child.GetName(), 'child {0}'.format(i)) - self.assertEqual(child.GetData(), {'key': 'value'}) - self.assertEqual(child.GetLabel(), i + 1) - self.assertEqual(child.GetParent(), tree) - self.assertEqual(child.GetChildren(), []) - children = tree.GetChildren() - self.assertEqual(len(children), 3) - children[0].AddChild('terminal', 4, isTerminal=True) + for i in range(3): + child = tree.AddChild('child {0}'.format(i), i + 1, data={'key': 'value'}) + self.assertEqual(child.GetLevel(), 1) + self.assertEqual(child.GetName(), 'child {0}'.format(i)) + self.assertEqual(child.GetData(), {'key': 'value'}) + self.assertEqual(child.GetLabel(), i + 1) + self.assertEqual(child.GetParent(), tree) + self.assertEqual(child.GetChildren(), []) + children = tree.GetChildren() + self.assertEqual(len(children), 3) + children[0].AddChild('terminal', 4, isTerminal=True) - s = str(tree) - self.assertIn('root', s) - self.assertIn(' terminal', s) - self.assertIn(' child 2', s) + s = str(tree) + self.assertIn('root', s) + self.assertIn(' terminal', s) + self.assertIn(' child 2', s) - tree.NameTree(['a', 'b', 'c', 'd', 'e']) - self.assertEqual(str(tree), 'a\n b\n terminal\n c\n d\n') + tree.NameTree(['a', 'b', 'c', 'd', 'e']) + self.assertEqual(str(tree), 'a\n b\n terminal\n c\n d\n') - tree.PruneChild(children[1]) - self.assertEqual(str(tree), 'a\n b\n terminal\n d\n') + tree.PruneChild(children[1]) + self.assertEqual(str(tree), 'a\n b\n terminal\n d\n') - f = StringIO() - with redirect_stdout(f): - tree.Print(showData=True) - s = f.getvalue() - self.assertIn('value', s) - self.assertIn('None', s) + f = StringIO() + with redirect_stdout(f): + tree.Print(showData=True) + s = f.getvalue() + self.assertIn('value', s) + self.assertIn('None', s) - f = StringIO() - with redirect_stdout(f): - tree.Print() - s = f.getvalue() - self.assertNotIn('value', s) - self.assertNotIn('None', s) + f = StringIO() + with redirect_stdout(f): + tree.Print() + s = f.getvalue() + self.assertNotIn('value', s) + self.assertNotIn('None', s) - tree.Destroy() - self.assertEqual(str(tree), 'a\n') + tree.Destroy() + self.assertEqual(str(tree), 'a\n') - def _readyTree(self): - tree = self.baseTree - tree.AddChild('child0') - tree.AddChild('child1') + def _readyTree(self): + tree = self.baseTree + tree.AddChild('child0') + tree.AddChild('child1') - def test5Equals(self): - # " testing tree equals " - nTree = Tree.TreeNode(None, 'root') - self._readyTree() - tTree = self.baseTree - self.baseTree = nTree - self._readyTree() - assert tTree == self.baseTree, 'Equality test 1 failed. (bad Tree.__cmp__)' - assert self.baseTree == tTree, 'Equality test 2 failed. (bad Tree.__cmp__)' - tTree.AddChild('child2') - assert tTree != self.baseTree, 'Inequality test 1 failed. (bad Tree.__cmp__)' - assert self.baseTree != tTree, 'Inequality test 2 failed. (bad Tree.__cmp__)' + def test5Equals(self): + # " testing tree equals " + nTree = Tree.TreeNode(None, 'root') + self._readyTree() + tTree = self.baseTree + self.baseTree = nTree + self._readyTree() + assert tTree == self.baseTree, 'Equality test 1 failed. (bad Tree.__cmp__)' + assert self.baseTree == tTree, 'Equality test 2 failed. (bad Tree.__cmp__)' + tTree.AddChild('child2') + assert tTree != self.baseTree, 'Inequality test 1 failed. (bad Tree.__cmp__)' + assert self.baseTree != tTree, 'Inequality test 2 failed. (bad Tree.__cmp__)' - self.assertTrue(tTree > self.baseTree, msg='Larger tree is greater') - self.assertEqual(tTree.__cmp__(self.baseTree), 1) + self.assertTrue(tTree > self.baseTree, msg='Larger tree is greater') + self.assertEqual(tTree.__cmp__(self.baseTree), 1) - def test6PickleEquals(self): - # " testing pickled tree equals " - self._readyTree() - pkl = cPickle.dumps(self.baseTree) - oTree = cPickle.loads(pkl) + def test6PickleEquals(self): + # " testing pickled tree equals " + self._readyTree() + pkl = pickle.dumps(self.baseTree) + oTree = pickle.loads(pkl) - assert oTree == self.baseTree, 'Pickle inequality test failed' - self.assertEqual(oTree.__cmp__(self.baseTree), 0) + assert oTree == self.baseTree, 'Pickle inequality test failed' + self.assertEqual(oTree.__cmp__(self.baseTree), 0) - self.baseTree.PruneChild(self.baseTree.GetChildren()[0]) - assert oTree != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)' - self.assertEqual(abs(oTree.__cmp__(self.baseTree)), 1) + self.baseTree.PruneChild(self.baseTree.GetChildren()[0]) + assert oTree != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)' + self.assertEqual(abs(oTree.__cmp__(self.baseTree)), 1) - def test7Copy(self): - # " testing deepcopy on trees " - self._readyTree() - nTree = copy.deepcopy(self.baseTree) - assert nTree == self.baseTree, 'deepcopy failed' + def test7Copy(self): + # " testing deepcopy on trees " + self._readyTree() + nTree = copy.deepcopy(self.baseTree) + assert nTree == self.baseTree, 'deepcopy failed' - def test8In(self): - # " testing list membership " - self._readyTree() - nTree = copy.deepcopy(self.baseTree) - nTree2 = copy.deepcopy(self.baseTree) - nTree2.PruneChild(self.baseTree.GetChildren()[0]) - tList = [nTree2, nTree2, nTree] - assert self.baseTree in tList, 'list membership (tree in list) failed' - tList = [nTree2, nTree2] - assert self.baseTree not in tList, 'list membership (tree not in list) failed' + def test8In(self): + # " testing list membership " + self._readyTree() + nTree = copy.deepcopy(self.baseTree) + nTree2 = copy.deepcopy(self.baseTree) + nTree2.PruneChild(self.baseTree.GetChildren()[0]) + tList = [nTree2, nTree2, nTree] + assert self.baseTree in tList, 'list membership (tree in list) failed' + tList = [nTree2, nTree2] + assert self.baseTree not in tList, 'list membership (tree not in list) failed' - def test_exampleCode(self): - try: - f = StringIO() - with redirect_stdout(f): - Tree._exampleCode() - self.assertTrue(os.path.isfile('save.pkl')) - self.assertIn('tree==tree2 False', f.getvalue(), 'Example didn' 't run to end') - finally: - if os.path.isfile('save.pkl'): - os.remove('save.pkl') + def test_exampleCode(self): + try: + f = StringIO() + with redirect_stdout(f): + Tree._exampleCode() + self.assertTrue(os.path.isfile('save.pkl')) + self.assertIn('tree==tree2 False', f.getvalue(), 'Example didn' 't run to end') + finally: + if os.path.isfile('save.pkl'): + os.remove('save.pkl') if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/UnitTestTreeUtils.py b/rdkit/ML/DecTree/UnitTestTreeUtils.py index 442d01991..9330baf91 100644 --- a/rdkit/ML/DecTree/UnitTestTreeUtils.py +++ b/rdkit/ML/DecTree/UnitTestTreeUtils.py @@ -2,7 +2,7 @@ # Copyright (C) 2000 greg Landrum # """ unit testing code for trees and decision trees (not learning/xvalidation) """ -from __future__ import print_function + import unittest diff --git a/rdkit/ML/DecTree/UnitTestXVal.py b/rdkit/ML/DecTree/UnitTestXVal.py index c1021a62f..0eaa9b86a 100644 --- a/rdkit/ML/DecTree/UnitTestXVal.py +++ b/rdkit/ML/DecTree/UnitTestXVal.py @@ -2,7 +2,7 @@ # Copyright (C) 2000 greg Landrum # """ unit testing code for cross validation """ -from __future__ import print_function + import os import unittest @@ -12,68 +12,69 @@ from rdkit import RDRandom from rdkit.ML.DecTree import CrossValidate from rdkit.ML.DecTree import randomtest from rdkit.TestRunner import redirect_stdout -from rdkit.six import BytesIO, StringIO -from rdkit.six.moves import cPickle +from io import BytesIO, StringIO +import pickle class XValTestCase(unittest.TestCase): - def setUp(self): - self.origTreeName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/XValTree.pkl' - self.randomSeed = 23 - self.randomArraySeed = (23, 42) + def setUp(self): + self.origTreeName = RDConfig.RDCodeDir + '/ML/DecTree/test_data/XValTree.pkl' + self.randomSeed = 23 + self.randomArraySeed = (23, 42) - def testRun(self): - # " test that the CrossValidationDriver runs " - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200) - f = StringIO() - with redirect_stdout(f): - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=False) - self.assertGreater(frac, 0) - self.assertEqual('Var: 1', tree.GetName()) - self.assertIn('Validation error', f.getvalue()) + def testRun(self): + # " test that the CrossValidationDriver runs " + examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200) + f = StringIO() + with redirect_stdout(f): + tree, frac = CrossValidate.CrossValidationDriver( + examples, attrs, nPossibleVals, silent=False) + self.assertGreater(frac, 0) + self.assertEqual('Var: 1', tree.GetName()) + self.assertIn('Validation error', f.getvalue()) - CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, lessGreedy=True, - calcTotalError=True, silent=True) + CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, lessGreedy=True, + calcTotalError=True, silent=True) - def testResults(self): - # " test the results of CrossValidation " - RDRandom.seed(self.randomSeed) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, - seed=self.randomArraySeed) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1) - self.assertGreater(frac, 0) + def testResults(self): + # " test the results of CrossValidation " + RDRandom.seed(self.randomSeed) + examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, + seed=self.randomArraySeed) + tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1) + self.assertGreater(frac, 0) - with open(self.origTreeName, 'r') as inTFile: - buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') - inTFile.close() - inFile = BytesIO(buf) - oTree = cPickle.load(inFile) + with open(self.origTreeName, 'r') as inTFile: + buf = inTFile.read().replace('\r\n', '\n').encode('utf-8') + inTFile.close() + inFile = BytesIO(buf) + oTree = pickle.load(inFile) - assert oTree == tree, 'Random CrossValidation test failed' + assert oTree == tree, 'Random CrossValidation test failed' - def testReplacementSelection(self): - # " use selection with replacement " - RDRandom.seed(self.randomSeed) - examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, - seed=self.randomArraySeed) - tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1, - replacementSelection=1) - self.assertTrue(tree) - self.assertAlmostEqual(frac, 0.01666, 4) + def testReplacementSelection(self): + # " use selection with replacement " + RDRandom.seed(self.randomSeed) + examples, attrs, nPossibleVals = randomtest.GenRandomExamples(nExamples=200, + seed=self.randomArraySeed) + tree, frac = CrossValidate.CrossValidationDriver(examples, attrs, nPossibleVals, silent=1, + replacementSelection=1) + self.assertTrue(tree) + self.assertAlmostEqual(frac, 0.01666, 4) - def test_TestRun(self): - try: - f = StringIO() - with redirect_stdout(f): - CrossValidate.TestRun() - self.assertTrue(os.path.isfile('save.pkl')) - s = f.getvalue() - self.assertIn('t1 == t2 True', s) - finally: - if os.path.isfile('save.pkl'): - os.remove('save.pkl') + def test_TestRun(self): + try: + f = StringIO() + with redirect_stdout(f): + CrossValidate.TestRun() + self.assertTrue(os.path.isfile('save.pkl')) + s = f.getvalue() + self.assertIn('t1 == t2 True', s) + finally: + if os.path.isfile('save.pkl'): + os.remove('save.pkl') if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/DecTree/randomtest.py b/rdkit/ML/DecTree/randomtest.py index 4b12effa2..6361647a9 100755 --- a/rdkit/ML/DecTree/randomtest.py +++ b/rdkit/ML/DecTree/randomtest.py @@ -26,12 +26,12 @@ def GenRandomExamples(nVars=10, randScale=0.3, bitProb=0.5, nExamples=500, seed= if __name__ == '__main__': # pragma: nocover - from rdkit.six.moves import cPickle + import pickle examples, attrs, nPossibleVals = GenRandomExamples() outF = open('random.dat.pkl', 'wb+') - cPickle.dump(examples, outF) - cPickle.dump(attrs, outF) - cPickle.dump(nPossibleVals, outF) + pickle.dump(examples, outF) + pickle.dump(attrs, outF) + pickle.dump(nPossibleVals, outF) tree = ID3.ID3Boot(examples, attrs, nPossibleVals) tree.Pickle('save.pkl') diff --git a/rdkit/ML/Descriptors/CompoundDescriptors.py b/rdkit/ML/Descriptors/CompoundDescriptors.py index 992022808..39111ff0a 100755 --- a/rdkit/ML/Descriptors/CompoundDescriptors.py +++ b/rdkit/ML/Descriptors/CompoundDescriptors.py @@ -5,7 +5,7 @@ (only the composition is required) """ -from __future__ import print_function + from rdkit import RDConfig from rdkit.ML.Descriptors import Parser, Descriptors diff --git a/rdkit/ML/Descriptors/Descriptors.py b/rdkit/ML/Descriptors/Descriptors.py index 0a9f91228..a8f3b6dd2 100755 --- a/rdkit/ML/Descriptors/Descriptors.py +++ b/rdkit/ML/Descriptors/Descriptors.py @@ -4,8 +4,8 @@ """ Various bits and pieces for calculating descriptors """ -from __future__ import print_function -from rdkit.six.moves import cPickle + +import pickle class DescriptorCalculator: @@ -60,7 +60,7 @@ class DescriptorCalculator: except Exception: print('cannot open output file %s for writing' % (fileName)) return - cPickle.dump(self, f) + pickle.dump(self, f) f.close() def CalcDescriptors(self, what, *args, **kwargs): diff --git a/rdkit/ML/Descriptors/MoleculeDescriptors.py b/rdkit/ML/Descriptors/MoleculeDescriptors.py index 03a0d9505..87171b4c4 100755 --- a/rdkit/ML/Descriptors/MoleculeDescriptors.py +++ b/rdkit/ML/Descriptors/MoleculeDescriptors.py @@ -11,7 +11,7 @@ import re from rdkit.Chem import Descriptors as DescriptorsMod from rdkit.ML.Descriptors import Descriptors from rdkit.RDLogger import logger -from rdkit.six.moves import cPickle +import pickle logger = logger() @@ -67,7 +67,7 @@ class MolecularDescriptorCalculator(Descriptors.DescriptorCalculator): except Exception: logger.error('cannot open output file %s for writing' % (fileName)) return - cPickle.dump(self, f) + pickle.dump(self, f) f.close() def CalcDescriptors(self, mol, *args, **kwargs): diff --git a/rdkit/ML/Descriptors/Parser.py b/rdkit/ML/Descriptors/Parser.py index 759fce7dd..ccbed2911 100755 --- a/rdkit/ML/Descriptors/Parser.py +++ b/rdkit/ML/Descriptors/Parser.py @@ -44,7 +44,7 @@ Here's the general flow of things: """ -from __future__ import print_function + # The wildcard import is required to make functions available for the eval statement from math import * diff --git a/rdkit/ML/Descriptors/UnitTestCOMServer.py b/rdkit/ML/Descriptors/UnitTestCOMServer.py index b0480816b..755f1addd 100755 --- a/rdkit/ML/Descriptors/UnitTestCOMServer.py +++ b/rdkit/ML/Descriptors/UnitTestCOMServer.py @@ -4,7 +4,7 @@ """ unit testing code for the descriptor COM server """ -from __future__ import print_function + import unittest diff --git a/rdkit/ML/Descriptors/UnitTestDescriptors.py b/rdkit/ML/Descriptors/UnitTestDescriptors.py index 017f4b2d6..f47d402f0 100755 --- a/rdkit/ML/Descriptors/UnitTestDescriptors.py +++ b/rdkit/ML/Descriptors/UnitTestDescriptors.py @@ -8,44 +8,44 @@ import unittest from rdkit.ML.Descriptors import CompoundDescriptors from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO class TestCase(unittest.TestCase): - def setUp(self): - d = [('DED', ['NonZero', 'Mean', 'Dev']), ('M_B_electroneg', ['NonZero']), - ('Cov_rad', ['Max', 'Min'])] - self.desc = CompoundDescriptors.CompoundDescriptorCalculator(d) - self.desc.BuildAtomDict() - self.tol = 0.0001 + def setUp(self): + d = [('DED', ['NonZero', 'Mean', 'Dev']), ('M_B_electroneg', ['NonZero']), + ('Cov_rad', ['Max', 'Min'])] + self.desc = CompoundDescriptors.CompoundDescriptorCalculator(d) + self.desc.BuildAtomDict() + self.tol = 0.0001 - def testAtomDict(self): - # " testing the atom dict " - assert len(self.desc.atomDict.keys()) == 48, 'BuildAtomDict failed' + def testAtomDict(self): + # " testing the atom dict " + assert len(self.desc.atomDict.keys()) == 48, 'BuildAtomDict failed' - def testSimpleDescriptorCalc(self): - # " testing simple descriptor calculation " - composList = ['Nb', 'Nb3', 'NbPt', 'Nb2Pt'] - compare = [[2.32224798203, 0.0, 1.34000003338, 1.34000003338], - [2.32224798203, 0.0, 1.34000003338, 1.34000003338], - [1.51555249095, 0.806695491076, 1.34000003338, 1.29999995232], - [1.78445098797, 0.717062658734, 1.34000003338, 1.29999995232]] - for i in range(len(composList)): - self.assertTrue( - max( - map(lambda x, y: abs(x - y), compare[i], self.desc.CalcSimpleDescriptorsForComposition( - composList[i]))) < self.tol, 'Descriptor calculation failed') + def testSimpleDescriptorCalc(self): + # " testing simple descriptor calculation " + composList = ['Nb', 'Nb3', 'NbPt', 'Nb2Pt'] + compare = [[2.32224798203, 0.0, 1.34000003338, 1.34000003338], + [2.32224798203, 0.0, 1.34000003338, 1.34000003338], + [1.51555249095, 0.806695491076, 1.34000003338, 1.29999995232], + [1.78445098797, 0.717062658734, 1.34000003338, 1.29999995232]] + for i in range(len(composList)): + self.assertTrue( + max( + map(lambda x, y: abs(x - y), compare[i], self.desc.CalcSimpleDescriptorsForComposition( + composList[i]))) < self.tol, 'Descriptor calculation failed') - names = self.desc.GetDescriptorNames() - self.assertEqual(len(names), 4) - self.assertIn('MEAN_DED', names) + names = self.desc.GetDescriptorNames() + self.assertEqual(len(names), 4) + self.assertIn('MEAN_DED', names) - def test_exampleCode(self): - f = StringIO() - with redirect_stdout(f): - CompoundDescriptors._exampleCode() + def test_exampleCode(self): + f = StringIO() + with redirect_stdout(f): + CompoundDescriptors._exampleCode() if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/Descriptors/UnitTestMolDescriptors.py b/rdkit/ML/Descriptors/UnitTestMolDescriptors.py index eb4e0054a..02e44f6df 100644 --- a/rdkit/ML/Descriptors/UnitTestMolDescriptors.py +++ b/rdkit/ML/Descriptors/UnitTestMolDescriptors.py @@ -13,79 +13,79 @@ from rdkit import Chem from rdkit import RDConfig from rdkit.ML.Descriptors import MoleculeDescriptors, Descriptors from rdkit.TestRunner import redirect_stdout -from rdkit.six import BytesIO, StringIO -from rdkit.six.moves import cPickle +from io import BytesIO, StringIO +import pickle class TestCase(unittest.TestCase): - def setUp(self): - self.descs = ['MolLogP', 'Chi1v'] - self.vers = ('1.1.0', '1.0.0') - self.calc = MoleculeDescriptors.MolecularDescriptorCalculator(self.descs) - self.testD = [('CCOC', (0.6527, 1.40403)), ('CC=O', (0.2052, 0.81305)), ('CCC(=O)O', - (0.481, 1.48839))] + def setUp(self): + self.descs = ['MolLogP', 'Chi1v'] + self.vers = ('1.1.0', '1.0.0') + self.calc = MoleculeDescriptors.MolecularDescriptorCalculator(self.descs) + self.testD = [('CCOC', (0.6527, 1.40403)), ('CC=O', (0.2052, 0.81305)), ('CCC(=O)O', + (0.481, 1.48839))] - def testGetNames(self): - self.assertEqual(self.calc.GetDescriptorNames(), tuple(self.descs)) + def testGetNames(self): + self.assertEqual(self.calc.GetDescriptorNames(), tuple(self.descs)) - def _testVals(self, calc, testD): - for smi, vals in testD: - mol = Chem.MolFromSmiles(smi) - ans = numpy.array(vals) - res = numpy.array(calc.CalcDescriptors(mol)) - self.assertTrue( - max(abs(res - ans)) < 1e-4, 'bad descriptor values for SMILES %s (%s)' % (smi, str(res))) + def _testVals(self, calc, testD): + for smi, vals in testD: + mol = Chem.MolFromSmiles(smi) + ans = numpy.array(vals) + res = numpy.array(calc.CalcDescriptors(mol)) + self.assertTrue( + max(abs(res - ans)) < 1e-4, 'bad descriptor values for SMILES %s (%s)' % (smi, str(res))) - def testCalcVals(self): - self._testVals(self.calc, self.testD) + def testCalcVals(self): + self._testVals(self.calc, self.testD) - def testSaveState(self): - fName = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc') - with open(fName, 'r') as inTF: - buf = inTF.read().replace('\r\n', '\n').encode('utf-8') - inTF.close() - inF = BytesIO(buf) - calc = cPickle.load(inF) - self.assertEqual(calc.GetDescriptorNames(), tuple(self.descs)) - self.assertEqual(calc.GetDescriptorVersions(), tuple(self.vers)) - self._testVals(calc, self.testD) + def testSaveState(self): + fName = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc') + with open(fName, 'r') as inTF: + buf = inTF.read().replace('\r\n', '\n').encode('utf-8') + inTF.close() + inF = BytesIO(buf) + calc = pickle.load(inF) + self.assertEqual(calc.GetDescriptorNames(), tuple(self.descs)) + self.assertEqual(calc.GetDescriptorVersions(), tuple(self.vers)) + self._testVals(calc, self.testD) - f = StringIO() - with redirect_stdout(f): - calc.ShowDescriptors() - s = f.getvalue() - for name in calc.GetDescriptorNames(): - self.assertIn(name, s) + f = StringIO() + with redirect_stdout(f): + calc.ShowDescriptors() + s = f.getvalue() + for name in calc.GetDescriptorNames(): + self.assertIn(name, s) - self.assertIn('Wildman-Crippen LogP value', calc.GetDescriptorSummaries()) - self.assertIn('N/A', calc.GetDescriptorSummaries()) + self.assertIn('Wildman-Crippen LogP value', calc.GetDescriptorSummaries()) + self.assertIn('N/A', calc.GetDescriptorSummaries()) - funcs = calc.GetDescriptorFuncs() - self.assertEqual(len(funcs), len(self.descs)) - for f in funcs: - self.assertTrue(callable(f)) + funcs = calc.GetDescriptorFuncs() + self.assertEqual(len(funcs), len(self.descs)) + for f in funcs: + self.assertTrue(callable(f)) class TestDescriptors(unittest.TestCase): - def test_DescriptorCalculator(self): - calc = Descriptors.DescriptorCalculator() - self.assertRaises(NotImplementedError, calc.ShowDescriptors) - self.assertRaises(NotImplementedError, calc.GetDescriptorNames) - self.assertRaises(NotImplementedError, calc.CalcDescriptors, None) + def test_DescriptorCalculator(self): + calc = Descriptors.DescriptorCalculator() + self.assertRaises(NotImplementedError, calc.ShowDescriptors) + self.assertRaises(NotImplementedError, calc.GetDescriptorNames) + self.assertRaises(NotImplementedError, calc.CalcDescriptors, None) - calc.simpleList = ['simple1', 'simple2'] - calc.compoundList = ['cmpd1', 'cmpd2'] - f = StringIO() - with redirect_stdout(f): - calc.ShowDescriptors() - s = f.getvalue() - for name in calc.simpleList: - self.assertIn(name, s) - for name in calc.compoundList: - self.assertIn(name, s) + calc.simpleList = ['simple1', 'simple2'] + calc.compoundList = ['cmpd1', 'cmpd2'] + f = StringIO() + with redirect_stdout(f): + calc.ShowDescriptors() + s = f.getvalue() + for name in calc.simpleList: + self.assertIn(name, s) + for name in calc.compoundList: + self.assertIn(name, s) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/Descriptors/UnitTestParser.py b/rdkit/ML/Descriptors/UnitTestParser.py index 329c52a77..a66e91296 100755 --- a/rdkit/ML/Descriptors/UnitTestParser.py +++ b/rdkit/ML/Descriptors/UnitTestParser.py @@ -4,7 +4,7 @@ """ unit testing code for compound descriptors """ -from __future__ import print_function + import unittest diff --git a/rdkit/ML/EnrichPlot.py b/rdkit/ML/EnrichPlot.py index 7db74af46..b3e5a2d77 100755 --- a/rdkit/ML/EnrichPlot.py +++ b/rdkit/ML/EnrichPlot.py @@ -66,7 +66,6 @@ Optional Arguments: """ # from rdkit.Dbase.DbConnection import DbConnect -from __future__ import print_function import sys @@ -77,177 +76,174 @@ from rdkit import RDConfig from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import CompositeRun from rdkit.ML.Data import DataUtils, SplitData, Stats -from rdkit.six import PY3 -from rdkit.six.moves import cPickle -from rdkit.six.moves import input +import pickle __VERSION_STRING = "2.4.0" -if PY3: - def cmp(t1, t2): +def cmp(t1, t2): return (t1 < t2) * -1 or (t1 > t2) * 1 def message(msg, noRet=0, dest=sys.stderr): - """ emits messages to _sys.stderr_ - override this in modules which import this one to redirect output + """ emits messages to _sys.stderr_ + override this in modules which import this one to redirect output - **Arguments** + **Arguments** - - msg: the string to be displayed + - msg: the string to be displayed - """ - if noRet: - dest.write('%s ' % (msg)) - else: - dest.write('%s\n' % (msg)) + """ + if noRet: + dest.write('%s ' % (msg)) + else: + dest.write('%s\n' % (msg)) def error(msg, dest=sys.stderr): - """ emits messages to _sys.stderr_ - override this in modules which import this one to redirect output + """ emits messages to _sys.stderr_ + override this in modules which import this one to redirect output - **Arguments** + **Arguments** - - msg: the string to be displayed + - msg: the string to be displayed - """ - sys.stderr.write('ERROR: %s\n' % (msg)) + """ + sys.stderr.write('ERROR: %s\n' % (msg)) def ScreenModel(mdl, descs, data, picking=[1], indices=[], errorEstimate=0): - """ collects the results of screening an individual composite model that match - a particular value + """ collects the results of screening an individual composite model that match + a particular value - **Arguments** + **Arguments** - - mdl: the composite model + - mdl: the composite model - - descs: a list of descriptor names corresponding to the data set + - descs: a list of descriptor names corresponding to the data set - - data: the data set, a list of points to be screened. + - data: the data set, a list of points to be screened. - - picking: (Optional) a list of values that are to be collected. - For examples, if you want an enrichment plot for picking the values - 1 and 2, you'd having picking=[1,2]. + - picking: (Optional) a list of values that are to be collected. + For examples, if you want an enrichment plot for picking the values + 1 and 2, you'd having picking=[1,2]. - **Returns** + **Returns** - a list of 4-tuples containing: + a list of 4-tuples containing: - - the id of the point + - the id of the point - - the true result (from the data set) + - the true result (from the data set) - - the predicted result + - the predicted result - - the confidence value for the prediction + - the confidence value for the prediction - """ - mdl.SetInputOrder(descs) + """ + mdl.SetInputOrder(descs) - for j in range(len(mdl)): - tmp = mdl.GetModel(j) - if hasattr(tmp, '_trainIndices') and not isinstance(tmp._trainIndices, dict): - tis = {} - if hasattr(tmp, '_trainIndices'): - for v in tmp._trainIndices: - tis[v] = 1 - tmp._trainIndices = tis - - res = [] - if mdl.GetQuantBounds(): - needsQuant = 1 - else: - needsQuant = 0 - - if not indices: - indices = list(range(len(data))) - nTrueActives = 0 - for i in indices: - if errorEstimate: - use = [] - for j in range(len(mdl)): + for j in range(len(mdl)): tmp = mdl.GetModel(j) - if not tmp._trainIndices.get(i, 0): - use.append(j) + if hasattr(tmp, '_trainIndices') and not isinstance(tmp._trainIndices, dict): + tis = {} + if hasattr(tmp, '_trainIndices'): + for v in tmp._trainIndices: + tis[v] = 1 + tmp._trainIndices = tis + + res = [] + if mdl.GetQuantBounds(): + needsQuant = 1 else: - use = None - pt = data[i] - pred, conf = mdl.ClassifyExample(pt, onlyModels=use) - if needsQuant: - pt = mdl.QuantizeActivity(pt[:]) - trueRes = pt[-1] - if trueRes in picking: - nTrueActives += 1 - if pred in picking: - res.append((pt[0], trueRes, pred, conf)) - return nTrueActives, res + needsQuant = 0 + + if not indices: + indices = list(range(len(data))) + nTrueActives = 0 + for i in indices: + if errorEstimate: + use = [] + for j in range(len(mdl)): + tmp = mdl.GetModel(j) + if not tmp._trainIndices.get(i, 0): + use.append(j) + else: + use = None + pt = data[i] + pred, conf = mdl.ClassifyExample(pt, onlyModels=use) + if needsQuant: + pt = mdl.QuantizeActivity(pt[:]) + trueRes = pt[-1] + if trueRes in picking: + nTrueActives += 1 + if pred in picking: + res.append((pt[0], trueRes, pred, conf)) + return nTrueActives, res def AccumulateCounts(predictions, thresh=0, sortIt=1): - """ Accumulates the data for the enrichment plot for a single model + """ Accumulates the data for the enrichment plot for a single model - **Arguments** + **Arguments** - - predictions: a list of 3-tuples (as returned by _ScreenModels_) + - predictions: a list of 3-tuples (as returned by _ScreenModels_) - - thresh: a threshold for the confidence level. Anything below - this threshold will not be considered + - thresh: a threshold for the confidence level. Anything below + this threshold will not be considered - - sortIt: toggles sorting on confidence levels + - sortIt: toggles sorting on confidence levels - **Returns** + **Returns** - - a list of 3-tuples: + - a list of 3-tuples: - - the id of the active picked here + - the id of the active picked here - - num actives found so far + - num actives found so far - - number of picks made so far + - number of picks made so far - """ - if sortIt: - predictions.sort(lambda x, y: cmp(y[3], x[3])) - res = [] - nCorrect = 0 - nPts = 0 - for i in range(len(predictions)): - ID, real, pred, conf = predictions[i] - if conf > thresh: - if pred == real: - nCorrect += 1 - nPts += 1 - res.append((ID, nCorrect, nPts)) + """ + if sortIt: + predictions.sort(lambda x, y: cmp(y[3], x[3])) + res = [] + nCorrect = 0 + nPts = 0 + for i in range(len(predictions)): + ID, real, pred, conf = predictions[i] + if conf > thresh: + if pred == real: + nCorrect += 1 + nPts += 1 + res.append((ID, nCorrect, nPts)) - return res + return res def MakePlot(details, final, counts, pickVects, nModels, nTrueActs=-1): - if not hasattr(details, 'plotFile') or not details.plotFile: - return + if not hasattr(details, 'plotFile') or not details.plotFile: + return - dataFileName = '%s.dat' % (details.plotFile) - outF = open(dataFileName, 'w+') - i = 0 - while i < len(final) and counts[i] != 0: - if nModels > 1: - _, sd = Stats.MeanAndDev(pickVects[i]) - confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) - outF.write('%d %f %f %d %f\n' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], - counts[i], confInterval)) - else: - outF.write('%d %f %f %d\n' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], - counts[i])) - i += 1 - outF.close() - plotFileName = '%s.gnu' % (details.plotFile) - gnuF = open(plotFileName, 'w+') - gnuHdr = """# Generated by EnrichPlot.py version: %s + dataFileName = '%s.dat' % (details.plotFile) + outF = open(dataFileName, 'w+') + i = 0 + while i < len(final) and counts[i] != 0: + if nModels > 1: + _, sd = Stats.MeanAndDev(pickVects[i]) + confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) + outF.write('%d %f %f %d %f\n' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], + counts[i], confInterval)) + else: + outF.write('%d %f %f %d\n' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], + counts[i])) + i += 1 + outF.close() + plotFileName = '%s.gnu' % (details.plotFile) + gnuF = open(plotFileName, 'w+') + gnuHdr = """# Generated by EnrichPlot.py version: %s set size square 0.7 set xr [0:] set data styl points @@ -258,229 +254,230 @@ def MakePlot(details, final, counts, pickVects, nModels, nTrueActs=-1): set term postscript enh color solid "Helvetica" 16 set term X """ % (__VERSION_STRING) - print(gnuHdr, file=gnuF) - if nTrueActs > 0: - print('set yr [0:%d]' % nTrueActs, file=gnuF) - print('plot x with lines', file=gnuF) - if nModels > 1: - everyGap = i / 20 - print('replot "%s" using 1:2 with lines,' % (dataFileName), end='', file=gnuF) - print('"%s" every %d using 1:2:5 with yerrorbars' % (dataFileName, everyGap), file=gnuF) - else: - print('replot "%s" with points' % (dataFileName), file=gnuF) - gnuF.close() + print(gnuHdr, file=gnuF) + if nTrueActs > 0: + print('set yr [0:%d]' % nTrueActs, file=gnuF) + print('plot x with lines', file=gnuF) + if nModels > 1: + everyGap = i / 20 + print('replot "%s" using 1:2 with lines,' % (dataFileName), end='', file=gnuF) + print('"%s" every %d using 1:2:5 with yerrorbars' % (dataFileName, everyGap), file=gnuF) + else: + print('replot "%s" with points' % (dataFileName), file=gnuF) + gnuF.close() - if hasattr(details, 'showPlot') and details.showPlot: - try: - from Gnuplot import Gnuplot - p = Gnuplot() - p('load "%s"' % (plotFileName)) - input('press return to continue...\n') - except Exception: - import traceback - traceback.print_exc() + if hasattr(details, 'showPlot') and details.showPlot: + try: + from Gnuplot import Gnuplot + p = Gnuplot() + p('load "%s"' % (plotFileName)) + input('press return to continue...\n') + except Exception: + import traceback + traceback.print_exc() def Usage(): - """ displays a usage message and exits """ - sys.stderr.write(__doc__) - sys.exit(-1) + """ displays a usage message and exits """ + sys.stderr.write(__doc__) + sys.exit(-1) if __name__ == '__main__': - import getopt - try: - args, extras = getopt.getopt(sys.argv[1:], 'd:t:a:N:p:cSTHF:v:', - ('thresh=', 'plotFile=', 'showPlot', 'pickleCol=', 'OOB', 'noSort', - 'pickBase=', 'doROC', 'rocThresh=', 'enrich=')) - except Exception: - import traceback - traceback.print_exc() - Usage() - - details = CompositeRun.CompositeRun() - CompositeRun.SetDefaults(details) - - details.activeTgt = [1] - details.doTraining = 0 - details.doHoldout = 0 - details.dbTableName = '' - details.plotFile = '' - details.showPlot = 0 - details.pickleCol = -1 - details.errorEstimate = 0 - details.sortIt = 1 - details.pickBase = '' - details.doROC = 0 - details.rocThresh = -1 - for arg, val in args: - if arg == '-d': - details.dbName = val - if arg == '-t': - details.dbTableName = val - elif arg == '-a' or arg == '--enrich': - details.activeTgt = eval(val) - if not isinstance(details.activeTgt, (tuple, list)): - # if (type(details.activeTgt) not in (types.TupleType, types.ListType)): - details.activeTgt = (details.activeTgt, ) - - elif arg == '--thresh': - details.threshold = float(val) - elif arg == '-N': - details.note = val - elif arg == '-p': - details.persistTblName = val - elif arg == '-S': - details.shuffleActivities = 1 - elif arg == '-H': - details.doTraining = 0 - details.doHoldout = 1 - elif arg == '-T': - details.doTraining = 1 - details.doHoldout = 0 - elif arg == '-F': - details.filterFrac = float(val) - elif arg == '-v': - details.filterVal = float(val) - elif arg == '--plotFile': - details.plotFile = val - elif arg == '--showPlot': - details.showPlot = 1 - elif arg == '--pickleCol': - details.pickleCol = int(val) - 1 - elif arg == '--OOB': - details.errorEstimate = 1 - elif arg == '--noSort': - details.sortIt = 0 - elif arg == '--doROC': - details.doROC = 1 - elif arg == '--rocThresh': - details.rocThresh = int(val) - elif arg == '--pickBase': - details.pickBase = val - - if not details.dbName or not details.dbTableName: - Usage() - print('*******Please provide both the -d and -t arguments') - - message('Building Data set\n') - dataSet = DataUtils.DBToData(details.dbName, details.dbTableName, user=RDConfig.defaultDBUser, - password=RDConfig.defaultDBPassword, pickleCol=details.pickleCol, - pickleClass=DataStructs.ExplicitBitVect) - - descs = dataSet.GetVarNames() - nPts = dataSet.GetNPts() - message('npts: %d\n' % (nPts)) - final = numpy.zeros((nPts, 2), numpy.float) - counts = numpy.zeros(nPts, numpy.integer) - selPts = [None] * nPts - - models = [] - if details.persistTblName: - conn = DbConnect(details.dbName, details.persistTblName) - message('-> Retrieving models from database') - curs = conn.GetCursor() - curs.execute("select model from %s where note='%s'" % (details.persistTblName, details.note)) - message('-> Reconstructing models') + import getopt try: - blob = curs.fetchone() + args, extras = getopt.getopt(sys.argv[1:], 'd:t:a:N:p:cSTHF:v:', + ('thresh=', 'plotFile=', 'showPlot', 'pickleCol=', 'OOB', 'noSort', + 'pickBase=', 'doROC', 'rocThresh=', 'enrich=')) except Exception: - blob = None - while blob: - message(' Building model %d' % len(models)) - blob = blob[0] - try: - models.append(cPickle.loads(str(blob))) - except Exception: import traceback traceback.print_exc() - print('Model failed') - else: - message(' <-Done') - try: - blob = curs.fetchone() - except Exception: - blob = None - curs = None - else: - for modelName in extras: - try: - model = cPickle.load(open(modelName, 'rb')) - except Exception: - import traceback - print('problems with model %s:' % modelName) - traceback.print_exc() - else: - models.append(model) - nModels = len(models) - pickVects = {} - halfwayPts = [1e8] * len(models) - for whichModel, model in enumerate(models): - tmpD = dataSet - try: - seed = model._randomSeed - except AttributeError: - pass + Usage() + + details = CompositeRun.CompositeRun() + CompositeRun.SetDefaults(details) + + details.activeTgt = [1] + details.doTraining = 0 + details.doHoldout = 0 + details.dbTableName = '' + details.plotFile = '' + details.showPlot = 0 + details.pickleCol = -1 + details.errorEstimate = 0 + details.sortIt = 1 + details.pickBase = '' + details.doROC = 0 + details.rocThresh = -1 + for arg, val in args: + if arg == '-d': + details.dbName = val + if arg == '-t': + details.dbTableName = val + elif arg == '-a' or arg == '--enrich': + details.activeTgt = eval(val) + if not isinstance(details.activeTgt, (tuple, list)): + # if (type(details.activeTgt) not in (types.TupleType, types.ListType)): + details.activeTgt = (details.activeTgt, ) + + elif arg == '--thresh': + details.threshold = float(val) + elif arg == '-N': + details.note = val + elif arg == '-p': + details.persistTblName = val + elif arg == '-S': + details.shuffleActivities = 1 + elif arg == '-H': + details.doTraining = 0 + details.doHoldout = 1 + elif arg == '-T': + details.doTraining = 1 + details.doHoldout = 0 + elif arg == '-F': + details.filterFrac = float(val) + elif arg == '-v': + details.filterVal = float(val) + elif arg == '--plotFile': + details.plotFile = val + elif arg == '--showPlot': + details.showPlot = 1 + elif arg == '--pickleCol': + details.pickleCol = int(val) - 1 + elif arg == '--OOB': + details.errorEstimate = 1 + elif arg == '--noSort': + details.sortIt = 0 + elif arg == '--doROC': + details.doROC = 1 + elif arg == '--rocThresh': + details.rocThresh = int(val) + elif arg == '--pickBase': + details.pickBase = val + + if not details.dbName or not details.dbTableName: + Usage() + print('*******Please provide both the -d and -t arguments') + + message('Building Data set\n') + dataSet = DataUtils.DBToData(details.dbName, details.dbTableName, user=RDConfig.defaultDBUser, + password=RDConfig.defaultDBPassword, pickleCol=details.pickleCol, + pickleClass=DataStructs.ExplicitBitVect) + + descs = dataSet.GetVarNames() + nPts = dataSet.GetNPts() + message('npts: %d\n' % (nPts)) + final = numpy.zeros((nPts, 2), numpy.float) + counts = numpy.zeros(nPts, numpy.integer) + selPts = [None] * nPts + + models = [] + if details.persistTblName: + conn = DbConnect(details.dbName, details.persistTblName) + message('-> Retrieving models from database') + curs = conn.GetCursor() + curs.execute("select model from %s where note='%s'" % + (details.persistTblName, details.note)) + message('-> Reconstructing models') + try: + blob = curs.fetchone() + except Exception: + blob = None + while blob: + message(' Building model %d' % len(models)) + blob = blob[0] + try: + models.append(pickle.loads(str(blob))) + except Exception: + import traceback + traceback.print_exc() + print('Model failed') + else: + message(' <-Done') + try: + blob = curs.fetchone() + except Exception: + blob = None + curs = None else: - DataUtils.InitRandomNumbers(seed) - if details.shuffleActivities: - DataUtils.RandomizeActivities(tmpD, shuffle=1) - if hasattr(model, '_splitFrac') and (details.doHoldout or details.doTraining): - trainIdx, testIdx = SplitData.SplitIndices(tmpD.GetNPts(), model._splitFrac, silent=1) - if details.filterFrac != 0.0: - trainFilt, temp = DataUtils.FilterData(tmpD, details.filterVal, details.filterFrac, -1, - indicesToUse=trainIdx, indicesOnly=1) - testIdx += temp - trainIdx = trainFilt - if details.doTraining: - testIdx, trainIdx = trainIdx, testIdx + for modelName in extras: + try: + model = pickle.load(open(modelName, 'rb')) + except Exception: + import traceback + print('problems with model %s:' % modelName) + traceback.print_exc() + else: + models.append(model) + nModels = len(models) + pickVects = {} + halfwayPts = [1e8] * len(models) + for whichModel, model in enumerate(models): + tmpD = dataSet + try: + seed = model._randomSeed + except AttributeError: + pass + else: + DataUtils.InitRandomNumbers(seed) + if details.shuffleActivities: + DataUtils.RandomizeActivities(tmpD, shuffle=1) + if hasattr(model, '_splitFrac') and (details.doHoldout or details.doTraining): + trainIdx, testIdx = SplitData.SplitIndices(tmpD.GetNPts(), model._splitFrac, silent=1) + if details.filterFrac != 0.0: + trainFilt, temp = DataUtils.FilterData(tmpD, details.filterVal, details.filterFrac, -1, + indicesToUse=trainIdx, indicesOnly=1) + testIdx += temp + trainIdx = trainFilt + if details.doTraining: + testIdx, trainIdx = trainIdx, testIdx + else: + testIdx = list(range(tmpD.GetNPts())) + + message('screening %d examples' % (len(testIdx))) + nTrueActives, screenRes = ScreenModel(model, descs, tmpD, picking=details.activeTgt, + indices=testIdx, errorEstimate=details.errorEstimate) + message('accumulating') + runningCounts = AccumulateCounts(screenRes, sortIt=details.sortIt, thresh=details.threshold) + if details.pickBase: + pickFile = open('%s.%d.picks' % (details.pickBase, whichModel + 1), 'w+') + else: + pickFile = None + + for i, entry in enumerate(runningCounts): + entry = runningCounts[i] + selPts[i] = entry[0] + final[i][0] += entry[1] + final[i][1] += entry[2] + v = pickVects.get(i, []) + v.append(entry[1]) + pickVects[i] = v + counts[i] += 1 + if pickFile: + pickFile.write('%s\n' % (entry[0])) + if entry[1] >= nTrueActives / 2 and entry[2] < halfwayPts[whichModel]: + halfwayPts[whichModel] = entry[2] + message('Halfway point: %d\n' % halfwayPts[whichModel]) + + if details.plotFile: + MakePlot(details, final, counts, pickVects, nModels, nTrueActs=nTrueActives) else: - testIdx = list(range(tmpD.GetNPts())) + if nModels > 1: + print('#Index\tAvg_num_correct\tConf90Pct\tAvg_num_picked\tNum_picks\tlast_selection') + else: + print('#Index\tAvg_num_correct\tAvg_num_picked\tNum_picks\tlast_selection') - message('screening %d examples' % (len(testIdx))) - nTrueActives, screenRes = ScreenModel(model, descs, tmpD, picking=details.activeTgt, - indices=testIdx, errorEstimate=details.errorEstimate) - message('accumulating') - runningCounts = AccumulateCounts(screenRes, sortIt=details.sortIt, thresh=details.threshold) - if details.pickBase: - pickFile = open('%s.%d.picks' % (details.pickBase, whichModel + 1), 'w+') - else: - pickFile = None + i = 0 + while i < nPts and counts[i] != 0: + if nModels > 1: + mean, sd = Stats.MeanAndDev(pickVects[i]) + confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) + print('%d\t%f\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], confInterval, + final[i][1] / counts[i], counts[i], str(selPts[i]))) + else: + print('%d\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], + counts[i], str(selPts[i]))) + i += 1 - for i, entry in enumerate(runningCounts): - entry = runningCounts[i] - selPts[i] = entry[0] - final[i][0] += entry[1] - final[i][1] += entry[2] - v = pickVects.get(i, []) - v.append(entry[1]) - pickVects[i] = v - counts[i] += 1 - if pickFile: - pickFile.write('%s\n' % (entry[0])) - if entry[1] >= nTrueActives / 2 and entry[2] < halfwayPts[whichModel]: - halfwayPts[whichModel] = entry[2] - message('Halfway point: %d\n' % halfwayPts[whichModel]) - - if details.plotFile: - MakePlot(details, final, counts, pickVects, nModels, nTrueActs=nTrueActives) - else: - if nModels > 1: - print('#Index\tAvg_num_correct\tConf90Pct\tAvg_num_picked\tNum_picks\tlast_selection') - else: - print('#Index\tAvg_num_correct\tAvg_num_picked\tNum_picks\tlast_selection') - - i = 0 - while i < nPts and counts[i] != 0: - if nModels > 1: - mean, sd = Stats.MeanAndDev(pickVects[i]) - confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) - print('%d\t%f\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], confInterval, - final[i][1] / counts[i], counts[i], str(selPts[i]))) - else: - print('%d\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], - counts[i], str(selPts[i]))) - i += 1 - - mean, sd = Stats.MeanAndDev(halfwayPts) - print('Halfway point: %.2f(%.2f)' % (mean, sd)) + mean, sd = Stats.MeanAndDev(halfwayPts) + print('Halfway point: %.2f(%.2f)' % (mean, sd)) diff --git a/rdkit/ML/GrowComposite.py b/rdkit/ML/GrowComposite.py index a417c50ea..990e8375c 100755 --- a/rdkit/ML/GrowComposite.py +++ b/rdkit/ML/GrowComposite.py @@ -92,7 +92,7 @@ - -V: print the version number and exit """ -from __future__ import print_function + import sys import time @@ -104,7 +104,7 @@ from rdkit.ML import CompositeRun from rdkit.ML import ScreenComposite, BuildComposite from rdkit.ML.Composite import AdjustComposite from rdkit.ML.Data import DataUtils, SplitData -from rdkit.six.moves import cPickle +import pickle _runDetails = CompositeRun.CompositeRun() @@ -274,9 +274,9 @@ def GetComposites(details): mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote)) for row in mdls: rawD = row[0] - res.append(cPickle.loads(str(rawD))) + res.append(pickle.loads(str(rawD))) elif details.composFileName: - res.append(cPickle.load(open(details.composFileName, 'rb'))) + res.append(pickle.load(open(details.composFileName, 'rb'))) return res @@ -550,7 +550,7 @@ if __name__ == '__main__': message('WARNING: updating results table with models having different weights') # save the composite for i in range(len(composites)): - _runDetails.model = cPickle.dumps(composites[i]) + _runDetails.model = pickle.dumps(composites[i]) _runDetails.Store(db=_runDetails.dbName, table=_runDetails.persistTblName) elif nModels == 1: composite = GrowIt(_runDetails, initModels[0], setDescNames=1) @@ -576,7 +576,7 @@ if __name__ == '__main__': if (len(composites)) > 1: message('WARNING: updating results table with models having different weights') for i in range(len(composites)): - _runDetails.model = cPickle.dumps(composites[i]) + _runDetails.model = pickle.dumps(composites[i]) _runDetails.Store(db=_runDetails.dbName, table=_runDetails.persistTblName) else: message("No models found") diff --git a/rdkit/ML/InfoTheory/UnitTestBitRanker.py b/rdkit/ML/InfoTheory/UnitTestBitRanker.py index 5ff60fa06..6c6580975 100755 --- a/rdkit/ML/InfoTheory/UnitTestBitRanker.py +++ b/rdkit/ML/InfoTheory/UnitTestBitRanker.py @@ -2,7 +2,7 @@ This test currently fails. The database is not available. """ -from __future__ import print_function + import os import unittest @@ -10,44 +10,44 @@ import unittest from rdkit import RDConfig from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import InfoTheory -from rdkit.six.moves import cPickle as pickle +import pickle RDConfig.usePgSQL = 0 def feq(v1, v2, tol2=1e-4): - return abs(v1 - v2) <= tol2 + return abs(v1 - v2) <= tol2 def getFingerprints(conn): - data = conn.GetData(table='signatures', fields='mol_name,fingerprint') - fpMap = {} - for dat in data: - pkl = str(dat[1]) - sbv = pickle.loads(pkl) - fpMap[dat[0]] = sbv - return fpMap + data = conn.GetData(table='signatures', fields='mol_name,fingerprint') + fpMap = {} + for dat in data: + pkl = str(dat[1]) + sbv = pickle.loads(pkl) + fpMap[dat[0]] = sbv + return fpMap def getNameAct(conn): - data = conn.GetData(table='raw_data', fields='mol_name,activity_class') - nameAct = {} - for dat in data: - nameAct[dat[0]] = dat[1] + data = conn.GetData(table='raw_data', fields='mol_name,activity_class') + nameAct = {} + for dat in data: + nameAct[dat[0]] = dat[1] - return nameAct + return nameAct def ReadCombiInfo(fileName): - infil = open(fileName, 'r') - lines = infil.readlines() - infil.close() - infos = [] - for lin in lines: - tlst = lin.strip().split() - info = float(tlst[1]) - infos.append(info) - return infos + infil = open(fileName, 'r') + lines = infil.readlines() + infil.close() + infos = [] + for lin in lines: + tlst = lin.strip().split() + info = float(tlst[1]) + infos.append(info) + return infos _testDatabase = os.path.join( @@ -57,93 +57,93 @@ _testDatabase = os.path.join( @unittest.skipIf(not os.path.isfile(_testDatabase), 'Test database FEW_CDK2.GDB missing') class TestCase(unittest.TestCase): - def test0Ranker(self): - nbits = 5000 - conn = DbConnect(_testDatabase) - fps = getFingerprints(conn) - nameAct = getNameAct(conn) - sl = len(list(fps.values())[0]) - rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.ENTROPY) + def test0Ranker(self): + nbits = 5000 + conn = DbConnect(_testDatabase) + fps = getFingerprints(conn) + nameAct = getNameAct(conn) + sl = len(list(fps.values())[0]) + rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.ENTROPY) - print("Collecting Votes ....") - for key in nameAct.keys(): - if nameAct[key] == 100: - rnkr.AccumulateVotes(fps[key], 0) - if nameAct[key] == 0: - rnkr.AccumulateVotes(fps[key], 1) + print("Collecting Votes ....") + for key in nameAct.keys(): + if nameAct[key] == 100: + rnkr.AccumulateVotes(fps[key], 0) + if nameAct[key] == 0: + rnkr.AccumulateVotes(fps[key], 1) - # now do the ranking - print("ranking bits ....") - topN = rnkr.GetTopN(nbits) + # now do the ranking + print("ranking bits ....") + topN = rnkr.GetTopN(nbits) - # get the combichem ranked list from a file - cfile = os.path.join('test_data', 'combiRank.out') - combiInfo = ReadCombiInfo(cfile) - # now check if the infocontents are the same as the combichem stuff - print("Comparing bit info contents ....") - for i in range(900): - assert feq(topN[i, 1], combiInfo[i]) + # get the combichem ranked list from a file + cfile = os.path.join('test_data', 'combiRank.out') + combiInfo = ReadCombiInfo(cfile) + # now check if the infocontents are the same as the combichem stuff + print("Comparing bit info contents ....") + for i in range(900): + assert feq(topN[i, 1], combiInfo[i]) - ofile = os.path.join('test_data', 'rdTopBits.txt') - rnkr.WriteTopBitsToFile(ofile) + ofile = os.path.join('test_data', 'rdTopBits.txt') + rnkr.WriteTopBitsToFile(ofile) - @unittest.skipIf(not os.path.isfile(_testDatabase), 'Test database FEW_CDK2.GDB missing') - def test1BiasRanker(self): - nbits = 5000 - conn = DbConnect(_testDatabase) - fps = getFingerprints(conn) - nameAct = getNameAct(conn) - sl = len(list(fps.values())[0]) - rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.BIASENTROPY) - rnkr.SetBiasList([0]) - print("Collecting Votes ....") - for key in nameAct.keys(): - if nameAct[key] == 100: - rnkr.AccumulateVotes(fps[key], 0) - if nameAct[key] == 0: - rnkr.AccumulateVotes(fps[key], 1) + @unittest.skipIf(not os.path.isfile(_testDatabase), 'Test database FEW_CDK2.GDB missing') + def test1BiasRanker(self): + nbits = 5000 + conn = DbConnect(_testDatabase) + fps = getFingerprints(conn) + nameAct = getNameAct(conn) + sl = len(list(fps.values())[0]) + rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.BIASENTROPY) + rnkr.SetBiasList([0]) + print("Collecting Votes ....") + for key in nameAct.keys(): + if nameAct[key] == 100: + rnkr.AccumulateVotes(fps[key], 0) + if nameAct[key] == 0: + rnkr.AccumulateVotes(fps[key], 1) - # now do the ranking - print("ranking bits ....") - topN = rnkr.GetTopN(nbits) + # now do the ranking + print("ranking bits ....") + topN = rnkr.GetTopN(nbits) - # get the combichem ranked list from a file - cfile = os.path.join('test_data', 'combiRank.out') - combiInfo = ReadCombiInfo(cfile) - # now check if the infocontents are the same as the combichem stuff - print("Comparing bit info contents ....") - for i in range(nbits): - assert feq(topN[i, 1], combiInfo[i]) + # get the combichem ranked list from a file + cfile = os.path.join('test_data', 'combiRank.out') + combiInfo = ReadCombiInfo(cfile) + # now check if the infocontents are the same as the combichem stuff + print("Comparing bit info contents ....") + for i in range(nbits): + assert feq(topN[i, 1], combiInfo[i]) - def test2ChiSquare(self): - nbits = 5000 - conn = DbConnect(_testDatabase) - fps = getFingerprints(conn) - nameAct = getNameAct(conn) - print(fps.values()) - sl = len(list(fps.values())[0]) - rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.BIASCHISQUARE) - rnkr.SetBiasList([0]) - print("Collecting Votes ....") - for key in nameAct.keys(): - if nameAct[key] == 100: - rnkr.AccumulateVotes(fps[key], 0) - if nameAct[key] == 0: - rnkr.AccumulateVotes(fps[key], 1) + def test2ChiSquare(self): + nbits = 5000 + conn = DbConnect(_testDatabase) + fps = getFingerprints(conn) + nameAct = getNameAct(conn) + print(fps.values()) + sl = len(list(fps.values())[0]) + rnkr = InfoTheory.InfoBitRanker(sl, 2, InfoTheory.InfoType.BIASCHISQUARE) + rnkr.SetBiasList([0]) + print("Collecting Votes ....") + for key in nameAct.keys(): + if nameAct[key] == 100: + rnkr.AccumulateVotes(fps[key], 0) + if nameAct[key] == 0: + rnkr.AccumulateVotes(fps[key], 1) - # now do the ranking - print("ranking bits ....") - topN = rnkr.GetTopN(nbits) + # now do the ranking + print("ranking bits ....") + topN = rnkr.GetTopN(nbits) - # get the combichem ranked list from a file - cfile = os.path.join('test_data', 'combiRankChi.out') - combiInfo = ReadCombiInfo(cfile) - # now check if the infocontents are the same as the combichem stuff - print("Comparing bit info contents ....") - for i in range(nbits): - assert feq(topN[i, 1], combiInfo[i]) - # rnkr.WriteTopBitsToFile("chiBitsBias.txt") + # get the combichem ranked list from a file + cfile = os.path.join('test_data', 'combiRankChi.out') + combiInfo = ReadCombiInfo(cfile) + # now check if the infocontents are the same as the combichem stuff + print("Comparing bit info contents ....") + for i in range(nbits): + assert feq(topN[i, 1], combiInfo[i]) + # rnkr.WriteTopBitsToFile("chiBitsBias.txt") if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/InfoTheory/UnitTestCorrMatGen.py b/rdkit/ML/InfoTheory/UnitTestCorrMatGen.py index 250834636..0b478e724 100755 --- a/rdkit/ML/InfoTheory/UnitTestCorrMatGen.py +++ b/rdkit/ML/InfoTheory/UnitTestCorrMatGen.py @@ -1,133 +1,131 @@ -from __future__ import division, print_function - -import random -import unittest - -from rdkit import DataStructs -from rdkit.ML.Data import DataUtils from rdkit.ML.InfoTheory import rdInfoTheory +from rdkit.ML.Data import DataUtils +from rdkit import DataStructs +import unittest +import random + try: - from rdkit.ML.InfoTheory import BitClusterer + from rdkit.ML.InfoTheory import BitClusterer except ImportError: - BitClusterer = None + BitClusterer = None def getValLTM(i, j, mat): - if i > j: - id_ = i * (i - 1) // 2 + j - return mat[id_] - elif j > i: - id_ = j * (j - 1) // 2 + i - return mat[id_] - else: - return 0.0 + if i > j: + id_ = i * (i - 1) // 2 + j + return mat[id_] + elif j > i: + id_ = j * (j - 1) // 2 + i + return mat[id_] + else: + return 0.0 class TestCase(unittest.TestCase): - def setUp(self): - # here is what we are going to do to test this out - # - generate bit vectrs of length nbits - # - turn on a fraction of the first nbits/2 bits at random - # - for each bit i turned on in the range (0, nbits/2) turn on the bit - # nbits/2 + i - # - basically the first half of a fingerprint is same as the second half of the - # fingerprint - # - if we repeat this process often enough we whould see strong correlation between - # the bits i (i < nbits/2) and (nbits/2 + i) - DataUtils.InitRandomNumbers((100, 23)) - self.nbits = 200 - self.d = 40 - self.nfp = 1000 + def setUp(self): + # here is what we are going to do to test this out + # - generate bit vectrs of length nbits + # - turn on a fraction of the first nbits/2 bits at random + # - for each bit i turned on in the range (0, nbits/2) turn on the bit + # nbits/2 + i + # - basically the first half of a fingerprint is same as the second half of the + # fingerprint + # - if we repeat this process often enough we whould see strong correlation between + # the bits i (i < nbits/2) and (nbits/2 + i) + DataUtils.InitRandomNumbers((100, 23)) + self.nbits = 200 + self.d = 40 + self.nfp = 1000 - self.blist = list(range(self.nbits)) + self.blist = list(range(self.nbits)) - self.fps = [] - for _ in range(self.nfp): - fp = DataStructs.ExplicitBitVect(self.nbits) - obits = list(range(self.nbits // 2)) - random.shuffle(obits, random=random.random) - for bit in obits[0:self.d]: - fp.SetBit(bit) - fp.SetBit(bit + self.nbits // 2) - self.fps.append(fp) + self.fps = [] + for _ in range(self.nfp): + fp = DataStructs.ExplicitBitVect(self.nbits) + obits = list(range(self.nbits // 2)) + random.shuffle(obits, random=random.random) + for bit in obits[0:self.d]: + fp.SetBit(bit) + fp.SetBit(bit + self.nbits // 2) + self.fps.append(fp) - def test_getValLTM(self): - # - 1 2 4 - # 1 - 3 5 - # 2 3 - 6 - # 4 5 6 - - mat = list(range(1, 7, 1)) - for i in range(4): - self.assertEqual(getValLTM(i, i, mat), 0.0) - self.assertEqual(getValLTM(0, 1, mat), 1) - self.assertEqual(getValLTM(0, 2, mat), 2) - self.assertEqual(getValLTM(0, 3, mat), 4) - self.assertEqual(getValLTM(1, 0, mat), 1) - self.assertEqual(getValLTM(2, 0, mat), 2) - self.assertEqual(getValLTM(3, 0, mat), 4) - self.assertEqual(getValLTM(1, 2, mat), 3) - self.assertEqual(getValLTM(1, 3, mat), 5) - self.assertEqual(getValLTM(2, 1, mat), 3) - self.assertEqual(getValLTM(3, 1, mat), 5) - self.assertEqual(getValLTM(2, 3, mat), 6) - self.assertEqual(getValLTM(3, 2, mat), 6) + def test_getValLTM(self): + # - 1 2 4 + # 1 - 3 5 + # 2 3 - 6 + # 4 5 6 - + mat = list(range(1, 7, 1)) + for i in range(4): + self.assertEqual(getValLTM(i, i, mat), 0.0) + self.assertEqual(getValLTM(0, 1, mat), 1) + self.assertEqual(getValLTM(0, 2, mat), 2) + self.assertEqual(getValLTM(0, 3, mat), 4) + self.assertEqual(getValLTM(1, 0, mat), 1) + self.assertEqual(getValLTM(2, 0, mat), 2) + self.assertEqual(getValLTM(3, 0, mat), 4) + self.assertEqual(getValLTM(1, 2, mat), 3) + self.assertEqual(getValLTM(1, 3, mat), 5) + self.assertEqual(getValLTM(2, 1, mat), 3) + self.assertEqual(getValLTM(3, 1, mat), 5) + self.assertEqual(getValLTM(2, 3, mat), 6) + self.assertEqual(getValLTM(3, 2, mat), 6) - def test0CorrMat(self): - cmg = rdInfoTheory.BitCorrMatGenerator() - cmg.SetBitList(self.blist) - for fp in self.fps: - cmg.CollectVotes(fp) + def test0CorrMat(self): + cmg = rdInfoTheory.BitCorrMatGenerator() + cmg.SetBitList(self.blist) + for fp in self.fps: + cmg.CollectVotes(fp) - corrMat = cmg.GetCorrMatrix() + corrMat = cmg.GetCorrMatrix() - avr = 0.0 - navr = 0.0 - for i in range(self.nbits // 2): - avr += getValLTM(i, i + self.nbits // 2, corrMat) - navr += getValLTM(i, i + 1, corrMat) + avr = 0.0 + navr = 0.0 + for i in range(self.nbits // 2): + avr += getValLTM(i, i + self.nbits // 2, corrMat) + navr += getValLTM(i, i + 1, corrMat) - self.assertEqual(2 * avr / self.nbits, 400.0) - self.assertEqual(2 * navr / self.nbits, 158.3) + self.assertEqual(2 * avr / self.nbits, 400.0) + self.assertEqual(2 * navr / self.nbits, 158.3) - @unittest.skipIf(BitClusterer is None, 'Cannot import BitClusterer') - def test1Cluster(self): - cmg = rdInfoTheory.BitCorrMatGenerator() - cmg.SetBitList(self.blist) - for fp in self.fps: - cmg.CollectVotes(fp) + @unittest.skipIf(BitClusterer is None, 'Cannot import BitClusterer') + def test1Cluster(self): + cmg = rdInfoTheory.BitCorrMatGenerator() + cmg.SetBitList(self.blist) + for fp in self.fps: + cmg.CollectVotes(fp) - corrMat = cmg.GetCorrMatrix() + corrMat = cmg.GetCorrMatrix() - bcl = BitClusterer.BitClusterer(self.blist, self.nbits // 2) - bcl.ClusterBits(corrMat) - cls = bcl.GetClusters() - for cl in cls: - self.assertEqual(len(cl), 2) - self.assertEqual((cl[0] + self.nbits // 2), cl[1]) - bcl.SetClusters(cls) - self.assertRaises(AssertionError, bcl.SetClusters, cls[:-1]) + bcl = BitClusterer.BitClusterer(self.blist, self.nbits // 2) + bcl.ClusterBits(corrMat) + cls = bcl.GetClusters() + for cl in cls: + self.assertEqual(len(cl), 2) + self.assertEqual((cl[0] + self.nbits // 2), cl[1]) + bcl.SetClusters(cls) + self.assertRaises(AssertionError, bcl.SetClusters, cls[:-1]) - tfp = DataStructs.ExplicitBitVect(self.nbits) - obits = list(range(0, self.nbits // 4)) + list(range(self.nbits // 2, 3 * self.nbits // 4)) - tfp.SetBitsFromList(obits) - rvc = bcl.MapToClusterScores(tfp) - self.assertEqual(len(rvc), self.nbits // 2) - for i in range(self.nbits // 2): - if i < self.nbits // 4: - self.assertEqual(rvc[i], 2) - else: - self.assertEqual(rvc[i], 0) + tfp = DataStructs.ExplicitBitVect(self.nbits) + obits = list(range(0, self.nbits // 4)) + list(range(self.nbits // 2, 3 * self.nbits // 4)) + tfp.SetBitsFromList(obits) + rvc = bcl.MapToClusterScores(tfp) + self.assertEqual(len(rvc), self.nbits // 2) + for i in range(self.nbits // 2): + if i < self.nbits // 4: + self.assertEqual(rvc[i], 2) + else: + self.assertEqual(rvc[i], 0) - nfp = bcl.MapToClusterFP(tfp) - self.assertEqual(len(nfp), self.nbits // 2) - for i in range(self.nbits // 2): - if i < self.nbits // 4: - self.assertTrue(nfp[i]) - else: - self.assertFalse(nfp[i]) + nfp = bcl.MapToClusterFP(tfp) + self.assertEqual(len(nfp), self.nbits // 2) + for i in range(self.nbits // 2): + if i < self.nbits // 4: + self.assertTrue(nfp[i]) + else: + self.assertFalse(nfp[i]) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/KNN/CrossValidate.py b/rdkit/ML/KNN/CrossValidate.py index 7ba4784a0..75cfa9114 100755 --- a/rdkit/ML/KNN/CrossValidate.py +++ b/rdkit/ML/KNN/CrossValidate.py @@ -6,7 +6,7 @@ and evaluation of individual models """ -from __future__ import print_function + from rdkit.ML.Data import SplitData from rdkit.ML.KNN import DistFunctions diff --git a/rdkit/ML/MLUtils/VoteImg.py b/rdkit/ML/MLUtils/VoteImg.py index 89bd90095..27abd4f4e 100755 --- a/rdkit/ML/MLUtils/VoteImg.py +++ b/rdkit/ML/MLUtils/VoteImg.py @@ -7,7 +7,7 @@ voting on a data set Uses *Numeric* and *PIL* """ -from __future__ import print_function + from PIL import Image, ImageDraw import numpy @@ -178,7 +178,7 @@ def Usage(): if __name__ == '__main__': import sys import getopt - from rdkit.six.moves import cPickle + import pickle from rdkit.ML.Data import DataUtils args, extra = getopt.getopt(sys.argv[1:], 'o:bthx:y:d:') @@ -208,7 +208,7 @@ if __name__ == '__main__': else: Usage() modelFile = open(extra[0], 'rb') - model = cPickle.load(modelFile) + model = pickle.load(modelFile) fName = extra[1] if dbName == '': diff --git a/rdkit/ML/MatOps.py b/rdkit/ML/MatOps.py index 9708ff198..d6c728efe 100755 --- a/rdkit/ML/MatOps.py +++ b/rdkit/ML/MatOps.py @@ -15,7 +15,7 @@ """ -from __future__ import print_function + import sys diff --git a/rdkit/ML/ModelPackage/PackageUtils.py b/rdkit/ML/ModelPackage/PackageUtils.py index 8517a54d3..c5e0d1fe0 100644 --- a/rdkit/ML/ModelPackage/PackageUtils.py +++ b/rdkit/ML/ModelPackage/PackageUtils.py @@ -2,142 +2,142 @@ # Copyright (C) 2003 Rational Discovery LLC # All rights are reserved. # -from __future__ import print_function + # from elementtree.ElementTree import ElementTree, Element, SubElement import time from xml.etree.ElementTree import ElementTree, Element, SubElement def _ConvertModelPerformance(perf, modelPerf): - if len(modelPerf) > 3: - confMat = modelPerf[3] - accum = 0 - for row in confMat: - for entry in row: - accum += entry - accum = str(accum) - else: - confMat = None - accum = 'N/A' + if len(modelPerf) > 3: + confMat = modelPerf[3] + accum = 0 + for row in confMat: + for entry in row: + accum += entry + accum = str(accum) + else: + confMat = None + accum = 'N/A' - if len(modelPerf) > 4: - elem = SubElement(perf, "ScreenThreshold") - elem.text = str(modelPerf[4]) - elem = SubElement(perf, "NumScreened") - elem.text = accum - if len(modelPerf) > 4: - elem = SubElement(perf, "NumSkipped") - elem.text = str(modelPerf[6]) - elem = SubElement(perf, "Accuracy") - elem.text = str(modelPerf[0]) - elem = SubElement(perf, "AvgCorrectConf") - elem.text = str(modelPerf[1]) - elem = SubElement(perf, "AvgIncorrectConf") - elem.text = str(modelPerf[2]) - if len(modelPerf) > 4: - elem = SubElement(perf, "AvgSkipConf") - elem.text = str(modelPerf[5]) - if confMat: - elem = SubElement(perf, "ConfusionMatrix") - elem.text = str(confMat) + if len(modelPerf) > 4: + elem = SubElement(perf, "ScreenThreshold") + elem.text = str(modelPerf[4]) + elem = SubElement(perf, "NumScreened") + elem.text = accum + if len(modelPerf) > 4: + elem = SubElement(perf, "NumSkipped") + elem.text = str(modelPerf[6]) + elem = SubElement(perf, "Accuracy") + elem.text = str(modelPerf[0]) + elem = SubElement(perf, "AvgCorrectConf") + elem.text = str(modelPerf[1]) + elem = SubElement(perf, "AvgIncorrectConf") + elem.text = str(modelPerf[2]) + if len(modelPerf) > 4: + elem = SubElement(perf, "AvgSkipConf") + elem.text = str(modelPerf[5]) + if confMat: + elem = SubElement(perf, "ConfusionMatrix") + elem.text = str(confMat) def PackageToXml(pkg, summary="N/A", trainingDataId='N/A', dataPerformance=[], recommendedThreshold=None, classDescriptions=None, modelType=None, modelOrganism=None): - """ generates XML for a package that follows the RD_Model.dtd + """ generates XML for a package that follows the RD_Model.dtd - If provided, dataPerformance should be a sequence of 2-tuples: - ( note, performance ) - where performance is of the form: - ( accuracy, avgCorrectConf, avgIncorrectConf, confusionMatrix, thresh, avgSkipConf, nSkipped ) - the last four elements are optional + If provided, dataPerformance should be a sequence of 2-tuples: + ( note, performance ) + where performance is of the form: + ( accuracy, avgCorrectConf, avgIncorrectConf, confusionMatrix, thresh, avgSkipConf, nSkipped ) + the last four elements are optional - """ - head = Element("RDModelInfo") - name = SubElement(head, "ModelName") - notes = pkg.GetNotes() - if not notes: - notes = "Unnamed model" - name.text = notes - summ = SubElement(head, "ModelSummary") - summ.text = summary - calc = pkg.GetCalculator() - descrs = SubElement(head, "ModelDescriptors") - for name, summary, func in zip(calc.GetDescriptorNames(), calc.GetDescriptorSummaries(), - calc.GetDescriptorFuncs()): - descr = SubElement(descrs, "Descriptor") - elem = SubElement(descr, "DescriptorName") - elem.text = name - elem = SubElement(descr, "DescriptorDetail") - elem.text = summary - if hasattr(func, 'version'): - vers = SubElement(descr, "DescriptorVersion") - major, minor, patch = func.version.split('.') - elem = SubElement(vers, "VersionMajor") - elem.text = major - elem = SubElement(vers, "VersionMinor") - elem.text = minor - elem = SubElement(vers, "VersionPatch") - elem.text = patch + """ + head = Element("RDModelInfo") + name = SubElement(head, "ModelName") + notes = pkg.GetNotes() + if not notes: + notes = "Unnamed model" + name.text = notes + summ = SubElement(head, "ModelSummary") + summ.text = summary + calc = pkg.GetCalculator() + descrs = SubElement(head, "ModelDescriptors") + for name, summary, func in zip(calc.GetDescriptorNames(), calc.GetDescriptorSummaries(), + calc.GetDescriptorFuncs()): + descr = SubElement(descrs, "Descriptor") + elem = SubElement(descr, "DescriptorName") + elem.text = name + elem = SubElement(descr, "DescriptorDetail") + elem.text = summary + if hasattr(func, 'version'): + vers = SubElement(descr, "DescriptorVersion") + major, minor, patch = func.version.split('.') + elem = SubElement(vers, "VersionMajor") + elem.text = major + elem = SubElement(vers, "VersionMinor") + elem.text = minor + elem = SubElement(vers, "VersionPatch") + elem.text = patch - elem = SubElement(head, "TrainingDataId") - elem.text = trainingDataId + elem = SubElement(head, "TrainingDataId") + elem.text = trainingDataId - for description, perfData in dataPerformance: - dataNode = SubElement(head, "ValidationData") - note = SubElement(dataNode, 'ScreenNote') - note.text = description - perf = SubElement(dataNode, "PerformanceData") - _ConvertModelPerformance(perf, perfData) + for description, perfData in dataPerformance: + dataNode = SubElement(head, "ValidationData") + note = SubElement(dataNode, 'ScreenNote') + note.text = description + perf = SubElement(dataNode, "PerformanceData") + _ConvertModelPerformance(perf, perfData) - if recommendedThreshold: - elem = SubElement(head, "RecommendedThreshold") - elem.text = str(recommendedThreshold) + if recommendedThreshold: + elem = SubElement(head, "RecommendedThreshold") + elem.text = str(recommendedThreshold) - if classDescriptions: - elem = SubElement(head, "ClassDescriptions") - for val, text in classDescriptions: - descr = SubElement(elem, 'ClassDescription') - valElem = SubElement(descr, 'ClassVal') - valElem.text = str(val) - valText = SubElement(descr, 'ClassText') - valText.text = str(text) + if classDescriptions: + elem = SubElement(head, "ClassDescriptions") + for val, text in classDescriptions: + descr = SubElement(elem, 'ClassDescription') + valElem = SubElement(descr, 'ClassVal') + valElem.text = str(val) + valText = SubElement(descr, 'ClassText') + valText.text = str(text) - if modelType: - elem = SubElement(head, "ModelType") - elem.text = modelType - if modelOrganism: - elem = SubElement(head, "ModelOrganism") - elem.text = modelOrganism + if modelType: + elem = SubElement(head, "ModelType") + elem.text = modelType + if modelOrganism: + elem = SubElement(head, "ModelOrganism") + elem.text = modelOrganism - hist = SubElement(head, "ModelHistory") - revision = SubElement(hist, "Revision") - tm = time.localtime() - date = SubElement(revision, "RevisionDate") - elem = SubElement(date, "Year") - elem.text = str(tm[0]) - elem = SubElement(date, "Month") - elem.text = str(tm[1]) - elem = SubElement(date, "Day") - elem.text = str(tm[2]) - note = SubElement(revision, "RevisionNote") - note.text = "Created" - return ElementTree(head) + hist = SubElement(head, "ModelHistory") + revision = SubElement(hist, "Revision") + tm = time.localtime() + date = SubElement(revision, "RevisionDate") + elem = SubElement(date, "Year") + elem.text = str(tm[0]) + elem = SubElement(date, "Month") + elem.text = str(tm[1]) + elem = SubElement(date, "Day") + elem.text = str(tm[2]) + note = SubElement(revision, "RevisionNote") + note.text = "Created" + return ElementTree(head) if __name__ == '__main__': # pragma: nocover - import sys - from rdkit.six.moves import cPickle - from rdkit.six import StringIO - pkg = cPickle.load(open(sys.argv[1], 'rb')) - perf = (.80, .95, .70, [[4, 1], [1, 4]]) - tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)]) - io = StringIO() - tree.write(io) - txt = io.getvalue() - header = """ + import sys + import pickle + from io import StringIO + pkg = pickle.load(open(sys.argv[1], 'rb')) + perf = (.80, .95, .70, [[4, 1], [1, 4]]) + tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)]) + io = StringIO() + tree.write(io) + txt = io.getvalue() + header = """ """ - print(header) - print(txt.replace('><', '>\n<')) + print(header) + print(txt.replace('><', '>\n<')) diff --git a/rdkit/ML/ModelPackage/UnitTestPackage.py b/rdkit/ML/ModelPackage/UnitTestPackage.py index f19a2f103..1abc0cd17 100644 --- a/rdkit/ML/ModelPackage/UnitTestPackage.py +++ b/rdkit/ML/ModelPackage/UnitTestPackage.py @@ -16,162 +16,162 @@ from rdkit.ML.Data import DataUtils from rdkit.ML.Descriptors.MoleculeDescriptors import MolecularDescriptorCalculator from rdkit.ML.ModelPackage import Packager, PackageUtils from rdkit.ML.ModelPackage.Packager import ModelPackage -from rdkit.six import BytesIO -from rdkit.six.moves import cPickle +from io import BytesIO +import pickle def feq(a, b, tol=1e-4): - return abs(a - b) <= tol + return abs(a - b) <= tol class TestCase(unittest.TestCase): - def setUp(self): - self.dataDir = os.path.join(RDConfig.RDCodeDir, 'ML/ModelPackage/test_data') - self.testD = [ - # NOTE: the confidences here can be twitchy due to changes in descriptors: - ('Fc1ccc(NC(=O)c2cccnc2Oc3cccc(c3)C(F)(F)F)c(F)c1', 0, 0.8), - # (r'CN/1(=C\C=C(/C=C1)\C\2=C\C=N(C)(Cl)\C=C2)Cl',0,0.70), - (r'NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc32', 1, 0.70), - ] + def setUp(self): + self.dataDir = os.path.join(RDConfig.RDCodeDir, 'ML/ModelPackage/test_data') + self.testD = [ + # NOTE: the confidences here can be twitchy due to changes in descriptors: + ('Fc1ccc(NC(=O)c2cccnc2Oc3cccc(c3)C(F)(F)F)c(F)c1', 0, 0.8), + # (r'CN/1(=C\C=C(/C=C1)\C\2=C\C=N(C)(Cl)\C=C2)Cl',0,0.70), + (r'NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc32', 1, 0.70), + ] - def _loadPackage(self): - with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'r') as pkgTF: - buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8') - pkgTF.close() - io = BytesIO(buf) - pkg = cPickle.load(io) - return pkg + def _loadPackage(self): + with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'r') as pkgTF: + buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8') + pkgTF.close() + io = BytesIO(buf) + pkg = pickle.load(io) + return pkg - def _verify(self, pkg, testD): - for smi, pred, conf in testD: - m = Chem.MolFromSmiles(smi) - self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) + def _verify(self, pkg, testD): + for smi, pred, conf in testD: + m = Chem.MolFromSmiles(smi) + self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) + p, c = pkg.Classify(m) + assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) + assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - def _verify2(self, pkg, testD): - for smi, pred, conf in testD: - m = Chem.MolFromSmiles(smi) - self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - p, c = pkg.Classify(m) - assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) - assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) + def _verify2(self, pkg, testD): + for smi, pred, conf in testD: + m = Chem.MolFromSmiles(smi) + self.assertTrue(m is not None, 'SMILES: %s failed\n' % (smi)) + p, c = pkg.Classify(m) + assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) + assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) + p, c = pkg.Classify(m) + assert p == pred, 'bad prediction (%d) for smiles %s' % (p, smi) + assert feq(c, conf), 'bad confidence (%f) for smiles %s' % (c, smi) - def testBuild(self): - # """ tests building and screening a packager """ - with open(os.path.join(self.dataDir, 'Jan9_build3_calc.dsc'), 'r') as calcTF: - buf = calcTF.read().replace('\r\n', '\n').encode('utf-8') - calcTF.close() - calc = cPickle.load(BytesIO(buf)) - with open(os.path.join(self.dataDir, 'Jan9_build3_model.pkl'), 'rb') as modelF: - model = cPickle.load(modelF) - pkg = Packager.ModelPackage(descCalc=calc, model=model) - self._verify(pkg, self.testD) + def testBuild(self): + # """ tests building and screening a packager """ + with open(os.path.join(self.dataDir, 'Jan9_build3_calc.dsc'), 'r') as calcTF: + buf = calcTF.read().replace('\r\n', '\n').encode('utf-8') + calcTF.close() + calc = pickle.load(BytesIO(buf)) + with open(os.path.join(self.dataDir, 'Jan9_build3_model.pkl'), 'rb') as modelF: + model = pickle.load(modelF) + pkg = Packager.ModelPackage(descCalc=calc, model=model) + self._verify(pkg, self.testD) - def testLoad(self): - # """ tests loading and screening a packager """ - pkg = self._loadPackage() - self._verify(pkg, self.testD) + def testLoad(self): + # """ tests loading and screening a packager """ + pkg = self._loadPackage() + self._verify(pkg, self.testD) - def testLoad2(self): - # """ tests loading and screening a packager 2 """ - pkg = self._loadPackage() - self._verify2(pkg, self.testD) + def testLoad2(self): + # """ tests loading and screening a packager 2 """ + pkg = self._loadPackage() + self._verify2(pkg, self.testD) - def testPerm1(self): - # """ tests the descriptor remapping stuff in a packager """ - pkg = self._loadPackage() - calc = pkg.GetCalculator() - names = calc.GetDescriptorNames() - ref = {} - DataUtils.InitRandomNumbers((23, 42)) - for smi, _, _ in self.testD: - for desc in names: - fn = getattr(Descriptors, desc, lambda x: 777) - m = Chem.MolFromSmiles(smi) - ref[desc] = fn(m) + def testPerm1(self): + # """ tests the descriptor remapping stuff in a packager """ + pkg = self._loadPackage() + calc = pkg.GetCalculator() + names = calc.GetDescriptorNames() + ref = {} + DataUtils.InitRandomNumbers((23, 42)) + for smi, _, _ in self.testD: + for desc in names: + fn = getattr(Descriptors, desc, lambda x: 777) + m = Chem.MolFromSmiles(smi) + ref[desc] = fn(m) - for _ in range(5): + for _ in range(5): + perm = list(names) + random.shuffle(perm, random=random.random) + + m = Chem.MolFromSmiles(smi) + for desc in perm: + fn = getattr(Descriptors, desc, lambda x: 777) + val = fn(m) + assert feq(val, ref[desc], 1e-4), '%s: %s(%s): %f!=%f' % (str(perm), smi, desc, val, + ref[desc]) + + def testPerm2(self): + # """ tests the descriptor remapping stuff in a packager """ + pkg = self._loadPackage() + calc = pkg.GetCalculator() + names = calc.GetDescriptorNames() + DataUtils.InitRandomNumbers((23, 42)) perm = list(names) random.shuffle(perm, random=random.random) + calc.simpleList = perm + calc.descriptorNames = perm + pkg.Init() + self._verify(pkg, self.testD) - m = Chem.MolFromSmiles(smi) - for desc in perm: - fn = getattr(Descriptors, desc, lambda x: 777) - val = fn(m) - assert feq(val, ref[desc], 1e-4), '%s: %s(%s): %f!=%f' % (str(perm), smi, desc, val, - ref[desc]) + def test_ModelPackage(self): + pkg = self._loadPackage() - def testPerm2(self): - # """ tests the descriptor remapping stuff in a packager """ - pkg = self._loadPackage() - calc = pkg.GetCalculator() - names = calc.GetDescriptorNames() - DataUtils.InitRandomNumbers((23, 42)) - perm = list(names) - random.shuffle(perm, random=random.random) - calc.simpleList = perm - calc.descriptorNames = perm - pkg.Init() - self._verify(pkg, self.testD) + self.assertTrue(isinstance(pkg.GetCalculator(), MolecularDescriptorCalculator)) + pkg.SetCalculator('calculator') + self.assertEqual(pkg.GetCalculator(), 'calculator') - def test_ModelPackage(self): - pkg = self._loadPackage() + self.assertTrue(isinstance(pkg.GetModel(), Composite.Composite)) + pkg.SetModel('model') + self.assertEqual(pkg.GetModel(), 'model') - self.assertTrue(isinstance(pkg.GetCalculator(), MolecularDescriptorCalculator)) - pkg.SetCalculator('calculator') - self.assertEqual(pkg.GetCalculator(), 'calculator') + self.assertEqual(pkg.GetDataset(), None) + pkg.SetDataset('dataset') + self.assertEqual(pkg.GetDataset(), 'dataset') - self.assertTrue(isinstance(pkg.GetModel(), Composite.Composite)) - pkg.SetModel('model') - self.assertEqual(pkg.GetModel(), 'model') + self.assertEqual(pkg.GetNotes(), 'General purpose model built from PhysProp data') + pkg.SetNotes('notes') + self.assertEqual(pkg.GetNotes(), 'notes') - self.assertEqual(pkg.GetDataset(), None) - pkg.SetDataset('dataset') - self.assertEqual(pkg.GetDataset(), 'dataset') + # Here seems to be a difference between Python 2 and 3. The next assert works in Python 3, + # but fails in Python 2 + # self.assertFalse(hasattr(pkg, '_supplementalData')) + self.assertEqual(pkg.GetSupplementalData(), []) + self.assertTrue(hasattr(pkg, '_supplementalData')) - self.assertEqual(pkg.GetNotes(), 'General purpose model built from PhysProp data') - pkg.SetNotes('notes') - self.assertEqual(pkg.GetNotes(), 'notes') + delattr(pkg, '_supplementalData') + pkg.AddSupplementalData('supp1') + self.assertTrue(hasattr(pkg, '_supplementalData')) + self.assertEqual(pkg.GetSupplementalData(), ['supp1']) + pkg.AddSupplementalData('supp2') + self.assertEqual(pkg.GetSupplementalData(), ['supp1', 'supp2']) - # Here seems to be a difference between Python 2 and 3. The next assert works in Python 3, - # but fails in Python 2 - # self.assertFalse(hasattr(pkg, '_supplementalData')) - self.assertEqual(pkg.GetSupplementalData(), []) - self.assertTrue(hasattr(pkg, '_supplementalData')) + pkg = ModelPackage() + self.assertFalse(pkg._initialized) + pkg.Init() + self.assertFalse(pkg._initialized) - delattr(pkg, '_supplementalData') - pkg.AddSupplementalData('supp1') - self.assertTrue(hasattr(pkg, '_supplementalData')) - self.assertEqual(pkg.GetSupplementalData(), ['supp1']) - pkg.AddSupplementalData('supp2') - self.assertEqual(pkg.GetSupplementalData(), ['supp1', 'supp2']) - - pkg = ModelPackage() - self.assertFalse(pkg._initialized) - pkg.Init() - self.assertFalse(pkg._initialized) - - def test_PackageUtils(self): - pkg = self._loadPackage() - xml = PackageUtils.PackageToXml( - pkg, dataPerformance=[('label', ['accuracy', 'avgCorrect', 'avgIncorrect']), ], - recommendedThreshold=0.2, classDescriptions=[('a', 'texta'), ('b', 'textb')], - modelType='model type', modelOrganism='model organism') - s = prettyXML(xml.getroot()) - self.assertIn('', s) + def test_PackageUtils(self): + pkg = self._loadPackage() + xml = PackageUtils.PackageToXml( + pkg, dataPerformance=[('label', ['accuracy', 'avgCorrect', 'avgIncorrect']), ], + recommendedThreshold=0.2, classDescriptions=[('a', 'texta'), ('b', 'textb')], + modelType='model type', modelOrganism='model organism') + s = prettyXML(xml.getroot()) + self.assertIn('', s) def prettyXML(xml): - s = ET.tostring(xml, encoding='utf-8') - tree = minidom.parseString(s) - return tree.toprettyxml(indent=' ') + s = ET.tostring(xml, encoding='utf-8') + tree = minidom.parseString(s) + return tree.toprettyxml(indent=' ') if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/NaiveBayes/ClassificationModel.py b/rdkit/ML/NaiveBayes/ClassificationModel.py index 1ea556bd5..5ceea2aba 100644 --- a/rdkit/ML/NaiveBayes/ClassificationModel.py +++ b/rdkit/ML/NaiveBayes/ClassificationModel.py @@ -9,15 +9,14 @@ """ import numpy from rdkit.ML.Data import Quantize -from rdkit.six import iteritems def _getBinId(val, qBounds): - bid = 0 - for bnd in qBounds: - if (val > bnd): - bid += 1 - return bid + bid = 0 + for bnd in qBounds: + if (val > bnd): + bid += 1 + return bid # FIX: this class has not been updated to new-style classes @@ -25,255 +24,257 @@ def _getBinId(val, qBounds): # data. Until a solution is found for this breakage, an update is # impossible. class NaiveBayesClassifier: - """ - _NaiveBayesClassifier_s can save the following pieces of internal state, accessible via - standard setter/getter functions: + """ + _NaiveBayesClassifier_s can save the following pieces of internal state, accessible via + standard setter/getter functions: - 1) _Examples_: a list of examples which have been predicted + 1) _Examples_: a list of examples which have been predicted - 2) _TrainingExamples_: List of training examples - the descriptor value of these examples - are quantized based on info gain using ML/Data/Quantize.py if necessary + 2) _TrainingExamples_: List of training examples - the descriptor value of these examples + are quantized based on info gain using ML/Data/Quantize.py if necessary - 3) _TestExamples_: the list of examples used to test the model + 3) _TestExamples_: the list of examples used to test the model - 4) _BadExamples_ : list of examples that were incorrectly classified + 4) _BadExamples_ : list of examples that were incorrectly classified - 4) _QBoundVals_: Quant bound values for each varaible - a list of lists + 4) _QBoundVals_: Quant bound values for each varaible - a list of lists - 5) _QBounds_ : Number of bounds for each variable - - """ - - def __init__(self, attrs, nPossibleVals, nQuantBounds, mEstimateVal=-1.0, useSigs=False): - """ Constructor + 5) _QBounds_ : Number of bounds for each variable """ - self._attrs = attrs - self._mEstimateVal = mEstimateVal - self._useSigs = useSigs - self._classProbs = {} + def __init__(self, attrs, nPossibleVals, nQuantBounds, mEstimateVal=-1.0, useSigs=False): + """ Constructor - self._examples = [] - self._trainingExamples = [] - self._testExamples = [] - self._badExamples = [] - self._QBoundVals = {} - self._nClasses = nPossibleVals[-1] - self._qBounds = nQuantBounds - self._nPosVals = nPossibleVals - self._needsQuant = 1 + """ + self._attrs = attrs + self._mEstimateVal = mEstimateVal + self._useSigs = useSigs - self._name = "" - self.mprob = -1.0 + self._classProbs = {} - # for the sake a of efficiency lets try to change the conditional probabities - # to a numpy array instead of a dictionary. The three dimension array is indexed - # on the the activity class, the discriptor ID and the descriptor binID - # self._condProbs = {} - # self._condProbs = numpy.zeros((self._nClasses, max(self._attrs)+1, - # max(self._nPosVals)+1), 'd') - self._condProbs = [None] * self._nClasses - for i in range(self._nClasses): - if not (hasattr(self, '_useSigs') and self._useSigs): - nA = max(self._attrs) + 1 - self._condProbs[i] = [None] * nA - for j in range(nA): - nV = self._nPosVals[j] - if self._qBounds[j]: - nV = max(nV, self._qBounds[j] + 1) - self._condProbs[i][j] = [0.0] * nV - else: - self._condProbs[i] = {} - for idx in self._attrs: - self._condProbs[i][idx] = [0.0] * 2 + self._examples = [] + self._trainingExamples = [] + self._testExamples = [] + self._badExamples = [] + self._QBoundVals = {} + self._nClasses = nPossibleVals[-1] + self._qBounds = nQuantBounds + self._nPosVals = nPossibleVals + self._needsQuant = 1 - def GetName(self): - return self._name + self._name = "" + self.mprob = -1.0 - def SetName(self, name): - self._name = name - - def NameModel(self, varNames): - self.SetName('NaiveBayesClassifier') - - def GetExamples(self): - return self._examples - - def SetExamples(self, examples): - self._examples = examples - - def GetTrainingExamples(self): - return self._trainingExamples - - def SetTrainingExamples(self, examples): - self._trainingExamples = examples - - def GetTestExamples(self): - return self._testExamples - - def SetTestExamples(self, examples): - self._testExamples = examples - - def SetBadExamples(self, examples): - self._badExamples = examples - - def GetBadExamples(self): - return self._badExamples - - def _computeQuantBounds(self): - neg = len(self._trainingExamples) - natr = len(self._attrs) - - # make a list of results and values - allVals = numpy.zeros((neg, natr), 'd') - res = [] # list of y values - i = 0 - for eg in self._trainingExamples: - res.append(eg[-1]) - j = 0 - for ai in self._attrs: - val = eg[ai] - allVals[i, j] = val - j += 1 - i += 1 - - # now loop over each of the columns and compute the bounds - # the number of bounds is determined by the maximum info gain - i = 0 - for ai in self._attrs: - nbnds = self._qBounds[ai] - if nbnds > 0: - mbnds = [] - mgain = -1.0 - - for j in range(1, nbnds + 1): - bnds, igain = Quantize.FindVarMultQuantBounds(allVals[:, i], j, res, self._nClasses) - if (igain > mgain): - mbnds = bnds - mgain = igain - self._QBoundVals[ai] = mbnds - i += 1 - - def trainModel(self): - """ We will assume at this point that the training examples have been set - - We have to estmate the conditional probabilities for each of the (binned) descriptor - component give a outcome (or class). Also the probabilities for each class is estimated - """ - # first estimate the class probabilities - n = len(self._trainingExamples) - for i in range(self._nClasses): - self._classProbs[i] = 0.0 - - # for i in range(self._nClasses): - # self._classProbs[i] = float(self._classProbs[i])/n - - # first find the bounds for each descriptor value if necessary - if not self._useSigs and max(self._qBounds) > 0: - self._computeQuantBounds() - - # now compute the probabilities - ncls = {} - - incr = 1.0 / n - for eg in self._trainingExamples: - cls = eg[-1] - self._classProbs[cls] += incr - ncls[cls] = ncls.get(cls, 0) + 1 - tmp = self._condProbs[cls] - if not self._useSigs: - for ai in self._attrs: - bid = eg[ai] - if self._qBounds[ai] > 0: - bid = _getBinId(bid, self._QBoundVals[ai]) - tmp[ai][bid] += 1.0 - else: - for ai in self._attrs: - if eg[1].GetBit(ai): - tmp[ai][1] += 1.0 - else: - tmp[ai][0] += 1.0 - - # for key in self._condProbs: - for cls in range(self._nClasses): - if cls not in ncls: - continue - # cls = key[0] - tmp = self._condProbs[cls] - for ai in self._attrs: - if not self._useSigs: - nbnds = self._nPosVals[ai] - if (self._qBounds[ai] > 0): - nbnds = self._qBounds[ai] - else: - nbnds = 2 - for bid in range(nbnds): - if self._mEstimateVal <= 0.0: - # this is simple the fraction of of time this descriptor component assume - # this value for the examples that belong a specific class - # self._condProbs[key] = (float(self._condProbs[key]))/ncls[cls] - tmp[ai][bid] /= ncls[cls] - else: - # this a bit more complicated form - more appropriate for unbalanced data - # see "Machine Learning" by Tom Mitchell section 6.9.1.1 - - # this is the probability that this descriptor component can take this specific value - # in the lack of any other information is is simply the inverse of the number of - # possible values 'npossible' - # If we quantized this component then - # npossible = 1 + len(self._QBoundVals[ai]) - # else if we did no qunatize (the descriptor came quantized) - # npossible = nPossibleVals[ai] - # ai = key[1] - pdesc = 0.0 - if self._qBounds[ai] > 0: - pdesc = 1.0 / (1 + len(self._QBoundVals[ai])) - elif (self._nPosVals[ai] > 0): - pdesc = 1.0 / (self._nPosVals[ai]) + # for the sake a of efficiency lets try to change the conditional probabities + # to a numpy array instead of a dictionary. The three dimension array is indexed + # on the the activity class, the discriptor ID and the descriptor binID + # self._condProbs = {} + # self._condProbs = numpy.zeros((self._nClasses, max(self._attrs)+1, + # max(self._nPosVals)+1), 'd') + self._condProbs = [None] * self._nClasses + for i in range(self._nClasses): + if not (hasattr(self, '_useSigs') and self._useSigs): + nA = max(self._attrs) + 1 + self._condProbs[i] = [None] * nA + for j in range(nA): + nV = self._nPosVals[j] + if self._qBounds[j]: + nV = max(nV, self._qBounds[j] + 1) + self._condProbs[i][j] = [0.0] * nV else: - raise ValueError('Neither Bounds set nor data pre-quantized for attribute ' + str(ai)) - tmp[ai][bid] += (self._mEstimateVal) * pdesc - tmp[ai][bid] /= (ncls[cls] + self._mEstimateVal) + self._condProbs[i] = {} + for idx in self._attrs: + self._condProbs[i][idx] = [0.0] * 2 - def ClassifyExamples(self, examples, appendExamples=0): - preds = [] - for eg in examples: - pred = self.ClassifyExample(eg, appendExamples) - preds.append(int(pred)) - return preds + def GetName(self): + return self._name - def GetClassificationDetails(self): - """ returns the probability of the last prediction """ - return self.mprob + def SetName(self, name): + self._name = name - def ClassifyExample(self, example, appendExamples=0): - """ Classify an example by summing over the conditional probabilities - The most likely class is the one with the largest probability - """ - if appendExamples: - self._examples.append(example) - clsProb = {} - for key, prob in iteritems(self._classProbs): - clsProb[key] = prob - tmp = self._condProbs[key] - for ai in self._attrs: - if not (hasattr(self, '_useSigs') and self._useSigs): - bid = example[ai] - if self._qBounds[ai] > 0: - bid = _getBinId(bid, self._QBoundVals[ai]) - else: - if example[1].GetBit(ai): - bid = 1 - else: - bid = 0 - clsProb[key] *= tmp[ai][bid] + def NameModel(self, varNames): + self.SetName('NaiveBayesClassifier') - mkey = -1 - self.mprob = -1.0 - for key, prob in iteritems(clsProb): - if (prob > self.mprob): - mkey = key - self.mprob = prob + def GetExamples(self): + return self._examples - return mkey + def SetExamples(self, examples): + self._examples = examples + + def GetTrainingExamples(self): + return self._trainingExamples + + def SetTrainingExamples(self, examples): + self._trainingExamples = examples + + def GetTestExamples(self): + return self._testExamples + + def SetTestExamples(self, examples): + self._testExamples = examples + + def SetBadExamples(self, examples): + self._badExamples = examples + + def GetBadExamples(self): + return self._badExamples + + def _computeQuantBounds(self): + neg = len(self._trainingExamples) + natr = len(self._attrs) + + # make a list of results and values + allVals = numpy.zeros((neg, natr), 'd') + res = [] # list of y values + i = 0 + for eg in self._trainingExamples: + res.append(eg[-1]) + j = 0 + for ai in self._attrs: + val = eg[ai] + allVals[i, j] = val + j += 1 + i += 1 + + # now loop over each of the columns and compute the bounds + # the number of bounds is determined by the maximum info gain + i = 0 + for ai in self._attrs: + nbnds = self._qBounds[ai] + if nbnds > 0: + mbnds = [] + mgain = -1.0 + + for j in range(1, nbnds + 1): + bnds, igain = Quantize.FindVarMultQuantBounds( + allVals[:, i], j, res, self._nClasses) + if (igain > mgain): + mbnds = bnds + mgain = igain + self._QBoundVals[ai] = mbnds + i += 1 + + def trainModel(self): + """ We will assume at this point that the training examples have been set + + We have to estmate the conditional probabilities for each of the (binned) descriptor + component give a outcome (or class). Also the probabilities for each class is estimated + """ + # first estimate the class probabilities + n = len(self._trainingExamples) + for i in range(self._nClasses): + self._classProbs[i] = 0.0 + + # for i in range(self._nClasses): + # self._classProbs[i] = float(self._classProbs[i])/n + + # first find the bounds for each descriptor value if necessary + if not self._useSigs and max(self._qBounds) > 0: + self._computeQuantBounds() + + # now compute the probabilities + ncls = {} + + incr = 1.0 / n + for eg in self._trainingExamples: + cls = eg[-1] + self._classProbs[cls] += incr + ncls[cls] = ncls.get(cls, 0) + 1 + tmp = self._condProbs[cls] + if not self._useSigs: + for ai in self._attrs: + bid = eg[ai] + if self._qBounds[ai] > 0: + bid = _getBinId(bid, self._QBoundVals[ai]) + tmp[ai][bid] += 1.0 + else: + for ai in self._attrs: + if eg[1].GetBit(ai): + tmp[ai][1] += 1.0 + else: + tmp[ai][0] += 1.0 + + # for key in self._condProbs: + for cls in range(self._nClasses): + if cls not in ncls: + continue + # cls = key[0] + tmp = self._condProbs[cls] + for ai in self._attrs: + if not self._useSigs: + nbnds = self._nPosVals[ai] + if (self._qBounds[ai] > 0): + nbnds = self._qBounds[ai] + else: + nbnds = 2 + for bid in range(nbnds): + if self._mEstimateVal <= 0.0: + # this is simple the fraction of of time this descriptor component assume + # this value for the examples that belong a specific class + # self._condProbs[key] = (float(self._condProbs[key]))/ncls[cls] + tmp[ai][bid] /= ncls[cls] + else: + # this a bit more complicated form - more appropriate for unbalanced data + # see "Machine Learning" by Tom Mitchell section 6.9.1.1 + + # this is the probability that this descriptor component can take this specific value + # in the lack of any other information is is simply the inverse of the number of + # possible values 'npossible' + # If we quantized this component then + # npossible = 1 + len(self._QBoundVals[ai]) + # else if we did no qunatize (the descriptor came quantized) + # npossible = nPossibleVals[ai] + # ai = key[1] + pdesc = 0.0 + if self._qBounds[ai] > 0: + pdesc = 1.0 / (1 + len(self._QBoundVals[ai])) + elif (self._nPosVals[ai] > 0): + pdesc = 1.0 / (self._nPosVals[ai]) + else: + raise ValueError( + 'Neither Bounds set nor data pre-quantized for attribute ' + str(ai)) + tmp[ai][bid] += (self._mEstimateVal) * pdesc + tmp[ai][bid] /= (ncls[cls] + self._mEstimateVal) + + def ClassifyExamples(self, examples, appendExamples=0): + preds = [] + for eg in examples: + pred = self.ClassifyExample(eg, appendExamples) + preds.append(int(pred)) + return preds + + def GetClassificationDetails(self): + """ returns the probability of the last prediction """ + return self.mprob + + def ClassifyExample(self, example, appendExamples=0): + """ Classify an example by summing over the conditional probabilities + The most likely class is the one with the largest probability + """ + if appendExamples: + self._examples.append(example) + clsProb = {} + for key, prob in self._classProbs.items(): + clsProb[key] = prob + tmp = self._condProbs[key] + for ai in self._attrs: + if not (hasattr(self, '_useSigs') and self._useSigs): + bid = example[ai] + if self._qBounds[ai] > 0: + bid = _getBinId(bid, self._QBoundVals[ai]) + else: + if example[1].GetBit(ai): + bid = 1 + else: + bid = 0 + clsProb[key] *= tmp[ai][bid] + + mkey = -1 + self.mprob = -1.0 + for key, prob in clsProb.items(): + if (prob > self.mprob): + mkey = key + self.mprob = prob + + return mkey diff --git a/rdkit/ML/NaiveBayes/CrossValidate.py b/rdkit/ML/NaiveBayes/CrossValidate.py index 141d00a13..3b48aa87c 100644 --- a/rdkit/ML/NaiveBayes/CrossValidate.py +++ b/rdkit/ML/NaiveBayes/CrossValidate.py @@ -7,7 +7,7 @@ and evaluation of individual models """ -from __future__ import print_function + from rdkit.ML.Data import SplitData from rdkit.ML.NaiveBayes.ClassificationModel import NaiveBayesClassifier diff --git a/rdkit/ML/Neural/CrossValidate.py b/rdkit/ML/Neural/CrossValidate.py index 0be9fc937..1d8ebb23b 100755 --- a/rdkit/ML/Neural/CrossValidate.py +++ b/rdkit/ML/Neural/CrossValidate.py @@ -7,7 +7,7 @@ This is, perhaps, a little misleading. For the purposes of this module, cross validation == evaluating the accuracy of a net. """ -from __future__ import print_function + from rdkit.ML.Neural import Network, Trainers from rdkit.ML.Data import SplitData import math diff --git a/rdkit/ML/Neural/Network.py b/rdkit/ML/Neural/Network.py index 640d05f35..846d0208f 100755 --- a/rdkit/ML/Neural/Network.py +++ b/rdkit/ML/Neural/Network.py @@ -19,7 +19,7 @@ main node list. """ -from __future__ import print_function + import numpy import random diff --git a/rdkit/ML/Neural/Trainers.py b/rdkit/ML/Neural/Trainers.py index 2a44f4611..8b61ec5d2 100755 --- a/rdkit/ML/Neural/Trainers.py +++ b/rdkit/ML/Neural/Trainers.py @@ -8,7 +8,7 @@ Dan W. Patterson, Prentice Hall, 1996 """ -from __future__ import print_function + import numpy @@ -260,9 +260,9 @@ if __name__ == '__main__': # pragma: nocover if 0: net = testXor() print('Xor:', net) - from rdkit.six.moves import cPickle + import pickle outF = open('xornet.pkl', 'wb+') - cPickle.dump(net, outF) + pickle.dump(net, outF) outF.close() else: # runProfile('testLinear') diff --git a/rdkit/ML/Neural/UnitTestOther.py b/rdkit/ML/Neural/UnitTestOther.py index 8d9824982..ebf9d5833 100755 --- a/rdkit/ML/Neural/UnitTestOther.py +++ b/rdkit/ML/Neural/UnitTestOther.py @@ -6,7 +6,7 @@ this basically works out **all** of the network code """ -from __future__ import print_function + import unittest diff --git a/rdkit/ML/Neural/UnitTestTrainer.py b/rdkit/ML/Neural/UnitTestTrainer.py index 4862c72c2..2c4d957ed 100755 --- a/rdkit/ML/Neural/UnitTestTrainer.py +++ b/rdkit/ML/Neural/UnitTestTrainer.py @@ -6,7 +6,7 @@ this basically works out **all** of the network code """ -from __future__ import print_function + import random import unittest @@ -14,74 +14,74 @@ import unittest from rdkit.ML.Neural import Network, Trainers from rdkit.ML.Neural.CrossValidate import CrossValidate, CrossValidationDriver from rdkit.TestRunner import redirect_stdout -from rdkit.six import StringIO +from io import StringIO class TrainerTestCase(unittest.TestCase): - def setUp(self): - random.seed(23) - self.trainTol = 0.3 - self.orExamples = [[0, 0, 1, 0.1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .9]] - self.andExamples = [[0, 0, 1, 0.1], [0, 1, 1, .1], [1, 0, 1, .1], [1, 1, 1, .9]] - self.xorExamples = [[0, 0, 1, .1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .1]] - self.linExamples = [[.1, .1], [.2, .2], [.3, .3], [.4, .4], [.8, .8]] + def setUp(self): + random.seed(23) + self.trainTol = 0.3 + self.orExamples = [[0, 0, 1, 0.1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .9]] + self.andExamples = [[0, 0, 1, 0.1], [0, 1, 1, .1], [1, 0, 1, .1], [1, 1, 1, .9]] + self.xorExamples = [[0, 0, 1, .1], [0, 1, 1, .9], [1, 0, 1, .9], [1, 1, 1, .1]] + self.linExamples = [[.1, .1], [.2, .2], [.3, .3], [.4, .4], [.8, .8]] - def _trainExamples(self, ex, arch=[3, 1], useAvgErr=False): - net = Network.Network(arch) - t = Trainers.BackProp() - t.TrainOnLine(ex, net, errTol=self.trainTol, useAvgErr=useAvgErr, silent=True) - errs = [abs(x[-1] - net.ClassifyExample(x)) for x in ex] - return net, errs + def _trainExamples(self, ex, arch=[3, 1], useAvgErr=False): + net = Network.Network(arch) + t = Trainers.BackProp() + t.TrainOnLine(ex, net, errTol=self.trainTol, useAvgErr=useAvgErr, silent=True) + errs = [abs(x[-1] - net.ClassifyExample(x)) for x in ex] + return net, errs - def testBackpropOr(self): - # " testing backprop training on or " - _, errs = self._trainExamples(self.orExamples) - assert max(errs) < self.trainTol, 'net did not converge properly on or' + def testBackpropOr(self): + # " testing backprop training on or " + _, errs = self._trainExamples(self.orExamples) + assert max(errs) < self.trainTol, 'net did not converge properly on or' - def testBackpropAnd(self): - # " testing backprop training on and " - _, errs = self._trainExamples(self.andExamples) - assert max(errs) < self.trainTol, 'net did not converge properly on and' + def testBackpropAnd(self): + # " testing backprop training on and " + _, errs = self._trainExamples(self.andExamples) + assert max(errs) < self.trainTol, 'net did not converge properly on and' - def testBackpropLin(self): - # " testing backprop training on a linear function " - _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1]) - assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' + def testBackpropLin(self): + # " testing backprop training on a linear function " + _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1]) + assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' - _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1], useAvgErr=True) - assert max(errs) < 0.4, 'net did not converge properly on or' + _, errs = self._trainExamples(self.linExamples, arch=[1, 2, 1], useAvgErr=True) + assert max(errs) < 0.4, 'net did not converge properly on or' - def test_multipleHiddenLayers(self): - _, errs = self._trainExamples(self.linExamples, arch=[1, 1, 2, 1]) - assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' + def test_multipleHiddenLayers(self): + _, errs = self._trainExamples(self.linExamples, arch=[1, 1, 2, 1]) + assert max(errs) < self.trainTol, 'net did not converge properly on linear fit' - def test_CrossValidate(self): - # We just check here that the code works - net, _ = self._trainExamples(self.orExamples) - percentage, badExamples = CrossValidate(net, self.orExamples, 0.2) - self.assertEqual(percentage, 1.0 / 4) - self.assertEqual(len(badExamples), 1) + def test_CrossValidate(self): + # We just check here that the code works + net, _ = self._trainExamples(self.orExamples) + percentage, badExamples = CrossValidate(net, self.orExamples, 0.2) + self.assertEqual(percentage, 1.0 / 4) + self.assertEqual(len(badExamples), 1) - percentage, badExamples = CrossValidate(net, self.orExamples, self.trainTol) - self.assertEqual(percentage, 0.0) - self.assertEqual(len(badExamples), 0) + percentage, badExamples = CrossValidate(net, self.orExamples, self.trainTol) + self.assertEqual(percentage, 0.0) + self.assertEqual(len(badExamples), 0) - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True) - self.assertEqual(cvError, 0.5) + net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True) + self.assertEqual(cvError, 0.5) - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, - replacementSelection=True) - self.assertEqual(cvError, 0.0) + net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, + replacementSelection=True) + self.assertEqual(cvError, 0.0) - net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, - calcTotalError=True) - self.assertEqual(cvError, 0.25) + net, cvError = CrossValidationDriver(self.orExamples + self.orExamples, silent=True, + calcTotalError=True) + self.assertEqual(cvError, 0.25) - f = StringIO() - with redirect_stdout(f): - CrossValidationDriver(self.orExamples + self.orExamples) + f = StringIO() + with redirect_stdout(f): + CrossValidationDriver(self.orExamples + self.orExamples) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/SLT/UnitTestRisk.py b/rdkit/ML/SLT/UnitTestRisk.py index 56e93b2a5..78aa345ef 100755 --- a/rdkit/ML/SLT/UnitTestRisk.py +++ b/rdkit/ML/SLT/UnitTestRisk.py @@ -4,7 +4,7 @@ """ unit testing code for SLT Risk functions """ -from __future__ import print_function + import unittest from rdkit.ML.SLT import Risk import math diff --git a/rdkit/ML/ScreenComposite.py b/rdkit/ML/ScreenComposite.py index 503488146..b77f5b9ec 100755 --- a/rdkit/ML/ScreenComposite.py +++ b/rdkit/ML/ScreenComposite.py @@ -108,7 +108,7 @@ a file containing a pickled composite model and _filename_ is a QDAT file. """ -from __future__ import print_function + import os import sys @@ -120,8 +120,7 @@ from rdkit.Dbase import DbModule from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import CompositeRun from rdkit.ML.Data import DataUtils, SplitData -from rdkit.six.moves import cPickle -from rdkit.six.moves import input +import pickle try: @@ -1323,7 +1322,7 @@ if __name__ == '__main__': for blob in blobs: blob = blob[0] try: - models.append(cPickle.loads(str(blob))) + models.append(pickle.loads(str(blob))) except Exception: import traceback traceback.print_exc() @@ -1332,7 +1331,7 @@ if __name__ == '__main__': else: message('-> Loading model') modelFile = open(extras[0], 'rb') - models.append(cPickle.load(modelFile)) + models.append(pickle.load(modelFile)) if not len(models): error('No composite models found') sys.exit(-1) diff --git a/rdkit/ML/UnitTestAnalyzeComposite.py b/rdkit/ML/UnitTestAnalyzeComposite.py index c4f1204c6..0b13e3e0b 100644 --- a/rdkit/ML/UnitTestAnalyzeComposite.py +++ b/rdkit/ML/UnitTestAnalyzeComposite.py @@ -16,46 +16,46 @@ import unittest from rdkit import RDConfig from rdkit.ML import AnalyzeComposite -from rdkit.six.moves import cPickle as pickle +import pickle class TestCase(unittest.TestCase): - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') + def setUp(self): + self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - def test1_Issue163(self): - name1 = os.path.join(self.baseDir, 'humanoral.1.pkl') - try: - with open(name1, 'rb') as pklF: - c1 = pickle.load(pklF) - except Exception: - c1 = None - self.assertTrue(c1) - name2 = os.path.join(self.baseDir, 'humanoral.2.pkl') - try: - with open(name2, 'rb') as pklF: - c2 = pickle.load(pklF) - except Exception: - c2 = None - self.assertTrue(c2) + def test1_Issue163(self): + name1 = os.path.join(self.baseDir, 'humanoral.1.pkl') + try: + with open(name1, 'rb') as pklF: + c1 = pickle.load(pklF) + except Exception: + c1 = None + self.assertTrue(c1) + name2 = os.path.join(self.baseDir, 'humanoral.2.pkl') + try: + with open(name2, 'rb') as pklF: + c2 = pickle.load(pklF) + except Exception: + c2 = None + self.assertTrue(c2) - try: - res = sorted(AnalyzeComposite.ProcessIt([c1, c2], verbose=-1)) - except Exception: - import traceback - traceback.print_exc() - ok = 0 - else: - ok = 1 - self.assertTrue(ok) + try: + res = sorted(AnalyzeComposite.ProcessIt([c1, c2], verbose=-1)) + except Exception: + import traceback + traceback.print_exc() + ok = 0 + else: + ok = 1 + self.assertTrue(ok) - self.assertEqual(res[0][0],'BALABANJ') - self.assertEqual(res[1][0],'BERTZCT') - self.assertEqual(res[-1][0],'VSA_ESTATE9') - for entry in res: - self.assertEqual(len(entry),5) + self.assertEqual(res[0][0], 'BALABANJ') + self.assertEqual(res[1][0], 'BERTZCT') + self.assertEqual(res[-1][0], 'VSA_ESTATE9') + for entry in res: + self.assertEqual(len(entry), 5) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/UnitTestBuildComposite.py b/rdkit/ML/UnitTestBuildComposite.py index 176b57db1..1e5920625 100644 --- a/rdkit/ML/UnitTestBuildComposite.py +++ b/rdkit/ML/UnitTestBuildComposite.py @@ -18,193 +18,193 @@ import unittest from rdkit import RDConfig from rdkit.Dbase.DbConnection import DbConnect from rdkit.ML import BuildComposite -from rdkit.six.moves import cPickle as pickle +import pickle class TestCase(unittest.TestCase): - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - self.dbName = RDConfig.RDTestDatabase + def setUp(self): + self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') + self.dbName = RDConfig.RDTestDatabase - self.details = BuildComposite.SetDefaults() - self.details.dbName = self.dbName - self.details.dbUser = RDConfig.defaultDBUser - self.details.dbPassword = RDConfig.defaultDBPassword + self.details = BuildComposite.SetDefaults() + self.details.dbName = self.dbName + self.details.dbUser = RDConfig.defaultDBUser + self.details.dbPassword = RDConfig.defaultDBPassword - def _init(self, refCompos, copyBounds=0): - BuildComposite._verbose = 0 - conn = DbConnect(self.details.dbName, self.details.tableName) - cols = [x.upper() for x in conn.GetColumnNames()] - cDescs = [x.upper() for x in refCompos.GetDescriptorNames()] - self.assertEqual(cols, cDescs) + def _init(self, refCompos, copyBounds=0): + BuildComposite._verbose = 0 + conn = DbConnect(self.details.dbName, self.details.tableName) + cols = [x.upper() for x in conn.GetColumnNames()] + cDescs = [x.upper() for x in refCompos.GetDescriptorNames()] + self.assertEqual(cols, cDescs) - self.details.nModels = 10 - self.details.lockRandom = 1 - self.details.randomSeed = refCompos._randomSeed - self.details.splitFrac = refCompos._splitFrac + self.details.nModels = 10 + self.details.lockRandom = 1 + self.details.randomSeed = refCompos._randomSeed + self.details.splitFrac = refCompos._splitFrac - if self.details.splitFrac: - self.details.splitRun = 1 - else: - self.details.splitRun = 0 + if self.details.splitFrac: + self.details.splitRun = 1 + else: + self.details.splitRun = 0 - if not copyBounds: - self.details.qBounds = [0] * len(cols) - else: - self.details.qBounds = refCompos.GetQuantBounds()[0] + if not copyBounds: + self.details.qBounds = [0] * len(cols) + else: + self.details.qBounds = refCompos.GetQuantBounds()[0] - def compare(self, compos, refCompos): - self.assertEqual(len(compos), len(refCompos)) - cs = [] - rcs = [] - for i in range(len(compos)): - cs.append(compos[i]) - rcs.append(refCompos[i]) + def compare(self, compos, refCompos): + self.assertEqual(len(compos), len(refCompos)) + cs = [] + rcs = [] + for i in range(len(compos)): + cs.append(compos[i]) + rcs.append(refCompos[i]) - cs.sort(key=lambda x: (x[2], x[2])) - rcs.sort(key=lambda x: (x[2], x[2])) + cs.sort(key=lambda x: (x[2], x[2])) + rcs.sort(key=lambda x: (x[2], x[2])) - for i in range(len(compos)): - _, count, err = cs[i] - _, refCount, refErr = rcs[i] - self.assertEqual(count, refCount) - self.assertAlmostEqual(err, refErr, 4) + for i in range(len(compos)): + _, count, err = cs[i] + _, refCount, refErr = rcs[i] + self.assertEqual(count, refCount) + self.assertAlmostEqual(err, refErr, 4) - def test1_basics(self): - # """ basics """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10.pkl' + def test1_basics(self): + # """ basics """ + self.details.tableName = 'ferro_quant' + refComposName = 'ferromag_quant_10.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos) - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos) + compos = BuildComposite.RunIt(self.details, saveIt=0) - # pickle.dump(compos,open(os.path.join(self.baseDir,refComposName), 'wb')) - # with open(os.path.join(self.baseDir,refComposName), 'rb') as pklF: - # refCompos = pickle.load(pklF) + # pickle.dump(compos,open(os.path.join(self.baseDir,refComposName), 'wb')) + # with open(os.path.join(self.baseDir,refComposName), 'rb') as pklF: + # refCompos = pickle.load(pklF) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test2_depth_limit(self): - # """ depth limit """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10_3.pkl' + def test2_depth_limit(self): + # """ depth limit """ + self.details.tableName = 'ferro_quant' + refComposName = 'ferromag_quant_10_3.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos) + self.details.limitDepth = 3 + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test3_depth_limit_less_greedy(self): - # """ depth limit + less greedy """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_10_3_lessgreedy.pkl' + def test3_depth_limit_less_greedy(self): + # """ depth limit + less greedy """ + self.details.tableName = 'ferro_quant' + refComposName = 'ferromag_quant_10_3_lessgreedy.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - self.details.lessGreedy = 1 - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos) + self.details.limitDepth = 3 + self.details.lessGreedy = 1 + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test4_more_trees(self): - # """ more trees """ - self.details.tableName = 'ferro_quant' - refComposName = 'ferromag_quant_50_3.pkl' + def test4_more_trees(self): + # """ more trees """ + self.details.tableName = 'ferro_quant' + refComposName = 'ferromag_quant_50_3.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos) - self.details.limitDepth = 3 - self.details.nModels = 50 - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos) + self.details.limitDepth = 3 + self.details.nModels = 50 + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test5_auto_bounds(self): - # """ auto bounds """ - self.details.tableName = 'ferro_noquant' - refComposName = 'ferromag_auto_10_3.pkl' + def test5_auto_bounds(self): + # """ auto bounds """ + self.details.tableName = 'ferro_noquant' + refComposName = 'ferromag_auto_10_3.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos, copyBounds=1) - self.details.limitDepth = 3 - self.details.nModels = 10 - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos, copyBounds=1) + self.details.limitDepth = 3 + self.details.nModels = 10 + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test6_auto_bounds_real_activity(self): - # """ auto bounds with a real valued activity""" - self.details.tableName = 'ferro_noquant_realact' - refComposName = 'ferromag_auto_10_3.pkl' + def test6_auto_bounds_real_activity(self): + # """ auto bounds with a real valued activity""" + self.details.tableName = 'ferro_noquant_realact' + refComposName = 'ferromag_auto_10_3.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - refCompos = pickle.load(pklF) + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + refCompos = pickle.load(pklF) - # first make sure the data are intact - self._init(refCompos, copyBounds=1) - self.details.limitDepth = 3 - self.details.nModels = 10 - self.details.activityBounds = [0.5] - compos = BuildComposite.RunIt(self.details, saveIt=0) + # first make sure the data are intact + self._init(refCompos, copyBounds=1) + self.details.limitDepth = 3 + self.details.nModels = 10 + self.details.activityBounds = [0.5] + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) - def test7_composite_naiveBayes(self): - # """ Test composite of naive bayes""" - self.details.tableName = 'ferro_noquant' - refComposName = 'ferromag_NaiveBayes.pkl' - with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile: - buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8') - pklTFile.close() - with io.BytesIO(buf) as pklFile: - refCompos = pickle.load(pklFile) - self._init(refCompos, copyBounds=1) - self.details.useTrees = 0 - self.details.useNaiveBayes = 1 - self.details.mEstimateVal = 20.0 - self.details.qBounds = [0] + [2] * 6 + [0] - compos = BuildComposite.RunIt(self.details, saveIt=0) + def test7_composite_naiveBayes(self): + # """ Test composite of naive bayes""" + self.details.tableName = 'ferro_noquant' + refComposName = 'ferromag_NaiveBayes.pkl' + with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile: + buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8') + pklTFile.close() + with io.BytesIO(buf) as pklFile: + refCompos = pickle.load(pklFile) + self._init(refCompos, copyBounds=1) + self.details.useTrees = 0 + self.details.useNaiveBayes = 1 + self.details.mEstimateVal = 20.0 + self.details.qBounds = [0] + [2] * 6 + [0] + compos = BuildComposite.RunIt(self.details, saveIt=0) - self.compare(compos, refCompos) + self.compare(compos, refCompos) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/UnitTestScreenComposite.py b/rdkit/ML/UnitTestScreenComposite.py index 175f1b603..77e3d4ce4 100644 --- a/rdkit/ML/UnitTestScreenComposite.py +++ b/rdkit/ML/UnitTestScreenComposite.py @@ -17,333 +17,333 @@ import unittest from rdkit import RDConfig from rdkit.ML import ScreenComposite -from rdkit.six.moves import cPickle as pickle +import pickle class TestCase(unittest.TestCase): - def setUp(self): - self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') - self.dbName = RDConfig.RDTestDatabase - self.details = ScreenComposite.SetDefaults() - self.details.dbName = self.dbName - self.details.dbUser = RDConfig.defaultDBUser - self.details.dbPassword = RDConfig.defaultDBPassword + def setUp(self): + self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data') + self.dbName = RDConfig.RDTestDatabase + self.details = ScreenComposite.SetDefaults() + self.details.dbName = self.dbName + self.details.dbUser = RDConfig.defaultDBUser + self.details.dbPassword = RDConfig.defaultDBPassword - def test1_basics(self): - # """ basics """ - self.details.tableName = 'ferro_quant' - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) + def test1_basics(self): + # """ basics """ + self.details.tableName = 'ferro_quant' + with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 5 + self.assertEqual(len(compos), tgt) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 93) - self.assertEqual(misCount, 2) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9871, 4) - self.assertAlmostEqual(avgBad, .8000, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 39) - self.assertEqual(tbl[0, 1], 2) - self.assertEqual(tbl[1, 0], 0) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 93) + self.assertEqual(misCount, 2) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .9871, 4) + self.assertAlmostEqual(avgBad, .8000, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 54) + self.assertEqual(tbl[1, 1], 39) + self.assertEqual(tbl[0, 1], 2) + self.assertEqual(tbl[1, 0], 0) - def test2_include_holdout(self): - # """ include holdout data only """ - self.details.tableName = 'ferro_quant' - self.details.doHoldout = 1 - self.details.doTraining = 0 + def test2_include_holdout(self): + # """ include holdout data only """ + self.details.tableName = 'ferro_quant' + self.details.doHoldout = 1 + self.details.doTraining = 0 - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) + with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 5 + self.assertEqual(len(compos), tgt) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 28) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9964, 4) - self.assertAlmostEqual(avgBad, 1.000, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 16) - self.assertEqual(tbl[1, 1], 12) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 28) + self.assertEqual(misCount, 1) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .9964, 4) + self.assertAlmostEqual(avgBad, 1.000, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 16) + self.assertEqual(tbl[1, 1], 12) + self.assertEqual(tbl[0, 1], 1) + self.assertEqual(tbl[1, 0], 0) - def test3_include_training(self): - # """ include training data only """ - self.details.tableName = 'ferro_quant' - self.details.doHoldout = 0 - self.details.doTraining = 1 + def test3_include_training(self): + # """ include training data only """ + self.details.tableName = 'ferro_quant' + self.details.doHoldout = 0 + self.details.doTraining = 1 - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt, 'bad composite loaded: %d != %d' % (len(compos), tgt)) + with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 5 + self.assertEqual(len(compos), tgt, 'bad composite loaded: %d != %d' % (len(compos), tgt)) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 65) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .98307, 4) - self.assertAlmostEqual(avgBad, 0.600, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 38, tbl) - self.assertEqual(tbl[1, 1], 27) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 65) + self.assertEqual(misCount, 1) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .98307, 4) + self.assertAlmostEqual(avgBad, 0.600, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 38, tbl) + self.assertEqual(tbl[1, 1], 27) + self.assertEqual(tbl[0, 1], 1) + self.assertEqual(tbl[1, 0], 0) - def test4_thresholding(self): - # """ include thresholding """ - self.details.tableName = 'ferro_quant' - self.details.threshold = 0.80 - self.details.doHoldout = 0 - self.details.doTraining = 0 + def test4_thresholding(self): + # """ include thresholding """ + self.details.tableName = 'ferro_quant' + self.details.threshold = 0.80 + self.details.doHoldout = 0 + self.details.doTraining = 0 - with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 5 - self.assertEqual(len(compos), tgt) + with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 5 + self.assertEqual(len(compos), tgt) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 91) - self.assertEqual(misCount, 1) - self.assertEqual(nSkipped, 3) - self.assertAlmostEqual(avgGood, 0.9956, 4) - self.assertAlmostEqual(avgBad, 1.000, 4) - self.assertAlmostEqual(avgSkip, 0.6000, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 37) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 0) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 91) + self.assertEqual(misCount, 1) + self.assertEqual(nSkipped, 3) + self.assertAlmostEqual(avgGood, 0.9956, 4) + self.assertAlmostEqual(avgBad, 1.000, 4) + self.assertAlmostEqual(avgSkip, 0.6000, 4) + self.assertEqual(tbl[0, 0], 54) + self.assertEqual(tbl[1, 1], 37) + self.assertEqual(tbl[0, 1], 1) + self.assertEqual(tbl[1, 0], 0) - def test5_basics(self): - # """ basics """ - self.details.tableName = 'ferro_noquant' + def test5_basics(self): + # """ basics """ + self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) + with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) - tpl = ScreenComposite.ScreenFromDetails(compos, self.details) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl + tpl = ScreenComposite.ScreenFromDetails(compos, self.details) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl - self.assertEqual(nGood, 95) - self.assertEqual(misCount, 8) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9684, 4) - self.assertAlmostEqual(avgBad, .8375, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 50) - self.assertEqual(tbl[1, 1], 45) - self.assertEqual(tbl[0, 1], 5) - self.assertEqual(tbl[1, 0], 3) + self.assertEqual(nGood, 95) + self.assertEqual(misCount, 8) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .9684, 4) + self.assertAlmostEqual(avgBad, .8375, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 50) + self.assertEqual(tbl[1, 1], 45) + self.assertEqual(tbl[0, 1], 5) + self.assertEqual(tbl[1, 0], 3) - def test6_multiple_models(self): - # """ multiple models """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - composites = [compos, compos] - tpl = ScreenComposite.ScreenFromDetails(composites, self.details) - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl - self.assertEqual(nGood[0], 95) - self.assertEqual(misCount[0], 8) - self.assertEqual(nSkipped[0], 0) - self.assertAlmostEqual(avgGood[0], .9684, 4) - self.assertAlmostEqual(avgBad[0], .8375, 4) - self.assertAlmostEqual(avgSkip[0], 0.0, 4) - self.assertEqual(nGood[1], 0) - self.assertEqual(misCount[1], 0) - self.assertEqual(nSkipped[1], 0) - self.assertEqual(avgGood[1], 0) - self.assertEqual(avgBad[1], 0) - self.assertEqual(avgSkip[1], 0) - self.assertEqual(tbl[0, 0], 50) - self.assertEqual(tbl[1, 1], 45) - self.assertEqual(tbl[0, 1], 5) - self.assertEqual(tbl[1, 0], 3) + def test6_multiple_models(self): + # """ multiple models """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + composites = [compos, compos] + tpl = ScreenComposite.ScreenFromDetails(composites, self.details) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl + self.assertEqual(nGood[0], 95) + self.assertEqual(misCount[0], 8) + self.assertEqual(nSkipped[0], 0) + self.assertAlmostEqual(avgGood[0], .9684, 4) + self.assertAlmostEqual(avgBad[0], .8375, 4) + self.assertAlmostEqual(avgSkip[0], 0.0, 4) + self.assertEqual(nGood[1], 0) + self.assertEqual(misCount[1], 0) + self.assertEqual(nSkipped[1], 0) + self.assertEqual(avgGood[1], 0) + self.assertEqual(avgBad[1], 0) + self.assertEqual(avgSkip[1], 0) + self.assertEqual(tbl[0, 0], 50) + self.assertEqual(tbl[1, 1], 45) + self.assertEqual(tbl[0, 1], 5) + self.assertEqual(tbl[1, 0], 3) - def test7_shuffle(self): - # """ shuffle """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 50) - self.assertEqual(misCount, 53) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7380, 4) - self.assertAlmostEqual(avgBad, .7660, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 30) - self.assertEqual(tbl[1, 1], 20) - self.assertEqual(tbl[0, 1], 25) - self.assertEqual(tbl[1, 0], 28) + def test7_shuffle(self): + # """ shuffle """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.shuffleActivities = 1 + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 50) + self.assertEqual(misCount, 53) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .7380, 4) + self.assertAlmostEqual(avgBad, .7660, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 30) + self.assertEqual(tbl[1, 1], 20) + self.assertEqual(tbl[0, 1], 25) + self.assertEqual(tbl[1, 0], 28) - def test8_shuffle_segmentation(self): - # """ shuffle with segmentation """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - self.details.doHoldout = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 19) - self.assertEqual(misCount, 12) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7737, 4) - self.assertAlmostEqual(avgBad, .7500, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 12) - self.assertEqual(tbl[1, 1], 7) - self.assertEqual(tbl[0, 1], 6) - self.assertEqual(tbl[1, 0], 6) + def test8_shuffle_segmentation(self): + # """ shuffle with segmentation """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.shuffleActivities = 1 + self.details.doHoldout = 1 + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 19) + self.assertEqual(misCount, 12) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .7737, 4) + self.assertAlmostEqual(avgBad, .7500, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 12) + self.assertEqual(tbl[1, 1], 7) + self.assertEqual(tbl[0, 1], 6) + self.assertEqual(tbl[1, 0], 6) - def test9_shuffle_segmentation2(self): - # """ shuffle with segmentation2 """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.shuffleActivities = 1 - self.details.doTraining = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 31) - self.assertEqual(misCount, 41) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .7161, 4) - self.assertAlmostEqual(avgBad, .7707, 4) - self.assertAlmostEqual(avgSkip, 0.0, 4) - self.assertEqual(tbl[0, 0], 18) - self.assertEqual(tbl[1, 1], 13) - self.assertEqual(tbl[0, 1], 19) - self.assertEqual(tbl[1, 0], 22) + def test9_shuffle_segmentation2(self): + # """ shuffle with segmentation2 """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_shuffle_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.shuffleActivities = 1 + self.details.doTraining = 1 + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 31) + self.assertEqual(misCount, 41) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .7161, 4) + self.assertAlmostEqual(avgBad, .7707, 4) + self.assertAlmostEqual(avgSkip, 0.0, 4) + self.assertEqual(tbl[0, 0], 18) + self.assertEqual(tbl[1, 1], 13) + self.assertEqual(tbl[0, 1], 19) + self.assertEqual(tbl[1, 0], 22) - def test10_filtering(self): - # """ filtering """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.filterVal = 1 - self.details.filterFrac = .33 + def test10_filtering(self): + # """ filtering """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.filterVal = 1 + self.details.filterFrac = .33 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 90) - self.assertEqual(misCount, 13) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .9578, 4) - self.assertAlmostEqual(avgBad, .8538, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 54) - self.assertEqual(tbl[1, 1], 36) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 12) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 90) + self.assertEqual(misCount, 13) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .9578, 4) + self.assertAlmostEqual(avgBad, .8538, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 54) + self.assertEqual(tbl[1, 1], 36) + self.assertEqual(tbl[0, 1], 1) + self.assertEqual(tbl[1, 0], 12) - def test11_filtering_segmentation(self): - # """ filtering with segmentation """ - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.doHoldout = 1 - self.details.filterVal = 1 - self.details.filterFrac = .33 + def test11_filtering_segmentation(self): + # """ filtering with segmentation """ + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.doHoldout = 1 + self.details.filterVal = 1 + self.details.filterFrac = .33 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) - self.assertEqual(nGood, 37) - self.assertEqual(misCount, 6) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, .95946, 4) - self.assertAlmostEqual(avgBad, .85, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 14) - self.assertEqual(tbl[1, 1], 23) - self.assertEqual(tbl[0, 1], 1) - self.assertEqual(tbl[1, 0], 5) + self.assertEqual(nGood, 37) + self.assertEqual(misCount, 6) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, .95946, 4) + self.assertAlmostEqual(avgBad, .85, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 14) + self.assertEqual(tbl[1, 1], 23) + self.assertEqual(tbl[0, 1], 1) + self.assertEqual(tbl[1, 0], 5) - def test12_naiveBayes_composite(self): - # """ test the naive bayes composite""" - self.details.tableName = 'ferro_noquant' - with open(os.path.join(self.baseDir, 'ferromag_NaiveBayes.pkl'), 'r') as pklTF: - buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') - pklTF.close() - with io.BytesIO(buf) as pklF: - compos = pickle.load(pklF) - tgt = 10 - self.assertEqual(len(compos), tgt) - self.details.doHoldout = 1 - nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( - compos, self.details) - self.assertEqual(nGood, 25) - self.assertEqual(misCount, 6) - self.assertEqual(nSkipped, 0) - self.assertAlmostEqual(avgGood, 0.9800, 4) - self.assertAlmostEqual(avgBad, 0.86667, 4) - self.assertAlmostEqual(avgSkip, 0, 4) - self.assertEqual(tbl[0, 0], 9) - self.assertEqual(tbl[0, 1], 6) - self.assertEqual(tbl[1, 0], 0) - self.assertEqual(tbl[1, 1], 16) + def test12_naiveBayes_composite(self): + # """ test the naive bayes composite""" + self.details.tableName = 'ferro_noquant' + with open(os.path.join(self.baseDir, 'ferromag_NaiveBayes.pkl'), 'r') as pklTF: + buf = pklTF.read().replace('\r\n', '\n').encode('utf-8') + pklTF.close() + with io.BytesIO(buf) as pklF: + compos = pickle.load(pklF) + tgt = 10 + self.assertEqual(len(compos), tgt) + self.details.doHoldout = 1 + nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = ScreenComposite.ScreenFromDetails( + compos, self.details) + self.assertEqual(nGood, 25) + self.assertEqual(misCount, 6) + self.assertEqual(nSkipped, 0) + self.assertAlmostEqual(avgGood, 0.9800, 4) + self.assertAlmostEqual(avgBad, 0.86667, 4) + self.assertAlmostEqual(avgSkip, 0, 4) + self.assertEqual(tbl[0, 0], 9) + self.assertEqual(tbl[0, 1], 6) + self.assertEqual(tbl[1, 0], 0) + self.assertEqual(tbl[1, 1], 16) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/ML/files.py b/rdkit/ML/files.py index 805eafb34..1dd86248e 100755 --- a/rdkit/ML/files.py +++ b/rdkit/ML/files.py @@ -2,7 +2,7 @@ """ Generic file manipulation stuff """ -from __future__ import print_function + import re diff --git a/rdkit/SimDivFilters/SimilarityPickers.py b/rdkit/SimDivFilters/SimilarityPickers.py index d08c65d3e..3ae936cdf 100755 --- a/rdkit/SimDivFilters/SimilarityPickers.py +++ b/rdkit/SimDivFilters/SimilarityPickers.py @@ -3,7 +3,7 @@ # Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC # All Rights Reserved # -from __future__ import print_function + import bisect diff --git a/rdkit/SimDivFilters/UnitTestSimilarityPickers.py b/rdkit/SimDivFilters/UnitTestSimilarityPickers.py index 2e581ddf8..f278e3491 100755 --- a/rdkit/SimDivFilters/UnitTestSimilarityPickers.py +++ b/rdkit/SimDivFilters/UnitTestSimilarityPickers.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import doctest import unittest diff --git a/rdkit/SimDivFilters/test_data/genfps.py b/rdkit/SimDivFilters/test_data/genfps.py index 835bacb90..36eabcefc 100755 --- a/rdkit/SimDivFilters/test_data/genfps.py +++ b/rdkit/SimDivFilters/test_data/genfps.py @@ -1,16 +1,15 @@ -from __future__ import print_function + from rdkit import Chem from rdkit import RDConfig from rdkit.Dbase import DbModule from rdkit.Dbase.DbConnection import DbConnect -from six.moves import cPickle - +import pickle if RDConfig.usePgSQL: - dbName = "::RDTests" + dbName = "::RDTests" else: - dbName = "data.sqlt" + dbName = "data.sqlt" molTblName = 'simple_mols1' fpTblName = 'simple_mols1_fp' @@ -18,9 +17,9 @@ conn = DbConnect(dbName, molTblName) conn.AddTable(fpTblName, 'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName) d = conn.GetData() for smi, ID in d: - print(repr(ID), repr(smi)) - mol = Chem.MolFromSmiles(smi) - fp = Chem.RDKFingerprint(mol) - pkl = cPickle.dumps(fp) - conn.InsertData(fpTblName, (ID, DbModule.binaryHolder(pkl))) + print(repr(ID), repr(smi)) + mol = Chem.MolFromSmiles(smi) + fp = Chem.RDKFingerprint(mol) + pkl = pickle.dumps(fp) + conn.InsertData(fpTblName, (ID, DbModule.binaryHolder(pkl))) conn.Commit() diff --git a/rdkit/TestRunner.py b/rdkit/TestRunner.py index 168487e92..d28c0477f 100755 --- a/rdkit/TestRunner.py +++ b/rdkit/TestRunner.py @@ -7,7 +7,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + from rdkit import RDConfig import os, sys, time try: diff --git a/rdkit/VLib/Filter.py b/rdkit/VLib/Filter.py index 08a27a226..54ca4ab62 100755 --- a/rdkit/VLib/Filter.py +++ b/rdkit/VLib/Filter.py @@ -4,103 +4,101 @@ # All Rights Reserved # -from rdkit import six from rdkit.VLib.Node import VLibNode class FilterNode(VLibNode): - """ base class for nodes which filter their input + """ base class for nodes which filter their input - Assumptions: + Assumptions: - - filter function takes a number of arguments equal to the - number of inputs we have. It returns a bool + - filter function takes a number of arguments equal to the + number of inputs we have. It returns a bool - - inputs (parents) can be stepped through in lockstep + - inputs (parents) can be stepped through in lockstep - - we return a tuple if there's more than one input + - we return a tuple if there's more than one input - Usage Example: - >>> from rdkit.VLib.Supply import SupplyNode - >>> def func(a,b): - ... return a+b < 5 - >>> filt = FilterNode(func=func) - >>> suppl1 = SupplyNode(contents=[1,2,3,3]) - >>> suppl2 = SupplyNode(contents=[1,2,3,1]) - >>> filt.AddParent(suppl1) - >>> filt.AddParent(suppl2) - >>> v = [x for x in filt] - >>> v - [(1, 1), (2, 2), (3, 1)] - >>> filt.reset() - >>> v = [x for x in filt] - >>> v - [(1, 1), (2, 2), (3, 1)] - >>> filt.Destroy() + Usage Example: + >>> from rdkit.VLib.Supply import SupplyNode + >>> def func(a,b): + ... return a+b < 5 + >>> filt = FilterNode(func=func) + >>> suppl1 = SupplyNode(contents=[1,2,3,3]) + >>> suppl2 = SupplyNode(contents=[1,2,3,1]) + >>> filt.AddParent(suppl1) + >>> filt.AddParent(suppl2) + >>> v = [x for x in filt] + >>> v + [(1, 1), (2, 2), (3, 1)] + >>> filt.reset() + >>> v = [x for x in filt] + >>> v + [(1, 1), (2, 2), (3, 1)] + >>> filt.Destroy() - Negation is also possible: - >>> filt = FilterNode(func=func,negate=1) - >>> suppl1 = SupplyNode(contents=[1,2,3,3]) - >>> suppl2 = SupplyNode(contents=[1,2,3,1]) - >>> filt.AddParent(suppl1) - >>> filt.AddParent(suppl2) - >>> v = [x for x in filt] - >>> v - [(3, 3)] - >>> filt.Destroy() + Negation is also possible: + >>> filt = FilterNode(func=func,negate=1) + >>> suppl1 = SupplyNode(contents=[1,2,3,3]) + >>> suppl2 = SupplyNode(contents=[1,2,3,1]) + >>> filt.AddParent(suppl1) + >>> filt.AddParent(suppl2) + >>> v = [x for x in filt] + >>> v + [(3, 3)] + >>> filt.Destroy() - With no function, just return the inputs: - >>> filt = FilterNode() - >>> suppl1 = SupplyNode(contents=[1,2,3,3]) - >>> filt.AddParent(suppl1) - >>> v = [x for x in filt] - >>> v - [1, 2, 3, 3] - >>> filt.Destroy() + With no function, just return the inputs: + >>> filt = FilterNode() + >>> suppl1 = SupplyNode(contents=[1,2,3,3]) + >>> filt.AddParent(suppl1) + >>> v = [x for x in filt] + >>> v + [1, 2, 3, 3] + >>> filt.Destroy() - """ + """ - def __init__(self, func=None, negate=0, **kwargs): - VLibNode.__init__(self, **kwargs) - self._func = func - self._negate = negate + def __init__(self, func=None, negate=0, **kwargs): + VLibNode.__init__(self, **kwargs) + self._func = func + self._negate = negate - def SetNegate(self, state): - self._negate = state + def SetNegate(self, state): + self._negate = state - def Negate(self): - return self._negate + def Negate(self): + return self._negate - def next(self): - parents = self.GetParents() - while 1: - args = [] - try: - for parent in parents: - args.append(next(parent)) - except StopIteration: - raise StopIteration - args = tuple(args) - if self._func is not None: - r = self._func(*args) - if self._negate: - r = not r - # sys.stderr.write('\t\tNEGATE -> %d\n'%(r)) - if r: - res = args - break - else: - res = args - break - if len(parents) == 1: - res = res[0] - return res + def next(self): + parents = self.GetParents() + while 1: + args = [] + try: + for parent in parents: + args.append(next(parent)) + except StopIteration: + raise StopIteration + args = tuple(args) + if self._func is not None: + r = self._func(*args) + if self._negate: + r = not r + # sys.stderr.write('\t\tNEGATE -> %d\n'%(r)) + if r: + res = args + break + else: + res = args + break + if len(parents) == 1: + res = res[0] + return res -if six.PY3: - FilterNode.__next__ = FilterNode.next +FilterNode.__next__ = FilterNode.next # ------------------------------------ @@ -108,11 +106,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/Node.py b/rdkit/VLib/Node.py index 26f043bdd..e568c5532 100755 --- a/rdkit/VLib/Node.py +++ b/rdkit/VLib/Node.py @@ -5,189 +5,186 @@ # import sys -from rdkit import six - class VLibNode(object): - """ base class for all virtual library nodes, - defines minimal required interface - - """ - - def __init__(self, *args, **kwargs): - self._children = [] - self._parents = [] - - # ------------------------------------ - # - # Iteration - # - def __iter__(self): - """ part of the iterator interface """ - self.reset() - return self - - def next(self): - """ part of the iterator interface - - raises StopIteration on failure - """ - pass - - def reset(self): - """ resets our iteration state + """ base class for all virtual library nodes, + defines minimal required interface """ - for parent in self.GetParents(): - parent.reset() - # ------------------------------------ - # - # Library graph operations - # Probably most of these won't need to be reimplemented in - # child classes - # - def AddChild(self, child, notify=1): - """ + def __init__(self, *args, **kwargs): + self._children = [] + self._parents = [] - >>> p1 = VLibNode() - >>> p2 = VLibNode() - >>> c1 = VLibNode() - >>> p1.AddChild(c1) - >>> len(c1.GetParents()) - 1 - >>> len(p1.GetChildren()) - 1 - >>> p2.AddChild(c1,notify=0) - >>> len(c1.GetParents()) - 1 - >>> len(p2.GetChildren()) - 1 - >>> c1.AddParent(p2,notify=0) - >>> len(c1.GetParents()) - 2 - >>> len(p2.GetChildren()) - 1 + # ------------------------------------ + # + # Iteration + # + def __iter__(self): + """ part of the iterator interface """ + self.reset() + return self - """ - self._children.append(child) - if notify: - child.AddParent(self, notify=0) + def next(self): + """ part of the iterator interface - def RemoveChild(self, child, notify=1): - """ - >>> p1 = VLibNode() - >>> c1 = VLibNode() - >>> p1.AddChild(c1) - >>> len(c1.GetParents()) - 1 - >>> len(p1.GetChildren()) - 1 - >>> p1.RemoveChild(c1) - >>> len(c1.GetParents()) - 0 - >>> len(p1.GetChildren()) - 0 - """ - self._children.remove(child) - if notify: - child.RemoveParent(self, notify=0) + raises StopIteration on failure + """ + pass - def GetChildren(self): - return tuple(self._children) + def reset(self): + """ resets our iteration state - def AddParent(self, parent, notify=True): - """ - >>> p1 = VLibNode() - >>> p2 = VLibNode() - >>> c1 = VLibNode() - >>> c1.AddParent(p1) - >>> len(c1.GetParents()) - 1 - >>> len(p1.GetChildren()) - 1 - >>> c1.AddParent(p2,notify=0) - >>> len(c1.GetParents()) - 2 - >>> len(p2.GetChildren()) - 0 - >>> p2.AddChild(c1,notify=0) - >>> len(c1.GetParents()) - 2 - >>> len(p2.GetChildren()) - 1 - """ - self._parents.append(parent) - if notify: - parent.AddChild(self, notify=False) + """ + for parent in self.GetParents(): + parent.reset() - def RemoveParent(self, parent, notify=True): - """ - >>> p1 = VLibNode() - >>> c1 = VLibNode() - >>> p1.AddChild(c1) - >>> len(c1.GetParents()) - 1 - >>> len(p1.GetChildren()) - 1 - >>> c1.RemoveParent(p1) - >>> len(c1.GetParents()) - 0 - >>> len(p1.GetChildren()) - 0 - """ - self._parents.remove(parent) - if notify: - parent.RemoveChild(self, notify=False) + # ------------------------------------ + # + # Library graph operations + # Probably most of these won't need to be reimplemented in + # child classes + # + def AddChild(self, child, notify=1): + """ - def GetParents(self): - return tuple(self._parents) + >>> p1 = VLibNode() + >>> p2 = VLibNode() + >>> c1 = VLibNode() + >>> p1.AddChild(c1) + >>> len(c1.GetParents()) + 1 + >>> len(p1.GetChildren()) + 1 + >>> p2.AddChild(c1,notify=0) + >>> len(c1.GetParents()) + 1 + >>> len(p2.GetChildren()) + 1 + >>> c1.AddParent(p2,notify=0) + >>> len(c1.GetParents()) + 2 + >>> len(p2.GetChildren()) + 1 - def Destroy(self, notify=True, propagateDown=False, propagateUp=False): - """ - >>> p1 = VLibNode() - >>> p2 = VLibNode() - >>> c1 = VLibNode() - >>> c2 = VLibNode() - >>> p1.AddChild(c1) - >>> p2.AddChild(c1) - >>> p2.AddChild(c2) - >>> len(c1.GetParents()) - 2 - >>> len(c2.GetParents()) - 1 - >>> len(p1.GetChildren()) - 1 - >>> len(p2.GetChildren()) - 2 - >>> c1.Destroy(propagateUp=True) - >>> len(p2.GetChildren()) - 0 - >>> len(c1.GetParents()) - 0 - >>> len(c2.GetParents()) - 0 + """ + self._children.append(child) + if notify: + child.AddParent(self, notify=0) - """ - if hasattr(self, '_destroyed'): - return - self._destroyed = True + def RemoveChild(self, child, notify=1): + """ + >>> p1 = VLibNode() + >>> c1 = VLibNode() + >>> p1.AddChild(c1) + >>> len(c1.GetParents()) + 1 + >>> len(p1.GetChildren()) + 1 + >>> p1.RemoveChild(c1) + >>> len(c1.GetParents()) + 0 + >>> len(p1.GetChildren()) + 0 + """ + self._children.remove(child) + if notify: + child.RemoveParent(self, notify=0) - if notify: - for o in self.GetChildren(): - o.RemoveParent(self, notify=False) - if propagateDown: - o.Destroy(notify=True, propagateDown=True, propagateUp=propagateUp) - for o in self.GetParents(): - o.RemoveChild(self, notify=False) - if propagateUp: - o.Destroy(notify=True, propagateDown=propagateDown, propagateUp=True) - self._children = [] - self._parents = [] + def GetChildren(self): + return tuple(self._children) + + def AddParent(self, parent, notify=True): + """ + >>> p1 = VLibNode() + >>> p2 = VLibNode() + >>> c1 = VLibNode() + >>> c1.AddParent(p1) + >>> len(c1.GetParents()) + 1 + >>> len(p1.GetChildren()) + 1 + >>> c1.AddParent(p2,notify=0) + >>> len(c1.GetParents()) + 2 + >>> len(p2.GetChildren()) + 0 + >>> p2.AddChild(c1,notify=0) + >>> len(c1.GetParents()) + 2 + >>> len(p2.GetChildren()) + 1 + """ + self._parents.append(parent) + if notify: + parent.AddChild(self, notify=False) + + def RemoveParent(self, parent, notify=True): + """ + >>> p1 = VLibNode() + >>> c1 = VLibNode() + >>> p1.AddChild(c1) + >>> len(c1.GetParents()) + 1 + >>> len(p1.GetChildren()) + 1 + >>> c1.RemoveParent(p1) + >>> len(c1.GetParents()) + 0 + >>> len(p1.GetChildren()) + 0 + """ + self._parents.remove(parent) + if notify: + parent.RemoveChild(self, notify=False) + + def GetParents(self): + return tuple(self._parents) + + def Destroy(self, notify=True, propagateDown=False, propagateUp=False): + """ + >>> p1 = VLibNode() + >>> p2 = VLibNode() + >>> c1 = VLibNode() + >>> c2 = VLibNode() + >>> p1.AddChild(c1) + >>> p2.AddChild(c1) + >>> p2.AddChild(c2) + >>> len(c1.GetParents()) + 2 + >>> len(c2.GetParents()) + 1 + >>> len(p1.GetChildren()) + 1 + >>> len(p2.GetChildren()) + 2 + >>> c1.Destroy(propagateUp=True) + >>> len(p2.GetChildren()) + 0 + >>> len(c1.GetParents()) + 0 + >>> len(c2.GetParents()) + 0 + + """ + if hasattr(self, '_destroyed'): + return + self._destroyed = True + + if notify: + for o in self.GetChildren(): + o.RemoveParent(self, notify=False) + if propagateDown: + o.Destroy(notify=True, propagateDown=True, propagateUp=propagateUp) + for o in self.GetParents(): + o.RemoveChild(self, notify=False) + if propagateUp: + o.Destroy(notify=True, propagateDown=propagateDown, propagateUp=True) + self._children = [] + self._parents = [] -if six.PY3: - VLibNode.__next__ = VLibNode.next +VLibNode.__next__ = VLibNode.next # ------------------------------------ @@ -195,10 +192,10 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/NodeLib/DbPickleSupplier.py b/rdkit/VLib/NodeLib/DbPickleSupplier.py index dd55e2d5d..5f139123e 100644 --- a/rdkit/VLib/NodeLib/DbPickleSupplier.py +++ b/rdkit/VLib/NodeLib/DbPickleSupplier.py @@ -3,11 +3,11 @@ # Copyright (C) 2004 Rational Discovery LLC # All Rights Reserved # -from __future__ import print_function + from rdkit import RDConfig import sys, os.path from rdkit.VLib.Supply import SupplyNode -from rdkit.six.moves import cPickle +import pickle if RDConfig.usePgSQL: from pyPgSQL import PgSQL as sql @@ -81,7 +81,7 @@ if RDConfig.usePgSQL: self._first = 0 if self._depickle: if not self._klass: - fp = cPickle.loads(val) + fp = pickle.loads(val) else: fp = self._klass(val) fields = list(t) @@ -149,7 +149,7 @@ if RDConfig.usePgSQL: t = [self.res.getvalue(self.idx - 1, x) for x in range(self.res.nfields)] if self._depickle: try: - fp = cPickle.loads(val) + fp = pickle.loads(val) except Exception: import logging del t[self._pickleCol] diff --git a/rdkit/VLib/NodeLib/SDSupply.py b/rdkit/VLib/NodeLib/SDSupply.py index 8bf1d0756..811b7654d 100755 --- a/rdkit/VLib/NodeLib/SDSupply.py +++ b/rdkit/VLib/NodeLib/SDSupply.py @@ -4,52 +4,50 @@ # All Rights Reserved # from rdkit import Chem -from rdkit import six from rdkit.VLib.Supply import SupplyNode class SDSupplyNode(SupplyNode): - """ SD supplier + """ SD supplier - Sample Usage: - >>> import os - >>> from rdkit import RDConfig - >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\ - 'test_data','NCI_aids.10.sdf') - >>> suppl = SDSupplyNode(fileN) - >>> ms = [x for x in suppl] - >>> len(ms) - 10 - >>> ms[0].GetProp("_Name") - '48' - >>> ms[1].GetProp("_Name") - '78' - >>> suppl.reset() - >>> suppl.next().GetProp("_Name") - '48' - >>> suppl.next().GetProp("_Name") - '78' - - """ - - def __init__(self, fileName, **kwargs): - SupplyNode.__init__(self, **kwargs) - self._fileName = fileName - self._supplier = Chem.SDMolSupplier(self._fileName) - - def reset(self): - SupplyNode.reset(self) - self._supplier.reset() - - def next(self): - """ + Sample Usage: + >>> import os + >>> from rdkit import RDConfig + >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\ + 'test_data','NCI_aids.10.sdf') + >>> suppl = SDSupplyNode(fileN) + >>> ms = [x for x in suppl] + >>> len(ms) + 10 + >>> ms[0].GetProp("_Name") + '48' + >>> ms[1].GetProp("_Name") + '78' + >>> suppl.reset() + >>> suppl.next().GetProp("_Name") + '48' + >>> suppl.next().GetProp("_Name") + '78' """ - return next(self._supplier) + + def __init__(self, fileName, **kwargs): + SupplyNode.__init__(self, **kwargs) + self._fileName = fileName + self._supplier = Chem.SDMolSupplier(self._fileName) + + def reset(self): + SupplyNode.reset(self) + self._supplier.reset() + + def next(self): + """ + + """ + return next(self._supplier) -if six.PY3: - SDSupplyNode.__next__ = SDSupplyNode.next +SDSupplyNode.__next__ = SDSupplyNode.next # ------------------------------------ @@ -57,11 +55,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/NodeLib/SmilesOutput.py b/rdkit/VLib/NodeLib/SmilesOutput.py index c8b9a6b3f..680c85807 100755 --- a/rdkit/VLib/NodeLib/SmilesOutput.py +++ b/rdkit/VLib/NodeLib/SmilesOutput.py @@ -8,63 +8,63 @@ from rdkit.VLib.Output import OutputNode as BaseOutputNode class OutputNode(BaseOutputNode): - """ dumps smiles output + """ dumps smiles output - Assumptions: + Assumptions: - - destination supports a write() method + - destination supports a write() method - - inputs (parents) can be stepped through in lockstep + - inputs (parents) can be stepped through in lockstep - Usage Example: - >>> smis = ['C1CCC1','C1CC1','C=O','NCC'] - >>> mols = [Chem.MolFromSmiles(x) for x in smis] - >>> from rdkit.VLib.Supply import SupplyNode - >>> suppl = SupplyNode(contents=mols) - >>> from rdkit.six import StringIO - >>> sio = StringIO() - >>> node = OutputNode(dest=sio,delim=', ') - >>> node.AddParent(suppl) - >>> ms = [x for x in node] - >>> len(ms) - 4 - >>> txt = sio.getvalue() - >>> repr(txt) - "'1, C1CCC1\\\\n2, C1CC1\\\\n3, C=O\\\\n4, CCN\\\\n'" + Usage Example: + >>> smis = ['C1CCC1','C1CC1','C=O','NCC'] + >>> mols = [Chem.MolFromSmiles(x) for x in smis] + >>> from rdkit.VLib.Supply import SupplyNode + >>> suppl = SupplyNode(contents=mols) + >>> from io import StringIO + >>> sio = StringIO() + >>> node = OutputNode(dest=sio,delim=', ') + >>> node.AddParent(suppl) + >>> ms = [x for x in node] + >>> len(ms) + 4 + >>> txt = sio.getvalue() + >>> repr(txt) + "'1, C1CCC1\\\\n2, C1CC1\\\\n3, C=O\\\\n4, CCN\\\\n'" - """ + """ - def __init__(self, dest=None, delim='\t', idField=None, **kwargs): - BaseOutputNode.__init__(self, dest=dest, strFunc=self.smilesOut) - self._dest = dest - self._idField = idField - self._delim = delim - self._nDumped = 0 + def __init__(self, dest=None, delim='\t', idField=None, **kwargs): + BaseOutputNode.__init__(self, dest=dest, strFunc=self.smilesOut) + self._dest = dest + self._idField = idField + self._delim = delim + self._nDumped = 0 - def reset(self): - BaseOutputNode.reset(self) - self._nDumped = 0 + def reset(self): + BaseOutputNode.reset(self) + self._nDumped = 0 - def smilesOut(self, mol): - self._nDumped += 1 - if isinstance(mol, (tuple, list)): - args = mol - mol = args[0] - if len(args) > 1: - args = list(args[1:]) - else: - args = [] - else: - args = [] + def smilesOut(self, mol): + self._nDumped += 1 + if isinstance(mol, (tuple, list)): + args = mol + mol = args[0] + if len(args) > 1: + args = list(args[1:]) + else: + args = [] + else: + args = [] - if self._idField and mol.HasProp(self._idField): - label = mol.GetProp(self._idField) - else: - label = str(self._nDumped) - smi = Chem.MolToSmiles(mol) - outp = [label, smi] + args - return '%s\n' % (self._delim.join(outp)) + if self._idField and mol.HasProp(self._idField): + label = mol.GetProp(self._idField) + else: + label = str(self._nDumped) + smi = Chem.MolToSmiles(mol) + outp = [label, smi] + args + return '%s\n' % (self._delim.join(outp)) # ------------------------------------ @@ -72,11 +72,11 @@ class OutputNode(BaseOutputNode): # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import doctest - import sys - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import doctest + import sys + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/NodeLib/SmilesSupply.py b/rdkit/VLib/NodeLib/SmilesSupply.py index 795078f58..70b5218b0 100755 --- a/rdkit/VLib/NodeLib/SmilesSupply.py +++ b/rdkit/VLib/NodeLib/SmilesSupply.py @@ -4,62 +4,60 @@ # All Rights Reserved # from rdkit import Chem -from rdkit import six from rdkit.VLib.Supply import SupplyNode class SmilesSupplyNode(SupplyNode): - """ Smiles supplier + """ Smiles supplier - Sample Usage: - >>> import os - >>> from rdkit import RDConfig - >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\ - 'test_data','pgp_20.txt') - >>> suppl = SmilesSupplyNode(fileN,delim="\\t",smilesColumn=2,nameColumn=1,titleLine=1) - >>> ms = [x for x in suppl] - >>> len(ms) - 20 - >>> ms[0].GetProp("_Name") - 'ALDOSTERONE' - >>> ms[0].GetProp("ID") - 'RD-PGP-0001' - >>> ms[1].GetProp("_Name") - 'AMIODARONE' - >>> ms[3].GetProp("ID") - 'RD-PGP-0004' - >>> suppl.reset() - >>> suppl.next().GetProp("_Name") - 'ALDOSTERONE' - >>> suppl.next().GetProp("_Name") - 'AMIODARONE' - >>> suppl.reset() - - """ - - def __init__(self, fileName, delim="\t", nameColumn=1, smilesColumn=0, titleLine=0, **kwargs): - SupplyNode.__init__(self, **kwargs) - self._fileName = fileName - self._supplier = Chem.SmilesMolSupplier(self._fileName, delimiter=delim, - smilesColumn=smilesColumn, nameColumn=nameColumn, - titleLine=titleLine) - - def reset(self): - SupplyNode.reset(self) - self._supplier.reset() - - def next(self): - """ + Sample Usage: + >>> import os + >>> from rdkit import RDConfig + >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\ + 'test_data','pgp_20.txt') + >>> suppl = SmilesSupplyNode(fileN,delim="\\t",smilesColumn=2,nameColumn=1,titleLine=1) + >>> ms = [x for x in suppl] + >>> len(ms) + 20 + >>> ms[0].GetProp("_Name") + 'ALDOSTERONE' + >>> ms[0].GetProp("ID") + 'RD-PGP-0001' + >>> ms[1].GetProp("_Name") + 'AMIODARONE' + >>> ms[3].GetProp("ID") + 'RD-PGP-0004' + >>> suppl.reset() + >>> suppl.next().GetProp("_Name") + 'ALDOSTERONE' + >>> suppl.next().GetProp("_Name") + 'AMIODARONE' + >>> suppl.reset() """ - r = None - while not r: - r = next(self._supplier) - return r + + def __init__(self, fileName, delim="\t", nameColumn=1, smilesColumn=0, titleLine=0, **kwargs): + SupplyNode.__init__(self, **kwargs) + self._fileName = fileName + self._supplier = Chem.SmilesMolSupplier(self._fileName, delimiter=delim, + smilesColumn=smilesColumn, nameColumn=nameColumn, + titleLine=titleLine) + + def reset(self): + SupplyNode.reset(self) + self._supplier.reset() + + def next(self): + """ + + """ + r = None + while not r: + r = next(self._supplier) + return r -if six.PY3: - SmilesSupplyNode.__next__ = SmilesSupplyNode.next +SmilesSupplyNode.__next__ = SmilesSupplyNode.next # ------------------------------------ @@ -67,11 +65,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import doctest - import sys - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import doctest + import sys + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/NodeLib/UnitTestNodeLib.py b/rdkit/VLib/NodeLib/UnitTestNodeLib.py index 7619bf7ef..4b5c6f6ba 100755 --- a/rdkit/VLib/NodeLib/UnitTestNodeLib.py +++ b/rdkit/VLib/NodeLib/UnitTestNodeLib.py @@ -8,7 +8,7 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import doctest import unittest @@ -17,85 +17,86 @@ from rdkit import Chem, RDLogger from rdkit.VLib.NodeLib import SDSupply, SmartsMolFilter, SmartsRemover from rdkit.VLib.NodeLib import SmilesDupeFilter, SmilesOutput, SmilesSupply from rdkit.VLib.Supply import SupplyNode -from rdkit.six import StringIO +from io import StringIO def load_tests(loader, tests, ignore): - """ Add the Doctests from the module """ - tests.addTests(doctest.DocTestSuite(SDSupply, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(SmartsMolFilter, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(SmartsRemover, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(SmilesDupeFilter, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(SmilesOutput, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(SmilesSupply, optionflags=doctest.ELLIPSIS)) - # tests.addTests(doctest.DocTestSuite(DbMolSupply, optionflags=doctest.ELLIPSIS)) - return tests + """ Add the Doctests from the module """ + tests.addTests(doctest.DocTestSuite(SDSupply, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(SmartsMolFilter, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(SmartsRemover, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(SmilesDupeFilter, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(SmilesOutput, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(SmilesSupply, optionflags=doctest.ELLIPSIS)) + # tests.addTests(doctest.DocTestSuite(DbMolSupply, optionflags=doctest.ELLIPSIS)) + return tests class Test_NodeLib(unittest.TestCase): - def tearDown(self): - RDLogger.EnableLog('rdApp.error') + def tearDown(self): + RDLogger.EnableLog('rdApp.error') - def test_SmartsMolFilter(self): - smis = ['C1CCC1', 'C1CCC1C=O', 'CCCC', 'CCC=O', 'CC(=O)C', 'CCN', 'NCCN', 'NCC=O'] - mols = [Chem.MolFromSmiles(x) for x in smis] - suppl = SupplyNode(contents=mols) - self.assertEqual(len(list(suppl)), 8) + def test_SmartsMolFilter(self): + smis = ['C1CCC1', 'C1CCC1C=O', 'CCCC', 'CCC=O', 'CC(=O)C', 'CCN', 'NCCN', 'NCC=O'] + mols = [Chem.MolFromSmiles(x) for x in smis] + suppl = SupplyNode(contents=mols) + self.assertEqual(len(list(suppl)), 8) - smas = ['C=O', 'CN'] - counts = [1, 2] - filt = SmartsMolFilter.SmartsFilter(patterns=smas, counts=counts) - filt.AddParent(suppl) - self.assertEqual(len(list(filt)), 5) + smas = ['C=O', 'CN'] + counts = [1, 2] + filt = SmartsMolFilter.SmartsFilter(patterns=smas, counts=counts) + filt.AddParent(suppl) + self.assertEqual(len(list(filt)), 5) - suppl.reset() - filt.SetNegate(True) - self.assertEqual(len(list(filt)), 3) + suppl.reset() + filt.SetNegate(True) + self.assertEqual(len(list(filt)), 3) - smas = ['C=O', 'CN'] - filt = SmartsMolFilter.SmartsFilter(patterns=smas) - filt.AddParent(suppl) - self.assertEqual(len(list(filt)), 6) + smas = ['C=O', 'CN'] + filt = SmartsMolFilter.SmartsFilter(patterns=smas) + filt.AddParent(suppl) + self.assertEqual(len(list(filt)), 6) - self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=smas, - counts=['notEnough', ]) - RDLogger.DisableLog('rdApp.error') - self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=['BadSmarts']) - RDLogger.EnableLog('rdApp.error') + self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=smas, + counts=['notEnough', ]) + RDLogger.DisableLog('rdApp.error') + self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=['BadSmarts']) + RDLogger.EnableLog('rdApp.error') - def test_SmilesOutput(self): - smis = ['C1CCC1', 'C1CC1', 'C=O', 'CCN'] - mols = [Chem.MolFromSmiles(x) for x in smis] - for i, mol in enumerate(mols, 100): - mol.SetProp('ID', str(i)) + def test_SmilesOutput(self): + smis = ['C1CCC1', 'C1CC1', 'C=O', 'CCN'] + mols = [Chem.MolFromSmiles(x) for x in smis] + for i, mol in enumerate(mols, 100): + mol.SetProp('ID', str(i)) - suppl1 = SupplyNode(contents=mols) - suppl2 = SupplyNode(contents='abcd') + suppl1 = SupplyNode(contents=mols) + suppl2 = SupplyNode(contents='abcd') - sio = StringIO() - node = SmilesOutput.OutputNode(idField='ID', dest=sio, delim=', ') - node.AddParent(suppl1) - node.AddParent(suppl2) - list(node) - self.assertEqual(sio.getvalue(), '100, C1CCC1, a\n101, C1CC1, b\n102, C=O, c\n103, CCN, d\n') + sio = StringIO() + node = SmilesOutput.OutputNode(idField='ID', dest=sio, delim=', ') + node.AddParent(suppl1) + node.AddParent(suppl2) + list(node) + self.assertEqual( + sio.getvalue(), '100, C1CCC1, a\n101, C1CC1, b\n102, C=O, c\n103, CCN, d\n') - def test_SmartsRemover(self): - salts = ['[Cl;H1&X1,-]', '[Na+]', '[O;H2,H1&-,X0&-2]', 'BadSmarts'] - RDLogger.DisableLog('rdApp.error') - self.assertRaises(ValueError, SmartsRemover.SmartsRemover, patterns=salts) - RDLogger.EnableLog('rdApp.error') + def test_SmartsRemover(self): + salts = ['[Cl;H1&X1,-]', '[Na+]', '[O;H2,H1&-,X0&-2]', 'BadSmarts'] + RDLogger.DisableLog('rdApp.error') + self.assertRaises(ValueError, SmartsRemover.SmartsRemover, patterns=salts) + RDLogger.EnableLog('rdApp.error') - def test_SmilesDupeFilter(self): - smis = ['C1CCC1', 'CCCC', 'CCCC', 'C1CCC1'] - mols = [Chem.MolFromSmiles(x) for x in smis] - suppl = SupplyNode(contents=mols) - self.assertEqual(len(list(suppl)), 4) + def test_SmilesDupeFilter(self): + smis = ['C1CCC1', 'CCCC', 'CCCC', 'C1CCC1'] + mols = [Chem.MolFromSmiles(x) for x in smis] + suppl = SupplyNode(contents=mols) + self.assertEqual(len(list(suppl)), 4) - dupFilter = SmilesDupeFilter.DupeFilter() - dupFilter.AddParent(suppl) - self.assertEqual(len(list(dupFilter)), 2) + dupFilter = SmilesDupeFilter.DupeFilter() + dupFilter.AddParent(suppl) + self.assertEqual(len(list(dupFilter)), 2) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/VLib/NodeLib/demo.py b/rdkit/VLib/NodeLib/demo.py index 5cc8efa2c..77272d1bd 100755 --- a/rdkit/VLib/NodeLib/demo.py +++ b/rdkit/VLib/NodeLib/demo.py @@ -3,7 +3,7 @@ # Copyright (C) 2003 Rational Discovery LLC # All Rights Reserved # -from __future__ import print_function + from rdkit import RDConfig from rdkit import Chem import os.path diff --git a/rdkit/VLib/Output.py b/rdkit/VLib/Output.py index 3f462ae90..2467fd29e 100755 --- a/rdkit/VLib/Output.py +++ b/rdkit/VLib/Output.py @@ -3,62 +3,60 @@ # Copyright (C) 2003 Rational Discovery LLC # All Rights Reserved # -from rdkit import six from rdkit.VLib.Node import VLibNode class OutputNode(VLibNode): - """ base class for nodes which dump output + """ base class for nodes which dump output - Assumptions: + Assumptions: - - destination supports a write() method + - destination supports a write() method - - strFunc, if provided, returns a string representation of - the input + - strFunc, if provided, returns a string representation of + the input - - inputs (parents) can be stepped through in lockstep + - inputs (parents) can be stepped through in lockstep - Usage Example: - >>> from rdkit.VLib.Supply import SupplyNode - >>> supplier = SupplyNode(contents=[1,2,3]) - >>> from rdkit.six import StringIO - >>> sio = StringIO() - >>> node = OutputNode(dest=sio,strFunc=lambda x:'%s '%(str(x))) - >>> node.AddParent(supplier) - >>> node.next() - 1 - >>> sio.getvalue() - '1 ' - >>> node.next() - 2 - >>> sio.getvalue() - '1 2 ' + Usage Example: + >>> from rdkit.VLib.Supply import SupplyNode + >>> supplier = SupplyNode(contents=[1,2,3]) + >>> from io import StringIO + >>> sio = StringIO() + >>> node = OutputNode(dest=sio,strFunc=lambda x:'%s '%(str(x))) + >>> node.AddParent(supplier) + >>> node.next() + 1 + >>> sio.getvalue() + '1 ' + >>> node.next() + 2 + >>> sio.getvalue() + '1 2 ' - """ + """ - def __init__(self, dest=None, strFunc=None, **kwargs): - VLibNode.__init__(self, **kwargs) - self._dest = dest - self._func = strFunc + def __init__(self, dest=None, strFunc=None, **kwargs): + VLibNode.__init__(self, **kwargs) + self._dest = dest + self._func = strFunc - def next(self): - parents = self.GetParents() - args = tuple([parent.next() for parent in parents]) - if len(args) == 1: - args = args[0] - if self._dest: - if self._func is not None: - outp = self._func(args) - else: - outp = str(args) - self._dest.write(outp) - return args + def next(self): + parents = self.GetParents() + args = tuple([parent.next() for parent in parents]) + if len(args) == 1: + args = args[0] + if self._dest: + if self._func is not None: + outp = self._func(args) + else: + outp = str(args) + self._dest.write(outp) + return args -if six.PY3: - OutputNode.__next__ = OutputNode.next +OutputNode.__next__ = OutputNode.next # ------------------------------------ @@ -66,11 +64,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import doctest - import sys - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import doctest + import sys + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/Supply.py b/rdkit/VLib/Supply.py index f72021c2f..fbc9e3659 100755 --- a/rdkit/VLib/Supply.py +++ b/rdkit/VLib/Supply.py @@ -3,63 +3,61 @@ # Copyright (C) 2003 Rational Discovery LLC # All Rights Reserved # -from rdkit import six from rdkit.VLib.Node import VLibNode class SupplyNode(VLibNode): - """ base class for nodes which supply things + """ base class for nodes which supply things - Assumptions: - 1) no parents + Assumptions: + 1) no parents - Usage Example: - >>> supplier = SupplyNode(contents=[1,2,3]) - >>> supplier.next() - 1 - >>> supplier.next() - 2 - >>> supplier.next() - 3 - >>> supplier.next() - Traceback (most recent call last): - ... - StopIteration - >>> supplier.reset() - >>> supplier.next() - 1 - >>> [x for x in supplier] - [1, 2, 3] + Usage Example: + >>> supplier = SupplyNode(contents=[1,2,3]) + >>> supplier.next() + 1 + >>> supplier.next() + 2 + >>> supplier.next() + 3 + >>> supplier.next() + Traceback (most recent call last): + ... + StopIteration + >>> supplier.reset() + >>> supplier.next() + 1 + >>> [x for x in supplier] + [1, 2, 3] - """ + """ - def __init__(self, contents=None, **kwargs): - VLibNode.__init__(self, **kwargs) - if contents is not None: - self._contents = contents - else: - self._contents = [] - self._pos = 0 + def __init__(self, contents=None, **kwargs): + VLibNode.__init__(self, **kwargs) + if contents is not None: + self._contents = contents + else: + self._contents = [] + self._pos = 0 - def reset(self): - VLibNode.reset(self) - self._pos = 0 + def reset(self): + VLibNode.reset(self) + self._pos = 0 - def next(self): - if self._pos == len(self._contents): - raise StopIteration + def next(self): + if self._pos == len(self._contents): + raise StopIteration - res = self._contents[self._pos] - self._pos += 1 - return res + res = self._contents[self._pos] + self._pos += 1 + return res - def AddParent(self, parent, notify=1): - raise ValueError('SupplyNodes do not have parents') + def AddParent(self, parent, notify=1): + raise ValueError('SupplyNodes do not have parents') -if six.PY3: - SupplyNode.__next__ = SupplyNode.next +SupplyNode.__next__ = SupplyNode.next # ------------------------------------ @@ -67,11 +65,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/Transform.py b/rdkit/VLib/Transform.py index 767d00c48..ea622cdba 100755 --- a/rdkit/VLib/Transform.py +++ b/rdkit/VLib/Transform.py @@ -3,71 +3,69 @@ # Copyright (C) 2003 Rational Discovery LLC # All Rights Reserved # -from rdkit import six from rdkit.VLib.Node import VLibNode class TransformNode(VLibNode): - """ base class for nodes which filter their input + """ base class for nodes which filter their input - Assumptions: + Assumptions: - - transform function takes a number of arguments equal to the - number of inputs we have. We return whatever it returns + - transform function takes a number of arguments equal to the + number of inputs we have. We return whatever it returns - - inputs (parents) can be stepped through in lockstep + - inputs (parents) can be stepped through in lockstep - Usage Example: - >>> from rdkit.VLib.Supply import SupplyNode - >>> def func(a,b): - ... return a+b - >>> tform = TransformNode(func) - >>> suppl1 = SupplyNode(contents=[1,2,3,3]) - >>> suppl2 = SupplyNode(contents=[1,2,3,1]) - >>> tform.AddParent(suppl1) - >>> tform.AddParent(suppl2) - >>> v = [x for x in tform] - >>> v - [2, 4, 6, 4] - >>> tform.reset() - >>> v = [x for x in tform] - >>> v - [2, 4, 6, 4] + Usage Example: + >>> from rdkit.VLib.Supply import SupplyNode + >>> def func(a,b): + ... return a+b + >>> tform = TransformNode(func) + >>> suppl1 = SupplyNode(contents=[1,2,3,3]) + >>> suppl2 = SupplyNode(contents=[1,2,3,1]) + >>> tform.AddParent(suppl1) + >>> tform.AddParent(suppl2) + >>> v = [x for x in tform] + >>> v + [2, 4, 6, 4] + >>> tform.reset() + >>> v = [x for x in tform] + >>> v + [2, 4, 6, 4] - If we don't provide a function, just return the inputs: - >>> tform = TransformNode() - >>> suppl1 = SupplyNode(contents=[1,2,3,3]) - >>> suppl2 = SupplyNode(contents=[1,2,3,1]) - >>> tform.AddParent(suppl1) - >>> tform.AddParent(suppl2) - >>> v = [x for x in tform] - >>> v - [(1, 1), (2, 2), (3, 3), (3, 1)] + If we don't provide a function, just return the inputs: + >>> tform = TransformNode() + >>> suppl1 = SupplyNode(contents=[1,2,3,3]) + >>> suppl2 = SupplyNode(contents=[1,2,3,1]) + >>> tform.AddParent(suppl1) + >>> tform.AddParent(suppl2) + >>> v = [x for x in tform] + >>> v + [(1, 1), (2, 2), (3, 3), (3, 1)] - """ + """ - def __init__(self, func=None, **kwargs): - VLibNode.__init__(self, **kwargs) - self._func = func + def __init__(self, func=None, **kwargs): + VLibNode.__init__(self, **kwargs) + self._func = func - def next(self): - parent = self.GetParents()[0] - args = [] - try: - for parent in self.GetParents(): - args.append(parent.next()) - except StopIteration: - raise StopIteration - args = tuple(args) - if self._func is not None: - res = self._func(*args) - else: - res = args - return res + def next(self): + parent = self.GetParents()[0] + args = [] + try: + for parent in self.GetParents(): + args.append(parent.next()) + except StopIteration: + raise StopIteration + args = tuple(args) + if self._func is not None: + res = self._func(*args) + else: + res = args + return res -if six.PY3: - TransformNode.__next__ = TransformNode.next +TransformNode.__next__ = TransformNode.next # ------------------------------------ @@ -75,11 +73,11 @@ if six.PY3: # doctest boilerplate # def _runDoctests(verbose=None): # pragma: nocover - import sys - import doctest - failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) - sys.exit(failed) + import sys + import doctest + failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) + sys.exit(failed) if __name__ == '__main__': # pragma: nocover - _runDoctests() + _runDoctests() diff --git a/rdkit/VLib/UnitTestVLib.py b/rdkit/VLib/UnitTestVLib.py index 38cd2e33e..48cf2b37f 100755 --- a/rdkit/VLib/UnitTestVLib.py +++ b/rdkit/VLib/UnitTestVLib.py @@ -8,140 +8,140 @@ # which is included in the file license.txt, found at the root # of the RDKit source tree. # -from __future__ import print_function + import doctest import unittest from rdkit.VLib import Node, Filter, Output, Supply, Transform -from rdkit.six import StringIO +from io import StringIO def load_tests(loader, tests, ignore): - """ Add the Doctests from the module """ - tests.addTests(doctest.DocTestSuite(Filter, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(Node, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(Output, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(Supply, optionflags=doctest.ELLIPSIS)) - tests.addTests(doctest.DocTestSuite(Transform, optionflags=doctest.ELLIPSIS)) - return tests + """ Add the Doctests from the module """ + tests.addTests(doctest.DocTestSuite(Filter, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(Node, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(Output, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(Supply, optionflags=doctest.ELLIPSIS)) + tests.addTests(doctest.DocTestSuite(Transform, optionflags=doctest.ELLIPSIS)) + return tests class Test_VLib(unittest.TestCase): - def test_SupplyNode(self): - supplier = Supply.SupplyNode() - self.assertEqual(supplier._contents, []) + def test_SupplyNode(self): + supplier = Supply.SupplyNode() + self.assertEqual(supplier._contents, []) - supplier = Supply.SupplyNode(contents=[1, 2, 3]) - self.assertRaises(ValueError, supplier.AddParent, None) + supplier = Supply.SupplyNode(contents=[1, 2, 3]) + self.assertRaises(ValueError, supplier.AddParent, None) - def test_FilterNode(self): - filt = Filter.FilterNode(func=lambda a, b: a + b < 5) - suppl1 = Supply.SupplyNode(contents=[1, 2, 3, 3]) - suppl2 = Supply.SupplyNode(contents=[1, 2, 3, 1]) - filt.AddParent(suppl1) - filt.AddParent(suppl2) - self.assertEqual([x for x in filt], [(1, 1), (2, 2), (3, 1)]) - filt.reset() - self.assertEqual(filt.Negate(), False) - filt.SetNegate(True) - self.assertEqual(filt.Negate(), True) - self.assertEqual([x for x in filt], [(3, 3), ]) - filt.Destroy() + def test_FilterNode(self): + filt = Filter.FilterNode(func=lambda a, b: a + b < 5) + suppl1 = Supply.SupplyNode(contents=[1, 2, 3, 3]) + suppl2 = Supply.SupplyNode(contents=[1, 2, 3, 1]) + filt.AddParent(suppl1) + filt.AddParent(suppl2) + self.assertEqual([x for x in filt], [(1, 1), (2, 2), (3, 1)]) + filt.reset() + self.assertEqual(filt.Negate(), False) + filt.SetNegate(True) + self.assertEqual(filt.Negate(), True) + self.assertEqual([x for x in filt], [(3, 3), ]) + filt.Destroy() - def test_OutputNode(self): - supplier1 = Supply.SupplyNode(contents=[1, 2, 3]) - supplier2 = Supply.SupplyNode(contents=['a', 'b', 'c']) + def test_OutputNode(self): + supplier1 = Supply.SupplyNode(contents=[1, 2, 3]) + supplier2 = Supply.SupplyNode(contents=['a', 'b', 'c']) - sio = StringIO() - node = Output.OutputNode(dest=sio, strFunc=lambda x: '{0[0]}-{0[1]} '.format(x)) - node.AddParent(supplier1) - node.AddParent(supplier2) - result = list(s for s in node) - self.assertEqual(result, [(1, 'a'), (2, 'b'), (3, 'c')]) - self.assertEqual(sio.getvalue(), '1-a 2-b 3-c ') + sio = StringIO() + node = Output.OutputNode(dest=sio, strFunc=lambda x: '{0[0]}-{0[1]} '.format(x)) + node.AddParent(supplier1) + node.AddParent(supplier2) + result = list(s for s in node) + self.assertEqual(result, [(1, 'a'), (2, 'b'), (3, 'c')]) + self.assertEqual(sio.getvalue(), '1-a 2-b 3-c ') - sio = StringIO() - node = Output.OutputNode(dest=sio) - node.AddParent(supplier1) - result = list(s for s in node) - self.assertEqual(result, [1, 2, 3]) - self.assertEqual(sio.getvalue(), '123') + sio = StringIO() + node = Output.OutputNode(dest=sio) + node.AddParent(supplier1) + result = list(s for s in node) + self.assertEqual(result, [1, 2, 3]) + self.assertEqual(sio.getvalue(), '123') - def test_VLibNode(self): + def test_VLibNode(self): - def setupNodes(): - p1 = Node.VLibNode() - p2 = Node.VLibNode() - c1 = Node.VLibNode() - c2 = Node.VLibNode() - p1.AddChild(c1) - p2.AddChild(c1) - p2.AddChild(c2) - return p1, p2, c1, c2 + def setupNodes(): + p1 = Node.VLibNode() + p2 = Node.VLibNode() + c1 = Node.VLibNode() + c2 = Node.VLibNode() + p1.AddChild(c1) + p2.AddChild(c1) + p2.AddChild(c2) + return p1, p2, c1, c2 - p1, p2, c1, c2 = setupNodes() - # p1 -> c1 - # p2 -> c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 2) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 1) - self.assertEqual(len(p2.GetChildren()), 2) + p1, p2, c1, c2 = setupNodes() + # p1 -> c1 + # p2 -> c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 2) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 1) + self.assertEqual(len(p2.GetChildren()), 2) - p1.Destroy() - # p1 - # p2 -> c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 1) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 2) + p1.Destroy() + # p1 + # p2 -> c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 1) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 2) - p1, p2, c1, c2 = setupNodes() - p1.Destroy(propagateDown=True) - # p1, c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 0) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 1) + p1, p2, c1, c2 = setupNodes() + p1.Destroy(propagateDown=True) + # p1, c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 0) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 1) - p1, p2, c1, c2 = setupNodes() - p1.Destroy(propagateUp=True) - # p1 - # p2 -> c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 1) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 2) + p1, p2, c1, c2 = setupNodes() + p1.Destroy(propagateUp=True) + # p1 + # p2 -> c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 1) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 2) - p1, p2, c1, c2 = setupNodes() - c1.Destroy(propagateUp=True) - # p1, c1, p2, c2 - self.assertEqual(len(c1.GetParents()), 0) - self.assertEqual(len(c2.GetParents()), 0) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 0) + p1, p2, c1, c2 = setupNodes() + c1.Destroy(propagateUp=True) + # p1, c1, p2, c2 + self.assertEqual(len(c1.GetParents()), 0) + self.assertEqual(len(c2.GetParents()), 0) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 0) - p1, p2, c1, c2 = setupNodes() - p1.Destroy(propagateDown=True) - # p1, c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 0) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 1) - p1.Destroy(propagateDown=True) - # p1, c1 - # p2 -> c2 - self.assertEqual(len(c1.GetParents()), 0) - self.assertEqual(len(c2.GetParents()), 1) - self.assertEqual(len(p1.GetChildren()), 0) - self.assertEqual(len(p2.GetChildren()), 1) + p1, p2, c1, c2 = setupNodes() + p1.Destroy(propagateDown=True) + # p1, c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 0) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 1) + p1.Destroy(propagateDown=True) + # p1, c1 + # p2 -> c2 + self.assertEqual(len(c1.GetParents()), 0) + self.assertEqual(len(c2.GetParents()), 1) + self.assertEqual(len(p1.GetChildren()), 0) + self.assertEqual(len(p2.GetChildren()), 1) if __name__ == '__main__': # pragma: nocover - unittest.main() + unittest.main() diff --git a/rdkit/_py2_pickle.py b/rdkit/_py2_pickle.py index 0063f487d..0f6719dc4 100644 --- a/rdkit/_py2_pickle.py +++ b/rdkit/_py2_pickle.py @@ -1,6 +1,6 @@ -from cPickle import load as _load -from cPickle import loads as _loads -from cPickle import * +from pickle import load as _load +from pickle import loads as _loads +from pickle import * def load(f, **kwargs): diff --git a/rdkit/six.py b/rdkit/six.py index 21c314871..762e1574c 100644 --- a/rdkit/six.py +++ b/rdkit/six.py @@ -32,146 +32,146 @@ PY2 = sys.version_info[0] == 2 PY3 = sys.version_info[0] == 3 if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes - MAXSIZE = sys.maxsize + MAXSIZE = sys.maxsize else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - - def __len__(self): - return 1 << 31 - - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc + """Add documentation to a function.""" + func.__doc__ = doc def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] class _LazyDescr(object): - def __init__(self, name): - self.name = name + def __init__(self, name): + self.name = name - def __get__(self, obj, tp): - try: - result = self._resolve() - except ImportError: - # See the nice big comment in MovedModule.__getattr__. - raise AttributeError("%s could not be imported " % self.name) - setattr(obj, self.name, result) # Invokes __set__. - # This is a bit ugly, but it avoids running this again. - delattr(obj.__class__, self.name) - return result + def __get__(self, obj, tp): + try: + result = self._resolve() + except ImportError: + # See the nice big comment in MovedModule.__getattr__. + raise AttributeError("%s could not be imported " % self.name) + setattr(obj, self.name, result) # Invokes __set__. + # This is a bit ugly, but it avoids running this again. + delattr(obj.__class__, self.name) + return result class MovedModule(_LazyDescr): - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old - def _resolve(self): - return _import_module(self.mod) + def _resolve(self): + return _import_module(self.mod) - def __getattr__(self, attr): - # It turns out many Python frameworks like to traverse sys.modules and - # try to load various attributes. This causes problems if this is a - # platform-specific module on the wrong platform, like _winreg on - # Unixes. Therefore, we silently pretend unimportable modules do not - # have any attributes. See issues #51, #53, #56, and #63 for the full - # tales of woe. - # - # First, if possible, avoid loading the module just to look at __file__, - # __name__, or __path__. - if (attr in ("__file__", "__name__", "__path__") and self.mod not in sys.modules): - raise AttributeError(attr) - try: - _module = self._resolve() - except ImportError: - raise AttributeError(attr) - value = getattr(_module, attr) - setattr(self, attr, value) - return value + def __getattr__(self, attr): + # It turns out many Python frameworks like to traverse sys.modules and + # try to load various attributes. This causes problems if this is a + # platform-specific module on the wrong platform, like _winreg on + # Unixes. Therefore, we silently pretend unimportable modules do not + # have any attributes. See issues #51, #53, #56, and #63 for the full + # tales of woe. + # + # First, if possible, avoid loading the module just to look at __file__, + # __name__, or __path__. + if (attr in ("__file__", "__name__", "__path__") and self.mod not in sys.modules): + raise AttributeError(attr) + try: + _module = self._resolve() + except ImportError: + raise AttributeError(attr) + value = getattr(_module, attr) + setattr(self, attr, value) + return value class _LazyModule(types.ModuleType): - def __init__(self, name): - super(_LazyModule, self).__init__(name) - self.__doc__ = self.__class__.__doc__ + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ - def __dir__(self): - attrs = ["__doc__", "__name__"] - attrs += [attr.name for attr in self._moved_attributes] - return attrs + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs - # Subclasses should override this - _moved_attributes = [] + # Subclasses should override this + _moved_attributes = [] class MovedAttribute(_LazyDescr): - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) class _MovedItems(_LazyModule): - """Lazy loading of moved objects""" + """Lazy loading of moved objects""" _moved_attributes = [ @@ -205,8 +205,8 @@ _moved_attributes = [ MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - #MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("cPickle", "rdkit._py2_pickle", "pickle"), + #MovedModule("pickle", "pickle", "pickle"), + MovedModule("pickle", "rdkit._py2_pickle", "pickle"), MovedModule("queue", "Queue"), MovedModule("reprlib", "repr"), MovedModule("socketserver", "SocketServer"), @@ -235,9 +235,9 @@ _moved_attributes = [ MovedModule("winreg", "_winreg"), ] for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) - if isinstance(attr, MovedModule): - sys.modules[__name__ + ".moves." + attr.name] = attr + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + sys.modules[__name__ + ".moves." + attr.name] = attr del attr _MovedItems._moved_attributes = _moved_attributes @@ -246,7 +246,7 @@ moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves") class Module_six_moves_urllib_parse(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_parse""" + """Lazy loading of moved objects in six.moves.urllib_parse""" _urllib_parse_moved_attributes = [ @@ -268,18 +268,18 @@ _urllib_parse_moved_attributes = [ MovedAttribute("splitquery", "urllib", "urllib.parse"), ] for attr in _urllib_parse_moved_attributes: - setattr(Module_six_moves_urllib_parse, attr.name, attr) + setattr(Module_six_moves_urllib_parse, attr.name, attr) del attr Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes sys.modules[__name__ + ".moves.urllib_parse"] = sys.modules[ - __name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + - ".moves.urllib_parse") + __name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + + ".moves.urllib_parse") class Module_six_moves_urllib_error(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_error""" + """Lazy loading of moved objects in six.moves.urllib_error""" _urllib_error_moved_attributes = [ @@ -288,18 +288,18 @@ _urllib_error_moved_attributes = [ MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), ] for attr in _urllib_error_moved_attributes: - setattr(Module_six_moves_urllib_error, attr.name, attr) + setattr(Module_six_moves_urllib_error, attr.name, attr) del attr Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes sys.modules[__name__ + ".moves.urllib_error"] = sys.modules[ - __name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + - ".moves.urllib.error") + __name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + + ".moves.urllib.error") class Module_six_moves_urllib_request(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_request""" + """Lazy loading of moved objects in six.moves.urllib_request""" _urllib_request_moved_attributes = [ @@ -338,18 +338,18 @@ _urllib_request_moved_attributes = [ MovedAttribute("proxy_bypass", "urllib", "urllib.request"), ] for attr in _urllib_request_moved_attributes: - setattr(Module_six_moves_urllib_request, attr.name, attr) + setattr(Module_six_moves_urllib_request, attr.name, attr) del attr Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes sys.modules[__name__ + ".moves.urllib_request"] = sys.modules[ - __name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + - ".moves.urllib.request") + __name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + + ".moves.urllib.request") class Module_six_moves_urllib_response(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_response""" + """Lazy loading of moved objects in six.moves.urllib_response""" _urllib_response_moved_attributes = [ @@ -359,25 +359,25 @@ _urllib_response_moved_attributes = [ MovedAttribute("addinfourl", "urllib", "urllib.response"), ] for attr in _urllib_response_moved_attributes: - setattr(Module_six_moves_urllib_response, attr.name, attr) + setattr(Module_six_moves_urllib_response, attr.name, attr) del attr Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes sys.modules[__name__ + ".moves.urllib_response"] = sys.modules[ - __name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + - ".moves.urllib.response") + __name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + + ".moves.urllib.response") class Module_six_moves_urllib_robotparser(_LazyModule): - """Lazy loading of moved objects in six.moves.urllib_robotparser""" + """Lazy loading of moved objects in six.moves.urllib_robotparser""" _urllib_robotparser_moved_attributes = [ MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), ] for attr in _urllib_robotparser_moved_attributes: - setattr(Module_six_moves_urllib_robotparser, attr.name, attr) + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) del attr Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes @@ -388,93 +388,93 @@ sys.modules[__name__ + ".moves.urllib_robotparser"] = sys.modules[ class Module_six_moves_urllib(types.ModuleType): - """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" - parse = sys.modules[__name__ + ".moves.urllib_parse"] - error = sys.modules[__name__ + ".moves.urllib_error"] - request = sys.modules[__name__ + ".moves.urllib_request"] - response = sys.modules[__name__ + ".moves.urllib_response"] - robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"] + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + parse = sys.modules[__name__ + ".moves.urllib_parse"] + error = sys.modules[__name__ + ".moves.urllib_error"] + request = sys.modules[__name__ + ".moves.urllib_request"] + response = sys.modules[__name__ + ".moves.urllib_response"] + robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"] - def __dir__(self): - return ['parse', 'error', 'request', 'response', 'robotparser'] + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib") def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: + """Remove item from six.moves.""" try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name, )) + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name, )) if PY3: - _meth_func = "__func__" - _meth_self = "__self__" + _meth_func = "__func__" + _meth_self = "__self__" - _func_closure = "__closure__" - _func_code = "__code__" - _func_defaults = "__defaults__" - _func_globals = "__globals__" + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" else: - _meth_func = "im_func" - _meth_self = "im_self" + _meth_func = "im_func" + _meth_self = "im_self" - _func_closure = "func_closure" - _func_code = "func_code" - _func_defaults = "func_defaults" - _func_globals = "func_globals" + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" try: - advance_iterator = next + advance_iterator = next except NameError: - def advance_iterator(it): - return it.next() + def advance_iterator(it): + return it.next() next = advance_iterator try: - callable = callable + callable = callable except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) if PY3: - def get_unbound_function(unbound): - return unbound + def get_unbound_function(unbound): + return unbound - create_bound_method = types.MethodType + create_bound_method = types.MethodType - Iterator = object + Iterator = object else: - def get_unbound_function(unbound): - return unbound.im_func + def get_unbound_function(unbound): + return unbound.im_func - def create_bound_method(func, obj): - return types.MethodType(func, obj, obj.__class__) + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) - class Iterator(object): + class Iterator(object): - def next(self): - return type(self).__next__(self) + def next(self): + return type(self).__next__(self) - callable = callable + callable = callable _add_doc(get_unbound_function, """Get the function out of a possibly unbound function""") get_method_function = operator.attrgetter(_meth_func) @@ -486,30 +486,30 @@ get_function_globals = operator.attrgetter(_func_globals) if PY3: - def iterkeys(d, **kw): - return iter(d.keys(**kw)) + def iterkeys(d, **kw): + return iter(d.keys(**kw)) - def itervalues(d, **kw): - return iter(d.values(**kw)) + def itervalues(d, **kw): + return iter(d.values(**kw)) - def iteritems(d, **kw): - return iter(d.items(**kw)) + def iteritems(d, **kw): + return iter(d.items(**kw)) - def iterlists(d, **kw): - return iter(d.lists(**kw)) + def iterlists(d, **kw): + return iter(d.lists(**kw)) else: - def iterkeys(d, **kw): - return iter(d.iterkeys(**kw)) + def iterkeys(d, **kw): + return iter(d.iterkeys(**kw)) - def itervalues(d, **kw): - return iter(d.itervalues(**kw)) + def itervalues(d, **kw): + return iter(d.itervalues(**kw)) - def iteritems(d, **kw): - return iter(d.iteritems(**kw)) + def iteritems(d, **kw): + return iter(d.iteritems(**kw)) - def iterlists(d, **kw): - return iter(d.iterlists(**kw)) + def iterlists(d, **kw): + return iter(d.iterlists(**kw)) _add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") @@ -519,163 +519,165 @@ _add_doc(iterlists, "Return an iterator over the (key, [values]) pairs of a dict if PY3: - def b(s): - return s.encode("latin-1") + def b(s): + return s.encode("latin-1") - def u(s): - return s + def u(s): + return s - unichr = chr - if sys.version_info[1] <= 1: + unichr = chr + if sys.version_info[1] <= 1: - def int2byte(i): - return bytes((i, )) - else: - # This is about 2x faster than the implementation above on 3.2+ - int2byte = operator.methodcaller("to_bytes", 1, "big") - byte2int = operator.itemgetter(0) - indexbytes = operator.getitem - iterbytes = iter - import io - StringIO = io.StringIO - BytesIO = io.BytesIO + def int2byte(i): + return bytes((i, )) + else: + # This is about 2x faster than the implementation above on 3.2+ + int2byte = operator.methodcaller("to_bytes", 1, "big") + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO else: - def b(s): - return s - # Workaround for standalone backslash - def u(s): - return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + def b(s): + return s + # Workaround for standalone backslash - unichr = unichr - int2byte = chr + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") - def byte2int(bs): - return ord(bs[0]) + unichr = unichr + int2byte = chr - def indexbytes(buf, i): - return ord(buf[i]) + def byte2int(bs): + return ord(bs[0]) - def iterbytes(buf): - return (ord(byte) for byte in buf) + def indexbytes(buf, i): + return ord(buf[i]) - import StringIO - StringIO = BytesIO = StringIO.StringIO + def iterbytes(buf): + return (ord(byte) for byte in buf) + + import StringIO + StringIO = BytesIO = StringIO.StringIO _add_doc(b, """Byte literal""") _add_doc(u, """Text literal""") if PY3: - exec_ = getattr(moves.builtins, "exec") + exec_ = getattr(moves.builtins, "exec") - def reraise(tp, value, tb=None): - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value else: - def exec_(_code_, _globs_=None, _locs_=None): - """Execute code in a namespace.""" - if _globs_ is None: - frame = sys._getframe(1) - _globs_ = frame.f_globals - if _locs_ is None: - _locs_ = frame.f_locals - del frame - elif _locs_ is None: - _locs_ = _globs_ - exec("""exec _code_ in _globs_, _locs_""") + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") - exec_("""def reraise(tp, value, tb=None): + exec_("""def reraise(tp, value, tb=None): raise tp, value, tb """) print_ = getattr(moves.builtins, "print", None) if print_ is None: - def print_(*args, **kwargs): - """The new-style print function for Python 2.4 and 2.5.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return - def write(data): - if not isinstance(data, basestring): - data = str(data) - # If the file has an encoding, encode unicode with it. - if (isinstance(fp, file) and isinstance(data, unicode) and fp.encoding is not None): - errors = getattr(fp, "errors", None) - if errors is None: - errors = "strict" - data = data.encode(fp.encoding, errors) - fp.write(data) + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. + if (isinstance(fp, file) and isinstance(data, unicode) and fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) _add_doc(reraise, """Reraise an exception.""") def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - return meta("NewBase", bases, {}) + """Create a base class with a metaclass.""" + return meta("NewBase", bases, {}) def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" + """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - slots = orig_vars.get('__slots__') - if slots is not None: - if isinstance(slots, str): - slots = [slots] - for slots_var in slots: - orig_vars.pop(slots_var) - return metaclass(cls.__name__, cls.__bases__, orig_vars) + def wrapper(cls): + orig_vars = cls.__dict__.copy() + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + return metaclass(cls.__name__, cls.__bases__, orig_vars) + + return wrapper - return wrapper # added as part of the RDKit port if PY3: - def cmp(t1, t2): - return (t1 < t2) * -1 or (t1 > t2) * 1 + def cmp(t1, t2): + return (t1 < t2) * -1 or (t1 > t2) * 1 else: - cmp = cmp + cmp = cmp diff --git a/rdkit/sping/PDF/pdfdoc.py b/rdkit/sping/PDF/pdfdoc.py index 4cfc305f2..93bea3554 100755 --- a/rdkit/sping/PDF/pdfdoc.py +++ b/rdkit/sping/PDF/pdfdoc.py @@ -13,7 +13,7 @@ piddlePDF calls pdfgen and offers a high-level interface. (C) Copyright Andy Robinson 1998-1999 """ -from __future__ import print_function + import os import sys import time @@ -167,7 +167,7 @@ class PDFDocument: i = 1 self.xref = [] f.write("%PDF-1.2" + LINEEND) # for CID support - f.write("%íì¶¾" + LINEEND) + f.write("%�춾" + LINEEND) for obj in self.objects: pos = f.tell() self.xref.append(pos) @@ -192,7 +192,7 @@ class PDFDocument: def printPDF(self): "prints it to standard output. Logs positions for doing trailer" print("%PDF-1.0") - print("%íì¶¾") + print("%�춾") i = 1 self.xref = [] for obj in self.objects: diff --git a/rdkit/sping/PDF/pdfgen.py b/rdkit/sping/PDF/pdfgen.py index 59d969bf2..557790dd2 100755 --- a/rdkit/sping/PDF/pdfgen.py +++ b/rdkit/sping/PDF/pdfgen.py @@ -50,7 +50,7 @@ Progress Reports: ## ## ## -from __future__ import print_function + import os import sys import time @@ -63,7 +63,6 @@ from . import pdfutils from . import pdfdoc from . import pdfmetrics from . import pdfgeom -from rdkit.six import string_types class PDFError(ValueError): @@ -576,7 +575,7 @@ class Canvas: return self._currentPageHasImages = 1 - if isinstance(image, string_types): + if isinstance(image, str): if os.path.splitext(image)[1] in ['.jpg', '.JPG']: #directly process JPEG files #open file, needs some error handling!! @@ -1020,7 +1019,7 @@ class PDFTextObject: since this may be indented, by default it trims whitespace off each line and from the beginning; set trim=0 to preserve whitespace.""" - if isinstance(stuff, string_types): + if isinstance(stuff, str): lines = stuff.strip().split('\n') if trim == 1: lines = [s.strip() for s in lines] diff --git a/rdkit/sping/PDF/pdfmetrics.py b/rdkit/sping/PDF/pdfmetrics.py index 64e12dd7e..44b71076d 100755 --- a/rdkit/sping/PDF/pdfmetrics.py +++ b/rdkit/sping/PDF/pdfmetrics.py @@ -23,7 +23,7 @@ Postscript font names within this module, but have not yet done so. 13th June 1999 """ -from __future__ import print_function + import os StandardEnglishFonts = [ diff --git a/rdkit/sping/PDF/pdfutils.py b/rdkit/sping/PDF/pdfutils.py index 3e811335e..576731da4 100755 --- a/rdkit/sping/PDF/pdfutils.py +++ b/rdkit/sping/PDF/pdfutils.py @@ -1,9 +1,8 @@ # pdfutils.py - everything to do with images, streams, # compression, and some constants -from __future__ import print_function + import os from io import StringIO -from rdkit.six import string_types import glob LINEEND = '\015\012' @@ -16,73 +15,73 @@ LINEEND = '\015\012' # ########################################################## def cacheImageFile(filename): - "Processes the image as if for encoding, saves to a file ending in AHX" - from PIL import Image - import zlib - img1 = Image.open(filename) - img = img1.convert('RGB') - imgwidth, imgheight = img.size - code = [] - code.append('BI') # begin image - # this describes what is in the image itself - code.append('/W %s /H %s /BPC 8 /CS /RGB /F [/A85 /Fl]' % (imgwidth, imgheight)) - code.append('ID') - #use a flate filter and Ascii Base 85 - raw = getattr(img, 'tobytes', img.tostring)() - assert len(raw) == imgwidth * imgheight, "Wrong amount of data for image" + "Processes the image as if for encoding, saves to a file ending in AHX" + from PIL import Image + import zlib + img1 = Image.open(filename) + img = img1.convert('RGB') + imgwidth, imgheight = img.size + code = [] + code.append('BI') # begin image + # this describes what is in the image itself + code.append('/W %s /H %s /BPC 8 /CS /RGB /F [/A85 /Fl]' % (imgwidth, imgheight)) + code.append('ID') + # use a flate filter and Ascii Base 85 + raw = getattr(img, 'tobytes', img.tostring)() + assert len(raw) == imgwidth * imgheight, "Wrong amount of data for image" - compressed = zlib.compress(raw) #this bit is very fast... - encoded = _AsciiBase85Encode(compressed) #...sadly this isn't + compressed = zlib.compress(raw) # this bit is very fast... + encoded = _AsciiBase85Encode(compressed) # ...sadly this isn't - #write in blocks of 60 characters per line - outstream = StringIO(encoded) - dataline = outstream.read(60) - while dataline != "": - code.append(dataline) + # write in blocks of 60 characters per line + outstream = StringIO(encoded) dataline = outstream.read(60) + while dataline != "": + code.append(dataline) + dataline = outstream.read(60) - code.append('EI') + code.append('EI') - #save it to a file - cachedname = os.path.splitext(filename)[0] + '.a85' - f = open(cachedname, 'wb') - f.write(LINEEND.join(code) + LINEEND) - f.close() - print('cached image as %s' % cachedname) + # save it to a file + cachedname = os.path.splitext(filename)[0] + '.a85' + f = open(cachedname, 'wb') + f.write(LINEEND.join(code) + LINEEND) + f.close() + print('cached image as %s' % cachedname) def preProcessImages(spec): - """accepts either a filespec ('C:\mydir\*.jpg') or a list - of image filenames, crunches them all to save time. Run this - to save huge amounts of time when repeatedly building image - documents.""" - if isinstance(spec, string_types): - filelist = glob.glob(spec) - else: #list or tuple OK - filelist = spec + """accepts either a filespec ('C:\mydir\*.jpg') or a list + of image filenames, crunches them all to save time. Run this + to save huge amounts of time when repeatedly building image + documents.""" + if isinstance(spec, str): + filelist = glob.glob(spec) + else: # list or tuple OK + filelist = spec - for filename in filelist: - if cachedImageExists(filename): - print('cached version of %s already exists' % filename) - else: - cacheImageFile(filename) + for filename in filelist: + if cachedImageExists(filename): + print('cached version of %s already exists' % filename) + else: + cacheImageFile(filename) def cachedImageExists(filename): - """Determines if a cached image exists which has - the same name and equal or newer date to the given - file.""" - cachedname = os.path.splitext(filename)[0] + '.a85' - if os.path.isfile(cachedname): - #see if it is newer - original_date = os.stat(filename)[8] - cached_date = os.stat(cachedname)[8] - if original_date > cached_date: - return 0 + """Determines if a cached image exists which has + the same name and equal or newer date to the given + file.""" + cachedname = os.path.splitext(filename)[0] + '.a85' + if os.path.isfile(cachedname): + # see if it is newer + original_date = os.stat(filename)[8] + cached_date = os.stat(cachedname)[8] + if original_date > cached_date: + return 0 + else: + return 1 else: - return 1 - else: - return 0 + return 0 ############################################################## @@ -91,228 +90,229 @@ def cachedImageExists(filename): # ############################################################## def _escape(s): - """PDF escapes are almost like Python ones, but brackets - need slashes before them too. Use Python's repr function - and chop off the quotes first""" - s = repr(s)[1:-1] - s = s.replace('(', '\(') - s = s.replace(')', '\)') - return s + """PDF escapes are almost like Python ones, but brackets + need slashes before them too. Use Python's repr function + and chop off the quotes first""" + s = repr(s)[1:-1] + s = s.replace('(', '\(') + s = s.replace(')', '\)') + return s def _normalizeLineEnds(text, desired=LINEEND): - """ensures all instances of CR, LF and CRLF end up as the specified one""" - unlikely = '\000\001\002\003' - text = text.replace('\015\012', unlikely) - text = text.replace('\015', unlikely) - text = text.replace('\012', unlikely) - text = text.replace(unlikely, desired) - return text + """ensures all instances of CR, LF and CRLF end up as the specified one""" + unlikely = '\000\001\002\003' + text = text.replace('\015\012', unlikely) + text = text.replace('\015', unlikely) + text = text.replace('\012', unlikely) + text = text.replace(unlikely, desired) + return text def _AsciiHexEncode(input): - """This is a verbose encoding used for binary data within - a PDF file. One byte binary becomes two bytes of ASCII.""" - "Helper function used by images" - output = StringIO() - for char in input: - output.write('%02x' % ord(char)) - output.write('>') - output.reset() - return output.read() + """This is a verbose encoding used for binary data within + a PDF file. One byte binary becomes two bytes of ASCII.""" + "Helper function used by images" + output = StringIO() + for char in input: + output.write('%02x' % ord(char)) + output.write('>') + output.reset() + return output.read() def _AsciiHexDecode(input): - "Not used except to provide a test of the preceding" - #strip out all whitespace - stripped = ''.join(input.split(), '') - assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream' - stripped = stripped[:-1] #chop off terminator - assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes' - i = 0 - output = StringIO() - while i < len(stripped): - twobytes = stripped[i:i + 2] - output.write(chr(eval('0x' + twobytes))) - i = i + 2 - output.reset() - return output.read() + "Not used except to provide a test of the preceding" + # strip out all whitespace + stripped = ''.join(input.split(), '') + assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream' + stripped = stripped[:-1] # chop off terminator + assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes' + i = 0 + output = StringIO() + while i < len(stripped): + twobytes = stripped[i:i + 2] + output.write(chr(eval('0x' + twobytes))) + i = i + 2 + output.reset() + return output.read() def _AsciiHexTest(text='What is the average velocity of a sparrow?'): - "Do the obvious test for whether Ascii Hex encoding works" - print('Plain text:', text) - encoded = _AsciiHexEncode(text) - print('Encoded:', encoded) - decoded = _AsciiHexDecode(encoded) - print('Decoded:', decoded) - if decoded == text: - print('Passed') - else: - print('Failed!') + "Do the obvious test for whether Ascii Hex encoding works" + print('Plain text:', text) + encoded = _AsciiHexEncode(text) + print('Encoded:', encoded) + decoded = _AsciiHexDecode(encoded) + print('Decoded:', decoded) + if decoded == text: + print('Passed') + else: + print('Failed!') def _AsciiBase85Encode(input): - """This is a compact encoding used for binary data within - a PDF file. Four bytes of binary data become five bytes of - ASCII. This is the default method used for encoding images.""" - outstream = StringIO() - # special rules apply if not a multiple of four bytes. - whole_word_count, remainder_size = divmod(len(input), 4) - cut = 4 * whole_word_count - body, lastbit = input[0:cut], input[cut:] + """This is a compact encoding used for binary data within + a PDF file. Four bytes of binary data become five bytes of + ASCII. This is the default method used for encoding images.""" + outstream = StringIO() + # special rules apply if not a multiple of four bytes. + whole_word_count, remainder_size = divmod(len(input), 4) + cut = 4 * whole_word_count + body, lastbit = input[0:cut], input[cut:] - for i in range(whole_word_count): - offset = i * 4 - b1 = ord(body[offset]) - b2 = ord(body[offset + 1]) - b3 = ord(body[offset + 2]) - b4 = ord(body[offset + 3]) + for i in range(whole_word_count): + offset = i * 4 + b1 = ord(body[offset]) + b2 = ord(body[offset + 1]) + b3 = ord(body[offset + 2]) + b4 = ord(body[offset + 3]) - num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 + num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 - if num == 0: - #special case - outstream.write('z') - else: - #solve for five base-85 numbers - temp, c5 = divmod(num, 85) - temp, c4 = divmod(temp, 85) - temp, c3 = divmod(temp, 85) - c1, c2 = divmod(temp, 85) - assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85 * c4) + c5 == num, 'dodgy code!' - outstream.write(chr(c1 + 33)) - outstream.write(chr(c2 + 33)) - outstream.write(chr(c3 + 33)) - outstream.write(chr(c4 + 33)) - outstream.write(chr(c5 + 33)) + if num == 0: + # special case + outstream.write('z') + else: + # solve for five base-85 numbers + temp, c5 = divmod(num, 85) + temp, c4 = divmod(temp, 85) + temp, c3 = divmod(temp, 85) + c1, c2 = divmod(temp, 85) + assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + \ + (85 * c4) + c5 == num, 'dodgy code!' + outstream.write(chr(c1 + 33)) + outstream.write(chr(c2 + 33)) + outstream.write(chr(c3 + 33)) + outstream.write(chr(c4 + 33)) + outstream.write(chr(c5 + 33)) - # now we do the final bit at the end. I repeated this separately as - # the loop above is the time-critical part of a script, whereas this - # happens only once at the end. + # now we do the final bit at the end. I repeated this separately as + # the loop above is the time-critical part of a script, whereas this + # happens only once at the end. - #encode however many bytes we have as usual - if remainder_size > 0: - while len(lastbit) < 4: - lastbit = lastbit + '\000' - b1 = ord(lastbit[0]) - b2 = ord(lastbit[1]) - b3 = ord(lastbit[2]) - b4 = ord(lastbit[3]) + # encode however many bytes we have as usual + if remainder_size > 0: + while len(lastbit) < 4: + lastbit = lastbit + '\000' + b1 = ord(lastbit[0]) + b2 = ord(lastbit[1]) + b3 = ord(lastbit[2]) + b4 = ord(lastbit[3]) - num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 + num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4 - #solve for c1..c5 - temp, c5 = divmod(num, 85) - temp, c4 = divmod(temp, 85) - temp, c3 = divmod(temp, 85) - c1, c2 = divmod(temp, 85) + # solve for c1..c5 + temp, c5 = divmod(num, 85) + temp, c4 = divmod(temp, 85) + temp, c3 = divmod(temp, 85) + c1, c2 = divmod(temp, 85) - #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % ( - # b1,b2,b3,b4,num,c1,c2,c3,c4,c5) - lastword = chr(c1 + 33) + chr(c2 + 33) + chr(c3 + 33) + chr(c4 + 33) + chr(c5 + 33) - #write out most of the bytes. - outstream.write(lastword[0:remainder_size + 1]) + # print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % ( + # b1,b2,b3,b4,num,c1,c2,c3,c4,c5) + lastword = chr(c1 + 33) + chr(c2 + 33) + chr(c3 + 33) + chr(c4 + 33) + chr(c5 + 33) + # write out most of the bytes. + outstream.write(lastword[0:remainder_size + 1]) - #terminator code for ascii 85 - outstream.write('~>') - outstream.reset() - return outstream.read() + # terminator code for ascii 85 + outstream.write('~>') + outstream.reset() + return outstream.read() def _AsciiBase85Decode(input): - """This is not used - Acrobat Reader decodes for you - but a round - trip is essential for testing.""" - outstream = StringIO() - #strip all whitespace - stripped = ''.join(input.split(), '') - #check end - assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream' - stripped = stripped[:-2] #chop off terminator + """This is not used - Acrobat Reader decodes for you - but a round + trip is essential for testing.""" + outstream = StringIO() + # strip all whitespace + stripped = ''.join(input.split(), '') + # check end + assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream' + stripped = stripped[:-2] # chop off terminator - #may have 'z' in it which complicates matters - expand them - stripped = stripped.replace('z', '!!!!!') - # special rules apply if not a multiple of five bytes. - whole_word_count, remainder_size = divmod(len(stripped), 5) - #print '%d words, %d leftover' % (whole_word_count, remainder_size) - assert remainder_size != 1, 'invalid Ascii 85 stream!' - cut = 5 * whole_word_count - body, lastbit = stripped[0:cut], stripped[cut:] + # may have 'z' in it which complicates matters - expand them + stripped = stripped.replace('z', '!!!!!') + # special rules apply if not a multiple of five bytes. + whole_word_count, remainder_size = divmod(len(stripped), 5) + # print '%d words, %d leftover' % (whole_word_count, remainder_size) + assert remainder_size != 1, 'invalid Ascii 85 stream!' + cut = 5 * whole_word_count + body, lastbit = stripped[0:cut], stripped[cut:] - for i in range(whole_word_count): - offset = i * 5 - c1 = ord(body[offset]) - 33 - c2 = ord(body[offset + 1]) - 33 - c3 = ord(body[offset + 2]) - 33 - c4 = ord(body[offset + 3]) - 33 - c5 = ord(body[offset + 4]) - 33 + for i in range(whole_word_count): + offset = i * 5 + c1 = ord(body[offset]) - 33 + c2 = ord(body[offset + 1]) - 33 + c3 = ord(body[offset + 2]) - 33 + c4 = ord(body[offset + 3]) - 33 + c5 = ord(body[offset + 4]) - 33 - num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85 * c4) + c5 + num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85 * c4) + c5 - temp, b4 = divmod(num, 256) - temp, b3 = divmod(temp, 256) - b1, b2 = divmod(temp, 256) + temp, b4 = divmod(num, 256) + temp, b3 = divmod(temp, 256) + b1, b2 = divmod(temp, 256) - assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!' - outstream.write(chr(b1)) - outstream.write(chr(b2)) - outstream.write(chr(b3)) - outstream.write(chr(b4)) + assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!' + outstream.write(chr(b1)) + outstream.write(chr(b2)) + outstream.write(chr(b3)) + outstream.write(chr(b4)) - #decode however many bytes we have as usual - if remainder_size > 0: - while len(lastbit) < 5: - lastbit = lastbit + '!' - c1 = ord(lastbit[0]) - 33 - c2 = ord(lastbit[1]) - 33 - c3 = ord(lastbit[2]) - 33 - c4 = ord(lastbit[3]) - 33 - c5 = ord(lastbit[4]) - 33 - num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85 * c4) + c5 - temp, b4 = divmod(num, 256) - temp, b3 = divmod(temp, 256) - b1, b2 = divmod(temp, 256) - assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!' - #print 'decoding: %d %d %d %d %d -> %d -> %d %d %d %d' % ( - # c1,c2,c3,c4,c5,num,b1,b2,b3,b4) + # decode however many bytes we have as usual + if remainder_size > 0: + while len(lastbit) < 5: + lastbit = lastbit + '!' + c1 = ord(lastbit[0]) - 33 + c2 = ord(lastbit[1]) - 33 + c3 = ord(lastbit[2]) - 33 + c4 = ord(lastbit[3]) - 33 + c5 = ord(lastbit[4]) - 33 + num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85 * c4) + c5 + temp, b4 = divmod(num, 256) + temp, b3 = divmod(temp, 256) + b1, b2 = divmod(temp, 256) + assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!' + # print 'decoding: %d %d %d %d %d -> %d -> %d %d %d %d' % ( + # c1,c2,c3,c4,c5,num,b1,b2,b3,b4) - #the last character needs 1 adding; the encoding loses - #data by rounding the number to x bytes, and when - #divided repeatedly we get one less - if remainder_size == 2: - lastword = chr(b1 + 1) - elif remainder_size == 3: - lastword = chr(b1) + chr(b2 + 1) - elif remainder_size == 4: - lastword = chr(b1) + chr(b2) + chr(b3 + 1) - outstream.write(lastword) + # the last character needs 1 adding; the encoding loses + # data by rounding the number to x bytes, and when + # divided repeatedly we get one less + if remainder_size == 2: + lastword = chr(b1 + 1) + elif remainder_size == 3: + lastword = chr(b1) + chr(b2 + 1) + elif remainder_size == 4: + lastword = chr(b1) + chr(b2) + chr(b3 + 1) + outstream.write(lastword) - #terminator code for ascii 85 - outstream.reset() - return outstream.read() + # terminator code for ascii 85 + outstream.reset() + return outstream.read() def _wrap(input, columns=60): - output = [] - length = len(input) - i = 0 - pos = columns * i - while pos < length: - output.append(input[pos:pos + columns]) - i = i + 1 + output = [] + length = len(input) + i = 0 pos = columns * i + while pos < length: + output.append(input[pos:pos + columns]) + i = i + 1 + pos = columns * i - return LINEEND.join(output) + return LINEEND.join(output) def _AsciiBase85Test(text='What is the average velocity of a sparrow?'): - "Do the obvious test for whether Base 85 encoding works" - print('Plain text:', text) - encoded = _AsciiBase85Encode(text) - print('Encoded:', encoded) - decoded = _AsciiBase85Decode(encoded) - print('Decoded:', decoded) - if decoded == text: - print('Passed') - else: - print('Failed!') + "Do the obvious test for whether Base 85 encoding works" + print('Plain text:', text) + encoded = _AsciiBase85Encode(text) + print('Encoded:', encoded) + decoded = _AsciiBase85Decode(encoded) + print('Decoded:', decoded) + if decoded == text: + print('Passed') + else: + print('Failed!') diff --git a/rdkit/sping/PDF/pidPDF.py b/rdkit/sping/PDF/pidPDF.py index ecb4417ea..015081ebe 100755 --- a/rdkit/sping/PDF/pidPDF.py +++ b/rdkit/sping/PDF/pidPDF.py @@ -13,8 +13,8 @@ self.pdf which offers numerous lower-level drawing routines. # except drawFigure, which doesn't behave like PostScript # paths so I left it unchanged. -from __future__ import print_function -#standard python library modules + +# standard python library modules from . import pdfmetrics import glob import os @@ -22,13 +22,12 @@ import types from math import sin, cos, pi, ceil # app specific -from rdkit.six import string_types from rdkit.sping import pagesizes from rdkit.sping.pid import * from . import pdfgen from . import pdfgeom -#edit this is the setting offends you, or set it in the constructor +# edit this is the setting offends you, or set it in the constructor DEFAULT_PAGE_SIZE = pagesizes.A4 #DEFAULT_PAGE_SIZE = pagesizes.letter @@ -48,9 +47,9 @@ font_face_map = { 'arial': 'helvetica' } -#maps a piddle font to a postscript one. +# maps a piddle font to a postscript one. ps_font_map = { - #face, bold, italic -> ps name + # face, bold, italic -> ps name ('times', 0, 0): 'Times-Roman', ('times', 1, 0): 'Times-Bold', ('times', 0, 1): 'Times-Italic', @@ -64,7 +63,7 @@ ps_font_map = { ('helvetica', 0, 1): 'Helvetica-Oblique', ('helvetica', 1, 1): 'Helvetica-BoldOblique', - # there is only one Symbol font + # there is only one Symbol font ('symbol', 0, 0): 'Symbol', ('symbol', 1, 0): 'Symbol', ('symbol', 0, 1): 'Symbol', @@ -85,597 +84,597 @@ ps_font_map = { class PDFCanvas(Canvas): - """This works by accumulating a list of strings containing - PDF page marking operators, as you call its methods. We could - use a big string but this is more efficient - only concatenate - it once, with control over line ends. When - done, it hands off the stream to a PDFPage object.""" - - def __init__(self, size=None, name="pidPDF.pdf", pagesize=DEFAULT_PAGE_SIZE): - #if no extension, add .PDF - root, ext = os.path.splitext(name) - if ext == '': - name = root + '.pdf' - - #create the underlying pdfgen canvas and set some attributes - self.pdf = pdfgen.Canvas(name, pagesize=pagesize, bottomup=0) # add pagesize to constructor - # by default do not use comrpression (mod by cwl, may not be necessary w/ newer pdfgen) - self.pdf.setPageCompression(0) - - self.pdf.setLineCap(2) - #now call super init, which will trigger - #calls into self.pdf - - Canvas.__init__(self, size=size, name=name) - - #memorize stuff - self.pagesize = pagesize - self.filename = name - - # self.pdf.setPageSize(pagesize) # This doesn't seem to work correctly -cwl - if size == None: - #take the page size, which might not be default - self.drawingsize = self.pagesize - else: - #convenience for other platformslike GUI views - #we let them centre a smaller drawing in a page - self.drawingsize = size - - self.pageTransitionString = '' - self.pageNumber = 1 # keep a count - - #if they specified a size smaller than page, - # be helpful and centre their diagram - if self.pagesize != self.drawingsize: - dx = 0.5 * (self.pagesize[0] - self.drawingsize[0]) - dy = 0.5 * (self.pagesize[1] - self.drawingsize[1]) - self.pdf.translate(dx, dy) - - def _resetDefaults(self): - """Only used in setup - persist from page to page""" - self.defaultLineColor = black - self.defaultFillColor = transparent - self.defaultLineWidth = 1 - self.defaultFont = Font() - self.pdf.setLineCap(2) - - def showPage(self): - """ensure basic settings are the same after a page break""" - self.pdf.showPage() - self.defaultFont = self.defaultFont - self.defaultLineColor = self.defaultLineColor - self.defaultFillColor = self.defaultFillColor - self.defaultLineWidth = self.defaultLineWidth - self.pdf.setLineCap(2) - - #------------ canvas capabilities ------------- - - def isInteractive(self): - return 0 - - def canUpdate(self): - return 0 - - #------------ general management ------------- - def clear(self): - "Not wll defined for file formats, use same as ShowPage" - self.showPage() - - def flush(self): - pass - - def save(self, file=None, format=None): - """Saves the file. If holding data, do - a showPage() to save them having to.""" - - if self.pdf.pageHasData(): - self.pdf.showPage() - - if hasattr(file, 'write'): - self.pdf.save(fileobj=file) - elif isinstance(file, string_types): - self.pdf.save(filename=file) - else: - self.pdf.save() - - def setInfoLine(self, s): - self.pdf.setTitle(s) - - #-------------handle assignment to defaultXXX------- - - def __setattr__(self, key, value): - #we let it happen... - self.__dict__[key] = value - #...but take action if needed - if key == "defaultLineColor": - self._updateLineColor(value) - elif key == "defaultLineWidth": - self._updateLineWidth(value) - elif key == "defaultFillColor": - self._updateFillColor(value) - elif key == "defaultFont": - self._updateFont(value) - - def _updateLineColor(self, color): - """Triggered when someone assigns to defaultLineColor""" - self.pdf.setStrokeColorRGB(color.red, color.green, color.blue) - - def _updateFillColor(self, color): - """Triggered when someone assigns to defaultFillColor""" - self.pdf.setFillColorRGB(color.red, color.green, color.blue) - - def _updateLineWidth(self, width): - """Triggered when someone assigns to defaultLineWidth""" - self.pdf.setLineWidth(width) - - def _updateFont(self, font): - """Triggered when someone assigns to defaultFont""" - psfont = self._findPostScriptFontName(font) - self.pdf.setFont(psfont, font.size) - - def _findPostScriptFontName(self, font): - """Attempts to return proper font name.""" - - #step 1 - no face ends up serif, others are lowercased - if not font.face: - face = 'serif' - else: - face = font.face.lower() - while face in font_face_map: - face = font_face_map[face] - #step 2, - resolve bold/italic to get the right PS font name - psname = ps_font_map[(face, font.bold, font.italic)] - return psname - - def _escape(self, s): - """PDF escapes are like Python ones, but brackets need slashes before them too. - Use Python's repr function and chop off the quotes first""" - s = repr(s)[1:-1] - s = s.replace('(', '\(') - s = s.replace(')', '\)') - return s - - def resetDefaults(self): - """If you drop down to a lower level, PIDDLE can lose - track of the current graphics state. Calling this after - wards ensures that the canvas is updated to the same - defaults as PIDDLE thinks they should be.""" - self.defaultFont = self.defaultFont - self.defaultLineColor = self.defaultLineColor - self.defaultFillColor = self.defaultFillColor - self.defaultLineWidth = self.defaultLineWidth - #------------ string/font info ------------ - - def stringWidth(self, s, font=None): - "Return the logical width of the string if it were drawn \ - in the current font (defaults to self.font)." - - if not font: - font = self.defaultFont - fontname = self._findPostScriptFontName(font) - return pdfmetrics.stringwidth(s, fontname) * font.size * 0.001 - - def fontHeight(self, font=None): - if not font: - font = self.defaultFont - return font.size - - def fontAscent(self, font=None): - if not font: - font = self.defaultFont - fontname = self._findPostScriptFontName(font) - return pdfmetrics.ascent_descent[fontname][0] * 0.001 * font.size - - def fontDescent(self, font=None): - if not font: - font = self.defaultFont - fontname = self._findPostScriptFontName(font) - return -pdfmetrics.ascent_descent[fontname][1] * 0.001 * font.size - - #------------- drawing helpers -------------- - def _endPath(self, path, edgeColor, fillColor): - """in PIDDLE, the edge and fil colors might be transparent, - and might also be None, in which case they should be taken - from the defaults. This leads to a standard 10 lines of code - when closing each shape, which are wrapped up here. Use - these if you implement new PIDDLE shapes.""" - #allow for transparent fills and lines - fill = fillColor or self.defaultFillColor - edge = edgeColor or self.defaultLineColor - if (fill == transparent and edge == transparent): - pass - else: - self.pdf.drawPath(path, - (edge != transparent), #whether to stroke - (fill != transparent) #whether to fill - ) - - #------------- drawing methods -------------- - - def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): - """Calls the underlying methods in pdfgen.canvas. For the - highest performance, use canvas.setDefaultFont and - canvas.setLineWidth, and draw batches of similar - lines together.""" - #set the state if needed - if color: - self._updateLineColor(color) - if width: - self._updateLineWidth(width) - - # now do the work - self.pdf.line(x1, y1, x2, y2) - - #now reset state if needed - if color: - self._updateLineColor(self.defaultLineColor) - if width: - self._updateLineWidth(self.defaultLineWidth) - - def drawLines(self, lineList, color=None, width=None, dash=None, **kwargs): - """Draws several distinct lines, all with same color - and width, efficiently""" - if color: - self._updateLineColor(color) - if width: - self._updateLineWidth(width) - - self.pdf.lines(lineList) - - if color: - self._updateLineColor(self.defaultLineColor) - if width: - self._updateLineWidth(self.defaultLineWidth) - - def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): - """As it says, but many options to process. It translates - user space rather than text space, in case underlining is - needed on rotated text. It cheats and does literals - for efficiency, avoiding changing the python graphics state.""" - self.pdf.addLiteral('%begin drawString') - col = color or self.defaultLineColor - if col != transparent: - if '\n' in s or '\r' in s: - #normalize line ends - s = s.replace('\r\n', '\n') - s = s.replace('\n\r', '\n') - lines = s.split('\n') - else: - lines = [s] - fnt = font or self.defaultFont - self._updateFont(fnt) - text = self.pdf._escape(s) - - # start of Chris's hacking - # inserting basic commands here to see if can get working - textobj = self.pdf.beginText() - - if col != self.defaultFillColor: - textobj.setFillColorRGB(col.red, col.green, col.blue) - - if angle != 0: - co = cos(angle * pi / 180.0) - si = sin(angle * pi / 180.0) - textobj.setTextTransform(co, -si, si, co, x, y) #top down coords so reverse angle - else: - textobj.setTextOrigin(x, y) - - for line in lines: - #keep underlining separate - it is slow and unusual anyway - if fnt.underline: - #breaks on angled text - FIXME - ycursor = textobj.getY() # returns offset from last set origin - dy = 0.5 * self.fontDescent(fnt) - width = self.stringWidth(line, fnt) - linewidth = fnt.size * 0.1 - - self.pdf.saveState() - self.pdf.setLineWidth(linewidth) - self.pdf.translate(x, y) # need to translate first before rotate - if angle != 0: - self.pdf.rotate(-angle) - self.pdf.translate(0, ycursor - y) #move down to start of current text line - self.pdf.line(0, dy, width, dy) - self.pdf.restoreState() - lasty = ycursor - textobj.textLine(line) # adds text to textobj, advances getY's cursor - # finally actually send text object to the page - self.pdf.drawText(textobj) # draw all the text afterwards? Doesn't seem right - self.pdf.addLiteral('%end drawString') - # done wth drawString() - - def drawCurve(self, x1, y1, x2, y2, x3, y3, x4, y4, edgeColor=None, edgeWidth=None, - fillColor=None, closed=0, dash=None, **kwargs): - """This could do two totally different things. If not closed, - just does a bezier curve so fill is irrelevant. If closed, - it is actually a filled shape.""" - if closed: - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - if fillColor: - self._updateFillColor(fillColor) - - p = self.pdf.beginPath() - p.moveTo(x1, y1) - p.curveTo(x2, y2, x3, y3, x4, y4) - p.close() - self._endPath(p, edgeColor, fillColor) #handles case of transparency - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - if fillColor: - self._updateFillColor(self.defaultFillColor) - else: - #just a plain old line segment - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - - self.pdf.bezier(x1, y1, x2, y2, x3, y3, x4, y4) - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - - def drawRect(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, - **kwargs): - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - if fillColor: - self._updateFillColor(fillColor) - - p = self.pdf.beginPath() - p.rect(x1, y1, x2 - x1, y2 - y1) - self._endPath(p, edgeColor, fillColor) #handles case of transparency - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - if fillColor: - self._updateFillColor(self.defaultFillColor) - - #drawRoundRect is inherited - cannot really improve on that one, - #and figures are quite efficient now. - def drawEllipse(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, - **kwargs): - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - if fillColor: - self._updateFillColor(fillColor) - - p = self.pdf.beginPath() - p.ellipse(x1, y1, x2 - x1, y2 - y1) - self._endPath(p, edgeColor, fillColor) #handles case of transparency - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - if fillColor: - self._updateFillColor(self.defaultFillColor) - - def drawArc(self, x1, y1, x2, y2, startAng=0, extent=90, edgeColor=None, edgeWidth=None, - fillColor=None, dash=None, **kwargs): - """This draws a PacMan-type shape connected to the centre. One - idiosyncracy - if you specify an edge color, it apples to the - outer curved rim but not the radial edges.""" - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - if fillColor: - self._updateFillColor(fillColor) - # I need to do some more work on flipping the coordinate system - - # in pdfgen - note the angle reversal needed when drawing top-down. - pointList = pdfgeom.bezierArc(x1, y1, x2, y2, -startAng, -extent) - start = pointList[0] - end = pointList[-1] - x_cen = 0.5 * (x1 + x2) - y_cen = 0.5 * (y1 + y2) - - #first the fill - p = self.pdf.beginPath() - p.moveTo(x_cen, y_cen) - p.lineTo(start[0], start[1]) - for curve in pointList: - p.curveTo(curve[2], curve[3], curve[4], curve[5], curve[6], curve[7]) - p.close() #back to centre - self._endPath(p, transparent, fillColor) #handles case of transparency - #now the outer rim - p2 = self.pdf.beginPath() - p2.moveTo(start[0], start[1]) - for curve in pointList: - p2.curveTo(curve[2], curve[3], curve[4], curve[5], curve[6], curve[7]) - self._endPath(p2, edgeColor, transparent) #handles case of transparency - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - if fillColor: - self._updateFillColor(self.defaultFillColor) - - def drawPolygon(self, pointlist, edgeColor=None, edgeWidth=None, fillColor=None, closed=0, - dash=None, **kwargs): - """As it says. Easy with paths!""" - if edgeColor: - self._updateLineColor(edgeColor) - if edgeWidth: - self._updateLineWidth(edgeWidth) - if fillColor: - self._updateFillColor(fillColor) - - p = self.pdf.beginPath() - p.moveTo(pointlist[0][0], pointlist[0][1]) - for point in pointlist[1:]: - p.lineTo(point[0], point[1]) - if closed: - p.close() - - self._endPath(p, edgeColor, fillColor) #handles case of transparency - - if edgeColor: - self._updateLineColor(self.defaultLineColor) - if edgeWidth: - self._updateLineWidth(self.defaultLineWidth) - if fillColor: - self._updateFillColor(self.defaultFillColor) - -## def drawFigure(self, partList, -## edgeColor=None, edgeWidth=None, fillColor=None, closed=0): -## """This is PIDDLE's attempt at Postscript paths. Due to -## necessary limitations in the algorithm, if the start and end -## points are not connected but closed=1, then you get the full shape -## filled but the final line segment does not join up. I have to -## do extra work to simulate this.""" -## -## if edgeColor: -## self._updateLineColor(edgeColor) -## if edgeWidth: -## self._updateLineWidth(edgeWidth) -## if fillColor: -## self._updateFillColor(fillColor) + """This works by accumulating a list of strings containing + PDF page marking operators, as you call its methods. We could + use a big string but this is more efficient - only concatenate + it once, with control over line ends. When + done, it hands off the stream to a PDFPage object.""" + + def __init__(self, size=None, name="pidPDF.pdf", pagesize=DEFAULT_PAGE_SIZE): + # if no extension, add .PDF + root, ext = os.path.splitext(name) + if ext == '': + name = root + '.pdf' + + # create the underlying pdfgen canvas and set some attributes + self.pdf = pdfgen.Canvas(name, pagesize=pagesize, bottomup=0) # add pagesize to constructor + # by default do not use comrpression (mod by cwl, may not be necessary w/ newer pdfgen) + self.pdf.setPageCompression(0) + + self.pdf.setLineCap(2) + # now call super init, which will trigger + # calls into self.pdf + + Canvas.__init__(self, size=size, name=name) + + # memorize stuff + self.pagesize = pagesize + self.filename = name + + # self.pdf.setPageSize(pagesize) # This doesn't seem to work correctly -cwl + if size == None: + # take the page size, which might not be default + self.drawingsize = self.pagesize + else: + # convenience for other platformslike GUI views + # we let them centre a smaller drawing in a page + self.drawingsize = size + + self.pageTransitionString = '' + self.pageNumber = 1 # keep a count + + # if they specified a size smaller than page, + # be helpful and centre their diagram + if self.pagesize != self.drawingsize: + dx = 0.5 * (self.pagesize[0] - self.drawingsize[0]) + dy = 0.5 * (self.pagesize[1] - self.drawingsize[1]) + self.pdf.translate(dx, dy) + + def _resetDefaults(self): + """Only used in setup - persist from page to page""" + self.defaultLineColor = black + self.defaultFillColor = transparent + self.defaultLineWidth = 1 + self.defaultFont = Font() + self.pdf.setLineCap(2) + + def showPage(self): + """ensure basic settings are the same after a page break""" + self.pdf.showPage() + self.defaultFont = self.defaultFont + self.defaultLineColor = self.defaultLineColor + self.defaultFillColor = self.defaultFillColor + self.defaultLineWidth = self.defaultLineWidth + self.pdf.setLineCap(2) + + # ------------ canvas capabilities ------------- + + def isInteractive(self): + return 0 + + def canUpdate(self): + return 0 + + # ------------ general management ------------- + def clear(self): + "Not wll defined for file formats, use same as ShowPage" + self.showPage() + + def flush(self): + pass + + def save(self, file=None, format=None): + """Saves the file. If holding data, do + a showPage() to save them having to.""" + + if self.pdf.pageHasData(): + self.pdf.showPage() + + if hasattr(file, 'write'): + self.pdf.save(fileobj=file) + elif isinstance(file, str): + self.pdf.save(filename=file) + else: + self.pdf.save() + + def setInfoLine(self, s): + self.pdf.setTitle(s) + + # -------------handle assignment to defaultXXX------- + + def __setattr__(self, key, value): + # we let it happen... + self.__dict__[key] = value + # ...but take action if needed + if key == "defaultLineColor": + self._updateLineColor(value) + elif key == "defaultLineWidth": + self._updateLineWidth(value) + elif key == "defaultFillColor": + self._updateFillColor(value) + elif key == "defaultFont": + self._updateFont(value) + + def _updateLineColor(self, color): + """Triggered when someone assigns to defaultLineColor""" + self.pdf.setStrokeColorRGB(color.red, color.green, color.blue) + + def _updateFillColor(self, color): + """Triggered when someone assigns to defaultFillColor""" + self.pdf.setFillColorRGB(color.red, color.green, color.blue) + + def _updateLineWidth(self, width): + """Triggered when someone assigns to defaultLineWidth""" + self.pdf.setLineWidth(width) + + def _updateFont(self, font): + """Triggered when someone assigns to defaultFont""" + psfont = self._findPostScriptFontName(font) + self.pdf.setFont(psfont, font.size) + + def _findPostScriptFontName(self, font): + """Attempts to return proper font name.""" + + # step 1 - no face ends up serif, others are lowercased + if not font.face: + face = 'serif' + else: + face = font.face.lower() + while face in font_face_map: + face = font_face_map[face] + # step 2, - resolve bold/italic to get the right PS font name + psname = ps_font_map[(face, font.bold, font.italic)] + return psname + + def _escape(self, s): + """PDF escapes are like Python ones, but brackets need slashes before them too. + Use Python's repr function and chop off the quotes first""" + s = repr(s)[1:-1] + s = s.replace('(', '\(') + s = s.replace(')', '\)') + return s + + def resetDefaults(self): + """If you drop down to a lower level, PIDDLE can lose + track of the current graphics state. Calling this after + wards ensures that the canvas is updated to the same + defaults as PIDDLE thinks they should be.""" + self.defaultFont = self.defaultFont + self.defaultLineColor = self.defaultLineColor + self.defaultFillColor = self.defaultFillColor + self.defaultLineWidth = self.defaultLineWidth + # ------------ string/font info ------------ + + def stringWidth(self, s, font=None): + "Return the logical width of the string if it were drawn \ + in the current font (defaults to self.font)." + + if not font: + font = self.defaultFont + fontname = self._findPostScriptFontName(font) + return pdfmetrics.stringwidth(s, fontname) * font.size * 0.001 + + def fontHeight(self, font=None): + if not font: + font = self.defaultFont + return font.size + + def fontAscent(self, font=None): + if not font: + font = self.defaultFont + fontname = self._findPostScriptFontName(font) + return pdfmetrics.ascent_descent[fontname][0] * 0.001 * font.size + + def fontDescent(self, font=None): + if not font: + font = self.defaultFont + fontname = self._findPostScriptFontName(font) + return -pdfmetrics.ascent_descent[fontname][1] * 0.001 * font.size + + # ------------- drawing helpers -------------- + def _endPath(self, path, edgeColor, fillColor): + """in PIDDLE, the edge and fil colors might be transparent, + and might also be None, in which case they should be taken + from the defaults. This leads to a standard 10 lines of code + when closing each shape, which are wrapped up here. Use + these if you implement new PIDDLE shapes.""" + # allow for transparent fills and lines + fill = fillColor or self.defaultFillColor + edge = edgeColor or self.defaultLineColor + if (fill == transparent and edge == transparent): + pass + else: + self.pdf.drawPath(path, + (edge != transparent), # whether to stroke + (fill != transparent) # whether to fill + ) + + # ------------- drawing methods -------------- + + def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): + """Calls the underlying methods in pdfgen.canvas. For the + highest performance, use canvas.setDefaultFont and + canvas.setLineWidth, and draw batches of similar + lines together.""" + # set the state if needed + if color: + self._updateLineColor(color) + if width: + self._updateLineWidth(width) + + # now do the work + self.pdf.line(x1, y1, x2, y2) + + # now reset state if needed + if color: + self._updateLineColor(self.defaultLineColor) + if width: + self._updateLineWidth(self.defaultLineWidth) + + def drawLines(self, lineList, color=None, width=None, dash=None, **kwargs): + """Draws several distinct lines, all with same color + and width, efficiently""" + if color: + self._updateLineColor(color) + if width: + self._updateLineWidth(width) + + self.pdf.lines(lineList) + + if color: + self._updateLineColor(self.defaultLineColor) + if width: + self._updateLineWidth(self.defaultLineWidth) + + def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): + """As it says, but many options to process. It translates + user space rather than text space, in case underlining is + needed on rotated text. It cheats and does literals + for efficiency, avoiding changing the python graphics state.""" + self.pdf.addLiteral('%begin drawString') + col = color or self.defaultLineColor + if col != transparent: + if '\n' in s or '\r' in s: + # normalize line ends + s = s.replace('\r\n', '\n') + s = s.replace('\n\r', '\n') + lines = s.split('\n') + else: + lines = [s] + fnt = font or self.defaultFont + self._updateFont(fnt) + text = self.pdf._escape(s) + + # start of Chris's hacking + # inserting basic commands here to see if can get working + textobj = self.pdf.beginText() + + if col != self.defaultFillColor: + textobj.setFillColorRGB(col.red, col.green, col.blue) + + if angle != 0: + co = cos(angle * pi / 180.0) + si = sin(angle * pi / 180.0) + textobj.setTextTransform(co, -si, si, co, x, y) # top down coords so reverse angle + else: + textobj.setTextOrigin(x, y) + + for line in lines: + # keep underlining separate - it is slow and unusual anyway + if fnt.underline: + # breaks on angled text - FIXME + ycursor = textobj.getY() # returns offset from last set origin + dy = 0.5 * self.fontDescent(fnt) + width = self.stringWidth(line, fnt) + linewidth = fnt.size * 0.1 + + self.pdf.saveState() + self.pdf.setLineWidth(linewidth) + self.pdf.translate(x, y) # need to translate first before rotate + if angle != 0: + self.pdf.rotate(-angle) + self.pdf.translate(0, ycursor - y) # move down to start of current text line + self.pdf.line(0, dy, width, dy) + self.pdf.restoreState() + lasty = ycursor + textobj.textLine(line) # adds text to textobj, advances getY's cursor + # finally actually send text object to the page + self.pdf.drawText(textobj) # draw all the text afterwards? Doesn't seem right + self.pdf.addLiteral('%end drawString') + # done wth drawString() + + def drawCurve(self, x1, y1, x2, y2, x3, y3, x4, y4, edgeColor=None, edgeWidth=None, + fillColor=None, closed=0, dash=None, **kwargs): + """This could do two totally different things. If not closed, + just does a bezier curve so fill is irrelevant. If closed, + it is actually a filled shape.""" + if closed: + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + if fillColor: + self._updateFillColor(fillColor) + + p = self.pdf.beginPath() + p.moveTo(x1, y1) + p.curveTo(x2, y2, x3, y3, x4, y4) + p.close() + self._endPath(p, edgeColor, fillColor) # handles case of transparency + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + if fillColor: + self._updateFillColor(self.defaultFillColor) + else: + # just a plain old line segment + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + + self.pdf.bezier(x1, y1, x2, y2, x3, y3, x4, y4) + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + + def drawRect(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, + **kwargs): + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + if fillColor: + self._updateFillColor(fillColor) + + p = self.pdf.beginPath() + p.rect(x1, y1, x2 - x1, y2 - y1) + self._endPath(p, edgeColor, fillColor) # handles case of transparency + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + if fillColor: + self._updateFillColor(self.defaultFillColor) + + # drawRoundRect is inherited - cannot really improve on that one, + # and figures are quite efficient now. + def drawEllipse(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, + **kwargs): + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + if fillColor: + self._updateFillColor(fillColor) + + p = self.pdf.beginPath() + p.ellipse(x1, y1, x2 - x1, y2 - y1) + self._endPath(p, edgeColor, fillColor) # handles case of transparency + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + if fillColor: + self._updateFillColor(self.defaultFillColor) + + def drawArc(self, x1, y1, x2, y2, startAng=0, extent=90, edgeColor=None, edgeWidth=None, + fillColor=None, dash=None, **kwargs): + """This draws a PacMan-type shape connected to the centre. One + idiosyncracy - if you specify an edge color, it apples to the + outer curved rim but not the radial edges.""" + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + if fillColor: + self._updateFillColor(fillColor) + # I need to do some more work on flipping the coordinate system - + # in pdfgen - note the angle reversal needed when drawing top-down. + pointList = pdfgeom.bezierArc(x1, y1, x2, y2, -startAng, -extent) + start = pointList[0] + end = pointList[-1] + x_cen = 0.5 * (x1 + x2) + y_cen = 0.5 * (y1 + y2) + + # first the fill + p = self.pdf.beginPath() + p.moveTo(x_cen, y_cen) + p.lineTo(start[0], start[1]) + for curve in pointList: + p.curveTo(curve[2], curve[3], curve[4], curve[5], curve[6], curve[7]) + p.close() # back to centre + self._endPath(p, transparent, fillColor) # handles case of transparency + # now the outer rim + p2 = self.pdf.beginPath() + p2.moveTo(start[0], start[1]) + for curve in pointList: + p2.curveTo(curve[2], curve[3], curve[4], curve[5], curve[6], curve[7]) + self._endPath(p2, edgeColor, transparent) # handles case of transparency + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + if fillColor: + self._updateFillColor(self.defaultFillColor) + + def drawPolygon(self, pointlist, edgeColor=None, edgeWidth=None, fillColor=None, closed=0, + dash=None, **kwargs): + """As it says. Easy with paths!""" + if edgeColor: + self._updateLineColor(edgeColor) + if edgeWidth: + self._updateLineWidth(edgeWidth) + if fillColor: + self._updateFillColor(fillColor) + + p = self.pdf.beginPath() + p.moveTo(pointlist[0][0], pointlist[0][1]) + for point in pointlist[1:]: + p.lineTo(point[0], point[1]) + if closed: + p.close() + + self._endPath(p, edgeColor, fillColor) # handles case of transparency + + if edgeColor: + self._updateLineColor(self.defaultLineColor) + if edgeWidth: + self._updateLineWidth(self.defaultLineWidth) + if fillColor: + self._updateFillColor(self.defaultFillColor) + +# def drawFigure(self, partList, +# edgeColor=None, edgeWidth=None, fillColor=None, closed=0): +# """This is PIDDLE's attempt at Postscript paths. Due to +# necessary limitations in the algorithm, if the start and end +# points are not connected but closed=1, then you get the full shape +# filled but the final line segment does not join up. I have to +# do extra work to simulate this.""" ## -## p1 = self.pdf.beginPath() #use for the fill (i.e. closed) -## p2 = self.pdf.beginPath() #use for the edge (may not be closed) +# if edgeColor: +# self._updateLineColor(edgeColor) +# if edgeWidth: +# self._updateLineWidth(edgeWidth) +# if fillColor: +# self._updateFillColor(fillColor) ## -## #move to first point +# p1 = self.pdf.beginPath() #use for the fill (i.e. closed) +# p2 = self.pdf.beginPath() #use for the edge (may not be closed) +## +# move to first point ## start = (partList[0][1:3]) ## end = None ## p1.moveTo(start[0], start[1]) ## p2.moveTo(start[0], start[1]) -## -## for tuple in partList: +## +# for tuple in partList: ## op = tuple[0] ## args = list(tuple[1:]) ## start = args[0:2] -## # lineTo the start if not coincident with end of last segment -## if start != end: +# lineTo the start if not coincident with end of last segment +# if start != end: ## p1.lineTo(start[0], start[1]) ## p2.lineTo(start[0], start[1]) -## -## #now draw appropriate segment -## if op == figureLine: +## +# now draw appropriate segment +# if op == figureLine: ## p1.lineTo(args[2], args[3]) ## p2.lineTo(args[2], args[3]) ## end = args[2:4] -## elif op == figureArc: -## #p1.arcTo(args[0], args[1], args[2], args[3], args[4], args[5]) -## #p2.arcTo(args[0], args[1], args[2], args[3], args[4], args[5]) +# elif op == figureArc: +# p1.arcTo(args[0], args[1], args[2], args[3], args[4], args[5]) +# p2.arcTo(args[0], args[1], args[2], args[3], args[4], args[5]) ## p1.arc(args[0], args[1], args[2], args[3], args[4], args[5]) ## p2.arc(args[0], args[1], args[2], args[3], args[4], args[5]) ## end = args[2:4] -## elif op == figureCurve: +# elif op == figureCurve: ## p1.curveTo(args[2], args[3], args[4], args[5], args[6], args[7]) ## p2.curveTo(args[2], args[3], args[4], args[5], args[6], args[7]) ## end = args[6:8] -## else: -## raise TypeError, "unknown figure operator: " + op +# else: +# raise TypeError, "unknown figure operator: " + op +## +# now for the weirdness +# p1.close() +# if closed: +# p2.close() +# print 'closed edge path' +# print 'inner path p1:' + p1.getCode() +# print 'outer path p2:' + p2.getCode() ## -## #now for the weirdness -## p1.close() -## if closed: -## p2.close() -## print 'closed edge path' -## print 'inner path p1:' + p1.getCode() -## print 'outer path p2:' + p2.getCode() -## ## self._endPath(p1, transparent, fillColor) ## self._endPath(p2, edgeColor, transparent) -## -## if edgeColor: -## self._updateLineColor(self.defaultLineColor) -## if edgeWidth: -## self._updateLineWidth(self.defaultLineWidth) -## if fillColor: -## self._updateFillColor(self.defaultFillColor) +## +# if edgeColor: +# self._updateLineColor(self.defaultLineColor) +# if edgeWidth: +# self._updateLineWidth(self.defaultLineWidth) +# if fillColor: +# self._updateFillColor(self.defaultFillColor) - def drawImage(self, image, x1, y1, x2=None, y2=None, **kwargs): - """Draw a PIL Image or image filename into the specified rectangle. - If x2 and y2 are omitted, they are calculated from the image size. - """ - # chris starts meddling here -cwl - # piddle only takes PIL images - im_width, im_height = image.size - if not x2: - x2 = x1 + im_width - if not y2: - y2 = y1 + im_height + def drawImage(self, image, x1, y1, x2=None, y2=None, **kwargs): + """Draw a PIL Image or image filename into the specified rectangle. + If x2 and y2 are omitted, they are calculated from the image size. + """ + # chris starts meddling here -cwl + # piddle only takes PIL images + im_width, im_height = image.size + if not x2: + x2 = x1 + im_width + if not y2: + y2 = y1 + im_height - self.pdf.saveState() # I'm changing coordinates to isolate the problem -cwl + self.pdf.saveState() # I'm changing coordinates to isolate the problem -cwl - self.pdf.translate(x1, y1) - self.pdf.drawInlineImage(image, 0, 0, abs(x1 - x2), abs(y1 - y2)) + self.pdf.translate(x1, y1) + self.pdf.drawInlineImage(image, 0, 0, abs(x1 - x2), abs(y1 - y2)) - self.pdf.restoreState() + self.pdf.restoreState() - ### original code below -cwl - #the underlying canvas uses a bott-up coord system, so flips things - #if x2: - #width = abs(x2 - x1) - #x = min(x1, x2) - #if y2: - #height = abs(y2 - y1) - #y = min(y1, y2) - #self.pdf.drawInlineImage(image, x, y, width, height) + # original code below -cwl + # the underlying canvas uses a bott-up coord system, so flips things + # if x2: + #width = abs(x2 - x1) + #x = min(x1, x2) + # if y2: + #height = abs(y2 - y1) + #y = min(y1, y2) + #self.pdf.drawInlineImage(image, x, y, width, height) - ########################################################## - # - # non-standard extensions outside Piddle API - # - ########################################################## + ########################################################## + # + # non-standard extensions outside Piddle API + # + ########################################################## - def drawLiteral(self, literal): - #adds a chunk of raw stuff to the PDF contents stream - self.code.append(literal) + def drawLiteral(self, literal): + # adds a chunk of raw stuff to the PDF contents stream + self.code.append(literal) def test(): - #... for testing... - canvas = PDFCanvas(name="test") + # ... for testing... + canvas = PDFCanvas(name="test") - canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue - canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) - canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) - canvas.defaultLineColor = black + canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue + canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) + canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) + canvas.defaultLineColor = black - canvas.drawLine(10, 200, 20, 190, color=red) + canvas.drawLine(10, 200, 20, 190, color=red) - canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) + canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) - canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) + canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) - canvas.defaultLineWidth = 4 - canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon, dash=(3, 3)) - canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) + canvas.defaultLineWidth = 4 + canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon, dash=(3, 3)) + canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) - canvas.drawString("This is a test!", 30, 130, Font(face="times", size=16, bold=1), color=green, - angle=-45) + canvas.drawString("This is a test!", 30, 130, Font(face="times", size=16, bold=1), color=green, + angle=-45) - canvas.drawString("This is a test!", 30, 130, color=red) + canvas.drawString("This is a test!", 30, 130, color=red) - polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] - canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) + polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] + canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) - canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) - canvas.drawLine(200, 260, 260, 260, color=green, width=5) - canvas.drawLine(260, 200, 260, 260, color=red, width=5) + canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) + canvas.drawLine(200, 260, 260, 260, color=green, width=5) + canvas.drawLine(260, 200, 260, 260, color=red, width=5) - canvas.save('test.pdf') + canvas.save('test.pdf') if __name__ == '__main__': - test() - #dashtest() - #test2() + test() + # dashtest() + # test2() diff --git a/rdkit/sping/PIL/pidPIL.py b/rdkit/sping/PIL/pidPIL.py index 753ed05f9..e40fb2994 100755 --- a/rdkit/sping/PIL/pidPIL.py +++ b/rdkit/sping/PIL/pidPIL.py @@ -1,16 +1,16 @@ # piddlePIL.py -- a Python Imaging Library backend for PIDDLE # Copyright (C) 1999 Joseph J. Strout -# +# # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -26,441 +26,443 @@ modified for use with sping. This requires Imaging to be installed as a package PIL """ -### 6/22/99: updated drawString to handle non-integer x and y +# 6/22/99: updated drawString to handle non-integer x and y from rdkit.sping.pid import * try: - import Image, ImageFont, ImageDraw + import Image + import ImageFont + import ImageDraw except ImportError: - from PIL import Image, ImageFont, ImageDraw + from PIL import Image, ImageFont, ImageDraw import math -from rdkit import six import os if __name__ == '__main__': - _fontprefix = os.path.join(os.curdir, 'pilfonts') + _fontprefix = os.path.join(os.curdir, 'pilfonts') else: - _fontprefix = os.path.join(os.path.split(__file__)[0], 'pilfonts') + _fontprefix = os.path.join(os.path.split(__file__)[0], 'pilfonts') # load font metrics try: - f = open(os.path.join(_fontprefix, 'metrics.dat'), 'rb') - from rdkit.six.moves import cPickle - _widthmaps = cPickle.load(f) - _ascents = cPickle.load(f) - _descents = cPickle.load(f) - f.close() + f = open(os.path.join(_fontprefix, 'metrics.dat'), 'rb') + import pickle + _widthmaps = pickle.load(f) + _ascents = pickle.load(f) + _descents = pickle.load(f) + f.close() except Exception: - print("Warning: unable to load font metrics from dir {0}".format(_fontprefix)) - _widthmaps = {} - _ascents = {} - _descents = {} -#finally: + print("Warning: unable to load font metrics from dir {0}".format(_fontprefix)) + _widthmaps = {} + _ascents = {} + _descents = {} +# finally: # pass # (just here so we can comment out the except clause for debugging) def _closestSize(size): - supported = [8, 10, 12, 14, 18, 24] # list of supported sizes - if size in supported: - return size - best = supported[0] - bestdist = abs(size - best) - for trial in supported[1:]: - dist = abs(size - trial) - if dist < bestdist: - best = trial - bestdist = dist - return best + supported = [8, 10, 12, 14, 18, 24] # list of supported sizes + if size in supported: + return size + best = supported[0] + bestdist = abs(size - best) + for trial in supported[1:]: + dist = abs(size - trial) + if dist < bestdist: + best = trial + bestdist = dist + return best def _pilFontPath(face, size, bold=0): - if face == 'monospaced': - face = 'courier' - elif face == 'serif': - face = 'times' - elif face == 'sansserif' or face == 'system': - face = 'helvetica' + if face == 'monospaced': + face = 'courier' + elif face == 'serif': + face = 'times' + elif face == 'sansserif' or face == 'system': + face = 'helvetica' - if bold and face != 'symbol': - fname = "%s-bold-%d.pil" % (face, size) - else: - fname = "%s-%d.pil" % (face, size) - path = os.path.join(_fontprefix, fname) - return path + if bold and face != 'symbol': + fname = "%s-bold-%d.pil" % (face, size) + else: + fname = "%s-%d.pil" % (face, size) + path = os.path.join(_fontprefix, fname) + return path def _matchingFontPath(font): - # returns a font path which matches info in our font metrics - if font.face: - face = font.face - else: - face = 'times' + # returns a font path which matches info in our font metrics + if font.face: + face = font.face + else: + face = 'times' - size = _closestSize(font.size) - if isinstance(face, six.string_types): - path = _pilFontPath(face, size, font.bold) - path = path.split(os.sep)[-1] - if path in _widthmaps.keys(): - return path - else: - for item in font.face: - path = _pilFontPath(item, size, font.bold) - path = path.split(os.sep)[-1] - if path in _widthmaps.keys(): - return path - # not found? Try it with courier, which should always be there - path = _pilFontPath('courier', size, font.bold) - return path.split(os.sep)[-1] + size = _closestSize(font.size) + if isinstance(face, str): + path = _pilFontPath(face, size, font.bold) + path = path.split(os.sep)[-1] + if path in _widthmaps.keys(): + return path + else: + for item in font.face: + path = _pilFontPath(item, size, font.bold) + path = path.split(os.sep)[-1] + if path in _widthmaps.keys(): + return path + # not found? Try it with courier, which should always be there + path = _pilFontPath('courier', size, font.bold) + return path.split(os.sep)[-1] def _pilFont(font): - if font.face: - face = font.face - else: - face = 'times' + if font.face: + face = font.face + else: + face = 'times' - size = _closestSize(font.size) - if isinstance(face, six.string_types): - try: - pilfont = ImageFont.load_path(_pilFontPath(face, size, font.bold)) - except Exception: - return 0 # font not found! - else: - for item in font.face: - pilfont = None - try: - pilfont = ImageFont.load_path(_pilFontPath(item, size, font.bold)) - break - except Exception: - pass - if pilfont == None: - return 0 # font not found! - return pilfont + size = _closestSize(font.size) + if isinstance(face, str): + try: + pilfont = ImageFont.load_path(_pilFontPath(face, size, font.bold)) + except Exception: + return 0 # font not found! + else: + for item in font.face: + pilfont = None + try: + pilfont = ImageFont.load_path(_pilFontPath(item, size, font.bold)) + break + except Exception: + pass + if pilfont == None: + return 0 # font not found! + return pilfont class PILCanvas(Canvas): - def __init__(self, size=(300, 300), name='piddlePIL'): - self._image = Image.new('RGB', (int(size[0]), int(size[1])), (255, 255, 255)) - self._pen = ImageDraw.ImageDraw(self._image) - self._setFont(Font()) - Canvas.__init__(self, size, name) + def __init__(self, size=(300, 300), name='piddlePIL'): + self._image = Image.new('RGB', (int(size[0]), int(size[1])), (255, 255, 255)) + self._pen = ImageDraw.ImageDraw(self._image) + self._setFont(Font()) + Canvas.__init__(self, size, name) - def __setattr__(self, attribute, value): - self.__dict__[attribute] = value - if attribute == "defaultLineColor": - self._setColor(self.defaultLineColor) + def __setattr__(self, attribute, value): + self.__dict__[attribute] = value + if attribute == "defaultLineColor": + self._setColor(self.defaultLineColor) - # utility functions - def _setColor(self, c): - "Set the pen color from a piddle color." - self._color = (int(c.red * 255), int(c.green * 255), int(c.blue * 255)) + # utility functions + def _setColor(self, c): + "Set the pen color from a piddle color." + self._color = (int(c.red * 255), int(c.green * 255), int(c.blue * 255)) - def _setFont(self, font): - self._font = _pilFont(font) + def _setFont(self, font): + self._font = _pilFont(font) - # public functions + # public functions - def getImage(self): - return self._image + def getImage(self): + return self._image - def save(self, file=None, format=None): - """format may be a string specifying a file extension corresponding to - an image file format. Ex: 'png', 'jpeg', 'gif', 'tif' etc. - These are defined by PIL, not by us so you need to check the docs. - In general, I just specify an extension and let format default to None""" - file = file or self.name - if hasattr(file, 'write'): - raise ValueError('fileobj not implemented for piddlePIL') - # below here, file is guaranteed to be a string - if format == None: - if '.' not in file: - filename = file + '.png' # default to producing jpg - else: - filename = file - # format = file[-3:] # os.path.splitext(..) - else: - filename = file + '.' + format + def save(self, file=None, format=None): + """format may be a string specifying a file extension corresponding to + an image file format. Ex: 'png', 'jpeg', 'gif', 'tif' etc. + These are defined by PIL, not by us so you need to check the docs. + In general, I just specify an extension and let format default to None""" + file = file or self.name + if hasattr(file, 'write'): + raise ValueError('fileobj not implemented for piddlePIL') + # below here, file is guaranteed to be a string + if format == None: + if '.' not in file: + filename = file + '.png' # default to producing jpg + else: + filename = file + # format = file[-3:] # os.path.splitext(..) + else: + filename = file + '.' + format - self._image.save(filename, format=format) + self._image.save(filename, format=format) - def clear(self): - # why is edgeColor yellow ??? - self.drawRect(0, 0, self.size[0], self.size[1], edgeColor=yellow, fillColor=white) - ### FIXME: need to reset canvas as well to defaults ??? + def clear(self): + # why is edgeColor yellow ??? + self.drawRect(0, 0, self.size[0], self.size[1], edgeColor=yellow, fillColor=white) + # FIXME: need to reset canvas as well to defaults ??? - #------------ string/font info ------------ + # ------------ string/font info ------------ - def stringWidth(self, s, font=None): - "Return the logical width of the string if it were drawn \ - in the current font (defaults to self.defaultFont)." + def stringWidth(self, s, font=None): + "Return the logical width of the string if it were drawn \ + in the current font (defaults to self.defaultFont)." - if not font: - font = self.defaultFont - if not _widthmaps: - return font.size * len(s) + if not font: + font = self.defaultFont + if not _widthmaps: + return font.size * len(s) - path = _matchingFontPath(font) - map = _widthmaps[path] - out = 0 - for c in s: - out += map.get(c, font.size) - return out + path = _matchingFontPath(font) + map = _widthmaps[path] + out = 0 + for c in s: + out += map.get(c, font.size) + return out - def fontAscent(self, font=None): - "Find the ascent (height above base) of the given font." + def fontAscent(self, font=None): + "Find the ascent (height above base) of the given font." - if not font: - font = self.defaultFont - if not _ascents: - return font.size + if not font: + font = self.defaultFont + if not _ascents: + return font.size - path = _matchingFontPath(font) - return _ascents[path] + path = _matchingFontPath(font) + return _ascents[path] - def fontDescent(self, font=None): - "Find the descent (extent below base) of the given font." + def fontDescent(self, font=None): + "Find the descent (extent below base) of the given font." - if not font: - font = self.defaultFont - if not _descents: - return font.size / 2 + if not font: + font = self.defaultFont + if not _descents: + return font.size / 2 - path = _matchingFontPath(font) - return _descents[path] + path = _matchingFontPath(font) + return _descents[path] - #------------- drawing methods -------------- - def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): - "Draw a straight line between x1,y1 and x2,y2." - # set color... - if width is None: - w = self.defaultLineWidth - elif width: - w = width - else: - return + # ------------- drawing methods -------------- + def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): + "Draw a straight line between x1,y1 and x2,y2." + # set color... + if width is None: + w = self.defaultLineWidth + elif width: + w = width + else: + return - if color: - if color == transparent: - return - self._setColor(color) - elif self.defaultLineColor == transparent: - return + if color: + if color == transparent: + return + self._setColor(color) + elif self.defaultLineColor == transparent: + return - if not dash: - self._pen.line((x1, y1, x2, y2), fill=self._color, width=w) - else: - dx = x2 - x1 - dy = y2 - y1 - lineLen = math.sqrt(dx * dx + dy * dy) - theta = math.atan2(dy, dx) - cosT = math.cos(theta) - sinT = math.sin(theta) + if not dash: + self._pen.line((x1, y1, x2, y2), fill=self._color, width=w) + else: + dx = x2 - x1 + dy = y2 - y1 + lineLen = math.sqrt(dx * dx + dy * dy) + theta = math.atan2(dy, dx) + cosT = math.cos(theta) + sinT = math.sin(theta) - pos = (x1, y1) - dist = 0 - currDash = 0 - dashOn = 1 - while dist < lineLen: - currL = dash[currDash % len(dash)] - if (dist + currL > lineLen): - currL = lineLen - dist - endP = (pos[0] + currL * cosT, pos[1] + currL * sinT) - if dashOn: - self.drawLine(pos[0], pos[1], endP[0], endP[1], color=color, width=width, dash=None, - **kwargs) - pos = endP - dist += currL - currDash += 1 - dashOn = not dashOn + pos = (x1, y1) + dist = 0 + currDash = 0 + dashOn = 1 + while dist < lineLen: + currL = dash[currDash % len(dash)] + if (dist + currL > lineLen): + currL = lineLen - dist + endP = (pos[0] + currL * cosT, pos[1] + currL * sinT) + if dashOn: + self.drawLine(pos[0], pos[1], endP[0], endP[1], color=color, width=width, dash=None, + **kwargs) + pos = endP + dist += currL + currDash += 1 + dashOn = not dashOn - def drawPolygon(self, pointlist, edgeColor=None, edgeWidth=None, fillColor=None, closed=0, - dash=None, **kwargs): - """drawPolygon(pointlist) -- draws a polygon - pointlist: a list of (x,y) tuples defining vertices - """ - # PIL's routine requires a sequence of tuples... - # the input is not so restricted, so fix it - pts = list(pointlist) - for i in range(len(pts)): - pts[i] = tuple(pts[i]) + def drawPolygon(self, pointlist, edgeColor=None, edgeWidth=None, fillColor=None, closed=0, + dash=None, **kwargs): + """drawPolygon(pointlist) -- draws a polygon + pointlist: a list of (x,y) tuples defining vertices + """ + # PIL's routine requires a sequence of tuples... + # the input is not so restricted, so fix it + pts = list(pointlist) + for i in range(len(pts)): + pts[i] = tuple(pts[i]) - # set color for fill... - filling = 0 - if fillColor: - if fillColor != transparent: - self._setColor(fillColor) - filling = 1 - elif self.defaultFillColor != transparent: - self._setColor(self.defaultFillColor) - filling = 1 + # set color for fill... + filling = 0 + if fillColor: + if fillColor != transparent: + self._setColor(fillColor) + filling = 1 + elif self.defaultFillColor != transparent: + self._setColor(self.defaultFillColor) + filling = 1 - # do the fill - if filling: - pts = [(int(x[0]), int(x[1])) for x in pts] - self._pen.polygon(pts, fill=self._color) + # do the fill + if filling: + pts = [(int(x[0]), int(x[1])) for x in pts] + self._pen.polygon(pts, fill=self._color) - # set edge width... - if edgeWidth is None: - edgeWidth = self.defaultLineWidth - elif not edgeWidth: - return + # set edge width... + if edgeWidth is None: + edgeWidth = self.defaultLineWidth + elif not edgeWidth: + return - # set color for edge... - if edgeColor: - self._setColor(edgeColor) - else: - self._setColor(self.defaultLineColor) + # set color for edge... + if edgeColor: + self._setColor(edgeColor) + else: + self._setColor(self.defaultLineColor) - # draw the outline - if (closed or (pts[0][0]==pts[-1][0] and pts[0][1]==pts[-1][1])) \ - and edgeWidth <= 1: - self._pen.polygon(pts, outline=self._color) - else: - # ...since PIL's polygon routine insists on closing, - # and does not support thick edges, we'll use our drawLine instead - # OFI: use default color/width to speed this up! - oldp = pts[0] - if closed: - pts.append(oldp) - for p in pts[1:]: - self.drawLine(oldp[0], oldp[1], p[0], p[1], edgeColor, edgeWidth, dash=dash, **kwargs) - oldp = p + # draw the outline + if (closed or (pts[0][0] == pts[-1][0] and pts[0][1] == pts[-1][1])) \ + and edgeWidth <= 1: + self._pen.polygon(pts, outline=self._color) + else: + # ...since PIL's polygon routine insists on closing, + # and does not support thick edges, we'll use our drawLine instead + # OFI: use default color/width to speed this up! + oldp = pts[0] + if closed: + pts.append(oldp) + for p in pts[1:]: + self.drawLine(oldp[0], oldp[1], p[0], p[1], edgeColor, + edgeWidth, dash=dash, **kwargs) + oldp = p - def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): - "Draw a string starting at location x,y." - x = int(x) - y = int(y) - if '\n' in s or '\r' in s: - self.drawMultiLineString(s, x, y, font, color, angle, **kwargs) - return - if not font: - font = self.defaultFont + def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): + "Draw a string starting at location x,y." + x = int(x) + y = int(y) + if '\n' in s or '\r' in s: + self.drawMultiLineString(s, x, y, font, color, angle, **kwargs) + return + if not font: + font = self.defaultFont - if not color: - color = self.defaultLineColor - if color == transparent: - return + if not color: + color = self.defaultLineColor + if color == transparent: + return - # draw into an offscreen Image - # tmpsize was originally 1.2* stringWidth, added code to give enough room for single character strings (piddle bug#121995) - sHeight = (self.fontAscent(font) + self.fontDescent(font)) - sWidth = self.stringWidth(s, font) - tempsize = max(sWidth * 1.2, sHeight * 2.0) - tempimg = Image.new('RGB', (int(tempsize), int(tempsize)), (0, 0, 0)) + # draw into an offscreen Image + # tmpsize was originally 1.2* stringWidth, added code to give enough room for single character strings (piddle bug#121995) + sHeight = (self.fontAscent(font) + self.fontDescent(font)) + sWidth = self.stringWidth(s, font) + tempsize = max(sWidth * 1.2, sHeight * 2.0) + tempimg = Image.new('RGB', (int(tempsize), int(tempsize)), (0, 0, 0)) - temppen = ImageDraw.ImageDraw(tempimg) + temppen = ImageDraw.ImageDraw(tempimg) - pilfont = _pilFont(font) - if not pilfont: - raise ValueError("bad font: %s" % font) - pos = [4, int(tempsize / 2 - self.fontAscent(font)) - self.fontDescent(font)] - temppen.text(pos, s, font=pilfont, fill=(255, 255, 255)) - pos[1] = int(tempsize / 2) + pilfont = _pilFont(font) + if not pilfont: + raise ValueError("bad font: %s" % font) + pos = [4, int(tempsize / 2 - self.fontAscent(font)) - self.fontDescent(font)] + temppen.text(pos, s, font=pilfont, fill=(255, 255, 255)) + pos[1] = int(tempsize / 2) - if font.underline: - ydown = (0.5 * self.fontDescent(font)) - # thickness = 0.08 * font.size # may need to ceil this - temppen.line((pos[0], pos[1] + ydown, pos[0] + sWidth, pos[1] + ydown)) + if font.underline: + ydown = (0.5 * self.fontDescent(font)) + # thickness = 0.08 * font.size # may need to ceil this + temppen.line((pos[0], pos[1] + ydown, pos[0] + sWidth, pos[1] + ydown)) - # rotate - if angle: - from math import pi, sin, cos - tempimg = tempimg.rotate(angle, Image.BILINEAR) - temppen = ImageDraw.ImageDraw(tempimg) - radians = -angle * pi / 180.0 - r = tempsize / 2 - pos[0] - pos[0] = int(tempsize / 2 - r * cos(radians)) - pos[1] = int(pos[1] - r * sin(radians)) + # rotate + if angle: + from math import pi, sin, cos + tempimg = tempimg.rotate(angle, Image.BILINEAR) + temppen = ImageDraw.ImageDraw(tempimg) + radians = -angle * pi / 180.0 + r = tempsize / 2 - pos[0] + pos[0] = int(tempsize / 2 - r * cos(radians)) + pos[1] = int(pos[1] - r * sin(radians)) - ###temppen.rectangle( (pos[0],pos[1],pos[0]+2,pos[1]+2) ) # PATCH for debugging - # colorize, and copy it in - mask = tempimg.convert('L').point(lambda c: c) - clr = (int(color.red * 255), int(color.green * 255), int(color.blue * 255)) - temppen.rectangle((0, 0, tempsize, tempsize), fill=clr) - self._image.paste(tempimg, (int(x) - pos[0], int(y) - pos[1]), mask) + # temppen.rectangle( (pos[0],pos[1],pos[0]+2,pos[1]+2) ) # PATCH for debugging + # colorize, and copy it in + mask = tempimg.convert('L').point(lambda c: c) + clr = (int(color.red * 255), int(color.green * 255), int(color.blue * 255)) + temppen.rectangle((0, 0, tempsize, tempsize), fill=clr) + self._image.paste(tempimg, (int(x) - pos[0], int(y) - pos[1]), mask) - def drawImage(self, image, x1, y1, x2=None, y2=None, **kwargs): - """Draw a PIL Image into the specified rectangle. If x2 and y2 are - omitted, they are calculated from the image size.""" + def drawImage(self, image, x1, y1, x2=None, y2=None, **kwargs): + """Draw a PIL Image into the specified rectangle. If x2 and y2 are + omitted, they are calculated from the image size.""" - if x2 and y2: - bbox = image.getbbox() - if x2 - x1 != bbox[2] - bbox[0] or y2 - y1 != bbox[3] - bbox[1]: - image = image.resize((x2 - x1, y2 - y1)) - self._image.paste(image, (x1, y1)) + if x2 and y2: + bbox = image.getbbox() + if x2 - x1 != bbox[2] - bbox[0] or y2 - y1 != bbox[3] - bbox[1]: + image = image.resize((x2 - x1, y2 - y1)) + self._image.paste(image, (x1, y1)) def test(): - #... for testing... - canvas = PILCanvas() + # ... for testing... + canvas = PILCanvas() - canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue - canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) - canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) - canvas.defaultLineColor = black + canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue + canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) + canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) + canvas.defaultLineColor = black - canvas.drawLine(10, 200, 20, 190, color=red) + canvas.drawLine(10, 200, 20, 190, color=red) - canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) + canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) - canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) + canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) - canvas.defaultLineWidth = 4 - canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon) - canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) + canvas.defaultLineWidth = 4 + canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon) + canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) - canvas.drawString("This is a test!", 30, 130, Font(face="times", size=16, bold=1), color=green, - angle=-45) + canvas.drawString("This is a test!", 30, 130, Font(face="times", size=16, bold=1), color=green, + angle=-45) - canvas.drawString("This is a test!", 30, 130, color=red, angle=45) + canvas.drawString("This is a test!", 30, 130, color=red, angle=45) - polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] - canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) + polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] + canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) - canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) - canvas.drawLine(200, 260, 260, 260, color=green, width=5) - canvas.drawLine(260, 200, 260, 260, color=red, width=5) + canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) + canvas.drawLine(200, 260, 260, 260, color=green, width=5) + canvas.drawLine(260, 200, 260, 260, color=red, width=5) - # now, for testing, save the image as a PNG file - canvas.flush() - canvas.getImage().save("test.png") + # now, for testing, save the image as a PNG file + canvas.flush() + canvas.getImage().save("test.png") - return canvas + return canvas def testit(canvas, s, x, y, font=None): - canvas.defaultLineColor = black - canvas.drawString(s, x, y, font=font) - canvas.defaultLineColor = blue - w = canvas.stringWidth(s, font=font) - canvas.drawLine(x, y, x + w, y) - canvas.drawLine(x, y - canvas.fontAscent(font=font), x + w, y - canvas.fontAscent(font=font)) - canvas.drawLine(x, y + canvas.fontDescent(font=font), x + w, y + canvas.fontDescent(font=font)) + canvas.defaultLineColor = black + canvas.drawString(s, x, y, font=font) + canvas.defaultLineColor = blue + w = canvas.stringWidth(s, font=font) + canvas.drawLine(x, y, x + w, y) + canvas.drawLine(x, y - canvas.fontAscent(font=font), x + w, y - canvas.fontAscent(font=font)) + canvas.drawLine(x, y + canvas.fontDescent(font=font), x + w, y + canvas.fontDescent(font=font)) def test2(): - canvas = PILCanvas() - testit(canvas, "Foogar", 20, 30) + canvas = PILCanvas() + testit(canvas, "Foogar", 20, 30) - testit(canvas, "Foogar", 20, 90, font=Font(size=24)) - global dammit - dammit = _pilFont(Font(size=24)) + testit(canvas, "Foogar", 20, 90, font=Font(size=24)) + global dammit + dammit = _pilFont(Font(size=24)) - testit(canvas, "Foogar", 20, 150, font=Font(face='courier', size=24)) + testit(canvas, "Foogar", 20, 150, font=Font(face='courier', size=24)) - testit(canvas, "Foogar", 20, 240, font=Font(face='courier')) + testit(canvas, "Foogar", 20, 240, font=Font(face='courier')) - import piddleQD - global qdcanvas - try: - qdcanvas.close() - except Exception: - pass - qdcanvas = piddleQD.QDCanvas() - qdcanvas.drawImage(canvas.getImage(), 0, 0) + import piddleQD + global qdcanvas + try: + qdcanvas.close() + except Exception: + pass + qdcanvas = piddleQD.QDCanvas() + qdcanvas.drawImage(canvas.getImage(), 0, 0) if __name__ == '__main__': - test() + test() diff --git a/rdkit/sping/PIL/pilfonts/removemedium.py b/rdkit/sping/PIL/pilfonts/removemedium.py index f68779db7..852f7c01f 100755 --- a/rdkit/sping/PIL/pilfonts/removemedium.py +++ b/rdkit/sping/PIL/pilfonts/removemedium.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import os import string diff --git a/rdkit/sping/PS/pidPS.py b/rdkit/sping/PS/pidPS.py index 8a9161f5f..06b333206 100755 --- a/rdkit/sping/PS/pidPS.py +++ b/rdkit/sping/PS/pidPS.py @@ -36,12 +36,11 @@ piddlePS - a PostScript backend for the PIDDLE drawing module # DSC: plan uses flags for keeping track of BeginX/EndX pairs. # convention: use flag _inXFlag -from __future__ import print_function + from rdkit.sping.pid import * from io import StringIO from . import psmetrics # for font info import math -from rdkit.six import string_types class PostScriptLevelException(ValueError): @@ -343,7 +342,7 @@ translate def _findFont(self, font): requested = font.face or "Serif" # Serif is the default - if isinstance(requested, string_types): + if isinstance(requested, str): requested = [requested] # once again, fall back to default, redundant, no? diff --git a/rdkit/sping/Pyart/pidPyart.py b/rdkit/sping/Pyart/pidPyart.py index 51f776be6..e979b1969 100755 --- a/rdkit/sping/Pyart/pidPyart.py +++ b/rdkit/sping/Pyart/pidPyart.py @@ -5,7 +5,7 @@ # __ native drawEllipse # __ native drawArc # __ drawImage support (work on Pyart side of things) -from __future__ import print_function + import pyart from rdkit.sping.pid import * from rdkit.sping.PDF import pdfmetrics diff --git a/rdkit/sping/Qt/pidQt.py b/rdkit/sping/Qt/pidQt.py index c647324f4..c781bd82d 100755 --- a/rdkit/sping/Qt/pidQt.py +++ b/rdkit/sping/Qt/pidQt.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -from __future__ import print_function + """piddleQt This module implements the PIDDLE/Sping API for a Qt canvas diff --git a/rdkit/sping/SVG/pidSVG.py b/rdkit/sping/SVG/pidSVG.py index 91cfe7f37..72069c71d 100755 --- a/rdkit/sping/SVG/pidSVG.py +++ b/rdkit/sping/SVG/pidSVG.py @@ -46,7 +46,6 @@ Greg Landrum (greglandrum@earthlink.net) 3/10/2000 from rdkit.sping.pid import * from rdkit.sping.PDF import pdfmetrics # for font info -from rdkit import six from math import * #SVG_HEADER = """ @@ -158,7 +157,7 @@ class SVGCanvas(Canvas): fontStr = '' if font.face is None: font.__dict__['face'] = 'sansserif' # quick hack -cwl - if isinstance(font.face, six.string_types): + if isinstance(font.face, str): if len(font.face.split()) > 1: familyStr = '\'%s\'' % font.face else: @@ -256,7 +255,7 @@ class SVGCanvas(Canvas): if file == None: file = self.name - if isinstance(file, six.string_types): + if isinstance(file, str): isFileName = 1 else: isFileName = 0 diff --git a/rdkit/sping/WX/pidWxDc.py b/rdkit/sping/WX/pidWxDc.py index e93139893..9b2822619 100755 --- a/rdkit/sping/WX/pidWxDc.py +++ b/rdkit/sping/WX/pidWxDc.py @@ -16,7 +16,7 @@ Code factoring and pil image support by Jeffrey Kunce see also piddleWxDcDemo.py ''' -from __future__ import print_function + from wxPython.wx import * from rdkit.sping import pid as sping_pid diff --git a/rdkit/sping/colors.py b/rdkit/sping/colors.py index ec186006b..c2200378a 100755 --- a/rdkit/sping/colors.py +++ b/rdkit/sping/colors.py @@ -1,83 +1,83 @@ -from rdkit import six - - -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- # Color -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- + + class Color: - """This class is used to represent color. Components red, green, blue - are in the range 0 (dark) to 1 (full intensity).""" + """This class is used to represent color. Components red, green, blue + are in the range 0 (dark) to 1 (full intensity).""" - def __init__(self, red=0, green=0, blue=0): - "Initialize with red, green, blue in range [0-1]." - _float = float - d = self.__dict__ - d["red"] = _float(red) - d["green"] = _float(green) - d["blue"] = _float(blue) + def __init__(self, red=0, green=0, blue=0): + "Initialize with red, green, blue in range [0-1]." + _float = float + d = self.__dict__ + d["red"] = _float(red) + d["green"] = _float(green) + d["blue"] = _float(blue) - def __setattr__(self, name, value): - raise TypeError("piddle.Color has read-only attributes") + def __setattr__(self, name, value): + raise TypeError("piddle.Color has read-only attributes") - def __mul__(self, x): - return Color(self.red * x, self.green * x, self.blue * x) + def __mul__(self, x): + return Color(self.red * x, self.green * x, self.blue * x) - def __rmul__(self, x): - return Color(self.red * x, self.green * x, self.blue * x) + def __rmul__(self, x): + return Color(self.red * x, self.green * x, self.blue * x) - def __truediv__(self, x): - return Color(self.red / x, self.green / x, self.blue / x) + def __truediv__(self, x): + return Color(self.red / x, self.green / x, self.blue / x) - def __div__(self, x): - return Color(self.red / x, self.green / x, self.blue / x) + def __div__(self, x): + return Color(self.red / x, self.green / x, self.blue / x) - def __rdiv__(self, x): - return Color(self.red / x, self.green / x, self.blue / x) + def __rdiv__(self, x): + return Color(self.red / x, self.green / x, self.blue / x) - def __add__(self, x): - return Color(self.red + x.red, self.green + x.green, self.blue + x.blue) + def __add__(self, x): + return Color(self.red + x.red, self.green + x.green, self.blue + x.blue) - def __sub__(self, x): - return Color(self.red - x.red, self.green - x.green, self.blue - x.blue) + def __sub__(self, x): + return Color(self.red - x.red, self.green - x.green, self.blue - x.blue) - def __repr__(self): - return "Color(%1.2f,%1.2f,%1.2f)" % (self.red, self.green, self.blue) + def __repr__(self): + return "Color(%1.2f,%1.2f,%1.2f)" % (self.red, self.green, self.blue) - def __hash__(self): - return hash((self.red, self.green, self.blue)) + def __hash__(self): + return hash((self.red, self.green, self.blue)) - def __cmp__(self, other): - try: - dsum = 4 * self.red - 4 * other.red + 2 * self.green - 2 * other.green + self.blue - other.blue - except Exception: - return -1 - if dsum > 0: - return 1 - if dsum < 0: - return -1 - return 0 + def __cmp__(self, other): + try: + dsum = 4 * self.red - 4 * other.red + 2 * self.green - 2 * other.green + self.blue - other.blue + except Exception: + return -1 + if dsum > 0: + return 1 + if dsum < 0: + return -1 + return 0 - def toHexRGB(self): - "Convert the color back to an integer suitable for the " - "0xRRGGBB hex representation" - r = int(0xFF * self.red) - g = int(0xFF * self.green) - b = int(0xFF * self.blue) - # print "r= %d, g=%d, b = %d" % (r,b,g) - return (r << 16) + (g << 8) + b + def toHexRGB(self): + "Convert the color back to an integer suitable for the " + "0xRRGGBB hex representation" + r = int(0xFF * self.red) + g = int(0xFF * self.green) + b = int(0xFF * self.blue) + # print "r= %d, g=%d, b = %d" % (r,b,g) + return (r << 16) + (g << 8) + b - def toHexStr(self): - return "0x%.6x" % self.toHexRGB() + def toHexStr(self): + return "0x%.6x" % self.toHexRGB() def HexColor(val): - """This class converts a hex string, or an actual integer number, - into the corresponding color. E.g., in "AABBCC" or 0xAABBCC, - AA is the red, BB is the green, and CC is the blue (00-FF).""" - if isinstance(val, six.string_types): - val = int(val, 16) - factor = 1.0 / 255 - return Color(factor * ((val >> 16) & 0xFF), factor * ((val >> 8) & 0xFF), factor * (val & 0xFF)) + """This class converts a hex string, or an actual integer number, + into the corresponding color. E.g., in "AABBCC" or 0xAABBCC, + AA is the red, BB is the green, and CC is the blue (00-FF).""" + if isinstance(val, str): + val = int(val, 16) + factor = 1.0 / 255 + return Color(factor * ((val >> 16) & 0xFF), factor * ((val >> 8) & 0xFF), factor * (val & 0xFF)) + # color constants -- mostly from HTML standard aliceblue = HexColor(0xF0F8FF) diff --git a/rdkit/sping/examples/formatted-strings.py b/rdkit/sping/examples/formatted-strings.py index c06059d21..1f5bf5d50 100755 --- a/rdkit/sping/examples/formatted-strings.py +++ b/rdkit/sping/examples/formatted-strings.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from __future__ import print_function + lines = [] lines.append("This is a test of the stringformat module.") diff --git a/rdkit/sping/examples/tkCanvas-with-scrollbars.py b/rdkit/sping/examples/tkCanvas-with-scrollbars.py index 6924a13f4..a68061b4c 100755 --- a/rdkit/sping/examples/tkCanvas-with-scrollbars.py +++ b/rdkit/sping/examples/tkCanvas-with-scrollbars.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from Tkinter import * from sping.TK import TKCanvas from sping import colors diff --git a/rdkit/sping/examples/tkCanvasPIL-with-scrollbars.py b/rdkit/sping/examples/tkCanvasPIL-with-scrollbars.py index ef481e767..c8f563e73 100755 --- a/rdkit/sping/examples/tkCanvasPIL-with-scrollbars.py +++ b/rdkit/sping/examples/tkCanvasPIL-with-scrollbars.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from Tkinter import * from sping.TK import TKCanvas, TKCanvasPIL from sping import colors diff --git a/rdkit/sping/pid.py b/rdkit/sping/pid.py index a7e207954..ee1b5e8dd 100755 --- a/rdkit/sping/pid.py +++ b/rdkit/sping/pid.py @@ -103,79 +103,80 @@ inch = 72 # 1 PIDDLE drawing unit == 1/72 imperial inch cm = inch / 2.54 # more sensible measurement unit -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- # StateSaver -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- class StateSaver: - """This is a little utility class for saving and restoring the - default drawing parameters of a canvas. To use it, add a line - like this before changing any of the parameters: + """This is a little utility class for saving and restoring the + default drawing parameters of a canvas. To use it, add a line + like this before changing any of the parameters: - saver = StateSaver(myCanvas) + saver = StateSaver(myCanvas) - then, when "saver" goes out of scope, it will automagically - restore the drawing parameters of myCanvas.""" + then, when "saver" goes out of scope, it will automagically + restore the drawing parameters of myCanvas.""" - def __init__(self, canvas): - self.canvas = canvas - self.defaultLineColor = canvas.defaultLineColor - self.defaultFillColor = canvas.defaultFillColor - self.defaultLineWidth = canvas.defaultLineWidth - self.defaultFont = canvas.defaultFont + def __init__(self, canvas): + self.canvas = canvas + self.defaultLineColor = canvas.defaultLineColor + self.defaultFillColor = canvas.defaultFillColor + self.defaultLineWidth = canvas.defaultLineWidth + self.defaultFont = canvas.defaultFont - def __del__(self): - self.canvas.defaultLineColor = self.defaultLineColor - self.canvas.defaultFillColor = self.defaultFillColor - self.canvas.defaultLineWidth = self.defaultLineWidth - self.canvas.defaultFont = self.defaultFont + def __del__(self): + self.canvas.defaultLineColor = self.defaultLineColor + self.canvas.defaultFillColor = self.defaultFillColor + self.canvas.defaultLineWidth = self.defaultLineWidth + self.canvas.defaultFont = self.defaultFont -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- # Font -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- class Font: - "This class represents font typeface, size, and style." + "This class represents font typeface, size, and style." - def __init__(self, size=12, bold=0, italic=0, underline=0, face=None): - # public mode variables - d = self.__dict__ - d["bold"] = bold - d["italic"] = italic - d["underline"] = underline + def __init__(self, size=12, bold=0, italic=0, underline=0, face=None): + # public mode variables + d = self.__dict__ + d["bold"] = bold + d["italic"] = italic + d["underline"] = underline - # public font size (points) - d["size"] = size + # public font size (points) + d["size"] = size - # typeface -- a name or set of names, interpreted by the Canvas, - # or "None" to indicate the Canvas-specific default typeface - d["face"] = face + # typeface -- a name or set of names, interpreted by the Canvas, + # or "None" to indicate the Canvas-specific default typeface + d["face"] = face - def __cmp__(self, other): - """Compare two fonts to see if they're the same.""" - if self.face == other.face and self.size == other.size and \ - self.bold == other.bold and self.italic == other.italic \ - and self.underline == other.underline: - return 0 - else: - return 1 + def __cmp__(self, other): + """Compare two fonts to see if they're the same.""" + if self.face == other.face and self.size == other.size and \ + self.bold == other.bold and self.italic == other.italic \ + and self.underline == other.underline: + return 0 + else: + return 1 - def __repr__(self): - return "Font(%d,%d,%d,%d,%s)" % (self.size, self.bold, self.italic, \ - self.underline, repr(self.face)) + def __repr__(self): + return "Font(%d,%d,%d,%d,%s)" % (self.size, self.bold, self.italic, + self.underline, repr(self.face)) - def __setattr__(self, name, value): - raise TypeError("piddle.Font has read-only attributes") + def __setattr__(self, name, value): + raise TypeError("piddle.Font has read-only attributes") -#------------------------------------------------------------------------- + +# ------------------------------------------------------------------------- # constants needed for Canvas.drawFigure -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- figureLine = 1 figureArc = 2 figureCurve = 3 -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- # key constants used for special keys in the onKey callback -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- keyBksp = '\010' # (erases char to left of cursor) keyDel = '\177' # (erases char to right of cursor) keyLeft = '\034' @@ -193,402 +194,402 @@ modShift = 1 # shift key was also held modControl = 2 # control key was also held -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- # Canvas -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------- class Canvas: - """This is the base class for a drawing canvas. The 'plug-in renderers' - we speak of are really just classes derived from this one, which implement - the various drawing methods.""" + """This is the base class for a drawing canvas. The 'plug-in renderers' + we speak of are really just classes derived from this one, which implement + the various drawing methods.""" - def __init__(self, size=(300, 300), name="PIDDLE"): - """Initialize the canvas, and set default drawing parameters. - Derived classes should be sure to call this method.""" - # defaults used when drawing - self.defaultLineColor = black - self.defaultFillColor = transparent - self.defaultLineWidth = 1 - self.defaultFont = Font() + def __init__(self, size=(300, 300), name="PIDDLE"): + """Initialize the canvas, and set default drawing parameters. + Derived classes should be sure to call this method.""" + # defaults used when drawing + self.defaultLineColor = black + self.defaultFillColor = transparent + self.defaultLineWidth = 1 + self.defaultFont = Font() - # set up null event handlers + # set up null event handlers - # onClick: x,y is Canvas coordinates of mouseclick - def ignoreClick(canvas, x, y): - pass + # onClick: x,y is Canvas coordinates of mouseclick + def ignoreClick(canvas, x, y): + pass - self.onClick = ignoreClick + self.onClick = ignoreClick - # onOver: x,y is Canvas location of mouse - def ignoreOver(canvas, x, y): - pass + # onOver: x,y is Canvas location of mouse + def ignoreOver(canvas, x, y): + pass - self.onOver = ignoreOver + self.onOver = ignoreOver - # onKey: key is printable character or one of the constants above; - # modifiers is a tuple containing any of (modShift, modControl) - def ignoreKey(canvas, key, modifiers): - pass + # onKey: key is printable character or one of the constants above; + # modifiers is a tuple containing any of (modShift, modControl) + def ignoreKey(canvas, key, modifiers): + pass - self.onKey = ignoreKey + self.onKey = ignoreKey - # size and name, for user's reference - self.size, self.name = size, name + # size and name, for user's reference + self.size, self.name = size, name - def getSize(self): - # gL - return self.size + def getSize(self): + # gL + return self.size - #------------ canvas capabilities ------------- - def isInteractive(self): - "Returns 1 if onClick, onOver, and onKey events are possible, 0 otherwise." - return 0 + # ------------ canvas capabilities ------------- + def isInteractive(self): + "Returns 1 if onClick, onOver, and onKey events are possible, 0 otherwise." + return 0 - def canUpdate(self): - "Returns 1 if the drawing can be meaningfully updated over time \ - (e.g., screen graphics), 0 otherwise (e.g., drawing to a file)." + def canUpdate(self): + "Returns 1 if the drawing can be meaningfully updated over time \ + (e.g., screen graphics), 0 otherwise (e.g., drawing to a file)." - return 0 + return 0 - #------------ general management ------------- - def clear(self): - "Call this to clear and reset the graphics context." - pass + # ------------ general management ------------- + def clear(self): + "Call this to clear and reset the graphics context." + pass - def flush(self): - "Call this to indicate that any comamnds that have been issued \ - but which might be buffered should be flushed to the screen" + def flush(self): + "Call this to indicate that any comamnds that have been issued \ + but which might be buffered should be flushed to the screen" - pass + pass - def save(self, file=None, format=None): - """For backends that can be save to a file or sent to a - stream, create a valid file out of what's currently been - drawn on the canvas. Trigger any finalization here. - Though some backends may allow further drawing after this call, - presume that this is not possible for maximum portability + def save(self, file=None, format=None): + """For backends that can be save to a file or sent to a + stream, create a valid file out of what's currently been + drawn on the canvas. Trigger any finalization here. + Though some backends may allow further drawing after this call, + presume that this is not possible for maximum portability - file may be either a string or a file object with a write method - if left as the default, the canvas's current name will be used + file may be either a string or a file object with a write method + if left as the default, the canvas's current name will be used - format may be used to specify the type of file format to use as - well as any corresponding extension to use for the filename - This is an optional argument and backends may ignore it if - they only produce one file format.""" - pass + format may be used to specify the type of file format to use as + well as any corresponding extension to use for the filename + This is an optional argument and backends may ignore it if + they only produce one file format.""" + pass - def setInfoLine(self, s): - "For interactive Canvases, displays the given string in the \ - 'info line' somewhere where the user can probably see it." + def setInfoLine(self, s): + "For interactive Canvases, displays the given string in the \ + 'info line' somewhere where the user can probably see it." - pass + pass - #------------ string/font info ------------ + # ------------ string/font info ------------ - def stringBox(self, s, font=None): - return self.stringWidth(s, font), self.fontHeight(font) + def stringBox(self, s, font=None): + return self.stringWidth(s, font), self.fontHeight(font) - def stringWidth(self, s, font=None): - "Return the logical width of the string if it were drawn \ - in the current font (defaults to self.font)." + def stringWidth(self, s, font=None): + "Return the logical width of the string if it were drawn \ + in the current font (defaults to self.font)." - raise NotImplementedError('stringWidth') + raise NotImplementedError('stringWidth') - def fontHeight(self, font=None): - "Find the height of one line of text (baseline to baseline) of the given font." - # the following approxmation is correct for PostScript fonts, - # and should be close for most others: - if not font: - font = self.defaultFont - return 1.2 * font.size - - def fontAscent(self, font=None): - "Find the ascent (height above base) of the given font." - raise NotImplementedError('fontAscent') - - def fontDescent(self, font=None): - "Find the descent (extent below base) of the given font." - raise NotImplementedError('fontDescent') + def fontHeight(self, font=None): + "Find the height of one line of text (baseline to baseline) of the given font." + # the following approxmation is correct for PostScript fonts, + # and should be close for most others: + if not font: + font = self.defaultFont + return 1.2 * font.size + + def fontAscent(self, font=None): + "Find the ascent (height above base) of the given font." + raise NotImplementedError('fontAscent') + + def fontDescent(self, font=None): + "Find the descent (extent below base) of the given font." + raise NotImplementedError('fontDescent') - #------------- drawing helpers -------------- + # ------------- drawing helpers -------------- - def arcPoints(self, x1, y1, x2, y2, startAng=0, extent=360): - "Return a list of points approximating the given arc." - # Note: this implementation is simple and not particularly efficient. - xScale = abs((x2 - x1) / 2.0) - yScale = abs((y2 - y1) / 2.0) - - x = min(x1, x2) + xScale - y = min(y1, y2) + yScale - - # "Guesstimate" a proper number of points for the arc: - steps = min(max(xScale, yScale) * (extent / 10.0) / 10, 200) - if steps < 5: - steps = 5 - - from math import sin, cos, pi - - pointlist = [] - step = float(extent) / steps - angle = startAng - for i in range(int(steps + 1)): - point = (x + xScale * cos((angle / 180.0) * pi), y - yScale * sin((angle / 180.0) * pi)) - pointlist.append(point) - angle = angle + step - - return pointlist - - def curvePoints(self, x1, y1, x2, y2, x3, y3, x4, y4): - "Return a list of points approximating the given Bezier curve." - - # Adapted from BEZGEN3.HTML, one of the many - # Bezier utilities found on Don Lancaster's Guru's Lair at - # - bezierSteps = min( - max(max(x1, x2, x3, x4) - min(x1, x2, x3, x3), max(y1, y2, y3, y4) - min(y1, y2, y3, y4)), - 200) - - dt1 = 1. / bezierSteps - dt2 = dt1 * dt1 - dt3 = dt2 * dt1 - - xx = x1 - yy = y1 - ux = uy = vx = vy = 0 - - ax = x4 - 3 * x3 + 3 * x2 - x1 - ay = y4 - 3 * y3 + 3 * y2 - y1 - bx = 3 * x3 - 6 * x2 + 3 * x1 - by = 3 * y3 - 6 * y2 + 3 * y1 - cx = 3 * x2 - 3 * x1 - cy = 3 * y2 - 3 * y1 - - mx1 = ax * dt3 - my1 = ay * dt3 - - lx1 = bx * dt2 - ly1 = by * dt2 - - kx = mx1 + lx1 + cx * dt1 - ky = my1 + ly1 + cy * dt1 - - mx = 6 * mx1 - my = 6 * my1 - - lx = mx + 2 * lx1 - ly = my + 2 * ly1 - - pointList = [(xx, yy)] - - for i in range(bezierSteps): - xx = xx + ux + kx - yy = yy + uy + ky - ux = ux + vx + lx - uy = uy + vy + ly - vx = vx + mx - vy = vy + my - pointList.append((xx, yy)) - - return pointList - - def drawMultiLineString(self, s, x, y, font=None, color=None, angle=0, **kwargs): - "Breaks string into lines (on \n, \r, \n\r, or \r\n), and calls drawString on each." - import math - h = self.fontHeight(font) - dy = h * math.cos(angle * math.pi / 180.0) - dx = h * math.sin(angle * math.pi / 180.0) - s = s.replace('\r\n', '\n') - s = s.replace('\n\r', '\n') - s = s.replace('\r', '\n') - lines = s.split('\n') - for line in lines: - self.drawString(line, x, y, font, color, angle) - x = x + dx - y = y + dy - - #------------- drawing methods -------------- - - # Note default parameters "=None" means use the defaults - # set in the Canvas method: defaultLineColor, etc. - - def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): - "Draw a straight line between x1,y1 and x2,y2." - raise NotImplementedError('drawLine') - - def drawLines(self, lineList, color=None, width=None, dash=None, **kwargs): - "Draw a set of lines of uniform color and width. \ - lineList: a list of (x1,y1,x2,y2) line coordinates." - - # default implementation: - for x1, y1, x2, y2 in lineList: - self.drawLine(x1, y1, x2, y2, color, width, dash=dash, **kwargs) - - # For text, color defaults to self.lineColor. - - def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): - "Draw a string starting at location x,y." - # NOTE: the baseline goes on y; drawing covers (y-ascent,y+descent) - raise NotImplementedError('drawString') - - # For fillable shapes, edgeColor defaults to self.defaultLineColor, - # edgeWidth defaults to self.defaultLineWidth, and - # fillColor defaults to self.defaultFillColor. - # Specify "don't fill" by passing fillColor=transparent. - - def drawCurve(self, x1, y1, x2, y2, x3, y3, x4, y4, edgeColor=None, edgeWidth=None, - fillColor=None, closed=0, dash=None, **kwargs): - "Draw a Bezier curve with control points x1,y1 to x4,y4." - pointlist = self.curvePoints(x1, y1, x2, y2, x3, y3, x4, y4) - self.drawPolygon(pointlist, edgeColor=edgeColor, edgeWidth=edgeWidth, fillColor=fillColor, - closed=closed, dash=dash, **kwargs) - - def drawRect(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, - **kwargs): - "Draw the rectangle between x1,y1, and x2,y2. \ - These should have x10, and ry>0." - - x1, x2 = min(x1, x2), max(x1, x2) - y1, y2 = min(y1, y2), max(y1, y2) - - dx = rx * 2 - dy = ry * 2 - - partList = [ - (figureArc, x1, y1, x1 + dx, y1 + dy, 180, -90), (figureLine, x1 + rx, y1, x2 - rx, y1), - (figureArc, x2 - dx, y1, x2, y1 + dy, 90, -90), (figureLine, x2, y1 + ry, x2, y2 - ry), - (figureArc, x2 - dx, y2, x2, y2 - dy, 0, -90), (figureLine, x2 - rx, y2, x1 + rx, y2), - (figureArc, x1, y2, x1 + dx, y2 - dy, -90, -90), (figureLine, x1, y2 - ry, x1, y1 + rx) - ] - - self.drawFigure(partList, edgeColor, edgeWidth, fillColor, closed=1, dash=dash, **kwargs) - - def drawEllipse(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, - **kwargs): - "Draw an orthogonal ellipse inscribed within the rectangle x1,y1,x2,y2. \ - These should have x1 + bezierSteps = min( + max(max(x1, x2, x3, x4) - min(x1, x2, x3, x3), max(y1, y2, y3, y4) - min(y1, y2, y3, y4)), + 200) + + dt1 = 1. / bezierSteps + dt2 = dt1 * dt1 + dt3 = dt2 * dt1 + + xx = x1 + yy = y1 + ux = uy = vx = vy = 0 + + ax = x4 - 3 * x3 + 3 * x2 - x1 + ay = y4 - 3 * y3 + 3 * y2 - y1 + bx = 3 * x3 - 6 * x2 + 3 * x1 + by = 3 * y3 - 6 * y2 + 3 * y1 + cx = 3 * x2 - 3 * x1 + cy = 3 * y2 - 3 * y1 + + mx1 = ax * dt3 + my1 = ay * dt3 + + lx1 = bx * dt2 + ly1 = by * dt2 + + kx = mx1 + lx1 + cx * dt1 + ky = my1 + ly1 + cy * dt1 + + mx = 6 * mx1 + my = 6 * my1 + + lx = mx + 2 * lx1 + ly = my + 2 * ly1 + + pointList = [(xx, yy)] + + for i in range(bezierSteps): + xx = xx + ux + kx + yy = yy + uy + ky + ux = ux + vx + lx + uy = uy + vy + ly + vx = vx + mx + vy = vy + my + pointList.append((xx, yy)) + + return pointList + + def drawMultiLineString(self, s, x, y, font=None, color=None, angle=0, **kwargs): + "Breaks string into lines (on \n, \r, \n\r, or \r\n), and calls drawString on each." + import math + h = self.fontHeight(font) + dy = h * math.cos(angle * math.pi / 180.0) + dx = h * math.sin(angle * math.pi / 180.0) + s = s.replace('\r\n', '\n') + s = s.replace('\n\r', '\n') + s = s.replace('\r', '\n') + lines = s.split('\n') + for line in lines: + self.drawString(line, x, y, font, color, angle) + x = x + dx + y = y + dy + + # ------------- drawing methods -------------- + + # Note default parameters "=None" means use the defaults + # set in the Canvas method: defaultLineColor, etc. + + def drawLine(self, x1, y1, x2, y2, color=None, width=None, dash=None, **kwargs): + "Draw a straight line between x1,y1 and x2,y2." + raise NotImplementedError('drawLine') + + def drawLines(self, lineList, color=None, width=None, dash=None, **kwargs): + "Draw a set of lines of uniform color and width. \ + lineList: a list of (x1,y1,x2,y2) line coordinates." + + # default implementation: + for x1, y1, x2, y2 in lineList: + self.drawLine(x1, y1, x2, y2, color, width, dash=dash, **kwargs) + + # For text, color defaults to self.lineColor. + + def drawString(self, s, x, y, font=None, color=None, angle=0, **kwargs): + "Draw a string starting at location x,y." + # NOTE: the baseline goes on y; drawing covers (y-ascent,y+descent) + raise NotImplementedError('drawString') + + # For fillable shapes, edgeColor defaults to self.defaultLineColor, + # edgeWidth defaults to self.defaultLineWidth, and + # fillColor defaults to self.defaultFillColor. + # Specify "don't fill" by passing fillColor=transparent. + + def drawCurve(self, x1, y1, x2, y2, x3, y3, x4, y4, edgeColor=None, edgeWidth=None, + fillColor=None, closed=0, dash=None, **kwargs): + "Draw a Bezier curve with control points x1,y1 to x4,y4." + pointlist = self.curvePoints(x1, y1, x2, y2, x3, y3, x4, y4) + self.drawPolygon(pointlist, edgeColor=edgeColor, edgeWidth=edgeWidth, fillColor=fillColor, + closed=closed, dash=dash, **kwargs) + + def drawRect(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, + **kwargs): + "Draw the rectangle between x1,y1, and x2,y2. \ + These should have x10, and ry>0." + + x1, x2 = min(x1, x2), max(x1, x2) + y1, y2 = min(y1, y2), max(y1, y2) + + dx = rx * 2 + dy = ry * 2 + + partList = [ + (figureArc, x1, y1, x1 + dx, y1 + dy, 180, -90), (figureLine, x1 + rx, y1, x2 - rx, y1), + (figureArc, x2 - dx, y1, x2, y1 + dy, 90, -90), (figureLine, x2, y1 + ry, x2, y2 - ry), + (figureArc, x2 - dx, y2, x2, y2 - dy, 0, -90), (figureLine, x2 - rx, y2, x1 + rx, y2), + (figureArc, x1, y2, x1 + dx, y2 - dy, -90, -90), (figureLine, x1, y2 - ry, x1, y1 + rx) + ] + + self.drawFigure(partList, edgeColor, edgeWidth, fillColor, closed=1, dash=dash, **kwargs) + + def drawEllipse(self, x1, y1, x2, y2, edgeColor=None, edgeWidth=None, fillColor=None, dash=None, + **kwargs): + "Draw an orthogonal ellipse inscribed within the rectangle x1,y1,x2,y2. \ + These should have x1 sets a flag upon entry and # clears the flag upon exit. Each call to handle_data creates a diff --git a/rdkit/sping/tests/pidtest.py b/rdkit/sping/tests/pidtest.py index 800b435b0..d4489e256 100755 --- a/rdkit/sping/tests/pidtest.py +++ b/rdkit/sping/tests/pidtest.py @@ -2,11 +2,10 @@ This module puts the various PIDDLE backends through their paces. """ -from __future__ import print_function + from rdkit.sping import pagesizes from rdkit.sping.pid import * -from rdkit.six.moves import input # The original code imported letters, a more generic lisit. This is no longer supported. from string import ascii_letters as LETTERS import math @@ -15,441 +14,447 @@ backends = ['PDF', 'PIL', 'TK', 'PS', 'SVG', 'WX'] # 'piddleAI','piddleQD','pid backends.sort() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # note, these tests do not flush() the canvas -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def minimal(canvasClass): - """Just a very basic test of line drawing and canvas size.""" - canvas = canvasClass(pagesizes.A6, "test-minimal") # A6 is a quarter page - drawMinimal(canvas) - return canvas + """Just a very basic test of line drawing and canvas size.""" + canvas = canvasClass(pagesizes.A6, "test-minimal") # A6 is a quarter page + drawMinimal(canvas) + return canvas def drawMinimal(canvas): - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope - size = canvas.size # (actual size *may* differ from requested size) - canvas.defaultLineColor = green - canvas.drawLine(1, 1, size[0] - 1, size[1] - 1) - canvas.drawLine(1, size[1] - 1, size[0] - 1, 1) - canvas.drawRect(1, 1, size[0] - 1, size[1] - 1, edgeWidth=5) + # leave canvas state as you found it, restores state when leaves scope + saver = StateSaver(canvas) + size = canvas.size # (actual size *may* differ from requested size) + canvas.defaultLineColor = green + canvas.drawLine(1, 1, size[0] - 1, size[1] - 1) + canvas.drawLine(1, size[1] - 1, size[0] - 1, 1) + canvas.drawRect(1, 1, size[0] - 1, size[1] - 1, edgeWidth=5) - return canvas + return canvas -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def basics(canvasClass): - """A general test of most of the drawing primitives except images and strings.""" - canvas = canvasClass((300, 300), "test-basics") - return drawBasics(canvas) + """A general test of most of the drawing primitives except images and strings.""" + canvas = canvasClass((300, 300), "test-basics") + return drawBasics(canvas) def drawBasics(canvas): - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope - canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue - canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) - canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) - canvas.defaultLineColor = black + # leave canvas state as you found it, restores state when leaves scope + saver = StateSaver(canvas) + canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue + canvas.drawLines(map(lambda i: (i * 10, 0, i * 10, 300), range(30))) + canvas.drawLines(map(lambda i: (0, i * 10, 300, i * 10), range(30))) + canvas.defaultLineColor = black - canvas.drawLine(10, 200, 20, 190, color=red) - canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) + canvas.drawLine(10, 200, 20, 190, color=red) + canvas.drawEllipse(130, 30, 200, 100, fillColor=yellow, edgeWidth=4) - canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) + canvas.drawArc(130, 30, 200, 100, 45, 50, fillColor=blue, edgeColor=navy, edgeWidth=4) - canvas.defaultLineWidth = 4 - canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon) - canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) + canvas.defaultLineWidth = 4 + canvas.drawRoundRect(30, 30, 100, 100, fillColor=blue, edgeColor=maroon) + canvas.drawCurve(20, 20, 100, 50, 50, 100, 160, 160) - #canvas.drawString("This is a test!", 30,130, Font(face="times",size=16,bold=1), - # color=green, angle=-45) + # canvas.drawString("This is a test!", 30,130, Font(face="times",size=16,bold=1), + # color=green, angle=-45) - polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] - canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) + polypoints = [(160, 120), (130, 190), (210, 145), (110, 145), (190, 190)] + canvas.drawPolygon(polypoints, fillColor=lime, edgeColor=red, edgeWidth=3, closed=1) - canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) - canvas.drawLine(200, 260, 260, 260, color=green, width=5) - canvas.drawLine(260, 200, 260, 260, color=red, width=5) + canvas.drawRect(200, 200, 260, 260, edgeColor=yellow, edgeWidth=5) + canvas.drawLine(200, 260, 260, 260, color=green, width=5) + canvas.drawLine(260, 200, 260, 260, color=red, width=5) - return canvas + return canvas -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def advanced(canvasClass): - """A test of figures and images.""" - canvas = canvasClass((300, 300), "test-advanced") - return drawAdvanced(canvas) + """A test of figures and images.""" + canvas = canvasClass((300, 300), "test-advanced") + return drawAdvanced(canvas) def drawAdvanced(canvas): - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope - figure = [ - (figureCurve, 20, 20, 100, 50, 50, 100, 160, 160), (figureLine, 200, 200, 250, 150), - (figureArc, 50, 10, 250, 150, 10, 90) - ] + # leave canvas state as you found it, restores state when leaves scope + saver = StateSaver(canvas) + figure = [ + (figureCurve, 20, 20, 100, 50, 50, 100, 160, 160), (figureLine, 200, 200, 250, 150), + (figureArc, 50, 10, 250, 150, 10, 90) + ] - canvas.drawFigure(figure, fillColor=yellow, edgeWidth=4) + canvas.drawFigure(figure, fillColor=yellow, edgeWidth=4) - try: - from PIL import Image - except ImportError: - canvas.drawString("PIL not available!", 20, 200) - Image = None + try: + from PIL import Image + except ImportError: + canvas.drawString("PIL not available!", 20, 200) + Image = None - if Image: - img = Image.open("python.gif") - canvas.drawImage(img, 120, 50, 120 + 32, 50 + 64) - canvas.drawImage(img, 0, 210, 300, 210 + 32) + if Image: + img = Image.open("python.gif") + canvas.drawImage(img, 120, 50, 120 + 32, 50 + 64) + canvas.drawImage(img, 0, 210, 300, 210 + 32) - return canvas + return canvas -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def bluefunc(x): - return 1.0 / (1.0 + math.exp(-10 * (x - 0.6))) + return 1.0 / (1.0 + math.exp(-10 * (x - 0.6))) def redfunc(x): - return 1.0 / (1.0 + math.exp(10 * (x - 0.5))) + return 1.0 / (1.0 + math.exp(10 * (x - 0.5))) def greenfunc(x): - return 1 - pow(redfunc(x + 0.2), 2) - bluefunc(x - 0.3) + return 1 - pow(redfunc(x + 0.2), 2) - bluefunc(x - 0.3) def spectrum(canvasClass): - canvas = canvasClass((300, 300), "test-spectrum") - return drawSpectrum(canvas) + canvas = canvasClass((300, 300), "test-spectrum") + return drawSpectrum(canvas) def drawSpectrum(canvas): - """Generates a spectrum plot; illustrates colors and useful application.""" - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope + """Generates a spectrum plot; illustrates colors and useful application.""" + saver = StateSaver( + canvas) # leave canvas state as you found it, restores state when leaves scope - def plot(f, canvas, offset=0): - for i in range(0, 100): - x = float(i) / 100 - canvas.drawLine(i * 3 + offset, 250, i * 3 + offset, 250 - 100 * f(x)) + def plot(f, canvas, offset=0): + for i in range(0, 100): + x = float(i) / 100 + canvas.drawLine(i * 3 + offset, 250, i * 3 + offset, 250 - 100 * f(x)) - def genColors(n=100): - out = [None] * n - for i in range(n): - x = float(i) / n - out[i] = Color(redfunc(x), greenfunc(x), bluefunc(x)) - return out + def genColors(n=100): + out = [None] * n + for i in range(n): + x = float(i) / n + out[i] = Color(redfunc(x), greenfunc(x), bluefunc(x)) + return out - colors = genColors(300) + colors = genColors(300) - # draw a black background for the spectrum - canvas.drawRect(0, 0, 300, 100, edgeColor=black, fillColor=black) + # draw a black background for the spectrum + canvas.drawRect(0, 0, 300, 100, edgeColor=black, fillColor=black) - # draw the spectrum - for i in range(len(colors)): - canvas.drawLine(i, 20, i, 80, colors[i]) + # draw the spectrum + for i in range(len(colors)): + canvas.drawLine(i, 20, i, 80, colors[i]) - # plot the components of the spectrum - canvas.defaultLineColor = red - plot(redfunc, canvas) + # plot the components of the spectrum + canvas.defaultLineColor = red + plot(redfunc, canvas) - canvas.defaultLineColor = blue - plot(bluefunc, canvas, 1) + canvas.defaultLineColor = blue + plot(bluefunc, canvas, 1) - canvas.defaultLineColor = green - plot(greenfunc, canvas, 2) + canvas.defaultLineColor = green + plot(greenfunc, canvas, 2) - return canvas + return canvas -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def strings(canvasClass): - canvas = canvasClass(size=(400, 400), name="test-strings") - return drawStrings(canvas) + canvas = canvasClass(size=(400, 400), name="test-strings") + return drawStrings(canvas) def drawStrings(canvas): - """Checks font metrics, and also illustrates the standard fonts.""" + """Checks font metrics, and also illustrates the standard fonts.""" - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope + # leave canvas state as you found it, restores state when leaves scope + saver = StateSaver(canvas) - def Write(canvas, s, font, curs): - if font: - canvas.defaultFont = font - text = s - while text and text[-1] == '\n': - text = text[:-1] - canvas.drawString(text, x=curs[0], y=curs[1]) - if s[-1] == '\n': - curs[0] = 10 - curs[1] = curs[1] + canvas.fontHeight() + canvas.fontDescent() - else: - curs[0] = curs[0] + canvas.stringWidth(s) + def Write(canvas, s, font, curs): + if font: + canvas.defaultFont = font + text = s + while text and text[-1] == '\n': + text = text[:-1] + canvas.drawString(text, x=curs[0], y=curs[1]) + if s[-1] == '\n': + curs[0] = 10 + curs[1] = curs[1] + canvas.fontHeight() + canvas.fontDescent() + else: + curs[0] = curs[0] + canvas.stringWidth(s) - def StandardFonts(canvas, Write): - canvas.defaultLineColor = black - curs = [10, 70] - for size in (12, 18): - for fontname in ("times", "courier", "helvetica", "symbol", "monospaced", "serif", - "sansserif"): - curs[0] = 10 - curs[1] = curs[1] + size * 1.5 - Write(canvas, "%s %d " % (fontname, size), Font(face=fontname, size=size), curs) - Write(canvas, "bold ", Font(face=fontname, size=size, bold=1), curs) - Write(canvas, "italic ", Font(face=fontname, size=size, italic=1), curs) - Write(canvas, "underline", Font(face=fontname, size=size, underline=1), curs) + def StandardFonts(canvas, Write): + canvas.defaultLineColor = black + curs = [10, 70] + for size in (12, 18): + for fontname in ("times", "courier", "helvetica", "symbol", "monospaced", "serif", + "sansserif"): + curs[0] = 10 + curs[1] = curs[1] + size * 1.5 + Write(canvas, "%s %d " % (fontname, size), Font(face=fontname, size=size), curs) + Write(canvas, "bold ", Font(face=fontname, size=size, bold=1), curs) + Write(canvas, "italic ", Font(face=fontname, size=size, italic=1), curs) + Write(canvas, "underline", Font(face=fontname, size=size, underline=1), curs) - CenterAndBox(canvas, "spam, spam, spam, baked beans, and spam!") - StandardFonts(canvas, Write) - return canvas + CenterAndBox(canvas, "spam, spam, spam, baked beans, and spam!") + StandardFonts(canvas, Write) + return canvas def CenterAndBox(canvas, s, cx=200, y=40): - "tests string positioning, stringWidth, fontAscent, and fontDescent" - canvas.drawLine(cx, y - 30, cx, y + 30, color=yellow) - w = canvas - w = canvas.stringWidth(s) + "tests string positioning, stringWidth, fontAscent, and fontDescent" + canvas.drawLine(cx, y - 30, cx, y + 30, color=yellow) + w = canvas + w = canvas.stringWidth(s) - canvas.drawLine(cx - w / 2, y, cx + w / 2, y, color=red) - canvas.drawString(s, cx - w / 2, y) - canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue - canvas.drawLine(cx - w / 2, y - 20, cx - w / 2, y + 20) # left - canvas.drawLine(cx + w / 2, y - 20, cx + w / 2, y + 20) # right - asc, desc = canvas.fontAscent(), canvas.fontDescent() - canvas.drawLine(cx - w / 2 - 20, y - asc, cx + w / 2 + 20, y - asc) # top - canvas.drawLine(cx - w / 2 - 20, y + desc, cx + w / 2 + 20, y + desc) # bottom + canvas.drawLine(cx - w / 2, y, cx + w / 2, y, color=red) + canvas.drawString(s, cx - w / 2, y) + canvas.defaultLineColor = Color(0.7, 0.7, 1.0) # light blue + canvas.drawLine(cx - w / 2, y - 20, cx - w / 2, y + 20) # left + canvas.drawLine(cx + w / 2, y - 20, cx + w / 2, y + 20) # right + asc, desc = canvas.fontAscent(), canvas.fontDescent() + canvas.drawLine(cx - w / 2 - 20, y - asc, cx + w / 2 + 20, y - asc) # top + canvas.drawLine(cx - w / 2 - 20, y + desc, cx + w / 2 + 20, y + desc) # bottom -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def rotstring(canvasClass): - canvas = canvasClass((450, 300), name='test-rotstring') - return drawRotstring(canvas) + canvas = canvasClass((450, 300), name='test-rotstring') + return drawRotstring(canvas) def drawRotstring(canvas): - """Draws rotated strings.""" - saver = StateSaver(canvas) # leave canvas state as you found it, restores state when leaves scope - canvas.defaultFont = Font(bold=1) + """Draws rotated strings.""" + saver = StateSaver( + canvas) # leave canvas state as you found it, restores state when leaves scope + canvas.defaultFont = Font(bold=1) - canvas.defaultLineColor = (blue + white) / 2 - canvas.drawLine(0, 150, 300, 150) - canvas.drawLine(150, 0, 150, 300) + canvas.defaultLineColor = (blue + white) / 2 + canvas.drawLine(0, 150, 300, 150) + canvas.drawLine(150, 0, 150, 300) - s = " __albatros at " - w = canvas.stringWidth(s) - canvas.drawEllipse(150 - w, 150 - w, 150 + w, 150 + w, fillColor=transparent) + s = " __albatros at " + w = canvas.stringWidth(s) + canvas.drawEllipse(150 - w, 150 - w, 150 + w, 150 + w, fillColor=transparent) - colors = [red, orange, yellow, green, blue, purple] - cnum = 0 - for ang in range(0, 359, 30): - canvas.defaultLineColor = colors[cnum] - s2 = s + str(ang) - canvas.drawString(s2, 150, 150, angle=ang) - cnum = (cnum + 1) % len(colors) + colors = [red, orange, yellow, green, blue, purple] + cnum = 0 + for ang in range(0, 359, 30): + canvas.defaultLineColor = colors[cnum] + s2 = s + str(ang) + canvas.drawString(s2, 150, 150, angle=ang) + cnum = (cnum + 1) % len(colors) - canvas.drawString("This is a\nrotated\nmulti-line string!!!", 350, 100, angle=-90, - font=Font(underline=1)) - #canvas.drawString( "This is a\nrotated\nmulti-line string!!!", 400, 175, angle= -45, font=Font(underline=1) ) - return canvas + canvas.drawString("This is a\nrotated\nmulti-line string!!!", 350, 100, angle=-90, + font=Font(underline=1)) + #canvas.drawString( "This is a\nrotated\nmulti-line string!!!", 400, 175, angle= -45, font=Font(underline=1) ) + return canvas -#---------------------------------------------------------------------- -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def tkTest(testfunc): - # TK tests are called from here because need TK's event loop - try: - import sping.TK - import Tkinter - except ImportError: - print("A module needed for sping.TK is not available, select another backend") - return + # TK tests are called from here because need TK's event loop + try: + import sping.TK + import Tkinter + except ImportError: + print("A module needed for sping.TK is not available, select another backend") + return - root = Tkinter.Tk() - frame = Tkinter.Frame(root) # label='piddletestTK' + root = Tkinter.Tk() + frame = Tkinter.Frame(root) # label='piddletestTK' - # try new Tk canvas - tkcanvas = sping.TK.TKCanvas(size=(400, 400), name='sping-testTK', master=frame) - bframe = Tkinter.Frame(root) + # try new Tk canvas + tkcanvas = sping.TK.TKCanvas(size=(400, 400), name='sping-testTK', master=frame) + bframe = Tkinter.Frame(root) - minimalB = Tkinter.Button( - bframe, text='minimal test', - command=lambda c=tkcanvas: (c.clear(), drawMinimal(c), c.flush())).pack(side=Tkinter.LEFT) - basicB = Tkinter.Button( - bframe, text='basic test', - command=lambda c=tkcanvas: (c.clear(), drawBasics(c), c.flush())).pack(side=Tkinter.LEFT) - spectB = Tkinter.Button( - bframe, text='spectrum test', - command=lambda c=tkcanvas: (c.clear(), drawSpectrum(c), c.flush())).pack(side=Tkinter.LEFT) - stringsB = Tkinter.Button( - bframe, text='strings test', - command=lambda c=tkcanvas: (c.clear(), drawStrings(c), c.flush())).pack(side=Tkinter.LEFT) - rotstrB = Tkinter.Button( - bframe, text='rotated strings test', - command=lambda c=tkcanvas: (c.clear(), drawRotstring(c), c.flush())).pack(side=Tkinter.LEFT) - advancedB = Tkinter.Button( - bframe, text='advanced test', - command=lambda c=tkcanvas: (c.clear(), drawAdvanced(c), c.flush())).pack(side=Tkinter.LEFT) - bframe.pack(side=Tkinter.TOP) - tkcanvas.pack() - frame.pack() - # try to draw before running mainloop - if testfunc == minimal: - drawMinimal(tkcanvas) - elif testfunc == basics: - drawBasics(tkcanvas) - elif testfunc == advanced: - drawAdvanced(tkcanvas) - elif testfunc == spectrum: - drawSpectrum(tkcanvas) - elif testfunc == strings: - drawStrings(tkcanvas) - elif testfunc == rotstring: - drawRotstring(tkcanvas) - else: - print("Illegal testfunc handed to tkTest") - raise ValueError("Unsupported testfunc") + minimalB = Tkinter.Button( + bframe, text='minimal test', + command=lambda c=tkcanvas: (c.clear(), drawMinimal(c), c.flush())).pack(side=Tkinter.LEFT) + basicB = Tkinter.Button( + bframe, text='basic test', + command=lambda c=tkcanvas: (c.clear(), drawBasics(c), c.flush())).pack(side=Tkinter.LEFT) + spectB = Tkinter.Button( + bframe, text='spectrum test', + command=lambda c=tkcanvas: (c.clear(), drawSpectrum(c), c.flush())).pack(side=Tkinter.LEFT) + stringsB = Tkinter.Button( + bframe, text='strings test', + command=lambda c=tkcanvas: (c.clear(), drawStrings(c), c.flush())).pack(side=Tkinter.LEFT) + rotstrB = Tkinter.Button( + bframe, text='rotated strings test', + command=lambda c=tkcanvas: (c.clear(), drawRotstring(c), c.flush())).pack(side=Tkinter.LEFT) + advancedB = Tkinter.Button( + bframe, text='advanced test', + command=lambda c=tkcanvas: (c.clear(), drawAdvanced(c), c.flush())).pack(side=Tkinter.LEFT) + bframe.pack(side=Tkinter.TOP) + tkcanvas.pack() + frame.pack() + # try to draw before running mainloop + if testfunc == minimal: + drawMinimal(tkcanvas) + elif testfunc == basics: + drawBasics(tkcanvas) + elif testfunc == advanced: + drawAdvanced(tkcanvas) + elif testfunc == spectrum: + drawSpectrum(tkcanvas) + elif testfunc == strings: + drawStrings(tkcanvas) + elif testfunc == rotstring: + drawRotstring(tkcanvas) + else: + print("Illegal testfunc handed to tkTest") + raise ValueError("Unsupported testfunc") - tkcanvas.flush() - root.mainloop() + tkcanvas.flush() + root.mainloop() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- def wxTest(testfunc): - try: - import sping.WX - from wxPython.wx import wxApp - except ImportError: - print("A module needed for sping.WX is not available, select another backend") - return + try: + import sping.WX + from wxPython.wx import wxApp + except ImportError: + print("A module needed for sping.WX is not available, select another backend") + return - global wx_app - if not 'wx_app' in globals(): + global wx_app + if not 'wx_app' in globals(): - class CanvasApp(wxApp): - "The wxApp that runs canvas. Initializes windows, and handles redrawing" + class CanvasApp(wxApp): + "The wxApp that runs canvas. Initializes windows, and handles redrawing" - def OnInit(self): - return 1 + def OnInit(self): + return 1 - wx_app = CanvasApp(0) + wx_app = CanvasApp(0) - # run the test, passing the canvas class and returning the canvas - canvas = testfunc(sping.WX.WXCanvas) + # run the test, passing the canvas class and returning the canvas + canvas = testfunc(sping.WX.WXCanvas) - canvas.flush() + canvas.flush() - # Run the main loop - wx_app.MainLoop() + # Run the main loop + wx_app.MainLoop() def runtest(backend, testfunc): - # special cases: - if backend == 'TK': - tkTest(testfunc) # takes care of import, etc. - return + # special cases: + if backend == 'TK': + tkTest(testfunc) # takes care of import, etc. + return - if backend == 'WX': - wxTest(testfunc) # takes care of import, etc. - return + if backend == 'WX': + wxTest(testfunc) # takes care of import, etc. + return - # import the relevant module - modname = 'sping.' + backend - print("importing ", modname) - module = __import__('sping.' + backend, globals(), locals(), [backend]) + # import the relevant module + modname = 'sping.' + backend + print("importing ", modname) + module = __import__('sping.' + backend, globals(), locals(), [backend]) - # figure out the canvas class name (e.g., "PILCanvas") and get that - #canvasClass = getattr(module, backend[6:]+"Canvas") - #canvasClass = getattr(module, backend+"Canvas") - # from spam.ham import eggs - # __import('spam.ham', 'globals(), local + # figure out the canvas class name (e.g., "PILCanvas") and get that + #canvasClass = getattr(module, backend[6:]+"Canvas") + #canvasClass = getattr(module, backend+"Canvas") + # from spam.ham import eggs + # __import('spam.ham', 'globals(), local - canvasClass = getattr(module, backend + "Canvas") + canvasClass = getattr(module, backend + "Canvas") - # run the test, passing the canvas class and returning the canvas - canvas = testfunc(canvasClass) + # run the test, passing the canvas class and returning the canvas + canvas = testfunc(canvasClass) - # do post-test cleanup - canvas.flush() - # handle save's here - if backend == 'PIL': - # I'm saving twice because sometimes jpeg doesn't work when png does - canvas.save(file=canvas.name + ".jpg") # save as a jpeg file - canvas.save(format='png') # save as a png file - elif backend == 'piddleVCR': - filename = canvas.name + ".vcr" - canvas.save(filename) - print(filename, "saved") - else: # if backend == 'PS' or backend== 'PDF': - canvas.save() # should be "pass'ed" by Canvas's that don't use save + # do post-test cleanup + canvas.flush() + # handle save's here + if backend == 'PIL': + # I'm saving twice because sometimes jpeg doesn't work when png does + canvas.save(file=canvas.name + ".jpg") # save as a jpeg file + canvas.save(format='png') # save as a png file + elif backend == 'piddleVCR': + filename = canvas.name + ".vcr" + canvas.save(filename) + print(filename, "saved") + else: # if backend == 'PS' or backend== 'PDF': + canvas.save() # should be "pass'ed" by Canvas's that don't use save def mainLoop(): - global tests, backends - backend = None - test = None + global tests, backends + backend = None + test = None - while 1: - # print backends on left, tests on right, indicate chosen one of each - i = 0 - while i < len(backends) or i < len(tests): - try: - bstr = str(i + 1) + '. ' + backends[i] - except Exception: - bstr = '' - try: - tstr = chr(65 + i) + '. ' + tests[i].__name__ - except Exception: - tstr = '' - if i == backend: - bflag = '==>' - else: - bflag = '' - if i == test: - tflag = '==>' - else: - tflag = '' - print("%10s %-20s %10s %-20s" % (bflag, bstr, tflag, tstr)) - i = i + 1 - print() + while 1: + # print backends on left, tests on right, indicate chosen one of each + i = 0 + while i < len(backends) or i < len(tests): + try: + bstr = str(i + 1) + '. ' + backends[i] + except Exception: + bstr = '' + try: + tstr = chr(65 + i) + '. ' + tests[i].__name__ + except Exception: + tstr = '' + if i == backend: + bflag = '==>' + else: + bflag = '' + if i == test: + tflag = '==>' + else: + tflag = '' + print("%10s %-20s %10s %-20s" % (bflag, bstr, tflag, tstr)) + i = i + 1 + print() - inp = input("Selection (0 to exit): ") - print() + inp = input("Selection (0 to exit): ") + print() - if inp == '0': - return - if inp: - testinp = '' - if inp[-1] in LETTERS: - testinp = inp[-1] - elif inp[0] in LETTERS: - testinp = inp[0] - backinp = ' '.join(filter(lambda x: x in '0123456789', inp)) - if backinp: - backend = int(backinp) - 1 - if backend < len(backends): - docstr = __import__('sping.' + backends[backend], globals(), locals(), - backends[backend]).__doc__ - #docstr = __import__('sping.'+backends[backend]).__doc__ - if docstr: - print(docstr) - else: - print("") - else: - backend = None - if testinp: - test = ord(testinp[0].upper()) - ord('A') - if test >= 0 and test < len(tests): - docstr = tests[test].__doc__ - if docstr: - print(docstr) - else: - test = None - print + if inp == '0': + return + if inp: + testinp = '' + if inp[-1] in LETTERS: + testinp = inp[-1] + elif inp[0] in LETTERS: + testinp = inp[0] + backinp = ' '.join(filter(lambda x: x in '0123456789', inp)) + if backinp: + backend = int(backinp) - 1 + if backend < len(backends): + docstr = __import__('sping.' + backends[backend], globals(), locals(), + backends[backend]).__doc__ + #docstr = __import__('sping.'+backends[backend]).__doc__ + if docstr: + print(docstr) + else: + print("") + else: + backend = None + if testinp: + test = ord(testinp[0].upper()) - ord('A') + if test >= 0 and test < len(tests): + docstr = tests[test].__doc__ + if docstr: + print(docstr) + else: + test = None + print - # now, if we have a valid backend and test, run it - if backend != None and test != None: - runtest(backends[backend], tests[test]) + # now, if we have a valid backend and test, run it + if backend != None and test != None: + runtest(backends[backend], tests[test]) tests = (minimal, basics, advanced, spectrum, strings, rotstring) if __name__ == '__main__': - mainLoop() + mainLoop() diff --git a/rdkit/sping/tests/pstests.py b/rdkit/sping/tests/pstests.py index 0450ece36..3bfbfd427 100755 --- a/rdkit/sping/tests/pstests.py +++ b/rdkit/sping/tests/pstests.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from __future__ import print_function + import pidtest def testLatin1Chars(can): diff --git a/rdkit/sping/util/HTMLPiddler.py b/rdkit/sping/util/HTMLPiddler.py index 2d312da42..ab36d8a58 100755 --- a/rdkit/sping/util/HTMLPiddler.py +++ b/rdkit/sping/util/HTMLPiddler.py @@ -46,232 +46,232 @@ Credits: disclaimers: * NO WARRANTIES * USE AT YOUR OWN RISK * ''' -from __future__ import print_function -import htmllib, formatter, string + +import htmllib +import formatter +import string from types import * import piddle -from rdkit.six import string_types -from rdkit.six.moves import input TRACE = 0 class HTMLPiddler: - '''jjk 02/01/00''' + '''jjk 02/01/00''' - def __init__(self, html='', start=(0, 0), xLimits=(0, 800), font=None, color=None): - '''instance initializer - jjk 02/01/00''' - self.html = html - self.start = start - self.xLimits = xLimits - if not font: - font = piddle.Font() - self.font = font - self.color = color + def __init__(self, html='', start=(0, 0), xLimits=(0, 800), font=None, color=None): + '''instance initializer + jjk 02/01/00''' + self.html = html + self.start = start + self.xLimits = xLimits + if not font: + font = piddle.Font() + self.font = font + self.color = color - def renderOn(self, aPiddleCanvas): - '''draw the text with aPiddleCanvas - jjk 02/01/00''' - writer = _HtmlPiddleWriter(self, aPiddleCanvas) - fmt = formatter.AbstractFormatter(writer) - parser = _HtmlParser(fmt) - parser.feed(self.html) - parser.close() + def renderOn(self, aPiddleCanvas): + '''draw the text with aPiddleCanvas + jjk 02/01/00''' + writer = _HtmlPiddleWriter(self, aPiddleCanvas) + fmt = formatter.AbstractFormatter(writer) + parser = _HtmlParser(fmt) + parser.feed(self.html) + parser.close() class _HtmlParser(htmllib.HTMLParser): - def anchor_bgn(self, href, name, type): - htmllib.HTMLParser.anchor_bgn(self, href, name, type) - self.formatter.writer.anchor_bgn(href, name, type) + def anchor_bgn(self, href, name, type): + htmllib.HTMLParser.anchor_bgn(self, href, name, type) + self.formatter.writer.anchor_bgn(href, name, type) - def anchor_end(self): - htmllib.HTMLParser.anchor_end(self) - self.formatter.writer.anchor_end() + def anchor_end(self): + htmllib.HTMLParser.anchor_end(self) + self.formatter.writer.anchor_end() class _HtmlPiddleWriter: - FontSizeDict = {"h1": 36, "h2": 24, "h3": 18, "h4": 12, "h5": 10, "h6": 8} - DefaultFontSize = 12 + FontSizeDict = {"h1": 36, "h2": 24, "h3": 18, "h4": 12, "h5": 10, "h6": 8} + DefaultFontSize = 12 - def __init__(self, aHTMLPiddler, aPiddleCanvas): - self.piddler = aHTMLPiddler #view = view - self.pc = aPiddleCanvas - self.anchor = None - self.lineHeight = 0 - self.atbreak = 0 - self.color = self.piddler.color - self.defaultFont = self.font = self.piddler.font - s = "W" * 20 - x = self.pc.stringWidth(s, self.font) - y = self.pc.fontHeight(self.font) - x = (x + 19) / 20 # Largest character size - self.fsizex = x - self.fsizey = self.oldLineHeight = y - self.indentSize = x * 3 - self.lmargin, self.rmargin = self.piddler.xLimits - self.x, self.y = self.piddler.start - self.indent = self.lmargin + (x / 3) + def __init__(self, aHTMLPiddler, aPiddleCanvas): + self.piddler = aHTMLPiddler # view = view + self.pc = aPiddleCanvas + self.anchor = None + self.lineHeight = 0 + self.atbreak = 0 + self.color = self.piddler.color + self.defaultFont = self.font = self.piddler.font + s = "W" * 20 + x = self.pc.stringWidth(s, self.font) + y = self.pc.fontHeight(self.font) + x = (x + 19) / 20 # Largest character size + self.fsizex = x + self.fsizey = self.oldLineHeight = y + self.indentSize = x * 3 + self.lmargin, self.rmargin = self.piddler.xLimits + self.x, self.y = self.piddler.start + self.indent = self.lmargin + (x / 3) - def anchor_bgn(self, href, name, type): - if href: - self.oldcolor = self.color - self.color = piddle.Color(0.0, 0.0, 200 / 255.0) - self.anchor = (href, name, type) + def anchor_bgn(self, href, name, type): + if href: + self.oldcolor = self.color + self.color = piddle.Color(0.0, 0.0, 200 / 255.0) + self.anchor = (href, name, type) - def anchor_end(self): - if self.anchor: - self.color = self.oldcolor - self.anchor = None + def anchor_end(self): + if self.anchor: + self.color = self.oldcolor + self.anchor = None - # Start of methods required by the formatter + # Start of methods required by the formatter - def new_font(self, fontParams): - if TRACE: - print('nf', fontParams) - # fontParams is None, or the tuple (size, i, b, tt) - if not fontParams: - fontParams = (None, None, None, None) - size = fontParams[0] - try: - points = self.FontSizeDict[size] - except KeyError: - points = self.DefaultFontSize - if fontParams[3]: - face = "courier" #"modern" - elif isinstance(size, string_types) and size[0] == "h": - face = "helvetica" #"swiss" - else: - face = "times" #"roman" - italic = fontParams[1] # Italic indicator - if italic == None: - italic = 0 - bold = fontParams[2] # Bold indicator - if bold == None: - bold = 0 - self.font = piddle.Font(points, bold, italic, face=face) - x = self.pc.stringWidth('W' * 20, self.font) - self.fsizex = (x + 19) / 20 # Largest character size - self.fsizey = self.pc.fontHeight(self.font) - - def new_margin(self, margin, level): - self.send_line_break() - self.indent = self.x = self.lmargin + self.indentSize * level - - def new_spacing(self, spacing): - self.send_line_break() - t = "new_spacing(%s)" % repr(spacing) - self.OutputLine(t, 1) - - def new_styles(self, styles): - self.send_line_break() - t = "new_styles(%s)" % repr(styles) - self.OutputLine(t, 1) - - def send_label_data(self, data): - if data == "*": - w = self.pc.stringWidth(data, self.font) / 3 - h = self.pc.fontHeight(self.font) / 3 - x = self.indent - w - y = self.y - w - self.pc.drawRect(x, y, x - w, y - w) - else: - w = self.pc.stringWidth(data, self.font) - h = self.pc.fontHeight(self.font) - x = self.indent - w - self.fsizex / 3 - if x < 0: - x = 0 - self.pc.drawString(data, x, self.y, self.font, self.color) - - def send_paragraph(self, blankline): - self.send_line_break() - self.y = self.y + self.oldLineHeight * blankline - - def send_line_break(self): - if self.lineHeight: - self.y = self.y + self.lineHeight - self.oldLineHeight = self.lineHeight - self.lineHeight = 0 - self.x = self.indent - self.atbreak = 0 - if TRACE: - input('lb') - - def send_hor_rule(self): - self.send_line_break() - self.y = self.y + self.oldLineHeight - border = self.fsizex - self.pc.drawLine(border, self.y, self.rmargin - border, self.y, piddle.Color(0.0, 0.0, - 200 / 255.0)) - self.y = self.y + self.oldLineHeight - - def send_literal_data(self, data): - if not data: - return - lines = data.split(data, '\n') - text = lines[0].replace('\t', ' '*8) - for l in lines[1:]: - self.OutputLine(text, 1) - text = l.replace('\t', ' '*8) - self.OutputLine(text, 0) - self.atbreak = 0 - - def send_flowing_data(self, data): - if not data: - return - atbreak = self.atbreak or data[0] in string.whitespace - text = "" - pixels = chars = 0 - for word in data.split(): - bword = " " + word # blank + word - length = len(bword) - # The current line is "text" and its size is - # "pixels" pixels plus "chars" characters. - if not atbreak: - text = word - chars = chars + length - 1 - elif self.x + pixels + (chars + length) * self.fsizex < self.rmargin: - # Word fits easily on current line. - text = text + bword - chars = chars + length - else: - w = self.pc.stringWidth(text + bword, self.font) - h = self.pc.fontHeight(self.font) + def new_font(self, fontParams): if TRACE: - print('sfd T:', text + bword) - if TRACE: - print('sfd', self.x, w, self.x + w, self.rmargin) - if self.x + w < self.rmargin: - # Word fits. - text = text + bword - pixels = w - chars = 0 + print('nf', fontParams) + # fontParams is None, or the tuple (size, i, b, tt) + if not fontParams: + fontParams = (None, None, None, None) + size = fontParams[0] + try: + points = self.FontSizeDict[size] + except KeyError: + points = self.DefaultFontSize + if fontParams[3]: + face = "courier" # "modern" + elif isinstance(size, str) and size[0] == "h": + face = "helvetica" # "swiss" else: - # Word does not fit. Output current line. - self.OutputLine(text, 1) - text = word - chars = length - 1 - pixels = 0 - atbreak = 1 - self.OutputLine(text, 0) - self.atbreak = data[-1] in string.whitespace + face = "times" # "roman" + italic = fontParams[1] # Italic indicator + if italic == None: + italic = 0 + bold = fontParams[2] # Bold indicator + if bold == None: + bold = 0 + self.font = piddle.Font(points, bold, italic, face=face) + x = self.pc.stringWidth('W' * 20, self.font) + self.fsizex = (x + 19) / 20 # Largest character size + self.fsizey = self.pc.fontHeight(self.font) - def OutputLine(self, text, linebreak=0): - if text: - if TRACE: - print('olt:', text) - if TRACE: - print('olf:', self.font.size, self.font.bold, self.font.italic, self.font.underline, - self.font.face) - self.pc.drawString(text, self.x, self.y, self.font, self.color) - #if self.anchor: - # o.anchor = self.anchor - self.lineHeight = max(self.lineHeight, self.pc.fontHeight(self.font)) - self.x = self.x + self.pc.stringWidth(text, self.font) - if linebreak: - self.send_line_break() + def new_margin(self, margin, level): + self.send_line_break() + self.indent = self.x = self.lmargin + self.indentSize * level + + def new_spacing(self, spacing): + self.send_line_break() + t = "new_spacing(%s)" % repr(spacing) + self.OutputLine(t, 1) + + def new_styles(self, styles): + self.send_line_break() + t = "new_styles(%s)" % repr(styles) + self.OutputLine(t, 1) + + def send_label_data(self, data): + if data == "*": + w = self.pc.stringWidth(data, self.font) / 3 + h = self.pc.fontHeight(self.font) / 3 + x = self.indent - w + y = self.y - w + self.pc.drawRect(x, y, x - w, y - w) + else: + w = self.pc.stringWidth(data, self.font) + h = self.pc.fontHeight(self.font) + x = self.indent - w - self.fsizex / 3 + if x < 0: + x = 0 + self.pc.drawString(data, x, self.y, self.font, self.color) + + def send_paragraph(self, blankline): + self.send_line_break() + self.y = self.y + self.oldLineHeight * blankline + + def send_line_break(self): + if self.lineHeight: + self.y = self.y + self.lineHeight + self.oldLineHeight = self.lineHeight + self.lineHeight = 0 + self.x = self.indent + self.atbreak = 0 + if TRACE: + input('lb') + + def send_hor_rule(self): + self.send_line_break() + self.y = self.y + self.oldLineHeight + border = self.fsizex + self.pc.drawLine(border, self.y, self.rmargin - border, self.y, piddle.Color(0.0, 0.0, + 200 / 255.0)) + self.y = self.y + self.oldLineHeight + + def send_literal_data(self, data): + if not data: + return + lines = data.split(data, '\n') + text = lines[0].replace('\t', ' ' * 8) + for l in lines[1:]: + self.OutputLine(text, 1) + text = l.replace('\t', ' ' * 8) + self.OutputLine(text, 0) + self.atbreak = 0 + + def send_flowing_data(self, data): + if not data: + return + atbreak = self.atbreak or data[0] in string.whitespace + text = "" + pixels = chars = 0 + for word in data.split(): + bword = " " + word # blank + word + length = len(bword) + # The current line is "text" and its size is + # "pixels" pixels plus "chars" characters. + if not atbreak: + text = word + chars = chars + length - 1 + elif self.x + pixels + (chars + length) * self.fsizex < self.rmargin: + # Word fits easily on current line. + text = text + bword + chars = chars + length + else: + w = self.pc.stringWidth(text + bword, self.font) + h = self.pc.fontHeight(self.font) + if TRACE: + print('sfd T:', text + bword) + if TRACE: + print('sfd', self.x, w, self.x + w, self.rmargin) + if self.x + w < self.rmargin: + # Word fits. + text = text + bword + pixels = w + chars = 0 + else: + # Word does not fit. Output current line. + self.OutputLine(text, 1) + text = word + chars = length - 1 + pixels = 0 + atbreak = 1 + self.OutputLine(text, 0) + self.atbreak = data[-1] in string.whitespace + + def OutputLine(self, text, linebreak=0): + if text: + if TRACE: + print('olt:', text) + if TRACE: + print('olf:', self.font.size, self.font.bold, self.font.italic, self.font.underline, + self.font.face) + self.pc.drawString(text, self.x, self.y, self.font, self.color) + # if self.anchor: + # o.anchor = self.anchor + self.lineHeight = max(self.lineHeight, self.pc.fontHeight(self.font)) + self.x = self.x + self.pc.stringWidth(text, self.font) + if linebreak: + self.send_line_break() __copyrite_jim__ = '''\ @@ -334,72 +334,72 @@ is not really intended for real work. def demoPDF(html): - import piddlePDF - pc = piddlePDF.PDFCanvas((750, 1000), 'HTMLPiddler.pdf') - pc.drawLine(100, 100, 250, 150, color=piddle.green) - pc.drawRect(100, 100, 650, 900, edgeColor=piddle.pink) - ptt = HTMLPiddler(html, (250, 150), (100, 650)) - ptt.renderOn(pc) - pc.save() + import piddlePDF + pc = piddlePDF.PDFCanvas((750, 1000), 'HTMLPiddler.pdf') + pc.drawLine(100, 100, 250, 150, color=piddle.green) + pc.drawRect(100, 100, 650, 900, edgeColor=piddle.pink) + ptt = HTMLPiddler(html, (250, 150), (100, 650)) + ptt.renderOn(pc) + pc.save() def demoPIL(html): - print('be patient, this is a little slow...') - import piddlePIL - pc = piddlePIL.PILCanvas((800, 600), 'HTMLPiddler') - pc.drawLine(0, 0, 100, 80, color=piddle.green) - pc.drawRect(50, 50, 750, 550, edgeColor=piddle.pink) - ptt = HTMLPiddler(html, (100, 80), (50, 750)) - ptt.renderOn(pc) - pc.save(format='tif') + print('be patient, this is a little slow...') + import piddlePIL + pc = piddlePIL.PILCanvas((800, 600), 'HTMLPiddler') + pc.drawLine(0, 0, 100, 80, color=piddle.green) + pc.drawRect(50, 50, 750, 550, edgeColor=piddle.pink) + ptt = HTMLPiddler(html, (100, 80), (50, 750)) + ptt.renderOn(pc) + pc.save(format='tif') def demoTK(html): - import piddleTK - pc = piddleTK.TKCanvas((800, 600)) - pc.drawLine(0, 0, 50, 50, color=piddle.green) - pc.drawRect(10, 10, 590, 790, edgeColor=piddle.pink) - ptt = HTMLPiddler(html, (50, 50), (10, 790)) - pc.flush() - ptt.renderOn(pc) + import piddleTK + pc = piddleTK.TKCanvas((800, 600)) + pc.drawLine(0, 0, 50, 50, color=piddle.green) + pc.drawRect(10, 10, 590, 790, edgeColor=piddle.pink) + ptt = HTMLPiddler(html, (50, 50), (10, 790)) + pc.flush() + ptt.renderOn(pc) def demoWX(html): - import piddleWX - pc = piddleWX.WXCanvas((800, 600)) - pc.drawLine(0, 0, 50, 50, color=piddle.green) - pc.drawRect(10, 10, 590, 790, edgeColor=piddle.pink) - ptt = HTMLPiddler(html, (50, 50), (10, 790)) - pc.flush() - ptt.renderOn(pc) + import piddleWX + pc = piddleWX.WXCanvas((800, 600)) + pc.drawLine(0, 0, 50, 50, color=piddle.green) + pc.drawRect(10, 10, 590, 790, edgeColor=piddle.pink) + ptt = HTMLPiddler(html, (50, 50), (10, 790)) + pc.flush() + ptt.renderOn(pc) def demo(html=DEMO_HTML): - while 1: - print('Demo of HTMLPiddler.py') - print(' 1. piddlePDF') - print(' 2. piddlePIL') - #print(' 3. piddleTK') - #print(' 4. piddleWX') - print(' 0. EXIT') - sel = input('Enter Selection Number: ') - try: - sel = int(sel.strip()) - except Exception: - sel = -1 - if (sel == 0): - break - elif (sel == 1): - demoPDF(html) - elif (sel == 2): - demoPIL(html) - elif (sel == 3): - demoTK(html) - elif (sel == 4): - demoWX(html) + while 1: + print('Demo of HTMLPiddler.py') + print(' 1. piddlePDF') + print(' 2. piddlePIL') + #print(' 3. piddleTK') + #print(' 4. piddleWX') + print(' 0. EXIT') + sel = input('Enter Selection Number: ') + try: + sel = int(sel.strip()) + except Exception: + sel = -1 + if (sel == 0): + break + elif (sel == 1): + demoPDF(html) + elif (sel == 2): + demoPIL(html) + elif (sel == 3): + demoTK(html) + elif (sel == 4): + demoWX(html) if __name__ == '__main__': - import pdb - demo() + import pdb + demo() diff --git a/rdkit/utils/chemdraw.py b/rdkit/utils/chemdraw.py index 6235acee5..fb794d942 100755 --- a/rdkit/utils/chemdraw.py +++ b/rdkit/utils/chemdraw.py @@ -11,7 +11,7 @@ """ tools for interacting with chemdraw """ -from __future__ import print_function + import tempfile, os, time try: diff --git a/rdkit/utils/chemutils.py b/rdkit/utils/chemutils.py index e493402c5..62420f74a 100755 --- a/rdkit/utils/chemutils.py +++ b/rdkit/utils/chemutils.py @@ -4,161 +4,161 @@ """ utility functions with "chemical know-how" """ -from __future__ import print_function + import os import re from rdkit import RDConfig -from rdkit.six.moves import xrange if not RDConfig.usePgSQL: - _atomDbName = os.path.join(RDConfig.RDDataDir, 'atomdb.gdb') + _atomDbName = os.path.join(RDConfig.RDDataDir, 'atomdb.gdb') else: - _atomDbName = "::RDData" + _atomDbName = "::RDData" def GetAtomicData(atomDict, descriptorsDesired, dBase=_atomDbName, table='atomic_data', where='', user='sysdba', password='masterkey', includeElCounts=0): - """ pulls atomic data from a database - - **Arguments** - - - atomDict: the dictionary to populate - - - descriptorsDesired: the descriptors to pull for each atom - - - dBase: the DB to use - - - table: the DB table to use - - - where: the SQL where clause - - - user: the user name to use with the DB - - - password: the password to use with the DB - - - includeElCounts: if nonzero, valence electron count fields are added to - the _atomDict_ - - """ - extraFields = ['NVAL', 'NVAL_NO_FULL_F', 'NVAL_NO_FULL_D', 'NVAL_NO_FULL'] - from rdkit.Dbase import DbModule - cn = DbModule.connect(dBase, user, password) - c = cn.cursor() - descriptorsDesired = [s.upper() for s in descriptorsDesired] - if 'NAME' not in descriptorsDesired: - descriptorsDesired.append('NAME') - if includeElCounts and 'CONFIG' not in descriptorsDesired: - descriptorsDesired.append('CONFIG') - for field in extraFields: - if field in descriptorsDesired: - descriptorsDesired.remove(field) - toPull = ','.join(descriptorsDesired) - command = 'select %s from atomic_data %s' % (toPull, where) - try: - c.execute(command) - except Exception: - print('Problems executing command:', command) - return - res = c.fetchall() - for atom in res: - tDict = {} - for i in xrange(len(descriptorsDesired)): - desc = descriptorsDesired[i] - val = atom[i] - tDict[desc] = val - name = tDict['NAME'] - atomDict[name] = tDict - if includeElCounts: - config = atomDict[name]['CONFIG'] - atomDict[name]['NVAL'] = ConfigToNumElectrons(config) - atomDict[name]['NVAL_NO_FULL_F'] = ConfigToNumElectrons(config, ignoreFullF=1) - atomDict[name]['NVAL_NO_FULL_D'] = ConfigToNumElectrons(config, ignoreFullD=1) - atomDict[name]['NVAL_NO_FULL'] = ConfigToNumElectrons(config, ignoreFullF=1, ignoreFullD=1) - - -def SplitComposition(compStr): - """ Takes a simple chemical composition and turns into a list of element,# pairs. - - i.e. 'Fe3Al' -> [('Fe',3),('Al',1)] + """ pulls atomic data from a database **Arguments** - - compStr: the composition string to be processed + - atomDict: the dictionary to populate - **Returns** + - descriptorsDesired: the descriptors to pull for each atom - - the *composVect* corresponding to _compStr_ + - dBase: the DB to use - **Note** + - table: the DB table to use - -this isn't smart enough by half to deal with anything even - remotely subtle, so be gentle. + - where: the SQL where clause - """ - target = r'([A-Z][a-z]?)([0-9\.]*)' + - user: the user name to use with the DB - theExpr = re.compile(target) + - password: the password to use with the DB - matches = theExpr.findall(compStr) - res = [] - for match in matches: - if len(match[1]) > 0: - res.append((match[0], float(match[1]))) - else: - res.append((match[0], 1)) + - includeElCounts: if nonzero, valence electron count fields are added to + the _atomDict_ - return res + """ + extraFields = ['NVAL', 'NVAL_NO_FULL_F', 'NVAL_NO_FULL_D', 'NVAL_NO_FULL'] + from rdkit.Dbase import DbModule + cn = DbModule.connect(dBase, user, password) + c = cn.cursor() + descriptorsDesired = [s.upper() for s in descriptorsDesired] + if 'NAME' not in descriptorsDesired: + descriptorsDesired.append('NAME') + if includeElCounts and 'CONFIG' not in descriptorsDesired: + descriptorsDesired.append('CONFIG') + for field in extraFields: + if field in descriptorsDesired: + descriptorsDesired.remove(field) + toPull = ','.join(descriptorsDesired) + command = 'select %s from atomic_data %s' % (toPull, where) + try: + c.execute(command) + except Exception: + print('Problems executing command:', command) + return + res = c.fetchall() + for atom in res: + tDict = {} + for i in range(len(descriptorsDesired)): + desc = descriptorsDesired[i] + val = atom[i] + tDict[desc] = val + name = tDict['NAME'] + atomDict[name] = tDict + if includeElCounts: + config = atomDict[name]['CONFIG'] + atomDict[name]['NVAL'] = ConfigToNumElectrons(config) + atomDict[name]['NVAL_NO_FULL_F'] = ConfigToNumElectrons(config, ignoreFullF=1) + atomDict[name]['NVAL_NO_FULL_D'] = ConfigToNumElectrons(config, ignoreFullD=1) + atomDict[name]['NVAL_NO_FULL'] = ConfigToNumElectrons( + config, ignoreFullF=1, ignoreFullD=1) + + +def SplitComposition(compStr): + """ Takes a simple chemical composition and turns into a list of element,# pairs. + + i.e. 'Fe3Al' -> [('Fe',3),('Al',1)] + + **Arguments** + + - compStr: the composition string to be processed + + **Returns** + + - the *composVect* corresponding to _compStr_ + + **Note** + + -this isn't smart enough by half to deal with anything even + remotely subtle, so be gentle. + + """ + target = r'([A-Z][a-z]?)([0-9\.]*)' + + theExpr = re.compile(target) + + matches = theExpr.findall(compStr) + res = [] + for match in matches: + if len(match[1]) > 0: + res.append((match[0], float(match[1]))) + else: + res.append((match[0], 1)) + + return res def ConfigToNumElectrons(config, ignoreFullD=0, ignoreFullF=0): - """ counts the number of electrons appearing in a configuration string + """ counts the number of electrons appearing in a configuration string - **Arguments** + **Arguments** - - config: the configuration string (e.g. '2s^2 2p^4') + - config: the configuration string (e.g. '2s^2 2p^4') - - ignoreFullD: toggles not counting full d shells + - ignoreFullD: toggles not counting full d shells - - ignoreFullF: toggles not counting full f shells + - ignoreFullF: toggles not counting full f shells - **Returns** + **Returns** - the number of valence electrons + the number of valence electrons - """ - arr = config.split(' ') + """ + arr = config.split(' ') - nEl = 0 - for i in range(1, len(arr)): - l = arr[i].split('^') - incr = int(l[1]) - if ignoreFullF and incr == 14 and l[0].find('f') != -1 and len(arr) > 2: - incr = 0 - if ignoreFullD and incr == 10 and l[0].find('d') != -1 and len(arr) > 2: - incr = 0 - nEl = nEl + incr - return nEl + nEl = 0 + for i in range(1, len(arr)): + l = arr[i].split('^') + incr = int(l[1]) + if ignoreFullF and incr == 14 and l[0].find('f') != -1 and len(arr) > 2: + incr = 0 + if ignoreFullD and incr == 10 and l[0].find('d') != -1 and len(arr) > 2: + incr = 0 + nEl = nEl + incr + return nEl if __name__ == '__main__': # pragma: nocover - print(SplitComposition('Fe')) - print(SplitComposition('Fe3Al')) - print(SplitComposition('Fe99PdAl')) - print(SplitComposition('TiNiSiSO12P')) - temp = ['[Xe] 4f^12 6s^2', '[Xe] 4f^14 5d^6 6s^2', '[Xe] 4f^14 5d^10 6s^2', - '[Xe] 4f^14 5d^10 6s^2 6p^1', '[Xe] 5d^10'] - print('ignore all') - for entry in temp: - print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=1, ignoreFullF=1)) - print('ignore d') - for entry in temp: - print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=1, ignoreFullF=0)) - print('ignore f') - for entry in temp: - print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=0, ignoreFullF=1)) - print('ignore None') - for entry in temp: - print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=0, ignoreFullF=0)) + print(SplitComposition('Fe')) + print(SplitComposition('Fe3Al')) + print(SplitComposition('Fe99PdAl')) + print(SplitComposition('TiNiSiSO12P')) + temp = ['[Xe] 4f^12 6s^2', '[Xe] 4f^14 5d^6 6s^2', '[Xe] 4f^14 5d^10 6s^2', + '[Xe] 4f^14 5d^10 6s^2 6p^1', '[Xe] 5d^10'] + print('ignore all') + for entry in temp: + print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=1, ignoreFullF=1)) + print('ignore d') + for entry in temp: + print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=1, ignoreFullF=0)) + print('ignore f') + for entry in temp: + print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=0, ignoreFullF=1)) + print('ignore None') + for entry in temp: + print(entry, '\t\t\t\t', ConfigToNumElectrons(entry, ignoreFullD=0, ignoreFullF=0)) diff --git a/rdkit/utils/spiral.py b/rdkit/utils/spiral.py index 732c6cf26..d1fab39f2 100644 --- a/rdkit/utils/spiral.py +++ b/rdkit/utils/spiral.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from numpy import * from rdkit.sping import pid import math