Files
rdkit/Python/DataStructs/VectCollection.py

272 lines
6.0 KiB
Python

# $Id$
#
# Copyright (C) 2005-2006 greg landrum and Rational Discovery LLC
#
# @@ All Rights Reserved @@
#
import DataStructs
import copy,struct,cPickle
class VectCollection(object):
    """A keyed collection of bit vectors that also acts like their bitwise OR.

    Vectors are stored under unique keys.  ``len()``, indexing, ``GetBit()``,
    ``GetNumBits()`` and ``GetOnBits()`` are all answered from a cached vector
    holding the OR of every stored vector; that cache is rebuilt lazily the
    next time it is needed after the collection has been modified.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:

    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:

    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:

    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]

    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]

    to copy VectCollections, use the copy module:

    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2

    """

    def __init__(self):
        """Create an empty collection whose OR cache is marked stale."""
        self.__vects = {}          # key -> stored bit vector
        self.__orVect = None       # cached OR of every stored vector
        self.__numBits = -1        # bit count recorded at the last rebuild
        self.__needReset = True    # True while the cache is out of date

    def GetOrVect(self):
        """Return the cached OR vector, rebuilding it first if it is stale.

        The result is None when the collection holds no vectors.
        """
        if self.__needReset:
            self.Reset()
        return self.__orVect
    orVect = property(GetOrVect)

    def AddVect(self, id, vect):
        """Store vect under key id, replacing any vector already using that key."""
        self.__vects[id] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector if the collection changed since the last rebuild.

        With no stored vectors the cache is left as None, the previously
        recorded bit count is not touched, and the cache stays marked stale.
        """
        if not self.__needReset:
            return
        self.__orVect = None
        if not self.__vects:
            return
        remaining = iter(self.__vects.values())
        # Work on a copy so the in-place ORs below never modify a stored child.
        combined = copy.copy(next(remaining))
        self.__numBits = combined.GetNumBits()
        for vect in remaining:
            combined |= vect
        self.__orVect = combined
        self.__needReset = False

    def NumChildren(self):
        """Return the number of stored vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return a tuple of (key, vector) pairs for the stored vectors."""
        return tuple(self.__vects.items())

    def GetBit(self, id):
        """Return bit id of the OR vector (same as indexing the collection)."""
        # __getitem__ refreshes the cache itself, so no separate check is needed.
        return self[id]

    def GetNumBits(self):
        """Return the number of bits (same as len() of the collection)."""
        return len(self)

    def GetOnBits(self):
        """Return the on bits of the OR vector, as reported by its GetOnBits().

        The collection must hold at least one vector: with none stored there
        is no OR vector to query and the attribute lookup on None fails.
        """
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def __detachByBit(self, bit, isSet):
        """Drop every stored vector whose bit being set equals isSet."""
        doomed = [key for key, vect in self.__vects.items()
                  if bool(vect.GetBit(bit)) == isSet]
        for key in doomed:
            del self.__vects[key]
        if doomed:
            self.__needReset = True

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every stored vector that does not have bit set."""
        self.__detachByBit(bit, False)

    def DetachVectsMatchingBit(self, bit):
        """Remove every stored vector that has bit set."""
        self.__detachByBit(bit, True)

    def Uniquify(self, verbose=False):
        """Remove stored vectors whose on bits duplicate an earlier vector's.

        Vectors are compared by their list of on bits only; for each distinct
        list the first key encountered while iterating the collection is kept.

        Arguments:
          - verbose: if true, print the child count before and after.
        """
        seen = set()
        unique = {}
        for key, vect in self.__vects.items():
            onBits = tuple(vect.GetOnBits())
            if onBits not in seen:
                seen.add(onBits)
                unique[key] = vect
        if verbose:
            # Single pre-formatted argument: same output on Python 2 and 3.
            print('uniquify: %d -> %d' % (len(self.__vects), len(unique)))
        if len(unique) != len(self.__vects):
            # The original assigned to a misspelled "__needsReset" here,
            # which never reached the real flag.
            self.__needReset = True
        self.__vects = unique

    def __len__(self):
        """Return the bit count recorded when the OR vector was last rebuilt."""
        if self.__needReset:
            self.Reset()
        return self.__numBits

    def __getitem__(self, id):
        """Return bit id of the OR vector, rebuilding the cache if it is stale."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetBit(id)

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the stored vectors to a byte string.

        Layout (native 'I' packing): vector count, then for each vector its
        key, the length of its ToBinary() payload, and the payload itself.
        Keys therefore have to be packable as unsigned ints.
        """
        chunks = [struct.pack('I', len(self.__vects))]
        for key, vect in self.__vects.items():
            chunks.append(struct.pack('I', key))
            blob = vect.ToBinary()
            size = len(blob)
            chunks.append(struct.pack('I%ds' % size, size, blob))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Restore the collection from a byte string made by __getstate__.

        Each payload is handed to DataStructs.ExplicitBitVect to rebuild the
        vector; struct.error is raised if pkl is shorter than its own header
        fields say it should be.
        """
        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        intSize = struct.calcsize('I')
        offset = 0
        nToRead = struct.unpack_from('I', pkl, offset)[0]
        offset += intSize
        for _ in range(nToRead):
            key = struct.unpack_from('I', pkl, offset)[0]
            offset += intSize
            size = struct.unpack_from('I', pkl, offset)[0]
            offset += intSize
            payloadFmt = '%ds' % size
            blob = struct.unpack_from(payloadFmt, pkl, offset)[0]
            offset += struct.calcsize(payloadFmt)
            self.AddVect(key, DataStructs.ExplicitBitVect(blob))
#------------------------------------
#
# doctest boilerplate
#
def _test():
import doctest,sys
return doctest.testmod(sys.modules["__main__"])
if __name__ == '__main__':
    import sys
    # The process exit status is the number of failed doctests.
    nFailed, nTried = _test()
    sys.exit(nFailed)