# $Id: VectCollection.py 5009 2006-02-22 15:19:49Z glandrum $
#
# Copyright (C) 2005-2006 greg landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved  @@
#
import DataStructs
import copy
import struct
try:
    import cPickle
except ImportError:
    # Python 3 has no cPickle module; keep the name available under both
    # interpreters.
    import pickle as cPickle


class VectCollection(object):
    """A keyed collection of bit vectors that can be queried as their OR.

    Each child vector is stored under a key.  Bit queries (indexing,
    GetBit(), GetOnBits()) are answered from a cached vector built by
    OR-ing all of the children together; the cache is rebuilt lazily after
    the set of children changes.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]

    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]

    to copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2

    """

    def __init__(self):
        """Create an empty collection."""
        self.__vects = {}        # key -> child bit vector
        self.__orVect = None     # cached OR of all children (None if empty)
        self.__numBits = -1      # size of the cached OR vector (-1 if empty)
        self.__needReset = True  # True when the cache is out of date

    def GetOrVect(self):
        """Return the OR of all children, or None if there are no children."""
        if self.__needReset:
            self.Reset()
        return self.__orVect
    orVect = property(GetOrVect)

    def AddVect(self, id, vect):
        """Store vect under the key id, replacing any vector already there."""
        self.__vects[id] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector if the children have changed."""
        if not self.__needReset:
            return
        # Always start from the empty state so that a collection which has
        # lost all of its children does not keep reporting a stale size.
        self.__orVect = None
        self.__numBits = -1
        children = list(self.__vects.values())
        if children:
            # Work on a copy so that the first child is not modified by the
            # in-place OR below.
            self.__orVect = copy.copy(children[0])
            self.__numBits = self.__orVect.GetNumBits()
            for child in children[1:]:
                self.__orVect |= child
        self.__needReset = False

    def NumChildren(self):
        """Return the number of child vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return the children as a tuple of (key, vector) pairs."""
        return tuple(self.__vects.items())

    def GetBit(self, id):
        """Return bit number id of the OR vector (same as self[id])."""
        # __getitem__ takes care of refreshing the cache.
        return self[id]

    def GetNumBits(self):
        """Return the size of the OR vector (same as len(self))."""
        return len(self)

    def GetOnBits(self):
        """Return the on bits of the OR vector.

        An empty collection has no OR vector, so an empty tuple is returned.
        """
        if self.__needReset:
            self.Reset()
        if self.__orVect is None:
            return ()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every child that does not have the given bit set."""
        # Iterate over a snapshot: the dictionary is modified in the loop.
        for key, vect in list(self.__vects.items()):
            if not vect.GetBit(bit):
                del self.__vects[key]
        self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every child that has the given bit set."""
        # Iterate over a snapshot: the dictionary is modified in the loop.
        for key, vect in list(self.__vects.items()):
            if vect.GetBit(bit):
                del self.__vects[key]
        self.__needReset = True

    def Uniquify(self, verbose=False):
        """Remove children whose on-bit lists duplicate an earlier child.

        Children are visited in the dictionary's iteration order and the
        first child with a given list of on bits is the one that is kept.
        If verbose is true, the child counts before and after are printed.
        """
        seen = set()
        kept = {}
        for key, vect in self.__vects.items():
            signature = tuple(vect.GetOnBits())
            if signature in seen:
                continue
            seen.add(signature)
            kept[key] = vect
        if verbose:
            print('uniquify: %d -> %d' % (len(self.__vects), len(kept)))
        self.__vects = kept
        self.__needReset = True

    def __len__(self):
        """Return the size of the OR vector, or 0 for an empty collection."""
        if self.__needReset:
            self.Reset()
        # -1 is the internal "no vectors" marker; len() must not be negative.
        return max(self.__numBits, 0)

    def __getitem__(self, id):
        """Return bit number id of the OR vector.

        Raises IndexError if the collection has no children.
        """
        if self.__needReset:
            self.Reset()
        if self.__orVect is None:
            raise IndexError('VectCollection is empty')
        return self.__orVect.GetBit(id)

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the children to a binary string.

        Layout (native byte order): the number of children as an unsigned
        int, then for each child its key as an unsigned int, the length of
        its ToBinary() form as an unsigned int, and that binary form.
        Keys therefore have to be integers that fit the struct 'I' format.
        """
        pkl = struct.pack('I', len(self.__vects))
        for key, vect in self.__vects.items():
            payload = vect.ToBinary()
            size = len(payload)
            pkl += struct.pack('I', key)
            pkl += struct.pack('I%ds' % size, size, payload)
        return pkl

    def __setstate__(self, pkl):
        """Rebuild the collection from the output of __getstate__()."""
        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        szI = struct.calcsize('I')
        offset = 0
        nToRead = struct.unpack('I', pkl[offset:offset + szI])[0]
        offset += szI
        for i in range(nToRead):
            key = struct.unpack('I', pkl[offset:offset + szI])[0]
            offset += szI
            size = struct.unpack('I', pkl[offset:offset + szI])[0]
            offset += szI
            fmt = '%ds' % size
            sz = struct.calcsize(fmt)
            # struct.unpack raises struct.error if the data is truncated.
            payload = struct.unpack(fmt, pkl[offset:offset + sz])[0]
            offset += sz
            self.AddVect(key, DataStructs.ExplicitBitVect(payload))


#------------------------------------
#
#  doctest boilerplate
#
def _test():
    """Run the doctests of the __main__ module; return (failed, tried)."""
    import doctest, sys
    return doctest.testmod(sys.modules["__main__"])


if __name__ == '__main__':
    import sys
    failed, tried = _test()
    sys.exit(failed)