Files
rdkit/Code/DataStructs/SparseIntVect.h
Greg Landrum 1fa8cd321b add SparseIntVect in both c++ and Python
This still needs windows testing.
2007-09-22 18:49:37 +00:00

337 lines
9.2 KiB
C++

// $Id$
//
// Copyright (C) 2007 Greg Landrum
//
// @@ All Rights Reserved @@
//
#ifndef __RD_SPARSE_INT_VECT_20070921__
#define __RD_SPARSE_INT_VECT_20070921__
#include <map>
#include <string>
#include <RDGeneral/Invariant.h>
#include <sstream>
#include <RDBoost/Exceptions.h>
const int ci_SPARSEINTVECT_VERSION=0x0001; //!< version number to use in pickles
namespace RDKit{
//! a class for efficiently storing sparse vectors of ints
template <typename IndexType>
class SparseIntVect {
typedef std::map<IndexType,int> StorageType;
public:
SparseIntVect() : d_length(0) {};
//! initialize with a particular length
SparseIntVect(IndexType length) : d_length(length) {};
//! Copy constructor
SparseIntVect(const SparseIntVect<IndexType> &other){
d_length=other.d_length;
d_data.insert(other.d_data.begin(),other.d_data.end());
}
//! constructor from a pickle
SparseIntVect(const std::string pkl){
initFromText(pkl.c_str(),pkl.size());
};
//! constructor from a pickle
SparseIntVect(const char *pkl,const unsigned int len){
initFromText(pkl,len);
};
//! destructor (doesn't need to do anything)
~SparseIntVect() {}
//! return the value at an index
int getVal(IndexType idx) const {
if(idx<0||idx>=d_length){
throw IndexErrorException(idx);
}
int res=0;
typename StorageType::const_iterator iter=d_data.find(idx);
if(iter!=d_data.end()){
res=iter->second;
}
return res;
};
int operator[] (IndexType idx) const { return getVal(idx); };
//! set the value at an index
void setVal(IndexType idx, int val){
if(idx<0||idx>=d_length){
throw IndexErrorException(idx);
}
if(val!=0){
d_data[idx]=val;
} else {
d_data.erase(idx);
}
};
//! returns the length
IndexType getLength() const { return d_length; };
//! returns the sum of all the elements in the vect
int getTotalVal(bool useAbs=false) const {
int res=0;
typename StorageType::const_iterator iter;
for(iter=d_data.begin();iter!=d_data.end();++iter){
if(useAbs){
res+=abs(iter->second);
} else {
res+=iter->second;
}
}
return res;
};
//! this is a "fuzzy" intesection, the final value
//! of each element is equal to the minimum from
//! the two vects.
SparseIntVect<IndexType> &
operator&= (const SparseIntVect<IndexType> &other) {
if(other.d_length!=d_length){
throw ValueErrorException("SparseIntVect size mismatch");
}
typename StorageType::iterator iter=d_data.begin();
typename StorageType::const_iterator oIter=other.d_data.begin();
while(iter!=d_data.end()){
// we're relying on the fact that the maps are sorted:
while(oIter!=other.d_data.end() && oIter->first < iter->first){
++oIter;
}
if(oIter!=d_data.end() && oIter->first==iter->first){
// found it:
if(oIter->second<iter->second){
iter->second=oIter->second;
}
++oIter;
++iter;
} else {
// not there; our value is zero, which means
// we should remove this value:
typename StorageType::iterator tmpIter=iter;
++tmpIter;
d_data.erase(iter);
iter=tmpIter;
}
}
return *this;
};
const SparseIntVect<IndexType>
operator& (const SparseIntVect<IndexType> &other) const {
SparseIntVect<IndexType> res(*this);
return res&=other;
}
//! this is a "fuzzy" union, the final value
//! of each element is equal to the maximum from
//! the two vects.
SparseIntVect<IndexType> &
operator|= (const SparseIntVect<IndexType> &other) {
if(other.d_length!=d_length){
throw ValueErrorException("SparseIntVect size mismatch");
}
typename StorageType::iterator iter=d_data.begin();
typename StorageType::const_iterator oIter=other.d_data.begin();
while(iter!=d_data.end()){
// we're relying on the fact that the maps are sorted:
while(oIter!=other.d_data.end() &&
oIter->first < iter->first){
d_data[oIter->first]=oIter->second;
++oIter;
}
if(oIter!=other.d_data.end() && oIter->first==iter->first){
// found it:
if(oIter->second>iter->second){
iter->second=oIter->second;
}
++oIter;
}
++iter;
}
// finish up the other vect:
while(oIter!=other.d_data.end()){
d_data[oIter->first]=oIter->second;
++oIter;
}
return *this;
};
const SparseIntVect<IndexType>
operator| (const SparseIntVect<IndexType> &other) const {
SparseIntVect<IndexType> res(*this);
return res|=other;
}
SparseIntVect<IndexType> &
operator+= (const SparseIntVect<IndexType> &other) {
if(other.d_length!=d_length){
throw ValueErrorException("SparseIntVect size mismatch");
}
typename StorageType::iterator iter=d_data.begin();
typename StorageType::const_iterator oIter=other.d_data.begin();
while(oIter!=other.d_data.end()){
while(iter!=d_data.end() &&
iter->first < oIter->first){
++iter;
}
if(iter!=d_data.end() && oIter->first==iter->first){
// found it:
iter->second+=oIter->second;
if(!iter->second){
typename StorageType::iterator tIter=iter;
++tIter;
d_data.erase(iter);
iter=tIter;
} else {
++iter;
}
} else {
d_data[oIter->first]=oIter->second;
}
++oIter;
}
return *this;
};
const SparseIntVect<IndexType>
operator+ (const SparseIntVect<IndexType> &other) const {
SparseIntVect<IndexType> res(*this);
return res+=other;
}
SparseIntVect<IndexType> &
operator-= (const SparseIntVect<IndexType> &other) {
if(other.d_length!=d_length){
throw ValueErrorException("SparseIntVect size mismatch");
}
typename StorageType::iterator iter=d_data.begin();
typename StorageType::const_iterator oIter=other.d_data.begin();
while(oIter!=other.d_data.end()){
while(iter!=d_data.end() &&
iter->first < oIter->first){
++iter;
}
if(iter!=d_data.end() && oIter->first==iter->first){
// found it:
iter->second-=oIter->second;
if(!iter->second){
typename StorageType::iterator tIter=iter;
++tIter;
d_data.erase(iter);
iter=tIter;
} else {
++iter;
}
} else {
d_data[oIter->first]=-oIter->second;
}
++oIter;
}
return *this;
};
const SparseIntVect<IndexType>
operator- (const SparseIntVect<IndexType> &other) const {
SparseIntVect<IndexType> res(*this);
return res-=other;
}
bool operator==(const SparseIntVect<IndexType> &v2){
if(d_length!=v2.d_length){
return false;
}
return d_data==v2.d_data;
}
bool operator!=(const SparseIntVect<IndexType> &v2){
return !(*this==v2);
}
//! returns a binary string representation (pickle)
const std::string toString() const {
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
ss.write((const char *)&(ci_SPARSEINTVECT_VERSION),sizeof(ci_SPARSEINTVECT_VERSION));
unsigned int pieceSize=sizeof(IndexType);
ss.write((const char *)&pieceSize,sizeof(pieceSize));
ss.write((const char *)&d_length,sizeof(d_length));
IndexType nEntries=d_data.size();
ss.write((const char *)&nEntries,sizeof(nEntries));
typename StorageType::const_iterator iter=d_data.begin();
while(iter!=d_data.end()){
ss.write((const char *)&iter->first,sizeof(iter->first));
ss.write((const char *)&iter->second,sizeof(iter->second));
++iter;
}
return ss.str();
};
void fromString(std::string &txt) {
initFromText(txt.c_str(),txt.length());
}
private:
IndexType d_length;
StorageType d_data;
void initFromText(const char *pkl,const unsigned int len) {
d_data.clear();
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
ss.write(pkl,len);
int vers;
ss.read((char *)&vers,sizeof(vers));
if(vers==0x0001){
unsigned int idxSize;
ss.read((char *)&idxSize,sizeof(idxSize));
if(idxSize>sizeof(IndexType)){
throw ValueErrorException("IndexType cannot accomodate index size in SparseIntVect pickle");
}
switch(idxSize){
case 1:
readVals<unsigned char>(ss);break;
case 4:
readVals<unsigned int>(ss);break;
case 8:
readVals<unsigned long long>(ss);break;
default:
throw ValueErrorException("unreadable format");
}
} else {
throw ValueErrorException("bad version in SparseIntVect pickle");
}
};
template <typename T>
void readVals(std::stringstream &ss){
PRECONDITION(sizeof(T)<=sizeof(IndexType),"invalid size");
T tVal;
ss.read((char *)&tVal,sizeof(T));
d_length=tVal;
T nEntries;
ss.read((char *)&nEntries,sizeof(T));
for(T i=0;i<nEntries;++i){
ss.read((char *)&tVal,sizeof(tVal));
int val;
ss.read((char *)&val,sizeof(val));
d_data[tVal]=val;
}
}
};
template <typename IndexType, typename SequenceType>
void updateFromSequence(SparseIntVect<IndexType> &vect,
const SequenceType &seq){
typename SequenceType::const_iterator seqIt;
for(seqIt=seq.begin();seqIt!=seq.end();++seqIt){
// EFF: probably not the most efficient approach
IndexType idx=*seqIt;
vect.setVal(idx,vect.getVal(idx)+1);
}
}
}
#endif