mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
337 lines
9.2 KiB
C++
337 lines
9.2 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2007 Greg Landrum
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
//
|
|
#ifndef __RD_SPARSE_INT_VECT_20070921__
|
|
#define __RD_SPARSE_INT_VECT_20070921__
|
|
|
|
#include <map>
|
|
#include <string>
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <sstream>
|
|
#include <RDBoost/Exceptions.h>
|
|
|
|
const int ci_SPARSEINTVECT_VERSION=0x0001; //!< version number to use in pickles
|
|
namespace RDKit{
|
|
//! a class for efficiently storing sparse vectors of ints
|
|
template <typename IndexType>
|
|
class SparseIntVect {
|
|
typedef std::map<IndexType,int> StorageType;
|
|
public:
|
|
|
|
SparseIntVect() : d_length(0) {};
|
|
|
|
//! initialize with a particular length
|
|
SparseIntVect(IndexType length) : d_length(length) {};
|
|
|
|
//! Copy constructor
|
|
SparseIntVect(const SparseIntVect<IndexType> &other){
|
|
d_length=other.d_length;
|
|
d_data.insert(other.d_data.begin(),other.d_data.end());
|
|
}
|
|
|
|
//! constructor from a pickle
|
|
SparseIntVect(const std::string pkl){
|
|
initFromText(pkl.c_str(),pkl.size());
|
|
};
|
|
//! constructor from a pickle
|
|
SparseIntVect(const char *pkl,const unsigned int len){
|
|
initFromText(pkl,len);
|
|
};
|
|
|
|
//! destructor (doesn't need to do anything)
|
|
~SparseIntVect() {}
|
|
|
|
//! return the value at an index
|
|
int getVal(IndexType idx) const {
|
|
if(idx<0||idx>=d_length){
|
|
throw IndexErrorException(idx);
|
|
}
|
|
int res=0;
|
|
typename StorageType::const_iterator iter=d_data.find(idx);
|
|
if(iter!=d_data.end()){
|
|
res=iter->second;
|
|
}
|
|
return res;
|
|
};
|
|
int operator[] (IndexType idx) const { return getVal(idx); };
|
|
|
|
//! set the value at an index
|
|
void setVal(IndexType idx, int val){
|
|
if(idx<0||idx>=d_length){
|
|
throw IndexErrorException(idx);
|
|
}
|
|
if(val!=0){
|
|
d_data[idx]=val;
|
|
} else {
|
|
d_data.erase(idx);
|
|
}
|
|
};
|
|
//! returns the length
|
|
IndexType getLength() const { return d_length; };
|
|
|
|
//! returns the sum of all the elements in the vect
|
|
int getTotalVal(bool useAbs=false) const {
|
|
int res=0;
|
|
typename StorageType::const_iterator iter;
|
|
for(iter=d_data.begin();iter!=d_data.end();++iter){
|
|
if(useAbs){
|
|
res+=abs(iter->second);
|
|
} else {
|
|
res+=iter->second;
|
|
}
|
|
}
|
|
return res;
|
|
};
|
|
|
|
//! this is a "fuzzy" intesection, the final value
|
|
//! of each element is equal to the minimum from
|
|
//! the two vects.
|
|
SparseIntVect<IndexType> &
|
|
operator&= (const SparseIntVect<IndexType> &other) {
|
|
if(other.d_length!=d_length){
|
|
throw ValueErrorException("SparseIntVect size mismatch");
|
|
}
|
|
|
|
typename StorageType::iterator iter=d_data.begin();
|
|
typename StorageType::const_iterator oIter=other.d_data.begin();
|
|
while(iter!=d_data.end()){
|
|
// we're relying on the fact that the maps are sorted:
|
|
while(oIter!=other.d_data.end() && oIter->first < iter->first){
|
|
++oIter;
|
|
}
|
|
if(oIter!=d_data.end() && oIter->first==iter->first){
|
|
// found it:
|
|
if(oIter->second<iter->second){
|
|
iter->second=oIter->second;
|
|
}
|
|
++oIter;
|
|
++iter;
|
|
} else {
|
|
// not there; our value is zero, which means
|
|
// we should remove this value:
|
|
typename StorageType::iterator tmpIter=iter;
|
|
++tmpIter;
|
|
d_data.erase(iter);
|
|
iter=tmpIter;
|
|
}
|
|
}
|
|
return *this;
|
|
};
|
|
const SparseIntVect<IndexType>
|
|
operator& (const SparseIntVect<IndexType> &other) const {
|
|
SparseIntVect<IndexType> res(*this);
|
|
return res&=other;
|
|
}
|
|
|
|
//! this is a "fuzzy" union, the final value
|
|
//! of each element is equal to the maximum from
|
|
//! the two vects.
|
|
SparseIntVect<IndexType> &
|
|
operator|= (const SparseIntVect<IndexType> &other) {
|
|
if(other.d_length!=d_length){
|
|
throw ValueErrorException("SparseIntVect size mismatch");
|
|
}
|
|
|
|
typename StorageType::iterator iter=d_data.begin();
|
|
typename StorageType::const_iterator oIter=other.d_data.begin();
|
|
while(iter!=d_data.end()){
|
|
// we're relying on the fact that the maps are sorted:
|
|
while(oIter!=other.d_data.end() &&
|
|
oIter->first < iter->first){
|
|
d_data[oIter->first]=oIter->second;
|
|
++oIter;
|
|
}
|
|
if(oIter!=other.d_data.end() && oIter->first==iter->first){
|
|
// found it:
|
|
if(oIter->second>iter->second){
|
|
iter->second=oIter->second;
|
|
}
|
|
++oIter;
|
|
}
|
|
++iter;
|
|
}
|
|
// finish up the other vect:
|
|
while(oIter!=other.d_data.end()){
|
|
d_data[oIter->first]=oIter->second;
|
|
++oIter;
|
|
}
|
|
return *this;
|
|
};
|
|
const SparseIntVect<IndexType>
|
|
operator| (const SparseIntVect<IndexType> &other) const {
|
|
SparseIntVect<IndexType> res(*this);
|
|
return res|=other;
|
|
}
|
|
|
|
SparseIntVect<IndexType> &
|
|
operator+= (const SparseIntVect<IndexType> &other) {
|
|
if(other.d_length!=d_length){
|
|
throw ValueErrorException("SparseIntVect size mismatch");
|
|
}
|
|
typename StorageType::iterator iter=d_data.begin();
|
|
typename StorageType::const_iterator oIter=other.d_data.begin();
|
|
while(oIter!=other.d_data.end()){
|
|
while(iter!=d_data.end() &&
|
|
iter->first < oIter->first){
|
|
++iter;
|
|
}
|
|
if(iter!=d_data.end() && oIter->first==iter->first){
|
|
// found it:
|
|
iter->second+=oIter->second;
|
|
if(!iter->second){
|
|
typename StorageType::iterator tIter=iter;
|
|
++tIter;
|
|
d_data.erase(iter);
|
|
iter=tIter;
|
|
} else {
|
|
++iter;
|
|
}
|
|
} else {
|
|
d_data[oIter->first]=oIter->second;
|
|
}
|
|
++oIter;
|
|
}
|
|
return *this;
|
|
};
|
|
const SparseIntVect<IndexType>
|
|
operator+ (const SparseIntVect<IndexType> &other) const {
|
|
SparseIntVect<IndexType> res(*this);
|
|
return res+=other;
|
|
}
|
|
|
|
SparseIntVect<IndexType> &
|
|
operator-= (const SparseIntVect<IndexType> &other) {
|
|
if(other.d_length!=d_length){
|
|
throw ValueErrorException("SparseIntVect size mismatch");
|
|
}
|
|
typename StorageType::iterator iter=d_data.begin();
|
|
typename StorageType::const_iterator oIter=other.d_data.begin();
|
|
while(oIter!=other.d_data.end()){
|
|
while(iter!=d_data.end() &&
|
|
iter->first < oIter->first){
|
|
++iter;
|
|
}
|
|
if(iter!=d_data.end() && oIter->first==iter->first){
|
|
// found it:
|
|
iter->second-=oIter->second;
|
|
if(!iter->second){
|
|
typename StorageType::iterator tIter=iter;
|
|
++tIter;
|
|
d_data.erase(iter);
|
|
iter=tIter;
|
|
} else {
|
|
++iter;
|
|
}
|
|
} else {
|
|
d_data[oIter->first]=-oIter->second;
|
|
}
|
|
++oIter;
|
|
}
|
|
return *this;
|
|
};
|
|
const SparseIntVect<IndexType>
|
|
operator- (const SparseIntVect<IndexType> &other) const {
|
|
SparseIntVect<IndexType> res(*this);
|
|
return res-=other;
|
|
}
|
|
|
|
bool operator==(const SparseIntVect<IndexType> &v2){
|
|
if(d_length!=v2.d_length){
|
|
return false;
|
|
}
|
|
return d_data==v2.d_data;
|
|
}
|
|
bool operator!=(const SparseIntVect<IndexType> &v2){
|
|
return !(*this==v2);
|
|
}
|
|
|
|
//! returns a binary string representation (pickle)
|
|
const std::string toString() const {
|
|
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
|
|
ss.write((const char *)&(ci_SPARSEINTVECT_VERSION),sizeof(ci_SPARSEINTVECT_VERSION));
|
|
unsigned int pieceSize=sizeof(IndexType);
|
|
ss.write((const char *)&pieceSize,sizeof(pieceSize));
|
|
ss.write((const char *)&d_length,sizeof(d_length));
|
|
IndexType nEntries=d_data.size();
|
|
ss.write((const char *)&nEntries,sizeof(nEntries));
|
|
|
|
typename StorageType::const_iterator iter=d_data.begin();
|
|
while(iter!=d_data.end()){
|
|
ss.write((const char *)&iter->first,sizeof(iter->first));
|
|
ss.write((const char *)&iter->second,sizeof(iter->second));
|
|
++iter;
|
|
}
|
|
return ss.str();
|
|
};
|
|
|
|
void fromString(std::string &txt) {
|
|
initFromText(txt.c_str(),txt.length());
|
|
}
|
|
|
|
|
|
private:
|
|
IndexType d_length;
|
|
StorageType d_data;
|
|
void initFromText(const char *pkl,const unsigned int len) {
|
|
d_data.clear();
|
|
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
|
|
ss.write(pkl,len);
|
|
|
|
int vers;
|
|
ss.read((char *)&vers,sizeof(vers));
|
|
if(vers==0x0001){
|
|
unsigned int idxSize;
|
|
ss.read((char *)&idxSize,sizeof(idxSize));
|
|
if(idxSize>sizeof(IndexType)){
|
|
throw ValueErrorException("IndexType cannot accomodate index size in SparseIntVect pickle");
|
|
}
|
|
switch(idxSize){
|
|
case 1:
|
|
readVals<unsigned char>(ss);break;
|
|
case 4:
|
|
readVals<unsigned int>(ss);break;
|
|
case 8:
|
|
readVals<unsigned long long>(ss);break;
|
|
default:
|
|
throw ValueErrorException("unreadable format");
|
|
}
|
|
} else {
|
|
throw ValueErrorException("bad version in SparseIntVect pickle");
|
|
}
|
|
};
|
|
template <typename T>
|
|
void readVals(std::stringstream &ss){
|
|
PRECONDITION(sizeof(T)<=sizeof(IndexType),"invalid size");
|
|
T tVal;
|
|
ss.read((char *)&tVal,sizeof(T));
|
|
d_length=tVal;
|
|
T nEntries;
|
|
ss.read((char *)&nEntries,sizeof(T));
|
|
for(T i=0;i<nEntries;++i){
|
|
ss.read((char *)&tVal,sizeof(tVal));
|
|
int val;
|
|
ss.read((char *)&val,sizeof(val));
|
|
d_data[tVal]=val;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename IndexType, typename SequenceType>
|
|
void updateFromSequence(SparseIntVect<IndexType> &vect,
|
|
const SequenceType &seq){
|
|
typename SequenceType::const_iterator seqIt;
|
|
for(seqIt=seq.begin();seqIt!=seq.end();++seqIt){
|
|
// EFF: probably not the most efficient approach
|
|
IndexType idx=*seqIt;
|
|
vect.setVal(idx,vect.getVal(idx)+1);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#endif
|