mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Add SparseIntVect in both C++ and Python
This still needs Windows testing.
This commit is contained in:
336
Code/DataStructs/SparseIntVect.h
Normal file
336
Code/DataStructs/SparseIntVect.h
Normal file
@@ -0,0 +1,336 @@
|
||||
// $Id$
|
||||
//
|
||||
// Copyright (C) 2007 Greg Landrum
|
||||
//
|
||||
// @@ All Rights Reserved @@
|
||||
//
|
||||
#ifndef __RD_SPARSE_INT_VECT_20070921__
|
||||
#define __RD_SPARSE_INT_VECT_20070921__
|
||||
|
||||
#include <cstdlib>
#include <map>
#include <sstream>
#include <string>

#include <RDBoost/Exceptions.h>
#include <RDGeneral/Invariant.h>
|
||||
|
||||
//! pickle format version; written as the first field of every SparseIntVect
//! pickle and checked again when a pickle is read back
const int ci_SPARSEINTVECT_VERSION = 0x0001;
|
||||
namespace RDKit{
|
||||
//! a class for efficiently storing sparse vectors of ints
|
||||
template <typename IndexType>
|
||||
class SparseIntVect {
|
||||
typedef std::map<IndexType,int> StorageType;
|
||||
public:
|
||||
|
||||
SparseIntVect() : d_length(0) {};
|
||||
|
||||
//! initialize with a particular length
|
||||
SparseIntVect(IndexType length) : d_length(length) {};
|
||||
|
||||
//! Copy constructor
|
||||
SparseIntVect(const SparseIntVect<IndexType> &other){
|
||||
d_length=other.d_length;
|
||||
d_data.insert(other.d_data.begin(),other.d_data.end());
|
||||
}
|
||||
|
||||
//! constructor from a pickle
|
||||
SparseIntVect(const std::string pkl){
|
||||
initFromText(pkl.c_str(),pkl.size());
|
||||
};
|
||||
//! constructor from a pickle
|
||||
SparseIntVect(const char *pkl,const unsigned int len){
|
||||
initFromText(pkl,len);
|
||||
};
|
||||
|
||||
//! destructor (doesn't need to do anything)
|
||||
~SparseIntVect() {}
|
||||
|
||||
//! return the value at an index
|
||||
int getVal(IndexType idx) const {
|
||||
if(idx<0||idx>=d_length){
|
||||
throw IndexErrorException(idx);
|
||||
}
|
||||
int res=0;
|
||||
typename StorageType::const_iterator iter=d_data.find(idx);
|
||||
if(iter!=d_data.end()){
|
||||
res=iter->second;
|
||||
}
|
||||
return res;
|
||||
};
|
||||
int operator[] (IndexType idx) const { return getVal(idx); };
|
||||
|
||||
//! set the value at an index
|
||||
void setVal(IndexType idx, int val){
|
||||
if(idx<0||idx>=d_length){
|
||||
throw IndexErrorException(idx);
|
||||
}
|
||||
if(val!=0){
|
||||
d_data[idx]=val;
|
||||
} else {
|
||||
d_data.erase(idx);
|
||||
}
|
||||
};
|
||||
//! returns the length
|
||||
IndexType getLength() const { return d_length; };
|
||||
|
||||
//! returns the sum of all the elements in the vect
|
||||
int getTotalVal(bool useAbs=false) const {
|
||||
int res=0;
|
||||
typename StorageType::const_iterator iter;
|
||||
for(iter=d_data.begin();iter!=d_data.end();++iter){
|
||||
if(useAbs){
|
||||
res+=abs(iter->second);
|
||||
} else {
|
||||
res+=iter->second;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
};
|
||||
|
||||
//! this is a "fuzzy" intesection, the final value
|
||||
//! of each element is equal to the minimum from
|
||||
//! the two vects.
|
||||
SparseIntVect<IndexType> &
|
||||
operator&= (const SparseIntVect<IndexType> &other) {
|
||||
if(other.d_length!=d_length){
|
||||
throw ValueErrorException("SparseIntVect size mismatch");
|
||||
}
|
||||
|
||||
typename StorageType::iterator iter=d_data.begin();
|
||||
typename StorageType::const_iterator oIter=other.d_data.begin();
|
||||
while(iter!=d_data.end()){
|
||||
// we're relying on the fact that the maps are sorted:
|
||||
while(oIter!=other.d_data.end() && oIter->first < iter->first){
|
||||
++oIter;
|
||||
}
|
||||
if(oIter!=d_data.end() && oIter->first==iter->first){
|
||||
// found it:
|
||||
if(oIter->second<iter->second){
|
||||
iter->second=oIter->second;
|
||||
}
|
||||
++oIter;
|
||||
++iter;
|
||||
} else {
|
||||
// not there; our value is zero, which means
|
||||
// we should remove this value:
|
||||
typename StorageType::iterator tmpIter=iter;
|
||||
++tmpIter;
|
||||
d_data.erase(iter);
|
||||
iter=tmpIter;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
};
|
||||
const SparseIntVect<IndexType>
|
||||
operator& (const SparseIntVect<IndexType> &other) const {
|
||||
SparseIntVect<IndexType> res(*this);
|
||||
return res&=other;
|
||||
}
|
||||
|
||||
//! this is a "fuzzy" union, the final value
|
||||
//! of each element is equal to the maximum from
|
||||
//! the two vects.
|
||||
SparseIntVect<IndexType> &
|
||||
operator|= (const SparseIntVect<IndexType> &other) {
|
||||
if(other.d_length!=d_length){
|
||||
throw ValueErrorException("SparseIntVect size mismatch");
|
||||
}
|
||||
|
||||
typename StorageType::iterator iter=d_data.begin();
|
||||
typename StorageType::const_iterator oIter=other.d_data.begin();
|
||||
while(iter!=d_data.end()){
|
||||
// we're relying on the fact that the maps are sorted:
|
||||
while(oIter!=other.d_data.end() &&
|
||||
oIter->first < iter->first){
|
||||
d_data[oIter->first]=oIter->second;
|
||||
++oIter;
|
||||
}
|
||||
if(oIter!=other.d_data.end() && oIter->first==iter->first){
|
||||
// found it:
|
||||
if(oIter->second>iter->second){
|
||||
iter->second=oIter->second;
|
||||
}
|
||||
++oIter;
|
||||
}
|
||||
++iter;
|
||||
}
|
||||
// finish up the other vect:
|
||||
while(oIter!=other.d_data.end()){
|
||||
d_data[oIter->first]=oIter->second;
|
||||
++oIter;
|
||||
}
|
||||
return *this;
|
||||
};
|
||||
const SparseIntVect<IndexType>
|
||||
operator| (const SparseIntVect<IndexType> &other) const {
|
||||
SparseIntVect<IndexType> res(*this);
|
||||
return res|=other;
|
||||
}
|
||||
|
||||
SparseIntVect<IndexType> &
|
||||
operator+= (const SparseIntVect<IndexType> &other) {
|
||||
if(other.d_length!=d_length){
|
||||
throw ValueErrorException("SparseIntVect size mismatch");
|
||||
}
|
||||
typename StorageType::iterator iter=d_data.begin();
|
||||
typename StorageType::const_iterator oIter=other.d_data.begin();
|
||||
while(oIter!=other.d_data.end()){
|
||||
while(iter!=d_data.end() &&
|
||||
iter->first < oIter->first){
|
||||
++iter;
|
||||
}
|
||||
if(iter!=d_data.end() && oIter->first==iter->first){
|
||||
// found it:
|
||||
iter->second+=oIter->second;
|
||||
if(!iter->second){
|
||||
typename StorageType::iterator tIter=iter;
|
||||
++tIter;
|
||||
d_data.erase(iter);
|
||||
iter=tIter;
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
} else {
|
||||
d_data[oIter->first]=oIter->second;
|
||||
}
|
||||
++oIter;
|
||||
}
|
||||
return *this;
|
||||
};
|
||||
const SparseIntVect<IndexType>
|
||||
operator+ (const SparseIntVect<IndexType> &other) const {
|
||||
SparseIntVect<IndexType> res(*this);
|
||||
return res+=other;
|
||||
}
|
||||
|
||||
SparseIntVect<IndexType> &
|
||||
operator-= (const SparseIntVect<IndexType> &other) {
|
||||
if(other.d_length!=d_length){
|
||||
throw ValueErrorException("SparseIntVect size mismatch");
|
||||
}
|
||||
typename StorageType::iterator iter=d_data.begin();
|
||||
typename StorageType::const_iterator oIter=other.d_data.begin();
|
||||
while(oIter!=other.d_data.end()){
|
||||
while(iter!=d_data.end() &&
|
||||
iter->first < oIter->first){
|
||||
++iter;
|
||||
}
|
||||
if(iter!=d_data.end() && oIter->first==iter->first){
|
||||
// found it:
|
||||
iter->second-=oIter->second;
|
||||
if(!iter->second){
|
||||
typename StorageType::iterator tIter=iter;
|
||||
++tIter;
|
||||
d_data.erase(iter);
|
||||
iter=tIter;
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
} else {
|
||||
d_data[oIter->first]=-oIter->second;
|
||||
}
|
||||
++oIter;
|
||||
}
|
||||
return *this;
|
||||
};
|
||||
const SparseIntVect<IndexType>
|
||||
operator- (const SparseIntVect<IndexType> &other) const {
|
||||
SparseIntVect<IndexType> res(*this);
|
||||
return res-=other;
|
||||
}
|
||||
|
||||
bool operator==(const SparseIntVect<IndexType> &v2){
|
||||
if(d_length!=v2.d_length){
|
||||
return false;
|
||||
}
|
||||
return d_data==v2.d_data;
|
||||
}
|
||||
bool operator!=(const SparseIntVect<IndexType> &v2){
|
||||
return !(*this==v2);
|
||||
}
|
||||
|
||||
//! returns a binary string representation (pickle)
|
||||
const std::string toString() const {
|
||||
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
|
||||
ss.write((const char *)&(ci_SPARSEINTVECT_VERSION),sizeof(ci_SPARSEINTVECT_VERSION));
|
||||
unsigned int pieceSize=sizeof(IndexType);
|
||||
ss.write((const char *)&pieceSize,sizeof(pieceSize));
|
||||
ss.write((const char *)&d_length,sizeof(d_length));
|
||||
IndexType nEntries=d_data.size();
|
||||
ss.write((const char *)&nEntries,sizeof(nEntries));
|
||||
|
||||
typename StorageType::const_iterator iter=d_data.begin();
|
||||
while(iter!=d_data.end()){
|
||||
ss.write((const char *)&iter->first,sizeof(iter->first));
|
||||
ss.write((const char *)&iter->second,sizeof(iter->second));
|
||||
++iter;
|
||||
}
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
void fromString(std::string &txt) {
|
||||
initFromText(txt.c_str(),txt.length());
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
IndexType d_length;
|
||||
StorageType d_data;
|
||||
void initFromText(const char *pkl,const unsigned int len) {
|
||||
d_data.clear();
|
||||
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
|
||||
ss.write(pkl,len);
|
||||
|
||||
int vers;
|
||||
ss.read((char *)&vers,sizeof(vers));
|
||||
if(vers==0x0001){
|
||||
unsigned int idxSize;
|
||||
ss.read((char *)&idxSize,sizeof(idxSize));
|
||||
if(idxSize>sizeof(IndexType)){
|
||||
throw ValueErrorException("IndexType cannot accomodate index size in SparseIntVect pickle");
|
||||
}
|
||||
switch(idxSize){
|
||||
case 1:
|
||||
readVals<unsigned char>(ss);break;
|
||||
case 4:
|
||||
readVals<unsigned int>(ss);break;
|
||||
case 8:
|
||||
readVals<unsigned long long>(ss);break;
|
||||
default:
|
||||
throw ValueErrorException("unreadable format");
|
||||
}
|
||||
} else {
|
||||
throw ValueErrorException("bad version in SparseIntVect pickle");
|
||||
}
|
||||
};
|
||||
template <typename T>
|
||||
void readVals(std::stringstream &ss){
|
||||
PRECONDITION(sizeof(T)<=sizeof(IndexType),"invalid size");
|
||||
T tVal;
|
||||
ss.read((char *)&tVal,sizeof(T));
|
||||
d_length=tVal;
|
||||
T nEntries;
|
||||
ss.read((char *)&nEntries,sizeof(T));
|
||||
for(T i=0;i<nEntries;++i){
|
||||
ss.read((char *)&tVal,sizeof(tVal));
|
||||
int val;
|
||||
ss.read((char *)&val,sizeof(val));
|
||||
d_data[tVal]=val;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename IndexType, typename SequenceType>
|
||||
void updateFromSequence(SparseIntVect<IndexType> &vect,
|
||||
const SequenceType &seq){
|
||||
typename SequenceType::const_iterator seqIt;
|
||||
for(seqIt=seq.begin();seqIt!=seq.end();++seqIt){
|
||||
// EFF: probably not the most efficient approach
|
||||
IndexType idx=*seqIt;
|
||||
vect.setVal(idx,vect.getVal(idx)+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user