Files
rdkit/Code/DataStructs/DiscreteValueVect.cpp
2015-03-16 22:31:48 +01:00

294 lines
8.2 KiB
C++

// $Id$
//
// Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "DiscreteValueVect.h"
#include <RDGeneral/Invariant.h>
#include <RDGeneral/StreamOps.h>
#include "DatastructsException.h"
#include "DiscreteDistMat.h"
#include <RDGeneral/Exceptions.h>
#include <boost/cstdint.hpp>
namespace RDKit {
// Version number of the binary pickle format for DiscreteValueVect.
// toString() writes the negated value as the first field of the pickle.
const int ci_DISCRETEVALUEVECTPICKLE_VERSION=0x1;
// Copy constructor: takes over the layout parameters of `other` and makes
// a deep copy of its packed storage, so the two vectors do not share data.
DiscreteValueVect::DiscreteValueVect(const DiscreteValueVect &other) {
  // layout / bookkeeping members
  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;
  d_length = other.getLength();
  d_numInts = other.getNumInts();

  // duplicate the packed words into a freshly allocated buffer
  boost::uint32_t *buffer = new boost::uint32_t[d_numInts];
  memcpy(static_cast<void *>(buffer),
         static_cast<const void *>(other.getData()),
         d_numInts*sizeof(boost::uint32_t));

  // d_data takes ownership of the new buffer
  d_data.reset(buffer);
}
// Returns the value stored at position i.
// Values are packed d_valsPerInt to a 32-bit word, d_bitsPerVal bits each.
// Throws IndexErrorException if i is not smaller than the vector length.
unsigned int DiscreteValueVect::getVal(unsigned int i) const {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  // word that holds the value, and the slot of the value inside that word
  const unsigned int wordIdx = i / d_valsPerInt;
  const unsigned int slot = i % d_valsPerInt;
  const unsigned int bitOffset = d_bitsPerVal * slot;
  return ((d_data[wordIdx] >> bitOffset) & d_mask);
}
// Stores val at position i.
//   i   - index of the element to modify
//   val - new value; every set bit must lie inside d_mask
// Throws IndexErrorException if i is not smaller than the vector length and
// ValueErrorException if val does not fit into the mask.
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  if ((val & d_mask) != val) {
    throw ValueErrorException("Value out of range");
  }
  const unsigned int shift = d_bitsPerVal * (i % d_valsPerInt);
  const unsigned int intId = i / d_valsPerInt;
  // Build the slot mask with unsigned arithmetic: with a signed int the
  // shift overflows for the topmost slot of a word (e.g. 0xFF << 24).
  const unsigned int slotMask = ((1u << d_bitsPerVal) - 1u) << shift;
  // clear the slot, then OR in the new value
  d_data[intId] = (d_data[intId] & ~slotMask) | (val << shift);
}
// Returns the sum of all values held in the packed storage.
// Every slot of every word is visited, i.e. d_numInts * d_valsPerInt slots.
unsigned int DiscreteValueVect::getTotalVal() const {
  unsigned int total = 0;
  for (unsigned int wordIdx = 0; wordIdx < d_numInts; ++wordIdx) {
    for (unsigned int slot = 0; slot < d_valsPerInt; ++slot) {
      const unsigned int bitOffset = slot * d_bitsPerVal;
      total += ((d_data[wordIdx] >> bitOffset) & d_mask);
    }
  }
  return total;
}
// Returns the number of elements in the vector (d_length).
unsigned int DiscreteValueVect::getLength() const
{
  return d_length;
}
// Returns a read-only pointer to the packed 32-bit words held by d_data.
// The pointer is not owning; d_data keeps managing the buffer.
const boost::uint32_t *DiscreteValueVect::getData() const
{
  return d_data.get();
}
unsigned int computeL1Norm(const DiscreteValueVect &v1, const DiscreteValueVect &v2) {
if (v1.getLength() != v2.getLength()) {
throw ValueErrorException("Comparing vectors of different lengths");
}
DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
if (valType != v2.getValueType()) {
throw ValueErrorException("Comparing vector of different value types");
}
const boost::uint32_t* data1 = v1.getData();
const boost::uint32_t* data2 = v2.getData();
unsigned int res = 0;
if (valType <= DiscreteValueVect::EIGHTBITVALUE) {
DiscreteDistMat *dmat = getDiscreteDistMat();
unsigned char *cd1 = (unsigned char *)(data1);
unsigned char *cd2 = (unsigned char *)(data2);
const unsigned char *cend = cd1 + (v1.getNumInts()*4);
while (cd1 != cend) {
if (*cd1 == *cd2) {
cd1++;
cd2++;
continue;
}
res += dmat->getDist(*cd1, *cd2, valType);
cd1++;
cd2++;
}
} else {
// we have a sixteen bits per value type
// REVIEW: we are making an assumption here that a short
// is 16 bit - may fail on a different compiler
const unsigned short int *sd1 = (unsigned short int *)(data1);
const unsigned short int *sd2 = (unsigned short int *)(data2);
const unsigned short int *send = sd1 + (v1.getNumInts()*2);
while (sd1 != send) {
if (*sd1 == *sd2) {
sd1++;
sd2++;
continue;
}
res += abs((*sd1) - (*sd2));
sd1++;
sd2++;
}
}
return res;
}
std::string DiscreteValueVect::toString() const {
std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
boost::int32_t tVers=ci_DISCRETEVALUEVECTPICKLE_VERSION*-1;
streamWrite(ss,tVers);
boost::uint32_t tInt;
tInt=d_type;
streamWrite(ss,tInt);
tInt=d_bitsPerVal;
streamWrite(ss,tInt);
tInt=d_mask;
streamWrite(ss,tInt);
tInt=d_length;
streamWrite(ss,tInt);
tInt=d_numInts;
streamWrite(ss,tInt);
#if defined(BOOST_BIG_ENDIAN)
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(d_data.get()[i]);
ss.write((const char *)td,d_numInts*sizeof(tInt));
delete [] td;
#else
ss.write((const char *)d_data.get(),d_numInts*sizeof(tInt));
#endif
std::string res(ss.str());
return res;
};
void DiscreteValueVect::initFromText(const char *pkl,const unsigned int len){
std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
ss.write(pkl,len);
boost::int32_t tVers;
streamRead(ss,tVers);
tVers *= -1;
if(tVers==0x1){
} else {
throw ValueErrorException("bad version in DiscreteValueVect pickle");
}
boost::uint32_t tInt;
streamRead(ss,tInt);
d_type=static_cast<DiscreteValueType>(tInt);
streamRead(ss,tInt);
d_bitsPerVal=tInt;
d_valsPerInt = BITS_PER_INT/d_bitsPerVal;
streamRead(ss,tInt);
d_mask=tInt;
streamRead(ss,tInt);
d_length=tInt;
streamRead(ss,tInt);
d_numInts=tInt;
boost::uint32_t *data = new boost::uint32_t[d_numInts];
ss.read((char *)data,d_numInts*sizeof(boost::uint32_t));
#if defined(BOOST_BIG_ENDIAN)
boost::uint32_t *td = new boost::uint32_t[d_numInts];
for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(data[i]);
d_data.reset(td);
delete [] data;
#else
d_data.reset(data);
#endif
};
// Element-wise minimum of two vectors of equal length.
// The result uses the smaller of the two value types.
DiscreteValueVect DiscreteValueVect::operator& (const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length==d_length,"length mismatch");

  const DiscreteValueType resType = (other.d_type<d_type) ? other.d_type : d_type;
  DiscreteValueVect result(resType,d_length);

  for(unsigned int idx=0;idx<d_length;++idx){
    const unsigned int mine=getVal(idx);
    const unsigned int theirs=other.getVal(idx);
    result.setVal(idx, (mine<theirs) ? mine : theirs);
  }
  return result;
}
// Element-wise maximum of two vectors of equal length.
// The result uses the larger of the two value types.
DiscreteValueVect DiscreteValueVect::operator|(const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length==d_length,"length mismatch");

  const DiscreteValueType resType = (other.d_type>d_type) ? other.d_type : d_type;
  DiscreteValueVect result(resType,d_length);

  for(unsigned int idx=0;idx<d_length;++idx){
    const unsigned int mine=getVal(idx);
    const unsigned int theirs=other.getVal(idx);
    result.setVal(idx, (mine>theirs) ? mine : theirs);
  }
  return result;
}
// In-place element-wise addition of a vector of equal length.
// Sums that exceed the largest value representable with d_bitsPerVal bits
// are clamped to that largest value.
DiscreteValueVect& DiscreteValueVect::operator+=(const DiscreteValueVect &other) {
  PRECONDITION(other.d_length==d_length,"length mismatch");

  const unsigned int ceiling = (1<<d_bitsPerVal) - 1;
  for(unsigned int idx=0;idx<d_length;++idx){
    const unsigned int sum=getVal(idx)+other.getVal(idx);
    setVal(idx, (sum>ceiling) ? ceiling : sum);
  }
  return *this;
}
// In-place element-wise subtraction of a vector of equal length.
// Results that would be negative are stored as zero.
DiscreteValueVect& DiscreteValueVect::operator-=(const DiscreteValueVect &other) {
  PRECONDITION(other.d_length==d_length,"length mismatch");

  for(unsigned int idx=0;idx<d_length;++idx){
    const unsigned int mine=getVal(idx);
    const unsigned int theirs=other.getVal(idx);
    setVal(idx, (mine>theirs) ? (mine-theirs) : 0);
  }
  return *this;
}
// NOTE: operator~ is compiled out by the `#if 0` guard below and is not
// part of the build. As written it would return a vector in which each
// element v is replaced by maxVal - v, with maxVal = (1<<d_bitsPerVal) - 1.
#if 0
DiscreteValueVect DiscreteValueVect::operator~() const {
DiscreteValueVect ans(d_type,d_length);
unsigned int maxVal = (1<<d_bitsPerVal) - 1;
for(unsigned int i=0;i<d_length;++i){
unsigned int v1=getVal(i);
ans.setVal(i,maxVal-v1);
}
return(ans);
};
#endif
// Returns a copy of p1 to which p2 has been added with operator+=.
DiscreteValueVect operator+ (const DiscreteValueVect& p1,
                             const DiscreteValueVect& p2)
{
  DiscreteValueVect sum(p1);
  sum += p2;
  return sum;
}
// Returns a copy of p1 from which p2 has been subtracted with operator-=.
DiscreteValueVect operator- (const DiscreteValueVect& p1,
                             const DiscreteValueVect& p2)
{
  DiscreteValueVect difference(p1);
  difference -= p2;
  return difference;
}
} // end of namespace RDKit