Files
rdkit/Code/DataStructs/DiscreteValueVect.cpp
Anna Brünisholz a06c921a79 Cleanup of Code/DataStructs (#7365)
* for-loop modernization

* declaration and initialization together

* switch statements, min&max from std

* switch statement

* use of std min&max

* fixed unsigned int to int comparison

* implement switch statement

* fix unsigned int to int comparison

* revert previous mistake

* Update Code/DataStructs/DiscreteValueVect.cpp

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>

* implemented suggestions

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
2024-04-21 06:41:52 +02:00

301 lines
8.5 KiB
C++

// $Id$
//
// Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include "DiscreteValueVect.h"
#include <RDGeneral/Invariant.h>
#include <RDGeneral/StreamOps.h>
#include "DatastructsException.h"
#include "DiscreteDistMat.h"
#include <RDGeneral/Exceptions.h>
#include <cstdint>
#include <algorithm>
namespace RDKit {
// Version number of the binary pickle format. toString() writes the negated
// value as the first 32-bit field of the pickle.
const int ci_DISCRETEVALUEVECTPICKLE_VERSION = 0x1;
// Copy constructor: takes over the bookkeeping fields of `other` and makes a
// deep copy of its packed storage, so the two vectors do not share a buffer.
DiscreteValueVect::DiscreteValueVect(const DiscreteValueVect &other) {
  // scalar bookkeeping
  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_numInts = other.getNumInts();
  d_length = other.getLength();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;

  // duplicate the d_numInts 32-bit words holding the packed values
  const std::uint32_t *src = other.getData();
  auto *dest = new std::uint32_t[d_numInts];
  std::copy(src, src + d_numInts, dest);
  d_data.reset(dest);
}
// Copy assignment: replaces the contents of this vector with a deep copy of
// `other`. Self-assignment is a no-op.
//
// The replacement buffer is allocated, filled and installed before any of the
// scalar members are modified. If the allocation throws, *this is therefore
// left exactly as it was, instead of carrying the new sizes together with the
// old (possibly smaller) buffer.
DiscreteValueVect &DiscreteValueVect::operator=(
    const DiscreteValueVect &other) {
  if (this == &other) {
    return *this;
  }

  // everything that may throw happens first
  const auto nInts = other.getNumInts();
  const std::uint32_t *odata = other.getData();
  auto *data = new std::uint32_t[nInts];
  std::copy(odata, odata + nInts, data);
  d_data.reset(data);

  // plain scalar copies from here on
  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_numInts = nInts;
  d_length = other.getLength();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;
  return *this;
}
// Returns the value stored at position `i`.
//
// Throws IndexErrorException if `i` is not smaller than the vector length.
unsigned int DiscreteValueVect::getVal(unsigned int i) const {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  // locate the 32-bit word holding the value and the bit offset inside it
  const unsigned int wordIdx = i / d_valsPerInt;
  const unsigned int bitOffset = (i % d_valsPerInt) * d_bitsPerVal;
  const auto word = d_data[wordIdx];
  return (word >> bitOffset) & d_mask;
}
// Stores `val` at position `i`.
//
// Throws IndexErrorException if `i` is not smaller than the vector length and
// ValueErrorException if `val` has bits set outside d_mask.
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  if ((val & d_mask) != val) {
    throw ValueErrorException("Value out of range");
  }
  const unsigned int shift = d_bitsPerVal * (i % d_valsPerInt);
  const unsigned int intId = i / d_valsPerInt;
  // Build the slot mask with unsigned arithmetic: with 16 bits per value and
  // shift == 16 the signed expression (0xFFFF << 16) does not fit in an int.
  const unsigned int slotMask = ((1u << d_bitsPerVal) - 1u) << shift;
  // clear the slot, then drop the new value in
  d_data[intId] = (d_data[intId] & ~slotMask) | (val << shift);
}
// Returns the sum of the values found in every slot of every storage word.
unsigned int DiscreteValueVect::getTotalVal() const {
  unsigned int total = 0;
  for (unsigned int word = 0; word < d_numInts; ++word) {
    for (unsigned int slot = 0; slot < d_valsPerInt; ++slot) {
      const unsigned int offset = slot * d_bitsPerVal;
      total += (d_data[word] >> offset) & d_mask;
    }
  }
  return total;
}
// Returns the number of values held by the vector.
unsigned int DiscreteValueVect::getLength() const {
  return d_length;
}
// Returns a read-only pointer to the packed storage; ownership stays with the
// vector.
const std::uint32_t *DiscreteValueVect::getData() const {
  return d_data.get();
}
// Computes the L1 distance (sum of absolute element differences) between two
// vectors.
//
// Throws ValueErrorException if the vectors differ in length or value type.
//
// For value types of up to eight bits the packed storage is compared one byte
// at a time and the contribution of each differing byte pair is taken from
// the DiscreteDistMat lookup object. For the sixteen-bit type the two values
// of each 32-bit word are extracted with shifts and masks, so the result does
// not depend on the width of `short` or on reading the storage through a
// pointer of a different type.
unsigned int computeL1Norm(const DiscreteValueVect &v1,
                           const DiscreteValueVect &v2) {
  if (v1.getLength() != v2.getLength()) {
    throw ValueErrorException("Comparing vectors of different lengths");
  }
  DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
  if (valType != v2.getValueType()) {
    throw ValueErrorException("Comparing vector of different value types");
  }

  const std::uint32_t *data1 = v1.getData();
  const std::uint32_t *data2 = v2.getData();
  const unsigned int numInts = v1.getNumInts();
  unsigned int res = 0;

  if (valType <= DiscreteValueVect::EIGHTBITVALUE) {
    DiscreteDistMat *dmat = getDiscreteDistMat();
    // byte-wise access is allowed for any object type
    const auto *cd1 = reinterpret_cast<const unsigned char *>(data1);
    const auto *cd2 = reinterpret_cast<const unsigned char *>(data2);
    const unsigned char *cend = cd1 + numInts * sizeof(std::uint32_t);
    for (; cd1 != cend; ++cd1, ++cd2) {
      // identical bytes contribute nothing, skip the table lookup
      if (*cd1 != *cd2) {
        res += dmat->getDist(*cd1, *cd2, valType);
      }
    }
  } else {
    // sixteen bits per value: two values per 32-bit word
    for (unsigned int i = 0; i < numInts; ++i) {
      const std::uint32_t w1 = data1[i];
      const std::uint32_t w2 = data2[i];
      if (w1 == w2) {
        continue;
      }
      const unsigned int lo1 = w1 & 0xFFFFu;
      const unsigned int lo2 = w2 & 0xFFFFu;
      const unsigned int hi1 = (w1 >> 16) & 0xFFFFu;
      const unsigned int hi2 = (w2 >> 16) & 0xFFFFu;
      res += (lo1 > lo2) ? (lo1 - lo2) : (lo2 - lo1);
      res += (hi1 > hi2) ? (hi1 - hi2) : (hi2 - hi1);
    }
  }
  return res;
}
// Serializes the vector to a binary string (pickle).
//
// Layout: the negated pickle version as a 32-bit signed integer, then the
// value type, bits per value, mask, length and number of storage words as
// 32-bit unsigned integers, then the storage words themselves. On big-endian
// builds the words are byte-swapped to little-endian order before writing.
std::string DiscreteValueVect::toString() const {
  std::stringstream ss(std::ios_base::binary | std::ios_base::out |
                       std::ios_base::in);

  std::int32_t versionTag = ci_DISCRETEVALUEVECTPICKLE_VERSION * -1;
  streamWrite(ss, versionTag);

  // fixed-size header, in the order the reader expects it
  const std::uint32_t header[] = {
      static_cast<std::uint32_t>(d_type),
      static_cast<std::uint32_t>(d_bitsPerVal),
      static_cast<std::uint32_t>(d_mask),
      static_cast<std::uint32_t>(d_length),
      static_cast<std::uint32_t>(d_numInts)};
  for (std::uint32_t field : header) {
    streamWrite(ss, field);
  }

  const auto payloadBytes = d_numInts * sizeof(std::uint32_t);
#if defined(BOOST_BIG_ENDIAN)
  std::uint32_t *swapped = new std::uint32_t[d_numInts];
  for (unsigned int i = 0; i < d_numInts; ++i) {
    swapped[i] = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(
        d_data.get()[i]);
  }
  ss.write((const char *)swapped, payloadBytes);
  delete[] swapped;
#else
  ss.write((const char *)d_data.get(), payloadBytes);
#endif
  return ss.str();
}
void DiscreteValueVect::initFromText(const char *pkl, const unsigned int len) {
std::stringstream ss(std::ios_base::binary | std::ios_base::in |
std::ios_base::out);
ss.write(pkl, len);
std::int32_t tVers;
streamRead(ss, tVers);
tVers *= -1;
if (tVers == 0x1) {
} else {
throw ValueErrorException("bad version in DiscreteValueVect pickle");
}
std::uint32_t tInt;
streamRead(ss, tInt);
d_type = static_cast<DiscreteValueType>(tInt);
streamRead(ss, tInt);
d_bitsPerVal = tInt;
d_valsPerInt = BITS_PER_INT / d_bitsPerVal;
streamRead(ss, tInt);
d_mask = tInt;
streamRead(ss, tInt);
d_length = tInt;
streamRead(ss, tInt);
d_numInts = tInt;
auto *data = new std::uint32_t[d_numInts];
ss.read((char *)data, d_numInts * sizeof(std::uint32_t));
#if defined(BOOST_BIG_ENDIAN)
std::uint32_t *td = new std::uint32_t[d_numInts];
for (unsigned int i = 0; i < d_numInts; ++i)
td[i] = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(data[i]);
d_data.reset(td);
delete[] data;
#else
d_data.reset(data);
#endif
};
// Element-wise minimum of two vectors of equal length.
//
// The result uses the smaller of the two operands' value types; a minimum
// can never exceed what that type holds.
DiscreteValueVect DiscreteValueVect::operator&(
    const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const DiscreteValueType resType =
      (other.d_type < d_type) ? other.d_type : d_type;
  DiscreteValueVect result(resType, d_length);
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    result.setVal(idx, (mine < theirs) ? mine : theirs);
  }
  return result;
}
// Element-wise maximum of two vectors of equal length.
//
// The result uses the larger of the two operands' value types so that every
// maximum fits.
DiscreteValueVect DiscreteValueVect::operator|(
    const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const DiscreteValueType resType =
      (other.d_type > d_type) ? other.d_type : d_type;
  DiscreteValueVect result(resType, d_length);
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    result.setVal(idx, (mine > theirs) ? mine : theirs);
  }
  return result;
}
// In-place element-wise addition; each sum is capped at the largest value
// representable with d_bitsPerVal bits.
DiscreteValueVect &DiscreteValueVect::operator+=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const unsigned int ceiling = (1 << d_bitsPerVal) - 1;
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int sum = getVal(idx) + other.getVal(idx);
    setVal(idx, (sum < ceiling) ? sum : ceiling);
  }
  return *this;
}
// In-place element-wise subtraction; results that would be negative are
// stored as zero.
DiscreteValueVect &DiscreteValueVect::operator-=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int lhs = getVal(idx);
    const unsigned int rhs = other.getVal(idx);
    unsigned int diff = 0;
    if (lhs > rhs) {
      diff = lhs - rhs;
    }
    setVal(idx, diff);
  }
  return *this;
}
// Disabled code (compiled out by the #if 0 guard): an element-wise
// complement that would map every value v to maxVal - v, where maxVal is the
// largest value representable with d_bitsPerVal bits.
#if 0
DiscreteValueVect DiscreteValueVect::operator~() const {
DiscreteValueVect ans(d_type,d_length);
unsigned int maxVal = (1<<d_bitsPerVal) - 1;
for(unsigned int i=0;i<d_length;++i){
unsigned int v1=getVal(i);
ans.setVal(i,maxVal-v1);
}
return(ans);
};
#endif
// Returns a copy of `p1` to which `p2` has been added with operator+=.
DiscreteValueVect operator+(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect sum(p1);
  sum += p2;
  return sum;
}
// Returns a copy of `p1` from which `p2` has been subtracted with operator-=.
DiscreteValueVect operator-(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect difference(p1);
  difference -= p2;
  return difference;
}
} // end of namespace RDKit