mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
289 lines
8.2 KiB
C++
289 lines
8.2 KiB
C++
// $Id$
|
|
//
|
|
// Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "DiscreteValueVect.h"
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <RDGeneral/StreamOps.h>
|
|
#include "DatastructsException.h"
|
|
#include "DiscreteDistMat.h"
|
|
#include <RDGeneral/Exceptions.h>
|
|
#include <cstdint>
|
|
#include <algorithm>
|
|
|
|
namespace RDKit {
|
|
// Version number of the binary pickle format used in this file: toString()
// writes the negated value as the first field of a pickle, and initFromText()
// only accepts pickles whose (un-negated) version is 0x1.
const int ci_DISCRETEVALUEVECTPICKLE_VERSION = 0x1;
|
|
|
|
// Copy constructor: mirrors the layout description of `other` and takes a
// private copy of its packed storage, so the new vector gets its own buffer.
DiscreteValueVect::DiscreteValueVect(const DiscreteValueVect &other) {
  // layout description; d_numInts sizes the buffer allocated below
  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;
  d_length = other.getLength();
  d_numInts = other.getNumInts();

  // duplicate the packed words
  auto *storage = new std::uint32_t[d_numInts];
  std::copy_n(other.getData(), d_numInts, storage);
  d_data.reset(storage);
}
|
|
|
|
// Copy assignment: makes this vector a deep copy of `other` (same value type,
// length and contents, with its own storage). Self-assignment is a no-op.
//
// Returns a reference to *this.
DiscreteValueVect &DiscreteValueVect::operator=(
    const DiscreteValueVect &other) {
  if (this == &other) {
    return *this;
  }

  // Build the new buffer BEFORE touching any member. If the allocation
  // throws, *this is left exactly as it was, instead of ending up with the
  // new sizes paired with the old (possibly smaller) storage.
  const unsigned int nInts = other.getNumInts();
  auto *data = new std::uint32_t[nInts];
  memcpy(static_cast<void *>(data), static_cast<const void *>(other.getData()),
         nInts * sizeof(std::uint32_t));

  // Install the buffer first (taking ownership may itself allocate), then
  // update the plain scalar fields, which cannot throw.
  d_data.reset(data);

  d_type = other.getValueType();
  d_bitsPerVal = other.getNumBitsPerVal();
  d_numInts = nInts;
  d_length = other.getLength();
  d_valsPerInt = other.d_valsPerInt;
  d_mask = other.d_mask;

  return *this;
}
|
|
|
|
unsigned int DiscreteValueVect::getVal(unsigned int i) const {
|
|
if (i >= d_length) {
|
|
throw IndexErrorException(i);
|
|
}
|
|
unsigned int shift = d_bitsPerVal * (i % d_valsPerInt);
|
|
unsigned int intId = i / d_valsPerInt;
|
|
return ((d_data[intId] >> shift) & d_mask);
|
|
}
|
|
|
|
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
|
|
if (i >= d_length) {
|
|
throw IndexErrorException(i);
|
|
}
|
|
if ((val & d_mask) != val) {
|
|
throw ValueErrorException("Value out of range");
|
|
}
|
|
unsigned int shift = d_bitsPerVal * (i % d_valsPerInt);
|
|
unsigned int intId = i / d_valsPerInt;
|
|
unsigned int mask = ((1 << d_bitsPerVal) - 1) << shift;
|
|
mask = ~mask;
|
|
d_data[intId] = (d_data[intId] & mask) | (val << shift);
|
|
}
|
|
|
|
unsigned int DiscreteValueVect::getTotalVal() const {
|
|
unsigned int i, j, res = 0;
|
|
|
|
for (i = 0; i < d_numInts; ++i) {
|
|
for (j = 0; j < d_valsPerInt; ++j) {
|
|
res += ((d_data[i] >> (j * d_bitsPerVal)) & d_mask);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
// Returns the number of values held by the vector.
unsigned int DiscreteValueVect::getLength() const {
  return d_length;
}
|
|
|
|
// Returns a read-only pointer to the packed storage words; ownership stays
// with the vector.
const std::uint32_t *DiscreteValueVect::getData() const {
  return d_data.get();
}
|
|
|
|
unsigned int computeL1Norm(const DiscreteValueVect &v1,
|
|
const DiscreteValueVect &v2) {
|
|
if (v1.getLength() != v2.getLength()) {
|
|
throw ValueErrorException("Comparing vectors of different lengths");
|
|
}
|
|
|
|
DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
|
|
|
|
if (valType != v2.getValueType()) {
|
|
throw ValueErrorException("Comparing vector of different value types");
|
|
}
|
|
|
|
const std::uint32_t *data1 = v1.getData();
|
|
const std::uint32_t *data2 = v2.getData();
|
|
|
|
unsigned int res = 0;
|
|
if (valType <= DiscreteValueVect::EIGHTBITVALUE) {
|
|
DiscreteDistMat *dmat = getDiscreteDistMat();
|
|
|
|
auto *cd1 = (unsigned char *)(data1);
|
|
auto *cd2 = (unsigned char *)(data2);
|
|
const unsigned char *cend = cd1 + (v1.getNumInts() * 4);
|
|
while (cd1 != cend) {
|
|
if (*cd1 == *cd2) {
|
|
cd1++;
|
|
cd2++;
|
|
continue;
|
|
}
|
|
res += dmat->getDist(*cd1, *cd2, valType);
|
|
cd1++;
|
|
cd2++;
|
|
}
|
|
} else {
|
|
// we have a sixteen bits per value type
|
|
// REVIEW: we are making an assumption here that a short
|
|
// is 16 bit - may fail on a different compiler
|
|
const unsigned short int *sd1 = (unsigned short int *)(data1);
|
|
const unsigned short int *sd2 = (unsigned short int *)(data2);
|
|
|
|
const unsigned short int *send = sd1 + (v1.getNumInts() * 2);
|
|
while (sd1 != send) {
|
|
if (*sd1 == *sd2) {
|
|
sd1++;
|
|
sd2++;
|
|
continue;
|
|
}
|
|
res += abs((*sd1) - (*sd2));
|
|
sd1++;
|
|
sd2++;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
std::string DiscreteValueVect::toString() const {
|
|
std::stringstream ss(std::ios_base::binary | std::ios_base::out |
|
|
std::ios_base::in);
|
|
|
|
std::int32_t tVers = ci_DISCRETEVALUEVECTPICKLE_VERSION * -1;
|
|
streamWrite(ss, tVers);
|
|
std::uint32_t tInt;
|
|
tInt = d_type;
|
|
streamWrite(ss, tInt);
|
|
tInt = d_bitsPerVal;
|
|
streamWrite(ss, tInt);
|
|
tInt = d_mask;
|
|
streamWrite(ss, tInt);
|
|
tInt = d_length;
|
|
streamWrite(ss, tInt);
|
|
tInt = d_numInts;
|
|
streamWrite(ss, tInt);
|
|
|
|
#if defined(BOOST_BIG_ENDIAN)
|
|
std::uint32_t *td = new std::uint32_t[d_numInts];
|
|
for (unsigned int i = 0; i < d_numInts; ++i)
|
|
td[i] = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(
|
|
d_data.get()[i]);
|
|
ss.write((const char *)td, d_numInts * sizeof(tInt));
|
|
delete[] td;
|
|
#else
|
|
ss.write((const char *)d_data.get(), d_numInts * sizeof(tInt));
|
|
#endif
|
|
std::string res(ss.str());
|
|
return res;
|
|
};
|
|
|
|
void DiscreteValueVect::initFromText(const char *pkl, const unsigned int len) {
|
|
std::stringstream ss(std::ios_base::binary | std::ios_base::in |
|
|
std::ios_base::out);
|
|
ss.write(pkl, len);
|
|
std::int32_t tVers;
|
|
streamRead(ss, tVers);
|
|
tVers *= -1;
|
|
if (tVers == 0x1) {
|
|
} else {
|
|
throw ValueErrorException("bad version in DiscreteValueVect pickle");
|
|
}
|
|
std::uint32_t tInt;
|
|
streamRead(ss, tInt);
|
|
d_type = static_cast<DiscreteValueType>(tInt);
|
|
|
|
streamRead(ss, tInt);
|
|
d_bitsPerVal = tInt;
|
|
d_valsPerInt = BITS_PER_INT / d_bitsPerVal;
|
|
streamRead(ss, tInt);
|
|
d_mask = tInt;
|
|
streamRead(ss, tInt);
|
|
d_length = tInt;
|
|
streamRead(ss, tInt);
|
|
d_numInts = tInt;
|
|
auto *data = new std::uint32_t[d_numInts];
|
|
ss.read((char *)data, d_numInts * sizeof(std::uint32_t));
|
|
|
|
#if defined(BOOST_BIG_ENDIAN)
|
|
std::uint32_t *td = new std::uint32_t[d_numInts];
|
|
for (unsigned int i = 0; i < d_numInts; ++i)
|
|
td[i] = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(data[i]);
|
|
d_data.reset(td);
|
|
delete[] data;
|
|
#else
|
|
d_data.reset(data);
|
|
#endif
|
|
};
|
|
|
|
// Element-wise minimum of two vectors of equal length (checked with
// PRECONDITION). The result is created with whichever of the two value-type
// enumerators compares smaller.
DiscreteValueVect DiscreteValueVect::operator&(
    const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const DiscreteValueType resType = std::min(d_type, other.d_type);
  DiscreteValueVect result(resType, d_length);
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    result.setVal(idx, theirs < mine ? theirs : mine);
  }
  return result;
}
|
|
|
|
// Element-wise maximum of two vectors of equal length (checked with
// PRECONDITION). The result is created with whichever of the two value-type
// enumerators compares larger.
DiscreteValueVect DiscreteValueVect::operator|(
    const DiscreteValueVect &other) const {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const DiscreteValueType resType = std::max(d_type, other.d_type);
  DiscreteValueVect result(resType, d_length);
  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    result.setVal(idx, theirs > mine ? theirs : mine);
  }
  return result;
}
|
|
|
|
// In-place element-wise addition of a vector of equal length (checked with
// PRECONDITION). Sums that do not fit into this vector's bits per value are
// clamped to the largest representable value.
DiscreteValueVect &DiscreteValueVect::operator+=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");
  const unsigned int ceiling = (1 << d_bitsPerVal) - 1;

  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int sum = getVal(idx) + other.getVal(idx);
    if (sum < ceiling) {
      setVal(idx, sum);
    } else {
      setVal(idx, ceiling);
    }
  }
  return *this;
}
|
|
// In-place element-wise subtraction of a vector of equal length (checked with
// PRECONDITION). Differences that would be negative are stored as zero.
DiscreteValueVect &DiscreteValueVect::operator-=(
    const DiscreteValueVect &other) {
  PRECONDITION(other.d_length == d_length, "length mismatch");

  for (unsigned int idx = 0; idx < d_length; ++idx) {
    const unsigned int mine = getVal(idx);
    const unsigned int theirs = other.getVal(idx);
    unsigned int diff = 0;
    if (mine > theirs) {
      diff = mine - theirs;
    }
    setVal(idx, diff);
  }
  return *this;
}
|
|
|
|
// Returns a copy of p1 to which p2 has been added with operator+=.
DiscreteValueVect operator+(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect sum(p1);
  sum += p2;
  return sum;
}
|
|
// Returns a copy of p1 from which p2 has been subtracted with operator-=.
DiscreteValueVect operator-(const DiscreteValueVect &p1,
                            const DiscreteValueVect &p2) {
  DiscreteValueVect difference(p1);
  difference -= p2;
  return difference;
}
|
|
|
|
} // end of namespace RDKit
|