Files
rdkit/Code/Numerics/Vector.h
Ricardo Rodriguez 39d4662ae7 Throw when attempting to normalize a Zero RDGeom::Point (#8008)
* throw if close to zero

* fix moldraw2DTestCatch

* Fix testRGroupDecomp

* fix one test in distGeomHelpersCatch

* fix tests in distGeomHelpersCatch

* retry finding a dir vector when adding Hs

* push UFF fixes to calculateCosY

* fix the setTerminalAtomCoords deg 4 patch

* add a test

* reduce zero tolerance
2024-11-19 04:33:22 +01:00

333 lines
8.4 KiB
C++

//
// Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef __RD_VECTOR_H__
#define __RD_VECTOR_H__
#include <RDGeneral/Invariant.h>
#include <RDGeneral/utils.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <boost/random.hpp>
#include <boost/smart_ptr.hpp>
//! Smallest L2 norm that RDNumeric::Vector::normalize() will divide by;
//! a vector whose norm is below this value is treated as having zero length.
static constexpr double zero_tolerance{1.e-16};
namespace RDNumeric {
//! A class to represent vectors of numbers.
template <class TYPE>
class Vector {
public:
typedef boost::shared_array<TYPE> DATA_SPTR;
//! Initialize with only a size.
explicit Vector(unsigned int N) {
d_size = N;
TYPE *data = new TYPE[N];
memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
d_data.reset(data);
}
//! Initialize with a size and default value.
Vector(unsigned int N, TYPE val) { //: Vector(N) {
d_size = N;
TYPE *data = new TYPE[N];
unsigned int i;
for (i = 0; i < N; i++) {
data[i] = val;
}
d_data.reset(data);
}
//! Initialize from a smart pointer.
/*!
<b>NOTE:</b> the data is not copied in this case
*/
Vector(unsigned int N, DATA_SPTR data) { // TYPE *data) {
d_size = N;
d_data = data;
}
//! copy constructor
/*! We make a copy of the other vector's data.
*/
Vector(const Vector &other) {
d_size = other.size();
const TYPE *otherData = other.getData();
TYPE *data = new TYPE[d_size];
memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
d_size * sizeof(TYPE));
d_data.reset(data);
}
~Vector() = default;
//! return the size (dimension) of the vector
unsigned int size() const { return d_size; }
//! returns the value at a particular index
inline TYPE getVal(unsigned int i) const {
PRECONDITION(i < d_size, "bad index");
return d_data[i];
}
//! sets the index at a particular value
inline void setVal(unsigned int i, TYPE val) {
PRECONDITION(i < d_size, "bad index");
d_data[i] = val;
}
inline TYPE operator[](unsigned int i) const {
PRECONDITION(i < d_size, "bad index");
return d_data[i];
}
inline TYPE &operator[](unsigned int i) {
PRECONDITION(i < d_size, "bad index");
return d_data[i];
}
//! returns a pointer to our data array
inline TYPE *getData() { return d_data.get(); }
//! returns a const pointer to our data array
inline const TYPE *getData() const {
// return dp_data;
return d_data.get();
}
//! Copy operator.
/*! We make a copy of the other Vector's data.
*/
Vector<TYPE> &assign(const Vector<TYPE> &other) {
PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
const TYPE *otherData = other.getData();
memcpy(static_cast<void *>(d_data.get()),
static_cast<const void *>(otherData), d_size * sizeof(TYPE));
return *this;
}
//! elementwise addition, vectors must be the same size.
Vector<TYPE> &operator+=(const Vector<TYPE> &other) {
PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
const TYPE *otherData = other.getData();
TYPE *data = d_data.get();
unsigned int i;
for (i = 0; i < d_size; i++) {
data[i] += otherData[i];
}
return *this;
}
//! elementwise subtraction, vectors must be the same size.
Vector<TYPE> &operator-=(const Vector<TYPE> &other) {
PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
const TYPE *otherData = other.getData();
TYPE *data = d_data.get();
unsigned int i;
for (i = 0; i < d_size; i++) {
data[i] -= otherData[i];
}
return *this;
}
//! multiplication by a scalar
Vector<TYPE> &operator*=(TYPE scale) {
unsigned int i;
for (i = 0; i < d_size; i++) {
d_data[i] *= scale;
}
return *this;
}
//! division by a scalar
Vector<TYPE> &operator/=(TYPE scale) {
unsigned int i;
for (i = 0; i < d_size; i++) {
d_data[i] /= scale;
}
return *this;
}
//! L2 norm squared
inline TYPE normL2Sq() const {
TYPE res = (TYPE)0.0;
unsigned int i;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
res += data[i] * data[i];
}
return res;
}
//! L2 norm
inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
//! L1 norm
inline TYPE normL1() const {
TYPE res = (TYPE)0.0;
unsigned int i;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
res += fabs(data[i]);
}
return res;
}
//! L-infinity norm
inline TYPE normLinfinity() const {
TYPE res = (TYPE)(-1.0);
unsigned int i;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
if (fabs(data[i]) > res) {
res = fabs(data[i]);
}
}
return res;
}
//! \brief Gets the ID of the entry that has the largest absolute value
//! i.e. the entry being used for the L-infinity norm
inline unsigned int largestAbsValId() const {
TYPE res = (TYPE)(-1.0);
unsigned int i, id = d_size;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
if (fabs(data[i]) > res) {
res = fabs(data[i]);
id = i;
}
}
return id;
}
//! \brief Gets the ID of the entry that has the largest value
inline unsigned int largestValId() const {
TYPE res = (TYPE)(-1.e8);
unsigned int i, id = d_size;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
if (data[i] > res) {
res = data[i];
id = i;
}
}
return id;
}
//! \brief Gets the ID of the entry that has the smallest value
inline unsigned int smallestValId() const {
TYPE res = (TYPE)(1.e8);
unsigned int i, id = d_size;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
if (data[i] < res) {
res = data[i];
id = i;
}
}
return id;
}
//! returns the dot product between two Vectors
inline TYPE dotProduct(const Vector<TYPE> other) const {
PRECONDITION(d_size == other.size(),
"Size mismatch in vector doct product");
const TYPE *oData = other.getData();
unsigned int i;
TYPE res = (TYPE)(0.0);
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
res += (data[i] * oData[i]);
}
return res;
}
//! Normalize the vector using the L2 norm
inline void normalize() {
TYPE val = this->normL2();
if (val < zero_tolerance) {
throw std::runtime_error("Cannot normalize a zero length vector");
}
(*this) /= val;
}
//! Set to a random unit vector
inline void setToRandom(unsigned int seed = 0) {
// we want to get our own RNG here instead of using the global
// one. This is related to Issue285.
RDKit::rng_type generator(42u);
RDKit::uniform_double dist(0, 1.0);
RDKit::double_source_type randSource(generator, dist);
if (seed > 0) {
generator.seed(seed);
} else {
// we can't initialize using only clock(), because it's possible
// that we'll get here fast enough that clock() will return 0
// and generator.seed(0) is an error:
generator.seed(clock() + 1);
}
unsigned int i;
TYPE *data = d_data.get();
for (i = 0; i < d_size; i++) {
data[i] = randSource();
}
this->normalize();
}
private:
unsigned int d_size; //!< our length
DATA_SPTR d_data;
Vector<TYPE> &operator=(const Vector<TYPE> &other);
};
typedef Vector<double> DoubleVector;
//! Computes the algebraic Tanimoto similarity of two vectors
//! [definition from JCIM 46:587-96 (2006)]
/*!
  The value is  (v1 . v2) / (|v1|^2 + |v2|^2 - v1 . v2).

  \return the similarity, or 0.0 when either the dot product or the
          denominator is exactly zero
*/
template <typename T>
double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
  const double overlap = v1.dotProduct(v2);
  if (overlap != 0.0) {
    const double combined = v1.normL2Sq() + v2.normL2Sq() - overlap;
    if (combined != 0.0) {
      return overlap / combined;
    }
  }
  // either nothing in common or a vanishing denominator
  return 0.0;
}
} // end of namespace RDNumeric
//! ostream operator for Vectors
/*!
  Writes "Size: N [" followed by every entry (field width 7, precision 3,
  each one trailed by ", ") and a closing "]" plus newline.
  The precision set on \c target stays in effect after the call whenever the
  vector is non-empty.
*/
template <typename TYPE>
std::ostream &operator<<(std::ostream &target,
                         const RDNumeric::Vector<TYPE> &vec) {
  const unsigned int nEntries = vec.size();
  target << "Size: " << nEntries << " [";
  for (unsigned int idx = 0; idx != nEntries; ++idx) {
    // setw only affects the next insertion, so it is re-applied per entry
    target << std::setw(7) << std::setprecision(3);
    target << vec.getVal(idx);
    target << ", ";
  }
  target << "]\n";
  return target;
}
#endif