rdkit/Code/Numerics/Vector.h

//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#ifndef __RD_VECTOR_H__
#define __RD_VECTOR_H__

#include <RDGeneral/Invariant.h>
#include <RDGeneral/utils.h>
#include <cmath>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <boost/random.hpp>
#include <boost/smart_ptr.hpp>

static constexpr double zero_tolerance = 1.e-16;

namespace RDNumeric {

//! A class to represent vectors of numbers.
template <class TYPE>
class Vector {
 public:
  typedef boost::shared_array<TYPE> DATA_SPTR;

  //! Initialize with only a size.
  explicit Vector(unsigned int N) {
    d_size = N;
    TYPE *data = new TYPE[N];
    memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
    d_data.reset(data);
  }

  //! Initialize with a size and default value.
  Vector(unsigned int N, TYPE val) {  //: Vector(N) {
    d_size = N;
    TYPE *data = new TYPE[N];

    unsigned int i;
    for (i = 0; i < N; i++) {
      data[i] = val;
    }
    d_data.reset(data);
  }

  //! Initialize from a smart pointer.
  /*!
    <b>NOTE:</b> the data is not copied in this case
  */
  Vector(unsigned int N, DATA_SPTR data) {  // TYPE *data) {
    d_size = N;
    d_data = data;
  }

  //! copy constructor
  /*! We make a copy of the other vector's data.
   */
  Vector(const Vector &other) {
    d_size = other.size();
    const TYPE *otherData = other.getData();
    TYPE *data = new TYPE[d_size];

    memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
           d_size * sizeof(TYPE));
    d_data.reset(data);
  }

  ~Vector() = default;

  //! return the size (dimension) of the vector
  unsigned int size() const { return d_size; }

  //! returns the value at a particular index
  inline TYPE getVal(unsigned int i) const {
    PRECONDITION(i < d_size, "bad index");
    return d_data[i];
  }

  //! sets the index at a particular value
  inline void setVal(unsigned int i, TYPE val) {
    PRECONDITION(i < d_size, "bad index");
    d_data[i] = val;
  }

  inline TYPE operator[](unsigned int i) const {
    PRECONDITION(i < d_size, "bad index");
    return d_data[i];
  }

  inline TYPE &operator[](unsigned int i) {
    PRECONDITION(i < d_size, "bad index");
    return d_data[i];
  }

  //! returns a pointer to our data array
  inline TYPE *getData() { return d_data.get(); }

  //! returns a const pointer to our data array
  inline const TYPE *getData() const {
    // return dp_data;
    return d_data.get();
  }

  //! Copy operator.
  /*! We make a copy of the other Vector's data.
   */

  Vector<TYPE> &assign(const Vector<TYPE> &other) {
    PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
    const TYPE *otherData = other.getData();
    memcpy(static_cast<void *>(d_data.get()),
           static_cast<const void *>(otherData), d_size * sizeof(TYPE));
    return *this;
  }

  //! elementwise addition, vectors must be the same size.
  Vector<TYPE> &operator+=(const Vector<TYPE> &other) {
    PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
    const TYPE *otherData = other.getData();
    TYPE *data = d_data.get();
    unsigned int i;
    for (i = 0; i < d_size; i++) {
      data[i] += otherData[i];
    }
    return *this;
  }

  //! elementwise subtraction, vectors must be the same size.
  Vector<TYPE> &operator-=(const Vector<TYPE> &other) {
    PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
    const TYPE *otherData = other.getData();
    TYPE *data = d_data.get();
    unsigned int i;
    for (i = 0; i < d_size; i++) {
      data[i] -= otherData[i];
    }
    return *this;
  }

  //! multiplication by a scalar
  Vector<TYPE> &operator*=(TYPE scale) {
    unsigned int i;
    for (i = 0; i < d_size; i++) {
      d_data[i] *= scale;
    }
    return *this;
  }

  //! division by a scalar
  Vector<TYPE> &operator/=(TYPE scale) {
    unsigned int i;
    for (i = 0; i < d_size; i++) {
      d_data[i] /= scale;
    }
    return *this;
  }

  //! L2 norm squared
  inline TYPE normL2Sq() const {
    TYPE res = (TYPE)0.0;
    unsigned int i;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      res += data[i] * data[i];
    }
    return res;
  }

  //! L2 norm
  inline TYPE normL2() const { return sqrt(this->normL2Sq()); }

  //! L1 norm
  inline TYPE normL1() const {
    TYPE res = (TYPE)0.0;
    unsigned int i;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      res += fabs(data[i]);
    }
    return res;
  }

  //! L-infinity norm
  inline TYPE normLinfinity() const {
    TYPE res = (TYPE)(-1.0);
    unsigned int i;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      if (fabs(data[i]) > res) {
        res = fabs(data[i]);
      }
    }
    return res;
  }

  //! \brief Gets the ID of the entry that has the largest absolute value
  //! i.e. the entry being used for the L-infinity norm
  inline unsigned int largestAbsValId() const {
    TYPE res = (TYPE)(-1.0);
    unsigned int i, id = d_size;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      if (fabs(data[i]) > res) {
        res = fabs(data[i]);
        id = i;
      }
    }
    return id;
  }

  //! \brief Gets the ID of the entry that has the largest value
  inline unsigned int largestValId() const {
    TYPE res = (TYPE)(-1.e8);
    unsigned int i, id = d_size;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      if (data[i] > res) {
        res = data[i];
        id = i;
      }
    }
    return id;
  }

  //! \brief Gets the ID of the entry that has the smallest value
  inline unsigned int smallestValId() const {
    TYPE res = (TYPE)(1.e8);
    unsigned int i, id = d_size;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      if (data[i] < res) {
        res = data[i];
        id = i;
      }
    }
    return id;
  }

  //! returns the dot product between two Vectors
  inline TYPE dotProduct(const Vector<TYPE> other) const {
    PRECONDITION(d_size == other.size(),
                 "Size mismatch in vector doct product");
    const TYPE *oData = other.getData();
    unsigned int i;
    TYPE res = (TYPE)(0.0);
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      res += (data[i] * oData[i]);
    }
    return res;
  }

  //! Normalize the vector using the L2 norm
  inline void normalize() {
    TYPE val = this->normL2();
    if (val < zero_tolerance) {
      throw std::runtime_error("Cannot normalize a zero length vector");
    }
    (*this) /= val;
  }

  //! Set to a random unit vector
  inline void setToRandom(unsigned int seed = 0) {
    // we want to get our own RNG here instead of using the global
    // one.  This is related to Issue285.
    RDKit::rng_type generator(42u);
    RDKit::uniform_double dist(0, 1.0);
    RDKit::double_source_type randSource(generator, dist);
    if (seed > 0) {
      generator.seed(seed);
    } else {
      // we can't initialize using only clock(), because it's possible
      // that we'll get here fast enough that clock() will return 0
      // and generator.seed(0) is an error:
      generator.seed(clock() + 1);
    }

    unsigned int i;
    TYPE *data = d_data.get();
    for (i = 0; i < d_size; i++) {
      data[i] = randSource();
    }
    this->normalize();
  }

 private:
  unsigned int d_size;  //!< our length
  DATA_SPTR d_data;
  Vector<TYPE> &operator=(const Vector<TYPE> &other);
};

typedef Vector<double> DoubleVector;

//! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
template <typename T>
double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
  double numer = v1.dotProduct(v2);
  if (numer == 0.0) {
    return 0.0;
  }
  double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
  if (denom == 0.0) {
    return 0.0;
  }
  return numer / denom;
}
}  // end of namespace RDNumeric

//! ostream operator for Vectors
template <typename TYPE>
std::ostream &operator<<(std::ostream &target,
                         const RDNumeric::Vector<TYPE> &vec) {
  unsigned int siz = vec.size();
  target << "Size: " << siz << " [";
  unsigned int i;
  for (i = 0; i < siz; i++) {
    target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
  }
  target << "]\n";
  return target;
}

#endif