//
// Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
//
#ifndef __RD_BITOPS_H__
#define __RD_BITOPS_H__
/*! \file BitOps.h
\brief Contains general bit-comparison and similarity operations.
The notation used to document the similarity metrics is:
- \c V1_n: number of bits in vector 1
- \c V1_o: number of on bits in vector 1
- (V1&V2)_o: number of on bits in the intersection of vectors 1 and 2
*/
#include "BitVects.h"
#include <string>
//! overload of AllProbeBitsMatch() taking both arguments as C strings
/*!
  NOTE(review): presumably reports whether every on bit of \c probe is also
  on in \c ref; the expected encoding of the two strings is not visible from
  this header - confirm against the implementation.

  \param probe the probe, as a C string
  \param ref   the reference, as a C string
*/
bool AllProbeBitsMatch(const char* probe, const char* ref);
//! overload of AllProbeBitsMatch() taking both arguments as std::strings
/*!
  NOTE(review): presumably reports whether every on bit of \c probe is also
  on in \c ref; the expected encoding of the two strings is not visible from
  this header - confirm against the implementation.

  \param probe the probe, as a std::string
  \param ref   the reference, as a std::string
*/
bool AllProbeBitsMatch(const std::string& probe, const std::string& ref);
//! overload of AllProbeBitsMatch() taking a probe object and a std::string
/*!
  NOTE(review): the parameter name suggests \c pkl is a pickled (serialized)
  bit vector and that the result reports whether every on bit of \c probe is
  also on in it - confirm against the implementation.

  \tparam T1   type of the probe
  \param probe the probe
  \param pkl   the reference, as a std::string
*/
template <typename T1>
bool AllProbeBitsMatch(const T1& probe, const std::string& pkl);
//! returns the number of on bits in common between two bit vectors
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o
*/
template <typename T1, typename T2>
int NumOnBitsInCommon(const T1& bv1, const T2& bv2);
//! non-template overload of NumOnBitsInCommon() for two ExplicitBitVects
/*!
  NOTE(review): presumably returns the same (bv1&bv2)_o count as the
  templated version - confirm against the implementation.

  \param bv1 the first bit vector
  \param bv2 the second bit vector
*/
int NumOnBitsInCommon(const ExplicitBitVect& bv1, const ExplicitBitVect& bv2);
//! returns the Tanimoto similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double TanimotoSimilarity(const T1& bv1, const T2& bv2);
//! returns the Cosine similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / sqrt(bv1_o + bv2_o)

  NOTE(review): the conventional cosine coefficient divides by
  sqrt(bv1_o * bv2_o); the formula above is reproduced from the existing
  documentation - confirm it against the implementation.
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double CosineSimilarity(const T1& bv1, const T2& bv2);
//! returns the Kulczynski similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double KulczynskiSimilarity(const T1& bv1, const T2& bv2);
//! returns the Dice similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return 2*(bv1&bv2)_o / [bv1_o + bv2_o]
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double DiceSimilarity(const T1& bv1, const T2& bv2);
//! returns the Sokal similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double SokalSimilarity(const T1& bv1, const T2& bv2);
//! returns the McConnaughey similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return [(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double McConnaugheySimilarity(const T1& bv1, const T2& bv2);
//! returns the Asymmetric similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / min(bv1_o,bv2_o)
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double AsymmetricSimilarity(const T1& bv1, const T2& bv2);
//! returns the Braun-Blanquet similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / max(bv1_o,bv2_o)
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double BraunBlanquetSimilarity(const T1& bv1, const T2& bv2);
//! returns the Russel similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / bv1_o

  Note that this operation is non-commutative:
    RusselSimilarity(bv1,bv2) != RusselSimilarity(bv2,bv1)
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double RusselSimilarity(const T1& bv1, const T2& bv2);
//! returns the on bit similarity between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return (bv1&bv2)_o / (bv1|bv2)_o
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double OnBitSimilarity(const T1& bv1, const T2& bv2);
//! returns the number of common bits (on and off) between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return bv1_n - (bv1^bv2)_o
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const int NumBitsInCommon(const T1& bv1, const T2& bv2);
//! returns the common-bit similarity (on and off) between two bit vects
/*!
  \tparam T1 type of the first bit vector
  \tparam T2 type of the second bit vector
  \param bv1 the first bit vector
  \param bv2 the second bit vector
  \return [bv1_n - (bv1^bv2)_o] / bv1_n
*/
// NOTE(review): the top-level const on the by-value return is left as-is so
// this declaration keeps matching definitions that are not visible here.
template <typename T1, typename T2>
const double AllBitSimilarity(const T1& bv1, const T2& bv2);
//! returns an IntVect with indices of all on bits in common between two bit vects
template
IntVect
OnBitsInCommon(const T1& bv1,const T2& bv2);
//! returns an IntVect with indices of all off bits in common between two bit vects
template
IntVect
OffBitsInCommon(const T1& bv1,const T2& bv2);
//! returns the on-bit projected similarities between two bit vects
/*!
\return two values, as a DoubleVect:
- (bv1&bv2)_o / bv1_o
- (bv1&bv2)_o / bv2_o
*/
template
DoubleVect
OnBitProjSimilarity(const T1& bv1,const T2& bv2);
//! returns the on-bit projected similarities between two bit vects
/*!
\return two values, as a DoubleVect:
- [bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]
- [bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]
Note: bv1_n = bv2_n
*/
template
DoubleVect
OffBitProjSimilarity(const T1& bv1,const T2& bv2);
//! folds a bit vector \c factor times and returns the result
/*!
  \tparam T1    type of the bit vector
  \param bv1    the vector to be folded
  \param factor (optional) the number of times to fold it; defaults to 2

  \return a pointer to the folded fingerprint, which is
     bv1_n/factor long.

  Note: The caller is responsible for deleting the result.
*/
template <typename T1>
T1* FoldFingerprint(const T1& bv1, unsigned int factor = 2);
//! returns a text representation of a bit vector (a string of 0s and 1s)
/*!
  \tparam T1 type of the bit vector
  \param bv1 the vector to be converted to text

  \return an std::string
*/
template <typename T1>
std::string BitVectToText(const T1& bv1);
#endif