first pass, using google style

2026-06-03 21:44:30 +08:00 · 2015-11-14 14:58:11 +01:00
parent 80bb809b31
commit e08e0d16d8
619 changed files with 138877 additions and 133381 deletions
--- a/Code/ML/Cluster/Murtagh/Clustering.cpp
+++ b/Code/ML/Cluster/Murtagh/Clustering.cpp
@@ -20,11 +20,9 @@ namespace python = boost::python;

 typedef double real;

-extern "C"
-void distdriver_(boost::int64_t *n,boost::int64_t *len,
-		 real *dists,
-		 boost::int64_t *toggle,
-		 boost::int64_t *ia,boost::int64_t *ib,real *crit);
+extern "C" void distdriver_(boost::int64_t *n, boost::int64_t *len, real *dists,
+                            boost::int64_t *toggle, boost::int64_t *ia,
+                            boost::int64_t *ib, real *crit);

 //
 // Rather than deal with any nonsense like trying to get
@@ -32,54 +30,53 @@ void distdriver_(boost::int64_t *n,boost::int64_t *len,
 // (thus drowning in the waves of f2c hate), we'll generate
 // the distance matrix on our own here and then call distdriver_
 //
-void clusterit(real *dataP,boost::int64_t n,boost::int64_t m,boost::int64_t iopt,
-	       boost::int64_t *ia,boost::int64_t *ib,real *crit){
+void clusterit(real *dataP, boost::int64_t n, boost::int64_t m,
+               boost::int64_t iopt, boost::int64_t *ia, boost::int64_t *ib,
+               real *crit) {
  real *dists;
  boost::int64_t len;
  boost::int64_t pos = 0;
-  boost::int64_t i,j,k,iTab,jTab;
+  boost::int64_t i, j, k, iTab, jTab;
  double tmp;
-  len = (n*(n-1))/2;
-  dists = (real *)calloc(len,sizeof(real));
-  for(i=1;i<n;i++){
-    iTab = i*m;
-    for(j=0;j<i;j++){
-      jTab = j*m;
-      for(k=0;k<m;k++){
-	tmp = dataP[iTab+k]-dataP[jTab+k];
-	dists[pos] += tmp*tmp;
+  len = (n * (n - 1)) / 2;
+  dists = (real *)calloc(len, sizeof(real));
+  for (i = 1; i < n; i++) {
+    iTab = i * m;
+    for (j = 0; j < i; j++) {
+      jTab = j * m;
+      for (k = 0; k < m; k++) {
+        tmp = dataP[iTab + k] - dataP[jTab + k];
+        dists[pos] += tmp * tmp;
      }
      pos++;
    }
  }
-  distdriver_(&n,&len,dists,&iopt,ia,ib,crit);
+  distdriver_(&n, &len, dists, &iopt, ia, ib, crit);
  free(dists);
 };

-static PyObject *
-Clustering_MurtaghCluster(python::object data, int nPts, int sz, int option)
-{
+static PyObject *Clustering_MurtaghCluster(python::object data, int nPts,
+                                           int sz, int option) {
  PyArrayObject *dataContig;
-  boost::int64_t *ia,*ib;
+  boost::int64_t *ia, *ib;
  real *crit;
  PyObject *res;
  PyObject *tmp;
  npy_intp dims[2];

  if (PyArray_Check(data.ptr())) {
-    dataContig 
-      = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(data.ptr(),PyArray_DOUBLE,2,2));
-  }
-  else {
+    dataContig = reinterpret_cast<PyArrayObject *>(
+        PyArray_ContiguousFromObject(data.ptr(), PyArray_DOUBLE, 2, 2));
+  } else {
    throw_value_error("PyArray_Type expected as input");
    return NULL;
  }

-  ia = (boost::int64_t *)calloc(nPts,sizeof(boost::int64_t));
-  ib = (boost::int64_t *)calloc(nPts,sizeof(boost::int64_t));
-  crit = (real *)calloc(nPts,sizeof(real));
+  ia = (boost::int64_t *)calloc(nPts, sizeof(boost::int64_t));
+  ib = (boost::int64_t *)calloc(nPts, sizeof(boost::int64_t));
+  crit = (real *)calloc(nPts, sizeof(real));

-  clusterit((real *)dataContig->data,nPts,sz,option,ia,ib,crit);
+  clusterit((real *)dataContig->data, nPts, sz, option, ia, ib, crit);

  dims[0] = nPts;
  res = PyTuple_New(3);
@@ -88,52 +85,47 @@ Clustering_MurtaghCluster(python::object data, int nPts, int sz, int option)
  //  that's why it's ok that we do not free them in this function,
  //  Python will take care of it for us.
  //
-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_LONG,(void *)ia);
-  PyTuple_SetItem(res,0,(PyObject *)tmp);
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_LONG, (void *)ia);
+  PyTuple_SetItem(res, 0, (PyObject *)tmp);

-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_LONG,(void *)ib);
-  PyTuple_SetItem(res,1,(PyObject *)tmp);
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_LONG, (void *)ib);
+  PyTuple_SetItem(res, 1, (PyObject *)tmp);

-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_DOUBLE,(void *)crit);
-  PyTuple_SetItem(res,2,(PyObject *)tmp);
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, (void *)crit);
+  PyTuple_SetItem(res, 2, (PyObject *)tmp);

  return res;
 };

-
-
-void distclusterit(real *dists,boost::int64_t n,boost::int64_t iopt,
-		   boost::int64_t *ia,boost::int64_t *ib,real *crit){
+void distclusterit(real *dists, boost::int64_t n, boost::int64_t iopt,
+                   boost::int64_t *ia, boost::int64_t *ib, real *crit) {
  boost::int64_t len;

-  len = (n*(n-1))/2;
-  distdriver_(&n,&len,dists,&iopt,ia,ib,crit);
+  len = (n * (n - 1)) / 2;
+  distdriver_(&n, &len, dists, &iopt, ia, ib, crit);
 };

-
-static PyObject *
-Clustering_MurtaghDistCluster(python::object data, int nPts, int option)
-{
+static PyObject *Clustering_MurtaghDistCluster(python::object data, int nPts,
+                                               int option) {
  PyArrayObject *dataContig;
-  boost::int64_t *ia,*ib;
+  boost::int64_t *ia, *ib;
  real *crit;
-  PyObject *res=PyTuple_New(3);
+  PyObject *res = PyTuple_New(3);
  PyObject *tmp;
  npy_intp dims[] = {1};

  if (PyArray_Check(data.ptr())) {
-    dataContig 
-      = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(data.ptr(),PyArray_DOUBLE,1,1));
-  }
-  else {
+    dataContig = reinterpret_cast<PyArrayObject *>(
+        PyArray_ContiguousFromObject(data.ptr(), PyArray_DOUBLE, 1, 1));
+  } else {
    throw_value_error("PyArray_Type expected as input");
    return NULL;
  }

-  ia = (boost::int64_t *)calloc(nPts,sizeof(boost::int64_t));
-  ib = (boost::int64_t *)calloc(nPts,sizeof(boost::int64_t));
-  crit = (real *)calloc(nPts,sizeof(real));
-  distclusterit((real *)dataContig->data,nPts,option,ia,ib,crit);
+  ia = (boost::int64_t *)calloc(nPts, sizeof(boost::int64_t));
+  ib = (boost::int64_t *)calloc(nPts, sizeof(boost::int64_t));
+  crit = (real *)calloc(nPts, sizeof(real));
+  distclusterit((real *)dataContig->data, nPts, option, ia, ib, crit);

  dims[0] = nPts;

@@ -142,30 +134,26 @@ Clustering_MurtaghDistCluster(python::object data, int nPts, int option)
  //  that's why it's ok that we do not free them in this function,
  //  Python will take care of it for us.
  //
-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_LONG,(void *)ia);
-  PyTuple_SetItem(res,0,tmp);
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_LONG, (void *)ia);
+  PyTuple_SetItem(res, 0, tmp);

-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_LONG,(void *)ib);
-  PyTuple_SetItem(res,1,tmp);
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_LONG, (void *)ib);
+  PyTuple_SetItem(res, 1, tmp);
+
+  tmp = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, (void *)crit);
+  PyTuple_SetItem(res, 2, tmp);

-  tmp = PyArray_SimpleNewFromData(1,dims,NPY_DOUBLE,(void *)crit);
-  PyTuple_SetItem(res,2,tmp);
-  
  return res;
 };

-
 BOOST_PYTHON_MODULE(Clustering) {
-
  rdkit_import_array();

  python::def("MurtaghCluster", Clustering_MurtaghCluster,
-	      ( python::arg("data"), python::arg("nPts"), 
-		python::arg("sz"), python::arg("option") ),
-	      "TODO: provide docstring");
+              (python::arg("data"), python::arg("nPts"), python::arg("sz"),
+               python::arg("option")),
+              "TODO: provide docstring");
  python::def("MurtaghDistCluster", Clustering_MurtaghDistCluster,
-	      ( python::arg("data"), python::arg("nPts"), 
-		python::arg("option") ),
-	      "TODO: provide docstring");
+              (python::arg("data"), python::arg("nPts"), python::arg("option")),
+              "TODO: provide docstring");
 }
-
--- a/Code/ML/Cluster/Murtagh/f2c.h
+++ b/Code/ML/Cluster/Murtagh/f2c.h
@@ -2,7 +2,7 @@

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

-	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+        - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
@@ -19,11 +19,11 @@ typedef long int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
-#ifdef INTEGER_STAR_8	/* Adjust for integer*8. */
-typedef long long longint;		/* system-dependent */
-typedef unsigned long long ulongint;	/* system-dependent */
-#define qbit_clear(a,b)	((a) & ~((ulongint)1 << (b)))
-#define qbit_set(a,b)	((a) |  ((ulongint)1 << (b)))
+#ifdef INTEGER_STAR_8                /* Adjust for integer*8. */
+typedef long long longint;           /* system-dependent */
+typedef unsigned long long ulongint; /* system-dependent */
+#define qbit_clear(a, b) ((a) & ~((ulongint)1 << (b)))
+#define qbit_set(a, b) ((a) | ((ulongint)1 << (b)))
 #endif

 #define TRUE_ (1)
@@ -48,121 +48,121 @@ typedef long int ftnint;
 #endif

 /*external read, write*/
-typedef struct
-{	flag cierr;
-	ftnint ciunit;
-	flag ciend;
-	char *cifmt;
-	ftnint cirec;
+typedef struct {
+  flag cierr;
+  ftnint ciunit;
+  flag ciend;
+  char *cifmt;
+  ftnint cirec;
 } cilist;

 /*internal read, write*/
-typedef struct
-{	flag icierr;
-	char *iciunit;
-	flag iciend;
-	char *icifmt;
-	ftnint icirlen;
-	ftnint icirnum;
+typedef struct {
+  flag icierr;
+  char *iciunit;
+  flag iciend;
+  char *icifmt;
+  ftnint icirlen;
+  ftnint icirnum;
 } icilist;

 /*open*/
-typedef struct
-{	flag oerr;
-	ftnint ounit;
-	char *ofnm;
-	ftnlen ofnmlen;
-	char *osta;
-	char *oacc;
-	char *ofm;
-	ftnint orl;
-	char *oblnk;
+typedef struct {
+  flag oerr;
+  ftnint ounit;
+  char *ofnm;
+  ftnlen ofnmlen;
+  char *osta;
+  char *oacc;
+  char *ofm;
+  ftnint orl;
+  char *oblnk;
 } olist;

 /*close*/
-typedef struct
-{	flag cerr;
-	ftnint cunit;
-	char *csta;
+typedef struct {
+  flag cerr;
+  ftnint cunit;
+  char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
-typedef struct
-{	flag aerr;
-	ftnint aunit;
+typedef struct {
+  flag aerr;
+  ftnint aunit;
 } alist;

 /* inquire */
-typedef struct
-{	flag inerr;
-	ftnint inunit;
-	char *infile;
-	ftnlen infilen;
-	ftnint	*inex;	/*parameters in standard's order*/
-	ftnint	*inopen;
-	ftnint	*innum;
-	ftnint	*innamed;
-	char	*inname;
-	ftnlen	innamlen;
-	char	*inacc;
-	ftnlen	inacclen;
-	char	*inseq;
-	ftnlen	inseqlen;
-	char 	*indir;
-	ftnlen	indirlen;
-	char	*infmt;
-	ftnlen	infmtlen;
-	char	*inform;
-	ftnint	informlen;
-	char	*inunf;
-	ftnlen	inunflen;
-	ftnint	*inrecl;
-	ftnint	*innrec;
-	char	*inblank;
-	ftnlen	inblanklen;
+typedef struct {
+  flag inerr;
+  ftnint inunit;
+  char *infile;
+  ftnlen infilen;
+  ftnint *inex; /*parameters in standard's order*/
+  ftnint *inopen;
+  ftnint *innum;
+  ftnint *innamed;
+  char *inname;
+  ftnlen innamlen;
+  char *inacc;
+  ftnlen inacclen;
+  char *inseq;
+  ftnlen inseqlen;
+  char *indir;
+  ftnlen indirlen;
+  char *infmt;
+  ftnlen infmtlen;
+  char *inform;
+  ftnint informlen;
+  char *inunf;
+  ftnlen inunflen;
+  ftnint *inrecl;
+  ftnint *innrec;
+  char *inblank;
+  ftnlen inblanklen;
 } inlist;

 #define VOID void

-union Multitype {	/* for multiple entry points */
-	integer1 g;
-	shortint h;
-	integer i;
-	/* longint j; */
-	real r;
-	doublereal d;
-	complex c;
-	doublecomplex z;
-	};
+union Multitype {/* for multiple entry points */
+  integer1 g;
+  shortint h;
+  integer i;
+  /* longint j; */
+  real r;
+  doublereal d;
+  complex c;
+  doublecomplex z;
+};

 typedef union Multitype Multitype;

-/*typedef long int Long;*/	/* No longer used; formerly in Namelist */
+/*typedef long int Long;*/ /* No longer used; formerly in Namelist */

-struct Vardesc {	/* for Namelist */
-	char *name;
-	char *addr;
-	ftnlen *dims;
-	int  type;
-	};
+struct Vardesc {/* for Namelist */
+  char *name;
+  char *addr;
+  ftnlen *dims;
+  int type;
+};
 typedef struct Vardesc Vardesc;

 struct Namelist {
-	char *name;
-	Vardesc **vars;
-	int nvars;
-	};
+  char *name;
+  Vardesc **vars;
+  int nvars;
+};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (doublereal)abs(x)
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#define dmin(a,b) (doublereal)min(a,b)
-#define dmax(a,b) (doublereal)max(a,b)
-#define bit_test(a,b)	((a) >> (b) & 1)
-#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
-#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+#define dabs(x) (doublereal) abs(x)
+#define min(a, b) ((a) <= (b) ? (a) : (b))
+#define max(a, b) ((a) >= (b) ? (a) : (b))
+#define dmin(a, b) (doublereal) min(a, b)
+#define dmax(a, b) (doublereal) max(a, b)
+#define bit_test(a, b) ((a) >> (b)&1)
+#define bit_clear(a, b) ((a) & ~((uinteger)1 << (b)))
+#define bit_set(a, b) ((a) | ((uinteger)1 << (b)))

 /* procedure parameter types for -A and -C++ */

@@ -193,10 +193,10 @@ typedef /* Character */ VOID (*H_fp)();
 typedef /* Subroutine */ int (*S_fp)();
 #endif
 /* E_fp is for real functions when -R is not specified */
-typedef VOID C_f;	/* complex function */
-typedef VOID H_f;	/* character function */
-typedef VOID Z_f;	/* double complex function */
-typedef doublereal E_f;	/* real function with -R not specified */
+typedef VOID C_f;       /* complex function */
+typedef VOID H_f;       /* character function */
+typedef VOID Z_f;       /* double complex function */
+typedef doublereal E_f; /* real function with -R not specified */

 /* undef any lower-case symbols that your C compiler predefines, e.g.: */

--- a/Code/ML/Data/cQuantize.cpp
+++ b/Code/ML/Data/cQuantize.cpp
@@ -20,10 +20,11 @@ namespace python = boost::python;
 /***********************************************

   constructs a variable table for the data passed in
-   The table for a given variable records the number of times each possible value
+   The table for a given variable records the number of times each possible
+   value
    of that variable appears for each possible result of the function.

-  **Arguments**  
+  **Arguments**

   - vals: pointer to double, contains the values of the variable,
     should be sorted
@@ -34,14 +35,15 @@ namespace python = boost::python;

   - nCuts: int, the length of _cuts_

-   - starts: pointer to int, the potential starting points for quantization bounds
+   - starts: pointer to int, the potential starting points for quantization
+     bounds

   - nStarts: int, the length of _starts_

   - results: poitner to int, the result codes

   - nPossibleRes: int, the number of possible result codes
- 
+

  **Returns**

@@ -54,30 +56,29 @@ namespace python = boost::python;
    - the _results_ array is assumed to be _nVals_ long

 ***********************************************/
-long int *
-GenVarTable(double *vals,int nVals,long int *cuts,int nCuts,long int *starts,
-	    long int *results,int nPossibleRes,long int *varTable)
-{
+long int *GenVarTable(double *vals, int nVals, long int *cuts, int nCuts,
+                      long int *starts, long int *results, int nPossibleRes,
+                      long int *varTable) {
  RDUNUSED_PARAM(vals);
  int nBins = nCuts + 1;
-  int idx,i,iTab;
+  int idx, i, iTab;

-  memset(varTable,0,nBins*nPossibleRes*sizeof(long int));
+  memset(varTable, 0, nBins * nPossibleRes * sizeof(long int));
  idx = 0;
-  for(i=0;i<nCuts;i++){
+  for (i = 0; i < nCuts; i++) {
    int cut = cuts[i];
-    iTab = i*nPossibleRes;
-    while(idx<starts[cut]){
-      varTable[iTab+results[idx]] += 1;
+    iTab = i * nPossibleRes;
+    while (idx < starts[cut]) {
+      varTable[iTab + results[idx]] += 1;
      idx++;
    }
  }
-  iTab = nCuts*nPossibleRes;
-  while(idx<nVals){
-    varTable[iTab+results[idx]] += 1;
+  iTab = nCuts * nPossibleRes;
+  while (idx < nVals) {
+    varTable[iTab + results[idx]] += 1;
    idx++;
  }
-  return varTable;  
+  return varTable;
 }

 /***********************************************
@@ -86,7 +87,7 @@ GenVarTable(double *vals,int nVals,long int *cuts,int nCuts,long int *starts,
  we do things this way to avoid having to convert things back and forth
  from Python objects

-  **Arguments**  
+  **Arguments**

   - vals: pointer to double, contains the values of the variable,
     should be sorted
@@ -99,14 +100,15 @@ GenVarTable(double *vals,int nVals,long int *cuts,int nCuts,long int *starts,

   - which: int, the quant bound being modified here

-   - starts: pointer to int, the potential starting points for quantization bounds
+   - starts: pointer to int, the potential starting points for quantization
+     bounds

   - nStarts: int, the length of _starts_

   - results: poitner to int, the result codes

   - nPossibleRes: int, the number of possible result codes
- 
+

  **Returns**

@@ -120,66 +122,65 @@ GenVarTable(double *vals,int nVals,long int *cuts,int nCuts,long int *starts,
    - the _results_ array is assumed to be _nVals_ long

 ***********************************************/
-double
-RecurseHelper(double *vals,int nVals,long int *cuts,int nCuts,int which,
-	      long int *starts,int nStarts,long int *results,int nPossibleRes)
-{
-  double maxGain=-1e6,gainHere;
-  long int *bestCuts,*tCuts;
-  long int *varTable=0;
+double RecurseHelper(double *vals, int nVals, long int *cuts, int nCuts,
+                     int which, long int *starts, int nStarts,
+                     long int *results, int nPossibleRes) {
+  double maxGain = -1e6, gainHere;
+  long int *bestCuts, *tCuts;
+  long int *varTable = 0;
  int highestCutHere = nStarts - nCuts + which;
-  int i,nBounds=nCuts;
-  
-  varTable = (long int *)calloc((nCuts+1)*nPossibleRes,sizeof(long int));
-  bestCuts = (long int *)calloc(nCuts,sizeof(long int));
-  tCuts = (long int *)calloc(nCuts,sizeof(long int));
-  GenVarTable(vals,nVals,cuts,nCuts,starts,results,nPossibleRes,varTable);
-  while(cuts[which] <= highestCutHere){
-    gainHere = RDInfoTheory::InfoEntropyGain(varTable,nCuts+1,nPossibleRes);
-    if(gainHere > maxGain){
+  int i, nBounds = nCuts;
+
+  varTable = (long int *)calloc((nCuts + 1) * nPossibleRes, sizeof(long int));
+  bestCuts = (long int *)calloc(nCuts, sizeof(long int));
+  tCuts = (long int *)calloc(nCuts, sizeof(long int));
+  GenVarTable(vals, nVals, cuts, nCuts, starts, results, nPossibleRes,
+              varTable);
+  while (cuts[which] <= highestCutHere) {
+    gainHere = RDInfoTheory::InfoEntropyGain(varTable, nCuts + 1, nPossibleRes);
+    if (gainHere > maxGain) {
      maxGain = gainHere;
-      memcpy(bestCuts,cuts,nCuts*sizeof(long int));
+      memcpy(bestCuts, cuts, nCuts * sizeof(long int));
    }

    // recurse on the next vars if needed
-    if(which < nBounds-1){
-      memcpy(tCuts,cuts,nCuts*sizeof(long int));
-      gainHere = RecurseHelper(vals,nVals,tCuts,nCuts,which+1,starts,nStarts,
-			       results,nPossibleRes);
-      if(gainHere > maxGain){
+    if (which < nBounds - 1) {
+      memcpy(tCuts, cuts, nCuts * sizeof(long int));
+      gainHere = RecurseHelper(vals, nVals, tCuts, nCuts, which + 1, starts,
+                               nStarts, results, nPossibleRes);
+      if (gainHere > maxGain) {
        maxGain = gainHere;
-	memcpy(bestCuts,tCuts,nCuts*sizeof(long int));
+        memcpy(bestCuts, tCuts, nCuts * sizeof(long int));
      }
    }

    // update this cut
    int oldCut = cuts[which];
    cuts[which] += 1;
-    int top,bot;
+    int top, bot;
    bot = starts[oldCut];
-    if(oldCut+1 < nStarts)
-      top = starts[oldCut+1];
+    if (oldCut + 1 < nStarts)
+      top = starts[oldCut + 1];
    else
-      top = starts[nStarts-1];
-    for(i=bot;i<top;i++) {
-      int v=results[i];
-      varTable[which*nPossibleRes+v] += 1;
-      varTable[(which+1)*nPossibleRes+v] -= 1;
+      top = starts[nStarts - 1];
+    for (i = bot; i < top; i++) {
+      int v = results[i];
+      varTable[which * nPossibleRes + v] += 1;
+      varTable[(which + 1) * nPossibleRes + v] -= 1;
    }
-    for(i=which+1;i<nBounds;i++){
-      if(cuts[i] == cuts[i-1]) cuts[i] += 1;
+    for (i = which + 1; i < nBounds; i++) {
+      if (cuts[i] == cuts[i - 1]) cuts[i] += 1;
    }
  }
-  memcpy(cuts,bestCuts,nCuts*sizeof(long int));
+  memcpy(cuts, bestCuts, nCuts * sizeof(long int));
  free(tCuts);
  free(bestCuts);
  free(varTable);
  return maxGain;
 }

-
 /***********************************************
- 
+
   Recursively finds the best quantization boundaries

   **Arguments**
@@ -206,21 +207,22 @@ RecurseHelper(double *vals,int nVals,long int *cuts,int nCuts,int which,
       1) the best information gain found so far

       2) a list of the quantization bound indices ( _cuts_ for the best case)
-   
+
   **Notes**

    - this is not even remotely efficient, which is why a C replacement
      was written

    - this is a drop-in replacement for *ML.Data.Quantize._PyRecurseBounds*
-						
+
 ***********************************************/
-static python::tuple
-cQuantize_RecurseOnBounds(python::object vals, python::list pyCuts, int which, 
-			  python::list pyStarts, python::object results, int nPossibleRes)
-{
-  PyArrayObject *contigVals,*contigResults;
-  long int *cuts,*starts;
+static python::tuple cQuantize_RecurseOnBounds(python::object vals,
+                                               python::list pyCuts, int which,
+                                               python::list pyStarts,
+                                               python::object results,
+                                               int nPossibleRes) {
+  PyArrayObject *contigVals, *contigResults;
+  long int *cuts, *starts;

  /*
    -------
@@ -229,38 +231,37 @@ cQuantize_RecurseOnBounds(python::object vals, python::list pyCuts, int which,

    -------
  */
-  contigVals 
-    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(vals.ptr(),PyArray_DOUBLE,1,1));
-  if(!contigVals){
+  contigVals = reinterpret_cast<PyArrayObject *>(
+      PyArray_ContiguousFromObject(vals.ptr(), PyArray_DOUBLE, 1, 1));
+  if (!contigVals) {
    throw_value_error("could not convert value argument");
  }

-  contigResults 
-    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1));
-  if(!contigResults){
+  contigResults = reinterpret_cast<PyArrayObject *>(
+      PyArray_ContiguousFromObject(results.ptr(), PyArray_LONG, 1, 1));
+  if (!contigResults) {
    throw_value_error("could not convert results argument");
  }

  python::ssize_t nCuts = python::len(pyCuts);
-  cuts = (long int *)calloc(nCuts,sizeof(long int));
-  for (python::ssize_t i=0; i<nCuts; i++) {
+  cuts = (long int *)calloc(nCuts, sizeof(long int));
+  for (python::ssize_t i = 0; i < nCuts; i++) {
    python::object elem = pyCuts[i];
    cuts[i] = python::extract<long int>(elem);
  }

  python::ssize_t nStarts = python::len(pyStarts);
-  starts = (long int *)calloc(nStarts,sizeof(long int));
-  for (python::ssize_t i=0; i<nStarts; i++){
+  starts = (long int *)calloc(nStarts, sizeof(long int));
+  for (python::ssize_t i = 0; i < nStarts; i++) {
    python::object elem = pyStarts[i];
    starts[i] = python::extract<long int>(elem);
  }

  // do the real work
-  double gain 
-    = RecurseHelper((double *)contigVals->data,contigVals->dimensions[0],
-		    cuts,nCuts,which,starts,nStarts,
-		    (long int *)contigResults->data,nPossibleRes);
-		       
+  double gain = RecurseHelper(
+      (double *)contigVals->data, contigVals->dimensions[0], cuts, nCuts, which,
+      starts, nStarts, (long int *)contigResults->data, nPossibleRes);
+
  /*
    -------

@@ -269,72 +270,71 @@ cQuantize_RecurseOnBounds(python::object vals, python::list pyCuts, int which,
    -------
  */
  python::list cutObj;
-  for (python::ssize_t i=0; i<nCuts; i++) {
+  for (python::ssize_t i = 0; i < nCuts; i++) {
    cutObj.append(cuts[i]);
  }
  free(cuts);
  free(starts);
-  return python::make_tuple(gain, cutObj); 
+  return python::make_tuple(gain, cutObj);
 }

-static python::list
-cQuantize_FindStartPoints(python::object values, python::object results, 
-			  int nData)
-{
+static python::list cQuantize_FindStartPoints(python::object values,
+                                              python::object results,
+                                              int nData) {
  python::list startPts;

-  if(nData<2){
+  if (nData < 2) {
    return startPts;
  }

-  PyArrayObject *contigVals 
-    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(values.ptr(),PyArray_DOUBLE,1,1));
-  if(!contigVals){
+  PyArrayObject *contigVals = reinterpret_cast<PyArrayObject *>(
+      PyArray_ContiguousFromObject(values.ptr(), PyArray_DOUBLE, 1, 1));
+  if (!contigVals) {
    throw_value_error("could not convert value argument");
  }

-  double *vals=(double *)contigVals->data;
+  double *vals = (double *)contigVals->data;

-  PyArrayObject *contigResults 
-    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1));
-  if(!contigResults){
+  PyArrayObject *contigResults = reinterpret_cast<PyArrayObject *>(
+      PyArray_ContiguousFromObject(results.ptr(), PyArray_LONG, 1, 1));
+  if (!contigResults) {
    throw_value_error("could not convert results argument");
  }

-  long *res=(long *)contigResults->data;
+  long *res = (long *)contigResults->data;

-  bool firstBlock=true;
-  long lastBlockAct=-2,blockAct=res[0];
-  int lastDiv=-1;
-  double tol=1e-8;
+  bool firstBlock = true;
+  long lastBlockAct = -2, blockAct = res[0];
+  int lastDiv = -1;
+  double tol = 1e-8;

-  int i=1;
-  while(i<nData){
-    while(i<nData && vals[i]-vals[i-1]<=tol){
-      if(res[i]!=blockAct){
-        blockAct=-1;
+  int i = 1;
+  while (i < nData) {
+    while (i < nData && vals[i] - vals[i - 1] <= tol) {
+      if (res[i] != blockAct) {
+        blockAct = -1;
      }
      ++i;
    }
-    if(firstBlock){
-      firstBlock=false;
-      lastBlockAct=blockAct;
-      lastDiv=i;
+    if (firstBlock) {
+      firstBlock = false;
+      lastBlockAct = blockAct;
+      lastDiv = i;
    } else {
-      if(blockAct==-1 || lastBlockAct==-1 || blockAct!=lastBlockAct){
-	startPts.append(lastDiv);
-        lastDiv=i;
-        lastBlockAct=blockAct;
+      if (blockAct == -1 || lastBlockAct == -1 || blockAct != lastBlockAct) {
+        startPts.append(lastDiv);
+        lastDiv = i;
+        lastBlockAct = blockAct;
      } else {
-        lastDiv=i;
+        lastDiv = i;
      }
    }
-    if(i<nData) blockAct=res[i];
-    ++i; 
+    if (i < nData) blockAct = res[i];
+    ++i;
  }

  // catch the case that the last point also sets a bin:
-  if( blockAct != lastBlockAct ){
+  if (blockAct != lastBlockAct) {
    startPts.append(lastDiv);
  }

@@ -342,19 +342,15 @@ cQuantize_FindStartPoints(python::object values, python::object results,
 }

 BOOST_PYTHON_MODULE(cQuantize) {
-
  rdkit_import_array();

  python::def("_RecurseOnBounds", cQuantize_RecurseOnBounds,
-	      ( python::arg("vals"), python::arg("pyCuts"), 
-		python::arg("which"), python::arg("pyStarts"), 
-		python::arg("results"), python::arg("nPossibleRes") ),
-	      "TODO: provide docstring");
-  python::def("_FindStartPoints", cQuantize_FindStartPoints,
-	      ( python::arg("values"), python::arg("results"), 
-		python::arg("nData") ),
-	      "TODO: provide docstring");
+              (python::arg("vals"), python::arg("pyCuts"), python::arg("which"),
+               python::arg("pyStarts"), python::arg("results"),
+               python::arg("nPossibleRes")),
+              "TODO: provide docstring");
+  python::def(
+      "_FindStartPoints", cQuantize_FindStartPoints,
+      (python::arg("values"), python::arg("results"), python::arg("nData")),
+      "TODO: provide docstring");
 }
-
-
-
--- a/Code/ML/InfoTheory/CorrMatGenerator.h
+++ b/Code/ML/InfoTheory/CorrMatGenerator.h
@@ -15,105 +15,100 @@
 #include <boost/dynamic_bitset.hpp>

 namespace RDInfoTheory {
-  //FIX: won't worry about it now, but this class can be templated by the type of 
-  // container for the bit list and type of descriptors (fingerprint vs. real valued)
-  class BitCorrMatGenerator {
-    /*! \brief A class to generate a correlation matrix for a bunch of fingerprints
-     *
-     *  The correlation matrix is done only for the bit IDs that are set by a call to the 
-     *  function setDescriptorIdList
-     *  
-     *    cr = CorrMatGenerator();
-     *    cr.setDescriptorIdList(descList);
-     *    for each fingerprint in list of fingerprints {
-     *        cr.collectVotes(fingerprint);
-     *    }
-     *    double *corrMat = cr.getCorrMat()
-     *  
-     *  The resulting correlation matrix is a one dimension matrix with only the lower triangle elements
-     *  of the symmetric matrix
-     */
-  public:
-    BitCorrMatGenerator() {
-      this->initGenerator();
-    };
+// FIX: won't worry about it now, but this class can be templated by the type of
+// container for the bit list and type of descriptors (fingerprint vs. real
+// valued)
+class BitCorrMatGenerator {
+  /*! \brief A class to generate a correlation matrix for a bunch of
+   *  fingerprints
+   *
+   *  The correlation matrix is computed only for the bit IDs that were
+   *  registered by a call to the function setBitIdList (getCorrBitList
+   *  returns that list)
+   *
+   *    cr = BitCorrMatGenerator();
+   *    cr.setBitIdList(bitIdList);
+   *    for each fingerprint in list of fingerprints {
+   *        cr.collectVotes(fingerprint);
+   *    }
+   *    double *corrMat = cr.getCorrMat()
+   *
+   *  The resulting correlation matrix is a one-dimensional array with only
+   *  the lower triangle elements of the symmetric matrix (diagonal excluded);
+   *  the pair of list positions (i, j), j < i, maps to index i*(i-1)/2 + j
+   */
+ public:
+  BitCorrMatGenerator() { this->initGenerator(); };

-    ~BitCorrMatGenerator() {
-      delete [] dp_corrMat;
+  ~BitCorrMatGenerator() { delete[] dp_corrMat; }
+
+  void initGenerator() {
+    dp_corrMat = 0;
+    d_descs.resize(0);
+    d_nExamples = 0;
+  };
+
+  /*! \brief Set the list of bits that we are interested in correlating
+   *
+   *  \param bitIdList is a list of bit ids that need to be correlated, e.g.
+   *  a list of the top ranked ensemble of bits; correlation counts gathered
+   *  before this call are discarded
+   */
+  void setBitIdList(const RDKit::INT_VECT &bitIdList) {
+    d_descs = bitIdList;
+    int i, nd = d_descs.size();
+    int nelem = nd * (nd - 1) / 2;
+    delete[] dp_corrMat;
+
+    dp_corrMat = new double[nd * (nd - 1) / 2];
+    for (i = 0; i < nelem; i++) {
+      dp_corrMat[i] = 0.0;
    }
+  };

-    void initGenerator() {
-      dp_corrMat = 0;
-      d_descs.resize(0);
-      d_nExamples = 0;
-    };
+  //! \brief get the number of examples we used so far to compute the
+  //! correlation matrix
+  int getNumExamples() const { return d_nExamples; };

-    /*! \brief Set the list bits that we are interested in correlating
-     *
-     *  \param bitIdList is a list of bit ids that need to be correlated e.g. a list top ranked ensemble 
-     *  of bits 
-     */
-    void setBitIdList(const RDKit::INT_VECT &bitIdList) {
-      d_descs = bitIdList;
-      int i, nd = d_descs.size();
-      int nelem = nd*(nd-1)/2;
-      delete [] dp_corrMat;
+  //! \brief Get the list of bits ID that are used to generate the correlation
+  //! matrix
+  RDKit::INT_VECT getCorrBitList() const { return d_descs; };

-      dp_corrMat = new double[nd*(nd-1)/2];
-      for (i = 0; i < nelem; i++) {
-        dp_corrMat[i] = 0.0;
+  //! \brief Gets a pointer to the correlation matrix
+  double *getCorrMat() { return dp_corrMat; };
+
+  //! \brief For each pair of on bits (bi, bj) in fp increase the correlation
+  //! count for the pair by 1; only the bits registered via setBitIdList are
+  //! considered
+  void collectVotes(const BitVect &fp) {
+    unsigned int nd = d_descs.size();
+    // use a temporary bit vector to first mask the fingerprint
+    ExplicitBitVect ebv(nd);
+    int bi;
+    for (unsigned int i = 0; i < nd; i++) {
+      bi = d_descs[i];
+      if (fp[bi]) {
+        ebv.setBit(i);
      }
-    };
-
-    //! \brief get the number of examples we used so far to compute the correlation matrix
-    int getNumExamples() const {
-      return d_nExamples;
-    };
-
-    //! \brief Get the list of bits ID that are used to generate the correlation matrix
-    RDKit::INT_VECT getCorrBitList() const {
-      return d_descs;
-    };
-
-    //! \brief Gets a pointer to the correlation matrix
-    double *getCorrMat() {
-      return dp_corrMat;
-    };
-    
-    //! \brief For each pair of on bits (bi, bj) in fp increase the correlation count
-    //    for the pair by 1
-    void collectVotes(const BitVect &fp) {
-      unsigned int nd = d_descs.size();
-      // use a temporary bit vector to first mask the fingerprint
-      ExplicitBitVect ebv(nd);
-      int bi;
-      for (unsigned int i = 0; i < nd; i++) {
-        bi = d_descs[i];
-        if (fp[bi]) {
-          ebv.setBit(i);
-        }
-      }
-      for (unsigned i = 1; i < nd; i++) {
-        unsigned int itab = i*(i-1)/2;
-        if (ebv[i]) {
-          for (unsigned int j = 0; j < i; j++) {
-            if ( ebv[j]) {
-              dp_corrMat[itab + j] += 1;
-            }
+    }
+    for (unsigned i = 1; i < nd; i++) {
+      unsigned int itab = i * (i - 1) / 2;
+      if (ebv[i]) {
+        for (unsigned int j = 0; j < i; j++) {
+          if (ebv[j]) {
+            dp_corrMat[itab + j] += 1;
          }
        }
      }
-      d_nExamples++;
-    };
-
-  private:
-    RDKit::INT_VECT d_descs;
-    double *dp_corrMat;
-    int d_nExamples;
+    }
+    d_nExamples++;
  };

+ private:
+  RDKit::INT_VECT d_descs;
+  double *dp_corrMat;
+  int d_nExamples;
+};
 }

 #endif
-    
-
--- a/Code/ML/InfoTheory/InfoBitRanker.cpp
+++ b/Code/ML/InfoTheory/InfoBitRanker.cpp
@@ -20,167 +20,168 @@
 #include <queue>

 namespace RDInfoTheory {
-  typedef std::pair<double, int> PAIR_D_I;
-  typedef std::vector<PAIR_D_I> VECT_PDI;
+typedef std::pair<double, int> PAIR_D_I;
+typedef std::vector<PAIR_D_I> VECT_PDI;

-  struct gtDIPair {
-    bool operator() ( const PAIR_D_I &pd1, const PAIR_D_I &pd2) const {
-      return pd1.first > pd2.first;
-    }
-  };
-
-  typedef std::priority_queue<PAIR_D_I, VECT_PDI, gtDIPair> PR_QUEUE;
-
-
-  
-  void InfoBitRanker::setBiasList(RDKit::INT_VECT &classList) {
-    URANGE_CHECK(classList.size(), d_classes);
-    d_biasList = classList;
-    //make sure we don't have any duplicates
-    std::sort(d_biasList.begin(), d_biasList.end());
-    RDKit::INT_VECT_CI bi = std::unique(d_biasList.begin(), d_biasList.end());
-    CHECK_INVARIANT(bi == d_biasList.end(), "There are duplicates in the class bias list");
-
-    // finally make sure all the class ID in d_biasList are within range
-    for (bi = d_biasList.begin(); bi != d_biasList.end(); bi++) {
-      URANGE_CHECK(static_cast<unsigned int>(*bi), d_classes-1);
-    }
+struct gtDIPair {
+  bool operator()(const PAIR_D_I &pd1, const PAIR_D_I &pd2) const {
+    return pd1.first > pd2.first;
  }
+};

-  void InfoBitRanker::setMaskBits(RDKit::INT_VECT &maskBits) {
-    delete dp_maskBits;
-    dp_maskBits = new ExplicitBitVect(d_dims);
-    for (RDKit::INT_VECT_CI bi = maskBits.begin();
-         bi != maskBits.end(); ++bi) {
-      dp_maskBits->setBit(*bi);
-    }
+typedef std::priority_queue<PAIR_D_I, VECT_PDI, gtDIPair> PR_QUEUE;
+
+void InfoBitRanker::setBiasList(RDKit::INT_VECT &classList) {
+  URANGE_CHECK(classList.size(), d_classes);
+  d_biasList = classList;
+  // make sure we don't have any duplicates
+  std::sort(d_biasList.begin(), d_biasList.end());
+  RDKit::INT_VECT_CI bi = std::unique(d_biasList.begin(), d_biasList.end());
+  CHECK_INVARIANT(bi == d_biasList.end(),
+                  "There are duplicates in the class bias list");
+
+  // finally make sure all the class ID in d_biasList are within range
+  for (bi = d_biasList.begin(); bi != d_biasList.end(); bi++) {
+    URANGE_CHECK(static_cast<unsigned int>(*bi), d_classes - 1);
  }
+}

-  bool InfoBitRanker::BiasCheckBit(RDKit::USHORT *resMat) const {
-    PRECONDITION(resMat,"bad results pointer");
-    if ((d_biasList.size() == 0) || (d_biasList.size() == d_classes)) {
-      //we will accept the bit 
-      return true;
-    }
-    RDKit::DOUBLE_VECT fracs;
-    fracs.resize(d_classes);
-
-    // compute the fractions of items in each class that hit the bit
-    // and record the maximum for the those classes not in the bias list
-    double maxCor = 0.0;
-    for (unsigned int i = 0; i < d_classes; i++) {
-      if (d_clsCount[i] > 0) {
-        fracs[i] = ((double)resMat[i])/d_clsCount[i];
-      } else {
-        fracs[i] = 0.0;
-      }
-      if (std::find(d_biasList.begin(), d_biasList.end(), i) == d_biasList.end()) {
-        // if not in the biasList
-        if (fracs[i] > maxCor) {
-          // if this is fraction is greater than the previously known maximum
-          maxCor = fracs[i];
-        }
-      }
-    }
-
-    bool bitOk = false;
-    for (RDKit::INT_VECT_CI bci = d_biasList.begin(); bci !=
-           d_biasList.end(); ++bci) {
-      if (fracs[*bci] >= maxCor) {
-        bitOk = true;
-        break;
-      }
-    }
-    return bitOk;
+void InfoBitRanker::setMaskBits(RDKit::INT_VECT &maskBits) {
+  delete dp_maskBits;
+  dp_maskBits = new ExplicitBitVect(d_dims);
+  for (RDKit::INT_VECT_CI bi = maskBits.begin(); bi != maskBits.end(); ++bi) {
+    dp_maskBits->setBit(*bi);
  }
+}

-      
-  double InfoBitRanker::BiasChiSquareGain(RDKit::USHORT *resMat) const {
-    PRECONDITION(resMat,"bad result pointer");
-    bool bitOk = this->BiasCheckBit(resMat);
-    double info=0.0;
-    if (bitOk) {
-      info = ChiSquare(resMat, 2, d_classes);
-    }
-    return info;
+bool InfoBitRanker::BiasCheckBit(RDKit::USHORT *resMat) const {
+  PRECONDITION(resMat, "bad results pointer");
+  if ((d_biasList.size() == 0) || (d_biasList.size() == d_classes)) {
+    // we will accept the bit
+    return true;
  }
+  RDKit::DOUBLE_VECT fracs;
+  fracs.resize(d_classes);

-  double InfoBitRanker::BiasInfoEntropyGain(RDKit::USHORT *resMat) const {
-    PRECONDITION(resMat,"bad result pointer");
-    bool bitOk = this->BiasCheckBit(resMat);
-    double info=0.0;
-    if (bitOk) {
-      info = InfoEntropyGain(resMat, 2, d_classes);
+  // compute the fractions of items in each class that hit the bit
+  // and record the maximum for those classes not in the bias list
+  double maxCor = 0.0;
+  for (unsigned int i = 0; i < d_classes; i++) {
+    if (d_clsCount[i] > 0) {
+      fracs[i] = ((double)resMat[i]) / d_clsCount[i];
+    } else {
+      fracs[i] = 0.0;
    }
-    return info;
-  }
-
-  void InfoBitRanker::accumulateVotes(const ExplicitBitVect &bv, unsigned int label) {
-    URANGE_CHECK(label, d_classes-1);
-    CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");
-
-    d_nInst += 1;
-    d_clsCount[label] += 1;
-    for (unsigned int i=0;i<bv.getNumBits();i++){
-      if( (*bv.dp_bits)[i] && (!dp_maskBits || dp_maskBits->getBit(i)) ){
-        d_counts[label][i] += 1;
+    if (std::find(d_biasList.begin(), d_biasList.end(), i) ==
+        d_biasList.end()) {
+      // if not in the biasList
+      if (fracs[i] > maxCor) {
+        // if this fraction is greater than the previously known maximum
+        maxCor = fracs[i];
      }
    }
  }
-  
-  void InfoBitRanker::accumulateVotes(const SparseBitVect &bv, unsigned int label) {
-    URANGE_CHECK(label, d_classes-1);
-    CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");

-    d_nInst += 1;
-    d_clsCount[label] += 1;
-    for (IntSet::const_iterator obi = bv.dp_bits->begin();
-         obi != bv.dp_bits->end();
-        ++obi)  {
-      if(!dp_maskBits || dp_maskBits->getBit(*obi)){
-        d_counts[label][(*obi)] += 1;
-      }
+  bool bitOk = false;
+  for (RDKit::INT_VECT_CI bci = d_biasList.begin(); bci != d_biasList.end();
+       ++bci) {
+    if (fracs[*bci] >= maxCor) {
+      bitOk = true;
+      break;
    }
  }
-  
-  double *InfoBitRanker::getTopN(unsigned int num) {
-    // this is a place holder to pass along to infogain function
-    // the size of this container should nVals*d_classes, where nVals
-    // is the number of values a variable can take.
-    // since we are dealing with a binary bit vector nVals = 2
-    // in addition the infogain function pretends that this is a 2D matrix
-    // with the number of rows equal to nVals and num of columns equal to 
-    // d_classes
-    if(num>d_dims) throw ValueErrorException("attempt to rank more bits than present in the bit vectors");
-    if(dp_maskBits)
-      CHECK_INVARIANT(num <= dp_maskBits->getNumOnBits(), "Can't rank more bits than the ensemble size"); 
-    RDKit::USHORT *resMat = new RDKit::USHORT[2*d_classes];
-    
-    PR_QUEUE topN;
+  return bitOk;
+}

-    for (unsigned int i = 0; i < d_dims; i++) {
-      // we may want to ignore bits that are not turned on in any item of class 
-      // "ignoreNoClass"
-      /*
-      if ((0 <= ignoreNoClass) && (d_classes > ignoreNoClass)) {
-        if (d_counts[ignoreNoClass][i] == 0) {
-          continue;
-        }
-        }*/
-      
-      
-      if (dp_maskBits && !dp_maskBits->getBit(i)) {
-           continue;
-      }
+double InfoBitRanker::BiasChiSquareGain(RDKit::USHORT *resMat) const {
+  PRECONDITION(resMat, "bad result pointer");
+  bool bitOk = this->BiasCheckBit(resMat);
+  double info = 0.0;
+  if (bitOk) {
+    info = ChiSquare(resMat, 2, d_classes);
+  }
+  return info;
+}

-      // fill up dmat
-      for (unsigned int j = 0; j < d_classes; j++) {
-        // we know that we have only two rows here
-        resMat[j] = d_counts[j][i];
-        resMat[d_classes + j] = (d_clsCount[j] - d_counts[j][i]);
+double InfoBitRanker::BiasInfoEntropyGain(RDKit::USHORT *resMat) const {
+  PRECONDITION(resMat, "bad result pointer");
+  bool bitOk = this->BiasCheckBit(resMat);
+  double info = 0.0;
+  if (bitOk) {
+    info = InfoEntropyGain(resMat, 2, d_classes);
+  }
+  return info;
+}
+
+void InfoBitRanker::accumulateVotes(const ExplicitBitVect &bv,
+                                    unsigned int label) {
+  URANGE_CHECK(label, d_classes - 1);
+  CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");
+
+  d_nInst += 1;
+  d_clsCount[label] += 1;
+  for (unsigned int i = 0; i < bv.getNumBits(); i++) {
+    if ((*bv.dp_bits)[i] && (!dp_maskBits || dp_maskBits->getBit(i))) {
+      d_counts[label][i] += 1;
+    }
+  }
+}
+
+void InfoBitRanker::accumulateVotes(const SparseBitVect &bv,
+                                    unsigned int label) {
+  URANGE_CHECK(label, d_classes - 1);
+  CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");
+
+  d_nInst += 1;
+  d_clsCount[label] += 1;
+  for (IntSet::const_iterator obi = bv.dp_bits->begin();
+       obi != bv.dp_bits->end(); ++obi) {
+    if (!dp_maskBits || dp_maskBits->getBit(*obi)) {
+      d_counts[label][(*obi)] += 1;
+    }
+  }
+}
+
+double *InfoBitRanker::getTopN(unsigned int num) {
+  // this is a place holder to pass along to infogain function
+  // the size of this container should be nVals*d_classes, where nVals
+  // is the number of values a variable can take.
+  // since we are dealing with a binary bit vector nVals = 2
+  // in addition the infogain function pretends that this is a 2D matrix
+  // with the number of rows equal to nVals and num of columns equal to
+  // d_classes
+  if (num > d_dims)
+    throw ValueErrorException(
+        "attempt to rank more bits than present in the bit vectors");
+  if (dp_maskBits)
+    CHECK_INVARIANT(num <= dp_maskBits->getNumOnBits(),
+                    "Can't rank more bits than the ensemble size");
+  RDKit::USHORT *resMat = new RDKit::USHORT[2 * d_classes];
+
+  PR_QUEUE topN;
+
+  for (unsigned int i = 0; i < d_dims; i++) {
+    // we may want to ignore bits that are not turned on in any item of class
+    // "ignoreNoClass"
+    /*
+    if ((0 <= ignoreNoClass) && (d_classes > ignoreNoClass)) {
+      if (d_counts[ignoreNoClass][i] == 0) {
+        continue;
      }
-      double info = 0.0;
-      switch (d_type) {
+      }*/
+
+    if (dp_maskBits && !dp_maskBits->getBit(i)) {
+      continue;
+    }
+
+    // fill up resMat
+    for (unsigned int j = 0; j < d_classes; j++) {
+      // we know that we have only two rows here
+      resMat[j] = d_counts[j][i];
+      resMat[d_classes + j] = (d_clsCount[j] - d_counts[j][i]);
+    }
+    double info = 0.0;
+    switch (d_type) {
      case ENTROPY:
        info = InfoEntropyGain(resMat, 2, d_classes);
        break;
@@ -195,100 +196,93 @@ namespace RDInfoTheory {
        break;
      default:
        break;
-      }
-
-      PAIR_D_I entry(info, i);
-      
-      if (info >= 0.0) {
-        if (topN.size() < num) {
-          topN.push(entry);
-        }
-        else if (info > topN.top().first) {
-          topN.pop();
-          topN.push(entry);
-        }
-      }
-    }
-    
-    delete [] resMat;
-    
-    // now fill up the result matrix for the topN bits
-    // the result from this function is a double * of size 
-    // num*4. The caller of this function interprets this
-    // array as a two dimensional array of size num*(2+d_classes) with each row
-    // containing the following entries 
-    //   bitId, infogain, 1 additional column for number of hits for each class
-    //double *res = new double[num*(2+d_classes)];
-    
-    d_top = num;
-    int ncols = 2+d_classes;
-    
-    delete [] dp_topBits;
-    dp_topBits = new double[num*ncols];
-    
-    int offset, bid;
-    
-    RDKit::INT_VECT maskBits;
-    if (dp_maskBits && topN.size() < num) {
-      dp_maskBits->getOnBits(maskBits);
    }

-    for (int i = num - 1; i >= 0; i--) {
-      offset = i*ncols;
-      if (topN.size() == 0 ) {
-        if (dp_maskBits) {
-              bid = maskBits[i];
-        } else {
-              bid = i;
-        }
-        dp_topBits[offset + 1] = 0.0;
-      } else {
-        bid = topN.top().second; // bit id
-        dp_topBits[offset + 1] = topN.top().first; // value of the infogain
+    PAIR_D_I entry(info, i);
+
+    if (info >= 0.0) {
+      if (topN.size() < num) {
+        topN.push(entry);
+      } else if (info > topN.top().first) {
        topN.pop();
-      }
-      dp_topBits[offset] = (double)bid;
-      
-      for (unsigned int j = 0; j < d_classes; j++) {
-        dp_topBits[offset + 2 + j] = (double)d_counts[j][bid];
+        topN.push(entry);
      }
    }
-    return dp_topBits;
  }

-  void InfoBitRanker::writeTopBitsToStream(std::ostream *outStream) const {
-    (*outStream) << std::setw(12) << "Bit" << std::setw(12) << "InfoContent";
-    for (unsigned int ic = 0; ic < d_classes; ic++) {
-      (*outStream) << std::setw(10) << "class" << ic;
-    }
-    (*outStream) << std::endl;
-   
-    unsigned int ncols = 2 + d_classes;
-    for (unsigned int i = 0; i < d_top; i++) {
-      (*outStream) << std::setw(12) << (int)dp_topBits[i*ncols]
-                   << std::setw(12) << std::setprecision(5) 
-                   << dp_topBits[i*ncols + 1];
-      for (unsigned int ic = 0; ic < d_classes; ic++) {
-        (*outStream) << std::setw(10) << (int)dp_topBits[i*ncols + 2 + ic];
-      }
-      (*outStream) << "\n";
-    
-    }
-  }
-  
-  void InfoBitRanker::writeTopBitsToFile(const std::string &fileName) const {
-    std::ofstream tmpStream(fileName.c_str());
-    if ((!tmpStream) || (tmpStream.bad()) ) {
-      std::ostringstream errout;
-      errout << "Bad output file " << fileName;
-      throw RDKit::FileParseException(errout.str());
-    }
+  delete[] resMat;

-    std::ostream &outStream = static_cast<std::ostream &>(tmpStream);
-    this->writeTopBitsToStream(&outStream);
+  // now fill up the result matrix for the topN bits
+  // the result from this function is a double * of size
+  // num*(2+d_classes). The caller of this function interprets this
+  // array as a two dimensional array of size num*(2+d_classes) with each row
+  // containing the following entries
+  //   bitId, infogain, 1 additional column for number of hits for each class
+  // double *res = new double[num*(2+d_classes)];
+
+  d_top = num;
+  int ncols = 2 + d_classes;
+
+  delete[] dp_topBits;
+  dp_topBits = new double[num * ncols];
+
+  int offset, bid;
+
+  RDKit::INT_VECT maskBits;
+  if (dp_maskBits && topN.size() < num) {
+    dp_maskBits->getOnBits(maskBits);
  }
-  
+
+  for (int i = num - 1; i >= 0; i--) {
+    offset = i * ncols;
+    if (topN.size() == 0) {
+      if (dp_maskBits) {
+        bid = maskBits[i];
+      } else {
+        bid = i;
+      }
+      dp_topBits[offset + 1] = 0.0;
+    } else {
+      bid = topN.top().second;                    // bit id
+      dp_topBits[offset + 1] = topN.top().first;  // value of the infogain
+      topN.pop();
+    }
+    dp_topBits[offset] = (double)bid;
+
+    for (unsigned int j = 0; j < d_classes; j++) {
+      dp_topBits[offset + 2 + j] = (double)d_counts[j][bid];
+    }
+  }
+  return dp_topBits;
 }

-    
-  
+void InfoBitRanker::writeTopBitsToStream(std::ostream *outStream) const {
+  (*outStream) << std::setw(12) << "Bit" << std::setw(12) << "InfoContent";
+  for (unsigned int ic = 0; ic < d_classes; ic++) {
+    (*outStream) << std::setw(10) << "class" << ic;
+  }
+  (*outStream) << std::endl;
+
+  unsigned int ncols = 2 + d_classes;
+  for (unsigned int i = 0; i < d_top; i++) {
+    (*outStream) << std::setw(12) << (int)dp_topBits[i * ncols] << std::setw(12)
+                 << std::setprecision(5) << dp_topBits[i * ncols + 1];
+    for (unsigned int ic = 0; ic < d_classes; ic++) {
+      (*outStream) << std::setw(10) << (int)dp_topBits[i * ncols + 2 + ic];
+    }
+    (*outStream) << "\n";
+  }
+}
+
+void InfoBitRanker::writeTopBitsToFile(const std::string &fileName) const {
+  std::ofstream tmpStream(fileName.c_str());
+  if ((!tmpStream) || (tmpStream.bad())) {
+    std::ostringstream errout;
+    errout << "Bad output file " << fileName;
+    throw RDKit::FileParseException(errout.str());
+  }
+
+  std::ostream &outStream = static_cast<std::ostream &>(tmpStream);
+  this->writeTopBitsToStream(&outStream);
+}
+}
--- a/Code/ML/InfoTheory/InfoBitRanker.h
+++ b/Code/ML/InfoTheory/InfoBitRanker.h
@@ -15,236 +15,262 @@
 #include <DataStructs/BitVects.h>
 #include <iostream>

-
 /*! \brief Class used to rank bits based on a specified measure of infomation
 *
 * Basically a primitive mimic of the CombiChem "signal" functionality
 * To use:
- *  - create an instance of this class 
- *  - loop over the fingerprints in the dataset by calling accumulateVotes method
+ *  - create an instance of this class
+ *  - loop over the fingerprints in the dataset by calling accumulateVotes
+ *    method
 *  - call getTopN to get the top n ranked bits
 *
 * Sample usage and results from the python wrapper:
 * Here's a small set of vectors:
 * >>> for i,bv in enumerate(bvs): print bv.ToBitString(),acts[i]
- * ... 
+ * ...
 * 0001 0
 * 0101 0
 * 0010 1
 * 1110 1
- * 
+ *
 * Default ranker, using infogain:
- * >>> ranker = InfoBitRanker(4,2)  
+ * >>> ranker = InfoBitRanker(4,2)
 * >>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])
- * ... 
- * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)
- * ... 
+ * ...
+ * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print
+ *       int(bit),'%.3f'%gain,int(n0),int(n1)
+ * ...
 * 3 1.000 2 0
 * 2 1.000 0 2
 * 0 0.311 0 1
- * 
+ *
 * Using the biased infogain:
 * >>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASENTROPY)
 * >>> ranker.SetBiasList((1,))
 * >>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])
- * ... 
- * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)
- * ... 
+ * ...
+ * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print
+ *       int(bit),'%.3f'%gain,int(n0),int(n1)
+ * ...
 * 2 1.000 0 2
 * 0 0.311 0 1
 * 1 0.000 1 1
- * 
+ *
 * A chi squared ranker is also available:
 * >>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.CHISQUARE)
 * >>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])
- * ... 
- * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)
- * ... 
+ * ...
+ * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print
+ *       int(bit),'%.3f'%gain,int(n0),int(n1)
+ * ...
 * 3 4.000 2 0
 * 2 4.000 0 2
 * 0 1.333 0 1
- * 
+ *
 * As is a biased chi squared:
 * >>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASCHISQUARE)
 * >>> ranker.SetBiasList((1,))
 * >>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])
- * ... 
- * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)
- * ... 
+ * ...
+ * >>> for bit,gain,n0,n1 in ranker.GetTopN(3): print
+ *       int(bit),'%.3f'%gain,int(n0),int(n1)
+ * ...
 * 2 4.000 0 2
 * 0 1.333 0 1
 * 1 0.000 1 1
 */
 namespace RDInfoTheory {
-  typedef std::vector<RDKit::USHORT> USHORT_VECT;
-  typedef std::vector<USHORT_VECT> VECT_USHORT_VECT;
+typedef std::vector<RDKit::USHORT> USHORT_VECT;
+typedef std::vector<USHORT_VECT> VECT_USHORT_VECT;

-  class InfoBitRanker {
-  public:
-    
-    /*! \brief the type of measure for information
-     * 
-     */
-    typedef enum {
-      ENTROPY=1,
-      BIASENTROPY=2,
-      CHISQUARE=3,
-      BIASCHISQUARE=4
-    } InfoType;
-    
-    /*! \brief Constructor 
-     *
-     * ARGUMENTS:
-     *
-     *   - nBits: the dimension of the bit vectors or the fingerprint length
-     *   - nClasses: the number of classes used in the classification problem (e.g. active,
-     *              moderately active, inactive etc.). It is assumed that the classes are 
-     *              numbered from 0 to (nClasses - 1) 
-     *   - infoType: the type of information metric
-     */
-    InfoBitRanker(unsigned int nBits, unsigned int nClasses, InfoType infoType=InfoBitRanker::ENTROPY) :
-    d_dims(nBits), d_classes(nClasses), d_type(infoType) {
-      d_counts.resize(0);
-      for (unsigned int i = 0; i < nClasses; i++) {
-        USHORT_VECT cCount;
-        cCount.resize(d_dims, 0);
-        d_counts.push_back(cCount);
-      }
-      d_clsCount.resize(d_classes, 0);
-      d_nInst = 0;
-      d_top = 0;
-      dp_topBits=0;
-      d_biasList.resize(0);
-      dp_maskBits=0;
-    }
-    
-    ~InfoBitRanker() {
-      if(dp_topBits)
-	delete [] dp_topBits;
-      if(dp_maskBits)
-	delete dp_maskBits;
+class InfoBitRanker {
+ public:
+  /*! \brief the type of measure for information
+   *
+   */
+  typedef enum {
+    ENTROPY = 1,
+    BIASENTROPY = 2,
+    CHISQUARE = 3,
+    BIASCHISQUARE = 4
+  } InfoType;
+
+  /*! \brief Constructor
+   *
+   * ARGUMENTS:
+   *
+   *   - nBits: the dimension of the bit vectors or the fingerprint length
+   *   - nClasses: the number of classes used in the
+   *              classification problem (e.g. active,
+   *              moderately active, inactive etc.).
+   *              It is assumed that the classes are
+   *              numbered from 0 to (nClasses - 1)
+   *   - infoType: the type of information metric
+   */
+  InfoBitRanker(unsigned int nBits, unsigned int nClasses,
+                InfoType infoType = InfoBitRanker::ENTROPY)
+      : d_dims(nBits), d_classes(nClasses), d_type(infoType) {
+    d_counts.resize(0);
+    for (unsigned int i = 0; i < nClasses; i++) {
+      USHORT_VECT cCount;
+      cCount.resize(d_dims, 0);
+      d_counts.push_back(cCount);
    }
+    d_clsCount.resize(d_classes, 0);
+    d_nInst = 0;
+    d_top = 0;
+    dp_topBits = 0;
+    d_biasList.resize(0);
+    dp_maskBits = 0;
+  }

-    /*! \brief Accumulate the votes for all the bits turned on in a bit vector
-     *  
-     *  ARGUMENTS:
-     *
-     *   - bv : bit vector that supports [] operator
-     *   - label : the class label for the bit vector. It is assumed that 0 <= class < nClasses
-     */
-    void accumulateVotes(const ExplicitBitVect &bv, unsigned int label);
-    void accumulateVotes(const SparseBitVect &bv, unsigned int label);
-    
-    /*! \brief Returns the top n bits ranked by the information metric
-     *
-     * This is actually the function where most of the work of ranking is happening
-     * 
-     *  \param num the number of top ranked bits that are required
-     *
-     *  \return a pointer to an information array. The client should *not*
-     *          delete this
-     */
-    double *getTopN(unsigned int num);
-    
-    /*! \brief return the number of labelled instances(examples) or fingerprints seen so far
-     *
-     */
-    unsigned int getNumInstances() const {
-      return d_nInst;
-    }
-    
-    /*! \brief return the number of classes 
-     *
-     */
-    unsigned int getNumClasses() const {
-      return d_classes;
-    }
+  ~InfoBitRanker() {
+    if (dp_topBits) delete[] dp_topBits;
+    if (dp_maskBits) delete dp_maskBits;
+  }

-    /*! \brief Set the classes to which the entropy calculation should be biased
-     *
-     * This list contains a set of class ids used when in the BIASENTROPY mode of ranking bits. 
-     * In this mode, a bit must be correllated higher with one of the biased classes than all the 
-     * other classes. For example, in a two class problem with actives and inactives, the fraction of 
-     * actives that hit the bit has to be greater than the fraction of inactives that hit the bit
-     *
-     * ARGUMENTS:
-     *   classList - list of class ids that we want a bias towards
-     */
-    void setBiasList(RDKit::INT_VECT &classList);
+  /*! \brief Accumulate the votes for all the bits turned on in a bit vector
+   *
+   *  ARGUMENTS:
+   *
+   *   - bv : bit vector that supports [] operator
+   *   - label : the class label for the bit vector. It is assumed that 0 <=
+   *             label < nClasses
+   */
+  void accumulateVotes(const ExplicitBitVect &bv, unsigned int label);
+  void accumulateVotes(const SparseBitVect &bv, unsigned int label);

+  /*! \brief Returns the top n bits ranked by the information metric
+   *
+   * This is actually the function where most of the work of ranking is
+   * happening
+   *
+   *  \param num the number of top ranked bits that are required
+   *
+   *  \return a pointer to an information array. The client should *not*
+   *          delete this
+   */
+  double *getTopN(unsigned int num);

-    /*! \brief Set the bits to be used as a mask
-     *
-     * If this function is called, only the bits which are present in the
-     *   maskBits list will be used.
-     *
-     * ARGUMENTS:
-     *   maskBits - the bits to be considered
-     */
-    void setMaskBits(RDKit::INT_VECT &maskBits);
+  /*! \brief return the number of labelled instances(examples) or fingerprints
+   * seen so far
+   *
+   */
+  unsigned int getNumInstances() const { return d_nInst; }

-    /*! \brief Write the top N bits to a stream
-     *
-     */
-    void writeTopBitsToStream(std::ostream *outStream) const;
-    
-    /*! \brief Write the top bits to a file
-     *
-     */
-    void writeTopBitsToFile(const std::string &fileName) const;
+  /*! \brief return the number of classes
+   *
+   */
+  unsigned int getNumClasses() const { return d_classes; }

-  private:
-    /*! \brief check if we want to compute the info content for a bit based on the bias list
-     *
-     * This what happens here:
-     *    - the fraction of items in each class that hit a particular bit are computed
-     *    - the maximum of these fractions for classes that are not in the biasList are computed
-     *    - If this maximum is less than the fraction for atleast one of classes in the biaslist
-     *      the bit is considered good
-     * ARGUMENTS:
-     *   - resMat : the result matrix, one dimensional matrix of dimension (2*(num of classes))
-     *              a 2D structure is assumed with the first row containing number of items of each class 
-     *              with the bit set and the second row to entires of each class with the bit turned off
-     */
-    bool BiasCheckBit(RDKit::USHORT *resMat) const;
+  /*! \brief Set the classes to which the entropy calculation should be biased
+   *
+   * This list contains a set of class ids used when in the BIASENTROPY mode
+   * of ranking bits.
+   * In this mode, a bit must be correlated higher with one of the biased
+   * classes than all the other classes.
+   * For example, in a two class problem with actives and inactives,
+   * the fraction of actives that hit the bit
+   * has to be greater than the fraction of inactives
+   * that hit the bit
+   *
+   * ARGUMENTS:
+   *   classList - list of class ids that we want a bias towards
+   */
+  void setBiasList(RDKit::INT_VECT &classList);

-    /*! \brief Compute the biased info entropy gain based on the bias list
-     *
-     *  This what happens here:
-     *    - we call BiasCheckBit to see if the bit qualifies to compute the infocontent
-     *    - If this bit is ok then we call InfoEntropyGain otherwise we return 0.0 
-     *
-     * ARGUMENTS:
-     *   - resMat : the result matrix, one dimensional matrix of dimension (2*(num of classes))
-     *              a 2D structure is assumed with the first row containing number of items of each class 
-     *              with the bit set and the second row to entires of each class with the bit turned off
-     */
-    double BiasInfoEntropyGain(RDKit::USHORT *resMat) const;
+  /*! \brief Set the bits to be used as a mask
+   *
+   * If this function is called, only the bits which are present in the
+   *   maskBits list will be used.
+   *
+   * ARGUMENTS:
+   *   maskBits - the bits to be considered
+   */
+  void setMaskBits(RDKit::INT_VECT &maskBits);

-    /*! \brief Compute the biased chi qsure value based on the bias list
-     *
-     *  This what happens here:
-     *    - we call BiasCheckBit to see if the bit qualifies to compute the infocontent
-     *    - If this bit is ok then we call InfoEntropyGain otherwise we return 0.0 
-     *
-     * ARGUMENTS:
-     *   - resMat : the result matrix, one dimensional matrix of dimension (2*(num of classes))
-     *              a 2D structure is assumed with the first row containing number of items of each class 
-     *              with the bit set and the second row to entires of each class with the bit turned off
-     */
-    double BiasChiSquareGain(RDKit::USHORT *resMat) const;
+  /*! \brief Write the top N bits to a stream
+   *
+   */
+  void writeTopBitsToStream(std::ostream *outStream) const;

-    unsigned int d_dims; // the number of bits in the fingerprints
-    unsigned int d_classes; // the number of classes (active, inactive, moderately active etc.)
-    InfoType d_type; // the type of information meassure - currently we support only entropy
-    VECT_USHORT_VECT d_counts; // place holder of counting the number of hits for each bit for each class
-    USHORT_VECT d_clsCount; // counter for the number of instances of each class 
-    double *dp_topBits; // storage for the top ranked bits and the corresponding statistics
-    unsigned int d_top; // the number of bits that have been ranked
-    unsigned int d_nInst; // total number of instances or fingerprints used accumulate votes
-    RDKit::INT_VECT d_biasList; // if we want a bias towards certain classes in ranking bits
-    ExplicitBitVect *dp_maskBits; // allows only certain bits to be considered
-    
-  };
+  /*! \brief Write the top bits to a file
+   *
+   */
+  void writeTopBitsToFile(const std::string &fileName) const;
+
+ private:
+  /*! \brief check if we want to compute the info content for a bit based on
+   * the bias list
+   *
+   * This is what happens here:
+   *    - the fraction of items in each class that hit a particular bit is
+   *      computed
+   *    - the maximum of these fractions for classes that are not in the
+   *      biasList is computed
+   *    - If this maximum is less than the fraction for at least
+   *      one of the classes in the biasList
+   *      the bit is considered good
+   * ARGUMENTS:
+   *   - resMat : the result matrix, one dimensional matrix of dimension
+   *              (2*(num of classes)).
+   *              A 2D structure is assumed with the first row containing the
+   *              number of items of each class with the bit set
+   *              and the second row the entries of each class
+   *              with the bit turned off
+   */
+  bool BiasCheckBit(RDKit::USHORT *resMat) const;
+
+  /*! \brief Compute the biased info entropy gain based on the bias list
+   *
+   *  This is what happens here:
+   *    - we call BiasCheckBit to see if the bit qualifies to compute the
+   *      infocontent
+   *    - If this bit is ok then we call InfoEntropyGain otherwise we return 0.0
+   *
+   * ARGUMENTS:
+   *   - resMat : the result matrix, one dimensional matrix of dimension
+   *              (2*(num of classes)).
+   *              A 2D structure is assumed with the first row containing the
+   *              number of items of each class with the bit set
+   *              and the second row the entries of each class
+   *              with the bit turned off
+   */
+  double BiasInfoEntropyGain(RDKit::USHORT *resMat) const;
+
+  /*! \brief Compute the biased chi square value based on the bias list
+   *
+   *  This is what happens here:
+   *    - we call BiasCheckBit to see if the bit qualifies to compute the
+   *      infocontent
+   *    - If this bit is ok then we call ChiSquare otherwise we return 0.0
+   *
+   * ARGUMENTS:
+   *   - resMat : the result matrix, one dimensional matrix of dimension
+   *              (2*(num of classes)).
+   *              A 2D structure is assumed with the first row containing the
+   *              number of items of each class with the bit set
+   *              and the second row the entries of each class
+   *              with the bit turned off
+   */
+  double BiasChiSquareGain(RDKit::USHORT *resMat) const;
+
+  unsigned int d_dims;     // the number of bits in the fingerprints
+  unsigned int d_classes;  // the number of classes (active, inactive,
+                           // moderately active etc.)
+  InfoType d_type;  // the type of information meassure - currently we support
+                    // only entropy
+  VECT_USHORT_VECT d_counts;  // place holder of counting the number of hits for
+                              // each bit for each class
+  USHORT_VECT d_clsCount;  // counter for the number of instances of each class
+  double *dp_topBits;  // storage for the top ranked bits and the corresponding
+                       // statistics
+  unsigned int d_top;  // the number of bits that have been ranked
+  unsigned int d_nInst;  // total number of instances or fingerprints used
+                         // accumulate votes
+  RDKit::INT_VECT
+      d_biasList;  // if we want a bias towards certain classes in ranking bits
+  ExplicitBitVect *dp_maskBits;  // allows only certain bits to be considered
+};
 }
 #endif
--- a/Code/ML/InfoTheory/InfoGainFuncs.h
+++ b/Code/ML/InfoTheory/InfoGainFuncs.h
@@ -10,129 +10,129 @@

 namespace RDInfoTheory {

-  template<class T> double ChiSquare(T *dMat, long int dim1,long int dim2) {
-    // For a contingency matrix with each column corresponding to a class and each row to a 
-    // the descriptor (or variable) state, the matrix looks something like for 3x3 problem
-    // 
-    //            1    2    3   Totals
-    //      1 |  N11  N12  N13    R1
-    //      2 |  N21  N22  N23    R2
-    //      3 |  N31  N32  N33    R3
-    // Totals |   C1   C2   C3    N
-    //
-    //  Th chi squere formula is 
-    //  chi = sum((N/Ri)*sum(Nij^2/Cj) ) -N
-    T *rowSums, *colSums;
-    int i, j, tSum;
-    // find the row sum
-    tSum = 0;
-    rowSums = new T[dim1];
-    for (i = 0; i < dim1; i++) {
-      int idx1 = i*dim2;
-      rowSums[i] = (T)0.0;
-      for (j = 0; j < dim2; j++) {
-        rowSums[i] += dMat[idx1 + j];
-      }
-      tSum += (int)rowSums[i];
+template <class T>
+double ChiSquare(T *dMat, long int dim1, long int dim2) {
+  // For a contingency matrix with each column corresponding to a class
+  // and each row corresponding to a state of the descriptor (or
+  // variable), the matrix looks something like this for a 3x3
+  // problem:
+  //
+  //            1    2    3   Totals
+  //      1 |  N11  N12  N13    R1
+  //      2 |  N21  N22  N23    R2
+  //      3 |  N31  N32  N33    R3
+  // Totals |   C1   C2   C3    N
+  //
+  //  The chi square formula is
+  //  chi = sum((N/Ri)*sum(Nij^2/Cj) ) -N
+  T *rowSums, *colSums;
+  int i, j, tSum;
+  // find the row sum
+  tSum = 0;
+  rowSums = new T[dim1];
+  for (i = 0; i < dim1; i++) {
+    int idx1 = i * dim2;
+    rowSums[i] = (T)0.0;
+    for (j = 0; j < dim2; j++) {
+      rowSums[i] += dMat[idx1 + j];
    }
-
-    // find the column sums
-    colSums = new T[dim2];
-    for (i = 0; i < dim2; i++) {
-      colSums[i] = (T)0.0;
-      for (j = 0; j < dim1; j++) {
-        colSums[i] += dMat[j*dim2 + i];
-      }
-    }
-    
-    double chi = 0.0;
-    for ( i = 0; i < dim1; i++) {
-      double rchi = 0.0;
-      for (j = 0; j < dim2; j++) {
-        rchi += (pow((double)dMat[i*dim2 + j], 2)/colSums[j]);
-      }
-      chi += ( ((double)tSum/rowSums[i])*rchi );
-    }
-    chi -= tSum;
-    delete [] rowSums;
-    delete [] colSums;
-
-    return chi;
+    tSum += (int)rowSums[i];
  }

-  template<class T> double InfoEntropy(T *tPtr, long int dim) {
-    int i;
-    T nInstances = 0;
-    double accum=0.0,d;
-    
-    for(i=0;i<dim;i++){
-      nInstances += tPtr[i];
+  // find the column sums
+  colSums = new T[dim2];
+  for (i = 0; i < dim2; i++) {
+    colSums[i] = (T)0.0;
+    for (j = 0; j < dim1; j++) {
+      colSums[i] += dMat[j * dim2 + i];
    }
-  
-    if(nInstances != 0){
-      for(i=0;i<dim;i++){
-        d = (double)tPtr[i]/nInstances;
-        if(d != 0){
-          accum += -d*log(d);
-        }
-      }
-    }
-    return accum/log(2.0);
  }

-  template<class T> double InfoEntropyGain(T *dMat, long int dim1,long int dim2) {
-    T *variableRes, *overallRes;
-    double gain,term2;
-    int tSum;
-
-    //std::cerr<<" --------\n    ieg: "<<dim1<<" "<<dim2<<std::endl;
-    variableRes = new T[dim1];
-    for(long int i=0;i<dim1;i++){
-      long int idx1 = i*dim2;
-      variableRes[i] = (T)0.0;
-      for(long int j=0;j<dim2;j++){
-        variableRes[i] += dMat[idx1+j];
-        //std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[idx1+j]<<std::endl;
-      }
+  double chi = 0.0;
+  for (i = 0; i < dim1; i++) {
+    double rchi = 0.0;
+    for (j = 0; j < dim2; j++) {
+      rchi += (pow((double)dMat[i * dim2 + j], 2) / colSums[j]);
    }
-
-    overallRes = new T[dim2];
-    // do the col sums
-    for(long int i=0;i<dim2;i++){
-      overallRes[i] = (T)0.0;
-      for(long int j=0;j<dim1;j++){
-        overallRes[i] += dMat[j*dim2+i];
-        //std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[j*dim2+i]<<std::endl;
-      }
-    }
-
-    term2 = 0.0;
-    for(long int i=0;i<dim1;i++) {
-      T *tPtr;
-      tPtr = dMat + i*dim2;
-      term2 += variableRes[i] * InfoEntropy(tPtr,dim2);
-    }
-    tSum = 0;
-    for(long int i=0;i<dim2;i++){
-      tSum += static_cast<int>(overallRes[i]);
-    }
-    
-    if(tSum != 0){
-      term2 /= tSum;
-      gain = InfoEntropy(overallRes,dim2) - term2;
-    }
-    else{
-      gain = 0.0;
-    }
-    //std::cerr<<"  >gain> "<<gain<<std::endl;
-    
-    delete [] overallRes;
-    delete [] variableRes;
-    return gain;
+    chi += (((double)tSum / rowSums[i]) * rchi);
  }
-   
-  
+  chi -= tSum;
+  delete[] rowSums;
+  delete[] colSums;
+
+  return chi;
+}
+
+template <class T>
+double InfoEntropy(T *tPtr, long int dim) {
+  int i;
+  T nInstances = 0;
+  double accum = 0.0, d;
+
+  for (i = 0; i < dim; i++) {
+    nInstances += tPtr[i];
+  }
+
+  if (nInstances != 0) {
+    for (i = 0; i < dim; i++) {
+      d = (double)tPtr[i] / nInstances;
+      if (d != 0) {
+        accum += -d * log(d);
+      }
+    }
+  }
+  return accum / log(2.0);
+}
+
+template <class T>
+double InfoEntropyGain(T *dMat, long int dim1, long int dim2) {
+  T *variableRes, *overallRes;
+  double gain, term2;
+  int tSum;
+
+  // std::cerr<<" --------\n    ieg: "<<dim1<<" "<<dim2<<std::endl;
+  variableRes = new T[dim1];
+  for (long int i = 0; i < dim1; i++) {
+    long int idx1 = i * dim2;
+    variableRes[i] = (T)0.0;
+    for (long int j = 0; j < dim2; j++) {
+      variableRes[i] += dMat[idx1 + j];
+      // std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[idx1+j]<<std::endl;
+    }
+  }
+
+  overallRes = new T[dim2];
+  // do the col sums
+  for (long int i = 0; i < dim2; i++) {
+    overallRes[i] = (T)0.0;
+    for (long int j = 0; j < dim1; j++) {
+      overallRes[i] += dMat[j * dim2 + i];
+      // std::cerr<<"  "<<i<<" "<<j<<" "<<dMat[j*dim2+i]<<std::endl;
+    }
+  }
+
+  term2 = 0.0;
+  for (long int i = 0; i < dim1; i++) {
+    T *tPtr;
+    tPtr = dMat + i * dim2;
+    term2 += variableRes[i] * InfoEntropy(tPtr, dim2);
+  }
+  tSum = 0;
+  for (long int i = 0; i < dim2; i++) {
+    tSum += static_cast<int>(overallRes[i]);
+  }
+
+  if (tSum != 0) {
+    term2 /= tSum;
+    gain = InfoEntropy(overallRes, dim2) - term2;
+  } else {
+    gain = 0.0;
+  }
+  // std::cerr<<"  >gain> "<<gain<<std::endl;
+
+  delete[] overallRes;
+  delete[] variableRes;
+  return gain;
+}
 }
 #endif
-
-
--- a/Code/ML/InfoTheory/Wrap/BitCorrMatGenerator.cpp
+++ b/Code/ML/InfoTheory/Wrap/BitCorrMatGenerator.cpp
@@ -8,7 +8,6 @@
 //  of the RDKit source tree.
 //

-
 #define NO_IMPORT_ARRAY
 #include <RDBoost/python.h>
 #define PY_ARRAY_UNIQUE_SYMBOL rdinfotheory_array_API
@@ -22,47 +21,48 @@
 namespace python = boost::python;

 namespace RDInfoTheory {
-  
-  PyObject *getCorrMatrix(BitCorrMatGenerator *cmGen) {
-    double *dres = cmGen->getCorrMat();
-    unsigned int nb = cmGen->getCorrBitList().size();
-    npy_intp dim = nb*(nb-1)/2;
-    PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(1,&dim,NPY_DOUBLE);
-    memcpy(static_cast<void *>(res->data),
-           static_cast<void *>(dres), dim*sizeof(double));
-    return PyArray_Return(res);
-  }

-  void setBitList(BitCorrMatGenerator *cmGen, python::object bitList) {
-    PySequenceHolder<int> blist(bitList);
-    unsigned int nb = blist.size();
-    RDKit::INT_VECT res;
-    res.reserve(nb);
-    for (unsigned int i = 0; i < nb; i++) {
-      res.push_back(blist[i]);
-    }
-    cmGen->setBitIdList(res);
-  }
+PyObject *getCorrMatrix(BitCorrMatGenerator *cmGen) {
+  double *dres = cmGen->getCorrMat();
+  unsigned int nb = cmGen->getCorrBitList().size();
+  npy_intp dim = nb * (nb - 1) / 2;
+  PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(1, &dim, NPY_DOUBLE);
+  memcpy(static_cast<void *>(res->data), static_cast<void *>(dres),
+         dim * sizeof(double));
+  return PyArray_Return(res);
+}

-  void CollectVotes(BitCorrMatGenerator *cmGen, python::object bitVect) {
-    python::extract<ExplicitBitVect> ebvWorks(bitVect);
-    python::extract<SparseBitVect> sbvWorks(bitVect);
-    if (ebvWorks.check()) {
-      ExplicitBitVect ev = python::extract<ExplicitBitVect>(bitVect);
-      cmGen->collectVotes(ev);
-    }
-    else if (sbvWorks.check()) {
-      SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
-      cmGen->collectVotes(sv);
-    }
-    else {
-      throw_value_error("CollectVote can only take ExplicitBitVects or SparseBitVects");
-    }
+void setBitList(BitCorrMatGenerator *cmGen, python::object bitList) {
+  PySequenceHolder<int> blist(bitList);
+  unsigned int nb = blist.size();
+  RDKit::INT_VECT res;
+  res.reserve(nb);
+  for (unsigned int i = 0; i < nb; i++) {
+    res.push_back(blist[i]);
  }
+  cmGen->setBitIdList(res);
+}

-  struct corrmat_wrap {
-    static void wrap() {
-      std::string docString = "A class to generate a pariwise correlation matrix between a list of bits\n"
+void CollectVotes(BitCorrMatGenerator *cmGen, python::object bitVect) {
+  python::extract<ExplicitBitVect> ebvWorks(bitVect);
+  python::extract<SparseBitVect> sbvWorks(bitVect);
+  if (ebvWorks.check()) {
+    ExplicitBitVect ev = python::extract<ExplicitBitVect>(bitVect);
+    cmGen->collectVotes(ev);
+  } else if (sbvWorks.check()) {
+    SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
+    cmGen->collectVotes(sv);
+  } else {
+    throw_value_error(
+        "CollectVote can only take ExplicitBitVects or SparseBitVects");
+  }
+}
+
+struct corrmat_wrap {
+  static void wrap() {
+    std::string docString =
+        "A class to generate a pariwise correlation matrix between a list of "
+        "bits\n"
        "The mode of operation for this class is something like this\n"
        "   >>> cmg = BitCorrMatGenerator() \n"
        "   >>> cmg.SetBitList(blist) \n"
@@ -70,28 +70,26 @@ namespace RDInfoTheory {
        "   >>>    cmg.CollectVotes(fp)  \n"
        "   >>> corrMat = cmg.GetCorrMatrix() \n"
        "    \n"
-        "   The resulting correlation matrix is a one dimensional nummeric array containing the \n"
+        "   The resulting correlation matrix is a one dimensional nummeric "
+        "array containing the \n"
        "   lower triangle elements\n";
-      python::class_<BitCorrMatGenerator>("BitCorrMatGenerator",
-                                          docString.c_str())
+    python::class_<BitCorrMatGenerator>("BitCorrMatGenerator",
+                                        docString.c_str())
        .def("SetBitList", setBitList,
             "Set the list of bits that need to be correllated\n\n"
             " This may for example be ther top ranking ensemble bits\n\n"
             "ARGUMENTS:\n\n"
             "  - bitList : an integer list of bit IDs\n")
        .def("CollectVotes", CollectVotes,
-             "For each pair of on bits (bi, bj) in fp increase the correlation count for the pair by 1\n\n"
+             "For each pair of on bits (bi, bj) in fp increase the correlation "
+             "count for the pair by 1\n\n"
             "ARGUMENTS:\n\n"
             "  - fp : a bit vector to collect the fingerprints from\n")
        .def("GetCorrMatrix", getCorrMatrix,
-             "Get the correlation matrix following the collection of votes from a bunch of fingerprints\n")
-        ;
-    };
+             "Get the correlation matrix following the collection of votes "
+             "from a bunch of fingerprints\n");
  };
+};
 }

-void wrap_corrmatgen() {
-  RDInfoTheory::corrmat_wrap::wrap();
-}
-
-
+void wrap_corrmatgen() { RDInfoTheory::corrmat_wrap::wrap(); }
--- a/Code/ML/InfoTheory/Wrap/InfoBitRanker.cpp
+++ b/Code/ML/InfoTheory/Wrap/InfoBitRanker.cpp
@@ -22,161 +22,171 @@
 namespace python = boost::python;

 namespace RDInfoTheory {
-  
-  PyObject *getTopNbits(InfoBitRanker *ranker, int num){// int ignoreNoClass=-1) {
-    double *dres = ranker->getTopN(num);
-    npy_intp dims[2];
-    dims[0] = num;
-    dims[1] = ranker->getNumClasses() + 2;
-    PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2,dims,NPY_DOUBLE);
-    memcpy(static_cast<void *>(res->data),
-           static_cast<void *>(dres), dims[0]*dims[1]*sizeof(double));
-    return PyArray_Return(res);
-  }

-  void AccumulateVotes(InfoBitRanker *ranker, python::object bitVect, int label) {
-    python::extract<ExplicitBitVect> ebvWorks(bitVect);
-    python::extract<SparseBitVect> sbvWorks(bitVect);
-    if (ebvWorks.check()) {
-      ExplicitBitVect ev = python::extract<ExplicitBitVect>(bitVect);
-      ranker->accumulateVotes(ev, label);
-    }
-    else if (sbvWorks.check()) {
-      SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
-      ranker->accumulateVotes(sv, label);
-    }
-    else {
-      throw_value_error("Accumulate Vote can only take a explicitBitVects or SparseBitvects");
-    }
-  }
-  
-  void SetBiasList(InfoBitRanker *ranker, python::object classList) {
-    RDKit::INT_VECT cList;
-    PySequenceHolder<int> bList(classList);
-    cList.reserve(bList.size());
-    for (unsigned int i = 0; i < bList.size(); i++) {
-      cList.push_back(bList[i]);
-    }
-    ranker->setBiasList(cList);
-  }
+PyObject *getTopNbits(InfoBitRanker *ranker,
+                      int num) {  // int ignoreNoClass=-1) {
+  double *dres = ranker->getTopN(num);
+  npy_intp dims[2];
+  dims[0] = num;
+  dims[1] = ranker->getNumClasses() + 2;
+  PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
+  memcpy(static_cast<void *>(res->data), static_cast<void *>(dres),
+         dims[0] * dims[1] * sizeof(double));
+  return PyArray_Return(res);
+}

-  void SetMaskBits(InfoBitRanker *ranker, python::object maskBits) {
-    RDKit::INT_VECT cList;
-    PySequenceHolder<int> bList(maskBits);
-    cList.reserve(bList.size());
-    for (unsigned int i = 0; i < bList.size(); i++) {
-      cList.push_back(bList[i]);
-    }
-    ranker->setMaskBits(cList);
+void AccumulateVotes(InfoBitRanker *ranker, python::object bitVect, int label) {
+  python::extract<ExplicitBitVect> ebvWorks(bitVect);
+  python::extract<SparseBitVect> sbvWorks(bitVect);
+  if (ebvWorks.check()) {
+    ExplicitBitVect ev = python::extract<ExplicitBitVect>(bitVect);
+    ranker->accumulateVotes(ev, label);
+  } else if (sbvWorks.check()) {
+    SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
+    ranker->accumulateVotes(sv, label);
+  } else {
+    throw_value_error(
+        "Accumulate Vote can only take a explicitBitVects or SparseBitvects");
  }
+}

-  void tester(InfoBitRanker *ranker, python::object bitVect) {
-    RDUNUSED_PARAM(ranker);
-    python::extract<SparseBitVect> sbvWorks(bitVect);
-    if (sbvWorks.check()){
-      SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
-      std::cout << "Num of on bits: " << sv.getNumOnBits() << "\n";
-    }
+void SetBiasList(InfoBitRanker *ranker, python::object classList) {
+  RDKit::INT_VECT cList;
+  PySequenceHolder<int> bList(classList);
+  cList.reserve(bList.size());
+  for (unsigned int i = 0; i < bList.size(); i++) {
+    cList.push_back(bList[i]);
  }
+  ranker->setBiasList(cList);
+}

-  struct ranker_wrap {
-    static void wrap() {
-      std::string docString = "A class to rank the bits from a series of labelled fingerprints\n"
-	"A simple demonstration may help clarify what this class does. \n"
-	"Here's a small set of vectors:\n"
-	">>> for i,bv in enumerate(bvs): print bv.ToBitString(),acts[i]\n"
-	"... \n"
-	"0001 0\n"
-	"0101 0\n"
-	"0010 1\n"
-	"1110 1\n"
-	"\n"
-	"Default ranker, using infogain:\n"
-	">>> ranker = InfoBitRanker(4,2)  \n"
-	">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
-	"... \n"
-	">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)\n"
-	"... \n"
-	"3 1.000 2 0\n"
-	"2 1.000 0 2\n"
-	"0 0.311 0 1\n"
-	"\n"
-	"Using the biased infogain:\n"
-	">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASENTROPY)\n"
-	">>> ranker.SetBiasList((1,))\n"
-	">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
-	"... \n"
-	">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)\n"
-	"... \n"
-	"2 1.000 0 2\n"
-	"0 0.311 0 1\n"
-	"1 0.000 1 1\n"
-	"\n"
-	"A chi squared ranker is also available:\n"
-	">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.CHISQUARE)\n"
-	">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
-	"... \n"
-	">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)\n"
-	"... \n"
-	"3 4.000 2 0\n"
-	"2 4.000 0 2\n"
-	"0 1.333 0 1\n"
-	"\n"
-	"As is a biased chi squared:\n"
-	">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASCHISQUARE)\n"
-	">>> ranker.SetBiasList((1,))\n"
-	">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
-	"... \n"
-	">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print int(bit),'%.3f'%gain,int(n0),int(n1)\n"
-	"... \n"
-	"2 4.000 0 2\n"
-	"0 1.333 0 1\n"
-	"1 0.000 1 1\n";
+void SetMaskBits(InfoBitRanker *ranker, python::object maskBits) {
+  RDKit::INT_VECT cList;
+  PySequenceHolder<int> bList(maskBits);
+  cList.reserve(bList.size());
+  for (unsigned int i = 0; i < bList.size(); i++) {
+    cList.push_back(bList[i]);
+  }
+  ranker->setMaskBits(cList);
+}

-      python::class_<InfoBitRanker>("InfoBitRanker",
-                                    docString.c_str(),
-                                    python::init<int, int>(python::args("nBits", "nClasses")))
-        .def(python::init<int, int, InfoBitRanker::InfoType>
-                                    (python::args("nBits", "nClasses", "infoType")))
+void tester(InfoBitRanker *ranker, python::object bitVect) {
+  RDUNUSED_PARAM(ranker);
+  python::extract<SparseBitVect> sbvWorks(bitVect);
+  if (sbvWorks.check()) {
+    SparseBitVect sv = python::extract<SparseBitVect>(bitVect);
+    std::cout << "Num of on bits: " << sv.getNumOnBits() << "\n";
+  }
+}
+
+struct ranker_wrap {
+  static void wrap() {
+    std::string docString =
+        "A class to rank the bits from a series of labelled fingerprints\n"
+        "A simple demonstration may help clarify what this class does. \n"
+        "Here's a small set of vectors:\n"
+        ">>> for i,bv in enumerate(bvs): print bv.ToBitString(),acts[i]\n"
+        "... \n"
+        "0001 0\n"
+        "0101 0\n"
+        "0010 1\n"
+        "1110 1\n"
+        "\n"
+        "Default ranker, using infogain:\n"
+        ">>> ranker = InfoBitRanker(4,2)  \n"
+        ">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
+        "... \n"
+        ">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print "
+        "int(bit),'%.3f'%gain,int(n0),int(n1)\n"
+        "... \n"
+        "3 1.000 2 0\n"
+        "2 1.000 0 2\n"
+        "0 0.311 0 1\n"
+        "\n"
+        "Using the biased infogain:\n"
+        ">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASENTROPY)\n"
+        ">>> ranker.SetBiasList((1,))\n"
+        ">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
+        "... \n"
+        ">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print "
+        "int(bit),'%.3f'%gain,int(n0),int(n1)\n"
+        "... \n"
+        "2 1.000 0 2\n"
+        "0 0.311 0 1\n"
+        "1 0.000 1 1\n"
+        "\n"
+        "A chi squared ranker is also available:\n"
+        ">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.CHISQUARE)\n"
+        ">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
+        "... \n"
+        ">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print "
+        "int(bit),'%.3f'%gain,int(n0),int(n1)\n"
+        "... \n"
+        "3 4.000 2 0\n"
+        "2 4.000 0 2\n"
+        "0 1.333 0 1\n"
+        "\n"
+        "As is a biased chi squared:\n"
+        ">>> ranker = InfoBitRanker(4,2,InfoTheory.InfoType.BIASCHISQUARE)\n"
+        ">>> ranker.SetBiasList((1,))\n"
+        ">>> for i,bv in enumerate(bvs): ranker.AccumulateVotes(bv,acts[i])\n"
+        "... \n"
+        ">>> for bit,gain,n0,n1 in ranker.GetTopN(3): print "
+        "int(bit),'%.3f'%gain,int(n0),int(n1)\n"
+        "... \n"
+        "2 4.000 0 2\n"
+        "0 1.333 0 1\n"
+        "1 0.000 1 1\n";
+
+    python::class_<InfoBitRanker>(
+        "InfoBitRanker", docString.c_str(),
+        python::init<int, int>(python::args("nBits", "nClasses")))
+        .def(python::init<int, int, InfoBitRanker::InfoType>(
+            python::args("nBits", "nClasses", "infoType")))
        .def("AccumulateVotes", AccumulateVotes,
-             "Accumulate the votes for all the bits turned on in a bit vector\n\n"
+             "Accumulate the votes for all the bits turned on in a bit "
+             "vector\n\n"
             "ARGUMENTS:\n\n"
-             "  - bv : bit vector either ExplicitBitVect or SparseBitVect operator\n"
-             "  - label : the class label for the bit vector. It is assumed that 0 <= class < nClasses \n")
-        .def ("SetBiasList", SetBiasList,
-              "Set the classes to which the entropy calculation should be biased\n\n"
-              "This list contains a set of class ids used when in the BIASENTROPY mode of ranking bits. \n"
-              "In this mode, a bit must be correlated higher with one of the biased classes than all the \n"
-              "other classes. For example, in a two class problem with actives and inactives, the fraction of \n"
-              "actives that hit the bit has to be greater than the fraction of inactives that hit the bit\n\n"
-              "ARGUMENTS: \n\n"
-              "  - classList : list of class ids that we want a bias towards\n")
-        .def ("SetMaskBits", SetMaskBits,
-              "Set the mask bits for the calculation\n\n"
-              "ARGUMENTS: \n\n"
-              "  - maskBits : list of mask bits to use\n")
+             "  - bv : bit vector either ExplicitBitVect or SparseBitVect "
+             "operator\n"
+             "  - label : the class label for the bit vector. It is assumed "
+             "that 0 <= class < nClasses \n")
+        .def("SetBiasList", SetBiasList,
+             "Set the classes to which the entropy calculation should be "
+             "biased\n\n"
+             "This list contains a set of class ids used when in the "
+             "BIASENTROPY mode of ranking bits. \n"
+             "In this mode, a bit must be correlated higher with one of the "
+             "biased classes than all the \n"
+             "other classes. For example, in a two class problem with actives "
+             "and inactives, the fraction of \n"
+             "actives that hit the bit has to be greater than the fraction of "
+             "inactives that hit the bit\n\n"
+             "ARGUMENTS: \n\n"
+             "  - classList : list of class ids that we want a bias towards\n")
+        .def("SetMaskBits", SetMaskBits,
+             "Set the mask bits for the calculation\n\n"
+             "ARGUMENTS: \n\n"
+             "  - maskBits : list of mask bits to use\n")
        .def("GetTopN", getTopNbits,
             "Returns the top n bits ranked by the information metric\n"
-             "This is actually the function where most of the work of ranking is happening\n\n"
+             "This is actually the function where most of the work of ranking "
+             "is happening\n\n"
             "ARGUMENTS:\n\n"
             "  - num : the number of top ranked bits that are required\n")
        .def("WriteTopBitsToFile", &InfoBitRanker::writeTopBitsToFile,
             "Write the bits that have been ranked to a file")
-        .def("Tester", tester)
-        ;
-      
-      python::enum_<InfoBitRanker::InfoType>("InfoType")
+        .def("Tester", tester);
+
+    python::enum_<InfoBitRanker::InfoType>("InfoType")
        .value("ENTROPY", InfoBitRanker::ENTROPY)
        .value("BIASENTROPY", InfoBitRanker::BIASENTROPY)
        .value("CHISQUARE", InfoBitRanker::CHISQUARE)
        .value("BIASCHISQUARE", InfoBitRanker::BIASCHISQUARE)
        .export_values();
-        ;
-    };
+    ;
  };
+};
 }

-void wrap_ranker() {
-  RDInfoTheory::ranker_wrap::wrap();
-}
-
+void wrap_ranker() { RDInfoTheory::ranker_wrap::wrap(); }
--- a/Code/ML/InfoTheory/Wrap/rdInfoTheory.cpp
+++ b/Code/ML/InfoTheory/Wrap/rdInfoTheory.cpp
@@ -18,126 +18,127 @@ namespace python = boost::python;
 using namespace RDInfoTheory;

 namespace RDInfoTheory {
-  double infoEntropy(python::object resArr) {
-    PyObject *matObj = resArr.ptr();
-    if (!PyArray_Check(matObj)) {
-      throw_value_error("Expecting a Numeric array object");
-    }
-    PyArrayObject *copy;
-    copy = (PyArrayObject *)PyArray_ContiguousFromObject(matObj, 
-                                                         ((PyArrayObject *)matObj)->descr->type_num,
-                                                         1,1);
-    double res=0.0;
-    // we are expecting a 1 dimensional array
-    long int ncols = (long int)((PyArrayObject *)matObj)->dimensions[0];
-    CHECK_INVARIANT(ncols > 0, "");
-    if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
-      double *data = (double *)copy->data;
-      res = InfoEntropy(data, ncols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
-      float *data = (float *)copy->data;
-      res = InfoEntropy(data, ncols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
-      int *data = (int *)copy->data;
-      res = InfoEntropy(data, ncols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
-      long int *data = (long int *)copy->data;
-      res = InfoEntropy(data, ncols);
-    }
-    Py_DECREF(copy);
-    return res;
+double infoEntropy(python::object resArr) {
+  PyObject *matObj = resArr.ptr();
+  if (!PyArray_Check(matObj)) {
+    throw_value_error("Expecting a Numeric array object");
  }
-   
-  double infoGain(python::object resArr) {
-    PyObject *matObj = resArr.ptr();
-    if (!PyArray_Check(matObj)) {
-      throw_value_error("Expecting a Numeric array object");
-    }
-    PyArrayObject *copy;
-    copy = (PyArrayObject *)PyArray_ContiguousFromObject(matObj, 
-                                                         ((PyArrayObject *)matObj)->descr->type_num,
-                                                         2,2);
-    long int rows = (long int)((PyArrayObject *)matObj)->dimensions[0];
-    long int cols = (long int)((PyArrayObject *)matObj)->dimensions[1];
-    double res=0.0;
-    if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
-      double *data = (double *)copy->data;
-      res = InfoEntropyGain(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
-      float *data = (float *)copy->data;
-      res = InfoEntropyGain(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
-      int *data = (int *)copy->data;
-      res = InfoEntropyGain(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
-      long int *data = (long int *)copy->data;
-      res = InfoEntropyGain(data, rows, cols);
-    } else {
-      throw_value_error("Numeric array object of type int or long or float or double");
-    }
-    Py_DECREF(copy);
-    return res;
+  PyArrayObject *copy;
+  copy = (PyArrayObject *)PyArray_ContiguousFromObject(
+      matObj, ((PyArrayObject *)matObj)->descr->type_num, 1, 1);
+  double res = 0.0;
+  // we are expecting a 1 dimensional array
+  long int ncols = (long int)((PyArrayObject *)matObj)->dimensions[0];
+  CHECK_INVARIANT(ncols > 0, "");
+  if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
+    double *data = (double *)copy->data;
+    res = InfoEntropy(data, ncols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
+    float *data = (float *)copy->data;
+    res = InfoEntropy(data, ncols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
+    int *data = (int *)copy->data;
+    res = InfoEntropy(data, ncols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
+    long int *data = (long int *)copy->data;
+    res = InfoEntropy(data, ncols);
  }
+  Py_DECREF(copy);
+  return res;
+}

-  double chiSquare(python::object resArr) {
-    PyObject *matObj = resArr.ptr();
-    if (!PyArray_Check(matObj)) {
-      throw_value_error("Expecting a Numeric array object");
-    }
-    PyArrayObject *copy;
-    copy = (PyArrayObject *)PyArray_ContiguousFromObject(matObj, 
-                                                         ((PyArrayObject *)matObj)->descr->type_num,
-                                                         2,2);
-    long int rows = (long int)((PyArrayObject *)matObj)->dimensions[0];
-    long int cols = (long int)((PyArrayObject *)matObj)->dimensions[1];
-    double res=0.0;
-    if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
-      double *data = (double *)copy->data;
-      res = ChiSquare(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
-      float *data = (float *)copy->data;
-      res = ChiSquare(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
-      int *data = (int *)copy->data;
-      res = ChiSquare(data, rows, cols);
-    } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
-      long int *data = (long int *)copy->data;
-      res = ChiSquare(data, rows, cols);
-    } else {
-      throw_value_error("Numeric array object of type int or long or float or double");
-    }
-    Py_DECREF(copy);
-    return res;
+double infoGain(python::object resArr) {
+  PyObject *matObj = resArr.ptr();
+  if (!PyArray_Check(matObj)) {
+    throw_value_error("Expecting a Numeric array object");
  }
+  PyArrayObject *copy;
+  copy = (PyArrayObject *)PyArray_ContiguousFromObject(
+      matObj, ((PyArrayObject *)matObj)->descr->type_num, 2, 2);
+  long int rows = (long int)((PyArrayObject *)matObj)->dimensions[0];
+  long int cols = (long int)((PyArrayObject *)matObj)->dimensions[1];
+  double res = 0.0;
+  if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
+    double *data = (double *)copy->data;
+    res = InfoEntropyGain(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
+    float *data = (float *)copy->data;
+    res = InfoEntropyGain(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
+    int *data = (int *)copy->data;
+    res = InfoEntropyGain(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
+    long int *data = (long int *)copy->data;
+    res = InfoEntropyGain(data, rows, cols);
+  } else {
+    throw_value_error(
+        "Numeric array object of type int or long or float or double");
+  }
+  Py_DECREF(copy);
+  return res;
+}
+
+double chiSquare(python::object resArr) {
+  PyObject *matObj = resArr.ptr();
+  if (!PyArray_Check(matObj)) {
+    throw_value_error("Expecting a Numeric array object");
+  }
+  PyArrayObject *copy;
+  copy = (PyArrayObject *)PyArray_ContiguousFromObject(
+      matObj, ((PyArrayObject *)matObj)->descr->type_num, 2, 2);
+  long int rows = (long int)((PyArrayObject *)matObj)->dimensions[0];
+  long int cols = (long int)((PyArrayObject *)matObj)->dimensions[1];
+  double res = 0.0;
+  if (((PyArrayObject *)matObj)->descr->type_num == PyArray_DOUBLE) {
+    double *data = (double *)copy->data;
+    res = ChiSquare(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_FLOAT) {
+    float *data = (float *)copy->data;
+    res = ChiSquare(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_INT) {
+    int *data = (int *)copy->data;
+    res = ChiSquare(data, rows, cols);
+  } else if (((PyArrayObject *)matObj)->descr->type_num == PyArray_LONG) {
+    long int *data = (long int *)copy->data;
+    res = ChiSquare(data, rows, cols);
+  } else {
+    throw_value_error(
+        "Numeric array object of type int or long or float or double");
+  }
+  Py_DECREF(copy);
+  return res;
+}
 }

 void wrap_ranker();
 void wrap_corrmatgen();

-BOOST_PYTHON_MODULE(rdInfoTheory)
-{
+BOOST_PYTHON_MODULE(rdInfoTheory) {
  python::scope().attr("__doc__") =
-    "Module containing bunch of functions for information metrics and a ranker to rank bits"
-    ;
-  
+      "Module containing bunch of functions for information metrics and a "
+      "ranker to rank bits";
+
  rdkit_import_array();
-  python::register_exception_translator<IndexErrorException>(&translate_index_error);
-  python::register_exception_translator<ValueErrorException>(&translate_value_error);
+  python::register_exception_translator<IndexErrorException>(
+      &translate_index_error);
+  python::register_exception_translator<ValueErrorException>(
+      &translate_value_error);

  wrap_ranker();
  wrap_corrmatgen();

-  std::string docString="calculates the informational entropy of the values in an array\n\n\
+  std::string docString =
+      "calculates the informational entropy of the values in an array\n\n\
  ARGUMENTS:\n\
    \n\
    - resMat: pointer to a long int array containing the data\n\
    - dim: long int containing the length of the _tPtr_ array.\n\n\
  RETURNS:\n\n\
    a double\n";
-  python::def("InfoEntropy", RDInfoTheory::infoEntropy,
-              docString.c_str());
+  python::def("InfoEntropy", RDInfoTheory::infoEntropy, docString.c_str());

-  docString="Calculates the information gain for a variable\n\n\
+  docString =
+      "Calculates the information gain for a variable\n\n\
   ARGUMENTS:\n\n\
     - varMat: a Numeric Array object\n\
       varMat is a Numeric array with the number of possible occurances\n\
@@ -148,11 +149,10 @@ BOOST_PYTHON_MODULE(rdInfoTheory)
     - a Python float object\n\n\
   NOTES\n\n\
     - this is a dropin replacement for _PyInfoGain()_ in entropy.py\n";
-  python::def("InfoGain", RDInfoTheory::infoGain,
-              docString.c_str());
+  python::def("InfoGain", RDInfoTheory::infoGain, docString.c_str());

-
-  docString="Calculates the chi squared value for a variable\n\n\
+  docString =
+      "Calculates the chi squared value for a variable\n\n\
   ARGUMENTS:\n\n\
     - varMat: a Numeric Array object\n\
       varMat is a Numeric array with the number of possible occurances\n\
@@ -161,8 +161,5 @@ BOOST_PYTHON_MODULE(rdInfoTheory)
         has 3 possible values, varMat would be 4x3\n\n\
   RETURNS:\n\n\
     - a Python float object\n";
-  python::def("ChiSquare", RDInfoTheory::chiSquare,
-              docString.c_str());
-
+  python::def("ChiSquare", RDInfoTheory::chiSquare, docString.c_str());
 }
-