Files
rdkit/Code/DataStructs/Utils.cpp

218 lines
8.2 KiB
C++
Executable File

// $Id$
//
// Copyright (c) 2002-2006 greg Landrum, Rational Discovery LLC
//
// @@ All Rights Reserved @@
//
#include "BitVects.h"
#include "BitVectUtils.h"
#include <RDGeneral/Invariant.h>
#include <iostream>
//! \brief Builds an ExplicitBitVect with the same on bits as a SparseBitVect
/*!
  \param sbv  the sparse bit vector to copy from; it is dereferenced
              without a null check

  \return a pointer to an ExplicitBitVect allocated here with \c new,
          sized with sbv->GetNumBits(). Nothing in this function frees it,
          so presumably the caller is responsible for deleting it.
*/
ExplicitBitVect *convertToExplicit(const SparseBitVect *sbv) {
  // the explicit vector gets the same number of bits as the sparse one
  ExplicitBitVect *result = new ExplicitBitVect(sbv->GetNumBits());

  // walk the set of on-bit indices and switch each one on in the result
  const IntSet *onBits = sbv->GetBitSet();
  IntSetConstIter pos = onBits->begin();
  while (pos != onBits->end()) {
    result->SetBit(*pos);
    ++pos;
  }
  return result;
}
// forward declaration: a2b() is used by FromDaylightString() below and is
// defined later in this file (converts 4 ascii bytes to 3 binary bytes)
void a2b(const char *,char *);
//! \brief Fills a bit vector from the ASCII representation of a
//! Daylight fingerprint string
/*!
  \param sbv  the bit vector to fill. All of its bits are cleared first
              (this happens even if the string is then rejected).
              NOTE: the size of \c sbv is not checked here; it is assumed
              to be large enough for the bits encoded in \c s.
  \param s    the fingerprint text: groups of 4 printable characters, each
              group encoding 3 binary bytes, followed by one pad marker
              character and, optionally, a single trailing newline.

  The pad marker says how many bytes of the final group are real data:
    - '3' : all three bytes
    - '2' : two bytes (the last 8 bits are dropped)
    - '1' : one byte  (the last 16 bits are dropped)

  Within each decoded byte the most significant bit is the lowest
  numbered bit in \c sbv.

  Throws a string literal (const char *) if the pad marker is missing or
  is not one of '1', '2', '3', or if the encoded part is not a whole
  number of 4 character groups.
*/
template <typename T>
void FromDaylightString(T &sbv,std::string s)
{
  sbv.ClearBits();
  int length = s.length();
  // ignore a single trailing newline (guarded so that an empty string
  // does not index before the start of s)
  if(length > 0 && s[length-1] == '\n') length -= 1;
  // 4 bytes in the ascii correspond to 3 bytes in the binary
  // plus there's one extra ascii byte for the pad marker
  length -= 1;
  // length < 0 : there is no pad marker at all (empty input)
  // length%4   : a truncated group would make a2b() read past the data
  if(length < 0 || length%4 != 0){
    throw "ValueError bad daylight fingerprint string";
  }
  int nBits = (3*length/4)*8;
  switch(s[length]){
  case '1': nBits -= 16;break;
  case '2': nBits -= 8;break;
  case '3': break;
  default: throw "ValueError bad daylight fingerprint string";
  }

  int nBitsDone=0;
  for(int i=0;i<length;i+=4){
    // decode the next 4 ascii characters into 3 binary bytes
    char bytes[3];
    a2b(s.c_str()+i,bytes);
    // nBits is a multiple of 8, so checking once per byte is enough to
    // skip the padding bytes of the final group
    for(int j=0;j<3 && nBitsDone < nBits;j++){
      unsigned char query=0x80;
      for(int k=0;k<8;k++) {
        if(bytes[j]&query){
          sbv.SetBit(nBitsDone);
        }
        query >>= 1;
        nBitsDone++;
      }
    }
  }
}
// explicit instantiations of FromDaylightString() for SparseBitVect and
// ExplicitBitVect
template void FromDaylightString(SparseBitVect &sbv,std::string s);
template void FromDaylightString(ExplicitBitVect &sbv,std::string s);
//! \brief Fills a bit vector from the ASCII representation of a
//! BitString
/*!
  \param sbv  the bit vector to fill. All of its bits are cleared first.
  \param s    the bit string: bit \c i of \c sbv is set when s[i] is '1';
              every other character leaves the bit off.

  \c s must not be longer than \c sbv (enforced with PRECONDITION). It may
  be shorter, in which case the remaining bits of \c sbv stay off.
*/
template <typename T>
void FromBitString(T &sbv,const std::string &s)
{
  PRECONDITION(s.length()<=sbv.GetNumBits(),"bad bitvect length");
  sbv.ClearBits();
  // only walk the characters that are actually present: the precondition
  // allows s to be shorter than sbv, so iterating up to GetNumBits()
  // would read past the end of the string
  for(unsigned int i=0;i<s.length();++i){
    if(s[i]=='1') sbv.SetBit(i);
  }
}
// explicit instantiations of FromBitString() for SparseBitVect and
// ExplicitBitVect
template void FromBitString(SparseBitVect &sbv,const std::string &s);
template void FromBitString(ExplicitBitVect &sbv,const std::string &s);
//! maps one character of the Daylight ASCII alphabet to its 6-bit code
/*!
  The alphabet, in order of increasing code, is:

      '.'  '+'  '0'-'9'  'A'-'Z'  'a'-'z'      (codes 0 through 63)

  Any character outside the alphabet maps to 0, so the result never
  depends on characters seen earlier.

  Relies on the digits and the letters being contiguous in the execution
  character set (true for ASCII).
*/
static unsigned char daylightCode(char c)
{
  if (c >= '0' && c <= '9') return static_cast<unsigned char>(2 + (c - '0'));
  if (c >= 'A' && c <= 'Z') return static_cast<unsigned char>(12 + (c - 'A'));
  if (c >= 'a' && c <= 'z') return static_cast<unsigned char>(38 + (c - 'a'));
  if (c == '+') return 1;
  return 0;  // '.' and anything that is not part of the alphabet
}

//! converts 4 ascii bytes at a4 to 3 binary bytes at b3
/*!
  Based on the Daylight contrib program ascii2bits.c

  \param a4  the 4 ascii characters to decode; exactly 4 are read
  \param b3  receives the 3 decoded bytes; exactly 3 are written

  Each ascii character carries 6 bits. The four 6-bit codes are packed,
  first character in the most significant position, into 24 bits:

      a4 codes:  |000000|111111|222222|333333|
      b3 bytes:  |00000011|11112222|22333333|

  Characters that are not part of the Daylight alphabet are decoded as 0.
*/
void a2b(const char *a4, char *b3)
{
  // decode everything before writing, so each output byte is assigned
  // exactly once
  unsigned char code[4];
  for (int i = 0; i < 4; ++i) {
    code[i] = daylightCode(a4[i]);
  }

  // the cast to unsigned char keeps only the low 8 bits of each shifted value
  b3[0] = static_cast<char>(
      static_cast<unsigned char>((code[0] << 2) | (code[1] >> 4)));
  b3[1] = static_cast<char>(
      static_cast<unsigned char>((code[1] << 4) | (code[2] >> 2)));
  b3[2] = static_cast<char>(
      static_cast<unsigned char>((code[2] << 6) | code[3]));
}
// Demo Data:
// 256 bits:
//.b7HEa..ccc+gWEIr89.8lV8gOF3aXFFR.+Ps.mZ6lg.2
//
// 00000010 01110010 01010011 01000010 01100000
// 00000000 10100010 10001010 00000001 10110010
// 00100100 00010100 11011100 10100010 11000000
// 00101011 00011000 01001010 10110001 10100100
// 01000101 10011010 00110100 01010001 01110100
// 00000000 01011011 11100000 00001100 10100101
// 00100011 00011011