mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
222 lines
8.4 KiB
C++
Executable File
222 lines
8.4 KiB
C++
Executable File
// $Id$
|
|
//
|
|
// Copyright (c) 2002-20`0 greg Landrum, Rational Discovery LLC
|
|
//
|
|
// @@ All Rights Reserved @@
|
|
// This file is part of the RDKit.
|
|
// The contents are covered by the terms of the BSD license
|
|
// which is included in the file license.txt, found at the root
|
|
// of the RDKit source tree.
|
|
//
|
|
#include "BitVects.h"
|
|
#include "BitVectUtils.h"
|
|
#include <RDGeneral/Invariant.h>
|
|
#include <iostream>
|
|
|
|
//! Convert a SparseBitVector to an ExplicitBitVector
|
|
ExplicitBitVect *convertToExplicit(const SparseBitVect *sbv) {
|
|
unsigned int sl = sbv->getNumBits();
|
|
ExplicitBitVect *ebv = new ExplicitBitVect(sl);
|
|
const IntSet *bset = sbv->getBitSet();
|
|
for (IntSetConstIter it = bset->begin(); it != bset->end(); it++) {
|
|
ebv->setBit(*it);
|
|
}
|
|
return ebv;
|
|
}
|
|
|
|
void a2b(const char *,char *);
|
|
|
|
//! \brief Construct a BitVect from the ASCII representation of a
|
|
//! Daylight fingerprint string
|
|
template <typename T>
|
|
void FromDaylightString(T &sbv,std::string s)
|
|
{
|
|
sbv.clearBits();
|
|
int length = s.length();
|
|
int nBits;
|
|
|
|
if(s[length-1] == '\n') length -= 1;
|
|
|
|
// 4 bytes in the ascii correspond to 3 bytes in the binary
|
|
// plus there's one extra ascii byte for the pad marker
|
|
length -= 1;
|
|
nBits = (3*length/4)*8;
|
|
|
|
switch(s[length]){
|
|
case '1': nBits -= 16;break;
|
|
case '2': nBits -= 8;break;
|
|
case '3': break;
|
|
default: throw "ValueError bad daylight fingerprint string";
|
|
}
|
|
int i=0,nBitsDone=0;
|
|
while(i < length){
|
|
char bytes[3];
|
|
a2b(s.c_str()+i,bytes);
|
|
for(int j=0;j<3 && nBitsDone < nBits;j++){
|
|
unsigned char query=0x80;
|
|
for(int k=0;k<8;k++) {
|
|
if(bytes[j]&query){
|
|
sbv.setBit(nBitsDone);
|
|
}
|
|
query >>= 1;
|
|
nBitsDone++;
|
|
}
|
|
}
|
|
i += 4;
|
|
}
|
|
}
|
|
|
|
template void FromDaylightString(SparseBitVect &sbv,std::string s);
|
|
template void FromDaylightString(ExplicitBitVect &sbv,std::string s);
|
|
|
|
//! \brief Construct a BitVect from the ASCII representation of a
|
|
//! BitString
|
|
template <typename T>
|
|
void FromBitString(T &sbv,const std::string &s)
|
|
{
|
|
PRECONDITION(s.length()<=sbv.getNumBits(),"bad bitvect length");
|
|
sbv.clearBits();
|
|
for(unsigned int i=0;i<sbv.getNumBits();++i){
|
|
if(s[i]=='1') sbv.setBit(i);
|
|
}
|
|
}
|
|
|
|
template void FromBitString(SparseBitVect &sbv,const std::string &s);
|
|
template void FromBitString(ExplicitBitVect &sbv,const std::string &s);
|
|
|
|
|
|
//! converts 4 ascii bytes at a4 to 3 binary bytes
|
|
/*!
|
|
THE FOLLOWING IS TAKEN FROM THE DAYLIGHT CONTRIB PROGRAM
|
|
ascii2bits.c
|
|
*********************************************************************
|
|
*** a2b - converts 4 ascii bytes at a4 to 3 binary
|
|
*** bytes at b3.
|
|
***
|
|
*** ASCII: |=======+=======+=======+=======| etc.
|
|
*** ^
|
|
*** becomes... 3 <-> 4
|
|
*** v
|
|
*** BINARY: |=====+=====+=====+=====| etc.
|
|
********************************************************************
|
|
*/
|
|
void a2b(const char *a4, char *b3)
|
|
{
|
|
int i;
|
|
char byte=0x00, b=0x00;
|
|
|
|
/*********************************************
|
|
*** Use the Daylight mapping to convert each
|
|
*** ascii char to its 6-bit code.
|
|
***
|
|
*** a4: |xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx (printable)
|
|
*** |=======+=======+=======+=======|
|
|
*** becomes...
|
|
*** a4: |00xxxxxx00xxxxxx00xxxxxx00xxxxxx
|
|
*** |=======+=======+=======+=======|
|
|
*********************************************/
|
|
for (i = 0; i < 4; ++i) {
|
|
switch (a4[i]) {
|
|
case '.': byte = 0x00; break; /* 00 = __000000 */
|
|
case '+': byte = 0x01; break; /* 01 = __000001 */
|
|
case '0': byte = 0x02; break; /* 02 = __000010 */
|
|
case '1': byte = 0x03; break; /* 03 = __000011 */
|
|
case '2': byte = 0x04; break; /* 04 = __000100 */
|
|
case '3': byte = 0x05; break; /* 05 = __000101 */
|
|
case '4': byte = 0x06; break; /* 06 = __000110 */
|
|
case '5': byte = 0x07; break; /* 07 = __000111 */
|
|
case '6': byte = 0x08; break; /* 08 = __001000 */
|
|
case '7': byte = 0x09; break; /* 09 = __001001 */
|
|
case '8': byte = 0x0a; break; /* 10 = __001010 */
|
|
case '9': byte = 0x0b; break; /* 11 = __001011 */
|
|
case 'A': byte = 0x0c; break; /* 12 = __001100 */
|
|
case 'B': byte = 0x0d; break; /* 13 = __001101 */
|
|
case 'C': byte = 0x0e; break; /* 14 = __001110 */
|
|
case 'D': byte = 0x0f; break; /* 15 = __001111 */
|
|
case 'E': byte = 0x10; break; /* 16 = __010000 */
|
|
case 'F': byte = 0x11; break; /* 17 = __010001 */
|
|
case 'G': byte = 0x12; break; /* 18 = __010010 */
|
|
case 'H': byte = 0x13; break; /* 19 = __010011 */
|
|
case 'I': byte = 0x14; break; /* 20 = __010100 */
|
|
case 'J': byte = 0x15; break; /* 21 = __010101 */
|
|
case 'K': byte = 0x16; break; /* 22 = __010110 */
|
|
case 'L': byte = 0x17; break; /* 23 = __010111 */
|
|
case 'M': byte = 0x18; break; /* 24 = __011000 */
|
|
case 'N': byte = 0x19; break; /* 25 = __011001 */
|
|
case 'O': byte = 0x1a; break; /* 26 = __011010 */
|
|
case 'P': byte = 0x1b; break; /* 27 = __011011 */
|
|
case 'Q': byte = 0x1c; break; /* 28 = __011100 */
|
|
case 'R': byte = 0x1d; break; /* 29 = __011101 */
|
|
case 'S': byte = 0x1e; break; /* 30 = __011110 */
|
|
case 'T': byte = 0x1f; break; /* 31 = __011111 */
|
|
case 'U': byte = 0x20; break; /* 32 = __100000 */
|
|
case 'V': byte = 0x21; break; /* 33 = __100001 */
|
|
case 'W': byte = 0x22; break; /* 34 = __100010 */
|
|
case 'X': byte = 0x23; break; /* 35 = __100011 */
|
|
case 'Y': byte = 0x24; break; /* 36 = __100100 */
|
|
case 'Z': byte = 0x25; break; /* 37 = __100101 */
|
|
case 'a': byte = 0x26; break; /* 38 = __100110 */
|
|
case 'b': byte = 0x27; break; /* 39 = __100111 */
|
|
case 'c': byte = 0x28; break; /* 40 = __101000 */
|
|
case 'd': byte = 0x29; break; /* 41 = __101001 */
|
|
case 'e': byte = 0x2a; break; /* 42 = __101010 */
|
|
case 'f': byte = 0x2b; break; /* 43 = __101011 */
|
|
case 'g': byte = 0x2c; break; /* 44 = __101100 */
|
|
case 'h': byte = 0x2d; break; /* 45 = __101101 */
|
|
case 'i': byte = 0x2e; break; /* 46 = __101110 */
|
|
case 'j': byte = 0x2f; break; /* 47 = __101111 */
|
|
case 'k': byte = 0x30; break; /* 48 = __110000 */
|
|
case 'l': byte = 0x31; break; /* 49 = __110001 */
|
|
case 'm': byte = 0x32; break; /* 50 = __110010 */
|
|
case 'n': byte = 0x33; break; /* 51 = __110011 */
|
|
case 'o': byte = 0x34; break; /* 52 = __110100 */
|
|
case 'p': byte = 0x35; break; /* 53 = __110101 */
|
|
case 'q': byte = 0x36; break; /* 54 = __110110 */
|
|
case 'r': byte = 0x37; break; /* 55 = __110111 */
|
|
case 's': byte = 0x38; break; /* 56 = __111000 */
|
|
case 't': byte = 0x39; break; /* 57 = __111001 */
|
|
case 'u': byte = 0x3a; break; /* 58 = __111010 */
|
|
case 'v': byte = 0x3b; break; /* 59 = __111011 */
|
|
case 'w': byte = 0x3c; break; /* 60 = __111100 */
|
|
case 'x': byte = 0x3d; break; /* 61 = __111101 */
|
|
case 'y': byte = 0x3e; break; /* 62 = __111110 */
|
|
case 'z': byte = 0x3f; break; /* 63 = __111111 */
|
|
}
|
|
|
|
/*********************************************
|
|
*** Now copy the 4x6=24 bits from a4 to b3.
|
|
***
|
|
*** a4: |--000000--111111--222222--333333
|
|
*** |=======+=======+=======+=======|
|
|
***
|
|
*** b3: |000000111111222222333333
|
|
*** |=====+=====+=====+=====|
|
|
*********************************************/
|
|
if (i == 0)
|
|
b3[0] = (byte << 2); /*** 6 bits into 1st byte ***/
|
|
else if (i == 1) {
|
|
b3[0] |= ((b = byte) >> 4); /*** 2 bits into 1st byte ***/
|
|
b3[1] = ((b = byte) << 4); /*** 4 bits into 2nd byte ***/
|
|
} else if (i == 2) {
|
|
b3[1] |= ((b = byte) >> 2); /*** 4 bits into 2nd byte ***/
|
|
b3[2] = ((b = byte) << 6); /*** 2 bits into 3rd byte ***/
|
|
} else if (i == 3)
|
|
b3[2] |= byte; /*** 6 bits into 3rd byte ***/
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
// Demo Data:
|
|
// 256 bits:
|
|
//.b7HEa..ccc+gWEIr89.8lV8gOF3aXFFR.+Ps.mZ6lg.2
|
|
//
|
|
// 00000010 01110010 01010011 01000010 01100000
|
|
// 00000000 10100010 10001010 00000001 10110010
|
|
// 00100100 00010100 11011100 10100010 11000000
|
|
// 00101011 00011000 01001010 10110001 10100100
|
|
// 01000101 10011010 00110100 01010001 01110100
|
|
// 00000000 01011011 11100000 00001100 10100101
|
|
// 00100011 00011011
|
|
|