mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
528 lines
20 KiB
C++
528 lines
20 KiB
C++
//
|
||
// Copyright (c) 2015 Greg Landrum
|
||
//
|
||
// @@ All Rights Reserved @@
|
||
// This file is part of the RDKit.
|
||
// The contents are covered by the terms of the BSD license
|
||
// which is included in the file license.txt, found at the root
|
||
// of the RDKit source tree.
|
||
//
|
||
// Implementation details here are taken from the file fpb_io.py from chemfp
|
||
// (www.chemfp.org)
|
||
// Many thanks to Andrew Dalke for creating such great software and for
|
||
// helping explain the FPB implementation
|
||
|
||
#include <DataStructs/ExplicitBitVect.h>
|
||
#include <DataStructs/BitOps.h>
|
||
|
||
#include <RDGeneral/Invariant.h>
|
||
#include <RDGeneral/StreamOps.h>
|
||
#include <RDGeneral/Ranking.h>
|
||
#include "FPBReader.h"
|
||
#include <boost/scoped_ptr.hpp>
|
||
#include <boost/scoped_array.hpp>
|
||
|
||
namespace RDKit {
|
||
|
||
namespace detail {
|
||
const unsigned int magicSize = 8;
|
||
const std::string FPB_MAGIC("FPB1\r\n\0\0", 8);
|
||
const unsigned int tagNameSize = 4;
|
||
|
||
struct FPBReader_impl {
|
||
unsigned int len;
|
||
unsigned int nBits;
|
||
boost::uint32_t numBytesStoredPerFingerprint;
|
||
std::vector<boost::uint32_t> popCountOffsets;
|
||
const boost::uint8_t *dp_fpData; // do not free this
|
||
boost::scoped_array<boost::uint8_t> dp_arenaChunk;
|
||
boost::uint32_t num4ByteElements, num8ByteElements; // for finding ids
|
||
const boost::uint8_t *dp_idOffsets; // do not free this
|
||
boost::scoped_array<boost::uint8_t> dp_idChunk;
|
||
bool df_lazy; // read the fp data lazily. In this case we use fpDataOffset
|
||
// and seek instead of using dp_fpData
|
||
std::streampos fpDataOffset; // file offset from tellg
|
||
std::istream *istrm; // we don't own this, it's just used for the lazy reader
|
||
};
|
||
|
||
// the caller is responsible for calling delete[] on `data`
|
||
void readChunkDetails(std::istream &istrm, std::string &nm,
|
||
boost::uint64_t &sz) {
|
||
streamRead(istrm, sz);
|
||
char tag[tagNameSize + 1];
|
||
tag[tagNameSize] = 0;
|
||
istrm.read(tag, tagNameSize);
|
||
nm = tag;
|
||
}
|
||
void readChunkData(std::istream &istrm, boost::uint64_t &sz,
|
||
boost::uint8_t *&data) {
|
||
if (sz) {
|
||
data = new boost::uint8_t[sz];
|
||
istrm.read((char *)data, sz);
|
||
} else {
|
||
data = NULL;
|
||
}
|
||
// std::cerr << " CHUNKSZ: " << sz << " name: " << nm << std::endl;
|
||
}
|
||
|
||
void extractPopCounts(FPBReader_impl *dp_impl, boost::uint64_t sz,
|
||
const boost::uint8_t *chunk) {
|
||
PRECONDITION(dp_impl, "bad pointer");
|
||
/* this section of the FPB format is under-documented in Andrew's code,
|
||
* fortunately it looks pretty simple
|
||
*/
|
||
if (sz % 4)
|
||
throw ValueErrorException("POPC chunk size must be a multiple of 4 bytes");
|
||
unsigned int nEntries = sz / 4;
|
||
if (nEntries < 9)
|
||
throw ValueErrorException("POPC must contain at least 9 offsets");
|
||
|
||
dp_impl->popCountOffsets.reserve(nEntries);
|
||
for (unsigned int i = 0; i < nEntries; ++i) {
|
||
dp_impl->popCountOffsets.push_back(*(boost::uint32_t *)chunk);
|
||
chunk += 4;
|
||
}
|
||
};
|
||
|
||
void extractArenaDetails(FPBReader_impl *dp_impl, boost::uint64_t sz) {
|
||
PRECONDITION(dp_impl, "bad pointer");
|
||
PRECONDITION(dp_impl->df_lazy, "should only be used in lazy mode");
|
||
|
||
boost::uint32_t numBytesPerFingerprint;
|
||
streamRead(*dp_impl->istrm, numBytesPerFingerprint);
|
||
dp_impl->nBits = numBytesPerFingerprint * 8;
|
||
|
||
boost::uint32_t numBytesStoredPerFingerprint;
|
||
streamRead(*dp_impl->istrm, numBytesStoredPerFingerprint);
|
||
dp_impl->numBytesStoredPerFingerprint = numBytesStoredPerFingerprint;
|
||
boost::uint8_t spacer;
|
||
streamRead(*dp_impl->istrm, spacer);
|
||
dp_impl->len = (sz - 9 - spacer) / numBytesStoredPerFingerprint;
|
||
|
||
// streamRead(*dp_impl->istrm, spacer);
|
||
// now move forward the length of the spacer
|
||
if (spacer) dp_impl->istrm->seekg((std::streamoff)spacer, std::ios_base::cur);
|
||
dp_impl->fpDataOffset = dp_impl->istrm->tellg();
|
||
dp_impl->istrm->seekg(
|
||
static_cast<std::streamoff>(numBytesStoredPerFingerprint * dp_impl->len),
|
||
std::ios_base::cur);
|
||
}
|
||
void extractArena(FPBReader_impl *dp_impl, boost::uint64_t sz,
|
||
const boost::uint8_t *chunk) {
|
||
PRECONDITION(dp_impl, "bad pointer");
|
||
/* Documentation from Andrew's code on the structure of the arena:
|
||
The 'AREN'a starts with a header:
|
||
<num_bytes: 4 bytes> -- the number of bytes in a fingerprint
|
||
<storage_size: 4 bytes> -- number of bytes in fingerprint + extra bytes
|
||
<spacer_size: 1 byte> -- the number of spacer bytes used so the fingerprint
|
||
chunk starts on an aligned file position.
|
||
<spacer : $spacer_size> NUL bytes> -- up to 255 NUL bytes, used for alignment.
|
||
The fingerprints are N fingerprint fields, ordered sequentially.
|
||
<fp0: $storage_size bytes> -- the first fingerprint
|
||
<fp1: $storage_size bytes> -- the second fingerprint
|
||
...
|
||
The last fingerprint ends at the last byte of the arena chunk.
|
||
|
||
Each fingerprint contains:
|
||
<fingerprint: $num_bytes bytes> -- the actual fingerprint data
|
||
<extra: $storage_size-$num_bytes bytes> -- the 'extra' NULL padding bytes
|
||
used so storage_size is a multiple of the alignment.
|
||
|
||
To get the number of fingerprints in the arena:
|
||
(len(arena content) - 4 - 4 - 1 - $spacer_size) // $storage_size
|
||
*/
|
||
boost::uint32_t numBytesPerFingerprint = *((boost::uint32_t *)chunk);
|
||
dp_impl->nBits = numBytesPerFingerprint * 8;
|
||
|
||
chunk += sizeof(boost::uint32_t);
|
||
dp_impl->numBytesStoredPerFingerprint = *((boost::uint32_t *)chunk);
|
||
chunk += sizeof(boost::uint32_t);
|
||
boost::uint8_t spacer = *((boost::uint8_t *)chunk);
|
||
chunk += 1;
|
||
// now move forward the length of the spacer
|
||
chunk += spacer;
|
||
|
||
dp_impl->dp_fpData = chunk;
|
||
dp_impl->len = (sz - 9 - spacer) / dp_impl->numBytesStoredPerFingerprint;
|
||
};
|
||
|
||
// if dp_impl->df_lazy is true, we'll use the memory in fpData (should be large
|
||
// enough to hold the result!), otherwise
|
||
// we update it to a pointer to the memory dp_impl owns.
|
||
void extractBytes(const FPBReader_impl *dp_impl, unsigned int which,
|
||
boost::uint8_t *&fpData) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
PRECONDITION((dp_impl->df_lazy || dp_impl->dp_fpData), "bad fpdata pointer");
|
||
PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode");
|
||
PRECONDITION(!dp_impl->df_lazy || fpData, "no fpData in lazy mode");
|
||
|
||
if (which >= dp_impl->len) {
|
||
throw ValueErrorException("bad index");
|
||
}
|
||
boost::uint64_t offset = which * dp_impl->numBytesStoredPerFingerprint;
|
||
if (!dp_impl->df_lazy) {
|
||
fpData = const_cast<boost::uint8_t *>(dp_impl->dp_fpData) + offset;
|
||
} else {
|
||
dp_impl->istrm->seekg(dp_impl->fpDataOffset + (std::streampos)offset);
|
||
dp_impl->istrm->read((char *)fpData, dp_impl->numBytesStoredPerFingerprint);
|
||
}
|
||
};
|
||
|
||
// the caller is responsible for delete[]'ing this
|
||
boost::uint8_t *copyBytes(const FPBReader_impl *dp_impl, unsigned int which) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
boost::uint8_t *res;
|
||
res = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
|
||
if (!dp_impl->df_lazy) {
|
||
boost::uint8_t *fpData = NULL;
|
||
extractBytes(dp_impl, which, fpData);
|
||
memcpy((void *)res, fpData, dp_impl->numBytesStoredPerFingerprint);
|
||
} else {
|
||
extractBytes(dp_impl, which, res);
|
||
}
|
||
return res;
|
||
};
|
||
|
||
// caller is responsible for delete'ing the result
|
||
boost::dynamic_bitset<> *bytesToBitset(const boost::uint8_t *fpData,
|
||
boost::uint32_t nBits) {
|
||
unsigned int nBytes = nBits / 8;
|
||
if (!(nBytes % sizeof(boost::dynamic_bitset<>::block_type))) {
|
||
// I believe this could be faster (needs to be verified of course)
|
||
unsigned int nBlocks = nBytes / sizeof(boost::dynamic_bitset<>::block_type);
|
||
boost::dynamic_bitset<>::block_type *fpBlocks =
|
||
(boost::dynamic_bitset<>::block_type *)fpData;
|
||
return new boost::dynamic_bitset<>(fpBlocks, fpBlocks + nBlocks);
|
||
} else {
|
||
return (
|
||
boost::dynamic_bitset<> *)(new boost::dynamic_bitset<boost::uint8_t>(
|
||
fpData, fpData + nBytes));
|
||
}
|
||
}
|
||
|
||
// caller is responsible for delete []'ing the result
|
||
boost::uint8_t *bitsetToBytes(const boost::dynamic_bitset<> &bitset) {
|
||
unsigned int nBits = bitset.size();
|
||
unsigned int nBytes = nBits / 8;
|
||
|
||
boost::uint8_t *res = new boost::uint8_t[nBytes];
|
||
boost::to_block_range(bitset, (boost::dynamic_bitset<>::block_type *)res);
|
||
return res;
|
||
}
|
||
|
||
// the caller is responsible for delete'ing this
|
||
ExplicitBitVect *extractFP(const FPBReader_impl *dp_impl, unsigned int which) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
boost::uint8_t *fpData;
|
||
if (dp_impl->df_lazy) {
|
||
fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
|
||
}
|
||
extractBytes(dp_impl, which, fpData);
|
||
boost::dynamic_bitset<> *resDBS = bytesToBitset(fpData, dp_impl->nBits);
|
||
if (dp_impl->df_lazy) delete[] fpData;
|
||
return new ExplicitBitVect(resDBS);
|
||
};
|
||
|
||
double tanimoto(const FPBReader_impl *dp_impl, unsigned int which,
|
||
const ::boost::uint8_t *bv) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
PRECONDITION(bv, "bad bv pointer");
|
||
if (which >= dp_impl->len) {
|
||
throw ValueErrorException("bad index");
|
||
}
|
||
boost::uint8_t *fpData;
|
||
if (dp_impl->df_lazy) {
|
||
fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
|
||
}
|
||
extractBytes(dp_impl, which, fpData);
|
||
double res =
|
||
CalcBitmapTanimoto(fpData, bv, dp_impl->numBytesStoredPerFingerprint);
|
||
if (dp_impl->df_lazy) delete[] fpData;
|
||
return res;
|
||
};
|
||
|
||
void extractIds(FPBReader_impl *dp_impl, boost::uint64_t sz,
|
||
const boost::uint8_t *chunk) {
|
||
PRECONDITION(dp_impl, "bad pointer");
|
||
/* Documentation from Andrew's code on the structure of the arena:
|
||
|
||
The actual layout inside of the chunk is:
|
||
<num_4byte_elements: 4 bytes> -- the number of 4 byte offsets.
|
||
<num_8byte_elements: 4 bytes> -- the number of 8 byte offsets
|
||
Note: the number of indicies is num_4byte_elements + num_8byte_elements + 1
|
||
because even with no elements there will be the initial '\0\0\0\0'.
|
||
|
||
<id 0> + NUL -- the first string, with an added NUL terminator
|
||
<id 1> + NUL -- the second string, with an added NUL terminator
|
||
....
|
||
<id N> + NUL -- the last string, with an added NUL terminator
|
||
|
||
<offset 0: 4 bytes> -- the offset relative to the start of <text 0>.
|
||
(This always contains the 4 bytes "\0\0\0\0")
|
||
...
|
||
<offset num_4byte_elements: 4 bytes> -- the last offset stored in 4 bytes
|
||
|
||
(Note: This next section exists only when <num 8 byte offsets> > 0)
|
||
<offset num_4byte_elements+1: 8 bytes> -- the first offset stored in 8
|
||
bytes
|
||
...
|
||
<offset num_4byte_elements+num_8byte_elements: 8 bytes> -- the last offset
|
||
stored in 8 bytes
|
||
|
||
To get the identifier for record at position P >= 0:
|
||
chunk_size = size of the chunk
|
||
num_4byte_elements = decode bytes[0:4] as uint32
|
||
num_8byte_elements = decode bytes[4:8] as uint32
|
||
if P >= num_4byte_elements + num_8byte_elements:
|
||
record does not exist
|
||
offset_start = chunk_size - num_4byte_elements*4 - num_8byte_elements*8
|
||
if P < num_4byte_elements:
|
||
start, end = decode bytes[offset_start:offset_start+8] as (uint32, uint32)
|
||
elif P == N4:
|
||
start, end = decode bytes[offset_start:offset_start+12] as (uint32,
|
||
uint64)
|
||
else:
|
||
start, end = decode bytes[offset_start:offset_start+16] as (uint64,
|
||
uint64)
|
||
id = bytes[start:end-1]
|
||
*/
|
||
dp_impl->num4ByteElements = *((boost::uint32_t *)chunk);
|
||
chunk += sizeof(boost::uint32_t);
|
||
dp_impl->num8ByteElements = *((boost::uint32_t *)chunk);
|
||
chunk += sizeof(boost::uint32_t);
|
||
dp_impl->dp_idOffsets = dp_impl->dp_idChunk.get() + sz -
|
||
(dp_impl->num4ByteElements + 1) * 4 -
|
||
dp_impl->num8ByteElements * 8;
|
||
};
|
||
|
||
std::string extractId(const FPBReader_impl *dp_impl, unsigned int which) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
PRECONDITION(dp_impl->dp_idOffsets, "bad idOffsets pointer");
|
||
|
||
if (which >= dp_impl->num4ByteElements + dp_impl->num8ByteElements) {
|
||
throw ValueErrorException("bad index");
|
||
}
|
||
std::string res;
|
||
|
||
boost::uint64_t offset = 0, len = 0;
|
||
if (which < dp_impl->num4ByteElements) {
|
||
offset = *(boost::uint32_t *)(dp_impl->dp_idOffsets + which * 4);
|
||
len = *(boost::uint32_t *)(dp_impl->dp_idOffsets + (which + 1) * 4);
|
||
} else if (which == dp_impl->num4ByteElements) {
|
||
// FIX: this code path is not yet tested
|
||
offset = *(boost::uint32_t *)(dp_impl->dp_idOffsets + which * 4);
|
||
len = *(boost::uint64_t *)(dp_impl->dp_idOffsets + (which + 1) * 4);
|
||
} else {
|
||
// FIX: this code path is not yet tested
|
||
offset = *(boost::uint64_t *)(dp_impl->dp_idOffsets +
|
||
dp_impl->num4ByteElements * 4 + which * 8);
|
||
len = *(boost::uint64_t *)(dp_impl->dp_idOffsets +
|
||
dp_impl->num4ByteElements * 4 + (which + 1) * 8);
|
||
}
|
||
len -= offset;
|
||
res = std::string((const char *)(dp_impl->dp_idChunk.get() + offset), len);
|
||
return res;
|
||
};
|
||
|
||
void tanimotoNeighbors(const FPBReader_impl *dp_impl, const boost::uint8_t *bv,
|
||
double threshold,
|
||
std::vector<std::pair<double, unsigned int> > &res) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
PRECONDITION(bv, "bad bv");
|
||
RANGE_CHECK(-1e-6, threshold, 1.0 + 1e-6);
|
||
res.clear();
|
||
boost::uint64_t probeCount =
|
||
CalcBitmapPopcount(bv, dp_impl->numBytesStoredPerFingerprint);
|
||
|
||
boost::uint64_t startScan = 0, endScan = dp_impl->len;
|
||
if (dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) {
|
||
// figure out the bounds based on equation 24 from:
|
||
// 1. Swamidass, S. J. & Baldi, P. Bounds and Algorithms for Fast Exact
|
||
// Searches of Chemical Fingerprints in Linear and Sublinear Time. J. Chem.
|
||
// Inf. Model. 47, 302–317 (2007).
|
||
// http://pubs.acs.org/doi/abs/10.1021/ci600358f
|
||
boost::uint32_t minDbCount = (boost::uint32_t)floor(threshold * probeCount);
|
||
boost::uint32_t maxDbCount =
|
||
(threshold > 1e-6) ? (boost::uint32_t)ceil(probeCount / threshold)
|
||
: dp_impl->numBytesStoredPerFingerprint;
|
||
// std::cerr << " bounds: " << minDbCount << "-" << maxDbCount << std::endl;
|
||
startScan = dp_impl->popCountOffsets[minDbCount];
|
||
endScan = dp_impl->popCountOffsets[maxDbCount + 1];
|
||
// std::cerr << " scan: " << startScan << "-" << endScan << std::endl;
|
||
}
|
||
boost::uint8_t *dbv;
|
||
if (dp_impl->df_lazy) {
|
||
dbv = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
|
||
}
|
||
for (boost::uint64_t i = startScan; i < endScan; ++i) {
|
||
extractBytes(dp_impl, i, dbv);
|
||
double tani =
|
||
CalcBitmapTanimoto(dbv, bv, dp_impl->numBytesStoredPerFingerprint);
|
||
// std::cerr << " i:" << i << " " << tani << " ? " << threshold <<
|
||
// std::endl;
|
||
if (tani >= threshold) {
|
||
res.push_back(std::make_pair(tani, i));
|
||
}
|
||
}
|
||
if (dp_impl->df_lazy) delete[] dbv;
|
||
}
|
||
|
||
void containingNeighbors(const FPBReader_impl *dp_impl,
|
||
const boost::uint8_t *bv,
|
||
std::vector<unsigned int> &res) {
|
||
PRECONDITION(dp_impl, "bad reader pointer");
|
||
PRECONDITION(bv, "bad bv");
|
||
res.clear();
|
||
boost::uint64_t probeCount =
|
||
CalcBitmapPopcount(bv, dp_impl->numBytesStoredPerFingerprint);
|
||
|
||
boost::uint64_t startScan = 0, endScan = dp_impl->len;
|
||
if (dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) {
|
||
startScan = dp_impl->popCountOffsets[probeCount];
|
||
// std::cerr << " scan: " << startScan << "-" << endScan << std::endl;
|
||
}
|
||
boost::uint8_t *dbv;
|
||
if (dp_impl->df_lazy) {
|
||
dbv = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
|
||
}
|
||
for (boost::uint64_t i = startScan; i < endScan; ++i) {
|
||
extractBytes(dp_impl, i, dbv);
|
||
if (CalcBitmapAllProbeBitsMatch(bv, dbv,
|
||
dp_impl->numBytesStoredPerFingerprint)) {
|
||
res.push_back(i);
|
||
}
|
||
}
|
||
if (dp_impl->df_lazy) delete[] dbv;
|
||
}
|
||
|
||
} // end of detail namespace
|
||
|
||
void FPBReader::init() {
|
||
PRECONDITION(dp_istrm, "no stream");
|
||
dp_impl = new detail::FPBReader_impl;
|
||
dp_impl->istrm = dp_istrm;
|
||
dp_impl->df_lazy = df_lazyRead;
|
||
|
||
char magic[detail::magicSize];
|
||
dp_istrm->read(magic, detail::magicSize);
|
||
if (detail::FPB_MAGIC != std::string(magic, detail::magicSize)) {
|
||
throw BadFileException("Invalid FPB magic");
|
||
}
|
||
while (1) {
|
||
if (dp_istrm->eof()) throw BadFileException("EOF hit before FEND record");
|
||
std::string chunkNm;
|
||
boost::uint64_t chunkSz;
|
||
boost::uint8_t *chunk = NULL;
|
||
detail::readChunkDetails(*dp_istrm, chunkNm, chunkSz);
|
||
if (!df_lazyRead || chunkNm != "AREN") {
|
||
detail::readChunkData(*dp_istrm, chunkSz, chunk);
|
||
if (chunkNm == "FEND") {
|
||
break;
|
||
} else if (chunkNm == "POPC") {
|
||
detail::extractPopCounts(dp_impl, chunkSz, chunk);
|
||
} else if (chunkNm == "AREN") {
|
||
dp_impl->dp_arenaChunk.reset(chunk);
|
||
detail::extractArena(dp_impl, chunkSz, chunk);
|
||
chunk = NULL;
|
||
} else if (chunkNm == "FPID") {
|
||
dp_impl->dp_idChunk.reset(chunk);
|
||
detail::extractIds(dp_impl, chunkSz, chunk);
|
||
chunk = NULL;
|
||
}
|
||
delete[] chunk;
|
||
} else {
|
||
// we are reading the AREN chunk in lazy mode, just get our position in
|
||
// the file.
|
||
detail::extractArenaDetails(dp_impl, chunkSz);
|
||
}
|
||
}
|
||
if ((!df_lazyRead && !dp_impl->dp_arenaChunk) ||
|
||
(df_lazyRead && !dp_impl->fpDataOffset))
|
||
throw BadFileException("No AREN record found");
|
||
if (!dp_impl->dp_idChunk) throw BadFileException("No FPID record found");
|
||
|
||
df_init = true;
|
||
};
|
||
|
||
void FPBReader::destroy() {
|
||
if (dp_impl) {
|
||
dp_impl->dp_arenaChunk.reset();
|
||
dp_impl->dp_idChunk.reset();
|
||
|
||
dp_impl->dp_fpData = NULL;
|
||
dp_impl->dp_idOffsets = NULL;
|
||
}
|
||
delete dp_impl;
|
||
};
|
||
|
||
boost::shared_ptr<ExplicitBitVect> FPBReader::getFP(unsigned int idx) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
|
||
return boost::shared_ptr<ExplicitBitVect>(detail::extractFP(dp_impl, idx));
|
||
};
|
||
boost::shared_array<boost::uint8_t> FPBReader::getBytes(
|
||
unsigned int idx) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
|
||
return boost::shared_array<boost::uint8_t>(detail::copyBytes(dp_impl, idx));
|
||
};
|
||
|
||
std::string FPBReader::getId(unsigned int idx) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
|
||
std::string res = detail::extractId(dp_impl, idx);
|
||
return res;
|
||
};
|
||
unsigned int FPBReader::length() const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
PRECONDITION(dp_impl, "no impl");
|
||
return dp_impl->len;
|
||
};
|
||
unsigned int FPBReader::nBits() const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
PRECONDITION(dp_impl, "no impl");
|
||
return dp_impl->nBits;
|
||
};
|
||
std::pair<unsigned int, unsigned int> FPBReader::getFPIdsInCountRange(
|
||
unsigned int minCount, unsigned int maxCount) {
|
||
PRECONDITION(df_init, "not initialized");
|
||
PRECONDITION(dp_impl, "no impl");
|
||
URANGE_CHECK(maxCount, dp_impl->nBits + 1);
|
||
PRECONDITION(maxCount >= minCount, "max < min");
|
||
if (dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) {
|
||
return std::make_pair(dp_impl->popCountOffsets[minCount],
|
||
dp_impl->popCountOffsets[maxCount + 1]);
|
||
} else {
|
||
// we don't have popcounts, so we have to work for it.
|
||
// FIX: complete this
|
||
return std::make_pair(0, 0);
|
||
}
|
||
};
|
||
double FPBReader::getTanimoto(unsigned int idx,
|
||
const boost::uint8_t *bv) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
return detail::tanimoto(dp_impl, idx, bv);
|
||
}
|
||
|
||
std::vector<std::pair<double, unsigned int> > FPBReader::getTanimotoNeighbors(
|
||
const boost::uint8_t *bv, double threshold) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
std::vector<std::pair<double, unsigned int> > res;
|
||
detail::tanimotoNeighbors(dp_impl, bv, threshold, res);
|
||
std::sort(res.begin(), res.end(),
|
||
Rankers::pairGreater<double, unsigned int>());
|
||
return res;
|
||
}
|
||
|
||
std::vector<unsigned int> FPBReader::getContainingNeighbors(
|
||
const boost::uint8_t *bv) const {
|
||
PRECONDITION(df_init, "not initialized");
|
||
std::vector<unsigned int> res;
|
||
detail::containingNeighbors(dp_impl, bv, res);
|
||
std::sort(res.begin(), res.end());
|
||
|
||
return res;
|
||
}
|
||
|
||
} // end of RDKit namespace
|