// // Copyright (c) 2016 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // // Implementation details here are taken from the file fpb_io.py from chemfp // (www.chemfp.org) // Many thanks to Andrew Dalke for creating such great software and for // helping explain the FPB implementation #include #include #include #include #include #include "FPBReader.h" #include #include namespace RDKit { namespace detail { const unsigned int magicSize = 8; const std::string FPB_MAGIC("FPB1\r\n\0\0", 8); const unsigned int tagNameSize = 4; struct FPBReader_impl { unsigned int len; unsigned int nBits; boost::uint32_t numBytesStoredPerFingerprint; std::vector popCountOffsets; const boost::uint8_t *dp_fpData; // do not free this boost::scoped_array dp_arenaChunk; boost::uint32_t num4ByteElements, num8ByteElements; // for finding ids const boost::uint8_t *dp_idOffsets; // do not free this boost::scoped_array dp_idChunk; bool df_lazy; // read the fp data lazily. In this case we use fpDataOffset // and seek instead of using dp_fpData std::streampos fpDataOffset; // file offset from tellg std::streampos idDataOffset; // file offset from tellg std::streampos idChunkOffset; // file offset from tellg std::istream *istrm; // we don't own this, it's just used for the lazy reader }; // the caller is responsible for calling delete[] on `data` void readChunkDetails(std::istream &istrm, std::string &nm, boost::uint64_t &sz) { streamRead(istrm, sz); char tag[tagNameSize + 1]; tag[tagNameSize] = 0; istrm.read(tag, tagNameSize); nm = tag; } void readChunkData(std::istream &istrm, boost::uint64_t &sz, boost::uint8_t *&data) { if (sz) { data = new boost::uint8_t[sz]; istrm.read(reinterpret_cast(data), sz); } else { data = nullptr; } // std::cerr << " CHUNKSZ: " << sz << " name: " << nm << std::endl; } void extractPopCounts(FPBReader_impl *dp_impl, boost::uint64_t sz, const boost::uint8_t *chunk) { PRECONDITION(dp_impl, "bad pointer"); /* this section of the FPB format is under-documented in Andrew's code, * fortunately it looks pretty simple */ if (sz % 4) throw ValueErrorException("POPC chunk size must be a multiple of 4 bytes"); unsigned int nEntries = sz / 4; if (nEntries < 9) throw ValueErrorException("POPC must contain at least 9 offsets"); dp_impl->popCountOffsets.reserve(nEntries); for (unsigned int i = 0; i < nEntries; ++i) { dp_impl->popCountOffsets.push_back( *reinterpret_cast(chunk)); chunk += 4; } }; //----------------------------------------------------- // Arena procesing /* Documentation from Andrew's code on the structure of the arena: The 'AREN'a starts with a header: -- the number of bytes in a fingerprint -- number of bytes in fingerprint + extra bytes -- the number of spacer bytes used so the fingerprint chunk starts on an aligned file position. NUL bytes> -- up to 255 NUL bytes, used for alignment. The fingerprints are N fingerprint fields, ordered sequentially. -- the first fingerprint -- the second fingerprint ... The last fingerprint ends at the last byte of the arena chunk. Each fingerprint contains: -- the actual fingerprint data -- the 'extra' NULL padding bytes used so storage_size is a multiple of the alignment. To get the number of fingerprints in the arena: (len(arena content) - 4 - 4 - 1 - $spacer_size) // $storage_size */ void extractArenaDetails(FPBReader_impl *dp_impl, boost::uint64_t sz) { PRECONDITION(dp_impl, "bad pointer"); PRECONDITION(dp_impl->df_lazy, "should only be used in lazy mode"); boost::uint32_t numBytesPerFingerprint; streamRead(*dp_impl->istrm, numBytesPerFingerprint); dp_impl->nBits = numBytesPerFingerprint * 8; boost::uint32_t numBytesStoredPerFingerprint; streamRead(*dp_impl->istrm, numBytesStoredPerFingerprint); dp_impl->numBytesStoredPerFingerprint = numBytesStoredPerFingerprint; boost::uint8_t spacer; streamRead(*dp_impl->istrm, spacer); dp_impl->len = (sz - 9 - spacer) / numBytesStoredPerFingerprint; // streamRead(*dp_impl->istrm, spacer); // now move forward the length of the spacer if (spacer) dp_impl->istrm->seekg(static_cast(spacer), std::ios_base::cur); dp_impl->fpDataOffset = dp_impl->istrm->tellg(); dp_impl->istrm->seekg( static_cast(numBytesStoredPerFingerprint * dp_impl->len), std::ios_base::cur); } void extractArena(FPBReader_impl *dp_impl, boost::uint64_t sz, const boost::uint8_t *chunk) { PRECONDITION(dp_impl, "bad pointer"); boost::uint32_t numBytesPerFingerprint = *reinterpret_cast(chunk); dp_impl->nBits = numBytesPerFingerprint * 8; chunk += sizeof(boost::uint32_t); dp_impl->numBytesStoredPerFingerprint = *reinterpret_cast(chunk); chunk += sizeof(boost::uint32_t); boost::uint8_t spacer = *reinterpret_cast(chunk); chunk += 1; // now move forward the length of the spacer chunk += spacer; dp_impl->dp_fpData = chunk; dp_impl->len = (sz - 9 - spacer) / dp_impl->numBytesStoredPerFingerprint; }; // if dp_impl->df_lazy is true, we'll use the memory in fpData (should be large // enough to hold the result!), otherwise // we update it to a pointer to the memory dp_impl owns. void extractBytes(const FPBReader_impl *dp_impl, unsigned int which, boost::uint8_t *&fpData, unsigned int nToRead = 1) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION((dp_impl->df_lazy || dp_impl->dp_fpData), "bad fpdata pointer"); PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode"); PRECONDITION(!dp_impl->df_lazy || fpData, "no fpData in lazy mode"); PRECONDITION(nToRead > 0, "bad nToRead"); if (which + nToRead > dp_impl->len) { throw ValueErrorException("bad index"); } boost::uint64_t offset = which * dp_impl->numBytesStoredPerFingerprint; if (!dp_impl->df_lazy) { fpData = const_cast(dp_impl->dp_fpData) + offset; } else { dp_impl->istrm->seekg(dp_impl->fpDataOffset + static_cast(offset)); dp_impl->istrm->read(reinterpret_cast(fpData), nToRead * dp_impl->numBytesStoredPerFingerprint); } }; // the caller is responsible for delete[]'ing this boost::uint8_t *copyBytes(const FPBReader_impl *dp_impl, unsigned int which) { PRECONDITION(dp_impl, "bad reader pointer"); boost::uint8_t *res; res = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; if (!dp_impl->df_lazy) { boost::uint8_t *fpData = nullptr; extractBytes(dp_impl, which, fpData); memcpy(static_cast(res), fpData, dp_impl->numBytesStoredPerFingerprint); } else { extractBytes(dp_impl, which, res); } return res; }; // caller is responsible for delete'ing the result boost::dynamic_bitset<> *bytesToBitset(const boost::uint8_t *fpData, boost::uint32_t nBits) { unsigned int nBytes = nBits / 8; if (!(nBytes % sizeof(boost::dynamic_bitset<>::block_type))) { // I believe this could be faster (needs to be verified of course) unsigned int nBlocks = nBytes / sizeof(boost::dynamic_bitset<>::block_type); const boost::dynamic_bitset<>::block_type *fpBlocks = reinterpret_cast::block_type *>(fpData); return new boost::dynamic_bitset<>(fpBlocks, fpBlocks + nBlocks); } else { return reinterpret_cast *>( new boost::dynamic_bitset(fpData, fpData + nBytes)); } } // caller is responsible for delete []'ing the result boost::uint8_t *bitsetToBytes(const boost::dynamic_bitset<> &bitset) { unsigned int nBits = bitset.size(); unsigned int nBytes = nBits / 8; auto *res = new boost::uint8_t[nBytes]; boost::to_block_range( bitset, reinterpret_cast::block_type *>(res)); return res; } // the caller is responsible for delete'ing this ExplicitBitVect *extractFP(const FPBReader_impl *dp_impl, unsigned int which) { PRECONDITION(dp_impl, "bad reader pointer"); boost::uint8_t *fpData; if (dp_impl->df_lazy) { fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; } extractBytes(dp_impl, which, fpData); boost::dynamic_bitset<> *resDBS = bytesToBitset(fpData, dp_impl->nBits); if (dp_impl->df_lazy) delete[] fpData; return new ExplicitBitVect(resDBS); }; double tanimoto(const FPBReader_impl *dp_impl, unsigned int which, const ::boost::uint8_t *bv) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION(bv, "bad bv pointer"); if (which >= dp_impl->len) { throw ValueErrorException("bad index"); } boost::uint8_t *fpData; if (dp_impl->df_lazy) { fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; } extractBytes(dp_impl, which, fpData); double res = CalcBitmapTanimoto(fpData, bv, dp_impl->numBytesStoredPerFingerprint); if (dp_impl->df_lazy) delete[] fpData; return res; }; double tversky(const FPBReader_impl *dp_impl, unsigned int which, const ::boost::uint8_t *bv, double ca, double cb) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION(bv, "bad bv pointer"); if (which >= dp_impl->len) { throw ValueErrorException("bad index"); } boost::uint8_t *fpData; if (dp_impl->df_lazy) { fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; } extractBytes(dp_impl, which, fpData); double res = CalcBitmapTversky(fpData, bv, dp_impl->numBytesStoredPerFingerprint, ca, cb); if (dp_impl->df_lazy) delete[] fpData; return res; }; //----------------------------------------------------- // Id procesing /* Documentation from Andrew's code on the structure of the arena: The actual layout inside of the chunk is: -- the number of 4 byte offsets. -- the number of 8 byte offsets Note: the number of indicies is num_4byte_elements + num_8byte_elements + 1 because even with no elements there will be the initial '\0\0\0\0'. + NUL -- the first string, with an added NUL terminator + NUL -- the second string, with an added NUL terminator .... + NUL -- the last string, with an added NUL terminator -- the offset relative to the start of . (This always contains the 4 bytes "\0\0\0\0") ... -- the last offset stored in 4 bytes (Note: This next section exists only when > 0) -- the first offset stored in 8 bytes ... -- the last offset stored in 8 bytes To get the identifier for record at position P >= 0: chunk_size = size of the chunk num_4byte_elements = decode bytes[0:4] as uint32 num_8byte_elements = decode bytes[4:8] as uint32 if P >= num_4byte_elements + num_8byte_elements: record does not exist offset_start = chunk_size - num_4byte_elements*4 - num_8byte_elements*8 if P < num_4byte_elements: start, end = decode bytes[offset_start:offset_start+8] as (uint32, uint32) elif P == N4: start, end = decode bytes[offset_start:offset_start+12] as (uint32, uint64) else: start, end = decode bytes[offset_start:offset_start+16] as (uint64, uint64) id = bytes[start:end-1] */ void extractIdsDetails(FPBReader_impl *dp_impl, boost::uint64_t sz) { PRECONDITION(dp_impl, "bad pointer"); std::streampos start = dp_impl->istrm->tellg(); dp_impl->idChunkOffset = start; streamRead(*dp_impl->istrm, dp_impl->num4ByteElements); streamRead(*dp_impl->istrm, dp_impl->num8ByteElements); dp_impl->idDataOffset = static_cast(start) + sz - (dp_impl->num4ByteElements + 1) * 4 - dp_impl->num8ByteElements * 8; dp_impl->istrm->seekg(start + static_cast(sz), std::ios_base::beg); }; void extractIds(FPBReader_impl *dp_impl, boost::uint64_t sz, const boost::uint8_t *chunk) { PRECONDITION(dp_impl, "bad pointer"); dp_impl->num4ByteElements = *reinterpret_cast(chunk); chunk += sizeof(boost::uint32_t); dp_impl->num8ByteElements = *reinterpret_cast(chunk); chunk += sizeof(boost::uint32_t); dp_impl->dp_idOffsets = dp_impl->dp_idChunk.get() + sz - (dp_impl->num4ByteElements + 1) * 4 - dp_impl->num8ByteElements * 8; }; std::string extractId(const FPBReader_impl *dp_impl, unsigned int which) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION((dp_impl->df_lazy || dp_impl->dp_idOffsets), "bad idOffsets pointer"); PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode"); if (which >= dp_impl->num4ByteElements + dp_impl->num8ByteElements) { throw ValueErrorException("bad index"); } std::string res; boost::uint64_t offset = 0, len = 0; if (which < dp_impl->num4ByteElements) { if (!dp_impl->df_lazy) { offset = *reinterpret_cast( dp_impl->dp_idOffsets + which * 4); len = *reinterpret_cast(dp_impl->dp_idOffsets + (which + 1) * 4); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast(which * 4)); dp_impl->istrm->read(reinterpret_cast(&offset), 4); dp_impl->istrm->read(reinterpret_cast(&len), 4); } } else if (which == dp_impl->num4ByteElements) { // FIX: this code path is not yet tested if (!dp_impl->df_lazy) { offset = *reinterpret_cast( dp_impl->dp_idOffsets + which * 4); len = *reinterpret_cast(dp_impl->dp_idOffsets + (which + 1) * 4); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast(which * 4)); dp_impl->istrm->read(reinterpret_cast(&offset), 4); dp_impl->istrm->read(reinterpret_cast(&len), 8); } } else { // FIX: this code path is not yet tested if (!dp_impl->df_lazy) { offset = *reinterpret_cast( dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 + which * 8); len = *reinterpret_cast( dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 + (which + 1) * 8); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast( dp_impl->num4ByteElements * 4 + which * 8)); dp_impl->istrm->read(reinterpret_cast(&offset), 8); dp_impl->istrm->read(reinterpret_cast(&len), 8); } } len -= offset; if (!dp_impl->df_lazy) { res = std::string( reinterpret_cast(dp_impl->dp_idChunk.get() + offset), len); } else { boost::shared_array buff(new char[len + 1]); buff[len] = 0; dp_impl->istrm->seekg(dp_impl->idChunkOffset + static_cast(offset)); dp_impl->istrm->read(reinterpret_cast(buff.get()), len); res = std::string(reinterpret_cast(buff.get())); } return res; }; void tanimotoNeighbors(const FPBReader_impl *dp_impl, const boost::uint8_t *bv, double threshold, std::vector > &res, bool usePopcountScreen, unsigned int readCache = 1000) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION(bv, "bad bv"); RANGE_CHECK(-1e-6, threshold, 1.0 + 1e-6); PRECONDITION(readCache > 0, "bad cache size"); res.clear(); boost::uint64_t probeCount = CalcBitmapPopcount(bv, dp_impl->numBytesStoredPerFingerprint); boost::uint64_t startScan = 0, endScan = dp_impl->len; if (usePopcountScreen && dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) { // figure out the bounds based on equation 24 from: // 1. Swamidass, S. J. & Baldi, P. Bounds and Algorithms for Fast Exact // Searches of Chemical Fingerprints in Linear and Sublinear Time. J. Chem. // Inf. Model. 47, 302–317 (2007). // http://pubs.acs.org/doi/abs/10.1021/ci600358f boost::uint32_t minDbCount = static_cast(floor(threshold * probeCount)); boost::uint32_t maxDbCount = (threshold > 1e-6) ? static_cast(ceil(probeCount / threshold)) : dp_impl->numBytesStoredPerFingerprint; // std::cerr << "probeCount: " << probeCount << " bounds: " << minDbCount // << "-" << maxDbCount << std::endl; startScan = dp_impl->popCountOffsets[minDbCount]; endScan = dp_impl->popCountOffsets[maxDbCount + 1]; // std::cerr << " scan: " << startScan << "-" << endScan << std::endl; } boost::uint8_t *dbv; if (dp_impl->df_lazy) { dbv = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint * readCache]; } for (boost::uint64_t i = startScan; i < endScan; i += readCache) { unsigned int toRead = readCache; if (i + toRead >= endScan) { toRead = endScan - i; } extractBytes(dp_impl, i, dbv, toRead); for (unsigned int j = 0; j < toRead; ++j) { double tani = CalcBitmapTanimoto(dbv + j * dp_impl->numBytesStoredPerFingerprint, bv, dp_impl->numBytesStoredPerFingerprint); if (tani >= threshold) { res.push_back(std::make_pair(tani, i + j)); } } } if (dp_impl->df_lazy) delete[] dbv; } void tverskyNeighbors(const FPBReader_impl *dp_impl, const boost::uint8_t *bv, double ca, double cb, double threshold, std::vector > &res, bool usePopcountScreen) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION(bv, "bad bv"); RANGE_CHECK(-1e-6, threshold, 1.0 + 1e-6); res.clear(); boost::uint64_t probeCount = CalcBitmapPopcount(bv, dp_impl->numBytesStoredPerFingerprint); boost::uint64_t startScan = 0, endScan = dp_impl->len; if (usePopcountScreen && dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) { // figure out the bounds based on equation 25 from: // 1. Swamidass, S. J. & Baldi, P. Bounds and Algorithms for Fast Exact // Searches of Chemical Fingerprints in Linear and Sublinear Time. J. Chem. // Inf. Model. 47, 302–317 (2007). // http://pubs.acs.org/doi/abs/10.1021/ci600358f boost::uint32_t minDbCount = static_cast(floor( (threshold * probeCount * ca) / (1. - threshold + threshold * ca))); boost::uint32_t maxDbCount = ((threshold * cb) > 1e-6) ? static_cast( ceil(probeCount * (1 - threshold + threshold * cb) / (threshold * cb))) : dp_impl->numBytesStoredPerFingerprint; // std::cerr << "probeCount: " << probeCount << " bounds: " << minDbCount // << "-" << maxDbCount << std::endl; startScan = dp_impl->popCountOffsets[minDbCount]; endScan = dp_impl->popCountOffsets[maxDbCount + 1]; // std::cerr << " scan: " << startScan << "-" << endScan << std::endl; } boost::uint8_t *dbv; if (dp_impl->df_lazy) { dbv = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; } for (boost::uint64_t i = startScan; i < endScan; ++i) { extractBytes(dp_impl, i, dbv); double sim = CalcBitmapTversky( dbv, bv, dp_impl->numBytesStoredPerFingerprint, ca, cb); // std::cerr << " i:" << i << " " << tani << " ? " << threshold << // std::endl; if (sim >= threshold) { res.push_back(std::make_pair(sim, i)); } } if (dp_impl->df_lazy) delete[] dbv; } void containingNeighbors(const FPBReader_impl *dp_impl, const boost::uint8_t *bv, std::vector &res) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION(bv, "bad bv"); res.clear(); boost::uint64_t probeCount = CalcBitmapPopcount(bv, dp_impl->numBytesStoredPerFingerprint); boost::uint64_t startScan = 0, endScan = dp_impl->len; if (dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) { startScan = dp_impl->popCountOffsets[probeCount]; // std::cerr << " scan: " << startScan << "-" << endScan << std::endl; } boost::uint8_t *dbv; if (dp_impl->df_lazy) { dbv = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]; } for (boost::uint64_t i = startScan; i < endScan; ++i) { extractBytes(dp_impl, i, dbv); if (CalcBitmapAllProbeBitsMatch(bv, dbv, dp_impl->numBytesStoredPerFingerprint)) { res.push_back(i); } } if (dp_impl->df_lazy) delete[] dbv; } } // end of detail namespace void FPBReader::init() { PRECONDITION(dp_istrm, "no stream"); if (df_init) return; dp_impl = new detail::FPBReader_impl; dp_impl->istrm = dp_istrm; dp_impl->df_lazy = df_lazyRead; char magic[detail::magicSize]; dp_istrm->read(magic, detail::magicSize); if (detail::FPB_MAGIC != std::string(magic, detail::magicSize)) { throw BadFileException("Invalid FPB magic"); } while (1) { if (dp_istrm->eof()) throw BadFileException("EOF hit before FEND record"); std::string chunkNm; boost::uint64_t chunkSz; boost::uint8_t *chunk = nullptr; detail::readChunkDetails(*dp_istrm, chunkNm, chunkSz); // std::cerr << " Chunk: " << chunkNm << " " << chunkSz << std::endl; if (!df_lazyRead || (chunkNm != "AREN" && chunkNm != "FPID")) { detail::readChunkData(*dp_istrm, chunkSz, chunk); if (chunkNm == "FEND") { break; } else if (chunkNm == "POPC") { detail::extractPopCounts(dp_impl, chunkSz, chunk); } else if (chunkNm == "AREN") { dp_impl->dp_arenaChunk.reset(chunk); detail::extractArena(dp_impl, chunkSz, chunk); chunk = nullptr; } else if (chunkNm == "FPID") { dp_impl->dp_idChunk.reset(chunk); detail::extractIds(dp_impl, chunkSz, chunk); chunk = nullptr; } else if (chunkNm == "META") { // currently ignored } else if (chunkNm == "HASH") { // currently ignored } else { BOOST_LOG(rdWarningLog) << "Unknown chunk: " << chunkNm << " ignored." << std::endl; } delete[] chunk; } else { // we are reading the AREN or FPID chunk in lazy mode, just get our // position in // the file. if (chunkNm == "AREN") { detail::extractArenaDetails(dp_impl, chunkSz); } else if (chunkNm == "FPID") { detail::extractIdsDetails(dp_impl, chunkSz); } } } if ((!df_lazyRead && !dp_impl->dp_arenaChunk) || (df_lazyRead && !dp_impl->fpDataOffset)) throw BadFileException("No AREN record found"); if ((!df_lazyRead && !dp_impl->dp_idChunk) || (df_lazyRead && !dp_impl->idDataOffset)) throw BadFileException("No FPID record found"); df_init = true; }; void FPBReader::destroy() { if (dp_impl) { dp_impl->dp_arenaChunk.reset(); dp_impl->dp_idChunk.reset(); dp_impl->dp_fpData = nullptr; dp_impl->dp_idOffsets = nullptr; } delete dp_impl; dp_impl = nullptr; }; boost::shared_ptr FPBReader::getFP(unsigned int idx) const { PRECONDITION(df_init, "not initialized"); return boost::shared_ptr(detail::extractFP(dp_impl, idx)); }; boost::shared_array FPBReader::getBytes( unsigned int idx) const { PRECONDITION(df_init, "not initialized"); return boost::shared_array(detail::copyBytes(dp_impl, idx)); }; std::string FPBReader::getId(unsigned int idx) const { PRECONDITION(df_init, "not initialized"); std::string res = detail::extractId(dp_impl, idx); return res; }; unsigned int FPBReader::length() const { PRECONDITION(df_init, "not initialized"); PRECONDITION(dp_impl, "no impl"); return dp_impl->len; }; unsigned int FPBReader::nBits() const { PRECONDITION(df_init, "not initialized"); PRECONDITION(dp_impl, "no impl"); return dp_impl->nBits; }; std::pair FPBReader::getFPIdsInCountRange( unsigned int minCount, unsigned int maxCount) { PRECONDITION(df_init, "not initialized"); PRECONDITION(dp_impl, "no impl"); URANGE_CHECK(maxCount, dp_impl->nBits + 1); PRECONDITION(maxCount >= minCount, "max < min"); if (dp_impl->popCountOffsets.size() == dp_impl->nBits + 2) { return std::make_pair(dp_impl->popCountOffsets[minCount], dp_impl->popCountOffsets[maxCount + 1]); } else { // we don't have popcounts, so we have to work for it. // FIX: complete this return std::make_pair(0, 0); } }; double FPBReader::getTanimoto(unsigned int idx, const boost::uint8_t *bv) const { PRECONDITION(df_init, "not initialized"); return detail::tanimoto(dp_impl, idx, bv); } double FPBReader::getTanimoto(unsigned int idx, const ExplicitBitVect &ebv) const { const boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); double res = getTanimoto(idx, bv); delete[] bv; return res; } std::vector > FPBReader::getTanimotoNeighbors( const boost::uint8_t *bv, double threshold, bool usePopcountScreen) const { PRECONDITION(df_init, "not initialized"); std::vector > res; detail::tanimotoNeighbors(dp_impl, bv, threshold, res, usePopcountScreen); std::sort(res.begin(), res.end(), Rankers::pairGreater()); return res; } std::vector > FPBReader::getTanimotoNeighbors( const ExplicitBitVect &ebv, double threshold, bool usePopcountScreen) const { const boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); std::vector > res = getTanimotoNeighbors(bv, threshold, usePopcountScreen); delete[] bv; return res; } double FPBReader::getTversky(unsigned int idx, const boost::uint8_t *bv, double ca, double cb) const { PRECONDITION(df_init, "not initialized"); return detail::tversky(dp_impl, idx, bv, ca, cb); } double FPBReader::getTversky(unsigned int idx, const ExplicitBitVect &ebv, double ca, double cb) const { const boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); double res = getTversky(idx, bv, ca, cb); delete[] bv; return res; } std::vector > FPBReader::getTverskyNeighbors( const boost::uint8_t *bv, double ca, double cb, double threshold, bool usePopcountScreen) const { PRECONDITION(df_init, "not initialized"); std::vector > res; detail::tverskyNeighbors(dp_impl, bv, ca, cb, threshold, res, usePopcountScreen); std::sort(res.begin(), res.end(), Rankers::pairGreater()); return res; } std::vector > FPBReader::getTverskyNeighbors( const ExplicitBitVect &ebv, double ca, double cb, double threshold, bool usePopcountScreen) const { const boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); std::vector > res = getTverskyNeighbors(bv, ca, cb, threshold, usePopcountScreen); delete[] bv; return res; } std::vector FPBReader::getContainingNeighbors( const boost::uint8_t *bv) const { PRECONDITION(df_init, "not initialized"); std::vector res; detail::containingNeighbors(dp_impl, bv, res); std::sort(res.begin(), res.end()); return res; } std::vector FPBReader::getContainingNeighbors( const ExplicitBitVect &ebv) const { const boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); std::vector res = getContainingNeighbors(bv); delete[] bv; return res; } } // end of RDKit namespace