// // Copyright (c) 2016 Greg Landrum // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include "MultiFPBReader.h" #include namespace RDKit { namespace detail { boost::uint8_t *bitsetToBytes(const boost::dynamic_bitset<> &bitset); } namespace { struct tplSorter : public std::binary_function { bool operator()(const MultiFPBReader::ResultTuple &v1, const MultiFPBReader::ResultTuple &v2) const { if (v1.get<0>() == v2.get<0>()) { if (v1.get<2>() == v2.get<2>()) { return v1.get<1>() < v2.get<1>(); } else { return v1.get<2>() < v2.get<2>(); } } else { return v1.get<0>() > v2.get<0>(); } } }; struct pairSorter : public std::binary_function, std::pair, bool> { bool operator()(const std::pair &v1, const std::pair &v2) const { if (v1.first == v2.first) { return v1.second < v2.second; } else { return v1.first < v2.first; } } }; struct sim_args { const boost::uint8_t *bv; double ca, cb; double threshold; const std::vector &readers; std::vector > *res; bool initOnSearch; }; void tversky_helper(unsigned int threadId, unsigned int numThreads, const sim_args *args) { for (unsigned int i = threadId; i < args->readers.size(); i += numThreads) { if (args->initOnSearch) args->readers[i]->init(); std::vector > r_res = args->readers[i]->getTverskyNeighbors(args->bv, args->ca, args->cb, args->threshold); (*args->res)[i].clear(); (*args->res)[i].reserve(r_res.size()); for (std::vector >::const_iterator rit = r_res.begin(); rit != r_res.end(); ++rit) { (*args->res)[i].push_back( MultiFPBReader::ResultTuple(rit->first, rit->second, i)); } } } void tani_helper(unsigned int threadId, unsigned int numThreads, const sim_args *args) { for (unsigned int i = threadId; i < args->readers.size(); i += numThreads) { if (args->initOnSearch) args->readers[i]->init(); std::vector > r_res = args->readers[i]->getTanimotoNeighbors(args->bv, args->threshold); (*args->res)[i].clear(); (*args->res)[i].reserve(r_res.size()); for (std::vector >::const_iterator rit = r_res.begin(); rit != r_res.end(); ++rit) { (*args->res)[i].push_back( MultiFPBReader::ResultTuple(rit->first, rit->second, i)); } } } template void generic_nbr_helper(std::vector &res, T func, const sim_args &args, unsigned int numThreads) { res.clear(); res.resize(0); numThreads = getNumThreadsToUse(numThreads); #ifdef RDK_THREADSAFE_SSS boost::thread_group tg; #endif if (numThreads == 1) { func(0, 1, &args); } #ifdef RDK_THREADSAFE_SSS else { for (unsigned int tid = 0; tid < numThreads && tid < args.readers.size(); ++tid) { tg.add_thread(new boost::thread(func, tid, numThreads, &args)); } tg.join_all(); } #endif for (unsigned int i = 0; i < args.readers.size(); ++i) { res.reserve(res.size() + (*args.res).size()); res.insert(res.end(), (*args.res)[i].begin(), (*args.res)[i].end()); } std::sort(res.begin(), res.end(), tplSorter()); } void get_tani_nbrs(const std::vector &d_readers, const boost::uint8_t *bv, double threshold, std::vector &res, int numThreads, bool initOnSearch) { std::vector > accum( d_readers.size()); sim_args args = {bv, 0., 0., threshold, d_readers, &accum, initOnSearch}; generic_nbr_helper(res, tani_helper, args, numThreads); } void get_tversky_nbrs(const std::vector &d_readers, const boost::uint8_t *bv, double a, double b, double threshold, std::vector &res, int numThreads, bool initOnSearch) { std::vector > accum( d_readers.size()); sim_args args = {bv, a, b, threshold, d_readers, &accum, initOnSearch}; generic_nbr_helper(res, tversky_helper, args, numThreads); } void contain_helper(unsigned int threadId, unsigned int numThreads, const boost::uint8_t *bv, const std::vector *readers, std::vector > *accum, bool initOnSearch) { for (unsigned int i = threadId; i < readers->size(); i += numThreads) { if (initOnSearch) (*readers)[i]->init(); (*accum)[i] = (*readers)[i]->getContainingNeighbors(bv); } } void get_containing_nbrs( const std::vector &d_readers, const boost::uint8_t *bv, std::vector > &res, unsigned int numThreads, bool initOnSearch) { numThreads = getNumThreadsToUse(numThreads); #ifdef RDK_THREADSAFE_SSS boost::thread_group tg; #endif std::vector > accum(d_readers.size()); if (numThreads == 1) { contain_helper(0, 1, bv, &d_readers, &accum, initOnSearch); } #ifdef RDK_THREADSAFE_SSS else { for (unsigned int tid = 0; tid < numThreads && tid < d_readers.size(); ++tid) { tg.add_thread(new boost::thread(contain_helper, tid, numThreads, bv, &d_readers, &accum, initOnSearch)); } tg.join_all(); } #endif res.clear(); for (unsigned int i = 0; i < d_readers.size(); ++i) { std::vector &r_res = accum[i]; BOOST_FOREACH (unsigned int ri, r_res) { res.push_back(std::make_pair(ri, i)); } } std::sort(res.begin(), res.end(), pairSorter()); } } // end of anonymous namespace void MultiFPBReader::init() { unsigned int nBits = 0; BOOST_FOREACH (FPBReader *rdr, d_readers) { rdr->init(); if (!nBits) { nBits = rdr->nBits(); } else { if (rdr->nBits() != nBits) throw ValueErrorException("bit lengths of child readers don't match"); } } df_init = true; }; MultiFPBReader::MultiFPBReader(std::vector &readers, bool takeOwnership, bool initOnSearch) { df_init = false; df_takeOwnership = takeOwnership; df_initOnSearch = initOnSearch; BOOST_FOREACH (FPBReader *rdr, readers) { PRECONDITION(rdr != NULL, "bad reader"); } d_readers = readers; } FPBReader *MultiFPBReader::getReader(unsigned int which) { URANGE_CHECK(which, d_readers.size()); return d_readers[which]; } unsigned int MultiFPBReader::nBits() const { PRECONDITION(d_readers.size(), "no readers"); PRECONDITION(df_init, "not initialized"); return d_readers[0]->nBits(); } std::vector MultiFPBReader::getTanimotoNeighbors( const boost::uint8_t *bv, double threshold, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector res; get_tani_nbrs(d_readers, bv, threshold, res, numThreads, df_initOnSearch); return res; } std::vector MultiFPBReader::getTanimotoNeighbors( const ExplicitBitVect &ebv, double threshold, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector res; boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); get_tani_nbrs(d_readers, bv, threshold, res, numThreads, df_initOnSearch); delete[] bv; return res; } std::vector MultiFPBReader::getTverskyNeighbors( const boost::uint8_t *bv, double ca, double cb, double threshold, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector res; get_tversky_nbrs(d_readers, bv, ca, cb, threshold, res, numThreads, df_initOnSearch); return res; } std::vector MultiFPBReader::getTverskyNeighbors( const ExplicitBitVect &ebv, double ca, double cb, double threshold, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector res; boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); get_tversky_nbrs(d_readers, bv, ca, cb, threshold, res, numThreads, df_initOnSearch); delete[] bv; return res; } std::vector > MultiFPBReader::getContainingNeighbors(const boost::uint8_t *bv, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector > res; get_containing_nbrs(d_readers, bv, res, numThreads, df_initOnSearch); return res; } std::vector > MultiFPBReader::getContainingNeighbors(const ExplicitBitVect &ebv, int numThreads) const { PRECONDITION(df_init || df_initOnSearch, "not initialized"); std::vector > res; boost::uint8_t *bv = detail::bitsetToBytes(*(ebv.dp_bits)); get_containing_nbrs(d_readers, bv, res, numThreads, df_initOnSearch); delete[] bv; return res; } } // end of RDKit namespace