Files
rdkit/Code/GraphMol/Fingerprints/FingerprintGenerator.cpp
Greg Landrum 9a4cca3967 Allow using generators for similarity maps (#8912)
* add option to track atoms involved in each bit for morgan FP

Needs test still

* support similarity maps using fingerprint generators

* support RDKit, AP, and TT

still need tests

* add some testing

* response to review
2025-11-06 19:12:29 +01:00

807 lines
29 KiB
C++

//
// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <DataStructs/SparseIntVect.h>
#include <DataStructs/ExplicitBitVect.h>
#include <DataStructs/SparseBitVect.h>
#include <GraphMol/Fingerprints/FingerprintGenerator.h>
#include <RDGeneral/hash/hash.hpp>
#include <cstdint>
#include <GraphMol/Fingerprints/AtomPairGenerator.h>
#include <GraphMol/Fingerprints/MorganGenerator.h>
#include <GraphMol/Fingerprints/RDKitFPGenerator.h>
#include <GraphMol/Fingerprints/TopologicalTorsionGenerator.h>
#include <RDGeneral/RDThreads.h>
#ifdef RDK_BUILD_THREADSAFE_SSS
#include <thread>
#include <future>
#endif
namespace RDKit {
// Constructs the argument set shared by the fingerprint generators.
//   countSimulation   - stored in df_countSimulation
//   countBounds       - stored in d_countBounds; must be non-empty whenever
//                       countSimulation is true
//   fpSize            - stored in d_fpSize
//   numBitsPerFeature - stored in d_numBitsPerFeature; must be > 0
//   includeChirality  - stored in df_includeChirality
// Both requirements are enforced with PRECONDITION.
FingerprintArguments::FingerprintArguments(
const bool countSimulation, const std::vector<std::uint32_t> countBounds,
std::uint32_t fpSize, std::uint32_t numBitsPerFeature,
bool includeChirality)
: df_countSimulation(countSimulation),
df_includeChirality(includeChirality),
d_countBounds(countBounds),
d_fpSize(fpSize),
d_numBitsPerFeature(numBitsPerFeature) {
// count simulation cannot work without at least one count bound
PRECONDITION(!countSimulation || !countBounds.empty(),
"bad count bounds provided");
PRECONDITION(d_numBitsPerFeature > 0, "numBitsPerFeature must be >0");
}
// Returns a one-line, human readable summary of the arguments that every
// fingerprint type shares (count simulation flag, size, bits per feature and
// chirality flag).
std::string FingerprintArguments::commonArgumentsString() const {
  std::string summary = "Common arguments : countSimulation=";
  summary += std::to_string(df_countSimulation);
  summary += " fpSize=";
  summary += std::to_string(d_fpSize);
  summary += " bitsPerFeature=";
  summary += std::to_string(d_numBitsPerFeature);
  summary += " includeChirality=";
  summary += std::to_string(df_includeChirality);
  return summary;
}
// Assembles a generator from its components. The two boolean flags record
// whether the destructor is allowed to delete the atom / bond invariants
// generators.
template <typename OutputType>
FingerprintGenerator<OutputType>::FingerprintGenerator(
    AtomEnvironmentGenerator<OutputType> *atomEnvironmentGenerator,
    FingerprintArguments *fingerprintArguments,
    AtomInvariantsGenerator *atomInvariantsGenerator,
    BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator,
    bool ownsBondInvGenerator)
    : df_ownsAtomInvGenerator(ownsAtomInvGenerator),
      df_ownsBondInvGenerator(ownsBondInvGenerator) {
  dp_fingerprintArguments = fingerprintArguments;
  dp_atomInvariantsGenerator = atomInvariantsGenerator;
  dp_bondInvariantsGenerator = bondInvariantsGenerator;

  dp_atomEnvironmentGenerator = atomEnvironmentGenerator;
  // the environment generator is handed the same arguments object that this
  // generator keeps
  dp_atomEnvironmentGenerator->dp_fingerprintArguments = fingerprintArguments;
}
// Explicit instantiations of the constructor for the std::uint32_t and
// std::uint64_t output types.
template FingerprintGenerator<std::uint32_t>::FingerprintGenerator(
AtomEnvironmentGenerator<std::uint32_t> *atomEnvironmentGenerator,
FingerprintArguments *fingerprintArguments,
AtomInvariantsGenerator *atomInvariantsGenerator,
BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator,
bool ownsBondInvGenerator);
template FingerprintGenerator<std::uint64_t>::FingerprintGenerator(
AtomEnvironmentGenerator<std::uint64_t> *atomEnvironmentGenerator,
FingerprintArguments *fingerprintArguments,
AtomInvariantsGenerator *atomInvariantsGenerator,
BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator,
bool ownsBondInvGenerator);
// Releases the components held by the generator. The atom-environment
// generator and the fingerprint arguments are always deleted; the atom and
// bond invariants generators are deleted only when the matching ownership
// flag was set at construction time.
template <typename OutputType>
FingerprintGenerator<OutputType>::~FingerprintGenerator() {
delete dp_atomEnvironmentGenerator;
delete dp_fingerprintArguments;
if (df_ownsAtomInvGenerator) {
delete dp_atomInvariantsGenerator;
}
if (df_ownsBondInvGenerator) {
delete dp_bondInvariantsGenerator;
}
}
namespace {
// Puts every allocated member of an AdditionalOutput back into its "nothing
// recorded yet" state so that the object can be reused for a new molecule.
//   ao       - the output object to reset; members that are not allocated
//              (null) are left alone
//   numAtoms - number of atoms of the molecule about to be processed; the
//              per-atom containers are resized to this length
void reinitAdditionalOutput(AdditionalOutput &ao, size_t numAtoms) {
  if (ao.atomCounts) {
    ao.atomCounts->resize(numAtoms);
    std::fill(ao.atomCounts->begin(), ao.atomCounts->end(), 0);
  }
  if (ao.atomToBits) {
    ao.atomToBits->resize(numAtoms);
    std::fill(ao.atomToBits->begin(), ao.atomToBits->end(),
              std::vector<std::uint64_t>());
  }
  if (ao.bitInfoMap) {
    ao.bitInfoMap->clear();
  }
  if (ao.bitPaths) {
    ao.bitPaths->clear();
  }
  // atomsPerBit has to be reset like the other per-bit maps, otherwise
  // entries from a previously processed molecule would survive
  if (ao.atomsPerBit) {
    ao.atomsPerBit->clear();
  }
}
}  // namespace
// Explicit instantiations of the destructor and of infoString() for the
// std::uint32_t and std::uint64_t output types.
template FingerprintGenerator<std::uint32_t>::~FingerprintGenerator();
template FingerprintGenerator<std::uint64_t>::~FingerprintGenerator();
template std::string FingerprintGenerator<std::uint32_t>::infoString() const;
template std::string FingerprintGenerator<std::uint64_t>::infoString() const;
// Returns a description of the generator: the common arguments, the
// fingerprint-specific arguments, the atom-environment generator and the two
// invariants generators (or a placeholder text when one of them is missing),
// joined with " --- ".
template <typename OutputType>
std::string FingerprintGenerator<OutputType>::infoString() const {
  const std::string separator = " --- ";

  std::string description = dp_fingerprintArguments->commonArgumentsString();
  description += separator;
  description += dp_fingerprintArguments->infoString();
  description += separator;
  description += dp_atomEnvironmentGenerator->infoString();
  description += separator;

  if (dp_atomInvariantsGenerator) {
    description += dp_atomInvariantsGenerator->infoString();
  } else {
    description += "No atom invariants generator";
  }
  description += separator;

  if (dp_bondInvariantsGenerator) {
    description += dp_bondInvariantsGenerator->infoString();
  } else {
    description += "No bond invariants generator";
  }
  return description;
}
// Builds the sparse count vector that the public fingerprint functions in
// this file are derived from.
//   mol    - the molecule to fingerprint
//   args   - per-call options: atom selections, conformer id, custom
//            invariants and the optional additional output
//   fpSize - when nonzero every bit id is reduced modulo fpSize and the result
//            has that length; when zero the length reported by the
//            atom-environment generator is used and ids are left as they are
// Returns the number of times each bit id was generated.
template <typename OutputType>
std::unique_ptr<SparseIntVect<OutputType>>
FingerprintGenerator<OutputType>::getFingerprintHelper(
    const ROMol &mol, FingerprintFuncArguments &args,
    const std::uint64_t fpSize) const {
  const ROMol *lmol = &mol;
  std::unique_ptr<ROMol> tmol;
  if (dp_fingerprintArguments->df_includeChirality &&
      !mol.hasProp(common_properties::_StereochemDone)) {
    // chirality is requested but the molecule is not flagged as having its
    // stereochemistry assigned: assign it on a private copy
    tmol = std::make_unique<ROMol>(mol);
    MolOps::assignStereochemistry(*tmol);
    lmol = tmol.get();
  }
  if (args.additionalOutput) {
    reinitAdditionalOutput(*args.additionalOutput, mol.getNumAtoms());
  }
  const bool hashResults = (fpSize != 0);

  // custom invariants supplied by the caller win over the configured
  // generators
  // NOTE(review): the invariants are computed from `mol`, not from the
  // stereo-assigned copy - confirm that this is intended
  std::unique_ptr<std::vector<std::uint32_t>> atomInvariants;
  if (args.customAtomInvariants) {
    atomInvariants = std::make_unique<std::vector<std::uint32_t>>(
        *args.customAtomInvariants);
  } else if (dp_atomInvariantsGenerator) {
    atomInvariants.reset(dp_atomInvariantsGenerator->getAtomInvariants(mol));
  }
  std::unique_ptr<std::vector<std::uint32_t>> bondInvariants;
  if (args.customBondInvariants) {
    bondInvariants = std::make_unique<std::vector<std::uint32_t>>(
        *args.customBondInvariants);
  } else if (dp_bondInvariantsGenerator) {
    bondInvariants.reset(dp_bondInvariantsGenerator->getBondInvariants(mol));
  }

  // create all atom environments that will generate the bit-ids that will make
  // up the fingerprint
  auto atomEnvironments = dp_atomEnvironmentGenerator->getEnvironments(
      *lmol, dp_fingerprintArguments, args.fromAtoms, args.ignoreAtoms,
      args.confId, args.additionalOutput, atomInvariants.get(),
      bondInvariants.get(), hashResults);

  // The environments are handed over as raw pointers that this function has
  // to delete. The guard does that on every exit path, so nothing leaks when
  // one of the calls in the loop below throws.
  struct EnvironmentCleanup {
    decltype(atomEnvironments) &envs;
    ~EnvironmentCleanup() {
      for (auto *env : envs) {
        delete env;
      }
    }
  } environmentCleanup{atomEnvironments};

  // allocate the result
  auto res = std::make_unique<SparseIntVect<OutputType>>(
      fpSize ? fpSize : dp_atomEnvironmentGenerator->getResultSize());

  // define a mersenne twister with customized parameters.
  // The standard parameters (used to create boost::mt19937)
  // result in an RNG that's much too computationally intensive
  // to seed.
  // These are the parameters that have been used for the RDKit fingerprint.
  typedef boost::random::mersenne_twister<std::uint32_t, 32, 4, 2, 31,
                                          0x9908b0df, 11, 7, 0x9d2c5680, 15,
                                          0xefc60000, 18, 3346425566U>
      rng_type;
  typedef boost::uniform_int<> distrib_type;
  typedef boost::variate_generator<rng_type &, distrib_type> source_type;
  std::unique_ptr<rng_type> generator;
  //
  // if we generate arbitrarily sized ints then mod them down to the
  // appropriate size, we can guarantee that a fingerprint of
  // size x has the same bits set as one of size 2x that's been folded
  // in half.  This is a nice guarantee to have.
  //
  std::unique_ptr<distrib_type> dist;
  std::unique_ptr<source_type> randomSource;
  const auto numBitsPerFeature = dp_fingerprintArguments->d_numBitsPerFeature;
  if (numBitsPerFeature > 1) {
    // we will only create the RNG if we're going to need it
    generator.reset(new rng_type(42u));
    dist.reset(new distrib_type(0, INT_MAX));
    randomSource.reset(new source_type(*generator, *dist));
  }

  // iterate over every atom environment and generate bit-ids that will make up
  // the fingerprint
  for (auto *env : atomEnvironments) {
    OutputType seed = env->getBitId(dp_fingerprintArguments,
                                    atomInvariants.get(), bondInvariants.get(),
                                    args.additionalOutput, hashResults, fpSize);
    auto bitId = seed;
    if (fpSize != 0) {
      bitId %= fpSize;
    }
    res->setVal(bitId, res->getVal(bitId) + 1);
    if (args.additionalOutput) {
      env->updateAdditionalOutput(args.additionalOutput, bitId);
    }
    // do the additional bits if required:
    if (numBitsPerFeature > 1) {
      // seeding with the unfolded id makes the extra bits a deterministic
      // function of the feature
      generator->seed(static_cast<rng_type::result_type>(seed));
      for (std::uint32_t bitN = 1; bitN < numBitsPerFeature; ++bitN) {
        bitId = (*randomSource)();
        if (fpSize != 0) {
          bitId %= fpSize;
        }
        res->setVal(bitId, res->getVal(bitId) + 1);
        if (args.additionalOutput) {
          env->updateAdditionalOutput(args.additionalOutput, bitId);
        }
      }
    }
  }
  return res;
}
namespace {
// Copies everything that oldAO recorded for origBitId into newAO under
// newBitId. It is used to carry additional output over to the bit ids produced
// by count simulation.
//   oldAO     - source of the information
//   newAO     - destination; for each of the atomToBits / bitInfoMap /
//               bitPaths / atomsPerBit members it must be allocated exactly
//               when the corresponding member of oldAO is
//   origBitId - bit id to look up in oldAO
//   newBitId  - bit id to record in newAO
template <typename OutputType>
void duplicateAdditionalOutputBit(AdditionalOutput &oldAO,
                                  AdditionalOutput &newAO, OutputType origBitId,
                                  OutputType newBitId) {
  PRECONDITION(!((oldAO.bitInfoMap != nullptr) ^ (newAO.bitInfoMap != nullptr)),
               "bitInfoMap not allocated");
  PRECONDITION(!((oldAO.atomToBits != nullptr) ^ (newAO.atomToBits != nullptr)),
               "atomToBits not allocated");
  PRECONDITION(!((oldAO.bitPaths != nullptr) ^ (newAO.bitPaths != nullptr)),
               "bitPaths not allocated");
  // newAO.atomsPerBit is dereferenced below, so it gets the same consistency
  // check as the other members
  PRECONDITION(
      !((oldAO.atomsPerBit != nullptr) ^ (newAO.atomsPerBit != nullptr)),
      "atomsPerBit not allocated");
  // we don't need to do anything with atomCounts
  if (oldAO.atomToBits) {
    if (newAO.atomToBits->empty()) {
      newAO.atomToBits->resize(oldAO.atomToBits->size());
    }
    // every atom that contributed to the original bit also contributes to the
    // new one
    for (unsigned int i = 0; i < oldAO.atomToBits->size(); ++i) {
      const auto &atomBits = oldAO.atomToBits->at(i);
      if (std::find(atomBits.begin(), atomBits.end(), origBitId) !=
          atomBits.end()) {
        newAO.atomToBits->at(i).push_back(newBitId);
      }
    }
  }
  if (oldAO.bitInfoMap) {
    const auto found = oldAO.bitInfoMap->find(origBitId);
    if (found != oldAO.bitInfoMap->end()) {
      (*newAO.bitInfoMap)[newBitId] = found->second;
    }
  }
  if (oldAO.bitPaths) {
    const auto found = oldAO.bitPaths->find(origBitId);
    if (found != oldAO.bitPaths->end()) {
      (*newAO.bitPaths)[newBitId] = found->second;
    }
  }
  if (oldAO.atomsPerBit) {
    const auto found = oldAO.atomsPerBit->find(origBitId);
    if (found != oldAO.atomsPerBit->end()) {
      (*newAO.atomsPerBit)[newBitId] = found->second;
    }
  }
}
// Prepares a scratch AdditionalOutput for count simulation: every member that
// is allocated in the caller's output (args.additionalOutput, which must not
// be null) is allocated in countSimulationOutput too. The caller's output is
// then reset for a molecule with numAtoms atoms.
void setupTempAdditionalOutput(RDKit::FingerprintFuncArguments &args,
                               AdditionalOutput &countSimulationOutput,
                               size_t numAtoms) {
  auto &requested = *args.additionalOutput;
  if (requested.atomToBits) {
    countSimulationOutput.allocateAtomToBits();
  }
  if (requested.atomCounts) {
    countSimulationOutput.allocateAtomCounts();
  }
  if (requested.bitInfoMap) {
    countSimulationOutput.allocateBitInfoMap();
  }
  if (requested.bitPaths) {
    countSimulationOutput.allocateBitPaths();
  }
  if (requested.atomsPerBit) {
    countSimulationOutput.allocateAtomsPerBit();
  }
  reinitAdditionalOutput(requested, numAtoms);
}
} // namespace
// Returns the sparse count fingerprint of mol. No size is passed to the
// helper, so its default fpSize applies.
template <typename OutputType>
std::unique_ptr<SparseIntVect<OutputType>>
FingerprintGenerator<OutputType>::getSparseCountFingerprint(
    const ROMol &mol, FingerprintFuncArguments &args) const {
  auto sparseCounts = this->getFingerprintHelper(mol, args);
  return sparseCounts;
}
// TODO: getSparseFingerprint does not produce exactly the same output as
// getSparseCountFingerprint. Count simulation and potential 64 bit outputs
// make size limiting necessary for getSparseFingerprint. This can be
// changed if there is another way to avoid the size limitation of
// SparseBitVect.
//
// Returns the sparse bit-vector fingerprint of mol.
//   mol  - the molecule to fingerprint
//   args - per-call options; args.additionalOutput is temporarily redirected
//          while count simulation runs and is always restored before this
//          function is left
// Throws ValueErrorException when count simulation is enabled but no count
// bounds are available.
template <typename OutputType>
std::unique_ptr<SparseBitVect>
FingerprintGenerator<OutputType>::getSparseFingerprint(
    const ROMol &mol, FingerprintFuncArguments &args) const {
  // make sure the result will fit into SparseBitVect
  const auto resultSize = static_cast<std::uint32_t>(std::min(
      static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()),
      static_cast<std::uint64_t>(
          dp_atomEnvironmentGenerator->getResultSize())));
  const bool countSimulation = dp_fingerprintArguments->df_countSimulation;
  const auto &countBounds = dp_fingerprintArguments->d_countBounds;

  std::uint32_t effectiveSize = resultSize;
  if (countSimulation) {
    // same guard as in getFingerprint(): without it the division below would
    // divide by zero
    if (countBounds.empty()) {
      throw ValueErrorException("Count bounds are empty");
    }
    // effective size needs to be smaller than result size to compensate for
    // count simulation
    effectiveSize /= countBounds.size();
  }

  AdditionalOutput countSimulationOutput;
  AdditionalOutput *origAO = nullptr;
  // Puts the caller's additionalOutput pointer back on every exit path. If it
  // were left pointing at countSimulationOutput after an exception, the caller
  // would be holding a dangling pointer.
  struct AdditionalOutputRestorer {
    FingerprintFuncArguments &d_args;
    AdditionalOutput *&d_saved;
    ~AdditionalOutputRestorer() {
      if (d_saved) {
        d_args.additionalOutput = d_saved;
      }
    }
  } restorer{args, origAO};
  if (countSimulation && args.additionalOutput) {
    setupTempAdditionalOutput(args, countSimulationOutput, mol.getNumAtoms());
    origAO = args.additionalOutput;
    args.additionalOutput = &countSimulationOutput;
  }

  auto tempResult = getFingerprintHelper(mol, args, effectiveSize);
  auto result = std::make_unique<SparseBitVect>(resultSize);
  for (const auto &val : tempResult->getNonzeroElements()) {
    if (!countSimulation) {
      result->setBit(val.first);
      continue;
    }
    for (unsigned int i = 0; i < countBounds.size(); ++i) {
      // for every bound in the d_countBounds in dp_fingerprintArguments, set
      // a bit if the occurrence count is equal or higher than the bound for
      // that bit
      if (val.second >= static_cast<int>(countBounds[i])) {
        OutputType nBitId = val.first * countBounds.size() + i;
        result->setBit(nBitId);
        if (origAO) {
          // carry the information recorded for the folded bit over to the
          // count-simulation bit in the caller's output
          duplicateAdditionalOutputBit(countSimulationOutput, *origAO,
                                       static_cast<OutputType>(val.first),
                                       nBitId);
        }
      }
    }
  }
  if (origAO && origAO->atomCounts) {
    // atom counts do not depend on bit ids, so they are copied as they are
    *origAO->atomCounts = *countSimulationOutput.atomCounts;
  }
  return result;
}
// Returns the count fingerprint of mol with the length configured in
// d_fpSize. The helper's result is copied entry by entry into a
// SparseIntVect<std::uint32_t>.
template <typename OutputType>
std::unique_ptr<SparseIntVect<std::uint32_t>>
FingerprintGenerator<OutputType>::getCountFingerprint(
    const ROMol &mol, FingerprintFuncArguments &args) const {
  const auto fpSize = dp_fingerprintArguments->d_fpSize;
  auto folded = getFingerprintHelper(mol, args, fpSize);

  auto counts = std::make_unique<SparseIntVect<std::uint32_t>>(fpSize);
  for (const auto &entry : folded->getNonzeroElements()) {
    counts->setVal(entry.first, entry.second);
  }
  return counts;
}
// Returns the explicit bit-vector fingerprint of mol; its length is d_fpSize.
//   mol  - the molecule to fingerprint
//   args - per-call options; args.additionalOutput is temporarily redirected
//          while count simulation runs and is always restored before this
//          function is left
// Throws ValueErrorException when count simulation is enabled and the count
// bounds are empty or there are at least as many of them as bits.
template <typename OutputType>
std::unique_ptr<ExplicitBitVect>
FingerprintGenerator<OutputType>::getFingerprint(
    const ROMol &mol, FingerprintFuncArguments &args) const {
  const bool countSimulation = dp_fingerprintArguments->df_countSimulation;
  const auto &countBounds = dp_fingerprintArguments->d_countBounds;

  std::uint32_t effectiveSize = dp_fingerprintArguments->d_fpSize;
  if (countSimulation) {
    if (countBounds.empty()) {
      throw ValueErrorException("Count bounds are empty");
    }
    if (countBounds.size() >= effectiveSize) {
      throw ValueErrorException("Count bounds size is >= fingerprint size");
    }
    // effective size needs to be smaller than result size to compensate for
    // count simulation
    effectiveSize /= countBounds.size();
  }

  AdditionalOutput countSimulationOutput;
  AdditionalOutput *origAO = nullptr;
  // Puts the caller's additionalOutput pointer back on every exit path. If it
  // were left pointing at countSimulationOutput after an exception, the caller
  // would be holding a dangling pointer.
  struct AdditionalOutputRestorer {
    FingerprintFuncArguments &d_args;
    AdditionalOutput *&d_saved;
    ~AdditionalOutputRestorer() {
      if (d_saved) {
        d_args.additionalOutput = d_saved;
      }
    }
  } restorer{args, origAO};
  if (countSimulation && args.additionalOutput) {
    setupTempAdditionalOutput(args, countSimulationOutput, mol.getNumAtoms());
    origAO = args.additionalOutput;
    args.additionalOutput = &countSimulationOutput;
  }

  auto tempResult = getFingerprintHelper(mol, args, effectiveSize);
  auto result =
      std::make_unique<ExplicitBitVect>(dp_fingerprintArguments->d_fpSize);
  for (const auto &val : tempResult->getNonzeroElements()) {
    if (!countSimulation) {
      result->setBit(val.first);
      continue;
    }
    for (unsigned int i = 0; i < countBounds.size(); ++i) {
      // for every bound in the d_countBounds in dp_fingerprintArguments,
      // set a bit if the occurrence count is equal or higher than the bound
      // for that bit
      if (val.second >= static_cast<int>(countBounds[i])) {
        OutputType nBitId = val.first * countBounds.size() + i;
        result->setBit(nBitId);
        if (origAO) {
          // carry the information recorded for the folded bit over to the
          // count-simulation bit in the caller's output
          duplicateAdditionalOutputBit(countSimulationOutput, *origAO,
                                       static_cast<OutputType>(val.first),
                                       nBitId);
        }
      }
    }
  }
  if (origAO && origAO->atomCounts) {
    // atom counts do not depend on bit ids, so they are copied as they are
    *origAO->atomCounts = *countSimulationOutput.atomCounts;
  }
  return result;
}
namespace {
// Computes one fingerprint per entry of mols, optionally spreading the work
// over several threads.
//   func       - callable taking (const ROMol &, FingerprintFuncArguments &)
//                and returning the fingerprint for that molecule
//   mols       - the molecules; a null entry yields a null result
//   numThreads - requested thread count, translated by getNumThreadsToUse()
// Returns the fingerprints in the same order as mols. An exception thrown
// while fingerprinting a molecule is propagated to the caller, also when it
// happens on a worker thread.
template <typename ReturnType, typename FuncType>
std::vector<std::unique_ptr<ReturnType>> mtgetFingerprints(
    FuncType func, const std::vector<const ROMol *> &mols, int numThreads) {
  // the bulk interface exposes none of the per-molecule options, so every
  // call runs with the defaults
  std::vector<std::uint32_t> *fromAtoms = nullptr;
  std::vector<std::uint32_t> *ignoreAtoms = nullptr;
  std::vector<std::uint32_t> *customAtomInvariants = nullptr;
  std::vector<std::uint32_t> *customBondInvariants = nullptr;
  int confId = -1;
  AdditionalOutput *additionalOutput = nullptr;
  FingerprintFuncArguments args(fromAtoms, ignoreAtoms, confId,
                                additionalOutput, customAtomInvariants,
                                customBondInvariants);

  std::vector<std::unique_ptr<ReturnType>> result;
  const auto numThreadsToUse = getNumThreadsToUse(numThreads);
  const auto nmols = mols.size();
  result.reserve(nmols);

#ifdef RDK_BUILD_THREADSAFE_SSS
  if (numThreadsToUse > 1) {
    // worker t handles molecules t, t + numThreadsToUse, ... and collects its
    // results in accum[t]
    std::vector<std::vector<std::unique_ptr<ReturnType>>> accum(
        numThreadsToUse);
    auto worker = [&](unsigned int tidx) {
      for (std::size_t midx = tidx; midx < nmols; midx += numThreadsToUse) {
        if (!mols[midx]) {
          accum[tidx].emplace_back(std::unique_ptr<ReturnType>());
        } else {
          accum[tidx].emplace_back(func(*mols[midx], args));
        }
      }
    };
    // std::async is used instead of bare std::thread objects: an exception
    // escaping a thread function would terminate the process, whereas
    // future::get() rethrows it here. The futures also wait for their task on
    // destruction, so no worker can outlive the data it references.
    std::vector<std::future<void>> tasks;
    tasks.reserve(numThreadsToUse);
    for (auto ti = 0u; ti < numThreadsToUse; ++ti) {
      tasks.emplace_back(std::async(std::launch::async, worker, ti));
    }
    for (auto &task : tasks) {
      task.get();
    }
    // interleave the per-worker results back into input order
    for (std::size_t midx = 0; midx < nmols; ++midx) {
      const auto tidx = midx % numThreadsToUse;
      const auto jidx = midx / numThreadsToUse;
      result.emplace_back(std::move(accum[tidx][jidx]));
    }
    return result;
  }
#endif

  // serial path; also used when the build has no thread support, so that the
  // caller never gets an empty result for a non-empty input
  for (std::size_t i = 0; i < nmols; ++i) {
    if (!mols[i]) {
      result.emplace_back(std::unique_ptr<ReturnType>());
    } else {
      result.emplace_back(func(*mols[i], args));
    }
  }
  return result;
}
}  // namespace
// Bulk version of getFingerprint(): one ExplicitBitVect per entry of mols,
// computed through mtgetFingerprints() with the requested thread count.
template <typename OutputType>
std::vector<std::unique_ptr<ExplicitBitVect>>
FingerprintGenerator<OutputType>::getFingerprints(
    const std::vector<const ROMol *> &mols, int numThreads) const {
  auto computeOne = [this](const ROMol &m, FingerprintFuncArguments &ffa) {
    return this->getFingerprint(m, ffa);
  };
  return mtgetFingerprints<ExplicitBitVect>(computeOne, mols, numThreads);
}
// Bulk version of getSparseFingerprint(): one SparseBitVect per entry of
// mols, computed through mtgetFingerprints() with the requested thread count.
template <typename OutputType>
std::vector<std::unique_ptr<SparseBitVect>>
FingerprintGenerator<OutputType>::getSparseFingerprints(
    const std::vector<const ROMol *> &mols, int numThreads) const {
  auto computeOne = [this](const ROMol &m, FingerprintFuncArguments &ffa) {
    return this->getSparseFingerprint(m, ffa);
  };
  return mtgetFingerprints<SparseBitVect>(computeOne, mols, numThreads);
}
// Bulk version of getCountFingerprint(): one SparseIntVect<std::uint32_t> per
// entry of mols, computed through mtgetFingerprints() with the requested
// thread count.
template <typename OutputType>
std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>>
FingerprintGenerator<OutputType>::getCountFingerprints(
    const std::vector<const ROMol *> &mols, int numThreads) const {
  auto computeOne = [this](const ROMol &m, FingerprintFuncArguments &ffa) {
    return this->getCountFingerprint(m, ffa);
  };
  return mtgetFingerprints<SparseIntVect<std::uint32_t>>(computeOne, mols,
                                                         numThreads);
}
// Bulk version of getSparseCountFingerprint(): one SparseIntVect<OutputType>
// per entry of mols, computed through mtgetFingerprints() with the requested
// thread count.
template <typename OutputType>
std::vector<std::unique_ptr<SparseIntVect<OutputType>>>
FingerprintGenerator<OutputType>::getSparseCountFingerprints(
    const std::vector<const ROMol *> &mols, int numThreads) const {
  auto computeOne = [this](const ROMol &m, FingerprintFuncArguments &ffa) {
    return this->getSparseCountFingerprint(m, ffa);
  };
  return mtgetFingerprints<SparseIntVect<OutputType>>(computeOne, mols,
                                                      numThreads);
}
// Explicit instantiations of the single-molecule and bulk fingerprint
// functions for the std::uint32_t and std::uint64_t output types.
// single molecule, FingerprintFuncArguments based:
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseIntVect<std::uint32_t>>
FingerprintGenerator<std::uint32_t>::getSparseCountFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseIntVect<std::uint64_t>>
FingerprintGenerator<std::uint64_t>::getSparseCountFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseBitVect>
FingerprintGenerator<std::uint32_t>::getSparseFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseBitVect>
FingerprintGenerator<std::uint64_t>::getSparseFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseIntVect<std::uint32_t>>
FingerprintGenerator<std::uint32_t>::getCountFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<SparseIntVect<std::uint32_t>>
FingerprintGenerator<std::uint64_t>::getCountFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<ExplicitBitVect>
FingerprintGenerator<std::uint32_t>::getFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
template RDKIT_FINGERPRINTS_EXPORT std::unique_ptr<ExplicitBitVect>
FingerprintGenerator<std::uint64_t>::getFingerprint(
const ROMol &mol, FingerprintFuncArguments &args) const;
// bulk (vector of molecules) versions:
template RDKIT_FINGERPRINTS_EXPORT std::vector<std::unique_ptr<ExplicitBitVect>>
FingerprintGenerator<std::uint32_t>::getFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT std::vector<std::unique_ptr<ExplicitBitVect>>
FingerprintGenerator<std::uint64_t>::getFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT std::vector<std::unique_ptr<SparseBitVect>>
FingerprintGenerator<std::uint32_t>::getSparseFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT std::vector<std::unique_ptr<SparseBitVect>>
FingerprintGenerator<std::uint64_t>::getSparseFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT
std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>>
FingerprintGenerator<std::uint32_t>::getCountFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT
std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>>
FingerprintGenerator<std::uint64_t>::getCountFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT
std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>>
FingerprintGenerator<std::uint32_t>::getSparseCountFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
template RDKIT_FINGERPRINTS_EXPORT
std::vector<std::unique_ptr<SparseIntVect<std::uint64_t>>>
FingerprintGenerator<std::uint64_t>::getSparseCountFingerprints(
const std::vector<const ROMol *> &mols, int numThreads) const;
// Convenience wrapper: sparse count fingerprint of a single molecule for the
// given fingerprint type, computed through getSparseCountFPBulk().
// The caller owns the returned pointer.
SparseIntVect<std::uint64_t> *getSparseCountFP(const ROMol &mol,
                                               FPType fPType) {
  std::vector<const ROMol *> tempVect(1, &mol);
  // the bulk call returns a heap-allocated vector; hold it in a unique_ptr so
  // that the container itself is freed once the single result is taken out
  std::unique_ptr<std::vector<SparseIntVect<std::uint64_t> *>> fps(
      getSparseCountFPBulk(tempVect, fPType));
  return fps->front();
}
// Convenience wrapper: sparse bit-vector fingerprint of a single molecule for
// the given fingerprint type, computed through getSparseFPBulk().
// The caller owns the returned pointer.
SparseBitVect *getSparseFP(const ROMol &mol, FPType fPType) {
  std::vector<const ROMol *> tempVect(1, &mol);
  // the bulk call returns a heap-allocated vector; hold it in a unique_ptr so
  // that the container itself is freed once the single result is taken out
  std::unique_ptr<std::vector<SparseBitVect *>> fps(
      getSparseFPBulk(tempVect, fPType));
  return fps->front();
}
// Convenience wrapper: count fingerprint of a single molecule for the given
// fingerprint type, computed through getCountFPBulk().
// The caller owns the returned pointer.
SparseIntVect<std::uint32_t> *getCountFP(const ROMol &mol, FPType fPType) {
  std::vector<const ROMol *> tempVect(1, &mol);
  // the bulk call returns a heap-allocated vector; hold it in a unique_ptr so
  // that the container itself is freed once the single result is taken out
  std::unique_ptr<std::vector<SparseIntVect<std::uint32_t> *>> fps(
      getCountFPBulk(tempVect, fPType));
  return fps->front();
}
// Convenience wrapper: explicit bit-vector fingerprint of a single molecule
// for the given fingerprint type, computed through getFPBulk().
// The caller owns the returned pointer.
ExplicitBitVect *getFP(const ROMol &mol, FPType fPType) {
  std::vector<const ROMol *> tempVect(1, &mol);
  // the bulk call returns a heap-allocated vector; hold it in a unique_ptr so
  // that the container itself is freed once the single result is taken out
  std::unique_ptr<std::vector<ExplicitBitVect *>> fps(
      getFPBulk(tempVect, fPType));
  return fps->front();
}
// Computes the sparse count fingerprint of every molecule in molVector with a
// default-configured generator of the requested type (Morgan uses radius 2).
//   molVector - the molecules; a null entry yields a null result
//   fPType    - which fingerprint to generate
// Returns a heap-allocated vector of heap-allocated fingerprints, in input
// order; the caller owns both the vector and its elements.
// Throws UnimplementedFPException for an unsupported fPType.
std::vector<SparseIntVect<std::uint64_t> *> *getSparseCountFPBulk(
    const std::vector<const ROMol *> molVector, FPType fPType) {
  // owned through a unique_ptr so that it is released on every exit path
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator;
  switch (fPType) {
    case FPType::AtomPairFP: {
      generator.reset(AtomPair::getAtomPairGenerator<std::uint64_t>());
      break;
    }
    case FPType::MorganFP: {
      generator.reset(
          MorganFingerprint::getMorganGenerator<std::uint64_t>(2));
      break;
    }
    case FPType::RDKitFP: {
      generator.reset(RDKitFP::getRDKitFPGenerator<std::uint64_t>());
      break;
    }
    case FPType::TopologicalTorsionFP: {
      generator.reset(
          TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>());
      break;
    }
    default: {
      throw UnimplementedFPException(
          "Fingerprint type not implemented for getSparseCountFP");
    }
  }

  // collect the results in owning pointers first: if a later molecule throws,
  // the fingerprints produced so far are freed instead of leaked
  std::vector<std::unique_ptr<SparseIntVect<std::uint64_t>>> owned;
  owned.reserve(molVector.size());
  for (const auto *mol : molVector) {
    owned.emplace_back(mol ? generator->getSparseCountFingerprint(*mol)
                           : nullptr);
  }

  auto res = std::make_unique<std::vector<SparseIntVect<std::uint64_t> *>>();
  res->reserve(owned.size());
  for (auto &fp : owned) {
    res->push_back(fp.release());
  }
  return res.release();
}
// Computes the sparse bit-vector fingerprint of every molecule in molVector
// with a default-configured generator of the requested type (Morgan uses
// radius 2).
//   molVector - the molecules; a null entry yields a null result
//   fPType    - which fingerprint to generate
// Returns a heap-allocated vector of heap-allocated fingerprints, in input
// order; the caller owns both the vector and its elements.
// Throws UnimplementedFPException for an unsupported fPType.
std::vector<SparseBitVect *> *getSparseFPBulk(
    const std::vector<const ROMol *> molVector, FPType fPType) {
  // owned through a unique_ptr so that it is released on every exit path
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator;
  switch (fPType) {
    case FPType::AtomPairFP: {
      generator.reset(AtomPair::getAtomPairGenerator<std::uint64_t>());
      break;
    }
    case FPType::MorganFP: {
      generator.reset(
          MorganFingerprint::getMorganGenerator<std::uint64_t>(2));
      break;
    }
    case FPType::RDKitFP: {
      generator.reset(RDKitFP::getRDKitFPGenerator<std::uint64_t>());
      break;
    }
    case FPType::TopologicalTorsionFP: {
      generator.reset(
          TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>());
      break;
    }
    default: {
      throw UnimplementedFPException(
          "Fingerprint type not implemented for getSparseFP");
    }
  }

  // collect the results in owning pointers first: if a later molecule throws,
  // the fingerprints produced so far are freed instead of leaked
  std::vector<std::unique_ptr<SparseBitVect>> owned;
  owned.reserve(molVector.size());
  for (const auto *mol : molVector) {
    owned.emplace_back(mol ? generator->getSparseFingerprint(*mol) : nullptr);
  }

  auto res = std::make_unique<std::vector<SparseBitVect *>>();
  res->reserve(owned.size());
  for (auto &fp : owned) {
    res->push_back(fp.release());
  }
  return res.release();
}
// Computes the count fingerprint of every molecule in molVector with a
// default-configured generator of the requested type (Morgan uses radius 2).
//   molVector - the molecules; a null entry yields a null result
//   fPType    - which fingerprint to generate
// Returns a heap-allocated vector of heap-allocated fingerprints, in input
// order; the caller owns both the vector and its elements.
// Throws UnimplementedFPException for an unsupported fPType.
std::vector<SparseIntVect<std::uint32_t> *> *getCountFPBulk(
    const std::vector<const ROMol *> molVector, FPType fPType) {
  // owned through a unique_ptr so that it is released on every exit path
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator;
  switch (fPType) {
    case FPType::AtomPairFP: {
      generator.reset(AtomPair::getAtomPairGenerator<std::uint64_t>());
      break;
    }
    case FPType::MorganFP: {
      generator.reset(
          MorganFingerprint::getMorganGenerator<std::uint64_t>(2));
      break;
    }
    case FPType::RDKitFP: {
      generator.reset(RDKitFP::getRDKitFPGenerator<std::uint64_t>());
      break;
    }
    case FPType::TopologicalTorsionFP: {
      generator.reset(
          TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>());
      break;
    }
    default: {
      throw UnimplementedFPException(
          "Fingerprint type not implemented for getCountFP");
    }
  }

  // collect the results in owning pointers first: if a later molecule throws,
  // the fingerprints produced so far are freed instead of leaked
  std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>> owned;
  owned.reserve(molVector.size());
  for (const auto *mol : molVector) {
    owned.emplace_back(mol ? generator->getCountFingerprint(*mol) : nullptr);
  }

  auto res = std::make_unique<std::vector<SparseIntVect<std::uint32_t> *>>();
  res->reserve(owned.size());
  for (auto &fp : owned) {
    res->push_back(fp.release());
  }
  return res.release();
}
// Computes the explicit bit-vector fingerprint of every molecule in molVector
// with a default-configured generator of the requested type (Morgan uses
// radius 2).
//   molVector - the molecules; a null entry yields a null result
//   fPType    - which fingerprint to generate
// Returns a heap-allocated vector of heap-allocated fingerprints, in input
// order; the caller owns both the vector and its elements.
// Throws UnimplementedFPException for an unsupported fPType.
std::vector<ExplicitBitVect *> *getFPBulk(
    const std::vector<const ROMol *> molVector, FPType fPType) {
  // owned through a unique_ptr so that it is released on every exit path
  std::unique_ptr<FingerprintGenerator<std::uint64_t>> generator;
  switch (fPType) {
    case FPType::AtomPairFP: {
      generator.reset(AtomPair::getAtomPairGenerator<std::uint64_t>());
      break;
    }
    case FPType::MorganFP: {
      generator.reset(
          MorganFingerprint::getMorganGenerator<std::uint64_t>(2));
      break;
    }
    case FPType::RDKitFP: {
      generator.reset(RDKitFP::getRDKitFPGenerator<std::uint64_t>());
      break;
    }
    case FPType::TopologicalTorsionFP: {
      generator.reset(
          TopologicalTorsion::getTopologicalTorsionGenerator<std::uint64_t>());
      break;
    }
    default: {
      throw UnimplementedFPException(
          "Fingerprint type not implemented for getFP");
    }
  }

  // collect the results in owning pointers first: if a later molecule throws,
  // the fingerprints produced so far are freed instead of leaked
  std::vector<std::unique_ptr<ExplicitBitVect>> owned;
  owned.reserve(molVector.size());
  for (const auto *mol : molVector) {
    owned.emplace_back(mol ? generator->getFingerprint(*mol) : nullptr);
  }

  auto res = std::make_unique<std::vector<ExplicitBitVect *>>();
  res->reserve(owned.size());
  for (auto &fp : owned) {
    res->push_back(fp.release());
  }
  return res.release();
}
} // namespace RDKit