// // Copyright (C) 2018 Boran Adas, Google Summer of Code // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include namespace RDKit { template FingerprintArguments::FingerprintArguments( const bool countSimulation, const std::vector countBounds, std::uint32_t fpSize) : d_countSimulation(countSimulation), d_countBounds(countBounds), d_fpSize(fpSize) { PRECONDITION(!countSimulation || !countBounds.empty(), "bad count bounds provided"); } template FingerprintArguments::FingerprintArguments( const bool countSimulation, const std::vector countBounds, std::uint32_t fpSize); template FingerprintArguments::FingerprintArguments( const bool countSimulation, const std::vector countBounds, std::uint32_t fpSize); template std::string FingerprintArguments::commonArgumentsString() const { return "Common arguments : countSimulation=" + std::to_string(d_countSimulation) + " fpSize=" + std::to_string(d_fpSize); } template FingerprintGenerator::FingerprintGenerator( AtomEnvironmentGenerator *atomEnvironmentGenerator, FingerprintArguments *fingerprintArguments, AtomInvariantsGenerator *atomInvariantsGenerator, BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator, bool ownsBondInvGenerator) : df_ownsAtomInvGenerator(ownsAtomInvGenerator), df_ownsBondInvGenerator(ownsBondInvGenerator) { this->dp_atomEnvironmentGenerator = atomEnvironmentGenerator; this->dp_fingerprintArguments = fingerprintArguments; this->dp_atomInvariantsGenerator = atomInvariantsGenerator; this->dp_bondInvariantsGenerator = bondInvariantsGenerator; } template FingerprintGenerator::FingerprintGenerator( AtomEnvironmentGenerator *atomEnvironmentGenerator, FingerprintArguments *fingerprintArguments, AtomInvariantsGenerator *atomInvariantsGenerator, BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator, bool ownsBondInvGenerator); template FingerprintGenerator::FingerprintGenerator( AtomEnvironmentGenerator *atomEnvironmentGenerator, FingerprintArguments *fingerprintArguments, AtomInvariantsGenerator *atomInvariantsGenerator, BondInvariantsGenerator *bondInvariantsGenerator, bool ownsAtomInvGenerator, bool ownsBondInvGenerator); template FingerprintGenerator::~FingerprintGenerator() { delete dp_atomEnvironmentGenerator; delete dp_fingerprintArguments; if (df_ownsAtomInvGenerator) { delete dp_atomInvariantsGenerator; } if (df_ownsBondInvGenerator) { delete dp_bondInvariantsGenerator; } } template FingerprintGenerator::~FingerprintGenerator(); template FingerprintGenerator::~FingerprintGenerator(); template std::string FingerprintGenerator::infoString() const; template std::string FingerprintGenerator::infoString() const; template std::string FingerprintGenerator::infoString() const { std::string seperator = " --- "; return dp_fingerprintArguments->commonArgumentsString() + seperator + dp_fingerprintArguments->infoString() + seperator + dp_atomEnvironmentGenerator->infoString() + seperator + (dp_atomInvariantsGenerator ? (dp_atomInvariantsGenerator->infoString() + seperator) : ("No atom invariants generator" + seperator)) + (dp_bondInvariantsGenerator ? (dp_bondInvariantsGenerator->infoString()) : "No bond invariants generator"); } template SparseIntVect *FingerprintGenerator::getFingerprintHelper( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants, const std::uint64_t fpSize) const { bool hashResults = false; if (fpSize != 0) { hashResults = true; } std::vector *atomInvariants = nullptr; if (customAtomInvariants) { atomInvariants = new std::vector(*customAtomInvariants); } else if (dp_atomInvariantsGenerator) { atomInvariants = dp_atomInvariantsGenerator->getAtomInvariants(mol); } std::vector *bondInvariants = nullptr; if (customBondInvariants) { bondInvariants = new std::vector(*customBondInvariants); } else if (dp_bondInvariantsGenerator) { bondInvariants = dp_bondInvariantsGenerator->getBondInvariants(mol); } // create all atom environments that will generate the bit-ids that will make // up the fingerprint std::vector *> atomEnvironments = dp_atomEnvironmentGenerator->getEnvironments( mol, dp_fingerprintArguments, fromAtoms, ignoreAtoms, confId, additionalOutput, atomInvariants, bondInvariants, hashResults); // allocate the result SparseIntVect *res = nullptr; if (fpSize != 0) { res = new SparseIntVect(fpSize); } else { res = new SparseIntVect(dp_fingerprintArguments->getResultSize()); } // iterate over every atom environment and generate bit-ids that will make up // the fingerprint for (auto it = atomEnvironments.begin(); it != atomEnvironments.end(); it++) { OutputType bitId = (*it)->getBitId(dp_fingerprintArguments, atomInvariants, bondInvariants, additionalOutput, hashResults); if (fpSize != 0) { bitId %= fpSize; } res->setVal(bitId, res->getVal(bitId) + 1); delete (*it); } delete atomInvariants; delete bondInvariants; return res; } template SparseIntVect *FingerprintGenerator::getSparseCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const { return getFingerprintHelper(mol, fromAtoms, ignoreAtoms, confId, additionalOutput, customAtomInvariants, customBondInvariants); } // todo getSparseFingerprint does not completely produce the same output as // getSparseCountFingerprint. Count simulation and potential 64 bit outputs // makes size limiting necessary for getSparseFingerprint. This can be // changed if there is another way to avoid the size limitation of SparseBitVect template SparseBitVect *FingerprintGenerator::getSparseFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const { // make sure the result will fit into SparseBitVect std::uint32_t resultSize = std::min((std::uint64_t)std::numeric_limits::max(), (std::uint64_t)dp_fingerprintArguments->getResultSize()); std::uint32_t effectiveSize = resultSize; if (dp_fingerprintArguments->d_countSimulation) { // effective size needs to be smaller than result size to compansate for // count simulation effectiveSize /= dp_fingerprintArguments->d_countBounds.size(); } SparseIntVect *tempResult = getFingerprintHelper( mol, fromAtoms, ignoreAtoms, confId, additionalOutput, customAtomInvariants, customBondInvariants, effectiveSize); SparseBitVect *result = new SparseBitVect(resultSize); BOOST_FOREACH (auto val, tempResult->getNonzeroElements()) { if (dp_fingerprintArguments->d_countSimulation) { for (unsigned int i = 0; i < dp_fingerprintArguments->d_countBounds.size(); ++i) { // for every bound in the d_countBounds in dp_fingerprintArguments, set // a bit if the occurrence count is equal or higher than the bound for // that bit if (val.second >= dp_fingerprintArguments->d_countBounds[i]) { result->setBit( val.first * dp_fingerprintArguments->d_countBounds.size() + i); } } } else { result->setBit(val.first); } } delete tempResult; return result; } template SparseIntVect *FingerprintGenerator::getCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const { SparseIntVect *tempResult = getFingerprintHelper( mol, fromAtoms, ignoreAtoms, confId, additionalOutput, customAtomInvariants, customBondInvariants, dp_fingerprintArguments->d_fpSize); SparseIntVect *result = new SparseIntVect(dp_fingerprintArguments->d_fpSize); BOOST_FOREACH (auto val, tempResult->getNonzeroElements()) { result->setVal(val.first, val.second); } delete tempResult; return result; } template ExplicitBitVect *FingerprintGenerator::getFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const { std::uint32_t effectiveSize = dp_fingerprintArguments->d_fpSize; if (dp_fingerprintArguments->d_countSimulation) { // effective size needs to be smaller than result size to compansate for // count simulation effectiveSize /= dp_fingerprintArguments->d_countBounds.size(); } SparseIntVect *tempResult = getFingerprintHelper( mol, fromAtoms, ignoreAtoms, confId, additionalOutput, customAtomInvariants, customBondInvariants, effectiveSize); ExplicitBitVect *result = new ExplicitBitVect(dp_fingerprintArguments->d_fpSize); BOOST_FOREACH (auto val, tempResult->getNonzeroElements()) { if (dp_fingerprintArguments->d_countSimulation) { for (unsigned int i = 0; i < dp_fingerprintArguments->d_countBounds.size(); ++i) { // for every bound in the d_countBounds in dp_fingerprintArguments, set // a bit if the occurrence count is equal or higher than the bound for // that bit if (val.second >= dp_fingerprintArguments->d_countBounds[i]) { result->setBit( val.first * dp_fingerprintArguments->d_countBounds.size() + i); } } } else { result->setBit(val.first); } } delete tempResult; return result; } template RDKIT_FINGERPRINTS_EXPORT SparseIntVect *FingerprintGenerator::getSparseCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT SparseIntVect *FingerprintGenerator::getSparseCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT SparseBitVect * FingerprintGenerator::getSparseFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT SparseBitVect * FingerprintGenerator::getSparseFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT SparseIntVect *FingerprintGenerator::getCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT SparseIntVect *FingerprintGenerator::getCountFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *FingerprintGenerator::getFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; template RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect *FingerprintGenerator::getFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector *customAtomInvariants, const std::vector *customBondInvariants) const; SparseIntVect *getSparseCountFP(const ROMol &mol, FPType fPType) { std::vector tempVect(1, &mol); return (*getSparseCountFPBulk(tempVect, fPType))[0]; } SparseBitVect *getSparseFP(const ROMol &mol, FPType fPType) { std::vector tempVect(1, &mol); return (*getSparseFPBulk(tempVect, fPType))[0]; } SparseIntVect *getCountFP(const ROMol &mol, FPType fPType) { std::vector tempVect(1, &mol); return (*getCountFPBulk(tempVect, fPType))[0]; } ExplicitBitVect *getFP(const ROMol &mol, FPType fPType) { std::vector tempVect(1, &mol); return (*getFPBulk(tempVect, fPType))[0]; } std::vector *> *getSparseCountFPBulk( const std::vector molVector, FPType fPType) { FingerprintGenerator *generator = nullptr; switch (fPType) { case FPType::AtomPairFP: { generator = AtomPair::getAtomPairGenerator(); break; } case FPType::MorganFP: { generator = MorganFingerprint::getMorganGenerator(2); break; } case FPType::RDKitFP: { generator = RDKitFP::getRDKitFPGenerator(); break; } case FPType::TopologicalTorsionFP: { generator = TopologicalTorsion::getTopologicalTorsionGenerator(); break; } default: { throw UnimplementedFPException( "Fingerprint type not implemented for getSparseCountFP"); } } std::vector *> *res = new std::vector *>(); BOOST_FOREACH (const ROMol *mol, molVector) { res->push_back(generator->getSparseCountFingerprint(*mol)); } delete generator; return res; } std::vector *getSparseFPBulk( const std::vector molVector, FPType fPType) { FingerprintGenerator *generator = nullptr; switch (fPType) { case FPType::AtomPairFP: { generator = AtomPair::getAtomPairGenerator(); break; } case FPType::MorganFP: { generator = MorganFingerprint::getMorganGenerator(2); break; } case FPType::RDKitFP: { generator = RDKitFP::getRDKitFPGenerator(); break; } case FPType::TopologicalTorsionFP: { generator = TopologicalTorsion::getTopologicalTorsionGenerator(); break; } default: { throw UnimplementedFPException( "Fingerprint type not implemented for getSparseFP"); } } std::vector *res = new std::vector(); BOOST_FOREACH (const ROMol *mol, molVector) { res->push_back(generator->getSparseFingerprint(*mol)); } delete generator; return res; } std::vector *> *getCountFPBulk( const std::vector molVector, FPType fPType) { FingerprintGenerator *generator = nullptr; switch (fPType) { case FPType::AtomPairFP: { generator = AtomPair::getAtomPairGenerator(); break; } case FPType::MorganFP: { generator = MorganFingerprint::getMorganGenerator(2); break; } case FPType::RDKitFP: { generator = RDKitFP::getRDKitFPGenerator(); break; } case FPType::TopologicalTorsionFP: { generator = TopologicalTorsion::getTopologicalTorsionGenerator(); break; } default: { throw UnimplementedFPException( "Fingerprint type not implemented for getCountFP"); } } std::vector *> *res = new std::vector *>(); BOOST_FOREACH (const ROMol *mol, molVector) { res->push_back(generator->getCountFingerprint(*mol)); } delete generator; return res; } std::vector *getFPBulk( const std::vector molVector, FPType fPType) { FingerprintGenerator *generator = nullptr; switch (fPType) { case FPType::AtomPairFP: { generator = AtomPair::getAtomPairGenerator(); break; } case FPType::MorganFP: { generator = MorganFingerprint::getMorganGenerator(2); break; } case FPType::RDKitFP: { generator = RDKitFP::getRDKitFPGenerator(); break; } case FPType::TopologicalTorsionFP: { generator = TopologicalTorsion::getTopologicalTorsionGenerator(); break; } default: { throw UnimplementedFPException( "Fingerprint type not implemented for getFP"); } } std::vector *res = new std::vector(); BOOST_FOREACH (const ROMol *mol, molVector) { res->push_back(generator->getFingerprint(*mol)); } delete generator; return res; } } // namespace RDKit