// // Copyright (C) 2018-2025 Boran Adas and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace RDKit { namespace RDKitFP { std::vector *RDKitFPAtomInvGenerator::getAtomInvariants( const ROMol &mol) const { auto *result = new std::vector(); result->reserve(mol.getNumAtoms()); for (const auto atom : mol.atoms()) { unsigned int aHash = (atom->getAtomicNum() % 128) << 1 | static_cast(atom->getIsAromatic()); result->push_back(aHash); } return result; } std::string RDKitFPAtomInvGenerator::infoString() const { return "RDKitFPAtomInvGenerator"; } void RDKitFPAtomInvGenerator::toJSON(boost::property_tree::ptree &pt) const { pt.put("type", "RDKitFPAtomInvGenerator"); } void RDKitFPAtomInvGenerator::fromJSON(const boost::property_tree::ptree &) { // no parameters to set } RDKitFPAtomInvGenerator *RDKitFPAtomInvGenerator::clone() const { return new RDKitFPAtomInvGenerator(); } template OutputType RDKitFPEnvGenerator::getResultSize() const { return std::numeric_limits::max(); } std::string RDKitFPArguments::infoString() const { return "RDKitFPArguments minPath=" + std::to_string(d_minPath) + " maxPath=" + std::to_string(d_maxPath) + " useHs=" + std::to_string(df_useHs) + " branchedPaths=" + std::to_string(df_branchedPaths) + " useBondOrder=" + std::to_string(df_useBondOrder); } void RDKitFPArguments::toJSON(boost::property_tree::ptree &pt) const { pt.put("type", "RDKitFPArguments"); pt.put("minPath", d_minPath); pt.put("maxPath", d_maxPath); pt.put("useHs", df_useHs); pt.put("branchedPaths", df_branchedPaths); pt.put("useBondOrder", df_useBondOrder); FingerprintArguments::toJSON(pt); } void RDKitFPArguments::fromJSON(const boost::property_tree::ptree &pt) { d_minPath = pt.get("minPath", d_minPath); d_maxPath = pt.get("maxPath", d_maxPath); df_useHs = pt.get("useHs", df_useHs); df_branchedPaths = pt.get("branchedPaths", df_branchedPaths); df_useBondOrder = pt.get("useBondOrder", df_useBondOrder); FingerprintArguments::fromJSON(pt); } RDKitFPArguments::RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature) : FingerprintArguments(countSimulation, countBounds, fpSize, numBitsPerFeature), d_minPath(minPath), d_maxPath(maxPath), df_useHs(useHs), df_branchedPaths(branchedPaths), df_useBondOrder(useBondOrder) { PRECONDITION(minPath != 0, "minPath==0"); PRECONDITION(maxPath >= minPath, "maxPath void RDKitFPAtomEnv::updateAdditionalOutput( AdditionalOutput *additionalOutput, size_t bitId) const { PRECONDITION(additionalOutput, "bad output pointer"); if (additionalOutput->bitPaths) { (*additionalOutput->bitPaths)[bitId].push_back(d_bondPath); } if (additionalOutput->atomToBits || additionalOutput->atomCounts || additionalOutput->atomsPerBit) { if (additionalOutput->atomsPerBit) { (*additionalOutput->atomsPerBit)[bitId].emplace_back(); } for (size_t i = 0; i < d_atomsInPath.size(); ++i) { if (d_atomsInPath[i]) { if (additionalOutput->atomsPerBit) { (*additionalOutput->atomsPerBit)[bitId].back().push_back(i); } if (additionalOutput->atomToBits) { auto &alist = additionalOutput->atomToBits->at(i); if (std::find(alist.begin(), alist.end(), bitId) == alist.end()) { alist.push_back(bitId); } } if (additionalOutput->atomCounts) { additionalOutput->atomCounts->at(i)++; } } } } } template OutputType RDKitFPAtomEnv::getBitId( FingerprintArguments *, // arguments const std::vector *, // atomInvariants const std::vector *, // bondInvariants AdditionalOutput *, // additional Output const bool, // hashResults const std::uint64_t // fpSize ) const { return d_bitId; } template std::string RDKitFPEnvGenerator::infoString() const { return "RDKitFPEnvGenerator"; } template void RDKitFPEnvGenerator::toJSON( boost::property_tree::ptree &pt) const { pt.put("type", "RDKitFPEnvGenerator"); AtomEnvironmentGenerator::toJSON(pt); } template void RDKitFPEnvGenerator::fromJSON( const boost::property_tree::ptree &pt) { AtomEnvironmentGenerator::fromJSON(pt); }; template std::vector *> RDKitFPEnvGenerator::getEnvironments( const ROMol &mol, FingerprintArguments *arguments, const std::vector *fromAtoms, const std::vector *, // ignoreAtoms const int, // confId const AdditionalOutput *, // additionalOutput const std::vector *atomInvariants, const std::vector *, // bondInvariants const bool // hashResults ) const { PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); auto *fpArguments = dynamic_cast(arguments); std::vector *> result; // get all paths INT_PATH_LIST_MAP allPaths; RDKitFPUtils::enumerateAllPaths( mol, allPaths, fromAtoms, fpArguments->df_branchedPaths, fpArguments->df_useHs, fpArguments->d_minPath, fpArguments->d_maxPath); // identify query bonds std::vector isQueryBond(mol.getNumBonds(), 0); std::vector bondCache; RDKitFPUtils::identifyQueryBonds(mol, bondCache, isQueryBond); boost::dynamic_bitset<> atomsInPath(mol.getNumAtoms()); for (INT_PATH_LIST_MAP_CI paths = allPaths.begin(); paths != allPaths.end(); paths++) { for (const auto &path : paths->second) { // the bond hashes of the path std::vector bondHashes = RDKitFPUtils::generateBondHashes( mol, atomsInPath, bondCache, isQueryBond, path, fpArguments->df_useBondOrder, atomInvariants); if (!bondHashes.size()) { continue; } // hash the path to generate a seed: unsigned long seed; if (path.size() > 1) { std::sort(bondHashes.begin(), bondHashes.end()); // finally, we will add the number of distinct atoms in the path at the // end // of the vect. This allows us to distinguish C1CC1 from CC(C)C bondHashes.push_back(static_cast(atomsInPath.count())); seed = gboost::hash_range(bondHashes.begin(), bondHashes.end()); } else { seed = bondHashes[0]; } result.push_back(new RDKitFPAtomEnv( static_cast(seed), atomsInPath, path)); } } return result; } template FingerprintGenerator *getRDKitFPGenerator( const RDKitFPArguments &args, AtomInvariantsGenerator *atomInvariantsGenerator, bool ownsAtomInvGen) { auto *envGenerator = new RDKitFPEnvGenerator(); bool ownsAtomInvGenerator = ownsAtomInvGen; if (!atomInvariantsGenerator) { atomInvariantsGenerator = new RDKitFPAtomInvGenerator(); ownsAtomInvGenerator = true; } return new FingerprintGenerator( envGenerator, new RDKitFPArguments(args), atomInvariantsGenerator, nullptr, ownsAtomInvGenerator, false); } template FingerprintGenerator *getRDKitFPGenerator( unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen) { RDKitFPArguments arguments(minPath, maxPath, useHs, branchedPaths, useBondOrder, countSimulation, countBounds, fpSize, numBitsPerFeature); return getRDKitFPGenerator(arguments, atomInvariantsGenerator, ownsAtomInvGen); } template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * getRDKitFPGenerator(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen); template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * getRDKitFPGenerator(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, AtomInvariantsGenerator *atomInvariantsGenerator, bool countSimulation, const std::vector countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature, bool ownsAtomInvGen); template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * getRDKitFPGenerator(const RDKitFPArguments &, AtomInvariantsGenerator *, bool); template RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator * getRDKitFPGenerator(const RDKitFPArguments &, AtomInvariantsGenerator *, bool); } // namespace RDKitFP } // namespace RDKit