// // Copyright (C) 2007-2022 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include #include #include #include #include #include #include #include #include #include #include namespace RDKit { namespace AtomPairs { template void updateElement(SparseIntVect &v, T2 elem) { v.setVal(elem, v.getVal(elem) + 1); } template void updateElement(ExplicitBitVect &v, T1 elem) { v.setBit(elem % v.getNumBits()); } template void setAtomPairBit(std::uint32_t i, std::uint32_t j, std::uint32_t nAtoms, const std::vector &atomCodes, const double *dm, T *bv, unsigned int minLength, unsigned int maxLength, bool includeChirality) { auto dist = static_cast(floor(dm[i * nAtoms + j])); if (dist >= minLength && dist <= maxLength) { std::uint32_t bitId = getAtomPairCode(atomCodes[i], atomCodes[j], dist, includeChirality); updateElement(*bv, static_cast(bitId)); } } namespace { std::unique_ptr> getAtomPairFingerprintInternal( const ROMol &mol, unsigned int nBits, unsigned int minLength, unsigned int maxLength, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality, bool use2D, int confId, bool sparse) { PRECONDITION(minLength <= maxLength, "bad lengths provided"); PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); const ROMol *lmol = &mol; std::unique_ptr tmol; if (includeChirality && !mol.hasProp(common_properties::_StereochemDone)) { tmol = std::unique_ptr(new ROMol(mol)); MolOps::assignStereochemistry(*tmol); lmol = tmol.get(); } FingerprintFuncArguments args; args.fromAtoms = fromAtoms; args.ignoreAtoms = ignoreAtoms; args.customAtomInvariants = atomInvariants; args.confId = confId; std::unique_ptr> fpgen{ RDKit::AtomPair::getAtomPairGenerator( minLength, maxLength, includeChirality, use2D, nullptr, true, nBits)}; return std::unique_ptr>( sparse ? fpgen->getSparseCountFingerprint(*lmol, args) : fpgen->getCountFingerprint(*lmol, args)); } } // end of anonymous namespace SparseIntVect *getAtomPairFingerprint( const ROMol &mol, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality, bool use2D, int confId) { return getAtomPairFingerprint(mol, 1, maxPathLen - 1, fromAtoms, ignoreAtoms, atomInvariants, includeChirality, use2D, confId); }; SparseIntVect *getAtomPairFingerprint( const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality, bool use2D, int confId) { RDLog::deprecationWarning("please use AtomPairGenerator"); return reinterpret_cast *>( getAtomPairFingerprintInternal(mol, 0, minLength, maxLength, fromAtoms, ignoreAtoms, atomInvariants, includeChirality, use2D, confId, true) .release()); } SparseIntVect *getHashedAtomPairFingerprint( const ROMol &mol, unsigned int nBits, unsigned int minLength, unsigned int maxLength, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality, bool use2D, int confId) { RDLog::deprecationWarning("please use AtomPairGenerator"); auto siv = getAtomPairFingerprintInternal( mol, nBits, minLength, maxLength, fromAtoms, ignoreAtoms, atomInvariants, includeChirality, use2D, confId, false); auto *res = new SparseIntVect(nBits); for (auto v : siv->getNonzeroElements()) { res->setVal(v.first, v.second); } return res; } ExplicitBitVect *getHashedAtomPairFingerprintAsBitVect( const ROMol &mol, unsigned int nBits, unsigned int minLength, unsigned int maxLength, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, unsigned int nBitsPerEntry, bool includeChirality, bool use2D, int confId) { PRECONDITION(minLength <= maxLength, "bad lengths provided"); PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); static int bounds[4] = {1, 2, 4, 8}; unsigned int blockLength = nBits / nBitsPerEntry; SparseIntVect *sres = getHashedAtomPairFingerprint( mol, blockLength, minLength, maxLength, fromAtoms, ignoreAtoms, atomInvariants, includeChirality, use2D, confId); auto *res = new ExplicitBitVect(nBits); if (nBitsPerEntry != 4) { for (auto val : sres->getNonzeroElements()) { for (unsigned int i = 0; i < nBitsPerEntry; ++i) { if (val.second > static_cast(i)) { res->setBit(val.first * nBitsPerEntry + i); } } } } else { for (auto val : sres->getNonzeroElements()) { for (unsigned int i = 0; i < nBitsPerEntry; ++i) { if (val.second >= bounds[i]) { res->setBit(val.first * nBitsPerEntry + i); } } } } delete sres; return res; } SparseIntVect *getTopologicalTorsionFingerprint( const ROMol &mol, unsigned int targetSize, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality) { RDLog::deprecationWarning("please use TopologicalTorsionGenerator"); PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); const ROMol *lmol = &mol; std::unique_ptr tmol; if (includeChirality && !mol.hasProp(common_properties::_StereochemDone)) { tmol = std::unique_ptr(new ROMol(mol)); MolOps::assignStereochemistry(*tmol); lmol = tmol.get(); } boost::uint64_t sz = 1; sz = (sz << (targetSize * (codeSize + (includeChirality ? numChiralBits : 0)))); // NOTE: this -1 is incorrect but it's needed for backwards compatibility. // hopefully we'll never have a case with a torsion that hits this. // // mmm, bug compatible. sz -= 1; auto *res = new SparseIntVect(sz); std::vector atomCodes; atomCodes.reserve(lmol->getNumAtoms()); for (const auto atom : lmol->atoms()) { if (!atomInvariants) { atomCodes.push_back(getAtomCode(atom, 0, includeChirality)); } else { // need to add to the atomCode here because we subtract off up to 2 below // as part of the branch correction atomCodes.push_back( (*atomInvariants)[atom->getIdx()] % ((1 << codeSize) - 1) + 2); } } boost::dynamic_bitset<> *fromAtomsBV = nullptr; if (fromAtoms) { fromAtomsBV = new boost::dynamic_bitset<>(lmol->getNumAtoms()); for (auto fAt : *fromAtoms) { fromAtomsBV->set(fAt); } } boost::dynamic_bitset<> *ignoreAtomsBV = nullptr; if (ignoreAtoms) { ignoreAtomsBV = new boost::dynamic_bitset<>(lmol->getNumAtoms()); for (auto fAt : *ignoreAtoms) { ignoreAtomsBV->set(fAt); } } boost::dynamic_bitset<> pAtoms(lmol->getNumAtoms()); PATH_LIST paths = findAllPathsOfLengthN(*lmol, targetSize, false); for (PATH_LIST::const_iterator pathIt = paths.begin(); pathIt != paths.end(); ++pathIt) { bool keepIt = true; if (fromAtomsBV) { keepIt = false; } std::vector pathCodes; const PATH_TYPE &path = *pathIt; if (fromAtomsBV) { if (fromAtomsBV->test(static_cast(path.front())) || fromAtomsBV->test(static_cast(path.back()))) { keepIt = true; } } if (keepIt && ignoreAtomsBV) { for (auto pElem : path) { if (ignoreAtomsBV->test(pElem)) { keepIt = false; break; } } } if (keepIt) { pAtoms.reset(); for (auto pIt = path.begin(); pIt < path.end(); ++pIt) { // look for a cycle that doesn't start at the first atom // we can't effectively canonicalize these at the moment // (was github #811) if (pIt != path.begin() && *pIt != *(path.begin()) && pAtoms[*pIt]) { pathCodes.clear(); break; } pAtoms.set(*pIt); unsigned int code = atomCodes[*pIt] - 1; // subtract off the branching number: if (pIt != path.begin() && pIt + 1 != path.end()) { --code; } pathCodes.push_back(code); } if (pathCodes.size()) { boost::int64_t code = getTopologicalTorsionCode(pathCodes, includeChirality); updateElement(*res, code); } } } delete fromAtomsBV; delete ignoreAtomsBV; return res; } namespace { template void TorsionFpCalc(T *res, const ROMol &mol, unsigned int nBits, unsigned int targetSize, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality) { PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); const ROMol *lmol = &mol; std::unique_ptr tmol; if (includeChirality && !mol.hasProp(common_properties::_StereochemDone)) { tmol = std::unique_ptr(new ROMol(mol)); MolOps::assignStereochemistry(*tmol); lmol = tmol.get(); } std::unique_ptr> fpgen{ RDKit::TopologicalTorsion::getTopologicalTorsionGenerator( includeChirality, targetSize, nullptr, true, nBits)}; FingerprintFuncArguments args; args.fromAtoms = fromAtoms; args.ignoreAtoms = ignoreAtoms; args.customAtomInvariants = atomInvariants; auto siv = fpgen->getCountFingerprint(*lmol, args); for (auto v : siv->getNonzeroElements()) { res->setVal(v.first, v.second); } } } // namespace SparseIntVect *getHashedTopologicalTorsionFingerprint( const ROMol &mol, unsigned int nBits, unsigned int targetSize, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, bool includeChirality) { RDLog::deprecationWarning("please use TopologicalTorsionGenerator"); PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); auto *res = new SparseIntVect(nBits); TorsionFpCalc(res, mol, nBits, targetSize, fromAtoms, ignoreAtoms, atomInvariants, includeChirality); return res; } ExplicitBitVect *getHashedTopologicalTorsionFingerprintAsBitVect( const ROMol &mol, unsigned int nBits, unsigned int targetSize, const std::vector *fromAtoms, const std::vector *ignoreAtoms, const std::vector *atomInvariants, unsigned int nBitsPerEntry, bool includeChirality) { RDLog::deprecationWarning("please use TopologicalTorsionGenerator"); PRECONDITION(!atomInvariants || atomInvariants->size() >= mol.getNumAtoms(), "bad atomInvariants size"); static int bounds[4] = {1, 2, 4, 8}; unsigned int blockLength = nBits / nBitsPerEntry; auto *sres = new SparseIntVect(blockLength); TorsionFpCalc(sres, mol, blockLength, targetSize, fromAtoms, ignoreAtoms, atomInvariants, includeChirality); auto *res = new ExplicitBitVect(nBits); if (nBitsPerEntry != 4) { for (auto val : sres->getNonzeroElements()) { for (unsigned int i = 0; i < nBitsPerEntry; ++i) { if (val.second > static_cast(i)) { res->setBit(val.first * nBitsPerEntry + i); } } } } else { for (auto val : sres->getNonzeroElements()) { for (unsigned int i = 0; i < nBitsPerEntry; ++i) { if (val.second >= bounds[i]) { res->setBit(val.first * nBitsPerEntry + i); } } } } delete sres; return res; } } // end of namespace AtomPairs } // end of namespace RDKit