// // Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors // // @@ All Rights Reserved @@ // This file is part of the RDKit. // The contents are covered by the terms of the BSD license // which is included in the file license.txt, found at the root // of the RDKit source tree. // #include "AlignMolecules.h" #include #include #include #include #include #include #include #include #include #include #include #include #include namespace RDKit { namespace MolAlign { namespace details { void symmetrizeTerminalAtoms(RWMol &mol) { // clang-format off static const std::string qsmarts = "[{atomPattern};$([{atomPattern}]-[*]=[{atomPattern}]),$([{atomPattern}]=[*]-[{atomPattern}])]~[*]"; static std::map replacements = { {"{atomPattern}", "O,N;D1"}}; // clang-format on static SmartsParserParams ps; ps.replacements = &replacements; static const std::unique_ptr qry{SmartsToMol(qsmarts, ps)}; CHECK_INVARIANT(qry, "bad query pattern"); auto matches = SubstructMatch(mol, *qry); if (matches.empty()) { return; } QueryBond qb; qb.setQuery(makeSingleOrDoubleBondQuery()); for (const auto &match : matches) { mol.getAtomWithIdx(match[0].second)->setFormalCharge(0); auto obond = mol.getBondBetweenAtoms(match[0].second, match[1].second); CHECK_INVARIANT(obond, "could not find expected bond"); mol.replaceBond(obond->getIdx(), &qb); } } } // namespace details namespace { double alignConfsOnAtomMap(const Conformer &prbCnf, const Conformer &refCnf, const MatchVectType &atomMap, RDGeom::Transform3D &trans, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIterations) { RDGeom::Point3DConstPtrVect refPoints, prbPoints; for (const auto &mi : atomMap) { prbPoints.push_back(&prbCnf.getAtomPos(mi.first)); refPoints.push_back(&refCnf.getAtomPos(mi.second)); } double ssr = RDNumeric::Alignments::AlignPoints( refPoints, prbPoints, trans, weights, reflect, maxIterations); return ssr / static_cast(prbPoints.size()); } void getAllMatchesPrbRef(const ROMol &prbMol, const ROMol &refMol, std::vector &matches, int maxMatches, bool symmetrizeConjugatedTerminalGroups, bool ignoreHs = false) { bool uniquify = false; bool recursionPossible = true; bool useChirality = false; bool useQueryQueryMatches = false; std::unique_ptr prbMolSymm; if (symmetrizeConjugatedTerminalGroups) { prbMolSymm.reset(new RWMol(prbMol)); details::symmetrizeTerminalAtoms(*prbMolSymm); } const auto &prbMolForMatch = prbMolSymm ? *prbMolSymm : prbMol; SubstructMatch(refMol, prbMolForMatch, matches, uniquify, recursionPossible, useChirality, useQueryQueryMatches, maxMatches); if (matches.empty()) { throw MolAlignException( "No sub-structure match found between the reference and probe mol"); } if (matches.size() > 1e6) { std::string name; prbMol.getPropIfPresent(common_properties::_Name, name); BOOST_LOG(rdWarningLog) << "Warning in " << __FUNCTION__ << ": " << matches.size() << " matches detected for molecule " << name << ", this may " << "lead to a performance slowdown." << std::endl; } if (ignoreHs) { // filter Hs out of the matches for (auto &match : matches) { std::erase_if(match, [&prbMolForMatch](const auto &mi) { return prbMolForMatch.getAtomWithIdx(mi.first)->getAtomicNum() == 1; }); } } } double calcMSDInternal(const Conformer &prbCnf, const Conformer &refCnf, const MatchVectType &atomMap, const RDNumeric::DoubleVector *weights) { unsigned int npt = atomMap.size(); std::unique_ptr unitWeights; if (!weights) { unitWeights.reset(new RDNumeric::DoubleVector(npt, 1.0)); weights = unitWeights.get(); } else { PRECONDITION(npt == weights->size(), "Mismatch in number of weights"); } RDGeom::Point3DConstPtrVect refPoints, prbPoints; for (const auto &mi : atomMap) { prbPoints.push_back(&prbCnf.getAtomPos(mi.first)); refPoints.push_back(&refCnf.getAtomPos(mi.second)); } double ssr = 0.; const RDGeom::Point3D *rpt; const RDGeom::Point3D *ppt; for (unsigned int i = 0; i < npt; ++i) { rpt = refPoints[i]; ppt = prbPoints[i]; ssr += (*weights)[i] * (*ppt - *rpt).lengthSq(); } return ssr / static_cast(npt); } double getBestRMSInternal(const ROMol &prbMol, const ROMol &refMol, int prbCid, int refCid, const std::vector &matches, RDGeom::Transform3D *trans, MatchVectType *bestMatch, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIters, unsigned int numThreads) { PRECONDITION(!matches.empty(), "matches must not be empty"); #ifndef RDK_BUILD_THREADSAFE_SSS numThreads = 1; #endif double msdBest = std::numeric_limits::max(); const Conformer &prbCnf = prbMol.getConformer(prbCid); const Conformer &refCnf = refMol.getConformer(refCid); const MatchVectType *bestMatchPtr = &matches[0]; if (numThreads == 1) { for (const auto &matche : matches) { RDGeom::Transform3D tmpTrans; double msd = trans ? alignConfsOnAtomMap(prbCnf, refCnf, matche, tmpTrans, weights, reflect, maxIters) : calcMSDInternal(prbCnf, refCnf, matche, weights); if (msd < msdBest) { msdBest = msd; bestMatchPtr = &matche; if (trans) { trans->assign(tmpTrans); } } } } #ifdef RDK_BUILD_THREADSAFE_SSS else { std::vector tg; std::vector< std::vector>> rmsds(numThreads); for (auto ti = 0u; ti < numThreads; ++ti) { auto func = [&](unsigned int tidx) { for (auto midx = tidx; midx < matches.size(); midx += numThreads) { auto matche = matches[midx]; RDGeom::Transform3D tmpTrans; auto msd = trans ? alignConfsOnAtomMap(prbCnf, refCnf, matche, tmpTrans, weights, reflect, maxIters) : calcMSDInternal(prbCnf, refCnf, matche, weights); rmsds[tidx].emplace_back(msd, midx, tmpTrans); } }; tg.emplace_back(std::thread(func, ti)); } for (auto &thread : tg) { if (thread.joinable()) { thread.join(); } } for (const auto &rv : rmsds) { for (const auto &res : rv) { const auto &[msd, midx, tf] = res; if (msd < msdBest) { msdBest = msd; bestMatchPtr = &matches[midx]; if (trans) { trans->assign(tf); } } } } } #endif if (bestMatch) { *bestMatch = *bestMatchPtr; } return sqrt(msdBest); } } // namespace double getAlignmentTransform(const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans, int prbCid, int refCid, const MatchVectType *atomMap, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIterations) { const Conformer &prbCnf = prbMol.getConformer(prbCid); const Conformer &refCnf = refMol.getConformer(refCid); MatchVectType match; if (!atomMap) { // we have to figure out the mapping between the two molecule const bool recursionPossible = true; const bool useChirality = false; const bool useQueryQueryMatches = true; if (SubstructMatch(refMol, prbMol, match, recursionPossible, useChirality, useQueryQueryMatches)) { atomMap = &match; } else { throw MolAlignException( "No sub-structure match found between the probe and query mol"); } } double msd = alignConfsOnAtomMap(prbCnf, refCnf, *atomMap, trans, weights, reflect, maxIterations); return sqrt(msd); } double alignMol(ROMol &prbMol, const ROMol &refMol, int prbCid, int refCid, const MatchVectType *atomMap, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIterations) { RDGeom::Transform3D trans; double res = getAlignmentTransform(prbMol, refMol, trans, prbCid, refCid, atomMap, weights, reflect, maxIterations); // now transform the relevant conformation on prbMol Conformer &conf = prbMol.getConformer(prbCid); MolTransforms::transformConformer(conf, trans); return res; } double getBestAlignmentTransform(const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans, MatchVectType &bestMatch, const BestAlignmentParams ¶ms, int prbCid, int refCid, bool reflect, unsigned int maxIters) { std::vector allMatches; if (params.map.empty()) { getAllMatchesPrbRef(prbMol, refMol, allMatches, params.maxMatches, params.symmetrizeConjugatedTerminalGroups, params.ignoreHs); } const auto &matches = params.map.empty() ? allMatches : params.map; auto bestRMS = getBestRMSInternal( prbMol, refMol, prbCid, refCid, matches, &bestTrans, &bestMatch, params.weights, reflect, maxIters, getNumThreadsToUse(params.numThreads)); return bestRMS; } double getBestRMS(ROMol &prbMol, const ROMol &refMol, const BestAlignmentParams ¶ms, int prbCid, int refCid) { std::vector allMatches; if (params.map.empty()) { getAllMatchesPrbRef(prbMol, refMol, allMatches, params.maxMatches, params.symmetrizeConjugatedTerminalGroups, params.ignoreHs); } const auto &matches = params.map.empty() ? allMatches : params.map; // TODO: check mismatch between weights size and match size. if (!params.map.empty() && params.weights) { auto nAtms = params.map[0].size(); if (params.weights->size() != nAtms) { throw ValueErrorException( (boost::format( "Mismatch between number of weights (%d) and number of atoms in the alignment map (%d).") % params.weights->size() % nAtms) .str()); } } RDGeom::Transform3D trans; bool reflect = false; unsigned int maxIters = 50; auto bestRMS = getBestRMSInternal( prbMol, refMol, prbCid, refCid, matches, &trans, nullptr, params.weights, reflect, maxIters, getNumThreadsToUse(params.numThreads)); // Perform a final alignment to the best alignment... MolTransforms::transformConformer(prbMol.getConformer(prbCid), trans); return bestRMS; } std::vector getAllConformerBestRMS(const ROMol &mol, const BestAlignmentParams ¶ms) { auto numThreads = getNumThreadsToUse(params.numThreads); std::vector allMatches; if (params.map.empty()) { getAllMatchesPrbRef(mol, mol, allMatches, params.maxMatches, params.symmetrizeConjugatedTerminalGroups, params.ignoreHs); } const auto &matches = params.map.empty() ? allMatches : params.map; std::vector res; RDGeom::Transform3D trans; bool reflect = false; unsigned int maxIters = 50; std::vector cids; for (auto cit = mol.beginConformers(); cit != mol.endConformers(); ++cit) { cids.push_back((*cit)->getId()); } if (numThreads == 1) { for (auto ci = 0u; ci < mol.getNumConformers(); ++ci) { for (auto cj = 0u; cj < ci; ++cj) { res.push_back(getBestRMSInternal(mol, mol, cids[ci], cids[cj], matches, &trans, nullptr, params.weights, reflect, maxIters, 1)); } } } #ifdef RDK_BUILD_THREADSAFE_SSS else { std::vector> pairs; for (auto ci = 0u; ci < mol.getNumConformers(); ++ci) { for (auto cj = 0u; cj < ci; ++cj) { pairs.emplace_back(cids[ci], cids[cj]); } } std::vector>> rmsds(numThreads); auto func = [&](unsigned int tidx) { RDGeom::Transform3D trans; bool reflect = false; unsigned int maxIters = 50; for (auto i = tidx; i < pairs.size(); i += numThreads) { auto rms = getBestRMSInternal(mol, mol, pairs[i].first, pairs[i].second, matches, &trans, nullptr, params.weights, reflect, maxIters, 1); rmsds[tidx].emplace_back(i, rms); } }; std::vector tg; for (auto ti = 0u; ti < numThreads; ++ti) { tg.emplace_back(std::thread(func, ti)); } for (auto &thread : tg) { if (thread.joinable()) { thread.join(); } } res.resize(pairs.size()); for (const auto &tres : rmsds) { for (const auto &v : tres) { res[v.first] = v.second; } } } #endif return res; } double CalcRMS(ROMol &prbMol, const ROMol &refMol, int prbCid, int refCid, const std::vector &map, int maxMatches, bool symmetrizeConjugatedTerminalGroups, const RDNumeric::DoubleVector *weights) { std::vector allMatches; if (map.empty()) { getAllMatchesPrbRef(prbMol, refMol, allMatches, maxMatches, symmetrizeConjugatedTerminalGroups); } const auto &matches = map.empty() ? allMatches : map; bool reflect = false; unsigned int maxIters = 50; unsigned int numThreads = 1; return getBestRMSInternal(prbMol, refMol, prbCid, refCid, matches, nullptr, nullptr, weights, reflect, maxIters, numThreads); } double CalcRMS(ROMol &prbMol, const ROMol &refMol, int prbCid, int refCid, const std::vector &map, int maxMatches, const RDNumeric::DoubleVector *weights) { return CalcRMS(prbMol, refMol, prbCid, refCid, map, maxMatches, false, weights); } void _fillAtomPositions(RDGeom::Point3DConstPtrVect &pts, const Conformer &conf, const std::vector *atomIds = nullptr) { unsigned int na = conf.getNumAtoms(); pts.clear(); if (atomIds == nullptr) { unsigned int ai; pts.reserve(na); for (ai = 0; ai < na; ++ai) { pts.push_back(&conf.getAtomPos(ai)); } } else { pts.reserve(atomIds->size()); std::vector::const_iterator cai; for (cai = atomIds->begin(); cai != atomIds->end(); cai++) { pts.push_back(&conf.getAtomPos(*cai)); } } } void alignMolConformers(ROMol &mol, const std::vector *atomIds, const std::vector *confIds, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIters, std::vector *RMSlist) { if (mol.getNumConformers() == 0) { // nothing to be done ; return; } RDGeom::Point3DConstPtrVect refPoints, prbPoints; int cid = -1; if ((confIds != nullptr) && (confIds->size() > 0)) { cid = confIds->front(); } const Conformer &refCnf = mol.getConformer(cid); _fillAtomPositions(refPoints, refCnf, atomIds); // now loop throught the remaininf conformations and transform them RDGeom::Transform3D trans; double ssd; if (confIds == nullptr) { unsigned int i = 0; ROMol::ConformerIterator cnfi; // Conformer *conf; for (cnfi = mol.beginConformers(); cnfi != mol.endConformers(); cnfi++) { // conf = (*cnfi); i += 1; if (i == 1) { continue; } _fillAtomPositions(prbPoints, *(*cnfi), atomIds); ssd = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, trans, weights, reflect, maxIters); if (RMSlist) { ssd /= (prbPoints.size()); RMSlist->push_back(sqrt(ssd)); } MolTransforms::transformConformer(*(*cnfi), trans); } } else { std::vector::const_iterator cai; unsigned int i = 0; for (cai = confIds->begin(); cai != confIds->end(); cai++) { i += 1; if (i == 1) { continue; } Conformer &conf = mol.getConformer(*cai); _fillAtomPositions(prbPoints, conf, atomIds); ssd = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, trans, weights, reflect, maxIters); if (RMSlist) { ssd /= (prbPoints.size()); RMSlist->push_back(sqrt(ssd)); } MolTransforms::transformConformer(conf, trans); } } } } // namespace MolAlign } // namespace RDKit