/** * TM-align algorithm implementation for PyMOL. * * Ported from USalign (Zhang lab) to modern C++17 with glm math types. * Original algorithm: Zhang & Skolnick, Nucl. Acids Res. 33, 2302 (2005) * * Strips all file I/O — coordinates come from PyMOL selections. * Replaces double** with std::vector, NewArray/DeleteArray with * RAII containers, scattered output params with result structs. */ /* ============================================================================== US-align: universal structure alignment of monomeric and complex proteins and nucleic acids This program was written by Chengxin Zhang at Yang Zhang lab, Department of Computational Medicine and Bioinformatics, University of Michigan, 100 Washtenaw Ave, Ann Arbor, MI 48109-2218. Please report issues to zhanglab@zhanggroup.org References: * C Zhang, M Shine, A Pyle, Y Zhang (2022) Nat Methods. 19(9), 1109-1115. * C Zhang, L Freddolino, Y Zhang (2025) Nature Protocols, https://doi.org/10.1038/s41596-025-01189-x DISCLAIMER: Permission to use, copy, modify, and distribute this program for any purpose, with or without fee, is hereby granted, provided that the notices on the head, the reference information, and this copyright notice appear in all copies or substantial portions of the Software. It is provided "as is" without express or implied warranty. =============================================================================== */ #include "USalign.h" #include #include #include #include #include namespace pymol::usalign { // ======================================================================== // DPWorkspace // ======================================================================== void DPWorkspace::resize(int xlen, int ylen) { rows = xlen + 1; cols = ylen + 1; score_flat.assign(static_cast(rows) * cols, 0.0); val_flat.assign(static_cast(rows) * cols, 0.0); path_flat.assign(static_cast(rows) * cols, false); int minlen = std::min(xlen, ylen); xtm.resize(minlen); ytm.resize(minlen); xt.resize(xlen); r1.resize(minlen); r2.resize(minlen); } // ======================================================================== // Parameter initialization (from param_set.h) // ======================================================================== struct TMParameters { double d0 = 0.0; double d0_search = 0.0; double Lnorm = 0.0; double score_d8 = 0.0; double D0_MIN = 0.0; double dcu0 = 0.0; }; static TMParameters compute_search_params(int xlen, int ylen) { TMParameters p; p.D0_MIN = 0.5; p.dcu0 = 4.25; p.Lnorm = std::min(xlen, ylen); if (p.Lnorm <= 19) p.d0 = 0.168; else p.d0 = 1.24 * std::pow(p.Lnorm - 15.0, 1.0 / 3.0) - 1.8; // During the search phase, d0 is inflated by 0.8 compared to final scoring. // This wider distance cutoff helps the iterative refinement explore more seeds. p.D0_MIN = p.d0 + 0.8; p.d0 = p.D0_MIN; p.d0_search = p.d0; if (p.d0_search > 8.0) p.d0_search = 8.0; if (p.d0_search < 4.5) p.d0_search = 4.5; p.score_d8 = 1.5 * std::pow(p.Lnorm, 0.3) + 3.5; return p; } static void compute_final_params( double len, double& D0_MIN, double& Lnorm, double& d0, double& d0_search) { D0_MIN = 0.5; Lnorm = len; if (Lnorm <= 21) d0 = 0.5; else d0 = 1.24 * std::pow(Lnorm - 15.0, 1.0 / 3.0) - 1.8; if (d0 < D0_MIN) d0 = D0_MIN; d0_search = d0; if (d0_search > 8.0) d0_search = 8.0; if (d0_search < 4.5) d0_search = 4.5; } // ======================================================================== // Geometry utilities // ======================================================================== static inline double dist2(const glm::dvec3& a, const glm::dvec3& b) { auto d = a - b; return glm::dot(d, d); } static inline glm::dvec3 apply_transform( const glm::dvec3& v, const Superposition& sup) { return sup.rotation * v + sup.translation; } static void do_rotation(const std::vector& src, std::vector& dst, int n, const Superposition& sup) { for (int i = 0; i < n; i++) dst[i] = apply_transform(src[i], sup); } // ======================================================================== // Kabsch superposition (from Kabsch.h) // ======================================================================== static bool kabsch(const std::vector& x, const std::vector& y, int n, int mode, double& rms, Superposition& sup) { double e0, rms1, d, h, g; double cth, sth, sqrth, p, det, sigma; double xc[3], yc[3]; double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6]; const double sqrt3 = 1.73205080756888; const double tol = 0.01; const int ip[] = {0, 1, 3, 1, 2, 4, 3, 4, 5}; const int ip2312[] = {1, 2, 0, 1}; int a_failed = 0, b_failed = 0; const double epsilon = 0.00000001; rms = 0; rms1 = 0; e0 = 0; double t[3]; double u[3][3]; double s1[3] = {0, 0, 0}; double s2[3] = {0, 0, 0}; double sx[3] = {0, 0, 0}; double sy[3] = {0, 0, 0}; double sz[3] = {0, 0, 0}; for (int i = 0; i < 3; i++) { xc[i] = 0.0; yc[i] = 0.0; t[i] = 0.0; for (int j = 0; j < 3; j++) { u[i][j] = (i == j) ? 1.0 : 0.0; r[i][j] = 0.0; a[i][j] = (i == j) ? 1.0 : 0.0; } } if (n < 1) return false; // compute centers for (int i = 0; i < n; i++) { double c1[3] = {x[i].x, x[i].y, x[i].z}; double c2[3] = {y[i].x, y[i].y, y[i].z}; for (int j = 0; j < 3; j++) { s1[j] += c1[j]; s2[j] += c2[j]; } for (int j = 0; j < 3; j++) { sx[j] += c1[0] * c2[j]; sy[j] += c1[1] * c2[j]; sz[j] += c1[2] * c2[j]; } } for (int i = 0; i < 3; i++) { xc[i] = s1[i] / n; yc[i] = s2[i] / n; } if (mode == 2 || mode == 0) { for (int mm = 0; mm < n; mm++) { double xx[3] = {x[mm].x, x[mm].y, x[mm].z}; double yy[3] = {y[mm].x, y[mm].y, y[mm].z}; for (int nn = 0; nn < 3; nn++) e0 += (xx[nn] - xc[nn]) * (xx[nn] - xc[nn]) + (yy[nn] - yc[nn]) * (yy[nn] - yc[nn]); } } for (int j = 0; j < 3; j++) { r[j][0] = sx[j] - s1[0] * s2[j] / n; r[j][1] = sy[j] - s1[1] * s2[j] / n; r[j][2] = sz[j] - s1[2] * s2[j] / n; } // compute determinant det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1]) - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0]) + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]); sigma = det; // compute trans(r)*r int m = 0; for (int j = 0; j < 3; j++) for (int i = 0; i <= j; i++) rr[m++] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j]; double spur = (rr[0] + rr[2] + rr[5]) / 3.0; double cof = (((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5]) - rr[3] * rr[3]) + rr[0] * rr[2]) - rr[1] * rr[1]) / 3.0; det = det * det; for (int i = 0; i < 3; i++) e[i] = spur; if (spur > 0) { d = spur * spur; h = d - cof; g = (spur * cof - det) / 2.0 - spur * h; if (h > 0) { sqrth = std::sqrt(h); d = h * h * h - g * g; if (d < 0.0) d = 0.0; d = std::atan2(std::sqrt(d), -g) / 3.0; cth = sqrth * std::cos(d); sth = sqrth * sqrt3 * std::sin(d); e[0] = (spur + cth) + cth; e[1] = (spur - cth) + sth; e[2] = (spur - cth) - sth; if (mode != 0) { for (int l = 0; l < 3; l += 2) { d = e[l]; ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4]; ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4]; ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3]; ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4]; ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3]; ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1]; for (int i = 0; i < 6; i++) if (std::fabs(ss[i]) <= epsilon) ss[i] = 0.0; int j; if (std::fabs(ss[0]) >= std::fabs(ss[2])) { j = 0; if (std::fabs(ss[0]) < std::fabs(ss[5])) j = 2; } else if (std::fabs(ss[2]) >= std::fabs(ss[5])) j = 1; else j = 2; d = 0.0; j = 3 * j; for (int i = 0; i < 3; i++) { int k = ip[i + j]; a[i][l] = ss[k]; d += ss[k] * ss[k]; } if (d > epsilon) d = 1.0 / std::sqrt(d); else d = 0.0; for (int i = 0; i < 3; i++) a[i][l] *= d; } d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2]; int m1, m0; if ((e[0] - e[1]) > (e[1] - e[2])) { m1 = 2; m0 = 0; } else { m1 = 0; m0 = 2; } p = 0; for (int i = 0; i < 3; i++) { a[i][m1] -= d * a[i][m0]; p += a[i][m1] * a[i][m1]; } if (p <= tol) { p = 1.0; int j = 0; for (int i = 0; i < 3; i++) { if (p < std::fabs(a[i][m0])) continue; p = std::fabs(a[i][m0]); j = i; } int k = ip2312[j]; int l = ip2312[j + 1]; p = std::sqrt(a[k][m0] * a[k][m0] + a[l][m0] * a[l][m0]); if (p > tol) { a[j][m1] = 0.0; a[k][m1] = -a[l][m0] / p; a[l][m1] = a[k][m0] / p; } else a_failed = 1; } else { p = 1.0 / std::sqrt(p); for (int i = 0; i < 3; i++) a[i][m1] *= p; } if (a_failed != 1) { a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2]; a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2]; a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2]; } } } if (mode != 0 && a_failed != 1) { for (int l = 0; l < 2; l++) { d = 0.0; for (int i = 0; i < 3; i++) { b[i][l] = r[i][0] * a[0][l] + r[i][1] * a[1][l] + r[i][2] * a[2][l]; d += b[i][l] * b[i][l]; } if (d > epsilon) d = 1.0 / std::sqrt(d); else d = 0.0; for (int i = 0; i < 3; i++) b[i][l] *= d; } d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1]; p = 0.0; for (int i = 0; i < 3; i++) { b[i][1] -= d * b[i][0]; p += b[i][1] * b[i][1]; } if (p <= tol) { p = 1.0; int j = 0; for (int i = 0; i < 3; i++) { if (p < std::fabs(b[i][0])) continue; p = std::fabs(b[i][0]); j = i; } int k = ip2312[j]; int l = ip2312[j + 1]; p = std::sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]); if (p > tol) { b[j][1] = 0.0; b[k][1] = -b[l][0] / p; b[l][1] = b[k][0] / p; } else b_failed = 1; } else { p = 1.0 / std::sqrt(p); for (int i = 0; i < 3; i++) b[i][1] *= p; } if (b_failed != 1) { b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0]; b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0]; b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0]; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) u[i][j] = b[i][0] * a[j][0] + b[i][1] * a[j][1] + b[i][2] * a[j][2]; } for (int i = 0; i < 3; i++) t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - u[i][2] * xc[2]; } } else { for (int i = 0; i < 3; i++) t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - u[i][2] * xc[2]; } // compute rms for (int i = 0; i < 3; i++) { if (e[i] < 0) e[i] = 0; e[i] = std::sqrt(e[i]); } d = e[2]; if (sigma < 0.0) d = -d; d = (d + e[1]) + e[0]; if (mode == 2 || mode == 0) { rms1 = (e0 - d) - d; if (rms1 < 0.0) rms1 = 0.0; } rms = rms1; // convert to glm types sup.translation = glm::dvec3(t[0], t[1], t[2]); sup.rotation = glm::dmat3(u[0][0], u[1][0], u[2][0], u[0][1], u[1][1], u[2][1], u[0][2], u[1][2], u[2][2]); return true; } // ======================================================================== // Secondary structure assignment (from se.h:make_sec) // ======================================================================== static char sec_str_classify(double dis13, double dis14, double dis15, double dis24, double dis25, double dis35) { double delta = 2.1; if (std::fabs(dis15 - 6.37) < delta && std::fabs(dis14 - 5.18) < delta && std::fabs(dis25 - 5.18) < delta && std::fabs(dis13 - 5.45) < delta && std::fabs(dis24 - 5.45) < delta && std::fabs(dis35 - 5.45) < delta) return 'H'; delta = 1.42; if (std::fabs(dis15 - 13.0) < delta && std::fabs(dis14 - 10.4) < delta && std::fabs(dis25 - 10.4) < delta && std::fabs(dis13 - 6.1) < delta && std::fabs(dis24 - 6.1) < delta && std::fabs(dis35 - 6.1) < delta) return 'E'; if (dis15 < 8) return 'T'; return 'C'; } static void assign_secondary_structure( const std::vector& ca, std::vector& sec) { int len = static_cast(ca.size()); sec.assign(len, 'C'); for (int i = 0; i < len; i++) { int j1 = i - 2, j2 = i - 1, j3 = i, j4 = i + 1, j5 = i + 2; if (j1 >= 0 && j5 < len) { double d13 = std::sqrt(dist2(ca[j1], ca[j3])); double d14 = std::sqrt(dist2(ca[j1], ca[j4])); double d15 = std::sqrt(dist2(ca[j1], ca[j5])); double d24 = std::sqrt(dist2(ca[j2], ca[j4])); double d25 = std::sqrt(dist2(ca[j2], ca[j5])); double d35 = std::sqrt(dist2(ca[j3], ca[j5])); sec[i] = sec_str_classify(d13, d14, d15, d24, d25, d35); } } } // ======================================================================== // Scoring functions (from TMalign.h:score_fun8) // ======================================================================== // Lnorm > 0: normalize by Lnorm (used during main search) // Lnorm <= 0: normalize by n_ali (used during standard/final scoring) static int score_fun8(const std::vector& xa, const std::vector& ya, int n_ali, double d, std::vector& i_ali, double& score1, int score_sum_method, double score_d8, double d0, double Lnorm = 0.0) { double score_sum = 0; double d_tmp = d * d; double d02 = d0 * d0; double score_d8_cut = score_d8 * score_d8; int n_cut, inc = 0; while (true) { n_cut = 0; score_sum = 0; for (int i = 0; i < n_ali; i++) { double di = dist2(xa[i], ya[i]); if (di < d_tmp) { i_ali[n_cut] = i; n_cut++; } if (score_sum_method == 8) { if (di <= score_d8_cut) score_sum += 1.0 / (1.0 + di / d02); } else score_sum += 1.0 / (1.0 + di / d02); } if (n_cut < 3 && n_ali > 3) { inc++; double dinc = d + inc * 0.5; d_tmp = dinc * dinc; } else break; } score1 = score_sum / (Lnorm > 0.0 ? Lnorm : n_ali); return n_cut; } // ======================================================================== // TMscore8_search — iterative fragment refinement (from TMalign.h) // ======================================================================== // Lnorm > 0: normalize by Lnorm (used during main search) // Lnorm <= 0: normalize by n_ali (used during standard/final scoring) static double TMscore8_search(DPWorkspace& ws, int Lali, Superposition& sup_out, int simplify_step, int score_sum_method, double local_d0_search, double score_d8, double d0, double Lnorm = 0.0) { double score_max = -1, score, rmsd; const int n_it = 20; const int n_init_max = 6; int L_ini[6]; int L_ini_min = 4; if (Lali < L_ini_min) L_ini_min = Lali; int n_init = 0; int i; for (i = 0; i < n_init_max - 1; i++) { n_init++; L_ini[i] = static_cast(Lali / std::pow(2.0, static_cast(i))); if (L_ini[i] <= L_ini_min) { L_ini[i] = L_ini_min; break; } } if (i == n_init_max - 1) { n_init++; L_ini[i] = L_ini_min; } std::vector i_ali(Lali); std::vector k_ali(Lali); Superposition sup_tmp; for (int i_init = 0; i_init < n_init; i_init++) { int L_frag = L_ini[i_init]; int iL_max = Lali - L_frag; i = 0; while (true) { for (int k = 0; k < L_frag; k++) { int kk = k + i; ws.r1[k] = ws.xtm[kk]; ws.r2[k] = ws.ytm[kk]; k_ali[k] = kk; } kabsch(ws.r1, ws.r2, L_frag, 1, rmsd, sup_tmp); do_rotation(ws.xtm, ws.xt, Lali, sup_tmp); double d_cut = local_d0_search - 1; int n_cut = score_fun8(ws.xt, ws.ytm, Lali, d_cut, i_ali, score, score_sum_method, score_d8, d0, Lnorm); if (score > score_max) { score_max = score; sup_out = sup_tmp; } // iterative refinement d_cut = local_d0_search + 1; for (int it = 0; it < n_it; it++) { int ka = n_cut; for (int k = 0; k < n_cut; k++) { int m = i_ali[k]; ws.r1[k] = ws.xtm[m]; ws.r2[k] = ws.ytm[m]; k_ali[k] = m; } kabsch(ws.r1, ws.r2, n_cut, 1, rmsd, sup_tmp); do_rotation(ws.xtm, ws.xt, Lali, sup_tmp); n_cut = score_fun8(ws.xt, ws.ytm, Lali, d_cut, i_ali, score, score_sum_method, score_d8, d0, Lnorm); if (score > score_max) { score_max = score; sup_out = sup_tmp; } if (n_cut == ka) { int k; for (k = 0; k < n_cut; k++) if (i_ali[k] != k_ali[k]) break; if (k == n_cut) break; } } if (i < iL_max) { i += simplify_step; if (i > iL_max) i = iL_max; } else break; } } return score_max; } // ======================================================================== // Detailed search (from TMalign.h:detailed_search) // ======================================================================== // Lnorm > 0: normalize by Lnorm (used during main search) // Lnorm <= 0: normalize by n_ali (used during standard/final scoring) static double detailed_search(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, const std::vector& invmap, Superposition& sup, int simplify_step, int score_sum_method, double local_d0_search, double score_d8, double d0, double Lnorm = 0.0) { int k = 0; for (int i = 0; i < ylen; i++) { int j = invmap[i]; if (j >= 0) { ws.xtm[k] = x[j]; ws.ytm[k] = y[i]; k++; } } return TMscore8_search( ws, k, sup, simplify_step, score_sum_method, local_d0_search, score_d8, d0, Lnorm); } // ======================================================================== // Get score fast — quick 3-pass evaluation (from TMalign.h:get_score_fast) // ======================================================================== static double get_score_fast(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, const std::vector& invmap, double d0, double d0_search, Superposition& sup) { double rms; int k = 0; for (int j = 0; j < ylen; j++) { int i = invmap[j]; if (i >= 0) { ws.r1[k] = x[i]; ws.r2[k] = y[j]; ws.xtm[k] = x[i]; ws.ytm[k] = y[j]; k++; } } kabsch(ws.r1, ws.r2, k, 1, rms, sup); double d02 = d0 * d0; double d00 = d0_search; double d002 = d00 * d00; int n_ali = k; std::vector dis(n_ali); double tmscore = 0; for (int kk = 0; kk < n_ali; kk++) { auto xrot = apply_transform(ws.xtm[kk], sup); double di = dist2(xrot, ws.ytm[kk]); dis[kk] = di; tmscore += 1.0 / (1.0 + di / d02); } // second iteration double d002t = d002; { std::vector dis_sorted(dis.begin(), dis.end()); std::sort(dis_sorted.begin(), dis_sorted.end()); if (n_ali > 2 && d002t < dis_sorted[2]) d002t = dis_sorted[2]; } while (true) { int j = 0; for (int kk = 0; kk < n_ali; kk++) { if (dis[kk] <= d002t) { ws.r1[j] = ws.xtm[kk]; ws.r2[j] = ws.ytm[kk]; j++; } } if (j < 3 && n_ali > 3) d002t += 0.5; else break; } double tmscore1, tmscore2; int j_count; { // count how many passed j_count = 0; for (int kk = 0; kk < n_ali; kk++) if (dis[kk] <= d002t) j_count++; } if (n_ali != j_count) { Superposition sup2; kabsch(ws.r1, ws.r2, j_count, 1, rms, sup2); tmscore1 = 0; for (int kk = 0; kk < n_ali; kk++) { auto xrot = apply_transform(ws.xtm[kk], sup2); double di = dist2(xrot, ws.ytm[kk]); dis[kk] = di; tmscore1 += 1.0 / (1.0 + di / d02); } // third iteration d002t = d002 + 1; { std::vector dis_sorted(dis.begin(), dis.end()); std::sort(dis_sorted.begin(), dis_sorted.end()); if (n_ali > 2 && d002t < dis_sorted[2]) d002t = dis_sorted[2]; } while (true) { int j = 0; for (int kk = 0; kk < n_ali; kk++) { if (dis[kk] <= d002t) { ws.r1[j] = ws.xtm[kk]; ws.r2[j] = ws.ytm[kk]; j++; } } if (j < 3 && n_ali > 3) d002t += 0.5; else { j_count = j; break; } } Superposition sup3; kabsch(ws.r1, ws.r2, j_count, 1, rms, sup3); tmscore2 = 0; for (int kk = 0; kk < n_ali; kk++) { auto xrot = apply_transform(ws.xtm[kk], sup3); double di = dist2(xrot, ws.ytm[kk]); tmscore2 += 1.0 / (1.0 + di / d02); } } else { tmscore1 = tmscore; tmscore2 = tmscore; } if (tmscore1 >= tmscore) tmscore = tmscore1; if (tmscore2 >= tmscore) tmscore = tmscore2; return tmscore; } // ======================================================================== // Needleman-Wunsch DP variants (from NW.h) // ======================================================================== // NW with precomputed score matrix static void nw_align_score( DPWorkspace& ws, int len1, int len2, double gap_open, std::vector& j2i) { for (int i = 0; i <= len1; i++) { ws.val(i, 0) = 0; ws.set_path(i, 0, false); } for (int j = 0; j <= len2; j++) { ws.val(0, j) = 0; ws.set_path(0, j, false); } for (int j = 0; j < len2; j++) j2i[j] = -1; for (int i = 1; i <= len1; i++) { for (int j = 1; j <= len2; j++) { double d = ws.val(i - 1, j - 1) + ws.score(i, j); double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (d >= h && d >= v) { ws.set_path(i, j, true); ws.val(i, j) = d; } else { ws.set_path(i, j, false); ws.val(i, j) = (v >= h) ? v : h; } } } // traceback int i = len1, j = len2; while (i > 0 && j > 0) { if (ws.path(i, j)) { j2i[j - 1] = i - 1; i--; j--; } else { double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (v >= h) j--; else i--; } } } // NW with geometric scoring using rotation static void nw_align_transform(DPWorkspace& ws, const std::vector& x, const std::vector& y, int len1, int len2, const Superposition& sup, double d02, double gap_open, std::vector& j2i) { for (int i = 0; i <= len1; i++) { ws.val(i, 0) = 0; ws.set_path(i, 0, false); } for (int j = 0; j <= len2; j++) { ws.val(0, j) = 0; ws.set_path(0, j, false); } for (int j = 0; j < len2; j++) j2i[j] = -1; for (int i = 1; i <= len1; i++) { auto xx = apply_transform(x[i - 1], sup); for (int j = 1; j <= len2; j++) { double dij = dist2(xx, y[j - 1]); double d = ws.val(i - 1, j - 1) + 1.0 / (1.0 + dij / d02); double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (d >= h && d >= v) { ws.set_path(i, j, true); ws.val(i, j) = d; } else { ws.set_path(i, j, false); ws.val(i, j) = (v >= h) ? v : h; } } } int i = len1, j = len2; while (i > 0 && j > 0) { if (ws.path(i, j)) { j2i[j - 1] = i - 1; i--; j--; } else { double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (v >= h) j--; else i--; } } } // NW with secondary structure matching static void nw_align_ss(DPWorkspace& ws, const std::vector& secx, const std::vector& secy, int len1, int len2, double gap_open, std::vector& j2i) { for (int i = 0; i <= len1; i++) { ws.val(i, 0) = 0; ws.set_path(i, 0, false); } for (int j = 0; j <= len2; j++) { ws.val(0, j) = 0; ws.set_path(0, j, false); } for (int j = 0; j < len2; j++) j2i[j] = -1; for (int i = 1; i <= len1; i++) { for (int j = 1; j <= len2; j++) { double d = ws.val(i - 1, j - 1) + 1.0 * (secx[i - 1] == secy[j - 1]); double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (d >= h && d >= v) { ws.set_path(i, j, true); ws.val(i, j) = d; } else { ws.set_path(i, j, false); ws.val(i, j) = (v >= h) ? v : h; } } } int i = len1, j = len2; while (i > 0 && j > 0) { if (ws.path(i, j)) { j2i[j - 1] = i - 1; i--; j--; } else { double h = ws.val(i - 1, j); if (ws.path(i - 1, j)) h += gap_open; double v = ws.val(i, j - 1); if (ws.path(i, j - 1)) v += gap_open; if (v >= h) j--; else i--; } } } // ======================================================================== // Initial alignment seeds // ======================================================================== // Seed 1: gapless threading static double get_initial(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, std::vector& y2x, double d0, double d0_search, bool fast_opt, Superposition& sup) { int min_len = std::min(xlen, ylen); int min_ali = min_len / 2; if (min_ali <= 5) min_ali = 5; int n1 = -ylen + min_ali; int n2 = xlen - min_ali; int k_best = n1; double tmscore_max = -1; for (int k = n1; k <= n2; k += (fast_opt ? 5 : 1)) { for (int j = 0; j < ylen; j++) { int i = j + k; y2x[j] = (i >= 0 && i < xlen) ? i : -1; } double tmscore = get_score_fast(ws, x, y, xlen, ylen, y2x, d0, d0_search, sup); if (tmscore >= tmscore_max) { tmscore_max = tmscore; k_best = k; } } for (int j = 0; j < ylen; j++) { int i = j + k_best; y2x[j] = (i >= 0 && i < xlen) ? i : -1; } return tmscore_max; } // Seed 2: secondary structure alignment static void get_initial_ss(DPWorkspace& ws, const std::vector& secx, const std::vector& secy, int xlen, int ylen, std::vector& y2x) { nw_align_ss(ws, secx, secy, xlen, ylen, -1.0, y2x); } // Seed 3: local fragment superposition static bool get_initial5(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, std::vector& y2x, double d0, double d0_search, bool fast_opt, double D0_MIN) { double rmsd; Superposition sup; double d01 = d0 + 1.5; if (d01 < D0_MIN) d01 = D0_MIN; double d02 = d01 * d01; double GLmax = 0; int aL = std::min(xlen, ylen); std::vector invmap(ylen, -1); int n_jump1; if (xlen > 250) n_jump1 = 45; else if (xlen > 200) n_jump1 = 35; else if (xlen > 150) n_jump1 = 25; else n_jump1 = 15; if (n_jump1 > xlen / 3) n_jump1 = xlen / 3; int n_jump2; if (ylen > 250) n_jump2 = 45; else if (ylen > 200) n_jump2 = 35; else if (ylen > 150) n_jump2 = 25; else n_jump2 = 15; if (n_jump2 > ylen / 3) n_jump2 = ylen / 3; int n_frag[2] = {20, 100}; if (n_frag[0] > aL / 3) n_frag[0] = aL / 3; if (n_frag[1] > aL / 2) n_frag[1] = aL / 2; if (fast_opt) { n_jump1 *= 5; n_jump2 *= 5; } bool flag = false; for (int i_frag = 0; i_frag < 2; i_frag++) { int m1 = xlen - n_frag[i_frag] + 1; int m2 = ylen - n_frag[i_frag] + 1; for (int i = 0; i < m1; i += n_jump1) { for (int j = 0; j < m2; j += n_jump2) { for (int k = 0; k < n_frag[i_frag]; k++) { ws.r1[k] = x[k + i]; ws.r2[k] = y[k + j]; } kabsch(ws.r1, ws.r2, n_frag[i_frag], 1, rmsd, sup); nw_align_transform(ws, x, y, xlen, ylen, sup, d02, 0.0, invmap); double GL = get_score_fast(ws, x, y, xlen, ylen, invmap, d0, d0_search, sup); if (GL > GLmax) { GLmax = GL; for (int ii = 0; ii < ylen; ii++) y2x[ii] = invmap[ii]; flag = true; } } } } return flag; } // Score matrix for ssplus seed static void score_matrix_rmsd_sec(DPWorkspace& ws, const std::vector& secx, const std::vector& secy, const std::vector& x, const std::vector& y, int xlen, int ylen, const std::vector& y2x, double D0_MIN, double d0) { Superposition sup; double rmsd; double d01 = d0 + 1.5; if (d01 < D0_MIN) d01 = D0_MIN; double d02 = d01 * d01; int k = 0; for (int j = 0; j < ylen; j++) { int i = y2x[j]; if (i >= 0) { ws.r1[k] = x[i]; ws.r2[k] = y[j]; k++; } } kabsch(ws.r1, ws.r2, k, 1, rmsd, sup); for (int ii = 0; ii < xlen; ii++) { auto xx = apply_transform(x[ii], sup); for (int jj = 0; jj < ylen; jj++) { double dij = dist2(xx, y[jj]); if (secx[ii] == secy[jj]) ws.score(ii + 1, jj + 1) = 1.0 / (1.0 + dij / d02) + 0.5; else ws.score(ii + 1, jj + 1) = 1.0 / (1.0 + dij / d02); } } } // Seed 4: secondary structure plus structural distance static void get_initial_ssplus(DPWorkspace& ws, const std::vector& secx, const std::vector& secy, const std::vector& x, const std::vector& y, int xlen, int ylen, const std::vector& y2x0, std::vector& y2x, double D0_MIN, double d0) { score_matrix_rmsd_sec(ws, secx, secy, x, y, xlen, ylen, y2x0, D0_MIN, d0); nw_align_score(ws, xlen, ylen, -1.0, y2x); } // Find max continuous fragment static void find_max_frag(const std::vector& x, int len, int& start_max, int& end_max, double dcu0, bool fast_opt) { int fra_min = fast_opt ? 8 : 4; int r_min = static_cast(len / 3.0); if (r_min > fra_min) r_min = fra_min; int Lfr_max = 0; double dcu_cut = dcu0 * dcu0; int inc = 0; while (Lfr_max < r_min) { Lfr_max = 0; int j = 1; int start = 0; for (int i = 1; i < len; i++) { if (dist2(x[i - 1], x[i]) < dcu_cut) { j++; if (i == len - 1) { if (j > Lfr_max) { Lfr_max = j; start_max = start; end_max = i; } j = 1; } } else { if (j > Lfr_max) { Lfr_max = j; start_max = start; end_max = i - 1; } j = 1; start = i; } } if (Lfr_max < r_min) { inc++; double dinc = std::pow(1.1, static_cast(inc)) * dcu0; dcu_cut = dinc * dinc; } } } // Seed 5: fragment gapless threading static double get_initial_fgt(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, std::vector& y2x, double d0, double d0_search, double dcu0, bool fast_opt, Superposition& sup) { int fra_min = fast_opt ? 8 : 4; int fra_min1 = fra_min - 1; int xstart = 0, ystart = 0, xend = 0, yend = 0; find_max_frag(x, xlen, xstart, xend, dcu0, fast_opt); find_max_frag(y, ylen, ystart, yend, dcu0, fast_opt); int Lx = xend - xstart + 1; int Ly = yend - ystart + 1; int L_fr = std::min(Lx, Ly); std::vector ifr(L_fr); std::vector y2x_(ylen, -1); if (Lx < Ly || (Lx == Ly && xlen <= ylen)) { for (int i = 0; i < L_fr; i++) ifr[i] = xstart + i; } else { for (int i = 0; i < L_fr; i++) ifr[i] = ystart + i; } int L0 = std::min(xlen, ylen); if (L_fr == L0) { int n1 = static_cast(L0 * 0.1); int n2 = static_cast(L0 * 0.89); int j = 0; for (int i = n1; i <= n2; i++) ifr[j++] = ifr[i]; L_fr = j; } double tmscore_max = -1; if (Lx < Ly || (Lx == Ly && xlen <= ylen)) { int L1 = L_fr; int min_len = std::min(L1, ylen); int min_ali = static_cast(min_len / 2.5); if (min_ali <= fra_min1) min_ali = fra_min1; int n1 = -ylen + min_ali; int n2 = L1 - min_ali; for (int k = n1; k <= n2; k += (fast_opt ? 3 : 1)) { for (int j = 0; j < ylen; j++) { int i = j + k; y2x_[j] = (i >= 0 && i < L1) ? ifr[i] : -1; } double tmscore = get_score_fast(ws, x, y, xlen, ylen, y2x_, d0, d0_search, sup); if (tmscore >= tmscore_max) { tmscore_max = tmscore; for (int j = 0; j < ylen; j++) y2x[j] = y2x_[j]; } } } else { int L2 = L_fr; int min_len = std::min(xlen, L2); int min_ali = static_cast(min_len / 2.5); if (min_ali <= fra_min1) min_ali = fra_min1; int n1 = -L2 + min_ali; int n2 = xlen - min_ali; for (int k = n1; k <= n2; k++) { for (int j = 0; j < ylen; j++) y2x_[j] = -1; for (int j = 0; j < L2; j++) { int i = j + k; if (i >= 0 && i < xlen) y2x_[ifr[j]] = i; } double tmscore = get_score_fast(ws, x, y, xlen, ylen, y2x_, d0, d0_search, sup); if (tmscore >= tmscore_max) { tmscore_max = tmscore; for (int j = 0; j < ylen; j++) y2x[j] = y2x_[j]; } } } return tmscore_max; } // ======================================================================== // DP_iter — iterative DP refinement (from TMalign.h:DP_iter) // ======================================================================== static double DP_iter(DPWorkspace& ws, const std::vector& x, const std::vector& y, int xlen, int ylen, Superposition& sup, std::vector& invmap0, int g1, int g2, int iteration_max, double local_d0_search, double D0_MIN, double Lnorm, double d0, double score_d8) { double gap_open[2] = {-0.6, 0}; double rmsd; std::vector invmap(ylen, -1); double tmscore_max = -1, tmscore_old = 0; int score_sum_method = 8, simplify_step = 40; double d02 = d0 * d0; for (int g = g1; g < g2; g++) { for (int iteration = 0; iteration < iteration_max; iteration++) { nw_align_transform(ws, x, y, xlen, ylen, sup, d02, gap_open[g], invmap); int k = 0; for (int j = 0; j < ylen; j++) { int i = invmap[j]; if (i >= 0) { ws.xtm[k] = x[i]; ws.ytm[k] = y[j]; k++; } } double tmscore = TMscore8_search(ws, k, sup, simplify_step, score_sum_method, local_d0_search, score_d8, d0, Lnorm); if (tmscore > tmscore_max) { tmscore_max = tmscore; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } if (iteration > 0) { if (std::fabs(tmscore_old - tmscore) < 0.000001) break; } tmscore_old = tmscore; } } return tmscore_max; } // ======================================================================== // Main orchestrator: tm_align // ======================================================================== TMAlignResult TMalign(const std::vector& target_ca, const std::vector& mobile_ca, const std::string& target_seq, const std::string& mobile_seq, bool fast) { // In USalign convention: x = mobile (structure to move), y = target // The invmap maps target indices to mobile indices: invmap[j_target] = // i_mobile const auto& x = mobile_ca; const auto& y = target_ca; const auto& seqx = mobile_seq; const auto& seqy = target_seq; int xlen = static_cast(x.size()); int ylen = static_cast(y.size()); TMAlignResult result; if (xlen < 3 || ylen < 3) return result; // Secondary structure std::vector secx, secy; assign_secondary_structure(x, secx); assign_secondary_structure(y, secy); // Allocate workspace DPWorkspace ws; ws.resize(xlen, ylen); // Parameters auto params = compute_search_params(xlen, ylen); int simplify_step = 40; int score_sum_method = 8; std::vector invmap0(ylen, -1); std::vector invmap(ylen, -1); double TMmax = -1; Superposition sup, sup_best; double ddcc = (params.Lnorm <= 40) ? 0.1 : 0.4; double local_d0_search = params.d0_search; // ============================================ // Seed 1: gapless threading // ============================================ get_initial( ws, x, y, xlen, ylen, invmap0, params.d0, params.d0_search, fast, sup); double TM = detailed_search(ws, x, y, xlen, ylen, invmap0, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0, params.Lnorm); if (TM > TMmax) TMmax = TM; TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30, local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } // ============================================ // Seed 2: secondary structure alignment // ============================================ get_initial_ss(ws, secx, secy, xlen, ylen, invmap); TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0, params.Lnorm); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } if (TM > TMmax * 0.2) { TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30, local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } } // ============================================ // Seed 3: local fragment superposition // ============================================ if (get_initial5(ws, x, y, xlen, ylen, invmap, params.d0, params.d0_search, fast, params.D0_MIN)) { TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0, params.Lnorm); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } if (TM > TMmax * ddcc) { TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, 2, local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } } } // ============================================ // Seed 4: SS + structural distance // ============================================ get_initial_ssplus(ws, secx, secy, x, y, xlen, ylen, invmap0, invmap, params.D0_MIN, params.d0); TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0, params.Lnorm); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } if (TM > TMmax * ddcc) { TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30, local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } } // ============================================ // Seed 5: fragment gapless threading // ============================================ get_initial_fgt(ws, x, y, xlen, ylen, invmap, params.d0, params.d0_search, params.dcu0, fast, sup); TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0, params.Lnorm); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } if (TM > TMmax * ddcc) { TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 1, 2, 2, local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8); if (TM > TMmax) { TMmax = TM; for (int i = 0; i < ylen; i++) invmap0[i] = invmap[i]; } } // ============================================ // Verify alignment exists // ============================================ { bool flag = false; for (int i = 0; i < ylen; i++) { if (invmap0[i] >= 0) { flag = true; break; } } if (!flag) return result; } // ============================================ // Final detailed search with simplify_step=1 // ============================================ simplify_step = fast ? 40 : 1; score_sum_method = 8; detailed_search(ws, x, y, xlen, ylen, invmap0, sup, simplify_step, score_sum_method, local_d0_search, params.score_d8, params.d0); // Apply rotation to mobile and select pairs within score_d8 { std::vector xt(xlen); do_rotation(x, xt, xlen, sup); std::vector m1, m2; // aligned indices int n_ali = 0; for (int j = 0; j < ylen; j++) { int i = invmap0[j]; if (i >= 0) { n_ali++; double d = std::sqrt(dist2(xt[i], y[j])); if (d <= params.score_d8) { m1.push_back(i); m2.push_back(j); ws.xtm[static_cast(m1.size()) - 1] = x[i]; ws.ytm[static_cast(m1.size()) - 1] = y[j]; ws.r1[static_cast(m1.size()) - 1] = xt[i]; ws.r2[static_cast(m1.size()) - 1] = y[j]; } } } int n_ali8 = static_cast(m1.size()); if (n_ali8 < 1) return result; // RMSD of aligned residues double rmsd0; Superposition sup_rmsd; kabsch(ws.r1, ws.r2, n_ali8, 0, rmsd0, sup_rmsd); rmsd0 = std::sqrt(rmsd0 / n_ali8); // ============================================ // Final TM-scores normalized by each length // ============================================ simplify_step = 1; score_sum_method = 0; Superposition sup_final; // Normalized by target length (ylen) double D0_MIN_f, Lnorm_f, d0_target, d0_search_f; compute_final_params( static_cast(ylen), D0_MIN_f, Lnorm_f, d0_target, d0_search_f); double TM1 = TMscore8_search(ws, n_ali8, sup_final, simplify_step, score_sum_method, d0_search_f, params.score_d8, d0_target, Lnorm_f); // Normalized by mobile length (xlen) double d0_mobile; compute_final_params( static_cast(xlen), D0_MIN_f, Lnorm_f, d0_mobile, d0_search_f); Superposition sup_mobile; double TM2 = TMscore8_search(ws, n_ali8, sup_mobile, simplify_step, score_sum_method, d0_search_f, params.score_d8, d0_mobile, Lnorm_f); // Use the rotation from target-normalized scoring for the final transform // (matches original TMalign behavior: t0, u0 come from TM1 computation) // ============================================ // Build alignment strings // ============================================ // d0 for alignment display (same as target normalization) double d0_out = d0_target; // Re-rotate with best transform for alignment string generation do_rotation(x, xt, xlen, sup_final); int ali_len = xlen + ylen; std::string seqxA(ali_len, '-'); std::string seqM(ali_len, ' '); std::string seqyA(ali_len, '-'); int kk = 0, i_old = 0, j_old = 0; double Liden = 0; for (int k = 0; k < n_ali8; k++) { for (int i = i_old; i < m1[k]; i++) { seqxA[kk] = (i < static_cast(seqx.size())) ? seqx[i] : 'X'; seqyA[kk] = '-'; seqM[kk] = ' '; kk++; } for (int j = j_old; j < m2[k]; j++) { seqxA[kk] = '-'; seqyA[kk] = (j < static_cast(seqy.size())) ? seqy[j] : 'X'; seqM[kk] = ' '; kk++; } char cx = (m1[k] < static_cast(seqx.size())) ? seqx[m1[k]] : 'X'; char cy = (m2[k] < static_cast(seqy.size())) ? seqy[m2[k]] : 'X'; seqxA[kk] = cx; seqyA[kk] = cy; Liden += (cx == cy) ? 1 : 0; double dd = std::sqrt(dist2(xt[m1[k]], y[m2[k]])); seqM[kk] = (dd < d0_out) ? ':' : '.'; kk++; i_old = m1[k] + 1; j_old = m2[k] + 1; } // tail for (int i = i_old; i < xlen; i++) { seqxA[kk] = (i < static_cast(seqx.size())) ? seqx[i] : 'X'; seqyA[kk] = '-'; seqM[kk] = ' '; kk++; } for (int j = j_old; j < ylen; j++) { seqxA[kk] = '-'; seqyA[kk] = (j < static_cast(seqy.size())) ? seqy[j] : 'X'; seqM[kk] = ' '; kk++; } // Fill result result.tm_score_target = TM1; result.tm_score_mobile = TM2; result.d0_target = d0_target; result.d0_mobile = d0_mobile; result.rmsd = rmsd0; result.aligned_length = n_ali8; result.seq_identity = (n_ali8 > 0) ? Liden / n_ali8 : 0.0; result.transform = sup_final; result.mobile_indices.assign(m1.begin(), m1.end()); result.target_indices.assign(m2.begin(), m2.end()); result.seq_mobile = seqxA.substr(0, kk); result.seq_target = seqyA.substr(0, kk); result.seq_match = seqM.substr(0, kk); } return result; } } // namespace pymol::usalign