mirror of
https://github.com/schrodinger/pymol-open-source.git
synced 2026-06-03 19:54:24 +08:00
1639 lines
45 KiB
C++
1639 lines
45 KiB
C++
/**
|
|
* TM-align algorithm implementation for PyMOL.
|
|
*
|
|
* Ported from USalign (Zhang lab) to modern C++17 with glm math types.
|
|
* Original algorithm: Zhang & Skolnick, Nucl. Acids Res. 33, 2302 (2005)
|
|
*
|
|
* Strips all file I/O — coordinates come from PyMOL selections.
|
|
* Replaces double** with std::vector<glm::dvec3>, NewArray/DeleteArray with
|
|
* RAII containers, scattered output params with result structs.
|
|
*/
|
|
|
|
/*
|
|
==============================================================================
|
|
US-align: universal structure alignment of monomeric and complex proteins
|
|
and nucleic acids
|
|
|
|
This program was written by Chengxin Zhang at Yang Zhang lab,
|
|
Department of Computational Medicine and Bioinformatics,
|
|
University of Michigan, 100 Washtenaw Ave, Ann Arbor, MI 48109-2218.
|
|
Please report issues to zhanglab@zhanggroup.org
|
|
|
|
References:
|
|
* C Zhang, M Shine, A Pyle, Y Zhang (2022) Nat Methods. 19(9), 1109-1115.
|
|
* C Zhang, L Freddolino, Y Zhang (2025) Nature Protocols,
|
|
https://doi.org/10.1038/s41596-025-01189-x
|
|
|
|
DISCLAIMER:
|
|
Permission to use, copy, modify, and distribute this program for
|
|
any purpose, with or without fee, is hereby granted, provided that
|
|
the notices on the head, the reference information, and this
|
|
copyright notice appear in all copies or substantial portions of
|
|
the Software. It is provided "as is" without express or implied
|
|
warranty.
|
|
===============================================================================
|
|
*/
|
|
|
|
#include "USalign.h"
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <numeric>
|
|
#include <vector>
|
|
|
|
#include <glm/glm.hpp>
|
|
|
|
namespace pymol::usalign
|
|
{
|
|
|
|
// ========================================================================
|
|
// DPWorkspace
|
|
// ========================================================================
|
|
|
|
void DPWorkspace::resize(int xlen, int ylen)
|
|
{
|
|
rows = xlen + 1;
|
|
cols = ylen + 1;
|
|
score_flat.assign(static_cast<size_t>(rows) * cols, 0.0);
|
|
val_flat.assign(static_cast<size_t>(rows) * cols, 0.0);
|
|
path_flat.assign(static_cast<size_t>(rows) * cols, false);
|
|
|
|
int minlen = std::min(xlen, ylen);
|
|
xtm.resize(minlen);
|
|
ytm.resize(minlen);
|
|
xt.resize(xlen);
|
|
r1.resize(minlen);
|
|
r2.resize(minlen);
|
|
}
|
|
|
|
// ========================================================================
|
|
// Parameter initialization (from param_set.h)
|
|
// ========================================================================
|
|
|
|
struct TMParameters {
|
|
double d0 = 0.0;
|
|
double d0_search = 0.0;
|
|
double Lnorm = 0.0;
|
|
double score_d8 = 0.0;
|
|
double D0_MIN = 0.0;
|
|
double dcu0 = 0.0;
|
|
};
|
|
|
|
static TMParameters compute_search_params(int xlen, int ylen)
|
|
{
|
|
TMParameters p;
|
|
p.D0_MIN = 0.5;
|
|
p.dcu0 = 4.25;
|
|
|
|
p.Lnorm = std::min(xlen, ylen);
|
|
if (p.Lnorm <= 19)
|
|
p.d0 = 0.168;
|
|
else
|
|
p.d0 = 1.24 * std::pow(p.Lnorm - 15.0, 1.0 / 3.0) - 1.8;
|
|
// During the search phase, d0 is inflated by 0.8 compared to final scoring.
|
|
// This wider distance cutoff helps the iterative refinement explore more seeds.
|
|
p.D0_MIN = p.d0 + 0.8;
|
|
p.d0 = p.D0_MIN;
|
|
|
|
p.d0_search = p.d0;
|
|
if (p.d0_search > 8.0)
|
|
p.d0_search = 8.0;
|
|
if (p.d0_search < 4.5)
|
|
p.d0_search = 4.5;
|
|
|
|
p.score_d8 = 1.5 * std::pow(p.Lnorm, 0.3) + 3.5;
|
|
return p;
|
|
}
|
|
|
|
static void compute_final_params(
|
|
double len, double& D0_MIN, double& Lnorm, double& d0, double& d0_search)
|
|
{
|
|
D0_MIN = 0.5;
|
|
Lnorm = len;
|
|
if (Lnorm <= 21)
|
|
d0 = 0.5;
|
|
else
|
|
d0 = 1.24 * std::pow(Lnorm - 15.0, 1.0 / 3.0) - 1.8;
|
|
if (d0 < D0_MIN)
|
|
d0 = D0_MIN;
|
|
d0_search = d0;
|
|
if (d0_search > 8.0)
|
|
d0_search = 8.0;
|
|
if (d0_search < 4.5)
|
|
d0_search = 4.5;
|
|
}
|
|
|
|
// ========================================================================
|
|
// Geometry utilities
|
|
// ========================================================================
|
|
|
|
static inline double dist2(const glm::dvec3& a, const glm::dvec3& b)
|
|
{
|
|
auto d = a - b;
|
|
return glm::dot(d, d);
|
|
}
|
|
|
|
static inline glm::dvec3 apply_transform(
|
|
const glm::dvec3& v, const Superposition& sup)
|
|
{
|
|
return sup.rotation * v + sup.translation;
|
|
}
|
|
|
|
static void do_rotation(const std::vector<glm::dvec3>& src,
|
|
std::vector<glm::dvec3>& dst, int n, const Superposition& sup)
|
|
{
|
|
for (int i = 0; i < n; i++)
|
|
dst[i] = apply_transform(src[i], sup);
|
|
}
|
|
|
|
// ========================================================================
|
|
// Kabsch superposition (from Kabsch.h)
|
|
// ========================================================================
|
|
|
|
static bool kabsch(const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int n, int mode, double& rms,
|
|
Superposition& sup)
|
|
{
|
|
double e0, rms1, d, h, g;
|
|
double cth, sth, sqrth, p, det, sigma;
|
|
double xc[3], yc[3];
|
|
double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6];
|
|
const double sqrt3 = 1.73205080756888;
|
|
const double tol = 0.01;
|
|
const int ip[] = {0, 1, 3, 1, 2, 4, 3, 4, 5};
|
|
const int ip2312[] = {1, 2, 0, 1};
|
|
|
|
int a_failed = 0, b_failed = 0;
|
|
const double epsilon = 0.00000001;
|
|
|
|
rms = 0;
|
|
rms1 = 0;
|
|
e0 = 0;
|
|
|
|
double t[3];
|
|
double u[3][3];
|
|
|
|
double s1[3] = {0, 0, 0};
|
|
double s2[3] = {0, 0, 0};
|
|
double sx[3] = {0, 0, 0};
|
|
double sy[3] = {0, 0, 0};
|
|
double sz[3] = {0, 0, 0};
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
xc[i] = 0.0;
|
|
yc[i] = 0.0;
|
|
t[i] = 0.0;
|
|
for (int j = 0; j < 3; j++) {
|
|
u[i][j] = (i == j) ? 1.0 : 0.0;
|
|
r[i][j] = 0.0;
|
|
a[i][j] = (i == j) ? 1.0 : 0.0;
|
|
}
|
|
}
|
|
|
|
if (n < 1)
|
|
return false;
|
|
|
|
// compute centers
|
|
for (int i = 0; i < n; i++) {
|
|
double c1[3] = {x[i].x, x[i].y, x[i].z};
|
|
double c2[3] = {y[i].x, y[i].y, y[i].z};
|
|
for (int j = 0; j < 3; j++) {
|
|
s1[j] += c1[j];
|
|
s2[j] += c2[j];
|
|
}
|
|
for (int j = 0; j < 3; j++) {
|
|
sx[j] += c1[0] * c2[j];
|
|
sy[j] += c1[1] * c2[j];
|
|
sz[j] += c1[2] * c2[j];
|
|
}
|
|
}
|
|
for (int i = 0; i < 3; i++) {
|
|
xc[i] = s1[i] / n;
|
|
yc[i] = s2[i] / n;
|
|
}
|
|
|
|
if (mode == 2 || mode == 0) {
|
|
for (int mm = 0; mm < n; mm++) {
|
|
double xx[3] = {x[mm].x, x[mm].y, x[mm].z};
|
|
double yy[3] = {y[mm].x, y[mm].y, y[mm].z};
|
|
for (int nn = 0; nn < 3; nn++)
|
|
e0 += (xx[nn] - xc[nn]) * (xx[nn] - xc[nn]) +
|
|
(yy[nn] - yc[nn]) * (yy[nn] - yc[nn]);
|
|
}
|
|
}
|
|
|
|
for (int j = 0; j < 3; j++) {
|
|
r[j][0] = sx[j] - s1[0] * s2[j] / n;
|
|
r[j][1] = sy[j] - s1[1] * s2[j] / n;
|
|
r[j][2] = sz[j] - s1[2] * s2[j] / n;
|
|
}
|
|
|
|
// compute determinant
|
|
det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1]) -
|
|
r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0]) +
|
|
r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]);
|
|
sigma = det;
|
|
|
|
// compute trans(r)*r
|
|
int m = 0;
|
|
for (int j = 0; j < 3; j++)
|
|
for (int i = 0; i <= j; i++)
|
|
rr[m++] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j];
|
|
|
|
double spur = (rr[0] + rr[2] + rr[5]) / 3.0;
|
|
double cof =
|
|
(((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5]) - rr[3] * rr[3]) +
|
|
rr[0] * rr[2]) -
|
|
rr[1] * rr[1]) /
|
|
3.0;
|
|
det = det * det;
|
|
|
|
for (int i = 0; i < 3; i++)
|
|
e[i] = spur;
|
|
|
|
if (spur > 0) {
|
|
d = spur * spur;
|
|
h = d - cof;
|
|
g = (spur * cof - det) / 2.0 - spur * h;
|
|
|
|
if (h > 0) {
|
|
sqrth = std::sqrt(h);
|
|
d = h * h * h - g * g;
|
|
if (d < 0.0)
|
|
d = 0.0;
|
|
d = std::atan2(std::sqrt(d), -g) / 3.0;
|
|
cth = sqrth * std::cos(d);
|
|
sth = sqrth * sqrt3 * std::sin(d);
|
|
e[0] = (spur + cth) + cth;
|
|
e[1] = (spur - cth) + sth;
|
|
e[2] = (spur - cth) - sth;
|
|
|
|
if (mode != 0) {
|
|
for (int l = 0; l < 3; l += 2) {
|
|
d = e[l];
|
|
ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4];
|
|
ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4];
|
|
ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3];
|
|
ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4];
|
|
ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3];
|
|
ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1];
|
|
|
|
for (int i = 0; i < 6; i++)
|
|
if (std::fabs(ss[i]) <= epsilon)
|
|
ss[i] = 0.0;
|
|
|
|
int j;
|
|
if (std::fabs(ss[0]) >= std::fabs(ss[2])) {
|
|
j = 0;
|
|
if (std::fabs(ss[0]) < std::fabs(ss[5]))
|
|
j = 2;
|
|
} else if (std::fabs(ss[2]) >= std::fabs(ss[5]))
|
|
j = 1;
|
|
else
|
|
j = 2;
|
|
|
|
d = 0.0;
|
|
j = 3 * j;
|
|
for (int i = 0; i < 3; i++) {
|
|
int k = ip[i + j];
|
|
a[i][l] = ss[k];
|
|
d += ss[k] * ss[k];
|
|
}
|
|
|
|
if (d > epsilon)
|
|
d = 1.0 / std::sqrt(d);
|
|
else
|
|
d = 0.0;
|
|
for (int i = 0; i < 3; i++)
|
|
a[i][l] *= d;
|
|
}
|
|
|
|
d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2];
|
|
int m1, m0;
|
|
if ((e[0] - e[1]) > (e[1] - e[2])) {
|
|
m1 = 2;
|
|
m0 = 0;
|
|
} else {
|
|
m1 = 0;
|
|
m0 = 2;
|
|
}
|
|
p = 0;
|
|
for (int i = 0; i < 3; i++) {
|
|
a[i][m1] -= d * a[i][m0];
|
|
p += a[i][m1] * a[i][m1];
|
|
}
|
|
if (p <= tol) {
|
|
p = 1.0;
|
|
int j = 0;
|
|
for (int i = 0; i < 3; i++) {
|
|
if (p < std::fabs(a[i][m0]))
|
|
continue;
|
|
p = std::fabs(a[i][m0]);
|
|
j = i;
|
|
}
|
|
int k = ip2312[j];
|
|
int l = ip2312[j + 1];
|
|
p = std::sqrt(a[k][m0] * a[k][m0] + a[l][m0] * a[l][m0]);
|
|
if (p > tol) {
|
|
a[j][m1] = 0.0;
|
|
a[k][m1] = -a[l][m0] / p;
|
|
a[l][m1] = a[k][m0] / p;
|
|
} else
|
|
a_failed = 1;
|
|
} else {
|
|
p = 1.0 / std::sqrt(p);
|
|
for (int i = 0; i < 3; i++)
|
|
a[i][m1] *= p;
|
|
}
|
|
if (a_failed != 1) {
|
|
a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2];
|
|
a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2];
|
|
a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (mode != 0 && a_failed != 1) {
|
|
for (int l = 0; l < 2; l++) {
|
|
d = 0.0;
|
|
for (int i = 0; i < 3; i++) {
|
|
b[i][l] = r[i][0] * a[0][l] + r[i][1] * a[1][l] + r[i][2] * a[2][l];
|
|
d += b[i][l] * b[i][l];
|
|
}
|
|
if (d > epsilon)
|
|
d = 1.0 / std::sqrt(d);
|
|
else
|
|
d = 0.0;
|
|
for (int i = 0; i < 3; i++)
|
|
b[i][l] *= d;
|
|
}
|
|
d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1];
|
|
p = 0.0;
|
|
for (int i = 0; i < 3; i++) {
|
|
b[i][1] -= d * b[i][0];
|
|
p += b[i][1] * b[i][1];
|
|
}
|
|
if (p <= tol) {
|
|
p = 1.0;
|
|
int j = 0;
|
|
for (int i = 0; i < 3; i++) {
|
|
if (p < std::fabs(b[i][0]))
|
|
continue;
|
|
p = std::fabs(b[i][0]);
|
|
j = i;
|
|
}
|
|
int k = ip2312[j];
|
|
int l = ip2312[j + 1];
|
|
p = std::sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]);
|
|
if (p > tol) {
|
|
b[j][1] = 0.0;
|
|
b[k][1] = -b[l][0] / p;
|
|
b[l][1] = b[k][0] / p;
|
|
} else
|
|
b_failed = 1;
|
|
} else {
|
|
p = 1.0 / std::sqrt(p);
|
|
for (int i = 0; i < 3; i++)
|
|
b[i][1] *= p;
|
|
}
|
|
if (b_failed != 1) {
|
|
b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0];
|
|
b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0];
|
|
b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0];
|
|
for (int i = 0; i < 3; i++)
|
|
for (int j = 0; j < 3; j++)
|
|
u[i][j] = b[i][0] * a[j][0] + b[i][1] * a[j][1] + b[i][2] * a[j][2];
|
|
}
|
|
for (int i = 0; i < 3; i++)
|
|
t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - u[i][2] * xc[2];
|
|
}
|
|
} else {
|
|
for (int i = 0; i < 3; i++)
|
|
t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - u[i][2] * xc[2];
|
|
}
|
|
|
|
// compute rms
|
|
for (int i = 0; i < 3; i++) {
|
|
if (e[i] < 0)
|
|
e[i] = 0;
|
|
e[i] = std::sqrt(e[i]);
|
|
}
|
|
d = e[2];
|
|
if (sigma < 0.0)
|
|
d = -d;
|
|
d = (d + e[1]) + e[0];
|
|
|
|
if (mode == 2 || mode == 0) {
|
|
rms1 = (e0 - d) - d;
|
|
if (rms1 < 0.0)
|
|
rms1 = 0.0;
|
|
}
|
|
|
|
rms = rms1;
|
|
|
|
// convert to glm types
|
|
sup.translation = glm::dvec3(t[0], t[1], t[2]);
|
|
sup.rotation = glm::dmat3(u[0][0], u[1][0], u[2][0], u[0][1], u[1][1],
|
|
u[2][1], u[0][2], u[1][2], u[2][2]);
|
|
|
|
return true;
|
|
}
|
|
|
|
// ========================================================================
|
|
// Secondary structure assignment (from se.h:make_sec)
|
|
// ========================================================================
|
|
|
|
static char sec_str_classify(double dis13, double dis14, double dis15,
|
|
double dis24, double dis25, double dis35)
|
|
{
|
|
double delta = 2.1;
|
|
if (std::fabs(dis15 - 6.37) < delta && std::fabs(dis14 - 5.18) < delta &&
|
|
std::fabs(dis25 - 5.18) < delta && std::fabs(dis13 - 5.45) < delta &&
|
|
std::fabs(dis24 - 5.45) < delta && std::fabs(dis35 - 5.45) < delta)
|
|
return 'H';
|
|
|
|
delta = 1.42;
|
|
if (std::fabs(dis15 - 13.0) < delta && std::fabs(dis14 - 10.4) < delta &&
|
|
std::fabs(dis25 - 10.4) < delta && std::fabs(dis13 - 6.1) < delta &&
|
|
std::fabs(dis24 - 6.1) < delta && std::fabs(dis35 - 6.1) < delta)
|
|
return 'E';
|
|
|
|
if (dis15 < 8)
|
|
return 'T';
|
|
return 'C';
|
|
}
|
|
|
|
static void assign_secondary_structure(
|
|
const std::vector<glm::dvec3>& ca, std::vector<char>& sec)
|
|
{
|
|
int len = static_cast<int>(ca.size());
|
|
sec.assign(len, 'C');
|
|
|
|
for (int i = 0; i < len; i++) {
|
|
int j1 = i - 2, j2 = i - 1, j3 = i, j4 = i + 1, j5 = i + 2;
|
|
if (j1 >= 0 && j5 < len) {
|
|
double d13 = std::sqrt(dist2(ca[j1], ca[j3]));
|
|
double d14 = std::sqrt(dist2(ca[j1], ca[j4]));
|
|
double d15 = std::sqrt(dist2(ca[j1], ca[j5]));
|
|
double d24 = std::sqrt(dist2(ca[j2], ca[j4]));
|
|
double d25 = std::sqrt(dist2(ca[j2], ca[j5]));
|
|
double d35 = std::sqrt(dist2(ca[j3], ca[j5]));
|
|
sec[i] = sec_str_classify(d13, d14, d15, d24, d25, d35);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Scoring functions (from TMalign.h:score_fun8)
|
|
// ========================================================================
|
|
|
|
// Lnorm > 0: normalize by Lnorm (used during main search)
|
|
// Lnorm <= 0: normalize by n_ali (used during standard/final scoring)
|
|
static int score_fun8(const std::vector<glm::dvec3>& xa,
|
|
const std::vector<glm::dvec3>& ya, int n_ali, double d,
|
|
std::vector<int>& i_ali, double& score1, int score_sum_method,
|
|
double score_d8, double d0, double Lnorm = 0.0)
|
|
{
|
|
double score_sum = 0;
|
|
double d_tmp = d * d;
|
|
double d02 = d0 * d0;
|
|
double score_d8_cut = score_d8 * score_d8;
|
|
|
|
int n_cut, inc = 0;
|
|
while (true) {
|
|
n_cut = 0;
|
|
score_sum = 0;
|
|
for (int i = 0; i < n_ali; i++) {
|
|
double di = dist2(xa[i], ya[i]);
|
|
if (di < d_tmp) {
|
|
i_ali[n_cut] = i;
|
|
n_cut++;
|
|
}
|
|
if (score_sum_method == 8) {
|
|
if (di <= score_d8_cut)
|
|
score_sum += 1.0 / (1.0 + di / d02);
|
|
} else
|
|
score_sum += 1.0 / (1.0 + di / d02);
|
|
}
|
|
if (n_cut < 3 && n_ali > 3) {
|
|
inc++;
|
|
double dinc = d + inc * 0.5;
|
|
d_tmp = dinc * dinc;
|
|
} else
|
|
break;
|
|
}
|
|
score1 = score_sum / (Lnorm > 0.0 ? Lnorm : n_ali);
|
|
return n_cut;
|
|
}
|
|
|
|
// ========================================================================
|
|
// TMscore8_search — iterative fragment refinement (from TMalign.h)
|
|
// ========================================================================
|
|
|
|
// Lnorm > 0: normalize by Lnorm (used during main search)
|
|
// Lnorm <= 0: normalize by n_ali (used during standard/final scoring)
|
|
static double TMscore8_search(DPWorkspace& ws, int Lali, Superposition& sup_out,
|
|
int simplify_step, int score_sum_method, double local_d0_search,
|
|
double score_d8, double d0, double Lnorm = 0.0)
|
|
{
|
|
double score_max = -1, score, rmsd;
|
|
const int n_it = 20;
|
|
const int n_init_max = 6;
|
|
int L_ini[6];
|
|
int L_ini_min = 4;
|
|
if (Lali < L_ini_min)
|
|
L_ini_min = Lali;
|
|
|
|
int n_init = 0;
|
|
int i;
|
|
for (i = 0; i < n_init_max - 1; i++) {
|
|
n_init++;
|
|
L_ini[i] = static_cast<int>(Lali / std::pow(2.0, static_cast<double>(i)));
|
|
if (L_ini[i] <= L_ini_min) {
|
|
L_ini[i] = L_ini_min;
|
|
break;
|
|
}
|
|
}
|
|
if (i == n_init_max - 1) {
|
|
n_init++;
|
|
L_ini[i] = L_ini_min;
|
|
}
|
|
|
|
std::vector<int> i_ali(Lali);
|
|
std::vector<int> k_ali(Lali);
|
|
Superposition sup_tmp;
|
|
|
|
for (int i_init = 0; i_init < n_init; i_init++) {
|
|
int L_frag = L_ini[i_init];
|
|
int iL_max = Lali - L_frag;
|
|
|
|
i = 0;
|
|
while (true) {
|
|
for (int k = 0; k < L_frag; k++) {
|
|
int kk = k + i;
|
|
ws.r1[k] = ws.xtm[kk];
|
|
ws.r2[k] = ws.ytm[kk];
|
|
k_ali[k] = kk;
|
|
}
|
|
kabsch(ws.r1, ws.r2, L_frag, 1, rmsd, sup_tmp);
|
|
do_rotation(ws.xtm, ws.xt, Lali, sup_tmp);
|
|
|
|
double d_cut = local_d0_search - 1;
|
|
int n_cut = score_fun8(ws.xt, ws.ytm, Lali, d_cut, i_ali, score,
|
|
score_sum_method, score_d8, d0, Lnorm);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
sup_out = sup_tmp;
|
|
}
|
|
|
|
// iterative refinement
|
|
d_cut = local_d0_search + 1;
|
|
for (int it = 0; it < n_it; it++) {
|
|
int ka = n_cut;
|
|
for (int k = 0; k < n_cut; k++) {
|
|
int m = i_ali[k];
|
|
ws.r1[k] = ws.xtm[m];
|
|
ws.r2[k] = ws.ytm[m];
|
|
k_ali[k] = m;
|
|
}
|
|
|
|
kabsch(ws.r1, ws.r2, n_cut, 1, rmsd, sup_tmp);
|
|
do_rotation(ws.xtm, ws.xt, Lali, sup_tmp);
|
|
n_cut = score_fun8(ws.xt, ws.ytm, Lali, d_cut, i_ali, score,
|
|
score_sum_method, score_d8, d0, Lnorm);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
sup_out = sup_tmp;
|
|
}
|
|
|
|
if (n_cut == ka) {
|
|
int k;
|
|
for (k = 0; k < n_cut; k++)
|
|
if (i_ali[k] != k_ali[k])
|
|
break;
|
|
if (k == n_cut)
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i < iL_max) {
|
|
i += simplify_step;
|
|
if (i > iL_max)
|
|
i = iL_max;
|
|
} else
|
|
break;
|
|
}
|
|
}
|
|
return score_max;
|
|
}
|
|
|
|
// ========================================================================
|
|
// Detailed search (from TMalign.h:detailed_search)
|
|
// ========================================================================
|
|
|
|
// Lnorm > 0: normalize by Lnorm (used during main search)
|
|
// Lnorm <= 0: normalize by n_ali (used during standard/final scoring)
|
|
static double detailed_search(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen,
|
|
const std::vector<int>& invmap, Superposition& sup, int simplify_step,
|
|
int score_sum_method, double local_d0_search, double score_d8, double d0,
|
|
double Lnorm = 0.0)
|
|
{
|
|
int k = 0;
|
|
for (int i = 0; i < ylen; i++) {
|
|
int j = invmap[i];
|
|
if (j >= 0) {
|
|
ws.xtm[k] = x[j];
|
|
ws.ytm[k] = y[i];
|
|
k++;
|
|
}
|
|
}
|
|
return TMscore8_search(
|
|
ws, k, sup, simplify_step, score_sum_method, local_d0_search,
|
|
score_d8, d0, Lnorm);
|
|
}
|
|
|
|
// ========================================================================
|
|
// Get score fast — quick 3-pass evaluation (from TMalign.h:get_score_fast)
|
|
// ========================================================================
|
|
|
|
static double get_score_fast(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen,
|
|
const std::vector<int>& invmap, double d0, double d0_search,
|
|
Superposition& sup)
|
|
{
|
|
double rms;
|
|
int k = 0;
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = invmap[j];
|
|
if (i >= 0) {
|
|
ws.r1[k] = x[i];
|
|
ws.r2[k] = y[j];
|
|
ws.xtm[k] = x[i];
|
|
ws.ytm[k] = y[j];
|
|
k++;
|
|
}
|
|
}
|
|
kabsch(ws.r1, ws.r2, k, 1, rms, sup);
|
|
|
|
double d02 = d0 * d0;
|
|
double d00 = d0_search;
|
|
double d002 = d00 * d00;
|
|
int n_ali = k;
|
|
|
|
std::vector<double> dis(n_ali);
|
|
double tmscore = 0;
|
|
for (int kk = 0; kk < n_ali; kk++) {
|
|
auto xrot = apply_transform(ws.xtm[kk], sup);
|
|
double di = dist2(xrot, ws.ytm[kk]);
|
|
dis[kk] = di;
|
|
tmscore += 1.0 / (1.0 + di / d02);
|
|
}
|
|
|
|
// second iteration
|
|
double d002t = d002;
|
|
{
|
|
std::vector<double> dis_sorted(dis.begin(), dis.end());
|
|
std::sort(dis_sorted.begin(), dis_sorted.end());
|
|
if (n_ali > 2 && d002t < dis_sorted[2])
|
|
d002t = dis_sorted[2];
|
|
}
|
|
while (true) {
|
|
int j = 0;
|
|
for (int kk = 0; kk < n_ali; kk++) {
|
|
if (dis[kk] <= d002t) {
|
|
ws.r1[j] = ws.xtm[kk];
|
|
ws.r2[j] = ws.ytm[kk];
|
|
j++;
|
|
}
|
|
}
|
|
if (j < 3 && n_ali > 3)
|
|
d002t += 0.5;
|
|
else
|
|
break;
|
|
}
|
|
|
|
double tmscore1, tmscore2;
|
|
int j_count;
|
|
{
|
|
// count how many passed
|
|
j_count = 0;
|
|
for (int kk = 0; kk < n_ali; kk++)
|
|
if (dis[kk] <= d002t)
|
|
j_count++;
|
|
}
|
|
|
|
if (n_ali != j_count) {
|
|
Superposition sup2;
|
|
kabsch(ws.r1, ws.r2, j_count, 1, rms, sup2);
|
|
tmscore1 = 0;
|
|
for (int kk = 0; kk < n_ali; kk++) {
|
|
auto xrot = apply_transform(ws.xtm[kk], sup2);
|
|
double di = dist2(xrot, ws.ytm[kk]);
|
|
dis[kk] = di;
|
|
tmscore1 += 1.0 / (1.0 + di / d02);
|
|
}
|
|
|
|
// third iteration
|
|
d002t = d002 + 1;
|
|
{
|
|
std::vector<double> dis_sorted(dis.begin(), dis.end());
|
|
std::sort(dis_sorted.begin(), dis_sorted.end());
|
|
if (n_ali > 2 && d002t < dis_sorted[2])
|
|
d002t = dis_sorted[2];
|
|
}
|
|
while (true) {
|
|
int j = 0;
|
|
for (int kk = 0; kk < n_ali; kk++) {
|
|
if (dis[kk] <= d002t) {
|
|
ws.r1[j] = ws.xtm[kk];
|
|
ws.r2[j] = ws.ytm[kk];
|
|
j++;
|
|
}
|
|
}
|
|
if (j < 3 && n_ali > 3)
|
|
d002t += 0.5;
|
|
else {
|
|
j_count = j;
|
|
break;
|
|
}
|
|
}
|
|
|
|
Superposition sup3;
|
|
kabsch(ws.r1, ws.r2, j_count, 1, rms, sup3);
|
|
tmscore2 = 0;
|
|
for (int kk = 0; kk < n_ali; kk++) {
|
|
auto xrot = apply_transform(ws.xtm[kk], sup3);
|
|
double di = dist2(xrot, ws.ytm[kk]);
|
|
tmscore2 += 1.0 / (1.0 + di / d02);
|
|
}
|
|
} else {
|
|
tmscore1 = tmscore;
|
|
tmscore2 = tmscore;
|
|
}
|
|
|
|
if (tmscore1 >= tmscore)
|
|
tmscore = tmscore1;
|
|
if (tmscore2 >= tmscore)
|
|
tmscore = tmscore2;
|
|
return tmscore;
|
|
}
|
|
|
|
// ========================================================================
|
|
// Needleman-Wunsch DP variants (from NW.h)
|
|
// ========================================================================
|
|
|
|
// NW with precomputed score matrix
|
|
static void nw_align_score(
|
|
DPWorkspace& ws, int len1, int len2, double gap_open, std::vector<int>& j2i)
|
|
{
|
|
for (int i = 0; i <= len1; i++) {
|
|
ws.val(i, 0) = 0;
|
|
ws.set_path(i, 0, false);
|
|
}
|
|
for (int j = 0; j <= len2; j++) {
|
|
ws.val(0, j) = 0;
|
|
ws.set_path(0, j, false);
|
|
}
|
|
for (int j = 0; j < len2; j++)
|
|
j2i[j] = -1;
|
|
|
|
for (int i = 1; i <= len1; i++) {
|
|
for (int j = 1; j <= len2; j++) {
|
|
double d = ws.val(i - 1, j - 1) + ws.score(i, j);
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
|
|
if (d >= h && d >= v) {
|
|
ws.set_path(i, j, true);
|
|
ws.val(i, j) = d;
|
|
} else {
|
|
ws.set_path(i, j, false);
|
|
ws.val(i, j) = (v >= h) ? v : h;
|
|
}
|
|
}
|
|
}
|
|
|
|
// traceback
|
|
int i = len1, j = len2;
|
|
while (i > 0 && j > 0) {
|
|
if (ws.path(i, j)) {
|
|
j2i[j - 1] = i - 1;
|
|
i--;
|
|
j--;
|
|
} else {
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
if (v >= h)
|
|
j--;
|
|
else
|
|
i--;
|
|
}
|
|
}
|
|
}
|
|
|
|
// NW with geometric scoring using rotation
|
|
static void nw_align_transform(DPWorkspace& ws,
|
|
const std::vector<glm::dvec3>& x, const std::vector<glm::dvec3>& y,
|
|
int len1, int len2, const Superposition& sup, double d02, double gap_open,
|
|
std::vector<int>& j2i)
|
|
{
|
|
for (int i = 0; i <= len1; i++) {
|
|
ws.val(i, 0) = 0;
|
|
ws.set_path(i, 0, false);
|
|
}
|
|
for (int j = 0; j <= len2; j++) {
|
|
ws.val(0, j) = 0;
|
|
ws.set_path(0, j, false);
|
|
}
|
|
for (int j = 0; j < len2; j++)
|
|
j2i[j] = -1;
|
|
|
|
for (int i = 1; i <= len1; i++) {
|
|
auto xx = apply_transform(x[i - 1], sup);
|
|
for (int j = 1; j <= len2; j++) {
|
|
double dij = dist2(xx, y[j - 1]);
|
|
double d = ws.val(i - 1, j - 1) + 1.0 / (1.0 + dij / d02);
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
|
|
if (d >= h && d >= v) {
|
|
ws.set_path(i, j, true);
|
|
ws.val(i, j) = d;
|
|
} else {
|
|
ws.set_path(i, j, false);
|
|
ws.val(i, j) = (v >= h) ? v : h;
|
|
}
|
|
}
|
|
}
|
|
|
|
int i = len1, j = len2;
|
|
while (i > 0 && j > 0) {
|
|
if (ws.path(i, j)) {
|
|
j2i[j - 1] = i - 1;
|
|
i--;
|
|
j--;
|
|
} else {
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
if (v >= h)
|
|
j--;
|
|
else
|
|
i--;
|
|
}
|
|
}
|
|
}
|
|
|
|
// NW with secondary structure matching
|
|
static void nw_align_ss(DPWorkspace& ws, const std::vector<char>& secx,
|
|
const std::vector<char>& secy, int len1, int len2, double gap_open,
|
|
std::vector<int>& j2i)
|
|
{
|
|
for (int i = 0; i <= len1; i++) {
|
|
ws.val(i, 0) = 0;
|
|
ws.set_path(i, 0, false);
|
|
}
|
|
for (int j = 0; j <= len2; j++) {
|
|
ws.val(0, j) = 0;
|
|
ws.set_path(0, j, false);
|
|
}
|
|
for (int j = 0; j < len2; j++)
|
|
j2i[j] = -1;
|
|
|
|
for (int i = 1; i <= len1; i++) {
|
|
for (int j = 1; j <= len2; j++) {
|
|
double d = ws.val(i - 1, j - 1) + 1.0 * (secx[i - 1] == secy[j - 1]);
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
|
|
if (d >= h && d >= v) {
|
|
ws.set_path(i, j, true);
|
|
ws.val(i, j) = d;
|
|
} else {
|
|
ws.set_path(i, j, false);
|
|
ws.val(i, j) = (v >= h) ? v : h;
|
|
}
|
|
}
|
|
}
|
|
|
|
int i = len1, j = len2;
|
|
while (i > 0 && j > 0) {
|
|
if (ws.path(i, j)) {
|
|
j2i[j - 1] = i - 1;
|
|
i--;
|
|
j--;
|
|
} else {
|
|
double h = ws.val(i - 1, j);
|
|
if (ws.path(i - 1, j))
|
|
h += gap_open;
|
|
double v = ws.val(i, j - 1);
|
|
if (ws.path(i, j - 1))
|
|
v += gap_open;
|
|
if (v >= h)
|
|
j--;
|
|
else
|
|
i--;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Initial alignment seeds
|
|
// ========================================================================
|
|
|
|
// Seed 1: gapless threading
|
|
static double get_initial(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen, std::vector<int>& y2x,
|
|
double d0, double d0_search, bool fast_opt, Superposition& sup)
|
|
{
|
|
int min_len = std::min(xlen, ylen);
|
|
int min_ali = min_len / 2;
|
|
if (min_ali <= 5)
|
|
min_ali = 5;
|
|
int n1 = -ylen + min_ali;
|
|
int n2 = xlen - min_ali;
|
|
|
|
int k_best = n1;
|
|
double tmscore_max = -1;
|
|
|
|
for (int k = n1; k <= n2; k += (fast_opt ? 5 : 1)) {
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = j + k;
|
|
y2x[j] = (i >= 0 && i < xlen) ? i : -1;
|
|
}
|
|
|
|
double tmscore =
|
|
get_score_fast(ws, x, y, xlen, ylen, y2x, d0, d0_search, sup);
|
|
if (tmscore >= tmscore_max) {
|
|
tmscore_max = tmscore;
|
|
k_best = k;
|
|
}
|
|
}
|
|
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = j + k_best;
|
|
y2x[j] = (i >= 0 && i < xlen) ? i : -1;
|
|
}
|
|
return tmscore_max;
|
|
}
|
|
|
|
// Seed 2: secondary structure alignment
|
|
static void get_initial_ss(DPWorkspace& ws, const std::vector<char>& secx,
|
|
const std::vector<char>& secy, int xlen, int ylen, std::vector<int>& y2x)
|
|
{
|
|
nw_align_ss(ws, secx, secy, xlen, ylen, -1.0, y2x);
|
|
}
|
|
|
|
// Seed 3: local fragment superposition
|
|
static bool get_initial5(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen, std::vector<int>& y2x,
|
|
double d0, double d0_search, bool fast_opt, double D0_MIN)
|
|
{
|
|
double rmsd;
|
|
Superposition sup;
|
|
|
|
double d01 = d0 + 1.5;
|
|
if (d01 < D0_MIN)
|
|
d01 = D0_MIN;
|
|
double d02 = d01 * d01;
|
|
|
|
double GLmax = 0;
|
|
int aL = std::min(xlen, ylen);
|
|
std::vector<int> invmap(ylen, -1);
|
|
|
|
int n_jump1;
|
|
if (xlen > 250)
|
|
n_jump1 = 45;
|
|
else if (xlen > 200)
|
|
n_jump1 = 35;
|
|
else if (xlen > 150)
|
|
n_jump1 = 25;
|
|
else
|
|
n_jump1 = 15;
|
|
if (n_jump1 > xlen / 3)
|
|
n_jump1 = xlen / 3;
|
|
|
|
int n_jump2;
|
|
if (ylen > 250)
|
|
n_jump2 = 45;
|
|
else if (ylen > 200)
|
|
n_jump2 = 35;
|
|
else if (ylen > 150)
|
|
n_jump2 = 25;
|
|
else
|
|
n_jump2 = 15;
|
|
if (n_jump2 > ylen / 3)
|
|
n_jump2 = ylen / 3;
|
|
|
|
int n_frag[2] = {20, 100};
|
|
if (n_frag[0] > aL / 3)
|
|
n_frag[0] = aL / 3;
|
|
if (n_frag[1] > aL / 2)
|
|
n_frag[1] = aL / 2;
|
|
|
|
if (fast_opt) {
|
|
n_jump1 *= 5;
|
|
n_jump2 *= 5;
|
|
}
|
|
|
|
bool flag = false;
|
|
for (int i_frag = 0; i_frag < 2; i_frag++) {
|
|
int m1 = xlen - n_frag[i_frag] + 1;
|
|
int m2 = ylen - n_frag[i_frag] + 1;
|
|
|
|
for (int i = 0; i < m1; i += n_jump1) {
|
|
for (int j = 0; j < m2; j += n_jump2) {
|
|
for (int k = 0; k < n_frag[i_frag]; k++) {
|
|
ws.r1[k] = x[k + i];
|
|
ws.r2[k] = y[k + j];
|
|
}
|
|
|
|
kabsch(ws.r1, ws.r2, n_frag[i_frag], 1, rmsd, sup);
|
|
nw_align_transform(ws, x, y, xlen, ylen, sup, d02, 0.0, invmap);
|
|
double GL =
|
|
get_score_fast(ws, x, y, xlen, ylen, invmap, d0, d0_search, sup);
|
|
if (GL > GLmax) {
|
|
GLmax = GL;
|
|
for (int ii = 0; ii < ylen; ii++)
|
|
y2x[ii] = invmap[ii];
|
|
flag = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return flag;
|
|
}
|
|
|
|
// Score matrix for ssplus seed
|
|
static void score_matrix_rmsd_sec(DPWorkspace& ws,
|
|
const std::vector<char>& secx, const std::vector<char>& secy,
|
|
const std::vector<glm::dvec3>& x, const std::vector<glm::dvec3>& y,
|
|
int xlen, int ylen, const std::vector<int>& y2x, double D0_MIN, double d0)
|
|
{
|
|
Superposition sup;
|
|
double rmsd;
|
|
double d01 = d0 + 1.5;
|
|
if (d01 < D0_MIN)
|
|
d01 = D0_MIN;
|
|
double d02 = d01 * d01;
|
|
|
|
int k = 0;
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = y2x[j];
|
|
if (i >= 0) {
|
|
ws.r1[k] = x[i];
|
|
ws.r2[k] = y[j];
|
|
k++;
|
|
}
|
|
}
|
|
kabsch(ws.r1, ws.r2, k, 1, rmsd, sup);
|
|
|
|
for (int ii = 0; ii < xlen; ii++) {
|
|
auto xx = apply_transform(x[ii], sup);
|
|
for (int jj = 0; jj < ylen; jj++) {
|
|
double dij = dist2(xx, y[jj]);
|
|
if (secx[ii] == secy[jj])
|
|
ws.score(ii + 1, jj + 1) = 1.0 / (1.0 + dij / d02) + 0.5;
|
|
else
|
|
ws.score(ii + 1, jj + 1) = 1.0 / (1.0 + dij / d02);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Seed 4: secondary structure plus structural distance
|
|
static void get_initial_ssplus(DPWorkspace& ws, const std::vector<char>& secx,
|
|
const std::vector<char>& secy, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen,
|
|
const std::vector<int>& y2x0, std::vector<int>& y2x, double D0_MIN,
|
|
double d0)
|
|
{
|
|
score_matrix_rmsd_sec(ws, secx, secy, x, y, xlen, ylen, y2x0, D0_MIN, d0);
|
|
nw_align_score(ws, xlen, ylen, -1.0, y2x);
|
|
}
|
|
|
|
// Find max continuous fragment
|
|
static void find_max_frag(const std::vector<glm::dvec3>& x, int len,
|
|
int& start_max, int& end_max, double dcu0, bool fast_opt)
|
|
{
|
|
int fra_min = fast_opt ? 8 : 4;
|
|
int r_min = static_cast<int>(len / 3.0);
|
|
if (r_min > fra_min)
|
|
r_min = fra_min;
|
|
|
|
int Lfr_max = 0;
|
|
double dcu_cut = dcu0 * dcu0;
|
|
int inc = 0;
|
|
|
|
while (Lfr_max < r_min) {
|
|
Lfr_max = 0;
|
|
int j = 1;
|
|
int start = 0;
|
|
for (int i = 1; i < len; i++) {
|
|
if (dist2(x[i - 1], x[i]) < dcu_cut) {
|
|
j++;
|
|
if (i == len - 1) {
|
|
if (j > Lfr_max) {
|
|
Lfr_max = j;
|
|
start_max = start;
|
|
end_max = i;
|
|
}
|
|
j = 1;
|
|
}
|
|
} else {
|
|
if (j > Lfr_max) {
|
|
Lfr_max = j;
|
|
start_max = start;
|
|
end_max = i - 1;
|
|
}
|
|
j = 1;
|
|
start = i;
|
|
}
|
|
}
|
|
if (Lfr_max < r_min) {
|
|
inc++;
|
|
double dinc = std::pow(1.1, static_cast<double>(inc)) * dcu0;
|
|
dcu_cut = dinc * dinc;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Seed 5: fragment gapless threading
|
|
static double get_initial_fgt(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen, std::vector<int>& y2x,
|
|
double d0, double d0_search, double dcu0, bool fast_opt, Superposition& sup)
|
|
{
|
|
int fra_min = fast_opt ? 8 : 4;
|
|
int fra_min1 = fra_min - 1;
|
|
|
|
int xstart = 0, ystart = 0, xend = 0, yend = 0;
|
|
find_max_frag(x, xlen, xstart, xend, dcu0, fast_opt);
|
|
find_max_frag(y, ylen, ystart, yend, dcu0, fast_opt);
|
|
|
|
int Lx = xend - xstart + 1;
|
|
int Ly = yend - ystart + 1;
|
|
int L_fr = std::min(Lx, Ly);
|
|
std::vector<int> ifr(L_fr);
|
|
std::vector<int> y2x_(ylen, -1);
|
|
|
|
if (Lx < Ly || (Lx == Ly && xlen <= ylen)) {
|
|
for (int i = 0; i < L_fr; i++)
|
|
ifr[i] = xstart + i;
|
|
} else {
|
|
for (int i = 0; i < L_fr; i++)
|
|
ifr[i] = ystart + i;
|
|
}
|
|
|
|
int L0 = std::min(xlen, ylen);
|
|
if (L_fr == L0) {
|
|
int n1 = static_cast<int>(L0 * 0.1);
|
|
int n2 = static_cast<int>(L0 * 0.89);
|
|
int j = 0;
|
|
for (int i = n1; i <= n2; i++)
|
|
ifr[j++] = ifr[i];
|
|
L_fr = j;
|
|
}
|
|
|
|
double tmscore_max = -1;
|
|
|
|
if (Lx < Ly || (Lx == Ly && xlen <= ylen)) {
|
|
int L1 = L_fr;
|
|
int min_len = std::min(L1, ylen);
|
|
int min_ali = static_cast<int>(min_len / 2.5);
|
|
if (min_ali <= fra_min1)
|
|
min_ali = fra_min1;
|
|
int n1 = -ylen + min_ali;
|
|
int n2 = L1 - min_ali;
|
|
|
|
for (int k = n1; k <= n2; k += (fast_opt ? 3 : 1)) {
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = j + k;
|
|
y2x_[j] = (i >= 0 && i < L1) ? ifr[i] : -1;
|
|
}
|
|
|
|
double tmscore =
|
|
get_score_fast(ws, x, y, xlen, ylen, y2x_, d0, d0_search, sup);
|
|
if (tmscore >= tmscore_max) {
|
|
tmscore_max = tmscore;
|
|
for (int j = 0; j < ylen; j++)
|
|
y2x[j] = y2x_[j];
|
|
}
|
|
}
|
|
} else {
|
|
int L2 = L_fr;
|
|
int min_len = std::min(xlen, L2);
|
|
int min_ali = static_cast<int>(min_len / 2.5);
|
|
if (min_ali <= fra_min1)
|
|
min_ali = fra_min1;
|
|
int n1 = -L2 + min_ali;
|
|
int n2 = xlen - min_ali;
|
|
|
|
for (int k = n1; k <= n2; k++) {
|
|
for (int j = 0; j < ylen; j++)
|
|
y2x_[j] = -1;
|
|
|
|
for (int j = 0; j < L2; j++) {
|
|
int i = j + k;
|
|
if (i >= 0 && i < xlen)
|
|
y2x_[ifr[j]] = i;
|
|
}
|
|
|
|
double tmscore =
|
|
get_score_fast(ws, x, y, xlen, ylen, y2x_, d0, d0_search, sup);
|
|
if (tmscore >= tmscore_max) {
|
|
tmscore_max = tmscore;
|
|
for (int j = 0; j < ylen; j++)
|
|
y2x[j] = y2x_[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
return tmscore_max;
|
|
}
|
|
|
|
// ========================================================================
|
|
// DP_iter — iterative DP refinement (from TMalign.h:DP_iter)
|
|
// ========================================================================
|
|
|
|
static double DP_iter(DPWorkspace& ws, const std::vector<glm::dvec3>& x,
|
|
const std::vector<glm::dvec3>& y, int xlen, int ylen, Superposition& sup,
|
|
std::vector<int>& invmap0, int g1, int g2, int iteration_max,
|
|
double local_d0_search, double D0_MIN, double Lnorm, double d0,
|
|
double score_d8)
|
|
{
|
|
double gap_open[2] = {-0.6, 0};
|
|
double rmsd;
|
|
std::vector<int> invmap(ylen, -1);
|
|
|
|
double tmscore_max = -1, tmscore_old = 0;
|
|
int score_sum_method = 8, simplify_step = 40;
|
|
double d02 = d0 * d0;
|
|
|
|
for (int g = g1; g < g2; g++) {
|
|
for (int iteration = 0; iteration < iteration_max; iteration++) {
|
|
nw_align_transform(ws, x, y, xlen, ylen, sup, d02, gap_open[g], invmap);
|
|
|
|
int k = 0;
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = invmap[j];
|
|
if (i >= 0) {
|
|
ws.xtm[k] = x[i];
|
|
ws.ytm[k] = y[j];
|
|
k++;
|
|
}
|
|
}
|
|
|
|
double tmscore = TMscore8_search(ws, k, sup, simplify_step,
|
|
score_sum_method, local_d0_search, score_d8, d0, Lnorm);
|
|
|
|
if (tmscore > tmscore_max) {
|
|
tmscore_max = tmscore;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
|
|
if (iteration > 0) {
|
|
if (std::fabs(tmscore_old - tmscore) < 0.000001)
|
|
break;
|
|
}
|
|
tmscore_old = tmscore;
|
|
}
|
|
}
|
|
return tmscore_max;
|
|
}
|
|
|
|
// ========================================================================
|
|
// Main orchestrator: tm_align
|
|
// ========================================================================
|
|
|
|
TMAlignResult TMalign(const std::vector<glm::dvec3>& target_ca,
|
|
const std::vector<glm::dvec3>& mobile_ca, const std::string& target_seq,
|
|
const std::string& mobile_seq, bool fast)
|
|
{
|
|
// In USalign convention: x = mobile (structure to move), y = target
|
|
// The invmap maps target indices to mobile indices: invmap[j_target] =
|
|
// i_mobile
|
|
const auto& x = mobile_ca;
|
|
const auto& y = target_ca;
|
|
const auto& seqx = mobile_seq;
|
|
const auto& seqy = target_seq;
|
|
int xlen = static_cast<int>(x.size());
|
|
int ylen = static_cast<int>(y.size());
|
|
|
|
TMAlignResult result;
|
|
|
|
if (xlen < 3 || ylen < 3)
|
|
return result;
|
|
|
|
// Secondary structure
|
|
std::vector<char> secx, secy;
|
|
assign_secondary_structure(x, secx);
|
|
assign_secondary_structure(y, secy);
|
|
|
|
// Allocate workspace
|
|
DPWorkspace ws;
|
|
ws.resize(xlen, ylen);
|
|
|
|
// Parameters
|
|
auto params = compute_search_params(xlen, ylen);
|
|
int simplify_step = 40;
|
|
int score_sum_method = 8;
|
|
|
|
std::vector<int> invmap0(ylen, -1);
|
|
std::vector<int> invmap(ylen, -1);
|
|
double TMmax = -1;
|
|
Superposition sup, sup_best;
|
|
|
|
double ddcc = (params.Lnorm <= 40) ? 0.1 : 0.4;
|
|
double local_d0_search = params.d0_search;
|
|
|
|
// ============================================
|
|
// Seed 1: gapless threading
|
|
// ============================================
|
|
get_initial(
|
|
ws, x, y, xlen, ylen, invmap0, params.d0, params.d0_search, fast, sup);
|
|
double TM = detailed_search(ws, x, y, xlen, ylen, invmap0, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0,
|
|
params.Lnorm);
|
|
if (TM > TMmax)
|
|
TMmax = TM;
|
|
|
|
TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30,
|
|
local_d0_search, params.D0_MIN, params.Lnorm, params.d0, params.score_d8);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
|
|
// ============================================
|
|
// Seed 2: secondary structure alignment
|
|
// ============================================
|
|
get_initial_ss(ws, secx, secy, xlen, ylen, invmap);
|
|
TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0,
|
|
params.Lnorm);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
if (TM > TMmax * 0.2) {
|
|
TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30,
|
|
local_d0_search, params.D0_MIN, params.Lnorm, params.d0,
|
|
params.score_d8);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
}
|
|
|
|
// ============================================
|
|
// Seed 3: local fragment superposition
|
|
// ============================================
|
|
if (get_initial5(ws, x, y, xlen, ylen, invmap, params.d0, params.d0_search,
|
|
fast, params.D0_MIN)) {
|
|
TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0,
|
|
params.Lnorm);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
if (TM > TMmax * ddcc) {
|
|
TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, 2, local_d0_search,
|
|
params.D0_MIN, params.Lnorm, params.d0, params.score_d8);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================
|
|
// Seed 4: SS + structural distance
|
|
// ============================================
|
|
get_initial_ssplus(ws, secx, secy, x, y, xlen, ylen, invmap0, invmap,
|
|
params.D0_MIN, params.d0);
|
|
TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0,
|
|
params.Lnorm);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
if (TM > TMmax * ddcc) {
|
|
TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 0, 2, fast ? 2 : 30,
|
|
local_d0_search, params.D0_MIN, params.Lnorm, params.d0,
|
|
params.score_d8);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
}
|
|
|
|
// ============================================
|
|
// Seed 5: fragment gapless threading
|
|
// ============================================
|
|
get_initial_fgt(ws, x, y, xlen, ylen, invmap, params.d0, params.d0_search,
|
|
params.dcu0, fast, sup);
|
|
TM = detailed_search(ws, x, y, xlen, ylen, invmap, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0,
|
|
params.Lnorm);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
if (TM > TMmax * ddcc) {
|
|
TM = DP_iter(ws, x, y, xlen, ylen, sup, invmap, 1, 2, 2, local_d0_search,
|
|
params.D0_MIN, params.Lnorm, params.d0, params.score_d8);
|
|
if (TM > TMmax) {
|
|
TMmax = TM;
|
|
for (int i = 0; i < ylen; i++)
|
|
invmap0[i] = invmap[i];
|
|
}
|
|
}
|
|
|
|
// ============================================
|
|
// Verify alignment exists
|
|
// ============================================
|
|
{
|
|
bool flag = false;
|
|
for (int i = 0; i < ylen; i++) {
|
|
if (invmap0[i] >= 0) {
|
|
flag = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!flag)
|
|
return result;
|
|
}
|
|
|
|
// ============================================
|
|
// Final detailed search with simplify_step=1
|
|
// ============================================
|
|
simplify_step = fast ? 40 : 1;
|
|
score_sum_method = 8;
|
|
detailed_search(ws, x, y, xlen, ylen, invmap0, sup, simplify_step,
|
|
score_sum_method, local_d0_search, params.score_d8, params.d0);
|
|
|
|
// Apply rotation to mobile and select pairs within score_d8
|
|
{
|
|
std::vector<glm::dvec3> xt(xlen);
|
|
do_rotation(x, xt, xlen, sup);
|
|
|
|
std::vector<int> m1, m2; // aligned indices
|
|
int n_ali = 0;
|
|
for (int j = 0; j < ylen; j++) {
|
|
int i = invmap0[j];
|
|
if (i >= 0) {
|
|
n_ali++;
|
|
double d = std::sqrt(dist2(xt[i], y[j]));
|
|
if (d <= params.score_d8) {
|
|
m1.push_back(i);
|
|
m2.push_back(j);
|
|
|
|
ws.xtm[static_cast<int>(m1.size()) - 1] = x[i];
|
|
ws.ytm[static_cast<int>(m1.size()) - 1] = y[j];
|
|
ws.r1[static_cast<int>(m1.size()) - 1] = xt[i];
|
|
ws.r2[static_cast<int>(m1.size()) - 1] = y[j];
|
|
}
|
|
}
|
|
}
|
|
int n_ali8 = static_cast<int>(m1.size());
|
|
|
|
if (n_ali8 < 1)
|
|
return result;
|
|
|
|
// RMSD of aligned residues
|
|
double rmsd0;
|
|
Superposition sup_rmsd;
|
|
kabsch(ws.r1, ws.r2, n_ali8, 0, rmsd0, sup_rmsd);
|
|
rmsd0 = std::sqrt(rmsd0 / n_ali8);
|
|
|
|
// ============================================
|
|
// Final TM-scores normalized by each length
|
|
// ============================================
|
|
simplify_step = 1;
|
|
score_sum_method = 0;
|
|
Superposition sup_final;
|
|
|
|
// Normalized by target length (ylen)
|
|
double D0_MIN_f, Lnorm_f, d0_target, d0_search_f;
|
|
compute_final_params(
|
|
static_cast<double>(ylen), D0_MIN_f, Lnorm_f, d0_target, d0_search_f);
|
|
double TM1 = TMscore8_search(ws, n_ali8, sup_final, simplify_step,
|
|
score_sum_method, d0_search_f, params.score_d8, d0_target, Lnorm_f);
|
|
|
|
// Normalized by mobile length (xlen)
|
|
double d0_mobile;
|
|
compute_final_params(
|
|
static_cast<double>(xlen), D0_MIN_f, Lnorm_f, d0_mobile, d0_search_f);
|
|
Superposition sup_mobile;
|
|
double TM2 = TMscore8_search(ws, n_ali8, sup_mobile, simplify_step,
|
|
score_sum_method, d0_search_f, params.score_d8, d0_mobile, Lnorm_f);
|
|
|
|
// Use the rotation from target-normalized scoring for the final transform
|
|
// (matches original TMalign behavior: t0, u0 come from TM1 computation)
|
|
|
|
// ============================================
|
|
// Build alignment strings
|
|
// ============================================
|
|
// d0 for alignment display (same as target normalization)
|
|
double d0_out = d0_target;
|
|
|
|
// Re-rotate with best transform for alignment string generation
|
|
do_rotation(x, xt, xlen, sup_final);
|
|
|
|
int ali_len = xlen + ylen;
|
|
std::string seqxA(ali_len, '-');
|
|
std::string seqM(ali_len, ' ');
|
|
std::string seqyA(ali_len, '-');
|
|
|
|
int kk = 0, i_old = 0, j_old = 0;
|
|
double Liden = 0;
|
|
for (int k = 0; k < n_ali8; k++) {
|
|
for (int i = i_old; i < m1[k]; i++) {
|
|
seqxA[kk] = (i < static_cast<int>(seqx.size())) ? seqx[i] : 'X';
|
|
seqyA[kk] = '-';
|
|
seqM[kk] = ' ';
|
|
kk++;
|
|
}
|
|
for (int j = j_old; j < m2[k]; j++) {
|
|
seqxA[kk] = '-';
|
|
seqyA[kk] = (j < static_cast<int>(seqy.size())) ? seqy[j] : 'X';
|
|
seqM[kk] = ' ';
|
|
kk++;
|
|
}
|
|
|
|
char cx = (m1[k] < static_cast<int>(seqx.size())) ? seqx[m1[k]] : 'X';
|
|
char cy = (m2[k] < static_cast<int>(seqy.size())) ? seqy[m2[k]] : 'X';
|
|
seqxA[kk] = cx;
|
|
seqyA[kk] = cy;
|
|
Liden += (cx == cy) ? 1 : 0;
|
|
double dd = std::sqrt(dist2(xt[m1[k]], y[m2[k]]));
|
|
seqM[kk] = (dd < d0_out) ? ':' : '.';
|
|
kk++;
|
|
i_old = m1[k] + 1;
|
|
j_old = m2[k] + 1;
|
|
}
|
|
// tail
|
|
for (int i = i_old; i < xlen; i++) {
|
|
seqxA[kk] = (i < static_cast<int>(seqx.size())) ? seqx[i] : 'X';
|
|
seqyA[kk] = '-';
|
|
seqM[kk] = ' ';
|
|
kk++;
|
|
}
|
|
for (int j = j_old; j < ylen; j++) {
|
|
seqxA[kk] = '-';
|
|
seqyA[kk] = (j < static_cast<int>(seqy.size())) ? seqy[j] : 'X';
|
|
seqM[kk] = ' ';
|
|
kk++;
|
|
}
|
|
|
|
// Fill result
|
|
result.tm_score_target = TM1;
|
|
result.tm_score_mobile = TM2;
|
|
result.d0_target = d0_target;
|
|
result.d0_mobile = d0_mobile;
|
|
result.rmsd = rmsd0;
|
|
result.aligned_length = n_ali8;
|
|
result.seq_identity = (n_ali8 > 0) ? Liden / n_ali8 : 0.0;
|
|
result.transform = sup_final;
|
|
result.mobile_indices.assign(m1.begin(), m1.end());
|
|
result.target_indices.assign(m2.begin(), m2.end());
|
|
result.seq_mobile = seqxA.substr(0, kk);
|
|
result.seq_target = seqyA.substr(0, kk);
|
|
result.seq_match = seqM.substr(0, kk);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
} // namespace pymol::usalign
|