PYMOL-5418: Add USAlign implementation for structural alignment

This commit is contained in:
Jarrett Johnson
2026-02-23 11:07:34 -05:00
committed by Jarrett Johnson
parent 841c094393
commit 4769e62c02
10 changed files with 2145 additions and 2 deletions

1638
layer2/USalign.cpp Normal file

File diff suppressed because it is too large Load Diff

73
layer2/USalign.h Normal file
View File

@@ -0,0 +1,73 @@
#pragma once
#include <cstddef>
#include <string>
#include <vector>

#include <glm/glm.hpp>
namespace pymol::usalign {
/// Rigid-body transform (rotation followed by translation) reported by the
/// aligner. A default-constructed value is the identity transform: zero
/// translation and a unit-diagonal rotation matrix.
struct Superposition {
  glm::dvec3 translation = glm::dvec3(0.0); ///< translation component
  glm::dmat3 rotation = glm::dmat3(1.0);    ///< rotation component
};
/// Everything TMalign() reports about one pairwise alignment. All numeric
/// members start at zero and all containers start empty.
struct TMAlignResult {
  /// TM-score normalized by mobile length.
  double tm_score_mobile{0.0};
  /// TM-score normalized by target length.
  double tm_score_target{0.0};
  /// d0 used for target normalization.
  double d0_target{0.0};
  /// d0 used for mobile normalization.
  double d0_mobile{0.0};
  double rmsd{0.0};
  /// Number of aligned pairs within the distance cutoff.
  int aligned_length{0};
  double seq_identity{0.0};
  /// Superposition associated with this alignment.
  Superposition transform;
  /// Paired residue indices into the mobile CA array.
  std::vector<int> mobile_indices;
  /// Paired residue indices into the target CA array.
  std::vector<int> target_indices;
  /// Alignment string for mobile.
  std::string seq_mobile;
  /// Alignment string for target.
  std::string seq_target;
  /// Per-column match line: ':' close, '.' far, ' ' gap.
  std::string seq_match;
};
// DP workspace — single allocation reused across all seeds.
//
// The score/val/path matrices are stored flattened in row-major order with
// `cols` entries per row; element (i, j) lives at flat position i * cols + j.
struct DPWorkspace {
  std::vector<double> score_flat;
  std::vector<double> val_flat;
  std::vector<char> path_flat; // one byte per cell; non-zero means "true"
  int rows = 0;
  int cols = 0;

  // Scratch buffers for TMscore8_search and scoring.
  // xtm, ytm, r1, r2 are sized to min(xlen, ylen); xt is sized to xlen.
  // All score_fun8 n_cut values are bounded by min(xlen, ylen).
  std::vector<glm::dvec3> xtm, ytm, xt, r1, r2;

  void resize(int xlen, int ylen);

  // Flat position of cell (i, j).
  //
  // Computed in std::size_t instead of int: `i * cols + j` evaluated in int
  // overflows (undefined behaviour) once the matrix holds more than INT_MAX
  // cells, which large structures can reach. Unsigned arithmetic is modular,
  // so every result the int expression could represent is reproduced exactly.
  std::size_t flat_index(int i, int j) const
  {
    return static_cast<std::size_t>(i) * static_cast<std::size_t>(cols) +
           static_cast<std::size_t>(j);
  }

  // Mutable access to the score matrix cell (i, j).
  double& score(int i, int j) { return score_flat[flat_index(i, j)]; }
  // Mutable access to the val matrix cell (i, j).
  double& val(int i, int j) { return val_flat[flat_index(i, j)]; }
  // Read the path flag of cell (i, j).
  bool path(int i, int j) const { return path_flat[flat_index(i, j)] != 0; }
  // Store the path flag of cell (i, j) as 1 (true) or 0 (false).
  void set_path(int i, int j, bool v) { path_flat[flat_index(i, j)] = v ? 1 : 0; }
};
/**
 * Perform TM-score structural alignment between two protein structures.
 *
 * Note the argument order: the target (fixed) structure comes first, the
 * mobile structure second, for both the coordinates and the sequences.
 *
 * @param target_ca Target structure CA coordinates (remains fixed)
 * @param mobile_ca Mobile structure CA coordinates (will be aligned to target)
 * @param target_seq Target sequence (single-letter amino acid codes)
 * @param mobile_seq Mobile sequence (single-letter amino acid codes)
 * @param fast Use fast mode with fewer iterations (default: false)
 * @return TMAlignResult containing TM-scores, RMSD, alignment, and transform
 *
 * @note TM-score ranges from 0 to 1; score > 0.5 indicates same fold
 * @note Complexity: O(n²) where n = min(target_len, mobile_len)
 * @note may use a lot of memory for structures above 10K residues.
 * @note NOTE(review): each sequence presumably needs one character per entry
 *       of the matching CA array — confirm against the implementation.
 */
TMAlignResult TMalign(
const std::vector<glm::dvec3>& target_ca,
const std::vector<glm::dvec3>& mobile_ca,
const std::string& target_seq,
const std::string& mobile_seq,
bool fast = false);
} // namespace pymol::usalign

View File

@@ -77,6 +77,7 @@
#include "SceneRay.h"
#include "ScrollBar.h"
#include "SculptCache.h"
#include "Seeker.h"
#include "Selector.h"
#include "Seq.h"
#include "Setting.h"
@@ -114,6 +115,7 @@
#include "ce_types.h"
#endif
#include <glm/gtc/quaternion.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <glm/vec3.hpp>
@@ -17560,3 +17562,197 @@ pymol::Result<std::unordered_set<const pymol::CObject*>> ExecutiveGetObjectDeps(
obj_set.erase(&obj);
return obj_set;
}
/**
 * Run TM-align on two selections and return results.
 *
 * Only atoms flagged as guide atoms are used; each contributes one coordinate
 * and one sequence character. Both selections need at least 3 guide atoms.
 *
 * @param mobile_sele mobile selection (will be transformed)
 * @param target_sele target selection (stays fixed)
 * @param mobile_state state of mobile selection (0-based)
 * @param target_state state of target selection (0-based)
 * @param quiet suppress output
 * @param transform apply superposition transform
 * @param oname name for alignment object (empty = don't create)
 * @param fast use fast mode (fewer iterations)
 * @return the alignment result, or an error when a selection is invalid, has
 *         fewer than 3 guide atoms, or no alignment was found
 */
pymol::Result<pymol::usalign::TMAlignResult> ExecutiveUSalign(PyMOLGlobals* G,
    const char* mobile_sele, const char* target_sele, int mobile_state,
    int target_state, int quiet, int transform, const char* oname, int fast)
{
  // Resolve selections
  auto sele_mobile = SelectorIndexByName(G, mobile_sele);
  if (sele_mobile < 0)
    return pymol::make_error("Invalid mobile selection: ", mobile_sele);
  auto sele_target = SelectorIndexByName(G, target_sele);
  if (sele_target < 0)
    return pymol::make_error("Invalid target selection: ", target_sele);

  // Extract CA coordinates and sequences
  struct ResidueInfo {
    glm::dvec3 coord;
    char seq_char;
    AtomInfoType* ai;
  };
  auto extract_ca = [&](SelectorID_t sele,
                        int state) -> std::vector<ResidueInfo> {
    std::vector<ResidueInfo> residues;
    SeleCoordIterator iter(G, sele, state);
    while (iter.next()) {
      auto* ai = iter.getAtomInfo();
      if (ai->flags & cAtomFlag_guide) {
        float* c = iter.getCoord();
        ResidueInfo ri;
        ri.coord = glm::dvec3(c[0], c[1], c[2]);
        ri.seq_char = SeekerGetAbbr(G, LexStr(G, ai->resn), 'O', 'X');
        ri.ai = ai;
        residues.push_back(ri);
      }
    }
    return residues;
  };
  auto mobile_res = extract_ca(sele_mobile, mobile_state);
  auto target_res = extract_ca(sele_target, target_state);
  if (mobile_res.size() < 3) {
    return pymol::make_error("Mobile selection has fewer than 3 guide atoms (",
        mobile_res.size(), ")");
  }
  if (target_res.size() < 3) {
    return pymol::make_error("Target selection has fewer than 3 guide atoms (",
        target_res.size(), ")");
  }

  // Build coordinate vectors and sequences
  std::vector<glm::dvec3> mobile_ca, target_ca;
  std::string mobile_seq, target_seq;
  mobile_ca.reserve(mobile_res.size());
  target_ca.reserve(target_res.size());
  mobile_seq.reserve(mobile_res.size());
  target_seq.reserve(target_res.size());
  for (const auto& r : mobile_res) {
    mobile_ca.push_back(r.coord);
    mobile_seq.push_back(r.seq_char);
  }
  for (const auto& r : target_res) {
    target_ca.push_back(r.coord);
    target_seq.push_back(r.seq_char);
  }

  // Run TM-align (note: target first, mobile second)
  auto result = pymol::usalign::TMalign(
      target_ca, mobile_ca, target_seq, mobile_seq, fast != 0);
  if (result.aligned_length < 1) {
    return pymol::make_error("TM-align failed to find any alignment");
  }

  // Print results
  if (!quiet) {
    PRINTFB(G, FB_Executive, FB_Results)
    " USalign: TM-score= %6.4f (normalized by target, N=%d, d0=%.2f)\n",
        result.tm_score_target, static_cast<int>(target_ca.size()),
        result.d0_target ENDFB(G);
    PRINTFB(G, FB_Executive, FB_Results)
    " USalign: TM-score= %6.4f (normalized by mobile, N=%d, d0=%.2f)\n",
        result.tm_score_mobile, static_cast<int>(mobile_ca.size()),
        result.d0_mobile ENDFB(G);
    PRINTFB(G, FB_Executive, FB_Results)
    " USalign: Aligned length= %d, RMSD= %5.2f, Seq_ID=n_identical/n_aligned= "
    "%4.3f\n",
        result.aligned_length, result.rmsd, result.seq_identity ENDFB(G);
  }

  // Both post-processing steps act on the same pair of objects, so resolve
  // them once, and only when one of the steps was actually requested.
  const bool want_object = oname && oname[0];
  ObjectMolecule* mobile_obj = nullptr;
  ObjectMolecule* target_obj = nullptr;
  if (transform || want_object) {
    // Note: Only the first object in the mobile selection is used,
    // matching ExecutiveAlign behavior for multi-object selections.
    mobile_obj = SelectorGetFirstObjectMolecule(G, sele_mobile);
    target_obj = SelectorGetSingleObjectMolecule(G, sele_target);
  }

  // Apply transform to mobile object
  if (transform) {
    // Convert double-precision Superposition to float TTT
    // USalign convention: y_aligned = R * x + t
    // where x = mobile coords, y = target coords
    // The rotation R and translation t transform mobile -> target space
    const auto& sup = result.transform;

    // Build a legacy-style 16-float TTT matrix
    // TTT format: [R00 R01 R02 pre_x] [R10 R11 R12 pre_y]
    // [R20 R21 R22 pre_z] [tx ty tz 1]
    // where pre is the pre-translation (origin), and t is post-translation
    // For a simple rotation+translation (no origin): pre=0, R=rotation,
    // t=translation
    glm::mat3 rot_f(sup.rotation);
    glm::quat q = glm::quat_cast(rot_f);
    glm::vec3 t(sup.translation);

    // Create TTT: pretranslate=0, rotate=q, posttranslate=t
    pymol::TTT ttt(glm::vec3(0.0f), q, t);

    // Convert to legacy float[16] format for ExecuteCombineObjectTTT
    auto legacy = pymol::TTT::as_pymol_2_legacy(ttt);
    float tttf[16];
    std::memcpy(tttf, glm::value_ptr(legacy), 16 * sizeof(float));

    // Follow the same pattern as ExecutiveAlign:
    // 1. Copy target's TTT and state matrix to mobile (reset to same frame)
    // 2. Combine the alignment transform (reverse_order=true)
    if (mobile_obj && target_obj) {
      ExecutiveMatrixCopy(G, target_obj->Name, mobile_obj->Name, 1, 1,
          target_state, mobile_state, false, 0, quiet);
      ExecutiveMatrixCopy(G, target_obj->Name, mobile_obj->Name, 2, 2,
          target_state, mobile_state, false, 0, quiet);
      ExecutiveCombineObjectTTT(G, mobile_obj->Name, tttf, true, -1);
    } else {
      // The scores above are still valid, but the caller asked for a
      // superposition that cannot be applied; say so instead of silently
      // leaving the mobile object where it was.
      PRINTFB(G, FB_Executive, FB_Warnings)
      " USalign-Warning: transform not applied; could not resolve the mobile "
      "and target objects from the selections.\n" ENDFB(G);
    }
  }

  // Create alignment object
  if (want_object) {
    int align_state = target_state;
    if (align_state < 0) {
      align_state = SceneGetState(G);
    }
    if (target_obj && mobile_obj) {
      const int n_mobile = static_cast<int>(mobile_res.size());
      const int n_target = static_cast<int>(target_res.size());

      // aligned_length is >= 1 here (checked after TMalign). Do not assume
      // the index vectors are at least that long: clamp the pair count so the
      // loop below can never read past the end of either vector.
      std::size_t n_pair = static_cast<std::size_t>(result.aligned_length);
      if (n_pair > result.mobile_indices.size())
        n_pair = result.mobile_indices.size();
      if (n_pair > result.target_indices.size())
        n_pair = result.target_indices.size();

      // Three ints per pair: target ID, mobile ID, 0 separator. Sized from
      // aligned_length as before; slots of skipped pairs are left untouched.
      pymol::vla<int> align_vla(result.aligned_length * 3);
      int* id_p = align_vla.data();
      for (std::size_t k = 0; k < n_pair; ++k) {
        const int mi = result.mobile_indices[k];
        const int ti = result.target_indices[k];
        // Reject indices outside the residue arrays on either side; a
        // negative index would otherwise index before the array start.
        if (mi < 0 || mi >= n_mobile || ti < 0 || ti >= n_target)
          continue;
        id_p[0] = AtomInfoCheckUniqueID(G, target_res[ti].ai);
        id_p[1] = AtomInfoCheckUniqueID(G, mobile_res[mi].ai);
        id_p[2] = 0;
        id_p += 3;
      }

      // Reuse an existing alignment object of that name; anything else with
      // the same name is deleted so a fresh alignment object can replace it.
      ObjectAlignment* obj = nullptr;
      {
        pymol::CObject* execObj = ExecutiveFindObjectByName(G, oname);
        if (execObj && execObj->type != cObjectAlignment) {
          ExecutiveDelete(G, oname);
        } else {
          obj = dynamic_cast<ObjectAlignment*>(execObj);
        }
      }
      obj = ObjectAlignmentDefine(
          G, obj, align_vla, align_state, true, target_obj, mobile_obj);
      obj->Color = ColorGetIndex(G, "yellow");
      ObjectSetName(obj, oname);
      ExecutiveManageObject(G, obj, 0, quiet);
      SceneInvalidate(G);
    }
  }
  return result;
}

View File

@@ -38,6 +38,7 @@ Z* -------------------------------------------------------------------
#include "SpecRecSpecial.h"
#include "Tracker.h"
#include "TrackerList.h"
#include "USalign.h"
#include "Word.h"
#include "vla.h"
@@ -255,6 +256,11 @@ int ExecutiveAlign(PyMOLGlobals* G, const char* s1, const char* s2,
float seq_wt, float radius, float scale, float base, float coord_wt,
float expect, int window, float ante);
/**
 * Run TM-align on two selections and return the result.
 *
 * @param mobile_sele mobile selection (will be transformed)
 * @param target_sele target selection (stays fixed)
 * @param mobile_state state of mobile selection (0-based)
 * @param target_state state of target selection (0-based)
 * @param quiet suppress output
 * @param transform apply superposition transform
 * @param oname name for alignment object (empty = don't create)
 * @param fast use fast mode (fewer iterations)
 */
pymol::Result<pymol::usalign::TMAlignResult> ExecutiveUSalign(
PyMOLGlobals* G, const char* mobile_sele, const char* target_sele,
int mobile_state, int target_state, int quiet, int transform,
const char* oname, int fast);
void ExecutiveUpdateColorDepends(PyMOLGlobals* G, ObjectMolecule* mol);
void ExecutiveUpdateCoordDepends(PyMOLGlobals* G, ObjectMolecule* mol);
pymol::Result<float> ExecutiveDistance(PyMOLGlobals* G, const char* nam,

View File

@@ -1951,6 +1951,61 @@ static PyObject *CmdAlign(PyObject * self, PyObject * args)
}
}
/**
 * Python entry point `_cmd.usalign`.
 *
 * Expects (self, mobile, target, mobile_state, target_state, quiet,
 * transform, oname, fast) and forwards to ExecutiveUSalign.
 *
 * @return a dict with keys "tm_score_target", "tm_score_mobile", "RMSD",
 *         "alignment_length" and "seq_identity" on success. When
 *         ExecutiveUSalign reports an error, a Python RuntimeError carrying
 *         its message is raised; when a selection cannot be resolved,
 *         APIFailure() is returned.
 */
static PyObject *CmdUSalign(PyObject * self, PyObject * args)
{
  PyMOLGlobals *G = nullptr;
  const char *mobile, *target, *oname;
  int mobile_state, target_state, quiet, transform, fast;
  API_SETUP_ARGS(G, self, args, "Ossiiiisi", &self,
      &mobile, &target, &mobile_state, &target_state,
      &quiet, &transform, &oname, &fast);
  API_ASSERT(APIEnterNotModal(G));

  // Both buffers must start out as empty strings: if the first
  // SelectorGetTmp fails, `&&` short-circuits and s2 is never written, yet
  // SelectorFreeTmp is still called on it below. Without the initialiser
  // that call would read indeterminate stack memory.
  OrthoLineType s1 = "", s2 = "";
  int ok = (SelectorGetTmp(G, mobile, s1) >= 0) &&
           (SelectorGetTmp(G, target, s2) >= 0);

  // Store results in locals — can't call Py_BuildValue until after APIExit
  double tm_target = 0, tm_mobile = 0, rmsd = 0, seq_id = 0;
  int ali_len = 0;
  bool have_result = false;
  std::string err_msg;
  if (ok) {
    auto res = ExecutiveUSalign(G, s1, s2,
        mobile_state, target_state, quiet, transform, oname, fast);
    if (res) {
      auto& r = res.result();
      tm_target = r.tm_score_target;
      tm_mobile = r.tm_score_mobile;
      rmsd = r.rmsd;
      ali_len = r.aligned_length;
      seq_id = r.seq_identity;
      have_result = true;
    } else {
      err_msg = res.error().what();
    }
  }
  SelectorFreeTmp(G, s1);
  SelectorFreeTmp(G, s2);
  APIExit(G);

  if (have_result) {
    return Py_BuildValue("{s:d,s:d,s:d,s:i,s:d}",
        "tm_score_target", tm_target,
        "tm_score_mobile", tm_mobile,
        "RMSD", rmsd,
        "alignment_length", ali_len,
        "seq_identity", seq_id);
  }
  if (!err_msg.empty()) {
    PyErr_SetString(PyExc_RuntimeError, err_msg.c_str());
    return nullptr;
  }
  return APIFailure();
}
static PyObject *CmdGetCoordsAsNumPy(PyObject * self, PyObject * args)
{
PyMOLGlobals *G = nullptr;
@@ -6613,6 +6668,7 @@ static PyMethodDef Cmd_methods[] = {
{"unset", CmdUnset, METH_VARARGS},
{"unset_bond", CmdUnsetBond, METH_VARARGS},
{"update", CmdUpdate, METH_VARARGS},
{"usalign", CmdUSalign, METH_VARARGS},
{"window", CmdWindow, METH_VARARGS},
{"zoom", CmdZoom, METH_VARARGS},
{NULL, nullptr} /* sentinel */

View File

@@ -302,7 +302,8 @@ from .fitting import \
intra_rms, \
intra_rms_cur, \
cealign, \
pair_fit
pair_fit, \
usalign
#--------------------------------------------------------------------
# ARE ALL OF THESE UNUSED AND/OR DEPRECATED (?)

View File

@@ -63,7 +63,7 @@ aa_map_c = [ cmd.map_sc , 'map object' , ', ' ]
aa_rep_c = [ cmd.repres_sc , 'representation' , ', ' ]
aa_rem_c = [ cmd.repmasks_sc , 'representation' , ', ' ]
aa_v_r_c = [ vol_ramp_sc , 'volume ramp' , ', ' ]
aa_ali_e = [ Shortcut(['align', 'super', 'cealign']), 'alignment method', '']
aa_ali_e = [ Shortcut(['align', 'super', 'cealign', 'usalign']), 'alignment method', '']
def wizard_sc():
import os, pymol.wizard
@@ -192,6 +192,7 @@ def get_auto_arg_list(self_cmd=cmd):
'unset_bond' : aa_set_c,
'unset_deep' : aa_set_c,
'update' : aa_sel_e,
'usalign' : aa_sel_e,
'valence' : [ self_cmd.editing.order_sc , 'order' , ', ' ],
'volume_color' : aa_vol_c,
'volume_panel' : aa_vol_c,
@@ -268,6 +269,7 @@ def get_auto_arg_list(self_cmd=cmd):
'unset_bond' : aa_sel_c,
'unset_deep' : aa_obj_e,
'update' : aa_sel_e,
'usalign' : aa_sel_e,
'ramp_new' : aa_map_c,
'valence' : aa_sel_c,
'volume_color' : aa_v_r_c,

View File

@@ -134,6 +134,72 @@ SEE ALSO
if _self._raising(r,_self): raise pymol.CmdException
return ( {"alignment_length": aliLen, "RMSD" : RMSD, "rotation_matrix" : rotMat } )
def usalign(mobile: str, target: str, mobile_state: int = 1,
            target_state: int = 1, quiet: int = 1, transform: int = 1,
            object: str | None = None, fast: int = 0,
            *, _self=cmd) -> dict:
    '''
DESCRIPTION

    "usalign" performs a TM-align structural superposition of two
    protein structures. Unlike "align" and "super", it uses TM-score
    optimization, which is length-independent and more suitable for
    comparing proteins with different lengths or low sequence identity.

    Only CA of proteins and C4' of nucleic acids are considered for
    alignment.

USAGE

    usalign mobile, target [, mobile_state [, target_state [, quiet
        [, transform [, object [, fast ]]]]]]

ARGUMENTS

    mobile = string: atom selection of mobile object

    target = string: atom selection of target object

    mobile_state = int: object state of mobile selection {default: 1}

    target_state = int: object state of target selection {default: 1}

    quiet = 0/1: suppress the printed result summary {default: 1}

    transform = 0/1: apply superposition transform {default: 1}

    object = string: name of alignment object to create {default: None}

    fast = 0/1: use fast mode with fewer iterations {default: 0}

NOTES

    Only guide atoms (CA for proteins, C4' for nucleic acids) are used
    for alignment, regardless of the atom selection provided.

    The TM-score ranges from 0 to 1, where 1 indicates a perfect match.
    A TM-score above 0.5 generally indicates proteins with the same fold.

    Based on the USalign algorithm by Zhang & Skolnick.

    The return value is a dictionary with the keys "tm_score_target",
    "tm_score_mobile", "RMSD", "alignment_length" and "seq_identity".

EXAMPLES

    fetch 1rlw 1rsy, async=0
    usalign 1rsy, 1rlw
    usalign protA, protB, object=aln

SEE ALSO

    align, super, cealign, pair_fit, fit
    '''
    mobile = selector.process(mobile)
    target = selector.process(target)
    # The C layer takes an empty name to mean "do not create an object".
    aln_name = '' if object is None else object
    with _self.lockcm:
        # States are 1-based on the command line and 0-based in the C layer.
        return _cmd.usalign(
            _self._COb,
            mobile,
            target,
            int(mobile_state) - 1,
            int(target_state) - 1,
            int(quiet),
            int(transform),
            str(aln_name),
            int(fast),
        )
def extra_fit(selection='(all)', reference='', method='align', zoom=1,
quiet=0, *, _self=cmd, **kwargs):
'''

View File

@@ -290,6 +290,7 @@ def get_command_keywords(self_cmd=cmd):
'unset_bond' : [ self_cmd.unset_bond , 0 , 0 , '' , parsing.STRICT ],
'unset_deep' : [ self_cmd.unset_deep , 0 , 0 , '' , parsing.STRICT ],
'update' : [ self_cmd.update , 0 , 0 , '' , parsing.STRICT ],
'usalign' : [ self_cmd.usalign , 0 , 0 , '' , parsing.STRICT ],
'valence' : [ self_cmd.valence , 0 , 0 , '' , parsing.STRICT ],
'vdw_fit' : [ self_cmd.vdw_fit , 0 , 0 , '' , parsing.STRICT ],
'view' : [ self_cmd.view , 0 , 0 , '' , parsing.STRICT ],

View File

@@ -0,0 +1,104 @@
import numpy
import pytest
from pymol import cmd
from pymol import test_utils
def test_self_alignment():
    """A structure aligned onto its own copy scores TM ~1.0 with RMSD ~0.0."""
    cmd.load(test_utils.datafile("1oky-frag.pdb"), "m1")
    cmd.create("m2", "m1")
    result = cmd.usalign("m2", "m1", transform=0)
    assert isinstance(result, dict)
    # Both normalizations must agree on a near-perfect score.
    for score_key in ("tm_score_target", "tm_score_mobile"):
        assert result[score_key] == pytest.approx(1.0, abs=0.01)
    assert result["RMSD"] == pytest.approx(0.0, abs=0.1)
def test_cross_alignment():
    """Two similar fragments should align with reasonable scores."""
    for filename, name in (("1oky-frag.pdb", "m1"), ("1t46-frag.pdb", "m2")):
        cmd.load(test_utils.datafile(filename), name)
    result = cmd.usalign("m1", "m2", transform=0)
    assert isinstance(result, dict)
    assert result["tm_score_target"] > 0.3
    assert result["alignment_length"] > 10
    # Different structures cannot superpose perfectly.
    assert result["RMSD"] > 0.0
def test_alignment_object():
    """Passing object= must leave an object of that name in the session."""
    aln_name = "aln"
    cmd.load(test_utils.datafile("1oky-frag.pdb"), "m1")
    cmd.create("m2", "m1")
    result = cmd.usalign("m2", "m1", object=aln_name, transform=0)
    assert isinstance(result, dict)
    assert aln_name in cmd.get_names()
def test_no_transform():
    """With transform=0 the mobile object's coordinates must not change."""

    def mobile_coords():
        # Snapshot of state 1 of the mobile object.
        return numpy.array(cmd.get_coords("m2", 1))

    cmd.load(test_utils.datafile("1oky-frag.pdb"), "m1")
    cmd.create("m2", "m1")
    before = mobile_coords()
    cmd.usalign("m2", "m1", transform=0)
    after = mobile_coords()
    assert numpy.allclose(before, after, atol=1e-6)
def test_return_dict():
    """The returned dictionary must expose every documented key."""
    expected_keys = (
        "tm_score_target",
        "tm_score_mobile",
        "RMSD",
        "alignment_length",
        "seq_identity",
    )
    cmd.load(test_utils.datafile("1oky-frag.pdb"), "m1")
    cmd.create("m2", "m1")
    result = cmd.usalign("m2", "m1", transform=0)
    assert isinstance(result, dict)
    for key in expected_keys:
        assert key in result
def test_fast_mode():
    """fast=1 must still yield a valid, high-scoring self-alignment."""
    cmd.load(test_utils.datafile("1oky-frag.pdb"), "m1")
    cmd.create("m2", "m1")
    result = cmd.usalign("m2", "m1", fast=1, transform=0)
    assert isinstance(result, dict)
    target_score = result["tm_score_target"]
    assert target_score > 0.5
def test_dissimilar_structures():
    """Unrelated structures should produce low TM-scores."""
    for filename, name in (("1oky-frag.pdb", "m1"), ("1rx1.pdb", "m2")):
        cmd.load(test_utils.datafile(filename), name)
    result = cmd.usalign("m1", "m2", transform=0)
    assert isinstance(result, dict)
    assert result["tm_score_target"] < 0.3
    assert result["tm_score_mobile"] < 0.5
    # Even a poor alignment must pair at least one residue.
    assert result["alignment_length"] > 0
def test_protein_vs_nucleic_acid():
    """Protein against nucleic acid must run cleanly and score low."""
    for filename, name in (("1oky-frag.pdb", "protein"), ("1bna.cif", "dna")):
        cmd.load(test_utils.datafile(filename), name)
    result = cmd.usalign("protein", "dna", transform=0)
    assert isinstance(result, dict)
    assert result["tm_score_target"] < 0.3
    assert result["alignment_length"] > 0
def test_alignto_usalign():
    """alignto must accept method=usalign and leave the objects intact."""
    structures = (
        ("1oky-frag.pdb", "ref"),
        ("1t46-frag.pdb", "obj1"),
        ("1rx1.pdb", "obj2"),
    )
    for filename, name in structures:
        cmd.load(test_utils.datafile(filename), name)
    cmd.alignto("ref", method="usalign")
    for aligned_name in ("obj1", "obj2"):
        assert cmd.count_atoms(aligned_name) > 0
def test_too_few_guide_atoms():
    """Fewer than 3 guide atoms in a selection must raise RuntimeError."""
    # A lone glycine fragment cannot supply three guide atoms.
    cmd.fragment("gly")
    cmd.create("m2", "gly")
    expected_error = pytest.raises(
        RuntimeError, match="fewer than 3 guide atoms")
    with expected_error:
        cmd.usalign("gly", "m2")