Improve peptide nucleic acid visualization

Fixes #187
This commit is contained in:
Jarrett Johnson
2026-03-09 12:20:28 -04:00
committed by Jarrett Johnson
parent 5456a6768b
commit fafebb56d3
7 changed files with 1735 additions and 8 deletions

View File

@@ -561,6 +561,10 @@ int AtomInfoKnownPolymerResName(const char *resn)
int AtomInfoKnownNucleicResName(const char *resn)
{
if (AtomInfoKnownPNAResName(resn)) {
return true;
}
if (resn[0] == 'D') {
// Deoxy ribonucleotide
++resn;
@@ -580,6 +584,13 @@ int AtomInfoKnownNucleicResName(const char *resn)
return false;
}
int AtomInfoKnownPNAResName(const char *resn)
{
static const std::unordered_set<std::string_view> pna_names = {
"APN", "CPN", "GPN", "TPN", "IPN"};
return pna_names.count(resn);
}
int AtomInfoKnownProteinResName(const char *resn)
{
switch (resn[0]) {

View File

@@ -446,6 +446,20 @@ int AtomInfoKnownWaterResName(PyMOLGlobals * G, const char *resn);
int AtomInfoKnownPolymerResName(const char *resn);
int AtomInfoKnownProteinResName(const char *resn);
int AtomInfoKnownNucleicResName(const char *resn);
int AtomInfoKnownPNAResName(const char *resn);
/**
* Check if an atom is a nucleic acid backbone trace atom.
* DNA/RNA: "P", PNA: "N4'" / "N4*"
*/
inline bool AtomInfoIsNucBackboneTrace(const char* name, int protons)
{
using namespace std::string_view_literals;
auto sv = std::string_view(name);
return (protons == cAN_P && sv == "P"sv) ||
(protons == cAN_N && (sv == "N4'"sv || sv == "N4*"sv));
}
void AtomInfoGetPDB3LetHydroName(PyMOLGlobals * G, const char *resn, const char *iname, char *oname);
#define cAIC_ct 0x0001

View File

@@ -16,6 +16,7 @@ I* Additional authors of this source file include:
Z* -------------------------------------------------------------------
*/
#include <array>
#include <set>
#include <unordered_map>
#include <unordered_set>
@@ -38,6 +39,7 @@ Z* -------------------------------------------------------------------
#include "CoordSet.h"
#include "AtomIterators.h"
#include "AtomNeighbors.h"
enum {
CARTOON_CYLINDRICAL_HELICES_CURVED = 1,
@@ -288,6 +290,54 @@ void RepCartoon::render(RenderInfo* info)
#define NUCLEIC_NORMAL1 "C3*"
#define NUCLEIC_NORMAL2 "C3'"
struct BondPathStep {
const char* name;
const char* name_alt; // e.g. star variant, or nullptr
int protons;
};
/**
* @brief follows a bond path from `start` through atoms matching the given
* names. Each step finds a bonded neighbor matching name/name_alt that hasn't
* been visited.
* @param obj of atoms whose bonds are followed
* @param start index of starting atom
* @param steps sequence of Bond paths to navigate
* @param marked per-atom flags for atoms already claimed by a ring; skipped
* during traversal
* @return the atom index at the end of the path, or -1 if not found.
*/
template <size_t N>
static int FollowBondPath(const ObjectMolecule* obj, int start,
const std::array<BondPathStep, N>& steps, const int* marked)
{
auto* atomInfo = obj->AtomInfo.data();
int prev = -1;
int cur = start;
for (const auto& step : steps) {
int next = -1;
for (const auto& nbr : AtomNeighbors(obj, cur)) {
if (nbr.atm != prev && atomInfo[nbr.atm].protons == step.protons &&
(!marked || !marked[nbr.atm])) {
auto aname = LexStr(obj->G, atomInfo[nbr.atm].name);
if (WordMatchExact(obj->G, step.name, aname, 1) ||
(step.name_alt &&
WordMatchExact(obj->G, step.name_alt, aname, 1))) {
next = nbr.atm;
break;
}
}
}
if (next < 0) {
return -1;
}
prev = cur;
cur = next;
}
return cur;
}
#define MAX_RING_ATOM 10
enum class ss_t {
@@ -733,6 +783,30 @@ static void do_ring(PyMOLGlobals * G, nuc_acid_data *ndata, int n_atom,
}
}
}
/* PNA: find base_at and sugar_at via N1/N9 -> C8' -> C7' -> N4' path */
if(sugar_at < 0 && base_at < 0 && (n_atom == 5 || n_atom == 6)) {
static const std::array<BondPathStep, 3> pna_path = {{
{"C8'", "C8*", cAN_C},
{"C7'", "C7*", cAN_C},
{"N4'", "N4*", cAN_N},
}};
for(i = 0; i < n_atom; i++) {
a1 = atix[i];
ai = atomInfo + a1;
if(ai->protons == cAN_N && !marked[a1] &&
(WordMatchExact(G, "N1", LexStr(G, ai->name), 1) ||
WordMatchExact(G, "N9", LexStr(G, ai->name), 1))) {
int n4_at = FollowBondPath(obj, a1, pna_path, marked);
if(n4_at >= 0) {
base_at = a1;
sugar_at = n4_at;
if(!nf) nf = nuc_flag[a1];
break;
}
}
}
}
if(n_atom == 6) {
for(i = 0; i < n_atom; i++) {
@@ -1414,7 +1488,8 @@ static void nuc_acid(PyMOLGlobals * G, nuc_acid_data *ndata, int a, int a1,
if(ndata->a2 >= 0) {
if(set_flags) {
if((obj->AtomInfo[ndata->a2].protons == cAN_P) && (!nuc_flag[ndata->a2])) {
if(AtomInfoIsNucBackboneTrace(LexStr(G, obj->AtomInfo[ndata->a2].name),
obj->AtomInfo[ndata->a2].protons) && (!nuc_flag[ndata->a2])) {
int *nf = nullptr;
AtomInfoBracketResidueFast(G, obj->AtomInfo, obj->NAtom, ndata->a2, &st, &nd);
@@ -3178,8 +3253,7 @@ void RepCartoonGeneratePASS1(PyMOLGlobals *G, RepCartoon *I, ObjectMolecule *obj
} else if(
!AtomInfoSameResidueP(G, last_ai, ai)
&& (ndata->na_mode != 1 ?
// P atom
(ai->protons == cAN_P && WordMatchExact(G, "P", ai_name, true)) :
AtomInfoIsNucBackboneTrace(ai_name, ai->protons) :
// C3* C3' atom
(ai->protons == cAN_C && (WordMatchExact(G, NUCLEIC_NORMAL1, ai_name, 1) ||
WordMatchExact(G, NUCLEIC_NORMAL2, ai_name, 1))))) {

View File

@@ -290,8 +290,8 @@ Rep *RepRibbonNew(CoordSet * cs, int state)
*(v++) = *(v1++);
a2 = a1;
} else if((((na_mode != 1) && (ai->protons == cAN_P) &&
(WordMatchExact(G, G->lex_const.P, ai->name, true))) ||
} else if((((na_mode != 1) &&
AtomInfoIsNucBackboneTrace(LexStr(G, ai->name), ai->protons)) ||
((na_mode == 1) && (ai->protons == cAN_C) &&
(WordMatchExact(G, "C4*", LexStr(G, ai->name), 1) ||
WordMatchExact(G, "C4'", LexStr(G, ai->name), 1)))) &&
@@ -654,8 +654,8 @@ void RepRibbonRenderImmediate(CoordSet * cs, RenderInfo * info)
active = true;
last_ai = ai;
a2 = a1;
} else if((((na_mode != 1) && (ai->protons == cAN_P) &&
(WordMatchExact(G, G->lex_const.P, ai->name, true))) ||
} else if((((na_mode != 1) &&
AtomInfoIsNucBackboneTrace(LexStr(G, ai->name), ai->protons)) ||
((na_mode == 1) && (ai->protons == cAN_C) &&
(WordMatchExact(G, "C4*", LexStr(G, ai->name), 1) ||
WordMatchExact(G, "C4'", LexStr(G, ai->name), 1)))) &&

View File

@@ -1128,7 +1128,8 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
mask = 0;
if(!ai->hetatm && AtomInfoKnownProteinResName(LexStr(G, ai->resn)))
mask = cAtomFlag_polymer | cAtomFlag_protein;
else if(!ai->hetatm && AtomInfoKnownNucleicResName(LexStr(G, ai->resn)))
else if((!ai->hetatm || AtomInfoKnownPNAResName(LexStr(G, ai->resn)))
&& AtomInfoKnownNucleicResName(LexStr(G, ai->resn)))
mask = cAtomFlag_polymer | cAtomFlag_nucleic;
else if(AtomInfoKnownWaterResName(G, LexStr(G, ai->resn)))
mask = cAtomFlag_solvent;
@@ -1321,6 +1322,7 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
}
if((mask & cAtomFlag_polymer)) {
AtomInfoType *guide_atom_c3 = nullptr;
ai0 = obj->AtomInfo + I->Table[a0].atom;
for(aa = a0; !guide_atom && aa <= a1; aa++) {
if(ai0->protons == cAN_C) {
@@ -1343,11 +1345,24 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
break;
}
break;
case '3':
if((mask & cAtomFlag_nucleic) && !guide_atom_c3) {
switch (name[2]) { /* C3' as fallback guide for nucleic acids without C4' (PNA) */
case '*':
case '\'':
guide_atom_c3 = ai0;
break;
}
}
break;
}
}
}
ai0++;
}
if (!guide_atom && guide_atom_c3) {
guide_atom = guide_atom_c3;
}
}
if(guide_atom)

1586
testing/data/1pup.cif Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
from pymol import cmd
from pymol import test_utils
def test_select_operators():
@@ -15,3 +16,29 @@ def test_select_operators():
assert cmd.count_atoms("b > 5 & x == 1") == 0
assert cmd.count_atoms("b < 6 & y <= 3") == 1
assert cmd.count_atoms("b = 5 & z >= 2") == 1
def test_pna_classified_as_nucleic():
"""PNA residues (CPN/TPN/APN/GPN) should be classified as polymer.nucleic"""
cmd.load(test_utils.datafile("1pup.cif"))
pna_sele = "resn CPN+TPN+APN+GPN"
n_pna = cmd.count_atoms(pna_sele)
assert n_pna > 0, "no PNA residues found"
n_nuc = cmd.count_atoms(pna_sele + " & polymer.nucleic")
assert n_nuc == n_pna, \
f"only {n_nuc}/{n_pna} PNA atoms classified as polymer.nucleic"
def test_pna_guide_atoms_are_polymer():
"""PNA guide atoms should be flagged as polymer"""
cmd.load(test_utils.datafile("1pup.cif"))
pna_sele = "resn CPN+TPN+APN+GPN"
n_guide = cmd.count_atoms(pna_sele + " & guide & polymer")
assert n_guide > 0, "PNA has no polymer guide atoms"
def test_dna_rna_unchanged():
"""Existing DNA/RNA classification should be unaffected"""
cmd.load(test_utils.datafile("1ehz-5.pdb"))
assert cmd.count_atoms("polymer.nucleic") == 112
assert cmd.count_atoms("guide") > 0