Improve peptide nucleic acid visualization

Fixes #187
2026-06-03 19:54:24 +08:00 · 2026-03-09 12:20:28 -04:00
parent 5456a6768b
commit fafebb56d3
7 changed files with 1735 additions and 8 deletions
--- a/layer2/AtomInfo.cpp
+++ b/layer2/AtomInfo.cpp
@@ -561,6 +561,10 @@ int AtomInfoKnownPolymerResName(const char *resn)

 int AtomInfoKnownNucleicResName(const char *resn)
 {
+  if (AtomInfoKnownPNAResName(resn)) {
+    return true;
+  }
+
  if (resn[0] == 'D') {
    // Deoxy ribonucleotide
    ++resn;
@@ -580,6 +584,13 @@ int AtomInfoKnownNucleicResName(const char *resn)
  return false;
 }

+int AtomInfoKnownPNAResName(const char *resn)
+{
+  static const std::unordered_set<std::string_view> pna_names = {
+      "APN", "CPN", "GPN", "TPN", "IPN"};
+  return pna_names.count(resn);
+}
+
 int AtomInfoKnownProteinResName(const char *resn)
 {
  switch (resn[0]) {
--- a/layer2/AtomInfo.h
+++ b/layer2/AtomInfo.h
@@ -446,6 +446,20 @@ int AtomInfoKnownWaterResName(PyMOLGlobals * G, const char *resn);
 int AtomInfoKnownPolymerResName(const char *resn);
 int AtomInfoKnownProteinResName(const char *resn);
 int AtomInfoKnownNucleicResName(const char *resn);
+int AtomInfoKnownPNAResName(const char *resn);
+
+/**
+ * Check if an atom is a nucleic acid backbone trace atom.
+ * DNA/RNA: "P", PNA: "N4'" / "N4*"
+ */
+inline bool AtomInfoIsNucBackboneTrace(const char* name, int protons)
+{
+  using namespace std::string_view_literals;
+  auto sv = std::string_view(name);
+
+  return (protons == cAN_P && sv == "P"sv) ||
+         (protons == cAN_N && (sv == "N4'"sv || sv == "N4*"sv));
+}
 void AtomInfoGetPDB3LetHydroName(PyMOLGlobals * G, const char *resn, const char *iname, char *oname);

 #define cAIC_ct        0x0001
--- a/layer2/RepCartoon.cpp
+++ b/layer2/RepCartoon.cpp
@@ -16,6 +16,7 @@ I* Additional authors of this source file include:
 Z* -------------------------------------------------------------------
 */

+#include <array>
 #include <set>
 #include <unordered_map>
 #include <unordered_set>
@@ -38,6 +39,7 @@ Z* -------------------------------------------------------------------
 #include "CoordSet.h"

 #include "AtomIterators.h"
+#include "AtomNeighbors.h"

 enum {
  CARTOON_CYLINDRICAL_HELICES_CURVED = 1,
@@ -288,6 +290,54 @@ void RepCartoon::render(RenderInfo* info)
 #define NUCLEIC_NORMAL1 "C3*"
 #define NUCLEIC_NORMAL2 "C3'"

+struct BondPathStep {
+  const char* name;
+  const char* name_alt; // e.g. star variant, or nullptr
+  int protons;
+};
+
+/**
+ * @brief follows a bond path from `start` through atoms matching the given
+ * names. Each step finds a bonded neighbor matching name/name_alt that hasn't
+ * been visited.
+ * @param obj of atoms whose bonds are followed
+ * @param start index of starting atom
+ * @param steps sequence of Bond paths to navigate
+ * @param marked per-atom flags for atoms already claimed by a ring; skipped
+ * during traversal
+ * @return the atom index at the end of the path, or -1 if not found.
+ */
+template <size_t N>
+static int FollowBondPath(const ObjectMolecule* obj, int start,
+    const std::array<BondPathStep, N>& steps, const int* marked)
+{
+  auto* atomInfo = obj->AtomInfo.data();
+  int prev = -1;
+  int cur = start;
+
+  for (const auto& step : steps) {
+    int next = -1;
+    for (const auto& nbr : AtomNeighbors(obj, cur)) {
+      if (nbr.atm != prev && atomInfo[nbr.atm].protons == step.protons &&
+          (!marked || !marked[nbr.atm])) {
+        auto aname = LexStr(obj->G, atomInfo[nbr.atm].name);
+        if (WordMatchExact(obj->G, step.name, aname, 1) ||
+            (step.name_alt &&
+                WordMatchExact(obj->G, step.name_alt, aname, 1))) {
+          next = nbr.atm;
+          break;
+        }
+      }
+    }
+    if (next < 0) {
+      return -1;
+    }
+    prev = cur;
+    cur = next;
+  }
+  return cur;
+}
+
 #define MAX_RING_ATOM 10

 enum class ss_t {
@@ -733,6 +783,30 @@ static void do_ring(PyMOLGlobals * G, nuc_acid_data *ndata, int n_atom,
          }
        }
      }
+      /* PNA: find base_at and sugar_at via N1/N9 -> C8' -> C7' -> N4' path */
+      if(sugar_at < 0 && base_at < 0 && (n_atom == 5 || n_atom == 6)) {
+        static const std::array<BondPathStep, 3> pna_path = {{
+            {"C8'", "C8*", cAN_C},
+            {"C7'", "C7*", cAN_C},
+            {"N4'", "N4*", cAN_N},
+        }};
+        for(i = 0; i < n_atom; i++) {
+          a1 = atix[i];
+          ai = atomInfo + a1;
+          if(ai->protons == cAN_N && !marked[a1] &&
+             (WordMatchExact(G, "N1", LexStr(G, ai->name), 1) ||
+              WordMatchExact(G, "N9", LexStr(G, ai->name), 1))) {
+            int n4_at = FollowBondPath(obj, a1, pna_path, marked);
+            if(n4_at >= 0) {
+              base_at = a1;
+              sugar_at = n4_at;
+              if(!nf) nf = nuc_flag[a1];
+              break;
+            }
+          }
+        }
+      }
+
      if(n_atom == 6) {

        for(i = 0; i < n_atom; i++) {
@@ -1414,7 +1488,8 @@ static void nuc_acid(PyMOLGlobals * G, nuc_acid_data *ndata, int a, int a1,

  if(ndata->a2 >= 0) {
    if(set_flags) {
-      if((obj->AtomInfo[ndata->a2].protons == cAN_P) && (!nuc_flag[ndata->a2])) {
+      if(AtomInfoIsNucBackboneTrace(LexStr(G, obj->AtomInfo[ndata->a2].name),
+          obj->AtomInfo[ndata->a2].protons) && (!nuc_flag[ndata->a2])) {
        int *nf = nullptr;
        AtomInfoBracketResidueFast(G, obj->AtomInfo, obj->NAtom, ndata->a2, &st, &nd);

@@ -3178,8 +3253,7 @@ void RepCartoonGeneratePASS1(PyMOLGlobals *G, RepCartoon *I, ObjectMolecule *obj
    } else if(
        !AtomInfoSameResidueP(G, last_ai, ai)
        && (ndata->na_mode != 1 ?
-          // P atom
-          (ai->protons == cAN_P && WordMatchExact(G, "P", ai_name, true)) :
+          AtomInfoIsNucBackboneTrace(ai_name, ai->protons) :
          // C3* C3' atom
          (ai->protons == cAN_C && (WordMatchExact(G, NUCLEIC_NORMAL1, ai_name, 1) ||
                                    WordMatchExact(G, NUCLEIC_NORMAL2, ai_name, 1))))) {
--- a/layer2/RepRibbon.cpp
+++ b/layer2/RepRibbon.cpp
@@ -290,8 +290,8 @@ Rep *RepRibbonNew(CoordSet * cs, int state)
          *(v++) = *(v1++);

          a2 = a1;
-        } else if((((na_mode != 1) && (ai->protons == cAN_P) &&
-                    (WordMatchExact(G, G->lex_const.P, ai->name, true))) ||
+        } else if((((na_mode != 1) &&
+                    AtomInfoIsNucBackboneTrace(LexStr(G, ai->name), ai->protons)) ||
                   ((na_mode == 1) && (ai->protons == cAN_C) &&
                    (WordMatchExact(G, "C4*", LexStr(G, ai->name), 1) ||
                     WordMatchExact(G, "C4'", LexStr(G, ai->name), 1)))) &&
@@ -654,8 +654,8 @@ void RepRibbonRenderImmediate(CoordSet * cs, RenderInfo * info)
            active = true;
            last_ai = ai;
            a2 = a1;
-          } else if((((na_mode != 1) && (ai->protons == cAN_P) &&
-                      (WordMatchExact(G, G->lex_const.P, ai->name, true))) ||
+          } else if((((na_mode != 1) &&
+                      AtomInfoIsNucBackboneTrace(LexStr(G, ai->name), ai->protons)) ||
                     ((na_mode == 1) && (ai->protons == cAN_C) &&
                      (WordMatchExact(G, "C4*", LexStr(G, ai->name), 1) ||
                       WordMatchExact(G, "C4'", LexStr(G, ai->name), 1)))) &&
--- a/layer3/Selector.cpp
+++ b/layer3/Selector.cpp
@@ -1128,7 +1128,8 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
      mask = 0;
      if(!ai->hetatm && AtomInfoKnownProteinResName(LexStr(G, ai->resn)))
        mask = cAtomFlag_polymer | cAtomFlag_protein;
-      else if(!ai->hetatm && AtomInfoKnownNucleicResName(LexStr(G, ai->resn)))
+      else if((!ai->hetatm || AtomInfoKnownPNAResName(LexStr(G, ai->resn)))
+              && AtomInfoKnownNucleicResName(LexStr(G, ai->resn)))
        mask = cAtomFlag_polymer | cAtomFlag_nucleic;
      else if(AtomInfoKnownWaterResName(G, LexStr(G, ai->resn)))
        mask = cAtomFlag_solvent;
@@ -1321,6 +1322,7 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
      }

      if((mask & cAtomFlag_polymer)) {
+        AtomInfoType *guide_atom_c3 = nullptr;
        ai0 = obj->AtomInfo + I->Table[a0].atom;
        for(aa = a0; !guide_atom && aa <= a1; aa++) {
          if(ai0->protons == cAN_C) {
@@ -1343,11 +1345,24 @@ int SelectorClassifyAtoms(PyMOLGlobals * G, int sele, int preserve,
                  break;
                }
                break;
+              case '3':
+                if((mask & cAtomFlag_nucleic) && !guide_atom_c3) {
+                  switch (name[2]) {      /* C3' as fallback guide for nucleic acids without C4' (PNA) */
+                  case '*':
+                  case '\'':
+                    guide_atom_c3 = ai0;
+                    break;
+                  }
+                }
+                break;
              }
            }
          }
          ai0++;
        }
+        if (!guide_atom && guide_atom_c3) {
+          guide_atom = guide_atom_c3;
+        }
      }

      if(guide_atom)
--- a/testing/data/1pup.cif
+++ b/testing/data/1pup.cif
--- a/testing/tests/api/test_selecting.py
+++ b/testing/tests/api/test_selecting.py
@@ -1,4 +1,5 @@
 from pymol import cmd
+from pymol import test_utils


 def test_select_operators():
@@ -15,3 +16,29 @@ def test_select_operators():
    assert cmd.count_atoms("b > 5 & x == 1") == 0
    assert cmd.count_atoms("b < 6 & y <= 3") == 1
    assert cmd.count_atoms("b = 5 & z >= 2") == 1
+
+
+def test_pna_classified_as_nucleic():
+    """PNA residues (CPN/TPN/APN/GPN) should be classified as polymer.nucleic"""
+    cmd.load(test_utils.datafile("1pup.cif"))
+    pna_sele = "resn CPN+TPN+APN+GPN"
+    n_pna = cmd.count_atoms(pna_sele)
+    assert n_pna > 0, "no PNA residues found"
+    n_nuc = cmd.count_atoms(pna_sele + " & polymer.nucleic")
+    assert n_nuc == n_pna, \
+        f"only {n_nuc}/{n_pna} PNA atoms classified as polymer.nucleic"
+
+
+def test_pna_guide_atoms_are_polymer():
+    """PNA guide atoms should be flagged as polymer"""
+    cmd.load(test_utils.datafile("1pup.cif"))
+    pna_sele = "resn CPN+TPN+APN+GPN"
+    n_guide = cmd.count_atoms(pna_sele + " & guide & polymer")
+    assert n_guide > 0, "PNA has no polymer guide atoms"
+
+
+def test_dna_rna_unchanged():
+    """Existing DNA/RNA classification should be unaffected"""
+    cmd.load(test_utils.datafile("1ehz-5.pdb"))
+    assert cmd.count_atoms("polymer.nucleic") == 112
+    assert cmd.count_atoms("guide") > 0