3.2.0 - fix residue constants (#231)

This commit is contained in:
Neil Thomas
2025-04-04 10:55:40 -07:00
committed by GitHub
parent 6f3e9b1809
commit 3fb1a6110d
2 changed files with 27 additions and 2 deletions

View File

@@ -79,3 +79,29 @@ restype_1to3 = {
"Y": "TYR",
"V": "VAL",
}
# Approximate volume of each amino acid, in cubic angstroms, keyed by
# one-letter residue code. Values follow the IMGT aide-memoire table:
# https://www.imgt.org/IMGTeducation/Aide-memoire/_UK/aminoacids/abbreviation.html
amino_acid_volumes: dict[str, float] = {
    "A": 88.6,  # Ala
    "R": 173.4,  # Arg
    "N": 114.1,  # Asn
    "D": 111.1,  # Asp
    "C": 108.5,  # Cys
    "Q": 143.8,  # Gln
    "E": 138.4,  # Glu
    "G": 60.1,  # Gly
    "H": 153.2,  # His
    "I": 166.7,  # Ile
    "L": 166.7,  # Leu
    "K": 168.6,  # Lys
    "M": 162.9,  # Met
    "F": 189.9,  # Phe
    "P": 112.7,  # Pro
    "S": 89.0,  # Ser
    "T": 116.1,  # Thr
    "W": 227.8,  # Trp
    "Y": 193.6,  # Tyr
    "V": 140.0,  # Val
}
# An unknown residue ("X") has no tabulated volume; alanine's value is used
# as the approximation so lookups on "X" still succeed.
amino_acid_volumes["X"] = amino_acid_volumes["A"]

View File

@@ -21,7 +21,6 @@ from scipy.spatial import ConvexHull
from scipy.spatial.distance import pdist, squareform
from torch import Tensor
from evolutionaryscale import residue_constants
from esm.utils import residue_constants as RC
from esm.utils.constants import esm3 as C
from esm.utils.misc import slice_python_object_as_numpy
@@ -322,7 +321,7 @@ class ProteinChain:
sequence = [aa for aa, m in zip(self.sequence, mask) if m]
A, _ = self._mvee(points, tol=1e-3)
mvee_volume = (4 * np.pi) / (3 * np.sqrt(np.linalg.det(A)))
volume = sum(residue_constants.amino_acid_volumes[x] for x in sequence)
volume = sum(RC.amino_acid_volumes[x] for x in sequence)
ratio = volume / mvee_volume
# The paper says you must compare the ellipsoidal profile with T, a measurement of