From 3fb1a6110de3eaf1a444f49d92b090ee75341e09 Mon Sep 17 00:00:00 2001
From: Neil Thomas
Date: Fri, 4 Apr 2025 10:55:40 -0700
Subject: [PATCH] 3.2.0 - fix residue constants (#231)

---
 esm/utils/residue_constants.py       | 26 ++++++++++++++++++++++++++
 esm/utils/structure/protein_chain.py |  3 +--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/esm/utils/residue_constants.py b/esm/utils/residue_constants.py
index 86ea82e..b1af539 100644
--- a/esm/utils/residue_constants.py
+++ b/esm/utils/residue_constants.py
@@ -79,3 +79,29 @@ restype_1to3 = {
     "Y": "TYR",
     "V": "VAL",
 }
+
+# Approximate Volumes of amino acids in cubic angstroms.
+# https://www.imgt.org/IMGTeducation/Aide-memoire/_UK/aminoacids/abbreviation.html
+amino_acid_volumes = {
+    "A": 88.6,  # Alanine
+    "R": 173.4,  # Arginine
+    "N": 114.1,  # Asparagine
+    "D": 111.1,  # Aspartic acid
+    "C": 108.5,  # Cysteine
+    "Q": 143.8,  # Glutamine
+    "E": 138.4,  # Glutamic acid
+    "G": 60.1,  # Glycine
+    "H": 153.2,  # Histidine
+    "I": 166.7,  # Isoleucine
+    "L": 166.7,  # Leucine
+    "K": 168.6,  # Lysine
+    "M": 162.9,  # Methionine
+    "F": 189.9,  # Phenylalanine
+    "P": 112.7,  # Proline
+    "S": 89.0,  # Serine
+    "T": 116.1,  # Threonine
+    "W": 227.8,  # Tryptophan
+    "Y": 193.6,  # Tyrosine
+    "V": 140.0,  # Valine
+    "X": 88.6,  # Unknown, use Alanine as approximation
+}
diff --git a/esm/utils/structure/protein_chain.py b/esm/utils/structure/protein_chain.py
index 8a0ddf0..0b198e2 100644
--- a/esm/utils/structure/protein_chain.py
+++ b/esm/utils/structure/protein_chain.py
@@ -21,7 +21,6 @@ from scipy.spatial import ConvexHull
 from scipy.spatial.distance import pdist, squareform
 from torch import Tensor
 
-from evolutionaryscale import residue_constants
 from esm.utils import residue_constants as RC
 from esm.utils.constants import esm3 as C
 from esm.utils.misc import slice_python_object_as_numpy
@@ -322,7 +321,7 @@ class ProteinChain:
         sequence = [aa for aa, m in zip(self.sequence, mask) if m]
         A, _ = self._mvee(points, tol=1e-3)
         mvee_volume = (4 * np.pi) / (3 * np.sqrt(np.linalg.det(A)))
-        volume = sum(residue_constants.amino_acid_volumes[x] for x in sequence)
+        volume = sum(RC.amino_acid_volumes[x] for x in sequence)
         ratio = volume / mvee_volume
 
         # The paper says you must compare the ellipsoidal profile with T, a measurement of