mirror of
https://github.com/google-deepmind/alphafold.git
synced 2026-06-04 14:58:05 +08:00
Add pLDDT and PAE files saving to AF runner, update Colab.
PAE methods are migrated from Colab notebook_utils.py to AFOS and included in run_alphafold.py to save a new file. Colab notebook is updated accordingly. pLDDT data is transformed into JSON and saved as well, as an additional output file. PiperOrigin-RevId: 525687298 Change-Id: If7f0bcf7d3b39901dae58a67958eaa5687645de2
This commit is contained in:
committed by
Copybara-Service
parent
4d83e3fc08
commit
2819de4ddd
@@ -14,7 +14,9 @@
|
||||
|
||||
"""Functions for processing confidence metrics."""
|
||||
|
||||
import json
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import scipy.special
|
||||
|
||||
@@ -36,6 +38,43 @@ def compute_plddt(logits: np.ndarray) -> np.ndarray:
|
||||
return predicted_lddt_ca * 100
|
||||
|
||||
|
||||
def _confidence_category(score: float) -> str:
  """Maps a pLDDT score to its single-letter confidence band.

  The bands are: disordered 'D' for [0, 50), low 'L' for [50, 70), medium 'M'
  for [70, 90) and high 'H' for [90, 100].

  Args:
    score: The pLDDT value to classify.

  Returns:
    One of 'D', 'L', 'M' or 'H'.

  Raises:
    ValueError: If `score` is not within [0, 100]; NaN is rejected as well
      because it fails every comparison.
  """
  # Reject anything outside the valid range first so the remaining checks only
  # need an upper bound each.
  if not 0 <= score <= 100:
    raise ValueError(f'Invalid pLDDT score {score}')
  if score < 50:
    return 'D'
  if score < 70:
    return 'L'
  if score < 90:
    return 'M'
  return 'H'
|
||||
|
||||
|
||||
def confidence_json(plddt: np.ndarray) -> str:
  """Serialises per-residue pLDDT scores and their categories to JSON.

  Args:
    plddt: Rank-1 array with one confidence value per residue.

  Returns:
    A compact JSON object string with three parallel lists: `residueNumber`
    (numbered from 1), `confidenceScore` (rounded to 2 decimal places) and
    `confidenceCategory`.

  Raises:
    ValueError: If `plddt` is not rank 1.
  """
  if plddt.ndim != 1:
    raise ValueError(f'The plddt array must be rank 1, got: {plddt.shape}.')

  residue_numbers = [index + 1 for index in range(plddt.shape[0])]
  scores = [round(float(value), 2) for value in plddt]
  # The category is derived from the unrounded value.
  categories = list(map(_confidence_category, plddt))

  payload = {
      'residueNumber': residue_numbers,
      'confidenceScore': scores,
      'confidenceCategory': categories,
  }
  return json.dumps(payload, separators=(',', ':'))
|
||||
|
||||
|
||||
def _calculate_bin_centers(breaks: np.ndarray):
|
||||
"""Gets the bin centers from the bin edges.
|
||||
|
||||
@@ -108,6 +147,32 @@ def compute_predicted_aligned_error(
|
||||
}
|
||||
|
||||
|
||||
def pae_json(pae: np.ndarray, max_pae: float) -> str:
  """Returns the PAE in the same format as is used in the AFDB.

  Note that the values are presented as floats to 1 decimal place, whereas AFDB
  returns integer values.

  Args:
    pae: The n_res x n_res PAE array.
    max_pae: The maximum possible PAE value. NumPy scalars and single-element
      arrays are accepted and converted to the equivalent Python scalar.

  Returns:
    PAE output format as a JSON string.

  Raises:
    ValueError: If `pae` is not a square matrix, or if `max_pae` is an array
      holding more than one element.
  """
  # Check the PAE array is the correct shape.
  if pae.ndim != 2 or pae.shape[0] != pae.shape[1]:
    raise ValueError(f'PAE must be a square matrix, got {pae.shape}')

  # The json module cannot serialise NumPy scalars such as np.float32, so
  # unwrap them into plain Python numbers before dumping.
  if isinstance(max_pae, (np.generic, np.ndarray)):
    max_pae = max_pae.item()

  # Round the predicted aligned errors to 1 decimal place.
  rounded_errors = np.round(pae.astype(np.float64), decimals=1)
  formatted_output = [{
      'predicted_aligned_error': rounded_errors.tolist(),
      'max_predicted_aligned_error': max_pae,
  }]
  return json.dumps(formatted_output, indent=None, separators=(',', ':'))
|
||||
|
||||
|
||||
def predicted_tm_score(
|
||||
logits: np.ndarray,
|
||||
breaks: np.ndarray,
|
||||
|
||||
48
alphafold/common/confidence_test.py
Normal file
48
alphafold/common/confidence_test.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# Copyright 2023 DeepMind Technologies Limited
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Test confidence metrics."""
|
||||
|
||||
|
||||
from absl.testing import absltest
|
||||
from alphafold.common import confidence
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ConfidenceTest(absltest.TestCase):
  """Tests for the JSON serialisation helpers in `confidence`."""

  def test_pae_json(self):
    """PAE values are rounded to 1 decimal place in the AFDB layout."""
    pae = np.array([[0.01, 13.12345], [20.0987, 0.0]])
    pae_json = confidence.pae_json(pae=pae, max_pae=31.75)
    self.assertEqual(
        pae_json, '[{"predicted_aligned_error":[[0.0,13.1],[20.1,0.0]],'
        '"max_predicted_aligned_error":31.75}]')

  def test_confidence_json(self):
    """Scores are rounded to 2 decimal places and paired with a category."""
    plddt = np.array([42, 42.42])

    confidence_json = confidence.confidence_json(plddt=plddt)

    self.assertEqual(
        confidence_json,
        ('{"residueNumber":[1,2],'
         '"confidenceScore":[42.0,42.42],'
         '"confidenceCategory":["D","D"]}'),
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
absltest.main()
|
||||
@@ -13,7 +13,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
"""Helper methods for the AlphaFold Colab notebook."""
|
||||
import json
|
||||
from typing import AbstractSet, Any, Mapping, Optional, Sequence
|
||||
|
||||
from alphafold.common import residue_constants
|
||||
@@ -143,31 +142,6 @@ def empty_placeholder_template_features(
|
||||
}
|
||||
|
||||
|
||||
def get_pae_json(pae: np.ndarray, max_pae: float) -> str:
  """Formats a PAE matrix as a JSON string in the layout used by the AFDB.

  Values are written as floats with 1 decimal place, whereas the AFDB itself
  returns integer values.

  Args:
    pae: The n_res x n_res PAE array.
    max_pae: The maximum possible PAE value.

  Returns:
    A compact JSON string holding a one-element list whose entry carries the
    rounded matrix and `max_pae`.

  Raises:
    ValueError: If `pae` is not a square matrix.
  """
  is_square = pae.ndim == 2 and pae.shape[0] == pae.shape[1]
  if not is_square:
    raise ValueError(f'PAE must be a square matrix, got {pae.shape}')

  # Convert to float64 before rounding to 1 decimal place.
  pae_rows = np.round(pae.astype(np.float64), decimals=1).tolist()
  entry = {
      'predicted_aligned_error': pae_rows,
      'max_predicted_aligned_error': max_pae,
  }
  return json.dumps([entry], separators=(',', ':'))
|
||||
|
||||
|
||||
def check_cell_execution_order(
|
||||
cells_ran: AbstractSet[int], cell_number: int) -> None:
|
||||
"""Check that the cell execution order is correct.
|
||||
|
||||
@@ -184,13 +184,6 @@ class NotebookUtilsTest(parameterized.TestCase):
|
||||
[np.array([], dtype=templates.TEMPLATE_FEATURES[feat_name]).dtype
|
||||
for feat_name in template_features])
|
||||
|
||||
def test_get_pae_json(self):
  """`get_pae_json` rounds to 1 decimal place and emits the AFDB layout."""
  expected = (
      '[{"predicted_aligned_error":[[0.0,13.1],[20.1,0.0]],'
      '"max_predicted_aligned_error":31.75}]')
  pae_matrix = np.array([[0.01, 13.12345], [20.0987, 0.0]])
  actual = notebook_utils.get_pae_json(pae=pae_matrix, max_pae=31.75)
  self.assertEqual(actual, expected)
|
||||
|
||||
def test_check_cell_execution_order_correct(self):
|
||||
notebook_utils.check_cell_execution_order({1, 2}, 3)
|
||||
|
||||
|
||||
@@ -374,6 +374,7 @@
|
||||
"from alphafold.data import pipeline_multimer\n",
|
||||
"from alphafold.data.tools import jackhmmer\n",
|
||||
"\n",
|
||||
"from alphafold.common import confidence\n",
|
||||
"from alphafold.common import protein\n",
|
||||
"\n",
|
||||
"from alphafold.relax import relax\n",
|
||||
@@ -786,7 +787,7 @@
|
||||
"pae_output_path = os.path.join(output_dir, 'predicted_aligned_error.json')\n",
|
||||
"if pae_outputs:\n",
|
||||
" # Save predicted aligned error in the same format as the AF EMBL DB.\n",
|
||||
" pae_data = notebook_utils.get_pae_json(pae=pae, max_pae=max_pae.item())\n",
|
||||
"    pae_data = confidence.pae_json(pae=pae, max_pae=max_pae.item())\n",
|
||||
" with open(pae_output_path, 'w') as f:\n",
|
||||
" f.write(pae_data)\n",
|
||||
"\n",
|
||||
|
||||
@@ -22,11 +22,12 @@ import random
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Dict, Mapping, Union
|
||||
from typing import Any, Dict, Union
|
||||
|
||||
from absl import app
|
||||
from absl import flags
|
||||
from absl import logging
|
||||
from alphafold.common import confidence
|
||||
from alphafold.common import protein
|
||||
from alphafold.common import residue_constants
|
||||
from alphafold.data import pipeline
|
||||
@@ -171,6 +172,38 @@ def _jnp_to_np(output: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return output
|
||||
|
||||
|
||||
def _save_confidence_json_file(
    plddt: np.ndarray, output_dir: str, model_name: str
) -> None:
  """Writes the pLDDT confidence JSON for one model into `output_dir`.

  The file is named `confidence_<model_name>.json`.

  Args:
    plddt: Per-residue confidence data, passed to `confidence.confidence_json`.
    output_dir: Directory to which the file is saved.
    model_name: Name of a model; used in the file name.
  """
  json_payload = confidence.confidence_json(plddt)

  destination = os.path.join(output_dir, f'confidence_{model_name}.json')
  with open(destination, 'w') as out_file:
    out_file.write(json_payload)
|
||||
|
||||
|
||||
def _save_pae_json_file(
    pae: np.ndarray, max_pae: float, output_dir: str, model_name: str
) -> None:
  """Writes the PAE JSON for one model into `output_dir`.

  The file is named `pae_<model_name>.json`. This function always writes; it
  does not itself check whether PAE data is available.

  Args:
    pae: The n_res x n_res PAE array.
    max_pae: The maximum possible PAE value.
    output_dir: Directory to which files are saved.
    model_name: Name of a model; used in the file name.
  """
  json_payload = confidence.pae_json(pae, max_pae)

  destination = os.path.join(output_dir, f'pae_{model_name}.json')
  with open(destination, 'w') as out_file:
    out_file.write(json_payload)
|
||||
|
||||
|
||||
def predict_structure(
|
||||
fasta_path: str,
|
||||
fasta_name: str,
|
||||
@@ -240,8 +273,17 @@ def predict_structure(
|
||||
model_name, fasta_name, t_diff)
|
||||
|
||||
plddt = prediction_result['plddt']
|
||||
_save_confidence_json_file(plddt, output_dir, model_name)
|
||||
ranking_confidences[model_name] = prediction_result['ranking_confidence']
|
||||
|
||||
if (
|
||||
'predicted_aligned_error' in prediction_result
|
||||
and 'max_predicted_aligned_error' in prediction_result
|
||||
):
|
||||
pae = prediction_result['predicted_aligned_error']
|
||||
max_pae = prediction_result['max_predicted_aligned_error']
|
||||
_save_pae_json_file(pae, float(max_pae), output_dir, model_name)
|
||||
|
||||
# Remove jax dependency from results.
|
||||
np_prediction_result = _jnp_to_np(dict(prediction_result))
|
||||
|
||||
|
||||
@@ -84,8 +84,15 @@ class RunAlphafoldTest(parameterized.TestCase):
|
||||
|
||||
target_output_files = os.listdir(os.path.join(out_dir, 'test'))
|
||||
expected_files = [
|
||||
'features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json',
|
||||
'result_model1.pkl', 'timings.json', 'unrelaxed_model1.pdb',
|
||||
'features.pkl',
|
||||
'msas',
|
||||
'ranked_0.pdb',
|
||||
'ranking_debug.json',
|
||||
'result_model1.pkl',
|
||||
'timings.json',
|
||||
'unrelaxed_model1.pdb',
|
||||
'pae_model1.json',
|
||||
'confidence_model1.json',
|
||||
]
|
||||
if models_to_relax == run_alphafold.ModelsToRelax.ALL:
|
||||
expected_files.extend(['relaxed_model1.pdb', 'relax_metrics.json'])
|
||||
|
||||
Reference in New Issue
Block a user