mirror of
https://github.com/KosinskiLab/AlphaPulldown.git
synced 2026-06-04 14:14:24 +08:00
* Harden MMseqs species ID resolution fallback * Reorganize tests for CPU coverage CI * New * Fix function coverage checker def-line false positives * Expand unit coverage for helper and backend manager utilities * New. * New. * Expand unit coverage for template and post-processing helpers * Expand unit coverage for objects.py edge cases * Publish HTML coverage reports via GitHub Pages * Add CPU unit coverage for AlphaFold3 backend helpers * Reorganize tests and expand backend coverage * Reset shared test flags between cases * Expand AF3 prepare_input unit coverage * Cover AF3 and truemultimer feature creation * Test AF3 multimer MSA translation paths * Cover AF3 duplicate-residue multimer fallback * Cover AF2 resume and postprocess edge paths * Cover AF3 template mmCIF preparation * Test small script entry points * Expand workflow and ModelCIF test coverage * Add backend extras and install guide * Clarify AF3 backend installation path * Stabilize cluster GPU test runners * Document AF3 CMake SQLite hints * Simplify backend installation guide * Align AF3 install with working cluster env * Backfill typing dataclass_transform for AF2 * Pin TensorFlow for cluster installs * Fallback AF2 relax when CUDA OpenMM is unavailable * Raise AF3 default minimum bucket size * Simplify backend cluster installation guide * Fix AF3 wrapper JSON output isolation * Fix AF3 JSON wrapper outputs and MMseqs ID parsing * Fix CI entrypoint stub and Python 3.8 typing * Document release readiness test gates
161 lines
8.0 KiB
Python
161 lines
8.0 KiB
Python
import os
|
|
import logging
|
|
import subprocess
|
|
from absl.testing import parameterized
|
|
import shutil
|
|
import tempfile
|
|
from os.path import join
|
|
import zipfile
|
|
import json
|
|
import glob
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
"""
|
|
Test conversion of PDB to CIF for monomers and multimers
|
|
"""
|
|
|
|
# Skip every test in this module unless both optional packages can be imported.
for _required_module in ("ihm", "modelcif"):
    pytest.importorskip(_required_module)

# Apply the ``external_tools`` marker to all tests defined in this module.
pytestmark = pytest.mark.external_tools
|
|
|
|
|
|
class TestConvertPDB2CIF(parameterized.TestCase):
    """End-to-end tests for the ``convert_to_modelcif.py`` command-line script.

    Every test copies a prediction directory from the repository test data
    into a temporary directory, runs the script on that copy in a subprocess,
    and then checks which files are present afterwards.
    """

    # Ranked models are addressed as ranked_0 .. ranked_{NUM_RANKED_MODELS - 1}.
    NUM_RANKED_MODELS = 5

    def setUp(self) -> None:
        """Resolve the test-data directory and the path of the script under test."""
        super().setUp()
        repo_root = Path(__file__).resolve().parents[2]
        self.input_dir = join(repo_root, "test/test_data/predictions")
        self.script_path = join(repo_root, "alphapulldown/scripts/convert_to_modelcif.py")
        # Set logging level to INFO
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def _run_conversion(self, command):
        """Run ``command`` in a subprocess and return the completed process.

        Anything written to stderr is logged. If the process exits with a
        non-zero status, its stderr is logged and the
        ``subprocess.CalledProcessError`` is re-raised so the test fails.
        """
        try:
            result = subprocess.run(command,
                                    check=True,
                                    capture_output=True,
                                    text=True)
        except subprocess.CalledProcessError as e:
            logging.error(f"Subprocess failed with error: {e.stderr}")
            raise
        if result.stderr:
            logging.error(f"Conversion errors: {result.stderr}")
        return result

    @parameterized.named_parameters(
        {'testcase_name': 'monomer_add_no_compress_model', 'input_dir': "TEST", 'add_associated': True,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'monomer_add_compress_model', 'input_dir': "TEST", 'add_associated': True, 'compress': True,
         'model_selected': 0},
        {'testcase_name': 'monomer_no_add_no_compress_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'monomer_no_add_compress_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'monomer_no_add_no_compress_no_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': False, 'model_selected': None},
        {'testcase_name': 'monomer_no_add_compress_no_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': True, 'model_selected': None},
        {'testcase_name': 'dimer_add_no_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': True,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'dimer_add_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': True,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_no_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_no_compress_no_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': False, 'model_selected': None},
        {'testcase_name': 'dimer_no_add_compress_no_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': True, 'model_selected': None}
    )
    def test_(self, input_dir, add_associated, compress, model_selected):
        """Test conversion of PDB to CIF for monomers and multimers"""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_output_dir = join(temp_dir, 'output')
            shutil.copytree(join(self.input_dir, input_dir), test_output_dir)
            logging.info(f"Converting {test_output_dir} to ModelCIF format...")
            command = self.build_command(test_output_dir, add_associated, compress, model_selected)
            logging.info(" ".join(command))
            self._run_conversion(command)
            logging.info(os.listdir(test_output_dir))

            # Models checked at the top level: only the selected one, or all of them.
            if model_selected is not None:
                expected_ids = [model_selected]
            else:
                expected_ids = list(range(self.NUM_RANKED_MODELS))
            # Remaining models are looked up inside the associated archive.
            # Computed once, under its own name, so the loop below never
            # rebinds the list it is iterating over.
            other_ids = sorted(set(range(self.NUM_RANKED_MODELS)) - set(expected_ids))
            cif_suffix = ".cif.gz" if compress else ".cif"

            for idx in expected_ids:
                zip_name = f"ranked_{idx}.zip"
                logging.info(f"Checking existence of file: {zip_name}")
                self.assertTrue(os.path.exists(join(test_output_dir, zip_name)),
                                f"File {zip_name} does not exist")

                cif = f"ranked_{idx}{cif_suffix}"
                logging.info(f"Checking existence of file: {cif}")
                # Check the file on disk; asserting on the name string alone
                # is always true and verifies nothing.
                self.assertTrue(os.path.exists(join(test_output_dir, cif)),
                                f"File {cif} does not exist")

                if not add_associated:
                    continue

                logging.info(f"Unzipping: {zip_name}")
                ass_dir = join(test_output_dir, "associated", f"ranked_{idx}")
                with zipfile.ZipFile(join(test_output_dir, zip_name), 'r') as zip_ref:
                    zip_ref.extractall(ass_dir)
                logging.info(os.listdir(ass_dir))

                local_pairwise_file = f"ranked_{idx}_local_pairwise_qa.cif"
                logging.info(f"Checking existence of extracted file: {local_pairwise_file}")
                self.assertTrue(os.path.exists(join(ass_dir, local_pairwise_file)),
                                f"File {local_pairwise_file} does not exist")

                for other_idx in other_ids:
                    ass_mdl_cif = join(ass_dir, f"ranked_{other_idx}{cif_suffix}")
                    logging.info(f"Checking existense of {ass_mdl_cif} in {ass_dir}")
                    self.assertTrue(os.path.exists(ass_mdl_cif),
                                    f"File {ass_mdl_cif} does not exist")
                    ass_mdl_zip = join(ass_dir, f"ranked_{other_idx}.zip")
                    logging.info(f"Checking existense of {ass_mdl_zip} in {ass_dir}")
                    self.assertTrue(os.path.exists(ass_mdl_zip),
                                    f"File {ass_mdl_zip} does not exist")

    def build_command(self, output_dir, add_associated, compress, model_selected):
        """Build the argument list used to invoke the conversion script.

        Args:
            output_dir: Directory passed to the script via ``--ap_output``.
            add_associated: Selects ``--add_associated`` or ``--noadd_associated``.
            compress: Selects ``--compress`` or ``--nocompress``.
            model_selected: Value for ``--model_selected``; the flag is left
                out entirely when this is ``None``.

        Returns:
            The command as a list of strings, suitable for ``subprocess.run``.
        """
        # NOTE(review): "python3" is resolved through PATH, so it may not be
        # the interpreter that is running this test module.
        command = [
            "python3", self.script_path,
            "--ap_output", output_dir,
            "--add_associated" if add_associated else "--noadd_associated",
            "--compress" if compress else "--nocompress"
        ]

        if model_selected is not None:
            command.extend(["--model_selected", str(model_selected)])

        return command

    def test_missing_fasta_falls_back_to_structure_sequence(self):
        """If FASTA path in feature metadata is missing, parse sequence from structure."""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_output_dir = join(temp_dir, "output")
            shutil.copytree(join(self.input_dir, "TEST"), test_output_dir)

            # Break the FASTA reference in feature metadata.
            md_files = glob.glob(join(test_output_dir, "*_feature_metadata_*.json"))
            self.assertTrue(md_files, "No feature metadata JSON found in test output dir")
            for md_file in md_files:
                with open(md_file, "r", encoding="ascii") as fh:
                    data = json.load(fh)
                data["other"]["fasta_paths"] = "['/this/path/does/not/exist.fasta']"
                with open(md_file, "w", encoding="ascii") as fh:
                    json.dump(data, fh, indent=2)

            command = self.build_command(
                test_output_dir, add_associated=False, compress=False, model_selected=0
            )
            # Same runner as test_ so a failing conversion logs its stderr.
            self._run_conversion(command)

            out_cif = join(test_output_dir, "ranked_0.cif")
            self.assertTrue(os.path.exists(out_cif), "ModelCIF output was not created")

            # Sequence should still be present in the output even without FASTA.
            # The TEST sequence starts with "MESAIA..." in test FASTA and in the
            # structure-derived sequence.
            with open(out_cif, "r", encoding="ascii") as fh:
                cif_txt = fh.read()
            self.assertIn("MESAIA", cif_txt)