Files
AlphaPulldown/test/integration/test_modelcif.py
Dima fff63051b4 Tests (#600)
* Harden MMseqs species ID resolution fallback

* Reorganize tests for CPU coverage CI

* New

* Fix function coverage checker def-line false positives

* Expand unit coverage for helper and backend manager utilities

* New.

* New.

* Expand unit coverage for template and post-processing helpers

* Expand unit coverage for objects.py edge cases

* Publish HTML coverage reports via GitHub Pages

* Add CPU unit coverage for AlphaFold3 backend helpers

* Reorganize tests and expand backend coverage

* Reset shared test flags between cases

* Expand AF3 prepare_input unit coverage

* Cover AF3 and truemultimer feature creation

* Test AF3 multimer MSA translation paths

* Cover AF3 duplicate-residue multimer fallback

* Cover AF2 resume and postprocess edge paths

* Cover AF3 template mmCIF preparation

* Test small script entry points

* Expand workflow and ModelCIF test coverage

* Add backend extras and install guide

* Clarify AF3 backend installation path

* Stabilize cluster GPU test runners

* Document AF3 CMake SQLite hints

* Simplify backend installation guide

* Align AF3 install with working cluster env

* Backfill typing dataclass_transform for AF2

* Pin TensorFlow for cluster installs

* Fallback AF2 relax when CUDA OpenMM is unavailable

* Raise AF3 default minimum bucket size

* Simplify backend cluster installation guide

* Fix AF3 wrapper JSON output isolation

* Fix AF3 JSON wrapper outputs and MMseqs ID parsing

* Fix CI entrypoint stub and Python 3.8 typing

* Document release readiness test gates
2026-04-01 14:13:35 +02:00

161 lines
8.0 KiB
Python

import os
import logging
import subprocess
from absl.testing import parameterized
import shutil
import tempfile
from os.path import join
import zipfile
import json
import glob
from pathlib import Path
import pytest
# NOTE: this string literal follows the import statements, so Python treats it
# as a plain expression statement rather than as the module docstring.
"""
Test conversion of PDB to CIF for monomers and multimers
"""
# Skip this whole test module at import/collection time when the optional
# packages "ihm" or "modelcif" cannot be imported.
pytest.importorskip("ihm")
pytest.importorskip("modelcif")
# Module-level marker: applies the "external_tools" mark to every test
# collected from this file.
pytestmark = pytest.mark.external_tools
class TestConvertPDB2CIF(parameterized.TestCase):
    """Integration tests for the ``convert_to_modelcif.py`` script.

    Each test copies a prediction fixture directory into a temporary
    directory, runs the conversion script on the copy in a subprocess and
    then inspects the files the script left behind.
    """

    # The assertions below expect the fixtures to contain ranked_0 .. ranked_4.
    NUM_RANKED_MODELS = 5

    def setUp(self) -> None:
        """Resolve the fixture directory and script path relative to the repo root."""
        super().setUp()
        repo_root = Path(__file__).resolve().parents[2]
        # Join the path with the script name
        self.input_dir = join(repo_root, "test/test_data/predictions")
        self.script_path = join(repo_root, "alphapulldown/scripts/convert_to_modelcif.py")
        # Set logging level to INFO
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def build_command(self, output_dir, add_associated, compress, model_selected):
        """Build the argument list used to invoke the conversion script.

        Args:
            output_dir: Directory passed to the script via ``--ap_output``.
            add_associated: Selects ``--add_associated`` / ``--noadd_associated``.
            compress: Selects ``--compress`` / ``--nocompress``.
            model_selected: Model index passed via ``--model_selected``;
                the option is omitted entirely when this is ``None``.

        Returns:
            The command as a list of strings suitable for ``subprocess.run``.
        """
        command = [
            "python3", self.script_path,
            "--ap_output", output_dir,
            "--add_associated" if add_associated else "--noadd_associated",
            "--compress" if compress else "--nocompress",
        ]
        if model_selected is not None:
            command.extend(["--model_selected", str(model_selected)])
        return command

    def _run_conversion(self, command):
        """Run ``command`` and log its stderr.

        Raises:
            subprocess.CalledProcessError: If the command exits with a
                non-zero status; its captured stderr is logged first.
        """
        try:
            result = subprocess.run(command,
                                    check=True,
                                    capture_output=True,
                                    text=True)
        except subprocess.CalledProcessError as e:
            logging.error(f"Subprocess failed with error: {e.stderr}")
            raise
        if result.stderr:
            logging.error(f"Conversion errors: {result.stderr}")
        return result

    @parameterized.named_parameters(
        {'testcase_name': 'monomer_add_no_compress_model', 'input_dir': "TEST", 'add_associated': True,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'monomer_add_compress_model', 'input_dir': "TEST", 'add_associated': True,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'monomer_no_add_no_compress_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'monomer_no_add_compress_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'monomer_no_add_no_compress_no_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': False, 'model_selected': None},
        {'testcase_name': 'monomer_no_add_compress_no_model', 'input_dir': "TEST", 'add_associated': False,
         'compress': True, 'model_selected': None},
        {'testcase_name': 'dimer_add_no_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': True,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'dimer_add_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': True,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_no_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': False, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_compress_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': True, 'model_selected': 0},
        {'testcase_name': 'dimer_no_add_no_compress_no_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': False, 'model_selected': None},
        {'testcase_name': 'dimer_no_add_compress_no_model', 'input_dir': "TEST_and_TEST", 'add_associated': False,
         'compress': True, 'model_selected': None}
    )
    def test_(self, input_dir, add_associated, compress, model_selected):
        """Test conversion of PDB to CIF for monomers and multimers"""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_output_dir = join(temp_dir, 'output')
            shutil.copytree(join(self.input_dir, input_dir), test_output_dir)
            logging.info(f"Converting {test_output_dir} to ModelCIF format...")
            command = self.build_command(test_output_dir, add_associated, compress, model_selected)
            logging.info(" ".join(command))
            self._run_conversion(command)
            logging.info(os.listdir(test_output_dir))

            all_ids = range(self.NUM_RANKED_MODELS)
            # Models expected as top-level outputs: the selected one, or all of them.
            selected_ids = [model_selected] if model_selected is not None else list(all_ids)
            # Models expected only inside the associated archive of a selected
            # model. Computed once, up front, so the loop below never rebinds
            # the list it is iterating over.
            other_ids = sorted(set(all_ids) - set(selected_ids))

            for idx in selected_ids:
                zip_name = f"ranked_{idx}.zip"
                logging.info(f"Checking existence of file: {zip_name}")
                self.assertTrue(os.path.exists(join(test_output_dir, zip_name)),
                                f"File {zip_name} does not exist")

                cif_name = f"ranked_{idx}.cif"
                if compress:
                    cif_name = f"{cif_name}.gz"
                logging.info(f"Checking existence of file: {cif_name}")
                # Check the file on disk; asserting on the name itself is
                # always true for a non-empty string and verifies nothing.
                self.assertTrue(os.path.exists(join(test_output_dir, cif_name)),
                                f"File {cif_name} does not exist")

                if not add_associated:
                    continue

                logging.info(f"Unzipping: {zip_name}")
                ass_dir = join(test_output_dir, "associated", f"ranked_{idx}")
                with zipfile.ZipFile(join(test_output_dir, zip_name), 'r') as zip_ref:
                    zip_ref.extractall(ass_dir)
                logging.info(os.listdir(ass_dir))

                local_pairwise_file = f"ranked_{idx}_local_pairwise_qa.cif"
                logging.info(f"Checking existence of extracted file: {local_pairwise_file}")
                self.assertTrue(os.path.exists(join(ass_dir, local_pairwise_file)),
                                f"File {local_pairwise_file} does not exist")

                # The non-selected models are expected inside the extracted archive.
                for other_idx in other_ids:
                    ass_mdl_cif = join(ass_dir, f"ranked_{other_idx}.cif")
                    if compress:
                        ass_mdl_cif = f"{ass_mdl_cif}.gz"
                    logging.info(f"Checking existense of {ass_mdl_cif} in {ass_dir}")
                    self.assertTrue(os.path.exists(ass_mdl_cif))
                    ass_mdl_zip = join(ass_dir, f"ranked_{other_idx}.zip")
                    logging.info(f"Checking existense of {ass_mdl_zip} in {ass_dir}")
                    self.assertTrue(os.path.exists(ass_mdl_zip))

    def test_missing_fasta_falls_back_to_structure_sequence(self):
        """If FASTA path in feature metadata is missing, parse sequence from structure."""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_output_dir = join(temp_dir, "output")
            shutil.copytree(join(self.input_dir, "TEST"), test_output_dir)

            # Break the FASTA reference in feature metadata.
            md_files = glob.glob(join(test_output_dir, "*_feature_metadata_*.json"))
            self.assertTrue(md_files, "No feature metadata JSON found in test output dir")
            for md_file in md_files:
                with open(md_file, "r", encoding="ascii") as fh:
                    data = json.load(fh)
                data["other"]["fasta_paths"] = "['/this/path/does/not/exist.fasta']"
                with open(md_file, "w", encoding="ascii") as fh:
                    json.dump(data, fh, indent=2)

            command = self.build_command(
                test_output_dir, add_associated=False, compress=False, model_selected=0
            )
            # Same runner as test_, so a failing conversion logs its stderr.
            self._run_conversion(command)

            out_cif = join(test_output_dir, "ranked_0.cif")
            self.assertTrue(os.path.exists(out_cif), "ModelCIF output was not created")

            # Sequence should still be present in the output even without FASTA.
            # The TEST sequence starts with "MESAIA..." in test FASTA and in the
            # structure-derived sequence.
            with open(out_cif, "r", encoding="ascii") as fh:
                cif_txt = fh.read()
            self.assertIn("MESAIA", cif_txt)