Reorganize tests for CPU coverage CI

This commit is contained in:
Dima
2026-03-27 15:00:46 +01:00
parent f6af91a8ff
commit 7fd121f842
24 changed files with 463 additions and 291 deletions

19
.coveragerc Normal file
View File

@@ -0,0 +1,19 @@
[run]
branch = True
source =
alphapulldown
omit =
alphapulldown/__init__.py
alphapulldown/analysis_pipeline/af2plots/*
[report]
skip_empty = True
show_missing = True
precision = 1
exclude_lines =
pragma: no cover
if __name__ == .__main__.:
raise NotImplementedError
[html]
directory = htmlcov

View File

@@ -10,13 +10,12 @@ on:
workflow_dispatch:
jobs:
build-and-test:
smoke-tests:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11"]
install-type: ["user", "developer"]
defaults:
run:
@@ -35,25 +34,12 @@ jobs:
auto-update-conda: true
use-mamba: true
- if: matrix.install-type == 'user'
run: |
pip install alphapulldown
#pip install -U "jax[cuda12]"==0.5.3
- if: matrix.install-type == 'developer'
run: |
pip install .
#pip install -U "jax[cuda12]"==0.5.3
# export PYTHONPATH=$PWD/AlphaLink2:$PYTHONPATH
# install dependencies for AlphaLink backend
# pip install torch==1.13.0+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
# pip install setuptools==69.5.1 # Downgrade setuptools to avoid crashes when installing unicore
# git clone https://github.com/dptech-corp/Uni-Core.git
# cd Uni-Core
# python setup.py install --disable-cuda-ext
# cd ..
- run: |
pytest test/
pip install -e .[test]
- run: |
pytest -n auto --dist loadfile test/unit
pytest test/integration
#export PYTHONPATH=$PWD/alphapulldown/analysis_pipeline:$PYTHONPATH
## Test analysis pipeline
#conda install -c bioconda biopandas
@@ -62,6 +48,52 @@ jobs:
#pytest -s test/test_pdb_analyser.py
#pytest -s test/test_get_good_inter_pae.py
coverage:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: conda-incubator/setup-miniconda@v3
with:
environment-file: environment.yml
activate-environment: AlphaPulldown
python-version: "3.11"
auto-update-conda: true
use-mamba: true
- run: |
pip install -e .[test]
- run: |
pytest -n auto --dist loadfile test/unit \
--cov=alphapulldown \
--cov-config=.coveragerc \
--cov-report=
pytest test/integration \
--cov=alphapulldown \
--cov-config=.coveragerc \
--cov-append \
--cov-report=
coverage report --skip-covered --show-missing
coverage xml -o coverage.xml
coverage json -o coverage.json
coverage html -d htmlcov
python test/tools/check_function_coverage.py --report-only coverage.json
coverage report --fail-under=25 --skip-covered --show-missing
- uses: actions/upload-artifact@v4
with:
name: coverage-reports
path: |
coverage.json
coverage.xml
htmlcov
build-alphafold2-container:
runs-on: ubuntu-latest
steps:
@@ -157,4 +189,3 @@ jobs:
push: true
tags: ${{ secrets.DOCKER_USERNAME }}/alphafold3:${{ github.event.release.tag_name }}
ssh: default

4
.gitignore vendored
View File

@@ -5,6 +5,10 @@ __pycache__*
.ipynb_checkpoints*
.vscode*
.coverage*
.pytest_cache/
coverage.json
coverage.xml
htmlcov/
test/test_data/predictions/af*
build/
*.egg-info/

View File

@@ -258,6 +258,59 @@ script manually). Commonly used flags:
- `--skip_existing` leave existing feature files untouched (safe for reruns).
- `--seq_index N` only process the Nth sequence from the FASTA list.
- `--use_hhsearch`, `--re_search_templates_mmseqs2` toggle template search implementations.
## Testing
Install the repo with test dependencies:
```bash
pip install -e .[test]
```
The active CPU-first pytest layout is:
- `test/unit` for pure helpers and small mocked components
- `test/integration` for CPU-only filesystem and CLI wiring tests
- `test/functional` for heavier deterministic package-level tests
- `test/cluster` for Slurm/GPU smoke utilities that are not part of default CI
Recommended local commands:
```bash
# fast parallel unit tests
pytest -n auto --dist loadfile test/unit
# CPU CI-equivalent run
pytest -n auto --dist loadfile test/unit
pytest test/integration
# coverage run with pytest-cov
pytest -n auto --dist loadfile test/unit \
--cov=alphapulldown \
--cov-config=.coveragerc \
--cov-report=
pytest test/integration \
--cov=alphapulldown \
--cov-config=.coveragerc \
--cov-append \
--cov-report=
coverage report --skip-covered --show-missing
coverage xml -o coverage.xml
coverage json -o coverage.json
coverage html -d htmlcov
python test/tools/check_function_coverage.py --report-only coverage.json
```
The AST-based function coverage helper is currently report-only: it lists uncovered
`alphapulldown/` functions without failing the run. The CI coverage job still enforces a
package-only line-coverage floor via `coverage report --fail-under=25`.
Cluster/GPU smoke wrappers live under `test/cluster/`, for example:
```bash
python test/cluster/run_alphafold2_predictions.py
python test/cluster/run_alphafold3_predictions.py
```
- `--path_to_mmt`, `--description_file`, `--multiple_mmts` enable TrueMultimer CSV-driven feature sets.
- `--max_template_date YYYY-MM-DD` required cutoff for template structures; keeps runs reproducible.

View File

@@ -1,6 +1,108 @@
import sys
import types
from pathlib import Path
import numpy as np
import pytest
REPO_ROOT = Path(__file__).resolve().parent
TEST_IMPORT_PATHS = (
REPO_ROOT,
REPO_ROOT / "ColabFold",
REPO_ROOT / "alphafold",
REPO_ROOT / "alphafold3" / "src",
REPO_ROOT / "AlphaLink2",
)
for import_path in TEST_IMPORT_PATHS:
import_path_str = str(import_path)
if import_path.exists() and import_path_str not in sys.path:
sys.path.insert(0, import_path_str)
def _install_jax_tree_stub() -> None:
    """Register a minimal stand-in for ``jax`` when the real package cannot be imported.

    If ``import jax`` succeeds nothing is changed. Otherwise any ``jax`` /
    ``jax.*`` entries are removed from ``sys.modules`` and replaced by stub
    modules backed by NumPy and ``tree`` (dm-tree). Only the attributes
    assigned below exist on the stub.

    Raises:
        ImportError: if the real ``jax`` is unusable and ``tree`` is not
            installed either.
    """
    try:
        import jax  # noqa: F401
    except Exception:
        # Deliberately broad: any import failure selects the stub. Entries left
        # in sys.modules by the failed import are dropped so they cannot shadow it.
        for module_name in list(sys.modules):
            if module_name == "jax" or module_name.startswith("jax."):
                sys.modules.pop(module_name, None)
    else:
        return

    import tree as dm_tree

    jax_stub = types.ModuleType("jax")
    jax_numpy_stub = types.ModuleType("jax.numpy")
    jax_nn_stub = types.ModuleType("jax.nn")
    jax_lax_stub = types.ModuleType("jax.lax")
    tree_stub = types.ModuleType("jax.tree")
    tree_util_stub = types.ModuleType("jax.tree_util")
    version_stub = types.ModuleType("jax.version")
    version_stub.__version__ = "0.0-test"

    def _tree_map(func, *structures):
        return dm_tree.map_structure(func, *structures)

    def _tree_flatten(structure):
        # The structure itself doubles as the "treedef" consumed by _tree_unflatten.
        return dm_tree.flatten(structure), structure

    def _tree_unflatten(treedef, leaves):
        return dm_tree.unflatten_as(treedef, leaves)

    def _tree_leaves(structure):
        return dm_tree.flatten(structure)

    def _register_pytree_node(*args, **kwargs):
        return None

    def _register_pytree_node_class(cls):
        return cls

    def _softmax(x, axis=-1):
        # Subtract the per-axis maximum before exponentiating, as the original lambda did.
        exponentials = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return exponentials / np.sum(exponentials, axis=axis, keepdims=True)

    tree_stub.map = _tree_map
    # jax.tree.flatten mirrors jax.tree_util.tree_flatten and returns
    # (leaves, treedef); it was previously wired to the leaves-only helper.
    tree_stub.flatten = _tree_flatten
    tree_stub.leaves = _tree_leaves
    tree_stub.unflatten = _tree_unflatten

    tree_util_stub.tree_map = _tree_map
    tree_util_stub.tree_flatten = _tree_flatten
    tree_util_stub.tree_unflatten = _tree_unflatten
    tree_util_stub.tree_leaves = _tree_leaves
    tree_util_stub.tree_structure = lambda structure: structure
    tree_util_stub.register_pytree_node = _register_pytree_node
    tree_util_stub.register_pytree_node_class = _register_pytree_node_class

    # Expose the whole NumPy namespace as jax.numpy.
    # NOTE(review): this also copies NumPy's module dunders (__name__, __spec__, ...)
    # onto the stub; confirm nothing relies on jax.numpy.__name__.
    jax_numpy_stub.__dict__.update(np.__dict__)
    jax_nn_stub.softmax = _softmax
    jax_lax_stub.stop_gradient = lambda x: x

    jax_stub.tree = tree_stub
    jax_stub.tree_map = _tree_map
    jax_stub.tree_util = tree_util_stub
    jax_stub.numpy = jax_numpy_stub
    jax_stub.nn = jax_nn_stub
    jax_stub.lax = jax_lax_stub
    jax_stub.Array = np.ndarray
    jax_stub.local_devices = lambda: [types.SimpleNamespace(platform="cpu")]
    jax_stub.devices = lambda *_args, **_kwargs: [types.SimpleNamespace(platform="cpu")]
    jax_stub.default_backend = lambda: "cpu"
    jax_stub.version = version_stub

    sys.modules["jax"] = jax_stub
    sys.modules["jax.numpy"] = jax_numpy_stub
    sys.modules["jax.nn"] = jax_nn_stub
    sys.modules["jax.lax"] = jax_lax_stub
    sys.modules["jax.tree"] = tree_stub
    sys.modules["jax.tree_util"] = tree_util_stub
    sys.modules["jax.version"] = version_stub
_install_jax_tree_stub()
def pytest_addoption(parser):
parser.addoption(
"--use-temp-dir",
@@ -10,6 +112,20 @@ def pytest_addoption(parser):
)
def layer_marker_names(path, root):
    """Return the test-layer marker names implied by *path*.

    Only directory names below *root* are considered, so a checkout located
    under a directory that happens to be called e.g. ``cluster`` or ``unit``
    does not mark every test. Paths outside *root* fall back to matching
    against the full path.

    Args:
        path: path of the collected test file.
        root: directory the layer directories are expected to live under.

    Returns:
        Matching names, in the fixed order unit, integration, functional, cluster.
    """
    try:
        parts = path.relative_to(root).parts
    except ValueError:
        parts = path.parts
    layers = ("unit", "integration", "functional", "cluster")
    return [layer for layer in layers if layer in parts]


def pytest_collection_modifyitems(config, items):
    """Mark each collected item with the test layer(s) named in its path.

    The markers feed the ``-m`` expression in the pytest configuration, so a
    wrongly applied ``cluster`` marker would silently deselect the test.
    """
    root = Path(config.rootpath)
    for item in items:
        item_path = Path(str(item.fspath))
        for layer in layer_marker_names(item_path, root):
            item.add_marker(getattr(pytest.mark, layer))
@pytest.hookimpl(tryfirst=True)
def pytest_itemcollected(item):
try:

View File

@@ -29,11 +29,9 @@ dependencies = [
"tensorflow-cpu>=2.16.1",
"importlib-resources>=6.1.0",
"importlib-metadata>=4.8.2,<5.0.0",
"biopython>=1.82",
"biopython>=1.81,<1.82",
"nbformat>=5.9.2",
"py3Dmol==2.0.4",
"pytest>=6.0",
"parameterized",
"ipython==8.16.1",
"tqdm>=4.66.1",
"appdirs>=1.4.4",
@@ -45,6 +43,15 @@ dependencies = [
"typing-extensions==4.14.0",
]
[project.optional-dependencies]
test = [
"parameterized",
"pytest>=8.0",
"pytest-cov>=5.0",
"pytest-timeout>=2.3",
"pytest-xdist>=3.6",
]
[project.urls]
Homepage = "https://github.com/KosinskiLab/AlphaPulldown"

View File

@@ -3,10 +3,26 @@ log_cli = true
log_level = INFO
log_format = %(asctime)s [%(levelname)8s] %(name)s: %(message)s
log_date_format = %Y-%m-%d %H:%M:%S
testpaths =
test/unit
test/integration
test/functional
python_files = test_*.py
norecursedirs =
test/alphalink test/outdated
test/outdated
alphafold
alphafold3
ColabFold
AlphaLink2
addopts = -ra
build
__pycache__
markers =
unit: fast unit tests for pure helpers and small mocked components.
integration: CPU-only integration tests for filesystem, CLI, and module wiring.
functional: deterministic package-level tests that are heavier than unit/integration.
cluster: tests and utilities intended for Slurm or cluster execution.
gpu: tests that require a GPU.
slow: tests that are intentionally slow.
external_tools: tests that rely on optional external tools or heavyweight dependencies.
network: tests that require network access.
addopts = -ra --strict-markers -m "not cluster and not gpu and not network and not external_tools"

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import pickle
import numpy as np
from pathlib import Path
import pytest
from absl import logging
from absl.testing import absltest
@@ -15,10 +16,13 @@ from alphafold.common import residue_constants
from alphapulldown.utils.msa_encoding import a3m_to_ids, ids_to_a3m
pytestmark = [pytest.mark.functional, pytest.mark.external_tools]
class TestAF3WithAF2Features(absltest.TestCase):
def test_msa_identity_after_slicing_and_runtime_dump(self):
# Load monomeric pickles
repo_root = Path(__file__).resolve().parents[1]
repo_root = Path(__file__).resolve().parents[2]
features_dir = repo_root / 'test' / 'test_data' / 'features'
pkl_a = features_dir / '3L4Q_A.3L4Q.cif.A.pkl'
pkl_c = features_dir / '3L4Q_C.3L4Q.pdb.C.pkl'
@@ -77,4 +81,3 @@ class TestAF3WithAF2Features(absltest.TestCase):
if __name__ == '__main__':
absltest.main()

View File

@@ -27,6 +27,8 @@ from alphapulldown_input_parser import generate_fold_specifications
# --------------------------------------------------------------------------- #
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
REPO_ROOT = Path(__file__).resolve().parents[2]
TEST_ROOT = REPO_ROOT / "test"
DATA_DIR = Path(os.getenv("ALPHAFOLD_DATA_DIR", "/scratch/AlphaFold_DBs/2.3.0"))
os.environ["JAX_COMPILATION_CACHE_DIR"] = "/scratch/dima/jax_cache"
@@ -139,8 +141,7 @@ class _TestBase(parameterized.TestCase):
def setUp(self):
super().setUp()
this_dir = Path(__file__).resolve().parent
self.test_data_dir = this_dir / "test_data"
self.test_data_dir = TEST_ROOT / "test_data"
self.test_features_dir = self.test_data_dir / "features"
self.test_protein_lists_dir = self.test_data_dir / "protein_lists"
self.test_modelling_dir = self.test_data_dir / "predictions"

View File

@@ -45,6 +45,8 @@ DATA_DIR = os.getenv(
)
if not os.path.exists(DATA_DIR):
absltest.skip("set $ALPHAFOLD_DATA_DIR to run Alphafold functional tests")
REPO_ROOT = Path(__file__).resolve().parents[2]
TEST_ROOT = REPO_ROOT / "test"
def _has_nvidia_gpu() -> bool:
@@ -196,13 +198,12 @@ class _TestBase(parameterized.TestCase):
super().setUp()
# directories inside the repo (relative to this file)
this_dir = Path(__file__).resolve().parent
self.test_data_dir = this_dir / "test_data"
self.test_data_dir = TEST_ROOT / "test_data"
self.test_fastas_dir = self.test_data_dir / "fastas"
self.test_features_dir = this_dir / "test_data" / "features"
self.test_protein_lists_dir = this_dir / "test_data" / "protein_lists"
self.test_templates_dir = this_dir / "test_data" / "templates"
self.test_modelling_dir = this_dir / "test_data" / "predictions"
self.test_features_dir = self.test_data_dir / "features"
self.test_protein_lists_dir = self.test_data_dir / "protein_lists"
self.test_templates_dir = self.test_data_dir / "templates"
self.test_modelling_dir = self.test_data_dir / "predictions"
# Create a unique output directory for this test
test_name = self._testMethodName

View File

@@ -47,6 +47,8 @@ ALPHALINK_WEIGHTS_DIR = os.getenv(
ALPHALINK_WEIGHTS_FILE = os.path.join(ALPHALINK_WEIGHTS_DIR, "AlphaLink-Multimer_SDA_v3.pt")
if not os.path.exists(ALPHALINK_WEIGHTS_FILE):
absltest.skip("set $ALPHALINK_WEIGHTS_DIR to run AlphaLink functional tests")
REPO_ROOT = Path(__file__).resolve().parents[2]
TEST_ROOT = REPO_ROOT / "test"
# --------------------------------------------------------------------------- #
@@ -75,13 +77,12 @@ class _TestBase(parameterized.TestCase):
super().setUp()
# directories inside the repo (relative to this file)
this_dir = Path(__file__).resolve().parent
self.test_data_dir = this_dir / "test_data"
self.test_data_dir = TEST_ROOT / "test_data"
self.test_fastas_dir = self.test_data_dir / "fastas"
self.test_features_dir = this_dir / "test_data" / "features"
self.test_protein_lists_dir = this_dir / "test_data" / "protein_lists"
self.test_templates_dir = this_dir / "test_data" / "templates"
self.test_crosslinks_dir = this_dir / "alphalink"
self.test_features_dir = self.test_data_dir / "features"
self.test_protein_lists_dir = self.test_data_dir / "protein_lists"
self.test_templates_dir = self.test_data_dir / "templates"
self.test_crosslinks_dir = TEST_ROOT / "alphalink"
# Create a unique output directory for this test
test_name = self._testMethodName
@@ -825,4 +826,4 @@ def _parse_test_args():
_TestBase.use_temp_dir, _TestBase.save_predictions = _parse_test_args()
if __name__ == "__main__":
absltest.main()
absltest.main()

View File

@@ -1,16 +1,16 @@
#!/usr/bin/env python3
"""Submit AlphaFold2 functional tests to Slurm and summarize results.
This is a standalone wrapper for `test/check_alphafold2_predictions.py`.
This is a standalone wrapper for `test/cluster/check_alphafold2_predictions.py`.
It is intentionally not a pytest test module; the `run_` prefix keeps pytest from collecting it.
Typical usage from a login node:
python test/test_alphafold2_predictions.py
python test/cluster/run_alphafold2_predictions.py
Run only selected tests:
python test/test_alphafold2_predictions.py -k dimer
python test/cluster/run_alphafold2_predictions.py -k dimer
"""
from __future__ import annotations
@@ -35,8 +35,8 @@ from typing import Iterable
from _pytest.mark.expression import Expression
REPO_ROOT = Path(__file__).resolve().parents[1]
DEFAULT_TEST_FILE = REPO_ROOT / "test" / "check_alphafold2_predictions.py"
REPO_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_TEST_FILE = REPO_ROOT / "test" / "cluster" / "check_alphafold2_predictions.py"
DEFAULT_LOG_ROOT = REPO_ROOT / "test_logs"
PASS_STATES = {"COMPLETED"}
@@ -172,6 +172,8 @@ def collect_nodeids(
python_executable,
"-m",
"pytest",
"-o",
"addopts=-ra --strict-markers",
"--collect-only",
"-q",
str(test_file),
@@ -212,6 +214,8 @@ def write_job_script(
python_executable,
"-m",
"pytest",
"-o",
"addopts=-ra --strict-markers",
"-vv",
"-s",
job.nodeid,
@@ -498,7 +502,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--test-file",
default=str(DEFAULT_TEST_FILE),
help="Pytest file to collect from. Defaults to test/check_alphafold2_predictions.py",
help="Pytest file to collect from. Defaults to test/cluster/check_alphafold2_predictions.py",
)
parser.add_argument(
"-k",

View File

@@ -1,20 +1,20 @@
#!/usr/bin/env python3
"""Submit AlphaFold3 functional tests to Slurm and summarize results.
This is a standalone wrapper for `test/check_alphafold3_predictions.py`.
This is a standalone wrapper for `test/cluster/check_alphafold3_predictions.py`.
It is intentionally not a pytest test module; the `run_` prefix keeps pytest from collecting it.
Typical usage from a login node:
python test/test_alphafold3_predictions.py
python test/cluster/run_alphafold3_predictions.py
Run only selected tests:
python test/test_alphafold3_predictions.py -k chopped
python test/cluster/run_alphafold3_predictions.py -k chopped
Enable the runtime benchmark test as well:
python test/test_alphafold3_predictions.py --include-perf
python test/cluster/run_alphafold3_predictions.py --include-perf
"""
from __future__ import annotations
@@ -40,8 +40,8 @@ from typing import Iterable
from _pytest.mark.expression import Expression
REPO_ROOT = Path(__file__).resolve().parents[1]
DEFAULT_TEST_FILE = REPO_ROOT / "test" / "check_alphafold3_predictions.py"
REPO_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_TEST_FILE = REPO_ROOT / "test" / "cluster" / "check_alphafold3_predictions.py"
DEFAULT_LOG_ROOT = REPO_ROOT / "test_logs"
PASS_STATES = {"COMPLETED"}
@@ -177,6 +177,8 @@ def collect_nodeids(
python_executable,
"-m",
"pytest",
"-o",
"addopts=-ra --strict-markers",
"--collect-only",
"-q",
str(test_file),
@@ -218,6 +220,8 @@ def write_job_script(
python_executable,
"-m",
"pytest",
"-o",
"addopts=-ra --strict-markers",
"-vv",
"-s",
job.nodeid,
@@ -510,7 +514,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--test-file",
default=str(DEFAULT_TEST_FILE),
help="Pytest file to collect from. Defaults to test/check_alphafold3_predictions.py",
help="Pytest file to collect from. Defaults to test/cluster/check_alphafold3_predictions.py",
)
parser.add_argument(
"-k",

View File

@@ -21,7 +21,7 @@ class TestCreateIndividualFeaturesWithTemplates(parameterized.TestCase):
self.temp_dir = tempfile.TemporaryDirectory()
self.TEST_DATA_DIR = Path(self.temp_dir.name)
# copy test data into temp area
original_test_data_dir = Path(__file__).parent / "test_data"
original_test_data_dir = Path(__file__).resolve().parents[2] / "test" / "test_data"
shutil.copytree(original_test_data_dir, self.TEST_DATA_DIR, dirs_exist_ok=True)
# ensure required dirs exist
(self.TEST_DATA_DIR / 'features').mkdir(parents=True, exist_ok=True)

View File

@@ -0,0 +1,2 @@
Functional tests live here when they are deterministic, CPU-safe, and heavier than the
unit/integration layers. GPU, Slurm, or external-tool smoke tests belong under `test/cluster/`.

View File

@@ -4,24 +4,28 @@ import subprocess
from absl.testing import parameterized
import shutil
import tempfile
from os.path import join, dirname, abspath
from os.path import join
import zipfile
import json
import glob
from pathlib import Path
import pytest
"""
Test conversion of PDB to CIF for monomers and multimers
"""
pytestmark = pytest.mark.external_tools
class TestConvertPDB2CIF(parameterized.TestCase):
def setUp(self) -> None:
super().setUp()
# Get path of the alphapulldown module
parent_dir = join(dirname(dirname(abspath(__file__))))
repo_root = Path(__file__).resolve().parents[2]
# Join the path with the script name
self.input_dir = join(parent_dir, "test/test_data/predictions")
self.script_path = join(parent_dir, "alphapulldown/scripts/convert_to_modelcif.py")
self.input_dir = join(repo_root, "test/test_data/predictions")
self.script_path = join(repo_root, "alphapulldown/scripts/convert_to_modelcif.py")
# Set logging level to INFO
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

View File

@@ -3,17 +3,18 @@ import logging
from absl.testing import parameterized
import shutil
import tempfile
from os.path import join, dirname, abspath
from os.path import join
import gzip
import json
import pickle
from pathlib import Path
from alphapulldown.utils.post_modelling import post_prediction_process
class TestPostPrediction(parameterized.TestCase):
def setUp(self) -> None:
super().setUp()
parent_dir = join(dirname(dirname(abspath(__file__))))
self.input_dir = join(parent_dir, "test/test_data/predictions")
repo_root = Path(__file__).resolve().parents[2]
self.input_dir = join(repo_root, "test/test_data/predictions")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@parameterized.parameters(

View File

@@ -1,226 +0,0 @@
import numpy as np
from alphafold.data import msa_pairing
from alphafold.data import parsers
from alphafold.data import pipeline
from alphapulldown.objects import MonomericObject
from alphapulldown.utils import mmseqs_species_identifiers
def _feature_dict_from_a3m(
    sequence: str,
    a3m: str,
    *,
    species_resolver,
) -> dict[str, np.ndarray]:
    """Build a feature dict for one chain from its sequence and an A3M string.

    Sequence and MSA features are combined, enriched with the MMseqs
    identifier features, and every MSA/identifier feature is duplicated
    under a ``<name>_all_seq`` key.
    """
    features = {}
    features.update(pipeline.make_sequence_features(sequence, 'none', len(sequence)))
    features.update(pipeline.make_msa_features([parsers.parse_a3m(a3m)]))

    mmseqs_species_identifiers.enrich_mmseq_feature_dict_with_identifiers(
        features,
        a3m,
        species_resolver=species_resolver,
    )

    identifier_feats = (
        'msa_species_identifiers',
        'msa_uniprot_accession_identifiers',
    )
    pairable = msa_pairing.MSA_FEATURES + identifier_feats

    # Build the copies first so the dict is not mutated while being iterated.
    all_seq_copies = {}
    for name, array in features.items():
        if name in pairable:
            all_seq_copies[f'{name}_all_seq'] = array
    features.update(all_seq_copies)
    return features
def test_make_msa_features_resolves_mmseqs_species_identifiers(monkeypatch):
    """Identifier features carry the species id resolved for each accession."""

    def fake_resolver(accessions, **_):
        return {
            'A0A636IKY3': '108619',
            'UPI001118B830': '562',
        }

    monkeypatch.setattr(
        mmseqs_species_identifiers,
        'resolve_species_ids_by_accession',
        fake_resolver,
    )

    # Query row followed by two UniRef100 hits; the text ends with a newline.
    a3m = (
        '>101\n'
        'ACDE\n'
        '>UniRef100_A0A636IKY3\t136\t0.883\n'
        'ACDF\n'
        '>UniRef100_UPI001118B830\t855\t0.990\n'
        'AC-E\n'
    )

    features = mmseqs_species_identifiers.build_mmseq_identifier_features(a3m)

    species = features['msa_species_identifiers'].tolist()
    accessions = features['msa_uniprot_accession_identifiers'].tolist()
    assert species == [b'', b'108619', b'562']
    assert accessions == [b'', b'A0A636IKY3', b'UPI001118B830']
def test_pair_sequences_works_with_mmseqs_accession_species_resolution(
    monkeypatch,
):
    """Rows of two chains are paired by the species resolved per accession."""
    species_by_accession = {
        'A0A636IKY3': '562',
        'A0A743YDY2': '573',
        'UPI001118B830': '562',
        'UPI00101273C6': '573',
    }

    def fake_resolver(accessions, **_):
        return dict(species_by_accession)

    monkeypatch.setattr(
        mmseqs_species_identifiers,
        'resolve_species_ids_by_accession',
        fake_resolver,
    )

    a3m_chain_a = (
        '>101\n'
        'ACDE\n'
        '>UniRef100_A0A636IKY3\t136\t0.883\n'
        'ACDF\n'
        '>UniRef100_A0A743YDY2\t134\t0.932\n'
        'AC-E\n'
    )
    a3m_chain_b = (
        '>101\n'
        'WXYZ\n'
        '>UniRef100_UPI001118B830\t855\t0.990\n'
        'WXYW\n'
        '>UniRef100_UPI00101273C6\t833\t0.919\n'
        'WX-Z\n'
    )

    # Looked up after patching, so both chains go through the fake resolver.
    chain_a = _feature_dict_from_a3m(
        'ACDE',
        a3m_chain_a,
        species_resolver=mmseqs_species_identifiers.resolve_species_ids_by_accession,
    )
    chain_b = _feature_dict_from_a3m(
        'WXYZ',
        a3m_chain_b,
        species_resolver=mmseqs_species_identifiers.resolve_species_ids_by_accession,
    )

    pairing = msa_pairing.pair_sequences([chain_a, chain_b])
    paired_rows = pairing[2]

    assert paired_rows.shape == (3, 2)
    first_row, *other_rows = paired_rows
    assert tuple(first_row) == (0, 0)
    assert {tuple(row) for row in other_rows} == {(1, 1), (2, 2)}
def test_make_mmseq_features_researches_templates_for_precomputed_msa(
    monkeypatch,
    tmp_path,
):
    """A precomputed MSA is reused while templates come from a fresh search."""
    import alphapulldown.objects as objects_mod

    precomputed_a3m = tmp_path / 'dummy.a3m'
    precomputed_a3m.write_text(
        '# header line that should be ignored later\n'
        '>101\n'
        'ACDE\n',
        encoding='utf-8',
    )

    # Arguments each patched collaborator was called with, keyed by its name.
    calls = {}

    def fake_unserialize_msa(a3m_lines, sequence):
        calls['unserialize_msa'] = {
            'a3m_lines': a3m_lines,
            'sequence': sequence,
        }
        return (
            ['PRECOMPUTED_UNPAIRED'],
            ['PRECOMPUTED_PAIRED'],
            ['PRECOMPUTED_UNIQUE'],
            ['PRECOMPUTED_CARDINALITY'],
            ['PRECOMPUTED_TEMPLATE'],
        )

    def fake_get_msa_and_templates(**kwargs):
        calls['get_msa_and_templates'] = kwargs
        return (
            ['IGNORED_UNPAIRED'],
            ['IGNORED_PAIRED'],
            ['IGNORED_UNIQUE'],
            ['IGNORED_CARDINALITY'],
            ['TEMPLATE_FROM_RESEARCH'],
        )

    def fake_build_monomer_feature(sequence, msa, template_feature):
        calls['build_monomer_feature'] = {
            'sequence': sequence,
            'msa': msa,
            'template_feature': template_feature,
        }
        return {
            'template_confidence_scores': None,
            'template_release_date': None,
        }

    def fake_enrich(feature_dict, a3m, **_kwargs):
        calls['enrich_mmseq_feature_dict_with_identifiers'] = a3m
        feature_dict['msa_species_identifiers'] = np.asarray([b''])
        feature_dict['msa_uniprot_accession_identifiers'] = np.asarray([b''])

    replacements = (
        ('unserialize_msa', fake_unserialize_msa),
        ('get_msa_and_templates', fake_get_msa_and_templates),
        ('build_monomer_feature', fake_build_monomer_feature),
        ('enrich_mmseq_feature_dict_with_identifiers', fake_enrich),
    )
    for attribute, replacement in replacements:
        monkeypatch.setattr(objects_mod, attribute, replacement)

    monomer = MonomericObject('dummy', 'ACDE')
    monomer.make_mmseq_features(
        DEFAULT_API_SERVER='https://fake.server',
        output_dir=str(tmp_path),
        use_precomputed_msa=True,
        use_templates=True,
    )

    unserialize_call = calls['unserialize_msa']
    assert unserialize_call['sequence'] == 'ACDE'
    assert unserialize_call['a3m_lines'] == ['>101\nACDE']

    expected_search_kwargs = {
        'jobname': 'dummy',
        'query_sequences': 'ACDE',
        'a3m_lines': False,
        'result_dir': tmp_path,
        'msa_mode': 'single_sequence',
        'use_templates': True,
        'custom_template_path': None,
        'pair_mode': 'none',
        'host_url': 'https://fake.server',
        'user_agent': 'alphapulldown',
    }
    assert calls['get_msa_and_templates'] == expected_search_kwargs

    expected_build_args = {
        'sequence': 'ACDE',
        'msa': 'PRECOMPUTED_UNPAIRED',
        'template_feature': 'TEMPLATE_FROM_RESEARCH',
    }
    assert calls['build_monomer_feature'] == expected_build_args

    enriched_with = calls['enrich_mmseq_feature_dict_with_identifiers']
    assert enriched_with == 'PRECOMPUTED_UNPAIRED'

    result = monomer.feature_dict
    assert isinstance(result['template_confidence_scores'], np.ndarray)
    assert result['template_release_date'] == ['none']

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import ast
import json
import sys
from dataclasses import dataclass
from pathlib import Path
# Two levels above this file's directory; the script is invoked as
# test/tools/check_function_coverage.py.
REPO_ROOT = Path(__file__).resolve().parents[2]
PACKAGE_ROOT = REPO_ROOT / "alphapulldown"
# Repo-relative POSIX path prefixes skipped by the function scan. Keep this in
# sync with the `omit` list in .coveragerc: files coverage.py does not measure
# have no executed lines, so their functions would always look uncovered.
OMIT_PREFIXES = (
    "alphapulldown/__init__.py",
    "alphapulldown/analysis_pipeline/af2plots/",
)
@dataclass(frozen=True)
class FunctionSpan:
    """Location of one function or method definition inside a source file."""

    path: Path  # file path exactly as handed to the collector
    qualname: str  # enclosing class/function names and own name, dot-joined
    lineno: int  # line number reported by the AST node
    end_lineno: int  # last line of the definition (lineno if unavailable)


class FunctionCollector(ast.NodeVisitor):
    """AST visitor that records every sync and async function it encounters.

    Results accumulate in ``functions``; ``stack`` holds the names of the
    classes and functions currently being traversed and is used to build
    dotted qualified names.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self.stack: list[str] = []
        self.functions: list[FunctionSpan] = []

    def _visit_children_within(self, scope_name: str, node: ast.AST) -> None:
        # Traverse the node's children with scope_name pushed on the stack.
        self.stack.append(scope_name)
        self.generic_visit(node)
        self.stack.pop()

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self._visit_children_within(node.name, node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._record(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._record(node)

    def _record(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        # Joining the stack with the own name yields the bare name at top level.
        dotted_name = ".".join((*self.stack, node.name))
        last_line = getattr(node, "end_lineno", node.lineno)
        span = FunctionSpan(
            path=self.path,
            qualname=dotted_name,
            lineno=node.lineno,
            end_lineno=last_line,
        )
        self.functions.append(span)
        # Descend so nested functions are recorded under this function's name.
        self._visit_children_within(node.name, node)
def iter_package_functions() -> list[FunctionSpan]:
    """Collect every function defined in ``*.py`` files under PACKAGE_ROOT.

    Files are visited in sorted path order; files whose repo-relative POSIX
    path starts with one of OMIT_PREFIXES are skipped. Recorded spans carry
    paths relative to REPO_ROOT.
    """
    collected: list[FunctionSpan] = []
    for source_file in sorted(PACKAGE_ROOT.rglob("*.py")):
        relative = source_file.relative_to(REPO_ROOT)
        # str.startswith accepts the whole tuple of prefixes at once.
        if relative.as_posix().startswith(OMIT_PREFIXES):
            continue
        module_ast = ast.parse(source_file.read_text(encoding="utf-8"))
        visitor = FunctionCollector(relative)
        visitor.visit(module_ast)
        collected += visitor.functions
    return collected
def normalize_coverage_paths(payload: dict) -> dict[Path, set[int]]:
    """Map each file in a coverage JSON payload to its set of executed lines.

    Absolute paths are rewritten relative to REPO_ROOT, and absolute paths
    outside REPO_ROOT are dropped. Relative paths are kept unchanged.
    """
    executed_by_file: dict[Path, set[int]] = {}
    for file_name, entry in payload.get("files", {}).items():
        key = Path(file_name)
        if key.is_absolute():
            try:
                key = key.relative_to(REPO_ROOT)
            except ValueError:
                # Not inside the repository: cannot be matched to a package file.
                continue
        executed_by_file[key] = set(entry.get("executed_lines", []))
    return executed_by_file
def function_body_executed(function: FunctionSpan, executed_lines: set[int]) -> bool:
    """Return True if any line of the function's body was executed.

    The ``def`` line runs when the enclosing module or class body is
    executed, i.e. at import time, so it says nothing about whether the
    function was ever called. For multi-line functions it is therefore
    excluded. Single-line functions cannot be split at line granularity and
    keep the old behaviour of counting their only line.

    NOTE(review): a function with no measured statement after its ``def``
    line (presumably e.g. a docstring-only body) is reported as not executed.
    """
    first_line = function.lineno
    if function.end_lineno > first_line:
        first_line += 1
    body_lines = range(first_line, function.end_lineno + 1)
    return not executed_lines.isdisjoint(body_lines)


def check_function_coverage(coverage_json: Path, *, report_only: bool = False) -> int:
    """List package functions whose body never ran according to a coverage JSON report.

    Args:
        coverage_json: path to the JSON report to read.
        report_only: when True, uncovered functions are printed but the
            return value stays 0.

    Returns:
        0 if every function was executed or ``report_only`` is set, otherwise 1.
    """
    payload = json.loads(coverage_json.read_text(encoding="utf-8"))
    executed_by_path = normalize_coverage_paths(payload)

    missing: list[FunctionSpan] = [
        function
        for function in iter_package_functions()
        if not function_body_executed(
            function, executed_by_path.get(function.path, set())
        )
    ]

    if not missing:
        print("Function coverage check passed: every alphapulldown function was executed at least once.")
        return 0

    status = "report" if report_only else "failed"
    print(f"Function coverage check {status}. Missing functions:")
    for function in missing:
        print(f"  {function.path}:{function.lineno} {function.qualname}")
    print(f"Total uncovered functions: {len(missing)}")
    return 0 if report_only else 1
def main() -> int:
    """Parse the command line and run the function coverage check.

    Returns:
        The exit code produced by ``check_function_coverage``.
    """
    cli = argparse.ArgumentParser(
        description="Report or fail if alphapulldown functions were not executed.",
    )
    cli.add_argument(
        "coverage_json",
        nargs="?",
        default="coverage.json",
        help="Path to coverage.py JSON report generated by pytest-cov.",
    )
    cli.add_argument(
        "--report-only",
        action="store_true",
        help="Print uncovered functions without failing.",
    )
    options = cli.parse_args()

    report_path = Path(options.coverage_json)
    return check_function_coverage(report_path, report_only=options.report_only)
if __name__ == "__main__":
sys.exit(main())