mirror of
https://github.com/KosinskiLab/AlphaPulldown.git
synced 2026-06-04 14:14:24 +08:00
Fix #457
This commit is contained in:
1
.github/workflows/github_actions.yml
vendored
1
.github/workflows/github_actions.yml
vendored
@@ -68,6 +68,7 @@ jobs:
|
||||
pytest -s test/test_modelcif.py
|
||||
pytest -s test/test_features_with_templates.py
|
||||
pytest -s test/test_post_prediction.py
|
||||
pytest -s test/test_parse_fold.py
|
||||
#export PYTHONPATH=$PWD/alphapulldown/analysis_pipeline:$PYTHONPATH
|
||||
## Test analysis pipeline
|
||||
#conda install -c bioconda biopandas
|
||||
|
||||
@@ -21,7 +21,7 @@ from absl import logging
|
||||
logging.set_verbosity(logging.INFO)
|
||||
|
||||
|
||||
def parse_fold(input_list, features_directory, protein_delimiter):
    """
    Parses a list of protein fold specifications and returns structured folding jobs.

    Each entry of ``input_list`` describes one folding job: one or more protein
    specifications joined by ``protein_delimiter``. A protein specification is
    ``:``-separated and takes one of these forms::

        name
        name:copy_number
        name:start-stop[:start-stop...]
        name:copy_number:start-stop[:start-stop...]

    Args:
        input_list: Iterable of job specification strings.
        features_directory: Iterable of directories searched for
            ``<name>.pkl`` or ``<name>.pkl.xz``.
        protein_delimiter: String that separates proteins within one job.

    Returns:
        A list with one entry per job. Each job is a list of single-key dicts
        ``{name: region}``, repeated ``copy_number`` times, where ``region`` is
        the string ``"all"`` or a list of integer tuples parsed from the
        ``start-stop`` fields.

    Raises:
        FileNotFoundError: If any required protein features are missing. The
            message lists every missing name across all jobs, sorted.
        SystemExit: If a specification cannot be parsed. The error is logged
            and the process exits with status 1.
    """
    all_folding_jobs = []
    # Shared across all jobs so a single error reports every missing protein.
    missing_features = set()
    for i in input_list:
        formatted_folds = []
        protein_folds = [x.split(":") for x in i.split(protein_delimiter)]
        for protein_fold in protein_folds:
            # str.split always yields at least one field, so the name is safe to read.
            name, number, region = protein_fold[0], 1, "all"
            fields = protein_fold[1:]
            try:
                # A first field without "-" is the copy number; any remaining
                # fields are start-stop regions.
                if fields and "-" not in fields[0]:
                    number = int(fields[0])
                    fields = fields[1:]
                if fields:
                    region = [tuple(int(x) for x in r.split("-")) for r in fields]
            except ValueError:
                logging.error(f"Your format: {i} is wrong. The program will terminate.")
                # Exit non-zero so calling scripts can detect the failure.
                sys.exit(1)

            # Check for missing features
            if not any(
                exists(join(monomer_dir, f"{name}{ext}"))
                for monomer_dir in features_directory
                for ext in (".pkl", ".pkl.xz")
            ):
                missing_features.add(name)

            formatted_folds.extend([{name: region} for _ in range(number)])
        all_folding_jobs.append(formatted_folds)

    if missing_features:
        raise FileNotFoundError(
            f"{sorted(missing_features)} not found in {features_directory}"
        )
    return all_folding_jobs
|
||||
|
||||
def pad_input_features(feature_dict: dict,
|
||||
|
||||
130
test/test_parse_fold.py
Normal file
130
test/test_parse_fold.py
Normal file
@@ -0,0 +1,130 @@
|
||||
import logging
|
||||
from absl.testing import parameterized
|
||||
from unittest import mock
|
||||
from alphapulldown.utils.modelling_setup import parse_fold
|
||||
|
||||
"""
|
||||
Test parse_fold function with different scenarios
|
||||
"""
|
||||
|
||||
class TestParseFold(parameterized.TestCase):
    """Exercise parse_fold against mocked feature-file lookups.

    Every case patches ``exists`` inside ``alphapulldown.utils.modelling_setup``,
    so no real feature files are needed. ``mock_side_effect`` maps a path to the
    value the patched ``exists`` returns for it; unlisted paths count as missing.
    The keys are written with ``/`` separators and therefore assume POSIX-style
    path joining.
    """

    def setUp(self) -> None:
        super().setUp()
        # Set logging level to INFO so the per-case diagnostics below are emitted.
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    @parameterized.named_parameters(
        {
            'testcase_name': 'single_protein_no_copy',
            'input_list': ['protein1'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': True,
                'dir1/protein1.pkl.xz': False,
            },
            'expected_result': [[{'protein1': 'all'}]],
        },
        {
            'testcase_name': 'single_protein_with_copy_number',
            'input_list': ['protein1:2'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': True,
                'dir1/protein1.pkl.xz': False,
            },
            'expected_result': [[{'protein1': 'all'}, {'protein1': 'all'}]],
        },
        {
            'testcase_name': 'single_protein_with_region',
            'input_list': ['protein1:1-10'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': True,
                'dir1/protein1.pkl.xz': False,
            },
            'expected_result': [[{'protein1': [(1, 10)]}]],
        },
        {
            'testcase_name': 'single_protein_with_copy_and_regions',
            'input_list': ['protein1:2:1-10:20-30'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': True,
                'dir1/protein1.pkl.xz': False,
            },
            'expected_result': [[{'protein1': [(1, 10), (20, 30)]}, {'protein1': [(1, 10), (20, 30)]}]],
        },
        {
            'testcase_name': 'multiple_proteins',
            'input_list': ['protein1:2_protein2:1-50'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': True,
                'dir1/protein1.pkl.xz': False,
                'dir1/protein2.pkl': True,
                'dir1/protein2.pkl.xz': False,
            },
            'expected_result': [[{'protein1': 'all'}, {'protein1': 'all'}, {'protein2': [(1, 50)]}]],
        },
        {
            'testcase_name': 'missing_features',
            'input_list': ['protein1', 'protein2'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': False,
                'dir1/protein1.pkl.xz': False,
                'dir1/protein2.pkl': False,
                'dir1/protein2.pkl.xz': False,
            },
            'expected_exception': FileNotFoundError,
            'expected_exception_message': "['protein1', 'protein2'] not found in ['dir1']",
        },
        {
            'testcase_name': 'invalid_format',
            'input_list': ['protein1::1-10'],
            'features_directory': ['dir1'],
            'protein_delimiter': '_',
            'mock_side_effect': {},
            'expected_exception': SystemExit,
        },
        {
            'testcase_name': 'feature_exists_in_multiple_dirs',
            'input_list': ['protein1'],
            'features_directory': ['dir1', 'dir2'],
            'protein_delimiter': '_',
            'mock_side_effect': {
                'dir1/protein1.pkl': False,
                'dir1/protein1.pkl.xz': False,
                'dir2/protein1.pkl': True,
                'dir2/protein1.pkl.xz': False,
            },
            'expected_result': [[{'protein1': 'all'}]],
        },
    )
    def test_parse_fold(self, input_list, features_directory, protein_delimiter, mock_side_effect,
                        expected_result=None, expected_exception=None, expected_exception_message=None):
        """Test parse_fold with different input scenarios"""
        # sys.exit is deliberately not patched: the real one already raises
        # SystemExit, which assertRaises can catch directly.
        with mock.patch('alphapulldown.utils.modelling_setup.exists') as mock_exists:
            mock_exists.side_effect = lambda path: mock_side_effect.get(path, False)
            logging.info(f"Testing with input: {input_list}, features_directory: {features_directory}, "
                         f"protein_delimiter: '{protein_delimiter}'")
            logging.info(f"Mock side effects: {mock_side_effect}")
            if expected_exception:
                with self.assertRaises(expected_exception) as context:
                    parse_fold(input_list, features_directory, protein_delimiter)
                # Checked after the context exits; code placed after the raising
                # call inside the block would never run.
                if expected_exception_message:
                    self.assertEqual(str(context.exception), expected_exception_message)
            else:
                result = parse_fold(input_list, features_directory, protein_delimiter)
                logging.info(f"Result: {result}, Expected: {expected_result}")
                self.assertEqual(result, expected_result)
|
||||
Reference in New Issue
Block a user