openfe gather: add progress bar for loading JSONs (#1786)

* all the profiling

* make progress bar pretty

* revert typing thing

* news

* all the profiling

* make progress bar pretty

* revert typing thing

* news

* update expected outputs

* update expected outputs
This commit is contained in:
Alyssa Travitz
2026-01-09 15:01:30 -08:00
committed by GitHub
parent fa282cb8ea
commit 907cefd24f
18 changed files with 79 additions and 30 deletions

23
news/progress_bar.rst Normal file
View File

@@ -0,0 +1,23 @@
**Added:**
* Added a progress bar to ``openfe gather`` that shows how many result JSON files have been loaded so far (PR #1786).
**Changed:**
* <news item>
**Deprecated:**
* <news item>
**Removed:**
* <news item>
**Fixed:**
* <news item>
**Security:**
* <news item>

View File

@@ -7,7 +7,6 @@ import sys
from typing import List, Literal
import click
import gufe
import pandas as pd
from openfecli import OFECommandPlugin
@@ -613,9 +612,8 @@ def _collect_result_jsons(results: List[os.PathLike | str]) -> List[pathlib.Path
# 1) find all possible jsons
json_fns = collect_jsons(results)
# 2) filter only result jsons
result_fns = filter(is_results_json, json_fns)
result_fns = list(filter(is_results_json, json_fns))
return result_fns
@@ -643,35 +641,45 @@ def _get_legs_from_result_jsons(
legs = defaultdict(lambda: defaultdict(list))
for result_fn in result_fns:
result_info, result = _load_valid_result_json(result_fn)
with click.progressbar(
result_fns,
label="Loading results:",
fill_char="",
empty_char=" ",
bar_template="%(label)s %(bar)s %(info)s files",
length=len(result_fns),
show_percent=False,
show_pos=True,
show_eta=False,
) as bar:
for result_fn in bar:
result_info, result = _load_valid_result_json(result_fn)
if result_info is None: # this means it couldn't find names and/or simtype
continue
names, simtype = result_info
if report.lower() == "raw":
if result is None:
parsed_raw_data = [(None, None)]
if result_info is None: # this means it couldn't find names and/or simtype
continue
names, simtype = result_info
if report.lower() == "raw":
if result is None:
parsed_raw_data = [(None, None)]
else:
parsed_raw_data = [
(
v[0]["outputs"]["unit_estimate"],
v[0]["outputs"]["unit_estimate_error"],
)
for v in result["protocol_result"]["data"].values()
]
legs[names][simtype].append(parsed_raw_data)
else:
parsed_raw_data = [
(
v[0]["outputs"]["unit_estimate"],
v[0]["outputs"]["unit_estimate_error"],
)
for v in result["protocol_result"]["data"].values()
]
legs[names][simtype].append(parsed_raw_data)
else:
if result is None:
# we want the dict name/simtype entry to exist for error reporting, even if there's no valid data
dGs = []
else:
dGs = [
v[0]["outputs"]["unit_estimate"]
for v in result["protocol_result"]["data"].values()
]
legs[names][simtype].extend(dGs)
if result is None:
# we want the dict name/simtype entry to exist for error reporting, even if there's no valid data
dGs = []
else:
dGs = [
v[0]["outputs"]["unit_estimate"]
for v in result["protocol_result"]["data"].values()
]
legs[names][simtype].extend(dGs)
return legs

View File

@@ -140,6 +140,7 @@ def test_no_results_found():
_RBFE_EXPECTED_DG = b"""
Loading results:
ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
lig_ejm_31 -0.09 0.05
lig_ejm_42 0.7 0.1
@@ -154,6 +155,7 @@ lig_jmc_28 -1.25 0.08
"""
_RBFE_EXPECTED_DDG = b"""
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_ejm_31 lig_ejm_42 0.8 0.1
lig_ejm_31 lig_ejm_46 -0.89 0.06
@@ -167,6 +169,7 @@ lig_ejm_46 lig_jmc_28 -0.27 0.06
"""
_RBFE_EXPECTED_RAW = b"""\
Loading results:
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_ejm_31 lig_ejm_42 -14.9 0.8
complex lig_ejm_31 lig_ejm_42 -14.8 0.8

View File

@@ -1,2 +1,3 @@
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.1 0.1
1 ligand_i Loading results: ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand_i ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.1 0.1

View File

@@ -1,3 +1,4 @@
Loading results:
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06
1 leg Loading results: ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
1 Loading results:
2 leg leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
3 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
4 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06

View File

@@ -1,3 +1,4 @@
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2
1 ligand_i Loading results: ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand_i ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
4 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2

View File

@@ -1,3 +1,4 @@
Loading results:
ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 -0.3 0.1
lig_CHEMBL3402744_300_4 0.9 0.2
1 ligand Loading results: DG(MLE) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 -0.3 0.1
4 lig_CHEMBL3402744_300_4 lig_CHEMBL3402744_300_4 0.9 0.2

View File

@@ -1,3 +1,4 @@
Loading results:
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06
1 leg Loading results: ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
1 Loading results:
2 leg leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
3 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
4 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06

View File

@@ -1,3 +1,4 @@
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 Error Error
lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 Error Error
1 ligand_i Loading results: ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand_i ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 Error Error
4 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 Error Error

View File

@@ -0,0 +1 @@
Loading results:
1 Loading results:
1 Loading results:

View File

@@ -0,0 +1 @@
Loading results:
1 Loading results:
1 Loading results:

View File

@@ -1,3 +1,4 @@
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2
1 ligand_i Loading results: ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand_i ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
4 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2

View File

@@ -1,3 +1,4 @@
Loading results:
ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 -0.3 0.1
lig_CHEMBL3402744_300_4 0.8 0.2
1 ligand Loading results: DG(MLE) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 -0.3 0.1
4 lig_CHEMBL3402744_300_4 lig_CHEMBL3402744_300_4 0.8 0.2

View File

@@ -1,3 +1,4 @@
Loading results:
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06
1 leg Loading results: ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
1 Loading results:
2 leg leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
3 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
4 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06

View File

@@ -1,3 +1,4 @@
Loading results:
ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2
1 ligand_i Loading results: ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand_i ligand_i ligand_j DDG(i->j) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 1.2 0.1
4 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 lig_CHEMBL3402749_500_9 3.6 0.2

View File

@@ -1,3 +1,4 @@
Loading results:
ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
lig_CHEMBL3402745_200_5 -0.90 0.08
lig_CHEMBL3402744_300_4 0.3 0.1
1 ligand Loading results: DG(MLE) (kcal/mol) uncertainty (kcal/mol)
1 Loading results:
2 ligand ligand DG(MLE) (kcal/mol) uncertainty (kcal/mol)
3 lig_CHEMBL3402745_200_5 lig_CHEMBL3402745_200_5 -0.90 0.08
4 lig_CHEMBL3402744_300_4 lig_CHEMBL3402744_300_4 0.3 0.1

View File

@@ -1,3 +1,4 @@
Loading results:
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06
1 leg Loading results: ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
1 Loading results:
2 leg leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
3 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.54 0.06
4 complex complex lig_CHEMBL3402745_200_5 lig_CHEMBL3402744_300_4 -12.31 0.06

View File

@@ -111,6 +111,7 @@ def mock_execute(expected_transformations):
@pytest.fixture
def ref_gather():
return """\
Loading results:
ligand_i\tligand_j\tDDG(i->j) (kcal/mol)\tuncertainty (kcal/mol)
lig_ejm_31\tlig_ejm_46\t0.0\t0.0
lig_ejm_31\tlig_ejm_47\t0.0\t0.0