Add devtools for json results generation + replace old ones (#691)

* Add devtools for json results generation + replace old ones
* remove extra gzip files
2026-06-04 14:14:22 +08:00 · 2024-01-29 10:37:03 +00:00
parent 8e3fecb982
commit cfcc924de8
11 changed files with 136 additions and 12 deletions
--- a/devtools/data/gen-serialized-results.py
+++ b/devtools/data/gen-serialized-results.py
@@ -0,0 +1,124 @@
+import gzip
+import json
+import logging
+import pathlib
+import tempfile
+from openff.toolkit import Molecule
+from openff.units import unit
+from kartograf.atom_aligner import align_mol_shape
+from kartograf import KartografAtomMapper
+import gufe
+from gufe.tokenization import JSON_HANDLER
+import openfe
+from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocol
+from openfe.protocols.openmm_afe import AbsoluteSolvationProtocol
+from openfe.protocols.openmm_rfe import RelativeHybridTopologyProtocol
+
+
+logger = logging.getLogger(__name__)
+
+# Explicit-hydrogen SMILES for the two ligands used to generate the results:
+# LIGA is pentan-3-one and LIGB is pentan-2-one (carbonyl shifted by one
+# carbon).
+LIGA = "[H]C([H])([H])C([H])([H])C(=O)C([H])([H])C([H])([H])[H]"
+LIGB = "[H]C([H])([H])C(=O)C([H])([H])C([H])([H])C([H])([H])[H]"
+
+
+def get_molecule(smi, name):
+    """Build a small molecule component from a SMILES string.
+
+    The OpenFF molecule gets conformers generated and "am1bcc" partial
+    charges assigned before it is converted.
+
+    Parameters
+    ----------
+    smi
+        SMILES string handed to ``Molecule.from_smiles``.
+    name
+        Name passed on to ``SmallMoleculeComponent.from_openff``.
+
+    Returns
+    -------
+    The result of ``openfe.SmallMoleculeComponent.from_openff``.
+    """
+    offmol = Molecule.from_smiles(smi)
+    offmol.generate_conformers()
+    offmol.assign_partial_charges(partial_charge_method="am1bcc")
+    component = openfe.SmallMoleculeComponent.from_openff(offmol, name=name)
+    return component
+
+
+def execute_and_serialize(dag, protocol, simname):
+    """Execute a protocol DAG and dump its results to a gzipped JSON file.
+
+    The DAG runs with a temporary directory serving as both the shared and
+    the scratch base directory. The collected results are then written to
+    ``{simname}_json_results.gz`` (a relative path, so it lands in the
+    current working directory).
+
+    Parameters
+    ----------
+    dag
+        DAG handed to ``gufe.protocols.execute_DAG``.
+    protocol
+        Object whose ``gather`` method is called with the DAG result.
+    simname
+        Label used in the log message and as the output file name prefix.
+    """
+    logger.info(f"running {simname}")
+    with tempfile.TemporaryDirectory() as scratch:
+        basedir = pathlib.Path(scratch)
+        dag_result = gufe.protocols.execute_DAG(
+            dag,
+            shared_basedir=basedir,
+            scratch_basedir=basedir,
+            keep_shared=False,
+            n_retries=3,
+        )
+    # Gathering is done once the temporary directory has been cleaned up.
+    protocol_result = protocol.gather([dag_result])
+
+    payload = {
+        "estimate": protocol_result.get_estimate(),
+        "uncertainty": protocol_result.get_uncertainty(),
+        "protocol_result": protocol_result.to_dict(),
+    }
+    # One keyed-dict entry per protocol unit result, indexed by its key.
+    per_unit = {}
+    for unit_result in dag_result.protocol_unit_results:
+        per_unit[unit_result.key] = unit_result.to_keyed_dict()
+    payload["unit_results"] = per_unit
+
+    outname = f"{simname}_json_results.gz"
+    with gzip.open(outname, 'wt') as handle:
+        json.dump(payload, handle, cls=JSON_HANDLER.encoder)
+
+
+def generate_md_json(smc):
+    """Run a short plain MD simulation of ``smc`` and serialize the results.
+
+    Starts from ``PlainMDProtocol.default_settings()``, sets the NVT
+    equilibration, equilibration and production lengths to 0.01 ns each and
+    the nonbonded method to "nocutoff". The same single-ligand system is
+    used for both end states. Results are written through
+    ``execute_and_serialize`` under the name "MDProtocol".
+
+    Parameters
+    ----------
+    smc
+        Component stored under the "ligand" key of the chemical system.
+    """
+    md_settings = PlainMDProtocol.default_settings()
+    sim_settings = md_settings.simulation_settings
+    sim_settings.equilibration_length_nvt = 0.01 * unit.nanosecond
+    sim_settings.equilibration_length = 0.01 * unit.nanosecond
+    sim_settings.production_length = 0.01 * unit.nanosecond
+    md_settings.system_settings.nonbonded_method = "nocutoff"
+
+    md_protocol = PlainMDProtocol(settings=md_settings)
+    ligand_system = openfe.ChemicalSystem({"ligand": smc})
+    md_dag = md_protocol.create(
+        stateA=ligand_system,
+        stateB=ligand_system,
+        mapping=None,
+    )
+
+    execute_and_serialize(md_dag, md_protocol, "MDProtocol")
+
+
+def generate_ahfe_json(smc):
+    """Run an absolute solvation calculation for ``smc`` and serialize it.
+
+    Starts from ``AbsoluteSolvationProtocol.default_settings()`` and
+    overrides the simulation lengths, lambda window counts, repeat and
+    replica counts, the online analysis target error and the compute
+    platforms ('CPU' for vacuum, 'CUDA' for solvent). State A holds the
+    ligand plus a default ``SolventComponent``; state B holds only the
+    solvent. Results are written through ``execute_and_serialize`` under
+    the name "AHFEProtocol".
+
+    Parameters
+    ----------
+    smc
+        Component stored under the "ligand" key of state A.
+    """
+    afe_settings = AbsoluteSolvationProtocol.default_settings()
+
+    solvent_sim = afe_settings.solvent_simulation_settings
+    solvent_sim.equilibration_length = 10 * unit.picosecond
+    solvent_sim.production_length = 500 * unit.picosecond
+
+    vacuum_sim = afe_settings.vacuum_simulation_settings
+    vacuum_sim.equilibration_length = 10 * unit.picosecond
+    vacuum_sim.production_length = 1000 * unit.picosecond
+
+    alchemical = afe_settings.alchemical_settings
+    alchemical.lambda_elec_windows = 5
+    alchemical.lambda_vdw_windows = 9
+
+    sampler = afe_settings.alchemsampler_settings
+    sampler.n_repeats = 3
+    # NOTE(review): 14 presumably matches the 5 + 9 lambda windows - confirm.
+    sampler.n_replicas = 14
+    target_error = 0.2 * unit.boltzmann_constant * unit.kelvin
+    sampler.online_analysis_target_error = target_error
+
+    afe_settings.vacuum_engine_settings.compute_platform = 'CPU'
+    afe_settings.solvent_engine_settings.compute_platform = 'CUDA'
+
+    afe_protocol = AbsoluteSolvationProtocol(settings=afe_settings)
+    solvated_ligand = openfe.ChemicalSystem(
+        {"ligand": smc, "solvent": openfe.SolventComponent()}
+    )
+    solvent_only = openfe.ChemicalSystem(
+        {"solvent": openfe.SolventComponent()}
+    )
+
+    afe_dag = afe_protocol.create(
+        stateA=solvated_ligand,
+        stateB=solvent_only,
+        mapping=None,
+    )
+
+    execute_and_serialize(afe_dag, afe_protocol, "AHFEProtocol")
+
+
+def generate_rfe_json(smcA, smcB):
+    """Run a relative hybrid topology calculation and serialize the results.
+
+    Starts from ``RelativeHybridTopologyProtocol.default_settings()`` with a
+    10 ps equilibration, a 250 ps production run and the "nocutoff"
+    nonbonded method. ``smcB`` is aligned onto ``smcA`` with
+    ``align_mol_shape`` and the first mapping suggested by a
+    ``KartografAtomMapper`` (hydrogens mapped) is used. Results are written
+    through ``execute_and_serialize`` under the name "RHFEProtocol".
+
+    Parameters
+    ----------
+    smcA
+        Ligand of state A; also the alignment reference.
+    smcB
+        Ligand that is aligned onto ``smcA`` and then used for state B.
+    """
+    rfe_settings = RelativeHybridTopologyProtocol.default_settings()
+    sim_settings = rfe_settings.simulation_settings
+    sim_settings.equilibration_length = 10 * unit.picosecond
+    sim_settings.production_length = 250 * unit.picosecond
+    rfe_settings.system_settings.nonbonded_method = "nocutoff"
+    rfe_protocol = RelativeHybridTopologyProtocol(settings=rfe_settings)
+
+    aligned_b = align_mol_shape(smcB, ref_mol=smcA)
+    atom_mapper = KartografAtomMapper(atom_map_hydrogens=True)
+    candidates = atom_mapper.suggest_mappings(smcA, aligned_b)
+    # Only the first suggested mapping is consumed.
+    ligand_mapping = next(candidates)
+
+    state_a = openfe.ChemicalSystem({'ligand': smcA})
+    state_b = openfe.ChemicalSystem({'ligand': aligned_b})
+
+    rfe_dag = rfe_protocol.create(
+        stateA=state_a,
+        stateB=state_b,
+        mapping={'ligands': ligand_mapping},
+    )
+
+    execute_and_serialize(rfe_dag, rfe_protocol, "RHFEProtocol")
+        
+
+if __name__ == "__main__":
+    # The module logger reports progress at INFO level. Without any logging
+    # configuration those records fall below the default WARNING threshold
+    # and are dropped, so enable INFO output for this script run.
+    # basicConfig does nothing if the root logger already has handlers.
+    logging.basicConfig(level=logging.INFO)
+
+    # ligandA is used by all three protocols; ligandB only by the RFE run.
+    molA = get_molecule(LIGA, "ligandA")
+    molB = get_molecule(LIGB, "ligandB")
+    generate_md_json(molA)
+    generate_ahfe_json(molA)
+    generate_rfe_json(molA, molB)
--- a/openfe/tests/data/openmm_afe/AHFEProtocol_json_results.gz
+++ b/openfe/tests/data/openmm_afe/AHFEProtocol_json_results.gz
--- a/openfe/tests/data/openmm_afe/CN_absolute_solvation_transformation.json.gz
+++ b/openfe/tests/data/openmm_afe/CN_absolute_solvation_transformation.json.gz
--- a/openfe/tests/data/openmm_md/MDProtocol_json_results.gz
+++ b/openfe/tests/data/openmm_md/MDProtocol_json_results.gz
--- a/openfe/tests/data/openmm_md/md_results.json.gz
+++ b/openfe/tests/data/openmm_md/md_results.json.gz
--- a/openfe/tests/data/openmm_rfe/RFE-ProtocolUnitResult-0f3457edf947483aa03d0f4fe88bf566.json.gz
+++ b/openfe/tests/data/openmm_rfe/RFE-ProtocolUnitResult-0f3457edf947483aa03d0f4fe88bf566.json.gz
--- a/openfe/tests/data/openmm_rfe/RHFEProtocol_json_results.gz
+++ b/openfe/tests/data/openmm_rfe/RHFEProtocol_json_results.gz
--- a/openfe/tests/protocols/conftest.py
+++ b/openfe/tests/protocols/conftest.py
@@ -195,10 +195,10 @@ def toluene_many_solv_system(benzene_modifications):

@pytest.fixture
 def rfe_transformation_json() -> str:
-    """string of a RFE result of quickrun"""
+    """string of a RFE results similar to quickrun"""
    d = resources.files('openfe.tests.data.openmm_rfe')

-    with gzip.open((d / 'RFE-ProtocolUnitResult-0f3457edf947483aa03d0f4fe88bf566.json.gz').as_posix(), 'r') as f:  # type: ignore
+    with gzip.open((d / 'RHFEProtocol_json_results.gz').as_posix(), 'r') as f:  # type: ignore
        return f.read().decode()  # type: ignore


@@ -208,7 +208,7 @@ def afe_solv_transformation_json() -> str:
    string of a Absolute Solvation result (CN in water) generated by quickrun
    """
    d = resources.files('openfe.tests.data.openmm_afe')
-    fname = "CN_absolute_solvation_transformation.json.gz"
+    fname = "AHFEProtocol_json_results.gz"
    
    with gzip.open((d / fname).as_posix(), 'r') as f:  # type: ignore
        return f.read().decode()  # type: ignore
@@ -220,7 +220,7 @@ def md_json() -> str:
    string of a MD result (TYK ligand lig_ejm_31  in water) generated by quickrun
    """
    d = resources.files('openfe.tests.data.openmm_md')
-    fname = "md_results.json.gz"
+    fname = "MDProtocol_json_results.gz"

    with gzip.open((d / fname).as_posix(), 'r') as f:  # type: ignore
        return f.read().decode()  # type: ignore
--- a/openfe/tests/protocols/test_openmm_afe_solvation_protocol.py
+++ b/openfe/tests/protocols/test_openmm_afe_solvation_protocol.py
@@ -600,7 +600,7 @@ class TestProtocolResult:
        est = protocolresult.get_estimate()

        assert est
-        assert est.m == pytest.approx(-2.977553138764437)
+        assert est.m == pytest.approx(-2.7514342223922856)
        assert isinstance(est, offunit.Quantity)
        assert est.is_compatible_with(offunit.kilojoule_per_mole)

@@ -608,7 +608,7 @@ class TestProtocolResult:
        est = protocolresult.get_uncertainty()

        assert est
-        assert est.m == pytest.approx(0.19617297299036018)
+        assert est.m == pytest.approx(0.1417058859527063)
        assert isinstance(est, offunit.Quantity)
        assert est.is_compatible_with(offunit.kilojoule_per_mole)

@@ -649,7 +649,7 @@ class TestProtocolResult:

        ovp1 = ovp[key][0]
        assert isinstance(ovp1['matrix'], np.ndarray)
-        assert ovp1['matrix'].shape == (15, 15)
+        assert ovp1['matrix'].shape == (14, 14)

    @pytest.mark.parametrize('key', ['solvent', 'vacuum'])
    def test_get_replica_transition_statistics(self, key, protocolresult):
@@ -661,8 +661,8 @@ class TestProtocolResult:
        rpx1 = rpx[key][0]
        assert 'eigenvalues' in rpx1
        assert 'matrix' in rpx1
-        assert rpx1['eigenvalues'].shape == (15,)
-        assert rpx1['matrix'].shape == (15, 15)
+        assert rpx1['eigenvalues'].shape == (14,)
+        assert rpx1['matrix'].shape == (14, 14)

    @pytest.mark.parametrize('key', ['solvent', 'vacuum'])
    def test_equilibration_iterations(self, key, protocolresult):
--- a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py
+++ b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py
@@ -1414,7 +1414,7 @@ class TestProtocolResult:
        est = protocolresult.get_estimate()

        assert est
-        assert est.m == pytest.approx(3.5531577581450953)
+        assert est.m == pytest.approx(16.887389)
        assert isinstance(est, unit.Quantity)
        assert est.is_compatible_with(unit.kilojoule_per_mole)

@@ -1422,7 +1422,7 @@ class TestProtocolResult:
        est = protocolresult.get_uncertainty()

        assert est
-        assert est.m == pytest.approx(0.03431704941311493)
+        assert est.m == pytest.approx(0.12354885)
        assert isinstance(est, unit.Quantity)
        assert est.is_compatible_with(unit.kilojoule_per_mole)

--- a/openfe/tests/protocols/test_solvation_afe_tokenization.py
+++ b/openfe/tests/protocols/test_solvation_afe_tokenization.py
@@ -85,7 +85,7 @@ class TestAbsoluteSolvationVacuumUnit(GufeTokenizableTestsMixin):

 class TestAbsoluteSolvationProtocolResult(GufeTokenizableTestsMixin):
    cls = openmm_afe.AbsoluteSolvationProtocolResult
-    key = "AbsoluteSolvationProtocolResult-e7d74b8ccc009d071b8c6eb0420da4bf"
+    key = "AbsoluteSolvationProtocolResult-291fef7bbbad3ffda898be6c01a22f16"
    repr = f"<{key}>"

    @pytest.fixture()