mirror of
https://github.com/RosettaCommons/foundry.git
synced 2026-06-04 13:24:22 +08:00
feat: update ligand templating, bump atomworks version
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"name": "8cdz_templating_ligand",
|
||||
"components": [
|
||||
{
|
||||
"path": "docs/rf3/examples/8cdz.cif"
|
||||
}
|
||||
],
|
||||
"ground_truth_conformer_selection": ["E"],
|
||||
"msa_paths": {
|
||||
"A": "docs/rf3/examples/msas/8cdz_A.a3m.gz"
|
||||
}
|
||||
}
|
||||
6218
docs/rf3/examples/9dfn.cif
Normal file
6218
docs/rf3/examples/9dfn.cif
Normal file
File diff suppressed because it is too large
Load Diff
13
docs/rf3/examples/9dfn_template_ligand_and_protein.json
Normal file
13
docs/rf3/examples/9dfn_template_ligand_and_protein.json
Normal file
@@ -0,0 +1,13 @@
|
||||
[
|
||||
{
|
||||
"name": "9dfn_template_ligand_and_protein",
|
||||
"components": [
|
||||
{
|
||||
"path": "docs/rf3/examples/9dfn.cif"
|
||||
}
|
||||
],
|
||||
"template_selection": ["A", "C", "D"],
|
||||
"ground_truth_conformer_selection": ["C", "D"]
|
||||
|
||||
}
|
||||
]
|
||||
@@ -45,7 +45,7 @@ dependencies = [
|
||||
"beartype>=0.18.0,<1",
|
||||
|
||||
# ... dataloading
|
||||
"atomworks==1.0.0",
|
||||
"atomworks==1.0.2"
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -5,10 +5,8 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from biotite.structure import AtomArray, concatenate
|
||||
|
||||
from atomworks.common import exists
|
||||
from atomworks.enums import ChainType
|
||||
from atomworks.ml.common import exists
|
||||
from atomworks.ml.datasets import logger
|
||||
from atomworks.ml.datasets.datasets import StructuralDatasetWrapper
|
||||
from atomworks.ml.datasets.parsers import (
|
||||
@@ -23,6 +21,7 @@ from atomworks.ml.transforms._checks import (
|
||||
from atomworks.ml.transforms.base import Transform, TransformedDict
|
||||
from atomworks.ml.transforms.msa._msa_loading_utils import load_msa_data_from_path
|
||||
from atomworks.ml.utils.rng import capture_rng_states
|
||||
from biotite.structure import AtomArray, concatenate
|
||||
|
||||
|
||||
# input data wrapper that allows multiple input files separated by ':'
|
||||
|
||||
@@ -2,8 +2,6 @@ from os import PathLike
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from omegaconf import DictConfig
|
||||
|
||||
from atomworks.common import exists
|
||||
from atomworks.constants import (
|
||||
AF3_EXCLUDED_LIGANDS,
|
||||
@@ -99,6 +97,8 @@ from atomworks.ml.transforms.msa.msa import (
|
||||
from atomworks.ml.transforms.random_atomize_residues import RandomAtomizeResidues
|
||||
from atomworks.ml.transforms.rdkit_utils import GetRDKitChiralCenters
|
||||
from atomworks.ml.transforms.symmetry import FindAutomorphismsWithNetworkX
|
||||
from omegaconf import DictConfig
|
||||
|
||||
from modelhub.data.extra_xforms import CheckForNaNsInInputs
|
||||
from modelhub.data.pipeline_utils import (
|
||||
annotate_post_crop_hash,
|
||||
@@ -272,7 +272,7 @@ def build_af3_transform_pipeline(
|
||||
TrainingRoute(
|
||||
SetOccToZeroOnBfactor(b_factor_min, b_factor_max),
|
||||
),
|
||||
RemoveUnresolvedPNUnits(),
|
||||
TrainingRoute(RemoveUnresolvedPNUnits()),
|
||||
RemovePolymersWithTooFewResolvedResidues(min_residues=4),
|
||||
MaskPolymerResiduesWithUnresolvedFrameAtoms(),
|
||||
ConditionalRoute(
|
||||
|
||||
@@ -67,7 +67,7 @@ For this example, the pTM in the `metrics.csv` should be `>0.8` (even without an
|
||||
|
||||
RF3 supports `.a3m` and `.fasta` files as input MSA formats; `.a3m` is recommended. We do not at the moment support pre-paired MSAs (we will pair on-the-fly) or on-the-fly MSA computation, but both are on the roadmap. Please raise an issue if these limitations are critical for your project and we can prioritize accordingly.
|
||||
|
||||
📝 **Example JSON configuration** (working example found at `docs/rf3/examples/3en2_from_json_with_msa.json`):
|
||||
📝 **Example JSON configuration** (full example found at `docs/rf3/examples/3en2_from_json_with_msa.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -93,7 +93,7 @@ rf3 fold inputs='docs/rf3/examples/3en2_from_json_with_msa.json'
|
||||
If performing inference from a prepared `.cif` file, MSAs can also be specified directly as a category within the raw CIF data.
|
||||
We will automatically extract the correct MSA paths during parsing.
|
||||
|
||||
📝 **Example CIF header** (working example found at `docs/rf3/examples/3en2_from_file.cif`):
|
||||
📝 **Example CIF header** (full example found at `docs/rf3/examples/3en2_from_file.cif`):
|
||||
```cif
|
||||
data_3EN2
|
||||
#
|
||||
@@ -112,7 +112,7 @@ rf3 fold inputs='docs/rf3/3en2_from_file.cif
|
||||
> Without an MSA and using default settings, the above examples will trigger "early stopping." This means that if the model determines early on that a correct prediction is unlikely, it will stop computation and only output a `metrics.csv` and `.score` file to save compute resources. You can adjust this behavior using the `early_stopping_plddt_threshold` argument (see below). In our group, we find this argument can save wasted compute on erroneous inputs.
|
||||
|
||||
> [!TIP]
|
||||
> To ensure that a provided MSA is loaded correctly, you may use the `raise_if_missing_msa_for_protein_of_length_n` command-line argument. For example, `rf3 fold inputs='docs/rf3/examples/3en2_from_json_with_msa.json raise_if_missing_msa_for_protein_of_length_n=10` would raise an error if there were any proteins >=10 residues without compatible MSAs.
|
||||
> To ensure that a provided MSA is loaded correctly, you may use the `raise_if_missing_msa_for_protein_of_length_n` command-line argument. For example, `rf3 fold inputs='docs/rf3/examples/3en2_from_json_with_msa.json' raise_if_missing_msa_for_protein_of_length_n=10` would raise an error if there were any proteins >=10 residues without compatible MSAs.
|
||||
|
||||
> [!TIP]
|
||||
> For non-canonical amino acids, most MSA generation algorithms substitute `X` (unknown residue)! Ensure your MSAs adhere to this convention.
|
||||
@@ -138,7 +138,7 @@ We will automatically distribute predictions across GPU's if running in a multi-
|
||||
|
||||
### 1️⃣ **Single JSON with Multiple Examples**
|
||||
|
||||
📝 **Example JSON configuration** (working example found at `docs/rf3/examples/multiple_example_from_json.json`)
|
||||
📝 **Example JSON configuration** (full example found at `docs/rf3/examples/multiple_example_from_json.json`)
|
||||
|
||||
```json
|
||||
[
|
||||
@@ -221,7 +221,7 @@ For convenience, we also support a `json` API analogous to that implemented by A
|
||||
> [!TIP]
|
||||
> **Performance Tip**: For small molecules, a general rule-of-thumb is that performance is best when using `CCD` codes directly, followed by `cif`/`sdf` files, and finally SMILES.
|
||||
|
||||
📝 **Example JSON configuration with arbitrary biomolecules** (working example found at `docs/rf3/examples/7o1r_from_json.json`):
|
||||
📝 **Example JSON configuration with arbitrary biomolecules** (full example found at `docs/rf3/examples/7o1r_from_json.json`):
|
||||
```json
|
||||
[
|
||||
{
|
||||
@@ -292,7 +292,7 @@ Such `.cif` files complete with appropriate bonds can be composed with AtomWorks
|
||||
|
||||
If you would prefer to use the JSON API, bonds can be explicitly given using PyMol-like strings of the form `chain_id/res_name/res_id/atom_name`. You will need to know the specific chain ID, residue name, residue ID, and atom name between the relevant pairs of atoms to unambiguously specify the bond.
|
||||
|
||||
📝 **Example JSON configuration with covalent modifcations** (working example found at `docs/rf3/examples/7o1r_from_json.json`):
|
||||
📝 **Example JSON configuration with covalent modifications** (full example found at `docs/rf3/examples/7o1r_from_json.json`):
|
||||
```json
|
||||
[
|
||||
{
|
||||
@@ -448,7 +448,7 @@ RF3 uses AtomWorks' flexible `AtomSelectionStack` query syntax for specifying st
|
||||
|
||||
It is often helpful to template one or multiple polymer chains while allowing the other chain(s) to fold unconstrained. We demonstrate how to apply templates with a nanobody-antigen use case below.
|
||||
|
||||
📝 **Example JSON configuration templating the antigen and the nanobody framework** (working example found at `docs/rf3/examples/7xli_template_antigen_and_framework.json`):
|
||||
📝 **Example JSON configuration templating the antigen and the nanobody framework** (full example found at `docs/rf3/examples/7xli_template_antigen_and_framework.json`):
|
||||
```json
|
||||
[
|
||||
{
|
||||
@@ -479,30 +479,37 @@ You may also specify templating directly via the CLI using `template_selection="
|
||||
|
||||
#### Templating a Small Molecule
|
||||
|
||||
We find that enforcing a particular small molecule conformation has various applications within fixed-ligand protein docking, enzyme activity filtering, and other biologically relevant tasks. RF3 natively enables encouraging a particular small molecule conformations via the ground truth reference conformer track. For the moment, such an approach is only effective if we want to template the *entire* small molecule. Partial templating of small molecules is still possible via the `template_selection` API described earlier rather than the `ground_truth_conformer_selection` track.
|
||||
We find that enforcing a particular small molecule conformation has various applications within fixed-ligand protein docking, enzyme activity filtering, and other biologically relevant tasks. RF3 natively supports encouraging a particular small molecule conformation via both the ground truth reference conformer track and the template selection track.
|
||||
|
||||
📝 **Example JSON configuration templating a small molecule** (working example found at `docs/rf3/examples/8cdz_templating_ligand.json`):
|
||||
For the moment, the ground truth conformer track is only effective if we want to template the *entire* small molecule. Partial templating of small molecules is still possible via the `template_selection` approach. We encourage exploration of both templating techniques to find what combination(s) are most effective for a given problem. Below we provide both, which represents the strongest possible conditioning.
|
||||
|
||||
📝 **Example JSON configuration templating a small molecule and the corresponding protein** (full example found at `docs/rf3/examples/9dfn_template_ligand_and_protein.json`):
|
||||
```json
|
||||
{
|
||||
"name": "8cdz_templating_ligand",
|
||||
"components": [
|
||||
{
|
||||
"path": "docs/rf3/examples/8cdz.cif"
|
||||
}
|
||||
],
|
||||
"ground_truth_conformer_selection": ["E"]
|
||||
}
|
||||
[
|
||||
{
|
||||
"name": "9dfn_template_ligand_and_protein",
|
||||
"components": [
|
||||
{
|
||||
"path": "docs/rf3/examples/9dfn.cif"
|
||||
}
|
||||
],
|
||||
"template_selection": ["A", "C", "D"],
|
||||
"ground_truth_conformer_selection": ["C", "D"]
|
||||
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> We template the protein above to avoid providing an MSA.
|
||||
|
||||
🚀 **Run the example:**
|
||||
|
||||
```bash
|
||||
rf3 fold inputs='docs/rf3/examples/9dfn_template_ligand_and_protein.json'
|
||||
```
|
||||
|
||||
You may also specify the ground truth conformer selection directly via the CLI, e.g., using `ground_truth_conformer_selection="[E]`
|
||||
|
||||
*Content coming soon...*
|
||||
You may also specify the ground truth conformer selection directly via the CLI, e.g., using `ground_truth_conformer_selection="[E]"`
|
||||
|
||||
#### Templating an Interface
|
||||
|
||||
|
||||
@@ -5,11 +5,11 @@ from pathlib import Path
|
||||
import hydra
|
||||
import pandas as pd
|
||||
import torch
|
||||
from atomworks.io import parse
|
||||
from atomworks.io.transforms.categories import category_to_dict
|
||||
from lightning.fabric import seed_everything
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
from atomworks.io import parse
|
||||
from atomworks.io.transforms.categories import category_to_dict
|
||||
from modelhub.inference_engines.base import InferenceEngine
|
||||
from modelhub.model.RF3 import ShouldEarlyStopFn
|
||||
from modelhub.utils.datasets import (
|
||||
@@ -342,17 +342,28 @@ class RF3InferenceEngine(InferenceEngine):
|
||||
else out["asym_unit"][0]
|
||||
)
|
||||
|
||||
# ... extract temlate information from the CIF file
|
||||
templating_from_cif = category_to_dict(out["cif_block"], "templating")
|
||||
# ... extract template information from the CIF file, if present
|
||||
template_selection_from_CIF = (
|
||||
category_to_dict(out["cif_block"], "template_selection")
|
||||
if "cif_block" in out
|
||||
else {}
|
||||
)
|
||||
ground_truth_conformer_selection_from_CIF = (
|
||||
category_to_dict(out["cif_block"], "ground_truth_conformer_selection")
|
||||
if "cif_block" in out
|
||||
else {}
|
||||
)
|
||||
|
||||
# First, apply the template selection from the CIF file
|
||||
atom_array = apply_conformer_and_template_selections(
|
||||
atom_array,
|
||||
template_selection=list(
|
||||
templating_from_cif.get("template_selection", [])
|
||||
template_selection_from_CIF.get("template_selection", [])
|
||||
),
|
||||
ground_truth_conformer_selection=list(
|
||||
templating_from_cif.get("ground_truth_conformer_selection", [])
|
||||
ground_truth_conformer_selection_from_CIF.get(
|
||||
"ground_truth_conformer_selection", []
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -6,8 +6,6 @@ from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
from biotite.structure import AtomArray
|
||||
|
||||
from atomworks.common import as_list
|
||||
from atomworks.enums import GroundTruthConformerPolicy
|
||||
from atomworks.io.tools.inference import (
|
||||
@@ -16,6 +14,8 @@ from atomworks.io.tools.inference import (
|
||||
)
|
||||
from atomworks.io.utils.io_utils import to_cif_file
|
||||
from atomworks.io.utils.selection import AtomSelectionStack
|
||||
from biotite.structure import AtomArray
|
||||
|
||||
from modelhub.utils.io import (
|
||||
CIF_LIKE_EXTENSIONS,
|
||||
DICTIONARY_LIKE_EXTENSIONS,
|
||||
@@ -62,13 +62,17 @@ def _spoof_cif_from_dictionary(item: dict, temp_dir: PathLike) -> Path:
|
||||
msa_paths_by_chain_id[chain_id] = msa_path
|
||||
|
||||
extra_categories = {}
|
||||
if item.get("template_selection") or item.get("ground_truth_conformer_selection"):
|
||||
extra_categories["templating"] = {
|
||||
if item.get("template_selection"):
|
||||
extra_categories["template_selection"] = {
|
||||
"template_selection": item.get("template_selection"),
|
||||
}
|
||||
if item.get("ground_truth_conformer_selection"):
|
||||
extra_categories["ground_truth_conformer_selection"] = {
|
||||
"ground_truth_conformer_selection": item.get(
|
||||
"ground_truth_conformer_selection"
|
||||
),
|
||||
}
|
||||
|
||||
if msa_paths_by_chain_id:
|
||||
extra_categories["msa_paths_by_chain_id"] = msa_paths_by_chain_id
|
||||
|
||||
|
||||
Reference in New Issue
Block a user