mirror of
https://github.com/RosettaCommons/foundry.git
synced 2026-06-04 13:24:22 +08:00
Added small molecule example
Minor file rearranging also occurred.
This commit is contained in:
BIN
models/rfd3/docs/.assets/binder_tutorial/pdf1_bb1_hotspots.png
Normal file
BIN
models/rfd3/docs/.assets/binder_tutorial/pdf1_bb1_hotspots.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 548 KiB |
BIN
models/rfd3/docs/.assets/pdf1_hotspots.png
Normal file
BIN
models/rfd3/docs/.assets/pdf1_hotspots.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.1 MiB |
@@ -1,33 +0,0 @@
|
||||
# Designing Binders for Targets with Non-Protein Molecules
|
||||
|
||||
## CD28: Cleaning the Starting Structure
|
||||
- keeping chain C and the NAG molecules around it
|
||||
- remove chains A, B, D, E, F
|
||||
- remove the metal ions
|
||||
- remove GOL
|
||||
- remove IMD
|
||||
- remove unresolved residues
|
||||
- renaming so that the main structure is chain B and all the NAG molecules come after it in their own chains, each numbered 1:
|
||||
- select chain C
|
||||
- deselect any NAG molecules that come with it
|
||||
- `alter sele, chain='B'` and `alter sele, segi='B'`
|
||||
- change AA/C/204 to chain D residue 1: `alter sele, chain='D'`, `alter sele, resi='1'`, `alter sele, segi='D'`
|
||||
- repeat for BA/C/205 to be on chain E,
|
||||
- The G NAG 1 is actually already fine
|
||||
- The second G NAG is actually still that way in the PDB that they uploaded?? Oh it's because they're bonded together
|
||||
- The X/C/NAG201 goes to chain F
|
||||
- The Y/C/NAG202 goes to chain C
|
||||
- sort
|
||||
- No renumbering of chain B was necessary
|
||||
- The gap in this chain (between residues 18 and 20) was replaced with asparagine
|
||||
|
||||
## PDF1
|
||||
- fetch 8s1x
|
||||
- remove everything that isn't the protein structure and the ligand (BB2)
|
||||
- `remove resn K`
|
||||
- `remove resn ZN`
|
||||
- `remove resn PO4`
|
||||
- remove the unresolved residue in chain A (A1)
|
||||
- remove chain B
|
||||
- renumber chain A so that it starts at 1: `sele chain A`, `alter (sele),resi=str(int(resi)-1)`
|
||||
- relabel the ligand so it's chain C
|
||||
@@ -7,27 +7,21 @@
|
||||
- [Input and Output Files](#binder_design_io)
|
||||
- [Prerequisites](#binder_design_prereqs)
|
||||
- [Cleaning the Input Structures](#binder_design_clean_pdbs)
|
||||
- [PDL1: Cropping structures and selecting hotspots](#binder_design_pdb_hotspot)
|
||||
- [Step 1.1: target PDB formatting with small molecules and PTMs](#binder_design_pdb_smolecule)
|
||||
- [Step 2: setting up configuration files](#binder_design_configs)
|
||||
- [Step 2.1: protein-only target (PDL1)](#binder_design_protein_target)
|
||||
- [Step 2.2: protein \+ glycan target (CD28)](#binder_design_glycan_target)
|
||||
- [Step 2.3: protein \+ small molecule target (PDF1)](#binder_design_smolecule_target)
|
||||
- [Additional input set up options](#binder_design_addn_input)
|
||||
- [Step 3: running RFD3](#binder_design_run_rfd3)
|
||||
- [Step 4: RFD3 score metrics and next steps](#binder_design_scoring)
|
||||
- [Supplemental materials](#binder_design_si)
|
||||
- [Input configuration files](#binder_design_si_config_files)
|
||||
- [Bash running scripts](#binder_design_scripts)
|
||||
- [Step 2: Setting up the Configuration File](#binder_design_config_file)
|
||||
- [Step 3: Running RFD3](#binder_design_run_rfd3)
|
||||
- [What's Next?](#binder_design_whats_next)
|
||||
- [Additional Examples](#binder_design_addn_examples)
|
||||
- [Binder Design with Explicitly Modeled Glycans](#binder_design_glycan_example)
|
||||
- [Small Molecule Binder Design](#binder_design_smolecule_example)
|
||||
- [Glossary](#binder_design_glossary)
|
||||
- [Resources and References](#binder_design_recs)
|
||||
- [Resources and References](#binder_design_further_reading)
|
||||
|
||||
(binder_tutorial_introduction)=
|
||||
(binder_design_introduction)=
|
||||
## Introduction
|
||||
|
||||
Diffusion is a powerful tool for designing protein backbones for desired functions. RFdiffusion3 (RFD3) builds upon previous versions and introduces atom-level design–diffusing all atoms for each side-chain residue instead of only backbone residues. While (as of February 2026\) the amino acid sequences generated by RFD3 do not reach the same level of sequence recovery as MPNN (thus MPNN is still recommended as a next step to redesign sequences), RFD3 generates higher quality backbones that avoid clashes with targets by modeling side chains from the start.
|
||||
Diffusion is a powerful tool for designing protein backbones for desired functions. [RFdiffusion3 (RFD3)](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3) builds upon previous versions and introduces atom-level design–diffusing all atoms for each side-chain residue instead of only backbone residues. While (as of February 2026) the amino acid sequences generated by RFD3 do not reach the same level of sequence recovery as [MPNN](https://github.com/RosettaCommons/foundry/tree/production/models/mpnn) (thus MPNN is still recommended as a next step to redesign sequences), RFD3 generates higher quality backbones that avoid clashes with targets by modeling side chains from the start.
|
||||
|
||||
In this tutorial, you will learn how to design binders using RFD3 to protein targets, protein-small molecule targets, and protein targets with post-translational modifications. Starting with a target PDB, you will be able to format the input PDB (including target cropping), assign hotspots at the atom- or residue-level, write input files with different configuration options, and finally run RFD3. From the output structures generated, you can filter based on RFD3 metrics, then move on to sequence redesign with [MPNN](https://github.com/RosettaCommons/foundry/tree/production/models/mpnn) and structure prediction using tools such as [RosettaFold3](https://github.com/RosettaCommons/foundry/tree/production/models/rf3). You may also follow along our companion video tutorial (<!-- TODO link video tutorial -->).
|
||||
In this tutorial, you will learn how to design binders using RFD3 to protein targets, protein-small molecule targets, and protein targets with post-translational modifications. Starting with a target PDB, you will be able to format the input PDB (including target cropping), assign [hotspots](#binder_design_hotspots_def) at the atom- or residue-level, write input files with different configuration options, and finally run RFD3. From the output structures generated, you can filter based on RFD3 metrics, then move on to sequence redesign with [MPNN](https://github.com/RosettaCommons/foundry/tree/production/models/mpnn) and structure prediction using tools such as [RosettaFold3](https://github.com/RosettaCommons/foundry/tree/production/models/rf3). You may also follow along with our companion video tutorial (<!-- TODO link video tutorial -->).
|
||||
|
||||
---
|
||||
|
||||
@@ -41,7 +35,7 @@ You will need to clone the repository to access the tutorial files. Using the `p
|
||||
|
||||
Make sure you have activated any environment(s) you used to install RFD3.
|
||||
|
||||
RFD3 runs best on GPUs. It is suggested to follow this tutorial on an interactive GPU node if you have access to one.
|
||||
RFD3 runs best on GPUs. We suggest following this tutorial on an interactive GPU node if you have access to one.
|
||||
|
||||
---
|
||||
|
||||
@@ -55,7 +49,7 @@ The tutorial will follow this general outline:
|
||||
2. Creating a configuration file (JSON or YAML) to guide the diffusion process
|
||||
3. Choosing command line arguments to run RFD3
|
||||
|
||||
The all input files and a handful of example files for the three examples in this tutorial can be found [here](./binder_design_tutorial_files).
|
||||
All input files and a handful of example output files for the three examples in this tutorial can be found [here](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3/docs/tutorials/binder_design_tutorial_files).
|
||||
|
||||
(binder_design_prereqs)=
|
||||
## Prerequisites
|
||||
@@ -88,14 +82,14 @@ This structure has non-protein elements that we do not need for our design proce
|
||||
remove not (bb. or sc.)
|
||||
```
|
||||
|
||||
Before also removing VHH1 from our structure, let's take a look at the residues we will use as hotspots for our design. In this case, these are the residues that will directly interact with the designed binder.
|
||||
Before also removing VHH1 from our structure, let's take a look at the residues we will use as [hotspots](#binder_design_hotspots_def) for our design. In this case, these are the residues that will directly interact with the designed binder.
|
||||
|
||||
Since our starting structure shows an example of what our protein of interest (PDL1) binds to (VHH1) we can use it to determine what our hotspots should be. Here we have chosen residues A54 (isoleucine), A56 (tyrosine), A68 (valine), A69 (histidine), A115 (methionine), and A117(serine) because they make the most contacts with VHH1.
|
||||
Since our starting structure shows an example of what our protein of interest (PDL1) binds to (VHH1), we can use it to determine what our [hotspots](#binder_design_hotspots_def) should be. Here we have chosen residues A54 (isoleucine), A56 (tyrosine), A68 (valine), A69 (histidine), A115 (methionine), and A117 (serine) because they make the most contacts with VHH1.
|
||||
|
||||
```{figure} ../.assets/binder_tutorial/8aom_choosing_hotspots.png
|
||||
:width: 100%
|
||||
|
||||
Interface between PDL1 and VHH1, with chosen target hotspot residues in PDL1 colored in dark pink.
|
||||
Interface between PDL1 and VHH1, with chosen target [hotspot](#binder_design_hotspots_def) residues in PDL1 colored in dark pink.
|
||||
```
|
||||
|
||||
Now we will remove VHH1 from our structure since we are trying to design a different binder in its place. There are several ways to accomplish this; here is how to do it via the PyMOL command prompt:
|
||||
@@ -105,18 +99,18 @@ remove chain V
|
||||
```
|
||||
|
||||
|
||||
```{figure} ../.assets/8aom_pld1_only.png
|
||||
PDL1 structure after the removal of VHH1. Hotspot residues are highlighted in dark pink.
|
||||
```{figure} ../.assets/binder_tutorial/8aom_pld1_only.png
|
||||
PDL1 structure after the removal of VHH1. [Hotspot](#binder_design_hotspots_def) residues are highlighted in dark pink.
|
||||
```
|
||||
|
||||
Target cropping is highly encouraged so as to lower the memory used when running RFD3. This can be very target dependent, but the overall goal is to remove as many residues as possible while keeping target hotspots and overall epitope intact, and without removing parts of the structure that may introduce clashes with the designed binders later on. If any residues were unresolved in the crystal structure (grayed out in the Pymol sequence), also remove them. For PDL1, residues A132-236 were removed via:
|
||||
Target cropping is highly encouraged so as to lower the memory used when running RFD3. This can be very target dependent, but the overall goal is to remove as many residues as possible while keeping target [hotspots](#binder_design_hotspots_def) and overall epitope intact, and without removing parts of the structure that may introduce clashes with the designed binders later on. If any residues were unresolved in the crystal structure (grayed out in the PyMOL sequence), also remove them. For PDL1, residues A132-236 were removed via:
|
||||
|
||||
```
|
||||
sele to_delete, chain A and resi 132-N
|
||||
remove to_delete
|
||||
```
|
||||
|
||||
```{figure} ../.assets/8aom_crop_selection.png
|
||||
```{figure} ../.assets/binder_tutorial/8aom_crop_selection.png
|
||||
:width: 100%
|
||||
|
||||
Section of PDL1 that will be removed is surrounded by a red box.
|
||||
@@ -133,7 +127,7 @@ select all
|
||||
alter (sele),resi=str(int(resi)-17)
|
||||
```
|
||||
|
||||
```{figure} ../.assets/8aom_cleaned_structure.png
|
||||
```{figure} ../.assets/binder_tutorial/8aom_cleaned_structure.png
|
||||
:width: 100%
|
||||
|
||||
Final cropped structure of PDL1.
|
||||
@@ -146,9 +140,9 @@ alter (sele), chain='B'
|
||||
alter (sele), segi='B'
|
||||
```
|
||||
|
||||
Save your cropped PDB file using the command `save /your/path/pld1_cropped.pdb`, bud don't close your PyMOL session. Make sure to note the new positions of your hotspots after renumbering, these will be necessary to set up our RFD3 calculation. In this example, our hotspots are now B37, B39, B51, B52, B98, and B100.
|
||||
Save your cropped PDB file using the command `save /your/path/pld1_cropped.pdb`, but don't close your PyMOL session. Note the new positions of your [hotspots](#binder_design_hotspots_def) after renumbering; these will be necessary to set up our RFD3 calculation. In this example, our [hotspots](#binder_design_hotspots_def) are now B37, B39, B51, B52, B98, and B100.
|
||||
|
||||
In this tutorial, we will be specifying the specific atoms we want to use in our hotspot residues. You can view the atom labels in PyMOL as shown below:
|
||||
In this tutorial, we will be specifying the specific atoms we want to use in our [hotspot](#binder_design_hotspots_def) residues. You can view the atom labels in PyMOL as shown below:
|
||||
|
||||
```{figure} ../.assets/binder_tutorial/pld1_hotspot_atom_label.png
|
||||
:width: 100%
|
||||
@@ -156,11 +150,12 @@ In this tutorial, we will be specifying the specific atoms we want to use in our
|
||||
Residue B37 (green) with atom labels.
|
||||
```
|
||||
|
||||
## Step 2: Setting up the configuration file
|
||||
(binder_design_config_file)=
|
||||
## Step 2: Setting up the Configuration File
|
||||
|
||||
RFD3 takes both YAML and JSON file formats as inputs. They are interchangeable and the information contained within them is the same, only with formatting differences. In the [provided tutorial files](./binder_design_tutorial_files), examples are given for both formats. In the tutorial text we will be using the YAML syntax, as it allows for comments while the JSON format does not.
|
||||
RFD3 takes both YAML and JSON file formats as inputs. They are interchangeable and the information contained within them is the same, only with formatting differences. In the [provided tutorial files](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3/docs/tutorials/binder_design_tutorial_files), examples are given for both formats. In the tutorial text we will be using the YAML syntax, as it allows for comments while the JSON format does not.
|
||||
|
||||
The configuration file houses the settings we can use to direct the diffusion process including options like how long we want our designed binder to be to which residues in from our input we want our binder to form hydrogen bonds with. We will discuss these options and more in as we create the YAML file.
|
||||
The configuration file houses the settings we can use to direct the diffusion process, including how long we want our designed binder to be, which residues from our input structure we want our binder to form hydrogen bonds with, etc. We will discuss these options and more as we create the YAML file.
|
||||
|
||||
---
|
||||
|
||||
@@ -194,9 +189,9 @@ Following the binder chain A, target chain B convention, the `contig` will be ma
|
||||
```
|
||||
This says that we want our designed binder to be between 55 and 88 residues long followed by a chain break followed by residues B1-114 of our input structure.
|
||||
|
||||
The `select_hotspots` flag is where you will include the hotspot residue/atom information you obtained in the first step of the tutorial. These can be set at the atom level, but there are various other options that can be used here that are described in the [InputSelection Mini-Language guide](../input.md#the-inputselection-mini-language).
|
||||
The `select_hotspots` flag is where you will include the [hotspot](#binder_design_hotspots_def) residue/atom information you obtained in the first step of the tutorial. These can be set at the atom level, but there are various other options that can be used here that are described in the [InputSelection Mini-Language guide](../input.md#the-inputselection-mini-language).
|
||||
|
||||
For the PDL1 example, the atom level hotspots can be set as below:
|
||||
For the PDL1 example, the atom level [hotspots](#binder_design_hotspots_def) can be set as below:
|
||||
|
||||
```
|
||||
select_hotspots:
|
||||
@@ -208,7 +203,7 @@ For the PDL1 example, the atom level hotspots can be set as below:
|
||||
"B100": "OG,CB"}
|
||||
```
|
||||
|
||||
The `select_hbond_donor` and `select_hbond_acceptor` options are used to condition RFD3 to design binders that make hydrogen bond interactions with specified atoms. For residues in your target that are good hydrogen bond donors, use `select\_hbond\_donor`; for good acceptors, use `select\_hbond\_acceptor`. It is common to also include the same residues as hotspots, to increase contact between the binder and those residues.
|
||||
The `select_hbond_donor` and `select_hbond_acceptor` options are used to condition RFD3 to design binders that make hydrogen bond interactions with specified atoms. For residues in your target that are good hydrogen bond donors, use `select_hbond_donor`; for good acceptors, use `select_hbond_acceptor`. It is common to also include the same residues as [hotspots](#binder_design_hotspots_def), to increase contact between the binder and those residues.
|
||||
|
||||
The way these atoms are specified is similar to how they were specified for the `select_hotspots` option. However, in practice it is often best to select atoms within the residue that would actually be a part of the hydrogen bond interaction (instead of specifying `TIP`, for example.)
|
||||
|
||||
@@ -218,17 +213,17 @@ The way these atoms are specified is similar to how they were specified for the
|
||||
"B52": "ND1,NE2"}
|
||||
```
|
||||
|
||||
In RFD3, you can specify the point at which the center of mass of your designed protein should be located. For design tasks with 'hotspots', it is typical to use the 'hotspots' to determine this point:
|
||||
In RFD3, you can specify the point at which the center of mass of your designed protein should be located. This position is referred to as an ['ORI token'](#binder_design_ori_def). For design tasks with [hotspots](#binder_design_hotspots_def), it is typical to use the [hotspots](#binder_design_hotspots_def) to determine this point:
|
||||
```yaml
|
||||
infer_ori_strategy: hotspots
|
||||
```
|
||||
|
||||
To be extra certain that RFD3 will not change the identity of any of the residues in the motif - what RFD3 takes from the provided input structure - let's add `redesign_motif_sidechains` to our YAML file:
|
||||
To be extra certain that RFD3 will not change the identity of any of the residues in the [motif](#binder_design_motif_def) - what RFD3 takes from the provided input structure - let's add `redesign_motif_sidechains` to our YAML file:
|
||||
```yaml
|
||||
redesign_motif_sidechains: False
|
||||
```
|
||||
|
||||
Last, but not least, we want our designs to have fewer loopy regions and more defined secondary structure motifs. We can push RFD3 to do this via:
|
||||
Last, but not least, we want our designs to have fewer loopy regions and more defined secondary structure [motifs](#binder_design_motif_def). We can push RFD3 to do this via:
|
||||
|
||||
```yaml
|
||||
is_non_loopy: True
|
||||
@@ -236,8 +231,7 @@ Last, but not least, we want our designs to have fewer loopy regions and more de
|
||||
|
||||
There are many other options that you can use to further specify the designs you want to create. Some of these are described in the two [additional examples](#binder_design_addn_examples), but even more are described [here](../input.md#inputspecification-fields). We encourage you to explore these options for your own design projects.
|
||||
|
||||
|
||||
|
||||
(binder_design_run_rfd3)=
|
||||
## Step 3: Running RFD3
|
||||
|
||||
Now that we have our cropped PDB file and our input options specified in a YAML file, we can run RFD3 to generate binder designs. There are many command line arguments that you can use to control how RFD3 runs, which are described [here](../input.md#cli-arguments). However, we will focus only on the options that are more frequently used for binder design in this tutorial.
|
||||
@@ -257,24 +251,24 @@ diffusion_batch_size=8 \
|
||||
dump_trajectories=True
|
||||
```
|
||||
|
||||
You can either run this from the command line prompt in an interactive session on a GPU node or submit the job to the computing resources you have access to. For an example runscript for submitting these jobs, see <!-- TODO: add link to runscript example -->. Note that the options shown in this runscript might not match the options you have access to. See the documentation for the cluster you have access to for more examples.
|
||||
You can either run this from the command line prompt in an interactive session on a GPU node or submit the job to the computing resources you have access to. For an example runscript for submitting these jobs, see the [PDL1 runscript example](./binder_design_tutorial_files/PDL1/pdl1.sh). Note that the options shown in this runscript might not match the options you have access to.
|
||||
|
||||
Let's break this down:
|
||||
- `rfd3 design`: This is the main command that actually runs RFD3
|
||||
- `out_dir`: This is a **required** argument that specifies the relative path to where you want your outputs stored. If the directory does not already exist, it RFD3 will create it. In this example our outputs will be saved in a directory called `pdl1_binder_outputs` that will be created in your current working directory.
|
||||
- `out_dir`: This is a **required** argument that specifies the relative path to where you want your outputs stored. If the directory does not already exist, RFD3 will create it. In this example our outputs will be saved in a directory called `pdl1_binder_outputs` that will be created in your current working directory.
|
||||
- `inputs`: This is the relative path and file name for your input YAML or JSON file. The command above assumes that the YAML file we created for the PDL1 example is in your current working directory.
|
||||
- `n_batches`: RFD3 will run your designs in batches. The higher the number of batches, the more diversity your designs will have. *Note that all designs in a single batch will have the same length*.
|
||||
- `diffusion_batch_size`: The number of designs in each batch. Larger batch sizes are more efficient, but your results will be less diverse than generating the same total number of designs with smaller batches.
|
||||
- `dump_trajectories`: If `True`, then the trajectories created during the RFD3 design process are saved. These are not necessary for the assessment of your your designs, but can be useful for visualization purposes. In general, we recommend leaving this set to the default value of `False` because the trajectory file sizes can be large.
|
||||
- `dump_trajectories`: If `True`, then the trajectories created during the RFD3 design process are saved. These are not necessary for the assessment of your designs, but can be useful for visualization purposes. In general, we recommend leaving this set to the default value of `False` because the trajectory file sizes can be large.
|
||||
|
||||
After RFD3 runs, 4 types of files should be generated:
|
||||
(In the list below, 'n' can be 0 through 7; there should be one of each file for each RFD3 designed binder.)
|
||||
- pdl1_test_0_denoised_model_n.cif.gz: A trajectory of the diffusion process for just the designed portion of the model (the binder). If you play through the frames, it will start with the final structure and end with a set of fully diffused atoms.
|
||||
- pdl1_test_0_noised_model_n.cif.gz:A trajectory of the diffusion process for the full structure. If you play through the frames, it will start with the final structure and end with a set of fully diffused atoms.
|
||||
- pdl1_test_0_noised_model_n.cif.gz: A trajectory of the diffusion process for the full structure. If you play through the frames, it will start with the final structure and end with a set of fully diffused atoms.
|
||||
- pdl1_test_0_model_n.cif.gz: This file contains the final binder structure generated by RFD3 along with any portions of the input structure that were specified by the configuration file.
|
||||
- pdl1_test_0_model_n.json: A JSON file with information about the design including metrics and a map between the input structure and the output structure, when applicable.
|
||||
|
||||
|
||||
(binder_design_whats_next)=
|
||||
## What's Next?
|
||||
We recommend that you visually inspect your designs along with taking a look at the metrics in the generated JSON file to filter your designs before moving on to the next step of your design pipeline, such as sequence design with [MPNN](https://github.com/RosettaCommons/foundry/tree/production/models/mpnn).
|
||||
|
||||
@@ -284,7 +278,7 @@ Keep in mind that while we only generate 8 designs in this tutorial, for real de
|
||||
## Additional Examples
|
||||
The additional examples explored in the next few sections follow the same process discussed in the tutorial, with minor exceptions and changes. In the text for these examples we will mostly highlight how these examples differ from the PDL1 example discussed in the main body of the tutorial and why.
|
||||
|
||||
The input files for these examples along with example output files can be found [here](./binder_design_tutorial_files).
|
||||
The input files for these examples along with example output files can be found [here](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3/docs/tutorials/binder_design_tutorial_files).
|
||||
|
||||
(binder_design_glycan_example)=
|
||||
### Binder Design with Explicitly Modeled Glycans
|
||||
@@ -293,13 +287,13 @@ For this example we will start with the [8S6Z](https://www.rcsb.org/structure/8S
|
||||
#### Step 1: Cleaning the Input Structure
|
||||
You can follow the same procedure as described in [Step 1](#binder_design_clean_pdbs) of the tutorial with a few changes:
|
||||
1. Load the 8S6Z structure into PyMOL using `fetch 8S6Z`
|
||||
1. We want to keep the explicitly modeled glycans, which would be removed if we ran `remove not (bb. or sc.)` instead run:
|
||||
1. We want to keep the explicitly modeled glycans, which would be removed if we ran `remove not (bb. or sc.)`. Instead run:
|
||||
```bash
|
||||
remove solvent
|
||||
remove resn ZN
|
||||
```
|
||||
These will remove any solvent molecules and the zinc ions, in that order.
|
||||
1. The 8S6Z structure has two copies of CD28, chains C and F. Either can be used for this example, but to match the [provided tutorial files](./binder_design_tutorial_files), you will need to keep only chain C. You can remove the remaining chains via
|
||||
1. The 8S6Z structure has two copies of CD28, chains C and F. Either can be used for this example, but to match the [provided tutorial files](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3/docs/tutorials/binder_design_tutorial_files), you will need to keep only chain C. You can remove the remaining chains via
|
||||
```bash
|
||||
remove chain A
|
||||
remove chain B
|
||||
@@ -329,7 +323,7 @@ You can follow the same procedure as described in [Step 1](#binder_design_clean_
|
||||
|
||||
Your final structure should match the one in the provided tutorial files.
|
||||
|
||||
```{figure} ../.assets/cd28_cleaned.png
|
||||
```{figure} ../.assets/binder_tutorial/cd28_cleaned.png
|
||||
:width: 100%
|
||||
|
||||
Final cleaned CD28 structure with glycan molecules.
|
||||
@@ -362,17 +356,17 @@ test:
|
||||
These settings specify that we want to design a binder that is 55-88 residues long around our input structure. Notice that the glycans were included in our 'contig string' to ensure that they were visible to RFD3. RFD3 knows to avoid clashing with these molecules when creating binder designs.
|
||||
|
||||
|
||||
Atoms on three residues (B31, B100, B104) have been chosen to act as hydrogen bond donors to residues on the designed binder. The hotspots were once again chosen by observing which residues had side chains that seemed to interact with the antibody fragment:
|
||||
Atoms on three residues (B31, B100, B104) have been chosen to act as hydrogen bond donors to residues on the designed binder. The [hotspots](#binder_design_hotspots_def) were once again chosen by observing which residues had side chains that seemed to interact with the antibody fragment:
|
||||
|
||||
```{figure} ../.assets/cd28_hotspots.png
|
||||
```{figure} ../.assets/binder_tutorial/cd28_hotspots.png
|
||||
:width: 100%
|
||||
|
||||
Interface between CD28 (blue) and the antibody fragment (pink). The chosen hotspots are in a darker teal color and are shown as sticks. The antibody residues close to CD28 are rendered as sticks as well to better see the interactions.
|
||||
Interface between CD28 (blue) and the antibody fragment (pink). The chosen [hotspots](#binder_design_hotspots_def) are in a darker teal color and are shown as sticks. The antibody residues close to CD28 are rendered as sticks as well to better see the interactions.
|
||||
```
|
||||
|
||||
|
||||
```{note}
|
||||
Fore more details on these settings, see the main text of the tutorial. In this section we will only be discussing details specific to this example.
|
||||
For more details on these settings, see the main text of the tutorial.
|
||||
```
|
||||
|
||||
#### Step 3: Running RFD3
|
||||
@@ -388,71 +382,104 @@ dump_trajectories=True
|
||||
```
|
||||
|
||||
(binder_design_smolecule_example)=
|
||||
### Small Molecule Binder Design with PDF1
|
||||
[8S1X](https://www.rcsb.org/structure/8S1X): Crystal structure of actinonin-bound PDF1 and a designed binder
|
||||
### Small Molecule Binder Design
|
||||
For this example we will start with the [8S1X](https://www.rcsb.org/structure/8S1X) structure which is composed of PDF1 bound to actinonin ([BB2](https://www.rcsb.org/ligand/BB2)) with a designed protein binder (DBAct553). This structure will allow us to explore binder design with RFD3 in the presence of small molecules.
|
||||
|
||||
Step 1:
|
||||
![][image8]
|
||||
Figure 8: Crystal structure of CD28 (salmon, chain B) with glycans (purple, chains C-G) (PDB 8S6Z)
|
||||
(binder_design_smolecule_step1)=
|
||||
#### Step 1: Cleaning the Input Structure
|
||||
You can follow the same procedure as described in [Step 1](#binder_design_clean_pdbs) of the tutorial with a few changes:
|
||||
1. Fetch the starting structure (`fetch 8S1X`)
|
||||
1. Remove any pieces of the structure that are not proteins or the actinonin ligand:
|
||||
```bash
|
||||
remove solvent
|
||||
remove resn PO4
|
||||
remove resn FMT
|
||||
remove resn ZN
|
||||
remove resn K
|
||||
```
|
||||
1. Remove any unresolved residues
|
||||
1. Before removing the designed binder (chain B), take a look at the residues at the interface between the designed binder and PDF1; this will help us determine our [hotspots](#binder_design_hotspots_def) for this example.
|
||||
```{figure} ../.assets/pdf1_hotspots.png
|
||||
|
||||
For the protein-small molecule target, we chose the structure of PDF1 bound to actinonin (8S1X). The same formatting steps were followed, where the protein chain was set as chain B, but the small molecule was set as chain C:
|
||||
DBAct553 (the designed binder) is shown in pink, PDF1 is shown in green, and actinonin (BB2) is shown in blue. Residues near the interface are shown as sticks. The chosen hotspot residues are shown in orange.
|
||||
```
|
||||
The [hotspots](#binder_design_hotspots_def) were chosen based on which residues at the interface appear to be interacting with DBAct553.
|
||||
1. Remove the designed binder (`remove chain B`)
|
||||
1. Relabel the remaining portions of the structure so that the protein is in chain B with residue numbering starting at 1 and the small molecule is the first residue of chain C:
|
||||
```bash
|
||||
# select chain A
|
||||
alter sele, chain='B'
|
||||
alter sele, segi='B'
|
||||
alter sele, resi=str(int(resi)-1)
|
||||
|
||||
![][image9]
|
||||
Figure 9: Crystal structure of PDF1 (pink, chain B) in complex with small molecule actinonin (purple, chain C) (PDB 8S1X)
|
||||
# select the small molecule
|
||||
alter sele, chain='C'
|
||||
alter sele, segi='C'
|
||||
alter sele, resi='1'
|
||||
```
|
||||
1. Save your cleaned PDB. In the provided tutorial files, this structure is saved as [`pdf1_sm.pdb`](https://github.com/RosettaCommons/foundry/tree/production/models/rfd3/docs/tutorials/binder_design_tutorial_files).
|
||||
|
||||
Step 2:
|
||||
|
||||
| For the PDF1 target with a small molecule, the “contig” will be the following, where the small molecule is referenced as chain C: |
|
||||
| :---- |
|
||||
|
||||
```
|
||||
contig: 55-88,/0,B1-167,C1-1
|
||||
```{note}
|
||||
We did not crop the structure in this example because it was already small enough to use for our design tasks.
|
||||
```
|
||||
|
||||
| Since the target includes multiple chains, add up all residues of all target chains for the length flag: |
|
||||
| :---- |
|
||||
|
||||
```
|
||||
length: 223-256 #(55+167+1)-(88+167+1)
|
||||
#### Step 2: Setting up the configuration file
|
||||
Here are the settings we will use for the design of the binder for PDF1:
|
||||
```yaml
|
||||
test:
|
||||
input: ./pdf1_sm.pdb
|
||||
contig: 55-88,/0,B1-167,C1-1
|
||||
redesign_motif_sidechains: False
|
||||
infer_ori_strategy: hotspots
|
||||
is_non_loopy: True
|
||||
select_hotspots: {
|
||||
"B64": "OE1,OE2,CD,CB,CG",
|
||||
"B126": "CB,CD1,CD2,CG",
|
||||
"B87": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B98": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B88": "CB,CG,OE1,NE2",
|
||||
"B42": "CB,CD,CG,N",
|
||||
"B44": "CB,CD1,CG1,CG2",
|
||||
"C1": "C15,C16,C17,C18,O20,N21,C22,C23,C24,C25,O27"}
|
||||
select_hbond_donor: {
|
||||
"B87": "OH",
|
||||
"B98": "OH",
|
||||
"B64": "OE1,OE2",
|
||||
"B42": "N",
|
||||
"C1": "O20,N21,O27"}
|
||||
```
|
||||
|
||||
| For the PDF1 \+ small molecule target example, the small molecule can be set as a hotspot by specifying atoms in chain C: |
|
||||
| :---- |
|
||||
These settings specify that we want to design a binder that is 55-88 residues long around our input structure. Notice that we've included both the protein and the small molecule in our 'contig string'. This allows RFD3 to design binders that avoid clashes and interact with the small molecule.
|
||||
|
||||
```
|
||||
select_hotspots:
|
||||
{
|
||||
"B64": "OE1,OE2,CD,CB,CG",
|
||||
"B126": "CB,CD1,CD2,CG",
|
||||
"B87": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B98": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B88": "CB,CG,OE1,NE2",
|
||||
"B42": "CB,CD,CG,N",
|
||||
"B44": "CB,CD1,CG1,CG2",
|
||||
"C1": "C15,C16,C17,C18,O20,N21,C22,C23,C24,C25,O27"}
|
||||
The choice of residues to use as [hotspots](#binder_design_hotspots_def) was discussed in [Step 1](#binder_design_smolecule_step1). The specific atoms on these residues selected as [hotspots](#binder_design_hotspots_def) were the ones most external to the protein structure. This is also true of the [hotspot](#binder_design_hotspots_def) atoms chosen on the small molecule: the atoms chosen are the least buried in the protein structure:
|
||||
|
||||
```{figure} ../.assets/binder_tutorial/pdf1_bb1_hotspots.png
|
||||
:width: 100%
|
||||
|
||||
The actinonin atoms selected as [hotspots](#binder_design_hotspots_def) are highlighted in pink. Note the atoms selected are not buried in the protein structure.
|
||||
```
|
||||
|
||||
| The same can be done for hydrogen bond conditioning by including atoms in chain C: |
|
||||
| :---- |
|
||||
Atoms on four residues (B42, B64, B87, B98) along with three polar atoms on the small molecule have been chosen to act as hydrogen bond donors to residues on the designed binder.
|
||||
|
||||
```{note}
|
||||
For more details on these settings, see the main text of the tutorial.
|
||||
```
|
||||
select_hbond_donor:
|
||||
{"B87": "OH",
|
||||
"B98": "OH",
|
||||
"B64": "OE1,OE2",
|
||||
"B42": "N",
|
||||
"C1": "O20,N21,O27"}
|
||||
```
|
||||
|
||||
## Glossary {#glossary}
|
||||
(binder_design_hotspots)=
|
||||
(binder_design_glossary)=
|
||||
## Glossary
|
||||
(binder_design_hotspots_def)=
|
||||
### Hotspot
|
||||
A hotspot is a residue that is used to anchor the designed portions of your structure to any input peptides. <!-- TODO: check this definition -->
|
||||
### ORI token <!-- TODO: provide definition -->
|
||||
### Motif <!-- TODO: provide definition -->
|
||||
A hotspot is a residue that is part of the interface between the given and designed portions of the final structure.
|
||||
|
||||
## Resources and References {#resources-and-references}
|
||||
(binder_design_motif_def)=
|
||||
### Motif
|
||||
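A 'motif' in the context of protein design is a portion of the input structure (a set of residues or atoms) that is kept in the final design while new protein is generated around it. <!-- TODO: confirm this definition -->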
|
||||
|
||||
(binder_design_ori_def)=
|
||||
### ORI token
|
||||
An ORI token, short for 'origin' token, marks the location where you want the center of mass of the designed portion of your structure to be.
|
||||
|
||||
(binder_design_further_reading)=
|
||||
## Resources and References
|
||||
|
||||
RFD3 preprint: [https://www.biorxiv.org/content/10.1101/2025.09.18.676967v2](https://www.biorxiv.org/content/10.1101/2025.09.18.676967v2)
|
||||
RFD3 Foundry documentation: [https://github.com/RosettaCommons/foundry/blob/production/models/rfd3/README.md](https://github.com/RosettaCommons/foundry/blob/production/models/rfd3/README.md)
|
||||
|
||||
8618
models/rfd3/docs/tutorials/binder_design_tutorial_files/8s1x.cif
Normal file
8618
models/rfd3/docs/tutorials/binder_design_tutorial_files/8s1x.cif
Normal file
File diff suppressed because it is too large
Load Diff
22496
models/rfd3/docs/tutorials/binder_design_tutorial_files/8s6z.cif
Normal file
22496
models/rfd3/docs/tutorials/binder_design_tutorial_files/8s6z.cif
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"test": {
|
||||
"input": "./pdf1_sm.pdb",
|
||||
"contig": "55-88,/0,B1-167,C1-1",
|
||||
"length": "222-255",
|
||||
"redesign_motif_sidechains": false,
|
||||
"infer_ori_strategy": "hotspots",
|
||||
"is_non_loopy": true,
|
||||
"select_hotspots":
|
||||
{"B64": "OE1,OE2,CD,CB,CG",
|
||||
"B126": "CB,CD1,CD2,CG",
|
||||
"B87": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B98": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B88": "CB,CG,OE1,NE2",
|
||||
"B42": "CB,CD,CG,N",
|
||||
"B44": "CB,CD1,CG1,CG2",
|
||||
"C1": "C15,C16,C17,C18,O20,N21,C22,C23,C24,C25,O27"},
|
||||
"select_hbond_donor":
|
||||
{"B87": "OH",
|
||||
"B98": "OH",
|
||||
"B64": "OE1,OE2",
|
||||
"B42": "N",
|
||||
"C1": "O20,N21,O27"}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --partition=h100
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=6
|
||||
#SBATCH --gres=gpu:1
|
||||
#SBATCH --mem 32gb
|
||||
#SBATCH --time 00:59:00
|
||||
#SBATCH --job-name="rfd3"
|
||||
|
||||
source ~/.bashrc
|
||||
conda activate rc-foundry
|
||||
|
||||
# Set variables
|
||||
INFILE="./pdf1.yaml" #or .json
|
||||
OUTDIR="./diffusion_outs"
|
||||
|
||||
# Run RFdiffusion3
|
||||
rfd3 design \
|
||||
out_dir="$OUTDIR" \
|
||||
inputs="$INFILE" \
|
||||
n_batches=1 \
|
||||
diffusion_batch_size=1 \
|
||||
dump_trajectories=1 # OPTIONAL, FOR VISUALIZATION PURPOSES
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
test:
|
||||
input: ./pdf1_sm.pdb
|
||||
contig: 55-88,/0,B1-167,C1-1
|
||||
redesign_motif_sidechains: false
|
||||
#infer_ori_strategy: hotspots
|
||||
is_non_loopy: true
|
||||
select_hotspots:
|
||||
{
|
||||
"B64": "OE1,OE2,CD,CB,CG",
|
||||
"B126": "CB,CD1,CD2,CG",
|
||||
"B87": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B98": "CD1,CD2,CE1,CE2,CB,CZ,OH",
|
||||
"B88": "CB,CG,OE1,NE2",
|
||||
"B42": "CB,CD,CG,N",
|
||||
"B44": "CB,CD1,CG1,CG2",
|
||||
"C1": "C15,C16,C17,C18,O20,N21,C22,C23,C24,C25,O27"}
|
||||
select_hbond_donor:
|
||||
{"B87": "OH",
|
||||
"B98": "OH",
|
||||
"B64": "OE1,OE2",
|
||||
"B42": "N",
|
||||
"C1": "O20,N21,O27"}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,20 +0,0 @@
|
||||
{
|
||||
"test": {
|
||||
"input": "./pdl1_cropped.pdb",
|
||||
"contig": "55-88,/0,B1-114",
|
||||
"length": "169-202",
|
||||
"redesign_motif_sidechains": false,
|
||||
"infer_ori_strategy": "hotspots",
|
||||
"is_non_loopy": true,
|
||||
"select_hotspots":
|
||||
{"B37": "CB,CD1,CG1",
|
||||
"B39": "CD1,CD2,CE1,CE2,CG,CZ,OH",
|
||||
"B51": "CG1,CG2,CB",
|
||||
"B52": "CE1,CD2,ND1,NE2,CB,CG",
|
||||
"B98": "CB,CE,CG,SD",
|
||||
"B100": "OG,CB"},
|
||||
"select_hbond_donor":
|
||||
{"B39": "OH",
|
||||
"B52": "ND1,NE2"}
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
test:
|
||||
input: ./pdl1_cropped.pdb
|
||||
contig: 55-88,/0,B1-114
|
||||
length: 169-202 #add binder length range + target length
|
||||
redesign_motif_sidechains: false
|
||||
infer_ori_strategy: hotspots
|
||||
is_non_loopy: true
|
||||
select_hotspots:
|
||||
{"B37": "CB,CD1,CG1",
|
||||
"B39": "CD1,CD2,CE1,CE2,CG,CZ,OH",
|
||||
"B51": "CG1,CG2,CB",
|
||||
"B52": "CE1,CD2,ND1,NE2,CB,CG",
|
||||
"B98": "CB,CE,CG,SD",
|
||||
"B100": "OG,CB"}
|
||||
select_hbond_donor:
|
||||
{"B39": "OH",
|
||||
"B52": "ND1,NE2"}
|
||||
Reference in New Issue
Block a user