mirror of
https://github.com/HannesStark/boltzgen.git
synced 2026-06-04 11:54:23 +08:00
Add documentation to examples. Fix design insertions so that more than one insertion is now possible. Fix 1-based indexing in the design insertion size specification.
This commit is contained in:
@@ -41,9 +41,10 @@ COPY . /app
|
||||
RUN pip install --no-cache-dir -e /app
|
||||
|
||||
ARG DOWNLOAD_WEIGHTS=false
|
||||
ARG HF_TOKEN=""
|
||||
RUN mkdir -p "${HF_HOME}" && \
|
||||
if [ "${DOWNLOAD_WEIGHTS}" = "true" ]; then \
|
||||
boltzgen download all --cache "${HF_HOME}" --force_download; \
|
||||
HF_TOKEN="${HF_TOKEN}" boltzgen download --models-cache-dir "${HF_HOME}" --force-download --show-paths; \
|
||||
fi
|
||||
|
||||
ARG USERNAME=boltzgen
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# random number between 120 and 140 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: A
|
||||
sequence: 120..140
|
||||
# Specify a non-designed protein chain
|
||||
# fixed 19-mer, all residues fixed
|
||||
- protein:
|
||||
id: B
|
||||
sequence: GGGILPWKWPWWPWRRGGG
|
||||
|
||||
@@ -1,35 +1,50 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
- protein:
|
||||
id: B
|
||||
# random number between 40 and 80 of designed residues (inclusive)
|
||||
sequence: 40..80
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
# path to the target structure
|
||||
path: hoxd13.cif
|
||||
# Which chain and residues in the .cif file to use as target (uses only A: 1..71 here)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
res_index: ..71
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we hide the entire target fragment (no coordinates specified)
|
||||
structure_groups:
|
||||
- group:
|
||||
visibility: 0
|
||||
visibility: 0
|
||||
id: "all"
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 57..71 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
binding: 57..71
|
||||
|
||||
|
||||
# Specify a non-designed protein chain fused to chain A
|
||||
- protein:
|
||||
id: C
|
||||
fuse: A
|
||||
# fixed 8-mer, all residues fixed, and all residues marked as binding
|
||||
sequence: AAAAAAAA
|
||||
binding_types: BBBBBBBB
|
||||
|
||||
# Continue pulling from the rest of chain A of the same file and fuse it to chain A
|
||||
- file:
|
||||
path: hoxd13.cif
|
||||
# Subsequently included residues are fused onto chain A
|
||||
fuse: A
|
||||
# Include the remainder of chain A (residue 72 to the end)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
res_index: 72..
|
||||
# Start with all hidden, then explicitly show structure of a tail segment (281 to the end)
|
||||
structure_groups:
|
||||
- group:
|
||||
visibility: 0
|
||||
|
||||
@@ -1,17 +1,25 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
- protein:
|
||||
id: G
|
||||
# random number between 40 and 80 of designed residues (inclusive)
|
||||
sequence: 40..80
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: npm1.cif
|
||||
# Which chain and residues in the .cif file to use as target (uses only chain A here)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 123..240 on chain A and not bind to residues 1..122 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
binding: 123..240
|
||||
not_binding: 1..122
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we keep structure specified (visible) for two disjoint segments of chain A
|
||||
structure_groups:
|
||||
- group:
|
||||
visibility: 1
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# random number between 40 and 80 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: G
|
||||
sequence: 40..80
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: nup98.cif
|
||||
# Which chain and residues in the .cif file to use as target (uses only A: 1..400 here)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
res_index: 1..400
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we hide the entire included target fragment (no coordinates specified)
|
||||
structure_groups:
|
||||
- group:
|
||||
visibility: 0
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
entities:
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 9d3d.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target
|
||||
# Here we include chains A, B, and C (all residues on each chain)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
@@ -9,13 +11,15 @@ entities:
|
||||
id: B
|
||||
- chain:
|
||||
id: C
|
||||
|
||||
# Include residues that are within a radius of a reference region
|
||||
# Here we include residues within a radius 30 of G:106..118
|
||||
include_proximity:
|
||||
- chain:
|
||||
id: G
|
||||
res_index: 106..118
|
||||
radius: 30
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 91, 128, and 131 on chains A, B, and C
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
@@ -27,7 +31,10 @@ entities:
|
||||
id: C
|
||||
binding: 91,128,131
|
||||
|
||||
# Specify a designed protein chain
|
||||
# random number between 8 and 18 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: E
|
||||
sequence: 8..18
|
||||
# Make the designed protein chain cyclic
|
||||
cyclic: True
|
||||
|
||||
@@ -1,25 +1,34 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
- protein:
|
||||
id: B
|
||||
# 3 design residues, Cysteine, 8 design residues, Cysteine,
|
||||
# 6 design residues, Cysteine, 5 design residues, Cysteine,
|
||||
# 3 design residues, Cysteine, 1 design residue, Cysteine, 2 design residues
|
||||
sequence: 3C8C6C5C3C1C2
|
||||
# Make the designed peptide cyclic
|
||||
cyclic: true
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 3ivq.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target (here includes all residues on chain A)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we specify that all included target residues should have their structure specified
|
||||
structure_groups: "all"
|
||||
|
||||
constraints:
|
||||
# Specify covalent disulfide bonds between the designed residues on chain B
|
||||
- bond:
|
||||
atom1: [B, 4, SG]
|
||||
atom2: [B, 26, SG]
|
||||
atom1: [B, 4, SG] # connect SG of residue 4 to SG of residue 26 on chain B
|
||||
atom2: [B, 26, SG]
|
||||
- bond:
|
||||
atom1: [B, 13, SG]
|
||||
atom2: [B, 30, SG]
|
||||
atom1: [B, 13, SG] # connect SG of residue 13 to SG of residue 30 on chain B
|
||||
atom2: [B, 30, SG]
|
||||
- bond:
|
||||
atom1: [B, 20, SG]
|
||||
atom2: [B, 32, SG]
|
||||
atom1: [B, 20, SG] # connect SG of residue 20 to SG of residue 32 on chain B
|
||||
atom2: [B, 32, SG]
|
||||
@@ -1,25 +1,34 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# 3 design residues, Cysteine, 8 design residues, Cysteine,
|
||||
# 6 design residues, Cysteine, 5 design residues, Cysteine,
|
||||
# 3 design residues, Cysteine, 1 design residue, Cysteine, 2 design residues
|
||||
- protein:
|
||||
id: B
|
||||
sequence: 3C8C6C5C3C1C2
|
||||
# Make the designed peptide cyclic
|
||||
cyclic: true
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 5wrd.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target (here all residues on chain A)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we specify that all included target residues should have their structure specified
|
||||
structure_groups: "all"
|
||||
|
||||
constraints:
|
||||
# Specify covalent disulfide bonds within designed chain B
|
||||
- bond:
|
||||
atom1: [B, 4, SG]
|
||||
atom1: [B, 4, SG] # connect SG of residue 4 to SG of residue 26 on chain B
|
||||
atom2: [B, 26, SG]
|
||||
- bond:
|
||||
atom1: [B, 13, SG]
|
||||
atom1: [B, 13, SG] # connect SG of residue 13 to SG of residue 30 on chain B
|
||||
atom2: [B, 30, SG]
|
||||
- bond:
|
||||
atom1: [B, 20, SG]
|
||||
atom1: [B, 20, SG] # connect SG of residue 20 to SG of residue 32 on chain B
|
||||
atom2: [B, 32, SG]
|
||||
@@ -1,17 +1,24 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# random number between 8 and 16 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: B
|
||||
sequence: 8..16
|
||||
# Make the designed peptide cyclic
|
||||
cyclic: true
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 8jjs.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target
|
||||
# Here we include chains A and C (all residues on each chain)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
- chain:
|
||||
id: C
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify discrete binding residues on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
entities:
|
||||
|
||||
# Specify a designed protein chain
|
||||
# random number between 40 and 120 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: G
|
||||
sequence: 40..120
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: zf.cif
|
||||
include:
|
||||
# Which chain and residues in the .cif file to use as target (here includes all residues on chains C1 and B1)
|
||||
- chain:
|
||||
id: C1
|
||||
- chain:
|
||||
|
||||
@@ -1,30 +1,40 @@
|
||||
entities:
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: zf.cif
|
||||
include: "all"
|
||||
# Which parts of the .cif file to include as target (here includes all chains/residues)
|
||||
include: "all"
|
||||
# Which parts of the included content to exclude (here excludes residues ..10, 63..69, and 185.. in chain A1)
|
||||
exclude:
|
||||
- chain:
|
||||
id: A1
|
||||
res_index: ..10,63..69,185..
|
||||
# Where to insert new designable residues into existing chains
|
||||
# Here we insert 3..8 residues after residue 63 in chain A1
|
||||
design_insertions:
|
||||
- insertion:
|
||||
id: A1
|
||||
res_index: 63
|
||||
num_residues: 3..8
|
||||
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we hide everything (no structure specified)
|
||||
structure_groups:
|
||||
- group:
|
||||
visibility: 0
|
||||
id: "all"
|
||||
|
||||
# Which residues in the target should be redesigned
|
||||
# Here we declare A1:11..184 to be redesigned
|
||||
design:
|
||||
- chain:
|
||||
id: A1
|
||||
res_index: 11..184
|
||||
# Which residues in the target are explicitly not redesignable (override entries in design)
|
||||
# Here we carve out fixed positions on chain A1 to not be redesigned
|
||||
not_design:
|
||||
- chain:
|
||||
id: A1
|
||||
res_index: 11..20,29,33,39..48,57,61,72..81,90,94,100..109,118,122,129..138,147,151,157..166,175,179
|
||||
# Reset residue numbering to be contiguous for chain A1
|
||||
reset_res_index:
|
||||
- chain:
|
||||
id: A1
|
||||
@@ -1,18 +1,24 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# 1 design residue, Cysteine, 11..16 design residues, Cysteine, 1 design residue
|
||||
- protein:
|
||||
id: B
|
||||
sequence: 1C11..16C1
|
||||
|
||||
# Specify the secondary structure of the designed peptide (here we require sheet at residues 1 and 3..11)
|
||||
secondary_structure:
|
||||
sheet: 1,3..11
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 7nre.cif
|
||||
# Which chain and residues in the .cif file to use as target (here includes residues 24.. on chain A)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
res_index: 24..
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 26..31,381,408 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
@@ -20,7 +26,8 @@ entities:
|
||||
|
||||
|
||||
constraints:
|
||||
# Specify covalent disulfide bonds between the designed residues on chain B
|
||||
- bond:
|
||||
atom1: [B, 2, SG]
|
||||
atom1: [B, 2, SG] # connect SG of residue 2 to SG of residue 14 on chain B
|
||||
atom2: [B, 14, SG]
|
||||
|
||||
|
||||
@@ -1,23 +1,31 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# 1 design residue, Cysteine, 11..16 design residues, Cysteine, 1 design residue
|
||||
- protein:
|
||||
id: B
|
||||
sequence: 1C11..16C1
|
||||
|
||||
# Specify the secondary structure of the designed peptide (here we require sheet at residues 1 and 3..11)
|
||||
secondary_structure:
|
||||
sheet: 1,3..11
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 7nre.cif
|
||||
# Which chain and residues in the .cif file to use as target (here includes residues 24.. on chain A)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
res_index: 24..
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 26..31,381,408 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
binding: 26..31,381,408
|
||||
|
||||
# Include residues within a radius of a reference region
|
||||
# Here we include residues within a radius 28 of residues 26..31,381,408 on chain A
|
||||
include_proximity:
|
||||
- chain:
|
||||
id: A
|
||||
@@ -28,7 +36,8 @@ entities:
|
||||
|
||||
|
||||
constraints:
|
||||
# Specify covalent disulfide bonds between the designed residues on chain B
|
||||
- bond:
|
||||
atom1: [B, 2, SG]
|
||||
atom1: [B, 2, SG] # connect SG of residue 2 to SG of residue 14 on chain B
|
||||
atom2: [B, 14, SG]
|
||||
|
||||
|
||||
@@ -1,24 +1,29 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# 1..3 design residues, Cysteine, Cysteine, 4 design residues, Cysteine, 1..3 design residues, Cysteine, 1..3 design residues
|
||||
- protein:
|
||||
id: B
|
||||
sequence: 1..3CC4C1..3C1..3
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 8wtw.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target (here includes all residues on chain A)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residue 24 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
binding: 24
|
||||
|
||||
constraints:
|
||||
# Specify covalent disulfide bonds between the designed residues on chain B
|
||||
- bond:
|
||||
atom1: [B, 2, SG]
|
||||
atom1: [B, 2, SG] # connect SG of residue 2 to SG of residue 10 on chain B
|
||||
atom2: [B, 10, SG]
|
||||
- bond:
|
||||
atom1: [B, 3, SG]
|
||||
atom1: [B, 3, SG] # connect SG of residue 3 to SG of residue 8 on chain B
|
||||
atom2: [B, 8, SG]
|
||||
|
||||
@@ -1,19 +1,24 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# random number between 12 and 20 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: G
|
||||
sequence: 12..20
|
||||
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 5cqg.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target (uses only chain A here)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
# Which regions of the target the design should or should NOT bind to
|
||||
# Here we specify that the design should bind to residues 343, 344, and 251 on chain A
|
||||
binding_types:
|
||||
- chain:
|
||||
id: A
|
||||
binding: 343,344,251
|
||||
|
||||
# Which regions of the target should have their structure specified
|
||||
# Here we specify that all included target residues should have their structure specified
|
||||
structure_groups: "all"
|
||||
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
entities:
|
||||
# Specify a designed protein chain
|
||||
# random number between 80 and 140 of designed residues (inclusive)
|
||||
- protein:
|
||||
id: C
|
||||
sequence: 80..140
|
||||
# Specification of the target which is extracted from a .cif file
|
||||
- file:
|
||||
path: 1g13.cif
|
||||
|
||||
# Which chain and residues in the .cif file to use as target (uses only chain A here)
|
||||
include:
|
||||
- chain:
|
||||
id: A
|
||||
|
||||
@@ -1202,7 +1202,6 @@ def check_design_spec(
|
||||
parsed = parser.parse_yaml(design_spec, mols, moldir)
|
||||
structure = parsed.structure
|
||||
design_info = parsed.design_info
|
||||
|
||||
design_color_features = np.ones_like(design_info.res_binding_type) * 0.8
|
||||
design_color_features[design_info.res_binding_type.astype(bool)] = 1.0
|
||||
extract_mask = np.zeros(len(structure.residues), dtype=bool)
|
||||
|
||||
@@ -1072,7 +1072,9 @@ class YamlDesignParser:
|
||||
raise ValueError(f"Unsupported file type: {str(path)}")
|
||||
|
||||
name = path.stem
|
||||
target = self.parse_boltzgen_schema(name, data, mols, mol_dir, base_file_path=path.parent)
|
||||
target = self.parse_boltzgen_schema(
|
||||
name, data, mols, mol_dir, base_file_path=path.parent
|
||||
)
|
||||
return target
|
||||
|
||||
def log_once(self, msg: str):
|
||||
@@ -1570,7 +1572,7 @@ class YamlDesignParser:
|
||||
if isinstance(path, list) or Path(path).suffix == ".yaml":
|
||||
if isinstance(path, list):
|
||||
path = random.choice(path)
|
||||
|
||||
|
||||
resolved_path = (base_file_path / path).resolve()
|
||||
with resolved_path.open("r") as f:
|
||||
file = yaml.safe_load(f)
|
||||
@@ -1968,6 +1970,7 @@ class YamlDesignParser:
|
||||
|
||||
# Parse and apply design insertions
|
||||
if design_insertions is not None:
|
||||
num_inserted = 0
|
||||
for list_element in design_insertions:
|
||||
insertion = list_element["insertion"]
|
||||
if "id" not in insertion:
|
||||
@@ -1978,10 +1981,15 @@ class YamlDesignParser:
|
||||
raise ValueError(msg)
|
||||
chain_id = insertion["id"]
|
||||
res_index = insertion["res_index"] - 1 # 1 index input to 0 indexed
|
||||
res_index += num_inserted
|
||||
ss_insert_type = insertion.get("secondary_structure", "UNSPECIFIED")
|
||||
|
||||
# parse_range is normally used for indexing, so it converts the 1-based inputs to 0-based values; we add 1 back below so that num_residues is the requested residue count.
|
||||
num_residues = insertion["num_residues"]
|
||||
num_residues = parse_range(num_residues)
|
||||
num_residues = np.random.choice(num_residues).item()
|
||||
num_residues += 1
|
||||
num_inserted += num_residues
|
||||
|
||||
if chain_id not in structure.chains["name"]:
|
||||
msg = f"Specified chain id {chain_id} not in file {path}."
|
||||
|
||||
@@ -279,7 +279,7 @@ class Filter(Task):
|
||||
{
|
||||
"feature": "ALA_fraction",
|
||||
"lower_is_better": True,
|
||||
"threshold": 0.2,
|
||||
"threshold": 0.3,
|
||||
},
|
||||
{
|
||||
"feature": "GLY_fraction",
|
||||
@@ -294,7 +294,7 @@ class Filter(Task):
|
||||
{
|
||||
"feature": "LEU_fraction",
|
||||
"lower_is_better": True,
|
||||
"threshold": 0.2,
|
||||
"threshold": 0.3,
|
||||
},
|
||||
{
|
||||
"feature": "VAL_fraction",
|
||||
|
||||
Reference in New Issue
Block a user