mirror of
https://github.com/microsoft/foldingdiff.git
synced 2026-06-04 13:30:33 +08:00
Update amlt and config files for full run
This commit is contained in:
29
config_jsons/full_run.json
Normal file
29
config_jsons/full_run.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"shift_angles_zero_twopi": false,
|
||||
"noise_prior": "gaussian",
|
||||
"timesteps": 250,
|
||||
"variance_schedule": "linear",
|
||||
"variance_scale": 1.0,
|
||||
"time_encoding": "gaussian_fourier",
|
||||
"implementation": "huggingface_encoder",
|
||||
"position_embedding_type": "absolute",
|
||||
"num_hidden_layers": 12,
|
||||
"hidden_size": 384,
|
||||
"intermediate_size": 768,
|
||||
"num_heads": 12,
|
||||
"dropout_p": 0.1,
|
||||
"decoder": "mlp",
|
||||
"gradient_clip": 1.0,
|
||||
"lr": 5e-5,
|
||||
"loss": "radian_l1_smooth",
|
||||
"l2_norm": 0.0,
|
||||
"l1_norm": 0.0,
|
||||
"circle_reg": 0.0,
|
||||
"lr_scheduler": "",
|
||||
"min_epochs": 10000,
|
||||
"max_epochs": 10000,
|
||||
"early_stop_patience": 0,
|
||||
"use_swa": false,
|
||||
"batch_size": 64,
|
||||
"multithread": true
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
description: Initial run of CATH dataset
|
||||
# Build takes about 30 minutes
|
||||
description: Initial full run of CATH dataset
|
||||
|
||||
target:
|
||||
service: sing # Target service platform
|
||||
@@ -7,22 +6,33 @@ target:
|
||||
workspace_name: msrresrchws # AML workspace name to use
|
||||
|
||||
environment: # https://singularitydocs.azurewebsites.net/docs/container_images/
|
||||
# image: amlt-sing/pytorch-1.10.0-a100 # run amlt cache base-images
|
||||
image: amlt-sing/pytorch-1.11.0
|
||||
# https://hub.docker.com/layers/pytorch/pytorch/pytorch/1.12.0-cuda11.3-cudnn8-runtime/images/sha256-1ef1f61b13738de8086ae7e1ce57c89f154e075dae0b165f7590b9405efeb6fe?context=explore
|
||||
# image: pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime # Local debugging
|
||||
conda_yaml_file: $CONFIG_DIR/../environment.yml
|
||||
skip_conda_packages_on_sing: ['python', 'torch', 'tensorflow', 'cudatoolkit', 'deepspeed', 'pip', 'jupyter', 'black', 'gpustat']
|
||||
# core packages
|
||||
# numpy - included
|
||||
# pandas - included
|
||||
# tqdm - included
|
||||
# matplotlib - included
|
||||
# seaborn - installed here
|
||||
# mpl-scatter-density - installed here
|
||||
# astropy - installed here
|
||||
# pytorch - included
|
||||
# pytorch lightning - installed here
|
||||
# transformers - installed here
|
||||
setup:
|
||||
- pip install sequence-models
|
||||
- pip install seaborn # https://seaborn.pydata.org/installing.html
|
||||
- pip install mpl-scatter-density # https://github.com/astrofrog/mpl-scatter-density
|
||||
- pip install astropy # https://docs.astropy.org/en/stable/install.html
|
||||
- pip install transformers==4.11.3 # https://huggingface.co/docs/transformers/installation
|
||||
- pip install pytorch-lightning==1.6.4 # https://www.pytorchlightning.ai/
|
||||
- pip install sequence-models # https://github.com/microsoft/protein-sequence-models
|
||||
|
||||
code:
|
||||
local_dir: $CONFIG_DIR/.. # relative to config file directory
|
||||
|
||||
jobs:
|
||||
- name: training_cath # Unique name for each job
|
||||
sku: 24G2-P40 # 16G4-P100 = 16GB memory (these may be more free), 4 GPU P100 (16GB VRAM); G1 = any 1 GPU, 8C1 = 8 GB ram, 1 core; 80G1-A100 = A100 GPU, run amlt target list singularity -v
|
||||
- name: # Unique name for each job
|
||||
sku: 16G4-V100 # 16G4-P100 = 16GB memory (these may be more free), 4 GPU P100 (16GB VRAM); G1 = any 1 GPU, 8C1 = 8 GB ram, 1 core; 80G1-A100 = A100 GPU, run amlt target list singularity -v
|
||||
priority: high
|
||||
sla_tier: premium
|
||||
command:
|
||||
- python bin/train.py -o $$AMLT_OUTPUT_DIR/results
|
||||
- python bin/train.py config_jsons/full_run.json -o $$AMLT_OUTPUT_DIR/results
|
||||
Reference in New Issue
Block a user