Update amlt and config files for full run

This commit is contained in:
Kevin Wu
2022-08-19 20:33:07 +00:00
parent b57da0647b
commit aad06dd86a
2 changed files with 50 additions and 11 deletions

View File

@@ -0,0 +1,29 @@
{
"shift_angles_zero_twopi": false,
"noise_prior": "gaussian",
"timesteps": 250,
"variance_schedule": "linear",
"variance_scale": 1.0,
"time_encoding": "gaussian_fourier",
"implementation": "huggingface_encoder",
"position_embedding_type": "absolute",
"num_hidden_layers": 12,
"hidden_size": 384,
"intermediate_size": 768,
"num_heads": 12,
"dropout_p": 0.1,
"decoder": "mlp",
"gradient_clip": 1.0,
"lr": 5e-5,
"loss": "radian_l1_smooth",
"l2_norm": 0.0,
"l1_norm": 0.0,
"circle_reg": 0.0,
"lr_scheduler": "",
"min_epochs": 10000,
"max_epochs": 10000,
"early_stop_patience": 0,
"use_swa": false,
"batch_size": 64,
"multithread": true
}

View File

@@ -1,5 +1,4 @@
description: Initial run of CATH dataset
# Build takes about 30 minutes
description: Initial full run of CATH dataset
target:
service: sing # Target service platform
@@ -7,22 +6,33 @@ target:
workspace_name: msrresrchws # AML workspace name to use
environment: # https://singularitydocs.azurewebsites.net/docs/container_images/
# image: amlt-sing/pytorch-1.10.0-a100 # run amlt cache base-images
image: amlt-sing/pytorch-1.11.0
# https://hub.docker.com/layers/pytorch/pytorch/pytorch/1.12.0-cuda11.3-cudnn8-runtime/images/sha256-1ef1f61b13738de8086ae7e1ce57c89f154e075dae0b165f7590b9405efeb6fe?context=explore
# image: pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime # Local debugging
conda_yaml_file: $CONFIG_DIR/../environment.yml
skip_conda_packages_on_sing: ['python', 'torch', 'tensorflow', 'cudatoolkit', 'deepspeed', 'pip', 'jupyter', 'black', 'gpustat']
# core packages
# numpy - included
# pandas - included
# tqdm - included
# matplotlib - included
# seaborn - installed here
# mpl-scatter-density - installed here
# astropy - installed here
# pytorch - included
# pytorch lightning - installed here
# transformers - installed here
setup:
- pip install sequence-models
- pip install seaborn # https://seaborn.pydata.org/installing.html
- pip install mpl-scatter-density # https://github.com/astrofrog/mpl-scatter-density
- pip install astropy # https://docs.astropy.org/en/stable/install.html
- pip install transformers==4.11.3 # https://huggingface.co/docs/transformers/installation
- pip install pytorch-lightning==1.6.4 # https://www.pytorchlightning.ai/
- pip install sequence-models # https://github.com/microsoft/protein-sequence-models
code:
local_dir: $CONFIG_DIR/.. # Relative to config file directory
jobs:
- name: training_cath # Unique name for each job
sku: 24G2-P40 # 16G4-P100 = 16GB memory (these may be more free), 4 GPU P100 (16GB VRAM); G1 = any 1 GPU, 8C1 = 8 GB ram, 1 core; 80G1-A100 = A100 GPU, run amlt target list singularity -v
- name: # Unique name for each job
sku: 16G4-V100 # e.g. 16G4-P100 = 16GB memory (these may be more readily available), 4 P100 GPUs (16GB VRAM); G1 = any 1 GPU; 8C1 = 8GB RAM, 1 core; 80G1-A100 = A100 GPU; run `amlt target list singularity -v`
priority: high
sla_tier: premium
command:
- python bin/train.py -o $$AMLT_OUTPUT_DIR/results
- python bin/train.py config_jsons/full_run.json -o $$AMLT_OUTPUT_DIR/results