diff --git a/config_jsons/full_run.json b/config_jsons/full_run.json new file mode 100644 index 0000000..fdaa2e4 --- /dev/null +++ b/config_jsons/full_run.json @@ -0,0 +1,29 @@ +{ + "shift_angles_zero_twopi": false, + "noise_prior": "gaussian", + "timesteps": 250, + "variance_schedule": "linear", + "variance_scale": 1.0, + "time_encoding": "gaussian_fourier", + "implementation": "huggingface_encoder", + "position_embedding_type": "absolute", + "num_hidden_layers": 12, + "hidden_size": 384, + "intermediate_size": 768, + "num_heads": 12, + "dropout_p": 0.1, + "decoder": "mlp", + "gradient_clip": 1.0, + "lr": 5e-5, + "loss": "radian_l1_smooth", + "l2_norm": 0.0, + "l1_norm": 0.0, + "circle_reg": 0.0, + "lr_scheduler": "", + "min_epochs": 10000, + "max_epochs": 10000, + "early_stop_patience": 0, + "use_swa": false, + "batch_size": 64, + "multithread": true +} \ No newline at end of file diff --git a/scripts/amlt.yaml b/scripts/amlt.yaml index cb36f5c..a422cb1 100644 --- a/scripts/amlt.yaml +++ b/scripts/amlt.yaml @@ -1,5 +1,4 @@ -description: Initial run of CATH dataset -# Build takes about 30 minutes +description: Initial full run of CATH dataset target: service: sing # Target service platform @@ -7,22 +6,33 @@ target: workspace_name: msrresrchws # AML workspace name to use environment: # https://singularitydocs.azurewebsites.net/docs/container_images/ - # image: amlt-sing/pytorch-1.10.0-a100 # run amlt cache base-images image: amlt-sing/pytorch-1.11.0 - # https://hub.docker.com/layers/pytorch/pytorch/pytorch/1.12.0-cuda11.3-cudnn8-runtime/images/sha256-1ef1f61b13738de8086ae7e1ce57c89f154e075dae0b165f7590b9405efeb6fe?context=explore - # image: pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime # Local debugging - conda_yaml_file: $CONFIG_DIR/../environment.yml - skip_conda_packages_on_sing: ['python', 'torch', 'tensorflow', 'cudatoolkit', 'deepspeed', 'pip', 'jupyter', 'black', 'gpustat'] + # core packages + # numpy - included + # pandas - included + # tqdm - included + # 
matplotlib - included + # seaborn - installed here + # mpl-scatter-density - installed here + # astropy - installed here + # pytorch - included + # pytorch lightning - installed here + # transformers - installed here setup: - - pip install sequence-models + - pip install seaborn # https://seaborn.pydata.org/installing.html + - pip install mpl-scatter-density # https://github.com/astrofrog/mpl-scatter-density + - pip install astropy # https://docs.astropy.org/en/stable/install.html + - pip install transformers==4.11.3 # https://huggingface.co/docs/transformers/installation + - pip install pytorch-lightning==1.6.4 # https://www.pytorchlightning.ai/ + - pip install sequence-models # https://github.com/microsoft/protein-sequence-models code: local_dir: $CONFIG_DIR/.. # relative to config file directory jobs: -- name: training_cath # Unique name for each job - sku: 24G2-P40 # 16G4-P100 = 16GB memory (these may be more free), 4 GPU P100 (16GB VRAM); G1 = any 1 GPU, 8C1 = 8 GB ram, 1 core; 80G1-A100 = A100 GPU, run amlt target list singularity -v +- name: # Unique name for each job + sku: 16G4-V100 # 16G4-P100 = 16GB memory (these may be more free), 4 GPU P100 (16GB VRAM); G1 = any 1 GPU, 8C1 = 8 GB ram, 1 core; 80G1-A100 = A100 GPU, run amlt target list singularity -v priority: high sla_tier: premium command: - - python bin/train.py -o $$AMLT_OUTPUT_DIR/results \ No newline at end of file + - python bin/train.py config_jsons/full_run.json -o $$AMLT_OUTPUT_DIR/results \ No newline at end of file