This commit is contained in:
Simon Mathis
2025-08-15 00:29:29 +01:00
parent 09dec44401
commit 43ae6ea526
7 changed files with 8 additions and 317 deletions

View File

@@ -1,88 +0,0 @@
Bootstrap: docker
From: nvcr.io/nvidia/pytorch:25.04-py3
IncludeCmd: yes

# NOTE: This apptainer was written using apptainer version `1.1.6+2-g6808b5172-ipd`
# To build this apptainer, use:
#   make base_apptainer

%setup
    # Runs on the HOST before %post; creates the mount points that are later
    # targeted with --bind. `-p` keeps the step from failing if a directory
    # already exists in the base image, and the quotes protect against a
    # rootfs path containing whitespace.
    # ... for binding the host's current working directory
    mkdir -p "${APPTAINER_ROOTFS}/modelhub_host"
    # ... for mounting `/projects` with --bind
    mkdir -p "${APPTAINER_ROOTFS}/projects"
    # ... for mounting `/net` with --bind (`/databases` and `/software` are
    #     symlinked into it in %post)
    mkdir -p "${APPTAINER_ROOTFS}/net"
    # ... for mounting `/squash` with --bind
    mkdir -p "${APPTAINER_ROOTFS}/squash"

%files
    /etc/localtime
    /etc/hosts
    requirements.txt /opt/requirements.txt

%post
    ## GENERAL SETUP
    # Never let apt stop the build to ask an interactive question
    export DEBIAN_FRONTEND=noninteractive

    # Common symlinks (within container)
    ln -s /net/databases /databases
    ln -s /net/software /software
    ln -s /home /mnt/home
    ln -s /projects /mnt/projects
    ln -s /net /mnt/net

    ## PACKAGE INSTALLATION
    apt-get update
    apt-get install -y make git libaio-dev
    # Install OpenBabel (pip installation fails due to C++ build dependencies)
    apt-get install -y openbabel libopenbabel-dev python3-openbabel

    ## PYTHON DEPENDENCY INSTALLATION
    # Fix NGC constraints that conflict with our required packages
    # ... remove packaging constraint to allow biotite 1.3.0 installation
    sed -i '/packaging==/d' /etc/pip/constraint.txt
    # ... remove pytest constraint
    sed -i '/pytest==/d' /etc/pip/constraint.txt

    # Install all other Python dependencies using requirements.txt
    # (Installs into the default NGC Python environment; --no-cache-dir keeps
    # the downloaded wheels out of the final image)
    pip install --no-cache-dir -r /opt/requirements.txt

    # Clean up the apt caches once, after all package installation is done
    apt-get clean && rm -rf /var/lib/apt/lists/*

%environment
    # (Flag to increase accessible GPU memory)
    export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    # (Turn off NVLink)
    export NCCL_P2P_DISABLE=1

%runscript
    # NOTE: The %runscript is invoked when the container is run without specifying a different command.
    exec python "$@"

%help
    modelhub environment for running modelhub independently and for development
    To see this help message, use:
    apptainer run-help modelhub_apptainer.sif
    To build this apptainer, use:
    apptainer build --bind $PWD:/modelhub_host path/to/apptainer.sif apptainer.spec
    To run the container, use:
    apptainer exec /path/to/apptainer.sif <command>
    OR
    ./path/to/apptainer.sif <command>
    To get an interactive shell in the container, use:
    apptainer shell /path/to/apptainer.sif

%labels
    Version v1.0.0
    ApptainerVersion 1.1.6+2-g6808b5172-ipd

View File

@@ -1,15 +0,0 @@
# @package _global_
# NOTE: Dummy experiment that you can use to just run the code.
# For actual experiments, please create a new experiment config by copying the template 'user-XX-template.yaml'.
name: none-00-dummy
tags:
# list of tags to add to the run ( & on wandb to easily find & filter runs)
- experiment
- dummy
project: test

View File

@@ -3,18 +3,18 @@
########################
# path to directory with training splits
pdb_data_dir: /projects/ml/datahub/dfs/af3_splits/2025_07_13
pdb_data_dir: ???
# fb monomer distillation dataset
monomer_distillation_data_dir: /squash/af2_distillation_facebook
monomer_distillation_parquet_dir: /projects/ml/datahub/dfs/distillation/af2_distillation_facebook
monomer_distillation_data_dir: ???
monomer_distillation_parquet_dir: ???
# na complex distill set
na_complex_distillation_data_dir: /projects/ml/prot_dna/rf3_newDL
na_complex_distillation_parquet_dir: /projects/ml/prot_dna
na_complex_distillation_data_dir: ???
na_complex_distillation_parquet_dir: ???
# disorder distill set
disorder_distill_parquet_dir: /projects/ml/disorder_distill
disorder_distill_parquet_dir: ???
########################
# MSAs
@@ -22,13 +22,11 @@ disorder_distill_parquet_dir: /projects/ml/disorder_distill
# path(s) to search for protein MSAs (for PDB datasets)
protein_msa_dirs:
- {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_protein_msas", "extension": ".a3m.gz", "directory_depth": 2}
- {"dir": "/projects/msa/rf2aa_af3/missing_msas_through_2024_08_12", "extension": ".msa0.a3m.gz", "directory_depth": 2}
- {"dir": "/projects/msa/mmseqs_gpu", "extension": ".a3m.gz", "directory_depth": 2}
- {"dir": ???, "extension": ".a3m.gz", "directory_depth": 2}
# path(s) to search for RNA MSAs
rna_msa_dirs:
- {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_rna_msas", "extension": ".afa", "directory_depth": 0}
- {"dir": ???, "extension": ".afa", "directory_depth": 0}
########################
# Misc

View File

@@ -1,45 +0,0 @@
Bootstrap: localimage
From: ./scripts/shebang/modelhub.sif
IncludeCmd: yes

# NOTE: This apptainer was written using apptainer version `1.1.6+2-g6808b5172-ipd`

%setup
    # Runs on the HOST. Abort the build on the first failed command so that a
    # broken rsync is never followed by the "copied successfully" message.
    set -e

    # Create all required directories. They are listed explicitly instead of
    # with {a,b,c} brace expansion: brace expansion is a bash feature, and a
    # POSIX /bin/sh (e.g. dash) would create one literally-named directory.
    echo "Creating directory structure in container..."
    mkdir -p \
        "${APPTAINER_ROOTFS}/opt/modelhub/src" \
        "${APPTAINER_ROOTFS}/opt/modelhub/configs" \
        "${APPTAINER_ROOTFS}/opt/modelhub/lib/cifutils/src" \
        "${APPTAINER_ROOTFS}/opt/modelhub/lib/datahub/src"

    echo "Copying project files into the container..."
    # Copy .project-root file (if exists); a missing file is deliberately not an error
    cp -f ./.project-root "${APPTAINER_ROOTFS}/opt/modelhub/" 2>/dev/null || echo "Note: .project-root not found, skipping"
    # Copy .env file (if exists); a missing file is deliberately not an error
    cp -f ./.env "${APPTAINER_ROOTFS}/opt/modelhub/" 2>/dev/null || echo "Note: .env not found, skipping"

    # Copy directories with rsync
    rsync -av --info=progress2 ./src/ "${APPTAINER_ROOTFS}/opt/modelhub/src/"
    rsync -av --info=progress2 ./configs/ "${APPTAINER_ROOTFS}/opt/modelhub/configs/"
    rsync -av --info=progress2 ./lib/cifutils/src/ "${APPTAINER_ROOTFS}/opt/modelhub/lib/cifutils/src/"
    rsync -av --info=progress2 ./lib/datahub/src/ "${APPTAINER_ROOTFS}/opt/modelhub/lib/datahub/src/"
    echo "All files copied successfully."

%environment
    # Add project directories to PYTHONPATH (modelhub, datahub, cifutils).
    # The pre-existing value is appended only when it is non-empty: a trailing
    # ':' would create an empty entry, which Python treats as the current
    # working directory.
    export PYTHONPATH="/opt/modelhub/src:/opt/modelhub/lib/datahub/src:/opt/modelhub/lib/cifutils/src${PYTHONPATH:+:${PYTHONPATH}}"

%runscript
    # Run the inference.py script by default with any passed arguments
    exec python /opt/modelhub/src/modelhub/inference.py "$@"

%labels
    Author "Nate Corley <ncorley.uw@edu.com>"
    Version 1.0.0
    Description "ModelHub inference container"

%help
    This apptainer exposes inference with the Institute for Protein Design's structure prediction model.
    Usage:
    $ ./container.sif [args] # Run inference.py with optional arguments
    $ apptainer exec container.sif bash # Get a shell in the container

View File

@@ -1,7 +0,0 @@
This directory contains scripts that are not to be run directly by the user.
They are [SHEBANG scripts](https://en.wikipedia.org/wiki/Shebang_(Unix)) that are used to run the appropriate apptainer container.
For example, the script `modelhub_exec.sh` is used to run the modelhub apptainer container with the latest apptainer image
stored locally or at the IPD.
The shebang lines (`#!/bin/bash` ...) at the top of entry point scripts like `train.py` redirect the system to here to find the correct apptainer container.

View File

@@ -1 +0,0 @@
/projects/ml/modelhub/apptainer/modelhub_2025-07-31.sif

View File

@@ -1,151 +0,0 @@
#!/usr/bin/bash
###################
# You can add the path to this file as the shebang line in your python script.
# Then by default, the python script will be executed with the python interpreter
# in the SIF_PATH container. Here, we launch the container with nvidia gpu and slurm support.
#
# Example shebang: #!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/scripts/shebang/modelhub_exec.sh" "$0" "$@"'
#
# Arguments:
#   $1   - path to the python script to run
#   $2.. - arguments forwarded unchanged to that python script
###################

# Resolve this script's own location. The expansions are quoted so that a
# checkout path containing spaces or glob characters is handled correctly.
SCRIPT_PATH=$(realpath -- "$0")
SCRIPT_DIR=$(dirname -- "$SCRIPT_PATH")

# Let the user know this script is setting things up behind the scene
echo '################## Start shebang info ##################'
echo "The file $SCRIPT_PATH is being run as a shebang executable.
It will...
1. Add the repository root, 'src', and 'lib/atomworks-dev/src' directories to your PYTHONPATH.
2. Run your python script from the right container, which contains all dependencies.
3. Launch the container with slurm and nvidia gpu support."

# Extract the path to the Python script from the arguments; everything that
# remains in "$@" after the shift belongs to the python script itself.
PYTHON_SCRIPT=$(realpath -- "$1")
shift
#######################################
# Find the repository root by walking up from a start directory until a
# directory containing a `.project-root` file is found.
# Arguments: $1 - directory to start from (absolute or relative; defaults
#                 to the current directory when empty)
# Outputs:   the repository root path on stdout
# Returns:   0 if a root was found, 1 otherwise
#######################################
find_repo_root() {
  local current_dir="${1:-.}"
  local parent_dir

  # `dirname` of a relative path bottoms out at "." and never reaches "/",
  # which would make the walk below loop forever. Anchor relative paths first.
  # The `cd` runs inside the command substitution, so the caller's working
  # directory is left untouched.
  if [[ "$current_dir" != /* ]]; then
    current_dir=$(cd -- "$current_dir" 2>/dev/null && pwd) || return 1
  fi

  while :; do
    if [[ -f "$current_dir/.project-root" ]]; then
      printf '%s\n' "$current_dir"
      return 0
    fi
    parent_dir=$(dirname -- "$current_dir")
    # `dirname` is a fixed point at the filesystem root: once the parent equals
    # the current directory every level, including "/", has been checked.
    if [[ "$parent_dir" == "$current_dir" ]]; then
      return 1
    fi
    current_dir="$parent_dir"
  done
}
# Locate the repository that contains the python script; everything below
# (PYTHONPATH entries, .env file, container bind mount) is derived from it.
echo
echo "Searching for repository root directory..."
REPO_ROOT=$(find_repo_root "$(dirname -- "$PYTHON_SCRIPT")")
if [[ -z "$REPO_ROOT" ]]; then
  # Fatal diagnostics belong on stderr so they are not lost when stdout is
  # redirected or piped.
  echo "Error: Could not find .project-root file in any parent directory" >&2
  exit 1
fi
echo "... found repository root at '$REPO_ROOT'"
#######################################
# Prepend a directory to PYTHONPATH unless it is already listed.
# Globals:   PYTHONPATH (read, written, exported)
# Arguments: $1 - directory to add
# Outputs:   one status line on stdout saying whether the directory was added
#######################################
add_to_pythonpath() {
  local dir_path="$1"
  # Wrapping both sides in ':' makes the match whole-entry only, so '/a' is
  # not mistaken for being present because '/a/b' is.
  if [[ ":$PYTHONPATH:" != *":$dir_path:"* ]]; then
    # Append the old value only when it is non-empty: "dir:" would leave an
    # empty trailing entry, which Python interprets as the current directory.
    export PYTHONPATH="${dir_path}${PYTHONPATH:+:${PYTHONPATH}}"
    echo "Added '$dir_path' to PYTHONPATH."
  else
    echo "'$dir_path' is already in PYTHONPATH."
  fi
}
# Put the repository root and its 'src' directory on PYTHONPATH (each is
# skipped by add_to_pythonpath when already present).
echo
echo "Checking and adding root and 'src' directory to PYTHONPATH..."
SRC_PATH="${REPO_ROOT}/src"
for pythonpath_entry in "$REPO_ROOT" "$SRC_PATH"; do
  add_to_pythonpath "$pythonpath_entry"
done

# Same for the vendored atomworks sources under `/lib`
echo
echo "Checking and adding 'atomworks' directory to PYTHONPATH..."
add_to_pythonpath "${REPO_ROOT}/lib/atomworks-dev/src"
#######################################
# Export the KEY=VALUE pairs found in a dotenv-style file.
#
# Each line is parsed on its own, so values are never word-split or
# glob-expanded by the shell. Blank lines, lines starting with '#', and lines
# without a valid variable name are skipped. An optional leading `export` is
# accepted. One pair of matching surrounding quotes is removed from a value;
# for unquoted values a trailing " # comment" is dropped, while a '#' that is
# not preceded by whitespace (e.g. a URL fragment) is kept.
# Arguments: $1 - path to the env file
# Returns:   0 on success, non-zero if the file cannot be read
#######################################
load_env_file() {
  local env_file="$1"
  local line key value

  [[ -r "$env_file" ]] || return 1

  # `|| [[ -n "$line" ]]` also processes a final line without trailing newline
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"                          # tolerate CRLF files
    line="${line#"${line%%[![:space:]]*}"}"       # trim leading whitespace
    [[ -z "$line" || "$line" == \#* ]] && continue
    if [[ "$line" =~ ^export[[:space:]]+(.*)$ ]]; then
      line="${BASH_REMATCH[1]}"
    fi
    [[ "$line" == *=* ]] || continue

    key="${line%%=*}"
    key="${key%"${key##*[![:space:]]}"}"          # trim trailing whitespace
    [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

    value="${line#*=}"
    value="${value#"${value%%[![:space:]]*}"}"    # trim leading whitespace
    value="${value%"${value##*[![:space:]]}"}"    # trim trailing whitespace

    if [[ ${#value} -ge 2 && "$value" == \"*\" ]]; then
      value="${value:1:${#value}-2}"
    elif [[ ${#value} -ge 2 && "$value" == \'*\' ]]; then
      value="${value:1:${#value}-2}"
    else
      value="${value%%[[:space:]]\#*}"            # drop inline comment
      value="${value%"${value##*[![:space:]]}"}"
    fi

    export "$key=$value"
  done < "$env_file"
}

# Load the .env file environment variables from the repo root
echo
echo "Attempting to load environment variables from .env file:"
if [[ -f "$REPO_ROOT/.env" ]]; then
  echo "... loading environment variables from '$REPO_ROOT/.env'"
  load_env_file "$REPO_ROOT/.env"
else
  echo " Warning: No .env file found at repository root ($REPO_ROOT)"
fi
# Decide which apptainer image (if any) the python script is launched with.
# After this section SIF_PATH is either the path of an existing image or
# empty; empty means "run with the python interpreter that is on PATH".
SIF_PATH=""
echo
echo "Fetching the appropriate apptainer image..."
if [[ -z "$APPTAINER_NAME" ]]; then
  if [[ -n "$PROJECT_PATH" ]]; then
    # Attempt to find any .sif file in the PROJECT_PATH/scripts/shebang directory
    SIF_DIR="$PROJECT_PATH/scripts/shebang"
    SIF_FILE=$(find "$SIF_DIR" -maxdepth 1 -name "*.sif" -print -quit)
    if [[ -n "$SIF_FILE" ]]; then
      SIF_PATH="$SIF_FILE"
    fi
  fi

  # If SIF_PATH is still empty, use the default SIF next to this script
  if [[ -z "$SIF_PATH" ]]; then
    SIF_NAME="modelhub.sif"
    SIF_PATH="$SCRIPT_DIR/$SIF_NAME"
  fi

  echo "... looking for a local apptainer image at '$SIF_PATH'"
  # Check if the SIF file exists
  if [[ ! -f "$SIF_PATH" ]]; then
    echo "... apptainer not found. To run with your own apptainer image, you can build it with 'make apptainer' and place it here: '$SIF_PATH'"
    echo "Attempting to run $PYTHON_SCRIPT with $(command -v python)"
    # Clear the path so the launch step really falls back to the host python
    # announced above instead of invoking apptainer on a missing image.
    SIF_PATH=""
  fi
else
  # APPTAINER_NAME is set, i.e. we are already inside a container: SIF_PATH
  # stays empty so no nested container is started.
  echo "Already running inside container $APPTAINER_NAME. Executing $PYTHON_SCRIPT with $(command -v python) in the existing container."
fi
#######################################
# Print a boxed banner warning that the script runs under debugpy.
# Globals: DEBUG_PORT (read) - port shown in the banner
# Outputs: the banner, framed by one blank line above and below, on stdout
#######################################
print_debug_warning() {
  local rule="###############################################################################"
  local spacer="# #"
  printf '%s\n' \
    "" \
    "$rule" \
    "$spacer" \
    "# ⚠️ WARNING ⚠️ #" \
    "# RUNNING WITH DEBUGPY ON PORT $DEBUG_PORT #" \
    "# DON'T FORGET TO ATTACH A DEBUGGER #" \
    "$spacer" \
    "$rule" \
    ""
}
# Build the interpreter command as an array so that every argument survives
# as exactly one word, without relying on word splitting of a string.
if [[ -n "$DEBUG_PORT" ]]; then
  print_debug_warning
  python_cmd=(python -m debugpy --listen "$DEBUG_PORT" --wait-for-client)
else
  python_cmd=(python)
  echo
fi

# Launch the python script: inside the apptainer image when one was selected
# (SIF_PATH non-empty), otherwise with the python found on PATH.
if [[ -n "$SIF_PATH" ]]; then
  echo "Running $PYTHON_SCRIPT with apptainer: $SIF_PATH."
  echo '################## End shebang info ####################'
  echo
  # The escaped \$PYTHONPATH is passed through literally for the container
  # side, while the unescaped $PYTHONPATH is expanded here to the value
  # assembled by this script.
  /usr/bin/apptainer exec --nv --slurm \
    --bind "$REPO_ROOT:$REPO_ROOT" \
    --env PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" \
    "$SIF_PATH" "${python_cmd[@]}" "$PYTHON_SCRIPT" "$@"
else
  echo "Running $PYTHON_SCRIPT with python: $(command -v python)"
  echo '################## End shebang info ####################'
  echo
  "${python_cmd[@]}" "$PYTHON_SCRIPT" "$@"
fi