mirror of
https://github.com/KosinskiLab/AlphaPulldown.git
synced 2026-06-04 14:14:24 +08:00
* symmetrical refactoring to support both af2 and af3 data pipelines * Clean tests * Keep GPU tests in place * Reverted accidentally deleted templates * Add AlphaFold3 feature creation pipeline and per-chain input generation - Implement `create_pipeline_af3` to construct the AlphaFold3 data pipeline with correct database and binary paths. - Add `create_af3_individual_features` to generate AlphaFold3 input features for each chain in a FASTA, handling protein, RNA, and DNA sequences. - Integrate new AF3 logic into the main entry point, dispatching to AF2 or AF3 as appropriate. - Ensure output directory creation and error handling for missing dependencies or invalid sequences. * Convert template dates to datetime for af3 * First check for nucleotides, then for amino-acids * Skip existing features json if --skip_existing=true * Check if DNA before RNA * Bump 2.1.0 * Git ignore build/ dir
83 lines
3.1 KiB
Bash
Executable File
83 lines
3.1 KiB
Bash
Executable File
#!/bin/bash
#
# AlphaFold3 Feature Creation Commands
#
# This script creates AlphaFold3 JSON input files from FASTA sequences.
# Every path used below (scripts, FASTA inputs, feature directories) is
# relative to the current working directory.
# NOTE(review): presumably meant to be run from the repository root - confirm.
#
# Optional environment overrides:
#   AF2_DB_DIR  directory passed as --data_dir to the alphafold2 run
#   AF3_DB_DIR  directory passed as --data_dir to the alphafold3 runs

# Stop at the first failing command, unset variable or broken pipeline
# rather than carrying on and printing "Feature Creation Complete" after
# a step has failed.
set -euo pipefail

# Set database paths. The defaults are site-specific; export the variables
# before running the script to point at another installation.
AF2_DB_DIR="${AF2_DB_DIR:-/g/alphafold/AlphaFold_DBs/2.3.0}"
AF3_DB_DIR="${AF3_DB_DIR:-/g/alphafold/AlphaFold_DBs/3.0.0}"

# Create output directories. "protein_rna" is listed because the
# protein + RNA run writes to af3_features/protein_rna; "mixed" is kept
# because this script has always created it.
mkdir -p test/test_data/features/af2_features/{protein,rna,dna,mixed}
mkdir -p test/test_data/features/af3_features/{protein,rna,dna,mixed,protein_rna}

echo "=== Creating AlphaFold2 Features ==="

# Run the feature-creation script with the alphafold2 data pipeline on the
# two protein FASTA files (passed as a single comma-separated value).
# The database directory is quoted so a path containing spaces or glob
# characters reaches the script as one argument.
echo "Creating AlphaFold2 features for protein sequences..."
python alphapulldown/scripts/create_individual_features.py \
  --fasta_paths test/test_data/fastas/A0A024R1R8.fasta,test/test_data/fastas/P61626.fasta \
  --data_dir "${AF2_DB_DIR}" \
  --data_pipeline alphafold2 \
  --output_dir test/test_data/features/af2_features/protein \
  --max_template_date 2021-09-30

echo "=== Creating AlphaFold3 Features ==="

#######################################
# Run create_individual_features.py with the alphafold3 data pipeline for
# one FASTA input.
# Globals:   AF3_DB_DIR (read) - passed as --data_dir
# Arguments: $1 - label used in the progress message
#            $2 - value for --fasta_paths
#            $3 - value for --output_dir (created first if missing)
# Outputs:   one progress line on stdout, plus whatever the script prints
# Returns:   status of mkdir if it fails, otherwise status of the script
#######################################
create_af3_features() {
  local label=$1
  local fasta_paths=$2
  local output_dir=$3

  echo "Creating AlphaFold3 features for ${label}..."
  # Make sure the target exists; this matters for protein_rna, which may
  # not have been created beforehand.
  mkdir -p -- "${output_dir}" || return
  python alphapulldown/scripts/create_individual_features.py \
    --fasta_paths "${fasta_paths}" \
    --data_dir "${AF3_DB_DIR}" \
    --data_pipeline alphafold3 \
    --output_dir "${output_dir}" \
    --max_template_date 2021-09-30 \
    --use_mmseqs2
}

create_af3_features "protein sequences" \
  test/test_data/fastas/A0A024R1R8.fasta,test/test_data/fastas/P61626.fasta \
  test/test_data/features/af3_features/protein

create_af3_features "RNA sequences" \
  test/test_data/fastas/rna.fasta \
  test/test_data/features/af3_features/rna

create_af3_features "DNA sequences" \
  test/test_data/fastas/dna_af3.fasta \
  test/test_data/features/af3_features/dna

create_af3_features "protein and RNA sequences" \
  test/test_data/fastas/protein_rna_af3.fasta \
  test/test_data/features/af3_features/protein_rna

echo "=== Converting AlphaFold2 Features to AlphaFold3 JSON ==="

# Convert AlphaFold2 features to AlphaFold3 JSON format.
# The converter is given the same directory as both its --pickle_dir and
# its --output_dir, so the path is spelled out once.
af2_protein_features="test/test_data/features/af2_features/protein"

echo "Converting AlphaFold2 protein features to AlphaFold3 JSON..."
python convert_to_alphafold3_json.py \
  --pickle_dir "${af2_protein_features}" \
  --output_dir "${af2_protein_features}"

echo "=== Feature Creation Complete ==="

#######################################
# Print a blank line, a "Generated <what>:" heading, and the sorted list
# of files under a directory whose names match a pattern.
# Arguments: $1 - description used in the heading
#            $2 - directory searched with find
#            $3 - -name pattern handed to find
# Outputs:   heading and matching paths on stdout
#######################################
report_generated() {
  echo ""
  echo "Generated $1:"
  find "$2" -name "$3" | sort
}

report_generated "AlphaFold2 pickle files" test/test_data/features/af2_features "*.pkl"
report_generated "AlphaFold3 pickle files" test/test_data/features/af3_features "*.pkl"
report_generated "AlphaFold2 JSON files" test/test_data/features/af2_features "*_af3_input.json"
report_generated "AlphaFold3 JSON files" test/test_data/features/af3_features "*_af3_input.json"