Files
foundry/configs/datasets/train/monomer_distillation.yaml
Nathaniel Corley 5a492032d5 refactor: new modelhub (#109)
* Initial commit of chiral changes

Initial checkin of chiral feature code

Add chiral metric

* Update the way chiral features are incorporated into the model

Move initialization to new func

use default pytorch reset parameters

fix initialization for chirals

config

rename argument of confidence head

fix initialization for chirals

* refactor: src nest, rename rf2aa to modelhub

* refactor: initial commit without projects

* Initial commit of chiral changes

* Initial checkin of chiral feature code

* Add chiral metric

* Remove option for double residual connection. Add kq_norm options to base (20250125) config.

* Restoring flag

* config

* rename argument of confidence head

* Update the way chiral features are incorporated into the model

* config

* rename argument of confidence head

* Update the way chiral features are incorporated into the model

* Initial commit of chiral changes

Initial checkin of chiral feature code

Add chiral metric

* Update the way chiral features are incorporated into the model

Move initialization to new func

use default pytorch reset parameters

fix initialization for chirals

config

rename argument of confidence head

fix initialization for chirals

* refactor: new modelhub

---------

Co-authored-by: fdimaio <dimaio@uw.edu>
Co-authored-by: HaotianZhangAI4Science <haotianzhang@zju.edu.cn>
2025-04-08 13:33:17 -07:00

39 lines
1.3 KiB
YAML

# Monomer distillation training dataset.
# Values of the form ${...} are interpolations resolved from other config groups.
# NOTE(review): nesting restored from a flattened listing. `return_key` is
# placed on the outer wrapper and the crop probabilities on `transform`;
# confirm against the signatures of the `_target_` classes.
monomer_distillation:
  dataset:
    _target_: datahub.datasets.datasets.StructuralDatasetWrapper
    save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
    # cif parser arguments: cache directory unset, cache reads and writes off
    cif_parser_args:
      cache_dir: null
      load_from_cache: false
      save_to_cache: false
    # metadata parser
    dataset_parser:
      _target_: datahub.datasets.parsers.GenericDFParser
      pn_unit_iid_colnames: null
    # metadata dataset
    dataset:
      _target_: datahub.datasets.datasets.PandasDataset
      name: af2fb_distillation
      id_column: example_id
      data: ${paths.data.monomer_distillation_parquet_dir}/af2_distillation_facebook.parquet
      columns_to_load:
        - example_id
        - path
    return_key: null
    # per-example pipeline; the concrete target is chosen by `datasets.pipeline_target`
    transform:
      _target_: ${datasets.pipeline_target}
      is_inference: false
      # one protein MSA source; no RNA MSA sources for this dataset
      protein_msa_dirs:
        - dir: "${paths.data.monomer_distillation_data_dir}/msa"
          extension: ".a3m"
          directory_depth: 2
      rna_msa_dirs: []
      # shared training settings inherited from the `datasets` group
      n_recycles: ${datasets.n_recycles_train}
      crop_size: ${datasets.crop_size}
      n_msa: ${datasets.n_msa}
      diffusion_batch_size: ${datasets.diffusion_batch_size_train}
      max_atoms_in_crop: ${datasets.max_atoms_in_crop}
      # the two crop probabilities sum to 1.0
      crop_contiguous_probability: 0.25
      crop_spatial_probability: 0.75