Files
foundry/configs/datasets/train/pdb/af3_train_pn_unit.yaml
Nathaniel Corley 5a492032d5 refactor: new modelhub (#109)
* Initial commit of chiral changes

Initial checkin of chiral feature code

Add chiral metric

* Update the way chiral features are incorporated into the model

Move initialization to new func

use default pytorch reset parameters

fix initialization for chirals

config

rename argument of confidence head

fix initialization for chirals

* refactor: src nest, rename rf2aa to modelhub

* refactor: initial commit without projects

* Initial commit of chiral changes

* Initial checkin of chiral feature code

* Add chiral metric

* Remove option for double residual connection. Add kq_norm options to base (20250125) config.

* Restoring flag

* config

* rename argument of confidence head

* Update the way chiral features are incorporated into the model

* config

* rename argument of confidence head

* Update the way chiral features are incorporated into the model

* Initial commit of chiral changes

Initial checkin of chiral feature code

Add chiral metric

* Update the way chiral features are incorporated into the model

Move initialization to new func

use default pytorch reset parameters

fix initialization for chirals

config

rename argument of confidence head

fix initialization for chirals

* refactor: new modelhub

---------

Co-authored-by: fdimaio <dimaio@uw.edu>
Co-authored-by: HaotianZhangAI4Science <haotianzhang@zju.edu.cn>
2025-04-08 13:33:17 -07:00

42 lines
1.2 KiB
YAML

# Training dataset config for the PDB "pn_unit" examples
# (foundry/configs/datasets/train/pdb/af3_train_pn_unit.yaml).
#
# NOTE(review): the block nesting below was reconstructed from a copy of this
# file whose indentation had been lost. In particular, `transform` is assumed
# to be a sibling of `dataset_parser` and the inner `dataset` -- confirm
# against the `base` config this file extends.

# Settings from `base` are composed first; the keys below override or extend them.
defaults:
  - base

dataset:
  # Parser object to build; `_target_` names the class by its import path.
  dataset_parser:
    _target_: datahub.datasets.parsers.PNUnitsDFParser

  dataset:
    name: pn_unit
    # Parquet table of training rows, located under the configured PDB data dir.
    data: ${paths.data.pdb_data_dir}/pn_units_df_train.parquet

    # Row-level filter expressions, one per list item.
    # presumably evaluated as DataFrame query strings -- verify against the loader
    filters:
      # filters common across all PDB datasets
      - "deposition_date < '2021-09-30'"
      - "resolution < 9.0"
      - "num_polymer_pn_units <= 300"
      - "cluster.notnull()"
      # pn_unit specific filters
      # Drops rows whose non-polymer residue names match the excluded-ligand
      # regex; rows where that column is null are kept.
      - "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"

    # Columns read from the table. Every column referenced by `filters`
    # above also appears in this list.
    columns_to_load:
      # columns common across all PDB datasets
      - example_id
      - pdb_id
      - assembly_id
      - deposition_date
      - resolution
      - num_polymer_pn_units
      - method
      - cluster
      - n_prot
      - n_nuc
      - n_ligand
      - n_peptide
      - total_num_atoms_in_unprocessed_assembly
      # pn_unit specific columns
      - q_pn_unit_iid
      - q_pn_unit_non_polymer_res_names
      - all_pn_unit_iids_after_processing
      - q_pn_unit_is_loi

  transform:
    # pn_unit-specific Transform pipeline parameters
    # The two crop probabilities are 1/3 and 2/3 and sum to 1.
    crop_contiguous_probability: 0.3333333333333333
    crop_spatial_probability: 0.6666666666666667