mirror of
https://github.com/tsa87/cgflow.git
synced 2026-06-04 12:14:22 +08:00
Finalize code release.
This commit is contained in:
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1 +1 @@
|
||||
*.pkl.gz filter=lfs diff=lfs merge=lfs -text
|
||||
src/gflownet/utils/fpscores.pkl.gz filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
200
.gitignore
vendored
200
.gitignore
vendored
@@ -1,69 +1,167 @@
|
||||
# gflownet
|
||||
bengio2021flow_proxy.pkl.gz
|
||||
|
||||
# rxnflow
|
||||
logs/
|
||||
experiments/data/building_blocks/*
|
||||
experiments/data/envs/*
|
||||
experiments/data/stock/*
|
||||
experiments/data/experiments/*
|
||||
experiments/data/complex/*
|
||||
experiments/analysis/
|
||||
experiments/LIT-PCBA/*
|
||||
experiments/CrossDocked2020/*
|
||||
unidock_2025*
|
||||
experiments/data/CrossDocked2020/
|
||||
experiments/data/LIT-PCBA/
|
||||
|
||||
# semlaflow
|
||||
.gvp_cache/
|
||||
docking_pipeline/
|
||||
**/weights/*.ckpt
|
||||
tony/v0/notebooks/
|
||||
tony/v1/temp/
|
||||
result/
|
||||
evaluation_results/
|
||||
|
||||
# pharmaconet
|
||||
PharmacoNet/
|
||||
data/building_blocks/*
|
||||
data/envs/*
|
||||
data/experiments/*
|
||||
lightning_logs/
|
||||
weights/
|
||||
*.pdbqt
|
||||
|
||||
# package
|
||||
build/
|
||||
|
||||
# Log files
|
||||
wandb/
|
||||
molproc_logs/
|
||||
lightning_logs/
|
||||
notebooks/lightning_logs/
|
||||
nohup.out
|
||||
*job*.out
|
||||
|
||||
# Slurm submission scripts and logs
|
||||
subslurm/
|
||||
slurmlog/
|
||||
slurm-*
|
||||
job.sh
|
||||
|
||||
*.profile
|
||||
*.egg-info
|
||||
uv.lock
|
||||
|
||||
# Editors
|
||||
.vscode/
|
||||
typings/
|
||||
|
||||
# Jupyter notebook checkpoints
|
||||
notebooks/.ipynb_checkpoints/
|
||||
|
||||
# Python cache files
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*/__pycache__/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
*.zip
|
||||
*.tar.gz
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# experimental code
|
||||
src/app
|
||||
weights
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# Experimental files
|
||||
=*
|
||||
*.out
|
||||
*.profile
|
||||
evaluation_results/*
|
||||
experiments/data/*
|
||||
experiments/evals/posecheck/*
|
||||
experiments/evals/sampling_efficency/*
|
||||
experiments/logs/*
|
||||
experiments/wandb/*
|
||||
logs/*
|
||||
wandb/*
|
||||
src/app/*
|
||||
data/*.sdf
|
||||
data/*.zip
|
||||
docking_pipeline/*
|
||||
|
||||
19
.pre-commit-config.yaml
Normal file
19
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.12.0
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-zero] # --exit-zero: always exit with status 0 even when lint errors remain; Seonghwan will fix all errors.
|
||||
files: ^(ubd|scripts)/
|
||||
- id: ruff-format
|
||||
files: ^(ubd|scripts)/
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
- id: check-added-large-files
|
||||
args: ["--maxkb=10000"]
|
||||
- id: check-merge-conflict
|
||||
2
LICENSE
2
LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
165
README.md
165
README.md
@@ -1,12 +1,15 @@
|
||||
[](https://arxiv.org/abs/2504.08051)
|
||||
[](LICENSE)
|
||||
|
||||
# CGFlow: Compositional Flows for 3D Molecule and Synthesis Pathway Co-design
|
||||
|
||||
This is the official repository of our ICML 2025 paper: **"Compositional Flows for 3D Molecule and Synthesis Pathway Co-design"**.
|
||||
This is the official repository of our ICML 2025 paper: **"Compositional Flows for 3D Molecule and Synthesis Pathway Co-design"**.
|
||||
|
||||
**Overview:** CGFlow introduces Compositional Generative Flows, a framework extending flow matching to generate compositional objects with continuous states. We apply CGFlow to synthesizable drug design by jointly designing a molecule's synthetic pathway and its 3D binding pose. For reproducing results reported in the paper, please refer to the [submission version](https://github.com/tsa87/cgflow/releases/tag/v0-icml25-submission).
|
||||
|
||||
**Demo:**
|
||||
We have a web app demo available: [3DSynthFlow Demo](https://3dsynthflowapp-s2d6tvz22exfsugf575jsm.streamlit.app/). This demo illustrates the types of molecules and synthesis trajectories generated by 3DSynthFlow.
|
||||
The underlying model is trained in a pocket-conditional setting and is intended for demo and research purposes only.
|
||||
The underlying model is trained in a pocket-conditional setting and is intended for demo and research purposes only.
|
||||
|
||||
⚠️ For practical drug discovery applications, we strongly recommend finetuning the model on your specific protein target.
|
||||
|
||||
@@ -17,129 +20,108 @@ The underlying model is trained in a pocket-conditional setting and is intended
|
||||
1. [Acknowledgements](#acknowledgements)
|
||||
2. [Installation](#installation)
|
||||
3. [Data Preparation](#data-preparation)
|
||||
4. [Running Experiments](#running-experiments)
|
||||
5. [License](#license)
|
||||
6. [Citation](#citation)
|
||||
4. [Generation](#generation)
|
||||
5. [Pretraining](#pretraining-pose-prediction-model)
|
||||
6. [License](#license)
|
||||
7. [Citation](#citation)
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
This project builds upon prior work including:
|
||||
|
||||
- [GFlowNet repository](https://github.com/recursionpharma/gflownet) by Recursion
|
||||
- [RxnFlow](https://github.com/SeonghwanSeo/RxnFlow) for synthesis-based generation
|
||||
- [TacoGFN](https://github.com/tsa87/TacoGFN-SBDD) for target-conditioned reinforcement learning
|
||||
- [SemlaFlow](https://github.com/rssrwn/semla-flow) for flow matching-based molecular conformation generation
|
||||
|
||||
## Installation
|
||||
|
||||
### Environment Setup
|
||||
|
||||
```bash
|
||||
# Create and activate conda environment
|
||||
conda create --name cgflow python=3.11
|
||||
conda activate cgflow
|
||||
# 1. Create and activate environment using mamba
|
||||
mamba create -n cgflow python=3.11
|
||||
mamba activate cgflow
|
||||
|
||||
# Install PyTorch and PyTorch Geometric with CUDA 12.4 support
|
||||
pip install torch==2.6.0 'torch-geometric>=2.4.0' 'torch-scatter>=2.1.2' 'torch-sparse>=0.6.18' 'torch-cluster>=1.6.3' -f https://data.pyg.org/whl/torch-2.6.0+cu124.html
|
||||
# 2. Install PyTorch + PyG via pip
|
||||
pip install torch==2.6.0 \
|
||||
'torch-geometric>=2.4.0' \
|
||||
'torch-scatter>=2.1.2' \
|
||||
'torch-sparse>=0.6.18' \
|
||||
'torch-cluster>=1.6.3' \
|
||||
-f https://data.pyg.org/whl/torch-2.6.0+cu124.html
|
||||
|
||||
# Install the package in editable mode
|
||||
# 3. Install your package (-e for editable)
|
||||
pip install -e .
|
||||
|
||||
# Install additional dependencies
|
||||
conda install -c conda-forge notebook unidock_env unidock
|
||||
# 4. Install extra dependencies (optional)
|
||||
# - AutoDock Vina
|
||||
pip install -e '.[vina]'
|
||||
# - Unidock as GPU-accelerated docking
|
||||
mamba install unidock
|
||||
pip install -e '.[unidock]'
|
||||
# - Extras (e.g., jupyter notebook)
|
||||
mamba install notebook
|
||||
pip install -e '.[extra]'
|
||||
```
|
||||
|
||||
## Data Preparation
|
||||
|
||||
### 1. Pose Prediction Dataset
|
||||
### Download Pretrained Model
|
||||
|
||||
Download and prepare the preprocessed PLINDER dataset for pose prediction pretraining:
|
||||
You can download the pretrained model weights from [here](https://drive.google.com/drive/folders/1XjHh6VopGX48Hg7vHW3foEvrvMvUMH7C?usp=sharing)
|
||||
|
||||
```bash
|
||||
mkdir -p experiments/data/complex
|
||||
cd experiments/data/complex
|
||||
curl -L -o plinder_15A.zip https://figshare.com/ndownloader/files/54405473
|
||||
unzip plinder_15A.zip
|
||||
gdown --id 1xGC193o4DtSPzWFjmRIlPjmn7bLfMaCd -O ./weights/cgflow_crossdock.ckpt
|
||||
```
|
||||
|
||||
### 2. LIT-PCBA Pocket Data
|
||||
### Construct Generative Environment
|
||||
|
||||
See [Data Preparation](data/README.md) for detailed instructions on preparing datasets and environments.
|
||||
|
||||
## Generation
|
||||
|
||||
### 1. Pocket-specific Optimization
|
||||
|
||||
#### A. GPU-accelerated UniDock
|
||||
You can modify the config file to use your own protein target.
|
||||
```bash
|
||||
python scripts/opt/opt_unidock.py --config ./configs/opt/aldh1_unidock.yaml
|
||||
```
|
||||
|
||||
#### B. AutoDock Vina (local-opt)
|
||||
|
||||
```bash
|
||||
cd experiments/data
|
||||
curl -L -o LIT-PCBA.tar.gz https://figshare.com/ndownloader/files/54411395
|
||||
tar -xzvf LIT-PCBA.tar.gz
|
||||
python scripts/opt/opt_vina.py --config ./configs/opt/aldh1_vina.yaml
|
||||
```
|
||||
|
||||
### 3. Enamine Building Block Generation
|
||||
### 2. Zero-shot Pocket-conditional Generation
|
||||
|
||||
#### Option A: Generate from Enamine Catalog
|
||||
Use the "Comprehensive Catalog" (2024.06.10) from [Enamine](https://enamine.net/building-blocks/building-blocks-catalog):
|
||||
TBA
|
||||
|
||||
```bash
|
||||
cd experiments/data
|
||||
python scripts/a_catalog_to_smi.py -b <CATALOG_SDF> -o building_blocks/enamine_catalog.smi --cpu <CPU>
|
||||
python scripts/a_refine_smi.py -b building_blocks/enamine_catalog.smi -o building_blocks/enamine_blocks.smi --filter_druglike --cpu <CPU>
|
||||
python scripts/b_create_env.py -b building_blocks/enamine_catalog.smi -o envs/catalog/ --cpu <CPU>
|
||||
```
|
||||
### 3. Fine-tuning the pocket-conditional model
|
||||
|
||||
#### Option B: Download Prepared Files
|
||||
```bash
|
||||
cd experiments/data/envs
|
||||
gdown https://drive.google.com/uc?id=192RuXBzM51Mk__-kSKcCs4kthnKHWAgm
|
||||
tar -xzvf stock.tar.gz
|
||||
```
|
||||
TBA
|
||||
|
||||
## Running Experiments
|
||||
## Pretraining Pocket-conditional Generative Model
|
||||
|
||||
### 1. Train General State Flow (Pose Prediction)
|
||||
If you want to train the pocket-conditional generative model, you can use the following procedure.
|
||||
|
||||
Train the state flow model for pose prediction:
|
||||
```bash
|
||||
sh scripts/A_semlaflow_train_crossdocked.sh
|
||||
```
|
||||
Note: This script trains the pose prediction model on the PLINDER dataset rather than on the CrossDocked dataset used in the paper experiments.
|
||||
PLINDER is a larger dataset, and we used unbiased pocket extraction. Therefore, the pose prediction performance is improved compared to the reported results.
|
||||
We'll release the pretrained checkpoint for Plinder training soon.
|
||||
- Download the CrossDocked2020 pockets according to the instructions in the [Data Preparation](data/README.md) section.
|
||||
- You can use the following command to train the model:
|
||||
```bash
|
||||
python scripts/multi_pocket/tacogfn_proxy.py --name <PREFIX>
|
||||
```
|
||||
|
||||
For reproducing paper results, use pretrained model weights:
|
||||
```bash
|
||||
mkdir weights/
|
||||
curl -L -o weights/crossdocked2020_till_end.ckpt https://figshare.com/ndownloader/files/54411752
|
||||
```
|
||||
## Pretraining Pose Prediction Model
|
||||
|
||||
### 2. Pocket-specific Optimization (LIT-PCBA)
|
||||
If you want to train the pose prediction model, you can use the following procedure.
|
||||
|
||||
```bash
|
||||
cd experiments
|
||||
wandb sweep sweep/redock.yaml
|
||||
wandb agent <sweep-id>
|
||||
```
|
||||
- Download the preprocessed data according to the instructions in the [Data Preparation](data/README.md) section.
|
||||
- You can use the following command to train the model:
|
||||
```bash
|
||||
python scripts/pretrain/train.py --name <PREFIX>
|
||||
```
|
||||
|
||||

|
||||
|
||||
### 3. Pocket-conditional Generation
|
||||
|
||||
#### A. Download CrossDocked Dataset
|
||||
1. Get `crossdocked.tar.gz` from [here](https://drive.google.com/file/d/1BKYx_H1m-TzG_75Gk-7sjPkt5ow-Acdw/view?usp=sharing)
|
||||
2. Extract dataset:
|
||||
```bash
|
||||
cd experiments/data/
|
||||
gdown 1BKYx_H1m-TzG_75Gk-7sjPkt5ow-Acdw
|
||||
tar -xzvf crossdocked.tar.gz
|
||||
```
|
||||
|
||||
#### B. Use Pretrained Weights
|
||||
Use `crossdocked2020_till_end.ckpt` for consistency.
|
||||
|
||||
#### C. Docking Score Proxy Setup
|
||||
Follow instructions at [PharmacoNet](https://github.com/SeonghwanSeo/PharmacoNet/tree/main/src/pmnet_appl).
|
||||
|
||||
#### D. Run Experiment
|
||||
```bash
|
||||
cd experiments
|
||||
python scripts/exp3A_sbdd_proxy.py
|
||||
```
|
||||
|
||||
**Note:** Baseline methods (e.g., SynFlowNet) are provided in supplementary materials.
|
||||
|
||||
## License
|
||||
|
||||
@@ -150,6 +132,7 @@ This project is licensed under the [MIT License](./LICENSE).
|
||||
If you use this work, please cite:
|
||||
|
||||
### CGFlow (ICML '25)
|
||||
|
||||
```bibtex
|
||||
@inproceedings{shen2025compositional,
|
||||
title = {Compositional Flows for 3D Molecule and Synthesis Pathway Co-design},
|
||||
@@ -161,6 +144,7 @@ If you use this work, please cite:
|
||||
```
|
||||
|
||||
### RxnFlow (ICLR '25)
|
||||
|
||||
```bibtex
|
||||
@inproceedings{seo2025generative,
|
||||
title={Generative Flows on Synthetic Pathway for Drug Design},
|
||||
@@ -172,6 +156,7 @@ If you use this work, please cite:
|
||||
```
|
||||
|
||||
### TacoGFN (TMLR '24)
|
||||
|
||||
```bibtex
|
||||
@article{shen2024tacogfn,
|
||||
title={Taco{GFN}: Target-conditioned {GF}lowNet for Structure-based Drug Design},
|
||||
@@ -181,13 +166,3 @@ If you use this work, please cite:
|
||||
url={https://openreview.net/forum?id=N8cPv95zOU}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Instructions for Public Release
|
||||
```bash
|
||||
cat .gitignore > exclude.txt
|
||||
echo '.git/*' >> exclude.txt
|
||||
echo '.experimental/*' >> exclude.txt
|
||||
rsync -av --exclude-from=exclude.txt . ../cgflow/
|
||||
```
|
||||
|
||||
|
||||
25
configs/cgflow/default/cfm/ar_fm.yaml
Normal file
25
configs/cgflow/default/cfm/ar_fm.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
_registry_: "cfm"
|
||||
_type_: ARMolecularCFM
|
||||
|
||||
# cfm config
|
||||
use_ema: true
|
||||
self_condition: true
|
||||
|
||||
# model training
|
||||
loss_fn: huber
|
||||
dist_loss_weight: 1.0
|
||||
|
||||
lr: 0.0003
|
||||
lr_schedule: "constant"
|
||||
warmup_steps: 1000
|
||||
|
||||
# sampling (for validation)
|
||||
inference_noise_std: 0.0
|
||||
sampling_steps: 50
|
||||
sampling_strategy: "linear"
|
||||
# metric (for validation)
|
||||
use_energy_metric: True # mmff energy
|
||||
use_complex_metric: False # pose-check
|
||||
|
||||
# debug
|
||||
debug: false # if True, return trajectory
|
||||
25
configs/cgflow/default/cfm/fm.yaml
Normal file
25
configs/cgflow/default/cfm/fm.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
_registry_: "cfm"
|
||||
_type_: MolecularCFM
|
||||
|
||||
# cfm config
|
||||
use_ema: true
|
||||
self_condition: true
|
||||
|
||||
# model training
|
||||
loss_fn: huber
|
||||
dist_loss_weight: 1.0
|
||||
|
||||
lr: 0.0003
|
||||
lr_schedule: "constant"
|
||||
warmup_steps: 1000
|
||||
|
||||
# sampling (for validation)
|
||||
inference_noise_std: 0.0
|
||||
sampling_steps: 50
|
||||
sampling_strategy: "linear"
|
||||
# metric (for validation)
|
||||
use_energy_metric: True # mmff energy
|
||||
use_complex_metric: false # pose-check
|
||||
|
||||
# debug
|
||||
debug: false # if True, return trajectory
|
||||
6
configs/cgflow/default/datamodule/bucket_dm.yaml
Normal file
6
configs/cgflow/default/datamodule/bucket_dm.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
_registry_: "datamodule"
|
||||
_type_: BucketDataModule
|
||||
batch_cost: 300
|
||||
num_workers: 3
|
||||
bucket_limits: [64, 96, 128, 160]
|
||||
bucket_cost_scale: "linear"
|
||||
@@ -0,0 +1,6 @@
|
||||
_registry_: "datamodule"
|
||||
_type_: BucketDataModule
|
||||
batch_cost: 2048
|
||||
num_workers: 3
|
||||
bucket_limits: [256, 512, 768, 1024, 1536, 2048]
|
||||
bucket_cost_scale: "linear"
|
||||
4
configs/cgflow/default/datamodule/simple_dm.yaml
Normal file
4
configs/cgflow/default/datamodule/simple_dm.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
_registry_: "datamodule"
|
||||
_type_: SimpleDataModule
|
||||
batch_size: 1
|
||||
num_workers: 0
|
||||
6
configs/cgflow/default/dataset/complex_lmdb.yaml
Normal file
6
configs/cgflow/default/dataset/complex_lmdb.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
_registry_: "dataset"
|
||||
_type_: LMDBPocketComplexDataset
|
||||
data_path: ???
|
||||
key_path: ???
|
||||
dataset_size: null
|
||||
max_length: 160
|
||||
3
configs/cgflow/default/interpolant/geo.yaml
Normal file
3
configs/cgflow/default/interpolant/geo.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
_registry_: "interpolant"
|
||||
_type_: GeometricInterpolant
|
||||
noise_std: 0.2
|
||||
9
configs/cgflow/default/interpolant/rxn_till_end.yaml
Normal file
9
configs/cgflow/default/interpolant/rxn_till_end.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
_registry_: "interpolant"
|
||||
_type_: ARGeometricInterpolant
|
||||
decomposition_strategy: "reaction"
|
||||
ordering_strategy: "connected"
|
||||
noise_std: 0.2
|
||||
max_num_cuts: 2 # cut up to N+1 fragments
|
||||
t_per_ar_action: 0.3 # t_per_ar_action * max_num_cuts should be less than 1
|
||||
max_interp_time: 1.0 # if fragment is added at t, it is denoised until t + max_interp_time
|
||||
min_group_size: 5 # min fragment size
|
||||
13
configs/cgflow/default/model/ligand_base.yaml
Normal file
13
configs/cgflow/default/model/ligand_base.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
_registry_: "model"
|
||||
_type_: LigandDecoderV3
|
||||
d_equi: 96
|
||||
d_inv: 384
|
||||
d_edge: 128
|
||||
n_layers: 8
|
||||
n_attn_heads: 64 # n heads = 6
|
||||
d_message: 128
|
||||
d_message_ff: 256
|
||||
d_pocket_inv: ???
|
||||
d_pocket_equi: ???
|
||||
time_cond_dim: 3 # NOTE: for CGFlow (time;rel time; gen time)
|
||||
self_cond: True
|
||||
13
configs/cgflow/default/model/ligand_small.yaml
Normal file
13
configs/cgflow/default/model/ligand_small.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
_registry_: "model"
|
||||
_type_: LigandDecoderV3
|
||||
d_equi: 64
|
||||
d_inv: 256
|
||||
d_edge: 64
|
||||
n_layers: 8
|
||||
n_attn_heads: 64 # n heads = 4
|
||||
d_message: 64
|
||||
d_message_ff: 128
|
||||
d_pocket_inv: ???
|
||||
d_pocket_equi: ???
|
||||
time_cond_dim: 3 # NOTE: for CGFlow (time;rel time; gen time)
|
||||
self_cond: True
|
||||
10
configs/cgflow/default/model/pocket_all_atom.yaml
Normal file
10
configs/cgflow/default/model/pocket_all_atom.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
_registry_: "model"
|
||||
_type_: PocketEncoderV3
|
||||
d_equi: 96
|
||||
d_inv: 384
|
||||
d_edge: 64
|
||||
n_layers: 4
|
||||
n_attn_heads: 64 # n heads = 4
|
||||
d_message: 64
|
||||
d_message_ff: 128
|
||||
fixed_equi: False
|
||||
10
configs/cgflow/default/model/pocket_base.yaml
Normal file
10
configs/cgflow/default/model/pocket_base.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
_registry_: "model"
|
||||
_type_: ResidueEncoder
|
||||
d_equi: 96
|
||||
d_inv: 384
|
||||
d_edge: 128
|
||||
n_layers: 4
|
||||
n_attn_heads: 64 # n heads = 4
|
||||
d_message: 128
|
||||
d_message_ff: 256
|
||||
fixed_equi: False
|
||||
10
configs/cgflow/default/model/pocket_original.yaml
Normal file
10
configs/cgflow/default/model/pocket_original.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
_registry_: "model"
|
||||
_type_: PocketEncoderV1
|
||||
d_equi: 96
|
||||
d_inv: 256
|
||||
d_edge: 64
|
||||
n_layers: 4
|
||||
n_attn_heads: 64 # n heads = 4
|
||||
d_message: 64
|
||||
d_message_ff: 128
|
||||
fixed_equi: False
|
||||
10
configs/cgflow/default/model/pocket_small.yaml
Normal file
10
configs/cgflow/default/model/pocket_small.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
_registry_: "model"
|
||||
_type_: ResidueEncoder
|
||||
d_equi: 64
|
||||
d_inv: 256
|
||||
d_edge: 64
|
||||
n_layers: 4
|
||||
n_attn_heads: 64 # n heads = 4
|
||||
d_message: 64
|
||||
d_message_ff: 128
|
||||
fixed_equi: False
|
||||
3
configs/cgflow/default/prior_dist/gaussian.yaml
Normal file
3
configs/cgflow/default/prior_dist/gaussian.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
_registry_: "prior_distribution"
|
||||
_type_: GaussianPriorDistribution
|
||||
noise_std: 3.0
|
||||
4
configs/cgflow/default/time_dist/beta.yaml
Normal file
4
configs/cgflow/default/time_dist/beta.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
_registry_: "time_distribution"
|
||||
_type_: BetaTimeDistribution
|
||||
alpha: 1.0
|
||||
beta: 1.0
|
||||
3
configs/cgflow/default/time_dist/constant.yaml
Normal file
3
configs/cgflow/default/time_dist/constant.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
_registry_: "time_distribution"
|
||||
_type_: ConstantTimeDistribution
|
||||
time: 0.99 # here we use 0.99 to get the generated times for all atoms.
|
||||
2
configs/cgflow/default/time_dist/uniform.yaml
Normal file
2
configs/cgflow/default/time_dist/uniform.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
_registry_: "time_distribution"
|
||||
_type_: UniformTimeDistribution
|
||||
15
configs/cgflow/default/trainer/train.yaml
Normal file
15
configs/cgflow/default/trainer/train.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
save_dir: ./result/
|
||||
wandb_project: "cgflow"
|
||||
wandb_name: crossdock
|
||||
wandb_group: "train"
|
||||
epoch: 10000
|
||||
check_val_every_n_epoch: 1
|
||||
val_check_interval: null
|
||||
checkpoint_epochs: 1
|
||||
log_every_n_steps: 10
|
||||
num_gpus: 1
|
||||
monitor: "val/rmsd"
|
||||
monitor_mode: "min"
|
||||
accumulate_grad_batches: 1
|
||||
gradient_clip_val: 1.0
|
||||
precision: bf16-mixed
|
||||
6
configs/cgflow/default/transform/complex.yaml
Normal file
6
configs/cgflow/default/transform/complex.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
_registry_: transform
|
||||
_type_: ComplexTransform
|
||||
radius: null
|
||||
rotate: true
|
||||
zero_com: "ligand"
|
||||
center_noise: 2.0
|
||||
2
configs/cgflow/default/transform/identical.yaml
Normal file
2
configs/cgflow/default/transform/identical.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
_registry_: transform
|
||||
_type_: IdenticalTransform
|
||||
9
configs/cgflow/test.yaml
Normal file
9
configs/cgflow/test.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
test_dataset:
|
||||
_yaml_: default/dataset/complex_lmdb.yaml
|
||||
data_path: ./data/experiments/cgflow/plinder/lmdb/test/
|
||||
key_path: ./data/experiments/cgflow/plinder/keys/test.txt
|
||||
|
||||
datamodule:
|
||||
_yaml_: default/datamodule/simple_dm.yaml
|
||||
batch_size: 100
|
||||
num_workers: 10
|
||||
44
configs/cgflow/train.yaml
Normal file
44
configs/cgflow/train.yaml
Normal file
@@ -0,0 +1,44 @@
|
||||
trainer:
|
||||
_yaml_: default/trainer/train.yaml
|
||||
wandb_name: plinder
|
||||
wandb_group: train
|
||||
save_dir: ./result/pretrain/plinder
|
||||
num_gpus: 8
|
||||
|
||||
datamodule:
|
||||
_yaml_: default/datamodule/bucket_dm.yaml
|
||||
batch_cost: 7500
|
||||
num_workers: 3
|
||||
|
||||
train_dataset:
|
||||
_yaml_: default/dataset/complex_lmdb.yaml
|
||||
data_path: ./data/experiments/cgflow/plinder/lmdb/train/
|
||||
key_path: ./data/experiments/cgflow/plinder/keys/train.txt
|
||||
|
||||
val_dataset:
|
||||
_yaml_: default/dataset/complex_lmdb.yaml
|
||||
data_path: ./data/experiments/cgflow/plinder/lmdb/val/
|
||||
key_path: ./data/experiments/cgflow/plinder/keys/val.txt
|
||||
|
||||
test_dataset: null
|
||||
|
||||
cfm:
|
||||
_yaml_: default/cfm/ar_fm.yaml
|
||||
|
||||
interpolant:
|
||||
_yaml_: default/interpolant/rxn_till_end.yaml
|
||||
|
||||
pocket_encoder:
|
||||
_yaml_: default/model/pocket_base.yaml
|
||||
|
||||
ligand_decoder:
|
||||
_yaml_: default/model/ligand_base.yaml
|
||||
|
||||
prior_dist:
|
||||
_yaml_: default/prior_dist/gaussian.yaml
|
||||
|
||||
time_dist:
|
||||
_yaml_: default/time_dist/beta.yaml
|
||||
|
||||
transform:
|
||||
_yaml_: default/transform/complex.yaml
|
||||
26
configs/multi_pocket/tacogfn_zincdock.yaml
Normal file
26
configs/multi_pocket/tacogfn_zincdock.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# save dir
|
||||
name: unif_1_64
|
||||
result_dir: "./result/multi_pocket/tacogfn_zincdock"
|
||||
|
||||
# pocket conditional proxy
|
||||
proxy: ['TacoGFN_Reward', 'QVina', 'ZINCDock15M'] # ZINCDock15M or CrossDock2020
|
||||
protein_dir: "./data/experiments/CrossDocked2020/protein/train/"
|
||||
train_key_path: "./data/experiments/CrossDocked2020/center_info/train.csv"
|
||||
|
||||
# environment
|
||||
env_dir: ./data/envs/enamine_stock
|
||||
max_atoms: 40 # maximum atom counts
|
||||
subsampling_ratio: 0.1 # Memory-variance trade-off
|
||||
|
||||
# opt
|
||||
num_steps: 100_000
|
||||
num_sampling_per_step: 32
|
||||
temperature: [1, 64] # uniform(1, 64)
|
||||
seed: 1
|
||||
|
||||
# pose prediction
|
||||
pose_model: "./weights/cgflow_crossdock.ckpt"
|
||||
pose_steps: 20
|
||||
|
||||
# extras
|
||||
random_action_prob: 0.2
|
||||
29
configs/opt/aldh1_unidock.yaml
Normal file
29
configs/opt/aldh1_unidock.yaml
Normal file
@@ -0,0 +1,29 @@
|
||||
# save dir
|
||||
result_dir: "./result/opt/unidock_qed/aldh1"
|
||||
|
||||
# environment
|
||||
env_dir: ./data/envs/enamine_stock
|
||||
max_atoms: 40 # maximum atom counts
|
||||
subsampling_ratio: 0.1 # Memory-variance trade-off
|
||||
|
||||
# docking
|
||||
protein_path: ./data/examples/aldh1_protein.pdb
|
||||
center: null # search box center
|
||||
ref_ligand_path: ./data/examples/aldh1_ligand.mol2 # to determine the center
|
||||
size: [22.5, 22.5, 22.5] # search box size
|
||||
|
||||
# opt
|
||||
num_steps: 2000
|
||||
num_sampling_per_step: 32
|
||||
temperature: [1, 64] # uniform(1, 64)
|
||||
seed: 1
|
||||
|
||||
# pose prediction
|
||||
pose_model: "./weights/cgflow_crossdock.ckpt"
|
||||
pose_steps: 40
|
||||
|
||||
# extras
|
||||
sampling_tau: 0.9 # EMA
|
||||
random_action_prob: 0.05 # suggest to set positive value
|
||||
replay_warmup_step: 10 # 10 steps
|
||||
replay_capacity: 6400
|
||||
34
configs/opt/aldh1_vina.yaml
Normal file
34
configs/opt/aldh1_vina.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# save dir
|
||||
result_dir: "./result/opt/vina_qed/aldh1"
|
||||
|
||||
# environment
|
||||
env_dir: ./data/envs/enamine_stock
|
||||
max_atoms: 40 # maximum atom counts
|
||||
subsampling_ratio: 0.1 # Memory-variance trade-off
|
||||
|
||||
# docking
|
||||
protein_path: ./data/examples/aldh1_protein.pdb
|
||||
center: null # search box center
|
||||
ref_ligand_path: ./data/examples/aldh1_ligand.mol2 # to determine the center
|
||||
size: [22.5, 22.5, 22.5] # search box size
|
||||
exhaustiveness: 8 # vina redocking
|
||||
|
||||
# opt
|
||||
num_steps: 2000
|
||||
num_sampling_per_step: 32
|
||||
temperature: [1, 64] # uniform(1, 64)
|
||||
seed: 1
|
||||
|
||||
# pose refining
|
||||
refine: "local_opt" # [local_opt, redock]
|
||||
ff_opt: "uff"
|
||||
|
||||
# pose prediction
|
||||
pose_model: "./weights/cgflow_crossdock.ckpt"
|
||||
pose_steps: 40
|
||||
|
||||
# extras
|
||||
sampling_tau: 0.9 # EMA
|
||||
random_action_prob: 0.05 # suggest to set positive value
|
||||
replay_warmup_step: 10 # 10 steps
|
||||
replay_capacity: 6400
|
||||
60
data/README.md
Normal file
60
data/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# Data Preparation
|
||||
|
||||
## Optimization
|
||||
|
||||
### 1: Building blocks extraction
|
||||
|
||||
#### Option A: From Enamine Catalog or Stock
|
||||
|
||||
Use the "Comprehensive Catalog" or "Stock" from [Enamine](https://enamine.net/building-blocks/building-blocks-catalog):
|
||||
|
||||
```bash
|
||||
cd experiments/data
|
||||
|
||||
# case 1 - extract smiles from the Enamine Catalog SDF file
|
||||
python scripts/a_catalog_to_smi.py -b <CATALOG_SDF> -o building_blocks/enamine_catalog.smi --cpu <CPU>
|
||||
# case 2 - extract smiles from the Enamine Stock SDF file
|
||||
python scripts/a_stock_to_smi.py -b <STOCK_SDF> -o building_blocks/enamine_stock.smi --cpu <CPU>
|
||||
```
|
||||
|
||||
#### Option B: From custom SMI file
|
||||
|
||||
```bash
|
||||
python scripts/a_refine_smi.py -b <BLOCK_SMI> -o building_blocks/custom_block.smi --cpu <CPU>
|
||||
```
|
||||
|
||||
### 2: Drug-like filtering (optional)
|
||||
|
||||
```bash
|
||||
python scripts/b_druglike_filter.py -b building_blocks/enamine_stock.smi -o building_blocks/enamine_stock_druglike.smi --cuda
|
||||
```
|
||||
|
||||
### 3: Environment construction
|
||||
|
||||
```bash
|
||||
python scripts/c_create_env.py -b building_blocks/enamine_stock.smi -o envs/enamine_stock/ --cpu <CPU>
|
||||
```
|
||||
|
||||
---
|
||||
## Multi-pocket training
|
||||
|
||||
Download the CrossDocked2020 dataset used in RxnFlow ([Google drive](https://drive.google.com/drive/folders/1e5pPZaTRGhvEMky3K2OKQ9-jV_NweK-a)):
|
||||
|
||||
```bash
|
||||
cd experiments/
|
||||
gdown --id 1iGr053FDC9tCYz4es4cRJ6WpkEEi3CAW -O CrossDocked2020_all.tar.gz
|
||||
tar -xvzf CrossDocked2020_all.tar.gz
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pretraining the pose prediction model
|
||||
|
||||
Download and prepare the preprocessed PLINDER dataset for pose prediction pretraining:
|
||||
|
||||
```bash
|
||||
mkdir -p data/experiments/cgflow/plinder
|
||||
cd data/experiments/cgflow/plinder
|
||||
gdown --id 1dhH1Yfdr9L2lt-JlwylxS2Um-kU3ZZUZ -O plinder_20A.tar.gz
|
||||
tar -xzf plinder_20A.tar.gz
|
||||
```
|
||||
123
data/examples/aldh1_ligand.mol2
Normal file
123
data/examples/aldh1_ligand.mol2
Normal file
@@ -0,0 +1,123 @@
|
||||
@<TRIPOS>MOLECULE
|
||||
****
|
||||
56 59 0 0 0
|
||||
SMALL
|
||||
NO_CHARGES
|
||||
|
||||
@<TRIPOS>ATOM
|
||||
1 H1 32.3370 -18.6330 11.3830 H 1 <0> 0.0000
|
||||
2 H2 29.8970 -15.3670 14.1370 H 1 <0> 0.0000
|
||||
3 H3 41.1380 -13.9030 16.5370 H 1 <0> 0.0000
|
||||
4 H4 40.9990 -15.0260 17.7140 H 1 <0> 0.0000
|
||||
5 H5 40.7610 -15.5240 14.8740 H 1 <0> 0.0000
|
||||
6 H6 41.9560 -16.0750 15.8410 H 1 <0> 0.0000
|
||||
7 H7 38.9940 -13.9200 17.7140 H 1 <0> 0.0000
|
||||
8 H8 38.8950 -13.9590 16.0840 H 1 <0> 0.0000
|
||||
9 H9 40.3940 -17.8440 15.4610 H 1 <0> 0.0000
|
||||
10 H10 40.2940 -17.4070 17.0310 H 1 <0> 0.0000
|
||||
11 H11 38.5000 -16.2140 17.8540 H 1 <0> 0.0000
|
||||
12 H12 37.3740 -15.5520 16.8730 H 1 <0> 0.0000
|
||||
13 H13 34.2110 -19.1090 11.9500 H 1 <0> 0.0000
|
||||
14 H14 35.5520 -18.2890 12.3920 H 1 <0> 0.0000
|
||||
15 H15 34.9780 -19.4880 13.3410 H 1 <0> 0.0000
|
||||
16 H16 30.8610 -19.2730 9.6820 H 1 <0> 0.0000
|
||||
17 H17 29.3000 -19.7460 9.5970 H 1 <0> 0.0000
|
||||
18 H18 29.8390 -18.5340 8.6440 H 1 <0> 0.0000
|
||||
19 H19 26.9820 -17.7180 9.4510 H 1 <0> 0.0000
|
||||
20 H20 26.2740 -17.3450 10.8750 H 1 <0> 0.0000
|
||||
21 H21 26.8450 -16.1570 9.9110 H 1 <0> 0.0000
|
||||
22 H22 35.8270 -17.2180 16.1770 H 1 <0> 0.0000
|
||||
23 H23 36.0790 -18.2460 14.9340 H 1 <0> 0.0000
|
||||
24 H24 36.6700 -15.4240 14.9940 H 1 <0> 0.0000
|
||||
25 H25 36.5600 -16.2200 13.5720 H 1 <0> 0.0000
|
||||
26 C1 33.8620 -17.7350 13.4750 C.3 1 <0> 0.0000
|
||||
27 C2 34.4030 -17.0460 14.6730 C.3 1 <0> 0.0000
|
||||
28 C3 29.5590 -17.9640 10.6110 C.3 1 <0> 0.0000
|
||||
29 C4 28.3330 -17.1770 10.9180 C.3 1 <0> 0.0000
|
||||
30 C5 31.8510 -17.9710 11.9530 C.ar 1 <0> 0.0000
|
||||
31 C6 30.3930 -16.0350 13.5810 C.ar 1 <0> 0.0000
|
||||
32 C7 32.4630 -17.3990 13.0640 C.ar 1 <0> 0.0000
|
||||
33 C8 30.5520 -17.5980 11.6590 C.ar 1 <0> 0.0000
|
||||
34 C9 33.5360 -16.0740 15.4040 C.2 1 <0> 0.0000
|
||||
35 C10 31.6990 -16.4000 13.8920 C.ar 1 <0> 0.0000
|
||||
36 C11 29.8050 -16.6230 12.4830 C.ar 1 <0> 0.0000
|
||||
37 C12 38.1760 -16.8240 14.7230 C.2 1 <0> 0.0000
|
||||
38 C13 40.6690 -14.7420 16.8140 C.3 1 <0> 0.0000
|
||||
39 C14 40.9830 -15.8510 15.7930 C.3 1 <0> 0.0000
|
||||
40 C15 39.1740 -14.4690 16.8980 C.3 1 <0> 0.0000
|
||||
41 C16 40.1690 -17.0980 16.0880 C.3 1 <0> 0.0000
|
||||
42 C17 38.3480 -15.7540 16.9790 C.3 1 <0> 0.0000
|
||||
43 C18 34.7250 -18.7420 12.7250 C.3 1 <0> 0.0000
|
||||
44 C19 29.9200 -18.9630 9.5440 C.3 1 <0> 0.0000
|
||||
45 C20 26.9960 -17.0920 10.2310 C.3 1 <0> 0.0000
|
||||
46 C21 35.8060 -17.3170 15.1820 C.3 1 <0> 0.0000
|
||||
47 C22 36.7660 -16.3130 14.5460 C.3 1 <0> 0.0000
|
||||
48 N1 38.8280 -16.5930 15.8620 N.am 1 <0> 0.0000
|
||||
49 O1 33.9620 -15.4870 16.4140 O.2 1 <0> 0.0000
|
||||
50 O2 38.6770 -17.4190 13.7850 O.2 1 <0> 0.0000
|
||||
51 O3 28.5840 -16.3960 12.0290 O.3 1 <0> 0.0000
|
||||
52 O4 32.2610 -15.8060 14.9790 O.3 1 <0> 0.0000
|
||||
53 H26 33.8620 -17.7350 13.4750 H 1 <0> 0.0000
|
||||
54 H27 34.4030 -17.0460 14.6730 H 1 <0> 0.0000
|
||||
55 H28 29.5590 -17.9640 10.6110 H 1 <0> 0.0000
|
||||
56 H29 28.3330 -17.1770 10.9180 H 1 <0> 0.0000
|
||||
@<TRIPOS>BOND
|
||||
1 1 30 1
|
||||
2 2 31 1
|
||||
3 3 38 1
|
||||
4 4 38 1
|
||||
5 5 39 1
|
||||
6 6 39 1
|
||||
7 7 40 1
|
||||
8 8 40 1
|
||||
9 9 41 1
|
||||
10 10 41 1
|
||||
11 11 42 1
|
||||
12 12 42 1
|
||||
13 13 43 1
|
||||
14 14 43 1
|
||||
15 15 43 1
|
||||
16 16 44 1
|
||||
17 17 44 1
|
||||
18 18 44 1
|
||||
19 19 45 1
|
||||
20 20 45 1
|
||||
21 21 45 1
|
||||
22 22 46 1
|
||||
23 23 46 1
|
||||
24 24 47 1
|
||||
25 25 47 1
|
||||
26 26 27 1
|
||||
27 26 32 1
|
||||
28 26 43 1
|
||||
29 27 34 1
|
||||
30 27 46 1
|
||||
31 28 29 1
|
||||
32 28 33 1
|
||||
33 28 44 1
|
||||
34 29 45 1
|
||||
35 29 51 1
|
||||
36 30 32 ar
|
||||
37 30 33 ar
|
||||
38 31 35 ar
|
||||
39 31 36 ar
|
||||
40 32 35 ar
|
||||
41 33 36 ar
|
||||
42 34 49 2
|
||||
43 34 52 1
|
||||
44 35 52 1
|
||||
45 36 51 1
|
||||
46 37 47 1
|
||||
47 37 48 am
|
||||
48 37 50 2
|
||||
49 38 39 1
|
||||
50 38 40 1
|
||||
51 39 41 1
|
||||
52 40 42 1
|
||||
53 41 48 1
|
||||
54 42 48 1
|
||||
55 46 47 1
|
||||
56 26 53 1
|
||||
57 27 54 1
|
||||
58 28 55 1
|
||||
59 29 56 1
|
||||
15114
data/examples/aldh1_protein.pdb
Normal file
15114
data/examples/aldh1_protein.pdb
Normal file
File diff suppressed because it is too large
Load Diff
93
data/scripts/A1_1_extract_pocket_plinder.py
Normal file
93
data/scripts/A1_1_extract_pocket_plinder.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import gc
|
||||
import shutil
|
||||
from multiprocessing import Pool
|
||||
from pathlib import Path
|
||||
|
||||
import tqdm
|
||||
from plinder.core import PlinderSystem
|
||||
from plinder.core.loader import PlinderDataset
|
||||
from rdkit import Chem
|
||||
|
||||
from cgflow.util.data.molrepr import LigandMol
|
||||
from cgflow.util.data.pocket import ProteinPocket
|
||||
from cgflow.util.data.vocab import ATOMS
|
||||
from synthflow.utils import extract_pocket
|
||||
|
||||
|
||||
def runner(key):
    """Extract a 20 A pocket and copy the ligand SDF for each ligand of one PLINDER system.

    For every ligand of the system, ``pocket_20A.pdb`` and ``ligand.sdf`` are
    written to ``out_dir / key / <ligand key>/``. ``out_dir`` is a module-level
    name assigned in the ``__main__`` section before the worker pool starts.

    Args:
        key: PLINDER system id.

    Returns:
        None. Failures are reported on stdout; the affected work is skipped.
    """
    savedir = out_dir / key

    try:
        system = PlinderSystem(system_id=key)
        # NOTE(review): machine-specific archive path assigned to a private attribute — confirm.
        system._archive = Path("/home/shwan/.local/share/plinder/2024-06/v2/systems/") / key
        receptor_pdb = system.receptor_pdb
        ligand_sdfs = system.ligand_sdfs.items()
    except Exception as e:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print(f"fail {key}: {e}")
        return

    # pdb, bio_assembly, rec_chain_id, lig_chain_id = key.split("__")
    for lig_key, lig_sdf in ligand_sdfs:
        try:
            system_dir = savedir / lig_key
            save_pocket_path = system_dir / "pocket_20A.pdb"
            save_ligand_path = system_dir / "ligand.sdf"
            # ligand.sdf is written last, so its presence marks a finished entry.
            if save_ligand_path.exists():
                continue

            smi = system.smiles[lig_key]
            mol = Chem.MolFromSmiles(smi)
            if any(atom.GetSymbol() not in ATOMS for atom in mol.GetAtoms()):
                print(f"pass {key}/{lig_key}: {smi}")
                continue

            system_dir.mkdir(exist_ok=True, parents=True)

            try:
                extract_pocket.extract_pocket_from_center(
                    receptor_pdb, save_pocket_path, cutoff=20, ref_ligand_path=lig_sdf
                )
                assert save_pocket_path.exists()
                # Parse both files once so that unreadable entries are rejected here.
                LigandMol.from_sdf(lig_sdf)
                ProteinPocket.from_pdb(save_pocket_path, infer_res_bonds=True, sanitize=True)
            except Exception:
                # KeyboardInterrupt is not an Exception subclass, so it is not caught here.
                print(f"fail {key}/{lig_key}")
                if save_pocket_path.exists():
                    save_pocket_path.unlink()
                system_dir.rmdir()
            else:
                shutil.copy(lig_sdf, save_ligand_path)
        except Exception as e:
            # Unexpected error: report it and give up on the rest of this system
            # (same early exit as before, but no longer silent).
            print(f"fail {key}/{lig_key}: {e}")
            return
|
||||
|
||||
|
||||
if __name__ == "__main__":
    ROOT_DIR = Path("/home/shwan/Project/CGFlow/data/plinder/files_20A/")
    ROOT_DIR.mkdir(exist_ok=True, parents=True)

    # (split name, sort the system ids?) — the validation ids are used in the
    # order the dataset provides them, the training ids are sorted first.
    for split_name, sort_ids in (("val", False), ("train", True)):
        dataset = PlinderDataset(split=split_name, use_alternate_structures=False)
        keys = sorted(dataset._system_ids) if sort_ids else dataset._system_ids
        del dataset
        gc.collect()

        # `runner` reads this module-level name to decide where to write.
        out_dir = ROOT_DIR / split_name
        with tqdm.tqdm(total=len(keys)) as pbar, Pool(4) as pool:
            for _ in pool.imap_unordered(runner, keys):
                pbar.update(1)
|
||||
82
data/scripts/A1_2_get_plinder_lmdb.py
Normal file
82
data/scripts/A1_2_get_plinder_lmdb.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import gc
|
||||
from pathlib import Path
|
||||
|
||||
import lmdb
|
||||
from tqdm import tqdm
|
||||
|
||||
from cgflow.util.data.molrepr import LigandMol
|
||||
from cgflow.util.data.pocket import PocketComplex, ProteinPocket
|
||||
|
||||
|
||||
def _write_plinder_lmdb(file_dir, save_dir, key_path, map_size, commit_every=None):
    """Serialize every ``<system>/<ligand>`` directory under ``file_dir`` into an LMDB.

    Each ligand directory is expected to hold ``ligand.sdf`` and ``pocket_20A.pdb``.
    Entries that fail to load are reported and skipped. The key of every stored
    complex is also appended to ``key_path``, one per line.

    Args:
        file_dir: Directory containing one sub-directory per system.
        save_dir: LMDB environment directory.
        key_path: Text file receiving the stored keys.
        map_size: LMDB map size in bytes.
        commit_every: If given, commit and reopen the write transaction after
            this many stored entries; otherwise use one transaction.
    """
    env = lmdb.Environment(str(save_dir), map_size=map_size)
    try:
        with key_path.open("w") as key_writer:
            txn = env.begin(write=True)
            try:
                pending = 0
                for system_dir in tqdm(sorted(file_dir.iterdir(), key=lambda k: k.name)):
                    for ligand_dir in sorted(system_dir.iterdir(), key=lambda k: k.name):
                        complex_key = f"{system_dir.name}/{ligand_dir.name}"
                        ligand_sdf_path = ligand_dir / "ligand.sdf"
                        pocket_pdb_path = ligand_dir / "pocket_20A.pdb"
                        try:
                            lig_obj = LigandMol.from_sdf(ligand_sdf_path)
                            poc_obj = ProteinPocket.from_pdb(pocket_pdb_path, infer_res_bonds=True, sanitize=True)
                            complex_bytes = PocketComplex(poc_obj, lig_obj).to_bytes()
                        except Exception as e:
                            print(f"Fail to process {complex_key}: {e}")
                            continue
                        txn.put(complex_key.encode(), complex_bytes)
                        key_writer.write(complex_key + "\n")
                        pending += 1
                        if commit_every is not None and pending == commit_every:
                            # Flush periodically so one transaction does not hold the whole split.
                            pending = 0
                            txn.commit()
                            gc.collect()
                            txn = env.begin(write=True)
                txn.commit()
            except BaseException:
                # Do not leave a write transaction open when something unexpected escapes.
                txn.abort()
                raise
        env.sync()
    finally:
        env.close()


def main():
    """Build the validation and training LMDB databases from the extracted PLINDER files."""
    ROOT_DIR = Path("/home/shwan/Project/CGFlow/data/plinder/")
    FILE_DIR = ROOT_DIR / "files_20A"
    SAVE_DIR = ROOT_DIR / "extract_20A" / "lmdb"
    KEY_DIR = ROOT_DIR / "extract_20A" / "keys"
    SAVE_DIR.mkdir(exist_ok=True, parents=True)
    KEY_DIR.mkdir(exist_ok=True, parents=True)

    # val set: single transaction
    _write_plinder_lmdb(FILE_DIR / "val", SAVE_DIR / "val", KEY_DIR / "val.txt", map_size=int(1e10))

    # train set: commit every 10000 stored complexes
    _write_plinder_lmdb(
        FILE_DIR / "train", SAVE_DIR / "train", KEY_DIR / "train.txt", map_size=int(1e11), commit_every=10000
    )


if __name__ == "__main__":
    main()
|
||||
40
data/scripts/A2_1_extract_pocket_zincdock.py
Normal file
40
data/scripts/A2_1_extract_pocket_zincdock.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from multiprocessing import Pool
|
||||
from pathlib import Path
|
||||
|
||||
import tqdm
|
||||
|
||||
from cgflow.util.pocket import ProteinPocket
|
||||
from synthflow.utils import extract_pocket
|
||||
|
||||
PROTEIN_ROOT_DIR = Path("/home/shwan/DATA/ZINCDock/protein/train/")
|
||||
SAVE_DIR = Path("/home/shwan/Project/CGFlow/data/zincdock_data/pocket_15A/files/")
|
||||
|
||||
|
||||
def runner(line):
    """Extract and validate the 15 A pocket described by one row of the center CSV.

    Args:
        line: One CSV row of the form ``<key>,<x>,<y>,<z>``. Blank rows are ignored.

    Returns:
        None. The pocket is written to ``SAVE_DIR / "<key>.pdb"``; on failure the
        key is reported on stdout and any partial output file is removed.
    """
    if not line.strip():
        # A blank row (e.g. a trailing newline at the end of the file) carries no
        # work; unpacking it would raise and take down the whole worker pool.
        return None

    key, x, y, z = line.split(",")
    center = float(x), float(y), float(z)

    # pdb, bio_assembly, rec_chain_id, lig_chain_id = key.split("__")
    receptor_pdb = PROTEIN_ROOT_DIR / (key + ".pdb")
    out_pocket_path = SAVE_DIR / receptor_pdb.name
    try:
        extract_pocket.extract_pocket_from_center(receptor_pdb, out_pocket_path, center, cutoff=15)
        assert out_pocket_path.exists()
        # Parse the result once so that pockets that cannot be loaded are discarded.
        ProteinPocket.from_pdb(out_pocket_path, infer_res_bonds=True, sanitize=True)
    except Exception:
        # KeyboardInterrupt is not an Exception subclass and still propagates.
        print(f"fail {key}")
        if out_pocket_path.exists():
            out_pocket_path.unlink()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    SAVE_DIR.mkdir(parents=True, exist_ok=True)

    # One CSV row per receptor; each row is handed to `runner` unchanged.
    with open("/home/shwan/DATA/ZINCDock/center_info/train.csv") as csv_file:
        rows = csv_file.readlines()

    with tqdm.tqdm(total=len(rows)) as progress, Pool(4) as workers:
        for _ in workers.imap_unordered(runner, rows):
            progress.update(1)
|
||||
110
data/scripts/A2_2_get_zincdock_lmdb.py
Normal file
110
data/scripts/A2_2_get_zincdock_lmdb.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import multiprocessing
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
|
||||
import lmdb
|
||||
import tqdm
|
||||
from rdkit import Chem
|
||||
|
||||
from cgflow.util.molrepr import GeometricMol
|
||||
from cgflow.util.pocket import PocketComplex, ProteinPocket
|
||||
|
||||
|
||||
def run(key: str, tmp_dir: Path):
    """Cache one pocket and up to 100 of its ligands as a pickle file.

    Reads ``POCKET_FILE_DIR / "<key>.pdb"`` and ``LIGAND_FILE_DIR / "<key>.sdf"``
    (module-level names set in the ``__main__`` section) and writes a pickled
    ``(pocket_bytes, [ligand_bytes, ...])`` tuple to ``tmp_dir / "<key>.pkl"``.
    Does nothing when that file already exists.

    Args:
        key: File stem shared by the pocket PDB and the ligand SDF.
        tmp_dir: Directory receiving the pickle file.
    """
    save_path = tmp_dir / f"{key}.pkl"
    if save_path.exists():
        return
    pocket_path = POCKET_FILE_DIR / f"{key}.pdb"
    ligand_path = LIGAND_FILE_DIR / f"{key}.sdf"

    # SDMolSupplier yields None for records it cannot parse; drop those before sampling.
    mols = [mol for mol in Chem.SDMolSupplier(str(ligand_path)) if mol is not None]
    random.shuffle(mols)
    mols = mols[:100]

    poc_obj = ProteinPocket.from_pdb(pocket_path, infer_res_bonds=True, sanitize=True)
    lig_objs = [GeometricMol.from_rdkit(mol) for mol in mols]
    poc_byte = poc_obj.to_bytes()
    lig_bytes = [obj.to_bytes() for obj in lig_objs]

    # Write to a sibling file and rename it into place, so an interrupted run
    # never leaves a truncated "<key>.pkl" that the exists() check above would
    # later mistake for a finished entry.
    partial_path = save_path.with_name(save_path.name + ".tmp")
    with open(partial_path, "wb") as f:
        pickle.dump((poc_byte, lig_bytes), f)
    partial_path.replace(save_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Module-level configuration; `run` reads POCKET_FILE_DIR and LIGAND_FILE_DIR.
    NUM_WORKERS = len(os.sched_getaffinity(0))
    ROOT_DIR = Path("/home/shwan/Project/CGFlow/data/zincdock_data/pocket_15A/")
    POCKET_FILE_DIR = ROOT_DIR / "files"
    LIGAND_FILE_DIR = Path("/home/shwan/DATA/ZINCDock/data/docking/train/0_1000/")
    SAVE_DIR = ROOT_DIR / "lmdb"
    TMP_DIR = ROOT_DIR / "tmp_pkl"
    KEY_DIR = ROOT_DIR / "keys"
    for directory in (SAVE_DIR, KEY_DIR, TMP_DIR):
        directory.mkdir(exist_ok=True)

    random.seed(0)
    # Keep only keys that have both a pocket file and a ligand file.
    pocket_stems = {path.stem for path in POCKET_FILE_DIR.iterdir()}
    ligand_stems = {path.stem for path in LIGAND_FILE_DIR.iterdir()}
    complex_keys = sorted(pocket_stems & ligand_stems)
    print(len(complex_keys))
    random.shuffle(complex_keys)

    train_keys, val_keys = complex_keys[:14000], complex_keys[14000:]
    print("split:", len(train_keys), len(val_keys))

    with (KEY_DIR / "train.txt").open("w") as w:
        w.writelines(key + "\n" for key in train_keys)
    with (KEY_DIR / "val.txt").open("w") as w:
        w.writelines(key + "\n" for key in val_keys)

    worker = partial(run, tmp_dir=TMP_DIR)

    # ---- val set ----
    keys = val_keys
    save_dir = str(SAVE_DIR / "val")

    with tqdm.trange(len(keys), unit="data", desc="Preprocessing") as pbar, multiprocessing.Pool(NUM_WORKERS) as pool:
        for _ in pool.imap_unordered(worker, keys):
            pbar.update(1)

    print("save validation set")
    env = lmdb.Environment(save_dir, map_size=int(5e9))  # 5gb
    with env.begin(write=True) as txn:
        for key in keys:
            txn.put(key.encode(), (TMP_DIR / f"{key}.pkl").read_bytes())
    env.close()

    # Read the first stored entry back and rebuild the objects from it.
    env = lmdb.Environment(save_dir, readonly=True, map_size=int(5e9))
    with env.begin() as txn:
        poc_byte, lig_bytes = pickle.loads(txn.get(keys[0].encode()))
        poc_obj = ProteinPocket.from_bytes(poc_byte)
        lig_obj = GeometricMol.from_bytes(lig_bytes[0])
        complex_obj = PocketComplex(poc_obj, lig_obj)
    env.close()

    # ---- train set ----
    keys = train_keys
    save_dir = str(SAVE_DIR / "train")

    with tqdm.trange(len(keys), unit="data", desc="Preprocessing") as pbar, multiprocessing.Pool(NUM_WORKERS) as pool:
        for _ in pool.imap_unordered(worker, keys):
            pbar.update(1)

    print("save train set")
    env = lmdb.Environment(save_dir, map_size=int(1e11))  # 100gb
    with env.begin(write=True) as txn:
        for key in keys:
            txn.put(key.encode(), (TMP_DIR / f"{key}.pkl").read_bytes())
    env.close()
|
||||
63
data/scripts/A3_1_extract_pocket_crossdock.py
Normal file
63
data/scripts/A3_1_extract_pocket_crossdock.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import pickle
|
||||
import shutil
|
||||
import warnings
|
||||
from multiprocessing import Pool
|
||||
from pathlib import Path
|
||||
|
||||
import tqdm
|
||||
|
||||
from cgflow.util.data.pocket import ProteinPocket
|
||||
from synthflow.utils.extract_pocket import extract_pocket_from_center
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
PROTEIN_KEYS = Path("/home/shwan/DATA/CrossDocked2020/center_info/train.csv")
|
||||
LIGAND_ROOT_DIR = Path("/home/shwan/DATA/CrossDocked2020/crossdocked_pocket10/")
|
||||
PROTEIN_ROOT_DIR = Path("/home/shwan/DATA/CrossDocked2020/protein/train/pdb/")
|
||||
SAVE_DIR = Path("/home/shwan/Project/CGFlow/data/crossdock/pocket_15A/files/")
|
||||
|
||||
|
||||
def runner(line):
    """Extract and validate the 15 A pocket described by one row of the center CSV.

    Args:
        line: One CSV row of the form ``<key>,<x>,<y>,<z>``. Blank rows are ignored.

    Returns:
        None. The pocket is written to ``SAVE_DIR / <key> / "pocket_15A.pdb"``; on
        failure the key is reported on stdout and any partial output file is removed.
    """
    if not line.strip():
        # A blank row (e.g. a trailing newline at the end of the file) carries no
        # work; unpacking it would raise and take down the whole worker pool.
        return None

    key, x, y, z = line.split(",")
    center = float(x), float(y), float(z)

    # pdb, bio_assembly, rec_chain_id, lig_chain_id = key.split("__")
    receptor_pdb = PROTEIN_ROOT_DIR / (key + ".pdb")
    out_pocket_path = SAVE_DIR / key / "pocket_15A.pdb"
    out_pocket_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        extract_pocket_from_center(receptor_pdb, out_pocket_path, center, cutoff=15)
        assert out_pocket_path.exists()
        # Parse the result once so that pockets that cannot be loaded are discarded.
        ProteinPocket.from_pdb(out_pocket_path, infer_res_bonds=True, sanitize=True)
    except Exception:
        # KeyboardInterrupt is not an Exception subclass and still propagates.
        print(f"fail {key}")
        if out_pocket_path.exists():
            out_pocket_path.unlink()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    SAVE_DIR.mkdir(parents=True, exist_ok=True)

    # Use the PROTEIN_KEYS constant instead of repeating the CSV path literal.
    # `runner` parses each row as "<key>,<x>,<y>,<z>".
    with PROTEIN_KEYS.open() as f:
        lines = f.readlines()
    keys = {ln.split(",")[0] for ln in lines}

    # Step 1: extract one pocket per CSV row.
    with tqdm.tqdm(total=len(lines)) as pbar:
        with Pool(4) as pool:
            for _ in pool.imap_unordered(runner, lines):
                pbar.update(1)

    # Step 2: copy every training ligand next to the pocket of its protein.
    with open("/home/shwan/DATA/CrossDocked2020/split_by_name.pkl", "rb") as f:
        split = pickle.load(f)

    for _, ligand_fn in tqdm.tqdm(split["train"]):
        ligand_file = LIGAND_ROOT_DIR / ligand_fn
        # The first six characters of the ligand file name are used as the protein key.
        protein_key = ligand_file.stem[:6]
        if protein_key not in keys:
            continue
        save_file = SAVE_DIR / protein_key / ligand_file.name
        if not save_file.exists():
            shutil.copyfile(ligand_file, save_file)
|
||||
90
data/scripts/A3_2_get_crossdock_lmdb.py
Normal file
90
data/scripts/A3_2_get_crossdock_lmdb.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import pickle
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
import lmdb
|
||||
from tqdm import tqdm
|
||||
|
||||
from cgflow.util.data.molrepr import LigandMol
|
||||
from cgflow.util.data.pocket import PocketComplex, ProteinPocket
|
||||
|
||||
|
||||
def _write_crossdock_lmdb(split, file_dir, save_dir, key_path, verbose):
    """Store the pocket-ligand complexes of one split in an LMDB and list their keys.

    Args:
        split: Iterable of 2-item entries whose second item is the ligand file
            name (any leading directories are stripped).
        file_dir: Directory holding ``<protein key>/<ligand file>`` and
            ``<protein key>/pocket_15A.pdb``.
        save_dir: LMDB environment directory.
        key_path: Text file receiving one stored key per line.
        verbose: Print a line for every skipped entry (missing file or load failure).
    """
    env = lmdb.Environment(str(save_dir), map_size=int(1e11))
    try:
        # The transaction context manager commits on success and aborts on error.
        with key_path.open("w") as key_writer, env.begin(write=True) as txn:
            for _, ligand_fn in tqdm(split):
                ligand_fn = ligand_fn.split("/")[-1]
                complex_key = ligand_fn.split(".")[0]
                protein_key = ligand_fn[:6]
                ligand_sdf_path = file_dir / protein_key / ligand_fn
                pocket_pdb_path = file_dir / protein_key / "pocket_15A.pdb"
                if not ligand_sdf_path.is_file() or not pocket_pdb_path.is_file():
                    if verbose:
                        print(ligand_fn, "no file")
                    continue
                try:
                    lig_obj = LigandMol.from_sdf(ligand_sdf_path)
                    poc_obj = ProteinPocket.from_pdb(pocket_pdb_path, infer_res_bonds=True, sanitize=True)
                    complex_obj = PocketComplex(poc_obj, lig_obj)
                except Exception as e:
                    # `except Exception` keeps Ctrl-C working; the train pass
                    # previously used a bare except that swallowed it.
                    if verbose:
                        print(ligand_fn, "fail", e)
                    continue
                txn.put(complex_key.encode(), complex_obj.to_bytes())
                key_writer.write(complex_key + "\n")
    finally:
        env.close()


def main():
    """Split the CrossDocked2020 training list 99000 / rest and build both LMDBs."""
    # NOTE(review): A3_1_extract_pocket_crossdock.py writes to
    # ".../crossdock/pocket_15A/files/", not "extract_15A/files/" — confirm this path.
    FILE_DIR = Path("/home/shwan/Project/CGFlow/data/crossdock/extract_15A/files/")
    ROOT_DIR = Path("/home/shwan/Project/CGFlow/data/crossdock/")
    # ROOT_DIR = Path("/home/shwan/Project/CGFlow/data/crossdock-small/")
    SAVE_DIR = ROOT_DIR / "extract_15A" / "lmdb"
    KEY_DIR = ROOT_DIR / "extract_15A" / "keys"
    SAVE_DIR.mkdir(exist_ok=True, parents=True)
    KEY_DIR.mkdir(exist_ok=True, parents=True)

    # Fixed seed so the train/validation partition is reproducible.
    random.seed(12345)

    with open("/home/shwan/DATA/CrossDocked2020/split_by_name.pkl", "rb") as f:
        split = pickle.load(f)["train"]
    random.shuffle(split)
    train_split, valid_split = split[:99000], split[99000:]
    # train_split, valid_split = split[:1000], split[1000:1100]

    # val set (skipped entries are reported)
    _write_crossdock_lmdb(valid_split, FILE_DIR, SAVE_DIR / "val", KEY_DIR / "val.txt", verbose=True)

    # train set (skipped entries are not reported, as before)
    _write_crossdock_lmdb(train_split, FILE_DIR, SAVE_DIR / "train", KEY_DIR / "train.txt", verbose=False)


if __name__ == "__main__":
    main()
|
||||
52
data/scripts/a_catalog_to_smi.py
Normal file
52
data/scripts/a_catalog_to_smi.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from a_refine_smi import get_clean_smiles
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def main(block_path: str, save_block_path: str, num_cpus: int):
    """Convert an Enamine catalog SDF file into a cleaned ``SMILES<TAB>ID`` file.

    The SDF is read as text: the line following each ``> <smiles>`` tag is taken
    as a SMILES string and the line following each ``> <id>`` tag as its id.
    Every SMILES is passed through ``get_clean_smiles``; rejected molecules
    (``None``) are left out of the output.

    Args:
        block_path: Input ``.sdf`` file.
        save_block_path: Output ``.smi`` file.
        num_cpus: Number of worker processes.
    """
    block_file = Path(block_path)
    assert block_file.suffix == ".sdf"

    print("Read SDF Files")
    with block_file.open() as f:
        lines = f.readlines()

    # Single pass over the file: a value line belongs to whichever tag precedes it.
    smiles_list = []
    ids = []
    for i in tqdm(range(1, len(lines))):
        tag_line = lines[i - 1]
        if tag_line.startswith("> <smiles>"):
            smiles_list.append(lines[i].strip())
        elif tag_line.startswith("> <id>"):
            ids.append(lines[i].strip())

    assert len(smiles_list) == len(ids), "sdf file error, number of <smiles> and <id> should be matched"
    print("Including Mols:", len(smiles_list))

    print("Run Building Blocks...")
    clean_smiles_list = []
    # One pool for all chunks; the chunking only drives the progress bar.
    with multiprocessing.Pool(num_cpus) as pool:
        for idx in tqdm(range(0, len(smiles_list), 10000)):
            chunk = smiles_list[idx : idx + 10000]
            clean_smiles_list.extend(pool.map(get_clean_smiles, chunk))

    with open(save_block_path, "w") as w:
        for smiles, id in zip(clean_smiles_list, ids, strict=True):
            if smiles is not None:
                w.write(f"{smiles}\t{id}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the conversion.
    parser = argparse.ArgumentParser(description="Get clean building blocks")
    parser.add_argument(
        "-b",
        "--building_block_path",
        type=str,
        help="Path to input enamine building block file (.sdf)",
        # Without this, omitting -b passes None to main(), which fails in Path(None).
        required=True,
    )
    parser.add_argument(
        "-o",
        "--out_path",
        type=str,
        help="Path to output smiles file",
        default="./building_blocks/enamine_catalog.smi",
    )
    # Default worker count: the CPUs this process is allowed to run on.
    parser.add_argument("--cpu", type=int, help="Num Workers", default=len(os.sched_getaffinity(0)))
    args = parser.parse_args()

    main(args.building_block_path, args.out_path, args.cpu)
|
||||
113
data/scripts/a_refine_smi.py
Normal file
113
data/scripts/a_refine_smi.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import BondType
|
||||
from tqdm import tqdm
|
||||
|
||||
ATOMS = ["B", "C", "N", "O", "F", "P", "S", "Cl", "Br", "I"]
|
||||
BONDS = [BondType.SINGLE, BondType.DOUBLE, BondType.TRIPLE, BondType.AROMATIC]
|
||||
|
||||
|
||||
def get_clean_smiles(smiles: str) -> str | None:
|
||||
if "[2H]" in smiles or "[13C]" in smiles:
|
||||
return None
|
||||
|
||||
# smi -> mol
|
||||
mol = Chem.MolFromSmiles(smiles, replacements={"[C]": "C", "[CH]": "C", "[CH2]": "C", "[N]": "N"})
|
||||
if mol is None:
|
||||
return None
|
||||
try:
|
||||
Chem.SanitizeMol(mol)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# refine smi
|
||||
smi = Chem.MolToSmiles(mol)
|
||||
if smi is None:
|
||||
return None
|
||||
|
||||
fail = False
|
||||
mol = Chem.MolFromSmiles(smi)
|
||||
for atom in mol.GetAtoms():
|
||||
atom: Chem.Atom
|
||||
if atom.GetSymbol() not in ATOMS:
|
||||
fail = True
|
||||
break
|
||||
elif atom.GetIsotope() != 0:
|
||||
fail = True
|
||||
break
|
||||
if atom.GetFormalCharge() not in [-1, 0, 1]:
|
||||
fail = True
|
||||
break
|
||||
if atom.GetNumExplicitHs() not in [0, 1]:
|
||||
fail = True
|
||||
break
|
||||
if fail:
|
||||
return None
|
||||
|
||||
for bond in mol.GetBonds():
|
||||
if bond.GetBondType() not in BONDS:
|
||||
fail = True
|
||||
break
|
||||
if fail:
|
||||
return None
|
||||
|
||||
# return the largest fragment
|
||||
smis = smi.split(".")
|
||||
smi = max(smis, key=len)
|
||||
|
||||
return smi
|
||||
|
||||
|
||||
def main(
    block_path: str,
    save_block_path: str,
    num_cpus: int,
):
    """Clean a custom building-block SMI file and write ``SMILES<TAB>ID`` lines.

    The first line of the input is skipped as a header. Each remaining
    non-blank line must contain a SMILES followed by an id, separated by
    whitespace. Every SMILES is passed through ``get_clean_smiles``; molecules
    for which it returns ``None`` or an empty string are dropped together with
    their id.

    Args:
        block_path: Input ``.smi`` file.
        save_block_path: Output ``.smi`` file.
        num_cpus: Number of worker processes.
    """
    block_file = Path(block_path)
    assert block_file.suffix == ".smi"

    print("Read SMI file")
    with block_file.open() as f:
        # Skip the header line; ignore blank lines (e.g. a trailing newline).
        lines = [ln for ln in f.readlines()[1:] if ln.strip()]
    smiles_list: list[str] = [ln.strip().split()[0] for ln in lines]
    ids: list[str] = [ln.strip().split()[1] for ln in lines]
    print("Including mols:", len(smiles_list))

    print("Refine building blocks...")
    clean_smiles_list: list[str | None] = []
    # One pool for all chunks; the chunking only drives the progress bar.
    with multiprocessing.Pool(num_cpus) as pool:
        for idx in tqdm(range(0, len(smiles_list), 10000)):
            chunk = smiles_list[idx : idx + 10000]
            clean_smiles_list.extend(pool.map(get_clean_smiles, chunk))

    # Filter SMILES and ids with the same predicate so the pairs stay aligned
    # (previously an empty SMILES kept its id but lost its SMILES).
    with open(save_block_path, "w") as w:
        for smiles, id in zip(clean_smiles_list, ids, strict=True):
            if smiles:
                w.write(f"{smiles}\t{id}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point for refining a custom SMI file.
    cli = argparse.ArgumentParser(description="Get clean building blocks")
    # refine
    cli.add_argument(
        "-b",
        "--building_block_path",
        type=str,
        required=True,
        help="Path to input enamine building block file (.smi)",
    )
    cli.add_argument(
        "-o",
        "--out_path",
        type=str,
        default="./building_blocks/enamine_blocks.smi",
        help="Path to output smiles file",
    )
    # Default worker count: the CPUs this process is allowed to run on.
    cli.add_argument("--cpu", type=int, default=len(os.sched_getaffinity(0)), help="Num Workers")
    opts = cli.parse_args()

    main(opts.building_block_path, opts.out_path, opts.cpu)
|
||||
49
data/scripts/a_stock_to_smi.py
Normal file
49
data/scripts/a_stock_to_smi.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from a_refine_smi import get_clean_smiles
|
||||
from rdkit import Chem
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def main(block_path: str, save_block_path: str, num_cpus: int):
    """Convert an Enamine stock SDF file into a cleaned ``SMILES<TAB>ID`` file.

    Molecules are read with RDKit; records that cannot be parsed are dropped.
    The id is taken from the ``Catalog_ID`` property of each molecule. Every
    SMILES is passed through ``get_clean_smiles``; rejected molecules (``None``)
    are left out of the output.

    Args:
        block_path: Input ``.sdf`` file.
        save_block_path: Output ``.smi`` file.
        num_cpus: Number of worker processes.
    """
    block_file = Path(block_path)
    assert block_file.suffix == ".sdf"

    print("Read SDF Files")
    # The supplier yields None for records it cannot parse.
    mols = [mol for mol in Chem.SDMolSupplier(str(block_file)) if mol is not None]
    ids = [mol.GetProp("Catalog_ID") for mol in mols]
    print("Including Mols:", len(mols))

    print("Run Building Blocks...")
    clean_smiles_list = []
    # One pool for all chunks; the chunking only drives the progress bar.
    with multiprocessing.Pool(num_cpus) as pool:
        for idx in tqdm(range(0, len(mols), 10000)):
            chunk = [Chem.MolToSmiles(mol) for mol in mols[idx : idx + 10000]]
            clean_smiles_list.extend(pool.map(get_clean_smiles, chunk))

    with open(save_block_path, "w") as w:
        for smiles, id in zip(clean_smiles_list, ids, strict=True):
            if smiles is not None:
                w.write(f"{smiles}\t{id}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point for converting the Enamine stock SDF file.
    cli = argparse.ArgumentParser(description="Get clean building blocks")
    cli.add_argument(
        "-b",
        "--building_block_path",
        type=str,
        required=True,
        help="Path to input enamine building block file (.sdf)",
    )
    cli.add_argument(
        "-o",
        "--out_path",
        type=str,
        default="./building_blocks/enamine_stock.smi",
        help="Path to output smiles file",
    )
    # Default worker count: the CPUs this process is allowed to run on.
    cli.add_argument("--cpu", type=int, default=len(os.sched_getaffinity(0)), help="Num Workers")
    opts = cli.parse_args()

    main(opts.building_block_path, opts.out_path, opts.cpu)
|
||||
71
data/scripts/b_druglike_filter.py
Normal file
71
data/scripts/b_druglike_filter.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from druglikeness.deepdl import DeepDL
|
||||
|
||||
|
||||
def main(
    block_path: str,
    save_block_path: str,
    device: str = "cpu",
    threshold: float = 60.0,
):
    """Keep only the building blocks whose DeepDL druglikeness score passes a threshold.

    The input is a whitespace-separated ``.smi`` file whose first line is a
    header and whose remaining lines start with ``<smiles> <id>``. Every
    molecule is scored with the pretrained "extended" DeepDL model, and the
    molecules that are not below ``threshold`` are written as
    ``<smiles>\\t<id>\\t<score>`` rows (score with two decimals).

    Args:
        block_path: Path to the input ``.smi`` file.
        save_block_path: Path of the output file. Missing parent directories
            are created.
        device: Device string handed to ``DeepDL.from_pretrained``.
        threshold: Minimum score a molecule needs in order to be kept.

    Raises:
        AssertionError: If ``block_path`` does not have the ``.smi`` suffix.
    """
    block_file = Path(block_path)
    assert block_file.suffix == ".smi"

    print("Read SMI file")
    with block_file.open() as f:
        # The first line is skipped as a header; blank lines are ignored so a
        # trailing empty line cannot raise an IndexError below.
        records = [ln.split() for ln in f.readlines()[1:] if ln.strip()]
    smiles_list: list[str] = [rec[0] for rec in records]
    ids: list[str] = [rec[1] for rec in records]
    print("Including mols:", len(smiles_list))

    print("Initializing DeepDL model")
    model = DeepDL.from_pretrained("extended", device)
    batch_size = 256 if device == "cuda" else 64

    print("Filtering molecules based on druglikeness")
    chunk_size = 100000
    scores: list[float] = []
    for i in range(0, len(smiles_list), chunk_size):
        _chunk = smiles_list[i : i + chunk_size]
        print(f"Screening {i + len(_chunk)} / {len(smiles_list)} ...")
        scores += model.screening(_chunk, naive=True, batch_size=batch_size, verbose=True)

    # Use one predicate for both the reported count and the rows written below.
    # Previously the count used `> threshold` while the writer kept
    # `>= threshold`, so the printed number could disagree with the file.
    keep = [not (score < threshold) for score in scores]
    num_pass = sum(keep)
    print(f"After druglikeness filtering: {num_pass} molecules remaining")

    out_file = Path(save_block_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open("w") as w:
        # c_create_env.py (and this script itself) skip the first line of their
        # input as a header, so write one to avoid losing the first molecule.
        w.write("SMILES\tID\tScore\n")
        for block_id, smiles, score, is_kept in zip(ids, smiles_list, scores, keep, strict=True):
            if not is_kept:
                continue
            # smiles is a token produced by str.split(), so it is never None or empty.
            w.write(f"{smiles}\t{block_id}\t{score:.2f}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the druglikeness filter.
    cli = argparse.ArgumentParser(description="Get clean building blocks")
    # refine
    cli.add_argument(
        "-b",
        "--building_block_path",
        required=True,
        type=str,
        help="Path to input enamine building block file (.smi)",
    )
    cli.add_argument(
        "-o",
        "--out_path",
        default="./building_blocks/druglike_blocks.smi",
        type=str,
        help="Path to output smiles file",
    )
    cli.add_argument("--threshold", default=60, type=float, help="Druglikeness score threshold (0-100)")
    cli.add_argument("--cuda", action="store_true", help="Use cuda for druglikeness scoring")
    options = cli.parse_args()

    # The --cuda flag selects the device string passed on to main().
    selected_device = "cuda" if options.cuda else "cpu"
    main(options.building_block_path, options.out_path, selected_device, options.threshold)
|
||||
209
data/scripts/c_create_env.py
Normal file
209
data/scripts/c_create_env.py
Normal file
@@ -0,0 +1,209 @@
|
||||
import argparse
|
||||
import functools
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem.rdChemReactions import ChemicalReaction, ReactionFromSmarts
|
||||
from tqdm import tqdm
|
||||
|
||||
from rxnflow.utils.featurization import get_block_features
|
||||
|
||||
|
||||
class Conversion:
    """A single reactant-conversion rule compiled into an RDKit reaction.

    The rule is read from a config entry providing ``key``, ``original`` and
    ``convert``; the reaction SMARTS is ``original>>convert``.
    """

    def __init__(self, info: DictConfig):
        self.key = info.key
        self.template = ">>".join((info.original, info.convert))
        self.rxn: ChemicalReaction = ReactionFromSmarts(self.template)
        self.rxn.Initialize()

    def run(self, mol: Chem.Mol) -> list[Chem.Mol]:
        """Apply the conversion to ``mol`` and return the first product of each outcome."""
        # NOTE(review): the second positional argument (10) is presumably
        # RDKit's cap on the number of product sets - confirm.
        outcomes = self.rxn.RunReactants((mol,), 10)
        first_products = []
        for product_set in outcomes:
            first_products.append(product_set[0])
        return first_products
|
||||
|
||||
|
||||
def _run_reaction(smi: str, rxn: Conversion) -> list[str]:
    """Apply ``rxn`` to the molecule given by ``smi`` and return the distinct product SMILES.

    Args:
        smi: SMILES of the input molecule.
        rxn: Conversion rule to apply.

    Returns:
        The unique product SMILES in sorted order. The list is empty when the
        rule does not match or when ``smi`` cannot be parsed.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles returns None on unparsable input. Treat that as
        # "no products" so one bad entry cannot crash the whole worker pool.
        return []
    # Sorting makes the result independent of set iteration order, so the
    # generated block files are reproducible between runs.
    return sorted({Chem.MolToSmiles(product) for product in rxn.run(mol)})
|
||||
|
||||
|
||||
def get_block(env_dir: Path, block_file: Path, protocol_dir: Path, num_cpus: int):
    """Convert building blocks into reaction-ready fragments and write them to ``env_dir/blocks``.

    For every conversion rule ``i`` in ``protocol_dir/reactant.yaml`` the rule
    is applied to all building blocks; the products ("bricks") are written to
    ``<key_i>.smi``. Each later rule ``j`` is then applied to those bricks, and
    the products ("linkers") are written to ``<key_i>-<key_j>.smi``. Every
    output row is ``<smiles>\\t<id;id;...>`` listing the source building-block
    IDs. No file is written when a rule yields no product.

    Args:
        env_dir: Output environment directory.
        block_file: Whitespace-separated file with ``<smiles> <id>`` rows,
            optionally preceded by one header line.
        protocol_dir: Directory containing ``reactant.yaml``.
        num_cpus: Number of worker processes.
    """
    # load block
    with block_file.open() as f:
        records = [ln.split() for ln in f if ln.strip()]
    # Drop the first line only when it really is a header, i.e. its first
    # field is not a parsable SMILES. The sibling scripts that produce the
    # default input write no header, so skipping unconditionally would lose
    # the first building block.
    if records and Chem.MolFromSmiles(records[0][0]) is None:
        records = records[1:]
    enamine_block_list: list[str] = [rec[0] for rec in records]
    enamine_id_list: list[str] = [rec[1] for rec in records]

    # run conversion
    block_dir = env_dir / "blocks/"
    block_dir.mkdir(parents=True, exist_ok=True)
    conversion_config = OmegaConf.load(protocol_dir / "reactant.yaml")
    num_conversions = len(conversion_config)

    # One pool is shared by all conversions, so worker processes are not
    # re-spawned for every rule and every rule pair.
    with multiprocessing.Pool(num_cpus) as pool:
        for i in tqdm(range(num_conversions)):
            info_i = conversion_config[i]
            _func = functools.partial(_run_reaction, rxn=Conversion(info_i))
            res = pool.map(_func, enamine_block_list)
            del _func

            brick_to_id: dict[str, list[str]] = {}
            for block_id, bricks in zip(enamine_id_list, res, strict=True):
                for smi in bricks:
                    brick_to_id.setdefault(smi, []).append(block_id)
            if len(brick_to_id) == 0:
                continue
            with open(block_dir / f"{info_i.key}.smi", "w") as w:
                for smi, id_list in brick_to_id.items():
                    w.write(f"{smi}\t{';'.join(sorted(id_list))}\n")

            brick_list = list(brick_to_id.keys())
            for j in tqdm(range(i + 1, num_conversions), leave=False):
                info_j = conversion_config[j]
                _func = functools.partial(_run_reaction, rxn=Conversion(info_j))
                res = pool.map(_func, brick_list)
                del _func

                linker_to_id: dict[str, list[str]] = {}
                for brick, linkers in zip(brick_list, res, strict=True):
                    for smi in linkers:
                        linker_to_id.setdefault(smi, []).extend(brick_to_id[brick])
                if len(linker_to_id) == 0:
                    continue
                with open(block_dir / f"{info_i.key}-{info_j.key}.smi", "w") as w:
                    for smi, ids in linker_to_id.items():
                        # Different bricks of the same source molecule can lead
                        # to the same linker; list each source ID only once.
                        w.write(f"{smi}\t{';'.join(sorted(set(ids)))}\n")
|
||||
|
||||
|
||||
def get_block_data(env_dir: Path, num_cpus: int):
    """Pre-compute features for every block file and save them to ``env_dir/bb_feature.pt``.

    Each non-empty file in ``env_dir/blocks`` is read, the first
    whitespace-separated field of every line is taken as a SMILES, and
    ``get_block_features`` is evaluated on it in worker processes. The saved
    object maps the file stem to a ``(descriptors, fingerprints)`` tensor pair.

    Args:
        env_dir: Environment directory containing the ``blocks`` sub-directory.
        num_cpus: Number of worker processes.
    """
    block_smi_dir = env_dir / "blocks/"
    save_block_data_path = env_dir / "bb_feature.pt"
    chunk_size = 10000

    data: dict[str, tuple[torch.Tensor, torch.Tensor]] = {}
    # A single pool serves every file and every chunk; creating one per chunk
    # only adds process start-up cost.
    with multiprocessing.Pool(num_cpus) as pool:
        for smi_file in tqdm(list(block_smi_dir.iterdir())):
            with smi_file.open() as f:
                lines = f.readlines()
            if len(lines) == 0:
                continue
            smi_list = [ln.split()[0] for ln in lines]
            fp_list = []
            desc_list = []
            for idx in tqdm(range(0, len(smi_list), chunk_size), leave=False):
                chunk = smi_list[idx : idx + chunk_size]
                results = pool.map(get_block_features, chunk)
                # get_block_features yields (fingerprint, descriptor) pairs.
                for fp, desc in results:
                    fp_list.append(fp)
                    desc_list.append(desc)
            block_descs = torch.from_numpy(np.stack(desc_list, 0))
            block_fps = torch.from_numpy(np.stack(fp_list, 0))
            # Stored descriptor-first, i.e. in the opposite order of the pairs above.
            data[smi_file.stem] = (block_descs, block_fps)
    torch.save(data, save_block_data_path)
|
||||
|
||||
|
||||
def get_workflow(env_dir: Path, protocol_dir: Path):
    """Build ``env_dir/workflow.yaml`` from the reaction protocols and the available block files.

    The workflow has three sections:

    * ``FirstBlock``: one protocol per non-empty brick file (a file whose stem
      contains no ``-``).
    * ``UniRxn``: left empty.
    * ``BiRxn``: for every reaction in ``protocol_dir/protocol.yaml`` and every
      usable block position, one protocol for bricks and one for linkers,
      provided block files of that kind exist for the block's pattern.

    Args:
        env_dir: Environment directory containing the ``blocks`` sub-directory.
        protocol_dir: Directory containing ``protocol.yaml``.
    """
    protocol_config = OmegaConf.load(protocol_dir / "protocol.yaml")
    save_workflow_path = env_dir / "workflow.yaml"

    firstblock_protocols: dict[str, dict] = {}
    unirxn_protocols: dict[str, dict] = {}
    birxn_protocols: dict[str, dict] = {}
    workflow_config = {"FirstBlock": firstblock_protocols, "UniRxn": unirxn_protocols, "BiRxn": birxn_protocols}

    # firstblock
    pattern_to_types: dict[int, list[str]] = {}
    # Sorted so the generated workflow does not depend on directory listing order.
    for block_file in sorted(Path(env_dir / "blocks/").iterdir()):
        block_type = block_file.stem
        protocol_name = "block" + block_type
        with block_file.open() as f:
            # An empty first read means the file has no content at all.
            if len(f.readline()) == 0:
                continue
        # A stem such as "3-14" registers the file under both patterns 3 and 14.
        for pattern in map(int, block_type.split("-")):
            pattern_to_types.setdefault(pattern, []).append(block_type)
        # TODO: Remove here, currently, only brick for firstblock
        if "-" in block_type:
            continue
        firstblock_protocols[protocol_name] = {"block_types": [block_type]}

    # remove redundant items
    pattern_to_types = {k: sorted(set(v)) for k, v in pattern_to_types.items()}
    pattern_dict = {
        pattern: {
            "brick": [t for t in block_types if ("-" not in t)],
            "linker": [t for t in block_types if ("-" in t)],
        }
        for pattern, block_types in pattern_to_types.items()
    }
    # Used for patterns without any block file: no file is written for a
    # conversion that produced nothing, so a pattern can be absent here.
    no_block_types: dict[str, list[str]] = {"brick": [], "linker": []}

    # birxn (no unirxn)
    for rxn_name, cfg in protocol_config.items():
        rxn_name = str(rxn_name)
        if cfg.ordered:
            block_orders = [0, 1]
        else:
            # Unordered reactions must have the same type at both positions,
            # so a single order covers them.
            assert cfg.block_type[0] == cfg.block_type[1]
            block_orders = [0]

        for order in block_orders:
            is_block_first = order == 0
            state_pattern = cfg.block_type[1 - order]
            block_pattern = cfg.block_type[order]
            for t in ["brick", "linker"]:
                protocol_name = rxn_name + f"_{t}_" + ("b0" if is_block_first else "b1")
                # .get() instead of indexing: a missing pattern previously
                # raised KeyError; now it simply yields no protocol.
                block_keys = pattern_dict.get(block_pattern, no_block_types)[t]
                if len(block_keys) > 0:
                    birxn_protocols[protocol_name] = {
                        "forward": cfg.forward,
                        "reverse": cfg.reverse,
                        "is_block_first": is_block_first,
                        "state_pattern": state_pattern,
                        "block_types": block_keys,
                    }
    OmegaConf.save(workflow_config, save_workflow_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # os.sched_getaffinity is only available on some Unix platforms. The default
    # is evaluated while the parser is built, so without this fallback the
    # script would crash elsewhere even when --cpu is given explicitly.
    if hasattr(os, "sched_getaffinity"):
        default_num_cpus = len(os.sched_getaffinity(0))
    else:
        default_num_cpus = os.cpu_count() or 1

    parser = argparse.ArgumentParser(description="Create environment")
    parser.add_argument(
        "-b",
        "--building_block_path",
        type=Path,
        help="Path of input building block smiles file",
        default="./building_blocks/enamine_stock.smi",
    )
    parser.add_argument(
        "-p",
        "--protocol_dir",
        type=Path,
        help="Path of input synthesis protocol directory",
        default="./template/real/",
    )
    parser.add_argument(
        "-o",
        "--env_dir",
        type=Path,
        help="Path of output environment directory",
        default="./envs/stock/",
    )
    parser.add_argument("--cpu", type=int, help="Num Workers", default=default_num_cpus)
    args = parser.parse_args()

    env_dir: Path = args.env_dir
    protocol_dir: Path = args.protocol_dir
    block_file: Path = args.building_block_path
    num_cpus: int = args.cpu

    # Refuse to write into an existing environment directory.
    assert not env_dir.exists(), f"output environment directory already exists: {env_dir}"

    print("convert building blocks to ready-to-compose fragments")
    get_block(env_dir, block_file, protocol_dir, num_cpus)
    print("pre-calculate building block features")
    get_block_data(env_dir, num_cpus)
    print("create workflow")
    get_workflow(env_dir, protocol_dir)
|
||||
68
data/scripts/estimate_search_space_size.py
Normal file
68
data/scripts/estimate_search_space_size.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from pathlib import Path
|
||||
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
# Number of lines (= blocks) in every block file, keyed by the file stem.
nblocks: dict[str, int] = {}
for path in Path("./envs/stock/blocks").iterdir():
    with path.open() as f:
        nblocks[path.stem] = sum(1 for _ in f)

workflow = OmegaConf.load("./envs/stock/workflow.yaml")
birxns = workflow["BiRxn"]


def _extend_with_linkers(previous_reactants):
    """Return ``(counts, reactants)`` for one additional linker step.

    For every block type 1..32, each linker protocol whose state pattern is
    that type contributes ``n_linkers * previous_reactants[type]`` per linker
    block type. ``counts`` holds that sum multiplied by the number of blocks of
    the type; ``reactants`` holds the sum itself, keyed by type.
    """
    counts = []
    reactants = {}
    for bt in range(1, 33):
        n = nblocks[str(bt)]
        total = 0
        for name, protocol in birxns.items():
            if bt == protocol["state_pattern"] and "linker" in name:
                _bt_rxn = birxns[name.replace("linker", "brick")]["block_types"][0]
                for linker_pattern in protocol["block_types"]:
                    _n_linkers = nblocks[linker_pattern]
                    _bts = linker_pattern.split("-")
                    assert _bt_rxn in _bts
                    # NOTE(review): _bt_remain is computed but never used; the
                    # lookup below is keyed by bt. Confirm whether it was meant
                    # to be keyed by the remaining pattern instead.
                    _bt_remain = _bts[1] if _bts[0] == _bt_rxn else _bts[0]
                    total += _n_linkers * previous_reactants[bt]
        reactants[bt] = total
        counts.append(n * total)
    return counts, reactants


# nstep 2
count_2 = []
reactant_2 = {}
for bt in range(1, 33):
    n = nblocks[str(bt)]
    n_reactants = 0
    for k, v in birxns.items():
        if bt != v["state_pattern"] or "brick" not in k:
            continue
        assert len(v["block_types"]) == 1
        n_reactants += nblocks[v["block_types"][0]]
    reactant_2[bt] = n_reactants
    count_2.append(n * n_reactants)
print(sum(count_2) // 2)

# nstep 3
count_3, reactant_3 = _extend_with_linkers(reactant_2)
print(sum(count_3) // 6)

# nstep 4
count_4, reactant_4 = _extend_with_linkers(reactant_3)
print(sum(count_4) // 24)
|
||||
265
data/template/real/protocol.yaml
Normal file
265
data/template/real/protocol.yaml
Normal file
@@ -0,0 +1,265 @@
|
||||
rxn1:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[#6:4]C(=O)[OH,O-]>>[N:1]C(=O)[#6:4]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[3*]>>[#7:1]-C(=O)-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):1]-C(=O)-[#6:2]>>[#7:1]-[1*].[#6:2]-[3*]"
|
||||
block_type: [1, 3]
|
||||
ordered: true
|
||||
|
||||
rxn2:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):4].[#6:1][C:2](=[O:3])[F,Cl,Br,O;$(O[CH3]),$(O[CH2][CH3]),$(O[CH2][C](F)(F)F)]>>[#6:1][C:2](=[O:3])[N:4]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[5*]>>[#7:1]-C(=O)-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):1]C(=O)[#6:2]>>[#7:1]-[1*].[#6:2]-[5*]"
|
||||
block_type: [1, 5]
|
||||
ordered: true
|
||||
|
||||
rxn3:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[#6:2][Cl,Br,I]>>[N:1]-[#6:2]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[8*]>>[#7:1]-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=[N,O,S]))[C,S]=[N,O,S]):1]-[#6:2]>>[#7:1]-[1*].[#6:2]-[8*]"
|
||||
block_type: [1, 8]
|
||||
ordered: true
|
||||
|
||||
rxn4:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[#6:2]S(=O)(=O)[F,Cl,Br,I]>>[N:1]S(=O)(=O)[#6:2]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[13*]>>[#7:1]-S(=O)(=O)-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=[N,O,S]))[C,S]=[N,O,S]):1][SX4;$(S(=O)(=O))][#6:2]>>[#7:1]-[1*].[#6:2]-[13*]"
|
||||
block_type: [1, 13]
|
||||
ordered: true
|
||||
|
||||
rxn5:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[C;!$(C=[N,O,S]):2]-[NH2]>>[N:1]C(=O)N[C:2]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[14*]>>[#7:1]-C(=O)N-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):1]-C(=O)[NH1+0]-[C;!$(C=[N,O,S]):2]>>[#7:1]-[1*].[#6:2]-[14*]"
|
||||
block_type: [1, 14]
|
||||
ordered: true
|
||||
|
||||
rxn6:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):2]>>[N:1]C(=O)C(=O)[N:2]"
|
||||
forward: "[#7:1]-[1*].[#7:2]-[1*]>>[#7:1]-C(=O)C(=O)-[#7:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):1]-C(=O)C(=O)-[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):2]>>[#7:1]-[1*].[#7:2]-[1*]"
|
||||
block_type: [1, 1]
|
||||
ordered: false
|
||||
|
||||
rxn7:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[#6:3][CH]=O>>[N:1]C[#6:3]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[4*]>>[#7:1]-C-[#6:2]"
|
||||
reverse: "[N;$(N[#6][#6]);!$(N[#7]);!$(N(-C(=O))[C,S]=[N,O,S]):1]-[CH2]-[#6:2]>>[#7:1]-[1*].[#6:2]-[4*]"
|
||||
block_type: [1, 4]
|
||||
ordered: true
|
||||
|
||||
rxn8:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[#6:3][C:2](=O)[#6:4]>>[N:1][C:2]([#6:3])[#6:4]"
|
||||
forward: "[#7:1]-[1*].[CH0:2]=[7*]>>[#7:1]-[CH1:2]"
|
||||
reverse: "[NX3;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1]-[CH1;$(C([#6])[#6]):2]>>[#7:1]-[1*].[CH0:2]=[7*]"
|
||||
block_type: [1, 7]
|
||||
ordered: true
|
||||
|
||||
rxn9:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[C:2]1[O:4][C:3]1>>[N:1][C:2][C:3][OH:4]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[20*]>>[#7:1]-[#6:2]"
|
||||
reverse: "[NX3;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1]-[C;$(CC[OH]):2]>>[#7:1]-[1*].[#6:2]-[20*]"
|
||||
block_type: [1, 20]
|
||||
ordered: true
|
||||
|
||||
rxn10:
|
||||
reaction: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1].[C:4]=[C:5][$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):6]>>[N:1][C:4][C:5][*:6]"
|
||||
forward: "[#7:1]-[1*].[#6:2]-[26*]>>[#7:1]-[#6:2]"
|
||||
reverse: "[NX3;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1][C$(CC[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-])]):2]>>[#7:1]-[1*].[#6:2]-[26*]"
|
||||
block_type: [1, 26]
|
||||
ordered: true
|
||||
|
||||
rxn11:
|
||||
reaction: "[NX3;!H0;!$(N[C,S]=[N,O,S]):1][c:2][c:3][C:4](=[O:5])[NX3;!H0:6].[#6:7][CH]=O>>[N:1]1[c:2][c:3][C:4](=[O:5])[N:6]C1-[#6:7]"
|
||||
forward: "[C:1]=[28*].[#6:2]-[4*]>>[CH1:1]-[#6:2]"
|
||||
reverse: "[CH1;$(C1NC(=O)cc[NX3;!$(N[C,S]=[N,O,S])]1):1]-[#6:2]>>[#6:1]=[28*].[#6:2]-[4*]"
|
||||
block_type: [28, 4]
|
||||
ordered: true
|
||||
|
||||
rxn12:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[Cl,$(OS(=O)(=O)[#6;!R])].[C;!$(C=[N,O,S]):2]-[Cl,$(OS(=O)(=O)[#6;!R])]>>[#6:1]S(=O)(=O)[#6:2]"
|
||||
forward: "[#6:1]-[18*].[#6:2]-[18*]>>[#6:1]S(=O)(=O)[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[SX4;$(S(=O)(=O))]-[C;!$(C=[N,O,S]):2]>>[#6:1]-[18*].[#6:2]-[18*]"
|
||||
block_type: [18, 18]
|
||||
ordered: false
|
||||
|
||||
rxn13:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[Cl,$(OS(=O)(=O)[#6;!R])].[C;!$(C=[N,O,S]):2]-[SH]>>[#6:1]S(=O)(=O)[#6:2]"
|
||||
forward: "[#6:1]-[18*].[#6:2]-[16*]>>[#6:1]S(=O)(=O)[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[SX4;$(S(=O)(=O))]-[C;!$(C=[N,O,S]):2]>>[#6:1]-[18*].[#6:2]-[16*]"
|
||||
block_type: [18, 16]
|
||||
ordered: true
|
||||
|
||||
rxn14:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[Cl,$(OS(=O)(=O)[#6;!R])].[C;!$(C=[N,O,S]):2]-[SH]>>[#6:1]S(=O)[#6:2]"
|
||||
forward: "[#6:1]-[18*].[#6:2]-[16*]>>[#6:1]S(=O)[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[SX3;$(S=O)]-[C;!$(C=[N,O,S]):2]>>[#6:1]-[18*].[#6:2]-[16*]"
|
||||
block_type: [18, 16]
|
||||
ordered: true
|
||||
|
||||
rxn15:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[Cl,Br,I].[C;!$(C=[N,O,S]):2]-[Cl,Br,I]>>[#6:1]S[#6:2]"
|
||||
forward: "[#6:1]-[17*].[#6:2]-[17*]>>[#6:1]-S-[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[SX2]-[C;!$(C=[N,O,S]):2]>>[#6:1]-[17*].[#6:2]-[17*]"
|
||||
block_type: [17, 17]
|
||||
ordered: true
|
||||
|
||||
rxn16:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[Cl,Br,I].[#6:2]-C(=O)[OH,O-]>>[#6:2]C(=O)O[C:1]"
|
||||
forward: "[#6:1]-[17*].[#6:2]-[3*]>>[#6:1]-OC(=O)-[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-OC(=O)-[#6:2]>>[#6:1]-[17*].[#6:2]-[3*]"
|
||||
block_type: [17, 3]
|
||||
ordered: true
|
||||
|
||||
rxn17:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[OH].[#6:2]-C(=O)[OH,O-]>>[#6:2]C(=O)O[C:1]"
|
||||
forward: "[#6:1]-[15*].[#6:2]-[3*]>>[#6:1]-OC(=O)-[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-OC(=O)-[#6:2]>>[#6:1]-[15*].[#6:2]-[3*]"
|
||||
block_type: [15, 3]
|
||||
ordered: true
|
||||
|
||||
rxn18:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[OH].[C;!$(C=[N,O,S]):2][Cl,Br,I]>>[#6:1]-O-[#6:2]"
|
||||
forward: "[#6:1]-[15*].[#6:2]-[17*]>>[#6:1]-O-[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-O-[C;!$(C=[N,O,S]):2]>>[#6:1]-[15*].[#6:2]-[17*]"
|
||||
block_type: [15, 17]
|
||||
ordered: true
|
||||
|
||||
rxn19:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[SH].[C;!$(C=[N,O,S]):2][Cl,Br,I]>>[#6:1][S][#6:2]"
|
||||
forward: "[#6:1]-[16*].[#6:2]-[17*]>>[#6:1]-S-[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[SX2]-[C;!$(C=[N,O,S]):2]>>[#6:1]-[16*].[#6:2]-[17*]"
|
||||
block_type: [16, 17]
|
||||
ordered: true
|
||||
|
||||
rxn20:
|
||||
reaction: "[C;!$(C=[N,O,S]):5]-[NH2].[NX3;$([NH]([#6])[C]),$([NH2][C]):1][C:2]C(=O)O[C;!R]>>[N:1]1[C:2]C(=O)N([C:5])C(=O)1"
|
||||
forward: "[#6:1]-[14*].[#7:2]-[27*]>>[#6:1]-[#7:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[N;$(N1C(=O)[NX3;!$(N[!#6])]CC(=O)1):2]>>[#6:1]-[14*].[#7:2]-[27*]"
|
||||
block_type: [14, 27]
|
||||
ordered: true
|
||||
|
||||
rxn21:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[NH2].[C;!$(C=[N,O,S]):3]-[NH2]>>[#6:1][NH]c1ncnc2c1nc[nH0]2[#6:3]"
|
||||
forward: "[#6:1]-[14*].[#6:2]-[14*]>>[#6:1][NH]c1ncnc2c1nc[nH0]2[#6:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[NH]-[cH0]1[nH0][cH1][nH0]c2c1[nH0][cH1][nH0]2-[C;!$(C=[N,O,S]):2]>>[#6:1]-[14*].[#6:2]-[14*]"
|
||||
block_type: [14, 14]
|
||||
ordered: true
|
||||
|
||||
rxn22:
|
||||
reaction: "[C;!$(C=[N,O,S]):1]-[NH2].[NH2:3][C;!$(C=[N,O,S]):4][C:5][C:6](=[O:7])O>>[C:1]N1[C:6](=[O:7])[C:5][C:4][NH:3]C(=O)1"
|
||||
forward: "[#6:1]-[14*].[#7:2]-[29*]>>[#6:1]-[#7:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[N;$(N1C(=O)C[C;!$(C=[N,O,S])][NH]C(=O)1):2]>>[#6:1]-[14*].[#7:2]-[29*]"
|
||||
block_type: [14, 29]
|
||||
ordered: true
|
||||
|
||||
rxn23:
|
||||
reaction: "[C;!$(C=[N,O,S]):6]-[NH2].[NH2:1][c:2][c:3][C:4](=[O:5])OC>>[NH:1]1[c:2][c:3][C:4](=[O:5])N([C:6])C1=O"
|
||||
forward: "[#6:1]-[14*].[#7:2]-[32*]>>[#6:1]-[#7:2]"
|
||||
reverse: "[C;!$(C=[N,O,S]):1]-[#7;$(n1c(=O)cc[nH]c(=O)1):2]>>[#6:1]-[14*].[#7:2]-[32*]"
|
||||
block_type: [14, 32]
|
||||
ordered: true
|
||||
|
||||
rxn24:
|
||||
reaction: "[#6:1][CH]=O.[CH3,CH2&$(C([#6])[#6]):2]-[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):3]>>[*:3][C:2]=[CH][#6:1]"
|
||||
forward: "[#6:1]-[4*].[#6:2]=[24*]>>[#6:1]-[CH]=[#6:2]"
|
||||
reverse: "[#6:1]-[CH]=[CH1,CH0&$(C(-[#6])-[#6]):2]-[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):3]>>[#6:1]-[4*].[*:3]-[#6:2]=[24*]"
|
||||
block_type: [4, 24]
|
||||
ordered: true
|
||||
|
||||
rxn25:
|
||||
reaction: "[#6:1][CH]=O.[CH2:2]([$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):3])[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):4]>>[*:3][C:2]([*:4])=[CH][#6:1]"
|
||||
forward: "[#6:1]-[4*].[#6:2]=[25*]>>[#6:1]-[CH]=[#6:2]"
|
||||
reverse: "[#6:1]-[CH]=[C:2](-[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):3])-[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):4]>>[#6:1]-[4*].[25*]=[#6:2](-[*:3])-[*:4]"
|
||||
block_type: [4, 25]
|
||||
ordered: true
|
||||
|
||||
rxn26:
|
||||
reaction: "[#6:1][CH]=O.[#6:2]-[NH][NH2]>>[#6:1][CH]=NN[#6:2]"
|
||||
forward: "[#6:1]-[4*].[#6:2]-[9*]>>[#6:1][CH]=NN[#6:2]"
|
||||
reverse: "[#6:1]-[CH]=[NX2][NH1+0]-[#6:2]>>[#6:1]-[4*].[#6:2]-[9*]"
|
||||
block_type: [4, 9]
|
||||
ordered: true
|
||||
|
||||
rxn27:
|
||||
reaction: "[#6:1][CH]=O.[NH2:3][c:4][c:5][NH2,NH,SH,OH:6]>>[#6:1]c1[nH0+0:3][c:4][c:5][*:6]1"
|
||||
forward: "[#6:1]-[4*].[#6:2]-[30*]>>[#6:1]-[#6:2]"
|
||||
reverse: "[#6:1]-[c;$(c1[nH0+0][cX3;!$(c-*)][cX3;!$(c-*)][n,s,o]1):2]>>[#6:1]-[4*].[#6:2]-[30*]"
|
||||
block_type: [4, 30]
|
||||
ordered: true
|
||||
|
||||
rxn28:
|
||||
reaction: "[#6:1]C(=O)[OH,O-].[NH2:3][c:4][c:5][NH2,NH,SH,OH:6]>>[#6:1]c1[nH0+0:3][c:4][c:5][*:6]1"
|
||||
forward: "[#6:1]-[3*].[#6:2]-[30*]>>[#6:1]-[#6:2]"
|
||||
reverse: "[#6:1]-[c;$(c1[nH0+0][cX3;!$(c-*)][cX3;!$(c-*)][n,s,o]1):2]>>[#6:1]-[3*].[#6:2]-[30*]"
|
||||
block_type: [3, 30]
|
||||
ordered: true
|
||||
|
||||
rxn29:
|
||||
reaction: "[#6:1]C(=O)[OH,O-].[#6:2]C(=O)[NH][NH2]>>[#6:2]c1[nH0][nH0]c([#6:1])[oH0]1"
|
||||
forward: "[#6:1]-[3*].[#6:2]-[6*]>>[#6:2]c1[nH0][nH0]c([#6:1])[oH0]1"
|
||||
reverse: "[#6:2]-c1[nH0][nH0]c(-[#6:1])[oH0]1>>[#6:1]-[3*].[#6:2]-[6*]"
|
||||
block_type: [3, 6]
|
||||
ordered: true
|
||||
|
||||
rxn30:
|
||||
reaction: "[c:1]-[OH].[C:2]1[O:4][C:3]1>>[#6:1]-O[C:2][C:3][OH:4]"
|
||||
forward: "[#6:1]-[22*].[#6:2]-[20*]>>[#6:1]-O-[#6:2]"
|
||||
reverse: "[c:1]-O-[C$(CC[OH]):2]>>[#6:1]-[22*].[#6:2]-[20*]"
|
||||
block_type: [22, 20]
|
||||
ordered: true
|
||||
|
||||
rxn31:
|
||||
reaction: "[c:1]-[OH].[C:4]=[C:5][$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):6]>>[#6:1]-O[C:4][C:5][*:6]"
|
||||
forward: "[#6:1]-[22*].[#6:2]-[26*]>>[#6:1]-O-[#6:2]"
|
||||
reverse: "[c:1]-O-[C:2]-[C;$(C[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-])]):3]>>[#6:1]-[22*].[#6:3]-[#6:2]-[26*]"
|
||||
block_type: [22, 26]
|
||||
ordered: true
|
||||
|
||||
rxn32:
|
||||
reaction: "[#6:1]-C#N.[NH2:4][c:5][c:6][C:7](=[O:8])OC>>[#6:1]C1=[N:4][c:5][c:6][C:7](=[O:8])N1"
|
||||
forward: "[#6:1]-[11*].[#6:2]-[31*]>>[#6:1]-[#6:2]"
|
||||
reverse: "[#6:1]-[#6;$(c1n[c;!$(c-[*])][c;!$(c-[*])]c(=O)[nH1+0]1):2]>>[#6:1]-[11*].[#6:2]-[31*]"
|
||||
block_type: [11, 31]
|
||||
ordered: true
|
||||
|
||||
rxn33:
|
||||
reaction: "[#6:1]-C#N.[#6:2]-C(=O)[OH,O-]>>[#6:1]-c1[nH0+0][oH0+0]c([#6:2])[nH0]1"
|
||||
forward: "[#6:1]-[11*].[#6:2]-[3*]>>[#6:1]-c1[nH0+0][oH0+0]c(-[#6:2])[nH0]1"
|
||||
reverse: "[#6:1]-c1[nH0+0][oH0+0]c(-[#6:2])[nH0]1>>[#6:1]-[11*].[#6:2]-[3*]"
|
||||
block_type: [11, 3]
|
||||
ordered: true
|
||||
|
||||
rxn34:
|
||||
reaction: "[c:1]-B(O)O.[c:2]-[Cl,Br,I]>>[c:1]-[c:2]"
|
||||
forward: "[#6:1]-[23*].[#6:2]-[21*]>>[#6:1]-[#6:2]"
|
||||
reverse: "[c:1]-[c:2]>>[c:1]-[23*].[c:2]-[21*]"
|
||||
block_type: [23, 21]
|
||||
ordered: true
|
||||
|
||||
rxn35:
|
||||
reaction: "[#6:1][C:2](=O)[#6:3].[#6:4]-[OH,nH,NH:5]>>[#6:4][*:5][C:2]([#6:1])([#6:3])C(=O)O"
|
||||
forward: "[CH0:1]=[7*].[*:2]-[2*]>>[*:2]-[C:1]-C(=O)O"
|
||||
reverse: "[#6:3][O,#7:2]-[C;$(C([#6])[#6]):1]-C(=O)[OH]>>[CH0:1]=[7*].[#6:3]-[*:2]-[2*]"
|
||||
block_type: [7, 2]
|
||||
ordered: true
|
||||
|
||||
rxn36:
|
||||
reaction: "[CH1:1]-[Cl,Br,I].[#6:2]-C#[CH]>>[#6:2]c1c[nH0+0](-[CH1:1])[nH0+0][nH0+0]1"
|
||||
forward: "[#6:1]-[19*].[#6:2]-[12*]>>[#6:2]-c1c[nH0](-[#6:1])[nH0][nH0]1"
|
||||
reverse: "[#6:2]-c1[cH1][nH0](-[#6:1])[nH0][nH0]1>>[#6:1]-[19*].[#6:2]-[12*]"
|
||||
block_type: [19, 12]
|
||||
ordered: true
|
||||
|
||||
rxn37:
|
||||
reaction: "[#6:1]-N=[N+]=[N-].[#6:2]-C#[CH]>>[#6:2]-c1c[nH0]([#6:1])[nH0][nH0]1"
|
||||
forward: "[#6:1]-[10*].[#6:2]-[12*]>>[#6:2]c1c[nH0]([#6:1])[nH0][nH0]1"
|
||||
reverse: "[#6:2]c1[cH1][nH0]([#6:1])[nH0][nH0]1>>[#6:1]-[10*].[#6:2]-[12*]"
|
||||
block_type: [10, 12]
|
||||
ordered: true
|
||||
|
||||
rxn38:
|
||||
reaction: "[#6:1]-N=[N+]=[N-].[C;!$(C=[N,O,S]):2]-[NH2]>>[#6:2]-[NH1][CH2][cH0+0]1[cH1+0][nH0]([#6:1])[nH0][nH0]1"
|
||||
forward: "[#6:1]-[10*].[#6:2]-[14*]>>[#6:2]-[NH1][CH2][cH0+0]1[cH1+0][nH0](-[#6:1])[nH0][nH0]1"
|
||||
reverse: "[#6:2]-[NH1][CH2][cH0+0]1[cH1+0][nH0](-[#6:1])[nH0][nH0]1>>[#6:1]-[10*].[#6:2]-[14*]"
|
||||
block_type: [10, 14]
|
||||
ordered: true
|
||||
96
data/template/real/reactant.yaml
Normal file
96
data/template/real/reactant.yaml
Normal file
@@ -0,0 +1,96 @@
|
||||
- key: 1
|
||||
original: "[NX3;!H0;$(N[#6]);!$(N[#7]);!$(N[C,S]=[N,O,S]):1]"
|
||||
convert: "[#7:1]-[1*]"
|
||||
- key: 2
|
||||
original: "[#6:1]-[OH,nH,NH:2]"
|
||||
convert: "[#6:1]-[*:2]-[2*]"
|
||||
- key: 3
|
||||
original: "[#6:1]-C(=O)[OH,O-]"
|
||||
convert: "[#6:1]-[3*]"
|
||||
- key: 4
|
||||
original: "[#6:1]-[CH]=O"
|
||||
convert: "[#6:1]-[4*]"
|
||||
- key: 5
|
||||
original: "[#6:1]-C(=O)-[F,Cl,Br,O;$(O[CH3]),$(O[CH2][CH3]),$(O[CH2][C](F)(F)F)]"
|
||||
convert: "[#6:1]-[5*]"
|
||||
- key: 6
|
||||
original: "[#6:1]-C(=O)[NH][NH2]"
|
||||
convert: "[#6:1]-[6*]"
|
||||
- key: 7
|
||||
original: "[#6:1]-[C:2](=O)[#6:3]"
|
||||
convert: "[CH0:2](-[#6:1])(-[#6:3])=[7*]"
|
||||
- key: 8
|
||||
original: "[#6:1]-[Cl,Br,I]"
|
||||
convert: "[#6:1]-[8*]"
|
||||
- key: 9
|
||||
original: "[#6:1]-[NH][NH2]"
|
||||
convert: "[#6:1]-[9*]"
|
||||
- key: 10
|
||||
original: "[#6:1]-N=[N+]=[N-]"
|
||||
convert: "[#6:1]-[10*]"
|
||||
- key: 11
|
||||
original: "[#6:1]-C#N"
|
||||
convert: "[#6:1]-[11*]"
|
||||
- key: 12
|
||||
original: "[#6:1]-C#[CH]"
|
||||
convert: "[#6:1]-[12*]"
|
||||
- key: 13
|
||||
original: "[#6:1]-S(=O)(=O)[F,Cl,Br,I]"
|
||||
convert: "[#6:1]-[13*]"
|
||||
- key: 14
|
||||
original: "[C;!$(C=[N,O,S]):1]-[NH2]"
|
||||
convert: "[#6:1]-[14*]"
|
||||
- key: 15
|
||||
original: "[C;!$(C=[N,O,S]):1]-[OH]"
|
||||
convert: "[#6:1]-[15*]"
|
||||
- key: 16
|
||||
original: "[C;!$(C=[N,O,S]):1]-[SH]"
|
||||
convert: "[#6:1]-[16*]"
|
||||
- key: 17
|
||||
original: "[C;!$(C=[N,O,S]):1]-[Cl,Br,I]"
|
||||
convert: "[#6:1]-[17*]"
|
||||
- key: 18
|
||||
original: "[C;!$(C=[N,O,S]):1]-[Cl,$(OS(=O)(=O)[#6;!R])]"
|
||||
convert: "[#6:1]-[18*]"
|
||||
- key: 19
|
||||
original: "[CH1:1]-[Cl,Br,I]"
|
||||
convert: "[#6:1]-[19*]"
|
||||
- key: 20
|
||||
original: "[C:1]1[O:3][C:2]1"
|
||||
convert: "[20*]-[#6:1]-[#6:2]-[OH:3]"
|
||||
- key: 21
|
||||
original: "[c:1]-[Cl,Br,I]"
|
||||
convert: "[#6:1]-[21*]"
|
||||
- key: 22
|
||||
original: "[c:1]-[OH]"
|
||||
convert: "[#6:1]-[22*]"
|
||||
- key: 23
|
||||
original: "[c:1]-B(O)O"
|
||||
convert: "[#6:1]-[23*]"
|
||||
- key: 24
|
||||
original: "[CH3,CH2&$(C([#6])[#6]):1]-[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):2]"
|
||||
convert: "[*:2]-[#6:1]=[24*]"
|
||||
- key: 25
|
||||
original: "[CH2:1]([$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):2])[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-]):3]"
|
||||
convert: "[#6:1](-[*:2])(-[*:3])=[25*]"
|
||||
- key: 26
|
||||
original: "[C:1]=[C;$(C[$(C=O),$(C#N),$(S=O),$([N+](=O)[O-])]):2]"
|
||||
convert: "[26*]-[C:1]-[C:2]"
|
||||
- key: 27
|
||||
original: "[NX3;$([NH]([#6])[C]),$([NH2][C]):1][C:2]C(=O)O[C;!R]"
|
||||
convert: "[27*]-N1C(=O)[N:1][C:2]C(=O)1"
|
||||
- key: 28
|
||||
original: "[NX3;!H0;!$(N[C,S]=[N,O,S]):1][c:2][c:3][C:4](=[O:5])[NX3;!H0:6]"
|
||||
convert: "[N:1]1[c:2][c:3][C:4](=[O:5])[N:6]C1=[28*]"
|
||||
- key: 29
|
||||
original: "[NH2:3][C;!$(C=[N,O,S]):4][C:5][C:6](=[O:7])O"
|
||||
convert: "[29*]-N1[C:6](=[O:7])[C:5][C:4][NH:3]C(=O)1"
|
||||
- key: 30
|
||||
original: "[NH2:1][c:2][c:3][NH2,NH,SH,OH:4]"
|
||||
convert: "[30*]-c1[nH0+0:1][c:2][c:3][*:4]1"
|
||||
- key: 31
|
||||
original: "[NH2:1][c:2][c:3][C:4](=[O:5])OC"
|
||||
convert: "[31*]-C1=[N:1][c:2][c:3][C:4](=[O:5])N1"
|
||||
- key: 32
|
||||
original: "[NH2:1][c:2][c:3][C:4](=[O:5])OC"
|
||||
convert: "[NH:1]1[c:2][c:3][C:4](=[O:5])N([32*])C1=O"
|
||||
204
exclude.txt
204
exclude.txt
@@ -1,72 +1,170 @@
|
||||
# gflownet
|
||||
bengio2021flow_proxy.pkl.gz
|
||||
|
||||
# rxnflow
|
||||
logs/
|
||||
experiments/data/building_blocks/*
|
||||
experiments/data/envs/*
|
||||
experiments/data/stock/*
|
||||
experiments/data/experiments/*
|
||||
experiments/data/complex/*
|
||||
experiments/analysis/
|
||||
experiments/LIT-PCBA/*
|
||||
experiments/CrossDocked2020/*
|
||||
unidock_2025*
|
||||
experiments/data/CrossDocked2020/
|
||||
experiments/data/LIT-PCBA/
|
||||
|
||||
# semlaflow
|
||||
.gvp_cache/
|
||||
docking_pipeline/
|
||||
**/weights/*.ckpt
|
||||
tony/v0/notebooks/
|
||||
tony/v1/temp/
|
||||
result/
|
||||
evaluation_results/
|
||||
|
||||
# pharmaconet
|
||||
PharmacoNet/
|
||||
data/building_blocks/*
|
||||
data/envs/*
|
||||
data/experiments/*
|
||||
lightning_logs/
|
||||
weights/
|
||||
*.pdbqt
|
||||
|
||||
# package
|
||||
build/
|
||||
|
||||
# Log files
|
||||
wandb/
|
||||
molproc_logs/
|
||||
lightning_logs/
|
||||
notebooks/lightning_logs/
|
||||
nohup.out
|
||||
*job*.out
|
||||
|
||||
# Slurm submission scripts and logs
|
||||
subslurm/
|
||||
slurmlog/
|
||||
slurm-*
|
||||
job.sh
|
||||
|
||||
*.profile
|
||||
*.egg-info
|
||||
uv.lock
|
||||
|
||||
# Editors
|
||||
.vscode/
|
||||
typings/
|
||||
|
||||
# Jupyter notebook checkpoints
|
||||
notebooks/.ipynb_checkpoints/
|
||||
|
||||
# Python cache files
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*/__pycache__/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
*.zip
|
||||
*.tar.gz
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# experimental code
|
||||
src/app
|
||||
weights.git
|
||||
experimental/
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# Experimental files
|
||||
=*
|
||||
*.out
|
||||
*.profile
|
||||
evaluation_results/*
|
||||
experiments/data/*
|
||||
experiments/evals/posecheck/*
|
||||
experiments/evals/sampling_efficency/*
|
||||
experiments/logs/*
|
||||
experiments/wandb/*
|
||||
logs/*
|
||||
wandb/*
|
||||
src/app/*
|
||||
data/*.sdf
|
||||
data/*.zip
|
||||
docking_pipeline/*
|
||||
.git/*
|
||||
experimental/*
|
||||
.experimental/*
|
||||
private_README.md
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset crossdock with the
# auto-regressive categorical strategy and gvp pocket encoding.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "train_ar_conf_crossdock-no-litpcba_gvp.sh"

python semlaflow/train.py \
  --data_path semlaflow/saved/data/crossdock-no-litpcba/smol \
  --dataset crossdock \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 1 \
  --batch_cost 2500 \
  --time_alpha 1.0 \
  --pocket_encoding gvp
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset crossdock with the
# auto-regressive categorical strategy, gvp pocket encoding and --trial_run.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "train_ar_conf_crossdock-no-litpcba_gvp.sh"

python semlaflow/train.py \
  --data_path semlaflow/saved/data/crossdock-no-litpcba/smol \
  --dataset crossdock \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 1 \
  --batch_cost 2500 \
  --time_alpha 1.0 \
  --pocket_encoding gvp \
  --trial_run
|
||||
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset crossdock with the
# auto-regressive categorical strategy, gvp pocket encoding and --num_gpus 4.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)
#
# NOTE(review): --gres above requests a single GPU while --num_gpus 4 is
# passed below; confirm the allocation matches before submitting.

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "train_ar_conf_crossdock-no-litpcba_gvp.sh"

# Adjust --batch_cost to fit in GPU memory.
# Adjust --num_gpus for 4 or 8 GPUs.

python semlaflow/train.py \
  --data_path semlaflow/saved/data/crossdock-no-litpcba/smol \
  --dataset crossdock \
  --t_per_ar_action 0.3 \
  --max_interp_time 0.4 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.6 \
  --max_num_cuts 2 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 1 \
  --batch_cost 1500 \
  --time_alpha 1.0 \
  --pocket_encoding gvp \
  --num_gpus 4
|
||||
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy and brics decomposition.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.2 \
  --max_interp_time 0.2 \
  --ordering_strategy connected \
  --decomposition_strategy brics \
  --max_action_t 0.8 \
  --max_num_cuts 4 \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --categorical_strategy auto-regressive \
  --val_check_epochs 10 \
  --monitor val-strain \
  --monitor_mode min \
  --batch_cost 4096
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy and --batch_cost 4096.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 10 \
  --batch_cost 4096
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 0.25,
# --dist_loss_weight 0. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.25 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 0.25,
# --dist_loss_weight 1. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.25 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 0.25,
# --dist_loss_weight 1., --d_message/--d_message_hidden 64 and
# --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.25 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --d_message 64 \
  --d_message_hidden 64 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 0.5,
# --dist_loss_weight 1. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 0.5,
# --dist_loss_weight 1., --d_message/--d_message_hidden 64 and
# --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Banner lines written to the job log before any setup happens.
echo "train_ar_geom_max_0.5_dist_loss_small.sh"
echo "pertubs pocket center"

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --d_message 64 \
  --d_message_hidden 64 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 1.0,
# --dist_loss_weight 0. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 1.0 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 1.0,
# --dist_loss_weight 1. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 1.0 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# auto-regressive categorical strategy, --max_interp_time 1.0,
# --dist_loss_weight 1., --d_message/--d_message_hidden 64 and
# --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --t_per_ar_action 0.25 \
  --max_interp_time 1.0 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 2 \
  --d_message 64 \
  --d_message_hidden 64 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy and validation every 5 epochs.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 5
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy and --dist_loss_weight 1.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 1
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy and --conf_coord_strategy harmonic.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --conf_coord_strategy harmonic \
  --val_check_epochs 5
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy and --align_vector.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 5 \
  --align_vector
|
||||
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy, --dist_loss_weight 0. and
# --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 1 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy, --dist_loss_weight 1. and
# --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 1 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy, --conf_coord_strategy harmonic,
# --dist_loss_weight 1. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --dist_loss_weight 1. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --conf_coord_strategy harmonic \
  --val_check_epochs 1 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset geom-drugs with the
# no-change categorical strategy, --conf_coord_strategy harmonic,
# --dist_loss_weight 0. and --is_pseudo_complex.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/geom-drugs/smol \
  --dataset geom-drugs \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --conf_coord_strategy harmonic \
  --val_check_epochs 1 \
  --is_pseudo_complex
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset plinder-ligand with
# the no-change categorical strategy for --epochs 1000.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

python semlaflow/train.py \
  --data_path semlaflow/saved/data/plinder-ligand/smol \
  --dataset plinder-ligand \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 5 \
  --epochs 1000
|
||||
42
experimental/tony/v0/compute_canada/conf/plinder/debug.sh
Normal file
42
experimental/tony/v0/compute_canada/conf/plinder/debug.sh
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset plinder starting from
# the checkpoint passed via --model_checkpoint, with the auto-regressive
# categorical strategy and --c_alpha_only.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "finetune_ar_conf_plinder.sh"

python semlaflow/train.py \
  --data_path semlaflow/saved/data/plinder/smol \
  --model_checkpoint wandb/equinv-plinder/4r7zubnv/checkpoints/last.ckpt \
  --dataset plinder \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 20 \
  --n_pro_layers 6 \
  --batch_cost 3000 \
  --c_alpha_only
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset plinder starting from
# the wandb checkpoint passed via --model_checkpoint, with the
# auto-regressive categorical strategy and --c_alpha_only.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "finetune_ar_conf_plinder.sh"

python semlaflow/train.py \
  --data_path semlaflow/saved/data/plinder/smol \
  --model_checkpoint wandb/equinv-plinder/4r7zubnv/checkpoints/last.ckpt \
  --dataset plinder \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 20 \
  --n_pro_layers 6 \
  --batch_cost 3000 \
  --c_alpha_only
|
||||
@@ -0,0 +1,22 @@
|
||||
# Launches semlaflow/train.py on --dataset plinder with the auto-regressive
# categorical strategy, --n_training_mols 1 and --c_alpha_only.
# The arguments are collected in the positional parameters first so the
# launch line itself stays short.
set -- \
  --data_path semlaflow/saved/data/plinder/smol \
  --dataset plinder \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --val_check_epochs 50 \
  --monitor val-strain \
  --monitor_mode min \
  --n_training_mols 1 \
  --d_message 64 \
  --d_message_hidden 64 \
  --c_alpha_only

python semlaflow/train.py "$@"
|
||||
@@ -0,0 +1,16 @@
|
||||
# Launches semlaflow/train.py on --dataset plinder with the no-change
# categorical strategy, --n_training_mols 1 and --c_alpha_only.
# The arguments are collected in the positional parameters first so the
# launch line itself stays short.
set -- \
  --data_path semlaflow/saved/data/plinder/smol \
  --dataset plinder \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy no-change \
  --val_check_epochs 50 \
  --monitor val-strain \
  --monitor_mode min \
  --n_training_mols 1 \
  --d_message 64 \
  --d_message_hidden 64 \
  --c_alpha_only

python semlaflow/train.py "$@"
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
#SBATCH --mem=32G # total CPU memory
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:v100l:1
#SBATCH --mail-user=tonyzshen@gmail.com
#SBATCH --mail-type=ALL

# Slurm batch job: runs semlaflow/train.py on --dataset plinder starting from
# the geom-drugs-complex-conf checkpoint passed via --model_checkpoint, with
# the auto-regressive categorical strategy and --c_alpha_only.
# Requires in the submitting environment:
#   project        parent directory of the trans-semla checkout
#   WANDB_API_KEY  Weights & Biases API key (kept out of version control)

# Abort early rather than launching training from the wrong directory.
cd "${project:?project must be set to the parent of trans-semla}/trans-semla" || exit 1
module purge
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
source ~/equinv/bin/activate || exit 1

nvidia-smi

# Only add the ':' separator when PYTHONPATH already has content.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${project}/trans-semla"
# The key used to be hard-coded on this line; the committed value should be
# treated as leaked and rotated. It must now be supplied by the caller.
: "${WANDB_API_KEY:?WANDB_API_KEY must be set in the submitting environment}"
export WANDB_API_KEY

echo "finetune_ar_conf_plinder.sh"

python semlaflow/train.py \
  --data_path semlaflow/saved/data/plinder/smol \
  --model_checkpoint semlaflow/saved/models/geom-drugs-complex-conf/last.ckpt \
  --dataset plinder \
  --t_per_ar_action 0.25 \
  --max_interp_time 0.5 \
  --ordering_strategy connected \
  --decomposition_strategy reaction \
  --max_action_t 0.75 \
  --max_num_cuts 3 \
  --dist_loss_weight 0. \
  --type_loss_weight 0. \
  --bond_loss_weight 0. \
  --charge_loss_weight 0. \
  --optimal_transport None \
  --categorical_strategy auto-regressive \
  --monitor val-strain \
  --monitor_mode min \
  --val_check_epochs 20 \
  --n_pro_layers 6 \
  --batch_cost 3000 \
  --c_alpha_only
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
export CUDA_LAUNCH_BLOCKING=1
|
||||
|
||||
echo "train_ar_conf_plinder_dist_loss.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 1. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers_max_t_0.25.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.25 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 0.5 \
|
||||
--time_beta 0.5 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers_max_t_1.0.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 1.0 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers_max_t_1.0.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 1.0 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_conf_plinder.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy no-change \
|
||||
--val_check_epochs 20 \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--batch_cost 2500 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_conf_plinder.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy no-change \
|
||||
--val_check_epochs 20 \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 0.5 \
|
||||
--time_beta 0.5 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_conf_plinder.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy no-change \
|
||||
--val_check_epochs 20 \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_c_alpha.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--pocket_encoding c-alpha
|
||||
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
echo "train_ar_conf_plinder_gvp.sh"
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_all_pro_layers.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 20 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--pocket_encoding gvp
|
||||
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_conf_plinder.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--model_checkpoint wandb/equinv-plinder/s27wdvd8/checkpoints/last.ckpt \
|
||||
--dataset plinder \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy no-change \
|
||||
--val_check_epochs 20 \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--batch_cost 2500 \
|
||||
--n_pro_layers 6 \
|
||||
--c_alpha_only
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/qm9/smol \
|
||||
--dataset qm9 \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy no-change \
|
||||
--val_check_epochs 10
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
echo "train_ar_conf_plinder_c_alpha.sh"
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/zinc15m/smol/subset \
|
||||
--dataset zinc15m \
|
||||
--t_per_ar_action 0.25 \
|
||||
--max_interp_time 0.5 \
|
||||
--ordering_strategy connected \
|
||||
--decomposition_strategy reaction \
|
||||
--max_action_t 0.75 \
|
||||
--max_num_cuts 3 \
|
||||
--dist_loss_weight 0. \
|
||||
--type_loss_weight 0. \
|
||||
--bond_loss_weight 0. \
|
||||
--charge_loss_weight 0. \
|
||||
--optimal_transport None \
|
||||
--categorical_strategy auto-regressive \
|
||||
--monitor val-strain \
|
||||
--monitor_mode min \
|
||||
--val_check_epochs 1 \
|
||||
--batch_cost 2500 \
|
||||
--time_alpha 1.0 \
|
||||
--pocket_encoding c-alpha
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/geom-drugs/smol \
|
||||
--dataset geom-drugs \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096 \
|
||||
--is_pseudo_complex
|
||||
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/geom-drugs/smol \
|
||||
--dataset geom-drugs \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096 \
|
||||
--is_pseudo_complex \
|
||||
--align_vector
|
||||
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/geom-drugs/smol \
|
||||
--dataset geom-drugs \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096 \
|
||||
--is_pseudo_complex \
|
||||
--align_vector \
|
||||
--complex_debug
|
||||
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/geom-drugs/smol \
|
||||
--dataset geom-drugs \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096 \
|
||||
--coord_norm off \
|
||||
--is_pseudo_complex
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/geom-drugs/smol \
|
||||
--dataset geom-drugs \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder-ligand/smol \
|
||||
--dataset plinder-ligand \
|
||||
--val_check_epochs 2 \
|
||||
--batch_cost 4096 \
|
||||
--epochs 1000
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --time=3-00:00:00 #(days-hours:minutes:seconds)
|
||||
#SBATCH --mem=32G # total CPU memory
|
||||
#SBATCH --cpus-per-task=1
|
||||
#SBATCH --gres=gpu:v100l:1
|
||||
#SBATCH --mail-user=tonyzshen@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
cd $project/trans-semla
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
source ~/equinv/bin/activate
|
||||
|
||||
nvidia-smi
|
||||
|
||||
export PYTHONPATH=$PYTHONPATH:$project/trans-semla
|
||||
export WANDB_API_KEY=fe74f9b5ba3b6f8a1a5fa3be198bc1cf09cf14e6
|
||||
|
||||
python semlaflow/train.py \
|
||||
--data_path semlaflow/saved/data/plinder/smol \
|
||||
--dataset plinder \
|
||||
--val_check_epochs 1 \
|
||||
--batch_cost 1300 \
|
||||
--n_validation_mols 56 \
|
||||
--d_message 64 \
|
||||
--d_message_hidden 64
|
||||
23
experimental/tony/v0/compute_canada/readme.md
Normal file
23
experimental/tony/v0/compute_canada/readme.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# Install environment
|
||||
1. Load the required environment modules (Python, RDKit, SciPy stack, Open Babel)
|
||||
```
|
||||
module purge
|
||||
module load python/3.11 rdkit/2024.03.4 scipy-stack/2024a openbabel/3.1.1
|
||||
```
|
||||
|
||||
2. Create a Python virtual environment
|
||||
```
|
||||
virtualenv --no-download ~/equinv
|
||||
source ~/equinv/bin/activate
|
||||
```
|
||||
|
||||
3. Install the required Python packages
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
# Download dataset
|
||||
```
|
||||
mkdir -p semlaflow/saved
|
||||
gdown --folder https://drive.google.com/drive/folders/1rHi5JzN05bsGRGQUcWRmDu-Ilfoa9EAT -O semlaflow/saved
|
||||
```
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user