diff --git a/.env b/.env index f86aa5b..160dc56 100644 --- a/.env +++ b/.env @@ -1,5 +1,9 @@ +# +--------+ Cifutils +--------+ CCD_MIRROR_PATH=/projects/ml/frozen_pdb_copies/2024_12_11_ccd - PDB_MIRROR_PATH=/projects/ml/frozen_pdb_copies/2024_12_01_pdb +# +--------+ Datahub +--------+ +# (Distillation) AF2FB_PATH=/squash/af2_distillation_facebook +# (PadDNA TRansform) +X3DNA=/projects/ml/prot_dna/x3dna-v2.4 diff --git a/.gitignore b/.gitignore index e624379..38fd229 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,220 @@ -valid_remapped -lig_test -dataset.pkl -run_digs.sh -*.pdb -.vscode -slurm_logs/ -**/output/ -**/outputs/ -*/notebooks/ -*/models/ +# Base .gitignore from https://github.com/github/gitignore/blob/main/Python.gitignore + +# Byte-compiled / optimized / DLL files __pycache__/ -*/run_scripts/ -unit_tests/ -ruff.toml -*/scratch/ -*/wandb/ -rf2aa/dataset_20240318.pkl -*.csv +*/__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebooks (unless explicitly not ignored) +.ipynb_checkpoints +**/.ipynb + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# VS Code +.vscode +.history/ + +# Slurm +*/slurm_logs/ *.err *.log -*.json -data/run.sh + +# Ruff +ruff.toml +.ruff_cache + +# Development +dev.py + +# Pytest +*.benchmarks/ + +# Images +*.png +*.pdf +*.svg +*.jpg +*.jpeg +*.gif +*.bmp +*.tiff + +# Versioning +**/version.py + +# W&B +wandb/ + +# Hydra +.hydra/ + +# Outputs +**/outputs/ + +# Logs +logs/ + +# Other +*.sif +*.out + +# Misc +**/notebooks/ +**/models/ +**/run_scripts/ +**/scratch/ + diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 77f65ce..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,31 +0,0 @@ -# This file is a template, and might need editing before it works on your project. -# This is a sample GitLab CI/CD configuration file that should run without any modifications. -# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts, -# it uses echo commands to simulate the pipeline execution. -# -# A pipeline is composed of independent jobs that run scripts, grouped into stages. 
-# Stages run in sequential order, but jobs within stages run in parallel. -# -# For more information, see: https://docs.gitlab.com/ee/ci/yaml/index.html#stages -# -# You can copy and paste this template into a new `.gitlab-ci.yml` file. -# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword. -# -# To contribute improvements to CI/CD templates, please follow the Development guide at: -# https://docs.gitlab.com/ee/development/cicd/templates.html -# This specific template is located at: -# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml - -stages: # List of stages for jobs, and their order of execution - - test - -unit-test-job: # This job runs in the test stage. - stage: test # It only starts when the job in the build stage completes successfully. - rules: - - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - script: - - echo "Running unit tests" - - git submodule update --init - - cd rf2aa - - srun -p gpu --gres=gpu:a4000:1 --cpus-per-task=4 --mem=32G bash ../ci/run_tests.sh - diff --git a/__init__.py b/.project-root similarity index 100% rename from __init__.py rename to .project-root diff --git a/Makefile b/Makefile index 84f727c..d376229 100644 --- a/Makefile +++ b/Makefile @@ -24,25 +24,72 @@ format: ruff format . ruff check --fix . 
-## Create a new conda environment +_github_token_error: + @echo "==============================================================================="; \ + echo "Error: Environment variables GITHUB_USER and GITHUB_TOKEN must be set."; \ + echo ""; \ + echo "You need to set the environment variables GITHUB_USER and GITHUB_TOKEN."; \ + echo "You can create a personal access token on GitHub at:"; \ + echo " https://github.com/settings/tokens"; \ + echo ""; \ + echo "For more info see: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic"; \ + echo ""; \ + echo "To expose these variables, you can use:"; \ + echo "export GITHUB_USER="; \ + echo "export GITHUB_TOKEN="; \ + echo ""; \ + echo "It is recommended that you set these tokens in your .bashrc or .zshrc file for future use."; \ + echo "==============================================================================="; + exit 1; + +_check_conda: + @echo "... checking if conda/mamba is installed" + @command -v $(CONDA_BINARY) >/dev/null 2>&1 || { \ + echo "Error: Conda/mamba is not installed or not found in PATH" >&2; \ + exit 1; \ + } + @echo "... found conda executable: $(CONDA_BINARY)" + +_check_tokens: + @echo "... checking if GITHUB_USER and GITHUB_TOKEN are set" + @if [ -z "$(GITHUB_USER)" ] || [ -z "$(GITHUB_TOKEN)" ]; then \ + $(MAKE) _github_token_error; \ + fi + @echo "... found GITHUB_USER ($(GITHUB_USER)) and GITHUB_TOKEN." 
+ +## Create a new conda environment and install modelhub env: - $(CONDA_BINARY) env create -n modelhub --file environment.yaml - conda init - conda activate modelhub - pip install -e ".[dev]" + @echo "Creating modelhub conda environment: modelhub" + + @$(MAKE) --no-print-directory _check_tokens + @$(MAKE) --no-print-directory _check_conda + + @$(CONDA_BINARY) env create -n modelhub --file environment.yaml + @conda init + @conda activate modelhub + @pip install -e ".[dev]" + @python -m biotite.setup_ccd + ## Install modelhub locally into the current environment install: # Install the conda requirements in the current activated environment $(CONDA_BINARY) env update --file environment.yaml # Install the pip requirements in the current activated environment - pip install -e ".[dev]" + @pip install -e ".[dev]" + @python -m biotite.setup_ccd ## Build the apptainer image -apptainer: +base_apptainer: $(eval DATE := $(shell date +%Y-%m-%d)) - $(eval IMAGE := modelhub_$(DATE).sif) - bash ./scripts/build_apptainer.sh + bash ./scripts/build_base_apptainer.sh + +# Set INSTALL_PROJECT to true to install modelhub within the apptainer (much slower) +# e.g., `make INSTALL_PROJECT=true freeze_apptainer` or `make freeze_apptainer INSTALL_PROJECT=true` +INSTALL_PROJECT ?= false +freeze_apptainer: + $(eval DATE := $(shell date +%Y-%m-%d)) + bash ./scripts/freeze_apptainer.sh $(INSTALL_PROJECT) ## Run pytest and generate coverage report test: diff --git a/README.md b/README.md index cf6d46a..75753fb 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,210 @@ -RoseTTAFold All-Atom --------------------- +# Modelhub -This repository contains the code to training and running inference on -RoseTTAFold All-Atom (RFAA), a neural network that can predict the structures -of proteins in complex with DNA, RNA, and/or small molecule ligands. 
+- [Modelhub](#modelhub) + - [Background](#background) + - [Division of code between Modelhub, Datahub, and Cifutils](#division-of-code-between-modelhub-datahub-and-cifutils) + - [Cifutils](#cifutils) + - [Datahub](#datahub) + - [Training, Validation, and Inference](#training-validation-and-inference) + - [Training and Validation](#training-and-validation) + - [Inference](#inference) + - [Setup](#setup) + - [Apptainers](#apptainers) + - [Base Apptainer](#base-apptainer) + - [Frozen Apptainer](#frozen-apptainer) + - [Shebang](#shebang) + - [General Use](#general-use) + - [Debugging](#debugging) -`rf2aa/` contains the model and training code. -`data/` contains code used to curate the training data from the PDB. +## Background +This repository constitutes the base for deep-learning method development at the Institute for Protein Design. -## Contributing to RFAA +It is symbiotic with two other Institute for Protein Design repositories: +- [cifutils](https://github.com/baker-laboratory/cifutils), which manages input parsing and data cleaning +- [datahub](https://github.com/baker-laboratory/datahub), which manages input featurization and holds our composable `Transform` components -### Set Up +Within this ontology, `modelhub` contains the *architectures*, *training* code, and *inference* endpoints. + +## Division of code between Modelhub, Datahub, and Cifutils + +Across our codebases, we balance the need to develop quickly with the need to write code that we can continue to maintain and that is easy to understand. We below lay out some thoughts on what code should live where. + +We enforce a strict dependency flow of `modelhub` -> (depends on) `datahub` -> (depends on) `cifutils`; it would be a circular anti-pattern to thus import any `datahub` or `modelhub` functions from within `cifutils`. + +### Cifutils + +[cifutils](https://github.com/baker-laboratory/cifutils) is the most static of our three codebases. 
Basic parsing functionality, RDKit and other molecular toolkit utilities, and `AtomArray` quality-of-life tools live in this repository. + +Examples of `cifutils` functions are: +- All functions related to **parsing structural files from source**; e.g., keeping/removing hydrogens, resolving occupancy, etc. +- Utility functions to manipulate `AtomArrays`, the core API of the `biotite` library, upon which we heavily rely +- Utility functions for common bioinformatics software, such as `RDKit`, that interface with `AtomArrays` + +As a foundational library for the Institute for Protein Design, `cifutils` functions most like an open-source codebase. We must keep the code easy-to-understand and easy-to-maintain, both now and into the future. As such, `cifutils`: +- Maintains the **highest code quality standard**, requiring well-documented, easy-to-maintain code with adequate test coverage (we aim for **>85%** coverage) +- **Strictly versions** to minimize breaking changes with downstream repositories + +You should write code in `cifutils` if: +- You are writing **core** `AtomArray`-level functionality that will be broadly useful, not only to those at the Institute for Protein Design but possibly the wider bioinformatics community (i.e., without dependencies on, or even knowledge of, `datahub` or `modelhub`) +- You are willing to spend some additional time to ensure the code is **scalable, well-tested, and maintainable** + +Quick-and-dirty experiments that require modifying `cifutils` can be performed by submoduling or cloning the repository and exporting a local path. + +### Datahub + +[datahub](https://github.com/baker-laboratory/datahub) manages data loading, preprocessing, and featurization pipelines for structure-dependent deep-learning models. We offer three core components: a `Transforms` library, a set of `Preprocessing` scripts, and `Datasets`.
+- **Transforms**: A series of composable classes that take as input a dictionary containing sequence- and structure-based data (in the form of an `AtomArray`) and perform arbitrary operations, analogous to TorchVision's [approach](https://pytorch.org/vision/main/transforms.html) for computer vision +- **Preprocessing**: Scripts and functions for common data cleaning and preparation tasks, including specialized pipelines for frequent use cases (e.g., antibodies, clash detection, cleaning PDB data, etc.). Many of these *scripts* output `parquet` files stored to disk that are sampled from at train-time, while the *functions* are called by the scripts to clean, label, or filter the data (e.g., `has_clash()`, etc.) +- **Datasets**: The base `Datasets` and `Sampler` classes used for training, imported by `modelhub` + +`datahub` is less static than `cifutils`; however, it still must operate as a stand-alone library that others can continue to build around and upon, even without `modelhub`. We strive to maintain `datahub` like an open-source software project such that others in the lab can easily understand, and build upon, our base components. We focus on **maintainable** and **flexible** code - if a particular `Transform` is bespoke or non-generalizable (at least initially), then the `/projects` folder within `Modelhub` may be a more appropriate place for initial development. + +You should write code in `datahub` if: +- You are writing flexible, generic *pre-processing scripts* or *functions* that others in the lab have expressed interest in using (vs. a single-purpose pipeline or feature to test a hypothesis) + - **Example that should live in `datahub`**: You are writing a pre-processing pipeline to label all beta barrels in the PDB. Your scripts, written in a functional manner, may be a good candidate for `datahub/scripts/preprocessing`, so long as you are willing to write them generally and include tests. 
Similarly, if a single function may be generalizable but the pipeline is bespoke, that single function (with a test) could still be included as a stand-alone element in `datahub`, e.g., + ```python + atom_array_has_beta_barrel(atom_array: AtomArray) -> bool + ``` + - **Example that should live in `modelhub/projects`**: You have pulled together a script that loads PDB files, includes manual annotations, and saves out to CIF. Such a script may be appropriate for the specific use case but is unlikely to generalize across other use cases. +- You are writing `Transforms` that generalize to additional use cases beyond the current project + - **Example that should live in `datahub`**: Any `Transform` that adds a useful annotation to an `AtomArray` (e.g., annotating pocket residues, hydrogen bonds, SASA, etc.) + - **Example that should live in `datahub`**: A `Transform` that pads DNA with generated B-form structure, as is done in AF-3; such a `Transform` may be applicable to both structure prediction and design, when proven effective + - **Example that should live in `modelhub/projects`**: A `Transform` that aggregates and/or concatenates features for a bespoke model pipeline +- You are willing to spend some additional time to ensure the code is scalable, well-tested, and maintainable. Otherwise the `projects` folder of `modelhub` may be a more appropriate place in the interim + +## Training, Validation, and Inference + +> If you are developing at the IPD, our `shebang` executables will take care of identifying and executing with the most up-to-date apptainer. If you are not at the IPD, you will need to ensure you have the appropriate apptainer. See below for details. + +NOTE: For Training, Validation, and Inference, we make heavy use of [Hydra](https://hydra.cc/) for configuration management. + +Before running any of the below commands, you will need to ensure `datahub` and `cifutils` are in your `PYTHONPATH`.
E.g., ``` -git clone https://git.ipd.uw.edu/jue/RF2-allatom.git -cd RF2-allatom +export PYTHONPATH="/home//projects/datahub/src:/home//projects/cifutils/src" ``` -If you are on digs, the S3nv.sif apptainer has all the relevant packages. To get started coding: +### Training and Validation +For Training and Validation, when you execute `train.py` or `validate.py`, you will need to provide an *experiment* Hydra config. Experiments are a Hydra best-practice pattern to enable us to maintain multiple configurations; see more in the [Hydra documentation](https://hydra.cc/docs/patterns/configuring_experiments/) +and in the `configs/experiment` sub-directory. + +For example, to test AF-3 training without confidence, run: ``` -export PYTHONPATH="../RF2-allatom" +./src/modelhub/train.py experiment=quick-af3 debug=default ``` -First, run the test suite: +**Explanation:** +- `./src/modelhub/train.py` — we execute our `train.py` like a bash executable, which triggers the `shebang` code to find the correct apptainer. It's equivalent to `apptainer exec --nv /path/to/apptainer python ./src/modelhub/train.py` +- `experiment=quick-af3` — we identify the experiment we want to use for training; in this case, `quick-af3`, which can be viewed at `configs/experiment/quick-af3.yaml`. This experiment is a simple test config for AF-3 that loads and runs more rapidly than the full training config +- `debug=default` - a setting letting Hydra know we are debugging; when we debug, we perform some automatic time-savings like setting a small diffusion batch size and crop size. You could remove this line if you don't want those options. You can explore more about various `debug` options in `configs/debug` + +For validation only, run the following: ``` -apptainer exec --nv /software/containers/versions/SE3nv/SE3nv-20240415.sif pytest tests/ +./src/modelhub/validate.py experiment=quick-af3 debug=default ``` -If all the tests pass, you have a stable version of the code.
-### Running model training - -We use a package called hydra to configure different training runs of the model. Config files for different training runs can be found in `rf2aa/config/train`. The base trainable version is `rf2aa/config/train/rf2aa.yaml`, to run training with this version, run: +Note that since we use `hydra`, you could specify additional setup arguments using the command line. For example, by default, we `prevalidate` - running validation at the beginning of training so we develop a baseline and catch any errors (especially out-of-memory errors) before training for a full epoch. If you don't want that behavior, you could override in-line: ``` -/software/containers/versions/SE3nv/SE3nv-20240415.sif trainer_new.py --config-name rf2aa +./src/modelhub/train.py experiment=quick-af3 debug=default trainer.prevalidate=false ``` -These tests are most often run on a4000s on digs. If you have a separate installation of cifutils in your home directory, this can potentially break the tests. -If you make changes in the code, they should NOT break backwards compatibility, e.g. there should be a flag in the yaml files that would make it as if your changes were never committed. +You can view the flattened Hydra configuration to determine how to best override or add additional arguments by: +- Running training or validation and viewing the pretty-printed file, which looks like: +![alt text](assets/example_config.png) +- Adding `--cfg job` to your launch command, which prints the config for the application and then exits + +### Inference + +To support multiple models and multiple projects, we build an `InferenceEngine` for each use case. For end-users the details of the `InferenceEngine` are not necessary; the appropriate engine can be specified with the `inference_engine` argument.
+ +For example, to run the latest AF-3 model with confidence, we can execute (if `cifutils` and `datahub` are in the `PYTHONPATH`): +``` +./src/modelhub/inference.py inference_engine=af3 inputs='./tests/data/example_with_ncaa.json' +``` + +We can then modify the command by adding/removing arguments with Hydra to our liking; for example, to dump diffusion trajectories and only include one model per CIF file: +``` +./src/modelhub/inference.py inference_engine=af3 inputs='./tests/data/example_with_ncaa.json' dump_trajectories=true one_model_per_file=true +``` + +More details can be found in the [inference README](src/modelhub/inference_engines/README.md) + +## Setup + +> If you are developing at the IPD, then our `shebang` executables will handle the Apptainer dependencies; no need to run the commands below. See the `shebang` section below. + +### Apptainers +To accelerate development and better contain dependencies, we offer two apptainers: +- `base_apptainer`: Contains all of the development dependencies, pre-compiled DeepSpeed, but *NOT* `cifutils` or `datahub`. The rationale for the base apptainer is that you expose these libraries via your PYTHONPATH/PATH to allow you to develop & pull updates for these libraries without having to re-build any apptainer. +- `freeze_apptainer`: Takes the `base_apptainer` as its image, and adds versioned `cifutils`, `datahub`, and (optionally) pip-installs `modelhub` as well (useful for releasing self-contained inference code). The rationale for these apptainers is to provide designers with a stable environment to tackle design problems in. + +#### Base Apptainer + +To make the base apptainer, run: +``` +make base_apptainer +``` +from the project root. This container will **not** contain `cifutils` or `datahub`; those paths must be exported explicitly during development (e.g., the paths to their respective submodules or clones elsewhere). + +Building this apptainer pre-compiles DeepSpeed, among other actions, and is slow. 
You **should not** need to re-build this apptainer often; changes to `datahub` and `cifutils` can be addressed much more efficiently through the `freeze_apptainer` command specified below. + +> NOTE: Since we pre-compile CUDA-specific DeepSpeed, you must run `make base_apptainer` on a GPU node + +> NOTE: You will need to adjust the IPD-specific paths to frozen copies of the PDB and the CCD + +#### Frozen Apptainer + +To make a container that contains `cifutils` and `datahub`, but not `modelhub`, run: +``` +make freeze_apptainer +``` +This will use the `base_apptainer` pointed to by the `shebang` symlink as a base. Note that by default the versions of `cifutils` and `datahub` are fixed; update the `freeze_apptainer.spec` file to adjust the version numbers and/or add dependencies. + +To make a container that contains `modelhub`, `datahub`, and `cifutils` (e.g., for production usage across the lab), run +``` +make INSTALL_PROJECT=true freeze_apptainer +``` +> NOTE: Since we build from the `base_apptainer` image, which contains pre-compiled DeepSpeed, `make freeze_apptainer` does NOT need to be run from a GPU + +### Shebang + +#### General Use +We use `shebang` to help manage and version apptainers. Namely: +- The shebang lines (`#!/bin/bash` ...) at the top of entry point scripts like `train.py` redirect the system to `scripts/shebang/modelhub_exec.sh` +- The script `modelhub_exec.sh` in turn identifies the correct Apptainer and executes your command +- Apptainers are symlinks in `scripts/shebang` to elsewhere on the DIGS (where they are versioned); thus, when we update apptainers, we must also update the symlink. This allows us to track which apptainers to use for a given branch of the code at any given time (provided you update the symlinks for your branch when you switch out which apptainer you run with!)
+ +For example, to launch a dummy training run, one could type (after adding `cifutils` and `datahub` to your `PYTHONPATH`): +``` +cd src/modelhub +./train.py experiment=none-00-dummy +``` +> You may need to adjust the permissions on `train.py` (e.g., `chmod +x train.py`) in order to execute the file like a script. + +#### Debugging +We also support VSCode-native debugging with Apptainers. To debug: +1. Update your `launch.json` to include `Python: Attach`; for example, add the configuration: + ``` + { + "name": "Python: Attach", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 2345 + } + } + ``` +2. Add any interactive debug breakpoints in VSCode +3. Set the `DEBUG_PORT` to `2345`, and then execute your script with `shebang` like normal. That is: + ``` + export DEBUG_PORT=2345 + ./train.py experiment=none-00-dummy + ``` +4. When prompted in the terminal, launch the VSCode debug session (shortcut: `F5`) + +Happy debugging! -### Contributing to model code -Generally, we follow software engineering practices of: -1. Not duplicating functionality that is already in the code -2. Keeping functions as short as possible, and splitting complicated functions into multiple functions -3. Using object oriented programming, which means subclassing already existing classes when possible. -4. Writing tests for our code and sending small functional PRs for review. -5. Maintaining code stability and not breaking backwards compatibility for users using the package. -To write new blocks in RF, you can go to the rf2aa/model directory and add the new block into the simulator_blocks.py file (and be sure to add a relevant name in the blocks_factory dictionary). These names can be referenced in hydra configs: see rf2aa.yaml for an example with any keyword arguments necessary to initialize the block.
diff --git a/apptainer.spec b/base_apptainer.spec similarity index 79% rename from apptainer.spec rename to base_apptainer.spec index 4229e86..53e7eef 100644 --- a/apptainer.spec +++ b/base_apptainer.spec @@ -3,7 +3,7 @@ From: ubuntu:24.04 IncludeCmd: yes # NOTE: This apptainer was written using apptainer version `1.1.6+2-g6808b5172-ipd` # To build this apptainer, use: -# apptainer build --bind $PWD:/modelhub_host modelhub_apptainer.sif apptainer.spec +# make base_apptainer %setup # Create a directory in the container to bind the host's current working directory @@ -15,8 +15,9 @@ IncludeCmd: yes # ... for mounting `/squash` with --bind mkdir ${APPTAINER_ROOTFS}/squash - %files + /etc/localtime + /etc/hosts environment.yaml /opt/environment.yaml %post @@ -47,7 +48,10 @@ IncludeCmd: yes apt-get clean # Clone CUTLASS (for DeepSpeed) - git clone https://github.com/NVIDIA/cutlass /opt/cutlass + git clone https://github.com/NVIDIA/cutlass.git /opt/cutlass + + # Clone DeepSpeed (so we can pre-install the wheel) + git clone --branch v0.16.2 https://github.com/deepspeedai/DeepSpeed.git /opt/deepspeed ## ENVIRONMENT CREATION & DEPENDENCY INSTALLATION # Download miniconda @@ -66,15 +70,27 @@ IncludeCmd: yes # Add conda environment to PATH export PATH=/usr/envs/modelhub-apptainer/bin:$PATH + echo "Proceeding with DeepSpeed reinstallation." + + ## PRE-COMPILE DEEPSPEED FROM WHEEL + # (Overwrite deepspeed installation from the `environment.yaml`) + pip uninstall deepspeed -y # Avoid interactive prompts + + # (Flags for building the Evoformer attention) + export TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9" + export DS_BUILD_EVOFORMER_ATTN=1 + export CUTLASS_PATH=/opt/cutlass/ + + # Reinstall DeepSpeed, pre-compiling the evoformer attention kernel + pip wheel /opt/deepspeed -w /opt/deepspeed + pip install /opt/deepspeed/deepspeed-0.16.2+b344c04d-cp311-cp311-linux_x86_64.whl + # Run the biotite setup command # (Temporary measure until we switch to released Biotite version) .
/usr/etc/profile.d/conda.sh conda activate modelhub-apptainer python -m biotite.setup_ccd - # deepspeed - pip install deepspeed==0.15.1 - # clean up files to reduce size # ... remove conda mamba clean -a -y @@ -93,7 +109,7 @@ IncludeCmd: yes %runscript # NOTE: The %runscript is invoked when the container is run without specifying a different command. - exec "$@" + exec python "$@" %help modelhub environment for running modelhub independently and for development diff --git a/ci/run_tests.sh b/ci/run_tests.sh deleted file mode 100644 index 3b20a23..0000000 --- a/ci/run_tests.sh +++ /dev/null @@ -1,4 +0,0 @@ -APP=/software/containers/versions/rf_diffusion_aa/24-05-21/rf_diffusion_aa.sif -PYTHONPATH=.. $APP -mpytest --benchmark-skip --ignore tests/test_semantics.py --durations=10 tests - - diff --git a/configs/callbacks/default.yaml b/configs/callbacks/default.yaml new file mode 100644 index 0000000..361d1a5 --- /dev/null +++ b/configs/callbacks/default.yaml @@ -0,0 +1,5 @@ +defaults: + - train_logging + - metrics_logging + - dump_validation_structures + - _self_ \ No newline at end of file diff --git a/configs/callbacks/dump_validation_structures.yaml b/configs/callbacks/dump_validation_structures.yaml new file mode 100644 index 0000000..f872333 --- /dev/null +++ b/configs/callbacks/dump_validation_structures.yaml @@ -0,0 +1,6 @@ +dump_validation_structures_callback: + _target_: modelhub.callbacks.dump_validation_structures.DumpValidationStructuresCallback + save_dir: ${paths.output_dir}/val_structures + dump_predictions: False + one_model_per_file: False + dump_trajectories: False \ No newline at end of file diff --git a/configs/callbacks/metrics_logging.yaml b/configs/callbacks/metrics_logging.yaml new file mode 100644 index 0000000..860c32d --- /dev/null +++ b/configs/callbacks/metrics_logging.yaml @@ -0,0 +1,14 @@ +store_validation_metrics_in_df_callback: + _target_: modelhub.callbacks.metrics_logging.StoreValidationMetricsInDFCallback + save_dir: 
${paths.output_dir}/val_metrics + metrics_to_save: "all" + +log_af3_validation_metrics_callback: + _target_: modelhub.callbacks.metrics_logging.LogAF3ValidationMetricsCallback + metrics_to_log: + # Only logs if present in the metric output dictionary + # Must be subset of metrics_to_save + - by_type_lddt + - all_atom_lddt + - distogram_loss + - distogram_comparisons \ No newline at end of file diff --git a/configs/callbacks/train_logging.yaml b/configs/callbacks/train_logging.yaml new file mode 100644 index 0000000..be8a1b2 --- /dev/null +++ b/configs/callbacks/train_logging.yaml @@ -0,0 +1,16 @@ +log_af3_training_losses_callback: + _target_: modelhub.callbacks.train_logging.LogAF3TrainingLossesCallback + log_every_n: 10 + log_full_batch_losses: true + +log_learning_rate_callback: + _target_: modelhub.callbacks.train_logging.LogLearningRateCallback + log_every_n: 10 + +log_model_parameters_callback: + _target_: modelhub.callbacks.train_logging.LogModelParametersCallback + +log_dataset_sampling_ratios_callback: + _target_: modelhub.callbacks.train_logging.LogDatasetSamplingRatiosCallback + + diff --git a/configs/dataloader/default.yaml b/configs/dataloader/default.yaml new file mode 100644 index 0000000..32ba843 --- /dev/null +++ b/configs/dataloader/default.yaml @@ -0,0 +1,15 @@ +train: + dataloader_params: + # These parameters will be unpacked as kwargs for the DataLoader + batch_size: 1 + num_workers: 2 + prefetch_factor: 3 + n_fallback_retries: 4 + +val: + dataloader_params: + # These parameters will be unpacked as kwargs for the DataLoader + batch_size: 1 + num_workers: 2 + prefetch_factor: 3 + n_fallback_retries: 0 # Disable fallback retries for validation \ No newline at end of file diff --git a/configs/datasets/af3.yaml b/configs/datasets/af3.yaml new file mode 100644 index 0000000..7153e9e --- /dev/null +++ b/configs/datasets/af3.yaml @@ -0,0 +1,21 @@ +# AF3 dataset configuration with monomer distillation + +defaults: + - base + # The @ symbol specifies the 
tree under which the item will be attached to the config + - train/pdb/af3_train_interface@train.pdb.sub_datasets.interface + - train/pdb/af3_train_pn_unit@train.pdb.sub_datasets.pn_unit + - train: + - monomer_distillation + - val/af3_validation@val.af3_validation + - _self_ + +# Dataloading pipeline to use +pipeline_target: datahub.pipelines.af3.build_af3_transform_pipeline + +# Dataset weighting +train: + pdb: + probability: 0.5 + monomer_distillation: + probability: 0.5 \ No newline at end of file diff --git a/configs/datasets/base.yaml b/configs/datasets/base.yaml new file mode 100644 index 0000000..0d63139 --- /dev/null +++ b/configs/datasets/base.yaml @@ -0,0 +1,12 @@ +# Base Transform defaults +diffusion_batch_size_train: 48 +diffusion_batch_size_inference: 5 + +n_recycles_train: 4 +n_recycles_validation: 10 + +n_msa: 1024 +crop_size: 384 +max_atoms_in_crop: 5000 + +key_to_balance: n_tokens_total \ No newline at end of file diff --git a/configs/datasets/train/monomer_distillation.yaml b/configs/datasets/train/monomer_distillation.yaml new file mode 100644 index 0000000..9d9e03f --- /dev/null +++ b/configs/datasets/train/monomer_distillation.yaml @@ -0,0 +1,38 @@ +monomer_distillation: + dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: ${paths.data.failed_examples_dir} + + # cif parser arguments + cif_parser_args: + cache_dir: null + load_from_cache: False + save_to_cache: False + + # metadata parser + dataset_parser: + _target_: datahub.datasets.parsers.GenericDFParser + pn_unit_iid_colnames: null + + # metadata dataset + dataset: + _target_: datahub.datasets.datasets.PandasDataset + name: af2fb_distillation + id_column: example_id + data: ${paths.data.monomer_distillation_parquet_dir}/af2_distillation_facebook.parquet + columns_to_load: + - example_id + - path + return_key: null + transform: + _target_: ${datasets.pipeline_target} + is_inference: False + protein_msa_dirs: [{"dir": 
"${paths.data.monomer_distillation_data_dir}/msa", "extension": ".a3m", "directory_depth": 2}] + rna_msa_dirs: [] + n_recycles: ${datasets.n_recycles_train} + crop_size: ${datasets.crop_size} + n_msa: ${datasets.n_msa} + diffusion_batch_size: ${datasets.diffusion_batch_size_train} + max_atoms_in_crop: ${datasets.max_atoms_in_crop} + crop_contiguous_probability: 0.25 + crop_spatial_probability: 0.75 diff --git a/configs/datasets/train/pdb/af3_train_interface.yaml b/configs/datasets/train/pdb/af3_train_interface.yaml new file mode 100644 index 0000000..6dd1152 --- /dev/null +++ b/configs/datasets/train/pdb/af3_train_interface.yaml @@ -0,0 +1,45 @@ +defaults: + - base + +dataset: + dataset_parser: + _target_: datahub.datasets.parsers.InterfacesDFParser + dataset: + name: interface + data: ${paths.data.pdb_data_dir}/interfaces_df_train.parquet + filters: + # filters common across all PDB datasets + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + # interface specific filters + - "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + - "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + - "is_inter_molecule" + columns_to_load: + # columns common across all PDB datasets + - example_id + - pdb_id + - assembly_id + - deposition_date + - resolution + - num_polymer_pn_units + - method + - cluster + - n_prot + - n_nuc + - n_ligand + - n_peptide + # interface specific columns + - pn_unit_1_iid + - pn_unit_2_iid + - pn_unit_1_non_polymer_res_names + - pn_unit_2_non_polymer_res_names + - is_inter_molecule + - all_pn_unit_iids_after_processing + - involves_loi + transform: + # interface-specific Transform pipeline parameters + crop_contiguous_probability: 0.0 + 
crop_spatial_probability: 1.0 diff --git a/configs/datasets/train/pdb/af3_train_pn_unit.yaml b/configs/datasets/train/pdb/af3_train_pn_unit.yaml new file mode 100644 index 0000000..fa4a220 --- /dev/null +++ b/configs/datasets/train/pdb/af3_train_pn_unit.yaml @@ -0,0 +1,41 @@ +defaults: + - base + +dataset: + dataset_parser: + _target_: datahub.datasets.parsers.PNUnitsDFParser + dataset: + name: pn_unit + data: ${paths.data.pdb_data_dir}/pn_units_df_train.parquet + filters: + # filters common across all PDB datasets + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + # pn_unit specific filters + - "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + columns_to_load: + # columns common across all PDB datasets + - example_id + - pdb_id + - assembly_id + - deposition_date + - resolution + - num_polymer_pn_units + - method + - cluster + - n_prot + - n_nuc + - n_ligand + - n_peptide + - total_num_atoms_in_unprocessed_assembly + # pn_unit specific columns + - q_pn_unit_iid + - q_pn_unit_non_polymer_res_names + - all_pn_unit_iids_after_processing + - q_pn_unit_is_loi + transform: + # pn_unit-specific Transform pipeline parameters + crop_contiguous_probability: 0.3333333333333333 + crop_spatial_probability: 0.6666666666666667 diff --git a/configs/datasets/train/pdb/base.yaml b/configs/datasets/train/pdb/base.yaml new file mode 100644 index 0000000..4cd9918 --- /dev/null +++ b/configs/datasets/train/pdb/base.yaml @@ -0,0 +1,33 @@ +dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: ${paths.data.failed_examples_dir} + cif_parser_args: + cache_dir: null + load_from_cache: false + save_to_cache: false + dataset: + _target_: datahub.datasets.datasets.PandasDataset + # we will use the example_id as the unique column + id_column: example_id + 
# return all keys (do not subset) + return_key: null + transform: + # common Transform pipeline components for all PDB datasets + _target_: ${datasets.pipeline_target} + is_inference: False + protein_msa_dirs: ${paths.data.protein_msa_dirs} + rna_msa_dirs: ${paths.data.rna_msa_dirs} + n_recycles: ${datasets.n_recycles_train} + crop_size: ${datasets.crop_size} + n_msa: ${datasets.n_msa} + diffusion_batch_size: ${datasets.diffusion_batch_size_train} + max_atoms_in_crop: ${datasets.max_atoms_in_crop} + +weights: + _target_: datahub.samplers.calculate_weights_for_pdb_dataset_df + beta: 0.5 + alphas: + a_prot: 3.0 # 3 for AF-3 + a_nuc: 0.0 # 3 for AF-3 + a_ligand: 1.0 # 1 for AF-3 + a_loi: 5.0 # 5 for AF-3 \ No newline at end of file diff --git a/configs/datasets/val/af3_validation.yaml b/configs/datasets/val/af3_validation.yaml new file mode 100644 index 0000000..9ed04df --- /dev/null +++ b/configs/datasets/val/af3_validation.yaml @@ -0,0 +1,12 @@ +defaults: + - base + +dataset: + dataset_parser: + _target_: datahub.datasets.parsers.ValidationDFParserLikeAF3 + dataset: + _target_: datahub.datasets.datasets.PandasDataset + data: ${paths.data.pdb_data_dir}/entry_level_val_df.parquet + filters: + # NOTE: We exclude these examples from validation because they produce an error upon RDKit small molecule processing that causes a data loading fallback + - example_id not in ["{['validation']}{7erc}{1}{[]}", "{['validation']}{7qbs}{1}{[]}", "{['validation']}{7z0n}{1}{[]}"] diff --git a/configs/datasets/val/base.yaml b/configs/datasets/val/base.yaml new file mode 100644 index 0000000..76abb02 --- /dev/null +++ b/configs/datasets/val/base.yaml @@ -0,0 +1,26 @@ +dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: ${paths.data.failed_examples_dir} + cif_parser_args: + cache_dir: null + load_from_cache: False + save_to_cache: False + dataset: + _target_: datahub.datasets.datasets.PandasDataset + # we will use the example_id as the 
unique column + id_column: example_id + # return all keys (do not subset) + return_key: null + transform: + # common Transform pipeline components for all PDB datasets + _target_: ${datasets.pipeline_target} + is_inference: True + protein_msa_dirs: ${paths.data.protein_msa_dirs} + rna_msa_dirs: ${paths.data.rna_msa_dirs} + n_recycles: ${datasets.n_recycles_validation} + crop_size: null # do not crop for inference + n_msa: ${datasets.n_msa} + diffusion_batch_size: ${datasets.diffusion_batch_size_inference} + max_atoms_in_crop: null # do not crop for inference + return_atom_array: True # return atom array for inference +key_to_balance: ${datasets.key_to_balance} \ No newline at end of file diff --git a/configs/debug/default.yaml b/configs/debug/default.yaml new file mode 100644 index 0000000..28618e5 --- /dev/null +++ b/configs/debug/default.yaml @@ -0,0 +1,64 @@ +# @package _global_ + +defaults: + - override /logger: null + +# default debugging setup, runs 1 full epoch +# other debugging configs can inherit from this one + +# overwrite task name so debugging logs are stored in separate folder +task_name: "debug" + +extras: + ignore_warnings: False + enforce_tags: False + +# sets level of all command line loggers to 'DEBUG' +# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/ +hydra: + job_logging: + root: + level: DEBUG + # use the below to also set hydra loggers to 'DEBUG' + verbose: True + +# Print example ID before forward pass +callbacks: + print_example_id_before_forward_pass: + _target_: modelhub.callbacks.train_logging.PrintExampleIDBeforeForwardPassCallback + +dataloader: + train: + dataloader_params: + batch_size: 1 + num_workers: 0 # debuggers don't like multiprocessing -- work on main thread + pin_memory: False # disable gpu memory pin + prefetch_factor: null # must be null for num_workers=0 + n_fallback_retries: 0 # disable fallback retries for debugging + + val: + dataloader_params: + batch_size: 1 + num_workers: 0 + pin_memory: False + 
prefetch_factor: null # must be null for num_workers=0 + + +datasets: + crop_size: 100 # set small crop size for quick debugging + diffusion_batch_size_train: 1 + diffusion_batch_size_inference: 1 + n_recycles_train: 1 + n_recycles_validation: 1 + n_msa: 128 + key_to_balance: null # otherwise big examples will be processed first + +trainer: + devices_per_node: 1 + limit_train_batches: 1 + limit_val_batches: 1 + validate_every_n_epochs: 1 + +# Set tags to help identify debugging runs +tags: + - debug \ No newline at end of file diff --git a/configs/debug/train_specific_examples.yaml b/configs/debug/train_specific_examples.yaml new file mode 100644 index 0000000..22d8680 --- /dev/null +++ b/configs/debug/train_specific_examples.yaml @@ -0,0 +1,21 @@ +# @package _global_ + +# See: https://hydra.cc/docs/patterns/configuring_experiments/ + +# to execute this experiment run: +# python train.py +debug=train_specific_examples [any other arguments] + +defaults: + - default + - gpu + +datasets: + # you can add specific example IDs here to load a subset of the dataset (training) + subset_to_example_ids: + - "{['pdb', 'pn_units']}{3px1}{1}{['A_3']}" + val: null + +tags: + - debug + - train + - specific-examples \ No newline at end of file diff --git a/configs/experiment/ncorley/af3-elements-as-ligand-atom-names.yaml b/configs/experiment/ncorley/af3-elements-as-ligand-atom-names.yaml new file mode 100644 index 0000000..d9d1162 --- /dev/null +++ b/configs/experiment/ncorley/af3-elements-as-ligand-atom-names.yaml @@ -0,0 +1,83 @@ +# @package _global_ + +name: af3-elements-as-ligand-atom-names + +# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/ +defaults: + - override /trainer: af3 + - override /datasets: af3 + - override /model: af3 + +tags: + # list of tags to add to the run ( & on wandb to easily find & filter runs) + - atom-names + - experiment + +project: af3 + +ckpt_path:
/projects/ml/modelhub/inference/weights_with_no_confidence_2025_1_21_new_modelhub.ckpt + +model: + lr_scheduler: + base_lr: 0.9e-3 # 1/2 of original learning rate (1.8e-3) + +datasets: + train: + pdb: + probability: 1.0 + sub_datasets: + interface: + dataset: + transform: + use_element_for_atom_names_of_atomized_tokens: True + dataset: + # Same as AF-3 training, but limit to protein-ligand interfaces + filters: + # (from before) + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + - > + ~(pn_unit_1_non_polymer_res_names.notnull() and + pn_unit_1_non_polymer_res_names.str.contains( + '${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', + regex=True)) + - > + ~(pn_unit_2_non_polymer_res_names.notnull() and + pn_unit_2_non_polymer_res_names.str.contains( + '${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', + regex=True)) + - "is_inter_molecule" + + # only protein-ligand interfaces + - "(n_prot == 1 and n_nuc == 0 and n_ligand == 1)" + pn_unit: + dataset: + transform: + use_element_for_atom_names_of_atomized_tokens: True + dataset: + # Same as AF-3 training, but limit to protein-ligand interfaces + filters: + # (from before) + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + - "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + + # only proteins or ligands + - "(n_prot == 1 or n_ligand == 1)" + # Datasets set to null are ignored + monomer_distillation: null + val: + af3_validation: + dataset: + transform: + use_element_for_atom_names_of_atomized_tokens: True + dataset: + filters: + - "n_tokens_total < 400" + - "interfaces_to_score.str.contains('protein-ligand')" + # Exclude example where RDKit errors + - example_id not in ["{['validation']}{7qbs}{1}{[]}"] diff --git 
a/configs/experiment/ncorley/af3-fine-tune-bfloat-msa.yaml b/configs/experiment/ncorley/af3-fine-tune-bfloat-msa.yaml new file mode 100644 index 0000000..cd9486b --- /dev/null +++ b/configs/experiment/ncorley/af3-fine-tune-bfloat-msa.yaml @@ -0,0 +1,30 @@ +# @package _global_ + +name: af3 + +defaults: + - override /datasets: af3 + - override /model: af3 + - override /trainer: af3 + +tags: + - af3 + - fine-tune + +project: af3 + +ckpt_path: /projects/ml/modelhub/inference/rf2aa-af3-repro7_ep680.pt + +model: + lr_scheduler: + base_lr: 0.9e-3 # 1/2 of original learning rate (1.8e-3) + +# Protein-ligand only for speed +val: + af3_validation: + dataset: + dataset: + filters: + # Only score examples with protein-ligand interfaces + - "interfaces_to_score.str.contains('protein-ligand')" + - example_id not in ["{['validation']}{7qbs}{1}{[]}"] \ No newline at end of file diff --git a/configs/experiment/ncorley/af3-new-msas-pdb-only.yaml b/configs/experiment/ncorley/af3-new-msas-pdb-only.yaml new file mode 100644 index 0000000..9879934 --- /dev/null +++ b/configs/experiment/ncorley/af3-new-msas-pdb-only.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +name: af3-new-msas-pdb-only + +# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/ +defaults: + - override /trainer: af3 + - override /datasets: af3 + - override /model: af3 + +tags: + # list of tags to add to the run ( & on wandb to easily find & filter runs) + - msas + - experiment + +project: af3 + +paths: + data: + protein_msa_dirs: + - {"dir": "/projects/msa/nvidia_renamed_with_seq_hash/maxseq_10k", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_protein_msas", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/rf2aa_af3/missing_msas_through_2024_08_12", "extension": ".msa0.a3m.gz", "directory_depth": 2} + +datasets: + train: + pdb: + # We must adjust the probability, since we set the 
monomer distillation dataset to null + probability: 1.0 + # Datasets set to null are ignored + monomer_distillation: null + val: + af3_validation: + dataset: + dataset: + filters: + - "n_tokens_total < 400" diff --git a/configs/experiment/ncorley/af3-old-msas-pdb-only.yaml b/configs/experiment/ncorley/af3-old-msas-pdb-only.yaml new file mode 100644 index 0000000..523a785 --- /dev/null +++ b/configs/experiment/ncorley/af3-old-msas-pdb-only.yaml @@ -0,0 +1,13 @@ +# @package _global_ + +name: af3-old-msas-pdb-only + +defaults: + - af3-new-msas-pdb-only + +paths: + data: + protein_msa_dirs: + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_protein_msas", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/rf2aa_af3/missing_msas_through_2024_08_12", "extension": ".msa0.a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/nvidia_renamed_with_seq_hash/maxseq_10k", "extension": ".a3m.gz", "directory_depth": 2} diff --git a/configs/experiment/none-00-dummy.yaml b/configs/experiment/none-00-dummy.yaml new file mode 100644 index 0000000..fede952 --- /dev/null +++ b/configs/experiment/none-00-dummy.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +# NOTE: Dummy experiment that you can use to just run the code +# . 
For actual experiments, please create a new experiment config from copying the template 'user-XX-template.yaml' + + +name: none-00-dummy + +tags: + # list of tags to add to the run ( & on wandb to easily find & filter runs) + - experiment + - dummy + +project: test + diff --git a/configs/experiment/pretrained/af3.yaml b/configs/experiment/pretrained/af3.yaml new file mode 100644 index 0000000..5b6659b --- /dev/null +++ b/configs/experiment/pretrained/af3.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +name: af3 + +defaults: + - override /datasets: af3 + - override /model: af3 + - override /trainer: af3 + +tags: + - af3 + +project: af3 + +ckpt_path: /projects/ml/modelhub/inference/rf2aa-af3-repro7_ep680.pt diff --git a/configs/experiment/pretrained/af3_with_confidence.yaml b/configs/experiment/pretrained/af3_with_confidence.yaml new file mode 100644 index 0000000..749c410 --- /dev/null +++ b/configs/experiment/pretrained/af3_with_confidence.yaml @@ -0,0 +1,15 @@ +# @package _global_ + +name: af3-with-confidence + +defaults: + - override /datasets: af3 + - override /model: af3_with_confidence + - override /trainer: af3_with_confidence + +tags: + - af3 + +project: af3 + +ckpt_path: /projects/ml/modelhub/inference/weights_with_confidence_2025_2_27_new_modelhub.ckpt \ No newline at end of file diff --git a/configs/experiment/quick-af3-with-confidence.yaml b/configs/experiment/quick-af3-with-confidence.yaml new file mode 100644 index 0000000..dc102bd --- /dev/null +++ b/configs/experiment/quick-af3-with-confidence.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +# Experiment that loads a small dataset for quick testing + +name: quick-af3-with-confidence + +# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/ +defaults: + - quick-af3 + - override /model: af3_with_confidence + - override /trainer: af3_with_confidence + - _self_ + +ckpt_path: 
/projects/ml/modelhub/inference/weights_with_confidence_2025_2_27_new_modelhub.ckpt diff --git a/configs/experiment/quick-af3.yaml b/configs/experiment/quick-af3.yaml new file mode 100644 index 0000000..c16f721 --- /dev/null +++ b/configs/experiment/quick-af3.yaml @@ -0,0 +1,50 @@ +# @package _global_ + +# Experiment that loads a small dataset for quick testing + +name: quick-af3 + +# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/ +defaults: + - override /trainer: af3 + - override /datasets: af3 + - override /model: af3 + +tags: + # list of tags to add to the run ( & on wandb to easily find & filter runs) + - quick + +project: test + +ckpt_path: /projects/ml/modelhub/inference/rf2aa-af3-repro7_ep680.pt + +datasets: + train: + pdb: + # We must adjust the probability, since we set the monomer distillation dataset to null + probability: 1.0 + sub_datasets: + interface: + dataset: + dataset: + # A small dataframe that loads quickly + data: /projects/ml/datahub/dfs/pdb/test_dfs/interfaces_df.parquet + filters: + - "num_polymer_pn_units <= 2" + - "cluster.notnull()" + pn_unit: + dataset: + dataset: + # A small dataframe that loads quickly + data: /projects/ml/datahub/dfs/pdb/test_dfs/pn_units_df.parquet + filters: + - "num_polymer_pn_units <= 2" + - "cluster.notnull()" + # Datasets set to null are ignored + monomer_distillation: null + val: + af3_validation: + dataset: + dataset: + filters: + - "n_tokens_total < 200" diff --git a/configs/hydra/default.yaml b/configs/hydra/default.yaml new file mode 100644 index 0000000..6a4ad2f --- /dev/null +++ b/configs/hydra/default.yaml @@ -0,0 +1,18 @@ +# https://hydra.cc/docs/configure_hydra/intro/ + +# enable color logging (requires `colorlog` to be installed) +# defaults: +# - override hydra_logging: colorlog +# - override job_logging: colorlog + + +# output directory, generated dynamically on each run +run: + dir: 
${paths.log_dir}/${task_name}/${name}/${now:%Y-%m-%d}_${now:%H-%M} + +# ... this is where the log file is written (i.e. the program's output) +job_logging: + handlers: + file: + # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242 + filename: ${hydra.runtime.output_dir}/experiment.log \ No newline at end of file diff --git a/configs/hydra/no_logging.yaml b/configs/hydra/no_logging.yaml new file mode 100644 index 0000000..f25f45d --- /dev/null +++ b/configs/hydra/no_logging.yaml @@ -0,0 +1,7 @@ +defaults: + - override job_logging: disabled + - override hydra_logging: disabled + +output_subdir: null +run: + dir: . \ No newline at end of file diff --git a/configs/inference.yaml b/configs/inference.yaml new file mode 100644 index 0000000..a720496 --- /dev/null +++ b/configs/inference.yaml @@ -0,0 +1,7 @@ +# @package _global_ +# ^ The "package" determines where the content of the config is placed in the output config +# For more information about overriding configs, see: https://hydra.cc/docs/advanced/overriding_packages/#overriding-packages-using-the-defaults-list + +defaults: + - inference_engine: ???
+ - _self_ \ No newline at end of file diff --git a/configs/inference_engine/af3.yaml b/configs/inference_engine/af3.yaml new file mode 100644 index 0000000..8e540df --- /dev/null +++ b/configs/inference_engine/af3.yaml @@ -0,0 +1,22 @@ +# @package _global_ + +defaults: + - base + - _self_ + +_target_: modelhub.inference_engines.af3.AF3InferenceEngine + +ckpt_path: /net/tukwila/ncorley/modelhub/inference/modelhub_latest.ckpt + +n_recycles: 10 +diffusion_batch_size: 5 +residue_renaming_dict: null +num_steps: 50 +solver: "af3" +print_config: true +seed: null +skip_existing: true + +dump_predictions: true +dump_trajectories: false +one_model_per_file: false diff --git a/configs/inference_engine/base.yaml b/configs/inference_engine/base.yaml new file mode 100644 index 0000000..bd0f156 --- /dev/null +++ b/configs/inference_engine/base.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +defaults: + - /hydra: no_logging + +ckpt_path: ??? +inputs: ??? +out_dir: ./ +num_nodes: 1 +devices_per_node: 1 diff --git a/configs/logger/csv.yaml b/configs/logger/csv.yaml new file mode 100644 index 0000000..c424bf2 --- /dev/null +++ b/configs/logger/csv.yaml @@ -0,0 +1,6 @@ +# https://lightning.ai/docs/fabric/latest/api/generated/lightning.fabric.loggers.CSVLogger.html#lightning.fabric.loggers.CSVLogger + +csv: + _target_: lightning.fabric.loggers.CSVLogger + root_dir: ${paths.output_dir} + flush_logs_every_n_steps: 1 \ No newline at end of file diff --git a/configs/logger/default.yaml b/configs/logger/default.yaml new file mode 100644 index 0000000..fc464de --- /dev/null +++ b/configs/logger/default.yaml @@ -0,0 +1,3 @@ +defaults: + - wandb + - csv \ No newline at end of file diff --git a/configs/logger/wandb.yaml b/configs/logger/wandb.yaml new file mode 100644 index 0000000..44293fb --- /dev/null +++ b/configs/logger/wandb.yaml @@ -0,0 +1,14 @@ +# https://wandb.ai + +wandb: + _target_: wandb.integration.lightning.fabric.WandbLogger + save_dir: ${paths.output_dir} + offline: False + id: 
null # pass correct id (along with checkpoint path, and resume='allow' or 'must') to resume a run + anonymous: null # enable anonymous logging + project: ${project} + prefix: "" # a string to put at the beginning of metric keys + log_model: False # do not upload model checkpoints + tags: ${tags} + # (Default resume to "never" to avoid accidentally resuming runs; we want to be explicit about resuming) + resume: never # never, allow, or must (see: https://docs.wandb.ai/guides/runs/resuming/) diff --git a/configs/model/af3.yaml b/configs/model/af3.yaml new file mode 100644 index 0000000..73d2639 --- /dev/null +++ b/configs/model/af3.yaml @@ -0,0 +1,7 @@ +defaults: + - optimizers/adam@optimizer + - schedulers/af3@lr_scheduler + - components/ema@ema + - components/af3_net@net + + diff --git a/configs/model/af3_with_confidence.yaml b/configs/model/af3_with_confidence.yaml new file mode 100644 index 0000000..f71df26 --- /dev/null +++ b/configs/model/af3_with_confidence.yaml @@ -0,0 +1,5 @@ +defaults: + - af3 + - components/af3_net_with_confidence_head@net + + diff --git a/configs/model/components/af3_net.yaml b/configs/model/components/af3_net.yaml new file mode 100644 index 0000000..daf4bbc --- /dev/null +++ b/configs/model/components/af3_net.yaml @@ -0,0 +1,177 @@ +# Model architecture +_target_: modelhub.model.AF3.AF3 + +# +---------- Channel dimensions ----------+ +c_s: 384 +c_z: 128 +c_atom: 128 +c_atompair: 16 +c_s_inputs: 449 # TODO: What is this? 
+ +# +---------- Feature embedding ----------+ +feature_initializer: + # InputFeatureEmbedder + input_feature_embedder: + features: + - restype + - profile + - deletion_mean + atom_attention_encoder: + c_token: 384 + c_atom_1d_features: 389 + c_tokenpair: ${model.net.c_z} + atom_1d_features: + - ref_pos + - ref_charge + - ref_mask + - ref_element + - ref_atom_name_chars + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: 40_000 # does not matter + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: true + kq_norm: false + + # RelativePositionEncoding + relative_position_encoding: + r_max: 32 + s_max: 2 + +# +---------- Recycler ----------+ +recycler: + # Pairformer + n_pairformer_blocks: 48 + pairformer_block: + p_drop: 0.25 + triangle_multiplication: + d_hidden: 128 + triangle_attention: + n_head: 4 + d_hidden: 32 + attention_pair_bias: + n_head: 16 + + # TemplateEmbedder + template_embedder: + n_block: 2 + raw_template_dim: 108 + c: 64 + p_drop: 0.25 + + # MSA module + msa_module: + n_block: 4 + c_m: 64 + p_drop_msa: 0.15 + p_drop_pair: 0.25 + msa_subsample_embedder: + num_sequences: 1024 + dim_raw_msa: 34 + c_s_inputs: ${model.net.c_s_inputs} + c_msa_embed: ${model.net.recycler.msa_module.c_m} + outer_product: + c_msa_embed: ${model.net.recycler.msa_module.c_m} + c_outer_product: 32 + c_out: ${model.net.c_z} + msa_pair_weighted_averaging: + n_heads: 8 + c_weighted_average: 32 + c_msa_embed: ${model.net.recycler.msa_module.c_m} + c_z: ${model.net.c_z} + separate_gate_for_every_channel: true + msa_transition: + n: 4 + c: ${model.net.recycler.msa_module.c_m} + triangle_multiplication_outgoing: + d_pair: ${model.net.c_z} + d_hidden: 128 + bias: True + triangle_multiplication_incoming: + d_pair: ${model.net.c_z} + d_hidden: 128 + bias: True + triangle_attention_starting: + d_pair: ${model.net.c_z} + n_head: 4 + d_hidden: 32 + p_drop: 0.0 # This does not do anything: TODO: 
Remove + triangle_attention_ending: + d_pair: ${model.net.c_z} + n_head: 4 + d_hidden: 32 + p_drop: 0.0 # This does not do anything; TODO: Remove + pair_transition: + n: 4 + c: ${model.net.c_z} + +# +---------- Diffusion module ----------+ +diffusion_module: + sigma_data: 16 + c_token: 768 + f_pred: edm + diffusion_conditioning: + c_s_inputs: ${model.net.c_s_inputs} + c_t_embed: 256 + relative_position_encoding: + r_max: 32 + s_max: 2 + atom_attention_encoder: + c_tokenpair: ${model.net.c_z} + c_atom_1d_features: 389 + atom_1d_features: + - ref_pos + - ref_charge + - ref_mask + - ref_element + - ref_atom_name_chars + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: ${model.net.feature_initializer.input_feature_embedder.atom_attention_encoder.atom_transformer.l_max} + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: true + kq_norm: false + broadcast_trunk_feats_on_1dim_old: false + use_chiral_features: true + diffusion_transformer: + n_block: 24 + diffusion_transformer_block: + n_head: 16 + no_residual_connection_between_attention_and_transition: true + kq_norm: true + atom_attention_decoder: + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: ${model.net.feature_initializer.input_feature_embedder.atom_attention_encoder.atom_transformer.l_max} + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: true + kq_norm: false +distogram_head: + bins: 65 + +# +---------- Inference sampler ----------+ +inference_sampler: + solver: "af3" + num_timesteps: 200 + min_t: 0 + max_t: 1 + sigma_data: ${model.net.diffusion_module.sigma_data} + s_min: 4e-4 + s_max: 160 + p: 7 + gamma_0: 0.8 + gamma_min: 1.0 + noise_scale: 1.003 + step_scale: 1.5 \ No newline at end of file diff --git a/configs/model/components/af3_net_with_confidence_head.yaml 
b/configs/model/components/af3_net_with_confidence_head.yaml new file mode 100644 index 0000000..a1def35 --- /dev/null +++ b/configs/model/components/af3_net_with_confidence_head.yaml @@ -0,0 +1,45 @@ +defaults: + - af3_net + +# Model architecture +_target_: modelhub.model.AF3.AF3WithConfidence + +# +---------- Mini rollout sampler ----------+ +# From the AF-3 main text: +# > ...To remedy this, we developed a diffusion ‘rollout’ procedure for the full-structure prediction generation during training (using a larger step size than normal) +# They do not further elaborate on how they adjusted the step size during diffusion rollout, but this may be a fruitful area of exploration moving forwards +mini_rollout_sampler: + solver: "af3" + num_timesteps: 20 # 20 timesteps for the mini-rollout (vs. 200 for the full rollout during inference) + min_t: 0 + max_t: 1 + sigma_data: ${model.net.diffusion_module.sigma_data} + s_min: 4e-4 + s_max: 160 + p: 7 + gamma_0: 0.8 + gamma_min: 1.0 + noise_scale: 1.003 + step_scale: 1.5 + +# +---------- Confidence head architecture ----------+ +confidence_head: + c_s: ${model.net.c_s} + c_z: ${model.net.c_z} + n_pairformer_layers: 4 + pairformer: + p_drop: 0.25 + triangle_multiplication: + d_hidden: 128 + triangle_attention: + n_head: 4 + d_hidden: 32 + attention_pair_bias: + n_head: 16 + n_bins_pae: 64 + n_bins_pde: 64 + n_bins_plddt: 50 + n_bins_exp_resolved: 2 + use_Cb_distances: False + use_af3_style_binning_and_final_layer_norms: True + symmetrize_Cb_logits: True \ No newline at end of file diff --git a/configs/model/components/ema.yaml b/configs/model/components/ema.yaml new file mode 100644 index 0000000..e6f9696 --- /dev/null +++ b/configs/model/components/ema.yaml @@ -0,0 +1 @@ +decay: 0.999 # From AF-3 \ No newline at end of file diff --git a/configs/model/optimizers/adam.yaml b/configs/model/optimizers/adam.yaml new file mode 100644 index 0000000..c17d9ae --- /dev/null +++ b/configs/model/optimizers/adam.yaml @@ -0,0 +1,5 @@ +# 
Optimizer +_target_: torch.optim.Adam +lr: 0 # Will be set by the scheduler (starts at 0, increasing to `base_lr`) +betas: [0.9, 0.95] +eps: 1.0e-8 \ No newline at end of file diff --git a/configs/model/schedulers/af3.yaml b/configs/model/schedulers/af3.yaml new file mode 100644 index 0000000..7c2a65d --- /dev/null +++ b/configs/model/schedulers/af3.yaml @@ -0,0 +1,6 @@ +# Learning rate scheduler +_target_: modelhub.training.schedulers.AF3Scheduler +base_lr: 1.8e-3 +warmup_steps: 1000 +decay_factor: 0.95 +decay_steps: 50000 \ No newline at end of file diff --git a/configs/paths/data/default.yaml b/configs/paths/data/default.yaml new file mode 100644 index 0000000..3307957 --- /dev/null +++ b/configs/paths/data/default.yaml @@ -0,0 +1,23 @@ +# path to directory with training splits +pdb_data_dir: /projects/ml/datahub/dfs/af3_splits/2024_12_16/ + +# fb monomer distillation dataset +monomer_distillation_data_dir: /squash/af2_distillation_facebook/ +monomer_distillation_parquet_dir: /projects/ml/datahub/dfs/distillation/af2_distillation_facebook + +# path(s) to search for protein MSAs (for PDB datasets) +protein_msa_dirs: + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_protein_msas", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/rf2aa_af3/missing_msas_through_2024_08_12", "extension": ".msa0.a3m.gz", "directory_depth": 2} + - {"dir": "/net/scratch/mkazman/msa/validate_no_leak_taxid", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/net/scratch/mkazman/msa/missing_antibody_msas", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/net/scratch/mkazman/msa/post_training_cutoff_msas/processed_nested", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/net/scratch/mkazman/msa/post_training_cutoff_msas/extra_seqs_processed_nested", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/nvidia_renamed_with_seq_hash/maxseq_10k", "extension": ".a3m.gz", "directory_depth": 2} + +# path(s) to search 
for RNA MSAs +rna_msa_dirs: + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_rna_msas", "extension": ".afa", "directory_depth": 0} + +# path to save examples that fail during the Transform pipeline (null = do not save) +failed_examples_dir: null \ No newline at end of file diff --git a/configs/paths/default.yaml b/configs/paths/default.yaml new file mode 100644 index 0000000..2643495 --- /dev/null +++ b/configs/paths/default.yaml @@ -0,0 +1,21 @@ +# NOTE: order of defaults determines the order in which configs override each other (higher up items are overridden by lower items) +defaults: + - _self_ + - data: default + +# path to root directory (requires the `PROJECT_ROOT` environment variable to be set) +# NOTE: This variable is auto-set upon loading via `rootutils` +root_dir: ${oc.env:PROJECT_ROOT} + +# where to store data (checkpoints, logs, etc.) of all experiments in general +# (this influences the output_dir in the hydra/default.yaml config) +# change this to e.g. /scratch if you are running larger experiments with lots of logs, checkpoints, etc. 
+log_dir: ${.root_dir}/logs/ + +# path to output directory for this specific run, created dynamically by hydra +# path generation pattern is specified in `configs/hydra/default.yaml` +# use it to store all files generated during the run, like ckpts and metrics +output_dir: ${hydra:runtime.output_dir} + +# path to working directory (auto-generated by hydra) +work_dir: ${hydra:runtime.cwd} \ No newline at end of file diff --git a/configs/train.yaml b/configs/train.yaml new file mode 100644 index 0000000..75f5766 --- /dev/null +++ b/configs/train.yaml @@ -0,0 +1,42 @@ +# @package _global_ +# ^ The "package" determines where the content of the config is placed in the output config +# For more information about overriding configs, see: https://hydra.cc/docs/advanced/overriding_packages/#overriding-packages-using-the-defaults-list + +# NOTE: order of defaults determines the order in which configs override each other (higher up items are overridden by lower items) +defaults: + - callbacks: default + - logger: csv + - trainer: ??? + - paths: default + - datasets: ??? + - dataloader: default + - hydra: default + - model: ??? + # We must keep _self_ before experiment and debug to ensure that the experiment and debug configs can override + - _self_ + + # experiment configs allow for version control of specific hyperparameters + # e.g. best hyperparameters for given model and datamodule + - experiment: ??? + + # debug configs to add onto any experiment for quickly testing or debugging code + - debug: null + + +# DO NOT set these here. Set them in the relevant experiment config file. +# ... these are just here to ensure users always specify these fields in their experiment configs. +name: ??? +tags: ??? + +# NOTE: These values will be overwritten by the experiment config if they are set there. They are just provided as defaults +# here. +# ... task name (determines the output directory path) +task_name: "train" + +project: ??? 
# required for W&B logging + +seed: 1 + +# Provide checkpoint path to resume training from a checkpoint +# NOTE: If using W&B, must also set the `id` and `resume` fields in the `logger/wandb` config +ckpt_path: null diff --git a/configs/trainer/af3.yaml b/configs/trainer/af3.yaml new file mode 100644 index 0000000..8553c69 --- /dev/null +++ b/configs/trainer/af3.yaml @@ -0,0 +1,20 @@ +defaults: + - ddp + - loss: structure_prediction + - metrics: structure_prediction + +_target_: modelhub.trainers.af3.AF3Trainer +validate_every_n_epochs: 1 +max_epochs: 10_000 +n_examples_per_epoch: 24000 +prevalidate: True + +# We must pre-specify the number of recycles during training so we can pre-sample recycles per batch consistently for each GPU +n_recycles_train: ${datasets.n_recycles_train} + +clip_grad_max_norm: 10.0 + +output_dir: ${paths.output_dir} +checkpoint_every_n_epochs: 1 + +# precision: bf16-mixed # Mixed precision training with bfloat16 (currently does not work) diff --git a/configs/trainer/af3_with_confidence.yaml b/configs/trainer/af3_with_confidence.yaml new file mode 100644 index 0000000..33c12e9 --- /dev/null +++ b/configs/trainer/af3_with_confidence.yaml @@ -0,0 +1,5 @@ +defaults: + - af3 + - override loss: structure_prediction_with_confidence + +_target_: modelhub.trainers.af3.AF3TrainerWithConfidence \ No newline at end of file diff --git a/configs/trainer/cpu.yaml b/configs/trainer/cpu.yaml new file mode 100644 index 0000000..e7fd1cb --- /dev/null +++ b/configs/trainer/cpu.yaml @@ -0,0 +1,6 @@ +defaults: + - af3 + +accelerator: cpu +devices_per_node: 1 +num_nodes: 1 \ No newline at end of file diff --git a/configs/trainer/ddp.yaml b/configs/trainer/ddp.yaml new file mode 100644 index 0000000..701c9fa --- /dev/null +++ b/configs/trainer/ddp.yaml @@ -0,0 +1,5 @@ +strategy: ddp + +accelerator: gpu +devices_per_node: 1 +num_nodes: 1 diff --git a/configs/trainer/loss/losses/confidence_loss.yaml b/configs/trainer/loss/losses/confidence_loss.yaml new file mode 
100644 index 0000000..afea09d --- /dev/null +++ b/configs/trainer/loss/losses/confidence_loss.yaml @@ -0,0 +1,29 @@ +_target_: modelhub.loss.af3_confidence_loss.ConfidenceLoss +weight: 1.0 + +plddt: + weight: 1.0 + n_bins: 50 + max_value: 1.0 + +pae: + weight: 1.0 + n_bins: 64 + max_value: 32 + +pde: + weight: 1.0 + n_bins: 64 + max_value: 32 + +exp_resolved: + weight: 1.0 + n_bins: 2 + max_value: 1 + +# Adds to loss_dict true and predicted average plddt, pae, and pde per batch, also info about the spread and correlation of those values within a batch +log_statistics: True + +rank_loss: + use_listnet_loss: False + weight: 0.0 diff --git a/configs/trainer/loss/losses/diffusion_loss.yaml b/configs/trainer/loss/losses/diffusion_loss.yaml new file mode 100644 index 0000000..8d19e02 --- /dev/null +++ b/configs/trainer/loss/losses/diffusion_loss.yaml @@ -0,0 +1,9 @@ +_target_: modelhub.loss.af3_losses.DiffusionLoss +weight: 4.0 +sigma_data: ${model.net.diffusion_module.sigma_data} +alpha_dna: 5 +alpha_rna: 5 +alpha_ligand: 10 +edm_lambda: True +se3_invariant_loss: True +clamp_diffusion_loss: False \ No newline at end of file diff --git a/configs/trainer/loss/losses/distogram_loss.yaml b/configs/trainer/loss/losses/distogram_loss.yaml new file mode 100644 index 0000000..27b7782 --- /dev/null +++ b/configs/trainer/loss/losses/distogram_loss.yaml @@ -0,0 +1,2 @@ +_target_: modelhub.loss.af3_losses.DistogramLoss +weight: 3e-2 \ No newline at end of file diff --git a/configs/trainer/loss/structure_prediction.yaml b/configs/trainer/loss/structure_prediction.yaml new file mode 100644 index 0000000..d1146e3 --- /dev/null +++ b/configs/trainer/loss/structure_prediction.yaml @@ -0,0 +1,4 @@ +defaults: + # Note that the SmoothedLDDTLoss is included within the DiffusionLoss + - losses/diffusion_loss@diffusion_loss + - losses/distogram_loss@distogram_loss diff --git a/configs/trainer/loss/structure_prediction_with_confidence.yaml 
b/configs/trainer/loss/structure_prediction_with_confidence.yaml new file mode 100644 index 0000000..03b507c --- /dev/null +++ b/configs/trainer/loss/structure_prediction_with_confidence.yaml @@ -0,0 +1,2 @@ +defaults: + - losses/confidence_loss@confidence_loss \ No newline at end of file diff --git a/configs/trainer/metrics/structure_prediction.yaml b/configs/trainer/metrics/structure_prediction.yaml new file mode 100644 index 0000000..aa2ae36 --- /dev/null +++ b/configs/trainer/metrics/structure_prediction.yaml @@ -0,0 +1,8 @@ +by_type_lddt: + _target_: modelhub.metrics.lddt.ByTypeLDDT +all_atom_lddt: + _target_: modelhub.metrics.lddt.AllAtomLDDT +distogram: + _target_: modelhub.metrics.distogram.DistogramLoss +distogram_comparisons: + _target_: modelhub.metrics.distogram.DistogramComparisons diff --git a/configs/validate.yaml b/configs/validate.yaml new file mode 100644 index 0000000..f6c9da6 --- /dev/null +++ b/configs/validate.yaml @@ -0,0 +1,49 @@ +# @package _global_ +# ^ The "package" determines where the content of the config is placed in the output config +# For more information about overriding configs, see: https://hydra.cc/docs/advanced/overriding_packages/#overriding-packages-using-the-defaults-list + +# NOTE: order of defaults determines the order in which configs override each other (higher up items are overridden by lower items) +defaults: + - callbacks: default + - logger: csv + - trainer: ??? + - paths: default + - datasets: ??? + - dataloader: default + - hydra: default + - model: ??? + # We must keep _self_ before experiment and debug to ensure that the experiment and debug configs can override + - _self_ + + # experiment configs allow for version control of specific hyperparameters + # e.g. best hyperparameters for given model and datamodule + - experiment: ??? + + # debug configs to add onto any experiment for quickly testing or debugging code + - debug: null + + +# DO NOT set these here. Set them in the relevant experiment config file. 
+# ... these are just here to ensure users always specify these fields in their experiment configs. +name: ??? +tags: ??? + +# NOTE: These values will be overwritten by the experiment config if they are set there. They are just provided as defaults +# here. +# ... task name (determines the output directory path) +task_name: "validate" + +project: ??? # required for W&B logging + +seed: 1 + +# Dump CIF files for validation structures +callbacks: + dump_validation_structures_callback: + dump_predictions: True + one_model_per_file: False + dump_trajectories: False + +# passing checkpoint path required for validation +# DO NOT set here; set in the experiment config file +ckpt_path: ??? \ No newline at end of file diff --git a/environment.yaml b/environment.yaml index cff5135..6a64955 100644 --- a/environment.yaml +++ b/environment.yaml @@ -6,53 +6,80 @@ channels: - conda-forge - defaults dependencies: + # Core dependencies + - pip - python=3.11 - cuda - pytorch=2.4 - pytorch-cuda=12.4 - pytorch-scatter>=2.1.0,<3 - lightning>=2.4.0,<2.5 - - pandas>=1.4.2,<2.3 - - numpy>=1.25.0,<2.1 - - scipy>=1.13.1,<2 - - cytoolz>=0.12.3,<1 - - biopython>=1.83,<2 - - fire>=0.6.0,<1 - - ruff>=0.6.2 - - pytest-dotenv>=0.5.2,<1 - - pytest-cov>=4.1.0,<5 + # Small molecule libraries - rdkit>=2024.3.5 - openbabel=3.1.1 - - pip - pip: - - biotite>=1.1.0,<1.2 - - seaborn>=0.13.0,<1 - - loguru>=0.7.0,<1 - - beartype>=0.18.0,<1 + # Project-related dependencies + # ... 
generic tools + - GitPython>=3.0.0,<4 # GitPython is a Python library used to interact with Git repositories + - cython>=3.0.0,<4 # Cython compiler for C extensions + - cytoolz>=0.12.3,<1 # Cython-optimized tools for itertools and functional programming + - assertpy>=1.1.0,<2 # Assertions library + - tqdm>=4.65.0,<5 # Fast, extensible progress bar for loops and more + - rootutils>=1.0.7,<1.1 # Setting up the project root paths + - dm-tree>=0.1.6,<1 # Tree data structure from DeepMind + - deepdiff>=8.0.0,<9 # Deep difference and search of any Python object + # ... configuration & CLI + - fire>=0.6.0,<1 # Better argument parsing than argparse + - hydra-core>=1.3.0,<1.4 # Config management framework + - environs>=11.0.0,<12 + # ... linear algebra, maths & ml + - numpy>=1.25.0,<2 + - scipy>=1.13.1,<2 - einops>=0.8.0,<1 - einx>=0.1.0,<1 - - debugpy>=1.8.5,<2 - - cython>=3.0.0,<4 - - pytest>=8.2.0,<9 - - assertpy>=1.1.0,<2 - - pre-commit>=3.7.1 - - tqdm>=4.65.0,<5 - - py3Dmol>=2.2.1,<3 - - pyarrow>=17.0.0 - - fastparquet>=2024.5.0 - - ipykernel>=6.29.4,<7 - - jaxtyping>=0.2.17,<1 - - hydra-core>=1.3.0,<1.4 - - wandb>=0.15.10,<1 - - environs>=11.0.0,<12 - - rootutils>=1.0.7,<1.1 - opt_einsum>=3.4.0,<4 - - rich>=13.9.4,<14 - - msgpack>=1.1.0,<2 - - pymol-remote>=0.1.0 - - deepspeed>=0.15.1 + - deepspeed>=0.15.1 # will be uninstalled by the apptainer's `spec` file, if pre-compiling + # ... data tools + - pandas>=2.2,<2.3 # Data manipulation and analysis + - pyarrow==17.0.0 # Columnar data format for efficient data storage and processing + - fastparquet==2024.5.0 # Fast Parquet file format implementation + - seaborn>=0.13.0,<1 + # ... 
bioinformatics + - biopython>=1.83,<2 # Collection of Python modules for bioinformatics + - py3Dmol>=2.2.1,<3 # Python wrapper for 3Dmol.js + - pymol-remote>=0.0.5 # Remote access to PyMOL from Python (has no dependencies) - git+https://github.com/biotite-dev/biotite.git@fab175e7ba4608d9613f092ad4e080661c6cc816 - - GitPython>=3.0.0,<4 # Git library for Python + - hydride==1.2.3 #biotite supported hydrogen addition + # ... logging + - wandb>=0.15.10,<1 + - rich>=13.9.4,<14 -# NOTE: After navigating to the datahub / cifutils directories, you can install the local package in editable mode with: + # Formatting & linting (only needed for development) + - ruff==0.8.3 # python linter & formatter + - pre-commit==3.7.1 # pre-commit hooks for formatting & linting + + # Debugger & interactive tools (only needed for development) + - debugpy>=1.8.5,<2 # debugger for python + - ipykernel>=6.29.4,<7 # ipython kernel for jupyter + - icecream>=2.0.0,<3 # print debugging + - pymol-remote>=0.1.0 # Remote access to PyMOL from Python (has no dependencies) + - ipdb>=0.13.9 # IPython debugger + + # Pytest plugins (only needed for development) + - pytest>=8.2.0,<9 # testing framework + - pytest-testmon>=2.1.1,<3 # run only tests related to changed code + - pytest-xdist>=3.6.1,<4 # run tests in parallel + - pytest-dotenv>=0.5.2,<1 # load environment variables from .env file + - pytest-cov>=4.1.0,<5 # generate coverage report + - pytest-benchmark>=5.0.0,<6 # benchmark tests for speed + + # Typing & documentation (only needed for development) + - jaxtyping>=0.2.17,<1 + - beartype>=0.18.0,<1 + +# NOTE: After navigating to the datahub / cifutils / modelhub directories, you can install the local package in editable mode with: # pip install -e . + +# NOTE: By default, DeepSpeed just-in-time compiles, which may take 3-4 minutes when first running the code on a new machine. 
+# It may be possible to pre-compile DeepSpeed within a `conda` environment; see: https://www.deepspeed.ai/tutorials/advanced-install/ +# By default, the apptainers will have DeepSpeed pre-compiled, so when performance is a concern, it is recommended to use the apptainers. diff --git a/freeze_apptainer.spec b/freeze_apptainer.spec new file mode 100644 index 0000000..8687561 --- /dev/null +++ b/freeze_apptainer.spec @@ -0,0 +1,115 @@ +Bootstrap: localimage +From: ./scripts/shebang/modelhub.sif +IncludeCmd: yes +# NOTE: This apptainer was written using apptainer version `1.1.6+2-g6808b5172-ipd` + +%setup + # NOTE: This is executed on the host, not the container + # Ensure the token environment variables are set + set +x # ... suppress bash output to avoid printing the tokens in the output + for var in GITHUB_USER GITHUB_TOKEN; do + if [ -z "$(eval echo \$$var)" ]; then + set -x + echo "ERROR: $var is not set. Please create a personal access token at" + echo " - GitHub: https://github.com/settings/tokens" + echo "Then set the following environment variables:" + echo " - GITHUB_USER" + echo " - GITHUB_TOKEN" + exit 1 + fi + done + set -x + # Create temporary `secrets.txt` file from host's environment variables in the container + # (which are otherwise not available in the %post section) + echo "Creating temporary secrets.txt file with access tokens in the container" + set +x + touch ${APPTAINER_ROOTFS}/secrets.txt + echo "GITHUB_USER=${GITHUB_USER}" >> ${APPTAINER_ROOTFS}/secrets.txt + echo "GITHUB_TOKEN=${GITHUB_TOKEN}" >> ${APPTAINER_ROOTFS}/secrets.txt + set -x + + # Conditionally copy the project files based on the INSTALL_PROJECT environment variable + if [ ${INSTALL_PROJECT} = "true" ]; then + echo "Copying project files into the container..." + mkdir -p ${APPTAINER_ROOTFS}/opt/modelhub + rsync -av ./ ${APPTAINER_ROOTFS}/opt/modelhub/ + else + echo "Skipping copying of project files." 
+ fi + +%post + # get os name + echo "Running on OS name $(lsb_release -i | awk '{ print $3 }')" + # get os version + echo "... in OS version $(lsb_release -r | awk '{ print $2 }')" + + ## SECRETS FILE + # Deal with secrets file + # ... verify that the secrets file is present on the container + if [ ! -e /secrets.txt ]; then + echo "ERROR: secrets.txt is not present on the container" + exit 1 + fi + # ... temporarily set the access token environment variables + # from the secrets file + echo "Exporting access tokens from secrets.txt" + set +x + export GITHUB_USER=$(grep GITHUB_USER /secrets.txt | cut -d '=' -f2) + export GITHUB_TOKEN=$(grep GITHUB_TOKEN /secrets.txt | cut -d '=' -f2) + set -x + # ... remove secrets file + rm secrets.txt + # ... verify that the secrets file is not present on the container + if [ -e /secrets.txt ]; then + echo "ERROR: secrets.txt is still present on the container" + exit 1 + else + echo "Verified that secrets.txt is not present on the container" + fi + # ... verify that the access token environment variables are set + set +x + for var in GITHUB_USER GITHUB_TOKEN; do + if [ -z "$(eval echo \$$var)" ]; then + echo "ERROR: $var is not set" + exit 1 + fi + done + set -x + echo "Verified that access tokens are set" + + # Install additional libraries + + # Cifutils + pip install git+https://${GITHUB_USER}:${GITHUB_TOKEN}@github.com/baker-laboratory/cifutils.git@v2.15.0 + + # Datahub + pip install git+https://${GITHUB_USER}:${GITHUB_TOKEN}@github.com/baker-laboratory/datahub.git@v3.14.1 + + # Modelhub (maybe) + if [ -d "/opt/modelhub" ]; then + echo "Installing the project from /opt/modelhub..." + pip install /opt/modelhub + else + echo "Skipping project installation. /opt/modelhub does not exist." + fi + + ## CLEANUP + # Unset the access token environment variables to avoid possibly + # leaking them in the container + unset GITHUB_USER + unset GITHUB_TOKEN + # ... 
verify that the access token environment variables are unset + set +x + for var in GITHUB_USER GITHUB_TOKEN; do + if [ -n "$(eval echo \$$var)" ]; then + set -x + echo "ERROR: $var is still set" + exit 1 + fi + done + set -x + echo "Verified that access tokens are unset." + +%runscript + # NOTE: The %runscript is invoked when the container is run without specifying a different command. + exec python "$@" \ No newline at end of file diff --git a/notebooks/plot.ipynb b/notebooks/plot.ipynb new file mode 100644 index 0000000..d7ea3ec --- /dev/null +++ b/notebooks/plot.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting AF3 Results with CSV Logger" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook provides examples of how to parse the results of the `CSVLogger` for both training and validation." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports for this notebook\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Path to the log folder\n", + "LOG_PATH = Path(\"/path/to/logs\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Workflows to plot and visualize validation metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Results for Most Recent Epoch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "val_df = pd.read_csv(LOG_PATH / \"val_metrics/validation_output_all_epochs.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "def plot_validation_results_by_type(\n", + " df: pd.DataFrame,\n", + " ignore_zeros: bool = False,\n", + ") -> None:\n", + " \"\"\"Visualize metrics across all datasets.\n", + "\n", + " NOTE: Ensure that you first subset the DataFrame to only include the desired epoch.\n", + " \n", + " Args:\n", + " df: Combined DataFrame containing metrics data\n", + " ignore_zeros: Whether to treat zero values as missing data\n", + " \"\"\"\n", + " # (Copy the DataFrame to avoid modifying the original)\n", + " _df = df.copy()\n", + "\n", + " # ... subset to only include the desired columns\n", + " _df = _df[[\"dataset\", \"by_type_lddt.type\", \"by_type_lddt.best_of_1_lddt\", \"by_type_lddt.best_of_5_lddt\"]]\n", + " _df = _df.dropna()\n", + "\n", + " if ignore_zeros:\n", + " _df = _df.replace(0, pd.NA)\n", + "\n", + " # Prepare data\n", + " melted = pd.melt(_df,\n", + " id_vars=[\"dataset\", \"by_type_lddt.type\"],\n", + " value_vars=[\"by_type_lddt.best_of_1_lddt\", \"by_type_lddt.best_of_5_lddt\"],\n", + " var_name='metric',\n", + " value_name='lddt')\n", + " \n", + " # Create visualization\n", + " sns.set(style=\"whitegrid\", font_scale=1.1)\n", + " plt.figure(figsize=(15, 8))\n", + " \n", + " g = sns.catplot(\n", + " data=melted,\n", + " x='by_type_lddt.type',\n", + " y='lddt',\n", + " hue='metric',\n", + " col='dataset',\n", + " kind='bar',\n", + " estimator='mean', # Explicitly set to mean aggregation\n", + " ci=None, # Disable confidence intervals\n", + " height=6,\n", + " aspect=2,\n", + " sharey=False,\n", + " legend_out=False\n", + " )\n", + "\n", + " # Annotate bars with values\n", + " for ax in g.axes.flat:\n", + " for p in ax.patches:\n", + " ax.annotate(f\"{p.get_height():.2f}\",\n", + " (p.get_x() + p.get_width() / 2., p.get_height()),\n", + " ha='center', va='center',\n", + " fontsize=10,\n", + " color='black',\n", + " xytext=(0, 7),\n", + " textcoords='offset points')\n", + " \n", + " ax.set_xticklabels(ax.get_xticklabels(), 
rotation=45, ha='right')\n", + " ax.set_xlabel('')\n", + " ax.set_ylabel('LDDT')\n", + "\n", + " plt.suptitle(f'Model Performance Comparison', y=1.02)\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "current_epoch_df = val_df[val_df[\"epoch\"] == val_df[\"epoch\"].max()]\n", + "plot_validation_results_by_type(current_epoch_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Validation Curves" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_metric_trend(\n", + " df: pd.DataFrame,\n", + " metric_type: str,\n", + " dataset: str | None = None,\n", + " ignore_zeros: bool = False,\n", + " last_n_epochs: int | None = None\n", + ") -> None:\n", + " \"\"\"Plot best-of-1 vs best-of-5 trends across epochs for a specific metric type.\n", + " \n", + " Args:\n", + " df: Combined metrics DataFrame\n", + " metric_type: The 'type' to plot (e.g., 'protein-ligand')\n", + " dataset: Optional specific dataset to filter\n", + " ignore_zeros: Whether to exclude zero values\n", + " last_n_epochs: Optional number of most recent epochs to plot\n", + " \"\"\"\n", + " # Filter data\n", + " filtered = df[df['by_type_lddt.type'] == metric_type]\n", + " \n", + " if dataset:\n", + " filtered = filtered[filtered['dataset'] == dataset]\n", + " \n", + " if ignore_zeros:\n", + " filtered = filtered.replace(0, pd.NA).dropna(\n", + " subset=['by_type_lddt.best_of_1_lddt', 'by_type_lddt.best_of_5_lddt']\n", + " )\n", + "\n", + " if filtered.empty:\n", + " raise ValueError(f\"No data found for {metric_type} in dataset {dataset or 'any'}\")\n", + "\n", + " if last_n_epochs:\n", + " max_epoch = filtered['epoch'].max()\n", + " filtered = filtered[filtered['epoch'] > (max_epoch - last_n_epochs)]\n", + "\n", + " # Aggregate by epoch\n", + " trend_data = 
filtered.groupby('epoch').agg({\n", + " 'by_type_lddt.best_of_1_lddt': 'mean',\n", + " 'by_type_lddt.best_of_5_lddt': 'mean'\n", + " }).reset_index()\n", + "\n", + " # Create plot\n", + " plt.figure(figsize=(12, 6))\n", + " sns.set_style(\"whitegrid\")\n", + "\n", + " # Plot lines with markers\n", + " sns.lineplot(\n", + " data=trend_data,\n", + " x='epoch',\n", + " y='by_type_lddt.best_of_1_lddt',\n", + " color='#1f77b4',\n", + " label='Best of 1',\n", + " marker='o',\n", + " markersize=8,\n", + " linewidth=2\n", + " )\n", + " \n", + " sns.lineplot(\n", + " data=trend_data,\n", + " x='epoch',\n", + " y='by_type_lddt.best_of_5_lddt',\n", + " color='#ff7f0e',\n", + " label='Best of 5',\n", + " marker='s',\n", + " markersize=8,\n", + " linewidth=2\n", + " )\n", + "\n", + " # Style plot\n", + " plt.title(f\"{metric_type} LDDT Trends\\nDataset: {dataset or 'All'}\")\n", + " plt.xlabel(\"Epoch\")\n", + " plt.ylabel(\"Average LDDT\")\n", + " plt.legend(title=\"Strategy\")\n", + " plt.grid(alpha=0.3)\n", + "\n", + " # Add padding to autoscaled y-axis\n", + " plt.ylim(top=min(1.0, plt.ylim()[1] * 1.05)) # Cap at 1.0 if near upper bound\n", + " plt.ylim(bottom=max(0.0, plt.ylim()[0] * 0.95)) # Floor at 0.0 if near lower bound\n", + "\n", + " # Set x-axis to show only whole numbers\n", + " plt.xticks(ticks=trend_data['epoch'], labels=trend_data['epoch'].astype(int))\n", + " \n", + " # Add final value annotations\n", + " last_values = trend_data.iloc[-1]\n", + " plt.text(\n", + " 0.95, 0.15,\n", + " f\"Final Bo1: {last_values['by_type_lddt.best_of_1_lddt']:.2f}\\nFinal Bo5: {last_values['by_type_lddt.best_of_5_lddt']:.2f}\",\n", + " ha='right',\n", + " va='bottom',\n", + " transform=plt.gca().transAxes,\n", + " bbox=dict(facecolor='white', alpha=0.8)\n", + " )\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_metric_trend(val_df, 'protein-ligand', 
dataset=\"af3_validation\", ignore_zeros=False, last_n_epochs=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize Outliers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Identify and visualize outliers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "structures_path = f\"{LOG_PATH}/val_structures\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_worst_examples(\n", + " df: pd.DataFrame, \n", + " metric_type: str, \n", + " dataset: str = None, \n", + " epoch: int = None\n", + ") -> list:\n", + " \"\"\"Return example IDs sorted by worst performance for a metric type at specific epoch.\"\"\"\n", + " filtered = df[df['by_type_lddt.type'] == metric_type]\n", + " \n", + " if dataset:\n", + " filtered = filtered[filtered['dataset'] == dataset]\n", + " \n", + " # Use latest epoch if none specified\n", + " target_epoch = epoch if epoch is not None else filtered['epoch'].max()\n", + " filtered = filtered[filtered['epoch'] == target_epoch]\n", + " \n", + " return (\n", + " filtered[['example_id', 'by_type_lddt.best_of_1_lddt', 'by_type_lddt.best_of_5_lddt']]\n", + " .assign(worst_score=lambda x: x[['by_type_lddt.best_of_1_lddt', 'by_type_lddt.best_of_5_lddt']].min(axis=1))\n", + " .sort_values('worst_score')\n", + " ['example_id']\n", + " .tolist()\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datahub.common import parse_example_id\n", + "from cifutils.utils.visualize import view\n", + "from cifutils import parse\n", + "\n", + "dataset = \"af3-validation\"\n", + "metric = \"protein-ligand\"\n", + "latest_epoch = val_df['epoch'].max()\n", + "\n", + "worst_protein_ligand_examples = get_worst_examples(val_df, metric, dataset=\"af3_validation\", epoch=latest_epoch)\n", + "\n", + "# 
Visualize the worst example\n", + "parsed_id = parse_example_id(worst_protein_ligand_examples[0])\n", + "\n", + "# Find the worst example in the structures directory\n", + "structure_path_for_epoch = Path(structures_path) / f\"epoch_{latest_epoch}\" / dataset\n", + "\n", + "if structure_path_for_epoch.exists():\n", + " example_path = next(structure_path_for_epoch.glob(f\"*{parsed_id['pdb_id']}_{parsed_id['assembly_id']}*\"))\n", + "\n", + " # ... and visualize\n", + " atom_array = parse(example_path)\n", + " view(atom_array[\"assemblies\"][\"1\"][0])\n", + "else:\n", + " print(f\"No structure found for {parsed_id}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "csv_path = f\"{LOG_PATH}/lightning_logs/version_0/metrics.csv\"\n", + "_df = pd.read_csv(csv_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training Curves" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Subset to the relevant columns\n", + "mean_cols = [\"train/batch_mean/diffusion_loss\", \"train/batch_mean/smoothed_lddt_loss\", \"train/batch_mean/total_loss\", \"train/batch_mean/distogram_loss\"]\n", + "per_structure_cols = [\"train/per_structure/t\", \"train/per_structure/diffusion_loss\", \"train/per_structure/smoothed_lddt_loss\"]\n", + "train_df = _df[mean_cols + per_structure_cols + [\"step\", \"train/learning_rate\"]]\n", + "\n", + "# Remove rows with all NaN values except for the 'step' column\n", + "check_cols = [col for col in train_df.columns if col != 'step']\n", + "train_df = train_df.dropna(how='all', subset=check_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_training_metrics(train_df: pd.DataFrame) -> None:\n", + " \"\"\"Plot all training metrics 
from a DataFrame.\"\"\"\n", + " \n", + " processed = (\n", + " train_df\n", + " .groupby('step', as_index=False)\n", + " .mean()\n", + " .melt(id_vars='step', var_name='metric')\n", + " )\n", + " \n", + " # Create visualization\n", + " plt.figure(figsize=(12, 8))\n", + " sns.set_style(\"whitegrid\")\n", + " \n", + " g = sns.FacetGrid(\n", + " processed,\n", + " col='metric',\n", + " col_wrap=3,\n", + " height=4,\n", + " aspect=1.5,\n", + " sharey=False\n", + " )\n", + " \n", + " g.map(sns.lineplot, 'step', 'value', color='#2ca02c')\n", + " g.set_titles(\"{col_name}\")\n", + " g.set_axis_labels(\"Training Step\", \"Value\")\n", + " \n", + " # Special handling for learning rate\n", + " if 'learning_rate' in processed['metric'].unique():\n", + " g.axes[-1].set_yscale('log')\n", + " \n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mean_df = train_df[mean_cols + [\"step\"]].copy()\n", + "mean_df = mean_df.dropna(subset=mean_cols)\n", + "plot_training_metrics(train_df[mean_cols + [\"step\", \"train/learning_rate\"]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training Loss by T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_loss_scatter_by_t(\n", + " train_df: pd.DataFrame,\n", + " loss_column: str,\n", + " t_column: str = 'train/per_structure/t',\n", + " n_steps: int = 1000\n", + ") -> None:\n", + " \"\"\"Plot loss values as a scatter plot against train/t values for the most recent N steps with a log-scaled x-axis.\n", + " \n", + " Args:\n", + " train_df: DataFrame containing training metrics\n", + " loss_column: Name of loss column to plot (e.g., 'train/total_loss')\n", + " t_column: Name of the column representing 't' values\n", + " n_steps: Number of recent training steps to analyze (default: 1000)\n", + " \"\"\"\n", + " train_df = 
train_df.copy()\n", + "\n", + " assert loss_column in train_df.columns, f\"Loss column '{loss_column}' not found in DataFrame\"\n", + "\n", + " # Get most recent steps\n", + " unique_steps = train_df['step'].dropna().unique()\n", + " \n", + " # Get actual number of available steps\n", + " n_steps = min(n_steps, len(unique_steps))\n", + " latest_steps = np.sort(unique_steps)[-n_steps:]\n", + " \n", + " # Filter recent data\n", + " recent_data = train_df[train_df['step'].isin(latest_steps)]\n", + "\n", + " # Subset to relevant columns and remove rows with NaN values\n", + " recent_data = recent_data[[t_column, loss_column]]\n", + " \n", + " # Create scatter plot\n", + " plt.figure(figsize=(10, 6)) # Fixed figure size\n", + " sns.set_style(\"whitegrid\")\n", + " \n", + " sns.scatterplot(\n", + " data=recent_data,\n", + " x=t_column,\n", + " y=loss_column,\n", + " color='#2ca02c',\n", + " alpha=0.6\n", + " )\n", + " \n", + " plt.xscale('log') # Set x-axis to logarithmic scale\n", + " plt.title(f\"{loss_column} vs {t_column} (Log Scale)\\n(Last {n_steps} Steps)\")\n", + " plt.xlabel(t_column)\n", + " plt.ylabel(loss_column)\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_loss_scatter_by_t(train_df, 'train/per_structure/smoothed_lddt_loss', n_steps=1000)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "modelhub", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index ba0c9c0..65e6754 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,19 +20,19 @@ build-backend = "hatchling.build" 
source = "vcs" [tool.hatch.build.hooks.vcs] -version-file = "rf2aa/version.py" +version-file = "src/modelhub/version.py" [tool.hatch.metadata] allow-direct-references = true [tool.hatch.build.targets.wheel] -packages = ["rf2aa"] +packages = ["src/modelhub"] # Formatting & linting settings ------------------------------------------------------- [tool.ruff] line-length = 88 indent-width = 4 -target-version = "py311" +target-version = "py310" exclude = [ ".bzr", ".direnv", diff --git a/rf2aa/SE3Transformer/images/se3-transformer.png b/rf2aa/SE3Transformer/images/se3-transformer.png deleted file mode 100644 index 1173ac9..0000000 Binary files a/rf2aa/SE3Transformer/images/se3-transformer.png and /dev/null differ diff --git a/rf2aa/callbacks.py b/rf2aa/callbacks.py deleted file mode 100644 index 6d385cc..0000000 --- a/rf2aa/callbacks.py +++ /dev/null @@ -1,479 +0,0 @@ -import json -import logging -from collections import defaultdict - -import numpy as np -import pandas as pd -import torch -import torch.nn.functional as F -import tree -from icecream import ic -from lightning import LightningModule, Trainer -from lightning.pytorch.callbacks import Callback -from scipy.stats import norm - -from rf2aa import pymol, pymol_tools -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.debug import pretty_describe_dict -from rf2aa.loss.af3_losses import Loss -from rf2aa.pymol import cmd -from rf2aa.util import writepdb - -logger = logging.getLogger(__name__) - - -def flatten_dictionary(dictionary, parent_key="", separator="."): - flattened_dict = {} - for key, value in dictionary.items(): - new_key = f"{parent_key}{separator}{key}" if parent_key else key - if isinstance(value, dict): - flattened_dict.update(flatten_dictionary(value, new_key, separator)) - else: - flattened_dict[new_key] = value - return flattened_dict - - -class LogMetrics(Callback): - def __init__(self, config): - super().__init__() - self.config = config - - def on_train_batch_end( - self, - trainer: 
Trainer, - pl_module: LightningModule, - outputs, - batch, - batch_idx: int, - ) -> None: - logger.debug("on_train_batch_end outputs:\n" + pretty_describe_dict(outputs)) - - outputs = tree.map_structure(lambda x: x.detach().cpu(), outputs) - o = {} - stratifications = defaultdict(list) - for metric in [diffusion_losses, lddt_metrics]: - metric_d, stratification_keys = metric(self.config, outputs) - stratifications[stratification_keys].extend(metric_d.keys()) - o.update(metric_d) - - o["t"] = outputs["t"] - o["t_quantile_4"] = get_t_quantiles( - outputs["t"], self.config.loss.sigma_data, 4 - ) - df = pd.DataFrame.from_dict(o) - df = df.reindex(sorted(df.columns), axis=1) - ic(o) - (D,) = outputs["t"].shape - df["batch_idx"] = batch_idx - df["data_idx"] = np.arange(D) - df["global_step"] = trainer.global_step - trainer.logger.log_df(df, stratifications=stratifications) - return super().on_train_batch_end(trainer, pl_module, outputs, batch, batch_idx) - - def on_validation_batch_end( - self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx - ): - outputs = tree.map_structure(lambda x: x.detach().cpu(), outputs) - o = {} - for metric in [lddt_metrics, lddt_metrics_null, diffusion_losses]: - metric_d, stratification_keys = metric(self.config, outputs) - o.update(metric_d) - df = pd.DataFrame.from_dict(o) - df = df.reindex(sorted(df.columns), axis=1) - ic(o) - df["batch_idx"] = batch_idx - df["global_step"] = trainer.global_step - - trainer.logger.log_df(df, stratifications={}) - return super().on_validation_batch_end( - trainer, pl_module, outputs, batch, batch_idx, dataloader_idx - ) - - -def lddt_metrics(config, outputs): - # compute distances between ground truth atoms - ground_truth_distances = torch.cdist(outputs["X_gt_L"], outputs["X_gt_L"]) - # compute distances between predicted atoms - predicted_distances = torch.cdist(outputs["X_L"], outputs["X_L"]) - # compute LDDT score for each pair of distances - difference_distances = 
torch.abs(ground_truth_distances - predicted_distances) - lddt_matrix = torch.zeros_like(difference_distances) - lddt_matrix = ( - 0.25 * (difference_distances < 4.0) - + 0.25 * (difference_distances < 2.0) - + 0.25 * (difference_distances < 1.0) - + 0.25 * (difference_distances < 0.5) - ) - # remove unresolved atoms, atoms within same residue - is_real_atom = ChemData().heavyatom_mask.to(outputs["seq"].device)[outputs["seq"]] - is_resolved_atom_L = outputs["crd_mask_I"][is_real_atom] - is_unresolved_distance_LL = ( - is_resolved_atom_L[..., None] & is_resolved_atom_L[None, ...] - ) - in_same_residue_LL = ( - outputs["f"]["tok_idx"][:, None] == outputs["f"]["tok_idx"][None, :] - ) - - lddt_values = {} - for mask, mask_type in get_lddt_masks(outputs): - mask = mask & is_unresolved_distance_LL & ~in_same_residue_LL - lddt = torch.div(lddt_matrix[:, mask].sum(dim=(-1)), mask.sum(dim=(-1, -2))) - lddt_values[f"lddt_{mask_type}"] = lddt - return lddt_values, ("t_quantile_4",) - - -def lddt_metrics_null(config, outputs): - # compute distances between ground truth atoms - ground_truth_distances = torch.cdist(outputs["X_gt_L"], outputs["X_gt_L"]) - # compute distances between predicted atoms - t = outputs["t"] - X_noisy_L = outputs["X_noisy_L"] - sigma_data = 16 - - null_pred = (sigma_data**2 / (sigma_data**2 + t**2))[..., None, None] * X_noisy_L - - predicted_distances = torch.cdist(null_pred, null_pred) - # compute LDDT score for each pair of distances - difference_distances = torch.abs(ground_truth_distances - predicted_distances) - lddt_matrix = torch.zeros_like(difference_distances) - lddt_matrix = ( - 0.25 * (difference_distances < 4.0) - + 0.25 * (difference_distances < 2.0) - + 0.25 * (difference_distances < 1.0) - + 0.25 * (difference_distances < 0.5) - ) - # remove unresolved atoms, atoms within same residue - is_real_atom = ChemData().heavyatom_mask[outputs["seq"]] - is_resolved_atom_L = outputs["crd_mask_I"][is_real_atom] - is_unresolved_distance_LL = ( - 
is_resolved_atom_L[..., None] & is_resolved_atom_L[None, ...] - ) - in_same_residue_LL = ( - outputs["f"]["tok_idx"][:, None] == outputs["f"]["tok_idx"][None, :] - ) - - lddt_values = {} - for mask, mask_type in get_lddt_masks(outputs): - mask = mask & is_unresolved_distance_LL & ~in_same_residue_LL - lddt = torch.div(lddt_matrix[:, mask].sum(dim=(-1)), mask.sum(dim=(-1, -2))) - lddt_values[f"lddt_{mask_type}_null"] = lddt - return lddt_values, ("t_quantile_4",) - - -def get_lddt_masks(outputs): - D, L = outputs["X_L"].shape[:2] - - tok_idx = outputs["f"]["tok_idx"] - is_protein_L = outputs["f"]["is_protein"][tok_idx] - is_dna_L = outputs["f"]["is_dna"][tok_idx] - is_rna_L = outputs["f"]["is_rna"][tok_idx] - is_ligand_L = outputs["f"]["is_ligand"][tok_idx] - asym_id_L = outputs["f"]["asym_id"][tok_idx] - same_chain_LL = asym_id_L[:, None] == asym_id_L[None, :] - for mask_type in [ - "all", - "protein_intra", - "protein_inter", - "ligand_intra", - "ligand_inter", - ]: - if mask_type == "all": - mask = torch.ones((L, L), dtype=torch.bool, device=outputs["X_L"].device) - elif mask_type == "protein_intra": - mask = is_protein_L[:, None] & is_protein_L[None, :] - mask *= same_chain_LL - elif mask_type == "protein_inter": - mask = is_protein_L[:, None] & is_protein_L[None, :] - mask *= ~same_chain_LL - elif mask_type == "ligand_intra": - mask = is_ligand_L[:, None] & is_ligand_L[None, :] - mask *= same_chain_LL - elif mask_type == "ligand_inter": - mask = is_ligand_L[:, None] & is_ligand_L[None, :] - mask *= ~same_chain_LL - elif mask_type == "protein_ligand_inter": - mask = is_protein_L[:, None] & is_ligand_L[None, :] - yield (mask, mask_type) - - -def diffusion_losses(config, outputs): - loss = Loss(**config.loss) - - loss_dict_by_type = {} - t = outputs["t"] - X_noisy_L = outputs["X_noisy_L"] - sigma_data = 16 - - null_pred = (sigma_data**2 / (sigma_data**2 + t**2))[..., None, None] * X_noisy_L - - sigma_gt = torch.var(outputs["X_gt_L"], dim=(1, 2)) ** 0.5 - for 
input_type, X_L in ( - ("pred", outputs["X_L"]), - # ('input', outputs['X_noisy_L']), - ("true", outputs["X_gt_L"]), - ("null_pred", null_pred), - ): - l_total, _, loss_dict_batched = loss( - outputs["f"], - X_L, - outputs["X_gt_L"], - outputs["t"], - outputs["seq"], - outputs["crd_mask_I"], - ) - # loss_dict_by_type[input_type] = loss_dict_batched - loss_dict_batched_prefixed = { - f"{k}.{input_type}": v for k, v in loss_dict_batched.items() - } - loss_dict_by_type.update(loss_dict_batched_prefixed) - - # Correcting for EDM : AF3 lambda conversion - edm_corr = (t + loss.sigma_data) ** 2 / (t * loss.sigma_data) ** 2 - loss_dict_batched_edm = {k: v * edm_corr for k, v in loss_dict_batched.items()} - loss_dict_batched_prefixed_edm = { - f"{k}_edm.{input_type}": v for k, v in loss_dict_batched_edm.items() - } - loss_dict_by_type.update(loss_dict_batched_prefixed_edm) - - # Correcting for Var(gt) != sigma_data - expected_loss_gt = ( - 1 - / (loss.sigma_data**2 + t**2) - * (loss.sigma_data**2 + t**2 * sigma_gt**2 / loss.sigma_data**2) - ) - loss_dict_batched_edm_gt_corr = { - k: edm_corr * v / expected_loss_gt for k, v in loss_dict_batched.items() - } - loss_dict_batched_prefixed_edm = { - f"{k}_edm_gt_corr.{input_type}": v - for k, v in loss_dict_batched_edm_gt_corr.items() - } - loss_dict_by_type.update(loss_dict_batched_prefixed_edm) - - o = flatten_dictionary(loss_dict_by_type) - o["pred_over_null_pred"] = o["diffusion_loss.pred"] / o["diffusion_loss.null_pred"] - o["pred_over_null_pred_norm"] = ( - o["diffusion_loss_edm_gt_corr.pred"] / o["diffusion_loss_edm_gt_corr.null_pred"] - ) - return o, ("t_quantile_4",) - - -def get_normal_quantiles(n): - # Generate n evenly spaced probabilities between 0 and 1 - probabilities = np.linspace(0, 1, n) - # Use the percent point function (inverse CDF) of the standard normal distribution - return norm.ppf(probabilities) - - -def get_t_quantiles(t, sigma_data, n): - bins = sigma_data * np.exp(-1.2 + 1.5 * get_normal_quantiles(n + 
1)) - t_binned_list = [] - for t in t: - t_bin = np.digitize(t, bins) - 1 - bin_start = bins[t_bin] - bin_end = bins[t_bin + 1] - t_binned = f"t=[{bin_start:.2f},{bin_end:.2f})" - t_binned_list.append(t_binned) - return t_binned_list - - -class NetworkOutputGradSanityCheck(Callback): - def __init__(self, call_n_times=0, *args, **kwargs): - super().__init__(*args, **kwargs) - self.call_n_times = call_n_times - self.call_count = 0 - - def on_after_backward(self, trainer, pl_module): - if self.call_count < self.call_n_times: - self.call_count += 1 - r_projection_weight = pl_module.model.model.diffusion_module.atom_attention_decoder.to_r_update[ - 1 - ].weight - ic( - torch.linalg.norm(r_projection_weight) - if r_projection_weight is not None - else None, - torch.linalg.norm(r_projection_weight.grad) - if r_projection_weight.grad is not None - else None, - ) - - -class MonitorActivations(Callback): - def make_hook(self, label): - def hook(module, args, kwargs, output): - activation_metrics = { - f"{label}:inter_batch_cosine_similarity": F.cosine_similarity( - torch.flatten(output[0]), - torch.flatten(output[1]), - dim=0, - ), - f"{label}:intra_batch_cosine_similarity_to_elem_0": F.cosine_similarity( - output[0][0:1], - output[0], - ).mean(), - } - self.log_dict(activation_metrics) - - return hook - - def setup(self, trainer, pl_module, stage): - self.pl_module = pl_module - self.trainer = trainer - - pl_module.model.model.diffusion_module.atom_attention_decoder.register_forward_hook( - self.make_hook( - "diffusion_module.atom_attention_decoder", - ), - with_kwargs=True, - ) - - -class FindUnusedParameters(Callback): - def __init__(self, only_once=True, *args, **kwargs): - super().__init__(*args, **kwargs) - self.only_once = only_once - self.called = False - - def on_after_backward(self, trainer, pl_module): - if self.called and self.only_once: - return - self.called = True - # Calculate unused parameters after each batch - unused_params = [ - name for name, param in 
pl_module.named_parameters() if param.grad is None - ] - - # Log unused parameters - logging.info( - f"global_step={pl_module.global_step}: parameters with no gradient: {json.dumps(unused_params, indent=4)}" - ) - if unused_params: - raise Exception("storp") - - -class WriteToPymol(Callback): - def __init__(self, only_once=True, *args, **kwargs): - super().__init__(*args, **kwargs) - self.only_once = only_once - self.called = False - pymol.init("http://chesaw.dhcp.ipd:9123") - - def on_train_batch_end( - self, - trainer: Trainer, - pl_module: LightningModule, - outputs, - batch, - batch_idx: int, - ) -> None: - if self.called and self.only_once: - return - self.called = True - - pymol_tools.clear() - predicted = outputs - - logger.info("predicted:\n" + pretty_describe_dict(predicted)) - ic(predicted["loss"]) - - D = predicted["X_L"].shape[0] - - max_to_show = 16 - grid_slot = 1 - cmd.set("grid_mode", 1) - for i in range(min(D, max_to_show)): - X_gt_L = predicted["X_gt_L"][i] - X_L = predicted["X_L"][i] - X_noisy_L = predicted["X_noisy_L"][i] - t = predicted["t"][i] - - label = pymol_tools.show_pymol( - pymol_tools.to_atom37(X_noisy_L, predicted["crd_mask_I"]), - predicted["seq"], - predicted["bond_feats"], - label=f"input_{i}_t_{t.item():.2f}", - ) - cmd.set("grid_slot", grid_slot, label) - cmd.color("yellow", label) - - label = pymol_tools.show_pymol( - pymol_tools.to_atom37(X_L, predicted["crd_mask_I"]), - predicted["seq"], - predicted["bond_feats"], - label=f"pred_{i}_t_{t.item():.2f}", - ) - cmd.set("grid_slot", grid_slot, label) - cmd.color("green", label) - - label = pymol_tools.show_pymol( - pymol_tools.to_atom37(X_gt_L, predicted["crd_mask_I"]), - predicted["seq"], - predicted["bond_feats"], - label=f"gt_{i}", - ) - cmd.set("grid_slot", grid_slot, label) - cmd.color("blue", label) - grid_slot += 1 - - cmd.show_as("licorice", "all") - cmd.alter("name CA", "vdw=2.0") - # cmd.set('sphere_transparency', 0.0) - cmd.show("spheres", "name CA") - - return 
super().on_train_batch_end(trainer, pl_module, outputs, batch, batch_idx) - - -class WritePDB(Callback): - def on_validation_batch_end( - self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx - ): - seq = batch["seq"][0][0] - is_real_atom = ChemData().heavyatom_mask.to(seq.device)[seq] - X_L = outputs["X_L"] - X_gt_L = outputs["X_gt_L"] - atom_mask = outputs["crd_mask_I"] - bond_feats = batch["bond_feats"] - - X_I = torch.full( - (X_L.shape[0], atom_mask.shape[0], ChemData().NTOTAL, 3), np.nan - ).to(X_L.device) - X_I[..., is_real_atom, :] = X_L - - X_gt_I = torch.full( - (X_gt_L.shape[0], atom_mask.shape[0], ChemData().NTOTAL, 3), np.nan - ).to(X_gt_L.device) - X_gt_I[..., atom_mask, :] = X_gt_L - pdb_path = f"tmp/true_{batch_idx}.pdb" - writepdb( - pdb_path, - X_gt_I[0], - seq.long(), - bond_feats=bond_feats, - ) - for i in range(X_L.shape[0]): - pdb_path = f"tmp/pred_{batch_idx}_{i}.pdb" - writepdb( - pdb_path, - X_I[i], - seq.long(), - bond_feats=bond_feats, - ) - - return super().on_validation_batch_end( - trainer, pl_module, outputs, batch, batch_idx, dataloader_idx - ) - - -class DebugGrads(Callback): - def on_after_backward(self, trainer, pl_module): - grad_dict = {} - for name, param in pl_module.named_parameters(): - if param.grad is not None and "pairformer" in name: - grad_dict[name] = param.grad.clone().detach() - ic( - name, - torch.linalg.norm(param.grad), - torch.linalg.norm(param), - ) - torch.save(grad_dict, "grad_dict_unbatched.pt") diff --git a/rf2aa/cartbonded.json b/rf2aa/cartbonded.json deleted file mode 100644 index f755e12..0000000 --- a/rf2aa/cartbonded.json +++ /dev/null @@ -1,9052 +0,0 @@ -{ - "lengths": [ - { - "res": "CYS", - "atm1": " SG ", - "atm2": " SG ", - "x0": 0, - "K": 0 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23101, - "K": 563.084 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " CB ", - 
"x0": 1.52174, - "K": 260.414 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09008, - "K": 306.735 - }, - { - "res": "ALA", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09004, - "K": 122.8108 - }, - { - "res": "ALA", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09007, - "K": 122.8108 - }, - { - "res": "ALA", - "atm1": " CB ", - "atm2": "3HB ", - "x0": 1.0888, - "K": 122.8108 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "ARG", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.52157, - "K": 260.414 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09042, - "K": 306.735 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.08903, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09007, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.52044, - "K": 84.8615 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": "1HD ", - "x0": 1.08956, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": "2HD ", - "x0": 1.08936, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " NE ", - "x0": 1.45407, - "K": 99.5454 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.09024, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.09062, - "K": 117.8526 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CD ", - "x0": 1.48537, - "K": 84.8615 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH1", - "x0": 1.31462, - "K": 176.5882 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH2", - "x0": 1.32163, - "K": 176.5882 - }, - { - "res": 
"ARG", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "ARG", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "x0": 1.34729, - "K": 176.5882 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " HE ", - "x0": 1.01136, - "K": 173.537 - }, - { - "res": "ARG", - "atm1": " NH1", - "atm2": "1HH1", - "x0": 1.01026, - "K": 173.537 - }, - { - "res": "ARG", - "atm1": " NH1", - "atm2": "2HH1", - "x0": 1.00968, - "K": 173.537 - }, - { - "res": "ARG", - "atm1": " NH2", - "atm2": "1HH2", - "x0": 1.01091, - "K": 173.537 - }, - { - "res": "ARG", - "atm1": " NH2", - "atm2": "2HH2", - "x0": 1.00912, - "K": 173.537 - }, - { - "res": "ASN", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.51768, - "K": 260.414 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09116, - "K": 306.735 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.0908, - "K": 117.8526 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09009, - "K": 117.8526 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.50351, - "K": 76.28 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " ND2", - "x0": 1.30864, - "K": 164.002 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " OD1", - "x0": 1.2364, - "K": 247.91 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "ASN", - "atm1": " ND2", - "atm2": "1HD2", - "x0": 1.00047, - "K": 183.072 - }, - { - "res": "ASN", - "atm1": " ND2", - "atm2": "2HD2", - "x0": 0.999495, - "K": 183.072 - }, - { - "res": "ASP", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 
563.084 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53065, - "K": 260.414 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09038, - "K": 306.735 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09024, - "K": 117.8526 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09039, - "K": 117.8526 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.52279, - "K": 76.28 - }, - { - "res": "ASP", - "atm1": " CG ", - "atm2": " OD1", - "x0": 1.20825, - "K": 200.235 - }, - { - "res": "ASP", - "atm1": " CG ", - "atm2": " OD2", - "x0": 1.20776, - "K": 200.235 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "CYS", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.52886, - "K": 260.414 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09006, - "K": 306.735 - }, - { - "res": "CYS", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09025, - "K": 117.8526 - }, - { - "res": "CYS", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08982, - "K": 117.8526 - }, - { - "res": "CYS", - "atm1": " CB ", - "atm2": " SG ", - "x0": 1.8088, - "K": 75.5172 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "CYS", - "atm1": " SG ", - "atm2": " HG ", - "x0": 1.32937, - "K": 104.885 - }, - { - "res": "GLN", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " C ", 
- "x0": 1.52326, - "K": 301.675 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53107, - "K": 260.414 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08987, - "K": 306.735 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09, - "K": 117.8526 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09, - "K": 117.8526 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.51911, - "K": 84.8615 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " NE2", - "x0": 1.32811, - "K": 164.002 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " OE1", - "x0": 1.23416, - "K": 247.91 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.08971, - "K": 117.8526 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.09, - "K": 117.8526 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CD ", - "x0": 1.51688, - "K": 76.28 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "GLN", - "atm1": " NE2", - "atm2": "1HE2", - "x0": 1.00096, - "K": 183.072 - }, - { - "res": "GLN", - "atm1": " NE2", - "atm2": "2HE2", - "x0": 1.00008, - "K": 183.072 - }, - { - "res": "GLU", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53032, - "K": 260.414 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09051, - "K": 306.735 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09015, - "K": 117.8526 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09006, - "K": 117.8526 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.52211, - "K": 84.8615 - }, - { - "res": "GLU", - 
"atm1": " CD ", - "atm2": " OE1", - "x0": 1.20758, - "K": 200.235 - }, - { - "res": "GLU", - "atm1": " CD ", - "atm2": " OE2", - "x0": 1.20854, - "K": 200.235 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.08969, - "K": 117.8526 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.08911, - "K": 117.8526 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": " CD ", - "x0": 1.50336, - "K": 76.28 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "GLY", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "GLY", - "atm1": " CA ", - "atm2": "1HA ", - "x0": 1.09017, - "K": 330 - }, - { - "res": "GLY", - "atm1": " CA ", - "atm2": "2HA ", - "x0": 1.08935, - "K": 330 - }, - { - "res": "GLY", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "GLY", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "GLY", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "HIS", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53212, - "K": 260.414 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08962, - "K": 306.735 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09058, - "K": 117.8526 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08966, - "K": 117.8526 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.49716, - "K": 87.56944 - }, - { - "res": "HIS", - "atm1": " CD2", - "atm2": "2HD ", - "x0": 1.09034, - "K": 139.211 - }, - { - "res": "HIS", - "atm1": " CD2", - "atm2": " NE2", - "x0": 1.37321, - "K": 152.56 - }, - { 
- "res": "HIS", - "atm1": " CE1", - "atm2": "1HE ", - "x0": 1.08979, - "K": 129.676 - }, - { - "res": "HIS", - "atm1": " CE1", - "atm2": " NE2", - "x0": 1.32038, - "K": 152.56 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.35365, - "K": 156.374 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " ND1", - "x0": 1.37916, - "K": 152.56 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CE1", - "x0": 1.32193, - "K": 152.56 - }, - { - "res": "HIS", - "atm1": " NE2", - "atm2": "2HE ", - "x0": 1.01008, - "K": 177.7324 - }, - { - "res": "HIS_D", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53212, - "K": 260.414 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08962, - "K": 306.735 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09058, - "K": 117.8526 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08966, - "K": 117.8526 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.49716, - "K": 87.56944 - }, - { - "res": "HIS_D", - "atm1": " CD2", - "atm2": "2HD ", - "x0": 1.09034, - "K": 139.211 - }, - { - "res": "HIS_D", - "atm1": " CD2", - "atm2": " NE2", - "x0": 1.37321, - "K": 152.56 - }, - { - "res": "HIS_D", - "atm1": " CE1", - "atm2": "1HE ", - "x0": 1.08979, - "K": 129.676 - }, - { - "res": "HIS_D", - "atm1": " CE1", - "atm2": " NE2", - "x0": 1.32038, - "K": 152.56 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.35365, - "K": 156.374 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " ND1", - "x0": 1.37916, - "K": 152.56 - }, - { - "res": "HIS_D", - "atm1": " N 
", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "HIS_D", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CE1", - "x0": 1.32193, - "K": 152.56 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": "1HD ", - "x0": 1.00024, - "K": 177.7324 - }, - { - "res": "ILE", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53963, - "K": 260.414 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08933, - "K": 306.735 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "x0": 1.5309, - "K": 84.8615 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG2", - "x0": 1.52091, - "K": 84.8615 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " HB ", - "x0": 1.08955, - "K": 117.8526 - }, - { - "res": "ILE", - "atm1": " CD1", - "atm2": "1HD1", - "x0": 1.09029, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " CD1", - "atm2": "2HD1", - "x0": 1.09058, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " CD1", - "atm2": "3HD1", - "x0": 1.08906, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": "1HG1", - "x0": 1.08947, - "K": 117.8526 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": "2HG1", - "x0": 1.09034, - "K": 117.8526 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CD1", - "x0": 1.51168, - "K": 84.8615 - }, - { - "res": "ILE", - "atm1": " CG2", - "atm2": "1HG2", - "x0": 1.08976, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " CG2", - "atm2": "2HG2", - "x0": 1.08915, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " CG2", - "atm2": "3HG2", - "x0": 1.09015, - "K": 122.8108 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 
458.304 - }, - { - "res": "LEU", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53385, - "K": 260.414 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08944, - "K": 306.735 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.0888, - "K": 117.8526 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08992, - "K": 117.8526 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.53403, - "K": 84.8615 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": "1HD1", - "x0": 1.0901, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": "2HD1", - "x0": 1.09008, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": "3HD1", - "x0": 1.08937, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": "1HD2", - "x0": 1.09047, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": "2HD2", - "x0": 1.09021, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": "3HD2", - "x0": 1.0901, - "K": 122.8108 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD1", - "x0": 1.52267, - "K": 84.8615 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.52143, - "K": 84.8615 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " HG ", - "x0": 1.09033, - "K": 117.8526 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "LYS", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23101, - "K": 563.084 - }, - { - "res": "LYS", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "LYS", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.52951, - "K": 260.414 - }, - { - "res": "LYS", - "atm1": " CA ", - 
"atm2": " HA ", - "x0": 1.09053, - "K": 306.735 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09033, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09021, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.52293, - "K": 84.8615 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": "1HD ", - "x0": 1.09086, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": "2HD ", - "x0": 1.08996, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "x0": 1.52158, - "K": 84.8615 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": "1HE ", - "x0": 1.08899, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": "2HE ", - "x0": 1.08993, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " NZ ", - "x0": 1.48811, - "K": 76.28 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.09065, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.08954, - "K": 117.8526 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": " CD ", - "x0": 1.52135, - "K": 84.8615 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "LYS", - "atm1": " NZ ", - "atm2": "1HZ ", - "x0": 1.01022, - "K": 153.7042 - }, - { - "res": "LYS", - "atm1": " NZ ", - "atm2": "2HZ ", - "x0": 1.01051, - "K": 153.7042 - }, - { - "res": "LYS", - "atm1": " NZ ", - "atm2": "3HZ ", - "x0": 1.00991, - "K": 153.7042 - }, - { - "res": "MET", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.52739, - "K": 260.414 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08982, - "K": 306.735 
- }, - { - "res": "MET", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.08903, - "K": 117.8526 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.09075, - "K": 117.8526 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.5222, - "K": 84.8615 - }, - { - "res": "MET", - "atm1": " CE ", - "atm2": "1HE ", - "x0": 1.09027, - "K": 122.8108 - }, - { - "res": "MET", - "atm1": " CE ", - "atm2": "2HE ", - "x0": 1.09085, - "K": 122.8108 - }, - { - "res": "MET", - "atm1": " CE ", - "atm2": "3HE ", - "x0": 1.09057, - "K": 122.8108 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.08947, - "K": 117.8526 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.08981, - "K": 117.8526 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " SD ", - "x0": 1.80384, - "K": 75.5172 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CE ", - "x0": 1.79039, - "K": 91.536 - }, - { - "res": "PHE", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.5298, - "K": 260.414 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.0909, - "K": 306.735 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.08922, - "K": 117.8526 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08919, - "K": 117.8526 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.50223, - "K": 87.722 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CE1", - "x0": 1.38218, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": "1HD ", - "x0": 1.09033, - "K": 129.676 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": 
" CE2", - "x0": 1.38128, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": "2HD ", - "x0": 1.0906, - "K": 129.676 - }, - { - "res": "PHE", - "atm1": " CE1", - "atm2": " CZ ", - "x0": 1.37858, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CE1", - "atm2": "1HE ", - "x0": 1.08972, - "K": 129.676 - }, - { - "res": "PHE", - "atm1": " CE2", - "atm2": " CZ ", - "x0": 1.38049, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CE2", - "atm2": "2HE ", - "x0": 1.08983, - "K": 129.676 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD1", - "x0": 1.38696, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.3866, - "K": 116.327 - }, - { - "res": "PHE", - "atm1": " CZ ", - "atm2": " HZ ", - "x0": 1.08908, - "K": 129.676 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "PRO", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23101, - "K": 563.084 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.532, - "K": 260.414 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.1, - "K": 306.735 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.4906, - "K": 84.8615 - }, - { - "res": "PRO", - "atm1": " CD ", - "atm2": "1HD ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CD ", - "atm2": "2HD ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": "1HG ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": "2HG ", - "x0": 1.1, - "K": 117.8526 - }, - { - "res": "PRO", - "atm1": " CG ", - 
"atm2": " CD ", - "x0": 1.5055, - "K": 84.8615 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CD ", - "x0": 1.473, - "K": 122.048 - }, - { - "res": "SER", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.51626, - "K": 260.414 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09092, - "K": 306.735 - }, - { - "res": "SER", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.08969, - "K": 117.8526 - }, - { - "res": "SER", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08918, - "K": 117.8526 - }, - { - "res": "SER", - "atm1": " CB ", - "atm2": " OG ", - "x0": 1.40119, - "K": 163.2392 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "SER", - "atm1": " OG ", - "atm2": " HG ", - "x0": 0.960175, - "K": 207.863 - }, - { - "res": "THR", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53992, - "K": 260.414 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09026, - "K": 306.735 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " CG2", - "x0": 1.52099, - "K": 84.8615 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " HB ", - "x0": 1.08982, - "K": 117.8526 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " OG1", - "x0": 1.43355, - "K": 163.2392 - }, - { - "res": "THR", - "atm1": " CG2", - "atm2": "1HG2", - "x0": 1.08983, - "K": 122.8108 - }, - { - "res": "THR", - "atm1": " CG2", - "atm2": "2HG2", - "x0": 1.08986, - "K": 122.8108 - }, - { - 
"res": "THR", - "atm1": " CG2", - "atm2": "3HG2", - "x0": 1.08924, - "K": 122.8108 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " HG1", - "x0": 0.960297, - "K": 207.863 - }, - { - "res": "TRP", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23101, - "K": 563.084 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.52982, - "K": 260.414 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.08988, - "K": 306.735 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09017, - "K": 117.8526 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08979, - "K": 117.8526 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.49875, - "K": 87.722 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": "1HD ", - "x0": 1.08852, - "K": 129.676 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " NE1", - "x0": 1.37294, - "K": 102.978 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE2", - "x0": 1.39734, - "K": 137.304 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE3", - "x0": 1.40038, - "K": 116.327 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CZ2", - "x0": 1.38595, - "K": 116.327 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CZ3", - "x0": 1.38985, - "K": 116.327 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " HE3", - "x0": 1.08954, - "K": 129.676 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD1", - "x0": 1.36272, - "K": 133.49 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.44821, - "K": 133.49 - }, - { - "res": "TRP", - "atm1": " CH2", - "atm2": " HH2", - "x0": 1.09029, - "K": 129.676 - }, - { - "res": "TRP", - "atm1": " CZ2", - "atm2": " CH2", - 
"x0": 1.39502, - "K": 116.327 - }, - { - "res": "TRP", - "atm1": " CZ2", - "atm2": " HZ2", - "x0": 1.09024, - "K": 129.676 - }, - { - "res": "TRP", - "atm1": " CZ3", - "atm2": " CH2", - "x0": 1.37211, - "K": 116.327 - }, - { - "res": "TRP", - "atm1": " CZ3", - "atm2": " HZ3", - "x0": 1.09029, - "K": 129.676 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": " CE2", - "x0": 1.37213, - "K": 102.978 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": "1HE ", - "x0": 1.00989, - "K": 177.351 - }, - { - "res": "TYR", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23101, - "K": 563.084 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.53035, - "K": 260.414 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09032, - "K": 306.735 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": "1HB ", - "x0": 1.09097, - "K": 117.8526 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": "2HB ", - "x0": 1.08957, - "K": 117.8526 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "x0": 1.51266, - "K": 87.722 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CE1", - "x0": 1.38155, - "K": 116.327 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": "1HD ", - "x0": 1.09023, - "K": 129.676 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CE2", - "x0": 1.38136, - "K": 116.327 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": "2HD ", - "x0": 1.09002, - "K": 129.676 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": " CZ ", - "x0": 1.39041, - "K": 116.327 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": "1HE ", - "x0": 1.08966, - "K": 129.676 - }, - { - "res": "TYR", - "atm1": " CE2", - "atm2": " CZ ", - "x0": 1.37999, - "K": 116.327 - }, - { - "res": "TYR", - 
"atm1": " CE2", - "atm2": "2HE ", - "x0": 1.09029, - "K": 129.676 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD1", - "x0": 1.38719, - "K": 116.327 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD2", - "x0": 1.38689, - "K": 116.327 - }, - { - "res": "TYR", - "atm1": " CZ ", - "atm2": " OH ", - "x0": 1.37596, - "K": 127.50202 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - "K": 458.304 - }, - { - "res": "TYR", - "atm1": " OH ", - "atm2": " HH ", - "x0": 0.960239, - "K": 207.863 - }, - { - "res": "VAL", - "atm1": " C ", - "atm2": " O ", - "x0": 1.23102, - "K": 563.084 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " C ", - "x0": 1.52326, - "K": 301.675 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "x0": 1.54025, - "K": 260.414 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " HA ", - "x0": 1.09065, - "K": 306.735 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG1", - "x0": 1.52142, - "K": 84.8615 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG2", - "x0": 1.52106, - "K": 84.8615 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " HB ", - "x0": 1.09011, - "K": 117.8526 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": "1HG1", - "x0": 1.09035, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": "2HG1", - "x0": 1.08982, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": "3HG1", - "x0": 1.08981, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": "1HG2", - "x0": 1.08961, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": "2HG2", - "x0": 1.09018, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": "3HG2", - "x0": 1.09017, - "K": 122.8108 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " CA ", - "x0": 1.458, - "K": 361.504 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " H ", - "x0": 1.01, - 
"K": 458.304 - } - ], - "angles": [ - { - "res": "CYS", - "atm1": " CB ", - "atm2": " SG ", - "atm3": " SG ", - "x0": 0, - "K": 0 - }, - { - "res": "ALA", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ALA", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "3HB ", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ALA", - "atm1": "2HB ", - "atm2": " CB ", - "atm3": "3HB ", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92575, - "K": 103.9584 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.88223, - "K": 85.58 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ALA", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "3HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ALA", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.90057, - "K": 59.906 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.9264, - "K": 144.466 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8867, - "K": 82.1568 - }, - { - "res": "ARG", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "ARG", - "atm1": "1HD ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ARG", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ARG", - "atm1": "1HH1", - "atm2": " NH1", - "atm3": "2HH1", - "x0": 2.0944, - "K": 42.79 - }, - { - "res": "ARG", - "atm1": "1HH2", - "atm2": " NH2", - "atm3": "2HH2", - "x0": 2.0944, - "K": 42.79 - }, - { - "res": "ARG", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.90853, - "K": 
103.9584 - }, - { - "res": "ARG", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89435, - "K": 85.58 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ARG", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 2.00495, - "K": 121.74144 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89517, - "K": 59.906 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD ", - "x0": 1.94953, - "K": 108.09921 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.89072, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.89072, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " NE ", - "atm3": " CZ ", - "x0": 2.17468, - "K": 115.41698 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " NE ", - "atm3": " HE ", - "x0": 2.05425, - "K": 69.14864 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.87822, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87822, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "ARG", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " NE ", - "x0": 1.95302, - "K": 125.42102 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NE ", - "atm3": " HE ", - "x0": 
2.05425, - "K": 83.8684 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH1", - "atm3": "1HH1", - "x0": 2.0944, - "K": 83.8684 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH1", - "atm3": "2HH1", - "x0": 2.0944, - "K": 83.8684 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH2", - "atm3": "1HH2", - "x0": 2.0944, - "K": 83.8684 - }, - { - "res": "ARG", - "atm1": " CZ ", - "atm2": " NH2", - "atm3": "2HH2", - "x0": 2.0944, - "K": 83.8684 - }, - { - "res": "ARG", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93033, - "K": 144.466 - }, - { - "res": "ARG", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.88894, - "K": 88.1474 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.88894, - "K": 88.1474 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH1", - "x0": 2.0944, - "K": 96.3352 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH2", - "x0": 2.0944, - "K": 96.3352 - }, - { - "res": "ARG", - "atm1": " NH1", - "atm2": " CZ ", - "atm3": " NH2", - "x0": 2.0944, - "K": 96.3352 - }, - { - "res": "ASN", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "ASN", - "atm1": "1HD2", - "atm2": " ND2", - "atm3": "2HD2", - "x0": 2.0944, - "K": 39.3668 - }, - { - "res": "ASN", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.9304, - "K": 103.9584 - }, - { - "res": "ASN", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.87155, - "K": 85.58 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ASN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.96524, - 
"K": 108.4928 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89517, - "K": 59.906 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND2", - "x0": 2.03331, - "K": 92.63 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " OD1", - "x0": 2.10836, - "K": 27.789 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.8992, - "K": 56.4828 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.8992, - "K": 56.4828 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " ND2", - "atm3": "1HD2", - "x0": 2.0944, - "K": 85.58 - }, - { - "res": "ASN", - "atm1": " CG ", - "atm2": " ND2", - "atm3": "2HD2", - "x0": 2.0944, - "K": 85.58 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93033, - "K": 144.466 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "ASN", - "atm1": " OD1", - "atm2": " CG ", - "atm3": " ND2", - "x0": 2.14152, - "K": 138.945 - }, - { - "res": "ASP", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "ASP", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91479, - "K": 103.9584 - }, - { - "res": "ASP", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.88867, - "K": 85.58 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "ASP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.97048, - "K": 108.4928 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89586, - "K": 59.906 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " OD1", - "x0": 2.06637, - "K": 74.104 - }, - { 
- "res": "ASP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " OD2", - "x0": 2.06591, - "K": 74.104 - }, - { - "res": "ASP", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.89646, - "K": 56.4828 - }, - { - "res": "ASP", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.89646, - "K": 56.4828 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92892, - "K": 144.466 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "ASP", - "atm1": " OD1", - "atm2": " CG ", - "atm3": " OD2", - "x0": 2.1509, - "K": 185.26 - }, - { - "res": "CYS", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "CYS", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.90716, - "K": 103.9584 - }, - { - "res": "CYS", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89577, - "K": 85.58 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " SG ", - "x0": 1.99142, - "K": 121.0112 - }, - { - "res": "CYS", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89517, - "K": 59.906 - }, - { - "res": "CYS", - "atm1": " CB ", - "atm2": " SG ", - "atm3": " HG ", - "x0": 1.67531, - "K": 66.41008 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93033, - "K": 144.466 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "CYS", - "atm1": " SG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88541, - "K": 78.90476 - }, - { - "res": "CYS", - "atm1": " SG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88541, - "K": 78.90476 - }, - { - "res": 
"GLN", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "GLN", - "atm1": "1HE2", - "atm2": " NE2", - "atm3": "2HE2", - "x0": 2.09341, - "K": 39.3668 - }, - { - "res": "GLN", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.9205, - "K": 60.7618 - }, - { - "res": "GLN", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91915, - "K": 103.9584 - }, - { - "res": "GLN", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8844, - "K": 85.58 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "GLN", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.99298, - "K": 121.74144 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89608, - "K": 59.906 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.90066, - "K": 45.3574 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.90066, - "K": 45.3574 - }, - { - "res": "GLN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD ", - "x0": 1.96227, - "K": 96.3352 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.88985, - "K": 56.4828 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.88985, - "K": 56.4828 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " NE2", - "atm3": "1HE2", - "x0": 2.09481, - "K": 85.58 - }, - { - "res": "GLN", - "atm1": " CD ", - "atm2": " NE2", - "atm3": "2HE2", - "x0": 2.09497, - "K": 85.58 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88458, - "K": 45.3574 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88458, - "K": 45.3574 - }, - { - "res": "GLN", - 
"atm1": " CG ", - "atm2": " CD ", - "atm3": " NE2", - "x0": 2.03179, - "K": 92.63 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " OE1", - "x0": 2.11099, - "K": 27.789 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92846, - "K": 144.466 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "GLN", - "atm1": " OE1", - "atm2": " CD ", - "atm3": " NE2", - "x0": 2.1404, - "K": 138.945 - }, - { - "res": "GLU", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "GLU", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.88428, - "K": 60.7618 - }, - { - "res": "GLU", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91729, - "K": 103.9584 - }, - { - "res": "GLU", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.88725, - "K": 85.58 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "GLU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.9966, - "K": 121.74144 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89686, - "K": 59.906 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.90066, - "K": 45.3574 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.90066, - "K": 45.3574 - }, - { - "res": "GLU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD ", - "x0": 1.97048, - "K": 96.3352 - }, - { - "res": "GLU", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.89951, - "K": 56.4828 - }, - { - "res": "GLU", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.90638, - "K": 56.4828 - }, - { - "res": "GLU", - "atm1": " CG 
", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88266, - "K": 45.3574 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88266, - "K": 45.3574 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " OE1", - "x0": 2.06728, - "K": 74.104 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " OE2", - "x0": 2.06581, - "K": 74.104 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92684, - "K": 144.466 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "GLU", - "atm1": " OE1", - "atm2": " CD ", - "atm3": " OE2", - "x0": 2.15009, - "K": 185.26 - }, - { - "res": "GLY", - "atm1": "1HA ", - "atm2": " CA ", - "atm3": "2HA ", - "x0": 1.86998, - "K": 72 - }, - { - "res": "GLY", - "atm1": " C ", - "atm2": " CA ", - "atm3": "1HA ", - "x0": 1.91471, - "K": 100 - }, - { - "res": "GLY", - "atm1": " C ", - "atm2": " CA ", - "atm3": "2HA ", - "x0": 1.91471, - "K": 100 - }, - { - "res": "GLY", - "atm1": " N ", - "atm2": " CA ", - "atm3": "1HA ", - "x0": 1.91114, - "K": 96 - }, - { - "res": "GLY", - "atm1": " N ", - "atm2": " CA ", - "atm3": "2HA ", - "x0": 1.91114, - "K": 96 - }, - { - "res": "HIS", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "HIS", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91454, - "K": 103.9584 - }, - { - "res": "HIS", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.87829, - "K": 85.58 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "HIS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.98427, - "K": 121.74144 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", 
- "x0": 1.88557, - "K": 78.39128 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.29001, - "K": 84.84908 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND1", - "x0": 2.1403, - "K": 240.838 - }, - { - "res": "HIS", - "atm1": " CD2", - "atm2": " NE2", - "atm3": " CE1", - "x0": 1.9025, - "K": 55.578 - }, - { - "res": "HIS", - "atm1": " CD2", - "atm2": " NE2", - "atm3": "2HE ", - "x0": 2.18506, - "K": 42.79 - }, - { - "res": "HIS", - "atm1": " CE1", - "atm2": " NE2", - "atm3": "2HE ", - "x0": 2.19562, - "K": 51.348 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88919, - "K": 57.218788 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88919, - "K": 57.218788 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.20671, - "K": 42.79 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " NE2", - "x0": 1.87038, - "K": 240.838 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "x0": 1.90739, - "K": 240.838 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93189, - "K": 144.466 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.91114, - "K": 82.1568 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.19562, - "K": 42.79 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CE1", - "atm3": " NE2", - "x0": 1.89164, - "K": 240.838 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 1.85288, - "K": 240.838 - }, - { - "res": "HIS", - "atm1": " NE2", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.2061, - "K": 42.79 - }, - { - "res": "HIS", - "atm1": " NE2", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.19593, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 
1.87575, - "K": 60.7618 - }, - { - "res": "HIS_D", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91454, - "K": 103.9584 - }, - { - "res": "HIS_D", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8783, - "K": 85.58 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "HIS_D", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.98427, - "K": 121.74144 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.88557, - "K": 78.39128 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.29002, - "K": 84.84908 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND1", - "x0": 2.14029, - "K": 240.838 - }, - { - "res": "HIS_D", - "atm1": " CD2", - "atm2": " NE2", - "atm3": " CE1", - "x0": 1.90251, - "K": 55.578 - }, - { - "res": "HIS_D", - "atm1": " CE1", - "atm2": " ND1", - "atm3": "1HD ", - "x0": 2.16847, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88919, - "K": 57.218788 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88919, - "K": 57.218788 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.31752, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " NE2", - "x0": 1.87037, - "K": 240.838 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "x0": 1.90738, - "K": 240.838 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " ND1", - "atm3": "1HD ", - "x0": 2.20336, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93189, - "K": 144.466 - }, - { - "res": "HIS_D", - "atm1": " N ", - "atm2": " CA ", 
- "atm3": " HA ", - "x0": 1.91114, - "K": 82.1568 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.10501, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CE1", - "atm3": " NE2", - "x0": 1.89163, - "K": 240.838 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 1.85288, - "K": 240.838 - }, - { - "res": "HIS_D", - "atm1": " NE2", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.09529, - "K": 42.79 - }, - { - "res": "HIS_D", - "atm1": " NE2", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.28654, - "K": 42.79 - }, - { - "res": "ILE", - "atm1": "1HD1", - "atm2": " CD1", - "atm3": "2HD1", - "x0": 1.90998, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "1HD1", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.90964, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": "2HG1", - "x0": 1.8743, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "2HG2", - "x0": 1.91024, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91002, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "2HD1", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.91077, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": "2HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "ILE", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91116, - "K": 103.9584 - }, - { - "res": "ILE", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.88867, - "K": 85.58 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG1", - "x0": 1.92703, - "K": 111.30944 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.92804, - "K": 111.30944 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.91114, - "K": 69 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CA ", - 
"atm3": " HA ", - "x0": 1.89264, - "K": 59.906 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": "1HG1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": "2HG1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": " CD1", - "x0": 1.98662, - "K": 108.09921 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "1HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "2HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "ILE", - "atm1": " CD1", - "atm2": " CG1", - "atm3": "1HG1", - "x0": 1.8895, - "K": 59.22136 - }, - { - "res": "ILE", - "atm1": " CD1", - "atm2": " CG1", - "atm3": "2HG1", - "x0": 1.88775, - "K": 59.22136 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.9306, - "K": 98.83621 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.8641, - "K": 59.0502 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CD1", - "atm3": "1HD1", - "x0": 1.91114, - "K": 59.22136 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CD1", - "atm3": "2HD1", - "x0": 1.91114, - "K": 59.22136 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.91114, - "K": 59.22136 - }, - { - "res": "ILE", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.90163, - "K": 59.0502 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93557, - "K": 144.466 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "LEU", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.89065, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "1HD1", - "atm2": 
" CD1", - "atm3": "2HD1", - "x0": 1.90928, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "1HD1", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.91003, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "1HD2", - "atm2": " CD2", - "atm3": "2HD2", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "1HD2", - "atm2": " CD2", - "atm3": "3HD2", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "2HD1", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.91109, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": "2HD2", - "atm2": " CD2", - "atm3": "3HD2", - "x0": 1.91013, - "K": 60.7618 - }, - { - "res": "LEU", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91492, - "K": 103.9584 - }, - { - "res": "LEU", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89151, - "K": 85.58 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.89543, - "K": 66.86 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.89543, - "K": 66.86 - }, - { - "res": "LEU", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 2.01935, - "K": 121.74144 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89842, - "K": 59.906 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "x0": 1.91114, - "K": 98.83621 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 1.91114, - "K": 98.83621 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " HG ", - "x0": 1.91114, - "K": 59.0502 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 1.93845, - "K": 98.83621 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " HG ", - "x0": 1.88153, - "K": 59.0502 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " HG ", - "x0": 1.91013, - "K": 59.0502 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CB 
", - "atm3": "1HB ", - "x0": 1.87977, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87977, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "1HD1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "2HD1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "3HD1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "1HD2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "2HD2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "3HD2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92361, - "K": 144.466 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "LYS", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "LYS", - "atm1": "1HD ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.89356, - "K": 60.7618 - }, - { - "res": "LYS", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": "2HE ", - "x0": 1.88918, - "K": 60.7618 - }, - { - "res": "LYS", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.89349, - "K": 60.7618 - }, - { - "res": "LYS", - "atm1": "1HZ ", - "atm2": " NZ ", - "atm3": "2HZ ", - "x0": 1.91021, - "K": 75.3104 - }, - { - "res": "LYS", - "atm1": "1HZ ", - "atm2": " NZ ", - "atm3": "3HZ ", - "x0": 1.91002, - "K": 75.3104 - }, - { - "res": "LYS", - "atm1": "2HZ ", - "atm2": " NZ ", - "atm3": "3HZ ", - "x0": 1.91016, - "K": 75.3104 - }, - { - "res": "LYS", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92181, - "K": 103.9584 - }, - { - "res": "LYS", - "atm1": " C ", - 
"atm2": " CA ", - "atm3": " HA ", - "x0": 1.88155, - "K": 85.58 - }, - { - "res": "LYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "LYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "LYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.99661, - "K": 121.74144 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89601, - "K": 59.906 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD ", - "x0": 1.94255, - "K": 108.09921 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": "1HE ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": "2HE ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": " NZ ", - "x0": 1.95413, - "K": 125.42102 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.90229, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.90254, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.90247, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.90109, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " NZ ", - "atm3": "1HZ ", - "x0": 1.91114, - "K": 51.348 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " NZ ", - "atm3": "2HZ ", - "x0": 1.91114, - "K": 51.348 - }, - { - "res": "LYS", - "atm1": " CE ", - "atm2": " NZ ", - "atm3": "3HZ ", - "x0": 1.91114, - "K": 51.348 - }, - { - "res": "LYS", - "atm1": " CG ", - 
"atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88265, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88265, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "LYS", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " CE ", - "x0": 1.94373, - "K": 108.09921 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92861, - "K": 144.466 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "LYS", - "atm1": " NZ ", - "atm2": " CE ", - "atm3": "1HE ", - "x0": 1.89906, - "K": 77.022 - }, - { - "res": "LYS", - "atm1": " NZ ", - "atm2": " CE ", - "atm3": "2HE ", - "x0": 1.89802, - "K": 77.022 - }, - { - "res": "MET", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "MET", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": "2HE ", - "x0": 1.91081, - "K": 60.7618 - }, - { - "res": "MET", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": "3HE ", - "x0": 1.90982, - "K": 60.7618 - }, - { - "res": "MET", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.88852, - "K": 60.7618 - }, - { - "res": "MET", - "atm1": "2HE ", - "atm2": " CE ", - "atm3": "3HE ", - "x0": 1.90976, - "K": 60.7618 - }, - { - "res": "MET", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92732, - "K": 103.9584 - }, - { - "res": "MET", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8787, - "K": 85.58 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "MET", - "atm1": " CA ", - "atm2": " 
CB ", - "atm3": " CG ", - "x0": 1.99733, - "K": 121.74144 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89845, - "K": 59.906 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "MET", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " SD ", - "x0": 1.96649, - "K": 107.4508 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88227, - "K": 45.3574 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88227, - "K": 45.3574 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " SD ", - "atm3": " CE ", - "x0": 1.76091, - "K": 62.9884 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92354, - "K": 144.466 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CE ", - "atm3": "1HE ", - "x0": 1.91114, - "K": 78.90476 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CE ", - "atm3": "2HE ", - "x0": 1.91114, - "K": 78.90476 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CE ", - "atm3": "3HE ", - "x0": 1.91114, - "K": 78.90476 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.89281, - "K": 78.90476 - }, - { - "res": "MET", - "atm1": " SD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.89208, - "K": 78.90476 - }, - { - "res": "PHE", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "PHE", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91412, - "K": 103.9584 - }, - { - "res": "PHE", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89009, - "K": 85.58 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " 
CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "PHE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.98604, - "K": 108.07552 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89648, - "K": 59.906 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "x0": 2.10636, - "K": 84.84908 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.107, - "K": 84.84908 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "x0": 2.09493, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.09383, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.06983, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "x0": 2.09386, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CE2", - "atm3": "2HE ", - "x0": 2.0956, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CE1", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.09495, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CE1", - "atm2": " CZ ", - "atm3": " CE2", - "x0": 2.09214, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CE1", - "atm2": " CZ ", - "atm3": " HZ ", - "x0": 2.09582, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CE2", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.09398, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CE2", - "atm2": " CZ ", - "atm3": " HZ ", - "x0": 2.09522, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88826, - "K": 84.38188 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88826, - "K": 84.38188 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD1", - 
"atm3": " CE1", - "x0": 2.10722, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.08102, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "x0": 2.10839, - "K": 74.104 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.08081, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CZ ", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.09443, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " CZ ", - "atm2": " CE2", - "atm3": "2HE ", - "x0": 2.09372, - "K": 51.348 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92762, - "K": 144.466 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "PRO", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.86724, - "K": 60.7618 - }, - { - "res": "PRO", - "atm1": "1HD ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.93141, - "K": 60.7618 - }, - { - "res": "PRO", - "atm1": "1HG ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.8638, - "K": 60.7618 - }, - { - "res": "PRO", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.94845, - "K": 103.9584 - }, - { - "res": "PRO", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.93002, - "K": 85.58 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91986, - "K": 66.86 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91986, - "K": 66.86 - }, - { - "res": "PRO", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.81863, - "K": 146.048 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.93694, - "K": 59.906 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.92335, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": " CG ", - "atm3": "2HG ", - 
"x0": 1.92335, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD ", - "x0": 1.82212, - "K": 129.682 - }, - { - "res": "PRO", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "1HG ", - "x0": 1.96602, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CD ", - "atm2": " CG ", - "atm3": "2HG ", - "x0": 1.96602, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.96927, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.96927, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " CG ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.91114, - "K": 45.3574 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.79769, - "K": 144.466 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.9059, - "K": 82.1568 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CD ", - "atm3": "1HD ", - "x0": 1.9024, - "K": 60.7618 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CD ", - "atm3": "2HD ", - "x0": 1.9024, - "K": 60.7618 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " CD ", - "atm3": " CG ", - "x0": 1.8012, - "K": 125.184 - }, - { - "res": "SER", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 71 - }, - { - "res": "SER", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91703, - "K": 103.9584 - }, - { - "res": "SER", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89009, - "K": 85.58 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " OG ", - "x0": 1.93732, - "K": 
157.94048 - }, - { - "res": "SER", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89908, - "K": 59.906 - }, - { - "res": "SER", - "atm1": " CB ", - "atm2": " OG ", - "atm3": " HG ", - "x0": 1.85005, - "K": 115 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92225, - "K": 144.466 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "SER", - "atm1": " OG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91373, - "K": 91.8 - }, - { - "res": "SER", - "atm1": " OG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91373, - "K": 91.8 - }, - { - "res": "THR", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "2HG2", - "x0": 1.91059, - "K": 60.7618 - }, - { - "res": "THR", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.90984, - "K": 60.7618 - }, - { - "res": "THR", - "atm1": "2HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.90996, - "K": 60.7618 - }, - { - "res": "THR", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91334, - "K": 103.9584 - }, - { - "res": "THR", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8844, - "K": 85.58 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.92913, - "K": 111.30944 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.91114, - "K": 69 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " OG1", - "x0": 1.91255, - "K": 157.94048 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89094, - "K": 59.906 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "1HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "2HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "THR", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91114, - "K": 57.218788 - }, 
- { - "res": "THR", - "atm1": " CB ", - "atm2": " OG1", - "atm3": " HG1", - "x0": 1.90986, - "K": 98.417 - }, - { - "res": "THR", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.90521, - "K": 59.0502 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.93906, - "K": 144.466 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.90802, - "K": 140.24182 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.89749, - "K": 78.56244 - }, - { - "res": "TRP", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "TRP", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.91317, - "K": 103.9584 - }, - { - "res": "TRP", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89151, - "K": 85.58 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "TRP", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.98154, - "K": 121.74144 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89686, - "K": 59.906 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "x0": 2.21197, - "K": 84.84908 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.21981, - "K": 84.84908 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 1.8514, - "K": 222.312 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " NE1", - "atm3": " CE2", - "x0": 1.90066, - "K": 203.786 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " NE1", - "atm3": "1HE ", - "x0": 2.19273, - "K": 47.9248 - }, - { - 
"res": "TRP", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ2", - "x0": 2.12856, - "K": 111.156 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " NE1", - "x0": 1.88329, - "K": 188.276 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE3", - "atm3": " CZ3", - "x0": 2.07225, - "K": 111.156 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE3", - "atm3": " HE3", - "x0": 2.10432, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CD2", - "atm3": " CE3", - "x0": 2.08238, - "K": 111.156 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CZ2", - "atm3": " CH2", - "x0": 2.05076, - "K": 111.156 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CZ2", - "atm3": " HZ2", - "x0": 2.13105, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " NE1", - "atm3": "1HE ", - "x0": 2.18979, - "K": 47.9248 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CZ3", - "atm3": " CH2", - "x0": 2.11185, - "K": 74.104 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CZ3", - "atm3": " HZ3", - "x0": 2.06422, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.89063, - "K": 57.218788 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.89063, - "K": 57.218788 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.17992, - "K": 54.7712 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " NE1", - "x0": 1.92259, - "K": 222.312 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "x0": 1.86683, - "K": 203.786 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE3", - "x0": 2.33398, - "K": 296.416 - }, - { - "res": "TRP", - "atm1": " CH2", - "atm2": " CZ2", - "atm3": " HZ2", - "x0": 2.10137, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CH2", - "atm2": " CZ3", - "atm3": " HZ3", - "x0": 2.10711, - "K": 51.348 - }, - { - 
"res": "TRP", - "atm1": " CZ2", - "atm2": " CH2", - "atm3": " CZ3", - "x0": 2.12058, - "K": 74.104 - }, - { - "res": "TRP", - "atm1": " CZ2", - "atm2": " CH2", - "atm3": " HH2", - "x0": 2.08408, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CZ3", - "atm2": " CE3", - "atm3": " HE3", - "x0": 2.10661, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " CZ3", - "atm2": " CH2", - "atm3": " HH2", - "x0": 2.07853, - "K": 51.348 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92684, - "K": 144.466 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.18068, - "K": 54.7712 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": " CE2", - "atm3": " CZ2", - "x0": 2.27134, - "K": 296.416 - }, - { - "res": "TYR", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.87575, - "K": 60.7618 - }, - { - "res": "TYR", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.92479, - "K": 103.9584 - }, - { - "res": "TYR", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8787, - "K": 85.58 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.91114, - "K": 66.86 - }, - { - "res": "TYR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG ", - "x0": 1.98618, - "K": 108.07552 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89623, - "K": 59.906 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "x0": 2.0944, - "K": 84.84908 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.0944, - "K": 84.84908 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "x0": 2.08979, - "K": 74.104 - }, - { - "res": "TYR", - 
"atm1": " CD1", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.0944, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "x0": 2.0944, - "K": 74.104 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "x0": 2.0944, - "K": 74.104 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CE2", - "atm3": "2HE ", - "x0": 2.0944, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.0944, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": " CZ ", - "atm3": " CE2", - "x0": 2.099, - "K": 74.104 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": " CZ ", - "atm3": " OH ", - "x0": 2.08979, - "K": 83.73752 - }, - { - "res": "TYR", - "atm1": " CE2", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.09439, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CE2", - "atm2": " CZ ", - "atm3": " OH ", - "x0": 2.0944, - "K": 83.73752 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "1HB ", - "x0": 1.88818, - "K": 84.38188 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CB ", - "atm3": "2HB ", - "x0": 1.88818, - "K": 84.38188 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " CE1", - "x0": 2.0944, - "K": 74.104 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD1", - "atm3": "1HD ", - "x0": 2.0944, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "x0": 2.0944, - "K": 74.104 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD2", - "atm3": "2HD ", - "x0": 2.0944, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CZ ", - "atm2": " CE1", - "atm3": "1HE ", - "x0": 2.099, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CZ ", - "atm2": " CE2", - "atm3": "2HE ", - "x0": 2.09439, - "K": 51.348 - }, - { - "res": "TYR", - "atm1": " CZ ", - "atm2": " OH ", - "atm3": " HH ", - "x0": 1.90939, - "K": 111.254 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": 
" CA ", - "atm3": " CB ", - "x0": 1.92815, - "K": 144.466 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - }, - { - "res": "VAL", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": "2HG1", - "x0": 1.90965, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": "3HG1", - "x0": 1.90936, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "2HG2", - "x0": 1.91055, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": "1HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.90981, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": "2HG1", - "atm2": " CG1", - "atm3": "3HG1", - "x0": 1.91138, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": "2HG2", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91003, - "K": 60.7618 - }, - { - "res": "VAL", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.90861, - "K": 103.9584 - }, - { - "res": "VAL", - "atm1": " C ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89719, - "K": 85.58 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG1", - "x0": 1.92842, - "K": 111.30944 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.91812, - "K": 111.30944 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.90167, - "K": 69 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.8977, - "K": 59.906 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG1", - "atm3": "1HG1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG1", - "atm3": "2HG1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG1", - "atm3": "3HG1", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "1HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": 
" CG2", - "atm3": "2HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CB ", - "atm2": " CG2", - "atm3": "3HG2", - "x0": 1.91114, - "K": 57.218788 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "x0": 1.93332, - "K": 98.83621 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.87003, - "K": 59.0502 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " HB ", - "x0": 1.91141, - "K": 59.0502 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "x0": 1.9251, - "K": 144.466 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " CA ", - "atm3": " HA ", - "x0": 1.89368, - "K": 82.1568 - } - ], - "torsions": [ - { - "res": "ALA", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "1HB ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "2HB ", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "3HB ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " HA ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "1HB ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " HA ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "2HB ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " HA ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "3HB ", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "1HB ", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", - "atm4": "2HB ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " CA ", - "atm3": " CB ", 
- "atm4": "3HB ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " NE ", - "atm3": " CZ ", - "atm4": " NH1", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "ARG", - "atm1": " CD ", - "atm2": " NE ", - "atm3": " CZ ", - "atm4": " NH2", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "ARG", - "atm1": " HE ", - "atm2": " NE ", - "atm3": " CZ ", - "atm4": " NH1", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " HE ", - "atm2": " NE ", - "atm3": " CZ ", - "atm4": " NH2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH1", - "atm4": "1HH1", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH1", - "atm4": "2HH1", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH2", - "atm4": "1HH2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NE ", - "atm2": " CZ ", - "atm3": " NH2", - "atm4": "2HH2", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NH1", - "atm2": " CZ ", - "atm3": " NH2", - "atm4": "1HH2", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NH1", - "atm2": " CZ ", - "atm3": " NH2", - "atm4": "2HH2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NH2", - "atm2": " CZ ", - "atm3": " NH1", - "atm4": "1HH1", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ARG", - "atm1": " NH2", - "atm2": " CZ ", - "atm3": " NH1", - "atm4": "2HH1", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND2", - "atm4": "1HD2", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " 
ND2", - "atm4": "2HD2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ASN", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " OD1", - "atm4": " ND2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "ASN", - "atm1": " OD1", - "atm2": " CG ", - "atm3": " ND2", - "atm4": "1HD2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "ASN", - "atm1": " OD1", - "atm2": " CG ", - "atm3": " ND2", - "atm4": "2HD2", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "ASP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " OD1", - "atm4": " OD2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "CYS", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": " SG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1, - "period": 3 - }, - { - "res": "CYS", - "atm1": "2HB ", - "atm2": " CB ", - "atm3": " SG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1, - "period": 3 - }, - { - "res": "CYS", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " SG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1, - "period": 3 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " NE2", - "atm4": "1HE2", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " NE2", - "atm4": "2HE2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "GLN", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " OE1", - "atm4": " NE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "GLN", - "atm1": " OE1", - "atm2": " CD ", - "atm3": " NE2", - "atm4": "1HE2", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "GLN", - "atm1": " OE1", - "atm2": " CD ", - "atm3": " NE2", - "atm4": "2HE2", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "GLU", - "atm1": " CG ", - "atm2": " CD ", - "atm3": " OE1", - "atm4": " OE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": 0, 
- "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " NE2", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND1", - "atm4": " CE1", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " ND1", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": "2HE ", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "atm4": " NE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": " CE1", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": "2HE ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " NE2", - "atm4": " CD2", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " NE2", - "atm4": "2HE ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CE1", - "atm3": " NE2", - "atm4": " CD2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CE1", - "atm3": " NE2", - "atm4": "2HE ", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": -3.141592654, 
- "K": 34.552, - "period": 2 - }, - { - "res": "HIS", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " NE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " NE2", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " ND1", - "atm4": " CE1", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " ND1", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " CG ", - "atm2": " ND1", - "atm3": " CE1", - "atm4": " NE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " NE2", - "atm4": " CE1", - "x0": 3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " NE2", - "atm4": " CD2", - "x0": 3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CE1", - "atm3": " NE2", - "atm4": " CD2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": -3.141592654, - "K": 34.552, - "period": 2 - }, - { - "res": "HIS_D", - "atm1": " ND1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " NE2", - "x0": 0, - "K": 43.352, - "period": 2 - }, - { - "res": "ILE", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "1HD1", - 
"x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "2HD1", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": "1HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "3HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": "2HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "1HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": "2HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "2HD1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": "2HG1", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "3HD1", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "1HD1", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "2HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CB ", - "atm2": " CG1", - "atm3": " CD1", - "atm4": "3HD1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": 
" CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ILE", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD1", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "2HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "3HD1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "1HD2", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "3HD2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "1HD2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "3HD2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD1", - "x0": 
1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "2HD1", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "3HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "2HD1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "3HD1", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "1HD2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LEU", - "atm1": " HG ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "3HD2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "1HZ ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "2HZ ", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "1HE ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "3HZ ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "2HE ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "1HZ ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "2HE ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "2HZ ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": "2HE 
", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "3HZ ", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "1HZ ", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "2HZ ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "LYS", - "atm1": " CD ", - "atm2": " CE ", - "atm3": " NZ ", - "atm4": "3HZ ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " SD ", - "atm3": " CE ", - "atm4": "1HE ", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " SD ", - "atm3": " CE ", - "atm4": "2HE ", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "MET", - "atm1": " CG ", - "atm2": " SD ", - "atm3": " CE ", - "atm4": "3HE ", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " CE1", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - "x0": 0, - "K": 29.8460176, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": 0, - "K": 29.8460176, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " HZ ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - 
{ - "res": "PHE", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " HZ ", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": " CZ ", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ ", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "PHE", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": "2HE ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": " CZ ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": "2HE ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " CE2", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, 
- { - "res": "PHE", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " HZ ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "2HE ", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " CE1", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "PHE", - "atm1": "2HE ", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " HZ ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "SER", - "atm1": "1HB ", - "atm2": " CB ", - "atm3": " OG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "SER", - "atm1": "2HB ", - "atm2": " CB ", - "atm3": " OG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "SER", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " OG ", - "atm4": " HG ", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " OG1", - "atm4": " HG1", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "THR", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " OG1", - "atm4": " HG1", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "THR", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": 
-3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " OG1", - "atm4": " HG1", - "x0": 1.047197551, - "K": 1.2086, - "period": 3 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "THR", - "atm1": " OG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " NE1", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE3", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE3", - "x0": 3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " NE1", - "atm3": " CE2", - "atm4": " CD2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD1", - "atm2": " NE1", - "atm3": " CE2", - "atm4": " CZ2", - "x0": 3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ2", - "atm4": " CH2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ2", - "atm4": " 
HZ2", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE3", - "atm3": " CZ3", - "atm4": " CH2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CE3", - "atm3": " CZ3", - "atm4": " HZ3", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " NE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CD2", - "atm3": " CE3", - "atm4": " CZ3", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CD2", - "atm3": " CE3", - "atm4": " HE3", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CZ2", - "atm3": " CH2", - "atm4": " CZ3", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE2", - "atm2": " CZ2", - "atm3": " CH2", - "atm4": " HH2", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " NE1", - "x0": 3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CZ3", - "atm3": " CH2", - "atm4": " CZ2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CE3", - "atm2": " CZ3", - "atm3": " CH2", - "atm4": " HH2", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " NE1", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " NE1", - "atm4": "1HE ", - 
"x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ2", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " NE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE3", - "atm4": " CZ3", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE3", - "atm4": " HE3", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " NE1", - "atm4": " CE2", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " NE1", - "atm4": "1HE ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": "1HE ", - "atm2": " NE1", - "atm3": " CE2", - "atm4": " CD2", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": "1HE ", - "atm2": " NE1", - "atm3": " CE2", - "atm4": " CZ2", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HE3", - "atm2": " CE3", - "atm3": " CZ3", - "atm4": " CH2", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HE3", - "atm2": " CE3", - "atm3": " CZ3", - "atm4": " HZ3", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HZ2", - "atm2": " CZ2", - "atm3": " CH2", - "atm4": " CZ3", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HZ2", - "atm2": " CZ2", - "atm3": " CH2", - "atm4": " HH2", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HZ3", - "atm2": " CZ3", - "atm3": " CH2", - "atm4": " CZ2", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " HZ3", - "atm2": " CZ3", - "atm3": " CH2", - "atm4": " 
HH2", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": " CE2", - "atm3": " CZ2", - "atm4": " CH2", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TRP", - "atm1": " NE1", - "atm2": " CE2", - "atm3": " CZ2", - "atm4": " HZ2", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " CE1", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - "x0": 0, - "K": 29.8460176, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": -3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CB ", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": 0, - "K": 29.8460176, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " OH ", - "x0": -3.141592654, - "K": 43.352, - "period": 2 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": " CE2", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD1", - "atm2": " CG ", - "atm3": " CD2", - "atm4": "2HD ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " OH ", - "x0": 3.141592654, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": " CE1", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CD2", - "atm2": " CG ", - "atm3": " CD1", - "atm4": "1HD ", - 
"x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CE1", - "atm2": " CZ ", - "atm3": " OH ", - "atm4": " HH ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "TYR", - "atm1": " CE2", - "atm2": " CZ ", - "atm3": " OH ", - "atm4": " HH ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": " CZ ", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ ", - "x0": 0, - "K": 43.352, - "period": 1 - }, - { - "res": "TYR", - "atm1": " CG ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": "2HE ", - "x0": -3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": " CZ ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "1HD ", - "atm2": " CD1", - "atm3": " CE1", - "atm4": "1HE ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": " CZ ", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "2HD ", - "atm2": " CD2", - "atm3": " CE2", - "atm4": "2HE ", - "x0": 0, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " CE2", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "1HE ", - "atm2": " CE1", - "atm3": " CZ ", - "atm4": " OH ", - "x0": 0, - "K": 34.552, - "period": 2 - }, - { - "res": "TYR", - "atm1": "2HE ", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " CE1", - "x0": 3.141592654, - "K": 34.552, - "period": 1 - }, - { - "res": "TYR", - "atm1": "2HE ", - "atm2": " CE2", - "atm3": " CZ ", - "atm4": " OH ", - "x0": 0, 
- "K": 34.552, - "period": 2 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "1HG1", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "2HG1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "3HG1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CA ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG1", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "1HG1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "2HG1", - "x0": 3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " CG2", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "3HG1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "1HG1", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": 
" CB ", - "atm3": " CG1", - "atm4": "2HG1", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG1", - "atm4": "3HG1", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "1HG2", - "x0": 1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "2HG2", - "x0": -3.141592654, - "K": 24.336, - "period": 3 - }, - { - "res": "VAL", - "atm1": " HB ", - "atm2": " CB ", - "atm3": " CG2", - "atm4": "3HG2", - "x0": -1.047197551, - "K": 24.336, - "period": 3 - }, - { - "res": "ALA", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1445, - "K": 45.8703, - "period": 1 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1402, - "K": 47.5692, - "period": 1 - }, - { - "res": "ALA", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.06684, - "K": 39.904, - "period": 1 - }, - { - "res": "ARG", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1462, - "K": 27.7487, - "period": 1 - }, - { - "res": "ARG", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1411, - "K": 32.8454, - "period": 1 - }, - { - "res": "ARG", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.08116, - "K": 39.904, - "period": 1 - }, - { - "res": "ASN", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1343, - "K": 23.2183, - "period": 1 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1497, - "K": 24.3509, - "period": 1 - }, - { - "res": "ASN", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.06748, - "K": 39.904, - "period": 1 - }, - { - "res": "ASP", - "atm1": " C ", 
- "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1413, - "K": 23.2183, - "period": 1 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1454, - "K": 23.7846, - "period": 1 - }, - { - "res": "ASP", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07852, - "K": 39.904, - "period": 1 - }, - { - "res": "CYS", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1558, - "K": 24.9172, - "period": 1 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1332, - "K": 29.4476, - "period": 1 - }, - { - "res": "CYS", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.0829, - "K": 39.904, - "period": 1 - }, - { - "res": "GLN", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1488, - "K": 27.7487, - "period": 1 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1398, - "K": 31.7128, - "period": 1 - }, - { - "res": "GLN", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07464, - "K": 39.904, - "period": 1 - }, - { - "res": "GLU", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1466, - "K": 28.8813, - "period": 1 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1398, - "K": 31.7128, - "period": 1 - }, - { - "res": "GLU", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07654, - "K": 39.904, - "period": 1 - }, - { - "res": "HIS", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1507, - "K": 23.7846, - "period": 1 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1339, - "K": 25.4835, - "period": 1 - }, - { - "res": "HIS", - "atm1": " N ", - "atm2": " C ", - 
"atm3": " CA ", - "atm4": " HA ", - "x0": -2.09335, - "K": 39.904, - "period": 1 - }, - { - "res": "HIS_D", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1507, - "K": 23.7846, - "period": 1 - }, - { - "res": "HIS_D", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1339, - "K": 25.4835, - "period": 1 - }, - { - "res": "HIS_D", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.09335, - "K": 39.904, - "period": 1 - }, - { - "res": "ILE", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1456, - "K": 23.7846, - "period": 1 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1473, - "K": 27.7487, - "period": 1 - }, - { - "res": "ILE", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.0785, - "K": 39.904, - "period": 1 - }, - { - "res": "LEU", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1565, - "K": 28.8813, - "period": 1 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1322, - "K": 32.2791, - "period": 1 - }, - { - "res": "LEU", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07936, - "K": 39.904, - "period": 1 - }, - { - "res": "LYS", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1479, - "K": 28.315, - "period": 1 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.14, - "K": 32.2791, - "period": 1 - }, - { - "res": "LYS", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07474, - "K": 39.904, - "period": 1 - }, - { - "res": "MET", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1497, - "K": 22.0857, - "period": 1 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - 
"atm4": " CB ", - "x0": 2.1398, - "K": 26.0498, - "period": 1 - }, - { - "res": "MET", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07171, - "K": 39.904, - "period": 1 - }, - { - "res": "PHE", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1502, - "K": 22.652, - "period": 1 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1378, - "K": 26.6161, - "period": 1 - }, - { - "res": "PHE", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07838, - "K": 39.904, - "period": 1 - }, - { - "res": "PRO", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.2002, - "K": 41.3399, - "period": 1 - }, - { - "res": "PRO", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.0058, - "K": 48.1355, - "period": 1 - }, - { - "res": "SER", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.153, - "K": 23.7846, - "period": 1 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1381, - "K": 25.4835, - "period": 1 - }, - { - "res": "SER", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07875, - "K": 39.904, - "period": 1 - }, - { - "res": "THR", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1555, - "K": 26.0498, - "period": 1 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1439, - "K": 30.0139, - "period": 1 - }, - { - "res": "THR", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.0753, - "K": 39.904, - "period": 1 - }, - { - "res": "TRP", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.153, - "K": 23.7846, - "period": 1 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 
2.1358, - "K": 26.0498, - "period": 1 - }, - { - "res": "TRP", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.0801, - "K": 39.904, - "period": 1 - }, - { - "res": "TYR", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1522, - "K": 24.9172, - "period": 1 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1358, - "K": 28.8813, - "period": 1 - }, - { - "res": "TYR", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.07132, - "K": 39.904, - "period": 1 - }, - { - "res": "VAL", - "atm1": " C ", - "atm2": " N ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 4.1464, - "K": 26.6161, - "period": 1 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " CB ", - "x0": 2.1466, - "K": 29.4476, - "period": 1 - }, - { - "res": "VAL", - "atm1": " N ", - "atm2": " C ", - "atm3": " CA ", - "atm4": " HA ", - "x0": -2.08278, - "K": 39.904, - "period": 1 - } - ] -} \ No newline at end of file diff --git a/rf2aa/inference/inference.py b/rf2aa/inference/inference.py deleted file mode 100644 index 476195f..0000000 --- a/rf2aa/inference/inference.py +++ /dev/null @@ -1,534 +0,0 @@ -import os -import argparse -import json -import logging -import pickle -import tempfile -from collections.abc import Mapping -from os import PathLike -from pathlib import Path - -import hydra -import numpy as np -import torch -import yaml -from biotite.structure import AtomArray, AtomArrayStack, stack -from cifutils import parse -from cifutils.tools.inference import ( - build_msa_paths_by_chain_id_from_component_list, - components_to_atom_array, -) -from cifutils.utils.io_utils import to_cif_file -from datahub.encoding_definitions import AF3SequenceEncoding -import omegaconf -from omegaconf import OmegaConf - -from rf2aa.metrics.predicted_error import WriteAF3Confidence -from rf2aa.trainer_base import trainer_factory - 
-logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# Define the sequence encoding; needed to decode the restypes when saving to CIF -encoding = AF3SequenceEncoding() - - -def build_stack_from_atom_array_and_batched_coords( - coords: np.ndarray, - atom_array: AtomArray, - annotations_to_keep: list[str] = [ - "chain_id", - "transformation_id", - "res_id", - "res_name", - "element", - "atom_name", - ], -) -> AtomArrayStack: - """Builds an AtomArrayStack from an AtomArray and a set of coordinates with a batch dimension. - - Additionally, handles the case where the AtomArray contains multiple transformations and we must adjust the chain_id. - - Args: - coords (np.array): The coordinates to be assigned to the AtomArrayStack. Must have shape (nbatch, n_atoms, 3). - atom_array (AtomArray): The AtomArray to be stacked. Must have shape (n_atoms,) - """ - # (Diffusion batch size will become the number of models) - n_batch = coords.shape[0] - - # Remove unwanted annotations - for annotation in atom_array.get_annotation_categories(): - if annotation not in annotations_to_keep: - atom_array.del_annotation(annotation) - - # Build the stack and assign the coordinates - atom_array_stack = stack([atom_array for _ in range(n_batch)]) - atom_array_stack.coord = coords - - # Adjust chain_id if there are multiple transformations - # (Otherwise, we will have ambiguous bond annotations, since only `chain_id` is used for the bond annotations) - if ( - "transformation_id" in atom_array.get_annotation_categories() - and len(np.unique(atom_array_stack.transformation_id)) > 1 - ): - atom_array_stack.chain_id = ( - atom_array_stack.chain_id + atom_array_stack.transformation_id - ) - - return atom_array_stack - - -def _spoof_cif_from_dictionary(item: dict, temp_dir: PathLike) -> Path: - """Unpacks a dictionary to create a CIF file from its components. - - Args: - item (dict): A dictionary containing 'name' and 'components', optionally 'bonds'. 
- temp_dir (Path): Path to the temporary directory for storing CIF files. - - Returns: - Path: The path to the created CIF file, saved in the temporary directory. - - Raises: - NotImplementedError: If 'bonds' is present in the dictionary. - ValueError: If 'name' or 'components' are missing from the dictionary. - """ - # Validate the dictionary structure ("name" and "components" are required, "bonds" is optional) - assert ( - "name" in item and "components" in item - ), "The input dictionary must contain 'name' and 'components' keys." - - # Build components - atom_array, component_list = components_to_atom_array( - item["components"], return_components=True, bonds=item.get("bonds", None) - ) - msa_paths_by_chain_id = build_msa_paths_by_chain_id_from_component_list( - component_list - ) - - # Create a temporary CIF file from the JSON data - cif_path = Path(temp_dir) / f"{item['name']}.cif" - save_path = to_cif_file( - atom_array, - cif_path, - extra_categories={"msa_paths_by_chain_id": msa_paths_by_chain_id} - if msa_paths_by_chain_id - else None, - file_type="cif", # Not zipped for efficiency (as it's a temporary directory anyways) - ) - - return Path(save_path) - - -def _build_file_paths_for_prediction(inputs: list, temp_dir: PathLike) -> list[Path]: - """Prepare files for prediction based on the input paths. - - Input paths may be dictionary-like format (e.g., JSON, YAML, Pickle), CIF/PDB files, or directories containing these files. - Processes directories to find supported file types and converts dictionary-like formats to CIF files. - - Args: - inputs (list): List of input paths (JSON, YAML, Pickle, or CIF/PDB). - temp_dir (Path): Path to the temporary directory for storing CIF files. - - Returns: - list[Path]: List of file paths for prediction. 
- """ - DICTIONARY_LIKE_EXTENSIONS = {".json", ".yaml", ".yml", ".pkl"} - CIF_LIKE_EXTENSIONS = {".cif", ".pdb", ".bcif", ".cif.gz", ".pdb.gz", ".bcif.gz"} - - # Collect all files from inputs, handling directories and individual files - paths_to_raw_input_files = [] - for input_path in inputs: - if Path(input_path).is_dir(): - paths_to_raw_input_files.extend( - _find_files( - input_path, DICTIONARY_LIKE_EXTENSIONS | CIF_LIKE_EXTENSIONS - ) - ) - else: - paths_to_raw_input_files.append(Path(input_path)) - - paths_to_cif_like_files = [] - for path in paths_to_raw_input_files: - #concatenated_suffix = "".join(path.suffixes) - concatenated_suffix = path.suffixes[-1] - if concatenated_suffix in DICTIONARY_LIKE_EXTENSIONS: - # Spoof CIF files from dictionary-like formats - with open(path, "rb" if path.suffix == ".pkl" else "r") as file: - # Load data based on file extension - if path.suffix == ".json": - data = json.load(file) - elif path.suffix in {".yaml", ".yml"}: - raise NotImplementedError("YAML files are not yet supported.") - elif path.suffix == ".pkl": - data = pickle.load(file) - - if isinstance(data, dict): - data = [ - data - ] # Convert single dictionary to list for uniform processing - - for item in data: - paths_to_cif_like_files.append( - _spoof_cif_from_dictionary(item, temp_dir) - ) - elif concatenated_suffix in CIF_LIKE_EXTENSIONS: - # Directly use CIF-like files - paths_to_cif_like_files.append(path) - else: - raise ValueError( - f"Unsupported file extension: {path.suffix} (path: {path}; paths: {paths_to_raw_input_files})." - ) - - return paths_to_cif_like_files - - -def _find_files(path: PathLike, supported_file_types: list) -> list[Path]: - """Recursively find all files with the given extensions in the specified path. - - Args: - path (PathLike): Path to the directory containing the files. - supported_file_types (list): List of supported file extensions. - - Returns: - list[Path]: List of files with the given extensions. 
- """ - files_with_supported_types = [] - path = Path(path) - - # Check if the path is a directory - if path.is_dir(): - # Search for files with each supported extension - for file_type in supported_file_types: - files_with_supported_types.extend(path.glob(f"*{file_type}")) - elif path.is_file() and path.suffix in supported_file_types: - # If it's a file and has a supported extension, add to the list - files_with_supported_types.append(path) - - return files_with_supported_types - - -def _update_nested_dictconfig(d: Mapping, u: Mapping, depth: int = 0) -> Mapping: - """Recursive function to overwrite contents of one nested omegaconf dictconfig with another. - - Args: - d: dictionary of dictconfigs whose contents will be overwritten - u: dictionary of items which will overwrite or add to values in d - depth: depth of recursion: a positive integer: - -used to keep the outermost layer of the config as a dict instead of DictConfig. - -set to 1 or higher to return only DictConfig. - Returns: - d updated to contain values in u - """ - d = dict(d) - u = dict(u) - for k, v in u.items(): - if isinstance(v, Mapping): - d[k] = _update_nested_dictconfig(d.get(k, {}), v, depth=depth + 1) - else: - d[k] = v - if depth == 0: - return d - else: - return omegaconf.dictconfig.DictConfig(d) - - -class EvaluateAF3: - """Class for inference with AF3. Evaluates a trained AF3 model on a set of spoofed CIFs.""" - - def __init__( - self, - checkpoint_path: PathLike, - cif_out_dir: PathLike, - n_recycles: int, - diffusion_batch_size: int, - config_override_path: PathLike | None = None, - residue_renaming_dict: dict | None = None, - temp_dir: PathLike | None = None, - num_steps: int = 200, - solver: str = "af3", - overwrite: bool = False - ): - """Initialize the evaluator. - - Args: - checkpoint_path (PathLike): Path to the checkpoint file, e.g., /path/to/checkpoint.pt. - cif_out_dir (PathLike): Directory to save the output (predicted) CIF files. 
- config_override_path (PathLike): Path to a yaml file with config options to override those in the checkpoint file. - world_size (int): Number of GPUs to use for evaluation. - n_recycles (int): Number of recycles for AF3. The default is 10. - diffusion_batch_size (int): Diffusion batch size for AF3. Each predicted structure will be saved as a separate model within the same CIF file. - residue_renaming_dict (dict): Dictionary of residue names to rename to avoid CCD clashes, e.g., {'ALA': 'L:1'}. - temp_dir (PathLike): Temporary directory to store intermediate files. The default is None. - num_steps (int): Number of steps for sampling of the diffusion model. The default is 200; we see reasonable results with 50 steps. - solver (str): Solver to use for inference. Options are 'af3', 'simple', 'euler', and 'heun'. The default is 'af3'. - """ - - # Load the checkpoint - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - checkpoint = torch.load(checkpoint_path, map_location=torch.device(device)) - - # Load the config - self.config = OmegaConf.create(checkpoint["training_config"]) - - if config_override_path is not None: - with open(config_override_path, 'r') as fs: - config_override_dict = yaml.load(fs, yaml.FullLoader) - self.config = _update_nested_dictconfig(self.config, config_override_dict) - self.config = OmegaConf.create(self.config) - - # Make sure we aren't using the version with a bug in plddt - if ( - self.config.experiment.name - == "rf2aa-af3-repro-rollout_nmw_from_scratch_af3_style_no_cb_normal_crop_cont_3" - ): - raise ValueError( - "These weights are outdated and the plddt metric may be inaccurate. Please update to the latest available weights." 
- ) - - # Sampler sets diffusion batch size based on the following, not strictly on batch size in vaildation transform - self.config.dataset_params["diffusion_batch_size_valid"] = diffusion_batch_size - self.config.af3_inference["num_steps"] = num_steps - self.config.af3_inference["solver"] = solver - - # Load the AF-3 trainer - self.trainer = trainer_factory[self.config.experiment.trainer]( - config=self.config - ) - self.trainer.checkpoint = checkpoint - - # Set the output directory for the CIF files (e.g., predicted structures) - self.cif_out_dir = Path(cif_out_dir) if cif_out_dir else Path("./") - - # Model parameters - self.n_recycles = n_recycles - self.diffusion_batch_size = diffusion_batch_size - if "confidence_loss" in self.config.loss: - self.confidence_writer = WriteAF3Confidence( - **self.config.loss.confidence_loss - ) - else: - self.confidence_writer = None - - # Rename residues - self.residue_renaming_dict = residue_renaming_dict - self.temp_dir = Path(temp_dir) - - self.overwrite = overwrite - - def construct_pipeline(self): - """Construct the AF3 inference pipeline.""" - self.config.dataset_params.val.interface.transform.n_recycles = self.n_recycles - self.config.dataset_params.val.interface.transform.diffusion_batch_size = ( - self.diffusion_batch_size - ) - self.config.dataset_params.val.interface.transform.return_atom_array = ( - True # Required for `to_cif` - ) - - assert ( - self.config.dataset_params.val.interface.transform.n_recycles - == self.n_recycles - ), "Number of recycles not set correctly." - assert ( - self.config.dataset_params.val.interface.transform.diffusion_batch_size - == self.diffusion_batch_size - ), "Diffusion batch size not set correctly." - pipeline = hydra.utils.instantiate( - self.config.dataset_params.val.interface.transform - ) - return pipeline - - def eval(self, files: list[PathLike]): - """Evaluate the model on a set of spoofed CIF files. 
- - Args: - files (list[PathLike]): List of paths to spoofed CIF files or directories containing spoofed CIF files. - Coordinates must be present but may contain NaN values. If a directory is provided, - all files with the extensions .cif, .pdb, .bcif, .cif.gz, .pdb.gz, .bcif.gz will be processed. - """ - # Construct the model and load the checkpoint - gpu = "cuda:0" if torch.cuda.is_available() else "cpu" - self.trainer.construct_model(device=gpu, inference=True) - self.trainer.load_model() - - # Set the model to evaluation mode - self.trainer.model.eval() - - logger.info("Building Transform pipeline...") - - # Construct the AF3 inference pipeline - pipeline = self.construct_pipeline() - - logger.info(f"Found {len(files)} structures to predict: {files}.") - - for structure in files: - # ... parse into an AtomArray (`parse` handles all valid formats) - logger.info(f"Parsing from path: {structure}") - #example_id = structure.name.split(".")[0] - example_id = ".".join(structure.name.split(".")[:-1]) - - # optionally, skip if output already exists - cif_output_path = example_id + '.cif' - cif_output_path = self.cif_out_dir / cif_output_path - if os.path.exists(cif_output_path) and not self.overwrite: - logger.info(f"Existing output for {example_id} found at {cif_output_path}. Skipping this example. Set --overwrite to not skip examples with existing output") - continue - - # If we're renaming residues, we do a brute-force replacement in the CIF file - if self.residue_renaming_dict: - logger.info( - f"Renaming residues in {structure} with brute-force find and replace: {self.residue_renaming_dict}" - ) - with open(structure, "r") as f: - content = f.read() - for old_res, new_res in self.residue_renaming_dict.items(): - content = content.replace(old_res, new_res) - structure = Path(self.temp_dir / structure.name) - with open(structure, "w") as f: - f.write(content) - - out = parse(structure, remove_hydrogens=True) - - # ... 
get the atom array and set NaN coordinates to random - atom_array = ( - out["assemblies"]["1"][0] - if "assemblies" in out - else out["asym_unit"][0] - ) - - # HACK: Set NaN coordinates to random values to avoid unexpected behavior in the pipeline - atom_array.coord[np.isnan(atom_array.coord)] = np.random.rand( - *atom_array.coord[np.isnan(atom_array.coord)].shape - ) - - # ... assemble the pipeline input in a format compatible with the DataHub pipeline - pipeline_input = { - "example_id": example_id, - "atom_array": atom_array, - "chain_info": out["chain_info"], - } - - # ... run dataloading and featurization - pipeline_output = pipeline(pipeline_input) - - # Model inference - with torch.no_grad(): - outputs = self.trainer.sampler.sample( - [pipeline_output], - n_cycle=self.n_recycles, - use_amp=self.config.training_params.use_amp, - ) - - # Override the AtomArray with the predited coordinates - atom_array_stack = build_stack_from_atom_array_and_batched_coords( - outputs["X_L"].cpu().numpy(), pipeline_output["atom_array"] - ) - - # Write the atom array to a CIF file - # NOTE: To make the secondary structure appear, run `dss` in PyMol (see: https://biology.stackexchange.com/questions/70143/can-pymol-show-cartoon-secondary-structure-for-a-pdb-of-multiple-frames) - out_path = to_cif_file( - atom_array_stack, self.cif_out_dir / example_id, file_type="cif" - ) - logger.info(f"Prediction for {example_id} written to {out_path}.") - - if "confidence" in outputs: - loss_input = { - "example_id": example_id, - "is_real_atom": pipeline_output["confidence_feats"]["is_real_atom"], - } - logger.info(f"Writing {example_id}.score to {self.cif_out_dir}") - df = self.confidence_writer(None, outputs, loss_input) - df.to_csv(self.cif_out_dir / f"{example_id}.score", index=False) - logger.info( - f"Confidence metrics for {example_id}.cif written to {self.cif_out_dir / example_id}.score." 
- ) - - -def main(): - parser = argparse.ArgumentParser(description="Evaluate AF3 using specified paths.") - parser.add_argument( - "inputs", - nargs="+", - help="List of paths to supported file types or directories of of supported files.", - ) - parser.add_argument( - "--checkpoint_path", type=str, required=True, help="Path to the checkpoint file" - ) - parser.add_argument( - "--cif_out_dir", type=str, required=True, help="Directory for output CIF files" - ) - parser.add_argument( - "--config_override_path", - type=str, - required=False, - help="Path to a yaml file with configs to override those in the checkpoint file", - ) - parser.add_argument( - "--n_recycles", type=int, default=10, help="Number of recycles for AF3" - ) - parser.add_argument( - "--diffusion_batch_size", - type=int, - default=5, - help="Diffusion batch size for AF3", - ) - parser.add_argument( - "--rename_residues", - type=str, - default="", - help="Dictionary of residue names to rename to avoid CCD clashes, e.g., {'ALA': 'L:1'}", - ) - parser.add_argument( - "--num_steps", - type=int, - default=200, - help="Number of steps for sampling of the diffusion model", - ) - parser.add_argument( - "--solver", - type=str, - default="af3", - help="Solver to use for inference. 
Options are 'af3', 'simple', 'euler', and 'heun'.", - ) - parser.add_argument( - "--overwrite", - default=False, - action="store_true", - help="Overwrite existing .cif outputs with new runs.", - ) - args = parser.parse_args() - - with tempfile.TemporaryDirectory() as temp_dir: - temp_dir = Path(temp_dir) - temp_dir.mkdir(parents=True, exist_ok=True) - - # Prepare inputs based on the file types - file_paths_for_prediction = _build_file_paths_for_prediction( - args.inputs, temp_dir - ) - - # Rename residues if necessary (e.g., for MPNN outputs that have ligand names that clash with the CCD) - residue_renaming_dict = ( - json.loads(args.rename_residues) if args.rename_residues else {} - ) - - # Construct the evaluator - evaluator = EvaluateAF3( - checkpoint_path=args.checkpoint_path, - cif_out_dir=args.cif_out_dir, - config_override_path=args.config_override_path, - n_recycles=args.n_recycles, - diffusion_batch_size=args.diffusion_batch_size, - residue_renaming_dict=residue_renaming_dict, - temp_dir=temp_dir, - num_steps=args.num_steps, - solver=args.solver, - overwrite=args.overwrite - ) - - # Launch the evaluation - evaluator.eval(files=file_paths_for_prediction) - - -if __name__ == "__main__": - main() diff --git a/rf2aa/metrics/distogram_metrics.py b/rf2aa/metrics/distogram_metrics.py deleted file mode 100644 index 59d9c97..0000000 --- a/rf2aa/metrics/distogram_metrics.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -import torch.nn as nn - -from rf2aa.loss.af3_losses import distogram_loss -from rf2aa.metrics.metrics_base import Metric - - -class DistogramLoss(Metric): - def __init__(self): - super().__init__() - self.cce_loss = nn.CrossEntropyLoss(reduction="none") - - def __call__(self, network_input, network_output, loss_input): - pred_distogram = network_output["distogram"] - X_rep_atoms_I = loss_input["X_rep_atoms_I"] - crd_mask_rep_atoms_I = loss_input["crd_mask_rep_atoms_I"] - loss = distogram_loss( - pred_distogram, X_rep_atoms_I, crd_mask_rep_atoms_I, 
self.cce_loss - ) - return {"distogram_loss": loss.detach().item()} - - -class SaveDistograms(Metric): - def __call__(self, network_input, network_output, loss_input): - pred_distogram = network_output["distogram"] - example_id = loss_input["example_id"] - torch.save(pred_distogram, f"distograms/{example_id}.pt") - return {"distogram_saved": True} diff --git a/rf2aa/metrics/lddt_metrics.py b/rf2aa/metrics/lddt_metrics.py deleted file mode 100644 index 86ced27..0000000 --- a/rf2aa/metrics/lddt_metrics.py +++ /dev/null @@ -1,288 +0,0 @@ -import torch -import torch.nn as nn - -from rf2aa.metrics.metrics_base import Metric - - -def calc_lddt( - X_L, - X_gt_L, - crd_mask_L, - tok_idx, - pairs_to_score=None, - distance_cutoff=15.0, - use_amp=True, - eps=1e-6, -): - """ - X_L: predicted coordinates (D, L, 3) - X_gt_L: ground truth coordinates (D, L, 3) - crd_mask_L: mask of coordinates (D, L,) - tok_idx: token index of each atom (L,) - pairs_to_score: pairs to score (L, L) | None - """ - D, L = X_L.shape[:2] - if pairs_to_score is None: - pairs_to_score = torch.ones((L, L), dtype=torch.bool).triu(0).to(X_L.device) - else: - assert pairs_to_score.shape == (L, L) - pairs_to_score = pairs_to_score.triu(0).to(X_L.device) - - first_index, second_index = torch.nonzero(pairs_to_score, as_tuple=True) - - lddt = [] - for d in range(D): - ground_truth_distances = torch.linalg.norm( - X_gt_L[d, first_index] - X_gt_L[d, second_index], dim=-1 - ) - - pair_mask = torch.logical_and( - ground_truth_distances > 0, ground_truth_distances < distance_cutoff - ) - - # only score pairs that are resolved in the ground truth - pair_mask *= crd_mask_L[d, first_index] * crd_mask_L[d, second_index] - # don't score pairs that are in the same token - pair_mask *= tok_idx[first_index] != tok_idx[second_index] - - valid_pairs = pair_mask.nonzero(as_tuple=True) - pair_mask = pair_mask[valid_pairs].to(X_L.dtype) - ground_truth_distances = ground_truth_distances[valid_pairs] - first_index, second_index = 
first_index[valid_pairs], second_index[valid_pairs] - - predicted_distances = torch.linalg.norm( - X_L[d, first_index] - X_L[d, second_index], dim=-1 - ) - - delta_distances = torch.abs(predicted_distances - ground_truth_distances + eps) - del predicted_distances, ground_truth_distances - - lddt.append( - 0.25 - * ( - torch.sum((delta_distances < 4.0) * pair_mask) - + torch.sum((delta_distances < 2.0) * pair_mask) - + torch.sum((delta_distances < 1.0) * pair_mask) - + torch.sum((delta_distances < 0.5) * pair_mask) - ) - / (torch.sum(pair_mask) + eps) - ) - - return torch.tensor(lddt) - - -class InterfaceLDDT(Metric): - def __call__(self, network_input, network_output, loss_input): - interface_lddt = {"interface_lddt_first": [], "interface_lddt_best": []} - chain_iid_token_lvl = loss_input["chain_iid_token_lvl"] - tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() - for chain_i, chain_j, interface_type in loss_input["interfaces_to_score"]: - # get tokens in chain_i and chain_j - chain_i_tokens = chain_iid_token_lvl == chain_i - chain_j_tokens = chain_iid_token_lvl == chain_j - # convert the token level to the atom level - chain_i_atoms = chain_i_tokens[tok_idx] - chain_j_atoms = chain_j_tokens[tok_idx] - # compute the intersection of chain_i and chain_j - - chain_ij_atoms = torch.einsum( - "L, K -> LK", torch.tensor(chain_i_atoms), torch.tensor(chain_j_atoms) - ).to(network_output["X_L"].device) - - # symmetrize - chain_ij_atoms = chain_ij_atoms | chain_ij_atoms.T - - # compute lddt using the pairs_to_score from the intersection - lddt = calc_lddt( - network_output["X_L"], - loss_input["X_gt_L"], - loss_input["crd_mask_L"], - torch.tensor(tok_idx).to(network_output["X_L"].device), - pairs_to_score=chain_ij_atoms, - distance_cutoff=30.0, - ) - - interface_lddt["interface_lddt_first"].append(lddt[0].item()) - interface_lddt["interface_lddt_best"].append(lddt.max().item()) - return interface_lddt - - -class ConfidenceInterfaceLDDT(Metric): - def 
__call__(self, network_input, network_output, loss_input): - interface_lddt = { - "interface_lddt_first": [], - "interface_lddt_best": [], - "interface_lddt_pae": [], - "interface_lddt_pde": [], - "interface_lddt_plddt": [], - "interface_lddt_af3_style_ipae": [], - "interface_lddt_af3_style_lig_ipae": [], - } - chain_iid_token_lvl = loss_input["chain_iid_token_lvl"] - tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() - for chain_i, chain_j, interface_type in loss_input["interfaces_to_score"]: - # get tokens in chain_i and chain_j - chain_i_tokens = chain_iid_token_lvl == chain_i - chain_j_tokens = chain_iid_token_lvl == chain_j - # convert the token level to the atom level - chain_i_atoms = chain_i_tokens[tok_idx] - chain_j_atoms = chain_j_tokens[tok_idx] - # compute the intersection of chain_i and chain_j - - chain_ij_atoms = torch.einsum( - "L, K -> LK", torch.tensor(chain_i_atoms), torch.tensor(chain_j_atoms) - ).to(network_output["X_L"].device) - - # compute lddt using the pairs_to_score from the intersection - lddt = calc_lddt( - network_output["X_L"], - loss_input["X_gt_L"], - loss_input["crd_mask_L"], - torch.tensor(tok_idx).to(network_output["X_L"].device), - pairs_to_score=chain_ij_atoms, - distance_cutoff=30.0, - ) - pae_idx = loss_input["pae_idx"] - pde_idx = loss_input["pde_idx"] - plddt_idx = loss_input["plddt_idx"] - af3_style_ipae_idx = loss_input["best_interface_idx"][ - f"{chain_i}-{chain_j}" - ] - interface_lddt["interface_lddt_first"].append(lddt[0].item()) - interface_lddt["interface_lddt_best"].append(lddt.max().item()) - interface_lddt["interface_lddt_pae"].append(lddt[pae_idx].item()) - interface_lddt["interface_lddt_pde"].append(lddt[pde_idx].item()) - interface_lddt["interface_lddt_plddt"].append(lddt[plddt_idx].item()) - interface_lddt["interface_lddt_af3_style_ipae"].append( - lddt[af3_style_ipae_idx].item() - ) - interface_lddt["interface_lddt_af3_style_lig_ipae"].append( - 
lddt[loss_input["best_lig_ipae_idx"][f"{chain_i}-{chain_j}"]].item() - ) - return interface_lddt - - -class ConfidenceChainLDDT(Metric): - def __call__(self, network_input, network_output, loss_input): - chain_lddt = { - "chain_lddt_first": [], - "chain_lddt_best": [], - "chain_lddt_pae": [], - "chain_lddt_pde": [], - "chain_lddt_plddt": [], - "chain_lddt_af3_style_chain": [], - "chain_lddt_af3_style_single_chain": [], - } - chain_iid_token_lvl = loss_input["chain_iid_token_lvl"] - tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() - for chain_i, chain_type in loss_input["pn_units_to_score"]: - # print(chain_type) - # get tokens in chain_i and chain_j - chain_i_tokens = chain_iid_token_lvl == chain_i - chain_j_tokens = chain_iid_token_lvl == chain_i - # convert the token level to the atom level - chain_i_atoms = chain_i_tokens[tok_idx] - chain_j_atoms = chain_j_tokens[tok_idx] - # compute the intersection of chain_i and chain_j - chain_ij_atoms = torch.einsum( - "L, K -> LK", torch.tensor(chain_i_atoms), torch.tensor(chain_j_atoms) - ).to(network_output["X_L"].device) - - # compute lddt using the pairs_to_score from the intersection - lddt = calc_lddt( - network_output["X_L"], - loss_input["X_gt_L"], - loss_input["crd_mask_L"], - torch.tensor(tok_idx).to(network_output["X_L"].device), - pairs_to_score=chain_ij_atoms, - ) - - chain_lddt["chain_lddt_first"].append(lddt[0].item()) - chain_lddt["chain_lddt_best"].append(lddt.max().item()) - chain_lddt["chain_lddt_pae"].append(lddt[loss_input["pae_idx"]].item()) - chain_lddt["chain_lddt_pde"].append(lddt[loss_input["pde_idx"]].item()) - chain_lddt["chain_lddt_plddt"].append(lddt[loss_input["plddt_idx"]].item()) - chain_lddt["chain_lddt_af3_style_chain"].append( - lddt[loss_input["best_chain_to_all_idx"][chain_i]].item() - ) - chain_lddt["chain_lddt_af3_style_single_chain"].append( - lddt[loss_input["best_chain_to_self_idx"][chain_i]].item() - ) - return chain_lddt - - -class LigRMSD(Metric): - # TODO: move 
these to a separate file, here for backwards compatibility with configs - def __call__(self, network_input, network_output, loss_input): - raise NotImplementedError() - - -class InterfacePocketLigandRMSD(Metric): - # TODO: move these to a separate file, here for backwards compatibility with configs - - """ - Compute the Ligand RMSD for each interface in the interfaces_to_score list. - - The ligand RMSD is computed only for interface protein-ligand chains. - Given a chain pair (chain_i, chain_j) and the interface type, the RMSD is computed as follows: - - if the interface_type is protein_ligand: continue - - Rigid align the GT coordinates of onto the predicted coordinates using only the CA atoms within 10A of the ligand in chain_i or chain_j - - Compute the RMSD between the aligned GT coordinates and the predicted coordinates of the ligand atoms - - Note: if the interface is not between a protein-ligand pair, the RMSD is set to -1 - """ - - def __call__(self, network_input, network_output, loss_input): - raise NotImplementedError() - - -class ChainLDDT(Metric): - def __call__(self, network_input, network_output, loss_input): - chain_lddt = {"chain_lddt_first": [], "chain_lddt_best": []} - chain_iid_token_lvl = loss_input["chain_iid_token_lvl"] - tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() - for chain_i, chain_type in loss_input["pn_units_to_score"]: - # get tokens in chain_i and chain_j - chain_i_tokens = chain_iid_token_lvl == chain_i - chain_j_tokens = chain_iid_token_lvl == chain_i - # convert the token level to the atom level - chain_i_atoms = chain_i_tokens[tok_idx] - chain_j_atoms = chain_j_tokens[tok_idx] - # compute the intersection of chain_i and chain_j - - chain_ij_atoms = torch.einsum( - "L, K -> LK", torch.tensor(chain_i_atoms), torch.tensor(chain_j_atoms) - ).to(network_output["X_L"].device) - - # compute lddt using the pairs_to_score from the intersection - lddt = calc_lddt( - network_output["X_L"], - loss_input["X_gt_L"], - 
loss_input["crd_mask_L"], - torch.tensor(tok_idx).to(network_output["X_L"].device), - pairs_to_score=chain_ij_atoms, - ) - - chain_lddt["chain_lddt_first"].append(lddt[0].item()) - chain_lddt["chain_lddt_best"].append(lddt.max().item()) - return chain_lddt - - -class LDDTByDiffusionStep(Metric): - def __call__(self, network_input, network_output, loss_input): - lddt_by_step = {"lddt_by_step": []} - tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() - for i, X_L in enumerate(network_output["X_denoised_L_traj"]): - lddt = calc_lddt( - X_L, - loss_input["X_gt_L"], - loss_input["crd_mask_L"], - torch.tensor(tok_idx).to(network_output["X_L"].device), - ) - lddt_by_step["lddt_by_step"].append(lddt) - return lddt_by_step - - -class SmoothedLDDT(nn.Module): - def __call__(self, network_input, network_output, loss_input): - raise NotImplementedError() diff --git a/rf2aa/metrics/metrics_base.py b/rf2aa/metrics/metrics_base.py deleted file mode 100644 index dca10a5..0000000 --- a/rf2aa/metrics/metrics_base.py +++ /dev/null @@ -1,42 +0,0 @@ -import hydra -import torch.nn as nn - -# class Metric: -# def __call__(self, rf_output, loss_calc_items) -> float: -# raise NotImplementedError("base class") - - -class MetricManager(nn.Module): - """ - Similar syntax to LossManager, but for metrics - """ - - def __init__(self, **metrics): - super().__init__() - self.to_compute = [] - for metric_name, metric in metrics.items(): - metric_fn = hydra.utils.instantiate(metric) - print(f"Adding metric {metric_name} to the validation metrics") - self.to_compute.append(metric_fn) - - def forward( - self, - network_input, - network_output, - loss_input, - ): - loss_dict = {} - for loss_fn in self.to_compute: - loss_dict_ = loss_fn(network_input, network_output, loss_input) - loss_dict.update(loss_dict_) - return loss_dict - - -class Metric: - def __call__(self, network_input, network_output, loss_input) -> float: - raise NotImplementedError("base class") - - -class 
AddExampleID(Metric): - def __call__(self, network_input, network_output, loss_input): - return {"example_id": loss_input["example_id"]} diff --git a/rf2aa/metrics/metrics_factory.py b/rf2aa/metrics/metrics_factory.py deleted file mode 100644 index d1c5cf9..0000000 --- a/rf2aa/metrics/metrics_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Dict - -from rf2aa.metrics.predicted_error import PAE, PLDDT - - -class MetricManager: - def __init__(self, config) -> None: - self.config = config - self.metrics = {metric: metrics_factory[metric] for metric in config.metrics} - - def __call__(self, rf_outputs, loss_calc_items) -> Dict: - metrics_dict = {} - for metric_name, metric in self.metrics: - metric_value = metric(rf_outputs, loss_calc_items) - metrics_dict[metric_name] = metric_value - return metrics_dict - - -metrics_factory = {"mean_pae": PAE(), "mean_plddt": PLDDT()} diff --git a/rf2aa/metrics/predicted_error.py b/rf2aa/metrics/predicted_error.py deleted file mode 100644 index db54951..0000000 --- a/rf2aa/metrics/predicted_error.py +++ /dev/null @@ -1,359 +0,0 @@ -from itertools import combinations -from typing import Any - -import numpy as np -import pandas as pd -import torch -import tree - -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.metrics.metric_utils import ( - compute_mean_over_subsampled_pairs, - create_chainwise_masks_1d, - create_chainwise_masks_2d, - create_interface_masks_2d, - spread_batch_into_dictionary, - unbin_logits, -) -from rf2aa.metrics.metrics_base import Metric - - -class WriteAF3Confidence(Metric): - """ - Given some config setups of pae, plddt, and pde, computes aggregate metrics for the model's confidence predictions - TO be used at inference time for users to know how confident their predictions are. 
- """ - - def __init__(self, pae, plddt, pde, **kwargs): - super().__init__() - self.pae = pae - self.plddt = plddt - self.pde = pde - - def __call__(self, network_input, network_output, loss_input) -> Any: - plddt_logit_stack = network_output["confidence"]["plddt_logits"] - pae_logits = network_output["confidence"]["pae_logits"] - pde_logits = network_output["confidence"]["pde_logits"] - ch_label = network_output["confidence"]["chain_iid_token_lvl"] - is_real_atom = network_output["confidence"]["is_real_atom"] - - # reorder the input tensors to be in (B, n_bins, ...) format for unbinning - plddt = unbin_logits( - plddt_logit_stack.reshape( - -1, - plddt_logit_stack.shape[1], - ChemData().NHEAVY, - self.plddt.n_bins, - ).permute(0, 3, 1, 2).float(), - self.plddt.max_value, - self.plddt.n_bins, - ) - pae = unbin_logits( - pae_logits.permute(0, 3, 1, 2).float(), self.pae.max_value, self.pae.n_bins - ) - pde = unbin_logits( - pde_logits.permute(0, 3, 1, 2).float(), self.pde.max_value, self.pde.n_bins - ) - - pae_interface = {} - pde_interface = {} - for interface, pairs_to_score in create_interface_masks_2d( - ch_label, device=pae.device - ).items(): - pae_interface[interface] = spread_batch_into_dictionary( - compute_mean_over_subsampled_pairs(pae, pairs_to_score) - ) - pde_interface[interface] = spread_batch_into_dictionary( - compute_mean_over_subsampled_pairs(pde, pairs_to_score) - ) - - pae_chainwise = {} - pde_chainwise = {} - for chain, pairs_to_score in create_chainwise_masks_2d( - ch_label, device=pae.device - ).items(): - pae_chainwise[chain] = spread_batch_into_dictionary( - compute_mean_over_subsampled_pairs(pae, pairs_to_score) - ) - pde_chainwise[chain] = spread_batch_into_dictionary( - compute_mean_over_subsampled_pairs(pde, pairs_to_score) - ) - - plddt_chainwise = {} - for chain, residue_atom_indices_to_score in create_chainwise_masks_1d( - ch_label, device=is_real_atom.device - ).items(): - chain_is_real_atom = ( - is_real_atom[..., : 
ChemData().NHEAVY] - * residue_atom_indices_to_score[:, None] - ) - plddt_chainwise[chain] = spread_batch_into_dictionary( - compute_mean_over_subsampled_pairs(plddt, chain_is_real_atom) - ) - - confidence_data = { - "example_id": loss_input["example_id"], - "mean_plddt": spread_batch_into_dictionary(plddt.mean(dim=(-1, -2))), - "mean_pae": spread_batch_into_dictionary(pae.mean(dim=(-1, -2))), - "mean_pde": spread_batch_into_dictionary(pde.mean(dim=(-1, -2))), - "chain_wise_mean_plddt": plddt_chainwise, - "chain_wise_mean_pae": pae_chainwise, - "chain_wise_mean_pde": pde_chainwise, - "interface_wise_mean_pae": pae_interface, - "interface_wise_mean_pde": pde_interface, - } - - num_batches = plddt.shape[0] - chains = np.unique(ch_label) - num_chains = len(chains) - chain_pairs = list(combinations(chains, 2)) - - # TODO: refactor to remove for loops - rows = [] - for batch_idx in range(num_batches): - for chain_id in range(num_chains): - chain = chains[chain_id] - row = { - "example_id": confidence_data["example_id"], - "chain_chainwise": chain, - "chainwise_plddt": confidence_data["chain_wise_mean_plddt"][chain][ - batch_idx - ], - "chainwise_pde": confidence_data["chain_wise_mean_pde"][chain][ - batch_idx - ], - "chainwise_pae": confidence_data["chain_wise_mean_pae"][chain][ - batch_idx - ], - "overall_plddt": confidence_data["mean_plddt"][batch_idx], - "overall_pde": confidence_data["mean_pde"][batch_idx], - "overall_pae": confidence_data["mean_pae"][batch_idx], - "batch_idx": batch_idx, - } - rows.append(row) - for interface in chain_pairs: - chain_i, chain_j = interface - row = { - "example_id": confidence_data["example_id"], - "chain_i_interface": chain_i, - "chain_j_interface": chain_j, - "pae_interface": confidence_data["interface_wise_mean_pae"][ - interface - ][batch_idx], - "pde_interface": confidence_data["interface_wise_mean_pde"][ - interface - ][batch_idx], - "overall_plddt": confidence_data["mean_plddt"][batch_idx], - "overall_pde": 
confidence_data["mean_pde"][batch_idx], - "overall_pae": confidence_data["mean_pae"][batch_idx], - "batch_idx": batch_idx, - } - rows.append(row) - - return pd.DataFrame(rows) - - -class GetConfidenceIndices(Metric): - def __call__(self, network_input, network_output, loss_input): - # AF3's ranking metrics work like this, but using ptm instead of ipae: - confidence_loss = loss_input["confidence_loss"] - del loss_input["confidence_loss"] - - ch_label = loss_input["chain_iid_token_lvl"] - scored_chains, interfaces, interface_chains = select_scored_units(loss_input) - - chain_to_all_masks = create_chain_to_all_masks(ch_label, scored_chains) - chain_to_self_masks = create_chain_to_self_masks(ch_label, scored_chains) - interface_masks, lig_chains = create_interface_masks( - ch_label, interfaces, loss_input["is_ligand"] - ) - - # map everything to gpu - gpu = network_output["confidence"]["plddt_logits"].device - chain_to_all_masks = tree.map_structure( - lambda x: x.to(gpu) if hasattr(x, "cpu") else x, chain_to_all_masks - ) - chain_to_self_masks = tree.map_structure( - lambda x: x.to(gpu) if hasattr(x, "cpu") else x, chain_to_self_masks - ) - interface_masks = tree.map_structure( - lambda x: x.to(gpu) if hasattr(x, "cpu") else x, interface_masks - ) - - confidence = network_output["confidence"] - - plddt_logits = confidence["plddt_logits"] - - # Reshape logits to B, K, L, NHEAVY - is_real_atom = network_output["confidence"]["is_real_atom"] - plddt_logits = plddt_logits.reshape( - -1, plddt_logits.shape[1], ChemData().NHEAVY, confidence_loss.plddt.n_bins - ).permute(0, 3, 1, 2).float() - # Reshape the pae and pde logits to B, K, L, L - pae_logits = confidence["pae_logits"].permute(0, 3, 1, 2).float() - pde_logits = confidence["pde_logits"].permute(0, 3, 1, 2).float() - - pae_logits_unbinned = unbin_logits( - pae_logits, confidence_loss.pae.max_value, confidence_loss.pae.n_bins - ) - plddt_logits_unbinned = unbin_logits( - plddt_logits, confidence_loss.plddt.max_value, 
confidence_loss.plddt.n_bins - ) - pde_logits_unbinned = unbin_logits( - pde_logits, confidence_loss.pde.max_value, confidence_loss.pde.n_bins - ) - - complex_pae = pae_logits_unbinned.mean(dim=(1, 2)) - complex_pde = pde_logits_unbinned.mean(dim=(1, 2)) - complex_plddt = ( - plddt_logits_unbinned * is_real_atom[..., : ChemData().NHEAVY] - ).sum(dim=(1, 2)) / is_real_atom[..., : ChemData().NHEAVY].sum() - - loss_input["pae_idx"] = torch.argmin(complex_pae) - loss_input["pde_idx"] = torch.argmin(complex_pde) - loss_input["plddt_idx"] = torch.argmax(complex_plddt) - - chain_to_self_paes = get_masked_error_per_chain( - scored_chains, chain_to_self_masks, pae_logits_unbinned - ) - chain_to_all_paes = get_masked_error_per_chain( - scored_chains, chain_to_all_masks, pae_logits_unbinned - ) - interface_chain_paes = get_masked_error_per_chain( - interface_chains, interface_masks, pae_logits_unbinned - ) - # average over both interfaces - average_interface_paes = get_average_error_per_interface( - interfaces, lig_chains, interface_chain_paes - ) - - loss_input["best_chain_to_all_idx"] = get_lowest_error_indices( - chain_to_all_paes - ) - loss_input["best_chain_to_self_idx"] = get_lowest_error_indices( - chain_to_self_paes - ) - loss_input["best_interface_idx"] = get_lowest_error_indices( - average_interface_paes - ) - # for ligands, we don't average the error - loss_input["best_lig_ipae_idx"] = get_lowest_error_ligand_indices( - interface_chain_paes, interfaces, lig_chains - ) - - return loss_input - - -def select_scored_units(loss_input): - scored_chains = [] - interfaces = [] - interface_chains = [] - for k in loss_input["interfaces_to_score"]: - interfaces.append(f"{k[0]}-{k[1]}") - interface_chains.append(k[0]) - interface_chains.append(k[1]) - for k in loss_input["pn_units_to_score"]: - scored_chains.append(k[0]) - - return scored_chains, interfaces, interface_chains - - -def create_chain_to_all_masks(ch_label, chains_to_score): - unique_chains = np.unique(ch_label) - 
I = len(ch_label) - chain_to_all_masks = {} - for chain in unique_chains: - if chain in chains_to_score: - indices = torch.from_numpy((ch_label == chain)) - mask = indices.unsqueeze(0) | indices.unsqueeze(1) - # set the diagonal to false - mask = mask & ~torch.eye(I, device=mask.device, dtype=torch.bool) - chain_to_all_masks[chain] = mask - return chain_to_all_masks - - -def create_chain_to_self_masks(ch_label, chains_to_score): - unique_chains = np.unique(ch_label) - I = len(ch_label) - chain_to_self_masks = {} - for chain in unique_chains: - if chain in chains_to_score: - indices = torch.from_numpy((ch_label == chain)) - mask = indices.unsqueeze(0) & indices.unsqueeze(1) - # set the diagonal to false - mask = mask & ~torch.eye(I, device=mask.device, dtype=torch.bool) - chain_to_self_masks[chain] = mask - return chain_to_self_masks - - -def create_interface_masks(ch_label, interfaces, is_ligand): - interface_masks = {} - interface_chains = [] - ligand_chains = [] - for interface in interfaces: - interface_chains.append(interface.split("-")[0]) - interface_chains.append(interface.split("-")[1]) - interface_chains = set(interface_chains) - for chain in interface_chains: - chain_indices = torch.from_numpy((ch_label == chain)) - - to_self = chain_indices.unsqueeze(0) & chain_indices.unsqueeze(1) - to_all = chain_indices.unsqueeze(0) | chain_indices.unsqueeze(1) - interface_mask = to_all & ~to_self - interface_masks[chain] = interface_mask - - if torch.all(is_ligand[chain_indices]): - ligand_chains.append(chain) - - return interface_masks, ligand_chains - - -def get_masked_error_per_chain(chains, masks, unbinned_logits): - error = {} - for chain in chains: - mask = masks[chain] - chain_error = compute_mean_over_subsampled_pairs(unbinned_logits, mask) - error[chain] = chain_error - - return error - - -def get_average_error_per_interface(interfaces, lig_chains, interface_errors): - average_error = {} - for interface in interfaces: - chain_a = interface.split("-")[0] - 
chain_b = interface.split("-")[1] - average_error[interface] = ( - interface_errors[chain_a] + interface_errors[chain_b] - ) / 2 - - return average_error - - -def get_lowest_error_indices(errors): - lowest_error_indices = {} - for k, v in errors.items(): - lowest_error_indices[k] = torch.argmin(v) - - return lowest_error_indices - - -def get_lowest_error_ligand_indices(errors, interfaces, lig_chains): - # ligands are a special case in AF3, where they only consider the ligand chain's error and not the average for the interface - lowest_error_indices = {} - for interface in interfaces: - chain_a = interface.split("-")[0] - chain_b = interface.split("-")[1] - if chain_a in lig_chains or chain_b in lig_chains: - if chain_a in lig_chains: - lig_chain = chain_a - elif chain_b in lig_chains: - lig_chain = chain_b - - lowest_error_indices[interface] = torch.argmin(errors[lig_chain]) - else: - # assign a random value to avoid key errors downstream; sorting ligand interfaces - # from other types is handles in analysis - lowest_error_indices[interface] = 0 - - return lowest_error_indices diff --git a/rf2aa/model/AF3_structure_wrapper.py b/rf2aa/model/AF3_structure_wrapper.py deleted file mode 100644 index 693261f..0000000 --- a/rf2aa/model/AF3_structure_wrapper.py +++ /dev/null @@ -1,18 +0,0 @@ - -import torch.nn as nn - -from rf2aa.model.AF3_structure import AtomAttentionDecoder, AtomAttentionEncoder - - -class NonEquivariantAtomEncoder(nn.Module): - def __init__(self, block_params): - super().__init__() - # c_atom, c_atompair, c_token = block_params.c_atom_pair, block_params.c_atom, block_params.c_token - self.model = AtomAttentionEncoder(**block_params) - - -class NonEquivariantAtomDecoder(nn.Module): - def __init__(self, block_params): - super().__init__() - # c_atom, c_atompair, c_token = block_params.c_atom_pair, block_params.c_atom, block_params.c_token - self.model = AtomAttentionDecoder(**block_params) diff --git a/rf2aa/resolvers.py b/rf2aa/resolvers.py deleted file 
mode 100644 index 40fbfd2..0000000 --- a/rf2aa/resolvers.py +++ /dev/null @@ -1,23 +0,0 @@ -import importlib -import sys -from typing import Any - - -def resolve_import(path: str) -> Any: - """ - Import a module from a string path. - If the module is not already imported, we dynamically import - with `importlib.import_module` and return the module object. - - Args: - path (str): The path to the module. - - Example usage with Hydra, assuming the module `rf2aa.setup` exists within the PYTHONPATH: - ```yaml - # config.yaml - setup: ${resolve_import:rf2aa.setup} - ``` - """ - namespace, name = path.rsplit(".", maxsplit=1) - importlib.import_module(namespace) - return sys.modules[namespace].__dict__[name] diff --git a/rf2aa/tests/data/example_from_ccd.cif b/rf2aa/tests/data/example_from_ccd.cif deleted file mode 100644 index 80f66ac..0000000 --- a/rf2aa/tests/data/example_from_ccd.cif +++ /dev/null @@ -1,1354 +0,0 @@ -data_unknown_id -# -_entry.id unknown_id -_entry.author ncorley -_entry.date 2025-01-14 -_entry.time 16:42:23 -# -_entity_poly.entity_id 0 -_entity_poly.type polypeptide(l) -_entity_poly.nstd_linkage no -_entity_poly.nstd_monomer no -_entity_poly.pdbx_seq_one_letter_code -;SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWN -AQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE -; -_entity_poly.pdbx_seq_one_letter_code_can -;SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWN -AQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE -; -_entity_poly.pdbx_strand_id A -_entity_poly.pdbx_target_identifier ? -# -loop_ -_chem_comp_bond.pdbx_ordinal -_chem_comp_bond.comp_id -_chem_comp_bond.atom_id_1 -_chem_comp_bond.atom_id_2 -_chem_comp_bond.value_order -_chem_comp_bond.pdbx_aromatic_flag -_chem_comp_bond.pdbx_stereo_config -1 SER N CA SING N ? -2 SER CA C SING N ? -3 SER CA CB SING N ? -4 SER C O DOUB N ? -5 SER CB OG SING N ? -6 MET N CA SING N ? -7 MET CA C SING N ? -8 MET CA CB SING N ? -9 MET C O DOUB N ? 
-10 MET CB CG SING N ? -11 MET CG SD SING N ? -12 MET SD CE SING N ? -13 ASN N CA SING N ? -14 ASN CA C SING N ? -15 ASN CA CB SING N ? -16 ASN C O DOUB N ? -17 ASN CB CG SING N ? -18 ASN CG OD1 DOUB N ? -19 ASN CG ND2 SING N ? -20 PRO N CA SING N ? -21 PRO N CD SING N ? -22 PRO CA C SING N ? -23 PRO CA CB SING N ? -24 PRO C O DOUB N ? -25 PRO CB CG SING N ? -26 PRO CG CD SING N ? -27 GLU N CA SING N ? -28 GLU CA C SING N ? -29 GLU CA CB SING N ? -30 GLU C O DOUB N ? -31 GLU CB CG SING N ? -32 GLU CG CD SING N ? -33 GLU CD OE1 DOUB N ? -34 GLU CD OE2 SING N ? -35 THR N CA SING N ? -36 THR CA C SING N ? -37 THR CA CB SING N ? -38 THR C O DOUB N ? -39 THR CB OG1 SING N ? -40 THR CB CG2 SING N ? -41 LYS N CA SING N ? -42 LYS CA C SING N ? -43 LYS CA CB SING N ? -44 LYS C O DOUB N ? -45 LYS CB CG SING N ? -46 LYS CG CD SING N ? -47 LYS CD CE SING N ? -48 LYS CE NZ SING N ? -49 ARG N CA SING N ? -50 ARG CA C SING N ? -51 ARG CA CB SING N ? -52 ARG C O DOUB N ? -53 ARG CB CG SING N ? -54 ARG CG CD SING N ? -55 ARG CD NE SING N ? -56 ARG NE CZ SING N ? -57 ARG CZ NH1 SING N ? -58 ARG CZ NH2 DOUB N ? -59 GLN N CA SING N ? -60 GLN CA C SING N ? -61 GLN CA CB SING N ? -62 GLN C O DOUB N ? -63 GLN CB CG SING N ? -64 GLN CG CD SING N ? -65 GLN CD OE1 DOUB N ? -66 GLN CD NE2 SING N ? -67 LEU N CA SING N ? -68 LEU CA C SING N ? -69 LEU CA CB SING N ? -70 LEU C O DOUB N ? -71 LEU CB CG SING N ? -72 LEU CG CD1 SING N ? -73 LEU CG CD2 SING N ? -74 TYR N CA SING N ? -75 TYR CA C SING N ? -76 TYR CA CB SING N ? -77 TYR C O DOUB N ? -78 TYR CB CG SING N ? -79 TYR CG CD1 DOUB Y ? -80 TYR CG CD2 SING Y ? -81 TYR CD1 CE1 SING Y ? -82 TYR CD2 CE2 DOUB Y ? -83 TYR CE1 CZ DOUB Y ? -84 TYR CE2 CZ SING Y ? -85 TYR CZ OH SING N ? -86 VAL N CA SING N ? -87 VAL CA C SING N ? -88 VAL CA CB SING N ? -89 VAL C O DOUB N ? -90 VAL CB CG1 SING N ? -91 VAL CB CG2 SING N ? -92 TRP N CA SING N ? -93 TRP CA C SING N ? -94 TRP CA CB SING N ? -95 TRP C O DOUB N ? -96 TRP CB CG SING N ? 
-97 TRP CG CD1 DOUB Y ? -98 TRP CG CD2 SING Y ? -99 TRP CD1 NE1 SING Y ? -100 TRP CD2 CE2 DOUB Y ? -101 TRP CD2 CE3 SING Y ? -102 TRP NE1 CE2 SING Y ? -103 TRP CE2 CZ2 SING Y ? -104 TRP CE3 CZ3 DOUB Y ? -105 TRP CZ2 CH2 DOUB Y ? -106 TRP CZ3 CH2 SING Y ? -107 HIS N CA SING N ? -108 HIS CA C SING N ? -109 HIS CA CB SING N ? -110 HIS C O DOUB N ? -111 HIS CB CG SING N ? -112 HIS CG ND1 SING Y ? -113 HIS CG CD2 DOUB Y ? -114 HIS ND1 CE1 DOUB Y ? -115 HIS CD2 NE2 SING Y ? -116 HIS CE1 NE2 SING Y ? -117 PHE N CA SING N ? -118 PHE CA C SING N ? -119 PHE CA CB SING N ? -120 PHE C O DOUB N ? -121 PHE CB CG SING N ? -122 PHE CG CD1 DOUB Y ? -123 PHE CG CD2 SING Y ? -124 PHE CD1 CE1 SING Y ? -125 PHE CD2 CE2 DOUB Y ? -126 PHE CE1 CZ DOUB Y ? -127 PHE CE2 CZ SING Y ? -128 ALA N CA SING N ? -129 ALA CA C SING N ? -130 ALA CA CB SING N ? -131 ALA C O DOUB N ? -132 ASP N CA SING N ? -133 ASP CA C SING N ? -134 ASP CA CB SING N ? -135 ASP C O DOUB N ? -136 ASP CB CG SING N ? -137 ASP CG OD1 DOUB N ? -138 ASP CG OD2 SING N ? -139 ILE N CA SING N ? -140 ILE CA C SING N ? -141 ILE CA CB SING N ? -142 ILE C O DOUB N ? -143 ILE CB CG1 SING N ? -144 ILE CB CG2 SING N ? -145 ILE CG1 CD1 SING N ? -146 GLY N CA SING N ? -147 GLY CA C SING N ? -148 GLY C O DOUB N ? -149 CYS N CA SING N ? -150 CYS CA C SING N ? -151 CYS CA CB SING N ? -152 CYS C O DOUB N ? -153 CYS CB SG SING N ? -154 GLU C OXT SING N ? -155 7Z2 C37 N38 SING N ? -156 7Z2 C36 C37 SING N ? -157 7Z2 C35 C36 SING N ? -158 7Z2 C4 C10 DOUB Y ? -159 7Z2 C10 C9 SING Y ? -160 7Z2 C4 C3 SING N ? -161 7Z2 C3 N2 SING N ? -162 7Z2 C40 C39 DOUB Y ? -163 7Z2 C13 C40 SING Y ? -164 7Z2 C4 C5 SING Y ? -165 7Z2 C9 N8 DOUB Y ? -166 7Z2 C34 C35 SING N ? -167 7Z2 N32 C34 SING N ? -168 7Z2 C39 C16 SING Y ? -169 7Z2 N12 C13 SING N ? -170 7Z2 N12 C1 SING N ? -171 7Z2 C13 C14 DOUB Y ? -172 7Z2 N2 C1 SING N ? -173 7Z2 C6 N2 SING N ? -174 7Z2 N33 N32 SING N ? -175 7Z2 C17 N33 DOUB N ? -176 7Z2 C7 N8 SING Y ? -177 7Z2 C5 C6 SING N ? 
-178 7Z2 C5 C7 DOUB Y ? -179 7Z2 C1 O11 DOUB N ? -180 7Z2 N32 C30 SING N ? -181 7Z2 C17 C16 SING N ? -182 7Z2 C15 C16 DOUB Y ? -183 7Z2 C17 C18 SING N ? -184 7Z2 C15 C14 SING Y ? -185 7Z2 C30 O31 DOUB N ? -186 7Z2 C30 C19 SING N ? -187 7Z2 C18 C19 SING N ? -188 7Z2 C20 C19 SING N ? -189 7Z2 C28 C29 DOUB Y ? -190 7Z2 C21 C29 SING Y ? -191 7Z2 C28 C27 SING Y ? -192 7Z2 C20 C21 DOUB Y ? -193 7Z2 C20 C25 SING Y ? -194 7Z2 C21 C22 SING Y ? -195 7Z2 C27 N26 DOUB Y ? -196 7Z2 C24 C25 DOUB Y ? -197 7Z2 C22 N26 SING Y ? -198 7Z2 C22 C23 DOUB Y ? -199 7Z2 C24 C23 SING Y ? -# -loop_ -_atom_site.group_PDB -_atom_site.type_symbol -_atom_site.label_atom_id -_atom_site.label_alt_id -_atom_site.label_comp_id -_atom_site.label_asym_id -_atom_site.label_entity_id -_atom_site.label_seq_id -_atom_site.pdbx_PDB_ins_code -_atom_site.auth_seq_id -_atom_site.auth_comp_id -_atom_site.auth_asym_id -_atom_site.auth_atom_id -_atom_site.occupancy -_atom_site.pdbx_formal_charge -_atom_site.Cartn_x -_atom_site.Cartn_y -_atom_site.Cartn_z -_atom_site.pdbx_PDB_model_num -_atom_site.id -ATOM N N . SER A 0 1 . 1 SER A N 1.0 ? 0.770537 0.7163085 0.9716571 1 1 -ATOM C CA . SER A 0 1 . 1 SER A CA 1.0 ? 0.032276563 0.71075296 0.0072168317 1 2 -ATOM C C . SER A 0 1 . 1 SER A C 1.0 ? 0.028413583 0.60595214 0.986185 1 3 -ATOM O O . SER A 0 1 . 1 SER A O 1.0 ? 0.28263938 0.91627383 0.91197056 1 4 -ATOM C CB . SER A 0 1 . 1 SER A CB 1.0 ? 0.96444744 0.09150175 0.67010856 1 5 -ATOM O OG . SER A 0 1 . 1 SER A OG 1.0 ? 0.26054084 0.9034797 0.54910314 1 6 -ATOM N N . MET A 0 2 . 2 MET A N 1.0 ? 0.42176604 0.1503557 0.5855051 1 7 -ATOM C CA . MET A 0 2 . 2 MET A CA 1.0 ? 0.8164472 0.91230243 0.43640634 1 8 -ATOM C C . MET A 0 2 . 2 MET A C 1.0 ? 0.6896751 0.47070634 0.60491955 1 9 -ATOM O O . MET A 0 2 . 2 MET A O 1.0 ? 0.7030589 0.68519294 0.43431538 1 10 -ATOM C CB . MET A 0 2 . 2 MET A CB 1.0 ? 0.34209174 0.66244185 0.24852546 1 11 -ATOM C CG . MET A 0 2 . 2 MET A CG 1.0 ? 
0.6705897 0.7139055 0.17978548 1 12 -ATOM S SD . MET A 0 2 . 2 MET A SD 1.0 ? 0.7194814 0.22575855 0.7565886 1 13 -ATOM C CE . MET A 0 2 . 2 MET A CE 1.0 ? 0.520618 0.21957618 0.17120793 1 14 -ATOM N N . ASN A 0 3 . 3 ASN A N 1.0 ? 0.9226024 0.9256006 0.2967649 1 15 -ATOM C CA . ASN A 0 3 . 3 ASN A CA 1.0 ? 0.40048134 0.0410027 0.8738365 1 16 -ATOM C C . ASN A 0 3 . 3 ASN A C 1.0 ? 0.56359786 0.6135691 0.17325929 1 17 -ATOM O O . ASN A 0 3 . 3 ASN A O 1.0 ? 0.8518298 0.27451587 0.65287423 1 18 -ATOM C CB . ASN A 0 3 . 3 ASN A CB 1.0 ? 0.66375065 0.33746374 0.5112768 1 19 -ATOM C CG . ASN A 0 3 . 3 ASN A CG 1.0 ? 0.57714784 0.95387447 0.3648107 1 20 -ATOM O OD1 . ASN A 0 3 . 3 ASN A OD1 1.0 ? 0.06426367 0.93236685 0.40973836 1 21 -ATOM N ND2 . ASN A 0 3 . 3 ASN A ND2 1.0 ? 0.14621012 0.03968773 0.7366408 1 22 -ATOM N N . PRO A 0 4 . 4 PRO A N 1.0 ? 0.73289883 0.7379735 0.7427924 1 23 -ATOM C CA . PRO A 0 4 . 4 PRO A CA 1.0 ? 0.6033526 0.64463574 0.6319247 1 24 -ATOM C C . PRO A 0 4 . 4 PRO A C 1.0 ? 0.12590659 0.897119 0.2132349 1 25 -ATOM O O . PRO A 0 4 . 4 PRO A O 1.0 ? 0.43863657 0.68747294 0.1299934 1 26 -ATOM C CB . PRO A 0 4 . 4 PRO A CB 1.0 ? 0.7399981 0.5212088 0.3584686 1 27 -ATOM C CG . PRO A 0 4 . 4 PRO A CG 1.0 ? 0.8008769 0.35548276 0.772315 1 28 -ATOM C CD . PRO A 0 4 . 4 PRO A CD 1.0 ? 0.87791574 0.26965255 0.18035434 1 29 -ATOM N N . PRO A 0 5 . 5 PRO A N 1.0 ? 0.19950828 0.10145833 0.8738812 1 30 -ATOM C CA . PRO A 0 5 . 5 PRO A CA 1.0 ? 0.39090562 0.16060434 0.80460757 1 31 -ATOM C C . PRO A 0 5 . 5 PRO A C 1.0 ? 0.8798909 0.644216 0.9147088 1 32 -ATOM O O . PRO A 0 5 . 5 PRO A O 1.0 ? 0.8475702 0.5954083 0.7623515 1 33 -ATOM C CB . PRO A 0 5 . 5 PRO A CB 1.0 ? 0.052886274 0.9092924 0.87070537 1 34 -ATOM C CG . PRO A 0 5 . 5 PRO A CG 1.0 ? 0.6346058 0.20176318 0.2579161 1 35 -ATOM C CD . PRO A 0 5 . 5 PRO A CD 1.0 ? 0.8709117 0.936175 0.8425335 1 36 -ATOM N N . PRO A 0 6 . 6 PRO A N 1.0 ? 0.70901227 0.93153316 0.04914921 1 37 -ATOM C CA . 
PRO A 0 6 . 6 PRO A CA 1.0 ? 0.9284019 0.053442266 0.22155978 1 38 -ATOM C C . PRO A 0 6 . 6 PRO A C 1.0 ? 0.78144413 0.33228233 0.7443096 1 39 -ATOM O O . PRO A 0 6 . 6 PRO A O 1.0 ? 0.28586647 0.38953847 0.2653783 1 40 -ATOM C CB . PRO A 0 6 . 6 PRO A CB 1.0 ? 0.8273215 0.6737183 0.66917723 1 41 -ATOM C CG . PRO A 0 6 . 6 PRO A CG 1.0 ? 0.34814763 0.5008495 0.49426585 1 42 -ATOM C CD . PRO A 0 6 . 6 PRO A CD 1.0 ? 0.02738572 0.36174 0.7246908 1 43 -ATOM N N . PRO A 0 7 . 7 PRO A N 1.0 ? 0.13280107 0.30647534 0.18138038 1 44 -ATOM C CA . PRO A 0 7 . 7 PRO A CA 1.0 ? 0.23695669 0.41969007 0.0065960614 1 45 -ATOM C C . PRO A 0 7 . 7 PRO A C 1.0 ? 0.60388744 0.18324585 0.5962051 1 46 -ATOM O O . PRO A 0 7 . 7 PRO A O 1.0 ? 0.8739068 0.9313893 0.5004562 1 47 -ATOM C CB . PRO A 0 7 . 7 PRO A CB 1.0 ? 0.3841846 0.6005609 0.06914989 1 48 -ATOM C CG . PRO A 0 7 . 7 PRO A CG 1.0 ? 0.5146076 0.060483687 0.11898337 1 49 -ATOM C CD . PRO A 0 7 . 7 PRO A CD 1.0 ? 0.73704606 0.49044126 0.57007253 1 50 -ATOM N N . GLU A 0 8 . 8 GLU A N 1.0 ? 0.6020321 0.16984348 0.13502897 1 51 -ATOM C CA . GLU A 0 8 . 8 GLU A CA 1.0 ? 0.040855434 0.014868518 0.7254467 1 52 -ATOM C C . GLU A 0 8 . 8 GLU A C 1.0 ? 0.80895305 0.33203092 0.97204036 1 53 -ATOM O O . GLU A 0 8 . 8 GLU A O 1.0 ? 0.36493722 0.023649668 0.82551026 1 54 -ATOM C CB . GLU A 0 8 . 8 GLU A CB 1.0 ? 0.92447126 0.9681622 0.41426173 1 55 -ATOM C CG . GLU A 0 8 . 8 GLU A CG 1.0 ? 0.95872766 0.58048403 0.2556177 1 56 -ATOM C CD . GLU A 0 8 . 8 GLU A CD 1.0 ? 0.8526763 0.91072243 0.83467865 1 57 -ATOM O OE1 . GLU A 0 8 . 8 GLU A OE1 1.0 ? 0.9309038 0.43921226 0.62519735 1 58 -ATOM O OE2 . GLU A 0 8 . 8 GLU A OE2 1.0 ? 0.7280395 0.90059584 0.810192 1 59 -ATOM N N . THR A 0 9 . 9 THR A N 1.0 ? 0.92567235 0.26897138 0.12193804 1 60 -ATOM C CA . THR A 0 9 . 9 THR A CA 1.0 ? 0.76582485 0.41203532 0.051172033 1 61 -ATOM C C . THR A 0 9 . 9 THR A C 1.0 ? 0.71454304 0.6044528 0.58845794 1 62 -ATOM O O . THR A 0 9 . 9 THR A O 1.0 ? 
0.29037914 0.10930096 0.4052968 1 63 -ATOM C CB . THR A 0 9 . 9 THR A CB 1.0 ? 0.36296734 0.59010106 0.58526677 1 64 -ATOM O OG1 . THR A 0 9 . 9 THR A OG1 1.0 ? 0.1770919 0.39987755 0.5306763 1 65 -ATOM C CG2 . THR A 0 9 . 9 THR A CG2 1.0 ? 0.8620062 0.95912075 0.15385854 1 66 -ATOM N N . SER A 0 10 . 10 SER A N 1.0 ? 0.01899538 0.8280582 0.012333063 1 67 -ATOM C CA . SER A 0 10 . 10 SER A CA 1.0 ? 0.31442055 0.89566374 0.7921888 1 68 -ATOM C C . SER A 0 10 . 10 SER A C 1.0 ? 0.19152968 0.568345 0.17650948 1 69 -ATOM O O . SER A 0 10 . 10 SER A O 1.0 ? 0.19668745 0.048091717 0.17103842 1 70 -ATOM C CB . SER A 0 10 . 10 SER A CB 1.0 ? 0.57666856 0.55623984 0.19206622 1 71 -ATOM O OG . SER A 0 10 . 10 SER A OG 1.0 ? 0.6130356 0.1799426 0.14675798 1 72 -ATOM N N . ASN A 0 11 . 11 ASN A N 1.0 ? 0.15973489 0.2934069 0.7857777 1 73 -ATOM C CA . ASN A 0 11 . 11 ASN A CA 1.0 ? 0.44435263 0.9688712 0.12460647 1 74 -ATOM C C . ASN A 0 11 . 11 ASN A C 1.0 ? 0.51003176 0.57947916 0.110599935 1 75 -ATOM O O . ASN A 0 11 . 11 ASN A O 1.0 ? 0.49958074 0.2049418 0.6201293 1 76 -ATOM C CB . ASN A 0 11 . 11 ASN A CB 1.0 ? 0.38693815 0.50229514 0.8115528 1 77 -ATOM C CG . ASN A 0 11 . 11 ASN A CG 1.0 ? 0.18575248 0.25162646 0.66866994 1 78 -ATOM O OD1 . ASN A 0 11 . 11 ASN A OD1 1.0 ? 0.38074353 0.18104813 0.86836535 1 79 -ATOM N ND2 . ASN A 0 11 . 11 ASN A ND2 1.0 ? 0.57305247 0.12701933 0.23278363 1 80 -ATOM N N . PRO A 0 12 . 12 PRO A N 1.0 ? 0.61526793 0.8419901 0.7973025 1 81 -ATOM C CA . PRO A 0 12 . 12 PRO A CA 1.0 ? 0.7569635 0.02018945 0.26125544 1 82 -ATOM C C . PRO A 0 12 . 12 PRO A C 1.0 ? 0.016580794 0.07260859 0.09063938 1 83 -ATOM O O . PRO A 0 12 . 12 PRO A O 1.0 ? 0.20253354 0.41119668 0.80521685 1 84 -ATOM C CB . PRO A 0 12 . 12 PRO A CB 1.0 ? 0.35801217 0.6408294 0.7039536 1 85 -ATOM C CG . PRO A 0 12 . 12 PRO A CG 1.0 ? 0.60111797 0.55348575 0.16448012 1 86 -ATOM C CD . PRO A 0 12 . 12 PRO A CD 1.0 ? 0.73826444 0.83740324 0.21896897 1 87 -ATOM N N . ASN A 0 13 . 
13 ASN A N 1.0 ? 0.09765879 0.10035146 0.37806296 1 88 -ATOM C CA . ASN A 0 13 . 13 ASN A CA 1.0 ? 0.1947978 0.7758811 0.67581844 1 89 -ATOM C C . ASN A 0 13 . 13 ASN A C 1.0 ? 0.4360254 0.20494409 0.41959542 1 90 -ATOM O O . ASN A 0 13 . 13 ASN A O 1.0 ? 0.14954032 0.21009444 0.027452566 1 91 -ATOM C CB . ASN A 0 13 . 13 ASN A CB 1.0 ? 0.5198927 0.49634343 0.61531204 1 92 -ATOM C CG . ASN A 0 13 . 13 ASN A CG 1.0 ? 0.32061008 0.68316966 0.78821033 1 93 -ATOM O OD1 . ASN A 0 13 . 13 ASN A OD1 1.0 ? 0.9225087 0.5443996 0.38791323 1 94 -ATOM N ND2 . ASN A 0 13 . 13 ASN A ND2 1.0 ? 0.57208526 0.35781017 0.0065194494 1 95 -ATOM N N . LYS A 0 14 . 14 LYS A N 1.0 ? 0.7274264 0.5266288 0.2041686 1 96 -ATOM C CA . LYS A 0 14 . 14 LYS A CA 1.0 ? 0.16626 0.29749185 0.52373254 1 97 -ATOM C C . LYS A 0 14 . 14 LYS A C 1.0 ? 0.5013565 0.8527831 0.52129537 1 98 -ATOM O O . LYS A 0 14 . 14 LYS A O 1.0 ? 0.09241349 0.49619922 0.53460294 1 99 -ATOM C CB . LYS A 0 14 . 14 LYS A CB 1.0 ? 0.43970546 0.17694433 0.7981003 1 100 -ATOM C CG . LYS A 0 14 . 14 LYS A CG 1.0 ? 0.72899836 0.29298794 0.7984923 1 101 -ATOM C CD . LYS A 0 14 . 14 LYS A CD 1.0 ? 0.6161438 0.628905 0.40856582 1 102 -ATOM C CE . LYS A 0 14 . 14 LYS A CE 1.0 ? 0.7264581 0.67581695 0.0449318 1 103 -ATOM N NZ . LYS A 0 14 . 14 LYS A NZ 1.0 +1 0.9402337 0.9678885 0.53999525 1 104 -ATOM N N . PRO A 0 15 . 15 PRO A N 1.0 ? 0.9865383 0.43937346 0.64474344 1 105 -ATOM C CA . PRO A 0 15 . 15 PRO A CA 1.0 ? 0.63833505 0.073759705 0.52901 1 106 -ATOM C C . PRO A 0 15 . 15 PRO A C 1.0 ? 0.43521446 0.05092478 0.90374166 1 107 -ATOM O O . PRO A 0 15 . 15 PRO A O 1.0 ? 0.7069983 0.07161833 0.04574869 1 108 -ATOM C CB . PRO A 0 15 . 15 PRO A CB 1.0 ? 0.50611395 0.63609815 0.67469555 1 109 -ATOM C CG . PRO A 0 15 . 15 PRO A CG 1.0 ? 0.004204351 0.8242852 0.6793001 1 110 -ATOM C CD . PRO A 0 15 . 15 PRO A CD 1.0 ? 0.33527783 0.54203343 0.53751045 1 111 -ATOM N N . LYS A 0 16 . 16 LYS A N 1.0 ? 
0.69385606 0.7352695 0.039008185 1 112 -ATOM C CA . LYS A 0 16 . 16 LYS A CA 1.0 ? 0.6490556 0.6891252 0.3273996 1 113 -ATOM C C . LYS A 0 16 . 16 LYS A C 1.0 ? 0.9563204 0.4739071 0.64828026 1 114 -ATOM O O . LYS A 0 16 . 16 LYS A O 1.0 ? 0.45762753 0.8175025 0.6631793 1 115 -ATOM C CB . LYS A 0 16 . 16 LYS A CB 1.0 ? 0.22042435 0.84329116 0.029074967 1 116 -ATOM C CG . LYS A 0 16 . 16 LYS A CG 1.0 ? 0.6717139 0.19487178 0.92788506 1 117 -ATOM C CD . LYS A 0 16 . 16 LYS A CD 1.0 ? 0.819796 0.91520476 0.4398502 1 118 -ATOM C CE . LYS A 0 16 . 16 LYS A CE 1.0 ? 0.51692027 0.2599724 0.9775755 1 119 -ATOM N NZ . LYS A 0 16 . 16 LYS A NZ 1.0 +1 0.51835316 0.18068148 0.9259697 1 120 -ATOM N N . ARG A 0 17 . 17 ARG A N 1.0 ? 0.89982605 0.2130354 0.8541513 1 121 -ATOM C CA . ARG A 0 17 . 17 ARG A CA 1.0 ? 0.6173771 0.027039422 0.66468704 1 122 -ATOM C C . ARG A 0 17 . 17 ARG A C 1.0 ? 0.9300999 0.94548273 0.16443104 1 123 -ATOM O O . ARG A 0 17 . 17 ARG A O 1.0 ? 0.39719996 0.82789654 0.32941064 1 124 -ATOM C CB . ARG A 0 17 . 17 ARG A CB 1.0 ? 0.41470772 0.019472545 0.4241863 1 125 -ATOM C CG . ARG A 0 17 . 17 ARG A CG 1.0 ? 0.11369026 0.19860277 0.18907766 1 126 -ATOM C CD . ARG A 0 17 . 17 ARG A CD 1.0 ? 0.48607066 0.51404345 0.4039237 1 127 -ATOM N NE . ARG A 0 17 . 17 ARG A NE 1.0 ? 0.17064233 0.19881848 0.6789832 1 128 -ATOM C CZ . ARG A 0 17 . 17 ARG A CZ 1.0 ? 0.5347117 0.16861686 0.95200044 1 129 -ATOM N NH1 . ARG A 0 17 . 17 ARG A NH1 1.0 ? 0.54475534 0.72161454 0.036181618 1 130 -ATOM N NH2 . ARG A 0 17 . 17 ARG A NH2 1.0 +1 0.017609065 0.008020056 0.96270055 1 131 -ATOM N N . GLN A 0 18 . 18 GLN A N 1.0 ? 0.35198858 0.5557327 0.52959144 1 132 -ATOM C CA . GLN A 0 18 . 18 GLN A CA 1.0 ? 0.43928036 0.33291155 0.12036057 1 133 -ATOM C C . GLN A 0 18 . 18 GLN A C 1.0 ? 0.07384167 0.8563217 0.78860766 1 134 -ATOM O O . GLN A 0 18 . 18 GLN A O 1.0 ? 0.8952469 0.13780545 0.55128634 1 135 -ATOM C CB . GLN A 0 18 . 18 GLN A CB 1.0 ? 
0.7905739 0.7668214 0.21385236 1 136 -ATOM C CG . GLN A 0 18 . 18 GLN A CG 1.0 ? 0.21391055 0.4181108 0.47930494 1 137 -ATOM C CD . GLN A 0 18 . 18 GLN A CD 1.0 ? 0.83172804 0.3310944 0.90641195 1 138 -ATOM O OE1 . GLN A 0 18 . 18 GLN A OE1 1.0 ? 0.9984228 0.0323241 0.6258953 1 139 -ATOM N NE2 . GLN A 0 18 . 18 GLN A NE2 1.0 ? 0.0026738618 0.17258534 0.9618116 1 140 -ATOM N N . THR A 0 19 . 19 THR A N 1.0 ? 0.475918 0.20733109 0.33747607 1 141 -ATOM C CA . THR A 0 19 . 19 THR A CA 1.0 ? 0.5467742 0.76167434 0.70817816 1 142 -ATOM C C . THR A 0 19 . 19 THR A C 1.0 ? 0.45234236 0.9124471 0.41690144 1 143 -ATOM O O . THR A 0 19 . 19 THR A O 1.0 ? 0.0585073 0.241553 0.3225283 1 144 -ATOM C CB . THR A 0 19 . 19 THR A CB 1.0 ? 0.07696611 0.7935254 0.052559175 1 145 -ATOM O OG1 . THR A 0 19 . 19 THR A OG1 1.0 ? 0.8753697 0.6884941 0.9020753 1 146 -ATOM C CG2 . THR A 0 19 . 19 THR A CG2 1.0 ? 0.33610177 0.81610143 0.28517708 1 147 -ATOM N N . ASN A 0 20 . 20 ASN A N 1.0 ? 0.7623411 0.9393225 0.46099055 1 148 -ATOM C CA . ASN A 0 20 . 20 ASN A CA 1.0 ? 0.54201144 0.0920117 0.2541 1 149 -ATOM C C . ASN A 0 20 . 20 ASN A C 1.0 ? 0.8442163 0.21354552 0.5318202 1 150 -ATOM O O . ASN A 0 20 . 20 ASN A O 1.0 ? 0.6296841 0.5452634 0.4179544 1 151 -ATOM C CB . ASN A 0 20 . 20 ASN A CB 1.0 ? 0.8062723 0.55730367 0.8430502 1 152 -ATOM C CG . ASN A 0 20 . 20 ASN A CG 1.0 ? 0.19095723 0.39414194 0.74356574 1 153 -ATOM O OD1 . ASN A 0 20 . 20 ASN A OD1 1.0 ? 0.5669189 0.9502358 0.6097131 1 154 -ATOM N ND2 . ASN A 0 20 . 20 ASN A ND2 1.0 ? 0.7157125 0.3889525 0.5402163 1 155 -ATOM N N . GLN A 0 21 . 21 GLN A N 1.0 ? 0.80890054 0.5445482 0.51971424 1 156 -ATOM C CA . GLN A 0 21 . 21 GLN A CA 1.0 ? 0.96920973 0.46549883 0.9242009 1 157 -ATOM C C . GLN A 0 21 . 21 GLN A C 1.0 ? 0.28676298 0.92137414 0.9795548 1 158 -ATOM O O . GLN A 0 21 . 21 GLN A O 1.0 ? 0.25866175 0.38803706 0.8520793 1 159 -ATOM C CB . GLN A 0 21 . 21 GLN A CB 1.0 ? 0.5330037 0.92829293 0.9275201 1 160 -ATOM C CG . 
GLN A 0 21 . 21 GLN A CG 1.0 ? 0.18944408 0.19878858 0.652856 1 161 -ATOM C CD . GLN A 0 21 . 21 GLN A CD 1.0 ? 0.26273167 0.1436489 0.037094463 1 162 -ATOM O OE1 . GLN A 0 21 . 21 GLN A OE1 1.0 ? 0.5993027 0.4772177 0.40064687 1 163 -ATOM N NE2 . GLN A 0 21 . 21 GLN A NE2 1.0 ? 0.9701619 0.9619898 0.6918008 1 164 -ATOM N N . LEU A 0 22 . 22 LEU A N 1.0 ? 0.64517826 0.33595055 0.9366775 1 165 -ATOM C CA . LEU A 0 22 . 22 LEU A CA 1.0 ? 0.020341294 0.09571002 0.5557338 1 166 -ATOM C C . LEU A 0 22 . 22 LEU A C 1.0 ? 0.1144536 0.06454904 0.70707923 1 167 -ATOM O O . LEU A 0 22 . 22 LEU A O 1.0 ? 0.7303887 0.30595016 0.12251022 1 168 -ATOM C CB . LEU A 0 22 . 22 LEU A CB 1.0 ? 0.35311207 0.4663147 0.76891834 1 169 -ATOM C CG . LEU A 0 22 . 22 LEU A CG 1.0 ? 0.94659466 0.8305637 0.3693371 1 170 -ATOM C CD1 . LEU A 0 22 . 22 LEU A CD1 1.0 ? 0.8022853 0.58083165 0.7940071 1 171 -ATOM C CD2 . LEU A 0 22 . 22 LEU A CD2 1.0 ? 0.6617464 0.36021015 0.29379037 1 172 -ATOM N N . GLN A 0 23 . 23 GLN A N 1.0 ? 0.49167922 0.3628723 0.9040842 1 173 -ATOM C CA . GLN A 0 23 . 23 GLN A CA 1.0 ? 0.705645 0.05448867 0.10611702 1 174 -ATOM C C . GLN A 0 23 . 23 GLN A C 1.0 ? 0.67095065 0.70534873 0.5388193 1 175 -ATOM O O . GLN A 0 23 . 23 GLN A O 1.0 ? 0.114655785 0.20084065 0.96721524 1 176 -ATOM C CB . GLN A 0 23 . 23 GLN A CB 1.0 ? 0.25093248 0.64670694 0.90473235 1 177 -ATOM C CG . GLN A 0 23 . 23 GLN A CG 1.0 ? 0.6298752 0.66822505 0.96836257 1 178 -ATOM C CD . GLN A 0 23 . 23 GLN A CD 1.0 ? 0.4960289 0.48646775 0.20459688 1 179 -ATOM O OE1 . GLN A 0 23 . 23 GLN A OE1 1.0 ? 0.39609805 0.9703295 0.5164286 1 180 -ATOM N NE2 . GLN A 0 23 . 23 GLN A NE2 1.0 ? 0.4279274 0.42182517 0.51981413 1 181 -ATOM N N . TYR A 0 24 . 24 TYR A N 1.0 ? 0.7249731 0.079163544 0.87687606 1 182 -ATOM C CA . TYR A 0 24 . 24 TYR A CA 1.0 ? 0.2411425 0.03500603 0.39691985 1 183 -ATOM C C . TYR A 0 24 . 24 TYR A C 1.0 ? 0.69973856 0.25406805 0.20622541 1 184 -ATOM O O . TYR A 0 24 . 24 TYR A O 1.0 ? 
0.33825484 0.95200896 0.94989604 1 185 -ATOM C CB . TYR A 0 24 . 24 TYR A CB 1.0 ? 0.17269161 0.4608634 0.3741373 1 186 -ATOM C CG . TYR A 0 24 . 24 TYR A CG 1.0 ? 0.66805935 0.50142735 0.9772811 1 187 -ATOM C CD1 . TYR A 0 24 . 24 TYR A CD1 1.0 ? 0.8180678 0.71514875 0.68330276 1 188 -ATOM C CD2 . TYR A 0 24 . 24 TYR A CD2 1.0 ? 0.4819288 0.18265063 0.01844217 1 189 -ATOM C CE1 . TYR A 0 24 . 24 TYR A CE1 1.0 ? 0.35798612 0.6141053 0.22718418 1 190 -ATOM C CE2 . TYR A 0 24 . 24 TYR A CE2 1.0 ? 0.18686438 0.64816844 0.22219117 1 191 -ATOM C CZ . TYR A 0 24 . 24 TYR A CZ 1.0 ? 0.824775 0.030876745 0.2403768 1 192 -ATOM O OH . TYR A 0 24 . 24 TYR A OH 1.0 ? 0.2718865 0.7017348 0.17275813 1 193 -ATOM N N . LEU A 0 25 . 25 LEU A N 1.0 ? 0.75486654 0.28188914 0.89550185 1 194 -ATOM C CA . LEU A 0 25 . 25 LEU A CA 1.0 ? 0.7474262 0.70810413 0.39533788 1 195 -ATOM C C . LEU A 0 25 . 25 LEU A C 1.0 ? 0.26445004 0.33306774 0.9429369 1 196 -ATOM O O . LEU A 0 25 . 25 LEU A O 1.0 ? 0.9166853 0.4629344 0.99060625 1 197 -ATOM C CB . LEU A 0 25 . 25 LEU A CB 1.0 ? 0.39687434 0.7149604 0.610341 1 198 -ATOM C CG . LEU A 0 25 . 25 LEU A CG 1.0 ? 0.57688886 0.6042523 0.3455713 1 199 -ATOM C CD1 . LEU A 0 25 . 25 LEU A CD1 1.0 ? 0.10287278 0.36124068 0.78332627 1 200 -ATOM C CD2 . LEU A 0 25 . 25 LEU A CD2 1.0 ? 0.28160685 0.058372986 0.42232192 1 201 -ATOM N N . LEU A 0 26 . 26 LEU A N 1.0 ? 0.4208456 0.3000153 0.23285152 1 202 -ATOM C CA . LEU A 0 26 . 26 LEU A CA 1.0 ? 0.4976509 0.41035125 0.7459699 1 203 -ATOM C C . LEU A 0 26 . 26 LEU A C 1.0 ? 0.5540123 0.16886407 0.0312236 1 204 -ATOM O O . LEU A 0 26 . 26 LEU A O 1.0 ? 0.7925842 0.87869996 0.7300467 1 205 -ATOM C CB . LEU A 0 26 . 26 LEU A CB 1.0 ? 0.53228873 0.6269484 0.69373524 1 206 -ATOM C CG . LEU A 0 26 . 26 LEU A CG 1.0 ? 0.799161 0.69473606 0.696677 1 207 -ATOM C CD1 . LEU A 0 26 . 26 LEU A CD1 1.0 ? 0.61557096 0.094855554 0.92351526 1 208 -ATOM C CD2 . LEU A 0 26 . 26 LEU A CD2 1.0 ? 
0.6313924 0.80748236 0.7953716 1 209 -ATOM N N . ARG A 0 27 . 27 ARG A N 1.0 ? 0.07364199 0.40121824 0.8995278 1 210 -ATOM C CA . ARG A 0 27 . 27 ARG A CA 1.0 ? 0.89179105 0.6805262 0.67873794 1 211 -ATOM C C . ARG A 0 27 . 27 ARG A C 1.0 ? 0.1860389 0.7314234 0.86580944 1 212 -ATOM O O . ARG A 0 27 . 27 ARG A O 1.0 ? 0.53071624 0.43415344 0.84086686 1 213 -ATOM C CB . ARG A 0 27 . 27 ARG A CB 1.0 ? 0.64432424 0.06206858 0.48799354 1 214 -ATOM C CG . ARG A 0 27 . 27 ARG A CG 1.0 ? 0.3571948 0.8526172 0.69939417 1 215 -ATOM C CD . ARG A 0 27 . 27 ARG A CD 1.0 ? 0.65593785 0.8041458 0.79414433 1 216 -ATOM N NE . ARG A 0 27 . 27 ARG A NE 1.0 ? 0.40341526 0.34597468 0.0847852 1 217 -ATOM C CZ . ARG A 0 27 . 27 ARG A CZ 1.0 ? 0.93065953 0.40408003 0.6368412 1 218 -ATOM N NH1 . ARG A 0 27 . 27 ARG A NH1 1.0 ? 0.399791 0.39774838 0.067218065 1 219 -ATOM N NH2 . ARG A 0 27 . 27 ARG A NH2 1.0 +1 0.081883095 0.15008911 0.73181164 1 220 -ATOM N N . VAL A 0 28 . 28 VAL A N 1.0 ? 0.24258815 0.9969795 0.6867367 1 221 -ATOM C CA . VAL A 0 28 . 28 VAL A CA 1.0 ? 0.80981296 0.7863334 0.7959068 1 222 -ATOM C C . VAL A 0 28 . 28 VAL A C 1.0 ? 0.5834704 0.79596746 0.741709 1 223 -ATOM O O . VAL A 0 28 . 28 VAL A O 1.0 ? 0.0036967606 0.7628398 0.35516375 1 224 -ATOM C CB . VAL A 0 28 . 28 VAL A CB 1.0 ? 0.2760262 0.67746586 0.7366163 1 225 -ATOM C CG1 . VAL A 0 28 . 28 VAL A CG1 1.0 ? 0.2633362 0.33417952 0.7550066 1 226 -ATOM C CG2 . VAL A 0 28 . 28 VAL A CG2 1.0 ? 0.57380646 0.79198766 0.5037916 1 227 -ATOM N N . VAL A 0 29 . 29 VAL A N 1.0 ? 0.4697016 0.36151236 0.33613917 1 228 -ATOM C CA . VAL A 0 29 . 29 VAL A CA 1.0 ? 0.25374663 0.34883168 0.40833127 1 229 -ATOM C C . VAL A 0 29 . 29 VAL A C 1.0 ? 0.08079732 0.959353 0.3490183 1 230 -ATOM O O . VAL A 0 29 . 29 VAL A O 1.0 ? 0.22978358 0.8475634 0.27998576 1 231 -ATOM C CB . VAL A 0 29 . 29 VAL A CB 1.0 ? 0.93950206 0.91078204 0.091652006 1 232 -ATOM C CG1 . VAL A 0 29 . 29 VAL A CG1 1.0 ? 
0.4132398 0.24040161 0.43864828 1 233 -ATOM C CG2 . VAL A 0 29 . 29 VAL A CG2 1.0 ? 0.023869641 0.81135565 0.99292576 1 234 -ATOM N N . LEU A 0 30 . 30 LEU A N 1.0 ? 0.37723714 0.98791075 0.6489508 1 235 -ATOM C CA . LEU A 0 30 . 30 LEU A CA 1.0 ? 0.4351733 0.89115065 0.030825859 1 236 -ATOM C C . LEU A 0 30 . 30 LEU A C 1.0 ? 0.53301764 0.52674294 0.35373816 1 237 -ATOM O O . LEU A 0 30 . 30 LEU A O 1.0 ? 0.009496881 0.6027839 0.2099817 1 238 -ATOM C CB . LEU A 0 30 . 30 LEU A CB 1.0 ? 0.7905207 0.15026835 0.029156953 1 239 -ATOM C CG . LEU A 0 30 . 30 LEU A CG 1.0 ? 0.36536542 0.058909286 0.43767282 1 240 -ATOM C CD1 . LEU A 0 30 . 30 LEU A CD1 1.0 ? 0.57813746 0.674928 0.07748683 1 241 -ATOM C CD2 . LEU A 0 30 . 30 LEU A CD2 1.0 ? 0.46156543 0.4392663 0.18404263 1 242 -ATOM N N . LYS A 0 31 . 31 LYS A N 1.0 ? 0.47373173 0.02611834 0.08264111 1 243 -ATOM C CA . LYS A 0 31 . 31 LYS A CA 1.0 ? 0.4821469 0.5776503 0.0050188107 1 244 -ATOM C C . LYS A 0 31 . 31 LYS A C 1.0 ? 0.891857 0.07156645 0.70313156 1 245 -ATOM O O . LYS A 0 31 . 31 LYS A O 1.0 ? 0.452953 0.9744351 0.75080466 1 246 -ATOM C CB . LYS A 0 31 . 31 LYS A CB 1.0 ? 0.00027897215 0.6151474 0.21232165 1 247 -ATOM C CG . LYS A 0 31 . 31 LYS A CG 1.0 ? 0.8521604 0.054145176 0.4944672 1 248 -ATOM C CD . LYS A 0 31 . 31 LYS A CD 1.0 ? 0.839968 0.76957893 0.24408461 1 249 -ATOM C CE . LYS A 0 31 . 31 LYS A CE 1.0 ? 0.8951367 0.7741478 0.028773114 1 250 -ATOM N NZ . LYS A 0 31 . 31 LYS A NZ 1.0 +1 0.040099703 0.7944569 0.68605727 1 251 -ATOM N N . THR A 0 32 . 32 THR A N 1.0 ? 0.36034122 0.7483607 0.54918945 1 252 -ATOM C CA . THR A 0 32 . 32 THR A CA 1.0 ? 0.2290617 0.7152383 0.8972597 1 253 -ATOM C C . THR A 0 32 . 32 THR A C 1.0 ? 0.28925222 0.94308394 0.54198617 1 254 -ATOM O O . THR A 0 32 . 32 THR A O 1.0 ? 0.696314 0.90295285 0.3289424 1 255 -ATOM C CB . THR A 0 32 . 32 THR A CB 1.0 ? 0.45196825 0.68025315 0.3531177 1 256 -ATOM O OG1 . THR A 0 32 . 32 THR A OG1 1.0 ? 
0.42760873 0.27521527 0.64967895 1 257 -ATOM C CG2 . THR A 0 32 . 32 THR A CG2 1.0 ? 0.031886745 0.61574 0.5574547 1 258 -ATOM N N . LEU A 0 33 . 33 LEU A N 1.0 ? 0.53117603 0.55405736 0.23571807 1 259 -ATOM C CA . LEU A 0 33 . 33 LEU A CA 1.0 ? 0.9329589 0.23197943 0.08524252 1 260 -ATOM C C . LEU A 0 33 . 33 LEU A C 1.0 ? 0.8439586 0.41902125 0.6034063 1 261 -ATOM O O . LEU A 0 33 . 33 LEU A O 1.0 ? 0.8525831 0.6380688 0.4406032 1 262 -ATOM C CB . LEU A 0 33 . 33 LEU A CB 1.0 ? 0.37521634 0.6745624 0.52080065 1 263 -ATOM C CG . LEU A 0 33 . 33 LEU A CG 1.0 ? 0.009592704 0.8935191 0.23479173 1 264 -ATOM C CD1 . LEU A 0 33 . 33 LEU A CD1 1.0 ? 0.50979424 0.42545575 0.5089834 1 265 -ATOM C CD2 . LEU A 0 33 . 33 LEU A CD2 1.0 ? 0.40616918 0.43856147 0.91113335 1 266 -ATOM N N . TRP A 0 34 . 34 TRP A N 1.0 ? 0.8763125 0.6976566 0.97764003 1 267 -ATOM C CA . TRP A 0 34 . 34 TRP A CA 1.0 ? 0.53415287 0.032781 0.70587426 1 268 -ATOM C C . TRP A 0 34 . 34 TRP A C 1.0 ? 0.15644482 0.3016681 0.5216367 1 269 -ATOM O O . TRP A 0 34 . 34 TRP A O 1.0 ? 0.3692837 0.08673341 0.7888725 1 270 -ATOM C CB . TRP A 0 34 . 34 TRP A CB 1.0 ? 0.13832426 0.1944841 0.94995654 1 271 -ATOM C CG . TRP A 0 34 . 34 TRP A CG 1.0 ? 0.2992301 0.6982968 0.47736204 1 272 -ATOM C CD1 . TRP A 0 34 . 34 TRP A CD1 1.0 ? 0.86638176 0.4542065 0.46472564 1 273 -ATOM C CD2 . TRP A 0 34 . 34 TRP A CD2 1.0 ? 0.09738437 0.008563362 0.95677596 1 274 -ATOM N NE1 . TRP A 0 34 . 34 TRP A NE1 1.0 ? 0.7637661 0.061028883 0.62895584 1 275 -ATOM C CE2 . TRP A 0 34 . 34 TRP A CE2 1.0 ? 0.4840752 0.41623017 0.5721225 1 276 -ATOM C CE3 . TRP A 0 34 . 34 TRP A CE3 1.0 ? 0.7114315 0.77232516 0.4889651 1 277 -ATOM C CZ2 . TRP A 0 34 . 34 TRP A CZ2 1.0 ? 0.4670595 0.6225665 0.87318504 1 278 -ATOM C CZ3 . TRP A 0 34 . 34 TRP A CZ3 1.0 ? 0.71378464 0.2679763 0.88090295 1 279 -ATOM C CH2 . TRP A 0 34 . 34 TRP A CH2 1.0 ? 0.40837285 0.72504145 0.31229398 1 280 -ATOM N N . LYS A 0 35 . 35 LYS A N 1.0 ? 
0.61882555 0.38406456 0.9586151 1 281 -ATOM C CA . LYS A 0 35 . 35 LYS A CA 1.0 ? 0.13099171 0.30971974 0.24213684 1 282 -ATOM C C . LYS A 0 35 . 35 LYS A C 1.0 ? 0.9393427 0.10598465 0.1797617 1 283 -ATOM O O . LYS A 0 35 . 35 LYS A O 1.0 ? 0.4868086 0.16185963 0.10547698 1 284 -ATOM C CB . LYS A 0 35 . 35 LYS A CB 1.0 ? 0.6111254 0.15151411 0.9993287 1 285 -ATOM C CG . LYS A 0 35 . 35 LYS A CG 1.0 ? 0.481504 0.18069822 0.46474904 1 286 -ATOM C CD . LYS A 0 35 . 35 LYS A CD 1.0 ? 0.056320995 0.2640264 0.19070192 1 287 -ATOM C CE . LYS A 0 35 . 35 LYS A CE 1.0 ? 0.3143112 0.17456672 0.30530784 1 288 -ATOM N NZ . LYS A 0 35 . 35 LYS A NZ 1.0 +1 0.243832 0.15671082 0.92448795 1 289 -ATOM N N . HIS A 0 36 . 36 HIS A N 1.0 ? 0.2861737 0.27372855 0.23431356 1 290 -ATOM C CA . HIS A 0 36 . 36 HIS A CA 1.0 ? 0.8677686 0.034978483 0.71313596 1 291 -ATOM C C . HIS A 0 36 . 36 HIS A C 1.0 ? 0.08439963 0.2762316 0.5893882 1 292 -ATOM O O . HIS A 0 36 . 36 HIS A O 1.0 ? 0.032292344 0.5771729 0.9755752 1 293 -ATOM C CB . HIS A 0 36 . 36 HIS A CB 1.0 ? 0.403272 0.614884 0.15917368 1 294 -ATOM C CG . HIS A 0 36 . 36 HIS A CG 1.0 ? 0.6721069 0.48575085 0.75274086 1 295 -ATOM N ND1 . HIS A 0 36 . 36 HIS A ND1 1.0 +1 0.903881 0.558245 0.01981394 1 296 -ATOM C CD2 . HIS A 0 36 . 36 HIS A CD2 1.0 ? 0.7948876 0.9163885 0.37661037 1 297 -ATOM C CE1 . HIS A 0 36 . 36 HIS A CE1 1.0 ? 0.79036254 0.8025582 0.08889294 1 298 -ATOM N NE2 . HIS A 0 36 . 36 HIS A NE2 1.0 ? 0.09763778 0.27818456 0.13502802 1 299 -ATOM N N . GLN A 0 37 . 37 GLN A N 1.0 ? 0.93429846 0.56118715 0.9626143 1 300 -ATOM C CA . GLN A 0 37 . 37 GLN A CA 1.0 ? 0.28249818 0.8713896 0.10653208 1 301 -ATOM C C . GLN A 0 37 . 37 GLN A C 1.0 ? 0.53094935 0.5090379 0.19547686 1 302 -ATOM O O . GLN A 0 37 . 37 GLN A O 1.0 ? 0.44246364 0.56279707 0.4501594 1 303 -ATOM C CB . GLN A 0 37 . 37 GLN A CB 1.0 ? 0.6787893 0.6699873 0.5763236 1 304 -ATOM C CG . GLN A 0 37 . 37 GLN A CG 1.0 ? 
0.1764276 0.69236153 0.94728345 1 305 -ATOM C CD . GLN A 0 37 . 37 GLN A CD 1.0 ? 0.2812611 0.27163294 0.008515341 1 306 -ATOM O OE1 . GLN A 0 37 . 37 GLN A OE1 1.0 ? 0.82320917 0.84714 0.7970396 1 307 -ATOM N NE2 . GLN A 0 37 . 37 GLN A NE2 1.0 ? 0.30994013 0.9272985 0.24443364 1 308 -ATOM N N . PHE A 0 38 . 38 PHE A N 1.0 ? 0.24132045 0.8276574 0.83624 1 309 -ATOM C CA . PHE A 0 38 . 38 PHE A CA 1.0 ? 0.61637336 0.15871711 0.56080157 1 310 -ATOM C C . PHE A 0 38 . 38 PHE A C 1.0 ? 0.31044558 0.7173119 0.11906884 1 311 -ATOM O O . PHE A 0 38 . 38 PHE A O 1.0 ? 0.42534485 0.10632252 0.5072999 1 312 -ATOM C CB . PHE A 0 38 . 38 PHE A CB 1.0 ? 0.29540884 0.28302664 0.21482128 1 313 -ATOM C CG . PHE A 0 38 . 38 PHE A CG 1.0 ? 0.785434 0.9256891 0.5838521 1 314 -ATOM C CD1 . PHE A 0 38 . 38 PHE A CD1 1.0 ? 0.4388185 0.07242086 0.22534795 1 315 -ATOM C CD2 . PHE A 0 38 . 38 PHE A CD2 1.0 ? 0.35136583 0.6891547 0.018769884 1 316 -ATOM C CE1 . PHE A 0 38 . 38 PHE A CE1 1.0 ? 0.6766703 0.6372666 0.81447923 1 317 -ATOM C CE2 . PHE A 0 38 . 38 PHE A CE2 1.0 ? 0.26072857 0.94768125 0.21418172 1 318 -ATOM C CZ . PHE A 0 38 . 38 PHE A CZ 1.0 ? 0.14135395 0.96440667 0.37513745 1 319 -ATOM N N . ALA A 0 39 . 39 ALA A N 1.0 ? 0.33912235 0.4565799 0.60436815 1 320 -ATOM C CA . ALA A 0 39 . 39 ALA A CA 1.0 ? 0.22054298 0.8360551 0.61757666 1 321 -ATOM C C . ALA A 0 39 . 39 ALA A C 1.0 ? 0.89400005 0.83274525 0.5622862 1 322 -ATOM O O . ALA A 0 39 . 39 ALA A O 1.0 ? 0.573885 0.9783609 0.4960198 1 323 -ATOM C CB . ALA A 0 39 . 39 ALA A CB 1.0 ? 0.31544238 0.6257262 0.6302435 1 324 -ATOM N N . TRP A 0 40 . 40 TRP A N 1.0 ? 0.058332868 0.19465066 0.27798608 1 325 -ATOM C CA . TRP A 0 40 . 40 TRP A CA 1.0 ? 0.809721 0.6845177 0.58095104 1 326 -ATOM C C . TRP A 0 40 . 40 TRP A C 1.0 ? 0.24329413 0.9988058 0.3829077 1 327 -ATOM O O . TRP A 0 40 . 40 TRP A O 1.0 ? 0.35462514 0.2625621 0.91682655 1 328 -ATOM C CB . TRP A 0 40 . 40 TRP A CB 1.0 ? 
0.5050167 0.67647064 0.114436194 1 329 -ATOM C CG . TRP A 0 40 . 40 TRP A CG 1.0 ? 0.5792685 0.24906644 0.65562356 1 330 -ATOM C CD1 . TRP A 0 40 . 40 TRP A CD1 1.0 ? 0.66754895 0.3524946 0.9716206 1 331 -ATOM C CD2 . TRP A 0 40 . 40 TRP A CD2 1.0 ? 0.1201437 0.8924077 0.67834395 1 332 -ATOM N NE1 . TRP A 0 40 . 40 TRP A NE1 1.0 ? 0.3492674 0.551546 0.5727535 1 333 -ATOM C CE2 . TRP A 0 40 . 40 TRP A CE2 1.0 ? 0.04237266 0.15520014 0.2996461 1 334 -ATOM C CE3 . TRP A 0 40 . 40 TRP A CE3 1.0 ? 0.46351382 0.15941851 0.16332635 1 335 -ATOM C CZ2 . TRP A 0 40 . 40 TRP A CZ2 1.0 ? 0.5336771 0.008008702 0.9555253 1 336 -ATOM C CZ3 . TRP A 0 40 . 40 TRP A CZ3 1.0 ? 0.18949388 0.44124085 0.508646 1 337 -ATOM C CH2 . TRP A 0 40 . 40 TRP A CH2 1.0 ? 0.72099763 0.05249567 0.04077344 1 338 -ATOM N N . PRO A 0 41 . 41 PRO A N 1.0 ? 0.19454743 0.57041323 0.9544409 1 339 -ATOM C CA . PRO A 0 41 . 41 PRO A CA 1.0 ? 0.6506624 0.96393573 0.69232154 1 340 -ATOM C C . PRO A 0 41 . 41 PRO A C 1.0 ? 0.4431386 0.8749864 0.9342974 1 341 -ATOM O O . PRO A 0 41 . 41 PRO A O 1.0 ? 0.8908653 0.7035609 0.6911605 1 342 -ATOM C CB . PRO A 0 41 . 41 PRO A CB 1.0 ? 0.27942178 0.6961869 0.8045682 1 343 -ATOM C CG . PRO A 0 41 . 41 PRO A CG 1.0 ? 0.15312766 0.3662034 0.89172906 1 344 -ATOM C CD . PRO A 0 41 . 41 PRO A CD 1.0 ? 0.2834223 0.2624727 0.1651265 1 345 -ATOM N N . PHE A 0 42 . 42 PHE A N 1.0 ? 0.3783623 0.7897705 0.43480834 1 346 -ATOM C CA . PHE A 0 42 . 42 PHE A CA 1.0 ? 0.3926801 0.853817 0.50684726 1 347 -ATOM C C . PHE A 0 42 . 42 PHE A C 1.0 ? 0.73155814 0.68620074 0.11165136 1 348 -ATOM O O . PHE A 0 42 . 42 PHE A O 1.0 ? 0.5351773 0.9263497 0.47025672 1 349 -ATOM C CB . PHE A 0 42 . 42 PHE A CB 1.0 ? 0.56616986 0.52029574 0.116331436 1 350 -ATOM C CG . PHE A 0 42 . 42 PHE A CG 1.0 ? 0.6274365 0.39049104 0.7782437 1 351 -ATOM C CD1 . PHE A 0 42 . 42 PHE A CD1 1.0 ? 0.48680526 0.9125988 0.09513958 1 352 -ATOM C CD2 . PHE A 0 42 . 42 PHE A CD2 1.0 ? 
0.4682083 0.24072623 0.35675687 1 353 -ATOM C CE1 . PHE A 0 42 . 42 PHE A CE1 1.0 ? 0.22253762 0.5965237 0.4112068 1 354 -ATOM C CE2 . PHE A 0 42 . 42 PHE A CE2 1.0 ? 0.20632601 0.561948 0.19475447 1 355 -ATOM C CZ . PHE A 0 42 . 42 PHE A CZ 1.0 ? 0.0701673 0.03420465 0.36238393 1 356 -ATOM N N . GLN A 0 43 . 43 GLN A N 1.0 ? 0.68273515 0.10084579 0.6850231 1 357 -ATOM C CA . GLN A 0 43 . 43 GLN A CA 1.0 ? 0.667874 0.27110884 0.32278103 1 358 -ATOM C C . GLN A 0 43 . 43 GLN A C 1.0 ? 0.0013632654 0.46050268 0.1506054 1 359 -ATOM O O . GLN A 0 43 . 43 GLN A O 1.0 ? 0.77324444 0.39482614 0.6861698 1 360 -ATOM C CB . GLN A 0 43 . 43 GLN A CB 1.0 ? 0.37225193 0.5460526 0.25034234 1 361 -ATOM C CG . GLN A 0 43 . 43 GLN A CG 1.0 ? 0.2121051 0.02285932 0.026446385 1 362 -ATOM C CD . GLN A 0 43 . 43 GLN A CD 1.0 ? 0.5200893 0.9776999 0.7087875 1 363 -ATOM O OE1 . GLN A 0 43 . 43 GLN A OE1 1.0 ? 0.34084055 0.31931698 0.29577824 1 364 -ATOM N NE2 . GLN A 0 43 . 43 GLN A NE2 1.0 ? 0.20554468 0.41193882 0.49367467 1 365 -ATOM N N . GLN A 0 44 . 44 GLN A N 1.0 ? 0.898173 0.58473605 0.28730813 1 366 -ATOM C CA . GLN A 0 44 . 44 GLN A CA 1.0 ? 0.9712175 0.35761657 0.4122674 1 367 -ATOM C C . GLN A 0 44 . 44 GLN A C 1.0 ? 0.7774812 0.18178388 0.68586576 1 368 -ATOM O O . GLN A 0 44 . 44 GLN A O 1.0 ? 0.1646274 0.2238168 0.92106116 1 369 -ATOM C CB . GLN A 0 44 . 44 GLN A CB 1.0 ? 0.58019793 0.5133843 0.01489518 1 370 -ATOM C CG . GLN A 0 44 . 44 GLN A CG 1.0 ? 0.55293626 0.29523748 0.6011672 1 371 -ATOM C CD . GLN A 0 44 . 44 GLN A CD 1.0 ? 0.77074534 0.20868269 0.1433928 1 372 -ATOM O OE1 . GLN A 0 44 . 44 GLN A OE1 1.0 ? 0.7656789 0.46189165 0.11411065 1 373 -ATOM N NE2 . GLN A 0 44 . 44 GLN A NE2 1.0 ? 0.6504808 0.20377024 0.8955777 1 374 -ATOM N N . PRO A 0 45 . 45 PRO A N 1.0 ? 0.38310406 0.697492 0.28692868 1 375 -ATOM C CA . PRO A 0 45 . 45 PRO A CA 1.0 ? 0.9722227 0.32245997 0.2525886 1 376 -ATOM C C . PRO A 0 45 . 45 PRO A C 1.0 ? 
0.9108143 0.53121036 0.9157529 1 377 -ATOM O O . PRO A 0 45 . 45 PRO A O 1.0 ? 0.92010844 0.4006269 0.81097895 1 378 -ATOM C CB . PRO A 0 45 . 45 PRO A CB 1.0 ? 0.2132886 0.79364824 0.55160093 1 379 -ATOM C CG . PRO A 0 45 . 45 PRO A CG 1.0 ? 0.056237273 0.3729544 0.53156704 1 380 -ATOM C CD . PRO A 0 45 . 45 PRO A CD 1.0 ? 0.9948235 0.13471238 0.5598238 1 381 -ATOM N N . VAL A 0 46 . 46 VAL A N 1.0 ? 0.8450263 0.07387933 0.19737151 1 382 -ATOM C CA . VAL A 0 46 . 46 VAL A CA 1.0 ? 0.87547433 0.8105741 0.000982687 1 383 -ATOM C C . VAL A 0 46 . 46 VAL A C 1.0 ? 0.43693987 0.7547549 0.68130934 1 384 -ATOM O O . VAL A 0 46 . 46 VAL A O 1.0 ? 0.17694262 0.93247986 0.22165586 1 385 -ATOM C CB . VAL A 0 46 . 46 VAL A CB 1.0 ? 0.099979416 0.24973628 0.25027823 1 386 -ATOM C CG1 . VAL A 0 46 . 46 VAL A CG1 1.0 ? 0.75629526 0.089619905 0.25832835 1 387 -ATOM C CG2 . VAL A 0 46 . 46 VAL A CG2 1.0 ? 0.3822381 0.6217628 0.58674234 1 388 -ATOM N N . ASP A 0 47 . 47 ASP A N 1.0 ? 0.6788925 0.9093209 0.34522125 1 389 -ATOM C CA . ASP A 0 47 . 47 ASP A CA 1.0 ? 0.5890678 0.12449815 0.06177147 1 390 -ATOM C C . ASP A 0 47 . 47 ASP A C 1.0 ? 0.8268594 0.5678214 0.2152193 1 391 -ATOM O O . ASP A 0 47 . 47 ASP A O 1.0 ? 0.79475945 0.12263146 0.23967016 1 392 -ATOM C CB . ASP A 0 47 . 47 ASP A CB 1.0 ? 0.19480489 0.038643777 0.05563597 1 393 -ATOM C CG . ASP A 0 47 . 47 ASP A CG 1.0 ? 0.41991577 0.90646225 0.9150677 1 394 -ATOM O OD1 . ASP A 0 47 . 47 ASP A OD1 1.0 ? 0.26781636 0.94036305 0.009159149 1 395 -ATOM O OD2 . ASP A 0 47 . 47 ASP A OD2 1.0 ? 0.12321646 0.7538334 0.33868995 1 396 -ATOM N N . ALA A 0 48 . 48 ALA A N 1.0 ? 0.3265395 0.550754 0.49227074 1 397 -ATOM C CA . ALA A 0 48 . 48 ALA A CA 1.0 ? 0.83506155 0.47559667 0.89547706 1 398 -ATOM C C . ALA A 0 48 . 48 ALA A C 1.0 ? 0.24771407 0.71267295 0.34179804 1 399 -ATOM O O . ALA A 0 48 . 48 ALA A O 1.0 ? 0.42571288 0.8026257 0.30625835 1 400 -ATOM C CB . ALA A 0 48 . 48 ALA A CB 1.0 ? 
0.7027855 0.15282258 0.36189175 1 401 -ATOM N N . VAL A 0 49 . 49 VAL A N 1.0 ? 0.12749529 0.1779846 0.047657162 1 402 -ATOM C CA . VAL A 0 49 . 49 VAL A CA 1.0 ? 0.2852444 0.8111603 0.6375006 1 403 -ATOM C C . VAL A 0 49 . 49 VAL A C 1.0 ? 0.86525404 0.7326307 0.013440264 1 404 -ATOM O O . VAL A 0 49 . 49 VAL A O 1.0 ? 0.71643156 0.9493907 0.42732236 1 405 -ATOM C CB . VAL A 0 49 . 49 VAL A CB 1.0 ? 0.28902626 0.6003257 0.78393215 1 406 -ATOM C CG1 . VAL A 0 49 . 49 VAL A CG1 1.0 ? 0.55711704 0.5040807 0.1532139 1 407 -ATOM C CG2 . VAL A 0 49 . 49 VAL A CG2 1.0 ? 0.028724032 0.29403707 0.87459034 1 408 -ATOM N N . LYS A 0 50 . 50 LYS A N 1.0 ? 0.15041742 0.64174986 0.9539677 1 409 -ATOM C CA . LYS A 0 50 . 50 LYS A CA 1.0 ? 0.8996654 0.51149637 0.7355024 1 410 -ATOM C C . LYS A 0 50 . 50 LYS A C 1.0 ? 0.34185538 0.08951102 0.7174447 1 411 -ATOM O O . LYS A 0 50 . 50 LYS A O 1.0 ? 0.6129762 0.36294377 0.14702405 1 412 -ATOM C CB . LYS A 0 50 . 50 LYS A CB 1.0 ? 0.48923004 0.43207273 0.5273602 1 413 -ATOM C CG . LYS A 0 50 . 50 LYS A CG 1.0 ? 0.31637177 0.13036416 0.3529231 1 414 -ATOM C CD . LYS A 0 50 . 50 LYS A CD 1.0 ? 0.20196363 0.087143704 0.32418284 1 415 -ATOM C CE . LYS A 0 50 . 50 LYS A CE 1.0 ? 0.9895802 0.897067 0.1891967 1 416 -ATOM N NZ . LYS A 0 50 . 50 LYS A NZ 1.0 +1 0.25250864 0.8129167 0.8180016 1 417 -ATOM N N . LEU A 0 51 . 51 LEU A N 1.0 ? 0.6731959 0.6007789 0.9175959 1 418 -ATOM C CA . LEU A 0 51 . 51 LEU A CA 1.0 ? 0.23953758 0.69263124 0.049645927 1 419 -ATOM C C . LEU A 0 51 . 51 LEU A C 1.0 ? 0.17629163 0.10285572 0.37781242 1 420 -ATOM O O . LEU A 0 51 . 51 LEU A O 1.0 ? 0.6574921 0.97384006 0.43010762 1 421 -ATOM C CB . LEU A 0 51 . 51 LEU A CB 1.0 ? 0.25477478 0.81706965 0.77755034 1 422 -ATOM C CG . LEU A 0 51 . 51 LEU A CG 1.0 ? 0.7444515 0.8313177 0.67655224 1 423 -ATOM C CD1 . LEU A 0 51 . 51 LEU A CD1 1.0 ? 0.40218383 0.73413205 0.55250806 1 424 -ATOM C CD2 . LEU A 0 51 . 51 LEU A CD2 1.0 ? 
0.39198065 0.86490667 0.9755116 1 425 -ATOM N N . ASN A 0 52 . 52 ASN A N 1.0 ? 0.2414945 0.47203758 0.75218296 1 426 -ATOM C CA . ASN A 0 52 . 52 ASN A CA 1.0 ? 0.2563783 0.61133546 0.78602546 1 427 -ATOM C C . ASN A 0 52 . 52 ASN A C 1.0 ? 0.96105325 0.4597222 0.12901972 1 428 -ATOM O O . ASN A 0 52 . 52 ASN A O 1.0 ? 0.3932744 0.61728877 0.51243794 1 429 -ATOM C CB . ASN A 0 52 . 52 ASN A CB 1.0 ? 0.40910214 0.48209307 0.12873648 1 430 -ATOM C CG . ASN A 0 52 . 52 ASN A CG 1.0 ? 0.28001553 0.87434286 0.98310614 1 431 -ATOM O OD1 . ASN A 0 52 . 52 ASN A OD1 1.0 ? 0.75717556 0.78281814 0.9520584 1 432 -ATOM N ND2 . ASN A 0 52 . 52 ASN A ND2 1.0 ? 0.37309688 0.63862413 0.090961136 1 433 -ATOM N N . LEU A 0 53 . 53 LEU A N 1.0 ? 0.8942717 0.2619221 0.08328475 1 434 -ATOM C CA . LEU A 0 53 . 53 LEU A CA 1.0 ? 0.4682801 0.98318464 0.4306253 1 435 -ATOM C C . LEU A 0 53 . 53 LEU A C 1.0 ? 0.73690325 0.9810117 0.023892913 1 436 -ATOM O O . LEU A 0 53 . 53 LEU A O 1.0 ? 0.13865861 0.09336469 0.5541753 1 437 -ATOM C CB . LEU A 0 53 . 53 LEU A CB 1.0 ? 0.6756664 0.43402842 0.5601464 1 438 -ATOM C CG . LEU A 0 53 . 53 LEU A CG 1.0 ? 0.607142 0.23738 0.29649872 1 439 -ATOM C CD1 . LEU A 0 53 . 53 LEU A CD1 1.0 ? 0.5278645 0.3127118 0.07962155 1 440 -ATOM C CD2 . LEU A 0 53 . 53 LEU A CD2 1.0 ? 0.9443095 0.9778153 0.5214988 1 441 -ATOM N N . PRO A 0 54 . 54 PRO A N 1.0 ? 0.11378 0.16686663 0.9626173 1 442 -ATOM C CA . PRO A 0 54 . 54 PRO A CA 1.0 ? 0.35484245 0.68859625 0.30588633 1 443 -ATOM C C . PRO A 0 54 . 54 PRO A C 1.0 ? 0.79584175 0.8332081 0.3134758 1 444 -ATOM O O . PRO A 0 54 . 54 PRO A O 1.0 ? 0.43976074 0.8686681 0.37198102 1 445 -ATOM C CB . PRO A 0 54 . 54 PRO A CB 1.0 ? 0.75398505 0.49161053 0.23245162 1 446 -ATOM C CG . PRO A 0 54 . 54 PRO A CG 1.0 ? 0.893446 0.62920696 0.29139066 1 447 -ATOM C CD . PRO A 0 54 . 54 PRO A CD 1.0 ? 0.1310129 0.8423899 0.33263823 1 448 -ATOM N N . ASP A 0 55 . 55 ASP A N 1.0 ? 0.09705027 0.49294725 0.9568137 1 449 -ATOM C CA . 
ASP A 0 55 . 55 ASP A CA 1.0 ? 0.31828985 0.7542209 0.67768025 1 450 -ATOM C C . ASP A 0 55 . 55 ASP A C 1.0 ? 0.3520471 0.8330337 0.24441315 1 451 -ATOM O O . ASP A 0 55 . 55 ASP A O 1.0 ? 0.19848327 0.50499994 0.2367969 1 452 -ATOM C CB . ASP A 0 55 . 55 ASP A CB 1.0 ? 0.7246686 0.43497664 0.27351695 1 453 -ATOM C CG . ASP A 0 55 . 55 ASP A CG 1.0 ? 0.4060561 0.32330722 0.1691336 1 454 -ATOM O OD1 . ASP A 0 55 . 55 ASP A OD1 1.0 ? 0.47426465 0.15464392 0.48247936 1 455 -ATOM O OD2 . ASP A 0 55 . 55 ASP A OD2 1.0 ? 0.60974556 0.7960627 0.51893693 1 456 -ATOM N N . TYR A 0 56 . 56 TYR A N 1.0 ? 0.6409492 0.5361827 0.13610755 1 457 -ATOM C CA . TYR A 0 56 . 56 TYR A CA 1.0 ? 0.023078844 0.52853984 0.594421 1 458 -ATOM C C . TYR A 0 56 . 56 TYR A C 1.0 ? 0.73059016 0.20889525 0.7104627 1 459 -ATOM O O . TYR A 0 56 . 56 TYR A O 1.0 ? 0.7269741 0.854606 0.090026565 1 460 -ATOM C CB . TYR A 0 56 . 56 TYR A CB 1.0 ? 0.99583536 0.6845918 0.8429558 1 461 -ATOM C CG . TYR A 0 56 . 56 TYR A CG 1.0 ? 0.69592273 0.7723455 0.2663198 1 462 -ATOM C CD1 . TYR A 0 56 . 56 TYR A CD1 1.0 ? 0.67809844 0.17919098 0.46456614 1 463 -ATOM C CD2 . TYR A 0 56 . 56 TYR A CD2 1.0 ? 0.6683967 0.20797648 0.10679155 1 464 -ATOM C CE1 . TYR A 0 56 . 56 TYR A CE1 1.0 ? 0.9883188 0.22850357 0.08986122 1 465 -ATOM C CE2 . TYR A 0 56 . 56 TYR A CE2 1.0 ? 0.5817833 0.019657798 0.13740902 1 466 -ATOM C CZ . TYR A 0 56 . 56 TYR A CZ 1.0 ? 0.7162707 0.8418105 0.5105862 1 467 -ATOM O OH . TYR A 0 56 . 56 TYR A OH 1.0 ? 0.3735214 0.9497863 0.32555947 1 468 -ATOM N N . TYR A 0 57 . 57 TYR A N 1.0 ? 0.023466997 0.10538969 0.8829781 1 469 -ATOM C CA . TYR A 0 57 . 57 TYR A CA 1.0 ? 0.8734042 0.99982506 0.39238128 1 470 -ATOM C C . TYR A 0 57 . 57 TYR A C 1.0 ? 0.4364223 0.79452866 0.72679955 1 471 -ATOM O O . TYR A 0 57 . 57 TYR A O 1.0 ? 0.12248909 0.3724624 0.15371205 1 472 -ATOM C CB . TYR A 0 57 . 57 TYR A CB 1.0 ? 0.26402193 0.0485551 0.14155975 1 473 -ATOM C CG . TYR A 0 57 . 57 TYR A CG 1.0 ? 
0.0825966 0.011951983 0.54967594 1 474 -ATOM C CD1 . TYR A 0 57 . 57 TYR A CD1 1.0 ? 0.47458884 0.5963146 0.27306435 1 475 -ATOM C CD2 . TYR A 0 57 . 57 TYR A CD2 1.0 ? 0.24296688 0.07430506 0.92682666 1 476 -ATOM C CE1 . TYR A 0 57 . 57 TYR A CE1 1.0 ? 0.07317685 0.41076773 0.016686127 1 477 -ATOM C CE2 . TYR A 0 57 . 57 TYR A CE2 1.0 ? 0.13635196 0.5781948 0.88886976 1 478 -ATOM C CZ . TYR A 0 57 . 57 TYR A CZ 1.0 ? 0.549576 0.046199955 0.09320808 1 479 -ATOM O OH . TYR A 0 57 . 57 TYR A OH 1.0 ? 0.69150263 0.96249306 0.29961255 1 480 -ATOM N N . LYS A 0 58 . 58 LYS A N 1.0 ? 0.94361424 0.5768941 0.12245171 1 481 -ATOM C CA . LYS A 0 58 . 58 LYS A CA 1.0 ? 0.011617245 0.67664725 0.65906113 1 482 -ATOM C C . LYS A 0 58 . 58 LYS A C 1.0 ? 0.686437 0.082245134 0.32738096 1 483 -ATOM O O . LYS A 0 58 . 58 LYS A O 1.0 ? 0.8154508 0.7771073 0.16270474 1 484 -ATOM C CB . LYS A 0 58 . 58 LYS A CB 1.0 ? 0.8200214 0.7773976 0.59146714 1 485 -ATOM C CG . LYS A 0 58 . 58 LYS A CG 1.0 ? 0.7049223 0.13847801 0.8280881 1 486 -ATOM C CD . LYS A 0 58 . 58 LYS A CD 1.0 ? 0.73141944 0.7507519 0.39907303 1 487 -ATOM C CE . LYS A 0 58 . 58 LYS A CE 1.0 ? 0.94238377 0.39166385 0.33839616 1 488 -ATOM N NZ . LYS A 0 58 . 58 LYS A NZ 1.0 +1 0.31109345 0.35899767 0.09597053 1 489 -ATOM N N . ILE A 0 59 . 59 ILE A N 1.0 ? 0.122641124 0.20786412 0.4760451 1 490 -ATOM C CA . ILE A 0 59 . 59 ILE A CA 1.0 ? 0.4759011 0.38828 0.954414 1 491 -ATOM C C . ILE A 0 59 . 59 ILE A C 1.0 ? 0.8142643 0.81461555 0.5940152 1 492 -ATOM O O . ILE A 0 59 . 59 ILE A O 1.0 ? 0.48675612 0.34838444 0.93186426 1 493 -ATOM C CB . ILE A 0 59 . 59 ILE A CB 1.0 ? 0.32384887 0.46244892 0.5481371 1 494 -ATOM C CG1 . ILE A 0 59 . 59 ILE A CG1 1.0 ? 0.61914474 0.39488813 0.69284356 1 495 -ATOM C CG2 . ILE A 0 59 . 59 ILE A CG2 1.0 ? 0.328423 0.664769 0.21947087 1 496 -ATOM C CD1 . ILE A 0 59 . 59 ILE A CD1 1.0 ? 0.74882305 0.95536745 0.16748987 1 497 -ATOM N N . ILE A 0 60 . 60 ILE A N 1.0 ? 
0.15950449 0.119161256 0.0054301424 1 498 -ATOM C CA . ILE A 0 60 . 60 ILE A CA 1.0 ? 0.5440776 0.9608999 0.21629007 1 499 -ATOM C C . ILE A 0 60 . 60 ILE A C 1.0 ? 0.9251559 0.48811427 0.34168687 1 500 -ATOM O O . ILE A 0 60 . 60 ILE A O 1.0 ? 0.88012916 0.9117337 0.108511694 1 501 -ATOM C CB . ILE A 0 60 . 60 ILE A CB 1.0 ? 0.6073841 0.013566251 0.45488393 1 502 -ATOM C CG1 . ILE A 0 60 . 60 ILE A CG1 1.0 ? 0.44930798 0.10914841 0.497312 1 503 -ATOM C CG2 . ILE A 0 60 . 60 ILE A CG2 1.0 ? 0.9013595 0.5590514 0.46300584 1 504 -ATOM C CD1 . ILE A 0 60 . 60 ILE A CD1 1.0 ? 0.26722455 0.35695934 0.954152 1 505 -ATOM N N . LYS A 0 61 . 61 LYS A N 1.0 ? 0.09274885 0.7857217 0.040294316 1 506 -ATOM C CA . LYS A 0 61 . 61 LYS A CA 1.0 ? 0.6369079 0.53161037 0.036455337 1 507 -ATOM C C . LYS A 0 61 . 61 LYS A C 1.0 ? 0.4476412 0.3379368 0.88069594 1 508 -ATOM O O . LYS A 0 61 . 61 LYS A O 1.0 ? 0.040578883 0.86321986 0.88739276 1 509 -ATOM C CB . LYS A 0 61 . 61 LYS A CB 1.0 ? 0.79294187 0.83316404 0.6113975 1 510 -ATOM C CG . LYS A 0 61 . 61 LYS A CG 1.0 ? 0.25506783 0.38745213 0.82342404 1 511 -ATOM C CD . LYS A 0 61 . 61 LYS A CD 1.0 ? 0.34154007 0.42303824 0.50844675 1 512 -ATOM C CE . LYS A 0 61 . 61 LYS A CE 1.0 ? 0.0076498836 0.2547723 0.31690186 1 513 -ATOM N NZ . LYS A 0 61 . 61 LYS A NZ 1.0 +1 0.16688202 0.8763437 0.61538875 1 514 -ATOM N N . THR A 0 62 . 62 THR A N 1.0 ? 0.111484125 0.26447135 0.28089914 1 515 -ATOM C CA . THR A 0 62 . 62 THR A CA 1.0 ? 0.61233956 0.039582487 0.2798445 1 516 -ATOM C C . THR A 0 62 . 62 THR A C 1.0 ? 0.91885406 0.33692163 0.6966041 1 517 -ATOM O O . THR A 0 62 . 62 THR A O 1.0 ? 0.7040226 0.50263804 0.32240143 1 518 -ATOM C CB . THR A 0 62 . 62 THR A CB 1.0 ? 0.43615857 0.5144568 0.045439247 1 519 -ATOM O OG1 . THR A 0 62 . 62 THR A OG1 1.0 ? 0.54375494 0.4116246 0.59021336 1 520 -ATOM C CG2 . THR A 0 62 . 62 THR A CG2 1.0 ? 0.6602648 0.67843974 0.78442675 1 521 -ATOM N N . PRO A 0 63 . 63 PRO A N 1.0 ? 
0.7224034 0.7029764 0.75701874 1 522 -ATOM C CA . PRO A 0 63 . 63 PRO A CA 1.0 ? 0.096295536 0.65421915 0.53805536 1 523 -ATOM C C . PRO A 0 63 . 63 PRO A C 1.0 ? 0.7340098 0.61646134 0.39382687 1 524 -ATOM O O . PRO A 0 63 . 63 PRO A O 1.0 ? 0.62126327 0.3861263 0.8182773 1 525 -ATOM C CB . PRO A 0 63 . 63 PRO A CB 1.0 ? 0.4503352 0.13470727 0.33131003 1 526 -ATOM C CG . PRO A 0 63 . 63 PRO A CG 1.0 ? 0.8663621 0.24036156 0.7686692 1 527 -ATOM C CD . PRO A 0 63 . 63 PRO A CD 1.0 ? 0.5198848 0.22015809 0.27891082 1 528 -ATOM N N . MET A 0 64 . 64 MET A N 1.0 ? 0.99069095 0.25104252 0.795737 1 529 -ATOM C CA . MET A 0 64 . 64 MET A CA 1.0 ? 0.28351858 0.93372077 0.6360407 1 530 -ATOM C C . MET A 0 64 . 64 MET A C 1.0 ? 0.43163052 0.12144192 0.056580547 1 531 -ATOM O O . MET A 0 64 . 64 MET A O 1.0 ? 0.19046421 0.9856933 0.48452753 1 532 -ATOM C CB . MET A 0 64 . 64 MET A CB 1.0 ? 0.16432363 0.81963754 0.69636524 1 533 -ATOM C CG . MET A 0 64 . 64 MET A CG 1.0 ? 0.94909 0.40869218 0.667828 1 534 -ATOM S SD . MET A 0 64 . 64 MET A SD 1.0 ? 0.60204595 0.55095273 0.3734191 1 535 -ATOM C CE . MET A 0 64 . 64 MET A CE 1.0 ? 0.047459867 0.2503394 0.24858496 1 536 -ATOM N N . ASP A 0 65 . 65 ASP A N 1.0 ? 0.17134276 0.16688795 0.9644167 1 537 -ATOM C CA . ASP A 0 65 . 65 ASP A CA 1.0 ? 0.6689125 0.26743177 0.3778292 1 538 -ATOM C C . ASP A 0 65 . 65 ASP A C 1.0 ? 0.8007674 0.5838379 0.79032075 1 539 -ATOM O O . ASP A 0 65 . 65 ASP A O 1.0 ? 0.93917406 0.6937513 0.17304507 1 540 -ATOM C CB . ASP A 0 65 . 65 ASP A CB 1.0 ? 0.3040988 0.9615324 0.22830935 1 541 -ATOM C CG . ASP A 0 65 . 65 ASP A CG 1.0 ? 0.28072062 0.75160897 0.78040606 1 542 -ATOM O OD1 . ASP A 0 65 . 65 ASP A OD1 1.0 ? 0.28110743 0.80842847 0.5429227 1 543 -ATOM O OD2 . ASP A 0 65 . 65 ASP A OD2 1.0 ? 0.21500345 0.6873656 0.2025152 1 544 -ATOM N N . MET A 0 66 . 66 MET A N 1.0 ? 0.45162302 0.9858169 0.82763153 1 545 -ATOM C CA . MET A 0 66 . 66 MET A CA 1.0 ? 0.34252498 0.17872766 0.70649064 1 546 -ATOM C C . 
MET A 0 66 . 66 MET A C 1.0 ? 0.055838082 0.93737465 0.09025974 1 547 -ATOM O O . MET A 0 66 . 66 MET A O 1.0 ? 0.35469016 0.9048231 0.49342477 1 548 -ATOM C CB . MET A 0 66 . 66 MET A CB 1.0 ? 0.61339504 0.19498847 0.17699663 1 549 -ATOM C CG . MET A 0 66 . 66 MET A CG 1.0 ? 0.027315842 0.12026664 0.796323 1 550 -ATOM S SD . MET A 0 66 . 66 MET A SD 1.0 ? 0.13672255 0.98232543 0.33437517 1 551 -ATOM C CE . MET A 0 66 . 66 MET A CE 1.0 ? 0.22109713 0.76097137 0.36731312 1 552 -ATOM N N . GLY A 0 67 . 67 GLY A N 1.0 ? 0.74043584 0.31166017 0.4036785 1 553 -ATOM C CA . GLY A 0 67 . 67 GLY A CA 1.0 ? 0.19061248 0.7993525 0.3181992 1 554 -ATOM C C . GLY A 0 67 . 67 GLY A C 1.0 ? 0.5630007 0.6327077 0.30474848 1 555 -ATOM O O . GLY A 0 67 . 67 GLY A O 1.0 ? 0.7280966 0.9931412 0.8449017 1 556 -ATOM N N . THR A 0 68 . 68 THR A N 1.0 ? 0.4388759 0.8347357 0.43655232 1 557 -ATOM C CA . THR A 0 68 . 68 THR A CA 1.0 ? 0.029422728 0.43730855 0.84775424 1 558 -ATOM C C . THR A 0 68 . 68 THR A C 1.0 ? 0.09031379 0.029354818 0.388932 1 559 -ATOM O O . THR A 0 68 . 68 THR A O 1.0 ? 0.8916971 0.6675028 0.8270406 1 560 -ATOM C CB . THR A 0 68 . 68 THR A CB 1.0 ? 0.18174726 0.46258342 0.36372605 1 561 -ATOM O OG1 . THR A 0 68 . 68 THR A OG1 1.0 ? 0.96664995 0.62396973 0.203084 1 562 -ATOM C CG2 . THR A 0 68 . 68 THR A CG2 1.0 ? 0.022301322 0.28717744 0.41452292 1 563 -ATOM N N . ILE A 0 69 . 69 ILE A N 1.0 ? 0.11306666 0.22546692 0.2530735 1 564 -ATOM C CA . ILE A 0 69 . 69 ILE A CA 1.0 ? 0.35706502 0.105216034 0.46246397 1 565 -ATOM C C . ILE A 0 69 . 69 ILE A C 1.0 ? 0.2334002 0.9270634 0.69207454 1 566 -ATOM O O . ILE A 0 69 . 69 ILE A O 1.0 ? 0.055811718 0.87246674 0.73542076 1 567 -ATOM C CB . ILE A 0 69 . 69 ILE A CB 1.0 ? 0.9301793 0.9703641 0.24505447 1 568 -ATOM C CG1 . ILE A 0 69 . 69 ILE A CG1 1.0 ? 0.9519709 0.0926876 0.812829 1 569 -ATOM C CG2 . ILE A 0 69 . 69 ILE A CG2 1.0 ? 0.7269124 0.89564085 0.42498177 1 570 -ATOM C CD1 . ILE A 0 69 . 69 ILE A CD1 1.0 ? 
0.4363302 0.34045455 0.27048227 1 571 -ATOM N N . LYS A 0 70 . 70 LYS A N 1.0 ? 0.3652729 0.17399035 0.056237765 1 572 -ATOM C CA . LYS A 0 70 . 70 LYS A CA 1.0 ? 0.4438551 0.2729624 0.56482464 1 573 -ATOM C C . LYS A 0 70 . 70 LYS A C 1.0 ? 0.9491877 0.20417556 0.93647176 1 574 -ATOM O O . LYS A 0 70 . 70 LYS A O 1.0 ? 0.4061128 0.08413371 0.34985718 1 575 -ATOM C CB . LYS A 0 70 . 70 LYS A CB 1.0 ? 0.8073384 0.997006 0.17721182 1 576 -ATOM C CG . LYS A 0 70 . 70 LYS A CG 1.0 ? 0.98681515 0.64565647 0.7076682 1 577 -ATOM C CD . LYS A 0 70 . 70 LYS A CD 1.0 ? 0.58779836 0.7315315 0.25944945 1 578 -ATOM C CE . LYS A 0 70 . 70 LYS A CE 1.0 ? 0.41647476 0.39283016 0.045054294 1 579 -ATOM N NZ . LYS A 0 70 . 70 LYS A NZ 1.0 +1 0.69617444 0.38699904 0.32574785 1 580 -ATOM N N . LYS A 0 71 . 71 LYS A N 1.0 ? 0.05742786 0.040793117 0.3429962 1 581 -ATOM C CA . LYS A 0 71 . 71 LYS A CA 1.0 ? 0.73280025 0.5775938 0.85264856 1 582 -ATOM C C . LYS A 0 71 . 71 LYS A C 1.0 ? 0.6996785 0.6799556 0.34990925 1 583 -ATOM O O . LYS A 0 71 . 71 LYS A O 1.0 ? 0.081667505 0.1617325 0.039382115 1 584 -ATOM C CB . LYS A 0 71 . 71 LYS A CB 1.0 ? 0.0502288 0.9529576 0.32988906 1 585 -ATOM C CG . LYS A 0 71 . 71 LYS A CG 1.0 ? 0.23538776 0.4382504 0.34315535 1 586 -ATOM C CD . LYS A 0 71 . 71 LYS A CD 1.0 ? 0.6592717 0.4294995 0.11019686 1 587 -ATOM C CE . LYS A 0 71 . 71 LYS A CE 1.0 ? 0.22525492 0.15250418 0.09044971 1 588 -ATOM N NZ . LYS A 0 71 . 71 LYS A NZ 1.0 +1 0.17798173 0.88153046 0.91826725 1 589 -ATOM N N . ARG A 0 72 . 72 ARG A N 1.0 ? 0.54406613 0.5424677 0.63879436 1 590 -ATOM C CA . ARG A 0 72 . 72 ARG A CA 1.0 ? 0.22762395 0.10045404 0.6215029 1 591 -ATOM C C . ARG A 0 72 . 72 ARG A C 1.0 ? 0.45063308 0.7447825 0.044959053 1 592 -ATOM O O . ARG A 0 72 . 72 ARG A O 1.0 ? 0.58237374 0.5846809 0.6237454 1 593 -ATOM C CB . ARG A 0 72 . 72 ARG A CB 1.0 ? 0.4897499 0.6112689 0.6828285 1 594 -ATOM C CG . ARG A 0 72 . 72 ARG A CG 1.0 ? 
0.58863336 0.48704988 0.35755488 1 595 -ATOM C CD . ARG A 0 72 . 72 ARG A CD 1.0 ? 0.08440903 0.039790012 0.48615786 1 596 -ATOM N NE . ARG A 0 72 . 72 ARG A NE 1.0 ? 0.010483576 0.38979143 0.64597243 1 597 -ATOM C CZ . ARG A 0 72 . 72 ARG A CZ 1.0 ? 0.5362261 0.16776302 0.84590846 1 598 -ATOM N NH1 . ARG A 0 72 . 72 ARG A NH1 1.0 ? 0.76104504 0.9762006 0.9746637 1 599 -ATOM N NH2 . ARG A 0 72 . 72 ARG A NH2 1.0 +1 0.5488172 0.19692363 0.97998 1 600 -ATOM N N . LEU A 0 73 . 73 LEU A N 1.0 ? 0.019932527 0.22762266 0.4728334 1 601 -ATOM C CA . LEU A 0 73 . 73 LEU A CA 1.0 ? 0.84814054 0.88631725 0.15730233 1 602 -ATOM C C . LEU A 0 73 . 73 LEU A C 1.0 ? 0.4777981 0.4499435 0.5014301 1 603 -ATOM O O . LEU A 0 73 . 73 LEU A O 1.0 ? 0.8000935 0.41834512 0.90236557 1 604 -ATOM C CB . LEU A 0 73 . 73 LEU A CB 1.0 ? 0.90248793 0.82319105 0.24585469 1 605 -ATOM C CG . LEU A 0 73 . 73 LEU A CG 1.0 ? 0.63442975 0.4745378 0.7797341 1 606 -ATOM C CD1 . LEU A 0 73 . 73 LEU A CD1 1.0 ? 0.07057923 0.17080696 0.73684347 1 607 -ATOM C CD2 . LEU A 0 73 . 73 LEU A CD2 1.0 ? 0.70432675 0.4056411 0.6241655 1 608 -ATOM N N . GLU A 0 74 . 74 GLU A N 1.0 ? 0.7869841 0.68598306 0.76966745 1 609 -ATOM C CA . GLU A 0 74 . 74 GLU A CA 1.0 ? 0.01815502 0.785917 0.91440654 1 610 -ATOM C C . GLU A 0 74 . 74 GLU A C 1.0 ? 0.44069755 0.9602215 0.12017898 1 611 -ATOM O O . GLU A 0 74 . 74 GLU A O 1.0 ? 0.98632395 0.33671921 0.24176203 1 612 -ATOM C CB . GLU A 0 74 . 74 GLU A CB 1.0 ? 0.69509536 0.4549385 0.8398811 1 613 -ATOM C CG . GLU A 0 74 . 74 GLU A CG 1.0 ? 0.7480808 0.13565244 0.8369676 1 614 -ATOM C CD . GLU A 0 74 . 74 GLU A CD 1.0 ? 0.15467529 0.55540335 0.07619534 1 615 -ATOM O OE1 . GLU A 0 74 . 74 GLU A OE1 1.0 ? 0.6191937 0.5454651 0.5037758 1 616 -ATOM O OE2 . GLU A 0 74 . 74 GLU A OE2 1.0 ? 0.06201913 0.842046 0.90537465 1 617 -ATOM N N . ASN A 0 75 . 75 ASN A N 1.0 ? 0.88432986 0.058275476 0.28340295 1 618 -ATOM C CA . ASN A 0 75 . 75 ASN A CA 1.0 ? 
0.9683867 0.9956996 0.36855182 1 619 -ATOM C C . ASN A 0 75 . 75 ASN A C 1.0 ? 0.7652855 0.23131357 0.115097605 1 620 -ATOM O O . ASN A 0 75 . 75 ASN A O 1.0 ? 0.72414005 0.3183524 0.6554864 1 621 -ATOM C CB . ASN A 0 75 . 75 ASN A CB 1.0 ? 0.15284938 0.83244365 0.082141906 1 622 -ATOM C CG . ASN A 0 75 . 75 ASN A CG 1.0 ? 0.9095807 0.040576905 0.054544855 1 623 -ATOM O OD1 . ASN A 0 75 . 75 ASN A OD1 1.0 ? 0.078435004 0.72755736 0.5939207 1 624 -ATOM N ND2 . ASN A 0 75 . 75 ASN A ND2 1.0 ? 0.82049936 0.25625336 0.7077239 1 625 -ATOM N N . ASN A 0 76 . 76 ASN A N 1.0 ? 0.7683833 0.47528148 0.9426544 1 626 -ATOM C CA . ASN A 0 76 . 76 ASN A CA 1.0 ? 0.3958445 0.8972409 0.5557501 1 627 -ATOM C C . ASN A 0 76 . 76 ASN A C 1.0 ? 0.39610022 0.09616666 0.3098616 1 628 -ATOM O O . ASN A 0 76 . 76 ASN A O 1.0 ? 0.8912652 0.27115795 0.47150052 1 629 -ATOM C CB . ASN A 0 76 . 76 ASN A CB 1.0 ? 0.3508173 0.07111238 0.31172144 1 630 -ATOM C CG . ASN A 0 76 . 76 ASN A CG 1.0 ? 0.29940933 0.062370595 0.25832543 1 631 -ATOM O OD1 . ASN A 0 76 . 76 ASN A OD1 1.0 ? 0.896429 0.6855723 0.00050425273 1 632 -ATOM N ND2 . ASN A 0 76 . 76 ASN A ND2 1.0 ? 0.5348789 0.9252746 0.56960607 1 633 -ATOM N N . TYR A 0 77 . 77 TYR A N 1.0 ? 0.6907893 0.5106046 0.5633073 1 634 -ATOM C CA . TYR A 0 77 . 77 TYR A CA 1.0 ? 0.16048932 0.9077268 0.8226039 1 635 -ATOM C C . TYR A 0 77 . 77 TYR A C 1.0 ? 0.5293217 0.612117 0.26779747 1 636 -ATOM O O . TYR A 0 77 . 77 TYR A O 1.0 ? 0.45323926 0.95417917 0.73776096 1 637 -ATOM C CB . TYR A 0 77 . 77 TYR A CB 1.0 ? 0.42094097 0.05369439 0.58914375 1 638 -ATOM C CG . TYR A 0 77 . 77 TYR A CG 1.0 ? 0.017876755 0.3821119 0.9654781 1 639 -ATOM C CD1 . TYR A 0 77 . 77 TYR A CD1 1.0 ? 0.8893033 0.17777875 0.581484 1 640 -ATOM C CD2 . TYR A 0 77 . 77 TYR A CD2 1.0 ? 0.9168661 0.5394957 0.95481175 1 641 -ATOM C CE1 . TYR A 0 77 . 77 TYR A CE1 1.0 ? 0.2701171 0.04052604 0.106834084 1 642 -ATOM C CE2 . TYR A 0 77 . 77 TYR A CE2 1.0 ? 
0.9977724 0.26672313 0.8744105 1 643 -ATOM C CZ . TYR A 0 77 . 77 TYR A CZ 1.0 ? 0.83905655 0.90583366 0.34092638 1 644 -ATOM O OH . TYR A 0 77 . 77 TYR A OH 1.0 ? 0.88062686 0.8106202 0.81060785 1 645 -ATOM N N . TYR A 0 78 . 78 TYR A N 1.0 ? 0.5927302 0.014976075 0.10235968 1 646 -ATOM C CA . TYR A 0 78 . 78 TYR A CA 1.0 ? 0.71935064 0.049455635 0.5681867 1 647 -ATOM C C . TYR A 0 78 . 78 TYR A C 1.0 ? 0.9162174 0.663149 0.3697006 1 648 -ATOM O O . TYR A 0 78 . 78 TYR A O 1.0 ? 0.9825429 0.7280526 0.59062296 1 649 -ATOM C CB . TYR A 0 78 . 78 TYR A CB 1.0 ? 0.5066265 0.7476502 0.36868936 1 650 -ATOM C CG . TYR A 0 78 . 78 TYR A CG 1.0 ? 0.7606362 0.13057402 0.04492329 1 651 -ATOM C CD1 . TYR A 0 78 . 78 TYR A CD1 1.0 ? 0.21641137 0.47000328 0.8549833 1 652 -ATOM C CD2 . TYR A 0 78 . 78 TYR A CD2 1.0 ? 0.9029424 0.6150914 0.7673599 1 653 -ATOM C CE1 . TYR A 0 78 . 78 TYR A CE1 1.0 ? 0.07673269 0.56832254 0.8396492 1 654 -ATOM C CE2 . TYR A 0 78 . 78 TYR A CE2 1.0 ? 0.32929197 0.062119722 0.9459942 1 655 -ATOM C CZ . TYR A 0 78 . 78 TYR A CZ 1.0 ? 0.89149576 0.57478577 0.48854578 1 656 -ATOM O OH . TYR A 0 78 . 78 TYR A OH 1.0 ? 0.21473804 0.5505156 0.1805076 1 657 -ATOM N N . TRP A 0 79 . 79 TRP A N 1.0 ? 0.9975935 0.6728464 0.7276085 1 658 -ATOM C CA . TRP A 0 79 . 79 TRP A CA 1.0 ? 0.05749655 0.32349953 0.570255 1 659 -ATOM C C . TRP A 0 79 . 79 TRP A C 1.0 ? 0.5962234 0.5043512 0.99041855 1 660 -ATOM O O . TRP A 0 79 . 79 TRP A O 1.0 ? 0.6334593 0.93361956 0.1930102 1 661 -ATOM C CB . TRP A 0 79 . 79 TRP A CB 1.0 ? 0.35646415 0.46683624 0.36796817 1 662 -ATOM C CG . TRP A 0 79 . 79 TRP A CG 1.0 ? 0.37785852 0.9348114 0.27387276 1 663 -ATOM C CD1 . TRP A 0 79 . 79 TRP A CD1 1.0 ? 0.66729844 0.69396293 0.6211262 1 664 -ATOM C CD2 . TRP A 0 79 . 79 TRP A CD2 1.0 ? 0.49205074 0.26546577 0.43675706 1 665 -ATOM N NE1 . TRP A 0 79 . 79 TRP A NE1 1.0 ? 0.9653648 0.66586524 0.6358617 1 666 -ATOM C CE2 . TRP A 0 79 . 79 TRP A CE2 1.0 ? 
0.8663934 0.8129828 0.5951715 1 667 -ATOM C CE3 . TRP A 0 79 . 79 TRP A CE3 1.0 ? 0.16612113 0.048894715 0.02198138 1 668 -ATOM C CZ2 . TRP A 0 79 . 79 TRP A CZ2 1.0 ? 0.4814995 0.005697186 0.58159184 1 669 -ATOM C CZ3 . TRP A 0 79 . 79 TRP A CZ3 1.0 ? 0.8524935 0.081529595 0.82618856 1 670 -ATOM C CH2 . TRP A 0 79 . 79 TRP A CH2 1.0 ? 0.36955765 0.12510778 0.055062134 1 671 -ATOM N N . ASN A 0 80 . 80 ASN A N 1.0 ? 0.26878136 0.14160852 0.6827199 1 672 -ATOM C CA . ASN A 0 80 . 80 ASN A CA 1.0 ? 0.074581966 0.28372192 0.32812133 1 673 -ATOM C C . ASN A 0 80 . 80 ASN A C 1.0 ? 0.085015565 0.79185736 0.45178342 1 674 -ATOM O O . ASN A 0 80 . 80 ASN A O 1.0 ? 0.7266956 0.35302758 0.030935626 1 675 -ATOM C CB . ASN A 0 80 . 80 ASN A CB 1.0 ? 0.3687143 0.80120504 0.49825665 1 676 -ATOM C CG . ASN A 0 80 . 80 ASN A CG 1.0 ? 0.096359886 0.6481487 0.9439275 1 677 -ATOM O OD1 . ASN A 0 80 . 80 ASN A OD1 1.0 ? 0.8964015 0.87487483 0.64743155 1 678 -ATOM N ND2 . ASN A 0 80 . 80 ASN A ND2 1.0 ? 0.17964427 0.0009992396 0.108265884 1 679 -ATOM N N . ALA A 0 81 . 81 ALA A N 1.0 ? 0.15583147 0.78859454 0.28851515 1 680 -ATOM C CA . ALA A 0 81 . 81 ALA A CA 1.0 ? 0.13663922 0.087701485 0.96907806 1 681 -ATOM C C . ALA A 0 81 . 81 ALA A C 1.0 ? 0.13246986 0.30038434 0.8351847 1 682 -ATOM O O . ALA A 0 81 . 81 ALA A O 1.0 ? 0.66448945 0.017495057 0.23895489 1 683 -ATOM C CB . ALA A 0 81 . 81 ALA A CB 1.0 ? 0.500439 0.010342263 0.24396911 1 684 -ATOM N N . GLN A 0 82 . 82 GLN A N 1.0 ? 0.48634917 0.10592936 0.07506386 1 685 -ATOM C CA . GLN A 0 82 . 82 GLN A CA 1.0 ? 0.8981499 0.8618579 0.11153069 1 686 -ATOM C C . GLN A 0 82 . 82 GLN A C 1.0 ? 0.6517658 0.76446015 0.1376193 1 687 -ATOM O O . GLN A 0 82 . 82 GLN A O 1.0 ? 0.43682286 0.7853506 0.81445557 1 688 -ATOM C CB . GLN A 0 82 . 82 GLN A CB 1.0 ? 0.8658707 0.7519992 0.9879568 1 689 -ATOM C CG . GLN A 0 82 . 82 GLN A CG 1.0 ? 0.4925066 0.760563 0.13433525 1 690 -ATOM C CD . GLN A 0 82 . 82 GLN A CD 1.0 ? 
0.2508215 0.85573494 0.42194405 1 691 -ATOM O OE1 . GLN A 0 82 . 82 GLN A OE1 1.0 ? 0.11151997 0.9968289 0.8420484 1 692 -ATOM N NE2 . GLN A 0 82 . 82 GLN A NE2 1.0 ? 0.7758482 0.69239354 0.74775004 1 693 -ATOM N N . GLU A 0 83 . 83 GLU A N 1.0 ? 0.09569844 0.20153315 0.16472277 1 694 -ATOM C CA . GLU A 0 83 . 83 GLU A CA 1.0 ? 0.88845676 0.13557066 0.16713646 1 695 -ATOM C C . GLU A 0 83 . 83 GLU A C 1.0 ? 0.30682534 0.6703361 0.4220723 1 696 -ATOM O O . GLU A 0 83 . 83 GLU A O 1.0 ? 0.7049813 0.38890675 0.34500873 1 697 -ATOM C CB . GLU A 0 83 . 83 GLU A CB 1.0 ? 0.7903351 0.8752745 0.07748801 1 698 -ATOM C CG . GLU A 0 83 . 83 GLU A CG 1.0 ? 0.38450527 0.8352217 0.86479604 1 699 -ATOM C CD . GLU A 0 83 . 83 GLU A CD 1.0 ? 0.09425989 0.6671506 0.16998701 1 700 -ATOM O OE1 . GLU A 0 83 . 83 GLU A OE1 1.0 ? 0.94860214 0.57743573 0.0109985145 1 701 -ATOM O OE2 . GLU A 0 83 . 83 GLU A OE2 1.0 ? 0.5501702 0.56207883 0.6191854 1 702 -ATOM N N . CYS A 0 84 . 84 CYS A N 1.0 ? 0.82526803 0.4591752 0.8051508 1 703 -ATOM C CA . CYS A 0 84 . 84 CYS A CA 1.0 ? 0.9570059 0.63070875 0.43275666 1 704 -ATOM C C . CYS A 0 84 . 84 CYS A C 1.0 ? 0.69621736 0.6797836 0.36407432 1 705 -ATOM O O . CYS A 0 84 . 84 CYS A O 1.0 ? 0.52439255 0.89105475 0.79808956 1 706 -ATOM C CB . CYS A 0 84 . 84 CYS A CB 1.0 ? 0.5218931 0.65331656 0.7085323 1 707 -ATOM S SG . CYS A 0 84 . 84 CYS A SG 1.0 ? 0.7013351 0.2831097 0.08748167 1 708 -ATOM N N . ILE A 0 85 . 85 ILE A N 1.0 ? 0.4092554 0.9902672 0.8953694 1 709 -ATOM C CA . ILE A 0 85 . 85 ILE A CA 1.0 ? 0.21991777 0.6785647 0.86578405 1 710 -ATOM C C . ILE A 0 85 . 85 ILE A C 1.0 ? 0.6650675 0.7617236 0.47315672 1 711 -ATOM O O . ILE A 0 85 . 85 ILE A O 1.0 ? 0.43063253 0.8436934 0.6895484 1 712 -ATOM C CB . ILE A 0 85 . 85 ILE A CB 1.0 ? 0.5521001 0.48205745 0.1856737 1 713 -ATOM C CG1 . ILE A 0 85 . 85 ILE A CG1 1.0 ? 0.7036989 0.34982556 0.94301975 1 714 -ATOM C CG2 . ILE A 0 85 . 85 ILE A CG2 1.0 ? 
0.70539236 0.9597139 0.47172 1 715 -ATOM C CD1 . ILE A 0 85 . 85 ILE A CD1 1.0 ? 0.8466955 0.7230946 0.22697482 1 716 -ATOM N N . GLN A 0 86 . 86 GLN A N 1.0 ? 0.8976154 0.78023225 0.17527384 1 717 -ATOM C CA . GLN A 0 86 . 86 GLN A CA 1.0 ? 0.81320626 0.30451006 0.60401934 1 718 -ATOM C C . GLN A 0 86 . 86 GLN A C 1.0 ? 0.0396214 0.03029044 0.6538846 1 719 -ATOM O O . GLN A 0 86 . 86 GLN A O 1.0 ? 0.15556341 0.64768165 0.7752269 1 720 -ATOM C CB . GLN A 0 86 . 86 GLN A CB 1.0 ? 0.83201593 0.2949846 0.28991967 1 721 -ATOM C CG . GLN A 0 86 . 86 GLN A CG 1.0 ? 0.8360643 0.8479897 0.42141718 1 722 -ATOM C CD . GLN A 0 86 . 86 GLN A CD 1.0 ? 0.9204351 0.31067386 0.58475095 1 723 -ATOM O OE1 . GLN A 0 86 . 86 GLN A OE1 1.0 ? 0.22972271 0.34476098 0.61762065 1 724 -ATOM N NE2 . GLN A 0 86 . 86 GLN A NE2 1.0 ? 0.70973045 0.122411594 0.75540906 1 725 -ATOM N N . ASP A 0 87 . 87 ASP A N 1.0 ? 0.67051256 0.40022698 0.72293913 1 726 -ATOM C CA . ASP A 0 87 . 87 ASP A CA 1.0 ? 0.4590393 0.8527803 0.43441555 1 727 -ATOM C C . ASP A 0 87 . 87 ASP A C 1.0 ? 0.99631214 0.53961056 0.2665523 1 728 -ATOM O O . ASP A 0 87 . 87 ASP A O 1.0 ? 0.7978781 0.11527053 0.97469586 1 729 -ATOM C CB . ASP A 0 87 . 87 ASP A CB 1.0 ? 0.50785124 0.25172973 0.22497582 1 730 -ATOM C CG . ASP A 0 87 . 87 ASP A CG 1.0 ? 0.47895315 0.64215255 0.56983304 1 731 -ATOM O OD1 . ASP A 0 87 . 87 ASP A OD1 1.0 ? 0.33215886 0.86974025 0.082429245 1 732 -ATOM O OD2 . ASP A 0 87 . 87 ASP A OD2 1.0 ? 0.6103027 0.66493183 0.32198617 1 733 -ATOM N N . PHE A 0 88 . 88 PHE A N 1.0 ? 0.6813259 0.06574339 0.85820526 1 734 -ATOM C CA . PHE A 0 88 . 88 PHE A CA 1.0 ? 0.51915723 0.45944098 0.32501137 1 735 -ATOM C C . PHE A 0 88 . 88 PHE A C 1.0 ? 0.10136202 0.7287049 0.42238167 1 736 -ATOM O O . PHE A 0 88 . 88 PHE A O 1.0 ? 0.65871423 0.24012426 0.94415927 1 737 -ATOM C CB . PHE A 0 88 . 88 PHE A CB 1.0 ? 0.6352382 0.43655804 0.77311224 1 738 -ATOM C CG . PHE A 0 88 . 88 PHE A CG 1.0 ? 
0.427484 0.34206575 0.18826936 1 739 -ATOM C CD1 . PHE A 0 88 . 88 PHE A CD1 1.0 ? 0.9933286 0.1950006 0.08233392 1 740 -ATOM C CD2 . PHE A 0 88 . 88 PHE A CD2 1.0 ? 0.37208557 0.78499776 0.30669695 1 741 -ATOM C CE1 . PHE A 0 88 . 88 PHE A CE1 1.0 ? 0.45146564 0.8768799 0.8158087 1 742 -ATOM C CE2 . PHE A 0 88 . 88 PHE A CE2 1.0 ? 0.69218004 0.32574525 0.59781694 1 743 -ATOM C CZ . PHE A 0 88 . 88 PHE A CZ 1.0 ? 0.6258841 0.10326425 0.9054288 1 744 -ATOM N N . ASN A 0 89 . 89 ASN A N 1.0 ? 0.6274665 0.88618034 0.53004 1 745 -ATOM C CA . ASN A 0 89 . 89 ASN A CA 1.0 ? 0.4642488 0.5271205 0.48127824 1 746 -ATOM C C . ASN A 0 89 . 89 ASN A C 1.0 ? 0.3233532 0.74258333 0.8615474 1 747 -ATOM O O . ASN A 0 89 . 89 ASN A O 1.0 ? 0.25724816 0.051673625 0.61820513 1 748 -ATOM C CB . ASN A 0 89 . 89 ASN A CB 1.0 ? 0.19788706 0.29276678 0.5679581 1 749 -ATOM C CG . ASN A 0 89 . 89 ASN A CG 1.0 ? 0.78795946 0.86620086 0.36986887 1 750 -ATOM O OD1 . ASN A 0 89 . 89 ASN A OD1 1.0 ? 0.34865713 0.16694273 0.1437264 1 751 -ATOM N ND2 . ASN A 0 89 . 89 ASN A ND2 1.0 ? 0.5344207 0.70658624 0.22107202 1 752 -ATOM N N . THR A 0 90 . 90 THR A N 1.0 ? 0.31175175 0.94988286 0.7763237 1 753 -ATOM C CA . THR A 0 90 . 90 THR A CA 1.0 ? 0.8473105 0.5496313 0.38305473 1 754 -ATOM C C . THR A 0 90 . 90 THR A C 1.0 ? 0.6211498 0.29082635 0.6018148 1 755 -ATOM O O . THR A 0 90 . 90 THR A O 1.0 ? 0.5919917 0.8851359 0.90406376 1 756 -ATOM C CB . THR A 0 90 . 90 THR A CB 1.0 ? 0.79089916 0.28369462 0.14225847 1 757 -ATOM O OG1 . THR A 0 90 . 90 THR A OG1 1.0 ? 0.30880874 0.6601424 0.4637643 1 758 -ATOM C CG2 . THR A 0 90 . 90 THR A CG2 1.0 ? 0.545782 0.21566576 0.94990736 1 759 -ATOM N N . MET A 0 91 . 91 MET A N 1.0 ? 0.91240185 0.9630998 0.074748814 1 760 -ATOM C CA . MET A 0 91 . 91 MET A CA 1.0 ? 0.34747887 0.86954296 0.4622336 1 761 -ATOM C C . MET A 0 91 . 91 MET A C 1.0 ? 0.4230438 0.6467935 0.92592007 1 762 -ATOM O O . MET A 0 91 . 91 MET A O 1.0 ? 
0.80562973 0.42011467 0.5446169 1 763 -ATOM C CB . MET A 0 91 . 91 MET A CB 1.0 ? 0.5988939 0.21447536 0.8365613 1 764 -ATOM C CG . MET A 0 91 . 91 MET A CG 1.0 ? 0.5017015 0.49588355 0.61256516 1 765 -ATOM S SD . MET A 0 91 . 91 MET A SD 1.0 ? 0.40007693 0.4639986 0.7985277 1 766 -ATOM C CE . MET A 0 91 . 91 MET A CE 1.0 ? 0.26123646 0.4189048 0.20657755 1 767 -ATOM N N . PHE A 0 92 . 92 PHE A N 1.0 ? 0.18508925 0.60791093 0.49999967 1 768 -ATOM C CA . PHE A 0 92 . 92 PHE A CA 1.0 ? 0.3406327 0.1409519 0.2614821 1 769 -ATOM C C . PHE A 0 92 . 92 PHE A C 1.0 ? 0.16033427 0.060037 0.16561483 1 770 -ATOM O O . PHE A 0 92 . 92 PHE A O 1.0 ? 0.004303971 0.46309942 0.79610443 1 771 -ATOM C CB . PHE A 0 92 . 92 PHE A CB 1.0 ? 0.8502269 0.67139554 0.008632633 1 772 -ATOM C CG . PHE A 0 92 . 92 PHE A CG 1.0 ? 0.18636551 0.46222925 0.37880808 1 773 -ATOM C CD1 . PHE A 0 92 . 92 PHE A CD1 1.0 ? 0.8203383 0.58123916 0.046864722 1 774 -ATOM C CD2 . PHE A 0 92 . 92 PHE A CD2 1.0 ? 0.7111917 0.9479025 0.03205685 1 775 -ATOM C CE1 . PHE A 0 92 . 92 PHE A CE1 1.0 ? 0.4271426 0.47250602 0.60288244 1 776 -ATOM C CE2 . PHE A 0 92 . 92 PHE A CE2 1.0 ? 0.119177505 0.38563102 0.22370405 1 777 -ATOM C CZ . PHE A 0 92 . 92 PHE A CZ 1.0 ? 0.12993589 0.81904393 0.005720969 1 778 -ATOM N N . THR A 0 93 . 93 THR A N 1.0 ? 0.69935435 0.52945393 0.16124396 1 779 -ATOM C CA . THR A 0 93 . 93 THR A CA 1.0 ? 0.655274 0.02972915 0.68011165 1 780 -ATOM C C . THR A 0 93 . 93 THR A C 1.0 ? 0.9676924 0.7402126 0.07442236 1 781 -ATOM O O . THR A 0 93 . 93 THR A O 1.0 ? 0.19059618 0.34401792 0.85975343 1 782 -ATOM C CB . THR A 0 93 . 93 THR A CB 1.0 ? 0.34769842 0.4110281 0.4649081 1 783 -ATOM O OG1 . THR A 0 93 . 93 THR A OG1 1.0 ? 0.92304176 0.011181253 0.5590658 1 784 -ATOM C CG2 . THR A 0 93 . 93 THR A CG2 1.0 ? 0.13439523 0.9196781 0.72922176 1 785 -ATOM N N . ASN A 0 94 . 94 ASN A N 1.0 ? 0.93131036 0.27378815 0.84615797 1 786 -ATOM C CA . ASN A 0 94 . 94 ASN A CA 1.0 ? 
0.35561407 0.54407907 0.25615948 1 787 -ATOM C C . ASN A 0 94 . 94 ASN A C 1.0 ? 0.7270021 0.12199512 0.9281397 1 788 -ATOM O O . ASN A 0 94 . 94 ASN A O 1.0 ? 0.65929645 0.6971748 0.8288939 1 789 -ATOM C CB . ASN A 0 94 . 94 ASN A CB 1.0 ? 0.80182654 0.31508824 0.617586 1 790 -ATOM C CG . ASN A 0 94 . 94 ASN A CG 1.0 ? 0.18230076 0.40216994 0.47956306 1 791 -ATOM O OD1 . ASN A 0 94 . 94 ASN A OD1 1.0 ? 0.90642834 0.2805007 0.5923691 1 792 -ATOM N ND2 . ASN A 0 94 . 94 ASN A ND2 1.0 ? 0.22647314 0.7095632 0.17288852 1 793 -ATOM N N . CYS A 0 95 . 95 CYS A N 1.0 ? 0.6894223 0.93182874 0.38677052 1 794 -ATOM C CA . CYS A 0 95 . 95 CYS A CA 1.0 ? 0.7801362 0.15047388 0.8136133 1 795 -ATOM C C . CYS A 0 95 . 95 CYS A C 1.0 ? 0.064329214 0.043534882 0.49409232 1 796 -ATOM O O . CYS A 0 95 . 95 CYS A O 1.0 ? 0.5207449 0.43088242 0.94745207 1 797 -ATOM C CB . CYS A 0 95 . 95 CYS A CB 1.0 ? 0.41286743 0.17577994 0.7927361 1 798 -ATOM S SG . CYS A 0 95 . 95 CYS A SG 1.0 ? 0.70685846 0.11044519 0.7962053 1 799 -ATOM N N . TYR A 0 96 . 96 TYR A N 1.0 ? 0.5487207 0.47500253 0.84636915 1 800 -ATOM C CA . TYR A 0 96 . 96 TYR A CA 1.0 ? 0.6570376 0.15947218 0.040391505 1 801 -ATOM C C . TYR A 0 96 . 96 TYR A C 1.0 ? 0.64676553 0.8359332 0.010977215 1 802 -ATOM O O . TYR A 0 96 . 96 TYR A O 1.0 ? 0.1741522 0.9512573 0.7142636 1 803 -ATOM C CB . TYR A 0 96 . 96 TYR A CB 1.0 ? 0.036657497 0.42961478 0.7690413 1 804 -ATOM C CG . TYR A 0 96 . 96 TYR A CG 1.0 ? 0.8567955 0.870013 0.37343824 1 805 -ATOM C CD1 . TYR A 0 96 . 96 TYR A CD1 1.0 ? 0.552052 0.0845001 0.75948596 1 806 -ATOM C CD2 . TYR A 0 96 . 96 TYR A CD2 1.0 ? 0.11381386 0.099732846 0.4694832 1 807 -ATOM C CE1 . TYR A 0 96 . 96 TYR A CE1 1.0 ? 0.2200665 0.42848414 0.31095013 1 808 -ATOM C CE2 . TYR A 0 96 . 96 TYR A CE2 1.0 ? 0.5009303 0.13328597 0.9802069 1 809 -ATOM C CZ . TYR A 0 96 . 96 TYR A CZ 1.0 ? 0.15963578 0.27581176 0.70636034 1 810 -ATOM O OH . TYR A 0 96 . 96 TYR A OH 1.0 ? 
0.30800515 0.885135 0.14321122 1 811 -ATOM N N . ILE A 0 97 . 97 ILE A N 1.0 ? 0.5728126 0.7051939 0.25793847 1 812 -ATOM C CA . ILE A 0 97 . 97 ILE A CA 1.0 ? 0.13004158 0.5488241 0.5671083 1 813 -ATOM C C . ILE A 0 97 . 97 ILE A C 1.0 ? 0.18898068 0.3767042 0.90888596 1 814 -ATOM O O . ILE A 0 97 . 97 ILE A O 1.0 ? 0.84104526 0.9464413 0.72061366 1 815 -ATOM C CB . ILE A 0 97 . 97 ILE A CB 1.0 ? 0.06469795 0.107636705 0.55860454 1 816 -ATOM C CG1 . ILE A 0 97 . 97 ILE A CG1 1.0 ? 0.7472531 0.4515391 0.91470844 1 817 -ATOM C CG2 . ILE A 0 97 . 97 ILE A CG2 1.0 ? 0.589769 0.674198 0.6383413 1 818 -ATOM C CD1 . ILE A 0 97 . 97 ILE A CD1 1.0 ? 0.46934575 0.71772164 0.20319644 1 819 -ATOM N N . TYR A 0 98 . 98 TYR A N 1.0 ? 0.005669784 0.7766905 0.809595 1 820 -ATOM C CA . TYR A 0 98 . 98 TYR A CA 1.0 ? 0.6963199 0.777312 0.13196623 1 821 -ATOM C C . TYR A 0 98 . 98 TYR A C 1.0 ? 0.9339194 0.6741905 0.53864825 1 822 -ATOM O O . TYR A 0 98 . 98 TYR A O 1.0 ? 0.93355566 0.19806021 0.5444968 1 823 -ATOM C CB . TYR A 0 98 . 98 TYR A CB 1.0 ? 0.60149604 0.17745323 0.06897259 1 824 -ATOM C CG . TYR A 0 98 . 98 TYR A CG 1.0 ? 0.5674203 0.26848975 0.48105413 1 825 -ATOM C CD1 . TYR A 0 98 . 98 TYR A CD1 1.0 ? 0.10481165 0.80527174 0.79634094 1 826 -ATOM C CD2 . TYR A 0 98 . 98 TYR A CD2 1.0 ? 0.015984453 0.31550744 0.22189872 1 827 -ATOM C CE1 . TYR A 0 98 . 98 TYR A CE1 1.0 ? 0.67532766 0.16823047 0.31446788 1 828 -ATOM C CE2 . TYR A 0 98 . 98 TYR A CE2 1.0 ? 0.33905447 0.20372726 0.8528759 1 829 -ATOM C CZ . TYR A 0 98 . 98 TYR A CZ 1.0 ? 0.10440549 0.2880816 0.9760127 1 830 -ATOM O OH . TYR A 0 98 . 98 TYR A OH 1.0 ? 0.2406016 0.08357705 0.91535723 1 831 -ATOM N N . ASN A 0 99 . 99 ASN A N 1.0 ? 0.4798952 0.88014203 0.1545242 1 832 -ATOM C CA . ASN A 0 99 . 99 ASN A CA 1.0 ? 0.6259028 0.42601475 0.8537117 1 833 -ATOM C C . ASN A 0 99 . 99 ASN A C 1.0 ? 0.6100742 0.34140986 0.49579892 1 834 -ATOM O O . ASN A 0 99 . 99 ASN A O 1.0 ? 
0.7404988 0.8645296 0.05585749 1 835 -ATOM C CB . ASN A 0 99 . 99 ASN A CB 1.0 ? 0.6835095 0.40155992 0.704408 1 836 -ATOM C CG . ASN A 0 99 . 99 ASN A CG 1.0 ? 0.077381864 0.5039151 0.91126597 1 837 -ATOM O OD1 . ASN A 0 99 . 99 ASN A OD1 1.0 ? 0.6986012 0.5499873 0.4595614 1 838 -ATOM N ND2 . ASN A 0 99 . 99 ASN A ND2 1.0 ? 0.5377948 0.5179013 0.32701805 1 839 -ATOM N N . LYS A 0 100 . 100 LYS A N 1.0 ? 0.07740254 0.7968163 0.76627547 1 840 -ATOM C CA . LYS A 0 100 . 100 LYS A CA 1.0 ? 0.8589047 0.5671215 0.5465838 1 841 -ATOM C C . LYS A 0 100 . 100 LYS A C 1.0 ? 0.37059724 0.15570219 0.14930789 1 842 -ATOM O O . LYS A 0 100 . 100 LYS A O 1.0 ? 0.7305544 0.032632384 0.20310968 1 843 -ATOM C CB . LYS A 0 100 . 100 LYS A CB 1.0 ? 0.88362783 0.33711067 0.9099974 1 844 -ATOM C CG . LYS A 0 100 . 100 LYS A CG 1.0 ? 0.39799398 0.503459 0.6904515 1 845 -ATOM C CD . LYS A 0 100 . 100 LYS A CD 1.0 ? 0.12518598 0.9485056 0.4467307 1 846 -ATOM C CE . LYS A 0 100 . 100 LYS A CE 1.0 ? 0.66766834 0.19251792 0.87384623 1 847 -ATOM N NZ . LYS A 0 100 . 100 LYS A NZ 1.0 +1 0.69310623 0.86622673 0.81169504 1 848 -ATOM N N . PRO A 0 101 . 101 PRO A N 1.0 ? 0.47127727 0.9763605 0.9812645 1 849 -ATOM C CA . PRO A 0 101 . 101 PRO A CA 1.0 ? 0.5216019 0.850848 0.9281035 1 850 -ATOM C C . PRO A 0 101 . 101 PRO A C 1.0 ? 0.799135 0.51823586 0.17700832 1 851 -ATOM O O . PRO A 0 101 . 101 PRO A O 1.0 ? 0.095346354 0.2145238 0.34301051 1 852 -ATOM C CB . PRO A 0 101 . 101 PRO A CB 1.0 ? 0.84155947 0.4047478 0.7445888 1 853 -ATOM C CG . PRO A 0 101 . 101 PRO A CG 1.0 ? 0.5656297 0.24427246 0.29711175 1 854 -ATOM C CD . PRO A 0 101 . 101 PRO A CD 1.0 ? 0.601071 0.4785193 0.18841454 1 855 -ATOM N N . GLY A 0 102 . 102 GLY A N 1.0 ? 0.34228987 0.18594833 0.14211541 1 856 -ATOM C CA . GLY A 0 102 . 102 GLY A CA 1.0 ? 0.90039057 0.866687 0.86845845 1 857 -ATOM C C . GLY A 0 102 . 102 GLY A C 1.0 ? 0.28280723 0.16095328 0.79253256 1 858 -ATOM O O . GLY A 0 102 . 102 GLY A O 1.0 ? 
0.73262525 0.9585768 0.47978887 1 859 -ATOM N N . ASP A 0 103 . 103 ASP A N 1.0 ? 0.34132046 0.29680762 0.8546451 1 860 -ATOM C CA . ASP A 0 103 . 103 ASP A CA 1.0 ? 0.8380499 0.82760906 0.8133684 1 861 -ATOM C C . ASP A 0 103 . 103 ASP A C 1.0 ? 0.24084541 0.05898665 0.8919945 1 862 -ATOM O O . ASP A 0 103 . 103 ASP A O 1.0 ? 0.6587152 0.9429075 0.7772344 1 863 -ATOM C CB . ASP A 0 103 . 103 ASP A CB 1.0 ? 0.59710693 0.98196846 0.041798282 1 864 -ATOM C CG . ASP A 0 103 . 103 ASP A CG 1.0 ? 0.2305151 0.72256184 0.76471364 1 865 -ATOM O OD1 . ASP A 0 103 . 103 ASP A OD1 1.0 ? 0.8338795 0.66571707 0.6115365 1 866 -ATOM O OD2 . ASP A 0 103 . 103 ASP A OD2 1.0 ? 0.65635705 0.68970966 0.9582929 1 867 -ATOM N N . ASP A 0 104 . 104 ASP A N 1.0 ? 0.559868 0.0917437 0.7752322 1 868 -ATOM C CA . ASP A 0 104 . 104 ASP A CA 1.0 ? 0.8066603 0.16880263 0.27419403 1 869 -ATOM C C . ASP A 0 104 . 104 ASP A C 1.0 ? 0.5644569 0.49543318 0.9449237 1 870 -ATOM O O . ASP A 0 104 . 104 ASP A O 1.0 ? 0.68614817 0.7101292 0.5708391 1 871 -ATOM C CB . ASP A 0 104 . 104 ASP A CB 1.0 ? 0.23920317 0.10039536 0.8381814 1 872 -ATOM C CG . ASP A 0 104 . 104 ASP A CG 1.0 ? 0.8993302 0.27391183 0.31714082 1 873 -ATOM O OD1 . ASP A 0 104 . 104 ASP A OD1 1.0 ? 0.84217227 0.54413915 0.94520336 1 874 -ATOM O OD2 . ASP A 0 104 . 104 ASP A OD2 1.0 ? 0.64095145 0.9567071 0.33913627 1 875 -ATOM N N . ILE A 0 105 . 105 ILE A N 1.0 ? 0.2900177 0.21858485 0.056190975 1 876 -ATOM C CA . ILE A 0 105 . 105 ILE A CA 1.0 ? 0.6939667 0.9926398 0.76757264 1 877 -ATOM C C . ILE A 0 105 . 105 ILE A C 1.0 ? 0.10012602 0.059828594 0.93586445 1 878 -ATOM O O . ILE A 0 105 . 105 ILE A O 1.0 ? 0.8688101 0.94329864 0.93270475 1 879 -ATOM C CB . ILE A 0 105 . 105 ILE A CB 1.0 ? 0.9433498 0.5447171 0.4006076 1 880 -ATOM C CG1 . ILE A 0 105 . 105 ILE A CG1 1.0 ? 0.88822037 0.9503507 0.51044816 1 881 -ATOM C CG2 . ILE A 0 105 . 105 ILE A CG2 1.0 ? 0.35252517 0.3677641 0.09413228 1 882 -ATOM C CD1 . ILE A 0 105 . 
105 ILE A CD1 1.0 ? 0.9346723 0.9770429 0.9178189 1 883 -ATOM N N . VAL A 0 106 . 106 VAL A N 1.0 ? 0.12556203 0.51166886 0.768931 1 884 -ATOM C CA . VAL A 0 106 . 106 VAL A CA 1.0 ? 0.6565741 0.9343954 0.46931574 1 885 -ATOM C C . VAL A 0 106 . 106 VAL A C 1.0 ? 0.37071195 0.92672133 0.77920187 1 886 -ATOM O O . VAL A 0 106 . 106 VAL A O 1.0 ? 0.7925663 0.49211487 0.05528801 1 887 -ATOM C CB . VAL A 0 106 . 106 VAL A CB 1.0 ? 0.9089636 0.40352437 0.15786141 1 888 -ATOM C CG1 . VAL A 0 106 . 106 VAL A CG1 1.0 ? 0.47504684 0.6927147 0.8914684 1 889 -ATOM C CG2 . VAL A 0 106 . 106 VAL A CG2 1.0 ? 0.25196567 0.8939629 0.09026293 1 890 -ATOM N N . LEU A 0 107 . 107 LEU A N 1.0 ? 0.74015486 0.1021463 0.038200088 1 891 -ATOM C CA . LEU A 0 107 . 107 LEU A CA 1.0 ? 0.6772503 0.8641857 0.6708326 1 892 -ATOM C C . LEU A 0 107 . 107 LEU A C 1.0 ? 0.7776021 0.5872311 0.19438383 1 893 -ATOM O O . LEU A 0 107 . 107 LEU A O 1.0 ? 0.58359325 0.9464992 0.3040472 1 894 -ATOM C CB . LEU A 0 107 . 107 LEU A CB 1.0 ? 0.9908175 0.50428474 0.48866224 1 895 -ATOM C CG . LEU A 0 107 . 107 LEU A CG 1.0 ? 0.29059565 0.8653231 0.44198734 1 896 -ATOM C CD1 . LEU A 0 107 . 107 LEU A CD1 1.0 ? 0.08537846 0.43931878 0.3138285 1 897 -ATOM C CD2 . LEU A 0 107 . 107 LEU A CD2 1.0 ? 0.4779993 0.99553263 0.9125435 1 898 -ATOM N N . MET A 0 108 . 108 MET A N 1.0 ? 0.6788584 0.20716755 0.7356417 1 899 -ATOM C CA . MET A 0 108 . 108 MET A CA 1.0 ? 0.9389974 0.2903358 0.049109306 1 900 -ATOM C C . MET A 0 108 . 108 MET A C 1.0 ? 0.67364097 0.71792275 0.19891083 1 901 -ATOM O O . MET A 0 108 . 108 MET A O 1.0 ? 0.22015937 0.52867764 0.013734998 1 902 -ATOM C CB . MET A 0 108 . 108 MET A CB 1.0 ? 0.17348678 0.9306322 0.40121362 1 903 -ATOM C CG . MET A 0 108 . 108 MET A CG 1.0 ? 0.25625867 0.08764658 0.8047956 1 904 -ATOM S SD . MET A 0 108 . 108 MET A SD 1.0 ? 0.9312518 0.15004241 0.47052106 1 905 -ATOM C CE . MET A 0 108 . 108 MET A CE 1.0 ? 0.5381429 0.35533303 0.4656664 1 906 -ATOM N N . ALA A 0 109 . 
109 ALA A N 1.0 ? 0.3578806 0.74701494 0.2735358 1 907 -ATOM C CA . ALA A 0 109 . 109 ALA A CA 1.0 ? 0.3660515 0.97179955 0.319449 1 908 -ATOM C C . ALA A 0 109 . 109 ALA A C 1.0 ? 0.408924 0.7585942 0.9766908 1 909 -ATOM O O . ALA A 0 109 . 109 ALA A O 1.0 ? 0.5816843 0.54112184 0.65351456 1 910 -ATOM C CB . ALA A 0 109 . 109 ALA A CB 1.0 ? 0.6597621 0.56964284 0.925807 1 911 -ATOM N N . GLU A 0 110 . 110 GLU A N 1.0 ? 0.30280945 0.31197333 0.37412372 1 912 -ATOM C CA . GLU A 0 110 . 110 GLU A CA 1.0 ? 0.5849194 0.35412955 0.15677014 1 913 -ATOM C C . GLU A 0 110 . 110 GLU A C 1.0 ? 0.030953525 0.6285669 0.08141168 1 914 -ATOM O O . GLU A 0 110 . 110 GLU A O 1.0 ? 0.43945682 0.686849 0.82046384 1 915 -ATOM C CB . GLU A 0 110 . 110 GLU A CB 1.0 ? 0.6885959 0.3640525 0.6988451 1 916 -ATOM C CG . GLU A 0 110 . 110 GLU A CG 1.0 ? 0.023194173 0.29093382 0.9509536 1 917 -ATOM C CD . GLU A 0 110 . 110 GLU A CD 1.0 ? 0.25007418 0.9413328 0.7130306 1 918 -ATOM O OE1 . GLU A 0 110 . 110 GLU A OE1 1.0 ? 0.5054532 0.7617375 0.008353268 1 919 -ATOM O OE2 . GLU A 0 110 . 110 GLU A OE2 1.0 ? 0.64165133 0.41654962 0.15909855 1 920 -ATOM N N . ALA A 0 111 . 111 ALA A N 1.0 ? 0.77128166 0.805319 0.82219213 1 921 -ATOM C CA . ALA A 0 111 . 111 ALA A CA 1.0 ? 0.27666357 0.8036676 0.3267261 1 922 -ATOM C C . ALA A 0 111 . 111 ALA A C 1.0 ? 0.82016546 0.7147994 0.18440014 1 923 -ATOM O O . ALA A 0 111 . 111 ALA A O 1.0 ? 0.6034767 0.3011823 0.85572517 1 924 -ATOM C CB . ALA A 0 111 . 111 ALA A CB 1.0 ? 0.19538827 0.36723915 0.1378603 1 925 -ATOM N N . LEU A 0 112 . 112 LEU A N 1.0 ? 0.9349745 0.7735315 0.8743088 1 926 -ATOM C CA . LEU A 0 112 . 112 LEU A CA 1.0 ? 0.32874548 0.6760009 0.5940277 1 927 -ATOM C C . LEU A 0 112 . 112 LEU A C 1.0 ? 0.09934 0.61466813 0.8107764 1 928 -ATOM O O . LEU A 0 112 . 112 LEU A O 1.0 ? 0.17834648 0.32188284 0.18005578 1 929 -ATOM C CB . LEU A 0 112 . 112 LEU A CB 1.0 ? 0.36182866 0.8676471 0.45250398 1 930 -ATOM C CG . LEU A 0 112 . 112 LEU A CG 1.0 ? 
0.17934233 0.22518773 0.18929723 1 931 -ATOM C CD1 . LEU A 0 112 . 112 LEU A CD1 1.0 ? 0.3030717 0.82636714 0.07732992 1 932 -ATOM C CD2 . LEU A 0 112 . 112 LEU A CD2 1.0 ? 0.057565466 0.46554863 0.6988959 1 933 -ATOM N N . GLU A 0 113 . 113 GLU A N 1.0 ? 0.6661733 0.79137063 0.6688068 1 934 -ATOM C CA . GLU A 0 113 . 113 GLU A CA 1.0 ? 0.48769915 0.5490676 0.43566927 1 935 -ATOM C C . GLU A 0 113 . 113 GLU A C 1.0 ? 0.4911071 0.888652 0.35733393 1 936 -ATOM O O . GLU A 0 113 . 113 GLU A O 1.0 ? 0.11497206 0.0990783 0.8788884 1 937 -ATOM C CB . GLU A 0 113 . 113 GLU A CB 1.0 ? 0.40865657 0.8776229 0.3989618 1 938 -ATOM C CG . GLU A 0 113 . 113 GLU A CG 1.0 ? 0.030060116 0.8348458 0.051235784 1 939 -ATOM C CD . GLU A 0 113 . 113 GLU A CD 1.0 ? 0.7472812 0.4260645 0.77812713 1 940 -ATOM O OE1 . GLU A 0 113 . 113 GLU A OE1 1.0 ? 0.24276073 0.5168255 0.5654422 1 941 -ATOM O OE2 . GLU A 0 113 . 113 GLU A OE2 1.0 ? 0.57710725 0.7717926 0.28240535 1 942 -ATOM N N . LYS A 0 114 . 114 LYS A N 1.0 ? 0.75071186 0.024862783 0.72145736 1 943 -ATOM C CA . LYS A 0 114 . 114 LYS A CA 1.0 ? 0.90013856 0.3532775 0.7459578 1 944 -ATOM C C . LYS A 0 114 . 114 LYS A C 1.0 ? 0.79062974 0.158651 0.41711372 1 945 -ATOM O O . LYS A 0 114 . 114 LYS A O 1.0 ? 0.2973932 0.41876715 0.20137726 1 946 -ATOM C CB . LYS A 0 114 . 114 LYS A CB 1.0 ? 0.04189031 0.60211307 0.49584612 1 947 -ATOM C CG . LYS A 0 114 . 114 LYS A CG 1.0 ? 0.94917816 0.694122 0.28377262 1 948 -ATOM C CD . LYS A 0 114 . 114 LYS A CD 1.0 ? 0.67244786 0.43216345 0.42172512 1 949 -ATOM C CE . LYS A 0 114 . 114 LYS A CE 1.0 ? 0.3292481 0.5229216 0.39760807 1 950 -ATOM N NZ . LYS A 0 114 . 114 LYS A NZ 1.0 +1 0.50097674 0.1183099 0.49909472 1 951 -ATOM N N . LEU A 0 115 . 115 LEU A N 1.0 ? 0.71330607 0.6054141 0.089242 1 952 -ATOM C CA . LEU A 0 115 . 115 LEU A CA 1.0 ? 0.15822865 0.22511995 0.9169879 1 953 -ATOM C C . LEU A 0 115 . 115 LEU A C 1.0 ? 0.6620002 0.71851754 0.59361386 1 954 -ATOM O O . LEU A 0 115 . 
115 LEU A O 1.0 ? 0.35199055 0.84367734 0.5242517 1 955 -ATOM C CB . LEU A 0 115 . 115 LEU A CB 1.0 ? 0.008841878 0.78410655 0.60758615 1 956 -ATOM C CG . LEU A 0 115 . 115 LEU A CG 1.0 ? 0.62491 0.39330548 0.9450346 1 957 -ATOM C CD1 . LEU A 0 115 . 115 LEU A CD1 1.0 ? 0.61377984 0.17755502 0.026237136 1 958 -ATOM C CD2 . LEU A 0 115 . 115 LEU A CD2 1.0 ? 0.4809939 0.39049196 0.19693674 1 959 -ATOM N N . PHE A 0 116 . 116 PHE A N 1.0 ? 0.20030123 0.59187144 0.08177024 1 960 -ATOM C CA . PHE A 0 116 . 116 PHE A CA 1.0 ? 0.4667436 0.8816016 0.9932461 1 961 -ATOM C C . PHE A 0 116 . 116 PHE A C 1.0 ? 0.69728845 0.65339476 0.7797139 1 962 -ATOM O O . PHE A 0 116 . 116 PHE A O 1.0 ? 0.8833651 0.025668325 0.89671594 1 963 -ATOM C CB . PHE A 0 116 . 116 PHE A CB 1.0 ? 0.266381 0.11061052 0.052023735 1 964 -ATOM C CG . PHE A 0 116 . 116 PHE A CG 1.0 ? 0.84268296 0.7308888 0.6038574 1 965 -ATOM C CD1 . PHE A 0 116 . 116 PHE A CD1 1.0 ? 0.19942789 0.15386088 0.18294896 1 966 -ATOM C CD2 . PHE A 0 116 . 116 PHE A CD2 1.0 ? 0.33465037 0.94794536 0.5476423 1 967 -ATOM C CE1 . PHE A 0 116 . 116 PHE A CE1 1.0 ? 0.9383614 0.099523656 0.96803576 1 968 -ATOM C CE2 . PHE A 0 116 . 116 PHE A CE2 1.0 ? 0.21472117 0.25487202 0.33093652 1 969 -ATOM C CZ . PHE A 0 116 . 116 PHE A CZ 1.0 ? 0.28006345 0.1714861 0.5996599 1 970 -ATOM N N . LEU A 0 117 . 117 LEU A N 1.0 ? 0.076177426 0.55012643 0.52576107 1 971 -ATOM C CA . LEU A 0 117 . 117 LEU A CA 1.0 ? 0.3060391 0.44289988 0.18028574 1 972 -ATOM C C . LEU A 0 117 . 117 LEU A C 1.0 ? 0.4871771 0.00078695326 0.6287162 1 973 -ATOM O O . LEU A 0 117 . 117 LEU A O 1.0 ? 0.45251802 0.23775214 0.23355283 1 974 -ATOM C CB . LEU A 0 117 . 117 LEU A CB 1.0 ? 0.14825407 0.15471491 0.4436278 1 975 -ATOM C CG . LEU A 0 117 . 117 LEU A CG 1.0 ? 0.14017035 0.62213117 0.9105233 1 976 -ATOM C CD1 . LEU A 0 117 . 117 LEU A CD1 1.0 ? 0.39630118 0.8587569 0.9351885 1 977 -ATOM C CD2 . LEU A 0 117 . 117 LEU A CD2 1.0 ? 
0.018537743 0.13073528 0.03022651 1 978 -ATOM N N . GLN A 0 118 . 118 GLN A N 1.0 ? 0.7271397 0.20506045 0.51164687 1 979 -ATOM C CA . GLN A 0 118 . 118 GLN A CA 1.0 ? 0.51221883 0.21570651 0.6199441 1 980 -ATOM C C . GLN A 0 118 . 118 GLN A C 1.0 ? 0.49658605 0.39688706 0.29448554 1 981 -ATOM O O . GLN A 0 118 . 118 GLN A O 1.0 ? 0.972117 0.49531543 0.3067269 1 982 -ATOM C CB . GLN A 0 118 . 118 GLN A CB 1.0 ? 0.40327513 0.6198792 0.4505784 1 983 -ATOM C CG . GLN A 0 118 . 118 GLN A CG 1.0 ? 0.2661764 0.81747776 0.31911838 1 984 -ATOM C CD . GLN A 0 118 . 118 GLN A CD 1.0 ? 0.22744343 0.72311085 0.61433214 1 985 -ATOM O OE1 . GLN A 0 118 . 118 GLN A OE1 1.0 ? 0.5528642 0.64172906 0.19447161 1 986 -ATOM N NE2 . GLN A 0 118 . 118 GLN A NE2 1.0 ? 0.08832808 0.2623908 0.20186919 1 987 -ATOM N N . LYS A 0 119 . 119 LYS A N 1.0 ? 0.25498444 0.5098423 0.81854844 1 988 -ATOM C CA . LYS A 0 119 . 119 LYS A CA 1.0 ? 0.66824245 0.13893603 0.08785915 1 989 -ATOM C C . LYS A 0 119 . 119 LYS A C 1.0 ? 0.54339415 0.55636144 0.17998934 1 990 -ATOM O O . LYS A 0 119 . 119 LYS A O 1.0 ? 0.77621996 0.22275627 0.8478059 1 991 -ATOM C CB . LYS A 0 119 . 119 LYS A CB 1.0 ? 0.35290867 0.7588725 0.2297459 1 992 -ATOM C CG . LYS A 0 119 . 119 LYS A CG 1.0 ? 0.30345893 0.6881442 0.35701147 1 993 -ATOM C CD . LYS A 0 119 . 119 LYS A CD 1.0 ? 0.5987819 0.80997556 0.8336847 1 994 -ATOM C CE . LYS A 0 119 . 119 LYS A CE 1.0 ? 0.5326583 0.6251978 0.8692957 1 995 -ATOM N NZ . LYS A 0 119 . 119 LYS A NZ 1.0 +1 0.879834 0.24519433 0.816994 1 996 -ATOM N N . ILE A 0 120 . 120 ILE A N 1.0 ? 0.112604134 0.6655566 0.6816515 1 997 -ATOM C CA . ILE A 0 120 . 120 ILE A CA 1.0 ? 0.33693278 0.5614081 0.99112433 1 998 -ATOM C C . ILE A 0 120 . 120 ILE A C 1.0 ? 0.35719004 0.49964467 0.06581424 1 999 -ATOM O O . ILE A 0 120 . 120 ILE A O 1.0 ? 0.26818362 0.042281106 0.70935935 1 1000 -ATOM C CB . ILE A 0 120 . 120 ILE A CB 1.0 ? 0.7943753 0.93363506 0.67356896 1 1001 -ATOM C CG1 . ILE A 0 120 . 
120 ILE A CG1 1.0 ? 0.19323492 0.95882714 0.20462707 1 1002 -ATOM C CG2 . ILE A 0 120 . 120 ILE A CG2 1.0 ? 0.39258292 0.046195135 0.21726911 1 1003 -ATOM C CD1 . ILE A 0 120 . 120 ILE A CD1 1.0 ? 0.44597784 0.6486113 0.13075969 1 1004 -ATOM N N . ASN A 0 121 . 121 ASN A N 1.0 ? 0.24959469 0.7901994 0.8714875 1 1005 -ATOM C CA . ASN A 0 121 . 121 ASN A CA 1.0 ? 0.05682813 0.81533897 0.1672402 1 1006 -ATOM C C . ASN A 0 121 . 121 ASN A C 1.0 ? 0.56219625 0.2839334 0.34508768 1 1007 -ATOM O O . ASN A 0 121 . 121 ASN A O 1.0 ? 0.58679825 0.8700826 0.94332653 1 1008 -ATOM C CB . ASN A 0 121 . 121 ASN A CB 1.0 ? 0.42549813 0.23723836 0.29523385 1 1009 -ATOM C CG . ASN A 0 121 . 121 ASN A CG 1.0 ? 0.628008 0.06991332 0.39934957 1 1010 -ATOM O OD1 . ASN A 0 121 . 121 ASN A OD1 1.0 ? 0.21198225 0.099501565 0.89117974 1 1011 -ATOM N ND2 . ASN A 0 121 . 121 ASN A ND2 1.0 ? 0.048775516 0.91773474 0.71677923 1 1012 -ATOM N N . GLU A 0 122 . 122 GLU A N 1.0 ? 0.12443121 0.29303542 0.3117705 1 1013 -ATOM C CA . GLU A 0 122 . 122 GLU A CA 1.0 ? 0.54081744 0.5265314 0.23552199 1 1014 -ATOM C C . GLU A 0 122 . 122 GLU A C 1.0 ? 0.002721111 0.47222966 0.16601019 1 1015 -ATOM O O . GLU A 0 122 . 122 GLU A O 1.0 ? 0.7698542 0.81126124 0.43661353 1 1016 -ATOM C CB . GLU A 0 122 . 122 GLU A CB 1.0 ? 0.29884014 0.5261449 0.064183764 1 1017 -ATOM C CG . GLU A 0 122 . 122 GLU A CG 1.0 ? 0.7331288 0.583649 0.85882497 1 1018 -ATOM C CD . GLU A 0 122 . 122 GLU A CD 1.0 ? 0.18755578 0.35759768 0.8961456 1 1019 -ATOM O OE1 . GLU A 0 122 . 122 GLU A OE1 1.0 ? 0.6254003 0.033941396 0.7869856 1 1020 -ATOM O OE2 . GLU A 0 122 . 122 GLU A OE2 1.0 ? 0.6078151 0.5475361 0.70318335 1 1021 -ATOM N N . LEU A 0 123 . 123 LEU A N 1.0 ? 0.4618108 0.45796382 0.2419572 1 1022 -ATOM C CA . LEU A 0 123 . 123 LEU A CA 1.0 ? 0.15751822 0.6736664 0.4880099 1 1023 -ATOM C C . LEU A 0 123 . 123 LEU A C 1.0 ? 0.5451232 0.4262873 0.56096166 1 1024 -ATOM O O . LEU A 0 123 . 123 LEU A O 1.0 ? 
0.0059600207 0.081038125 0.30842263 1 1025 -ATOM C CB . LEU A 0 123 . 123 LEU A CB 1.0 ? 0.86942244 0.8436257 0.53918105 1 1026 -ATOM C CG . LEU A 0 123 . 123 LEU A CG 1.0 ? 0.9975556 0.49932283 0.86531866 1 1027 -ATOM C CD1 . LEU A 0 123 . 123 LEU A CD1 1.0 ? 0.564124 0.2477682 0.15705489 1 1028 -ATOM C CD2 . LEU A 0 123 . 123 LEU A CD2 1.0 ? 0.33865356 0.7926565 0.5572209 1 1029 -ATOM N N . PRO A 0 124 . 124 PRO A N 1.0 ? 0.8085479 0.3569686 0.01404128 1 1030 -ATOM C CA . PRO A 0 124 . 124 PRO A CA 1.0 ? 0.4628947 0.10219095 0.90201896 1 1031 -ATOM C C . PRO A 0 124 . 124 PRO A C 1.0 ? 0.9929011 0.92291665 0.47848082 1 1032 -ATOM O O . PRO A 0 124 . 124 PRO A O 1.0 ? 0.8079859 0.06917141 0.8942916 1 1033 -ATOM C CB . PRO A 0 124 . 124 PRO A CB 1.0 ? 0.3775353 0.20894742 0.065697394 1 1034 -ATOM C CG . PRO A 0 124 . 124 PRO A CG 1.0 ? 0.6530908 0.42644757 0.15872458 1 1035 -ATOM C CD . PRO A 0 124 . 124 PRO A CD 1.0 ? 0.8977084 0.70619094 0.49470606 1 1036 -ATOM N N . THR A 0 125 . 125 THR A N 1.0 ? 0.34461346 0.4751659 0.16209425 1 1037 -ATOM C CA . THR A 0 125 . 125 THR A CA 1.0 ? 0.40489158 0.30363917 0.767085 1 1038 -ATOM C C . THR A 0 125 . 125 THR A C 1.0 ? 0.35222888 0.7838939 0.33252817 1 1039 -ATOM O O . THR A 0 125 . 125 THR A O 1.0 ? 0.1651667 0.22669357 0.48177278 1 1040 -ATOM C CB . THR A 0 125 . 125 THR A CB 1.0 ? 0.6468401 0.22748764 0.6334286 1 1041 -ATOM O OG1 . THR A 0 125 . 125 THR A OG1 1.0 ? 0.097107105 0.36540738 0.27664828 1 1042 -ATOM C CG2 . THR A 0 125 . 125 THR A CG2 1.0 ? 0.18344884 0.6077041 0.9494623 1 1043 -ATOM N N . GLU A 0 126 . 126 GLU A N 1.0 ? 0.7723829 0.42752236 0.11237547 1 1044 -ATOM C CA . GLU A 0 126 . 126 GLU A CA 1.0 ? 0.04028293 0.45658332 0.7575335 1 1045 -ATOM C C . GLU A 0 126 . 126 GLU A C 1.0 ? 0.04753869 0.4968612 0.19252771 1 1046 -ATOM O O . GLU A 0 126 . 126 GLU A O 1.0 ? 0.031560935 0.07496955 0.5119186 1 1047 -ATOM C CB . GLU A 0 126 . 126 GLU A CB 1.0 ? 0.37058473 0.93847185 0.8985025 1 1048 -ATOM C CG . 
GLU A 0 126 . 126 GLU A CG 1.0 ? 0.8142524 0.012600652 0.5233891 1 1049 -ATOM C CD . GLU A 0 126 . 126 GLU A CD 1.0 ? 0.5154072 0.62330824 0.8445778 1 1050 -ATOM O OE1 . GLU A 0 126 . 126 GLU A OE1 1.0 ? 0.5513595 0.29976958 0.25097626 1 1051 -ATOM O OE2 . GLU A 0 126 . 126 GLU A OE2 1.0 ? 0.099623606 0.3322798 0.32472375 1 1052 -ATOM N N . GLU A 0 127 . 127 GLU A N 1.0 ? 0.28451067 0.0044668657 0.8232511 1 1053 -ATOM C CA . GLU A 0 127 . 127 GLU A CA 1.0 ? 0.6446222 0.6141458 0.15091287 1 1054 -ATOM C C . GLU A 0 127 . 127 GLU A C 1.0 ? 0.33612388 0.042631596 0.7843061 1 1055 -ATOM O O . GLU A 0 127 . 127 GLU A O 1.0 ? 0.22867699 0.99703974 0.26968738 1 1056 -ATOM C CB . GLU A 0 127 . 127 GLU A CB 1.0 ? 0.895542 0.119029514 0.33350363 1 1057 -ATOM C CG . GLU A 0 127 . 127 GLU A CG 1.0 ? 0.97272253 0.75843847 0.5037603 1 1058 -ATOM C CD . GLU A 0 127 . 127 GLU A CD 1.0 ? 0.81596637 0.9550886 0.99930865 1 1059 -ATOM O OE1 . GLU A 0 127 . 127 GLU A OE1 1.0 ? 0.4879467 0.6633147 0.7991188 1 1060 -ATOM O OE2 . GLU A 0 127 . 127 GLU A OE2 1.0 ? 0.013608676 0.058554858 0.05713468 1 1061 -ATOM O OXT . GLU A 0 127 . 127 GLU A OXT 1.0 ? 0.9214994 0.49781516 0.59980375 1 1062 -HETATM C C4 . 7Z2 C 1 1 . 1 7Z2 C C4 1.0 ? 8.641 0.198 0.651 1 1063 -HETATM C C5 . 7Z2 C 1 1 . 1 7Z2 C C5 1.0 ? 8.5 -0.873 -0.214 1 1064 -HETATM C C6 . 7Z2 C 1 1 . 1 7Z2 C C6 1.0 ? 7.035 -1.078 -0.497 1 1065 -HETATM C C7 . 7Z2 C 1 1 . 1 7Z2 C C7 1.0 ? 9.63 -1.538 -0.652 1 1066 -HETATM C C10 . 7Z2 C 1 1 . 1 7Z2 C C10 1.0 ? 9.915 0.568 1.049 1 1067 -HETATM N N12 . 7Z2 C 1 1 . 1 7Z2 C N12 1.0 ? 4.162 -0.724 -0.445 1 1068 -HETATM C C13 . 7Z2 C 1 1 . 1 7Z2 C C13 1.0 ? 2.786 -0.488 -0.516 1 1069 -HETATM C C15 . 7Z2 C 1 1 . 1 7Z2 C C15 1.0 ? 0.546 -1.33 -0.673 1 1070 -HETATM C C17 . 7Z2 C 1 1 . 1 7Z2 C C17 1.0 ? -1.393 0.229 -0.734 1 1071 -HETATM C C20 . 7Z2 C 1 1 . 1 7Z2 C C20 1.0 ? -4.695 -1.62 -0.443 1 1072 -HETATM C C21 . 7Z2 C 1 1 . 1 7Z2 C C21 1.0 ? -5.284 -2.403 0.562 1 1073 -HETATM C C22 . 7Z2 C 1 1 . 
1 7Z2 C C22 1.0 ? -6.269 -3.361 0.217 1 1074 -HETATM C C24 . 7Z2 C 1 1 . 1 7Z2 C C24 1.0 ? -6.045 -2.732 -2.081 1 1075 -HETATM C C28 . 7Z2 C 1 1 . 1 7Z2 C C28 1.0 ? -5.534 -3.053 2.837 1 1076 -HETATM C C3 . 7Z2 C 1 1 . 1 7Z2 C C3 1.0 ? 7.278 0.749 0.972 1 1077 -HETATM C C9 . 7Z2 C 1 1 . 1 7Z2 C C9 1.0 ? 11.0 -0.143 0.57 1 1078 -HETATM N N8 . 7Z2 C 1 1 . 1 7Z2 C N8 1.0 ? 10.83 -1.16 -0.252 1 1079 -HETATM N N2 . 7Z2 C 1 1 . 1 7Z2 C N2 1.0 ? 6.289 -0.062 0.252 1 1080 -HETATM C C1 . 7Z2 C 1 1 . 1 7Z2 C C1 1.0 ? 4.951 0.101 0.27 1 1081 -HETATM O O11 . 7Z2 C 1 1 . 1 7Z2 C O11 1.0 ? 4.457 0.994 0.932 1 1082 -HETATM C C40 . 7Z2 C 1 1 . 1 7Z2 C C40 1.0 ? 2.305 0.816 -0.507 1 1083 -HETATM C C39 . 7Z2 C 1 1 . 1 7Z2 C C39 1.0 ? 0.95 1.053 -0.578 1 1084 -HETATM C C14 . 7Z2 C 1 1 . 1 7Z2 C C14 1.0 ? 1.903 -1.559 -0.602 1 1085 -HETATM C C16 . 7Z2 C 1 1 . 1 7Z2 C C16 1.0 ? 0.058 -0.02 -0.658 1 1086 -HETATM C C18 . 7Z2 C 1 1 . 1 7Z2 C C18 1.0 ? -2.34 -0.939 -0.82 1 1087 -HETATM N N33 . 7Z2 C 1 1 . 1 7Z2 C N33 1.0 ? -1.83 1.445 -0.72 1 1088 -HETATM N N32 . 7Z2 C 1 1 . 1 7Z2 C N32 1.0 ? -3.197 1.731 -0.757 1 1089 -HETATM C C34 . 7Z2 C 1 1 . 1 7Z2 C C34 1.0 ? -3.647 3.089 -1.073 1 1090 -HETATM C C35 . 7Z2 C 1 1 . 1 7Z2 C C35 1.0 ? -3.771 3.899 0.219 1 1091 -HETATM C C36 . 7Z2 C 1 1 . 1 7Z2 C C36 1.0 ? -4.24 5.318 -0.112 1 1092 -HETATM C C37 . 7Z2 C 1 1 . 1 7Z2 C C37 1.0 ? -4.364 6.128 1.18 1 1093 -HETATM N N38 . 7Z2 C 1 1 . 1 7Z2 C N38 1.0 ? -4.815 7.49 0.862 1 1094 -HETATM C C30 . 7Z2 C 1 1 . 1 7Z2 C C30 1.0 ? -4.105 0.769 -0.505 1 1095 -HETATM O O31 . 7Z2 C 1 1 . 1 7Z2 C O31 1.0 ? -5.29 1.005 -0.613 1 1096 -HETATM C C19 . 7Z2 C 1 1 . 1 7Z2 C C19 1.0 ? -3.642 -0.601 -0.091 1 1097 -HETATM C C29 . 7Z2 C 1 1 . 1 7Z2 C C29 1.0 ? -4.918 -2.257 1.914 1 1098 -HETATM C C27 . 7Z2 C 1 1 . 1 7Z2 C C27 1.0 ? -6.495 -3.976 2.43 1 1099 -HETATM N N26 . 7Z2 C 1 1 . 1 7Z2 C N26 1.0 ? -6.834 -4.109 1.171 1 1100 -HETATM C C23 . 7Z2 C 1 1 . 1 7Z2 C C23 1.0 ? -6.636 -3.508 -1.132 1 1101 -HETATM C C25 . 
7Z2 C 1 1 . 1 7Z2 C C25 1.0 ? -5.077 -1.79 -1.739 1 1102 -# diff --git a/rf2aa/tests/data/example_from_smiles.cif b/rf2aa/tests/data/example_from_smiles.cif deleted file mode 100644 index 9aa7c80..0000000 --- a/rf2aa/tests/data/example_from_smiles.cif +++ /dev/null @@ -1,1355 +0,0 @@ -data_unknown_id -# -_entry.id unknown_id -_entry.author ncorley -_entry.date 2025-01-14 -_entry.time 16:42:23 -# -_entity_poly.entity_id 0 -_entity_poly.type polypeptide(l) -_entity_poly.nstd_linkage no -_entity_poly.nstd_monomer no -_entity_poly.pdbx_seq_one_letter_code -;SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWN -AQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE -; -_entity_poly.pdbx_seq_one_letter_code_can -;SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWN -AQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE -; -_entity_poly.pdbx_strand_id A -_entity_poly.pdbx_target_identifier ? -# -loop_ -_chem_comp_bond.pdbx_ordinal -_chem_comp_bond.comp_id -_chem_comp_bond.atom_id_1 -_chem_comp_bond.atom_id_2 -_chem_comp_bond.value_order -_chem_comp_bond.pdbx_aromatic_flag -_chem_comp_bond.pdbx_stereo_config -1 SER N CA SING N ? -2 SER CA C SING N ? -3 SER CA CB SING N ? -4 SER C O DOUB N ? -5 SER CB OG SING N ? -6 MET N CA SING N ? -7 MET CA C SING N ? -8 MET CA CB SING N ? -9 MET C O DOUB N ? -10 MET CB CG SING N ? -11 MET CG SD SING N ? -12 MET SD CE SING N ? -13 ASN N CA SING N ? -14 ASN CA C SING N ? -15 ASN CA CB SING N ? -16 ASN C O DOUB N ? -17 ASN CB CG SING N ? -18 ASN CG OD1 DOUB N ? -19 ASN CG ND2 SING N ? -20 PRO N CA SING N ? -21 PRO N CD SING N ? -22 PRO CA C SING N ? -23 PRO CA CB SING N ? -24 PRO C O DOUB N ? -25 PRO CB CG SING N ? -26 PRO CG CD SING N ? -27 GLU N CA SING N ? -28 GLU CA C SING N ? -29 GLU CA CB SING N ? -30 GLU C O DOUB N ? -31 GLU CB CG SING N ? -32 GLU CG CD SING N ? -33 GLU CD OE1 DOUB N ? -34 GLU CD OE2 SING N ? -35 THR N CA SING N ? -36 THR CA C SING N ? -37 THR CA CB SING N ? 
-38 THR C O DOUB N ? -39 THR CB OG1 SING N ? -40 THR CB CG2 SING N ? -41 LYS N CA SING N ? -42 LYS CA C SING N ? -43 LYS CA CB SING N ? -44 LYS C O DOUB N ? -45 LYS CB CG SING N ? -46 LYS CG CD SING N ? -47 LYS CD CE SING N ? -48 LYS CE NZ SING N ? -49 ARG N CA SING N ? -50 ARG CA C SING N ? -51 ARG CA CB SING N ? -52 ARG C O DOUB N ? -53 ARG CB CG SING N ? -54 ARG CG CD SING N ? -55 ARG CD NE SING N ? -56 ARG NE CZ SING N ? -57 ARG CZ NH1 SING N ? -58 ARG CZ NH2 DOUB N ? -59 GLN N CA SING N ? -60 GLN CA C SING N ? -61 GLN CA CB SING N ? -62 GLN C O DOUB N ? -63 GLN CB CG SING N ? -64 GLN CG CD SING N ? -65 GLN CD OE1 DOUB N ? -66 GLN CD NE2 SING N ? -67 LEU N CA SING N ? -68 LEU CA C SING N ? -69 LEU CA CB SING N ? -70 LEU C O DOUB N ? -71 LEU CB CG SING N ? -72 LEU CG CD1 SING N ? -73 LEU CG CD2 SING N ? -74 TYR N CA SING N ? -75 TYR CA C SING N ? -76 TYR CA CB SING N ? -77 TYR C O DOUB N ? -78 TYR CB CG SING N ? -79 TYR CG CD1 DOUB Y ? -80 TYR CG CD2 SING Y ? -81 TYR CD1 CE1 SING Y ? -82 TYR CD2 CE2 DOUB Y ? -83 TYR CE1 CZ DOUB Y ? -84 TYR CE2 CZ SING Y ? -85 TYR CZ OH SING N ? -86 VAL N CA SING N ? -87 VAL CA C SING N ? -88 VAL CA CB SING N ? -89 VAL C O DOUB N ? -90 VAL CB CG1 SING N ? -91 VAL CB CG2 SING N ? -92 TRP N CA SING N ? -93 TRP CA C SING N ? -94 TRP CA CB SING N ? -95 TRP C O DOUB N ? -96 TRP CB CG SING N ? -97 TRP CG CD1 DOUB Y ? -98 TRP CG CD2 SING Y ? -99 TRP CD1 NE1 SING Y ? -100 TRP CD2 CE2 DOUB Y ? -101 TRP CD2 CE3 SING Y ? -102 TRP NE1 CE2 SING Y ? -103 TRP CE2 CZ2 SING Y ? -104 TRP CE3 CZ3 DOUB Y ? -105 TRP CZ2 CH2 DOUB Y ? -106 TRP CZ3 CH2 SING Y ? -107 HIS N CA SING N ? -108 HIS CA C SING N ? -109 HIS CA CB SING N ? -110 HIS C O DOUB N ? -111 HIS CB CG SING N ? -112 HIS CG ND1 SING Y ? -113 HIS CG CD2 DOUB Y ? -114 HIS ND1 CE1 DOUB Y ? -115 HIS CD2 NE2 SING Y ? -116 HIS CE1 NE2 SING Y ? -117 PHE N CA SING N ? -118 PHE CA C SING N ? -119 PHE CA CB SING N ? -120 PHE C O DOUB N ? -121 PHE CB CG SING N ? -122 PHE CG CD1 DOUB Y ? 
-123 PHE CG CD2 SING Y ? -124 PHE CD1 CE1 SING Y ? -125 PHE CD2 CE2 DOUB Y ? -126 PHE CE1 CZ DOUB Y ? -127 PHE CE2 CZ SING Y ? -128 ALA N CA SING N ? -129 ALA CA C SING N ? -130 ALA CA CB SING N ? -131 ALA C O DOUB N ? -132 ASP N CA SING N ? -133 ASP CA C SING N ? -134 ASP CA CB SING N ? -135 ASP C O DOUB N ? -136 ASP CB CG SING N ? -137 ASP CG OD1 DOUB N ? -138 ASP CG OD2 SING N ? -139 ILE N CA SING N ? -140 ILE CA C SING N ? -141 ILE CA CB SING N ? -142 ILE C O DOUB N ? -143 ILE CB CG1 SING N ? -144 ILE CB CG2 SING N ? -145 ILE CG1 CD1 SING N ? -146 GLY N CA SING N ? -147 GLY CA C SING N ? -148 GLY C O DOUB N ? -149 CYS N CA SING N ? -150 CYS CA C SING N ? -151 CYS CA CB SING N ? -152 CYS C O DOUB N ? -153 CYS CB SG SING N ? -154 GLU C OXT SING N ? -155 L:C N0 C0 SING N ? -156 L:C C0 C1 SING N ? -157 L:C C1 C2 SING N ? -158 L:C C2 C3 SING N ? -159 L:C C3 N1 SING N ? -160 L:C N1 N2 SING N ? -161 L:C N2 C4 DOUB N ? -162 L:C C4 C5 SING N ? -163 L:C C5 C6 SING N ? -164 L:C C6 C7 SING N ? -165 L:C C7 O0 DOUB N ? -166 L:C C6 C8 SING N ? -167 L:C C8 C9 DOUB Y ? -168 L:C C9 C10 SING Y ? -169 L:C C10 C11 DOUB Y ? -170 L:C C11 C12 SING Y ? -171 L:C C12 N3 DOUB Y ? -172 L:C N3 C13 SING Y ? -173 L:C C13 C14 DOUB Y ? -174 L:C C14 C15 SING Y ? -175 L:C C15 C16 DOUB Y ? -176 L:C C4 C17 SING N ? -177 L:C C17 C18 DOUB Y ? -178 L:C C18 C19 SING Y ? -179 L:C C19 C20 DOUB Y ? -180 L:C C20 N4 SING N ? -181 L:C N4 C21 SING N ? -182 L:C C21 O1 DOUB N ? -183 L:C C21 N5 SING N ? -184 L:C N5 C22 SING N ? -185 L:C C22 C23 SING N ? -186 L:C C23 C24 DOUB Y ? -187 L:C C24 C25 SING Y ? -188 L:C C25 N6 DOUB Y ? -189 L:C N6 C26 SING Y ? -190 L:C C26 C27 DOUB Y ? -191 L:C C27 C28 SING N ? -192 L:C C20 C29 SING Y ? -193 L:C C29 C30 DOUB Y ? -194 L:C N1 C7 SING N ? -195 L:C C8 C16 SING Y ? -196 L:C C12 C16 SING Y ? -197 L:C C17 C30 SING Y ? -198 L:C N5 C28 SING N ? -199 L:C C23 C27 SING Y ? 
-# -loop_ -_atom_site.group_PDB -_atom_site.type_symbol -_atom_site.label_atom_id -_atom_site.label_alt_id -_atom_site.label_comp_id -_atom_site.label_asym_id -_atom_site.label_entity_id -_atom_site.label_seq_id -_atom_site.pdbx_PDB_ins_code -_atom_site.auth_seq_id -_atom_site.auth_comp_id -_atom_site.auth_asym_id -_atom_site.auth_atom_id -_atom_site.B_iso_or_equiv -_atom_site.occupancy -_atom_site.pdbx_formal_charge -_atom_site.Cartn_x -_atom_site.Cartn_y -_atom_site.Cartn_z -_atom_site.pdbx_PDB_model_num -_atom_site.id -ATOM N N . SER A 0 1 . 1 SER A N nan 1.0 ? 0.90035236 0.4302069 0.7163823 1 1 -ATOM C CA . SER A 0 1 . 1 SER A CA nan 1.0 ? 0.6971032 0.35501453 0.13359353 1 2 -ATOM C C . SER A 0 1 . 1 SER A C nan 1.0 ? 0.46320736 0.37326783 0.29685754 1 3 -ATOM O O . SER A 0 1 . 1 SER A O nan 1.0 ? 0.8700859 0.65758044 0.300621 1 4 -ATOM C CB . SER A 0 1 . 1 SER A CB nan 1.0 ? 0.5236764 0.85382247 0.8859707 1 5 -ATOM O OG . SER A 0 1 . 1 SER A OG nan 1.0 ? 0.4033607 0.19838686 0.81219655 1 6 -ATOM N N . MET A 0 2 . 2 MET A N nan 1.0 ? 0.28706038 0.40829295 0.040427737 1 7 -ATOM C CA . MET A 0 2 . 2 MET A CA nan 1.0 ? 0.4769987 0.97909445 0.54507256 1 8 -ATOM C C . MET A 0 2 . 2 MET A C nan 1.0 ? 0.60801655 0.6710983 0.58191663 1 9 -ATOM O O . MET A 0 2 . 2 MET A O nan 1.0 ? 0.7096282 0.10476555 0.44083884 1 10 -ATOM C CB . MET A 0 2 . 2 MET A CB nan 1.0 ? 0.2958249 0.8230619 0.37847888 1 11 -ATOM C CG . MET A 0 2 . 2 MET A CG nan 1.0 ? 0.053318124 0.58594424 0.4628616 1 12 -ATOM S SD . MET A 0 2 . 2 MET A SD nan 1.0 ? 0.078251876 0.80653214 0.86228144 1 13 -ATOM C CE . MET A 0 2 . 2 MET A CE nan 1.0 ? 0.815016 0.84538174 0.5613932 1 14 -ATOM N N . ASN A 0 3 . 3 ASN A N nan 1.0 ? 0.49641773 0.14597774 0.158376 1 15 -ATOM C CA . ASN A 0 3 . 3 ASN A CA nan 1.0 ? 0.96797264 0.9142554 0.1237758 1 16 -ATOM C C . ASN A 0 3 . 3 ASN A C nan 1.0 ? 0.38078973 0.8542158 0.39684743 1 17 -ATOM O O . ASN A 0 3 . 3 ASN A O nan 1.0 ? 
0.95467925 0.3920897 0.23853333 1 18 -ATOM C CB . ASN A 0 3 . 3 ASN A CB nan 1.0 ? 0.24331915 0.69876623 0.116336755 1 19 -ATOM C CG . ASN A 0 3 . 3 ASN A CG nan 1.0 ? 0.010204786 0.69237316 0.005869554 1 20 -ATOM O OD1 . ASN A 0 3 . 3 ASN A OD1 nan 1.0 ? 0.41638935 0.0026478663 0.22029379 1 21 -ATOM N ND2 . ASN A 0 3 . 3 ASN A ND2 nan 1.0 ? 0.25485346 0.302807 0.90813696 1 22 -ATOM N N . PRO A 0 4 . 4 PRO A N nan 1.0 ? 0.91512895 0.287526 0.8941395 1 23 -ATOM C CA . PRO A 0 4 . 4 PRO A CA nan 1.0 ? 0.39363343 0.49111447 0.12512057 1 24 -ATOM C C . PRO A 0 4 . 4 PRO A C nan 1.0 ? 0.268333 0.3885615 0.7857272 1 25 -ATOM O O . PRO A 0 4 . 4 PRO A O nan 1.0 ? 0.52853566 0.6141442 0.6740617 1 26 -ATOM C CB . PRO A 0 4 . 4 PRO A CB nan 1.0 ? 0.47291723 0.8100004 0.74794513 1 27 -ATOM C CG . PRO A 0 4 . 4 PRO A CG nan 1.0 ? 0.38139415 0.119311586 0.8247927 1 28 -ATOM C CD . PRO A 0 4 . 4 PRO A CD nan 1.0 ? 0.66976887 0.36676714 0.19552118 1 29 -ATOM N N . PRO A 0 5 . 5 PRO A N nan 1.0 ? 0.22908053 0.065895535 0.6830425 1 30 -ATOM C CA . PRO A 0 5 . 5 PRO A CA nan 1.0 ? 0.8689927 0.4946862 0.41907245 1 31 -ATOM C C . PRO A 0 5 . 5 PRO A C nan 1.0 ? 0.9279543 0.94879967 0.22152786 1 32 -ATOM O O . PRO A 0 5 . 5 PRO A O nan 1.0 ? 0.69537675 0.3224398 0.96000046 1 33 -ATOM C CB . PRO A 0 5 . 5 PRO A CB nan 1.0 ? 0.1696772 0.7088822 0.21519484 1 34 -ATOM C CG . PRO A 0 5 . 5 PRO A CG nan 1.0 ? 0.3533213 0.0943175 0.66805255 1 35 -ATOM C CD . PRO A 0 5 . 5 PRO A CD nan 1.0 ? 0.62336755 0.9581534 0.6568798 1 36 -ATOM N N . PRO A 0 6 . 6 PRO A N nan 1.0 ? 0.06940596 0.70307165 0.6662664 1 37 -ATOM C CA . PRO A 0 6 . 6 PRO A CA nan 1.0 ? 0.6892867 0.6168599 0.14308092 1 38 -ATOM C C . PRO A 0 6 . 6 PRO A C nan 1.0 ? 0.09490764 0.38419014 0.7705685 1 39 -ATOM O O . PRO A 0 6 . 6 PRO A O nan 1.0 ? 0.14080518 0.9563648 0.5346685 1 40 -ATOM C CB . PRO A 0 6 . 6 PRO A CB nan 1.0 ? 0.9663931 0.486203 0.013866949 1 41 -ATOM C CG . PRO A 0 6 . 6 PRO A CG nan 1.0 ? 
0.96804774 0.11245205 0.31621242 1 42 -ATOM C CD . PRO A 0 6 . 6 PRO A CD nan 1.0 ? 0.7779321 0.09581208 0.70542866 1 43 -ATOM N N . PRO A 0 7 . 7 PRO A N nan 1.0 ? 0.8888649 0.50545573 0.6307841 1 44 -ATOM C CA . PRO A 0 7 . 7 PRO A CA nan 1.0 ? 0.50297016 0.6778509 0.21354541 1 45 -ATOM C C . PRO A 0 7 . 7 PRO A C nan 1.0 ? 0.045569662 0.0841009 0.33914855 1 46 -ATOM O O . PRO A 0 7 . 7 PRO A O nan 1.0 ? 0.013360759 0.68114823 0.7088449 1 47 -ATOM C CB . PRO A 0 7 . 7 PRO A CB nan 1.0 ? 0.7176777 0.38699394 0.6850554 1 48 -ATOM C CG . PRO A 0 7 . 7 PRO A CG nan 1.0 ? 0.32134277 0.17223547 0.67385787 1 49 -ATOM C CD . PRO A 0 7 . 7 PRO A CD nan 1.0 ? 0.39747703 0.7812562 0.9593548 1 50 -ATOM N N . GLU A 0 8 . 8 GLU A N nan 1.0 ? 0.8784727 0.37323436 0.66725504 1 51 -ATOM C CA . GLU A 0 8 . 8 GLU A CA nan 1.0 ? 0.99158704 0.16245256 0.7092837 1 52 -ATOM C C . GLU A 0 8 . 8 GLU A C nan 1.0 ? 0.8302248 0.4291895 0.892487 1 53 -ATOM O O . GLU A 0 8 . 8 GLU A O nan 1.0 ? 0.48904023 0.26367566 0.9557627 1 54 -ATOM C CB . GLU A 0 8 . 8 GLU A CB nan 1.0 ? 0.9319665 0.024227733 0.84850943 1 55 -ATOM C CG . GLU A 0 8 . 8 GLU A CG nan 1.0 ? 0.5621614 0.9699926 0.41313997 1 56 -ATOM C CD . GLU A 0 8 . 8 GLU A CD nan 1.0 ? 0.67478555 0.67540884 0.118769825 1 57 -ATOM O OE1 . GLU A 0 8 . 8 GLU A OE1 nan 1.0 ? 0.36325958 0.39679015 0.14203572 1 58 -ATOM O OE2 . GLU A 0 8 . 8 GLU A OE2 nan 1.0 ? 0.8833815 0.20878746 0.22903591 1 59 -ATOM N N . THR A 0 9 . 9 THR A N nan 1.0 ? 0.08051662 0.5613377 0.010219526 1 60 -ATOM C CA . THR A 0 9 . 9 THR A CA nan 1.0 ? 0.5979657 0.13195919 0.8298824 1 61 -ATOM C C . THR A 0 9 . 9 THR A C nan 1.0 ? 0.515398 0.60558206 0.3437977 1 62 -ATOM O O . THR A 0 9 . 9 THR A O nan 1.0 ? 0.91219 0.9507664 0.3463389 1 63 -ATOM C CB . THR A 0 9 . 9 THR A CB nan 1.0 ? 0.8449393 0.114281625 0.23551576 1 64 -ATOM O OG1 . THR A 0 9 . 9 THR A OG1 nan 1.0 ? 0.489666 0.48146048 0.38106778 1 65 -ATOM C CG2 . THR A 0 9 . 9 THR A CG2 nan 1.0 ? 
0.79823315 0.9258048 0.10134563 1 66 -ATOM N N . SER A 0 10 . 10 SER A N nan 1.0 ? 0.6108018 0.80989087 0.25512108 1 67 -ATOM C CA . SER A 0 10 . 10 SER A CA nan 1.0 ? 0.75937545 0.36268067 0.70075774 1 68 -ATOM C C . SER A 0 10 . 10 SER A C nan 1.0 ? 0.75000536 0.25641057 0.9289053 1 69 -ATOM O O . SER A 0 10 . 10 SER A O nan 1.0 ? 0.0771761 0.9581633 0.0013905136 1 70 -ATOM C CB . SER A 0 10 . 10 SER A CB nan 1.0 ? 0.20673573 0.83680725 0.76976836 1 71 -ATOM O OG . SER A 0 10 . 10 SER A OG nan 1.0 ? 0.0034697778 0.77684075 0.2555122 1 72 -ATOM N N . ASN A 0 11 . 11 ASN A N nan 1.0 ? 0.9693551 0.16306387 0.26883358 1 73 -ATOM C CA . ASN A 0 11 . 11 ASN A CA nan 1.0 ? 0.7426928 0.98909396 0.5688915 1 74 -ATOM C C . ASN A 0 11 . 11 ASN A C nan 1.0 ? 0.04003222 0.17947668 0.15506603 1 75 -ATOM O O . ASN A 0 11 . 11 ASN A O nan 1.0 ? 0.6014169 0.03092278 0.8408279 1 76 -ATOM C CB . ASN A 0 11 . 11 ASN A CB nan 1.0 ? 0.437572 0.8683382 0.6482593 1 77 -ATOM C CG . ASN A 0 11 . 11 ASN A CG nan 1.0 ? 0.22575438 0.81169844 0.7083207 1 78 -ATOM O OD1 . ASN A 0 11 . 11 ASN A OD1 nan 1.0 ? 0.14037263 0.050356068 0.7218541 1 79 -ATOM N ND2 . ASN A 0 11 . 11 ASN A ND2 nan 1.0 ? 0.40493825 0.11100167 0.72680026 1 80 -ATOM N N . PRO A 0 12 . 12 PRO A N nan 1.0 ? 0.17276008 0.36888927 0.50235534 1 81 -ATOM C CA . PRO A 0 12 . 12 PRO A CA nan 1.0 ? 0.31055555 0.00036118337 0.701897 1 82 -ATOM C C . PRO A 0 12 . 12 PRO A C nan 1.0 ? 0.90319073 0.5498373 0.50094724 1 83 -ATOM O O . PRO A 0 12 . 12 PRO A O nan 1.0 ? 0.21151268 0.53985125 0.2528853 1 84 -ATOM C CB . PRO A 0 12 . 12 PRO A CB nan 1.0 ? 0.35605824 0.4249985 0.5286867 1 85 -ATOM C CG . PRO A 0 12 . 12 PRO A CG nan 1.0 ? 0.9982561 0.15831429 0.47581136 1 86 -ATOM C CD . PRO A 0 12 . 12 PRO A CD nan 1.0 ? 0.588293 0.9344325 0.38028795 1 87 -ATOM N N . ASN A 0 13 . 13 ASN A N nan 1.0 ? 0.95318145 0.8033122 0.47886482 1 88 -ATOM C CA . ASN A 0 13 . 13 ASN A CA nan 1.0 ? 0.7627138 0.4919258 0.040642902 1 89 -ATOM C C . 
ASN A 0 13 . 13 ASN A C nan 1.0 ? 0.3954067 0.19656084 0.9009545 1 90 -ATOM O O . ASN A 0 13 . 13 ASN A O nan 1.0 ? 0.35979107 0.6426788 0.40092516 1 91 -ATOM C CB . ASN A 0 13 . 13 ASN A CB nan 1.0 ? 0.7060573 0.58903086 0.19027597 1 92 -ATOM C CG . ASN A 0 13 . 13 ASN A CG nan 1.0 ? 0.38162017 0.2820622 0.5548114 1 93 -ATOM O OD1 . ASN A 0 13 . 13 ASN A OD1 nan 1.0 ? 0.86649996 0.19685824 0.52346003 1 94 -ATOM N ND2 . ASN A 0 13 . 13 ASN A ND2 nan 1.0 ? 0.17100447 0.88242465 0.8431559 1 95 -ATOM N N . LYS A 0 14 . 14 LYS A N nan 1.0 ? 0.6764837 0.67932016 0.06313559 1 96 -ATOM C CA . LYS A 0 14 . 14 LYS A CA nan 1.0 ? 0.70319027 0.5521108 0.9721134 1 97 -ATOM C C . LYS A 0 14 . 14 LYS A C nan 1.0 ? 0.42003372 0.62619114 0.1446111 1 98 -ATOM O O . LYS A 0 14 . 14 LYS A O nan 1.0 ? 0.8068072 0.66767824 0.93841803 1 99 -ATOM C CB . LYS A 0 14 . 14 LYS A CB nan 1.0 ? 0.018064324 0.62793523 0.17259964 1 100 -ATOM C CG . LYS A 0 14 . 14 LYS A CG nan 1.0 ? 0.1498836 0.3027894 0.73918694 1 101 -ATOM C CD . LYS A 0 14 . 14 LYS A CD nan 1.0 ? 0.44734386 0.74382657 0.7288576 1 102 -ATOM C CE . LYS A 0 14 . 14 LYS A CE nan 1.0 ? 0.2049204 0.46083537 0.33546332 1 103 -ATOM N NZ . LYS A 0 14 . 14 LYS A NZ nan 1.0 +1 0.010748239 0.61668515 0.058343597 1 104 -ATOM N N . PRO A 0 15 . 15 PRO A N nan 1.0 ? 0.69405484 0.8732382 0.9183324 1 105 -ATOM C CA . PRO A 0 15 . 15 PRO A CA nan 1.0 ? 0.7586835 0.019229716 0.22429763 1 106 -ATOM C C . PRO A 0 15 . 15 PRO A C nan 1.0 ? 0.22850773 0.8490979 0.18863726 1 107 -ATOM O O . PRO A 0 15 . 15 PRO A O nan 1.0 ? 0.9602351 0.44892907 0.591671 1 108 -ATOM C CB . PRO A 0 15 . 15 PRO A CB nan 1.0 ? 0.09277682 0.37374014 0.49707144 1 109 -ATOM C CG . PRO A 0 15 . 15 PRO A CG nan 1.0 ? 0.585867 0.9953224 0.52363473 1 110 -ATOM C CD . PRO A 0 15 . 15 PRO A CD nan 1.0 ? 0.51339597 0.55335927 0.61053294 1 111 -ATOM N N . LYS A 0 16 . 16 LYS A N nan 1.0 ? 0.14868423 0.6838684 0.19978693 1 112 -ATOM C CA . LYS A 0 16 . 16 LYS A CA nan 1.0 ? 
0.90546757 0.032624453 0.5796942 1 113 -ATOM C C . LYS A 0 16 . 16 LYS A C nan 1.0 ? 0.20046431 0.94034684 0.0069801062 1 114 -ATOM O O . LYS A 0 16 . 16 LYS A O nan 1.0 ? 0.3726253 0.5792765 0.68403995 1 115 -ATOM C CB . LYS A 0 16 . 16 LYS A CB nan 1.0 ? 0.18158911 0.3993597 0.25266123 1 116 -ATOM C CG . LYS A 0 16 . 16 LYS A CG nan 1.0 ? 0.823902 0.14472358 0.13860604 1 117 -ATOM C CD . LYS A 0 16 . 16 LYS A CD nan 1.0 ? 0.20324355 0.20908105 0.76593286 1 118 -ATOM C CE . LYS A 0 16 . 16 LYS A CE nan 1.0 ? 0.5082126 0.8732992 0.9529036 1 119 -ATOM N NZ . LYS A 0 16 . 16 LYS A NZ nan 1.0 +1 0.9408355 0.7506219 0.15792698 1 120 -ATOM N N . ARG A 0 17 . 17 ARG A N nan 1.0 ? 0.051618475 0.06779108 0.054182995 1 121 -ATOM C CA . ARG A 0 17 . 17 ARG A CA nan 1.0 ? 0.49150473 0.55707234 0.5592509 1 122 -ATOM C C . ARG A 0 17 . 17 ARG A C nan 1.0 ? 0.06495733 0.34501436 0.030559111 1 123 -ATOM O O . ARG A 0 17 . 17 ARG A O nan 1.0 ? 0.6940389 0.22097519 0.6517498 1 124 -ATOM C CB . ARG A 0 17 . 17 ARG A CB nan 1.0 ? 0.84208053 0.24519812 0.3810185 1 125 -ATOM C CG . ARG A 0 17 . 17 ARG A CG nan 1.0 ? 0.98814124 0.30237678 0.76980627 1 126 -ATOM C CD . ARG A 0 17 . 17 ARG A CD nan 1.0 ? 0.028515778 0.8809237 0.24172458 1 127 -ATOM N NE . ARG A 0 17 . 17 ARG A NE nan 1.0 ? 0.38802463 0.83473814 0.2936744 1 128 -ATOM C CZ . ARG A 0 17 . 17 ARG A CZ nan 1.0 ? 0.95385236 0.07344026 0.2898932 1 129 -ATOM N NH1 . ARG A 0 17 . 17 ARG A NH1 nan 1.0 ? 0.04439104 0.36317503 0.637072 1 130 -ATOM N NH2 . ARG A 0 17 . 17 ARG A NH2 nan 1.0 +1 0.40992975 0.9745337 0.74775535 1 131 -ATOM N N . GLN A 0 18 . 18 GLN A N nan 1.0 ? 0.7668671 0.34142557 0.5990722 1 132 -ATOM C CA . GLN A 0 18 . 18 GLN A CA nan 1.0 ? 0.37403807 0.9679144 0.2390424 1 133 -ATOM C C . GLN A 0 18 . 18 GLN A C nan 1.0 ? 0.6468835 0.63122725 0.8008938 1 134 -ATOM O O . GLN A 0 18 . 18 GLN A O nan 1.0 ? 0.8578063 0.22715269 0.85064673 1 135 -ATOM C CB . GLN A 0 18 . 18 GLN A CB nan 1.0 ? 
0.56987256 0.73290986 0.2618318 1 136 -ATOM C CG . GLN A 0 18 . 18 GLN A CG nan 1.0 ? 0.98244125 0.27961993 0.7562413 1 137 -ATOM C CD . GLN A 0 18 . 18 GLN A CD nan 1.0 ? 0.08739643 0.64026475 0.5154277 1 138 -ATOM O OE1 . GLN A 0 18 . 18 GLN A OE1 nan 1.0 ? 0.6237632 0.064231336 0.08102703 1 139 -ATOM N NE2 . GLN A 0 18 . 18 GLN A NE2 nan 1.0 ? 0.0962329 0.33183298 0.24200326 1 140 -ATOM N N . THR A 0 19 . 19 THR A N nan 1.0 ? 0.18497635 0.15344463 0.16953453 1 141 -ATOM C CA . THR A 0 19 . 19 THR A CA nan 1.0 ? 0.84855753 0.1775084 0.7617898 1 142 -ATOM C C . THR A 0 19 . 19 THR A C nan 1.0 ? 0.109540716 0.7965828 0.98708344 1 143 -ATOM O O . THR A 0 19 . 19 THR A O nan 1.0 ? 0.1251938 0.14257674 0.7711215 1 144 -ATOM C CB . THR A 0 19 . 19 THR A CB nan 1.0 ? 0.045085754 0.5803357 0.9140524 1 145 -ATOM O OG1 . THR A 0 19 . 19 THR A OG1 nan 1.0 ? 0.8739174 0.44894084 0.4828067 1 146 -ATOM C CG2 . THR A 0 19 . 19 THR A CG2 nan 1.0 ? 0.9601982 0.41463733 0.28909835 1 147 -ATOM N N . ASN A 0 20 . 20 ASN A N nan 1.0 ? 0.91444343 0.062103104 0.8755301 1 148 -ATOM C CA . ASN A 0 20 . 20 ASN A CA nan 1.0 ? 0.75744176 0.8928151 0.03396393 1 149 -ATOM C C . ASN A 0 20 . 20 ASN A C nan 1.0 ? 0.453479 0.36528784 0.7709141 1 150 -ATOM O O . ASN A 0 20 . 20 ASN A O nan 1.0 ? 0.06148596 0.7451845 0.41686273 1 151 -ATOM C CB . ASN A 0 20 . 20 ASN A CB nan 1.0 ? 0.35333404 0.85169876 0.3228518 1 152 -ATOM C CG . ASN A 0 20 . 20 ASN A CG nan 1.0 ? 0.17199555 0.17680916 0.8367116 1 153 -ATOM O OD1 . ASN A 0 20 . 20 ASN A OD1 nan 1.0 ? 0.5105061 0.23463511 0.08926294 1 154 -ATOM N ND2 . ASN A 0 20 . 20 ASN A ND2 nan 1.0 ? 0.8966319 0.088738576 0.53966016 1 155 -ATOM N N . GLN A 0 21 . 21 GLN A N nan 1.0 ? 0.6164258 0.55596733 0.040161308 1 156 -ATOM C CA . GLN A 0 21 . 21 GLN A CA nan 1.0 ? 0.94914335 0.98999816 0.82357913 1 157 -ATOM C C . GLN A 0 21 . 21 GLN A C nan 1.0 ? 0.1077137 0.4913604 0.4874935 1 158 -ATOM O O . GLN A 0 21 . 21 GLN A O nan 1.0 ? 
0.37967712 0.40263617 0.9911403 1 159 -ATOM C CB . GLN A 0 21 . 21 GLN A CB nan 1.0 ? 0.2914083 0.60137016 0.12327882 1 160 -ATOM C CG . GLN A 0 21 . 21 GLN A CG nan 1.0 ? 0.15209414 0.44534013 0.109117456 1 161 -ATOM C CD . GLN A 0 21 . 21 GLN A CD nan 1.0 ? 0.82027835 0.33013004 0.14108849 1 162 -ATOM O OE1 . GLN A 0 21 . 21 GLN A OE1 nan 1.0 ? 0.20471309 0.62123024 0.14886476 1 163 -ATOM N NE2 . GLN A 0 21 . 21 GLN A NE2 nan 1.0 ? 0.60116357 0.30022156 0.8619245 1 164 -ATOM N N . LEU A 0 22 . 22 LEU A N nan 1.0 ? 0.7276963 0.86261827 0.21215138 1 165 -ATOM C CA . LEU A 0 22 . 22 LEU A CA nan 1.0 ? 0.8899341 0.25833836 0.50494486 1 166 -ATOM C C . LEU A 0 22 . 22 LEU A C nan 1.0 ? 0.6003779 0.87869394 0.18324283 1 167 -ATOM O O . LEU A 0 22 . 22 LEU A O nan 1.0 ? 0.020969775 0.49746773 0.43295017 1 168 -ATOM C CB . LEU A 0 22 . 22 LEU A CB nan 1.0 ? 0.07603758 0.33706933 0.10398286 1 169 -ATOM C CG . LEU A 0 22 . 22 LEU A CG nan 1.0 ? 0.26069322 0.9867885 0.27389273 1 170 -ATOM C CD1 . LEU A 0 22 . 22 LEU A CD1 nan 1.0 ? 0.8384153 0.6177051 0.67678237 1 171 -ATOM C CD2 . LEU A 0 22 . 22 LEU A CD2 nan 1.0 ? 0.6307908 0.3815625 0.16283834 1 172 -ATOM N N . GLN A 0 23 . 23 GLN A N nan 1.0 ? 0.81063503 0.88097453 0.35505345 1 173 -ATOM C CA . GLN A 0 23 . 23 GLN A CA nan 1.0 ? 0.7802046 0.60163444 0.4403054 1 174 -ATOM C C . GLN A 0 23 . 23 GLN A C nan 1.0 ? 0.9223797 0.9393033 0.032366715 1 175 -ATOM O O . GLN A 0 23 . 23 GLN A O nan 1.0 ? 0.7410103 0.19270283 0.40853631 1 176 -ATOM C CB . GLN A 0 23 . 23 GLN A CB nan 1.0 ? 0.35254046 0.7669014 0.6069517 1 177 -ATOM C CG . GLN A 0 23 . 23 GLN A CG nan 1.0 ? 0.6319605 0.05625476 0.21947423 1 178 -ATOM C CD . GLN A 0 23 . 23 GLN A CD nan 1.0 ? 0.6240955 0.29461217 0.5008701 1 179 -ATOM O OE1 . GLN A 0 23 . 23 GLN A OE1 nan 1.0 ? 0.47627422 0.07893602 0.75093013 1 180 -ATOM N NE2 . GLN A 0 23 . 23 GLN A NE2 nan 1.0 ? 0.9538182 0.721234 0.58163464 1 181 -ATOM N N . TYR A 0 24 . 24 TYR A N nan 1.0 ? 
0.5323666 0.7047955 0.12042299 1 182 -ATOM C CA . TYR A 0 24 . 24 TYR A CA nan 1.0 ? 0.99727935 0.31815055 0.50365186 1 183 -ATOM C C . TYR A 0 24 . 24 TYR A C nan 1.0 ? 0.7725593 0.5311939 0.2798514 1 184 -ATOM O O . TYR A 0 24 . 24 TYR A O nan 1.0 ? 0.05098578 0.37574342 0.15204258 1 185 -ATOM C CB . TYR A 0 24 . 24 TYR A CB nan 1.0 ? 0.6372731 0.6222522 0.16959472 1 186 -ATOM C CG . TYR A 0 24 . 24 TYR A CG nan 1.0 ? 0.5713278 0.007839346 0.6788067 1 187 -ATOM C CD1 . TYR A 0 24 . 24 TYR A CD1 nan 1.0 ? 0.06743787 0.5260393 0.21557927 1 188 -ATOM C CD2 . TYR A 0 24 . 24 TYR A CD2 nan 1.0 ? 0.61750346 0.32607672 0.23106767 1 189 -ATOM C CE1 . TYR A 0 24 . 24 TYR A CE1 nan 1.0 ? 0.38442186 0.8464428 0.3862134 1 190 -ATOM C CE2 . TYR A 0 24 . 24 TYR A CE2 nan 1.0 ? 0.637548 0.36029372 0.31038362 1 191 -ATOM C CZ . TYR A 0 24 . 24 TYR A CZ nan 1.0 ? 0.48225614 0.83877677 0.6301264 1 192 -ATOM O OH . TYR A 0 24 . 24 TYR A OH nan 1.0 ? 0.68680847 0.8662383 0.20080166 1 193 -ATOM N N . LEU A 0 25 . 25 LEU A N nan 1.0 ? 0.06919777 0.45870635 0.45743418 1 194 -ATOM C CA . LEU A 0 25 . 25 LEU A CA nan 1.0 ? 0.27446425 0.04163653 0.43135223 1 195 -ATOM C C . LEU A 0 25 . 25 LEU A C nan 1.0 ? 0.9998208 0.5208671 0.2574531 1 196 -ATOM O O . LEU A 0 25 . 25 LEU A O nan 1.0 ? 0.61882067 0.27298036 0.33795428 1 197 -ATOM C CB . LEU A 0 25 . 25 LEU A CB nan 1.0 ? 0.6722192 0.2877531 0.7240264 1 198 -ATOM C CG . LEU A 0 25 . 25 LEU A CG nan 1.0 ? 0.558408 0.16667183 0.621761 1 199 -ATOM C CD1 . LEU A 0 25 . 25 LEU A CD1 nan 1.0 ? 0.57973665 0.516224 0.6807709 1 200 -ATOM C CD2 . LEU A 0 25 . 25 LEU A CD2 nan 1.0 ? 0.9790965 0.24147661 0.9578191 1 201 -ATOM N N . LEU A 0 26 . 26 LEU A N nan 1.0 ? 0.30791745 0.55762917 0.3939135 1 202 -ATOM C CA . LEU A 0 26 . 26 LEU A CA nan 1.0 ? 0.10152806 0.31331927 0.375573 1 203 -ATOM C C . LEU A 0 26 . 26 LEU A C nan 1.0 ? 0.93277603 0.7202841 0.13637616 1 204 -ATOM O O . LEU A 0 26 . 26 LEU A O nan 1.0 ? 
0.5105969 0.18611977 0.033900555 1 205 -ATOM C CB . LEU A 0 26 . 26 LEU A CB nan 1.0 ? 0.026774606 0.64739597 0.24065845 1 206 -ATOM C CG . LEU A 0 26 . 26 LEU A CG nan 1.0 ? 0.23032682 0.02812986 0.30515018 1 207 -ATOM C CD1 . LEU A 0 26 . 26 LEU A CD1 nan 1.0 ? 0.4859867 0.32277843 0.17225847 1 208 -ATOM C CD2 . LEU A 0 26 . 26 LEU A CD2 nan 1.0 ? 0.44379818 0.90514606 0.22002204 1 209 -ATOM N N . ARG A 0 27 . 27 ARG A N nan 1.0 ? 0.7438323 0.7500762 0.34533674 1 210 -ATOM C CA . ARG A 0 27 . 27 ARG A CA nan 1.0 ? 0.45649573 0.77273667 0.8219699 1 211 -ATOM C C . ARG A 0 27 . 27 ARG A C nan 1.0 ? 0.79016703 0.82866836 0.90648985 1 212 -ATOM O O . ARG A 0 27 . 27 ARG A O nan 1.0 ? 0.6563385 0.250219 0.90756005 1 213 -ATOM C CB . ARG A 0 27 . 27 ARG A CB nan 1.0 ? 0.29119152 0.37778053 0.8286057 1 214 -ATOM C CG . ARG A 0 27 . 27 ARG A CG nan 1.0 ? 0.86176246 0.5119898 0.3751338 1 215 -ATOM C CD . ARG A 0 27 . 27 ARG A CD nan 1.0 ? 0.6688924 0.4109667 0.96728957 1 216 -ATOM N NE . ARG A 0 27 . 27 ARG A NE nan 1.0 ? 0.43036973 0.5601071 0.96071047 1 217 -ATOM C CZ . ARG A 0 27 . 27 ARG A CZ nan 1.0 ? 0.7164874 0.5857849 0.18615055 1 218 -ATOM N NH1 . ARG A 0 27 . 27 ARG A NH1 nan 1.0 ? 0.056883063 0.32253456 0.6426514 1 219 -ATOM N NH2 . ARG A 0 27 . 27 ARG A NH2 nan 1.0 +1 0.78382903 0.9987141 0.16186093 1 220 -ATOM N N . VAL A 0 28 . 28 VAL A N nan 1.0 ? 0.9894113 0.36063302 0.1115817 1 221 -ATOM C CA . VAL A 0 28 . 28 VAL A CA nan 1.0 ? 0.9941264 0.88631177 0.49464726 1 222 -ATOM C C . VAL A 0 28 . 28 VAL A C nan 1.0 ? 0.78648293 0.7953945 0.6730102 1 223 -ATOM O O . VAL A 0 28 . 28 VAL A O nan 1.0 ? 0.3218896 0.56366515 0.75907063 1 224 -ATOM C CB . VAL A 0 28 . 28 VAL A CB nan 1.0 ? 0.06366744 0.458759 0.11121865 1 225 -ATOM C CG1 . VAL A 0 28 . 28 VAL A CG1 nan 1.0 ? 0.44534177 0.61597526 0.9269887 1 226 -ATOM C CG2 . VAL A 0 28 . 28 VAL A CG2 nan 1.0 ? 0.9822882 0.17379183 0.95090616 1 227 -ATOM N N . VAL A 0 29 . 29 VAL A N nan 1.0 ? 
0.42619503 0.5073507 0.3113885 1 228 -ATOM C CA . VAL A 0 29 . 29 VAL A CA nan 1.0 ? 0.1937239 0.7381927 0.03432685 1 229 -ATOM C C . VAL A 0 29 . 29 VAL A C nan 1.0 ? 0.052714914 0.41938156 0.4633328 1 230 -ATOM O O . VAL A 0 29 . 29 VAL A O nan 1.0 ? 0.96566 0.7206947 0.12343166 1 231 -ATOM C CB . VAL A 0 29 . 29 VAL A CB nan 1.0 ? 0.35404414 0.0046147024 0.5386975 1 232 -ATOM C CG1 . VAL A 0 29 . 29 VAL A CG1 nan 1.0 ? 0.8163714 0.7930232 0.513808 1 233 -ATOM C CG2 . VAL A 0 29 . 29 VAL A CG2 nan 1.0 ? 0.24574259 0.90241444 0.84414923 1 234 -ATOM N N . LEU A 0 30 . 30 LEU A N nan 1.0 ? 0.12356062 0.7983444 0.6356066 1 235 -ATOM C CA . LEU A 0 30 . 30 LEU A CA nan 1.0 ? 0.90912586 0.6240865 0.83590287 1 236 -ATOM C C . LEU A 0 30 . 30 LEU A C nan 1.0 ? 0.79468036 0.96903 0.640866 1 237 -ATOM O O . LEU A 0 30 . 30 LEU A O nan 1.0 ? 0.61016804 0.13588542 0.18967526 1 238 -ATOM C CB . LEU A 0 30 . 30 LEU A CB nan 1.0 ? 0.7864123 0.95381165 0.021021262 1 239 -ATOM C CG . LEU A 0 30 . 30 LEU A CG nan 1.0 ? 0.5954329 0.5900851 0.745545 1 240 -ATOM C CD1 . LEU A 0 30 . 30 LEU A CD1 nan 1.0 ? 0.4645145 0.77331305 0.68092126 1 241 -ATOM C CD2 . LEU A 0 30 . 30 LEU A CD2 nan 1.0 ? 0.29891866 0.46852043 0.07396546 1 242 -ATOM N N . LYS A 0 31 . 31 LYS A N nan 1.0 ? 0.675535 0.78404546 0.19273004 1 243 -ATOM C CA . LYS A 0 31 . 31 LYS A CA nan 1.0 ? 0.86144805 0.6246702 0.6987265 1 244 -ATOM C C . LYS A 0 31 . 31 LYS A C nan 1.0 ? 0.8220737 0.89157474 0.13040194 1 245 -ATOM O O . LYS A 0 31 . 31 LYS A O nan 1.0 ? 0.37901548 0.4188023 0.28159252 1 246 -ATOM C CB . LYS A 0 31 . 31 LYS A CB nan 1.0 ? 0.94203144 0.18890467 0.47162423 1 247 -ATOM C CG . LYS A 0 31 . 31 LYS A CG nan 1.0 ? 0.49718964 0.015537601 0.8005585 1 248 -ATOM C CD . LYS A 0 31 . 31 LYS A CD nan 1.0 ? 0.36539948 0.27279532 0.79279876 1 249 -ATOM C CE . LYS A 0 31 . 31 LYS A CE nan 1.0 ? 0.4840803 0.8934058 0.39333895 1 250 -ATOM N NZ . LYS A 0 31 . 
31 LYS A NZ nan 1.0 +1 0.97958595 0.12056521 0.073026225 1 251 -ATOM N N . THR A 0 32 . 32 THR A N nan 1.0 ? 0.6887542 0.5257946 0.10800014 1 252 -ATOM C CA . THR A 0 32 . 32 THR A CA nan 1.0 ? 0.19943047 0.09191372 0.10346767 1 253 -ATOM C C . THR A 0 32 . 32 THR A C nan 1.0 ? 0.7136943 0.18273458 0.43982187 1 254 -ATOM O O . THR A 0 32 . 32 THR A O nan 1.0 ? 0.15661035 0.59953225 0.43391758 1 255 -ATOM C CB . THR A 0 32 . 32 THR A CB nan 1.0 ? 0.30366868 0.9277157 0.10530701 1 256 -ATOM O OG1 . THR A 0 32 . 32 THR A OG1 nan 1.0 ? 0.048217803 0.8846466 0.98422575 1 257 -ATOM C CG2 . THR A 0 32 . 32 THR A CG2 nan 1.0 ? 0.5250263 0.9088623 0.6854337 1 258 -ATOM N N . LEU A 0 33 . 33 LEU A N nan 1.0 ? 0.87117237 0.7787405 0.28105366 1 259 -ATOM C CA . LEU A 0 33 . 33 LEU A CA nan 1.0 ? 0.7773442 0.860399 0.9016287 1 260 -ATOM C C . LEU A 0 33 . 33 LEU A C nan 1.0 ? 0.7754645 0.19462685 0.7746948 1 261 -ATOM O O . LEU A 0 33 . 33 LEU A O nan 1.0 ? 0.81107986 0.90863913 0.9612739 1 262 -ATOM C CB . LEU A 0 33 . 33 LEU A CB nan 1.0 ? 0.3114657 0.836424 0.44931 1 263 -ATOM C CG . LEU A 0 33 . 33 LEU A CG nan 1.0 ? 0.7737063 0.05012918 0.5658992 1 264 -ATOM C CD1 . LEU A 0 33 . 33 LEU A CD1 nan 1.0 ? 0.37292695 0.41186354 0.3646676 1 265 -ATOM C CD2 . LEU A 0 33 . 33 LEU A CD2 nan 1.0 ? 0.12074422 0.19691691 0.80206746 1 266 -ATOM N N . TRP A 0 34 . 34 TRP A N nan 1.0 ? 0.5621799 0.55828804 0.99245274 1 267 -ATOM C CA . TRP A 0 34 . 34 TRP A CA nan 1.0 ? 0.7166781 0.3586998 0.8249309 1 268 -ATOM C C . TRP A 0 34 . 34 TRP A C nan 1.0 ? 0.29615387 0.39280987 0.5227059 1 269 -ATOM O O . TRP A 0 34 . 34 TRP A O nan 1.0 ? 0.9349086 0.4428162 0.9894545 1 270 -ATOM C CB . TRP A 0 34 . 34 TRP A CB nan 1.0 ? 0.7740034 0.8439142 0.14427593 1 271 -ATOM C CG . TRP A 0 34 . 34 TRP A CG nan 1.0 ? 0.09167532 0.0425917 0.17605297 1 272 -ATOM C CD1 . TRP A 0 34 . 34 TRP A CD1 nan 1.0 ? 0.470309 0.56175923 0.9819147 1 273 -ATOM C CD2 . TRP A 0 34 . 34 TRP A CD2 nan 1.0 ? 
0.5074868 0.6406654 0.7234441 1 274 -ATOM N NE1 . TRP A 0 34 . 34 TRP A NE1 nan 1.0 ? 0.8112138 0.40085202 0.7360932 1 275 -ATOM C CE2 . TRP A 0 34 . 34 TRP A CE2 nan 1.0 ? 0.8316175 0.56361765 0.3176602 1 276 -ATOM C CE3 . TRP A 0 34 . 34 TRP A CE3 nan 1.0 ? 0.6639036 0.97300464 0.5637204 1 277 -ATOM C CZ2 . TRP A 0 34 . 34 TRP A CZ2 nan 1.0 ? 0.2307534 0.09585341 0.33922267 1 278 -ATOM C CZ3 . TRP A 0 34 . 34 TRP A CZ3 nan 1.0 ? 0.965758 0.0820527 0.59982514 1 279 -ATOM C CH2 . TRP A 0 34 . 34 TRP A CH2 nan 1.0 ? 0.895409 0.9367704 0.29829302 1 280 -ATOM N N . LYS A 0 35 . 35 LYS A N nan 1.0 ? 0.38011488 0.9051172 0.59294266 1 281 -ATOM C CA . LYS A 0 35 . 35 LYS A CA nan 1.0 ? 0.8881911 0.2749702 0.3041879 1 282 -ATOM C C . LYS A 0 35 . 35 LYS A C nan 1.0 ? 0.18198201 0.054325327 0.81324196 1 283 -ATOM O O . LYS A 0 35 . 35 LYS A O nan 1.0 ? 0.6393932 0.15497187 0.1426529 1 284 -ATOM C CB . LYS A 0 35 . 35 LYS A CB nan 1.0 ? 0.2641153 0.28071237 0.4488056 1 285 -ATOM C CG . LYS A 0 35 . 35 LYS A CG nan 1.0 ? 0.06655585 0.19062062 0.171783 1 286 -ATOM C CD . LYS A 0 35 . 35 LYS A CD nan 1.0 ? 0.3658667 0.8851599 0.4236336 1 287 -ATOM C CE . LYS A 0 35 . 35 LYS A CE nan 1.0 ? 0.78543377 0.7815608 0.35528168 1 288 -ATOM N NZ . LYS A 0 35 . 35 LYS A NZ nan 1.0 +1 0.27934492 0.23475485 0.23780783 1 289 -ATOM N N . HIS A 0 36 . 36 HIS A N nan 1.0 ? 0.041138653 0.682066 0.05784882 1 290 -ATOM C CA . HIS A 0 36 . 36 HIS A CA nan 1.0 ? 0.3472665 0.2589608 0.943293 1 291 -ATOM C C . HIS A 0 36 . 36 HIS A C nan 1.0 ? 0.4827872 0.92663705 0.6178733 1 292 -ATOM O O . HIS A 0 36 . 36 HIS A O nan 1.0 ? 0.7938743 0.69659233 0.82844186 1 293 -ATOM C CB . HIS A 0 36 . 36 HIS A CB nan 1.0 ? 0.099872485 0.11180358 0.057238255 1 294 -ATOM C CG . HIS A 0 36 . 36 HIS A CG nan 1.0 ? 0.12557222 0.037152115 0.36375585 1 295 -ATOM N ND1 . HIS A 0 36 . 36 HIS A ND1 nan 1.0 +1 0.99471414 0.7240132 0.5746399 1 296 -ATOM C CD2 . HIS A 0 36 . 36 HIS A CD2 nan 1.0 ? 
0.16518821 0.09518377 0.49285403 1 297 -ATOM C CE1 . HIS A 0 36 . 36 HIS A CE1 nan 1.0 ? 0.6049905 0.7780642 0.4453218 1 298 -ATOM N NE2 . HIS A 0 36 . 36 HIS A NE2 nan 1.0 ? 0.88803136 0.018631613 0.7112728 1 299 -ATOM N N . GLN A 0 37 . 37 GLN A N nan 1.0 ? 0.7294807 0.22501613 0.78490555 1 300 -ATOM C CA . GLN A 0 37 . 37 GLN A CA nan 1.0 ? 0.30329752 0.70528984 0.9314575 1 301 -ATOM C C . GLN A 0 37 . 37 GLN A C nan 1.0 ? 0.10600542 0.052869104 0.48877308 1 302 -ATOM O O . GLN A 0 37 . 37 GLN A O nan 1.0 ? 0.18828838 0.16539992 0.10547737 1 303 -ATOM C CB . GLN A 0 37 . 37 GLN A CB nan 1.0 ? 0.8677295 0.43402576 0.219496 1 304 -ATOM C CG . GLN A 0 37 . 37 GLN A CG nan 1.0 ? 0.77835816 0.82149196 0.8998915 1 305 -ATOM C CD . GLN A 0 37 . 37 GLN A CD nan 1.0 ? 0.08041105 0.3016499 0.9362006 1 306 -ATOM O OE1 . GLN A 0 37 . 37 GLN A OE1 nan 1.0 ? 0.41656408 0.03058584 0.596775 1 307 -ATOM N NE2 . GLN A 0 37 . 37 GLN A NE2 nan 1.0 ? 0.980633 0.95967287 0.01243886 1 308 -ATOM N N . PHE A 0 38 . 38 PHE A N nan 1.0 ? 0.99816823 0.28582078 0.33658466 1 309 -ATOM C CA . PHE A 0 38 . 38 PHE A CA nan 1.0 ? 0.034855567 0.60418683 0.26916963 1 310 -ATOM C C . PHE A 0 38 . 38 PHE A C nan 1.0 ? 0.08985032 0.18164991 0.73339295 1 311 -ATOM O O . PHE A 0 38 . 38 PHE A O nan 1.0 ? 0.16917053 0.44818816 0.8925519 1 312 -ATOM C CB . PHE A 0 38 . 38 PHE A CB nan 1.0 ? 0.4284991 0.1837848 0.08129157 1 313 -ATOM C CG . PHE A 0 38 . 38 PHE A CG nan 1.0 ? 0.607908 0.11872449 0.3904101 1 314 -ATOM C CD1 . PHE A 0 38 . 38 PHE A CD1 nan 1.0 ? 0.72688216 0.44018707 0.8646109 1 315 -ATOM C CD2 . PHE A 0 38 . 38 PHE A CD2 nan 1.0 ? 0.4839356 0.74734706 0.12515219 1 316 -ATOM C CE1 . PHE A 0 38 . 38 PHE A CE1 nan 1.0 ? 0.3327863 0.20702198 0.7104385 1 317 -ATOM C CE2 . PHE A 0 38 . 38 PHE A CE2 nan 1.0 ? 0.8509497 0.5569596 0.5697652 1 318 -ATOM C CZ . PHE A 0 38 . 38 PHE A CZ nan 1.0 ? 0.88834107 0.15635502 0.96841437 1 319 -ATOM N N . ALA A 0 39 . 39 ALA A N nan 1.0 ? 
0.61553586 0.88007605 0.9144177 1 320 -ATOM C CA . ALA A 0 39 . 39 ALA A CA nan 1.0 ? 0.7045697 0.029909566 0.071100034 1 321 -ATOM C C . ALA A 0 39 . 39 ALA A C nan 1.0 ? 0.41608214 0.8461845 0.65488034 1 322 -ATOM O O . ALA A 0 39 . 39 ALA A O nan 1.0 ? 0.37309766 0.21433288 0.33302563 1 323 -ATOM C CB . ALA A 0 39 . 39 ALA A CB nan 1.0 ? 0.39453915 0.20802496 0.51801664 1 324 -ATOM N N . TRP A 0 40 . 40 TRP A N nan 1.0 ? 0.6314806 0.48360926 0.72014475 1 325 -ATOM C CA . TRP A 0 40 . 40 TRP A CA nan 1.0 ? 0.7013008 0.92648584 0.86726606 1 326 -ATOM C C . TRP A 0 40 . 40 TRP A C nan 1.0 ? 0.7283708 0.47048682 0.846177 1 327 -ATOM O O . TRP A 0 40 . 40 TRP A O nan 1.0 ? 0.7615136 0.84794563 0.19876993 1 328 -ATOM C CB . TRP A 0 40 . 40 TRP A CB nan 1.0 ? 0.3336301 0.2705481 0.3330699 1 329 -ATOM C CG . TRP A 0 40 . 40 TRP A CG nan 1.0 ? 0.03627299 0.9914179 0.049767874 1 330 -ATOM C CD1 . TRP A 0 40 . 40 TRP A CD1 nan 1.0 ? 0.06309719 0.7963357 0.31532043 1 331 -ATOM C CD2 . TRP A 0 40 . 40 TRP A CD2 nan 1.0 ? 0.299318 0.12275301 0.5403105 1 332 -ATOM N NE1 . TRP A 0 40 . 40 TRP A NE1 nan 1.0 ? 0.9380629 0.6109836 0.8305512 1 333 -ATOM C CE2 . TRP A 0 40 . 40 TRP A CE2 nan 1.0 ? 0.8718075 0.43908978 0.920099 1 334 -ATOM C CE3 . TRP A 0 40 . 40 TRP A CE3 nan 1.0 ? 0.86209273 0.67372036 0.73449284 1 335 -ATOM C CZ2 . TRP A 0 40 . 40 TRP A CZ2 nan 1.0 ? 0.92741454 0.31216508 0.9269512 1 336 -ATOM C CZ3 . TRP A 0 40 . 40 TRP A CZ3 nan 1.0 ? 0.24859813 0.06714438 0.81076634 1 337 -ATOM C CH2 . TRP A 0 40 . 40 TRP A CH2 nan 1.0 ? 0.17510068 0.49407807 0.42882377 1 338 -ATOM N N . PRO A 0 41 . 41 PRO A N nan 1.0 ? 0.48625293 0.5761146 0.8619164 1 339 -ATOM C CA . PRO A 0 41 . 41 PRO A CA nan 1.0 ? 0.8995213 0.95289475 0.4503979 1 340 -ATOM C C . PRO A 0 41 . 41 PRO A C nan 1.0 ? 0.14563243 0.45101908 0.83317554 1 341 -ATOM O O . PRO A 0 41 . 41 PRO A O nan 1.0 ? 0.4255166 0.30934748 0.8511698 1 342 -ATOM C CB . PRO A 0 41 . 41 PRO A CB nan 1.0 ? 
0.8931787 0.48185566 0.86530894 1 343 -ATOM C CG . PRO A 0 41 . 41 PRO A CG nan 1.0 ? 0.5897085 0.73904246 0.40023413 1 344 -ATOM C CD . PRO A 0 41 . 41 PRO A CD nan 1.0 ? 0.3514196 0.3614445 6.0522907e-05 1 345 -ATOM N N . PHE A 0 42 . 42 PHE A N nan 1.0 ? 0.08030982 0.066388816 0.50792295 1 346 -ATOM C CA . PHE A 0 42 . 42 PHE A CA nan 1.0 ? 0.35800156 0.39075145 0.91515815 1 347 -ATOM C C . PHE A 0 42 . 42 PHE A C nan 1.0 ? 0.15826203 0.40783682 0.16791281 1 348 -ATOM O O . PHE A 0 42 . 42 PHE A O nan 1.0 ? 0.5649712 0.67972547 0.6499277 1 349 -ATOM C CB . PHE A 0 42 . 42 PHE A CB nan 1.0 ? 0.48789644 0.58796465 0.17436045 1 350 -ATOM C CG . PHE A 0 42 . 42 PHE A CG nan 1.0 ? 0.085832186 0.1513669 0.5667018 1 351 -ATOM C CD1 . PHE A 0 42 . 42 PHE A CD1 nan 1.0 ? 0.60372585 0.9829986 0.58652806 1 352 -ATOM C CD2 . PHE A 0 42 . 42 PHE A CD2 nan 1.0 ? 0.23139709 0.99967843 0.82249945 1 353 -ATOM C CE1 . PHE A 0 42 . 42 PHE A CE1 nan 1.0 ? 0.6692628 0.8379881 0.2583034 1 354 -ATOM C CE2 . PHE A 0 42 . 42 PHE A CE2 nan 1.0 ? 0.0456289 0.7432501 0.42623612 1 355 -ATOM C CZ . PHE A 0 42 . 42 PHE A CZ nan 1.0 ? 0.1726452 0.50526416 0.4958995 1 356 -ATOM N N . GLN A 0 43 . 43 GLN A N nan 1.0 ? 0.5885878 0.59139204 0.6037201 1 357 -ATOM C CA . GLN A 0 43 . 43 GLN A CA nan 1.0 ? 0.8098521 0.4527576 0.7380919 1 358 -ATOM C C . GLN A 0 43 . 43 GLN A C nan 1.0 ? 0.30647296 0.888769 0.5239766 1 359 -ATOM O O . GLN A 0 43 . 43 GLN A O nan 1.0 ? 0.91539663 0.5512471 0.5039983 1 360 -ATOM C CB . GLN A 0 43 . 43 GLN A CB nan 1.0 ? 0.8012377 0.28036964 0.96066004 1 361 -ATOM C CG . GLN A 0 43 . 43 GLN A CG nan 1.0 ? 0.57270414 0.7302429 0.0054054717 1 362 -ATOM C CD . GLN A 0 43 . 43 GLN A CD nan 1.0 ? 0.98635757 0.09246247 0.5604463 1 363 -ATOM O OE1 . GLN A 0 43 . 43 GLN A OE1 nan 1.0 ? 0.5531957 0.69336116 0.14599144 1 364 -ATOM N NE2 . GLN A 0 43 . 43 GLN A NE2 nan 1.0 ? 0.6015918 0.5596889 0.6679939 1 365 -ATOM N N . GLN A 0 44 . 44 GLN A N nan 1.0 ? 
0.1655309 0.33902976 0.102914475 1 366 -ATOM C CA . GLN A 0 44 . 44 GLN A CA nan 1.0 ? 0.40072322 0.64710915 0.8804728 1 367 -ATOM C C . GLN A 0 44 . 44 GLN A C nan 1.0 ? 0.60641617 0.27038643 0.29949751 1 368 -ATOM O O . GLN A 0 44 . 44 GLN A O nan 1.0 ? 0.39031246 0.173541 0.569865 1 369 -ATOM C CB . GLN A 0 44 . 44 GLN A CB nan 1.0 ? 0.38352808 0.34148002 0.33775023 1 370 -ATOM C CG . GLN A 0 44 . 44 GLN A CG nan 1.0 ? 0.7537693 0.16461205 0.22972202 1 371 -ATOM C CD . GLN A 0 44 . 44 GLN A CD nan 1.0 ? 0.3960996 0.653607 0.2560925 1 372 -ATOM O OE1 . GLN A 0 44 . 44 GLN A OE1 nan 1.0 ? 0.61215425 0.24109004 0.3288419 1 373 -ATOM N NE2 . GLN A 0 44 . 44 GLN A NE2 nan 1.0 ? 0.8776236 0.94001234 0.44622025 1 374 -ATOM N N . PRO A 0 45 . 45 PRO A N nan 1.0 ? 0.22559367 0.25798467 0.24315265 1 375 -ATOM C CA . PRO A 0 45 . 45 PRO A CA nan 1.0 ? 0.30287477 0.14553599 0.8628846 1 376 -ATOM C C . PRO A 0 45 . 45 PRO A C nan 1.0 ? 0.004886818 0.18353179 0.106030576 1 377 -ATOM O O . PRO A 0 45 . 45 PRO A O nan 1.0 ? 0.6770346 0.9466653 0.12871511 1 378 -ATOM C CB . PRO A 0 45 . 45 PRO A CB nan 1.0 ? 0.0005648687 0.023009788 0.050107613 1 379 -ATOM C CG . PRO A 0 45 . 45 PRO A CG nan 1.0 ? 0.45844412 0.28976566 0.7352334 1 380 -ATOM C CD . PRO A 0 45 . 45 PRO A CD nan 1.0 ? 0.4466032 0.6149311 0.77820146 1 381 -ATOM N N . VAL A 0 46 . 46 VAL A N nan 1.0 ? 0.99341583 0.9019431 0.94043535 1 382 -ATOM C CA . VAL A 0 46 . 46 VAL A CA nan 1.0 ? 0.91676384 0.60020804 0.27946967 1 383 -ATOM C C . VAL A 0 46 . 46 VAL A C nan 1.0 ? 0.6782298 0.49610928 0.2389185 1 384 -ATOM O O . VAL A 0 46 . 46 VAL A O nan 1.0 ? 0.339149 0.92995214 0.5078733 1 385 -ATOM C CB . VAL A 0 46 . 46 VAL A CB nan 1.0 ? 0.2601233 0.46934608 0.21150668 1 386 -ATOM C CG1 . VAL A 0 46 . 46 VAL A CG1 nan 1.0 ? 0.8245012 0.22258097 0.47305003 1 387 -ATOM C CG2 . VAL A 0 46 . 46 VAL A CG2 nan 1.0 ? 0.82117474 0.92309004 0.25815704 1 388 -ATOM N N . ASP A 0 47 . 47 ASP A N nan 1.0 ? 
0.5680281 0.7419584 0.60231537 1 389 -ATOM C CA . ASP A 0 47 . 47 ASP A CA nan 1.0 ? 0.9375589 0.27517512 0.44480863 1 390 -ATOM C C . ASP A 0 47 . 47 ASP A C nan 1.0 ? 0.5444864 0.6333075 0.97086394 1 391 -ATOM O O . ASP A 0 47 . 47 ASP A O nan 1.0 ? 0.93916255 0.42324644 0.5893108 1 392 -ATOM C CB . ASP A 0 47 . 47 ASP A CB nan 1.0 ? 0.13256261 0.44640556 0.9026958 1 393 -ATOM C CG . ASP A 0 47 . 47 ASP A CG nan 1.0 ? 0.5429085 0.13110183 0.54340947 1 394 -ATOM O OD1 . ASP A 0 47 . 47 ASP A OD1 nan 1.0 ? 0.8196787 0.49131605 0.97143775 1 395 -ATOM O OD2 . ASP A 0 47 . 47 ASP A OD2 nan 1.0 ? 0.4182696 0.65174603 0.5888604 1 396 -ATOM N N . ALA A 0 48 . 48 ALA A N nan 1.0 ? 0.8647231 0.16644034 0.074739 1 397 -ATOM C CA . ALA A 0 48 . 48 ALA A CA nan 1.0 ? 0.75111574 0.85908943 0.9551374 1 398 -ATOM C C . ALA A 0 48 . 48 ALA A C nan 1.0 ? 0.14976184 0.89037126 0.2852326 1 399 -ATOM O O . ALA A 0 48 . 48 ALA A O nan 1.0 ? 0.22930656 0.10643973 0.6030287 1 400 -ATOM C CB . ALA A 0 48 . 48 ALA A CB nan 1.0 ? 0.48783007 0.67303216 0.8389855 1 401 -ATOM N N . VAL A 0 49 . 49 VAL A N nan 1.0 ? 0.20183392 0.53308815 0.8749665 1 402 -ATOM C CA . VAL A 0 49 . 49 VAL A CA nan 1.0 ? 0.2617823 0.06439464 0.72643304 1 403 -ATOM C C . VAL A 0 49 . 49 VAL A C nan 1.0 ? 0.21493746 0.35583258 0.7468472 1 404 -ATOM O O . VAL A 0 49 . 49 VAL A O nan 1.0 ? 0.3093438 0.29135254 0.19601989 1 405 -ATOM C CB . VAL A 0 49 . 49 VAL A CB nan 1.0 ? 0.009635577 0.83055264 0.34605926 1 406 -ATOM C CG1 . VAL A 0 49 . 49 VAL A CG1 nan 1.0 ? 0.69822526 0.040675964 0.93566567 1 407 -ATOM C CG2 . VAL A 0 49 . 49 VAL A CG2 nan 1.0 ? 0.91082525 0.5391021 0.14685135 1 408 -ATOM N N . LYS A 0 50 . 50 LYS A N nan 1.0 ? 0.9499167 0.08714749 0.015584805 1 409 -ATOM C CA . LYS A 0 50 . 50 LYS A CA nan 1.0 ? 0.58015656 0.33556038 0.68247205 1 410 -ATOM C C . LYS A 0 50 . 50 LYS A C nan 1.0 ? 0.8597651 0.17228653 0.37108144 1 411 -ATOM O O . LYS A 0 50 . 50 LYS A O nan 1.0 ? 
0.9059731 0.50703186 0.1139907 1 412 -ATOM C CB . LYS A 0 50 . 50 LYS A CB nan 1.0 ? 0.978998 0.11968944 0.9335094 1 413 -ATOM C CG . LYS A 0 50 . 50 LYS A CG nan 1.0 ? 0.51741254 0.7368045 0.71991426 1 414 -ATOM C CD . LYS A 0 50 . 50 LYS A CD nan 1.0 ? 0.68704265 0.6498999 0.7867649 1 415 -ATOM C CE . LYS A 0 50 . 50 LYS A CE nan 1.0 ? 0.76932204 0.13223027 0.0002877572 1 416 -ATOM N NZ . LYS A 0 50 . 50 LYS A NZ nan 1.0 +1 0.22206555 0.8531211 0.69648236 1 417 -ATOM N N . LEU A 0 51 . 51 LEU A N nan 1.0 ? 0.4286319 0.7490868 0.03722845 1 418 -ATOM C CA . LEU A 0 51 . 51 LEU A CA nan 1.0 ? 0.73768646 0.20017096 0.8073491 1 419 -ATOM C C . LEU A 0 51 . 51 LEU A C nan 1.0 ? 0.679937 0.30653784 0.8245869 1 420 -ATOM O O . LEU A 0 51 . 51 LEU A O nan 1.0 ? 0.45297185 0.43669912 0.6470786 1 421 -ATOM C CB . LEU A 0 51 . 51 LEU A CB nan 1.0 ? 0.36369026 0.021132488 0.8826073 1 422 -ATOM C CG . LEU A 0 51 . 51 LEU A CG nan 1.0 ? 0.59092855 0.18546413 0.8661968 1 423 -ATOM C CD1 . LEU A 0 51 . 51 LEU A CD1 nan 1.0 ? 0.2617819 0.48878193 0.35213715 1 424 -ATOM C CD2 . LEU A 0 51 . 51 LEU A CD2 nan 1.0 ? 0.8692234 0.9811613 0.82916754 1 425 -ATOM N N . ASN A 0 52 . 52 ASN A N nan 1.0 ? 0.2739397 0.7569796 0.644993 1 426 -ATOM C CA . ASN A 0 52 . 52 ASN A CA nan 1.0 ? 0.11146167 0.2729764 0.04014563 1 427 -ATOM C C . ASN A 0 52 . 52 ASN A C nan 1.0 ? 0.52543455 0.09489259 0.44605428 1 428 -ATOM O O . ASN A 0 52 . 52 ASN A O nan 1.0 ? 0.6842824 0.63486356 0.08045573 1 429 -ATOM C CB . ASN A 0 52 . 52 ASN A CB nan 1.0 ? 0.011136313 0.034330945 0.9688163 1 430 -ATOM C CG . ASN A 0 52 . 52 ASN A CG nan 1.0 ? 0.5323263 0.71032697 0.7268602 1 431 -ATOM O OD1 . ASN A 0 52 . 52 ASN A OD1 nan 1.0 ? 0.21767434 0.08810821 0.2910027 1 432 -ATOM N ND2 . ASN A 0 52 . 52 ASN A ND2 nan 1.0 ? 0.632183 0.27716058 0.8752647 1 433 -ATOM N N . LEU A 0 53 . 53 LEU A N nan 1.0 ? 0.02349116 0.850117 0.7561755 1 434 -ATOM C CA . LEU A 0 53 . 53 LEU A CA nan 1.0 ? 
0.090637684 0.8221199 0.61573786 1 435 -ATOM C C . LEU A 0 53 . 53 LEU A C nan 1.0 ? 0.13726148 0.71950567 0.50729454 1 436 -ATOM O O . LEU A 0 53 . 53 LEU A O nan 1.0 ? 0.39735934 0.2943244 0.5107124 1 437 -ATOM C CB . LEU A 0 53 . 53 LEU A CB nan 1.0 ? 0.23933649 0.9442913 0.62487406 1 438 -ATOM C CG . LEU A 0 53 . 53 LEU A CG nan 1.0 ? 0.87690705 0.39608818 0.40145352 1 439 -ATOM C CD1 . LEU A 0 53 . 53 LEU A CD1 nan 1.0 ? 0.69747984 0.86333203 0.4106853 1 440 -ATOM C CD2 . LEU A 0 53 . 53 LEU A CD2 nan 1.0 ? 0.7471723 0.025820151 0.5844827 1 441 -ATOM N N . PRO A 0 54 . 54 PRO A N nan 1.0 ? 0.49281028 0.57675916 0.42941254 1 442 -ATOM C CA . PRO A 0 54 . 54 PRO A CA nan 1.0 ? 0.33012807 0.65523034 0.73586047 1 443 -ATOM C C . PRO A 0 54 . 54 PRO A C nan 1.0 ? 0.8973873 0.8407303 0.57381034 1 444 -ATOM O O . PRO A 0 54 . 54 PRO A O nan 1.0 ? 0.801236 0.94905484 0.96342355 1 445 -ATOM C CB . PRO A 0 54 . 54 PRO A CB nan 1.0 ? 0.8672147 0.424087 0.18796504 1 446 -ATOM C CG . PRO A 0 54 . 54 PRO A CG nan 1.0 ? 0.3080592 0.40871793 0.29327404 1 447 -ATOM C CD . PRO A 0 54 . 54 PRO A CD nan 1.0 ? 0.98794055 0.9347598 0.6286881 1 448 -ATOM N N . ASP A 0 55 . 55 ASP A N nan 1.0 ? 0.69451565 0.54258347 0.8070911 1 449 -ATOM C CA . ASP A 0 55 . 55 ASP A CA nan 1.0 ? 0.6735744 0.06828562 0.3621207 1 450 -ATOM C C . ASP A 0 55 . 55 ASP A C nan 1.0 ? 0.9039418 0.6012307 0.67882 1 451 -ATOM O O . ASP A 0 55 . 55 ASP A O nan 1.0 ? 0.037892614 0.22159298 0.16431983 1 452 -ATOM C CB . ASP A 0 55 . 55 ASP A CB nan 1.0 ? 0.9719066 0.6929506 0.9598728 1 453 -ATOM C CG . ASP A 0 55 . 55 ASP A CG nan 1.0 ? 0.23086117 0.91706264 0.5310151 1 454 -ATOM O OD1 . ASP A 0 55 . 55 ASP A OD1 nan 1.0 ? 0.76394755 0.3102277 0.91845036 1 455 -ATOM O OD2 . ASP A 0 55 . 55 ASP A OD2 nan 1.0 ? 0.89302003 0.53848034 0.32886824 1 456 -ATOM N N . TYR A 0 56 . 56 TYR A N nan 1.0 ? 0.7181067 0.4403224 0.87363875 1 457 -ATOM C CA . TYR A 0 56 . 56 TYR A CA nan 1.0 ? 
0.37726018 0.1773956 0.21288234 1 458 -ATOM C C . TYR A 0 56 . 56 TYR A C nan 1.0 ? 0.80783355 0.49737927 0.072281994 1 459 -ATOM O O . TYR A 0 56 . 56 TYR A O nan 1.0 ? 0.18323204 0.5687068 0.34936675 1 460 -ATOM C CB . TYR A 0 56 . 56 TYR A CB nan 1.0 ? 0.94987077 0.13827707 0.8568494 1 461 -ATOM C CG . TYR A 0 56 . 56 TYR A CG nan 1.0 ? 0.5211842 0.18041296 0.9644845 1 462 -ATOM C CD1 . TYR A 0 56 . 56 TYR A CD1 nan 1.0 ? 0.09904382 0.9102703 0.6783716 1 463 -ATOM C CD2 . TYR A 0 56 . 56 TYR A CD2 nan 1.0 ? 0.41097495 0.54902905 0.70226794 1 464 -ATOM C CE1 . TYR A 0 56 . 56 TYR A CE1 nan 1.0 ? 0.9277458 0.08739406 0.016157508 1 465 -ATOM C CE2 . TYR A 0 56 . 56 TYR A CE2 nan 1.0 ? 0.9997728 0.8015424 0.91996765 1 466 -ATOM C CZ . TYR A 0 56 . 56 TYR A CZ nan 1.0 ? 0.54813147 0.81206954 0.104375124 1 467 -ATOM O OH . TYR A 0 56 . 56 TYR A OH nan 1.0 ? 0.11408961 0.75546265 0.7774168 1 468 -ATOM N N . TYR A 0 57 . 57 TYR A N nan 1.0 ? 0.21478225 0.48486668 0.13886675 1 469 -ATOM C CA . TYR A 0 57 . 57 TYR A CA nan 1.0 ? 0.044075955 0.31827185 0.76114213 1 470 -ATOM C C . TYR A 0 57 . 57 TYR A C nan 1.0 ? 0.75730085 0.2989908 0.8141658 1 471 -ATOM O O . TYR A 0 57 . 57 TYR A O nan 1.0 ? 0.8144235 0.43711135 0.37345162 1 472 -ATOM C CB . TYR A 0 57 . 57 TYR A CB nan 1.0 ? 0.012492166 0.40240026 0.5069217 1 473 -ATOM C CG . TYR A 0 57 . 57 TYR A CG nan 1.0 ? 0.3055557 0.45912588 0.5268362 1 474 -ATOM C CD1 . TYR A 0 57 . 57 TYR A CD1 nan 1.0 ? 0.48534897 0.5393931 0.6681668 1 475 -ATOM C CD2 . TYR A 0 57 . 57 TYR A CD2 nan 1.0 ? 0.5754344 0.58082193 0.41820621 1 476 -ATOM C CE1 . TYR A 0 57 . 57 TYR A CE1 nan 1.0 ? 0.17657675 0.8647352 0.5150348 1 477 -ATOM C CE2 . TYR A 0 57 . 57 TYR A CE2 nan 1.0 ? 0.92256004 0.07893054 0.5433056 1 478 -ATOM C CZ . TYR A 0 57 . 57 TYR A CZ nan 1.0 ? 0.7281874 0.43479076 0.577263 1 479 -ATOM O OH . TYR A 0 57 . 57 TYR A OH nan 1.0 ? 0.7022388 0.12774867 0.6017536 1 480 -ATOM N N . LYS A 0 58 . 58 LYS A N nan 1.0 ? 
0.9343779 0.55865186 0.28857985 1 481 -ATOM C CA . LYS A 0 58 . 58 LYS A CA nan 1.0 ? 0.5005678 0.73359317 0.90877295 1 482 -ATOM C C . LYS A 0 58 . 58 LYS A C nan 1.0 ? 0.7087075 0.6352839 0.24339071 1 483 -ATOM O O . LYS A 0 58 . 58 LYS A O nan 1.0 ? 0.04608414 0.5043067 0.048990484 1 484 -ATOM C CB . LYS A 0 58 . 58 LYS A CB nan 1.0 ? 0.5809099 0.043531973 0.5287592 1 485 -ATOM C CG . LYS A 0 58 . 58 LYS A CG nan 1.0 ? 0.6559465 0.9968402 0.014424022 1 486 -ATOM C CD . LYS A 0 58 . 58 LYS A CD nan 1.0 ? 0.6059037 0.0642034 0.09668467 1 487 -ATOM C CE . LYS A 0 58 . 58 LYS A CE nan 1.0 ? 0.17334327 0.41126436 0.62122047 1 488 -ATOM N NZ . LYS A 0 58 . 58 LYS A NZ nan 1.0 +1 0.6784995 0.12765224 0.40144363 1 489 -ATOM N N . ILE A 0 59 . 59 ILE A N nan 1.0 ? 0.73827577 0.12270192 0.39467567 1 490 -ATOM C CA . ILE A 0 59 . 59 ILE A CA nan 1.0 ? 0.07888654 0.08050824 0.09006787 1 491 -ATOM C C . ILE A 0 59 . 59 ILE A C nan 1.0 ? 0.78214604 0.6116564 0.36370614 1 492 -ATOM O O . ILE A 0 59 . 59 ILE A O nan 1.0 ? 0.88374 0.43470734 0.72733325 1 493 -ATOM C CB . ILE A 0 59 . 59 ILE A CB nan 1.0 ? 0.9580614 0.20133846 0.20377982 1 494 -ATOM C CG1 . ILE A 0 59 . 59 ILE A CG1 nan 1.0 ? 0.5773808 0.29238138 0.3903531 1 495 -ATOM C CG2 . ILE A 0 59 . 59 ILE A CG2 nan 1.0 ? 0.88645047 0.26690868 0.29654214 1 496 -ATOM C CD1 . ILE A 0 59 . 59 ILE A CD1 nan 1.0 ? 0.20764117 0.10948894 0.8616355 1 497 -ATOM N N . ILE A 0 60 . 60 ILE A N nan 1.0 ? 0.981601 0.7625745 0.82535994 1 498 -ATOM C CA . ILE A 0 60 . 60 ILE A CA nan 1.0 ? 0.5685867 0.02274463 0.10022512 1 499 -ATOM C C . ILE A 0 60 . 60 ILE A C nan 1.0 ? 0.20464426 0.20211908 0.29905736 1 500 -ATOM O O . ILE A 0 60 . 60 ILE A O nan 1.0 ? 0.59952974 0.24830371 0.53254855 1 501 -ATOM C CB . ILE A 0 60 . 60 ILE A CB nan 1.0 ? 0.42661726 0.5978666 0.7278103 1 502 -ATOM C CG1 . ILE A 0 60 . 60 ILE A CG1 nan 1.0 ? 0.7884 0.009710386 0.2365403 1 503 -ATOM C CG2 . ILE A 0 60 . 60 ILE A CG2 nan 1.0 ? 
0.54632545 0.6882974 0.6904691 1 504 -ATOM C CD1 . ILE A 0 60 . 60 ILE A CD1 nan 1.0 ? 0.95220333 0.060815137 0.5023966 1 505 -ATOM N N . LYS A 0 61 . 61 LYS A N nan 1.0 ? 0.03378217 0.88774717 0.8060197 1 506 -ATOM C CA . LYS A 0 61 . 61 LYS A CA nan 1.0 ? 0.6883583 0.92726344 0.18103175 1 507 -ATOM C C . LYS A 0 61 . 61 LYS A C nan 1.0 ? 0.8525849 0.09045548 0.89824706 1 508 -ATOM O O . LYS A 0 61 . 61 LYS A O nan 1.0 ? 0.14415137 0.7445669 0.20430772 1 509 -ATOM C CB . LYS A 0 61 . 61 LYS A CB nan 1.0 ? 0.16465457 0.16236225 0.26676515 1 510 -ATOM C CG . LYS A 0 61 . 61 LYS A CG nan 1.0 ? 0.5639918 0.080836296 0.92800593 1 511 -ATOM C CD . LYS A 0 61 . 61 LYS A CD nan 1.0 ? 0.7626423 0.0034649768 0.58621866 1 512 -ATOM C CE . LYS A 0 61 . 61 LYS A CE nan 1.0 ? 0.48925072 0.9124157 0.281891 1 513 -ATOM N NZ . LYS A 0 61 . 61 LYS A NZ nan 1.0 +1 0.3503192 0.63697857 0.06551219 1 514 -ATOM N N . THR A 0 62 . 62 THR A N nan 1.0 ? 0.9271109 0.2883591 0.9292356 1 515 -ATOM C CA . THR A 0 62 . 62 THR A CA nan 1.0 ? 0.84086335 0.95887005 0.12554757 1 516 -ATOM C C . THR A 0 62 . 62 THR A C nan 1.0 ? 0.29598263 0.16333403 0.6832563 1 517 -ATOM O O . THR A 0 62 . 62 THR A O nan 1.0 ? 0.70720446 0.23392439 0.5718228 1 518 -ATOM C CB . THR A 0 62 . 62 THR A CB nan 1.0 ? 0.4513995 0.6456108 0.16004534 1 519 -ATOM O OG1 . THR A 0 62 . 62 THR A OG1 nan 1.0 ? 0.69536734 0.5539168 0.31288785 1 520 -ATOM C CG2 . THR A 0 62 . 62 THR A CG2 nan 1.0 ? 0.8748205 0.18649115 0.54653835 1 521 -ATOM N N . PRO A 0 63 . 63 PRO A N nan 1.0 ? 0.9325559 0.7195611 0.799525 1 522 -ATOM C CA . PRO A 0 63 . 63 PRO A CA nan 1.0 ? 0.53213984 0.88348335 0.9654373 1 523 -ATOM C C . PRO A 0 63 . 63 PRO A C nan 1.0 ? 0.10537466 0.33659217 0.81805146 1 524 -ATOM O O . PRO A 0 63 . 63 PRO A O nan 1.0 ? 0.25462434 0.28369892 0.50918776 1 525 -ATOM C CB . PRO A 0 63 . 63 PRO A CB nan 1.0 ? 0.30372742 0.13477224 0.50532466 1 526 -ATOM C CG . PRO A 0 63 . 63 PRO A CG nan 1.0 ? 
0.97020984 0.47894004 0.97938573 1 527 -ATOM C CD . PRO A 0 63 . 63 PRO A CD nan 1.0 ? 0.38899878 0.70261383 0.02750027 1 528 -ATOM N N . MET A 0 64 . 64 MET A N nan 1.0 ? 0.9129211 0.36621296 0.89939916 1 529 -ATOM C CA . MET A 0 64 . 64 MET A CA nan 1.0 ? 0.35994777 0.17172572 0.23884188 1 530 -ATOM C C . MET A 0 64 . 64 MET A C nan 1.0 ? 0.50409734 0.5650302 0.09747198 1 531 -ATOM O O . MET A 0 64 . 64 MET A O nan 1.0 ? 0.38171223 0.18780544 0.763686 1 532 -ATOM C CB . MET A 0 64 . 64 MET A CB nan 1.0 ? 0.38920614 0.901994 0.75651515 1 533 -ATOM C CG . MET A 0 64 . 64 MET A CG nan 1.0 ? 0.20383085 0.8854991 0.3130525 1 534 -ATOM S SD . MET A 0 64 . 64 MET A SD nan 1.0 ? 0.9675398 0.9610923 0.09868502 1 535 -ATOM C CE . MET A 0 64 . 64 MET A CE nan 1.0 ? 0.88989997 0.16720518 0.7802813 1 536 -ATOM N N . ASP A 0 65 . 65 ASP A N nan 1.0 ? 0.2642989 0.15011388 0.71781224 1 537 -ATOM C CA . ASP A 0 65 . 65 ASP A CA nan 1.0 ? 0.5316054 0.26185593 0.7023064 1 538 -ATOM C C . ASP A 0 65 . 65 ASP A C nan 1.0 ? 0.026728278 0.04152832 0.57584155 1 539 -ATOM O O . ASP A 0 65 . 65 ASP A O nan 1.0 ? 0.42444155 0.3602373 0.13244839 1 540 -ATOM C CB . ASP A 0 65 . 65 ASP A CB nan 1.0 ? 0.8101313 0.7643256 0.42128062 1 541 -ATOM C CG . ASP A 0 65 . 65 ASP A CG nan 1.0 ? 0.6420205 0.6266598 0.14371374 1 542 -ATOM O OD1 . ASP A 0 65 . 65 ASP A OD1 nan 1.0 ? 0.80657816 0.9936565 0.9017263 1 543 -ATOM O OD2 . ASP A 0 65 . 65 ASP A OD2 nan 1.0 ? 0.51531297 0.12695004 0.8185856 1 544 -ATOM N N . MET A 0 66 . 66 MET A N nan 1.0 ? 0.84530324 0.37490997 0.20467085 1 545 -ATOM C CA . MET A 0 66 . 66 MET A CA nan 1.0 ? 0.52315205 0.22097856 0.7283463 1 546 -ATOM C C . MET A 0 66 . 66 MET A C nan 1.0 ? 0.98351437 0.2831376 0.9128851 1 547 -ATOM O O . MET A 0 66 . 66 MET A O nan 1.0 ? 0.6431505 0.79897517 0.62549603 1 548 -ATOM C CB . MET A 0 66 . 66 MET A CB nan 1.0 ? 0.6177657 0.25656933 0.19317704 1 549 -ATOM C CG . MET A 0 66 . 66 MET A CG nan 1.0 ? 
0.05214109 0.15990128 0.99566096 1 550 -ATOM S SD . MET A 0 66 . 66 MET A SD nan 1.0 ? 0.9022345 0.98361284 0.2516407 1 551 -ATOM C CE . MET A 0 66 . 66 MET A CE nan 1.0 ? 0.54477113 0.08360501 0.88596606 1 552 -ATOM N N . GLY A 0 67 . 67 GLY A N nan 1.0 ? 0.7278808 0.48437527 0.13563988 1 553 -ATOM C CA . GLY A 0 67 . 67 GLY A CA nan 1.0 ? 0.86589193 0.6940126 0.27964297 1 554 -ATOM C C . GLY A 0 67 . 67 GLY A C nan 1.0 ? 0.5326011 0.15181156 0.4894512 1 555 -ATOM O O . GLY A 0 67 . 67 GLY A O nan 1.0 ? 0.06412283 0.45273495 0.6727876 1 556 -ATOM N N . THR A 0 68 . 68 THR A N nan 1.0 ? 0.9256189 0.4551077 0.8707399 1 557 -ATOM C CA . THR A 0 68 . 68 THR A CA nan 1.0 ? 0.029652681 0.4946034 0.065145604 1 558 -ATOM C C . THR A 0 68 . 68 THR A C nan 1.0 ? 0.72773385 0.1652496 0.8097524 1 559 -ATOM O O . THR A 0 68 . 68 THR A O nan 1.0 ? 0.58458436 0.017049689 0.20271751 1 560 -ATOM C CB . THR A 0 68 . 68 THR A CB nan 1.0 ? 0.95282304 0.3698644 0.43929055 1 561 -ATOM O OG1 . THR A 0 68 . 68 THR A OG1 nan 1.0 ? 0.9295304 0.009075551 0.13637094 1 562 -ATOM C CG2 . THR A 0 68 . 68 THR A CG2 nan 1.0 ? 0.8143692 0.012404751 0.79867744 1 563 -ATOM N N . ILE A 0 69 . 69 ILE A N nan 1.0 ? 0.17207614 0.9669954 0.4050139 1 564 -ATOM C CA . ILE A 0 69 . 69 ILE A CA nan 1.0 ? 0.5631318 0.17374042 0.27694327 1 565 -ATOM C C . ILE A 0 69 . 69 ILE A C nan 1.0 ? 0.11944742 0.5173488 0.6400268 1 566 -ATOM O O . ILE A 0 69 . 69 ILE A O nan 1.0 ? 0.88285977 0.760839 0.47659373 1 567 -ATOM C CB . ILE A 0 69 . 69 ILE A CB nan 1.0 ? 0.40384325 0.97788185 0.80865794 1 568 -ATOM C CG1 . ILE A 0 69 . 69 ILE A CG1 nan 1.0 ? 0.6463252 0.5901825 0.8340412 1 569 -ATOM C CG2 . ILE A 0 69 . 69 ILE A CG2 nan 1.0 ? 0.6430363 0.4391275 0.60015184 1 570 -ATOM C CD1 . ILE A 0 69 . 69 ILE A CD1 nan 1.0 ? 0.3785092 0.9880728 0.4858249 1 571 -ATOM N N . LYS A 0 70 . 70 LYS A N nan 1.0 ? 0.39456406 0.6960093 0.94111186 1 572 -ATOM C CA . LYS A 0 70 . 70 LYS A CA nan 1.0 ? 
0.7567475 0.034285 0.713535 1 573 -ATOM C C . LYS A 0 70 . 70 LYS A C nan 1.0 ? 0.87585044 0.902004 0.31062162 1 574 -ATOM O O . LYS A 0 70 . 70 LYS A O nan 1.0 ? 0.3774677 0.97696024 0.44389054 1 575 -ATOM C CB . LYS A 0 70 . 70 LYS A CB nan 1.0 ? 0.35562143 0.2946771 0.6982305 1 576 -ATOM C CG . LYS A 0 70 . 70 LYS A CG nan 1.0 ? 0.99283034 0.5221784 0.4917018 1 577 -ATOM C CD . LYS A 0 70 . 70 LYS A CD nan 1.0 ? 0.2641127 0.803119 0.5279591 1 578 -ATOM C CE . LYS A 0 70 . 70 LYS A CE nan 1.0 ? 0.36852607 0.50597036 0.07300817 1 579 -ATOM N NZ . LYS A 0 70 . 70 LYS A NZ nan 1.0 +1 0.2453973 0.26095447 0.5641949 1 580 -ATOM N N . LYS A 0 71 . 71 LYS A N nan 1.0 ? 0.49516046 0.7994917 0.024620993 1 581 -ATOM C CA . LYS A 0 71 . 71 LYS A CA nan 1.0 ? 0.77531546 0.62109876 0.3282037 1 582 -ATOM C C . LYS A 0 71 . 71 LYS A C nan 1.0 ? 0.19331904 0.09032466 0.09634605 1 583 -ATOM O O . LYS A 0 71 . 71 LYS A O nan 1.0 ? 0.5188872 0.09268895 0.2391066 1 584 -ATOM C CB . LYS A 0 71 . 71 LYS A CB nan 1.0 ? 0.5139747 0.5704273 0.23906891 1 585 -ATOM C CG . LYS A 0 71 . 71 LYS A CG nan 1.0 ? 0.6007013 0.3474039 0.86118793 1 586 -ATOM C CD . LYS A 0 71 . 71 LYS A CD nan 1.0 ? 0.2132959 0.43646318 0.20968631 1 587 -ATOM C CE . LYS A 0 71 . 71 LYS A CE nan 1.0 ? 0.058942117 0.30060282 0.016573109 1 588 -ATOM N NZ . LYS A 0 71 . 71 LYS A NZ nan 1.0 +1 0.91298324 0.86387885 0.17909355 1 589 -ATOM N N . ARG A 0 72 . 72 ARG A N nan 1.0 ? 0.95237267 0.62164867 0.5253431 1 590 -ATOM C CA . ARG A 0 72 . 72 ARG A CA nan 1.0 ? 0.096183516 0.36377364 0.010669517 1 591 -ATOM C C . ARG A 0 72 . 72 ARG A C nan 1.0 ? 0.5216535 0.8678621 0.6311071 1 592 -ATOM O O . ARG A 0 72 . 72 ARG A O nan 1.0 ? 0.041534718 0.3387672 0.658456 1 593 -ATOM C CB . ARG A 0 72 . 72 ARG A CB nan 1.0 ? 0.33647737 0.40744418 0.05993724 1 594 -ATOM C CG . ARG A 0 72 . 72 ARG A CG nan 1.0 ? 0.9785252 0.66314965 0.89859354 1 595 -ATOM C CD . ARG A 0 72 . 72 ARG A CD nan 1.0 ? 
0.15664905 0.7837316 0.5520706 1 596 -ATOM N NE . ARG A 0 72 . 72 ARG A NE nan 1.0 ? 0.642944 0.5592219 0.25135642 1 597 -ATOM C CZ . ARG A 0 72 . 72 ARG A CZ nan 1.0 ? 0.9855964 0.8395812 0.15817222 1 598 -ATOM N NH1 . ARG A 0 72 . 72 ARG A NH1 nan 1.0 ? 0.9647596 0.58522487 0.7065292 1 599 -ATOM N NH2 . ARG A 0 72 . 72 ARG A NH2 nan 1.0 +1 0.034706168 0.27791172 0.11099475 1 600 -ATOM N N . LEU A 0 73 . 73 LEU A N nan 1.0 ? 0.407078 0.37502015 0.32552412 1 601 -ATOM C CA . LEU A 0 73 . 73 LEU A CA nan 1.0 ? 0.17739494 0.09437133 0.5270269 1 602 -ATOM C C . LEU A 0 73 . 73 LEU A C nan 1.0 ? 0.92581284 0.52098536 0.41285765 1 603 -ATOM O O . LEU A 0 73 . 73 LEU A O nan 1.0 ? 0.07100136 0.51280624 0.8670528 1 604 -ATOM C CB . LEU A 0 73 . 73 LEU A CB nan 1.0 ? 0.7135694 0.8322908 0.56769055 1 605 -ATOM C CG . LEU A 0 73 . 73 LEU A CG nan 1.0 ? 0.7569815 0.78833276 0.22127227 1 606 -ATOM C CD1 . LEU A 0 73 . 73 LEU A CD1 nan 1.0 ? 0.19627276 0.77200377 0.008514758 1 607 -ATOM C CD2 . LEU A 0 73 . 73 LEU A CD2 nan 1.0 ? 0.28387377 0.8061017 0.4384514 1 608 -ATOM N N . GLU A 0 74 . 74 GLU A N nan 1.0 ? 0.62985206 0.3331219 0.565394 1 609 -ATOM C CA . GLU A 0 74 . 74 GLU A CA nan 1.0 ? 0.4874943 0.42118177 0.3068441 1 610 -ATOM C C . GLU A 0 74 . 74 GLU A C nan 1.0 ? 0.68762964 0.25115663 0.5471592 1 611 -ATOM O O . GLU A 0 74 . 74 GLU A O nan 1.0 ? 0.018421303 0.022372141 0.29534158 1 612 -ATOM C CB . GLU A 0 74 . 74 GLU A CB nan 1.0 ? 0.8902391 0.71713436 0.31246796 1 613 -ATOM C CG . GLU A 0 74 . 74 GLU A CG nan 1.0 ? 0.4807157 0.85726994 0.52364624 1 614 -ATOM C CD . GLU A 0 74 . 74 GLU A CD nan 1.0 ? 0.4516472 0.7790603 0.00018880414 1 615 -ATOM O OE1 . GLU A 0 74 . 74 GLU A OE1 nan 1.0 ? 0.13191071 0.22465014 0.32492843 1 616 -ATOM O OE2 . GLU A 0 74 . 74 GLU A OE2 nan 1.0 ? 0.15865302 0.6451631 0.5090739 1 617 -ATOM N N . ASN A 0 75 . 75 ASN A N nan 1.0 ? 0.17412814 0.8112234 0.9546098 1 618 -ATOM C CA . ASN A 0 75 . 75 ASN A CA nan 1.0 ? 
0.19778174 0.45828536 0.91665596 1 619 -ATOM C C . ASN A 0 75 . 75 ASN A C nan 1.0 ? 0.8871164 0.4216709 0.23089346 1 620 -ATOM O O . ASN A 0 75 . 75 ASN A O nan 1.0 ? 0.7583631 0.3905301 0.7458538 1 621 -ATOM C CB . ASN A 0 75 . 75 ASN A CB nan 1.0 ? 0.19530556 0.04756226 0.48428187 1 622 -ATOM C CG . ASN A 0 75 . 75 ASN A CG nan 1.0 ? 0.31536645 0.2515624 0.6519207 1 623 -ATOM O OD1 . ASN A 0 75 . 75 ASN A OD1 nan 1.0 ? 0.6987998 0.083615676 0.538002 1 624 -ATOM N ND2 . ASN A 0 75 . 75 ASN A ND2 nan 1.0 ? 0.61323524 0.36402792 0.9938677 1 625 -ATOM N N . ASN A 0 76 . 76 ASN A N nan 1.0 ? 0.96645963 0.10753249 0.04915864 1 626 -ATOM C CA . ASN A 0 76 . 76 ASN A CA nan 1.0 ? 0.9769989 0.85155076 0.73992306 1 627 -ATOM C C . ASN A 0 76 . 76 ASN A C nan 1.0 ? 0.7721229 0.79734784 0.07307396 1 628 -ATOM O O . ASN A 0 76 . 76 ASN A O nan 1.0 ? 0.040500414 0.6800974 0.8421704 1 629 -ATOM C CB . ASN A 0 76 . 76 ASN A CB nan 1.0 ? 0.22497675 0.2632308 0.37391895 1 630 -ATOM C CG . ASN A 0 76 . 76 ASN A CG nan 1.0 ? 0.32044068 0.2590903 0.1858797 1 631 -ATOM O OD1 . ASN A 0 76 . 76 ASN A OD1 nan 1.0 ? 0.17737667 0.43146187 0.7612957 1 632 -ATOM N ND2 . ASN A 0 76 . 76 ASN A ND2 nan 1.0 ? 0.18831839 0.9858747 0.4741859 1 633 -ATOM N N . TYR A 0 77 . 77 TYR A N nan 1.0 ? 0.04770025 0.88406783 0.7127418 1 634 -ATOM C CA . TYR A 0 77 . 77 TYR A CA nan 1.0 ? 0.8487975 0.7558377 0.9994272 1 635 -ATOM C C . TYR A 0 77 . 77 TYR A C nan 1.0 ? 0.16809033 0.23988964 0.5052329 1 636 -ATOM O O . TYR A 0 77 . 77 TYR A O nan 1.0 ? 0.27208987 0.15889637 0.6824352 1 637 -ATOM C CB . TYR A 0 77 . 77 TYR A CB nan 1.0 ? 0.20680141 0.23400351 0.37791762 1 638 -ATOM C CG . TYR A 0 77 . 77 TYR A CG nan 1.0 ? 0.8730579 0.7010038 0.7075982 1 639 -ATOM C CD1 . TYR A 0 77 . 77 TYR A CD1 nan 1.0 ? 0.6580549 0.2351866 0.78062457 1 640 -ATOM C CD2 . TYR A 0 77 . 77 TYR A CD2 nan 1.0 ? 0.3505771 0.9097244 0.7028128 1 641 -ATOM C CE1 . TYR A 0 77 . 77 TYR A CE1 nan 1.0 ? 
0.5715351 0.19995147 0.8794824 1 642 -ATOM C CE2 . TYR A 0 77 . 77 TYR A CE2 nan 1.0 ? 0.6219068 0.3438669 0.2649529 1 643 -ATOM C CZ . TYR A 0 77 . 77 TYR A CZ nan 1.0 ? 0.6352017 0.087228745 0.31708395 1 644 -ATOM O OH . TYR A 0 77 . 77 TYR A OH nan 1.0 ? 0.06918658 0.5841741 0.6915744 1 645 -ATOM N N . TYR A 0 78 . 78 TYR A N nan 1.0 ? 0.044527423 0.8696741 0.79409754 1 646 -ATOM C CA . TYR A 0 78 . 78 TYR A CA nan 1.0 ? 0.8324599 0.21893133 0.5917072 1 647 -ATOM C C . TYR A 0 78 . 78 TYR A C nan 1.0 ? 0.9143626 0.9741757 0.98764884 1 648 -ATOM O O . TYR A 0 78 . 78 TYR A O nan 1.0 ? 0.012137282 0.9187441 0.96157175 1 649 -ATOM C CB . TYR A 0 78 . 78 TYR A CB nan 1.0 ? 0.91296965 0.8965023 0.54576653 1 650 -ATOM C CG . TYR A 0 78 . 78 TYR A CG nan 1.0 ? 0.04509174 0.8772603 0.9324249 1 651 -ATOM C CD1 . TYR A 0 78 . 78 TYR A CD1 nan 1.0 ? 0.62820464 0.52818996 0.24325831 1 652 -ATOM C CD2 . TYR A 0 78 . 78 TYR A CD2 nan 1.0 ? 0.78770036 0.7991897 0.77127814 1 653 -ATOM C CE1 . TYR A 0 78 . 78 TYR A CE1 nan 1.0 ? 0.06445266 0.7136319 0.8109778 1 654 -ATOM C CE2 . TYR A 0 78 . 78 TYR A CE2 nan 1.0 ? 0.45680645 0.843636 0.4037765 1 655 -ATOM C CZ . TYR A 0 78 . 78 TYR A CZ nan 1.0 ? 0.4018538 0.9563598 0.963407 1 656 -ATOM O OH . TYR A 0 78 . 78 TYR A OH nan 1.0 ? 0.80141914 0.06795039 0.8989574 1 657 -ATOM N N . TRP A 0 79 . 79 TRP A N nan 1.0 ? 0.26167294 0.62559843 0.6539007 1 658 -ATOM C CA . TRP A 0 79 . 79 TRP A CA nan 1.0 ? 0.99233866 0.97898644 0.9913828 1 659 -ATOM C C . TRP A 0 79 . 79 TRP A C nan 1.0 ? 0.6310388 0.724363 0.14613439 1 660 -ATOM O O . TRP A 0 79 . 79 TRP A O nan 1.0 ? 0.1832385 0.6927406 0.45329058 1 661 -ATOM C CB . TRP A 0 79 . 79 TRP A CB nan 1.0 ? 0.9730588 0.34648648 0.1823094 1 662 -ATOM C CG . TRP A 0 79 . 79 TRP A CG nan 1.0 ? 0.23367064 0.7872397 0.8982701 1 663 -ATOM C CD1 . TRP A 0 79 . 79 TRP A CD1 nan 1.0 ? 0.9639446 0.8128462 0.5121457 1 664 -ATOM C CD2 . TRP A 0 79 . 79 TRP A CD2 nan 1.0 ? 
0.00019693919 0.63445795 0.4282731 1 665 -ATOM N NE1 . TRP A 0 79 . 79 TRP A NE1 nan 1.0 ? 0.8844704 0.4650854 0.57244897 1 666 -ATOM C CE2 . TRP A 0 79 . 79 TRP A CE2 nan 1.0 ? 0.34346926 0.551297 0.28133738 1 667 -ATOM C CE3 . TRP A 0 79 . 79 TRP A CE3 nan 1.0 ? 0.06625598 0.9491232 0.13679056 1 668 -ATOM C CZ2 . TRP A 0 79 . 79 TRP A CZ2 nan 1.0 ? 0.48177195 0.16684987 0.94097686 1 669 -ATOM C CZ3 . TRP A 0 79 . 79 TRP A CZ3 nan 1.0 ? 0.62758297 0.75673497 0.4948002 1 670 -ATOM C CH2 . TRP A 0 79 . 79 TRP A CH2 nan 1.0 ? 0.38055962 0.56667733 0.7063817 1 671 -ATOM N N . ASN A 0 80 . 80 ASN A N nan 1.0 ? 0.72955924 0.5548372 0.30124938 1 672 -ATOM C CA . ASN A 0 80 . 80 ASN A CA nan 1.0 ? 0.20057769 0.36484924 0.637632 1 673 -ATOM C C . ASN A 0 80 . 80 ASN A C nan 1.0 ? 0.00050765456 0.88671076 0.28471142 1 674 -ATOM O O . ASN A 0 80 . 80 ASN A O nan 1.0 ? 0.32011858 0.08672241 0.5725755 1 675 -ATOM C CB . ASN A 0 80 . 80 ASN A CB nan 1.0 ? 0.22694312 0.97042674 0.54954195 1 676 -ATOM C CG . ASN A 0 80 . 80 ASN A CG nan 1.0 ? 0.72962844 0.56099755 0.9752862 1 677 -ATOM O OD1 . ASN A 0 80 . 80 ASN A OD1 nan 1.0 ? 0.24366748 0.26632044 0.8536499 1 678 -ATOM N ND2 . ASN A 0 80 . 80 ASN A ND2 nan 1.0 ? 0.8077311 0.15341239 0.2594215 1 679 -ATOM N N . ALA A 0 81 . 81 ALA A N nan 1.0 ? 0.33433998 0.07829497 0.0574066 1 680 -ATOM C CA . ALA A 0 81 . 81 ALA A CA nan 1.0 ? 0.85738593 0.21180694 0.8790473 1 681 -ATOM C C . ALA A 0 81 . 81 ALA A C nan 1.0 ? 0.79062164 0.89331925 0.3117845 1 682 -ATOM O O . ALA A 0 81 . 81 ALA A O nan 1.0 ? 0.81606984 0.5238397 0.46608073 1 683 -ATOM C CB . ALA A 0 81 . 81 ALA A CB nan 1.0 ? 0.70425177 0.91540533 0.61462384 1 684 -ATOM N N . GLN A 0 82 . 82 GLN A N nan 1.0 ? 0.070829615 0.5227379 0.090386905 1 685 -ATOM C CA . GLN A 0 82 . 82 GLN A CA nan 1.0 ? 0.6213923 0.45404592 0.16961 1 686 -ATOM C C . GLN A 0 82 . 82 GLN A C nan 1.0 ? 0.5628995 0.79077613 0.78997886 1 687 -ATOM O O . GLN A 0 82 . 82 GLN A O nan 1.0 ? 
0.04523078 0.54619676 0.14873561 1 688 -ATOM C CB . GLN A 0 82 . 82 GLN A CB nan 1.0 ? 0.32069886 0.35475007 0.11367264 1 689 -ATOM C CG . GLN A 0 82 . 82 GLN A CG nan 1.0 ? 0.26072082 0.66426754 0.4783517 1 690 -ATOM C CD . GLN A 0 82 . 82 GLN A CD nan 1.0 ? 0.05558348 0.91652024 0.6924265 1 691 -ATOM O OE1 . GLN A 0 82 . 82 GLN A OE1 nan 1.0 ? 0.89188075 0.775499 0.2066081 1 692 -ATOM N NE2 . GLN A 0 82 . 82 GLN A NE2 nan 1.0 ? 0.5636716 0.9514352 0.23837999 1 693 -ATOM N N . GLU A 0 83 . 83 GLU A N nan 1.0 ? 0.29769984 0.9866153 0.7352074 1 694 -ATOM C CA . GLU A 0 83 . 83 GLU A CA nan 1.0 ? 0.96461 0.5726843 0.122534454 1 695 -ATOM C C . GLU A 0 83 . 83 GLU A C nan 1.0 ? 0.10859398 0.36255783 0.76120466 1 696 -ATOM O O . GLU A 0 83 . 83 GLU A O nan 1.0 ? 0.8946002 0.44076014 0.97455746 1 697 -ATOM C CB . GLU A 0 83 . 83 GLU A CB nan 1.0 ? 0.28866336 0.24385351 0.94949055 1 698 -ATOM C CG . GLU A 0 83 . 83 GLU A CG nan 1.0 ? 0.13284174 0.28993678 0.27697453 1 699 -ATOM C CD . GLU A 0 83 . 83 GLU A CD nan 1.0 ? 0.99308074 0.6849143 0.5554101 1 700 -ATOM O OE1 . GLU A 0 83 . 83 GLU A OE1 nan 1.0 ? 0.2107889 0.1340042 0.34074366 1 701 -ATOM O OE2 . GLU A 0 83 . 83 GLU A OE2 nan 1.0 ? 0.30257842 0.7222809 0.83903193 1 702 -ATOM N N . CYS A 0 84 . 84 CYS A N nan 1.0 ? 0.8340585 0.8637017 0.6148392 1 703 -ATOM C CA . CYS A 0 84 . 84 CYS A CA nan 1.0 ? 0.0555383 0.7994423 0.4342538 1 704 -ATOM C C . CYS A 0 84 . 84 CYS A C nan 1.0 ? 0.48088798 0.82271427 0.12587357 1 705 -ATOM O O . CYS A 0 84 . 84 CYS A O nan 1.0 ? 0.29041016 0.50556916 0.08263066 1 706 -ATOM C CB . CYS A 0 84 . 84 CYS A CB nan 1.0 ? 0.73214257 0.9396432 0.7505695 1 707 -ATOM S SG . CYS A 0 84 . 84 CYS A SG nan 1.0 ? 0.3206757 0.56037706 0.8134687 1 708 -ATOM N N . ILE A 0 85 . 85 ILE A N nan 1.0 ? 0.7348049 0.3264689 0.328047 1 709 -ATOM C CA . ILE A 0 85 . 85 ILE A CA nan 1.0 ? 0.02190051 0.85684234 0.45751247 1 710 -ATOM C C . ILE A 0 85 . 85 ILE A C nan 1.0 ? 
0.45007703 0.7369769 0.19855668 1 711 -ATOM O O . ILE A 0 85 . 85 ILE A O nan 1.0 ? 0.3012296 0.8205532 0.08084584 1 712 -ATOM C CB . ILE A 0 85 . 85 ILE A CB nan 1.0 ? 0.7605323 0.876065 0.977272 1 713 -ATOM C CG1 . ILE A 0 85 . 85 ILE A CG1 nan 1.0 ? 0.26272863 0.18508081 0.5757748 1 714 -ATOM C CG2 . ILE A 0 85 . 85 ILE A CG2 nan 1.0 ? 0.53095436 0.3579993 0.80399764 1 715 -ATOM C CD1 . ILE A 0 85 . 85 ILE A CD1 nan 1.0 ? 0.31272823 0.555144 0.5610441 1 716 -ATOM N N . GLN A 0 86 . 86 GLN A N nan 1.0 ? 0.6120798 0.13173823 0.66781306 1 717 -ATOM C CA . GLN A 0 86 . 86 GLN A CA nan 1.0 ? 0.80399764 0.18763158 0.8499901 1 718 -ATOM C C . GLN A 0 86 . 86 GLN A C nan 1.0 ? 0.19862904 0.66693044 0.8736677 1 719 -ATOM O O . GLN A 0 86 . 86 GLN A O nan 1.0 ? 0.61084306 0.75558525 0.47203502 1 720 -ATOM C CB . GLN A 0 86 . 86 GLN A CB nan 1.0 ? 0.94180137 0.40297207 0.70553863 1 721 -ATOM C CG . GLN A 0 86 . 86 GLN A CG nan 1.0 ? 0.1900386 0.54851145 0.16138427 1 722 -ATOM C CD . GLN A 0 86 . 86 GLN A CD nan 1.0 ? 0.17060167 0.8709764 0.22312854 1 723 -ATOM O OE1 . GLN A 0 86 . 86 GLN A OE1 nan 1.0 ? 0.07199488 0.3007852 0.8604658 1 724 -ATOM N NE2 . GLN A 0 86 . 86 GLN A NE2 nan 1.0 ? 0.27032062 0.027294677 0.9578243 1 725 -ATOM N N . ASP A 0 87 . 87 ASP A N nan 1.0 ? 0.7369257 0.435437 0.19886883 1 726 -ATOM C CA . ASP A 0 87 . 87 ASP A CA nan 1.0 ? 0.6494561 0.87583345 0.9129469 1 727 -ATOM C C . ASP A 0 87 . 87 ASP A C nan 1.0 ? 0.058297493 0.2610927 0.6138944 1 728 -ATOM O O . ASP A 0 87 . 87 ASP A O nan 1.0 ? 0.13505158 0.87689644 0.30222368 1 729 -ATOM C CB . ASP A 0 87 . 87 ASP A CB nan 1.0 ? 0.8972025 0.7827074 0.39533657 1 730 -ATOM C CG . ASP A 0 87 . 87 ASP A CG nan 1.0 ? 0.35446623 0.95119745 0.2993589 1 731 -ATOM O OD1 . ASP A 0 87 . 87 ASP A OD1 nan 1.0 ? 0.02380232 0.48129413 0.24633762 1 732 -ATOM O OD2 . ASP A 0 87 . 87 ASP A OD2 nan 1.0 ? 0.5580304 0.4610897 0.41807756 1 733 -ATOM N N . PHE A 0 88 . 88 PHE A N nan 1.0 ? 
0.12838505 0.3105022 0.021716882 1 734 -ATOM C CA . PHE A 0 88 . 88 PHE A CA nan 1.0 ? 0.5634346 0.6195847 0.5837724 1 735 -ATOM C C . PHE A 0 88 . 88 PHE A C nan 1.0 ? 0.7470674 0.85442615 0.77308273 1 736 -ATOM O O . PHE A 0 88 . 88 PHE A O nan 1.0 ? 0.32743242 0.8225516 0.16247539 1 737 -ATOM C CB . PHE A 0 88 . 88 PHE A CB nan 1.0 ? 0.9898864 0.9741186 0.936772 1 738 -ATOM C CG . PHE A 0 88 . 88 PHE A CG nan 1.0 ? 0.03429066 0.9685467 0.6637821 1 739 -ATOM C CD1 . PHE A 0 88 . 88 PHE A CD1 nan 1.0 ? 0.74389935 0.1946748 0.013155027 1 740 -ATOM C CD2 . PHE A 0 88 . 88 PHE A CD2 nan 1.0 ? 0.97965294 0.4055277 0.9995715 1 741 -ATOM C CE1 . PHE A 0 88 . 88 PHE A CE1 nan 1.0 ? 0.5547972 0.2495077 0.9083073 1 742 -ATOM C CE2 . PHE A 0 88 . 88 PHE A CE2 nan 1.0 ? 0.122432314 0.10750524 0.36958537 1 743 -ATOM C CZ . PHE A 0 88 . 88 PHE A CZ nan 1.0 ? 0.57388437 0.48241886 0.17993392 1 744 -ATOM N N . ASN A 0 89 . 89 ASN A N nan 1.0 ? 0.96763176 0.3871004 0.6654369 1 745 -ATOM C CA . ASN A 0 89 . 89 ASN A CA nan 1.0 ? 0.559174 0.11816021 0.8039978 1 746 -ATOM C C . ASN A 0 89 . 89 ASN A C nan 1.0 ? 0.6502254 0.21168172 0.45968038 1 747 -ATOM O O . ASN A 0 89 . 89 ASN A O nan 1.0 ? 0.4239252 0.21411404 0.12646382 1 748 -ATOM C CB . ASN A 0 89 . 89 ASN A CB nan 1.0 ? 0.25283647 0.67181194 0.012482349 1 749 -ATOM C CG . ASN A 0 89 . 89 ASN A CG nan 1.0 ? 0.015340936 0.7981564 0.71941924 1 750 -ATOM O OD1 . ASN A 0 89 . 89 ASN A OD1 nan 1.0 ? 0.46901646 0.20224562 0.5507428 1 751 -ATOM N ND2 . ASN A 0 89 . 89 ASN A ND2 nan 1.0 ? 0.43648416 0.5838726 0.48064741 1 752 -ATOM N N . THR A 0 90 . 90 THR A N nan 1.0 ? 0.9565926 0.85100555 0.6061187 1 753 -ATOM C CA . THR A 0 90 . 90 THR A CA nan 1.0 ? 0.5697998 0.56037056 0.76768905 1 754 -ATOM C C . THR A 0 90 . 90 THR A C nan 1.0 ? 0.866605 0.7902023 0.9876335 1 755 -ATOM O O . THR A 0 90 . 90 THR A O nan 1.0 ? 0.40543523 0.32221308 0.64455086 1 756 -ATOM C CB . THR A 0 90 . 90 THR A CB nan 1.0 ? 
0.67059916 0.68931043 0.9127305 1 757 -ATOM O OG1 . THR A 0 90 . 90 THR A OG1 nan 1.0 ? 0.42412075 0.20493293 0.96659124 1 758 -ATOM C CG2 . THR A 0 90 . 90 THR A CG2 nan 1.0 ? 0.26975983 0.049415406 0.5172781 1 759 -ATOM N N . MET A 0 91 . 91 MET A N nan 1.0 ? 0.26416612 0.8899853 0.05145465 1 760 -ATOM C CA . MET A 0 91 . 91 MET A CA nan 1.0 ? 0.15105492 0.31333563 0.7124942 1 761 -ATOM C C . MET A 0 91 . 91 MET A C nan 1.0 ? 0.28169182 0.71173584 0.0610804 1 762 -ATOM O O . MET A 0 91 . 91 MET A O nan 1.0 ? 0.18709788 0.5634754 0.069997914 1 763 -ATOM C CB . MET A 0 91 . 91 MET A CB nan 1.0 ? 0.7379578 0.2793375 0.24345116 1 764 -ATOM C CG . MET A 0 91 . 91 MET A CG nan 1.0 ? 0.57105136 0.07536844 0.61428666 1 765 -ATOM S SD . MET A 0 91 . 91 MET A SD nan 1.0 ? 0.86499286 0.21837336 0.032823205 1 766 -ATOM C CE . MET A 0 91 . 91 MET A CE nan 1.0 ? 0.3079395 0.2865693 0.7451216 1 767 -ATOM N N . PHE A 0 92 . 92 PHE A N nan 1.0 ? 0.5098235 0.18480334 0.5013589 1 768 -ATOM C CA . PHE A 0 92 . 92 PHE A CA nan 1.0 ? 0.40379998 0.22301705 0.57221353 1 769 -ATOM C C . PHE A 0 92 . 92 PHE A C nan 1.0 ? 0.95967764 0.7862896 0.16610144 1 770 -ATOM O O . PHE A 0 92 . 92 PHE A O nan 1.0 ? 0.24907285 0.9546904 0.5040052 1 771 -ATOM C CB . PHE A 0 92 . 92 PHE A CB nan 1.0 ? 0.47375965 0.35295936 0.60280645 1 772 -ATOM C CG . PHE A 0 92 . 92 PHE A CG nan 1.0 ? 0.22732073 0.4648514 0.289181 1 773 -ATOM C CD1 . PHE A 0 92 . 92 PHE A CD1 nan 1.0 ? 0.38222265 0.89957255 0.9855767 1 774 -ATOM C CD2 . PHE A 0 92 . 92 PHE A CD2 nan 1.0 ? 0.1940842 0.80720514 0.7607281 1 775 -ATOM C CE1 . PHE A 0 92 . 92 PHE A CE1 nan 1.0 ? 0.7403345 0.9283844 0.09793004 1 776 -ATOM C CE2 . PHE A 0 92 . 92 PHE A CE2 nan 1.0 ? 0.81450623 0.9889511 0.45397702 1 777 -ATOM C CZ . PHE A 0 92 . 92 PHE A CZ nan 1.0 ? 0.9435655 0.8567645 0.71530396 1 778 -ATOM N N . THR A 0 93 . 93 THR A N nan 1.0 ? 0.09828024 0.47938785 0.5459279 1 779 -ATOM C CA . THR A 0 93 . 93 THR A CA nan 1.0 ? 
0.3632639 0.16161193 0.51040757 1 780 -ATOM C C . THR A 0 93 . 93 THR A C nan 1.0 ? 0.5436687 0.9477791 0.2665358 1 781 -ATOM O O . THR A 0 93 . 93 THR A O nan 1.0 ? 0.9142387 0.86652863 0.04148001 1 782 -ATOM C CB . THR A 0 93 . 93 THR A CB nan 1.0 ? 0.7954506 0.8753352 0.23444057 1 783 -ATOM O OG1 . THR A 0 93 . 93 THR A OG1 nan 1.0 ? 0.5855007 0.8454805 0.53531903 1 784 -ATOM C CG2 . THR A 0 93 . 93 THR A CG2 nan 1.0 ? 0.1850913 0.28999928 0.613465 1 785 -ATOM N N . ASN A 0 94 . 94 ASN A N nan 1.0 ? 0.64327383 0.7084793 0.46608865 1 786 -ATOM C CA . ASN A 0 94 . 94 ASN A CA nan 1.0 ? 0.6970221 0.64140385 0.68888193 1 787 -ATOM C C . ASN A 0 94 . 94 ASN A C nan 1.0 ? 0.26959327 0.7237351 0.03360208 1 788 -ATOM O O . ASN A 0 94 . 94 ASN A O nan 1.0 ? 0.87186193 0.85017157 0.21734932 1 789 -ATOM C CB . ASN A 0 94 . 94 ASN A CB nan 1.0 ? 0.9199495 0.018165931 0.3201485 1 790 -ATOM C CG . ASN A 0 94 . 94 ASN A CG nan 1.0 ? 0.9437756 0.7021907 0.6290612 1 791 -ATOM O OD1 . ASN A 0 94 . 94 ASN A OD1 nan 1.0 ? 0.51719177 0.10687196 0.5261213 1 792 -ATOM N ND2 . ASN A 0 94 . 94 ASN A ND2 nan 1.0 ? 0.15624602 0.43672606 0.17087576 1 793 -ATOM N N . CYS A 0 95 . 95 CYS A N nan 1.0 ? 0.2164477 0.6122553 0.54584134 1 794 -ATOM C CA . CYS A 0 95 . 95 CYS A CA nan 1.0 ? 0.78384006 0.50217766 0.40615004 1 795 -ATOM C C . CYS A 0 95 . 95 CYS A C nan 1.0 ? 0.6900293 0.1529784 0.7136228 1 796 -ATOM O O . CYS A 0 95 . 95 CYS A O nan 1.0 ? 0.8818713 0.006406596 0.66806537 1 797 -ATOM C CB . CYS A 0 95 . 95 CYS A CB nan 1.0 ? 0.8295322 0.4694831 0.52468085 1 798 -ATOM S SG . CYS A 0 95 . 95 CYS A SG nan 1.0 ? 0.8026409 0.30594078 0.26660627 1 799 -ATOM N N . TYR A 0 96 . 96 TYR A N nan 1.0 ? 0.23018686 0.3474778 0.46771142 1 800 -ATOM C CA . TYR A 0 96 . 96 TYR A CA nan 1.0 ? 0.80688405 0.71297127 0.08678546 1 801 -ATOM C C . TYR A 0 96 . 96 TYR A C nan 1.0 ? 0.9422072 0.8661789 0.007134285 1 802 -ATOM O O . TYR A 0 96 . 96 TYR A O nan 1.0 ? 
0.23758538 0.78126395 0.1580255 1 803 -ATOM C CB . TYR A 0 96 . 96 TYR A CB nan 1.0 ? 0.8259806 0.865935 0.76824826 1 804 -ATOM C CG . TYR A 0 96 . 96 TYR A CG nan 1.0 ? 0.3062856 0.9080242 0.3992152 1 805 -ATOM C CD1 . TYR A 0 96 . 96 TYR A CD1 nan 1.0 ? 0.10584434 0.59001607 0.10600048 1 806 -ATOM C CD2 . TYR A 0 96 . 96 TYR A CD2 nan 1.0 ? 0.37385288 0.74212193 0.3797276 1 807 -ATOM C CE1 . TYR A 0 96 . 96 TYR A CE1 nan 1.0 ? 0.7583606 0.7853673 0.56861943 1 808 -ATOM C CE2 . TYR A 0 96 . 96 TYR A CE2 nan 1.0 ? 0.3408046 0.3774394 0.5876014 1 809 -ATOM C CZ . TYR A 0 96 . 96 TYR A CZ nan 1.0 ? 0.2789714 0.26034397 0.2574738 1 810 -ATOM O OH . TYR A 0 96 . 96 TYR A OH nan 1.0 ? 0.2636906 0.3704718 0.97094876 1 811 -ATOM N N . ILE A 0 97 . 97 ILE A N nan 1.0 ? 0.33756688 0.029333131 0.06143362 1 812 -ATOM C CA . ILE A 0 97 . 97 ILE A CA nan 1.0 ? 0.98017 0.9193811 0.22806141 1 813 -ATOM C C . ILE A 0 97 . 97 ILE A C nan 1.0 ? 0.10334618 0.6109331 0.8002258 1 814 -ATOM O O . ILE A 0 97 . 97 ILE A O nan 1.0 ? 0.6197148 0.09912995 0.6525283 1 815 -ATOM C CB . ILE A 0 97 . 97 ILE A CB nan 1.0 ? 0.40603313 0.116980456 0.52344465 1 816 -ATOM C CG1 . ILE A 0 97 . 97 ILE A CG1 nan 1.0 ? 0.9221057 0.05376749 0.95123106 1 817 -ATOM C CG2 . ILE A 0 97 . 97 ILE A CG2 nan 1.0 ? 0.79992235 0.4327412 0.8722819 1 818 -ATOM C CD1 . ILE A 0 97 . 97 ILE A CD1 nan 1.0 ? 0.77582484 0.10184183 0.9208539 1 819 -ATOM N N . TYR A 0 98 . 98 TYR A N nan 1.0 ? 0.7255853 0.9173914 0.05512103 1 820 -ATOM C CA . TYR A 0 98 . 98 TYR A CA nan 1.0 ? 0.49705932 0.5573943 0.4673608 1 821 -ATOM C C . TYR A 0 98 . 98 TYR A C nan 1.0 ? 0.89472735 0.48954543 0.798595 1 822 -ATOM O O . TYR A 0 98 . 98 TYR A O nan 1.0 ? 0.52475625 0.65905225 0.5172899 1 823 -ATOM C CB . TYR A 0 98 . 98 TYR A CB nan 1.0 ? 0.27299136 0.33730718 0.010440672 1 824 -ATOM C CG . TYR A 0 98 . 98 TYR A CG nan 1.0 ? 0.47754115 0.85905945 0.9888021 1 825 -ATOM C CD1 . TYR A 0 98 . 98 TYR A CD1 nan 1.0 ? 
0.7855105 0.1795729 0.71663505 1 826 -ATOM C CD2 . TYR A 0 98 . 98 TYR A CD2 nan 1.0 ? 0.20506035 0.33822107 0.8053493 1 827 -ATOM C CE1 . TYR A 0 98 . 98 TYR A CE1 nan 1.0 ? 0.33794245 0.6581951 0.41356593 1 828 -ATOM C CE2 . TYR A 0 98 . 98 TYR A CE2 nan 1.0 ? 0.60509324 0.58498913 0.5974283 1 829 -ATOM C CZ . TYR A 0 98 . 98 TYR A CZ nan 1.0 ? 0.24011405 0.5756055 0.36766213 1 830 -ATOM O OH . TYR A 0 98 . 98 TYR A OH nan 1.0 ? 0.9646411 0.43423077 0.35370255 1 831 -ATOM N N . ASN A 0 99 . 99 ASN A N nan 1.0 ? 0.25376758 0.1795867 0.93711656 1 832 -ATOM C CA . ASN A 0 99 . 99 ASN A CA nan 1.0 ? 0.8013591 0.5757962 0.66658616 1 833 -ATOM C C . ASN A 0 99 . 99 ASN A C nan 1.0 ? 0.80435205 0.30333838 0.24141055 1 834 -ATOM O O . ASN A 0 99 . 99 ASN A O nan 1.0 ? 0.053325515 0.57215345 0.31314576 1 835 -ATOM C CB . ASN A 0 99 . 99 ASN A CB nan 1.0 ? 0.35694963 0.75036067 0.15385848 1 836 -ATOM C CG . ASN A 0 99 . 99 ASN A CG nan 1.0 ? 0.60576254 0.14770377 0.2508149 1 837 -ATOM O OD1 . ASN A 0 99 . 99 ASN A OD1 nan 1.0 ? 0.48204255 0.6021428 0.83668554 1 838 -ATOM N ND2 . ASN A 0 99 . 99 ASN A ND2 nan 1.0 ? 0.17637861 0.34876412 0.28403273 1 839 -ATOM N N . LYS A 0 100 . 100 LYS A N nan 1.0 ? 0.99282134 0.7372685 0.5500718 1 840 -ATOM C CA . LYS A 0 100 . 100 LYS A CA nan 1.0 ? 0.1401181 0.0036599543 0.23922469 1 841 -ATOM C C . LYS A 0 100 . 100 LYS A C nan 1.0 ? 0.23363504 0.6225468 0.8136883 1 842 -ATOM O O . LYS A 0 100 . 100 LYS A O nan 1.0 ? 0.483767 0.64956474 0.948551 1 843 -ATOM C CB . LYS A 0 100 . 100 LYS A CB nan 1.0 ? 0.3626481 0.053883515 0.22386697 1 844 -ATOM C CG . LYS A 0 100 . 100 LYS A CG nan 1.0 ? 0.04450091 0.37332448 0.8487104 1 845 -ATOM C CD . LYS A 0 100 . 100 LYS A CD nan 1.0 ? 0.45169455 0.3460666 0.116909824 1 846 -ATOM C CE . LYS A 0 100 . 100 LYS A CE nan 1.0 ? 0.5426863 0.37822142 0.9580295 1 847 -ATOM N NZ . LYS A 0 100 . 100 LYS A NZ nan 1.0 +1 0.5307418 0.86864597 0.8122678 1 848 -ATOM N N . PRO A 0 101 . 101 PRO A N nan 1.0 ? 
0.08915217 0.876527 0.15476266 1 849 -ATOM C CA . PRO A 0 101 . 101 PRO A CA nan 1.0 ? 0.9436225 0.46828955 0.14134777 1 850 -ATOM C C . PRO A 0 101 . 101 PRO A C nan 1.0 ? 0.98685634 0.15779164 0.012306722 1 851 -ATOM O O . PRO A 0 101 . 101 PRO A O nan 1.0 ? 0.552878 0.4349593 0.88783467 1 852 -ATOM C CB . PRO A 0 101 . 101 PRO A CB nan 1.0 ? 0.08570183 0.08662134 0.16002582 1 853 -ATOM C CG . PRO A 0 101 . 101 PRO A CG nan 1.0 ? 0.43809757 0.43408918 0.355686 1 854 -ATOM C CD . PRO A 0 101 . 101 PRO A CD nan 1.0 ? 0.8521074 0.26594263 0.17797089 1 855 -ATOM N N . GLY A 0 102 . 102 GLY A N nan 1.0 ? 0.666099 0.49047562 0.15600935 1 856 -ATOM C CA . GLY A 0 102 . 102 GLY A CA nan 1.0 ? 0.08478815 0.9706912 0.55606997 1 857 -ATOM C C . GLY A 0 102 . 102 GLY A C nan 1.0 ? 0.6956637 0.76481783 0.94802374 1 858 -ATOM O O . GLY A 0 102 . 102 GLY A O nan 1.0 ? 0.16782014 0.5468061 0.20532355 1 859 -ATOM N N . ASP A 0 103 . 103 ASP A N nan 1.0 ? 0.4474538 0.59409255 0.047581457 1 860 -ATOM C CA . ASP A 0 103 . 103 ASP A CA nan 1.0 ? 0.7032879 0.7180486 0.48284322 1 861 -ATOM C C . ASP A 0 103 . 103 ASP A C nan 1.0 ? 0.6609931 0.6587709 0.69603086 1 862 -ATOM O O . ASP A 0 103 . 103 ASP A O nan 1.0 ? 0.1646504 0.9979437 0.55129737 1 863 -ATOM C CB . ASP A 0 103 . 103 ASP A CB nan 1.0 ? 0.03399644 0.7796474 0.83405554 1 864 -ATOM C CG . ASP A 0 103 . 103 ASP A CG nan 1.0 ? 0.9433064 0.4392926 0.12585244 1 865 -ATOM O OD1 . ASP A 0 103 . 103 ASP A OD1 nan 1.0 ? 0.9570044 0.8636542 0.028958613 1 866 -ATOM O OD2 . ASP A 0 103 . 103 ASP A OD2 nan 1.0 ? 0.4069789 0.47870773 0.33097798 1 867 -ATOM N N . ASP A 0 104 . 104 ASP A N nan 1.0 ? 0.6042061 0.4225088 0.23430036 1 868 -ATOM C CA . ASP A 0 104 . 104 ASP A CA nan 1.0 ? 0.6121745 0.04396632 0.9225876 1 869 -ATOM C C . ASP A 0 104 . 104 ASP A C nan 1.0 ? 0.8839952 0.88620335 0.08594814 1 870 -ATOM O O . ASP A 0 104 . 104 ASP A O nan 1.0 ? 0.7826847 0.39376673 0.27564204 1 871 -ATOM C CB . ASP A 0 104 . 104 ASP A CB nan 1.0 ? 
0.25310454 0.5020336 0.43554333 1 872 -ATOM C CG . ASP A 0 104 . 104 ASP A CG nan 1.0 ? 0.15120149 0.1800564 0.07186474 1 873 -ATOM O OD1 . ASP A 0 104 . 104 ASP A OD1 nan 1.0 ? 0.1803495 0.51708984 0.042446174 1 874 -ATOM O OD2 . ASP A 0 104 . 104 ASP A OD2 nan 1.0 ? 0.39172083 0.615063 0.07803414 1 875 -ATOM N N . ILE A 0 105 . 105 ILE A N nan 1.0 ? 0.9128007 0.31355703 0.79861814 1 876 -ATOM C CA . ILE A 0 105 . 105 ILE A CA nan 1.0 ? 0.32172775 0.36907008 0.4785053 1 877 -ATOM C C . ILE A 0 105 . 105 ILE A C nan 1.0 ? 0.98287016 0.7023765 0.24268685 1 878 -ATOM O O . ILE A 0 105 . 105 ILE A O nan 1.0 ? 0.8546113 0.6686393 0.01766705 1 879 -ATOM C CB . ILE A 0 105 . 105 ILE A CB nan 1.0 ? 0.45693916 0.19162704 0.30857366 1 880 -ATOM C CG1 . ILE A 0 105 . 105 ILE A CG1 nan 1.0 ? 0.7662003 0.4265601 0.5619073 1 881 -ATOM C CG2 . ILE A 0 105 . 105 ILE A CG2 nan 1.0 ? 0.8667689 0.073004164 0.40253875 1 882 -ATOM C CD1 . ILE A 0 105 . 105 ILE A CD1 nan 1.0 ? 0.45327514 0.042007923 0.5553875 1 883 -ATOM N N . VAL A 0 106 . 106 VAL A N nan 1.0 ? 0.90049666 0.65526426 0.001144842 1 884 -ATOM C CA . VAL A 0 106 . 106 VAL A CA nan 1.0 ? 0.933136 0.08086268 0.5464202 1 885 -ATOM C C . VAL A 0 106 . 106 VAL A C nan 1.0 ? 0.6524353 0.03995343 0.6103228 1 886 -ATOM O O . VAL A 0 106 . 106 VAL A O nan 1.0 ? 0.9540483 0.94893026 0.4754918 1 887 -ATOM C CB . VAL A 0 106 . 106 VAL A CB nan 1.0 ? 0.1674306 0.95949894 0.957516 1 888 -ATOM C CG1 . VAL A 0 106 . 106 VAL A CG1 nan 1.0 ? 0.998579 0.22739376 0.5924162 1 889 -ATOM C CG2 . VAL A 0 106 . 106 VAL A CG2 nan 1.0 ? 0.42382216 0.0064344737 0.8472875 1 890 -ATOM N N . LEU A 0 107 . 107 LEU A N nan 1.0 ? 0.36153883 0.9996912 0.8975831 1 891 -ATOM C CA . LEU A 0 107 . 107 LEU A CA nan 1.0 ? 0.8056909 0.6376958 0.92775184 1 892 -ATOM C C . LEU A 0 107 . 107 LEU A C nan 1.0 ? 0.92895144 0.44777673 0.9170322 1 893 -ATOM O O . LEU A 0 107 . 107 LEU A O nan 1.0 ? 0.2178332 0.36166182 0.6508123 1 894 -ATOM C CB . LEU A 0 107 . 
107 LEU A CB nan 1.0 ? 0.07329621 0.9304962 0.45088607 1 895 -ATOM C CG . LEU A 0 107 . 107 LEU A CG nan 1.0 ? 0.51425177 0.27713472 0.5093116 1 896 -ATOM C CD1 . LEU A 0 107 . 107 LEU A CD1 nan 1.0 ? 0.7338347 0.20228335 0.09792051 1 897 -ATOM C CD2 . LEU A 0 107 . 107 LEU A CD2 nan 1.0 ? 0.37769878 0.7128814 0.94171256 1 898 -ATOM N N . MET A 0 108 . 108 MET A N nan 1.0 ? 0.70973563 0.60951614 0.97623056 1 899 -ATOM C CA . MET A 0 108 . 108 MET A CA nan 1.0 ? 0.7084082 0.81205535 0.8845886 1 900 -ATOM C C . MET A 0 108 . 108 MET A C nan 1.0 ? 0.09949859 0.05495854 0.7836328 1 901 -ATOM O O . MET A 0 108 . 108 MET A O nan 1.0 ? 0.25322372 0.7105042 0.14116874 1 902 -ATOM C CB . MET A 0 108 . 108 MET A CB nan 1.0 ? 0.6745122 0.8128708 0.65148354 1 903 -ATOM C CG . MET A 0 108 . 108 MET A CG nan 1.0 ? 0.9509218 0.4960358 0.44749707 1 904 -ATOM S SD . MET A 0 108 . 108 MET A SD nan 1.0 ? 0.25373966 0.035782285 0.2539925 1 905 -ATOM C CE . MET A 0 108 . 108 MET A CE nan 1.0 ? 0.37509656 0.8996851 0.65200496 1 906 -ATOM N N . ALA A 0 109 . 109 ALA A N nan 1.0 ? 0.9316493 0.6913069 0.5321687 1 907 -ATOM C CA . ALA A 0 109 . 109 ALA A CA nan 1.0 ? 0.5634482 0.52878314 0.62094885 1 908 -ATOM C C . ALA A 0 109 . 109 ALA A C nan 1.0 ? 0.7752902 0.18888062 0.44653773 1 909 -ATOM O O . ALA A 0 109 . 109 ALA A O nan 1.0 ? 0.17227277 0.71875834 0.9238304 1 910 -ATOM C CB . ALA A 0 109 . 109 ALA A CB nan 1.0 ? 0.35713786 0.4178359 0.44867605 1 911 -ATOM N N . GLU A 0 110 . 110 GLU A N nan 1.0 ? 0.047971424 0.40170422 0.8042832 1 912 -ATOM C CA . GLU A 0 110 . 110 GLU A CA nan 1.0 ? 0.11030194 0.9582215 0.68714947 1 913 -ATOM C C . GLU A 0 110 . 110 GLU A C nan 1.0 ? 0.67153156 0.51069266 0.8165046 1 914 -ATOM O O . GLU A 0 110 . 110 GLU A O nan 1.0 ? 0.49166638 0.42134622 0.022270247 1 915 -ATOM C CB . GLU A 0 110 . 110 GLU A CB nan 1.0 ? 0.19885893 0.36271968 0.7066526 1 916 -ATOM C CG . GLU A 0 110 . 110 GLU A CG nan 1.0 ? 0.30310482 0.7275092 0.26978374 1 917 -ATOM C CD . 
GLU A 0 110 . 110 GLU A CD nan 1.0 ? 0.3710902 0.53392386 0.7892273 1 918 -ATOM O OE1 . GLU A 0 110 . 110 GLU A OE1 nan 1.0 ? 0.340803 0.3868019 0.8646582 1 919 -ATOM O OE2 . GLU A 0 110 . 110 GLU A OE2 nan 1.0 ? 0.6144766 0.6036513 0.9379701 1 920 -ATOM N N . ALA A 0 111 . 111 ALA A N nan 1.0 ? 0.99172074 0.76174176 0.558867 1 921 -ATOM C CA . ALA A 0 111 . 111 ALA A CA nan 1.0 ? 0.18033268 0.32613593 0.6278611 1 922 -ATOM C C . ALA A 0 111 . 111 ALA A C nan 1.0 ? 0.8073314 0.51484865 0.20153037 1 923 -ATOM O O . ALA A 0 111 . 111 ALA A O nan 1.0 ? 0.6305306 0.78608704 0.11837274 1 924 -ATOM C CB . ALA A 0 111 . 111 ALA A CB nan 1.0 ? 0.530112 0.07036833 0.14283258 1 925 -ATOM N N . LEU A 0 112 . 112 LEU A N nan 1.0 ? 0.4243469 0.68206936 0.028056426 1 926 -ATOM C CA . LEU A 0 112 . 112 LEU A CA nan 1.0 ? 0.32475987 0.41541108 0.7044405 1 927 -ATOM C C . LEU A 0 112 . 112 LEU A C nan 1.0 ? 0.64507383 0.11783654 0.06197209 1 928 -ATOM O O . LEU A 0 112 . 112 LEU A O nan 1.0 ? 0.014932189 0.84914553 0.24368882 1 929 -ATOM C CB . LEU A 0 112 . 112 LEU A CB nan 1.0 ? 0.49670342 0.7135239 0.83014035 1 930 -ATOM C CG . LEU A 0 112 . 112 LEU A CG nan 1.0 ? 0.90514255 0.3379718 0.7814438 1 931 -ATOM C CD1 . LEU A 0 112 . 112 LEU A CD1 nan 1.0 ? 0.63231486 0.98312813 0.8682091 1 932 -ATOM C CD2 . LEU A 0 112 . 112 LEU A CD2 nan 1.0 ? 0.5941822 0.9942509 0.3613126 1 933 -ATOM N N . GLU A 0 113 . 113 GLU A N nan 1.0 ? 0.87787926 0.02303473 0.66269 1 934 -ATOM C CA . GLU A 0 113 . 113 GLU A CA nan 1.0 ? 0.16659845 0.4842934 0.4729831 1 935 -ATOM C C . GLU A 0 113 . 113 GLU A C nan 1.0 ? 0.63790303 0.2998434 0.65367174 1 936 -ATOM O O . GLU A 0 113 . 113 GLU A O nan 1.0 ? 0.90668875 0.74352694 0.5499995 1 937 -ATOM C CB . GLU A 0 113 . 113 GLU A CB nan 1.0 ? 0.044542655 0.26750195 0.58081365 1 938 -ATOM C CG . GLU A 0 113 . 113 GLU A CG nan 1.0 ? 0.97669 0.8458785 0.93806744 1 939 -ATOM C CD . GLU A 0 113 . 113 GLU A CD nan 1.0 ? 
0.5998821 0.8890383 0.111453146 1 940 -ATOM O OE1 . GLU A 0 113 . 113 GLU A OE1 nan 1.0 ? 0.06502544 0.2653738 0.08254236 1 941 -ATOM O OE2 . GLU A 0 113 . 113 GLU A OE2 nan 1.0 ? 0.26421124 0.4449239 0.19773184 1 942 -ATOM N N . LYS A 0 114 . 114 LYS A N nan 1.0 ? 0.9680387 0.6676914 0.71309793 1 943 -ATOM C CA . LYS A 0 114 . 114 LYS A CA nan 1.0 ? 0.032824505 0.16335182 0.52796227 1 944 -ATOM C C . LYS A 0 114 . 114 LYS A C nan 1.0 ? 0.92581147 0.90816975 0.39478892 1 945 -ATOM O O . LYS A 0 114 . 114 LYS A O nan 1.0 ? 0.63184005 0.06583369 0.9424108 1 946 -ATOM C CB . LYS A 0 114 . 114 LYS A CB nan 1.0 ? 0.7504448 0.48134002 0.52438486 1 947 -ATOM C CG . LYS A 0 114 . 114 LYS A CG nan 1.0 ? 0.6508142 0.89798635 0.049591213 1 948 -ATOM C CD . LYS A 0 114 . 114 LYS A CD nan 1.0 ? 0.23008414 0.09205648 0.8231479 1 949 -ATOM C CE . LYS A 0 114 . 114 LYS A CE nan 1.0 ? 0.20918123 0.82801837 0.18159077 1 950 -ATOM N NZ . LYS A 0 114 . 114 LYS A NZ nan 1.0 +1 0.5841841 0.26748314 0.85012954 1 951 -ATOM N N . LEU A 0 115 . 115 LEU A N nan 1.0 ? 0.3236717 0.24193309 0.7755443 1 952 -ATOM C CA . LEU A 0 115 . 115 LEU A CA nan 1.0 ? 0.64392996 0.39626768 0.6716873 1 953 -ATOM C C . LEU A 0 115 . 115 LEU A C nan 1.0 ? 0.58449954 0.77926457 0.51667184 1 954 -ATOM O O . LEU A 0 115 . 115 LEU A O nan 1.0 ? 0.16921513 0.09739585 0.025549032 1 955 -ATOM C CB . LEU A 0 115 . 115 LEU A CB nan 1.0 ? 0.3181686 0.4209753 0.4084844 1 956 -ATOM C CG . LEU A 0 115 . 115 LEU A CG nan 1.0 ? 0.3736753 0.41677505 0.55138564 1 957 -ATOM C CD1 . LEU A 0 115 . 115 LEU A CD1 nan 1.0 ? 0.8308975 0.96991014 0.965675 1 958 -ATOM C CD2 . LEU A 0 115 . 115 LEU A CD2 nan 1.0 ? 0.9683514 0.81155926 0.12926595 1 959 -ATOM N N . PHE A 0 116 . 116 PHE A N nan 1.0 ? 0.8837471 0.14250423 0.32235634 1 960 -ATOM C CA . PHE A 0 116 . 116 PHE A CA nan 1.0 ? 0.34069827 0.087347925 0.102971464 1 961 -ATOM C C . PHE A 0 116 . 116 PHE A C nan 1.0 ? 0.88482034 0.74152476 0.9269814 1 962 -ATOM O O . PHE A 0 116 . 
116 PHE A O nan 1.0 ? 0.6539567 0.96282667 0.74191207 1 963 -ATOM C CB . PHE A 0 116 . 116 PHE A CB nan 1.0 ? 0.66189367 0.88438874 0.39179748 1 964 -ATOM C CG . PHE A 0 116 . 116 PHE A CG nan 1.0 ? 0.8411586 0.47754052 0.21432933 1 965 -ATOM C CD1 . PHE A 0 116 . 116 PHE A CD1 nan 1.0 ? 0.08152615 0.6543563 0.00431035 1 966 -ATOM C CD2 . PHE A 0 116 . 116 PHE A CD2 nan 1.0 ? 0.93021846 0.8738612 0.9806605 1 967 -ATOM C CE1 . PHE A 0 116 . 116 PHE A CE1 nan 1.0 ? 0.17148478 0.26169005 0.19636543 1 968 -ATOM C CE2 . PHE A 0 116 . 116 PHE A CE2 nan 1.0 ? 0.629412 0.97846717 0.24657154 1 969 -ATOM C CZ . PHE A 0 116 . 116 PHE A CZ nan 1.0 ? 0.45338464 0.9082083 0.63669986 1 970 -ATOM N N . LEU A 0 117 . 117 LEU A N nan 1.0 ? 0.9696568 0.4350206 0.25388438 1 971 -ATOM C CA . LEU A 0 117 . 117 LEU A CA nan 1.0 ? 0.8722499 0.029059451 0.43447855 1 972 -ATOM C C . LEU A 0 117 . 117 LEU A C nan 1.0 ? 0.73025715 0.8174074 0.20994641 1 973 -ATOM O O . LEU A 0 117 . 117 LEU A O nan 1.0 ? 0.955623 0.49907967 0.3463176 1 974 -ATOM C CB . LEU A 0 117 . 117 LEU A CB nan 1.0 ? 0.7451569 0.08559912 0.6823587 1 975 -ATOM C CG . LEU A 0 117 . 117 LEU A CG nan 1.0 ? 0.19589597 0.030155418 0.15320906 1 976 -ATOM C CD1 . LEU A 0 117 . 117 LEU A CD1 nan 1.0 ? 0.008291881 0.76370794 0.03393707 1 977 -ATOM C CD2 . LEU A 0 117 . 117 LEU A CD2 nan 1.0 ? 0.17907272 0.3797508 0.4211984 1 978 -ATOM N N . GLN A 0 118 . 118 GLN A N nan 1.0 ? 0.046861503 0.010911842 0.85469896 1 979 -ATOM C CA . GLN A 0 118 . 118 GLN A CA nan 1.0 ? 0.5098985 0.6379383 0.47458547 1 980 -ATOM C C . GLN A 0 118 . 118 GLN A C nan 1.0 ? 0.77979064 0.93773556 0.97952616 1 981 -ATOM O O . GLN A 0 118 . 118 GLN A O nan 1.0 ? 0.86658233 0.15165746 0.7086969 1 982 -ATOM C CB . GLN A 0 118 . 118 GLN A CB nan 1.0 ? 0.95194423 0.25400692 0.41646817 1 983 -ATOM C CG . GLN A 0 118 . 118 GLN A CG nan 1.0 ? 0.4484113 0.4939464 0.92646015 1 984 -ATOM C CD . GLN A 0 118 . 118 GLN A CD nan 1.0 ? 
0.28348714 0.9638948 0.043153863 1 985 -ATOM O OE1 . GLN A 0 118 . 118 GLN A OE1 nan 1.0 ? 0.57116145 0.30513847 0.2296494 1 986 -ATOM N NE2 . GLN A 0 118 . 118 GLN A NE2 nan 1.0 ? 0.20016162 0.20242332 0.6694807 1 987 -ATOM N N . LYS A 0 119 . 119 LYS A N nan 1.0 ? 0.14860949 0.052367985 0.41832152 1 988 -ATOM C CA . LYS A 0 119 . 119 LYS A CA nan 1.0 ? 0.5667213 0.6907874 0.94889134 1 989 -ATOM C C . LYS A 0 119 . 119 LYS A C nan 1.0 ? 0.6720985 0.65007746 0.80502003 1 990 -ATOM O O . LYS A 0 119 . 119 LYS A O nan 1.0 ? 0.6369674 0.029042391 0.27068248 1 991 -ATOM C CB . LYS A 0 119 . 119 LYS A CB nan 1.0 ? 0.9653793 0.83939815 0.61836445 1 992 -ATOM C CG . LYS A 0 119 . 119 LYS A CG nan 1.0 ? 0.18387131 0.26211956 0.02127318 1 993 -ATOM C CD . LYS A 0 119 . 119 LYS A CD nan 1.0 ? 0.20469144 0.042404667 0.76081836 1 994 -ATOM C CE . LYS A 0 119 . 119 LYS A CE nan 1.0 ? 0.3381764 0.5110821 0.8096168 1 995 -ATOM N NZ . LYS A 0 119 . 119 LYS A NZ nan 1.0 +1 0.28837517 0.19217949 0.80289304 1 996 -ATOM N N . ILE A 0 120 . 120 ILE A N nan 1.0 ? 0.95320797 0.5268233 0.6600998 1 997 -ATOM C CA . ILE A 0 120 . 120 ILE A CA nan 1.0 ? 0.12084283 0.42158344 0.9419183 1 998 -ATOM C C . ILE A 0 120 . 120 ILE A C nan 1.0 ? 0.106111206 0.634359 0.17812678 1 999 -ATOM O O . ILE A 0 120 . 120 ILE A O nan 1.0 ? 0.5166289 0.06864275 0.6635294 1 1000 -ATOM C CB . ILE A 0 120 . 120 ILE A CB nan 1.0 ? 0.97081035 0.010491109 0.84198767 1 1001 -ATOM C CG1 . ILE A 0 120 . 120 ILE A CG1 nan 1.0 ? 0.8268036 0.46921903 0.35572213 1 1002 -ATOM C CG2 . ILE A 0 120 . 120 ILE A CG2 nan 1.0 ? 0.012353688 0.10832769 0.89786124 1 1003 -ATOM C CD1 . ILE A 0 120 . 120 ILE A CD1 nan 1.0 ? 0.77783203 0.97666883 0.9276842 1 1004 -ATOM N N . ASN A 0 121 . 121 ASN A N nan 1.0 ? 0.81345636 0.95326364 0.8725859 1 1005 -ATOM C CA . ASN A 0 121 . 121 ASN A CA nan 1.0 ? 0.24583806 0.99884087 0.47420847 1 1006 -ATOM C C . ASN A 0 121 . 121 ASN A C nan 1.0 ? 0.7874865 0.084816635 0.73819786 1 1007 -ATOM O O . 
ASN A 0 121 . 121 ASN A O nan 1.0 ? 0.06523057 0.9138201 0.46733242 1 1008 -ATOM C CB . ASN A 0 121 . 121 ASN A CB nan 1.0 ? 0.94438875 0.21456182 0.34655395 1 1009 -ATOM C CG . ASN A 0 121 . 121 ASN A CG nan 1.0 ? 0.15159485 0.6782438 0.2574997 1 1010 -ATOM O OD1 . ASN A 0 121 . 121 ASN A OD1 nan 1.0 ? 0.09851511 0.38130385 0.1096607 1 1011 -ATOM N ND2 . ASN A 0 121 . 121 ASN A ND2 nan 1.0 ? 0.74039173 0.58928657 0.898543 1 1012 -ATOM N N . GLU A 0 122 . 122 GLU A N nan 1.0 ? 0.03273668 0.6911086 0.9707743 1 1013 -ATOM C CA . GLU A 0 122 . 122 GLU A CA nan 1.0 ? 0.9491308 0.6341753 0.19717425 1 1014 -ATOM C C . GLU A 0 122 . 122 GLU A C nan 1.0 ? 0.4251164 0.5270997 0.42800397 1 1015 -ATOM O O . GLU A 0 122 . 122 GLU A O nan 1.0 ? 0.80767673 0.6561479 0.9014704 1 1016 -ATOM C CB . GLU A 0 122 . 122 GLU A CB nan 1.0 ? 0.22522202 0.65284485 0.9207706 1 1017 -ATOM C CG . GLU A 0 122 . 122 GLU A CG nan 1.0 ? 0.22248828 0.7487657 0.7280712 1 1018 -ATOM C CD . GLU A 0 122 . 122 GLU A CD nan 1.0 ? 0.81204987 0.7839215 0.77615577 1 1019 -ATOM O OE1 . GLU A 0 122 . 122 GLU A OE1 nan 1.0 ? 0.6092276 0.7658469 0.865169 1 1020 -ATOM O OE2 . GLU A 0 122 . 122 GLU A OE2 nan 1.0 ? 0.7827327 0.49897248 0.9792516 1 1021 -ATOM N N . LEU A 0 123 . 123 LEU A N nan 1.0 ? 0.13457444 0.16209134 0.91318136 1 1022 -ATOM C CA . LEU A 0 123 . 123 LEU A CA nan 1.0 ? 0.677991 0.78204757 0.9197261 1 1023 -ATOM C C . LEU A 0 123 . 123 LEU A C nan 1.0 ? 0.19941041 0.82423294 0.25507912 1 1024 -ATOM O O . LEU A 0 123 . 123 LEU A O nan 1.0 ? 0.3720848 0.3331037 0.27511615 1 1025 -ATOM C CB . LEU A 0 123 . 123 LEU A CB nan 1.0 ? 0.7525258 0.3728915 0.7637492 1 1026 -ATOM C CG . LEU A 0 123 . 123 LEU A CG nan 1.0 ? 0.29882905 0.05173779 0.0439342 1 1027 -ATOM C CD1 . LEU A 0 123 . 123 LEU A CD1 nan 1.0 ? 0.6899518 0.8064701 0.38444003 1 1028 -ATOM C CD2 . LEU A 0 123 . 123 LEU A CD2 nan 1.0 ? 0.10662969 0.4821648 0.31718495 1 1029 -ATOM N N . PRO A 0 124 . 124 PRO A N nan 1.0 ? 
0.3169119 0.327272 0.42326245 1 1030 -ATOM C CA . PRO A 0 124 . 124 PRO A CA nan 1.0 ? 0.96015435 0.24555796 0.7024596 1 1031 -ATOM C C . PRO A 0 124 . 124 PRO A C nan 1.0 ? 0.6453217 0.1476699 0.16289118 1 1032 -ATOM O O . PRO A 0 124 . 124 PRO A O nan 1.0 ? 0.32772857 0.22203274 0.57844895 1 1033 -ATOM C CB . PRO A 0 124 . 124 PRO A CB nan 1.0 ? 0.08759258 0.36361325 0.44662827 1 1034 -ATOM C CG . PRO A 0 124 . 124 PRO A CG nan 1.0 ? 0.009579336 0.98298246 0.58250123 1 1035 -ATOM C CD . PRO A 0 124 . 124 PRO A CD nan 1.0 ? 0.5418588 0.02040022 0.35424072 1 1036 -ATOM N N . THR A 0 125 . 125 THR A N nan 1.0 ? 0.5239172 0.22606686 0.22434096 1 1037 -ATOM C CA . THR A 0 125 . 125 THR A CA nan 1.0 ? 0.9488097 0.53730005 0.09206712 1 1038 -ATOM C C . THR A 0 125 . 125 THR A C nan 1.0 ? 0.508952 0.68989116 0.90306765 1 1039 -ATOM O O . THR A 0 125 . 125 THR A O nan 1.0 ? 0.21074198 0.22754112 0.5735981 1 1040 -ATOM C CB . THR A 0 125 . 125 THR A CB nan 1.0 ? 0.5931955 0.74996895 0.3139638 1 1041 -ATOM O OG1 . THR A 0 125 . 125 THR A OG1 nan 1.0 ? 0.9217849 0.043773532 0.25526097 1 1042 -ATOM C CG2 . THR A 0 125 . 125 THR A CG2 nan 1.0 ? 0.96653956 0.09352469 0.8391434 1 1043 -ATOM N N . GLU A 0 126 . 126 GLU A N nan 1.0 ? 0.30930224 0.5087493 0.64281833 1 1044 -ATOM C CA . GLU A 0 126 . 126 GLU A CA nan 1.0 ? 0.7905767 0.32585153 0.21313262 1 1045 -ATOM C C . GLU A 0 126 . 126 GLU A C nan 1.0 ? 0.37769938 0.8101517 0.097206734 1 1046 -ATOM O O . GLU A 0 126 . 126 GLU A O nan 1.0 ? 0.16205078 0.15629725 0.034601092 1 1047 -ATOM C CB . GLU A 0 126 . 126 GLU A CB nan 1.0 ? 0.46585545 0.014806096 0.17301455 1 1048 -ATOM C CG . GLU A 0 126 . 126 GLU A CG nan 1.0 ? 0.5045662 0.63840914 0.15377629 1 1049 -ATOM C CD . GLU A 0 126 . 126 GLU A CD nan 1.0 ? 0.3357075 0.67247415 0.33118773 1 1050 -ATOM O OE1 . GLU A 0 126 . 126 GLU A OE1 nan 1.0 ? 0.8540735 0.15908 0.5148767 1 1051 -ATOM O OE2 . GLU A 0 126 . 126 GLU A OE2 nan 1.0 ? 
0.5246732 0.82088584 0.55631673 1 1052 -ATOM N N . GLU A 0 127 . 127 GLU A N nan 1.0 ? 0.95315677 0.6910998 0.51060915 1 1053 -ATOM C CA . GLU A 0 127 . 127 GLU A CA nan 1.0 ? 0.57447445 0.14421177 0.4188694 1 1054 -ATOM C C . GLU A 0 127 . 127 GLU A C nan 1.0 ? 0.7673678 0.7740877 0.6825384 1 1055 -ATOM O O . GLU A 0 127 . 127 GLU A O nan 1.0 ? 0.8869485 0.38973004 0.01908293 1 1056 -ATOM C CB . GLU A 0 127 . 127 GLU A CB nan 1.0 ? 0.4528859 0.42768326 0.35228238 1 1057 -ATOM C CG . GLU A 0 127 . 127 GLU A CG nan 1.0 ? 0.4911798 0.42433998 0.9941097 1 1058 -ATOM C CD . GLU A 0 127 . 127 GLU A CD nan 1.0 ? 0.15267718 0.98181033 0.68008417 1 1059 -ATOM O OE1 . GLU A 0 127 . 127 GLU A OE1 nan 1.0 ? 0.9667511 0.30261344 0.8414994 1 1060 -ATOM O OE2 . GLU A 0 127 . 127 GLU A OE2 nan 1.0 ? 0.07485981 0.846001 0.004430565 1 1061 -ATOM O OXT . GLU A 0 127 . 127 GLU A OXT nan 1.0 ? 0.09164561 0.79403996 0.14932832 1 1062 -HETATM N N0 . L:C C 1 0 . 0 L:C C N0 nan 1.0 ? 0.6779108 0.097593606 0.013153834 1 1063 -HETATM C C0 . L:C C 1 0 . 0 L:C C C0 nan 1.0 ? 0.87399286 0.55288184 0.82117254 1 1064 -HETATM C C1 . L:C C 1 0 . 0 L:C C C1 nan 1.0 ? 0.2608887 0.28173625 0.92557836 1 1065 -HETATM C C2 . L:C C 1 0 . 0 L:C C C2 nan 1.0 ? 0.37387213 0.29552087 0.82152706 1 1066 -HETATM C C3 . L:C C 1 0 . 0 L:C C C3 nan 1.0 ? 0.17472608 0.14588757 0.15526193 1 1067 -HETATM N N1 . L:C C 1 0 . 0 L:C C N1 nan 1.0 ? 0.00022697571 0.96132517 0.20149735 1 1068 -HETATM N N2 . L:C C 1 0 . 0 L:C C N2 nan 1.0 ? 0.21457845 0.7270461 0.17680416 1 1069 -HETATM C C4 . L:C C 1 0 . 0 L:C C C4 nan 1.0 ? 0.7916443 0.39739743 0.66697997 1 1070 -HETATM C C5 . L:C C 1 0 . 0 L:C C C5 nan 1.0 ? 0.10334229 0.39774475 0.31259292 1 1071 -HETATM C C6 . L:C C 1 0 . 0 L:C C C6 nan 1.0 ? 0.81873995 0.43425742 0.65476316 1 1072 -HETATM C C7 . L:C C 1 0 . 0 L:C C C7 nan 1.0 ? 0.4950341 0.20785952 0.9252764 1 1073 -HETATM O O0 . L:C C 1 0 . 0 L:C C O0 nan 1.0 ? 0.23731546 0.36359096 0.14183004 1 1074 -HETATM C C8 . 
L:C C 1 0 . 0 L:C C C8 nan 1.0 ? 0.22478656 0.9013709 0.7784237 1 1075 -HETATM C C9 . L:C C 1 0 . 0 L:C C C9 nan 1.0 ? 0.9818587 0.4461664 0.97970617 1 1076 -HETATM C C10 . L:C C 1 0 . 0 L:C C C10 nan 1.0 ? 0.60635346 0.9849196 0.15617305 1 1077 -HETATM C C11 . L:C C 1 0 . 0 L:C C C11 nan 1.0 ? 0.98551613 0.5350776 0.1325653 1 1078 -HETATM C C12 . L:C C 1 0 . 0 L:C C C12 nan 1.0 ? 0.027748102 0.3532572 0.5370831 1 1079 -HETATM N N3 . L:C C 1 0 . 0 L:C C N3 nan 1.0 ? 0.81568825 0.06922907 0.5874546 1 1080 -HETATM C C13 . L:C C 1 0 . 0 L:C C C13 nan 1.0 ? 0.8122118 0.18374796 0.8244068 1 1081 -HETATM C C14 . L:C C 1 0 . 0 L:C C C14 nan 1.0 ? 0.15915029 0.9341084 0.5907481 1 1082 -HETATM C C15 . L:C C 1 0 . 0 L:C C C15 nan 1.0 ? 0.07072062 0.82767224 0.95903784 1 1083 -HETATM C C16 . L:C C 1 0 . 0 L:C C C16 nan 1.0 ? 0.20756003 0.66874975 0.7148525 1 1084 -HETATM C C17 . L:C C 1 0 . 0 L:C C C17 nan 1.0 ? 0.59139943 0.9651974 0.5751469 1 1085 -HETATM C C18 . L:C C 1 0 . 0 L:C C C18 nan 1.0 ? 0.015804116 0.016961012 0.9193699 1 1086 -HETATM C C19 . L:C C 1 0 . 0 L:C C C19 nan 1.0 ? 0.92409486 0.43305245 0.30597737 1 1087 -HETATM C C20 . L:C C 1 0 . 0 L:C C C20 nan 1.0 ? 0.8709819 0.1722904 0.25860667 1 1088 -HETATM N N4 . L:C C 1 0 . 0 L:C C N4 nan 1.0 ? 0.30529135 0.97226954 0.9569942 1 1089 -HETATM C C21 . L:C C 1 0 . 0 L:C C C21 nan 1.0 ? 0.93467075 0.41001067 0.3619657 1 1090 -HETATM O O1 . L:C C 1 0 . 0 L:C C O1 nan 1.0 ? 0.18081121 0.37892684 0.61745226 1 1091 -HETATM N N5 . L:C C 1 0 . 0 L:C C N5 nan 1.0 ? 0.47732008 0.14274032 0.34654078 1 1092 -HETATM C C22 . L:C C 1 0 . 0 L:C C C22 nan 1.0 ? 0.2874254 0.2196572 0.75326854 1 1093 -HETATM C C23 . L:C C 1 0 . 0 L:C C C23 nan 1.0 ? 0.7412466 0.74047816 0.086026534 1 1094 -HETATM C C24 . L:C C 1 0 . 0 L:C C C24 nan 1.0 ? 0.5974513 0.21582703 0.47175294 1 1095 -HETATM C C25 . L:C C 1 0 . 0 L:C C C25 nan 1.0 ? 0.14591278 0.1852946 0.541326 1 1096 -HETATM N N6 . L:C C 1 0 . 0 L:C C N6 nan 1.0 ? 
0.97777265 0.55019534 0.7023834 1 1097 -HETATM C C26 . L:C C 1 0 . 0 L:C C C26 nan 1.0 ? 0.9935879 0.7761831 0.4851575 1 1098 -HETATM C C27 . L:C C 1 0 . 0 L:C C C27 nan 1.0 ? 0.944666 0.36344713 0.87046605 1 1099 -HETATM C C28 . L:C C 1 0 . 0 L:C C C28 nan 1.0 ? 0.2522406 0.28575218 0.92085594 1 1100 -HETATM C C29 . L:C C 1 0 . 0 L:C C C29 nan 1.0 ? 0.36873662 0.4113042 0.18834436 1 1101 -HETATM C C30 . L:C C 1 0 . 0 L:C C C30 nan 1.0 ? 0.9822055 0.6236254 0.71317333 1 1102 -# diff --git a/rf2aa/tests/data/test_sdf.sdf b/rf2aa/tests/data/test_sdf.sdf deleted file mode 100644 index 632123e..0000000 --- a/rf2aa/tests/data/test_sdf.sdf +++ /dev/null @@ -1,183 +0,0 @@ -asd - OpenBabel10022416543D - - 86 91 0 0 1 0 0 0 0 0999 V2000 - 0.0000 0.0000 0.0000 Ru 0 0 0 0 0 0 0 0 0 0 0 0 - 1.9811 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.8984 0.0000 0.9976 N 0 3 0 0 0 4 0 0 0 0 0 0 - 4.2283 -0.4311 0.5225 C 0 0 1 0 0 0 0 0 0 0 0 0 - 4.1294 -0.1510 -0.9789 C 0 0 2 0 0 0 0 0 0 0 0 0 - 2.6689 -0.1856 -1.1704 N 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5104 0.8502 -1.2196 H 0 0 0 0 0 0 0 0 0 0 0 0 - 4.3148 -1.5116 0.6738 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0926 0.0177 -2.4711 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0551 1.3215 -3.0085 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.4395 1.5092 -4.2457 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.8688 0.4467 -4.9577 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9728 -0.8382 -4.4242 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.5980 -1.0817 -3.1942 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.3946 2.5134 -4.6607 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.5711 -1.6818 -4.9808 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.7839 -2.5051 -2.7336 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.6142 -2.9688 -3.2824 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.9963 -2.5662 -1.6680 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.8879 -3.0998 -2.9345 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.6908 2.4918 -2.3015 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.2457 3.4308 -2.6402 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.7660 2.5403 -2.5179 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5630 2.4326 -1.2198 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.1546 0.6923 -6.2641 C 0 0 0 0 0 0 0 0 0 0 0 0 - 
0.0871 -0.2224 -6.8610 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.6644 1.4570 -6.8598 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.8688 1.0481 -6.0884 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.6482 0.2211 2.3858 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.4836 1.5443 2.8329 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.2054 1.7521 4.1866 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0596 2.7690 4.5422 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.2790 -0.6127 4.6093 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5615 -0.8708 3.2641 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.1903 -1.4511 5.2957 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.0965 0.6881 5.0892 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.7358 -2.2864 2.7757 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.3651 -2.9970 3.5199 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.7928 -2.5228 2.5978 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.1924 -2.4512 1.8403 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.4096 2.6945 1.2403 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5155 2.7062 1.8725 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.4962 3.6544 2.4168 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6520 2.6776 1.1976 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.8132 0.9407 6.5497 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.1857 1.8271 6.6864 H 0 0 0 0 0 0 0 0 0 0 0 0 - 2.7445 1.1113 7.1053 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.3095 0.0856 7.0112 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.3034 2.2463 -0.7911 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -0.0311 -2.4021 0.1684 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -0.3847 0.3239 1.7667 C 0 0 0 0 0 3 0 0 0 0 0 0 - 0.3401 0.5495 2.5448 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.7569 0.2734 2.2121 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.7733 0.0216 1.2582 C 0 0 0 0 0 0 0 0 0 0 0 0 - -4.1105 -0.0598 1.6390 C 0 0 0 0 0 0 0 0 0 0 0 0 - -4.4360 0.1136 2.9889 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.4526 0.3669 3.9517 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.1198 0.4461 3.5620 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3353 0.6371 4.2895 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.7295 0.4995 4.9924 H 0 0 0 0 0 0 0 0 0 0 0 0 - -5.4785 0.0501 3.2858 H 0 0 0 0 0 0 0 0 0 0 0 0 - -4.8949 -0.2514 0.9184 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.2963 -0.1404 -0.0112 O 0 0 0 0 0 0 0 0 0 0 0 0 - 5.5671 -0.2126 2.2083 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.1577 1.3111 1.4179 H 0 0 0 0 0 0 0 0 0 0 0 0 - 
6.6571 0.2637 0.4773 N 0 0 0 0 0 0 0 0 0 0 0 0 - 5.3944 0.2611 1.2403 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8286 1.1116 -0.0564 H 0 0 0 0 0 0 0 0 0 0 0 0 - 7.1294 -1.0762 -0.3820 S 0 0 0 0 0 0 0 0 0 0 0 0 - 6.3097 -0.9086 -1.8483 N 0 0 0 0 0 0 0 0 0 0 0 0 - 8.5424 -0.9362 -0.7279 O 0 0 0 0 0 0 0 0 0 0 0 0 - 6.6677 -2.2360 0.3905 O 0 0 0 0 0 0 0 0 0 0 0 0 - 4.5395 -1.0515 -2.8887 H 0 0 0 0 0 0 0 0 0 0 0 0 - 4.6166 -2.1893 -1.5280 H 0 0 0 0 0 0 0 0 0 0 0 0 - 4.8612 -1.1705 -1.8517 C 0 0 0 0 0 0 0 0 0 0 0 0 - 6.8079 -1.4416 -2.5601 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.2077 -0.2475 -1.1550 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.9743 -0.9748 -0.8710 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.3957 -0.8002 -2.3143 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.6290 -0.0874 -2.6345 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.9096 -1.7391 -2.0383 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.0669 -0.9839 -3.1595 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.8219 1.1126 -1.4561 C 0 0 0 0 0 0 0 0 0 0 0 0 - -4.5239 1.0169 -2.2909 H 0 0 0 0 0 0 0 0 0 0 0 0 - -4.3670 1.5131 -0.5970 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.0331 1.8184 -1.7297 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 50 1 0 0 0 0 - 1 51 1 0 0 0 0 - 2 1 1 0 0 0 0 - 2 3 2 0 0 0 0 - 3 29 1 0 0 0 0 - 4 8 1 1 0 0 0 - 4 3 1 0 0 0 0 - 4 67 1 0 0 0 0 - 5 7 1 6 0 0 0 - 5 4 1 0 0 0 0 - 6 5 1 0 0 0 0 - 6 2 1 0 0 0 0 - 9 6 1 0 0 0 0 - 10 9 2 0 0 0 0 - 10 21 1 0 0 0 0 - 11 10 1 0 0 0 0 - 12 13 1 0 0 0 0 - 12 11 2 0 0 0 0 - 13 14 2 0 0 0 0 - 14 17 1 0 0 0 0 - 14 9 1 0 0 0 0 - 15 11 1 0 0 0 0 - 16 13 1 0 0 0 0 - 17 19 1 0 0 0 0 - 18 17 1 0 0 0 0 - 20 17 1 0 0 0 0 - 21 24 1 0 0 0 0 - 22 21 1 0 0 0 0 - 23 21 1 0 0 0 0 - 25 28 1 0 0 0 0 - 25 12 1 0 0 0 0 - 26 25 1 0 0 0 0 - 27 25 1 0 0 0 0 - 29 30 2 0 0 0 0 - 29 34 1 0 0 0 0 - 30 31 1 0 0 0 0 - 31 32 1 0 0 0 0 - 31 36 2 0 0 0 0 - 33 36 1 0 0 0 0 - 33 35 1 0 0 0 0 - 34 33 2 0 0 0 0 - 36 45 1 0 0 0 0 - 37 34 1 0 0 0 0 - 37 38 1 0 0 0 0 - 39 37 1 0 0 0 0 - 40 37 1 0 0 0 0 - 41 42 1 0 0 0 0 - 42 43 1 0 0 0 0 - 42 30 1 0 0 0 0 - 44 42 1 0 0 0 0 - 45 46 1 0 0 0 0 - 45 48 1 0 0 0 0 - 45 47 1 0 0 0 0 
- 49 1 1 0 0 0 0 - 51 53 1 0 0 0 0 - 51 52 1 0 0 0 0 - 53 58 2 0 0 0 0 - 54 55 2 0 0 0 0 - 54 53 1 0 0 0 0 - 55 56 1 0 0 0 0 - 56 61 1 0 0 0 0 - 56 57 2 0 0 0 0 - 57 60 1 0 0 0 0 - 58 57 1 0 0 0 0 - 58 59 1 0 0 0 0 - 62 55 1 0 0 0 0 - 63 54 1 0 0 0 0 - 63 1 1 0 0 0 0 - 66 67 1 0 0 0 0 - 67 65 1 0 0 0 0 - 67 64 1 0 0 0 0 - 68 66 1 0 0 0 0 - 69 72 2 0 0 0 0 - 69 66 1 0 0 0 0 - 70 69 1 0 0 0 0 - 71 69 2 0 0 0 0 - 73 75 1 0 0 0 0 - 75 70 1 0 0 0 0 - 75 74 1 0 0 0 0 - 75 5 1 0 0 0 0 - 76 70 1 0 0 0 0 - 77 78 1 0 0 0 0 - 77 63 1 0 0 0 0 - 79 81 1 0 0 0 0 - 79 77 1 0 0 0 0 - 80 79 1 0 0 0 0 - 82 79 1 0 0 0 0 - 83 77 1 0 0 0 0 - 83 85 1 0 0 0 0 - 84 83 1 0 0 0 0 - 86 83 1 0 0 0 0 -M END -$$$$ diff --git a/rf2aa/validate.py b/rf2aa/validate.py deleted file mode 100644 index 4810a14..0000000 --- a/rf2aa/validate.py +++ /dev/null @@ -1,144 +0,0 @@ -import os -from functools import partial - -import hydra -import pandas as pd -import torch -import torch.multiprocessing as mp -from omegaconf import OmegaConf -from torch.nn.parallel import DistributedDataParallel as DDP - -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import compose_posebusters -from rf2aa.data.dataloader_adaptor import get_loss_calc_items -from rf2aa.trainer_new import trainer_factory -from rf2aa.util import writepdb - - -class PoseBustersBenchmark: - def __init__(self, config): - # config file logic for validation, low->high prio: - # 1) use default parameters in config/train/base.yml - # 2) use parameters saved in model - # 3) use specific params in config/inference - default_config_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "config/train/base.yaml" - ) - base_config = OmegaConf.load(default_config_path) - tmp_data = torch.load(config.eval_params.checkpoint_path, mmap=True) - if "training_config" in tmp_data: - train_config = tmp_data["training_config"] - self.config = OmegaConf.merge(base_config, train_config, config) - else: - self.config = 
OmegaConf.merge(base_config, config) - tmp_data = None - - assert self.config.ddp_params.batch_size == 1, "batch size is assumed to be 1" - if self.config.experiment.output_dir is not None: - self.output_dir = self.config.experiment.output_dir - else: - self.output_dir = "output/" - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - self.trainer = trainer_factory[self.config.experiment.trainer]( - config=self.config - ) - - def construct_dataset(self, rank, world_size): - # fd initialize chemical data based on input arguments - # this needs to be initialized first - init = partial(initialize_chemdata, self.config) - init() - - return compose_posebusters(init, self.config.loader_params, rank, world_size) - - def launch_distributed_eval(self): - world_size = torch.cuda.device_count() - if "MASTER_ADDR" not in os.environ: - os.environ["MASTER_ADDR"] = ( - "127.0.0.1" # multinode requires this set in submit script - ) - if "MASTER_PORT" not in os.environ: - os.environ["MASTER_PORT"] = "%d" % self.config.ddp_params.port - - world_size = torch.cuda.device_count() - - if world_size == 0: - print("Error! 
No GPUs found!") - elif world_size == 1: - # No need for multiple processes with 1 GPU - self.evaluate_model(0, world_size) - else: - mp.spawn( - self.evaluate_model, args=(world_size,), nprocs=world_size, join=True - ) - - def evaluate_model(self, rank, world_size): - gpu = self.trainer.init_process_group(rank, world_size) - benchmark_loader = self.construct_dataset(rank, world_size) - - # move global information to device - self.trainer.move_constants_to_device(gpu) - - self.trainer.construct_model(device=gpu) - self.trainer.model = DDP( - self.trainer.model, - device_ids=[gpu], - find_unused_parameters=False, - broadcast_buffers=False, - ) - - self.trainer.load_checkpoint(rank) - self.trainer.load_model() - self.trainer.model.eval() - records = [] - for inputs in benchmark_loader: - item = inputs[-1] - with torch.no_grad(): - loss, loss_dict, outputs = self.trainer.train_step( - inputs, - self.config.loader_params.maxcycle, - nograds=True, - return_outputs=True, - ) - loss_dict["CHAINID"] = item["CHAINID"][0] - for k, v in loss_dict.items(): - if torch.is_tensor(v): - loss_dict[k] = v.item() - records.append(loss_dict) - df = pd.DataFrame(records) - df.to_csv( - f"{self.output_dir}/{self.config.experiment.name}_{rank}_posebusters.csv" - ) - torch.cuda.empty_cache() - - true_crds = inputs[5] - seq, _, idx_pdb, bond_feats, _, _ = get_loss_calc_items(inputs, device=gpu) - pred_crds, alphas, pred_lddts = outputs[5], outputs[6], outputs[8] - _, pred_allatom = self.trainer.xyz_converter.compute_all_atom( - seq[:, 0], pred_crds[-1], alphas[-1] - ) - - writepdb( - f"{self.output_dir}/{item['CHAINID'][0]}_nat.pdb", - true_crds[:, 0], - seq[:, 0].long(), - bond_feats=bond_feats, - ) - writepdb( - f"{self.output_dir}/{item['CHAINID'][0]}_pred.pdb", - pred_allatom[0], - seq[:, 0].long(), - bond_feats=bond_feats, - ) - - -@hydra.main(version_base=None, config_path="config/inference") -def main(config): - benchmarker = PoseBustersBenchmark(config=config) - 
benchmarker.launch_distributed_eval() - - -if __name__ == "__main__": - main() diff --git a/scripts/build_base_apptainer.sh b/scripts/build_base_apptainer.sh new file mode 100644 index 0000000..89f9d63 --- /dev/null +++ b/scripts/build_base_apptainer.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# This script builds a datahub apptainer container. +set -e # Exit on error + +echo "Running from $PWD" + +# Check if apptainer/singularity is available +APPTAINER_BINARY=$(command -v apptainer || command -v singularity) +if [ -z "$APPTAINER_BINARY" ]; then + echo "Error: Neither apptainer nor singularity found in PATH" + exit 1 +fi +echo "Using apptainer at: $APPTAINER_BINARY" + +# Generate the image name with today's date +DATE=$(date +%Y-%m-%d) +IMAGE_NAME="modelhub_${DATE}.sif" +echo "Building apptainer image: $IMAGE_NAME" + +# Build Phase +echo +echo "=== Starting Build Phase ===" +echo "Running: $APPTAINER_BINARY build --notest '$IMAGE_NAME' base_apptainer.spec" +echo "----------------------------------------" +$APPTAINER_BINARY build \ + --nv \ + --notest \ + "$IMAGE_NAME" base_apptainer.spec +echo "----------------------------------------" + +echo +echo "=== Build Complete ===" +echo "Container is available at: $PWD/$IMAGE_NAME" \ No newline at end of file diff --git a/scripts/freeze_apptainer.sh b/scripts/freeze_apptainer.sh new file mode 100755 index 0000000..0227445 --- /dev/null +++ b/scripts/freeze_apptainer.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# This script freezes CIFUtils, Datahub, and Modelhub versions within an existing apptainer. 
+set -e # Exit on error + +echo "Running from $PWD" + +SCRIPT_PATH=$(realpath $0) +SCRIPT_DIR=$(dirname $SCRIPT_PATH) + +# Check if apptainer/singularity is available +APPTAINER_BINARY=$(command -v apptainer || command -v singularity) +if [ -z "$APPTAINER_BINARY" ]; then + echo "Error: Neither apptainer nor singularity found in PATH" + exit 1 +fi +echo "Using apptainer at: $APPTAINER_BINARY" + +# This is the default apptainer that you can build from 'make apptainer' +echo "... looking for a local apptainer image at '$SCRIPT_DIR/modelhub.sif'" +SIF_PATH="$SCRIPT_DIR/modelhub.sif" +SIF_PATH=$(readlink -f "$SCRIPT_DIR/modelhub.sif" ) +echo "Base SIF path to build from: $SIF_PATH" + +# Generate the image name with today's date +DATE=$(date +%Y-%m-%d) +IMAGE_NAME="frozen_modelhub_${DATE}.sif" +echo "Building apptainer from image with frozen dependencies: $IMAGE_NAME" + +# Check if INSTALL_PROJECT is set to true and set the image name accordingly +if ${INSTALL_PROJECT}; then + echo "Modelhub WILL be installed in the apptainer! Ensure that this is intentional." + IMAGE_NAME="frozen_modelhub_datahub_cifutils_${DATE}.sif" +else + IMAGE_NAME="frozen_datahub_cifutils_${DATE}.sif" +fi + +# Build Phase +echo +echo "=== Starting Build Phase ===" +echo "Running: $APPTAINER_BINARY build --notest '$IMAGE_NAME' freeze_apptainer.spec" +echo "----------------------------------------" +INSTALL_PROJECT=$INSTALL_PROJECT $APPTAINER_BINARY build \ + --nv \ + --notest \ + "$IMAGE_NAME" freeze_apptainer.spec +echo "----------------------------------------" + +echo +echo "=== Build Complete ===" +echo "Container is available at: $PWD/$IMAGE_NAME" \ No newline at end of file diff --git a/scripts/shebang/README.md b/scripts/shebang/README.md new file mode 100644 index 0000000..ad4182c --- /dev/null +++ b/scripts/shebang/README.md @@ -0,0 +1,7 @@ +This directory contains scripts that are not to be run directly by the user. 
+They are [SHEBANG scripts](https://en.wikipedia.org/wiki/Shebang_(Unix)) that are used to run the appropriate apptainer container.
+
+For example, the script `modelhub_exec.sh` is used to run the modelhub apptainer container with the latest apptainer image
+stored locally or at the IPD.
+
+The shebang lines (`#!/bin/bash` ...) at the top of entry point scripts like `train.py` redirect the system to here to find the correct apptainer container.
\ No newline at end of file
diff --git a/scripts/shebang/modelhub.sif b/scripts/shebang/modelhub.sif
new file mode 120000
index 0000000..91c8858
--- /dev/null
+++ b/scripts/shebang/modelhub.sif
@@ -0,0 +1 @@
+/projects/ml/modelhub/apptainer/modelhub_2025-03-19.sif
\ No newline at end of file
diff --git a/scripts/shebang/modelhub_exec.sh b/scripts/shebang/modelhub_exec.sh
new file mode 100755
index 0000000..8f47a7f
--- /dev/null
+++ b/scripts/shebang/modelhub_exec.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/bash
+
+###################
+# You can add the path to this file as the shebang line in your python script.
+# Then by default, the python script will be executed with the python interpreter
+# in the SIF_PATH container. Here, we launch the container with nvidia gpu and slurm support.
+#
+# Example shebang: #!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/scripts/shebang/modelhub_exec.sh" "$0" "$@"'
+###################
+
+# Let the user know this script is setting things up behind the scenes ($0 is quoted so paths with spaces work)
+SCRIPT_PATH=$(realpath "$0")
+SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
+echo '################## Start shebang info ##################'
+echo "The file $SCRIPT_PATH is being run as a shebang executable.
+    It will...
+
+    1. Add the 'src' and 'src/modelhub' repo directories to your PYTHONPATH.
+    2. Run your python script from the right container, which contains all dependencies.
+    3. Launch the container with slurm and nvidia gpu support."
+
+# Extract the path to the Python script from the arguments (the remaining "$@" are forwarded to it later)
+PYTHON_SCRIPT=$(realpath "$1")
+shift
+
+# Walk up from directory $1 until one contains a .project-root file; print it and return 0, else return 1
+find_repo_root() {
+    local current_dir="$1"
+    while [ "$current_dir" != "/" ]; do
+        if [ -f "$current_dir/.project-root" ]; then
+            echo "$current_dir"
+            return 0
+        fi
+        current_dir="$(dirname "$current_dir")"
+    done
+    return 1
+}
+
+echo
+echo "Searching for repository root directory..."
+REPO_ROOT=$(find_repo_root "$(dirname "$PYTHON_SCRIPT")")
+if [ -z "$REPO_ROOT" ]; then
+    echo "Error: Could not find .project-root file in any parent directory" >&2
+    exit 1
+else
+    echo "... found repository root at '$REPO_ROOT'"
+fi
+
+# Prepend directory $1 to PYTHONPATH unless already listed; no dangling ':' is left when PYTHONPATH starts empty
+add_to_pythonpath() {
+    local dir_path="$1"
+    if [[ ":$PYTHONPATH:" != *":$dir_path:"* ]]; then
+        export PYTHONPATH="$dir_path${PYTHONPATH:+:$PYTHONPATH}"
+        echo "Added '$dir_path' to PYTHONPATH."
+    else
+        echo "'$dir_path' is already in PYTHONPATH."
+    fi
+}
+
+# Add the src directory to PYTHONPATH if not already present
+echo
+echo "Checking and adding 'src' directory to PYTHONPATH..."
+SRC_PATH="$REPO_ROOT/src"
+add_to_pythonpath "$SRC_PATH"
+
+# Add modelhub to PYTHONPATH if not already present
+echo
+echo "Checking and adding 'modelhub' directory to PYTHONPATH..."
+MODELHUB_PATH="$SRC_PATH/modelhub"
+add_to_pythonpath "$MODELHUB_PATH"
+
+# Load the .env file from the repo root: sourced under 'set -a' so every KEY=VALUE it assigns is exported
+echo
+echo "Attempting to load environment variables from .env file:"
+if [ -f "$REPO_ROOT/.env" ]; then
+    echo "... loading environment variables from '$REPO_ROOT/.env'"
+    set -a; . "$REPO_ROOT/.env"; set +a
+else
+    echo " Warning: No .env file found at repository root ($REPO_ROOT)"
+fi
+
+# Marker file for detecting the IPD (NOTE: IPD_FILE is not referenced anywhere else in this script)
+IPD_FILE="/software/containers/versions/rf_diffusion_aa/ipd.txt"
+
+SIF_PATH=""
+
+echo
+echo "Fetching the appropriate apptainer image..."
+
+if [ -z "$APPTAINER_NAME" ]; then
+    if [ -n "$PROJECT_PATH" ]; then
+        # Attempt to find any .sif file in the PROJECT_PATH/scripts/shebang directory
+        SIF_DIR="$PROJECT_PATH/scripts/shebang"
+        SIF_FILE=$(find "$SIF_DIR" -maxdepth 1 -name "*.sif" -print -quit)
+
+        if [ -n "$SIF_FILE" ]; then
+            SIF_PATH="$SIF_FILE"
+        fi
+    fi
+
+    # If SIF_PATH is still empty, use the default SIF
+    if [ -z "$SIF_PATH" ]; then
+        SIF_NAME="modelhub.sif"
+        SIF_PATH="$SCRIPT_DIR/$SIF_NAME"
+    fi
+
+    echo "... looking for a local apptainer image at '$SIF_PATH'"
+    if [ ! -f "$SIF_PATH" ]; then
+        echo "... apptainer not found. To run with your own apptainer image, you can build it with 'make apptainer' and place it here: '$SIF_PATH'"
+        echo "Attempting to run $PYTHON_SCRIPT with $(which python)"
+        SIF_PATH="" # Missing image: clear SIF_PATH so the dispatch at the end really falls back to the host python
+    fi
+else
+    echo "Already running inside container $APPTAINER_NAME. Executing $PYTHON_SCRIPT with $(which python) in the existing container."
+fi
+
+# Function to print debug-mode warning (reads DEBUG_PORT)
+print_debug_warning() {
+    echo
+    echo "###############################################################################"
+    echo "# #"
+    echo "# ⚠️ WARNING ⚠️ #"
+    echo "# RUNNING WITH DEBUGPY ON PORT $DEBUG_PORT #"
+    echo "# DON'T FORGET TO ATTACH A DEBUGGER #"
+    echo "# #"
+    echo "###############################################################################"
+    echo
+}
+
+if [ -n "$DEBUG_PORT" ]; then
+    print_debug_warning
+    python_cmd="python -m debugpy --listen $DEBUG_PORT --wait-for-client"
+else
+    python_cmd="python"
+    echo
+fi
+
+if [ -n "$SIF_PATH" ]; then
+    echo "Running $PYTHON_SCRIPT with apptainer: $SIF_PATH."
+    echo '################## End shebang info ####################'
+    echo
+    /usr/bin/apptainer exec --nv --slurm \
+        --bind "$REPO_ROOT:$REPO_ROOT" \
+        --env PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" \
+        "$SIF_PATH" $python_cmd "$PYTHON_SCRIPT" "$@"
+else
+    echo "Running $PYTHON_SCRIPT with python: $(which python)"
+    echo '################## End shebang info ####################'
+    echo
+    $python_cmd "$PYTHON_SCRIPT" "$@"
+fi
diff --git a/scripts/slurm/launch.sh b/scripts/slurm/launch.sh
new file mode 100644
index 0000000..bcf94d3
--- /dev/null
+++ b/scripts/slurm/launch.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#SBATCH -p gpu-train
+#SBATCH --nodes 2
+#SBATCH --gres=gpu:l40:8
+#SBATCH --ntasks-per-node 8
+#SBATCH --mem=512g
+#SBATCH -t 7-00:00:00
+#SBATCH -J af3-old-msas-pdb-only-experimental
+#SBATCH -o slurm_logs/%x_%j.out
+#SBATCH -e slurm_logs/%x_%j.err
+#SBATCH --no-kill=off
+
+### Excluded Nodes:
+
+### To call this script run: `sbatch launch.sh` from this directory
+### For reference, see the Lightning Fabric + SLURM guide: https://lightning.ai/docs/fabric/stable/guide/multi_node/slurm.html
+
+# (In case we're still running in debug mode)
+unset DEBUG_PORT
+unset PROJECT_PATH
+
+# (SLURM setup: random port per job -- $RANDOM is 0..32767, so ports land in 1024..33791 -- and master address = first node, i.e. Rank 0)
+export MASTER_PORT=$((1024 + RANDOM % 64512))
+export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
+
+### Set custom paths
+# WARNING: You will need to update these paths to match your local setup
+# ... cifutils and datahub
+export PYTHONPATH="/home/ncorley/projects/datahub/src:/home/ncorley/projects/cifutils/src:/home/ncorley/projects/modelhub/src"
+# ... project path (if not using root src/modelhub)
+export PROJECT_PATH="/home/ncorley/projects/modelhub/projects/rfscore"
+# ... cache directory for Triton kernels (e.g., DeepSpeed4Science fused kernels)
+export TRITON_CACHE_DIR="/home/ncorley/.triton" # Change this to a directory with write permissions
+
+### Environment flags
+
+# Debugging flags (optional)
+export NCCL_DEBUG=INFO # NCCL internal debugging
+export PYTHONFAULTHANDLER=1 # Catches Python core dumps (e.g., segmentation faults)
+
+# Expand CUDA memory
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+# Turn off NCCL peer-to-peer (P2P) transport (L40s do not have NVLink)
+export NCCL_P2P_DISABLE=1
+
+# OPENMP and OPENBLAS optimizations
+# https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#utilize-openmp
+# NOTE: Must be optimized per-system; see: https://github.com/pytorch/pytorch/blob/65e6194aeb3269a182cfe2c05c122159da12770f/torch/distributed/run.py#L596-L608
+export OMP_NUM_THREADS=4
+export OPENBLAS_NUM_THREADS=4
+
+#######################################################################################################
+### WARNING: The command below is just an example. It will fail if you don't update the experiment ###
+### config in the command below. Please adapt according to your target experiment ###
+#######################################################################################################
+
+### Set the effective batch size
+EFFECTIVE_BATCH_SIZE=16
+
+### Compose the training script
+DEVICES_PER_NODE=${SLURM_NTASKS_PER_NODE:-8} # Default to 8 if not set
+echo "Running on $SLURM_NNODES nodes with $DEVICES_PER_NODE tasks per node"
+
+### Calculate grad_accum_steps (integer division); abort if it is < 1, i.e. more devices than EFFECTIVE_BATCH_SIZE
+GRAD_ACCUM_STEPS=$((EFFECTIVE_BATCH_SIZE / (DEVICES_PER_NODE * SLURM_NNODES)))
+[ "${GRAD_ACCUM_STEPS:-0}" -ge 1 ] || { echo "Error: EFFECTIVE_BATCH_SIZE ($EFFECTIVE_BATCH_SIZE) must be at least DEVICES_PER_NODE * SLURM_NNODES" >&2; exit 1; }
+echo "Grad Accumulation Steps: $GRAD_ACCUM_STEPS"
+command="srun --kill-on-bad-exit ../../src/modelhub/train.py \
+    experiment=$SLURM_JOB_NAME \
+    ++trainer.devices_per_node=$DEVICES_PER_NODE \
+    ++trainer.num_nodes=$SLURM_NNODES \
+    ++trainer.grad_accum_steps=$GRAD_ACCUM_STEPS"
+
+echo -e "command\t$command"
+
+# Let 'er rip ($command is deliberately unquoted so it word-splits into srun's arguments)
+$command
diff --git a/rf2aa/SE3Transformer/.dockerignore b/src/modelhub/SE3Transformer/.dockerignore
similarity index 100%
rename from rf2aa/SE3Transformer/.dockerignore
rename to src/modelhub/SE3Transformer/.dockerignore
diff --git a/rf2aa/SE3Transformer/.gitignore b/src/modelhub/SE3Transformer/.gitignore
similarity index 100%
rename from rf2aa/SE3Transformer/.gitignore
rename to src/modelhub/SE3Transformer/.gitignore
diff --git a/rf2aa/SE3Transformer/Dockerfile b/src/modelhub/SE3Transformer/Dockerfile
similarity index 100%
rename from rf2aa/SE3Transformer/Dockerfile
rename to src/modelhub/SE3Transformer/Dockerfile
diff --git a/rf2aa/SE3Transformer/LICENSE b/src/modelhub/SE3Transformer/LICENSE
similarity index 100%
rename from rf2aa/SE3Transformer/LICENSE
rename to src/modelhub/SE3Transformer/LICENSE
diff --git a/rf2aa/SE3Transformer/NOTICE b/src/modelhub/SE3Transformer/NOTICE
similarity index 100%
rename from rf2aa/SE3Transformer/NOTICE
rename to src/modelhub/SE3Transformer/NOTICE
diff --git a/rf2aa/SE3Transformer/README.md b/src/modelhub/SE3Transformer/README.md
similarity index 100%
rename from rf2aa/SE3Transformer/README.md
rename to src/modelhub/SE3Transformer/README.md diff --git a/rf2aa/SE3Transformer/requirements.txt b/src/modelhub/SE3Transformer/requirements.txt similarity index 100% rename from rf2aa/SE3Transformer/requirements.txt rename to src/modelhub/SE3Transformer/requirements.txt diff --git a/rf2aa/SE3Transformer/scripts/benchmark_inference.sh b/src/modelhub/SE3Transformer/scripts/benchmark_inference.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/benchmark_inference.sh rename to src/modelhub/SE3Transformer/scripts/benchmark_inference.sh diff --git a/rf2aa/SE3Transformer/scripts/benchmark_train.sh b/src/modelhub/SE3Transformer/scripts/benchmark_train.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/benchmark_train.sh rename to src/modelhub/SE3Transformer/scripts/benchmark_train.sh diff --git a/rf2aa/SE3Transformer/scripts/benchmark_train_multi_gpu.sh b/src/modelhub/SE3Transformer/scripts/benchmark_train_multi_gpu.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/benchmark_train_multi_gpu.sh rename to src/modelhub/SE3Transformer/scripts/benchmark_train_multi_gpu.sh diff --git a/rf2aa/SE3Transformer/scripts/predict.sh b/src/modelhub/SE3Transformer/scripts/predict.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/predict.sh rename to src/modelhub/SE3Transformer/scripts/predict.sh diff --git a/rf2aa/SE3Transformer/scripts/train.sh b/src/modelhub/SE3Transformer/scripts/train.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/train.sh rename to src/modelhub/SE3Transformer/scripts/train.sh diff --git a/rf2aa/SE3Transformer/scripts/train_multi_gpu.sh b/src/modelhub/SE3Transformer/scripts/train_multi_gpu.sh similarity index 100% rename from rf2aa/SE3Transformer/scripts/train_multi_gpu.sh rename to src/modelhub/SE3Transformer/scripts/train_multi_gpu.sh diff --git a/rf2aa/SE3Transformer/se3_transformer/__init__.py b/src/modelhub/SE3Transformer/se3_transformer/__init__.py similarity index 100% rename 
from rf2aa/SE3Transformer/se3_transformer/__init__.py rename to src/modelhub/SE3Transformer/se3_transformer/__init__.py diff --git a/rf2aa/SE3Transformer/se3_transformer/data_loading/__init__.py b/src/modelhub/SE3Transformer/se3_transformer/data_loading/__init__.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/data_loading/__init__.py rename to src/modelhub/SE3Transformer/se3_transformer/data_loading/__init__.py diff --git a/rf2aa/SE3Transformer/se3_transformer/data_loading/data_module.py b/src/modelhub/SE3Transformer/se3_transformer/data_loading/data_module.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/data_loading/data_module.py rename to src/modelhub/SE3Transformer/se3_transformer/data_loading/data_module.py diff --git a/rf2aa/SE3Transformer/se3_transformer/data_loading/qm9.py b/src/modelhub/SE3Transformer/se3_transformer/data_loading/qm9.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/data_loading/qm9.py rename to src/modelhub/SE3Transformer/se3_transformer/data_loading/qm9.py diff --git a/rf2aa/SE3Transformer/se3_transformer/model/__init__.py b/src/modelhub/SE3Transformer/se3_transformer/model/__init__.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/model/__init__.py rename to src/modelhub/SE3Transformer/se3_transformer/model/__init__.py diff --git a/rf2aa/SE3Transformer/se3_transformer/model/basis.py b/src/modelhub/SE3Transformer/se3_transformer/model/basis.py similarity index 98% rename from rf2aa/SE3Transformer/se3_transformer/model/basis.py rename to src/modelhub/SE3Transformer/se3_transformer/model/basis.py index e43217d..a86c0ae 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/basis.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/basis.py @@ -25,13 +25,12 @@ from functools import lru_cache from typing import Dict, List -import e3nn.o3 as o3 import torch import torch.nn.functional as F from torch import Tensor from torch.cuda.nvtx 
import range as nvtx_range -from rf2aa.SE3Transformer.se3_transformer.runtime.utils import degree_to_dim +from modelhub.SE3Transformer.se3_transformer.runtime.utils import degree_to_dim torch._C._jit_set_profiling_executor(False) torch._C._jit_set_profiling_mode(False) @@ -39,6 +38,8 @@ torch._C._jit_set_profiling_mode(False) @lru_cache(maxsize=None) def get_clebsch_gordon(J: int, d_in: int, d_out: int, device) -> Tensor: + import e3nn.o3 as o3 + """Get the (cached) Q^{d_out,d_in}_J matrices from equation (8)""" return o3.wigner_3j(J, d_in, d_out, dtype=torch.float64, device=device).permute( 2, 1, 0 @@ -58,6 +59,8 @@ def get_all_clebsch_gordon(max_degree: int, device) -> List[List[Tensor]]: def get_spherical_harmonics(relative_pos: Tensor, max_degree: int) -> List[Tensor]: + import e3nn.o3 as o3 + all_degrees = list(range(2 * max_degree + 1)) sh = o3.spherical_harmonics(all_degrees, relative_pos, normalize=True) return torch.split(sh, [degree_to_dim(d) for d in all_degrees], dim=1) diff --git a/rf2aa/SE3Transformer/se3_transformer/model/fiber.py b/src/modelhub/SE3Transformer/se3_transformer/model/fiber.py similarity index 98% rename from rf2aa/SE3Transformer/se3_transformer/model/fiber.py rename to src/modelhub/SE3Transformer/se3_transformer/model/fiber.py index 06177db..d6f1a52 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/fiber.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/fiber.py @@ -29,7 +29,7 @@ from typing import Dict import torch from torch import Tensor -from rf2aa.SE3Transformer.se3_transformer.runtime.utils import degree_to_dim +from modelhub.SE3Transformer.se3_transformer.runtime.utils import degree_to_dim FiberEl = namedtuple("FiberEl", ["degree", "channels"]) diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/__init__.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/__init__.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/__init__.py rename to 
src/modelhub/SE3Transformer/se3_transformer/model/layers/__init__.py diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/attention.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/attention.py similarity index 95% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/attention.py rename to src/modelhub/SE3Transformer/se3_transformer/model/layers/attention.py index 92d0843..9f8f0f2 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/layers/attention.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/layers/attention.py @@ -23,22 +23,19 @@ from typing import Dict, Optional, Union -import dgl import numpy as np import torch import torch.nn as nn -from dgl import DGLGraph -from dgl.ops import edge_softmax from torch import Tensor from torch.cuda.nvtx import range as nvtx_range -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber -from rf2aa.SE3Transformer.se3_transformer.model.layers.convolution import ( +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.SE3Transformer.se3_transformer.model.layers.convolution import ( ConvSE3, ConvSE3FuseLevel, ) -from rf2aa.SE3Transformer.se3_transformer.model.layers.linear import LinearSE3 -from rf2aa.SE3Transformer.se3_transformer.runtime.utils import ( +from modelhub.SE3Transformer.se3_transformer.model.layers.linear import LinearSE3 +from modelhub.SE3Transformer.se3_transformer.runtime.utils import ( aggregate_residual, degree_to_dim, unfuse_features, @@ -64,8 +61,11 @@ class AttentionSE3(nn.Module): value: Union[Tensor, Dict[str, Tensor]], # edge features (may be fused) key: Union[Tensor, Dict[str, Tensor]], # edge features (may be fused) query: Dict[str, Tensor], # node features - graph: DGLGraph, + graph, ): + import dgl + from dgl.ops import edge_softmax + with nvtx_range("AttentionSE3"): with nvtx_range("reshape keys and queries"): if isinstance(key, Tensor): @@ -182,7 +182,7 @@ class AttentionBlockSE3(nn.Module): self, 
node_features: Dict[str, Tensor], edge_features: Dict[str, Tensor], - graph: DGLGraph, + graph, basis: Dict[str, Tensor], ): with nvtx_range("AttentionBlockSE3"): diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/convolution.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/convolution.py similarity index 99% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/convolution.py rename to src/modelhub/SE3Transformer/se3_transformer/model/layers/convolution.py index a358b96..43cddeb 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/layers/convolution.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/layers/convolution.py @@ -25,17 +25,15 @@ from enum import Enum from itertools import product from typing import Dict -import dgl import numpy as np import torch import torch.nn as nn import torch.utils.checkpoint -from dgl import DGLGraph from torch import Tensor from torch.cuda.nvtx import range as nvtx_range -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber -from rf2aa.SE3Transformer.se3_transformer.runtime.utils import ( +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.SE3Transformer.se3_transformer.runtime.utils import ( degree_to_dim, unfuse_features, ) @@ -372,9 +370,11 @@ class ConvSE3(nn.Module): self, node_feats: Dict[str, Tensor], edge_feats: Dict[str, Tensor], - graph: DGLGraph, + graph, basis: Dict[str, Tensor], ): + import dgl + with nvtx_range("ConvSE3"): invariant_edge_feats = edge_feats["0"].squeeze(-1) src, dst = graph.edges() diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/linear.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/linear.py similarity index 97% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/linear.py rename to src/modelhub/SE3Transformer/se3_transformer/model/layers/linear.py index d777237..06463ed 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/layers/linear.py +++ 
b/src/modelhub/SE3Transformer/se3_transformer/model/layers/linear.py @@ -29,7 +29,7 @@ import torch import torch.nn as nn from torch import Tensor -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber class LinearSE3(nn.Module): diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/norm.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/norm.py similarity index 98% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/norm.py rename to src/modelhub/SE3Transformer/se3_transformer/model/layers/norm.py index 4a1a48d..783359d 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/layers/norm.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/layers/norm.py @@ -29,7 +29,7 @@ import torch.nn as nn from torch import Tensor from torch.cuda.nvtx import range as nvtx_range -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber @torch.jit.script diff --git a/rf2aa/SE3Transformer/se3_transformer/model/layers/pooling.py b/src/modelhub/SE3Transformer/se3_transformer/model/layers/pooling.py similarity index 93% rename from rf2aa/SE3Transformer/se3_transformer/model/layers/pooling.py rename to src/modelhub/SE3Transformer/se3_transformer/model/layers/pooling.py index 8a5b2a3..1bb7a0f 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/layers/pooling.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/layers/pooling.py @@ -24,8 +24,6 @@ from typing import Dict, Literal import torch.nn as nn -from dgl import DGLGraph -from dgl.nn.pytorch import AvgPooling, MaxPooling from torch import Tensor @@ -42,6 +40,8 @@ class GPooling(nn.Module): :param feat_type: Feature type to pool :param pool: Type of pooling: max or avg """ + from dgl.nn.pytorch import AvgPooling, MaxPooling + super().__init__() assert pool in ["max", "avg"], f"Unknown pooling: {pool}" assert feat_type == 0 or 
pool == "avg", ( @@ -50,6 +50,6 @@ class GPooling(nn.Module): self.feat_type = feat_type self.pool = MaxPooling() if pool == "max" else AvgPooling() - def forward(self, features: Dict[str, Tensor], graph: DGLGraph, **kwargs) -> Tensor: + def forward(self, features: Dict[str, Tensor], graph, **kwargs) -> Tensor: pooled = self.pool(graph, features[str(self.feat_type)]) return pooled.squeeze(dim=-1) diff --git a/rf2aa/SE3Transformer/se3_transformer/model/transformer.py b/src/modelhub/SE3Transformer/se3_transformer/model/transformer.py similarity index 95% rename from rf2aa/SE3Transformer/se3_transformer/model/transformer.py rename to src/modelhub/SE3Transformer/se3_transformer/model/transformer.py index e3390dc..286c020 100644 --- a/rf2aa/SE3Transformer/se3_transformer/model/transformer.py +++ b/src/modelhub/SE3Transformer/se3_transformer/model/transformer.py @@ -25,25 +25,24 @@ from typing import Dict, Literal, Optional import torch import torch.nn as nn -from dgl import DGLGraph from torch import Tensor -from rf2aa.SE3Transformer.se3_transformer.model.basis import ( +from modelhub.SE3Transformer.se3_transformer.model.basis import ( get_basis, update_basis_with_fused, ) -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber -from rf2aa.SE3Transformer.se3_transformer.model.layers.attention import ( +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.SE3Transformer.se3_transformer.model.layers.attention import ( AttentionBlockSE3, ) -from rf2aa.SE3Transformer.se3_transformer.model.layers.convolution import ( +from modelhub.SE3Transformer.se3_transformer.model.layers.convolution import ( ConvSE3, ConvSE3FuseLevel, ) -from rf2aa.SE3Transformer.se3_transformer.model.layers.linear import LinearSE3 -from rf2aa.SE3Transformer.se3_transformer.model.layers.norm import NormSE3 -from rf2aa.SE3Transformer.se3_transformer.model.layers.pooling import GPooling -from rf2aa.SE3Transformer.se3_transformer.runtime.utils import str2bool 
+from modelhub.SE3Transformer.se3_transformer.model.layers.linear import LinearSE3 +from modelhub.SE3Transformer.se3_transformer.model.layers.norm import NormSE3 +from modelhub.SE3Transformer.se3_transformer.model.layers.pooling import GPooling +from modelhub.SE3Transformer.se3_transformer.runtime.utils import str2bool class Sequential(nn.Sequential): @@ -187,7 +186,7 @@ class SE3Transformer(nn.Module): def forward( self, - graph: DGLGraph, + graph, node_feats: Dict[str, Tensor], edge_feats: Optional[Dict[str, Tensor]] = None, basis: Optional[Dict[str, Tensor]] = None, diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/__init__.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/__init__.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/__init__.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/__init__.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/arguments.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/arguments.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/arguments.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/arguments.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/callbacks.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/callbacks.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/callbacks.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/callbacks.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/gpu_affinity.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/gpu_affinity.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/gpu_affinity.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/gpu_affinity.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/inference.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/inference.py similarity index 100% rename from 
rf2aa/SE3Transformer/se3_transformer/runtime/inference.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/inference.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/loggers.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/loggers.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/loggers.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/loggers.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/metrics.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/metrics.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/metrics.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/metrics.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/training.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/training.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/training.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/training.py diff --git a/rf2aa/SE3Transformer/se3_transformer/runtime/utils.py b/src/modelhub/SE3Transformer/se3_transformer/runtime/utils.py similarity index 100% rename from rf2aa/SE3Transformer/se3_transformer/runtime/utils.py rename to src/modelhub/SE3Transformer/se3_transformer/runtime/utils.py diff --git a/rf2aa/SE3Transformer/setup.py b/src/modelhub/SE3Transformer/setup.py similarity index 100% rename from rf2aa/SE3Transformer/setup.py rename to src/modelhub/SE3Transformer/setup.py diff --git a/rf2aa/SE3Transformer/tests/__init__.py b/src/modelhub/SE3Transformer/tests/__init__.py similarity index 100% rename from rf2aa/SE3Transformer/tests/__init__.py rename to src/modelhub/SE3Transformer/tests/__init__.py diff --git a/rf2aa/SE3Transformer/tests/test_equivariance.py b/src/modelhub/SE3Transformer/tests/test_equivariance.py similarity index 100% rename from rf2aa/SE3Transformer/tests/test_equivariance.py rename to 
src/modelhub/SE3Transformer/tests/test_equivariance.py
diff --git a/rf2aa/SE3Transformer/tests/utils.py b/src/modelhub/SE3Transformer/tests/utils.py
similarity index 100%
rename from rf2aa/SE3Transformer/tests/utils.py
rename to src/modelhub/SE3Transformer/tests/utils.py
diff --git a/src/modelhub/__init__.py b/src/modelhub/__init__.py
new file mode 100644
index 0000000..bd38f7a
--- /dev/null
+++ b/src/modelhub/__init__.py
@@ -0,0 +1,26 @@
+import logging
+
+from beartype.claw import beartype_this_package
+from environs import Env
+from jaxtyping import install_import_hook
+
+# Load environment variables from `.env` file and read the three boolean flags (note: NAN_CHECK defaults to True)
+_env = Env()
+_env.read_env()
+should_typecheck = _env.bool("TYPE_CHECK", default=False)
+should_debug = _env.bool("DEBUG", default=False)
+should_check_nans = _env.bool("NAN_CHECK", default=True)
+
+# Set up the package-level "modelhub" logger
+logger = logging.getLogger("modelhub")
+# ... set logging level based on `DEBUG` environment variable
+logger.setLevel(logging.DEBUG if should_debug else logging.INFO)
+# ... log the active flags (emitted at DEBUG level)
+logger.debug("Debug mode: %s", should_debug)
+logger.debug("Type checking mode: %s", should_typecheck)
+logger.debug("NAN checking mode: %s", should_check_nans)
+
+# Enable runtime type checking (beartype + jaxtyping import hook) only if `TYPE_CHECK` is set to `True`
+if should_typecheck:
+    beartype_this_package()
+    install_import_hook("modelhub", "beartype.beartype")
diff --git a/rf2aa/alignment.py b/src/modelhub/alignment.py
similarity index 90%
rename from rf2aa/alignment.py
rename to src/modelhub/alignment.py
index 23db744..3a0baad 100644
--- a/rf2aa/alignment.py
+++ b/src/modelhub/alignment.py
@@ -20,6 +20,12 @@ def weighted_rigid_align(
     """
     assert X_L.shape == X_gt_L.shape
     assert X_L.shape[:-1] == w_L.shape
+
+    # Assert `X_exists_L` is a boolean mask
+    assert (
+        X_exists_L.dtype == torch.bool
+    ), "X_exists_L should be a boolean mask! Otherwise, the alignment will be incorrect (silent failure)!"
+ X_resolved = X_L[:, X_exists_L] X_gt_resolved = X_gt_L[:, X_exists_L] w_resolved = w_L[:, X_exists_L] diff --git a/rf2aa/analysis_util.py b/src/modelhub/analysis_util.py similarity index 100% rename from rf2aa/analysis_util.py rename to src/modelhub/analysis_util.py diff --git a/rf2aa/archived/arguments.py b/src/modelhub/archived/arguments.py similarity index 100% rename from rf2aa/archived/arguments.py rename to src/modelhub/archived/arguments.py diff --git a/rf2aa/archived/chain_crop.py b/src/modelhub/archived/chain_crop.py similarity index 100% rename from rf2aa/archived/chain_crop.py rename to src/modelhub/archived/chain_crop.py diff --git a/rf2aa/archived/cluster_dataset.py b/src/modelhub/archived/cluster_dataset.py similarity index 100% rename from rf2aa/archived/cluster_dataset.py rename to src/modelhub/archived/cluster_dataset.py diff --git a/rf2aa/archived/compose_dataset.py b/src/modelhub/archived/compose_dataset.py similarity index 100% rename from rf2aa/archived/compose_dataset.py rename to src/modelhub/archived/compose_dataset.py diff --git a/rf2aa/archived/data_loader.py b/src/modelhub/archived/data_loader.py similarity index 100% rename from rf2aa/archived/data_loader.py rename to src/modelhub/archived/data_loader.py diff --git a/rf2aa/archived/dataloader_adaptor.py b/src/modelhub/archived/dataloader_adaptor.py similarity index 100% rename from rf2aa/archived/dataloader_adaptor.py rename to src/modelhub/archived/dataloader_adaptor.py diff --git a/rf2aa/archived/dataloader_adaptor_af3.py b/src/modelhub/archived/dataloader_adaptor_af3.py similarity index 100% rename from rf2aa/archived/dataloader_adaptor_af3.py rename to src/modelhub/archived/dataloader_adaptor_af3.py diff --git a/rf2aa/archived/eval.py b/src/modelhub/archived/eval.py similarity index 100% rename from rf2aa/archived/eval.py rename to src/modelhub/archived/eval.py diff --git a/rf2aa/archived/eval_fb.py b/src/modelhub/archived/eval_fb.py similarity index 100% rename from 
rf2aa/archived/eval_fb.py rename to src/modelhub/archived/eval_fb.py diff --git a/rf2aa/archived/eval_model1.py b/src/modelhub/archived/eval_model1.py similarity index 100% rename from rf2aa/archived/eval_model1.py rename to src/modelhub/archived/eval_model1.py diff --git a/rf2aa/archived/evaluate.py b/src/modelhub/archived/evaluate.py similarity index 100% rename from rf2aa/archived/evaluate.py rename to src/modelhub/archived/evaluate.py diff --git a/rf2aa/archived/identical_ligands.py b/src/modelhub/archived/identical_ligands.py similarity index 100% rename from rf2aa/archived/identical_ligands.py rename to src/modelhub/archived/identical_ligands.py diff --git a/rf2aa/__init__.py b/src/modelhub/archived/loaders/__init__.py similarity index 100% rename from rf2aa/__init__.py rename to src/modelhub/archived/loaders/__init__.py diff --git a/rf2aa/archived/loaders/crop.py b/src/modelhub/archived/loaders/crop.py similarity index 100% rename from rf2aa/archived/loaders/crop.py rename to src/modelhub/archived/loaders/crop.py diff --git a/rf2aa/archived/loaders/data_transforms.py b/src/modelhub/archived/loaders/data_transforms.py similarity index 100% rename from rf2aa/archived/loaders/data_transforms.py rename to src/modelhub/archived/loaders/data_transforms.py diff --git a/rf2aa/archived/loaders/polymer_partners.py b/src/modelhub/archived/loaders/polymer_partners.py similarity index 100% rename from rf2aa/archived/loaders/polymer_partners.py rename to src/modelhub/archived/loaders/polymer_partners.py diff --git a/rf2aa/archived/loaders/rcsb_loader.py b/src/modelhub/archived/loaders/rcsb_loader.py similarity index 100% rename from rf2aa/archived/loaders/rcsb_loader.py rename to src/modelhub/archived/loaders/rcsb_loader.py diff --git a/rf2aa/archived/loaders/small_molecule_partners.py b/src/modelhub/archived/loaders/small_molecule_partners.py similarity index 100% rename from rf2aa/archived/loaders/small_molecule_partners.py rename to 
src/modelhub/archived/loaders/small_molecule_partners.py diff --git a/rf2aa/archived/loaders/spoofing.py b/src/modelhub/archived/loaders/spoofing.py similarity index 100% rename from rf2aa/archived/loaders/spoofing.py rename to src/modelhub/archived/loaders/spoofing.py diff --git a/rf2aa/archived/parsers.py b/src/modelhub/archived/parsers.py similarity index 100% rename from rf2aa/archived/parsers.py rename to src/modelhub/archived/parsers.py diff --git a/rf2aa/archived/predict.py b/src/modelhub/archived/predict.py similarity index 100% rename from rf2aa/archived/predict.py rename to src/modelhub/archived/predict.py diff --git a/rf2aa/archived/predict_casp14.py b/src/modelhub/archived/predict_casp14.py similarity index 100% rename from rf2aa/archived/predict_casp14.py rename to src/modelhub/archived/predict_casp14.py diff --git a/rf2aa/archived/sampler.py b/src/modelhub/archived/sampler.py similarity index 100% rename from rf2aa/archived/sampler.py rename to src/modelhub/archived/sampler.py diff --git a/rf2aa/archived/tests.py b/src/modelhub/archived/tests.py similarity index 100% rename from rf2aa/archived/tests.py rename to src/modelhub/archived/tests.py diff --git a/rf2aa/archived/train_multi_EMA.py b/src/modelhub/archived/train_multi_EMA.py similarity index 100% rename from rf2aa/archived/train_multi_EMA.py rename to src/modelhub/archived/train_multi_EMA.py diff --git a/rf2aa/atomized_protein_frames.pt b/src/modelhub/atomized_protein_frames.pt similarity index 100% rename from rf2aa/atomized_protein_frames.pt rename to src/modelhub/atomized_protein_frames.pt diff --git a/src/modelhub/callbacks/base.py b/src/modelhub/callbacks/base.py new file mode 100755 index 0000000..23f64d0 --- /dev/null +++ b/src/modelhub/callbacks/base.py @@ -0,0 +1,83 @@ +from abc import ABC +from beartype.typing import Any + +from lightning.fabric.wrappers import ( + _FabricOptimizer, +) +from torch import nn + + +class BaseCallback(ABC): + """Abstract base class used to build new 
callbacks. + + Where possible, use names consistent with PyTorch Lightning's callback names (see references below). + Note that if using any callbacks directly within a Model, they must also adhere to this schema. + + References: + - Pytorch Lightning Hooks (https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#hooks) + - Callbacks Flow (https://pytorch-lightning.readthedocs.io/en/0.10.0/callbacks.html#callbacks) + """ + + # Epoch loops + def on_fit_start(self, trainer: Any | None = None, model: nn.Module = None): + pass + + def on_fit_end(self, trainer: Any | None = None): + pass + + # Training loop + def on_train_epoch_start(self, trainer: Any | None = None): + pass + + def on_train_batch_start( + self, batch: Any, batch_idx: int, trainer: Any | None = None + ): + pass + + def on_before_optimizer_step( + self, optimizer: _FabricOptimizer, trainer: Any | None = None + ): + pass + + def optimizer_step(self, optimizer: _FabricOptimizer, trainer: Any | None = None): + pass + + def on_train_batch_end( + self, outputs: Any, batch: Any, batch_idx: int, trainer: Any | None = None + ): + pass + + def on_train_epoch_end(self, trainer: Any | None = None): + pass + + # Validation loop + def on_validation_epoch_start(self, trainer: Any | None = None): + pass + + def on_validation_batch_start( + self, + batch: Any, + batch_idx: int, + num_batches: int, + trainer: Any | None = None, + dataset_name: str | None = None, + ): + pass + + def on_validation_batch_end( + self, + outputs: Any, + batch: Any, + batch_idx: int, + num_batches: int, + trainer: Any | None = None, + dataset_name: str | None = None, + ): + pass + + def on_validation_epoch_end(self, trainer: Any | None = None): + pass + + # Saving and Loading + def on_save_checkpoint(self, state: dict[str, Any], trainer: Any | None = None): + pass diff --git a/src/modelhub/callbacks/dump_validation_structures.py b/src/modelhub/callbacks/dump_validation_structures.py new file mode 100644 index 0000000..df2b30b --- 
/dev/null +++ b/src/modelhub/callbacks/dump_validation_structures.py @@ -0,0 +1,96 @@ +from beartype.typing import Any +from modelhub.callbacks.base import BaseCallback +from modelhub.utils.io import ( + dump_structures, + dump_trajectories, + build_stack_from_atom_array_and_batched_coords, +) +from datahub.common import parse_example_id +from pathlib import Path +from os import PathLike + + +class DumpValidationStructuresCallback(BaseCallback): + """Dump predicted structures and/or diffusion trajectories during validation""" + + def __init__( + self, + save_dir: PathLike, + dump_predictions: bool = False, + one_model_per_file: bool = False, + dump_trajectories: bool = False, + ): + """ + Args: + dump_predictions: Whether to dump structures (CIF files) after validation batches. + one_model_per_file: If True, write each structure within a diffusion batch to its own CIF files. If False, + include each structure within a diffusion batch as a separate model within one CIF file. + dump_trajectories: Whether to dump denoising trajectories after validation batches. + """ + super().__init__() + self.save_dir = Path(save_dir) + self.dump_predictions = dump_predictions + self.dump_trajectories = dump_trajectories + self.one_model_per_file = one_model_per_file + + def on_validation_batch_end( + self, + *, + outputs: dict, + trainer: Any, + batch: Any, + dataset_name: str, + **kwargs, + ): + if (not self.dump_predictions) and (not self.dump_trajectories): + return # Nothing to do + + assert ( + "network_output" in outputs + ), "Validation outputs must contain `network_output` to dump structures!" + + network_output = outputs["network_output"] + example = batch[0] # Assume batch size = 1 + + try: + # ... try to extract the PDB ID and assembly ID from the example ID + parsed_id = parse_example_id(example["example_id"]) + identifier = f"{parsed_id['pdb_id']}_{parsed_id['assembly_id']}" + except (KeyError, ValueError): + # ... 
if parsing fails, fall back to the original example ID + identifier = example["example_id"] + + def _build_path_from_example_id(dir: str, extra: str = "") -> Path: + """Helper function to build a path from a training or validation example_id.""" + path = ( + self.save_dir + / dir + / f"epoch_{trainer.state['current_epoch']}" + ) + + path = path / dataset_name + + return path / f"{identifier}{extra}" + + + if self.dump_predictions: + atom_array_stack = build_stack_from_atom_array_and_batched_coords( + network_output["X_L"], example["atom_array"] + ) + dump_structures( + atom_arrays=atom_array_stack, + base_path=_build_path_from_example_id("predictions"), + one_model_per_file=self.one_model_per_file, + ) + + if self.dump_trajectories: + dump_trajectories( + trajectory_list=network_output["X_denoised_L_traj"], + atom_array=example["atom_array"], + base_path=_build_path_from_example_id("trajectories", "_denoised"), + ) + dump_trajectories( + trajectory_list=network_output["X_noisy_L_traj"], + atom_array=example["atom_array"], + base_path=_build_path_from_example_id("trajectories", "_noisy"), + ) diff --git a/src/modelhub/callbacks/metrics_logging.py b/src/modelhub/callbacks/metrics_logging.py new file mode 100755 index 0000000..9a47c31 --- /dev/null +++ b/src/modelhub/callbacks/metrics_logging.py @@ -0,0 +1,273 @@ +from pathlib import Path +from beartype.typing import Any, Literal + +import lightning as L +import pandas as pd + +from modelhub.callbacks.base import BaseCallback +from modelhub.utils.ddp import RankedLogger +from datahub.utils import nested_dict +from modelhub.utils.logging import ( + print_df_as_table, + condense_count_columns_of_grouped_df, +) +from copy import deepcopy +import os +from omegaconf import ListConfig + +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + + +class StoreValidationMetricsInDFCallback(BaseCallback): + """Saves the validation outputs in a DataFrame for each rank and concatenates them at the end of the validation 
epoch.""" + + def __init__( + self, + save_dir: os.PathLike, + metrics_to_save: list[str] | Literal["all"] = "all", + ): + self.save_dir = Path(save_dir) + self.metrics_to_save = metrics_to_save + + def _save_dataframe_for_rank(self, rank: int, epoch: int): + """Saves per-GPU output dataframe of metrics to a rank-specific CSV.""" + self.save_dir.mkdir(parents=True, exist_ok=True) + file_path = self.save_dir / f"validation_output_rank_{rank}_epoch_{epoch}.csv" + + # Flush explicitly to ensure the file is written to disk + with open(file_path, "w") as f: + self.per_gpu_outputs_df.to_csv(f, index=False) + f.flush() + os.fsync(f.fileno()) + + ranked_logger.info( + f"Saved validation outputs to {file_path} for rank {rank}, epoch {epoch}" + ) + + def on_validation_epoch_start(self, trainer: Any | None = None): + self.per_gpu_outputs_df = pd.DataFrame() + + def on_validation_batch_end( + self, + *, + outputs: dict, + batch_idx: int, + num_batches: int, + dataset_name: str, + trainer: Any, + **kwargs, + ): + """Build a flattened DataFrame from the metrics output and accumulate with the prior batches""" + assert "metrics_output" in outputs, "Validation outputs must contain metrics." + metrics_output = deepcopy(outputs["metrics_output"]) + + # ... assemble a flat DataFrame from the metrics output + example_id = metrics_output.pop("example_id") + metrics_as_list_of_dicts = [] + + # ... 
remove metrics that are not in the save list + if self.metrics_to_save != "all" and isinstance(self.metrics_to_save, list | ListConfig): + metrics_output = { + k: v for k, v in metrics_output.items() if any(k.startswith(prefix) for prefix in self.metrics_to_save) + } + + def _build_row_from_flattened_dict( + dict_to_flatten: dict, prefix: str, example_id: str + ): + """Helper function to build a DataFrame row""" + flattened_dict = nested_dict.flatten(dict_to_flatten, fuse_keys=".") + row_data = {"example_id": example_id} + for sub_k, sub_v in flattened_dict.items(): + # Convert lists to tuples so that they are hashable + if isinstance(sub_v, list): + sub_v = tuple(sub_v) + row_data[f"{prefix}.{sub_k}"] = sub_v + return row_data + + scalar_metrics = {"example_id": example_id} + for key, value in metrics_output.items(): + if isinstance(value, dict): + # Flatten once for this dict => 1 row. + metrics_as_list_of_dicts.append( + _build_row_from_flattened_dict(value, key, example_id) + ) + elif isinstance(value, list) and all(isinstance(x, dict) for x in value): + # Flatten each dict in the list => multiple rows. + for subdict in value: + metrics_as_list_of_dicts.append( + _build_row_from_flattened_dict(subdict, key, example_id) + ) + else: + # Scalar (string, float, int, or list that isn't list-of-dicts) + assert key not in scalar_metrics, f"Duplicate key: {key}" + scalar_metrics[key] = value + + metrics_as_list_of_dicts.append(scalar_metrics) + + # ... convert the list of dicts to a DataFrame and add epoch and dataset columns + batch_df = pd.DataFrame(metrics_as_list_of_dicts) + batch_df["epoch"] = trainer.state["current_epoch"] + batch_df["dataset"] = dataset_name + + # Assert no duplicate rows + assert ( + batch_df.duplicated().sum() == 0 + ), "Duplicate rows found in the metrics DataFrame!" 
+ + # Accumulate into the per-rank DataFrame + self.per_gpu_outputs_df = pd.concat( + [self.per_gpu_outputs_df, batch_df], ignore_index=True + ) + + ranked_logger.info( + f"Validation Progress: {100 * batch_idx / num_batches:.0f}% for {dataset_name}" + ) + + def on_validation_epoch_end(self, trainer: Any): + """Aggregate and log the validation metrics at the end of the epoch. + + Each rank writes out its partial CSV. Then rank 0 aggregates them, logs grouped metrics by dataset, + and appends them to a master file containing data from all epochs. + """ + + # ... write out partial CSV for this rank + rank = trainer.fabric.global_rank + epoch = trainer.state["current_epoch"] + self._save_dataframe_for_rank(rank, epoch) + + # Synchronize all processes + ranked_logger.info( + "Synchronizing all processes before concatenating DataFrames..." + ) + trainer.fabric.barrier() + + # Only rank 0 loads and concatenates the DataFrames + if trainer.fabric.is_global_zero: + # ... load all partial CSVs + merged_df = self._load_and_concatenate_csvs(epoch) + + # ... 
append to master CSV for all epochs + master_path = self.save_dir / "validation_output_all_epochs.csv" + if master_path.exists(): + old_df = pd.read_csv(master_path) + merged_df = pd.concat( + [old_df, merged_df], ignore_index=True, sort=False + ) + merged_df.to_csv(master_path, index=False) + ranked_logger.info(f"Appended epoch={epoch} results to {master_path}") + + # Store the path to the master CSV in the Trainer + trainer.validation_results_path = master_path + + # Cleanup + self._cleanup_temp_files() + + def _load_and_concatenate_csvs(self, epoch: int) -> pd.DataFrame: + """Load rank-specific CSVs for the given epoch and concatenate them.""" + pattern = f"validation_output_rank_*_epoch_{epoch}.csv" + files = list(self.save_dir.glob(pattern)) + dataframes = [] + for f in files: + try: + df = pd.read_csv(f) + dataframes.append(df) + except pd.errors.EmptyDataError: + ranked_logger.warning(f"Skipping empty CSV: {f}") + + # Concatenate DataFrames, filling missing columns with NaN + concatenated_df = pd.concat(dataframes, axis=0, ignore_index=True, sort=False) + + return concatenated_df + + def _cleanup_temp_files(self): + """Remove temporary files used to store individual rank outputs.""" + all_files = list(self.save_dir.rglob("validation_output_rank_*_epoch_*.csv")) + for file in all_files: + try: + file.unlink() # Remove the file + except Exception as e: + ranked_logger.warning(f"Failed to delete file {file}: {e}") + +class LogAF3ValidationMetricsCallback(BaseCallback): + def __init__( + self, + metrics_to_log: list[str] | Literal["all"] = "all", + ): + self.metrics_to_log = metrics_to_log + + def on_validation_epoch_end(self, trainer: Any): + # Only log metrics to disk if this is the global zero rank + if not trainer.fabric.is_global_zero: + return + + assert hasattr(trainer, "validation_results_path"), "Results path not found! Ensure that StoreValidationMetricsInDFCallback is called first." + df = pd.read_csv(trainer.validation_results_path) + + # ... 
filter to most recent epoch, drop epoch column + df = df[df["epoch"] == df["epoch"].max()] + df.drop(columns=["epoch", "example_id"], inplace=True) + + # ... filter to columns that start with the metrics_to_log prefixes (and "dataset") + if self.metrics_to_log != "all" and isinstance(self.metrics_to_log, list | ListConfig): + df = df[ + [col for col in df.columns if any(col.startswith(prefix) for prefix in self.metrics_to_log)] + + ["dataset"] + ] + + for dataset in df["dataset"].unique(): + dataset_df = df[df["dataset"] == dataset].copy() + dataset_df.drop(columns=["dataset"], inplace=True) + + print(f"\n+{' ' + dataset + ' ':-^150}+\n") + + # +------------- LDDT by type (chain, interface) -------------+ + by_type_lddt_cols = [col for col in df.columns if col.startswith("by_type_lddt")] + if by_type_lddt_cols: + # ... build by-type DataFrame + by_type_df = dataset_df[by_type_lddt_cols].copy() + by_type_df= by_type_df.dropna(how='all') + + # ... remove the "by_type_lddt." prefix + by_type_df.columns = by_type_df.columns.str.replace("by_type_lddt.", "") + numeric_cols = by_type_df.select_dtypes(include="number").columns + + # ... 
group by type + grouped = by_type_df.groupby("type")[numeric_cols].agg(["mean", "count"]) + print_df_as_table( + condense_count_columns_of_grouped_df(grouped).reset_index(), + f"{dataset} — Epoch {trainer.state['current_epoch']} — Validation Metrics: LDDT by Type", + ) + + # Log the grouped metrics (aggregated from all ranks) with Fabric + if trainer.fabric: + for _, row in grouped.reset_index().iterrows(): + trainer.fabric.log_dict( + { + f"val/{dataset}/{row['type'].iloc[0]}/{col}": row[col]["mean"] + for col in numeric_cols + }, + step=trainer.state["current_epoch"], + ) + + # +----------------- Other metrics -----------------+ + remaining_cols = list(set(dataset_df.columns) - set(by_type_lddt_cols)) + remaining_df = dataset_df[remaining_cols].copy() + remaining_df = remaining_df.dropna(how='all') + numeric_cols = remaining_df.select_dtypes(include="number").columns + + # Compute means and non-NaN counts for numeric columns + final_means = remaining_df[numeric_cols].mean() + non_nan_counts = remaining_df[numeric_cols].count() + + # Convert the Series to a DataFrame and add the count as a new column + final_means_df = final_means.to_frame(name="mean") + final_means_df["Count"] = non_nan_counts + + print_df_as_table( + final_means_df.reset_index(), + f"{dataset} — {trainer.state['current_epoch']} — General Validation Metrics", + ) + + if trainer.fabric: + for col in numeric_cols: + trainer.fabric.log_dict({f"val/{dataset}/{col}": final_means[col]}, step=trainer.state["current_epoch"]) diff --git a/src/modelhub/callbacks/train_logging.py b/src/modelhub/callbacks/train_logging.py new file mode 100755 index 0000000..eeee6cb --- /dev/null +++ b/src/modelhub/callbacks/train_logging.py @@ -0,0 +1,269 @@ +import time +from beartype.typing import Any + +from datahub.common import parse_example_id +from rich.panel import Panel +from rich.table import Table +from rich.console import Group +from torch import nn +from torchmetrics.aggregation import MeanMetric + +from 
modelhub.callbacks.base import BaseCallback +from modelhub.utils.ddp import RankedLogger +from modelhub.utils.logging import ( + print_model_parameters, + print_df_as_table, + table_from_df, + safe_print, +) +from modelhub.utils.loss import mean_losses, convert_batched_losses_to_list_of_dicts +from lightning.fabric.wrappers import ( + _FabricOptimizer, +) +import pandas as pd +from collections import defaultdict + + +class LogModelParametersCallback(BaseCallback): + """Print a table of the total and trainable parameters of the model at the start of training.""" + + def on_fit_start(self, trainer: Any | None, model: nn.Module): + print_model_parameters(model) + + +class PrintExampleIDBeforeForwardPassCallback(BaseCallback): + """Print the example ID for each rank at the start of the forward pass for each batch. + + WARNING: Spams the console. Use only for debugging purposes. + """ + + def on_train_batch_start(self, batch: Any, batch_idx: int, trainer: Any): + example_id = batch[0]["example_id"] + + # Prepare the formatted strings with colors + rank_info = f"[grey][/grey]" + epoch_batch_info = ( + f"[blue]Epoch {trainer.state['current_epoch']} Batch {batch_idx}[/blue]" + ) + example_id_info = f"[bold yellow]Example ID: {example_id}[/bold yellow]" + + safe_print(f"{rank_info} {epoch_batch_info} - {example_id_info}",) + + +class LogDatasetSamplingRatiosCallback(BaseCallback): + """Monitor the sampling ratios of the datasets and log after each epoch.""" + + def on_fit_start(self, trainer: Any, model: nn.Module): + self.dataset_sampling_counts = defaultdict(int) + + def on_train_batch_start(self, batch, batch_idx, trainer): + example_id = batch[0]["example_id"] + + if trainer.fabric.is_global_zero: + dataset_string = "/".join(parse_example_id(example_id)["datasets"]) + self.dataset_sampling_counts[dataset_string] += 1 + + def on_train_epoch_end(self, trainer): + if trainer.fabric.is_global_zero: + total_samples = sum(self.dataset_sampling_counts.values()) + + data = { + 
"Dataset": list(self.dataset_sampling_counts.keys()), + "Count": list(self.dataset_sampling_counts.values()), + "Percentage": [ + f"{(count / total_samples) * 100:.2f}%" + for count in self.dataset_sampling_counts.values() + ], + } + + print_df_as_table( + df=pd.DataFrame(data), + title=f"Epoch {trainer.state['current_epoch']}: Dataset Sampling Ratios", + ) + + # Reset the counts for the next epoch + self.dataset_sampling_counts.clear() + + +class LogLearningRateCallback(BaseCallback): + """Monitor the learning rate of the optimizer + + Args: + log_every_n: Log the learning rate every n optimizer steps. + """ + + def __init__(self, log_every_n: int): + self.log_every_n = log_every_n + + def optimizer_step(self, optimizer: _FabricOptimizer, trainer: Any): + # Get the current global step + current_step = trainer.state["global_step"] + + # Log the learning rate only every `log_every_n` steps + if current_step % self.log_every_n == 0: + trainer.fabric.log( + "train/learning_rate", + optimizer.param_groups[0]["lr"], + step=current_step, + ) + + +class LogAF3TrainingLossesCallback(BaseCallback): + """Log the primary model losses for AF3. + + Includes: + - The mean training losses every `log_every_n` batches + - The mean training losses at the end of each epoch + - The time taken to complete each epoch + - (Optionally) The full batch losses for each structure in the diffusion batch + + Args: + log_every_n (int): Print the training loss after every n batches. + """ + + def __init__( + self, + log_full_batch_losses: bool = False, + log_every_n: int = 10, + ): + """ + Args: + log_full_batch_losses(bool): Log losses for every structure within the diffusion batch. + log_every_n (int): Print the training loss after every n batches. + console_width (int): Width of the console for printing. 
+ """ + self.log_every_n = log_every_n + self.log_full_batch_losses = log_full_batch_losses + + self.start_time = None + self.logger = RankedLogger(__name__, rank_zero_only=True) + + # This dict will store key -> MeanMetric() for each loss + self.loss_trackers = {} + + def on_train_epoch_start(self, trainer: Any): + # Record the start time of the epoch + self.start_time = time.time() + + def on_train_batch_end( + self, outputs: Any, batch: Any, batch_idx: int, trainer: Any + ): + mean_loss_dict = {} + if "loss_dict" in outputs: + mean_loss_dict.update(mean_losses(outputs["loss_dict"])) + + for key, val in mean_loss_dict.items(): + if key not in self.loss_trackers: + self.loss_trackers[key] = trainer.fabric.to_device(MeanMetric()) + self.loss_trackers[key].update(val) + + if trainer.fabric.is_global_zero and batch_idx % self.log_every_n == 0: + # ... log losses for each structure in the batch + if self.log_full_batch_losses: + full_batch_loss_dicts = convert_batched_losses_to_list_of_dicts(outputs["loss_dict"]) + for loss_dict in full_batch_loss_dicts: + loss_dict = {f"train/per_structure/{k}": v for k, v in loss_dict.items()} + trainer.fabric.log_dict(loss_dict, step=trainer.state["global_step"]) + + # ... log losses meaned across the batch + # (Prepend "train/batch_mean" to the keys in the loss dictionary) + mean_loss_dict_for_logging = { + f"train/batch_mean/{k}": v for k, v in mean_loss_dict.items() + } + trainer.fabric.log_dict( + mean_loss_dict_for_logging, step=trainer.state["global_step"] + ) + + # ... 
print the mean losses in a table + df_losses = pd.DataFrame( + { + "Train Loss Name": [ + k.replace("_", " ").title() for k in mean_loss_dict.keys() + ], + "Value": [v for v in mean_loss_dict.values()], + } + ) + table = table_from_df(df_losses, title="Training Losses") + + # (percentage of batch count) + percentage_complete = (batch_idx / trainer.n_batches_per_epoch) * 100 + + # Simple progress bar using Unicode blocks + progress_bar_length = 10 # Length of the progress bar + filled_length = int(progress_bar_length * percentage_complete // 100) + progress_bar = "█" * filled_length + "░" * ( + progress_bar_length - filled_length + ) + percentage_str = f"[bold magenta]{percentage_complete:.2f}%[/bold magenta]" + + # Create a panel for the epoch and batch info with a progress bar + epoch_batch_info = ( + f"[grey][/grey] " + f"Epoch {trainer.state['current_epoch']} Batch {batch_idx} " + f"[{progress_bar}] {percentage_str}" + ) + + epoch_batch_panel = Panel( + epoch_batch_info, + border_style="bold blue", + ) + + # Create a panel for the example ID + example_id = batch[0]["example_id"] + example_id_str = f"[bold yellow]{example_id}[/bold yellow]" + example_id_panel = Panel( + example_id_str, + border_style="bold green", + ) + + # Combine all components vertically + combined_content = Group(epoch_batch_panel, example_id_panel, table) + + safe_print(combined_content) + + def on_train_epoch_end(self, trainer: Any): + # Gather final epoch means (must be run on all ranks) + final_means = { + k: tracker.compute().item() for k, tracker in self.loss_trackers.items() + } + + # Calculate elapsed time and number of batches (from the total_loss tracker, if available) + elapsed_time = time.time() - self.start_time + num_batches = ( + self.loss_trackers["total_loss"].update_count + if "total_loss" in self.loss_trackers + else trainer.n_batches_per_epoch + ) + + if trainer.fabric.is_global_zero: + # Create a summary table + table = Table( + title=f"Epoch 
{trainer.state['current_epoch']} Summary", + show_header=False, + header_style="bold magenta", + ) + table.add_column("Loss Name", style="bold cyan", justify="left") + table.add_column("Value", style="green", justify="right") + + for k, v in final_means.items(): + table.add_row(f" Mean {k}", f"{v:.4f}") + + table.add_section() + table.add_row("Total Optimizer Steps", str(trainer.state["global_step"])) + table.add_row("Number of Batches", str(num_batches)) + table.add_row("Elapsed Time (s)", f"{elapsed_time:.2f}") + table.add_row( + "Mean Time per Batch (s)", f"{elapsed_time / num_batches:.2f}" + ) + + safe_print(table) + + # Log these final epoch means (prepend "train/per_epoch_" to each key) + trainer.fabric.log_dict( + {f"train/per_epoch_{k}": v for k, v in final_means.items()}, + step=trainer.state["current_epoch"], + ) + + # Reset the trackers for the next epoch + for metric in self.loss_trackers.values(): + metric.reset() diff --git a/rf2aa/chemical.py b/src/modelhub/chemical.py similarity index 98% rename from rf2aa/chemical.py rename to src/modelhub/chemical.py index d6e0993..7cfccb7 100644 --- a/rf2aa/chemical.py +++ b/src/modelhub/chemical.py @@ -13,7 +13,7 @@ from hydra import compose, initialize script_dir = os.path.dirname(os.path.realpath(__file__)) + "/" -from rf2aa.scoring import * +# from modelhub.scoring import * # process ideal frames @@ -23556,7 +23556,7 @@ class ChemicalData: self.NFRAMES = max([len(f) for f in self.frames]) - atomized_protein_frames = torch.load(script_dir + "atomized_protein_frames.pt") + # atomized_protein_frames = torch.load(script_dir + "atomized_protein_frames.pt") def load_derived_data(self, params): # resolve tip atom indices @@ -23759,156 +23759,156 @@ class ChemicalData: num_tokens = self.NAATOKENS else: num_tokens = self.NNAPROTAAS - for i in range(num_tokens): - for j, a in enumerate(self.aa2type[i]): - if a is not None: - self.atom_type_index[i, j] = self.aatype2idx[a] - self.ljlk_parameters[i, j, :] = 
torch.tensor(type2ljlk[a]) - self.lj_correction_parameters[i, j, 0] = ( - type2hb[a] == HbAtom.DO - ) + (type2hb[a] == HbAtom.DA) - self.lj_correction_parameters[i, j, 1] = ( - type2hb[a] == HbAtom.AC - ) + (type2hb[a] == HbAtom.DA) - self.lj_correction_parameters[i, j, 2] = type2hb[a] == HbAtom.HP - self.lj_correction_parameters[i, j, 3] = a == "SH1" or a == "HS" + # for i in range(num_tokens): + # for j, a in enumerate(self.aa2type[i]): + # if a is not None: + # self.atom_type_index[i, j] = self.aatype2idx[a] + # self.ljlk_parameters[i, j, :] = torch.tensor(type2ljlk[a]) + # self.lj_correction_parameters[i, j, 0] = ( + # type2hb[a] == HbAtom.DO + # ) + (type2hb[a] == HbAtom.DA) + # self.lj_correction_parameters[i, j, 1] = ( + # type2hb[a] == HbAtom.AC + # ) + (type2hb[a] == HbAtom.DA) + # self.lj_correction_parameters[i, j, 2] = type2hb[a] == HbAtom.HP + # self.lj_correction_parameters[i, j, 3] = a == "SH1" or a == "HS" - self.hbtypes = torch.full( - (self.NAATOKENS, self.NTOTAL, 3), -1, dtype=torch.long - ) # (donortype, acceptortype, acchybtype) - self.hbbaseatoms = torch.full( - (self.NAATOKENS, self.NTOTAL, 2), -1, dtype=torch.long - ) # (B,B0) for acc; (D,-1) for don - self.hbpolys = torch.zeros( - (HbDonType.NTYPES, HbAccType.NTYPES, 3, 15) - ) # weight,xmin,xmax,ymin,ymax,c9,...,c0 + # self.hbtypes = torch.full( + # (self.NAATOKENS, self.NTOTAL, 3), -1, dtype=torch.long + # ) # (donortype, acceptortype, acchybtype) + # self.hbbaseatoms = torch.full( + # (self.NAATOKENS, self.NTOTAL, 2), -1, dtype=torch.long + # ) # (B,B0) for acc; (D,-1) for don + # self.hbpolys = torch.zeros( + # (HbDonType.NTYPES, HbAccType.NTYPES, 3, 15) + # ) # weight,xmin,xmax,ymin,ymax,c9,...,c0 - for i in range(self.NNAPROTAAS): - for j, a in enumerate(self.aa2type[i]): - if a in type2dontype: - j_hs = self.donorHs( - self.aa2long[i][j], self.aabonds[i], self.aa2long[i] - ) - for j_h in j_hs: - self.hbtypes[i, j_h, 0] = type2dontype[a] - self.hbbaseatoms[i, j_h, 0] = j - if a in 
type2acctype: - j_b, j_b0 = self.acceptorBB0( - self.aa2long[i][j], - type2hybtype[a], - self.aabonds[i], - self.aa2long[i], - ) - self.hbtypes[i, j, 1] = type2acctype[a] - self.hbtypes[i, j, 2] = type2hybtype[a] - self.hbbaseatoms[i, j, 0] = j_b - self.hbbaseatoms[i, j, 1] = j_b0 + # for i in range(self.NNAPROTAAS): + # for j, a in enumerate(self.aa2type[i]): + # if a in type2dontype: + # j_hs = self.donorHs( + # self.aa2long[i][j], self.aabonds[i], self.aa2long[i] + # ) + # for j_h in j_hs: + # self.hbtypes[i, j_h, 0] = type2dontype[a] + # self.hbbaseatoms[i, j_h, 0] = j + # if a in type2acctype: + # j_b, j_b0 = self.acceptorBB0( + # self.aa2long[i][j], + # type2hybtype[a], + # self.aabonds[i], + # self.aa2long[i], + # ) + # self.hbtypes[i, j, 1] = type2acctype[a] + # self.hbtypes[i, j, 2] = type2hybtype[a] + # self.hbbaseatoms[i, j, 0] = j_b + # self.hbbaseatoms[i, j, 1] = j_b0 - for i in range(HbDonType.NTYPES): - for j in range(HbAccType.NTYPES): - weight = dontype2wt[i] * acctype2wt[j] + # for i in range(HbDonType.NTYPES): + # for j in range(HbAccType.NTYPES): + # weight = dontype2wt[i] * acctype2wt[j] - pdist, pbah, pahd = hbtypepair2poly[(i, j)] - xrange, yrange, coeffs = hbpolytype2coeffs[pdist] - self.hbpolys[i, j, 0, 0] = weight - self.hbpolys[i, j, 0, 1:3] = torch.tensor(xrange) - self.hbpolys[i, j, 0, 3:5] = torch.tensor(yrange) - self.hbpolys[i, j, 0, 5:] = torch.tensor(coeffs) - xrange, yrange, coeffs = hbpolytype2coeffs[pahd] - self.hbpolys[i, j, 1, 0] = weight - self.hbpolys[i, j, 1, 1:3] = torch.tensor(xrange) - self.hbpolys[i, j, 1, 3:5] = torch.tensor(yrange) - self.hbpolys[i, j, 1, 5:] = torch.tensor(coeffs) - xrange, yrange, coeffs = hbpolytype2coeffs[pbah] - self.hbpolys[i, j, 2, 0] = weight - self.hbpolys[i, j, 2, 1:3] = torch.tensor(xrange) - self.hbpolys[i, j, 2, 3:5] = torch.tensor(yrange) - self.hbpolys[i, j, 2, 5:] = torch.tensor(coeffs) + # pdist, pbah, pahd = hbtypepair2poly[(i, j)] + # xrange, yrange, coeffs = 
hbpolytype2coeffs[pdist] + # self.hbpolys[i, j, 0, 0] = weight + # self.hbpolys[i, j, 0, 1:3] = torch.tensor(xrange) + # self.hbpolys[i, j, 0, 3:5] = torch.tensor(yrange) + # self.hbpolys[i, j, 0, 5:] = torch.tensor(coeffs) + # xrange, yrange, coeffs = hbpolytype2coeffs[pahd] + # self.hbpolys[i, j, 1, 0] = weight + # self.hbpolys[i, j, 1, 1:3] = torch.tensor(xrange) + # self.hbpolys[i, j, 1, 3:5] = torch.tensor(yrange) + # self.hbpolys[i, j, 1, 5:] = torch.tensor(coeffs) + # xrange, yrange, coeffs = hbpolytype2coeffs[pbah] + # self.hbpolys[i, j, 2, 0] = weight + # self.hbpolys[i, j, 2, 1:3] = torch.tensor(xrange) + # self.hbpolys[i, j, 2, 3:5] = torch.tensor(yrange) + # self.hbpolys[i, j, 2, 5:] = torch.tensor(coeffs) - # cartbonded scoring parameters - # (0) inter-res - self.cb_lengths_CN = (1.32868, 369.445) - self.cb_angles_CACN = (2.02807, 160) - self.cb_angles_CNCA = (2.12407, 96.53) - self.cb_torsions_CACNH = (0.0, 41.830) # also used for proline CACNCD - self.cb_torsions_CANCO = (0.0, 38.668) + # # cartbonded scoring parameters + # # (0) inter-res + # self.cb_lengths_CN = (1.32868, 369.445) + # self.cb_angles_CACN = (2.02807, 160) + # self.cb_angles_CNCA = (2.12407, 96.53) + # self.cb_torsions_CACNH = (0.0, 41.830) # also used for proline CACNCD + # self.cb_torsions_CANCO = (0.0, 38.668) - # note for the below, the extra amino acid corrsponds to cb params for HIS_D - # (1) intra-res lengths - self.cb_lengths = [[] for i in range(self.NAATOKENS + 1)] - for cst in cartbonded_data_raw["lengths"]: - res_idx = self.aa2num[cst["res"]] - self.cb_lengths[res_idx].append( - ( - self.aa2long[res_idx].index(cst["atm1"]), - self.aa2long[res_idx].index(cst["atm2"]), - cst["x0"], - cst["K"], - ) - ) - ncst_per_res = max([len(i) for i in self.cb_lengths]) - self.cb_length_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 4) - for i in range(self.NNAPROTAAS + 1): - src = i - if self.num2aa[i] == "UNK" or self.num2aa[i] == "MAS": - src = self.aa2num["ALA"] - if 
len(self.cb_lengths[src]) > 0: - self.cb_length_t[i, : len(self.cb_lengths[src]), :] = torch.tensor( - self.cb_lengths[src] - ) + # # note for the below, the extra amino acid corrsponds to cb params for HIS_D + # # (1) intra-res lengths + # self.cb_lengths = [[] for i in range(self.NAATOKENS + 1)] + # for cst in cartbonded_data_raw["lengths"]: + # res_idx = self.aa2num[cst["res"]] + # self.cb_lengths[res_idx].append( + # ( + # self.aa2long[res_idx].index(cst["atm1"]), + # self.aa2long[res_idx].index(cst["atm2"]), + # cst["x0"], + # cst["K"], + # ) + # ) + # ncst_per_res = max([len(i) for i in self.cb_lengths]) + # self.cb_length_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 4) + # for i in range(self.NNAPROTAAS + 1): + # src = i + # if self.num2aa[i] == "UNK" or self.num2aa[i] == "MAS": + # src = self.aa2num["ALA"] + # if len(self.cb_lengths[src]) > 0: + # self.cb_length_t[i, : len(self.cb_lengths[src]), :] = torch.tensor( + # self.cb_lengths[src] + # ) - # (2) intra-res angles - self.cb_angles = [[] for i in range(self.NAATOKENS + 1)] - for cst in cartbonded_data_raw["angles"]: - res_idx = self.aa2num[cst["res"]] - self.cb_angles[res_idx].append( - ( - self.aa2long[res_idx].index(cst["atm1"]), - self.aa2long[res_idx].index(cst["atm2"]), - self.aa2long[res_idx].index(cst["atm3"]), - cst["x0"], - cst["K"], - ) - ) - ncst_per_res = max([len(i) for i in self.cb_angles]) - self.cb_angle_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 5) - for i in range(self.NNAPROTAAS + 1): - src = i - if self.num2aa[i] == "UNK" or self.num2aa[i] == "MAS": - src = self.aa2num["ALA"] + # # (2) intra-res angles + # self.cb_angles = [[] for i in range(self.NAATOKENS + 1)] + # for cst in cartbonded_data_raw["angles"]: + # res_idx = self.aa2num[cst["res"]] + # self.cb_angles[res_idx].append( + # ( + # self.aa2long[res_idx].index(cst["atm1"]), + # self.aa2long[res_idx].index(cst["atm2"]), + # self.aa2long[res_idx].index(cst["atm3"]), + # cst["x0"], + # cst["K"], + # ) + # ) + # 
ncst_per_res = max([len(i) for i in self.cb_angles]) + # self.cb_angle_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 5) + # for i in range(self.NNAPROTAAS + 1): + # src = i + # if self.num2aa[i] == "UNK" or self.num2aa[i] == "MAS": + # src = self.aa2num["ALA"] - if len(self.cb_angles[src]) > 0: - self.cb_angle_t[i, : len(self.cb_angles[src]), :] = torch.tensor( - self.cb_angles[src] - ) + # if len(self.cb_angles[src]) > 0: + # self.cb_angle_t[i, : len(self.cb_angles[src]), :] = torch.tensor( + # self.cb_angles[src] + # ) - # (3) intra-res torsions - self.cb_torsions = [[] for i in range(self.NAATOKENS + 1)] - for cst in cartbonded_data_raw["torsions"]: - res_idx = self.aa2num[cst["res"]] - self.cb_torsions[res_idx].append( - ( - self.aa2long[res_idx].index(cst["atm1"]), - self.aa2long[res_idx].index(cst["atm2"]), - self.aa2long[res_idx].index(cst["atm3"]), - self.aa2long[res_idx].index(cst["atm4"]), - cst["x0"], - cst["K"], - cst["period"], - ) - ) - ncst_per_res = max([len(i) for i in self.cb_torsions]) - self.cb_torsion_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 7) - self.cb_torsion_t[..., 6] = 1.0 # periodicity - for i in range(self.NNAPROTAAS): - src = i - if self.num2aa[i] == "UNK" or self.num2aa[i] == "MAS": - src = self.aa2num["ALA"] + # # (3) intra-res torsions + # self.cb_torsions = [[] for i in range(self.NAATOKENS + 1)] + # for cst in cartbonded_data_raw["torsions"]: + # res_idx = self.aa2num[cst["res"]] + # self.cb_torsions[res_idx].append( + # ( + # self.aa2long[res_idx].index(cst["atm1"]), + # self.aa2long[res_idx].index(cst["atm2"]), + # self.aa2long[res_idx].index(cst["atm3"]), + # self.aa2long[res_idx].index(cst["atm4"]), + # cst["x0"], + # cst["K"], + # cst["period"], + # ) + # ) + # ncst_per_res = max([len(i) for i in self.cb_torsions]) + # self.cb_torsion_t = torch.zeros(self.NAATOKENS + 1, ncst_per_res, 7) + # self.cb_torsion_t[..., 6] = 1.0 # periodicity + # for i in range(self.NNAPROTAAS): + # src = i + # if self.num2aa[i] == "UNK" 
or self.num2aa[i] == "MAS": + # src = self.aa2num["ALA"] - if len(self.cb_torsions[src]) > 0: - self.cb_torsion_t[i, : len(self.cb_torsions[src]), :] = torch.tensor( - self.cb_torsions[src] - ) + # if len(self.cb_torsions[src]) > 0: + # self.cb_torsion_t[i, : len(self.cb_torsions[src]), :] = torch.tensor( + # self.cb_torsions[src] + # ) # kinematic parameters self.base_indices = torch.full( diff --git a/rf2aa/cifutils_legacy.py b/src/modelhub/cifutils_legacy.py similarity index 100% rename from rf2aa/cifutils_legacy.py rename to src/modelhub/cifutils_legacy.py diff --git a/src/modelhub/common.py b/src/modelhub/common.py new file mode 100755 index 0000000..6732ae2 --- /dev/null +++ b/src/modelhub/common.py @@ -0,0 +1,80 @@ +from beartype.typing import Any, Callable, Iterable +from toolz import merge_with + + +def do_nothing(*args: Any, **kwargs: Any) -> None: + """Does nothing, just returns None""" + pass + + +def exists(obj: Any) -> bool: + """True iff object is not None""" + return obj is not None + + +def default(obj: Any, default: Any) -> Any: + """Return obj if it exists, otherwise return default""" + return obj if exists(obj) else default + + +def exactly_one_exists(*args: object) -> bool: + """True iff exactly one of the arguments exists""" + return sum(exists(arg) for arg in args) == 1 + + +def at_least_one_exists(*args: object) -> bool: + """True iff at least one of the arguments exists""" + return any(exists(arg) for arg in args) + + +def concat_dicts(*dicts: dict) -> dict: + """ + Concatenate a list of dicts with the same keys into a single dict. + + Example: + >>> d1 = {"a": 1, "b": 2} + >>> d2 = {"a": 3, "b": 4} + >>> concat_dicts(d1, d2) + {'a': [1, 3], 'b': [2, 4]} + """ + return merge_with(list, *dicts) + + +def listmap(fn: Callable, lst: Iterable[Any]) -> list: + """ + Apply a function to each element of a single list. 
+ + Args: + - fn (Callable): Function to apply to each element + - lst (list): Input list + + Returns: + - list: Result of applying fn to each element + + Example: + >>> listmap(lambda x: x + 1, [1, 2, 3]) + [2, 3, 4] + """ + return [fn(x) for x in lst] + + +def listmap_with_idx(fn: Callable[[int, Any], Any], lst: Iterable[Any]) -> list: + """Maps a function over a list while providing both index and value to the function. + + A convenience wrapper around listmap that allows the mapping function to access both the index and value + of each element in the input list. + + Args: + - fn (Callable[[int, Any], Any]): Function that takes two arguments (index, value) and returns a transformed value. + - lst (list): Input list to map over. + + Returns: + - list: New list containing the results of applying fn to each (index, value) pair. + + Example: + >>> def add_index(i, x): + ... return f"{i}_{x}" + >>> listmap_with_idx(add_index, ["a", "b", "c"]) + ['0_a', '1_b', '2_c'] + """ + return [fn(idx, x) for idx, x in enumerate(lst)] diff --git a/rf2aa/config/inference/nov2023_model_eval.yaml b/src/modelhub/config/inference/nov2023_model_eval.yaml similarity index 100% rename from rf2aa/config/inference/nov2023_model_eval.yaml rename to src/modelhub/config/inference/nov2023_model_eval.yaml diff --git a/rf2aa/config/inference/paper_model_eval.yaml b/src/modelhub/config/inference/paper_model_eval.yaml similarity index 100% rename from rf2aa/config/inference/paper_model_eval.yaml rename to src/modelhub/config/inference/paper_model_eval.yaml diff --git a/rf2aa/config/inference/rf2_deep_layerdropout.yaml b/src/modelhub/config/inference/rf2_deep_layerdropout.yaml similarity index 100% rename from rf2aa/config/inference/rf2_deep_layerdropout.yaml rename to src/modelhub/config/inference/rf2_deep_layerdropout.yaml diff --git a/rf2aa/config/train/af3.yaml b/src/modelhub/config/train/af3.yaml similarity index 100% rename from rf2aa/config/train/af3.yaml rename to 
src/modelhub/config/train/af3.yaml diff --git a/src/modelhub/config/train/af3_repro.yaml b/src/modelhub/config/train/af3_repro.yaml new file mode 100644 index 0000000..1e11a11 --- /dev/null +++ b/src/modelhub/config/train/af3_repro.yaml @@ -0,0 +1,543 @@ +defaults: + # - af3 + - rf2aa + - _self_ + +experiment: + name: rf2aa-af3-repro + trainer: af3_repro + output_dir: null + prevalidate: True + +loss: + diffusion_loss: + _target_: modelhub.loss.af3_losses.DiffusionLoss + weight: 4.0 + sigma_data: ${model.diffusion_module.sigma_data} + alpha_dna: 5 + alpha_rna: 5 + alpha_ligand: 10 + edm_lambda: True + se3_invariant_loss: True + clamp_diffusion_loss: False + distogram_loss: + _target_: modelhub.loss.af3_losses.DistogramLoss + weight: 3e-2 + protein_ligand_bond_loss: + _target_: modelhub.loss.af3_losses.ProteinLigandBondLoss + weight: 0 + #confidence_loss: + #weight: 1e-4 + #plddt: + #weight: 1.0 + #n_bins: 50 + #max_value: 1.0 + #pae: + #weight: 1.0 + #n_bins: 64 + #max_value: 32 + #pde: + #weight: 1.0 + #n_bins: 64 + #max_value: 32 + #exp_resolved: + #weight: 1.0 + #n_bins: 2 + #max_value: 1 + +metrics: + interface_lddt: + _target_: modelhub.metrics.lddt_metrics.InterfaceLDDT + chain_lddt: + _target_: modelhub.metrics.lddt_metrics.ChainLDDT + example_id: + _target_: modelhub.metrics.metrics_base.AddExampleID + distogram: + _target_: modelhub.metrics.distogram_metrics.DistogramLoss + +ddp_params: + batch_size: 1 + accum: 3 + +loader_params: + p_msa_mask: 0.0 + maxseq: 5132 + n_fallback_retries: 10 + maxcycle: 4 + maxcycle_valid: 10 + dataloader_kwargs: + shuffle: False + num_workers: 3 + #prefetch_factor: 4 + pin_memory: True + +training_params: + learning_rate_schedule: + num_warmup_steps: 1000 + num_steps_decay: 50000 + decay_rate: 0.95 + grad_clip: 10.0 + EMA: 0.999 + seed: 1 + +interpolant: + sigma_data: 16 + + min_t: 1e-2 + separate_t: False + provide_kappa: False + hierarchical_t: False + codesign_separate_t: False + codesign_forward_fold_prop: 0.0 + 
codesign_inverse_fold_prop: 0.0 + + twisting: + use: False + + rots: + corrupt: True + train_schedule: linear + sample_schedule: linear + exp_rate: 10 + + trans: + corrupt: True + batch_ot: True + train_schedule: linear + sample_schedule: linear + sample_temp: 1.0 + vpsde_bmin: 0.1 + vpsde_bmax: 20.0 + potential: null + potential_t_scaling: False + rog: + weight: 10.0 + cutoff: 5.0 + + aatypes: + corrupt: False + schedule: linear + schedule_exp_rate: 10 + temp: 1.0 + noise: 0.0 + do_purity: False + train_extra_mask: 0.0 + interpolant_type: masking + num_tokens: 80 + + sampling: + num_timesteps: 20 + do_sde: False + self_condition: False + +model_globals: + l_max: 40000 # shouldn't matter... +model: + c_s: 384 + c_z: 128 + c_atom: 128 + c_atompair: 16 + c_s_inputs: 449 + feature_initializer: + c_s_inputs: ${model.c_s_inputs} + input_feature_embedder: + features: + - restype + - profile + - deletion_mean + atom_attention_encoder: + c_token: 384 + c_atom_1d_features: 389 + c_tokenpair: ${model.c_z} + atom_1d_features: + - ref_pos + - ref_charge + - ref_mask + - ref_element + - ref_atom_name_chars + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: ${model_globals.l_max} + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: True + + relative_position_encoding: + r_max: 32 + s_max: 2 + recycler: + n_pairformer_blocks: 48 + pairformer_block: + p_drop: 0.25 + triangle_multiplication: + d_hidden: 128 + triangle_attention: + n_head: 4 + d_hidden: 32 + attention_pair_bias: + n_head: 16 + template_embedder: + n_block: 2 + raw_template_dim: 108 + c: 64 + p_drop: 0.25 + msa_module: + n_block: 4 + c_m: 64 + p_drop_msa: 0.15 + p_drop_pair: 0.25 + msa_subsample_embedder: + num_sequences: 1024 + dim_raw_msa: 34 + c_s_inputs: ${model.c_s_inputs} + c_msa_embed: ${model.recycler.msa_module.c_m} + outer_product: + c_msa_embed: ${model.recycler.msa_module.c_m} + c_outer_product: 32 + c_out: 
${model.c_z} + msa_pair_weighted_averaging: + n_heads: 8 + c_weighted_average: 32 + c_msa_embed: ${model.recycler.msa_module.c_m} + c_z: ${model.c_z} + separate_gate_for_every_channel: False + msa_transition: + n: 4 + c: ${model.recycler.msa_module.c_m} + triangle_multiplication_outgoing: + d_pair: ${model.c_z} + d_hidden: 128 + bias: True + triangle_multiplication_incoming: + d_pair: ${model.c_z} + d_hidden: 128 + bias: True + triangle_attention_starting: + d_pair: ${model.c_z} + n_head: 4 + d_hidden: 32 + p_drop: 0.0 + triangle_attention_ending: + d_pair: ${model.c_z} + n_head: 4 + d_hidden: 32 + p_drop: 0.0 + pair_transition: + n: 4 + c: ${model.c_z} + + diffusion_module: + sigma_data: ${interpolant.sigma_data} + c_token: 768 + f_pred: edm + diffusion_conditioning: + c_s_inputs: ${model.c_s_inputs} + c_t_embed: 256 + relative_position_encoding: + r_max: 32 + s_max: 2 + atom_attention_encoder: + c_tokenpair: ${model.c_z} + c_atom_1d_features: 389 + atom_1d_features: + - ref_pos + - ref_charge + - ref_mask + - ref_element + - ref_atom_name_chars + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: ${model_globals.l_max} + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: True + broadcast_trunk_feats_on_1dim_old: True + diffusion_transformer: + n_block: 24 + diffusion_transformer_block: + n_head: 16 + no_residual_connection_between_attention_and_transition: True + atom_attention_decoder: + atom_transformer: + n_queries: 32 + n_keys: 128 + l_max: ${model_globals.l_max} + diffusion_transformer: + n_block: 3 + diffusion_transformer_block: + n_head: 4 + no_residual_connection_between_attention_and_transition: True + + distogram_head: + bins: 65 +confidence_head: + c_s: ${model.c_s} + c_z: ${model.c_z} + n_pairformer_layers: 4 + pairformer: + p_drop: 0.25 + c: 128 + attention_pair_bias: + n_head: 16 + n_bins_pae: 64 + n_bins_pde: 64 + n_bins_plddt: 50 + n_bins_exp_resolved: 2 + 
+optimizer: + type: Adam + params: + lr: 1.8e-3 + betas: [0.9, 0.95] + eps: 1.0e-8 + +logger: + save_dir: csv_logs + use_wandb: False + sublogger: + project: af3-debug + +callbacks: + log_metrics: {} + +af3_data_prep: + D: 32 + sigma_data: ${model.diffusion_module.sigma_data} + s_trans: 1 + random_augmentation: True + only_ca: False + t: null + +af3_inference: + num_steps: 200 + solver: "af3" + +resume: + False + +paths: + pdb_data_dir: /projects/ml/datahub/dfs/af3_splits/2024_12_16 + protein_msa_dirs: + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_protein_msas", "extension": ".a3m.gz", "directory_depth": 2} + - {"dir": "/projects/msa/rf2aa_af3/missing_msas_through_2024_08_12", "extension": ".msa0.a3m.gz", "directory_depth": 2} + rna_msa_dirs: + - {"dir": "/projects/msa/rf2aa_af3/rf2aa_paper_model_rna_msas", "extension": ".afa", "directory_depth": 0} + +cif_parser_caching_args: + #cache_dir: /projects/ml/RF2_allatom/cache/cif + cache_dir: /net/tukwila/RF2_allatom/cache/cif + load_from_cache: False + save_to_cache: False + + +dataset_params: + constructor: + _target_: "modelhub.data.compose_data_datahub_new.NewDatapipeTrainer" + diffusion_batch_size: 32 + diffusion_batch_size_valid: ${dataset_params.diffusion_batch_size} + + n_msa: 1024 + default_transform: modelhub.data.pipelines.build_af3_transform_pipeline + train: + # ... 
train datasets + pdb: + probability: 0.5 + sub_datasets: + pn_unit: + dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: null + cif_parser_args: ${cif_parser_caching_args} + dataset_parser: + _target_: datahub.datasets.parsers.PNUnitsDFParser + dataset: + _target_: datahub.datasets.datasets.PandasDataset + name: pn_unit + id_column: example_id + data: ${paths.pdb_data_dir}/pn_units_df_train.parquet + filters: + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + - "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants.AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + columns_to_load: + # columns required for identification, filtering & weighting + - example_id + - pdb_id + - assembly_id + - deposition_date + - resolution + - num_polymer_pn_units + - method + - cluster + - n_prot + - n_nuc + - n_ligand + - n_peptide + # pn_unit specific columns + - q_pn_unit_iid + - q_pn_unit_non_polymer_res_names + - all_pn_unit_iids_after_processing + - q_pn_unit_is_loi + return_key: null + transform: + _target_: ${dataset_params.default_transform} + is_inference: False + protein_msa_dirs: ${paths.protein_msa_dirs} + rna_msa_dirs: ${paths.rna_msa_dirs} + n_recycles: ${loader_params.maxcycle} + crop_size: ${loader_params.crop} + crop_contiguous_probability: 0.3333333333333333 + crop_spatial_probability: 0.6666666666666667 + diffusion_batch_size: ${dataset_params.diffusion_batch_size} + max_atoms_in_crop: 5000 + n_msa: ${dataset_params.n_msa} + weights: + _target_: datahub.samplers.calculate_weights_for_pdb_dataset_df + beta: 0.5 + alphas: + a_prot: 3.0 + a_nuc: 3.0 + a_ligand: 1.0 + a_loi: 5.0 + + interface: + dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: null + cif_parser_args: ${cif_parser_caching_args} + dataset_parser: + _target_: 
datahub.datasets.parsers.InterfacesDFParser + dataset: + _target_: datahub.datasets.datasets.PandasDataset + name: interface + id_column: example_id + data: ${paths.pdb_data_dir}/interfaces_df_train.parquet + filters: + - "deposition_date < '2021-09-30'" + - "resolution < 9.0" + - "num_polymer_pn_units <= 300" + - "cluster.notnull()" + - "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants.AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + - "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:cifutils.constants.AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))" + - "is_inter_molecule" + columns_to_load: + # columns required for identification, filtering & weighting + - example_id + - pdb_id + - assembly_id + - deposition_date + - resolution + - num_polymer_pn_units + - method + - cluster + - n_prot + - n_nuc + - n_ligand + - n_peptide + # interface specific columns + - pn_unit_1_iid + - pn_unit_2_iid + - pn_unit_1_non_polymer_res_names + - pn_unit_2_non_polymer_res_names + - is_inter_molecule + - all_pn_unit_iids_after_processing + - involves_loi + return_key: null + transform: + _target_: ${dataset_params.default_transform} + is_inference: False + protein_msa_dirs: ${paths.protein_msa_dirs} + rna_msa_dirs: ${paths.rna_msa_dirs} + n_recycles: ${loader_params.maxcycle} + crop_size: ${loader_params.crop} + crop_contiguous_probability: 0.0 + crop_spatial_probability: 1.0 + diffusion_batch_size: ${dataset_params.diffusion_batch_size} + max_atoms_in_crop: 5000 + n_msa: ${dataset_params.n_msa} + weights: + _target_: datahub.samplers.calculate_weights_for_pdb_dataset_df + beta: 0.5 + alphas: + a_prot: 3.0 + a_nuc: 3.0 + a_ligand: 1.0 + a_loi: 5.0 + + monomer_distillation: + probability: 0.5 + sub_datasets: + af2fb_distillation: + dataset: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + save_failed_examples_to_dir: null + + # cif 
parser + cif_parser_args: + #assume_residues_all_resolved: true + cache_dir: null + load_from_cache: false + save_to_cache: false + + # metadata parser + dataset_parser: + _target_: datahub.datasets.parsers.AF2FB_DistillationParser + + # metadata dataset + dataset: + _target_: datahub.datasets.datasets.PandasDataset + name: af2fb_distillation + id_column: example_id + data: /squash/af2_distillation_facebook/af2_distillation_facebook.parquet + columns_to_load: + - example_id + - sequence_hash + return_key: null + transform: + _target_: ${dataset_params.default_transform} + is_inference: False + protein_msa_dirs: [{"dir": "/squash/af2_distillation_facebook/msa", "extension": ".a3m", "directory_depth": 2}] + rna_msa_dirs: [] + n_recycles: ${loader_params.maxcycle} + crop_size: ${loader_params.crop} + crop_contiguous_probability: 0.25 + crop_spatial_probability: 0.75 + diffusion_batch_size: ${dataset_params.diffusion_batch_size} + max_atoms_in_crop: 5000 + n_msa: ${dataset_params.n_msa} + + + val: + # ... 
val datasets + interface: + _target_: datahub.datasets.datasets.StructuralDatasetWrapper + dataset_parser: + _target_: datahub.datasets.parsers.ValidationDFParserLikeAF3 + dataset: + _target_: datahub.datasets.datasets.PandasDataset + name: val + #id_column: ${dataset_params.train.interface.dataset.dataset.id_column} + id_column: example_id + data: ${paths.pdb_data_dir}/entry_level_val_df.parquet + filters: + - "example_id not in [\"{['validation']}{7ptq}{1}{[]}\"]" + return_key: null + transform: + _target_: ${dataset_params.default_transform} + is_inference: True + protein_msa_dirs: ${paths.protein_msa_dirs} + rna_msa_dirs: ${paths.rna_msa_dirs} + n_recycles: ${loader_params.maxcycle_valid} + crop_size: ${loader_params.crop} + crop_contiguous_probability: 0.0 + crop_spatial_probability: 0.0 + diffusion_batch_size: ${dataset_params.diffusion_batch_size_valid} + n_msa: ${dataset_params.n_msa} \ No newline at end of file diff --git a/rf2aa/config/train/af3_repro_250125.yaml b/src/modelhub/config/train/af3_repro_250125.yaml similarity index 96% rename from rf2aa/config/train/af3_repro_250125.yaml rename to src/modelhub/config/train/af3_repro_250125.yaml index f06a90c..f1de092 100644 --- a/rf2aa/config/train/af3_repro_250125.yaml +++ b/src/modelhub/config/train/af3_repro_250125.yaml @@ -11,7 +11,7 @@ experiment: loss: diffusion_loss: - _target_: rf2aa.loss.af3_losses.DiffusionLoss + _target_: modelhub.loss.af3_losses.DiffusionLoss weight: 4.0 sigma_data: ${model.diffusion_module.sigma_data} alpha_dna: 5 @@ -21,10 +21,10 @@ loss: se3_invariant_loss: True clamp_diffusion_loss: False distogram_loss: - _target_: rf2aa.loss.af3_losses.DistogramLoss + _target_: modelhub.loss.af3_losses.DistogramLoss weight: 3e-2 protein_ligand_bond_loss: - _target_: rf2aa.loss.af3_losses.ProteinLigandBondLoss + _target_: modelhub.loss.af3_losses.ProteinLigandBondLoss weight: 0 #confidence_loss: #weight: 1e-4 @@ -47,15 +47,15 @@ loss: metrics: interface_lddt: - _target_: 
rf2aa.metrics.lddt_metrics.InterfaceLDDT + _target_: modelhub.metrics.lddt_metrics.InterfaceLDDT chain_lddt: - _target_: rf2aa.metrics.lddt_metrics.ChainLDDT + _target_: modelhub.metrics.lddt_metrics.ChainLDDT example_id: - _target_: rf2aa.metrics.metrics_base.AddExampleID + _target_: modelhub.metrics.metrics_base.AddExampleID distogram: - _target_: rf2aa.metrics.distogram_metrics.DistogramLoss + _target_: modelhub.metrics.distogram_metrics.DistogramLoss chiral: - _target_: rf2aa.metrics.chiral_metrics.ChiralLoss + _target_: modelhub.metrics.chiral_metrics.ChiralLoss ddp_params: batch_size: 1 @@ -345,12 +345,12 @@ cif_parser_caching_args: dataset_params: constructor: - _target_: "rf2aa.data.compose_data_datahub_new.NewDatapipeTrainer" + _target_: "modelhub.data.compose_data_datahub_new.NewDatapipeTrainer" diffusion_batch_size: 32 diffusion_batch_size_valid: ${dataset_params.diffusion_batch_size} n_msa: 1024 - default_transform: rf2aa.data.pipelines.build_af3_transform_pipeline + default_transform: modelhub.data.pipelines.build_af3_transform_pipeline train: # ... 
train datasets pdb: diff --git a/rf2aa/config/train/af3_repro_250212.yaml b/src/modelhub/config/train/af3_repro_250212.yaml similarity index 100% rename from rf2aa/config/train/af3_repro_250212.yaml rename to src/modelhub/config/train/af3_repro_250212.yaml diff --git a/rf2aa/config/train/af3_repro_rollout.yaml b/src/modelhub/config/train/af3_repro_rollout.yaml similarity index 97% rename from rf2aa/config/train/af3_repro_rollout.yaml rename to src/modelhub/config/train/af3_repro_rollout.yaml index 9c93571..5d41c0f 100644 --- a/rf2aa/config/train/af3_repro_rollout.yaml +++ b/src/modelhub/config/train/af3_repro_rollout.yaml @@ -11,7 +11,7 @@ experiment: loss: confidence_loss: - _target_: rf2aa.loss.af3_confidence_loss.ConfidenceLoss + _target_: modelhub.loss.af3_confidence_loss.ConfidenceLoss weight: 1.0 plddt: weight: 1.0 @@ -37,11 +37,11 @@ loss: metrics: example_id: - _target_: rf2aa.metrics.metrics_base.AddExampleID + _target_: modelhub.metrics.metrics_base.AddExampleID confidence_interface_lddt: - _target_: rf2aa.metrics.lddt_metrics.ConfidenceInterfaceLDDT + _target_: modelhub.metrics.lddt_metrics.ConfidenceInterfaceLDDT confidence_chain_lddt: - _target_: rf2aa.metrics.lddt_metrics.ConfidenceChainLDDT + _target_: modelhub.metrics.lddt_metrics.ConfidenceChainLDDT ddp_params: @@ -336,7 +336,7 @@ cif_parser_caching_args: dataset_params: constructor: - _target_: "rf2aa.data.compose_data_datahub_new.NewDatapipeTrainer" + _target_: "modelhub.data.compose_data_datahub_new.NewDatapipeTrainer" diffusion_batch_size: 1 diffusion_batch_size_valid: 5 diffusion_batch_size_rollout: 24 diff --git a/rf2aa/config/train/base.yaml b/src/modelhub/config/train/base.yaml similarity index 98% rename from rf2aa/config/train/base.yaml rename to src/modelhub/config/train/base.yaml index c22f5e5..7437ae7 100644 --- a/rf2aa/config/train/base.yaml +++ b/src/modelhub/config/train/base.yaml @@ -12,7 +12,7 @@ model: legacy_model: null dataset_params: constructor: - _target_: 
"rf2aa.data.compose_dataset.LegacyDatasetConstructor" + _target_: "modelhub.data.compose_dataset.LegacyDatasetConstructor" validate_every_n_epochs: 0 validate_after_first_epoch: False fraction_pdb: 0 diff --git a/rf2aa/config/train/chem_params/default.yaml b/src/modelhub/config/train/chem_params/default.yaml similarity index 100% rename from rf2aa/config/train/chem_params/default.yaml rename to src/modelhub/config/train/chem_params/default.yaml diff --git a/rf2aa/config/train/dataset_params/dataset_params_distillation.yaml b/src/modelhub/config/train/dataset_params/dataset_params_distillation.yaml similarity index 100% rename from rf2aa/config/train/dataset_params/dataset_params_distillation.yaml rename to src/modelhub/config/train/dataset_params/dataset_params_distillation.yaml diff --git a/rf2aa/config/train/dataset_params/dataset_params_no_distillation.yaml b/src/modelhub/config/train/dataset_params/dataset_params_no_distillation.yaml similarity index 100% rename from rf2aa/config/train/dataset_params/dataset_params_no_distillation.yaml rename to src/modelhub/config/train/dataset_params/dataset_params_no_distillation.yaml diff --git a/rf2aa/config/train/ddp_params/default.yaml b/src/modelhub/config/train/ddp_params/default.yaml similarity index 100% rename from rf2aa/config/train/ddp_params/default.yaml rename to src/modelhub/config/train/ddp_params/default.yaml diff --git a/rf2aa/config/train/debug.yaml b/src/modelhub/config/train/debug.yaml similarity index 100% rename from rf2aa/config/train/debug.yaml rename to src/modelhub/config/train/debug.yaml diff --git a/rf2aa/config/train/flow_matching.yaml b/src/modelhub/config/train/flow_matching.yaml similarity index 100% rename from rf2aa/config/train/flow_matching.yaml rename to src/modelhub/config/train/flow_matching.yaml diff --git a/rf2aa/config/train/legacy_model_param/paper_model.yaml b/src/modelhub/config/train/legacy_model_param/paper_model.yaml similarity index 100% rename from 
rf2aa/config/train/legacy_model_param/paper_model.yaml rename to src/modelhub/config/train/legacy_model_param/paper_model.yaml diff --git a/rf2aa/config/train/legacy_train.yaml b/src/modelhub/config/train/legacy_train.yaml similarity index 100% rename from rf2aa/config/train/legacy_train.yaml rename to src/modelhub/config/train/legacy_train.yaml diff --git a/rf2aa/config/train/loader_params/default.yaml b/src/modelhub/config/train/loader_params/default.yaml similarity index 100% rename from rf2aa/config/train/loader_params/default.yaml rename to src/modelhub/config/train/loader_params/default.yaml diff --git a/rf2aa/config/train/loader_params/ligand_exclusions.yaml b/src/modelhub/config/train/loader_params/ligand_exclusions.yaml similarity index 100% rename from rf2aa/config/train/loader_params/ligand_exclusions.yaml rename to src/modelhub/config/train/loader_params/ligand_exclusions.yaml diff --git a/rf2aa/config/train/loss_param/pretraining.yaml b/src/modelhub/config/train/loss_param/pretraining.yaml similarity index 100% rename from rf2aa/config/train/loss_param/pretraining.yaml rename to src/modelhub/config/train/loss_param/pretraining.yaml diff --git a/rf2aa/config/train/model/rf2aa_composed.yaml b/src/modelhub/config/train/model/rf2aa_composed.yaml similarity index 100% rename from rf2aa/config/train/model/rf2aa_composed.yaml rename to src/modelhub/config/train/model/rf2aa_composed.yaml diff --git a/rf2aa/config/train/model/rf_with_gradients.yaml b/src/modelhub/config/train/model/rf_with_gradients.yaml similarity index 100% rename from rf2aa/config/train/model/rf_with_gradients.yaml rename to src/modelhub/config/train/model/rf_with_gradients.yaml diff --git a/rf2aa/config/train/model/untied_p2p.yaml b/src/modelhub/config/train/model/untied_p2p.yaml similarity index 100% rename from rf2aa/config/train/model/untied_p2p.yaml rename to src/modelhub/config/train/model/untied_p2p.yaml diff --git a/rf2aa/config/train/msa_module_training.yaml 
b/src/modelhub/config/train/msa_module_training.yaml similarity index 98% rename from rf2aa/config/train/msa_module_training.yaml rename to src/modelhub/config/train/msa_module_training.yaml index cc43037..39b0f9b 100644 --- a/rf2aa/config/train/msa_module_training.yaml +++ b/src/modelhub/config/train/msa_module_training.yaml @@ -10,7 +10,7 @@ experiment: model_globals: l_max: 5000 model: - _target_: "rf2aa.experiments.msa_module_trainer.MsaModulewithDist" + _target_: "modelhub.experiments.msa_module_trainer.MsaModulewithDist" c_s: 384 c_z: 128 c_atom: 128 @@ -131,8 +131,8 @@ dataset_params: diffusion_batch_size: 32 constructor: - _target_: "rf2aa.data.compose_data_datahub_new.NewDatapipeTrainer" - default_transform: rf2aa.data.pipelines.build_af3_transform_pipeline + _target_: "modelhub.data.compose_data_datahub_new.NewDatapipeTrainer" + default_transform: modelhub.data.pipelines.build_af3_transform_pipeline train: # ... train datasets pdb: diff --git a/rf2aa/config/train/rf2_deep_layerdropout.yaml b/src/modelhub/config/train/rf2_deep_layerdropout.yaml similarity index 100% rename from rf2aa/config/train/rf2_deep_layerdropout.yaml rename to src/modelhub/config/train/rf2_deep_layerdropout.yaml diff --git a/rf2aa/config/train/rf2aa.yaml b/src/modelhub/config/train/rf2aa.yaml similarity index 100% rename from rf2aa/config/train/rf2aa.yaml rename to src/modelhub/config/train/rf2aa.yaml diff --git a/rf2aa/config/train/training_params/default_amp.yaml b/src/modelhub/config/train/training_params/default_amp.yaml similarity index 100% rename from rf2aa/config/train/training_params/default_amp.yaml rename to src/modelhub/config/train/training_params/default_amp.yaml diff --git a/rf2aa/config/train/untied_p2p.yaml b/src/modelhub/config/train/untied_p2p.yaml similarity index 100% rename from rf2aa/config/train/untied_p2p.yaml rename to src/modelhub/config/train/untied_p2p.yaml diff --git a/rf2aa/coords6d.py b/src/modelhub/coords6d.py similarity index 97% rename from 
rf2aa/coords6d.py rename to src/modelhub/coords6d.py index 2e2b1fb..abae768 100644 --- a/rf2aa/coords6d.py +++ b/src/modelhub/coords6d.py @@ -2,7 +2,7 @@ import numpy as np import scipy import scipy.spatial -from rf2aa.chemical import generate_Cbeta +from modelhub.chemical import generate_Cbeta # calculate dihedral angles defined by 4 sets of points diff --git a/rf2aa/data/chiral_transforms.py b/src/modelhub/data/chiral_transforms.py similarity index 100% rename from rf2aa/data/chiral_transforms.py rename to src/modelhub/data/chiral_transforms.py diff --git a/rf2aa/data/compose_data_datahub_new.py b/src/modelhub/data/compose_data_datahub_new.py similarity index 99% rename from rf2aa/data/compose_data_datahub_new.py rename to src/modelhub/data/compose_data_datahub_new.py index 9a1af6e..8a44623 100644 --- a/rf2aa/data/compose_data_datahub_new.py +++ b/src/modelhub/data/compose_data_datahub_new.py @@ -18,7 +18,7 @@ from datahub.samplers import ( from omegaconf import OmegaConf from torch.utils.data import DataLoader, Sampler, WeightedRandomSampler -from rf2aa.resolvers import resolve_import +from modelhub.resolvers import resolve_import logger = logging.getLogger("main") diff --git a/rf2aa/data/pipelines.py b/src/modelhub/data/pipelines.py similarity index 97% rename from rf2aa/data/pipelines.py rename to src/modelhub/data/pipelines.py index 59271d1..2294b8c 100644 --- a/rf2aa/data/pipelines.py +++ b/src/modelhub/data/pipelines.py @@ -72,7 +72,7 @@ from datahub.transforms.template import ( RandomSubsampleTemplates, ) -from rf2aa.data.chiral_transforms import ( +from modelhub.data.chiral_transforms import ( AddAF3ChiralFeatures, GetAF3ReferenceMoleculeFeatures, GetRDKitChiralCenters, @@ -119,6 +119,7 @@ def build_af3_transform_pipeline( sigma_data: float = 16.0, diffusion_batch_size: int = 48, pad_dna_p_skip: float = 0.0, + return_atom_array: bool = True, ): """Build the AF3 pipeline with specified parameters. 
@@ -182,7 +183,13 @@ def build_af3_transform_pipeline( ), # Remove polymers with too few resolved residues MaskPolymerResiduesWithUnresolvedFrameAtoms(), HandleUndesiredResTokens(undesired_res_names), # e.g., non-standard residues - PadDNA(p_skip = pad_dna_p_skip), + ConditionalRoute( + condition_func=lambda data: data.get("is_inference", False), + transform_map={ + True: Identity(), + False: PadDNA(p_skip = pad_dna_p_skip), + }, + ), FlagAndReassignCovalentModifications(), FlagNonPolymersForAtomization(), AddGlobalAtomIdAnnotation(), @@ -334,6 +341,8 @@ def build_af3_transform_pipeline( "coord_atom_lvl_to_be_noised", "automorphisms", "symmetry_resolution", + "extra_info", + "atom_array", ] ), ] diff --git a/rf2aa/data/rotation_augmentation.py b/src/modelhub/data/rotation_augmentation.py similarity index 96% rename from rf2aa/data/rotation_augmentation.py rename to src/modelhub/data/rotation_augmentation.py index a0bc3f4..d206488 100644 --- a/rf2aa/data/rotation_augmentation.py +++ b/src/modelhub/data/rotation_augmentation.py @@ -2,7 +2,7 @@ import math import torch -from rf2aa.flow_matching.rigid_utils import rot_vec_mul +from modelhub.flow_matching.rigid_utils import rot_vec_mul def centre(X_L, X_exists_L): diff --git a/rf2aa/debug.py b/src/modelhub/debug.py similarity index 100% rename from rf2aa/debug.py rename to src/modelhub/debug.py diff --git a/src/modelhub/diffusion_samplers/inference_sampler.py b/src/modelhub/diffusion_samplers/inference_sampler.py new file mode 100755 index 0000000..bf25c43 --- /dev/null +++ b/src/modelhub/diffusion_samplers/inference_sampler.py @@ -0,0 +1,220 @@ +from beartype.typing import Any + +import torch +from jaxtyping import Float +from beartype.typing import Literal + +from modelhub.data.rotation_augmentation import centre_random_augmentation +from modelhub.utils.ddp import RankedLogger + +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + + +class SampleDiffusion: + """Algorithm 18""" + + def __init__( + self, + *, 
+ # Hyperparameters + num_timesteps: int, # AF-3: 200 + min_t: int, # AF-3: 0 + max_t: int, # AF-3: 1 + sigma_data: int, # AF-3: 16 + s_min: float, # AF-3: 4e-4 + s_max: int, # AF-3: 160 + p: int, # AF-3: 7 + gamma_0: float, # AF-3: 0.8 + gamma_min: float, # AF-3: 1.0, + noise_scale: float, # AF-3: 1.003, + step_scale: float, # AF-3: 1.5, + solver: Literal["af3"], + ): + """Initialize the diffusion sampler, to perform a complete diffusion roll-out with the given recycling outputs. + + We do not use default values for the parameters to make the Hydra configuration the single source of truth and avoid silent failures. + + Args: + num_timesteps (int): The number of timesteps for which the noise schedule is constructed. Default is 200, per AF3. + min_t (float): The minimum value of t in the schedule. Default is 0, per AF3. + max_t (float): The maximum value of t in the schedule. Default is 1, per AF3. + sigma_data (int): A constant determined by the variance of the data. Default is 16, as defined in the AlphaFold 3 Supplement (Algorithm 20, Diffusion Module). + s_min (float): The minimum value of the noise schedule. Default is 4e-4, per AF3. + s_max (float): The maximum value of the noise schedule. Default is 160, per AF3. + p (int): A constant that determines the shape of the noise schedule. Default is 7, per AF3. + gamma_0 (float): The value of gamma when t > gamma_min. Default is 0.8, per AF3. + solver (str): The solver to use for the diffusion process. Default is "af3". + + TODO: Continue documentation of the remaining parameters. 
+ """ + self.num_timesteps = num_timesteps + self.min_t = min_t + self.max_t = max_t + self.sigma_data = sigma_data + self.s_min = s_min + self.s_max = s_max + self.p = p + self.gamma_0 = gamma_0 + self.gamma_min = gamma_min + self.noise_scale = noise_scale + self.step_scale = step_scale + self.solver = solver + + def _construct_inference_noise_schedule(self, device: torch.device) -> torch.Tensor: + """Constructs a noise schedule for use during inference. + + The inference noise schedule is defined in the AF-3 supplement as: + + t_hat = sigma_data * (s_max**(1/p) + t * (s_min**(1/p) - s_max**(1/p)))**p + + Returns: + torch.Tensor: A tensor representing the noise schedule `t_hat`. + + Reference: + AlphaFold 3 Supplement, Section 3.7.1. + """ + # Create a linearly spaced tensor of timesteps between min_t and max_t + t = torch.linspace(self.min_t, self.max_t, self.num_timesteps, device=device) + + # Construct the noise schedule, using the formula provided in the reference + t_hat = ( + self.sigma_data + * ( + (self.s_max) ** (1 / self.p) + + t * (self.s_min ** (1 / self.p) - self.s_max ** (1 / self.p)) + ) + ** self.p + ) + + return t_hat + + def _get_initial_structure( + self, + c0: torch.Tensor, + D: int, + L: int, + coord_atom_lvl_to_be_noised: torch.Tensor, + ) -> torch.Tensor: + """Sample initial point cloud from a normal distribution. + + Args: + c0 (torch.Tensor): A scalar tensor that will be used to scale the initial point cloud. Effectively, the same as + directly changing the standard deviation of the normal distribution. Derived from noise_schedule[0]. + D (int): The number of structures to sample. + L (int): The number of atoms in the structure. 
+ coord_atom_lvl_to_be_noised (torch.Tensor): The atom-level coordinates to be noised (either completely or partially) + """ + noise = c0 * torch.normal(mean=0.0, std=1.0, size=(D, L, 3), device=c0.device) + X_L = noise + coord_atom_lvl_to_be_noised + + return X_L + + def sample_diffusion_like_af3( + self, + *, + S_inputs_I: Float[torch.Tensor, "I c_s_inputs"], + S_trunk_I: Float[torch.Tensor, "I c_s"], + Z_trunk_II: Float[torch.Tensor, "I I c_z"], + f: dict[str, Any], + diffusion_module: torch.nn.Module, + diffusion_batch_size: int, + coord_atom_lvl_to_be_noised: Float[torch.Tensor, "D L 3"], + ) -> dict[str, Any]: + """Perform a complete diffusion roll-out with the given recycling outputs. + + Args: + diffusion_module (torch.nn.Module): The diffusion module to use for denoising. If using EMA and performing validation or inference, + this model should be the EMA model. + """ + # Construct the noise schedule t_hat for inference on the appropriate device + noise_schedule = self._construct_inference_noise_schedule( + device=S_inputs_I.device + ) + + # Infer number of atoms from any atom-level feature + L = f["ref_element"].shape[0] + D = diffusion_batch_size + + # Initial X_L is drawn from a normal distribution with a mean vector of 0 and a + # covariance matrix equal to the 3x3 identity matrix, scaled by the noise schedule + X_L = self._get_initial_structure( + c0=noise_schedule[0], + D=D, + L=L, + coord_atom_lvl_to_be_noised=coord_atom_lvl_to_be_noised, + ) # (D, L, 3) + + X_noisy_L_traj = [] + X_denoised_L_traj = [] + t_hats = [] + + for c_t_minus_1, c_t in zip(noise_schedule, noise_schedule[1:]): + # (All predicted atoms exist) + X_exists_L = torch.ones((D, L)).bool() # (D, L) + + # Apply a random rotation and translation to the structure + # TODO: Make s_trans a hyperparameter + s_trans = 1.0 + X_L = centre_random_augmentation(X_L, X_exists_L, s_trans) + + # Update gamma + gamma = self.gamma_0 if c_t > self.gamma_min else 0 + + # Compute the value of t_hat + 
t_hat = c_t_minus_1 * (gamma + 1) + + # Noise the coordinates with scaled Gaussian noise + epsilon_L = ( + self.noise_scale + * torch.sqrt(torch.square(t_hat) - torch.square(c_t_minus_1)) + * torch.normal(mean=0.0, std=1.0, size=X_L.shape, device=X_L.device) + ) + X_noisy_L = X_L + epsilon_L + + # Denoise the coordinates + X_denoised_L = diffusion_module( + X_noisy_L=X_noisy_L, + t=t_hat.tile(D), + f=f, + S_inputs_I=S_inputs_I, + S_trunk_I=S_trunk_I, + Z_trunk_II=Z_trunk_II, + ) + + # Compute the delta between the noisy and denoised coordinates, scaled by t_hat + delta_L = (X_noisy_L - X_denoised_L) / t_hat + d_t = c_t - t_hat + + # Update the coordinates, scaled by the step size + X_L = X_noisy_L + self.step_scale * d_t * delta_L + + # Append the results to the trajectory (for visualization of the diffusion process) + X_noisy_L_traj.append(X_noisy_L) + X_denoised_L_traj.append(X_denoised_L) + t_hats.append(t_hat) + + return dict( + X_L=X_L, # (D, L, 3) + X_noisy_L_traj=X_noisy_L_traj, # list[Tensor[D, L, 3]] + X_denoised_L_traj=X_denoised_L_traj, # list[Tensor[D, L, 3]] + t_hats=t_hats, # list[Tensor[D]], where D is shared across all diffusion batches + ) + + +class SamplePartialDiffusion(SampleDiffusion): + def __init__(self, partial_t: int, **kwargs): + super().__init__(**kwargs) + self.partial_t = partial_t + + def _construct_inference_noise_schedule(self, device: torch.device) -> torch.Tensor: + """Constructs a noise schedule for use during inference with partial t.""" + t_hat_full = super()._construct_inference_noise_schedule(device) + + assert ( + self.partial_t < self.num_timesteps + ), f"Partial t ({self.partial_t}) must be less than num_timesteps ({self.num_timesteps})" + ranked_logger.info( + f"Using partial t index: {self.partial_t} [e.g., {t_hat_full[self.partial_t]:.4}], or {self.partial_t / (self.num_timesteps):.2%}, by index (100% is data, 0% is noise)" + ) + + return t_hat_full[self.partial_t :] diff --git a/rf2aa/experiments/af3_trainer.py 
b/src/modelhub/experiments/af3_trainer.py similarity index 95% rename from rf2aa/experiments/af3_trainer.py rename to src/modelhub/experiments/af3_trainer.py index f0a40db..eab3292 100644 --- a/rf2aa/experiments/af3_trainer.py +++ b/src/modelhub/experiments/af3_trainer.py @@ -6,16 +6,16 @@ import torch.distributed as dist import tree from torch.nn.parallel import DistributedDataParallel as DDP -from rf2aa.chemical import initialize_chemdata -from rf2aa.debug import pretty_describe_dict -from rf2aa.flow_matching.sampler import AF3PartialSampler, AF3Sampler -from rf2aa.loss.af3_losses import Loss as AF3Loss -from rf2aa.loss.af3_losses import ResidueSymmetryResolution, SubunitSymmetryResolution -from rf2aa.metrics.metrics_base import MetricManager -from rf2aa.metrics.predicted_error import GetConfidenceIndices -from rf2aa.model import AF3_structure -from rf2aa.trainer_new import FlowMatchingTrainer -from rf2aa.training.EMA import EMA +from modelhub.chemical import initialize_chemdata +from modelhub.debug import pretty_describe_dict +from modelhub.flow_matching.sampler import AF3PartialSampler, AF3Sampler +from modelhub.loss.af3_losses import Loss as AF3Loss +from modelhub.loss.af3_losses import ResidueSymmetryResolution, SubunitSymmetryResolution +from modelhub.metrics.metrics_base import MetricManager +from modelhub.metrics.predicted_error import GetConfidenceIndices +from modelhub.model import AF3_structure +from modelhub.trainer_new import FlowMatchingTrainer +from modelhub.training.EMA import EMA logger = logging.getLogger(__name__) @@ -35,7 +35,7 @@ class AF3Trainer(FlowMatchingTrainer): broadcast_buffers=False, ) else: - from rf2aa.training.EMA import FakeDDPWrapper + from modelhub.training.EMA import FakeDDPWrapper self.model = FakeDDPWrapper(self.model) if "partial_t" in self.config.af3_data_prep: @@ -298,14 +298,14 @@ class AF3TrainerRollout(AF3Trainer): model = AF3_structure.Model(**self.config.model).to(device) model.device = device - from 
rf2aa.model.layers.af3_auxiliary_heads import ConfidenceHead + from modelhub.model.layers.af3_auxiliary_heads import ConfidenceHead confidence = ConfidenceHead(**self.config.confidence_head).to(device) self.confidence = confidence - from rf2aa.flow_matching.sampler import AF3Sampler + from modelhub.flow_matching.sampler import AF3Sampler self.sampler = AF3Sampler(self.config, model, confidence=self.confidence) - from rf2aa.model.af3_with_rollout import AF3_with_rollout + from modelhub.model.af3_with_rollout import AF3_with_rollout self.model = AF3_with_rollout( model, @@ -325,7 +325,7 @@ class AF3TrainerRollout(AF3Trainer): broadcast_buffers=False, ) else: - from rf2aa.training.EMA import FakeDDPWrapper + from modelhub.training.EMA import FakeDDPWrapper self.model = FakeDDPWrapper(self.model) self.sampler.model = self.model.module.shadow.model diff --git a/rf2aa/experiments/msa_module_trainer.py b/src/modelhub/experiments/msa_module_trainer.py similarity index 95% rename from rf2aa/experiments/msa_module_trainer.py rename to src/modelhub/experiments/msa_module_trainer.py index ae90ba4..a4e7158 100644 --- a/rf2aa/experiments/msa_module_trainer.py +++ b/src/modelhub/experiments/msa_module_trainer.py @@ -4,16 +4,16 @@ import tree from hydra.utils import instantiate from torch.nn.parallel import DistributedDataParallel as DDP -from rf2aa.loss.af3_losses import DistogramLoss -from rf2aa.model.AF3_structure import ( +from modelhub.loss.af3_losses import DistogramLoss +from modelhub.model.AF3_structure import ( DistogramHead, FeatureInitializer, MSAModule, PairformerBlock, ) -from rf2aa.model.layers.Embeddings import MSA_emb -from rf2aa.trainer_new import ComposedTrainer -from rf2aa.training.EMA import EMA +from modelhub.model.layers.Embeddings import MSA_emb +from modelhub.trainer_new import ComposedTrainer +from modelhub.training.EMA import EMA class MsaModuleTrainer(ComposedTrainer): diff --git a/rf2aa/ffindex.py b/src/modelhub/ffindex.py similarity index 100% rename 
from rf2aa/ffindex.py rename to src/modelhub/ffindex.py diff --git a/rf2aa/flow_matching/all_atom.py b/src/modelhub/flow_matching/all_atom.py similarity index 100% rename from rf2aa/flow_matching/all_atom.py rename to src/modelhub/flow_matching/all_atom.py diff --git a/rf2aa/flow_matching/data_transforms.py b/src/modelhub/flow_matching/data_transforms.py similarity index 96% rename from rf2aa/flow_matching/data_transforms.py rename to src/modelhub/flow_matching/data_transforms.py index e7ccf8d..a611da6 100644 --- a/rf2aa/flow_matching/data_transforms.py +++ b/src/modelhub/flow_matching/data_transforms.py @@ -1,9 +1,8 @@ - import torch from opt_einsum import contract as einsum -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.util import get_prot_sm_mask, rigid_from_3_points +from modelhub.chemical import ChemicalData as ChemData +from modelhub.util import get_prot_sm_mask, rigid_from_3_points """ the flow matching code in frame flow uses openfold primitives which are slightly diff --git a/rf2aa/flow_matching/data_utils.py b/src/modelhub/flow_matching/data_utils.py similarity index 99% rename from rf2aa/flow_matching/data_utils.py rename to src/modelhub/flow_matching/data_utils.py index e9fcb3f..a7ba484 100644 --- a/rf2aa/flow_matching/data_utils.py +++ b/src/modelhub/flow_matching/data_utils.py @@ -12,7 +12,7 @@ import torch # from torch_geometric.utils import scatter from torch import scatter -from rf2aa.flow_matching import rigid_utils as ru +from modelhub.flow_matching import rigid_utils as ru Rigid = ru.Rigid # Protein = protein.Protein diff --git a/rf2aa/flow_matching/interpolant.py b/src/modelhub/flow_matching/interpolant.py similarity index 99% rename from rf2aa/flow_matching/interpolant.py rename to src/modelhub/flow_matching/interpolant.py index 871d7b4..f23335b 100644 --- a/rf2aa/flow_matching/interpolant.py +++ b/src/modelhub/flow_matching/interpolant.py @@ -11,9 +11,9 @@ from scipy.optimize import linear_sum_assignment from 
scipy.spatial.transform import Rotation from torch import autograd -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.flow_matching import data_utils as du -from rf2aa.flow_matching import so3_utils # , all_atom +from modelhub.chemical import ChemicalData as ChemData +from modelhub.flow_matching import data_utils as du +from modelhub.flow_matching import so3_utils # , all_atom # from cogen.motif_scaffolding import twisting diff --git a/rf2aa/flow_matching/rigid_utils.py b/src/modelhub/flow_matching/rigid_utils.py similarity index 100% rename from rf2aa/flow_matching/rigid_utils.py rename to src/modelhub/flow_matching/rigid_utils.py diff --git a/rf2aa/flow_matching/sampler.py b/src/modelhub/flow_matching/sampler.py similarity index 98% rename from rf2aa/flow_matching/sampler.py rename to src/modelhub/flow_matching/sampler.py index 0e8dde6..1a9afd5 100644 --- a/rf2aa/flow_matching/sampler.py +++ b/src/modelhub/flow_matching/sampler.py @@ -4,17 +4,17 @@ import torch import torch.nn.functional as F import tree -import rf2aa.flow_matching.data_utils as du -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.data.rotation_augmentation import centre_random_augmentation -from rf2aa.flow_matching import data_transforms -from rf2aa.flow_matching.interpolant import _centered_gaussian, _uniform_so3 -from rf2aa.training.recycling import ( +import modelhub.flow_matching.data_utils as du +from modelhub.chemical import ChemicalData as ChemData +from modelhub.data.rotation_augmentation import centre_random_augmentation +from modelhub.flow_matching import data_transforms +from modelhub.flow_matching.interpolant import _centered_gaussian, _uniform_so3 +from modelhub.training.recycling import ( recycle_step_gen, recycle_step_packed, unpack_outputs, ) -from rf2aa.util import rigid_from_3_points, writepdb_file +from modelhub.util import rigid_from_3_points, writepdb_file class Sampler: diff --git a/rf2aa/flow_matching/so3_utils.py 
b/src/modelhub/flow_matching/so3_utils.py similarity index 100% rename from rf2aa/flow_matching/so3_utils.py rename to src/modelhub/flow_matching/so3_utils.py diff --git a/src/modelhub/inference.py b/src/modelhub/inference.py new file mode 100755 index 0000000..bf41bb7 --- /dev/null +++ b/src/modelhub/inference.py @@ -0,0 +1,36 @@ +#!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../scripts/shebang/modelhub_exec.sh" "$0" "$@"' + +from omegaconf import DictConfig +import hydra +from hydra.utils import instantiate +import os +import rootutils +import tempfile +from pathlib import Path + +# Setup root dir and environment variables (more info: https://github.com/ashleve/rootutils) +# NOTE: Sets the `PROJECT_ROOT` environment variable to the root directory of the project (where `.project-root` is located) +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +# If the user has set `PROJECT_PATH`, use it to build the config path; otherwise, fall back to `PROJECT_ROOT` +_config_path = os.path.join(os.environ.get("PROJECT_PATH", os.environ["PROJECT_ROOT"]), "configs") + + +@hydra.main( + config_path=_config_path, + config_name="inference", + version_base="1.3", +) +def run_inference(cfg: DictConfig) -> None: + """Execute the specified inference pipeline""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + temp_dir.mkdir(parents=True, exist_ok=True) + + inference_engine = instantiate(cfg, temp_dir=temp_dir, _convert_="partial") + inference_engine.trainer.fabric.launch() + inference_engine.eval() + + +if __name__ == "__main__": + run_inference() diff --git a/rf2aa/inference/README.md b/src/modelhub/inference/README.md similarity index 100% rename from rf2aa/inference/README.md rename to src/modelhub/inference/README.md diff --git a/src/modelhub/inference_engines/README.md b/src/modelhub/inference_engines/README.md new file mode 100644 index 0000000..4e453d0 --- /dev/null +++ b/src/modelhub/inference_engines/README.md @@ 
-0,0 +1,143 @@ +# Inference with `modelhub-AF3` repository + +We have reproduced AF3 and are sharing the weights with the lab to use for various tasks. +This guide provides instructions on preparing inputs and running inference for our AF3 reproduction. + +Additional variations (e.g., with chirality inputs, ligand geometry conditioning, protein backbone coordinate conditioning) are in-the-works; however, the core inference API will not change. + +## Step 1: Prepare Inputs + +> **Note:** If you already have a `CIF` or `PDB` file (e.g., from MPNN), and do not want to include MSAs, you may proceed directly to Step 2. + +We enumerate two options for preparing inputs: one with a JSON API, one by creating an `AtomArray` to spoof a CIF. + +### Option 1: Prepare inputs using a combination of one-letter polymer sequences, SMILES strings, CCD codes, and SDF files + +Create a JSON file with each component; e.g., + +```json +[ + { + "seq": "SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWNAQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE", + "msa_path": "/path/to/msa", // optional + "chain_id": "A" + }, + { + // We will automatically name the atoms + // If no `chain_id` is specified, we will deterministically generate one (e.g., "B", since "A" exists above) + "smiles": "NCCCCN1N=C(C[C@@H](C1=O)c2cccc3ncccc23)c4ccc(NC(=O)N5Cc6ccncc6C5)cc4" + }, + { + // We will use atom names from the CCD + "ccd_code": "HEM" + }, + { + // We will use atom names from the SDF file + "path": "/path/to/sdf.sdf" + } +] +``` +The full API for inference via dictionaries of chemical components is specified in [CIFUtils](https://github.com/baker-laboratory/cifutils/blob/main/src/cifutils/tools/inference.py); additional contributions to support further formats (e.g., `MOL` files and `CIF` files, as components) are welcome and relatively straight-forward to implement. 
+ +Supported input options: +- `seq`: For proteins and nucleic acids using non-canonical one-letter codes as they appear in a CIF file. +- `smiles`: For small molecules (ensure correctness of SMILES). +- `ccd_code`: If your small molecule is already in the CCD. +- `path`: If you have a `.sdf` file. Note that we will not (yet) use the coordinates from the `.sdf` file for the reference conformer (but that's in-the-works). + +Coming soon: support for `cif` files and `mol` files as components. + +### Option 2: Using a Spoofed CIF *(more complicated, more customizable)* + +If you can get your inputs into an `AtomArray`, use `to_cif_file` to convert the `AtomArray` to a `CIF`. Use the pre-built inference tools in `cifutils` to convert arbitrary biological inputs (e.g., FASTA, CIFs, SMILES) into an `AtomArray`. See [cifutils tests](https://github.com/baker-laboratory/cifutils/blob/main/tests/tools/test_inference_processing.py) for examples. + +#### Example Code + +```python +import os +os.environ['CCD_MIRROR_PATH'] = "/projects/ml/frozen_pdb_copies/2024_12_11_ccd" +os.environ['PDB_MIRROR_PATH'] = "/projects/ml/frozen_pdb_copies/2024_12_01_pdb" + +from cifutils.tools.inference import components_to_atom_array +from cifutils.utils.io_utils import to_cif_file + +# Define inputs as a list of dictionaries +monomer = { + "seq": "SMNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWNAQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEE", + "chain_type": "polypeptide(l)", + "chain_id": "A", +} + +ligand_from_smiles = { + "smiles": "NCCCCN1N=C(C[C@@H](C1=O)c2cccc3ncccc23)c4ccc(NC(=O)N5Cc6ccncc6C5)cc4", + "chain_id": "C", +} + +ligand_from_ccd = { + "ccd_code": "7Z2", + "chain_id": "C", +} + +# Convert to AtomArrays and write to CIF files +atom_array_from_ccd = components_to_atom_array([monomer, ligand_from_ccd]) +atom_array_from_smiles = components_to_atom_array([monomer, ligand_from_smiles]) + +to_cif_file(atom_array_from_ccd, "example_from_ccd.cif") 
+to_cif_file(atom_array_from_smiles, "example_from_smiles.cif") +``` + +## Step 2: Run `run_inference.py` + +The apptainers that we release pre-install `modelhub`, `datahub`, and `cifutils`. That means in order to run inference, essentially all that is needed is `from modelhub.inference import run_inference`, `run_inference()`. For convenience, we have written a script with that functionality, and saved it to `/projects/ml/modelhub/inference/run_inference.py`. Note that this also means these apptainers are not "hackable" — if you would like to modify `modelhub`, you'll need to clone the repository, and use an apptainer without `modelhub` pre-installed. + +### Using an Existing CIF or PDB File + +Run `run_inference.py` with the appropriate apptainer, checkpoint, input directory, and output directory. + +Arguments to `run_inference.py` (and thus `inference.py`, which is called by `run_inference.py`) are: +- `inputs` (required): Path to a file (CIF/PDB/JSON) for prediction; if given a directory, all CIF/PDB files in that directory will be predicted +- `--checkpoint_path` (required): Path to checkpoint file +- `--cif_out_dir` (required): Where to save predicted structures. The output files will be named the same way as the input structures. Use `./` for current directory. +- `--n_recycles` (optional, defaults to 10): Number of recycles. +- `--diffusion_batch_size` (optional, defaults to 5): Number of output structures in the ensemble, drawn from the same model seed and forward pass of the Pairformer. +- `--rename_residues` (optional, defaults to an empty string): Dictionary of residue names to rename to avoid CCD clashes, e.g., '{"ALA": "L:1"}'. When parsing files, we use the given residue names to help identify any missing atoms. Thus, if a custom ligand overlaps with a ligand in the CCD, the prediction will be catastrophically wrong. To circumvent this issue, we accept a dictionary of ligands to rename.
We suggest renaming all custom ligands to begin with `L:` to avoid all clashes with the CCD. WARNING: This command uses brute-force find-and-replace; please ensure that there are no other possible matches (e.g., atom names). Additionally, avoid `#` to mitigate possible CIF-parsing errors from PyMol. +- `num_steps` (optional, defaults to 200): Number of steps for sampling of the diffusion model. The default is 200. We see no deterioration in performance with 50 steps, but significant (>2x) speed improvements. + +> *NOTE:* The CIF files are saved in a compressed format, `.cif.gz`. These compressed files can be directly loaded by PyMol or parsed by `cifutils`. If you need to inspect the uncompressed file, you can use `gunzip <file>`. + +> *NOTE:* The CIF output file will contain multiple **models**, one for each diffusion output (e.g., 5 by default). PyMol will hide secondary structure by default with multiple models; the command `dss` will display it again. + +Example commands (to be run from the `inference` working directory): + +### Using a JSON with multiple examples to predict +```bash +apptainer -s run --nv /net/software/containers/users/ncorley/modelhub/frozen_modelhub_datahub_cifutils_2025-02-06.sif python /projects/ml/modelhub/inference/run_inference.py /projects/ml/modelhub/inference/examples_from_json.json --checkpoint_path /projects/ml/modelhub/inference/weights_with_confidence_2025_01_06 --cif_out_dir ./ +``` + +### Using a PDB, specifying a covalent modification in the `CONECT` record *(example from Meg)* +See line `1672` for the manually-added bond; note as well the renaming of the ligand. Such renaming could be accomplished *a priori* by modifying the file (as in this example), or with the `rename_residues` flag (see below).
+```bash +apptainer -s run --nv /net/software/containers/users/ncorley/modelhub/frozen_modelhub_datahub_cifutils_2025-02-06.sif python /projects/ml/modelhub/inference/run_inference.py /projects/ml/modelhub/inference/example_from_pdb_with_inter_chain_bond.pdb --checkpoint_path /projects/ml/modelhub/inference/weights_with_confidence_2025_01_06 --cif_out_dir ./ +``` + +### Using a PDB from MPNN, renaming custom ligand that overlaps with ligand names in the CCD *(example from Indrek)* +Note that in this PDB file, the ligand "HGS" is a custom ligand, whose three-letter code overlaps with a real CCD ligand. Thus, we must rename. +```bash +apptainer -s run --nv /net/software/containers/users/ncorley/modelhub/frozen_modelhub_datahub_cifutils_2025-02-06.sif python /projects/ml/modelhub/inference/run_inference.py /projects/ml/modelhub/inference/example_pdb_with_clashing_ligand_name.pdb --checkpoint_path /projects/ml/modelhub/inference/weights_with_confidence_2025_01_06 --cif_out_dir ./ --rename_residues '{"HGS": "L:1"}' +``` + +## Step 3: View the Predicted Structure(s) + +Use the following code to view the predicted structures with `cifutils`: + +```python +from cifutils.utils.visualize import view +from cifutils import parse + +# View in CIFUtils (or PyMol, etc.) 
+out = parse("./predictions/json_inputs_0.cif") +atom_array = out["assemblies"]["1"][0] +view(atom_array) +``` + +View in PyMol like normal, or using `pymol_remote` diff --git a/src/modelhub/inference_engines/af3.py b/src/modelhub/inference_engines/af3.py new file mode 100644 index 0000000..af47577 --- /dev/null +++ b/src/modelhub/inference_engines/af3.py @@ -0,0 +1,356 @@ +from os import PathLike +from pathlib import Path + +import hydra +import numpy as np +import torch +from cifutils import parse +from omegaconf import OmegaConf + +from modelhub.utils.inference import build_file_paths_for_prediction +from modelhub.utils.ddp import RankedLogger, set_accelerator_based_on_availability +from modelhub.utils.logging import print_config_tree +from modelhub.utils.datasets import ( + assemble_distributed_inference_loader_from_list_of_paths, +) +from modelhub.utils.predicted_error import compile_af3_confidence_outputs, annotate_atom_array_b_factor_with_plddt +from modelhub.inference_engines.base import InferenceEngine +from modelhub.utils.io import ( + dump_structures, + dump_trajectories, + build_stack_from_atom_array_and_batched_coords, +) +import logging +from biotite.structure import AtomArray +from lightning.fabric import seed_everything + +logging.basicConfig(level=logging.INFO) +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + + +class AF3InferenceEngine(InferenceEngine): + """Class for inference with AF3. 
Evaluates a trained AF3 model on a set of spoofed CIFs.""" + + def __init__( + self, + # Base arguments + inputs: PathLike | list[PathLike], + ckpt_path: PathLike, + out_dir: PathLike | None, + num_nodes: int, + devices_per_node: int, + skip_existing: bool, + # Model args + n_recycles: int, + diffusion_batch_size: int, + residue_renaming_dict: str | dict, + num_steps: int, + solver: str, + print_config: bool, + temp_dir: PathLike, + seed: int, + # Structure dumping arguments + dump_predictions: bool, + dump_trajectories: bool, + one_model_per_file: bool, + ): + """Initialize the Inference Engine for AF3. + + Note that for inference, we initialize the Hydra configuration from the checkpoint; we then override specific parameters based on the input arguments + and inference-specific considerations. + + Args: + ckpt_path: Path to the checkpoint file. + out_dir: Directory for output files. If None, the current directory will be used. + skip_existing: If True, only predict the structures that are not already in the output directory. + num_nodes: Number of nodes for distributed inference. The default is 1. + devices_per_node: Number of devices per node for distributed inference. The default is 1. + + n_recycles (int): Number of recycles for AF3. + diffusion_batch_size (int): Diffusion batch size for AF3. Each predicted structure will be saved as a separate model within the same CIF file. + residue_renaming_dict (dict): Dictionary of residue names to rename to avoid CCD clashes, e.g., {'ALA': 'L:1'}. + num_steps (int): Number of steps for sampling of the diffusion model. AF-3 uses 200; we see no degradation in performance with 50. + solver (str): Solver to use for inference. Options are 'af3', 'simple', 'euler', and 'heun'. + print_config (bool): Pretty-print the Hydra configs. + temp_dir (PathLike): Temporary directory to store intermediate files. + seed (int): Random seed for reproducibility / augmentation. If None, the default seed from the config will be used. 
+ + dump_predictions (bool): Whether to dump structures (CIF files). + dump_trajectories (bool): Whether to dump denoising trajectories. + one_model_per_file (bool): If True, write each structure within a diffusion batch to its own CIF files. + If False, include each structure within a diffusion batch as a separate model within one CIF file. + """ + if solver != "af3": + # TODO: Port over additional solvers (Frank already coded; need to modify for new framework) + raise NotImplementedError( + f"Solver {solver} not implemented. Only 'af3' is supported for inference." + ) + + # Load the training config from the checkpoint + # TODO: Load checkpoint only once (instead of twice) + ranked_logger.info(f"Loading checkpoint from {Path(ckpt_path).resolve()}...") + checkpoint = torch.load( + ckpt_path, "cpu" + ) # We only extract the `train_cfg` from the checkpoint initially + self.cfg = OmegaConf.create(checkpoint["train_cfg"]) + + self.paths = build_file_paths_for_prediction( + input=inputs, + temp_dir=temp_dir, + existing_outputs_dir=out_dir if skip_existing else None + ) + + # Override specific parameters within the Hydra config: + # (a) based on the input arguments + self.cfg.model.net.inference_sampler.num_timesteps = num_steps + self.cfg.model.net.inference_sampler.solver = solver + self.cfg.trainer.num_nodes = num_nodes + self.cfg.trainer.devices_per_node = devices_per_node + + set_accelerator_based_on_availability(self.cfg) + + # (b) based on the dataset (we will apply when constructing the pipeline) + self.dataset_overrides = { + "diffusion_batch_size": diffusion_batch_size, + "n_recycles": n_recycles, + "undesired_res_names": [], + } + + self.print_config = print_config + + # ... 
set the random seed for reproducibility (and for augmentation, e.g., for antibodies) + seed = seed or self.cfg.seed + ranked_logger.info(f"Seeding everything with seed={seed}...") + seed_everything(seed, workers=True, verbose=True) + + ranked_logger.info("Instantiating trainer...") + if self.print_config: + print_config_tree( + self.cfg.trainer, resolve=True, title="INFERENCE TRAINER CONFIGURATION" + ) + + # ... instantiate the trainer with the (modified) configuration + self.trainer = hydra.utils.instantiate( + self.cfg.trainer, + _convert_="partial", + _recursive_=False, + ) + + self.ckpt_path = ckpt_path + + # Set the output directory for the CIF files (e.g., predicted structures) + self.cif_out_dir = Path(out_dir) if out_dir else Path("./") + + # Rename residues + self.residue_renaming_dict = residue_renaming_dict + self.temp_dir = Path(temp_dir) + + # Structure dumping + self.dump_predictions = dump_predictions + self.dump_trajectories = dump_trajectories + self.one_model_per_file = one_model_per_file + + def construct_pipeline(self): + """Construct the AF3 inference pipeline. + + By convention we use the "interface" dataset stored in the checkpoint to construct the pipeline. + """ + # ... find the first validation dataset stored under "val" + first_val_dataset_key, first_val_dataset = next( + iter(self.cfg.datasets.val.items()) + ) + ranked_logger.info( + f"Using the settings from the first validation dataset: {first_val_dataset_key}." + ) + + assert ( + first_val_dataset.dataset.transform.is_inference + ), "Inference must be enabled for the validation dataset." 
+ for key, value in self.dataset_overrides.items(): + first_val_dataset.dataset.transform[key] = value + + if self.print_config: + print_config_tree( + first_val_dataset.dataset.transform, + resolve=True, + title="INFERENCE TRANSFORM PIPELINE", + ) + + pipeline = hydra.utils.instantiate( + first_val_dataset.dataset.transform, + ) + + return pipeline + + def parse_from_path(self, path_to_structure: Path) -> dict: + """Parse a structure from a CIF file. + + Perform additional processing if necessary, such as renaming residues. + """ + # If we're renaming residues, we do a brute-force replacement in the CIF file + if self.residue_renaming_dict: + ranked_logger.info( + f"Renaming residues in {path_to_structure} with brute-force find and replace: {self.residue_renaming_dict}" + ) + with open(path_to_structure, "r") as f: + content = f.read() + for old_res, new_res in self.residue_renaming_dict.items(): + content = content.replace(old_res, str(new_res)) + path_to_structure = Path(self.temp_dir / path_to_structure.name) + with open(path_to_structure, "w") as f: + f.write(content) + + return parse(path_to_structure, remove_hydrogens=True) + + def prepare_atom_array(self, atom_array: AtomArray) -> AtomArray: + """Prepare the AtomArray for inference. + + By default, we set NaN coordinates to random values to avoid unexpected behavior in the pipeline. + """ + # HACK: Set NaN coordinates to random values to avoid unexpected behavior in the pipeline + # TODO: Hunt down why NaN coordinates lead to this behavior + atom_array.coord[np.isnan(atom_array.coord)] = np.random.rand( + *atom_array.coord[np.isnan(atom_array.coord)].shape + ) + + return atom_array + + def eval(self): + """Evaluate the model on a set of spoofed CIF files.""" + if self.print_config: + print_config_tree( + self.cfg.model, resolve=True, title="INFERENCE MODEL CONFIGURATION" + ) + + # ... 
spawn processes for distributed training, if using multiple GPUs + ranked_logger.info( + f"Spawning {self.trainer.fabric.world_size} processes from {self.trainer.fabric.global_rank}..." + ) + + # ============================================================================== + # Construct the model and load the checkpoint + # ============================================================================== + + self.trainer.initialize_or_update_trainer_state({"train_cfg": self.cfg}) + self.trainer.construct_model() + self.trainer.load_checkpoint(ckpt_path=self.ckpt_path) + + self.trainer.state["model"].eval() + + # ============================================================================== + # Prepare pipeline and inference loader + # ============================================================================== + + ranked_logger.info("Building Transform pipeline...") + + # Construct the AF3 inference pipeline + pipeline = self.construct_pipeline() + + ranked_logger.info(f"Found {len(self.paths)} structures to predict!") + + loader = assemble_distributed_inference_loader_from_list_of_paths( + paths=self.paths, + world_size=self.trainer.fabric.world_size, + rank=self.trainer.fabric.global_rank, + ) + + # ============================================================================== + # Evaluate, using `validation_step`` + # ============================================================================== + + for batch_idx, path_to_structure in enumerate(loader): + # (We only have one path per batch) + path_to_structure = path_to_structure[0] + + ranked_logger.info( + f"Predicting structure {batch_idx + 1}/{len(loader)}: {path_to_structure.name}" + ) + + # ... parse into an AtomArray (`parse` handles all valid formats) + ranked_logger.info(f"Parsing from path: {path_to_structure}") + example_id = path_to_structure.name.split(".")[0] + + out = self.parse_from_path(path_to_structure) + + # ... 
get the atom array and set NaN coordinates to random + atom_array = ( + out["assemblies"]["1"][0] + if "assemblies" in out + else out["asym_unit"][0] + ) + + atom_array = self.prepare_atom_array(atom_array) + + # ... assemble the pipeline input in a format compatible with the DataHub pipeline + pipeline_input = { + "example_id": example_id, + "atom_array": atom_array, + "chain_info": out["chain_info"], + } + + # ... run dataloading and featurization + pipeline_output = pipeline(pipeline_input) + + # Model inference + with torch.no_grad(): + pipeline_output = self.trainer.fabric.to_device(pipeline_output) + network_output = self.trainer.validation_step( + batch=pipeline_output, + batch_idx=0, + compute_metrics=False, + )["network_output"] + + # TODO: Log `metrics_output` to a file (or store directly within the CIF file) + + # ... build the predicted AtomArrayStack + atom_array_stack = build_stack_from_atom_array_and_batched_coords( + network_output["X_L"], pipeline_output["atom_array"] + ) + if "plddt" in network_output: + confidence_outs = compile_af3_confidence_outputs( + plddt_logits=network_output["plddt"], + pae_logits=network_output["pae"], + pde_logits=network_output["pde"], + chain_iid_token_lvl=pipeline_output["ground_truth"][ + "chain_iid_token_lvl" + ], + is_real_atom=pipeline_output["confidence_feats"]["is_real_atom"], + example_id=example_id, + confidence_loss_cfg=self.cfg.trainer.loss.confidence_loss, + ) + atom_array_list = annotate_atom_array_b_factor_with_plddt( + atom_array_stack, + confidence_outs["plddt"], + pipeline_output["confidence_feats"]["is_real_atom"], + ) + logging.info(f"Annotated PLDDT scores into B-factors for {example_id}. 
Forcing one model per file to accommodate separate b_factors in each model.") + self.one_model_per_file = True + confidence_outs["confidence_df"].to_csv( + self.cif_out_dir / f"{example_id}.score", index=False + ) + ranked_logger.info( + f"Confidence metrics for {example_id} written to {self.cif_out_dir / example_id}.score." + ) + + if self.dump_predictions: + dump_structures( + atom_arrays=atom_array_stack if not "plddt" in network_output else atom_array_list, + base_path=self.cif_out_dir / example_id, + one_model_per_file=self.one_model_per_file, + ) + + if self.dump_trajectories: + dump_trajectories( + trajectory_list=network_output["X_denoised_L_traj"], + atom_array=pipeline_output["atom_array"], + base_path=self.cif_out_dir / f"{example_id}_denoised", + ) + dump_trajectories( + trajectory_list=network_output["X_noisy_L_traj"], + atom_array=pipeline_output["atom_array"], + base_path=self.cif_out_dir / f"{example_id}_noisy", + ) + + ranked_logger.info( + f"Outputs for {example_id} written to {self.cif_out_dir / example_id}." 
+ ) diff --git a/src/modelhub/inference_engines/base.py b/src/modelhub/inference_engines/base.py new file mode 100644 index 0000000..44168e8 --- /dev/null +++ b/src/modelhub/inference_engines/base.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from os import PathLike +from pathlib import Path + + +class InferenceEngine(ABC): + """Abstract base class for inference pipelines.""" + + @abstractmethod + def __init__(self, **kwargs): + pass + + @abstractmethod + def eval(self, inputs: list[Path]) -> None: + """Run inference on input files.""" + pass diff --git a/rf2aa/kinematics.py b/src/modelhub/kinematics.py similarity index 99% rename from rf2aa/kinematics.py rename to src/modelhub/kinematics.py index 4092a22..b19ba5f 100644 --- a/rf2aa/kinematics.py +++ b/src/modelhub/kinematics.py @@ -4,7 +4,7 @@ import numpy as np import torch from openbabel import openbabel -from rf2aa.chemical import ChemicalData as ChemData +from modelhub.chemical import ChemicalData as ChemData PARAMS = { "DMIN": 1, diff --git a/rf2aa/loggers.py b/src/modelhub/loggers.py similarity index 100% rename from rf2aa/loggers.py rename to src/modelhub/loggers.py diff --git a/rf2aa/loss/af3_confidence_loss.py b/src/modelhub/loss/af3_confidence_loss.py similarity index 96% rename from rf2aa/loss/af3_confidence_loss.py rename to src/modelhub/loss/af3_confidence_loss.py index 22dc78a..30415e1 100644 --- a/rf2aa/loss/af3_confidence_loss.py +++ b/src/modelhub/loss/af3_confidence_loss.py @@ -2,10 +2,27 @@ import torch import torch.nn as nn from scipy.stats import spearmanr -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.loss.loss import mask_unresolved_frames_batched -from rf2aa.metrics.metric_utils import compute_mean_over_subsampled_pairs, unbin_logits -from rf2aa.util import get_frames, rigid_from_3_points +from modelhub.metrics.metric_utils import ( + compute_mean_over_subsampled_pairs, + unbin_logits, +) +from modelhub.utils.frames import ( + 
get_frames, + rigid_from_3_points, + mask_unresolved_frames_batched, +) +from omegaconf import DictConfig + +# TODO: REFACTOR; COPIED FROM RF2AA. WE NEED TO REMOVE CHEMDATA, ADD DOCSTRINGS, EXAMPLES, HOPEFULLY TESTS, AND CLEAN UP + +# HACK: Initialize ChemData without dependence on legacy configs +from modelhub.chemical import ChemicalData as ChemData + +chemdata = ChemData( + params=DictConfig( + {"use_phospate_frames_for_NA": False, "use_lj_params_for_atoms": True} + ) +) class ConfidenceLoss(nn.Module): @@ -46,9 +63,11 @@ class ConfidenceLoss(nn.Module): X_pred_L, X_gt_L, X_exists_L, loss_input["seq"], loss_input["is_real_atom"] ) - plddt_logits = network_output["plddt"].reshape( - -1, I, ChemData().NHEAVY, self.plddt.n_bins - ).permute(0,3,1,2) + plddt_logits = ( + network_output["plddt"] + .reshape(-1, I, ChemData().NHEAVY, self.plddt.n_bins) + .permute(0, 3, 1, 2) + ) plddt_loss = ( self.cce( plddt_logits, diff --git a/rf2aa/loss/af3_losses.py b/src/modelhub/loss/af3_losses.py similarity index 98% rename from rf2aa/loss/af3_losses.py rename to src/modelhub/loss/af3_losses.py index 3f2fdae..e05aa48 100644 --- a/rf2aa/loss/af3_losses.py +++ b/src/modelhub/loss/af3_losses.py @@ -3,8 +3,8 @@ import numpy as np import torch import torch.nn as nn -from rf2aa.alignment import weighted_rigid_align -from rf2aa.training.checkpoint import activation_checkpointing +from modelhub.alignment import weighted_rigid_align +from modelhub.training.checkpoint import activation_checkpointing # resolve residue-level symmetries in native vs pred @@ -639,6 +639,8 @@ def distogram_loss( distance_map = torch.cdist(X_rep_atoms_I, X_rep_atoms_I) distance_map[distance_map.isnan()] = 9999.0 bins = torch.linspace(min_distance, max_distance, bins).to(X_rep_atoms_I.device) + # Note that torch.bucketize adds a catch-all bin for values outside the range, + # so we end up with n_bins + 1 bins (65 in the case of AF-3) binned_distances = torch.bucketize(distance_map, bins) crd_mask_rep_atom_II = 
crd_mask_rep_atoms_I.unsqueeze( -1 diff --git a/rf2aa/loss/loss.py b/src/modelhub/loss/loss.py similarity index 99% rename from rf2aa/loss/loss.py rename to src/modelhub/loss/loss.py index 71de6a5..61954ed 100644 --- a/rf2aa/loss/loss.py +++ b/src/modelhub/loss/loss.py @@ -4,10 +4,10 @@ import networkx as nx import numpy as np import torch -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.kinematics import get_ang, get_dih -from rf2aa.scoring import HbHybType -from rf2aa.util import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.kinematics import get_ang, get_dih +from modelhub.scoring import HbHybType +from modelhub.util import ( find_all_paths_of_length_n, find_all_rigid_groups, is_nucleic, diff --git a/rf2aa/loss/loss_factory.py b/src/modelhub/loss/loss_factory.py similarity index 99% rename from rf2aa/loss/loss_factory.py rename to src/modelhub/loss/loss_factory.py index c6c8888..9e96c98 100644 --- a/rf2aa/loss/loss_factory.py +++ b/src/modelhub/loss/loss_factory.py @@ -3,9 +3,9 @@ from collections import OrderedDict import torch import torch.nn as nn -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.kinematics import c6d_to_bins, xyz_to_c6d -from rf2aa.loss.loss import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.kinematics import c6d_to_bins, xyz_to_c6d +from modelhub.loss.loss import ( calc_allatom_lddt_loss, calc_atom_bond_loss, calc_BB_bond_geom, @@ -21,7 +21,7 @@ from rf2aa.loss.loss import ( torsionAngleLoss, translation_vector_field, ) -from rf2aa.util import ( +from modelhub.util import ( Ls_from_same_chain_2d, get_frames, get_prot_sm_mask, diff --git a/rf2aa/memory.py b/src/modelhub/memory.py similarity index 100% rename from rf2aa/memory.py rename to src/modelhub/memory.py diff --git a/src/modelhub/metrics/base.py b/src/modelhub/metrics/base.py new file mode 100644 index 0000000..3161a1d --- /dev/null +++ b/src/modelhub/metrics/base.py @@ -0,0 +1,207 @@ +from abc 
import ABC, abstractmethod +from beartype.typing import Any + +import hydra +from omegaconf import DictConfig +from datahub.utils import error, nested_dict +import inspect +from toolz import valmap, keymap +from functools import cached_property + +from modelhub.utils.ddp import RankedLogger + +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + + +def instantiate_metric_manager( + metrics_cfg: dict[str, Any] | DictConfig, +) -> "MetricManager": + """Instantiate a MetricManager from a dictionary of metrics. + + Args: + metrics: A dictionary where keys are metric names and values are Hydra configurations for the metrics. + """ + metrics = {} + for name, cfg in metrics_cfg.items(): + metric = hydra.utils.instantiate(cfg) + if not isinstance(metric, Metric): + raise TypeError(f"{name} must be a Metric instance") + ranked_logger.info(f"Adding metric {name} to the validation metrics...") + metrics[name] = metric + return MetricManager(metrics) + + +class MetricInputError(Exception): + """Exception raised when a metric fails to compute.""" + + +class MetricManager: + """Manages and computes a set of Metrics, where each Metric inherits from the Metric class. + + For model validation, additional metrics can be added through the Hydra configuration; they + will be computed with the __call__ method automatically. + + For example, during AF-3, Metrics will receive `network_input`, `network_output`, `extra_info`, + `ground_truth_atom_array_stack`, and `predicted_atom_array_stack` as input arguments. + + Example: + >>> class ExampleMetric(Metric): + ... @cached_property + ... def kwargs_to_compute_args(self): + ... return {"x": "x", "y": "y", "extra_info": "extra_info"} + ... + ... def compute(self, x, y, extra_info): + ... 
return {"value": x + y} + >>> metric = ExampleMetric() + >>> manager = MetricManager({"my_metric": metric}, raise_errors=True) + >>> manager(x=1, y=2, extra_info={"example_id": "123"}) + {'example_id': '123', 'my_metric.value': 3} + """ + + def __init__( + self, + metrics: dict[str, "Metric"] = {}, + *, + raise_errors: bool = True, + ): + """Initialize the MetricManager with a set of metrics. + + Args: + raise_errors: Whether to raise errors when a metric fails to compute. + metrics: A dictionary where keys are metric names and values are Metric instances. + """ + self.raise_errors = raise_errors + self.metrics = {} + for name, metric in metrics.items(): + assert isinstance( + metric, Metric + ), f"{name} must be a Metric instance, not {type(metric)}" + self.metrics[name] = metric + + @classmethod + def instantiate_from_hydra( + cls, metrics_cfg: dict[str, Any] | DictConfig + ) -> "MetricManager": + """Instantiate a MetricManager from a dictionary of metrics. + + Args: + metrics_cfg: A dictionary where keys are metric names and values are Hydra configurations for the metrics. 
+ """ + return instantiate_metric_manager(metrics_cfg) + + def __repr__(self) -> str: + """Return a string representation of the MetricManager.""" + return f"MetricManager({', '.join(self.metrics.keys())})" + + def __call__(self, **kwargs: Any) -> dict[str, Any]: + """Compute all metrics and return their results as a dictionary mapping metric names to their computed values.""" + + # Extract example_id if it exists + example_id = nested_dict.get( + kwargs, key=("extra_info", "example_id"), default=None + ) + + # Initialize results dictionary + results = {"example_id": example_id} + + for name, metric in self.metrics.items(): + assert name not in results, f"Duplicate metric name: {name}" + + # Add some nice error handling context in case metrics fail + example_msg = ( + f" for example '{example_id}'" if example_id is not None else "" + ) + + with error.context( + msg=f"Computing '{name}' ({type(metric).__name__}){example_msg}", + raise_error=self.raise_errors, + exc_types=(MetricInputError, ValueError, TypeError, AttributeError), + ): + # ... compute the metric + metric_result = metric.compute_from_kwargs(**kwargs) + + # ... append 'name' to the keys of the metric result to ensure uniqueness + if isinstance(metric_result, dict): + metric_result = keymap(lambda k: f"{name}.{k}", metric_result) + results.update(metric_result) + elif isinstance(metric_result, list): + results[name] = metric_result + else: + raise ValueError("Unexpected result type: expected dict or list.") + + return results + + +class Metric(ABC): + """Abstract base class for Modelhub metrics. + + Defines a framework for computing metrics based on arbitrary keyword arguments. + + To implement a new metric, subclass this class and implement the `compute` method, at a minimum. 
+ """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + # Check that the 'keys' of the compute_from_kwargs method are a subset of the 'compute' methods signature + if self.kwargs_to_compute_args: + assert self.kwargs_to_compute_args.keys() <= self.required_compute_args, ( + f"The keys of the compute_from_kwargs method must be a subset of the 'compute' methods signature. " + f"{self.kwargs_to_compute_args.keys()} is not a subset of {self.required_compute_args}" + ) + + @cached_property + def required_compute_args(self) -> frozenset[str]: + """Required input keys for this metric""" + return frozenset(inspect.signature(self.compute).parameters.keys()) + + @cached_property + def required_kwargs(self) -> frozenset[str]: + """Required input keys for this metric""" + return frozenset(self.kwargs_to_compute_args.values()) + + def compute_from_kwargs(self, **kwargs: Any) -> dict[str, Any]: + """Run compute with an arbitrary dictionary of input keys and values. + + The 'kwargs_to_compute_args' property here will determine + where in the kwargs we will look for the values to pass to the compute method. + + If you need added flexibility (e.g. by passing certain defaults), you can override this method. + """ + if self.kwargs_to_compute_args: + _get = lambda key: nested_dict.getitem(kwargs, key=key) + compute_inputs = valmap(_get, self.kwargs_to_compute_args) + else: + # If kwargs_to_compute_args is not defined, use kwargs directly + compute_inputs = kwargs + return self.compute(**compute_inputs) + + @property + def kwargs_to_compute_args(self) -> dict[str, Any]: + """Map input keys to a flat dictionary. + + If not implemented, we return None, and pass the kwargs directly to the compute method. + + Override e.g. 
as: + ```python + @cached_property + def kwargs_to_compute_args(self) -> dict[str, Any]: + return { + "y_true": ("network_input", "coords_unnoised"), + "y_pred": ("network_output", "coords_pred"), + "extra_info": ("extra_info",), + } + ``` + """ + return None + + @abstractmethod + def compute(self, **kwargs: Any) -> dict[str, Any] | list[dict[str, Any]]: + """Implement actual metric calculation here + + Override e.g. as: + ```python + def compute(self, y_true, y_pred, extra_info): + print(extra_info) + return lddt(y_true, y_pred, thres=self.custom_thresholds) + ``` + """ + raise NotImplementedError diff --git a/rf2aa/metrics/chiral_metrics.py b/src/modelhub/metrics/chiral_metrics.py similarity index 91% rename from rf2aa/metrics/chiral_metrics.py rename to src/modelhub/metrics/chiral_metrics.py index a49391f..f3cec59 100644 --- a/rf2aa/metrics/chiral_metrics.py +++ b/src/modelhub/metrics/chiral_metrics.py @@ -31,16 +31,16 @@ def calc_chiral_loss_masked(pred, chirals, mask): class ChiralLoss(Metric): - def __call__(self, network_input, network_output, loss_input): + def compute(self, network_input, network_output, extra_info): chiral_loss = {"chiral_loss_sum": [], "chiral_loss_mean": [], "nchiral_centers": []} - chain_iid_token_lvl = loss_input["chain_iid_token_lvl"] + chain_iid_token_lvl = extra_info["chain_iid_token_lvl"] tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy() pred = network_output["X_L"] chirals = network_input["f"]['chiral_feats'] - for chain_i, chain_type in loss_input["pn_units_to_score"]: + for chain_i, chain_type in extra_info["pn_units_to_score"]: # get tokens in chain_i and chain_j chain_i_tokens = chain_iid_token_lvl == chain_i diff --git a/src/modelhub/metrics/distogram.py b/src/modelhub/metrics/distogram.py new file mode 100644 index 0000000..9f97618 --- /dev/null +++ b/src/modelhub/metrics/distogram.py @@ -0,0 +1,209 @@ +from beartype.typing import Any + +import torch.nn as nn + +from modelhub.loss.af3_losses import 
distogram_loss +from modelhub.metrics.base import Metric +import torch +from jaxtyping import Float +from biotite.structure import AtomArrayStack +from datahub.utils.token import get_af3_token_representative_idxs +import torch.nn.functional as F +from einops import rearrange, repeat +import numpy as np + + +class DistogramLoss(Metric): + """Computes the distogram loss, taking into account the coordinate mask.""" + + @property + def kwargs_to_compute_args(self) -> dict[str, Any]: + return { + "pred_distogram": ("network_output", "distogram"), + "X_rep_atoms_I": ("extra_info", "X_rep_atoms_I"), + "crd_mask_rep_atoms_I": ("extra_info", "crd_mask_rep_atoms_I"), + } + + def __init__(self): + super().__init__() + self.cce_loss = nn.CrossEntropyLoss(reduction="none") + + def compute( + self, + pred_distogram: Float[torch.Tensor, "I I n_bins"], + X_rep_atoms_I: Float[torch.Tensor, "I 3"], + crd_mask_rep_atoms_I: Float[torch.Tensor, "I"], + ) -> dict[str, Any]: + """Computes the distogram loss. + + Args: + pred_distogram: The predicted distogram. Shape: [I, I, n_bins], where n_bins is the number of bins (64 + 1 = 65). + X_rep_atoms_I: The ground-truth coordinates of the representative atoms for each token. Shape: [I, 3]. + crd_mask_rep_atoms_I: A boolean mask indicating which representative atoms are present. Shape: [I]. + """ + loss = distogram_loss( + pred_distogram, X_rep_atoms_I, crd_mask_rep_atoms_I, self.cce_loss + ) + return {"distogram_loss": loss.detach().item()} + +def bin_distances(coords: Float[torch.Tensor, "... L 3"], min_distance: int = 2, max_distance: int = 22, n_bins: int = 64) -> Float[torch.Tensor, "... L L {n_bins}+1"]: + # TODO: Refactor loss to use this function instead (more re-usable) + """Converts coordinates into binned distances according to the given parameters. + + NOTE: Our returned number of bins will be n_bins + 1, as torch.bucketize adds an additional bin for values greater than the maximum. 
+ + Args: + coords (torch.Tensor): The input tensor of coordinates. May be batched. + min_distance (float): The minimum distance for binning. + max_distance (float): The maximum distance for binning. + n_bins (int): The number of bins to use. + + Returns: + torch.Tensor: The binned distances. + """ + # Compute pairwise distances + distance_map = torch.cdist(coords, coords) + + # (Replace NaN's with a large value to avoid issues with bucketize) + distance_map = torch.nan_to_num(distance_map, nan=9999.0) + + # ... bin the distances + n_bins = torch.linspace(min_distance, max_distance, n_bins).to(coords.device) + binned_distances = torch.bucketize(distance_map, n_bins) + + return binned_distances + + +def masked_distogram_cross_entropy_loss( + input: Float[torch.Tensor, "D I I n_bins"], + target: Float[torch.Tensor, "D I I"], + mask: Float[torch.Tensor, "I I"] = None, +) -> torch.Tensor: + # TODO: Refactor loss to use this function instead (more re-usable) + """Computes the masked cross-entropy between two distograms. + + Note that the cross-entropy loss is not symmetric; that is, H(x, y) != H(y, x). + """ + # From the PyTorch documentation (where C = number of classes, N = batch size): + # > Input: Shape: (C), (N, C) or (N, C, d1, d2, ..., dk) + # > Target: Shape: (N) or (N, d1, d2, ..., dk) where each value should be between [0, C) + input = rearrange(input, 'd i j n_bins -> d n_bins i j') + loss = F.cross_entropy(input, target, reduction="none") + + # Apply mask and normalize + masked_loss = loss * mask if mask is not None else loss + normalized_loss = masked_loss.sum(dim=(-1, -2)) / mask.sum() + 1e-4 # [D] + + return normalized_loss + + +class DistogramComparisons(Metric): + """Compares model distogram representations. + + Namely: + - The representation from the TRUNK vs. GROUND TRUTH + - The representation from the TRUNK vs. PREDICTED COORDINATES + + Optionally, we also subset to intra-ligand (atomized) distances. 
+ """ + @property + def kwargs_to_compute_args(self) -> dict[str, Any]: + return { + "X_L": ("network_output", "X_L"), # [D, L, 3] + "trunk_pred_distogram": ("network_output", "distogram"), # [I, I, 65], where 65 is the number of bins (64 + 1) + "X_rep_atoms_I": ("extra_info", "X_rep_atoms_I"), # [D, I, 3] + "crd_mask_rep_atoms_I": ("extra_info", "crd_mask_rep_atoms_I"), # [D, I] + "ground_truth_atom_array_stack": "ground_truth_atom_array_stack", + } + + def __init__(self, separate_atomized_tokens: bool = True): + """ + Args: + separate_atomized: Whether to log separate comparisons for atomized tokens + """ + super().__init__() + self.separate_atomized_tokens = separate_atomized_tokens + + def compute( + self, + X_L: Float[torch.Tensor, "D L 3"], + trunk_pred_distogram: Float[torch.Tensor, "I I n_bins"], + X_rep_atoms_I: Float[torch.Tensor, "D I 3"], + crd_mask_rep_atoms_I: Float[torch.Tensor, "D I"], + ground_truth_atom_array_stack: AtomArrayStack, + ) -> dict[str, Any]: + """Computes the distogram loss for the trunk vs. ground truth and trunk vs. predicted coordinates. + + Optionally, we also subset to intra-ligand (atomized) distances. + + Args: + X_L: The predicted coordinates. Shape: [D, L, 3] + trunk_pred_distogram: The prediction from the DistogramHead, which linearly projects the trunk features. Shape: [I, I, n_bins] + X_rep_atoms_I: The ground-truth coordinates of the representative atoms for each token. Shape: [D, I, 3] + crd_mask_rep_atoms_I: A boolean mask indicating which representative atoms are present. Shape: [D, I] + ground_truth_atom_array_stack: The ground-truth atom array stack, one model per diffusion sample. Shape: [D, L] + """ + MIN_ATOMIZED = 5 + + # ... 
choose the first model, as we only care about 2D distance (frame-invariant) + ground_truth_atom_array = ground_truth_atom_array_stack[0] + + _token_rep_idxs = torch.from_numpy(get_af3_token_representative_idxs(ground_truth_atom_array)).to(X_L.device) + token_rep_atom_array = ground_truth_atom_array[get_af3_token_representative_idxs(ground_truth_atom_array)] + + # Create 2D coordinate mask for valid pairs of representative atoms + crd_mask_rep_atom_II = crd_mask_rep_atoms_I.unsqueeze(-1) * crd_mask_rep_atoms_I.unsqueeze(-2) + + results = {} + + # ... trunk vs. ground truth + binned_distogram_from_ground_truth = bin_distances(X_rep_atoms_I, n_bins=64) + results["trunk_vs_ground_truth_cce"] = masked_distogram_cross_entropy_loss( + trunk_pred_distogram.unsqueeze(0), binned_distogram_from_ground_truth.unsqueeze(0), crd_mask_rep_atom_II + ).detach().item() + + # ... trunk vs. predicted coordinates + # (Predicted coordinates are batched, so we build the distogram for each predicted structure) + binned_distogram_from_pred_coords = bin_distances(X_L[:, _token_rep_idxs], n_bins=64) + losses = masked_distogram_cross_entropy_loss( + repeat(trunk_pred_distogram, "i j n_bins -> d i j n_bins", d=binned_distogram_from_pred_coords.shape[0]), + binned_distogram_from_pred_coords, + crd_mask_rep_atom_II + ) + + results.update({ + f"trunk_vs_pred_coords_cce_{i}": loss.detach().item() + for i, loss in enumerate(losses) + }) + + if self.separate_atomized_tokens and np.sum(token_rep_atom_array.atomize) > MIN_ATOMIZED: + # ... trunk vs. 
ground truth (atomized) + + # Create a mask that is both atomized and intra-residue + same_pn_unit_mask_LL = np.equal.outer(token_rep_atom_array.pn_unit_iid, token_rep_atom_array.pn_unit_iid) + same_res_id_mask_LL = np.equal.outer(token_rep_atom_array.res_id, token_rep_atom_array.res_id) + atomized_mask_LL = np.outer(token_rep_atom_array.atomize, token_rep_atom_array.atomize) + atomized_intra_mask = torch.from_numpy(same_pn_unit_mask_LL * same_res_id_mask_LL * atomized_mask_LL).to(X_L.device) * crd_mask_rep_atom_II + + # Compute the losses, applying the mask + results["trunk_vs_ground_truth_cce_ligand_intra"] = masked_distogram_cross_entropy_loss( + trunk_pred_distogram.unsqueeze(0), binned_distogram_from_ground_truth.unsqueeze(0), atomized_intra_mask + ).detach().item() + + # ... trunk vs. predicted coordinates (atomized) + losses = masked_distogram_cross_entropy_loss( + repeat(trunk_pred_distogram, "i j n_bins -> d i j n_bins", d=binned_distogram_from_pred_coords.shape[0]), + binned_distogram_from_pred_coords, + atomized_intra_mask + ) + results.update({ + f"trunk_vs_pred_coords_cce_ligand_intra_{i}": loss.detach().item() + for i, loss in enumerate(losses) + }) + + return results + + + + + diff --git a/src/modelhub/metrics/lddt.py b/src/modelhub/metrics/lddt.py new file mode 100644 index 0000000..971442d --- /dev/null +++ b/src/modelhub/metrics/lddt.py @@ -0,0 +1,346 @@ +import torch +from beartype.typing import Any + +from modelhub.metrics.base import Metric + + +def calc_lddt( + X_L, + X_gt_L, + crd_mask_L, + tok_idx, + pairs_to_score=None, + distance_cutoff=15.0, + eps=1e-6, +): + """Calculates LDDT scores. + + Args: + X_L: Predicted coordinates (D, L, 3). + X_gt_L: Ground truth coordinates (D, L, 3). + crd_mask_L: Coordinate mask (D, L). + tok_idx: Token index of each atom (L,). + pairs_to_score: Pairs to score (L, L) or None. + distance_cutoff: Distance cutoff for scoring. + eps: Small epsilon to prevent division by zero. 
+ + Returns: + LDDT scores as a tensor. + """ + # TODO: Refactor for clarity + D, L = X_L.shape[:2] + if pairs_to_score is None: + pairs_to_score = torch.ones((L, L), dtype=torch.bool).triu(0).to(X_L.device) + else: + assert pairs_to_score.shape == (L, L) + pairs_to_score = pairs_to_score.triu(0).to(X_L.device) + + first_index, second_index = torch.nonzero(pairs_to_score, as_tuple=True) + + lddt = [] + for d in range(D): + ground_truth_distances = torch.linalg.norm( + X_gt_L[d, first_index] - X_gt_L[d, second_index], dim=-1 + ) + + pair_mask = torch.logical_and( + ground_truth_distances > 0, ground_truth_distances < distance_cutoff + ) + + # only score pairs that are resolved in the ground truth + pair_mask *= crd_mask_L[d, first_index] * crd_mask_L[d, second_index] + # don't score pairs that are in the same token + pair_mask *= tok_idx[first_index] != tok_idx[second_index] + + valid_pairs = pair_mask.nonzero(as_tuple=True) + pair_mask = pair_mask[valid_pairs].to(X_L.dtype) + ground_truth_distances = ground_truth_distances[valid_pairs] + first_index, second_index = first_index[valid_pairs], second_index[valid_pairs] + + predicted_distances = torch.linalg.norm( + X_L[d, first_index] - X_L[d, second_index], dim=-1 + ) + + delta_distances = torch.abs(predicted_distances - ground_truth_distances + eps) + del predicted_distances, ground_truth_distances + + lddt.append( + 0.25 + * ( + torch.sum((delta_distances < 4.0) * pair_mask) + + torch.sum((delta_distances < 2.0) * pair_mask) + + torch.sum((delta_distances < 1.0) * pair_mask) + + torch.sum((delta_distances < 0.5) * pair_mask) + ) + / (torch.sum(pair_mask) + eps) + ) + + return torch.tensor(lddt) + + +class AllAtomLDDT(Metric): + """Computes all-atom LDDT scores.""" + + @property + def kwargs_to_compute_args(self) -> dict[str, Any]: + return { + "X_L": ("network_output", "X_L"), + "X_gt_L": ("extra_info", "X_gt_L"), + "crd_mask_L": ("extra_info", "crd_mask_L"), + "tok_idx": ("network_input", "f", 
class AllAtomLDDT(Metric):
    """Computes all-atom LDDT scores."""

    @property
    def kwargs_to_compute_args(self) -> dict[str, Any]:
        """Return, per ``compute`` keyword, the tuple of keys it is read from.

        NOTE(review): the tuples look like lookup paths resolved by the
        ``Metric`` base class -- confirm against ``modelhub.metrics.base``.
        """
        return {
            "X_L": ("network_output", "X_L"),
            "X_gt_L": ("extra_info", "X_gt_L"),
            "crd_mask_L": ("extra_info", "crd_mask_L"),
            "tok_idx": ("network_input", "f", "atom_to_token_map"),
        }

    def compute(
        self,
        X_L: torch.Tensor,
        X_gt_L: torch.Tensor,
        crd_mask_L: torch.Tensor,
        tok_idx: torch.Tensor,
    ) -> dict:
        """Calculates all-atom LDDT.

        Args:
            X_L: Predicted coordinates (D, L, 3).
            X_gt_L: Ground truth coordinates (D, L, 3).
            crd_mask_L: Coordinate mask (D, L), indicating which atoms are resolved.
            tok_idx: Atom-level map to token index (L,).

        Returns:
            A dictionary with the LDDT of the first sample (``best_of_1_lddt``)
            and the maximum over all D samples (``best_of_{D}_lddt``). For
            D == 1 both keys coincide and hold the same value.
        """
        all_atom_lddt = calc_lddt(
            X_L=X_L,
            X_gt_L=X_gt_L,
            crd_mask_L=crd_mask_L,
            # Move directly to the prediction's device; no numpy round trip needed
            tok_idx=tok_idx.to(X_L.device),
            pairs_to_score=None,  # By default, score all pairs, except those within the same token
            distance_cutoff=15.0,
        )

        return {
            "best_of_1_lddt": all_atom_lddt[0].item(),
            f"best_of_{len(all_atom_lddt)}_lddt": all_atom_lddt.max().item(),
        }


# TODO: Rewrite with new Metrics API
class ByTypeInterfaceLDDT(Metric):
    """Computes interface LDDT, grouped by interface type"""

    def compute(
        self, network_input: dict, network_output: dict, extra_info: dict, **kwargs
    ) -> list[dict]:
        """Calculates interface LDDT.

        Args:
            network_input: Network input data; ``["f"]["atom_to_token_map"]`` is read.
            network_output: Network output data; ``"X_L"`` and, if present,
                ``"confidence"`` are read.
            extra_info: Additional data for metric computation
                (``"interfaces_to_score"``, ``"chain_iid_token_lvl"``,
                ``"X_gt_L"``, ``"crd_mask_L"``).

        Returns:
            One result dictionary per scored interface; an empty list when
            ``"interfaces_to_score"`` is absent from ``extra_info``.
        """
        # Short-circuit
        if "interfaces_to_score" not in extra_info:
            return []

        interface_results = []

        # Map from token to pn_unit_iid
        pn_unit_iid_token_lvl = extra_info["chain_iid_token_lvl"]  # [n_tokens]

        # Map from atom to token
        tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy()  # [n_atoms]

        # Loop over the interfaces to score (e.g., pn_unit_i, pn_unit_j, interface_type)
        # SECURITY(review): eval() executes arbitrary code if this string can
        # ever come from an untrusted source. If the payload is always a plain
        # Python literal, ast.literal_eval is the safe replacement -- confirm
        # the serialized format before switching.
        interfaces_to_score = (
            eval(extra_info["interfaces_to_score"])
            if isinstance(extra_info["interfaces_to_score"], str)
            else extra_info["interfaces_to_score"]
        )
        for pn_unit_i, pn_unit_j, interface_type in interfaces_to_score:
            # Get tokens in pn_unit_i and pn_unit_j
            pn_unit_i_tokens = pn_unit_iid_token_lvl == pn_unit_i
            pn_unit_j_tokens = pn_unit_iid_token_lvl == pn_unit_j

            # Convert the token level to the atom level
            pn_unit_i_atoms = pn_unit_i_tokens[tok_idx]
            pn_unit_j_atoms = pn_unit_j_tokens[tok_idx]

            # Compute the outer product of chain_i and chain_j, which represents the interface
            chain_ij_atoms = torch.einsum(
                "L, K -> LK",
                torch.tensor(pn_unit_i_atoms),
                torch.tensor(pn_unit_j_atoms),
            ).to(network_output["X_L"].device)

            # Symmetrize the interface so we can later multiply with an upper triangular without losing information
            chain_ij_atoms = chain_ij_atoms | chain_ij_atoms.T

            # compute lddt using the pairs_to_score from the interface mask
            lddt = calc_lddt(
                network_output["X_L"],
                extra_info["X_gt_L"],
                extra_info["crd_mask_L"],
                torch.tensor(tok_idx).to(network_output["X_L"].device),
                pairs_to_score=chain_ij_atoms,
                distance_cutoff=30.0,
            )

            # add the results to the interface_results list
            n = len(lddt)
            result = {
                "pn_units": [pn_unit_i, pn_unit_j],
                "type": interface_type,
                "best_of_1_lddt": lddt[0].item(),
                f"best_of_{n}_lddt": lddt.max().item(),
            }

            # if confidence features are present, add them
            if "confidence" in network_output:
                confidence = network_output["confidence"]
                pae_idx = confidence["pae_idx"]
                pde_idx = confidence["pde_idx"]
                plddt_idx = confidence["plddt_idx"]
                # TODO: This lookup would be best implemented as a sorted Tuple of PN Unit IIDs or a symmetric 2D lookup table rather than with non-symmetrized strings
                interface_key = f"{pn_unit_i}-{pn_unit_j}"
                af3_style_ipae_idx = confidence["best_interface_idx"][interface_key]
                result.update(
                    {
                        "oracle_by_pae": lddt[pae_idx].item(),
                        "oracle_by_pde": lddt[pde_idx].item(),
                        "oracle_by_plddt": lddt[plddt_idx].item(),
                        "oracle_by_af3_style_ipae": lddt[af3_style_ipae_idx].item(),
                        "oracle_by_af3_style_lig_ipae": lddt[
                            confidence["best_lig_ipae_idx"][interface_key]
                        ].item(),
                    }
                )

            interface_results.append(result)

        return interface_results


# TODO: Rewrite with new Metrics API
class ChainLDDTByType(Metric):
    """Computes chain-wise LDDT, grouped by chain type"""

    def compute(
        self, network_input: dict, network_output: dict, extra_info: dict, **kwargs
    ) -> list[dict]:
        """Calculates chain (PN unit) LDDT.

        Args:
            network_input: Network input data; ``["f"]["atom_to_token_map"]`` is read.
            network_output: Network output data; ``"X_L"`` and, if present,
                ``"confidence"`` are read.
            extra_info: Additional data for metric computation
                (``"pn_units_to_score"``, ``"chain_iid_token_lvl"``,
                ``"X_gt_L"``, ``"crd_mask_L"``).

        Returns:
            One result dictionary per scored chain; an empty list when
            ``"pn_units_to_score"`` is absent from ``extra_info``.
        """
        if "pn_units_to_score" not in extra_info:
            return []

        chain_results = []

        chain_iid_token_lvl = extra_info["chain_iid_token_lvl"]
        tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy()

        # For all chains (pn_units) to score...
        # SECURITY(review): eval() executes arbitrary code if this string can
        # ever come from an untrusted source; prefer ast.literal_eval once the
        # serialized format is confirmed to be a plain Python literal.
        pn_units_to_score = (
            eval(extra_info["pn_units_to_score"])
            if isinstance(extra_info["pn_units_to_score"], str)
            else extra_info["pn_units_to_score"]
        )
        for chain, chain_type in pn_units_to_score:
            # ... get the tokens that belong to this chain
            chain_tokens = chain_iid_token_lvl == chain

            # ... convert the token level to the atom level
            chain_atoms = chain_tokens[tok_idx]

            # ... compute the outer product of the chain with itself (the definition of intra-lddt)
            chain_ij_atoms = torch.einsum(
                "L, K -> LK", torch.tensor(chain_atoms), torch.tensor(chain_atoms)
            ).to(network_output["X_L"].device)

            # ... compute lddt using the intra-chain pairs_to_score
            #     (default distance cutoff of calc_lddt applies here)
            lddt = calc_lddt(
                network_output["X_L"],
                extra_info["X_gt_L"],
                extra_info["crd_mask_L"],
                torch.tensor(tok_idx).to(network_output["X_L"].device),
                pairs_to_score=chain_ij_atoms,
            )

            # ... and finally add the results to the chain_results list
            n = len(lddt)
            result = {
                "pn_units": [chain],
                "type": chain_type,
                "best_of_1_lddt": lddt[0].item(),
                f"best_of_{n}_lddt": lddt.max().item(),
            }

            if "confidence" in network_output:
                confidence = network_output["confidence"]
                result.update(
                    {
                        "oracle_by_pae": lddt[confidence["pae_idx"]].item(),
                        "oracle_by_pde": lddt[confidence["pde_idx"]].item(),
                        "oracle_by_plddt": lddt[confidence["plddt_idx"]].item(),
                        "oracle_by_af3_style_chain": lddt[
                            confidence["best_chain_to_all_idx"][chain]
                        ].item(),
                        "oracle_by_af3_style_single_chain": lddt[
                            confidence["best_chain_to_self_idx"][chain]
                        ].item(),
                    }
                )
            chain_results.append(result)

        return chain_results


# TODO: Refactor to use new metrics API
class ByTypeLDDT(Metric):
    """Calculates LDDT scores by type for both chains and interfaces"""

    def __init__(self):
        # NOTE(review): Metric.__init__ is not invoked here -- confirm the
        # base class does not rely on it.
        self.interface_lddt = ByTypeInterfaceLDDT()
        self.chain_lddt = ChainLDDTByType()

    def compute(
        self, network_input: dict, network_output: dict, extra_info: dict, **kwargs
    ) -> list[dict]:
        """Return interface results followed by chain results as one list."""
        # Compute interface LDDT scores
        interface_results = self.interface_lddt.compute(
            network_input, network_output, extra_info
        )

        # Compute chain LDDT scores
        chain_results = self.chain_lddt.compute(
            network_input, network_output, extra_info
        )

        # Merge the results
        combined_results = interface_results + chain_results

        return combined_results


# TODO: Rewrite with new Metrics API
class LDDTByDiffusionStep(Metric):
    """Computes LDDT for every denoised structure of the diffusion trajectory."""

    def compute(self, network_input, network_output, loss_input):
        """Score each entry of ``network_output["X_denoised_L_traj"]``.

        Args:
            network_input: Network input data; ``["f"]["atom_to_token_map"]`` is read.
            network_output: Network output data; ``"X_denoised_L_traj"`` is iterated.
            loss_input: Provides ``"X_gt_L"`` and ``"crd_mask_L"``.

        Returns:
            ``{"lddt_by_step": [...]}`` with one ``calc_lddt`` result per
            trajectory step, in trajectory order.
        """
        lddt_by_step = {"lddt_by_step": []}
        tok_idx = network_input["f"]["atom_to_token_map"].cpu().numpy()
        for X_L in network_output["X_denoised_L_traj"]:
            lddt = calc_lddt(
                X_L,
                loss_input["X_gt_L"],
                loss_input["crd_mask_L"],
                # Use the device of the structure being scored: the final
                # network_output["X_L"] may be None (confidence model).
                torch.tensor(tok_idx).to(X_L.device),
            )
            lddt_by_step["lddt_by_step"].append(lddt)
        return lddt_by_step
class AF3(nn.Module):
    """AF3 Network module.

    We adhere to the PyTorch Lightning Style Guide; see (1).

    References:
        (1) PyTorch Lightning Style Guide: https://lightning.ai/docs/pytorch/latest/starter/style_guide.html
    """

    def __init__(
        self,
        *,
        # Arguments for modules that will be instantiated
        feature_initializer: DictConfig | dict,
        recycler: DictConfig | dict,
        diffusion_module: DictConfig | dict,
        distogram_head: DictConfig | dict,
        inference_sampler: DictConfig | dict,
        # Channel dimensions
        c_s: int,  # AF-3: 384,
        c_z: int,  # AF-3: 128,
        c_atom: int,  # AF-3: 128,
        c_atompair: int,  # AF-3: 16,
        c_s_inputs: int,  # AF-3: 449,
    ) -> None:
        """Initializes the AF3 model.

        All arguments are keyword-only.

        Args:
            feature_initializer: Arguments for FeatureInitializer
            recycler: Arguments for Recycler
            diffusion_module: Arguments for DiffusionModule
            distogram_head: Arguments for DistogramHead
            inference_sampler: Arguments for the inference sampler (contains no trainable parameters).
                SamplePartialDiffusion is used when the config has a truthy "partial_t" entry,
                SampleDiffusion otherwise.
            c_s: Token-level single representation channel dimension
            c_z: Token-level pair representation channel dimension
            c_atom: Atom-level single representation channel dimension
            c_atompair: Atom-level pair representation channel dimension
            c_s_inputs: Channel dimension of the token-level single representation input
                (S_inputs_I, documented as [I, c_s_inputs] in `trunk_forward_with_recycling`);
                forwarded to FeatureInitializer
        """
        super().__init__()

        # ... initialize the FeatureInitializer, which creates the initial token- and atom-level representations and conditioning
        self.feature_initializer = FeatureInitializer(
            c_s=c_s,
            c_z=c_z,
            c_atom=c_atom,
            c_atompair=c_atompair,
            c_s_inputs=c_s_inputs,
            **feature_initializer,
        )

        # ... initialize the Recycler, which runs the trunk repeatedly with shared weights
        self.recycler = Recycler(c_s=c_s, c_z=c_z, **recycler)
        self.diffusion_module = DiffusionModule(
            c_atom=c_atom,
            c_atompair=c_atompair,
            c_s=c_s,
            c_z=c_z,
            **diffusion_module,
        )
        self.distogram_head = DistogramHead(c_z=c_z, **distogram_head)

        # ... initialize the inference sampler, which performs a full diffusion rollout during inference
        self.inference_sampler = (
            SampleDiffusion(**inference_sampler)
            if not inference_sampler.get("partial_t", False)
            else SamplePartialDiffusion(**inference_sampler)
        )

    def forward(
        self,
        input: dict,
        n_cycle: int,
        coord_atom_lvl_to_be_noised: torch.Tensor | None = None,
    ) -> dict:
        """Complete forward pass of the model.

        Runs recycling with gradients only on final recycle.

        Args:
            input (dict): Dictionary of model inputs. Reads "f" and "t", plus "X_noisy_L" in training mode.
            n_cycle (int): Number of recycling cycles for the trunk
            coord_atom_lvl_to_be_noised (torch.Tensor): Atom-level coordinates to be noised further. Optional;
                only passed on to the inference sampler (i.e., ignored in training mode).

        Returns:
            dict: Dictionary of model outputs, including:
                - X_L: Predicted atomic coordinates [D, L, 3]
                - distogram: Predicted distogram [I, I, C], where C is the number of bins in the distogram
                - If not training, additional lists are returned, each of length T:
                    * X_noisy_L_traj: List of noisy atomic coordinates at each timestep [D, L, 3]
                    * X_denoised_L_traj: List of denoised atomic coordinates at each timestep [D, L, 3]
                    * t_hats: List of tensor scalars representing the noise schedule at each timestep
        """
        # ... recycling
        # Gives dictionary of outputs S_inputs_I, S_init_I, Z_init_II, S_I, Z_II
        recycling_outputs = self.trunk_forward_with_recycling(
            f=input["f"], n_recycles=n_cycle
        )

        # Predict the distogram from the pair representation
        distogram_pred = self.distogram_head(recycling_outputs["Z_II"])

        # ... post-recycling (diffusion module)
        if self.training:
            # Single denoising step
            X_pred = self.diffusion_module(
                X_noisy_L=input["X_noisy_L"],
                t=input["t"],
                f=input["f"],
                S_inputs_I=recycling_outputs["S_inputs_I"],
                S_trunk_I=recycling_outputs["S_I"],
                Z_trunk_II=recycling_outputs["Z_II"],
            )  # [D, L, 3]
            return dict(
                X_L=X_pred,
                distogram=distogram_pred,
            )
        else:
            # Full diffusion rollout (no gradients, or will OOM)
            # NOTE(review): no torch.no_grad() is entered here; presumably the
            # caller disables gradients for evaluation -- confirm.
            sample_diffusion_outs = self.inference_sampler.sample_diffusion_like_af3(
                f=input["f"],
                S_inputs_I=recycling_outputs["S_inputs_I"],
                S_trunk_I=recycling_outputs["S_I"],
                Z_trunk_II=recycling_outputs["Z_II"],
                diffusion_module=self.diffusion_module,
                # The diffusion batch size D is taken from the length of "t"
                diffusion_batch_size=input["t"].shape[0],
                coord_atom_lvl_to_be_noised=coord_atom_lvl_to_be_noised,
            )
            return dict(
                X_L=sample_diffusion_outs["X_L"],
                distogram=distogram_pred,
                # For reporting, inference (validation or testing) only
                X_noisy_L_traj=sample_diffusion_outs["X_noisy_L_traj"],
                X_denoised_L_traj=sample_diffusion_outs["X_denoised_L_traj"],
                t_hats=sample_diffusion_outs["t_hats"],
            )

    def trunk_forward_with_recycling(self, f: dict, n_recycles: int) -> dict:
        """Forward pass of the AF-3 trunk

        (e.g., the recycling process, including the MSAModule, PairformerStack, etc.).

        Notes:
            - We run with gradients ONLY on the final recycle
            - All recycles use shared weights (ResNet-style)
            - `f["msa"]` is overwritten in place on every recycle with `f["msa_stack"][i_cycle]`
            - With `n_recycles == 0` the loop body never runs and S_I / Z_II are returned as the
              zero tensors created by `pre_recycle`

        Args:
            f: Feature dictionary
            n_recycles: Number of recycles to run

        Returns:
            dict: Recycling outputs, with keys:
                - S_inputs_I: Token-level single representation input, prior to AtomAttention [I, c_s_inputs]
                - S_init_I: Token-level single representation initialization [I, c_s], after AtomAttention but before recycling stack
                - Z_init_II: Token-level pair representation initialization [I, I, c_z], after AtomAttention but before recycling stack
                - S_I: Token-level single representation [I, c_s], after recycling stack
                - Z_II: Token-level pair representation [I, I, c_z], after recycling stack
        """
        # ... initialize the recycling process (feature initialization)
        # Gives S_inputs_I, S_init_I, Z_init_II, S_I, Z_II
        initialized_features = self.pre_recycle(f)

        # ... collect the recycling inputs, which will be updated in place
        # ("f" is the caller's dictionary itself, not a copy)
        recycling_inputs = {**initialized_features, "f": f}

        for i_cycle in range(n_recycles):
            with ExitStack() as stack:
                # For the first n_recycles - 1 cycles (all but the last recycle), we run without gradients
                if i_cycle < n_recycles - 1:
                    stack.enter_context(torch.no_grad())

                # Select the MSA for the current recycle (we sample an i.i.d. MSA for each recycle)
                recycling_inputs["f"]["msa"] = f["msa_stack"][i_cycle]

                # Run the model trunk (MSAModule, PairformerStack, etc.)
                # We alter the S_I and Z_II in place such that the next iteration uses the updated values
                recycling_inputs = self.recycle(**recycling_inputs)

        return {
            "S_inputs_I": recycling_inputs["S_inputs_I"],
            "S_init_I": recycling_inputs["S_init_I"],
            "Z_init_II": recycling_inputs["Z_init_II"],
            "S_I": recycling_inputs["S_I"],
            "Z_II": recycling_inputs["Z_II"],
        }

    def pre_recycle(self, f: dict) -> dict:
        """Prepare feature inputs for recycling.

        Includes:
            - Feature initialization (S_inputs_I, S_init_I, Z_init_II)
            - Initializing S_I and Z_II to zeros

        Args:
            f: Feature dictionary, passed to the FeatureInitializer

        Returns:
            dict: Dictionary of recycling inputs, including:
                - S_inputs_I: Token-level single representation input (prior to AtomAttention) [I, c_s_inputs]
                - S_init_I: Token-level single representation initialization [I, c_s] (after round of AtomAttention)
                - Z_init_II: Token-level pair representation initialization [I, I, c_z] (after round of AtomAttention)
                - S_I: Token-level single representation [I, c_s], initialized to zeros
                - Z_II: Token-level pair representation [I, I, c_z], initialized to zeros
        """
        S_inputs_I, S_init_I, Z_init_II = self.feature_initializer(f)
        S_I = torch.zeros_like(S_init_I)
        Z_II = torch.zeros_like(Z_init_II)

        return dict(
            S_inputs_I=S_inputs_I,
            S_init_I=S_init_I,
            Z_init_II=Z_init_II,
            S_I=S_I,
            Z_II=Z_II,
        )

    def recycle(
        self,
        S_inputs_I,
        S_init_I,
        Z_init_II,
        S_I,
        Z_II,
        f,
    ) -> dict:
        """Run the Recycler once and return the inputs for the next recycle.

        Only S_I and Z_II are replaced by the Recycler outputs; all other
        entries are passed through unchanged so the returned dictionary can be
        fed straight back into this method via `**`.
        """
        S_I, Z_II = self.recycler(
            f=f,
            S_inputs_I=S_inputs_I,
            S_init_I=S_init_I,
            Z_init_II=Z_init_II,
            S_I=S_I,
            Z_II=Z_II,
        )
        return dict(
            S_inputs_I=S_inputs_I,
            S_init_I=S_init_I,
            Z_init_II=Z_init_II,
            S_I=S_I,
            Z_II=Z_II,
            f=f,
        )


class AF3WithConfidence(AF3):
    """Model for training and inference with confidence metric computation"""

    def __init__(
        self,
        confidence_head: DictConfig | dict,
        mini_rollout_sampler: DictConfig | dict,
        **kwargs,
    ) -> None:
        """
        Args:
            (... all arguments from the AF3 class, forwarded via **kwargs)
            confidence_head: Hydra configuration for the confidence head architecture
            mini_rollout_sampler: Hydra configuration for the mini-rollout sampler (e.g., SampleDiffusion with 20 rather than
                200 timesteps. Note that the `inference_sampler` argument in the AF3 class will still be used for full
                rollouts during inference)
        """
        # (Lazy import)
        from modelhub.model.layers.af3_auxiliary_heads import ConfidenceHead  # noqa

        super().__init__(**kwargs)

        self.confidence_head = ConfidenceHead(**confidence_head)
        self.mini_rollout_sampler = SampleDiffusion(**mini_rollout_sampler)

    def forward(
        self,
        input: dict,
        n_cycle: int,
        coord_atom_lvl_to_be_noised: torch.Tensor | None = None,
    ) -> dict:
        """Complete forward pass of the model with confidence head.

        Notes:
            - Performs a mini-rollout without gradients during training (e.g., 20 timesteps) and a full rollout (e.g., 200 timesteps) during inference
            - Runs the trunk forward without gradients to conserve memory (which departs from the AF-3 implementation)
            - Runs the forward pass (with gradients) for the confidence model

        Args:
            input (dict): Dictionary of model inputs. In addition to the standard AF-3 model inputs ("f", "t"), we read:
                - seq: TBD
                - rep_atom_idxs: TBD
                - frame_atom_idxs: TBD
            n_cycle (int): Number of recycling cycles for the trunk
            coord_atom_lvl_to_be_noised (torch.Tensor): Atom-level coordinates to be noised further. Optional;
                passed on to whichever sampler performs the rollout.

        Returns:
            dict: Dictionary of model outputs, including:
                - X_L: Always None in this class (see X_pred_rollout_L)
                - X_pred_rollout_L: Predicted atomic coordinates [D, L, 3] (from the mini rollout during training or full rollout during inference)
                - distogram: Predicted distogram
                - X_noisy_L_traj, X_denoised_L_traj, t_hats: Sampler trajectories, for reporting
                - plddt: "plddt_logits" output of the confidence head
                - pae: "pae_logits" output of the confidence head
                - pde: "pde_logits" output of the confidence head
                - exp_resolved: "exp_resolved_logits" output of the confidence head
        """
        diffusion_batch_size = input["t"].shape[0]
        with torch.no_grad():
            # ... recycling
            # Gives dictionary of outputs S_inputs_I, S_init_I, Z_init_II, S_I, Z_II
            recycling_outputs = self.trunk_forward_with_recycling(
                f=input["f"], n_recycles=n_cycle
            )

            # Predict the distogram from the pair representation
            # (NOTE: Not necessary for confidence head training, but helpful for reporting)
            distogram_pred = self.distogram_head(recycling_outputs["Z_II"])

            # ... post-recycling (diffusion module)
            if self.training:
                # Mini-rollout
                sample_diffusion_outs = (
                    self.mini_rollout_sampler.sample_diffusion_like_af3(
                        f=input["f"],
                        S_inputs_I=recycling_outputs["S_inputs_I"],
                        S_trunk_I=recycling_outputs["S_I"],
                        Z_trunk_II=recycling_outputs["Z_II"],
                        diffusion_module=self.diffusion_module,
                        diffusion_batch_size=diffusion_batch_size,
                        coord_atom_lvl_to_be_noised=coord_atom_lvl_to_be_noised,
                    )
                )
            else:
                # Full diffusion rollout (no gradients, or will OOM)
                sample_diffusion_outs = (
                    self.inference_sampler.sample_diffusion_like_af3(
                        f=input["f"],
                        S_inputs_I=recycling_outputs["S_inputs_I"],
                        S_trunk_I=recycling_outputs["S_I"],
                        Z_trunk_II=recycling_outputs["Z_II"],
                        diffusion_module=self.diffusion_module,
                        diffusion_batch_size=diffusion_batch_size,
                        coord_atom_lvl_to_be_noised=coord_atom_lvl_to_be_noised,
                    )
                )

        # ... run batched confidence head
        # (outside the no_grad block, under activation checkpointing)
        confidence = checkpoint.checkpoint(
            create_custom_forward(
                self.confidence_head, frame_atom_idxs=input["frame_atom_idxs"]
            ),
            recycling_outputs["S_inputs_I"],
            recycling_outputs["S_I"],
            recycling_outputs["Z_II"],
            sample_diffusion_outs["X_L"],
            input["seq"],
            input["rep_atom_idxs"],
            use_reentrant=False,
        )

        return dict(
            # We return X_L as X_pred_rollout_L to support future joint training with the confidence head (where we would have both X_L and X_pred_rollout_L)
            X_L=None,
            distogram=distogram_pred,
            # For reporting, inference (validation or testing) only
            X_noisy_L_traj=sample_diffusion_outs["X_noisy_L_traj"],
            X_denoised_L_traj=sample_diffusion_outs["X_denoised_L_traj"],
            t_hats=sample_diffusion_outs["t_hats"],
            # Confidence outputs
            X_pred_rollout_L=sample_diffusion_outs["X_L"],
            plddt=confidence["plddt_logits"],
            pae=confidence["pae_logits"],
            pde=confidence["pde_logits"],
            exp_resolved=confidence["exp_resolved_logits"],
        )
rename from rf2aa/model/AF3_blocks.py rename to src/modelhub/model/AF3_blocks.py index ea630aa..c504ed9 100644 --- a/rf2aa/model/AF3_blocks.py +++ b/src/modelhub/model/AF3_blocks.py @@ -5,18 +5,18 @@ import torch.nn.functional as F import torch.utils.checkpoint as checkpoint from opt_einsum import contract as einsum -from rf2aa.model.layers.Attention_module import ( +from modelhub.model.layers.Attention_module import ( FeedForwardLayer, MSAColGlobalAttention, MSARowAttentionWithBias, TriangleAttention, TriangleMultiplication, ) -from rf2aa.model.layers.outer_product import ( +from modelhub.model.layers.outer_product import ( OuterProductMean, ) # need to code this correctly -from rf2aa.training.checkpoint import activation_checkpointing -from rf2aa.util_module import Dropout, init_lecun_normal +from modelhub.training.checkpoint import activation_checkpointing +from modelhub.util_module import Dropout, init_lecun_normal # MSA transformer @@ -247,37 +247,51 @@ class MsaSubsampleEmbedder(nn.Module): class MsaPairWeightedAverage(nn.Module): """implements Algorithm 10 from AF3 paper""" - def __init__(self, c_weighted_average, n_heads, c_msa_embed, c_z, separate_gate_for_every_channel): + def __init__( + self, + c_weighted_average, + n_heads, + c_msa_embed, + c_z, + separate_gate_for_every_channel, + ): super(MsaPairWeightedAverage, self).__init__() self.weighted_average_channels = c_weighted_average self.n_heads = n_heads self.msa_channels = c_msa_embed self.pair_channels = c_z self.norm_msa = nn.LayerNorm(self.msa_channels) - self.to_v = nn.Linear(self.msa_channels, self.n_heads*self.weighted_average_channels, bias=False) + self.to_v = nn.Linear( + self.msa_channels, self.n_heads * self.weighted_average_channels, bias=False + ) self.norm_pair = nn.LayerNorm(self.pair_channels) self.to_bias = nn.Linear(self.pair_channels, self.n_heads, bias=False) self.separate_gate_for_every_channel = separate_gate_for_every_channel if self.separate_gate_for_every_channel: - 
self.to_gate = nn.Linear(self.msa_channels, self.weighted_average_channels*self.n_heads, bias=False) + self.to_gate = nn.Linear( + self.msa_channels, + self.weighted_average_channels * self.n_heads, + bias=False, + ) else: self.to_gate = nn.Linear(self.msa_channels, self.n_heads, bias=False) - self.to_out = nn.Linear(self.weighted_average_channels*self.n_heads, self.msa_channels, bias=False) + self.to_out = nn.Linear( + self.weighted_average_channels * self.n_heads, self.msa_channels, bias=False + ) @activation_checkpointing - def forward(self, - msa_SI, - pair_II - ): + def forward(self, msa_SI, pair_II): S, I = msa_SI.shape[:2] # normalize inputs msa_SI = self.norm_msa(msa_SI) # construct values, bias and weights - v_SIH = self.to_v(msa_SI).reshape(S, I, self.n_heads, self.weighted_average_channels) + v_SIH = self.to_v(msa_SI).reshape( + S, I, self.n_heads, self.weighted_average_channels + ) bias_IIH = self.to_bias(self.norm_pair(pair_II)) w_IIH = F.softmax(bias_IIH, dim=-2) @@ -286,11 +300,11 @@ class MsaPairWeightedAverage(nn.Module): # compute weighted average & apply gate if self.separate_gate_for_every_channel: - weights = torch.einsum( "ijh,sjhc->sihc", w_IIH, v_SIH).reshape(S, I, -1) + weights = torch.einsum("ijh,sjhc->sihc", w_IIH, v_SIH).reshape(S, I, -1) o_SIH = gate_SIH * weights else: - weights = torch.einsum( "ijh,sjhc->sihc", w_IIH, v_SIH) - o_SIH = gate_SIH[...,None] * weights + weights = torch.einsum("ijh,sjhc->sihc", w_IIH, v_SIH) + o_SIH = gate_SIH[..., None] * weights # concatenate heads and project msa_update_SI = self.to_out(o_SIH.reshape(S, I, -1)) diff --git a/rf2aa/model/AF3_structure.py b/src/modelhub/model/AF3_structure.py similarity index 71% rename from rf2aa/model/AF3_structure.py rename to src/modelhub/model/AF3_structure.py index 9ff3d0d..6228368 100644 --- a/rf2aa/model/AF3_structure.py +++ b/src/modelhub/model/AF3_structure.py @@ -4,20 +4,19 @@ from contextlib import ExitStack import torch import torch.nn as nn -from 
rf2aa.model.layers.af3_diffusion_transformer import ( +from modelhub.model.layers.af3_diffusion_transformer import ( AtomAttentionEncoderDiffusion, AtomTransformer, DiffusionTransformer, ) -from rf2aa.model.layers.layer_utils import Transition, linearNoBias -from rf2aa.model.layers.pairformer_layers import ( - FeatureInitializer, +from modelhub.model.layers.layer_utils import Transition, linearNoBias +from modelhub.model.layers.pairformer_layers import ( MSAModule, PairformerBlock, RelativePositionEncoding, TemplateEmbedder, ) -from rf2aa.training.checkpoint import activation_checkpointing +from modelhub.training.checkpoint import activation_checkpointing logger = logging.getLogger(__name__) @@ -254,139 +253,6 @@ class FourierEmbedding(nn.Module): return torch.cos(2 * pi * (t[:, None] * self.w + self.b)) -class Model(nn.Module): - def __init__( - self, - c_s, - c_z, - c_atom, - c_atompair, - feature_initializer, - recycler, - diffusion_module, - distogram_head, - # confidence_head, - **kwargs, - ): - super().__init__() - self.feature_initializer = FeatureInitializer( - c_s=c_s, - c_z=c_z, - c_atom=c_atom, - c_atompair=c_atompair, - **feature_initializer, - ) - self.recycler = Recycler(c_s=c_s, c_z=c_z, **recycler) - self.diffusion_module = DiffusionModule( - c_atom=c_atom, c_atompair=c_atompair, c_s=c_s, c_z=c_z, **diffusion_module - ) - self.distogram_head = DistogramHead(c_z=c_z, **distogram_head) - - def forward(self, input, n_cycle, no_sync, use_amp=False): - """ - Runs recycling with gradients only on final recycle. 
- - Assums model has methods: - pre_recycle: input --> recycling_input - recycle: recycling_input --> recycling_input - post_recycle: recycling_input --> output - """ - recycling_input = self.trunk_forward( - input, n_cycle=n_cycle, no_sync=no_sync, use_amp=use_amp - ) - return self.post_recycle(**recycling_input) - - def trunk_forward(self, input, n_cycle, no_sync, use_amp=False): - recycling_input = self.pre_recycle(**input) - for i_cycle in range(n_cycle): - with ExitStack() as stack: - if i_cycle < n_cycle - 1: - stack.enter_context(torch.no_grad()) - stack.enter_context(no_sync()) - recycling_input["f"]["msa"] = input["f"]["msa_stack"][i_cycle].to( - input["t"].device - ) - recycling_input = self.recycle(**recycling_input) - return recycling_input - - def pre_recycle(self, f, X_noisy_L, t): - S_inputs_I, S_init_I, Z_init_II = self.feature_initializer(f) - S_I = torch.zeros_like(S_init_I) - Z_II = torch.zeros_like(Z_init_II) - return dict( - S_inputs_I=S_inputs_I, - S_init_I=S_init_I, - Z_init_II=Z_init_II, - S_I=S_I, - Z_II=Z_II, - f=f, - X_noisy_L=X_noisy_L, - t=t, - ) - - def recycle( - self, - S_inputs_I, - S_init_I, - Z_init_II, - S_I, - Z_II, - f, - X_noisy_L, - t, - ): - S_I, Z_II = self.recycler( - f=f, - S_inputs_I=S_inputs_I, - S_init_I=S_init_I, - Z_init_II=Z_init_II, - S_I=S_I, - Z_II=Z_II, - ) - return dict( - S_inputs_I=S_inputs_I, - S_init_I=S_init_I, - Z_init_II=Z_init_II, - S_I=S_I, - Z_II=Z_II, - f=f, - X_noisy_L=X_noisy_L, - t=t, - ) - - def post_recycle( - self, - S_inputs_I, - S_init_I, - Z_init_II, - S_I, - Z_II, - f, - X_noisy_L, - t, - is_training=True, - ): - if is_training: - X_pred = self.diffusion_module( - X_noisy_L.float(), - t, - f, - S_inputs_I.float(), - S_I.float(), - Z_II.float(), - ) - else: - X_pred = None - distogram_pred = self.distogram_head(Z_II) - return dict( - X_L=X_pred, - distogram=distogram_pred, - S_inputs_I=S_inputs_I.float(), - S_I=S_I.float(), - Z_II=Z_II.float(), - ) - - class DistogramHead(nn.Module): def 
__init__( self, diff --git a/rf2aa/model/RoseTTAFoldModel.py b/src/modelhub/model/RoseTTAFoldModel.py similarity index 96% rename from rf2aa/model/RoseTTAFoldModel.py rename to src/modelhub/model/RoseTTAFoldModel.py index 3c5db49..7ef5d1c 100644 --- a/rf2aa/model/RoseTTAFoldModel.py +++ b/src/modelhub/model/RoseTTAFoldModel.py @@ -5,24 +5,24 @@ import torch.nn as nn from assertpy import assert_that from icecream import ic -import rf2aa.util -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.model.layers.AuxiliaryPredictor import ( +import modelhub.util +from modelhub.chemical import ChemicalData as ChemData +from modelhub.model.layers.AuxiliaryPredictor import ( BinderNetwork, DistanceNetwork, LDDTNetwork, MaskedTokenNetwork, PAENetwork, ) -from rf2aa.model.layers.Embeddings import ( +from modelhub.model.layers.Embeddings import ( Bond_emb, Extra_emb, MSA_emb, Templ_emb, recycling_factory, ) -from rf2aa.model.Track_module import IterativeSimulator -from rf2aa.tensor_util import assert_equal, assert_shape +from modelhub.model.Track_module import IterativeSimulator +from modelhub.tensor_util import assert_equal, assert_shape logger = logging.getLogger(__name__) @@ -310,8 +310,8 @@ class LegacyRoseTTAFoldModule(nn.Module): assert_that(mask_t.device).is_equal_to(device) assert_that(same_chain.device).is_equal_to(device) - # is_sm = rf2aa.util.is_atom(seq[0]) # (L) (prot + non-protein) - is_prot = rf2aa.util.is_protein(seq[0]) # (L) (protein)() + # is_sm = modelhub.util.is_atom(seq[0]) # (L) (prot + non-protein) + is_prot = modelhub.util.is_protein(seq[0]) # (L) (protein)() if self.verbose_checks: # ic(is_motif.shape) # is_protein_motif = is_motif & ~is_sm @@ -532,10 +532,10 @@ class LegacyRoseTTAFoldModule(nn.Module): ic(pseq_0.shape) pseq_0 = pseq_0[0] ic( - f"motif sequence: {rf2aa.util.seq2chars(torch.argmax(pseq_0[is_motif], dim=-1).tolist())}" # rf2aa.chemical.seq2chars is not callable + f"motif sequence: 
{modelhub.util.seq2chars(torch.argmax(pseq_0[is_motif], dim=-1).tolist())}" # modelhub.chemical.seq2chars is not callable ) ic( - f"diffused sequence: {rf2aa.util.seq2chars(torch.argmax(pseq_0[~is_motif], dim=-1).tolist())}" + f"diffused sequence: {modelhub.util.seq2chars(torch.argmax(pseq_0[~is_motif], dim=-1).tolist())}" ) logits_pae = logits_pde = p_bind = None diff --git a/rf2aa/model/Track_module.py b/src/modelhub/model/Track_module.py similarity index 99% rename from rf2aa/model/Track_module.py rename to src/modelhub/model/Track_module.py index 9a26175..5afc971 100644 --- a/rf2aa/model/Track_module.py +++ b/src/modelhub/model/Track_module.py @@ -6,16 +6,16 @@ import torch.nn.functional as F import torch.utils.checkpoint as checkpoint from opt_einsum import contract as einsum -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.kinematics import Qs2Rs, normQ -from rf2aa.loss.loss import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.kinematics import Qs2Rs, normQ +from modelhub.loss.loss import ( calc_chiral_grads, calc_lj_grads, ) -from rf2aa.model.layers.Attention_module import * -from rf2aa.model.layers.SE3_network import SE3TransformerWrapper -from rf2aa.util import is_atom, xyz_frame_from_rotation_mask -from rf2aa.util_module import * +from modelhub.model.layers.Attention_module import * +from modelhub.model.layers.SE3_network import SE3TransformerWrapper +from modelhub.util import is_atom, xyz_frame_from_rotation_mask +from modelhub.util_module import * # Components for three-track blocks # 1. MSA -> MSA update (biased attention. 
bias from pair & structure) diff --git a/rf2aa/model/af3_with_rollout.py b/src/modelhub/model/af3_with_rollout.py similarity index 98% rename from rf2aa/model/af3_with_rollout.py rename to src/modelhub/model/af3_with_rollout.py index 8e1c2dc..146cf34 100644 --- a/rf2aa/model/af3_with_rollout.py +++ b/src/modelhub/model/af3_with_rollout.py @@ -2,7 +2,7 @@ import torch import torch.nn as nn import torch.utils.checkpoint as checkpoint -from rf2aa.training.checkpoint import create_custom_forward +from modelhub.training.checkpoint import create_custom_forward class AF3_with_rollout(nn.Module): diff --git a/rf2aa/model/embedding_blocks.py b/src/modelhub/model/embedding_blocks.py similarity index 97% rename from rf2aa/model/embedding_blocks.py rename to src/modelhub/model/embedding_blocks.py index ef7b141..46febf8 100644 --- a/rf2aa/model/embedding_blocks.py +++ b/src/modelhub/model/embedding_blocks.py @@ -1,8 +1,8 @@ import torch import torch.nn as nn -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.model.layers.Embeddings import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.model.layers.Embeddings import ( Bond_emb, Extra_emb, MSA_emb, @@ -59,9 +59,9 @@ class RF2_embedding(nn.Module): d_msa=d_msa, d_pair=d_pair, d_state=d_state ) self.recycling_type = block_params.recycling_type - assert self.recycling_type != "all", ( - "no backward compatibility to recycling state" - ) + assert ( + self.recycling_type != "all" + ), "no backward compatibility to recycling state" def _unpack_inputs(self, rf_inputs): msa_latent, msa_full, seq, idx, bond_feats, dist_matrix = ( diff --git a/rf2aa/model/generative_refinement.py b/src/modelhub/model/generative_refinement.py similarity index 97% rename from rf2aa/model/generative_refinement.py rename to src/modelhub/model/generative_refinement.py index 5bed0f0..7ecadc2 100644 --- a/rf2aa/model/generative_refinement.py +++ b/src/modelhub/model/generative_refinement.py @@ -1,19 +1,18 @@ -import dgl 
import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from opt_einsum import contract as einsum -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.flow_matching import data_utils as du -from rf2aa.model.AF3_structure import FourierEmbedding -from rf2aa.model.layers.Attention_module import FeedForwardLayer -from rf2aa.model.layers.SE3_network import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.flow_matching import data_utils as du +from modelhub.model.AF3_structure import FourierEmbedding +from modelhub.model.layers.Attention_module import FeedForwardLayer +from modelhub.model.layers.SE3_network import ( SE3TransformerWrapper, ) -from rf2aa.util import is_atom, is_nucleic, is_protein -from rf2aa.util_module import init_lecun_normal +from modelhub.util import is_atom, is_nucleic, is_protein +from modelhub.util_module import init_lecun_normal def get_bondgraph(bonds, num_bonds, dist_matrix, idx, is_prot, is_na, is_atom): @@ -89,6 +88,8 @@ def make_atom_graph( with max_nbonds_connect=2, ~11 atoms are brought in by bonds alone with max_nbonds_connect=1, ~4 atoms are brought in by bonds alone """ + import dgl + B, L, A = xyz.shape[:3] device = xyz.device D = torch.norm(xyz[:, None, None, :, :] - xyz[:, :, :, None, None], dim=-1) diff --git a/rf2aa/model/layers/Attention_module.py b/src/modelhub/model/layers/Attention_module.py similarity index 99% rename from rf2aa/model/layers/Attention_module.py rename to src/modelhub/model/layers/Attention_module.py index de7687a..26e67e4 100644 --- a/rf2aa/model/layers/Attention_module.py +++ b/src/modelhub/model/layers/Attention_module.py @@ -7,8 +7,8 @@ from deepspeed.ops.deepspeed4science import DS4Sci_EvoformerAttention from einops import rearrange from opt_einsum import contract as einsum -from rf2aa.training.checkpoint import activation_checkpointing -from rf2aa.util_module import init_lecun_normal +from modelhub.training.checkpoint import 
activation_checkpointing +from modelhub.util_module import init_lecun_normal class FeedForwardLayer(nn.Module): @@ -586,6 +586,9 @@ class TriangleAttention(nn.Module): self.use_deepspeed_evo = use_deepspeed_evo + if not torch.cuda.is_available(): + self.use_deepspeed_evo = False + self.reset_parameter() def reset_parameter(self): diff --git a/rf2aa/model/layers/AuxiliaryPredictor.py b/src/modelhub/model/layers/AuxiliaryPredictor.py similarity index 98% rename from rf2aa/model/layers/AuxiliaryPredictor.py rename to src/modelhub/model/layers/AuxiliaryPredictor.py index 462b252..15ea394 100644 --- a/rf2aa/model/layers/AuxiliaryPredictor.py +++ b/src/modelhub/model/layers/AuxiliaryPredictor.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn -from rf2aa.chemical import ChemicalData as ChemData +from modelhub.chemical import ChemicalData as ChemData class DistanceNetwork(nn.Module): diff --git a/rf2aa/model/layers/Embeddings.py b/src/modelhub/model/layers/Embeddings.py similarity index 99% rename from rf2aa/model/layers/Embeddings.py rename to src/modelhub/model/layers/Embeddings.py index b92d0c5..6296768 100644 --- a/rf2aa/model/layers/Embeddings.py +++ b/src/modelhub/model/layers/Embeddings.py @@ -3,13 +3,13 @@ import torch.nn as nn import torch.nn.functional as F import torch.utils.checkpoint as checkpoint -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.model.layers.Attention_module import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.model.layers.Attention_module import ( Attention, ) -from rf2aa.model.Track_module import PairStr2Pair, PositionalEncoding2D -from rf2aa.util import * -from rf2aa.util_module import ( +from modelhub.model.Track_module import PairStr2Pair, PositionalEncoding2D +from modelhub.util import * +from modelhub.util_module import ( create_custom_forward, init_lecun_normal, rbf, diff --git a/rf2aa/model/layers/SE3_network.py b/src/modelhub/model/layers/SE3_network.py similarity index 96% rename from 
rf2aa/model/layers/SE3_network.py rename to src/modelhub/model/layers/SE3_network.py index 38a2b6f..02aa418 100644 --- a/rf2aa/model/layers/SE3_network.py +++ b/src/modelhub/model/layers/SE3_network.py @@ -3,16 +3,16 @@ import inspect import torch import torch.nn as nn -from rf2aa.loss.loss import calc_chiral_grads -from rf2aa.model.layers.Attention_module import FeedForwardLayer -from rf2aa.model.layers.resnet import SCPred -from rf2aa.SE3Transformer.se3_transformer.model import SE3Transformer -from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber +from modelhub.loss.loss import calc_chiral_grads +from modelhub.model.layers.Attention_module import FeedForwardLayer +from modelhub.model.layers.resnet import SCPred +from modelhub.SE3Transformer.se3_transformer.model import SE3Transformer +from modelhub.SE3Transformer.se3_transformer.model.fiber import Fiber # script_dir = os.path.dirname(os.path.realpath(__file__))+'/' # sys.path.insert(0,script_dir+'SE3Transformer') -from rf2aa.util import xyz_frame_from_rotation_mask -from rf2aa.util_module import ( +from modelhub.util import xyz_frame_from_rotation_mask +from modelhub.util_module import ( get_seqsep_protein_sm, init_lecun_normal, make_full_graph, @@ -21,7 +21,7 @@ from rf2aa.util_module import ( se3_transformer_path = inspect.getfile(SE3Transformer) se3_fiber_path = inspect.getfile(Fiber) -assert "rf2aa" in se3_transformer_path +assert "modelhub" in se3_transformer_path class SE3TransformerWrapper(nn.Module): diff --git a/rf2aa/model/layers/af3_auxiliary_heads.py b/src/modelhub/model/layers/af3_auxiliary_heads.py similarity index 92% rename from rf2aa/model/layers/af3_auxiliary_heads.py rename to src/modelhub/model/layers/af3_auxiliary_heads.py index db3a49e..5e6674c 100644 --- a/rf2aa/model/layers/af3_auxiliary_heads.py +++ b/src/modelhub/model/layers/af3_auxiliary_heads.py @@ -2,9 +2,11 @@ import torch import torch.nn as nn import torch.nn.functional as F -import rf2aa -from rf2aa.chemical import 
ChemicalData as ChemData -from rf2aa.model.AF3_structure import PairformerBlock, linearNoBias +import modelhub +from modelhub.model.AF3_structure import PairformerBlock, linearNoBias + +# TODO: Get from RF2AA encoding instead +CHEM_DATA_LEGACY = {"NHEAVY": 23, "aa2num": {"UNK": 20, "GLY": 7, "MAS": 21}} def discretize_distance_matrix( @@ -58,16 +60,18 @@ class ConfidenceHead(nn.Module): self.pairformer = nn.ModuleList( [ - PairformerBlock(c_s=c_s, c_z=c_z, **pairformer) + PairformerBlock(c_s=c_s, c_z=c_z, use_deepspeed_evo=False, **pairformer) for _ in range(n_pairformer_layers) ] ) self.predict_pae = linearNoBias(c_z, n_bins_pae) self.predict_pde = linearNoBias(c_z, n_bins_pde) - self.predict_plddt = linearNoBias(c_s, ChemData().NHEAVY * n_bins_plddt) + self.predict_plddt = linearNoBias( + c_s, CHEM_DATA_LEGACY["NHEAVY"] * n_bins_plddt + ) self.predict_exp_resolved = linearNoBias( - c_s, ChemData().NHEAVY * n_bins_exp_resolved + c_s, CHEM_DATA_LEGACY["NHEAVY"] * n_bins_exp_resolved ) self.use_Cb_distances = use_Cb_distances if self.use_Cb_distances: @@ -113,9 +117,6 @@ class ConfidenceHead(nn.Module): Z_trunk_II = F.layer_norm(Z_trunk_II, normalized_shape=(Z_trunk_II.shape)) S_inputs_I = F.layer_norm(S_inputs_I, normalized_shape=(S_inputs_I.shape)) - # for debugging, make pair zero - # Z_trunk_II = torch.zeros_like(Z_trunk_II, dtype=Z_trunk_II.dtype) - # embed S_inputs_I twice S_inputs_I_right = self.process_s_inputs_right(S_inputs_I) S_inputs_I_left = self.process_s_inputs_left(S_inputs_I) @@ -178,7 +179,6 @@ class ConfidenceHead(nn.Module): S_trunk_residual_I = S_trunk_I.clone() Z_trunk_residual_II = Z_trunk_II.clone() for n in range(len(self.pairformer)): - # S_trunk_I, Z_trunk_II = checkpoint.checkpoint(self.pairformer[n], S_trunk_I, Z_trunk_II, use_reentrant=False) S_trunk_I, Z_trunk_II = self.pairformer[n](S_trunk_I, Z_trunk_II) # despite doing so in their pseudocode, af3's published code does not add the residual back @@ -232,11 +232,11 @@ def 
calc_Cb_distances(X_pred_L, seq, rep_atoms, frame_atom_idxs): Cb = X_pred_L.index_select(1, rep_atoms) is_valid_Cb = ( - (seq != ChemData().aa2num["UNK"]) - & (seq != ChemData().aa2num["GLY"]) - & (seq != ChemData().aa2num["MAS"]) + (seq != CHEM_DATA_LEGACY["aa2num"]["UNK"]) + & (seq != CHEM_DATA_LEGACY["aa2num"]["GLY"]) + & (seq != CHEM_DATA_LEGACY["aa2num"]["MAS"]) ) - is_valid_Cb = is_valid_Cb & rf2aa.util.is_protein(seq) + is_valid_Cb = is_valid_Cb & modelhub.util.is_protein(seq) b = Ca - N c = C - Ca diff --git a/rf2aa/model/layers/af3_diffusion_transformer.py b/src/modelhub/model/layers/af3_diffusion_transformer.py similarity index 97% rename from rf2aa/model/layers/af3_diffusion_transformer.py rename to src/modelhub/model/layers/af3_diffusion_transformer.py index a2e106b..73baa27 100644 --- a/rf2aa/model/layers/af3_diffusion_transformer.py +++ b/src/modelhub/model/layers/af3_diffusion_transformer.py @@ -3,15 +3,16 @@ import torch import torch.nn as nn from deepspeed.ops.deepspeed4science import DS4Sci_EvoformerAttention -from rf2aa.model.layers.layer_utils import ( +from modelhub.model.layers.layer_utils import ( AdaLN, LinearBiasInit, MultiDimLinear, collapse, linearNoBias, ) -from rf2aa.training.checkpoint import activation_checkpointing -from rf2aa.loss.loss import calc_chiral_grads_flat_impl +from modelhub.training.checkpoint import activation_checkpointing +from modelhub.utils.torch_utils import device_of +from modelhub.loss.loss import calc_chiral_grads_flat_impl class AtomAttentionEncoderDiffusion(nn.Module): def __init__( @@ -151,7 +152,9 @@ class AtomAttentionEncoderDiffusion(nn.Module): if self.broadcast_trunk_feats_on_1dim_old: P_LL = P_LL + self.process_z(Z_II)[..., tok_idx, tok_idx, :] else: - P_LL = P_LL + self.process_z(Z_II)[..., tok_idx, :, :][..., tok_idx, :] + P_LL = ( + P_LL + self.process_z(Z_II)[..., tok_idx, :, :][..., tok_idx, :] + ) # Add the noisy positions. 
Q_L = self.process_r(R_L) + Q_L @@ -278,7 +281,9 @@ class DiffusionTransformerBlock(nn.Module): Z_II, # [..., I, I, C_tokenpair] Beta_II, # [I, I] ): - with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16): + with torch.amp.autocast( + device_type=device_of(self).type, enabled=True, dtype=torch.bfloat16 + ): if self.no_residual_connection_between_attention_and_transition: B_I = self.attention_pair_bias(A_I, S_I, Z_II, Beta_II) A_I = A_I + B_I + self.conditioned_transition_block(A_I, S_I) diff --git a/rf2aa/model/layers/layer_utils.py b/src/modelhub/model/layers/layer_utils.py similarity index 98% rename from rf2aa/model/layers/layer_utils.py rename to src/modelhub/model/layers/layer_utils.py index b3135d6..216ba1b 100644 --- a/rf2aa/model/layers/layer_utils.py +++ b/src/modelhub/model/layers/layer_utils.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn from torch.nn.functional import silu -from rf2aa.training.checkpoint import activation_checkpointing +from modelhub.training.checkpoint import activation_checkpointing linearNoBias = partial(torch.nn.Linear, bias=False) diff --git a/rf2aa/model/layers/outer_product.py b/src/modelhub/model/layers/outer_product.py similarity index 94% rename from rf2aa/model/layers/outer_product.py rename to src/modelhub/model/layers/outer_product.py index 7d05bdc..b22df2a 100644 --- a/rf2aa/model/layers/outer_product.py +++ b/src/modelhub/model/layers/outer_product.py @@ -1,8 +1,8 @@ import torch import torch.nn as nn -from rf2aa.training.checkpoint import activation_checkpointing -from rf2aa.util_module import init_lecun_normal +from modelhub.training.checkpoint import activation_checkpointing +from modelhub.util_module import init_lecun_normal class OuterProductMean(nn.Module): diff --git a/rf2aa/model/layers/pairformer_layers.py b/src/modelhub/model/layers/pairformer_layers.py similarity index 94% rename from rf2aa/model/layers/pairformer_layers.py rename to src/modelhub/model/layers/pairformer_layers.py index 
7c2484b..0392ca0 100644 --- a/rf2aa/model/layers/pairformer_layers.py +++ b/src/modelhub/model/layers/pairformer_layers.py @@ -3,24 +3,25 @@ from deepspeed.ops.deepspeed4science import DS4Sci_EvoformerAttention from torch import nn from torch.nn.functional import one_hot, relu -from rf2aa.model.AF3_blocks import MsaPairWeightedAverage, MsaSubsampleEmbedder -from rf2aa.model.layers.af3_diffusion_transformer import AtomTransformer -from rf2aa.model.layers.Attention_module import ( +from modelhub.model.AF3_blocks import MsaPairWeightedAverage, MsaSubsampleEmbedder +from modelhub.model.layers.af3_diffusion_transformer import AtomTransformer +from modelhub.model.layers.Attention_module import ( TriangleAttention, TriangleMultiplication, ) -from rf2aa.model.layers.layer_utils import ( +from modelhub.model.layers.layer_utils import ( MultiDimLinear, Transition, collapse, create_batch_dimension_if_not_present, linearNoBias, ) -from rf2aa.model.layers.outer_product import ( +from modelhub.model.layers.outer_product import ( OuterProductMean_AF3, ) # need to code this correctly -from rf2aa.training.checkpoint import activation_checkpointing -from rf2aa.util_module import Dropout +from modelhub.training.checkpoint import activation_checkpointing +from modelhub.util_module import Dropout +from modelhub.utils.torch_utils import device_of class AtomAttentionEncoderPairformer(nn.Module): @@ -95,21 +96,19 @@ class AtomAttentionEncoderPairformer(nn.Module): assert S_trunk_I is None assert Z_II is None - # ...get the number of atoms + # ... get the number of atoms and tokens tok_idx = f["atom_to_token_map"] L = len(tok_idx) # N_atom - I = tok_idx.max() + 1 # N_token # noqa + I = tok_idx.max() + 1 # N_token - # ...flatten the last two dimensions (the letter dimension and the one-hot encoding of the unicode character dimension) + # ... 
flatten the last two dimensions of ref_atom_name_chars + # (the letter dimension and the one-hot encoding of the unicode character dimension) f["ref_atom_name_chars"] = f["ref_atom_name_chars"].reshape( L, -1 ) # [L, 4, 64] -> [L, 256], where L = N_atom - # Atom single conditioning: Embed per-atom meta data - - # Now, we have the single conditioning (C_L) for each atom. We will: - # 1. Use C_L to initialize the pair atom representation - # 2. Pass C_L as a skip connection to the diffusion module + # Atom single conditioning (C_L): Linearly embed concatenated per-atom features + # (e.g., ref_pos, ref_charge, ref_mask, ref_element, ref_atom_name_chars) C_L = self.process_input_features( torch.cat( tuple( @@ -120,6 +119,10 @@ class AtomAttentionEncoderPairformer(nn.Module): ) ) # [L, C_atom] + # Now, we have the single conditioning (C_L) for each atom. We will: + # 1. Use C_L to initialize the pair atom representation + # 2. Pass C_L as a skip connection to the diffusion module + # Embed offsets between atom reference positions # ref_pos is of shape [L, 3], so ref_pos.unsqueeze(-2) is of shape [L, 1, 3] and ref_pos.unsqueeze(-3) is of shape [1, L, 3] # We then take the outer difference between these two tensors to get a tensor of shape [L, L, 3] (via broadcasting both to shape [L, L, 3], and then taking the difference) @@ -312,6 +315,7 @@ class PairformerBlock(nn.Module): triangle_attention, attention_pair_bias, n_transition=4, + use_deepspeed_evo=True, ): super().__init__() @@ -325,10 +329,16 @@ class PairformerBlock(nn.Module): c_z, **triangle_multiplication, outgoing=False, bias=False ) self.tri_attn_start = TriangleAttention( - c_z, **triangle_attention, start_node=True + c_z, + **triangle_attention, + start_node=True, + use_deepspeed_evo=use_deepspeed_evo, ) self.tri_attn_end = TriangleAttention( - c_z, **triangle_attention, start_node=False + c_z, + **triangle_attention, + start_node=False, + use_deepspeed_evo=use_deepspeed_evo, ) self.z_transition = 
Transition(c=c_z, n=n_transition) @@ -346,7 +356,9 @@ class PairformerBlock(nn.Module): @activation_checkpointing def forward(self, S_I, Z_II): - with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16): + with torch.amp.autocast( + device_type=device_of(self).type, enabled=True, dtype=torch.bfloat16 + ): Z_II = Z_II + self.drop_row( self.maybe_make_batched(self.tri_mul_outgoing)(Z_II) ) @@ -561,7 +573,9 @@ class MSAModule(nn.Module): S_inputs_I, ): msa = f["msa"] - with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16): + with torch.amp.autocast( + device_type=device_of(self).type, enabled=True, dtype=torch.bfloat16 + ): msa_SI = self.msa_subsampler(msa, S_inputs_I) for i in range(self.n_block): @@ -643,7 +657,9 @@ class TemplateEmbedder(nn.Module): template_restype, asym_id, ): - with torch.amp.autocast("cuda", enabled=True, dtype=torch.bfloat16): + with torch.amp.autocast( + device_type=device_of(self).type, enabled=True, dtype=torch.bfloat16 + ): I = Z_II.shape[0] template_frame_mask = ( template_backbone_frame_mask[:, None] diff --git a/rf2aa/model/layers/resnet.py b/src/modelhub/model/layers/resnet.py similarity index 97% rename from rf2aa/model/layers/resnet.py rename to src/modelhub/model/layers/resnet.py index 64fe740..8f6bd51 100644 --- a/rf2aa/model/layers/resnet.py +++ b/src/modelhub/model/layers/resnet.py @@ -1,8 +1,8 @@ import torch.nn as nn import torch.nn.functional as F -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.util_module import init_lecun_normal +from modelhub.chemical import ChemicalData as ChemData +from modelhub.util_module import init_lecun_normal # pre-activation bottleneck resblock diff --git a/rf2aa/model/layers/structure_bias.py b/src/modelhub/model/layers/structure_bias.py similarity index 96% rename from rf2aa/model/layers/structure_bias.py rename to src/modelhub/model/layers/structure_bias.py index 0b6edff..76b1749 100644 --- a/rf2aa/model/layers/structure_bias.py +++ 
b/src/modelhub/model/layers/structure_bias.py @@ -2,7 +2,7 @@ import torch import torch.nn as nn from opt_einsum import contract as einsum -from rf2aa.util_module import init_lecun_normal, rbf +from modelhub.util_module import init_lecun_normal, rbf class StructureBias(torch.nn.Module): diff --git a/rf2aa/model/network.py b/src/modelhub/model/network.py similarity index 92% rename from rf2aa/model/network.py rename to src/modelhub/model/network.py index aecc100..f6e94aa 100644 --- a/rf2aa/model/network.py +++ b/src/modelhub/model/network.py @@ -1,11 +1,11 @@ import hydra import torch.nn as nn -from rf2aa.model.embedding_blocks import embedding_factory -from rf2aa.model.layers.AuxiliaryPredictor import aux_predictor_factory -from rf2aa.model.refinement_blocks import refinement_factory -from rf2aa.model.simulator_blocks import block_factory -from rf2aa.util import is_atom +from modelhub.model.embedding_blocks import embedding_factory +from modelhub.model.layers.AuxiliaryPredictor import aux_predictor_factory +from modelhub.model.refinement_blocks import refinement_factory +from modelhub.model.simulator_blocks import block_factory +from modelhub.util import is_atom class RosettaFold(nn.Module): @@ -27,7 +27,7 @@ class RosettaFold(nn.Module): for block in model_params.blocks.keys(): if block not in block_factory: raise ValueError( - f"User specified {block} type, but this block is not registered in rf2aa.Trunk_blocks." + f"User specified {block} type, but this block is not registered in modelhub.Trunk_blocks." 
) blocks_to_add = [ block_factory[block]( diff --git a/rf2aa/model/refinement_blocks.py b/src/modelhub/model/refinement_blocks.py similarity index 97% rename from rf2aa/model/refinement_blocks.py rename to src/modelhub/model/refinement_blocks.py index 3f05a66..0396284 100644 --- a/rf2aa/model/refinement_blocks.py +++ b/src/modelhub/model/refinement_blocks.py @@ -1,13 +1,13 @@ import torch import torch.nn as nn -from rf2aa.loss.loss import calc_chiral_grads -from rf2aa.model.generative_refinement import GenerativeRefinement -from rf2aa.model.layers.SE3_network import ( +from modelhub.loss.loss import calc_chiral_grads +from modelhub.model.generative_refinement import GenerativeRefinement +from modelhub.model.layers.SE3_network import ( FullyConnectedSE3, ) -from rf2aa.model.Track_module import Str2Str -from rf2aa.util_module import init_lecun_normal, make_full_graph, make_topk_graph +from modelhub.model.Track_module import Str2Str +from modelhub.util_module import init_lecun_normal, make_full_graph, make_topk_graph class LocalRefinementSE3(FullyConnectedSE3): diff --git a/rf2aa/model/simulator_blocks.py b/src/modelhub/model/simulator_blocks.py similarity index 97% rename from rf2aa/model/simulator_blocks.py rename to src/modelhub/model/simulator_blocks.py index f7902ab..750b08d 100644 --- a/rf2aa/model/simulator_blocks.py +++ b/src/modelhub/model/simulator_blocks.py @@ -4,8 +4,8 @@ import torch import torch.nn as nn import torch.utils.checkpoint as checkpoint -from rf2aa.model.AF3_blocks import AF3_block, AF3_full_block -from rf2aa.model.layers.Attention_module import ( +from modelhub.model.AF3_blocks import AF3_block, AF3_full_block +from modelhub.model.layers.Attention_module import ( BiasedAxialAttention, BiasedUntiedAxialAttention, FeedForwardLayer, @@ -16,13 +16,13 @@ from rf2aa.model.layers.Attention_module import ( OldMSAColGlobalAttention, TriangleMultiplication, ) -from rf2aa.model.layers.outer_product import ( +from modelhub.model.layers.outer_product 
import ( OuterProductMean, ) # need to code this correctly -from rf2aa.model.layers.SE3_network import FullyConnectedSE3, FullyConnectedSE3_noR -from rf2aa.model.layers.structure_bias import structure_bias_factory -from rf2aa.training.checkpoint import create_custom_forward -from rf2aa.util_module import Dropout +from modelhub.model.layers.SE3_network import FullyConnectedSE3, FullyConnectedSE3_noR +from modelhub.model.layers.structure_bias import structure_bias_factory +from modelhub.training.checkpoint import create_custom_forward +from modelhub.util_module import Dropout class RF2_block(nn.Module): diff --git a/rf2aa/pymol.py b/src/modelhub/pymol.py similarity index 99% rename from rf2aa/pymol.py rename to src/modelhub/pymol.py index 5d86b39..c0abcba 100644 --- a/rf2aa/pymol.py +++ b/src/modelhub/pymol.py @@ -1,4 +1,3 @@ - import xmlrpc.client as xmlrpclib diff --git a/rf2aa/pymol_tools.py b/src/modelhub/pymol_tools.py similarity index 95% rename from rf2aa/pymol_tools.py rename to src/modelhub/pymol_tools.py index ee2179d..3bb6bdb 100644 --- a/rf2aa/pymol_tools.py +++ b/src/modelhub/pymol_tools.py @@ -2,8 +2,8 @@ import os import torch -from rf2aa.pymol import cmd -from rf2aa.util import writepdb +from modelhub.pymol import cmd +from modelhub.util import writepdb def clear(): diff --git a/src/modelhub/resolvers.py b/src/modelhub/resolvers.py new file mode 100644 index 0000000..c1a68bc --- /dev/null +++ b/src/modelhub/resolvers.py @@ -0,0 +1,27 @@ +"""Resolvers for Hydra configuration files.""" + +import importlib +from beartype.typing import Any, Optional + + +def resolve_import(module_path: str, attribute_path: Optional[str] = None) -> Any: + """ + Import a module and, optionally, resolve a dotted attribute path within it. + + Args: + module_path (str): The import path of the module (passed to importlib.import_module). + attribute_path (str, optional): Dot-separated attribute path inside the module (e.g. "Cls.method"). + + Returns: + The resolved attribute, or the module itself when attribute_path is None. 
+ """ + module = importlib.import_module(module_path) + if attribute_path is not None: + # Split the attribute path to navigate through nested attributes + attributes = attribute_path.split(".") + attr = module + for attr_name in attributes: + attr = getattr(attr, attr_name) + return attr + else: + return module diff --git a/rf2aa/scoring.py b/src/modelhub/scoring.py similarity index 99% rename from rf2aa/scoring.py rename to src/modelhub/scoring.py index 2e530d9..c374393 100644 --- a/rf2aa/scoring.py +++ b/src/modelhub/scoring.py @@ -89,8 +89,8 @@ type2ljlk = { } # cartbonded -with open(script_dir + "cartbonded.json", "r") as j: - cartbonded_data_raw = json.loads(j.read()) +# with open(script_dir + "cartbonded.json", "r") as j: +# cartbonded_data_raw = json.loads(j.read()) # hbond donor/acceptors diff --git a/rf2aa/set_seed.py b/src/modelhub/set_seed.py similarity index 100% rename from rf2aa/set_seed.py rename to src/modelhub/set_seed.py diff --git a/rf2aa/symmetry.py b/src/modelhub/symmetry.py similarity index 100% rename from rf2aa/symmetry.py rename to src/modelhub/symmetry.py diff --git a/rf2aa/tensor_util.py b/src/modelhub/tensor_util.py similarity index 100% rename from rf2aa/tensor_util.py rename to src/modelhub/tensor_util.py diff --git a/rf2aa/test_pickles/config/legacy_train_regression.pt b/src/modelhub/test_pickles/config/legacy_train_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/legacy_train_regression.pt rename to src/modelhub/test_pickles/config/legacy_train_regression.pt diff --git a/rf2aa/test_pickles/config/rf2_deep_layerdropout_regression.pt b/src/modelhub/test_pickles/config/rf2_deep_layerdropout_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/rf2_deep_layerdropout_regression.pt rename to src/modelhub/test_pickles/config/rf2_deep_layerdropout_regression.pt diff --git a/rf2aa/test_pickles/config/rf2aa_legacy_refinement_regression.pt 
b/src/modelhub/test_pickles/config/rf2aa_legacy_refinement_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/rf2aa_legacy_refinement_regression.pt rename to src/modelhub/test_pickles/config/rf2aa_legacy_refinement_regression.pt diff --git a/rf2aa/test_pickles/config/rf2aa_regression.pt b/src/modelhub/test_pickles/config/rf2aa_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/rf2aa_regression.pt rename to src/modelhub/test_pickles/config/rf2aa_regression.pt diff --git a/rf2aa/test_pickles/config/rf_with_gradients_regression.pt b/src/modelhub/test_pickles/config/rf_with_gradients_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/rf_with_gradients_regression.pt rename to src/modelhub/test_pickles/config/rf_with_gradients_regression.pt diff --git a/rf2aa/test_pickles/config/untied_p2p_regression.pt b/src/modelhub/test_pickles/config/untied_p2p_regression.pt similarity index 100% rename from rf2aa/test_pickles/config/untied_p2p_regression.pt rename to src/modelhub/test_pickles/config/untied_p2p_regression.pt diff --git a/rf2aa/test_pickles/data/compl_regression.pt b/src/modelhub/test_pickles/data/compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/data/compl_regression.pt rename to src/modelhub/test_pickles/data/compl_regression.pt diff --git a/rf2aa/test_pickles/data/na_compl_regression.pt b/src/modelhub/test_pickles/data/na_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/data/na_compl_regression.pt rename to src/modelhub/test_pickles/data/na_compl_regression.pt diff --git a/rf2aa/test_pickles/data/pdb_regression.pt b/src/modelhub/test_pickles/data/pdb_regression.pt similarity index 100% rename from rf2aa/test_pickles/data/pdb_regression.pt rename to src/modelhub/test_pickles/data/pdb_regression.pt diff --git a/rf2aa/test_pickles/data/rna_regression.pt b/src/modelhub/test_pickles/data/rna_regression.pt similarity index 100% rename from 
rf2aa/test_pickles/data/rna_regression.pt rename to src/modelhub/test_pickles/data/rna_regression.pt diff --git a/rf2aa/test_pickles/data/sm_compl_covale_regression.pt b/src/modelhub/test_pickles/data/sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/data/sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/data/sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/data/sm_compl_regression.pt b/src/modelhub/test_pickles/data/sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/data/sm_compl_regression.pt rename to src/modelhub/test_pickles/data/sm_compl_regression.pt diff --git a/rf2aa/test_pickles/loss/rf2aa_na_compl_loss_regression.pt b/src/modelhub/test_pickles/loss/rf2aa_na_compl_loss_regression.pt similarity index 100% rename from rf2aa/test_pickles/loss/rf2aa_na_compl_loss_regression.pt rename to src/modelhub/test_pickles/loss/rf2aa_na_compl_loss_regression.pt diff --git a/rf2aa/test_pickles/loss/rf2aa_pdb_loss_regression.pt b/src/modelhub/test_pickles/loss/rf2aa_pdb_loss_regression.pt similarity index 100% rename from rf2aa/test_pickles/loss/rf2aa_pdb_loss_regression.pt rename to src/modelhub/test_pickles/loss/rf2aa_pdb_loss_regression.pt diff --git a/rf2aa/test_pickles/loss/rf2aa_rna_loss_regression.pt b/src/modelhub/test_pickles/loss/rf2aa_rna_loss_regression.pt similarity index 100% rename from rf2aa/test_pickles/loss/rf2aa_rna_loss_regression.pt rename to src/modelhub/test_pickles/loss/rf2aa_rna_loss_regression.pt diff --git a/rf2aa/test_pickles/loss/rf2aa_sm_compl_covale_loss_regression.pt b/src/modelhub/test_pickles/loss/rf2aa_sm_compl_covale_loss_regression.pt similarity index 100% rename from rf2aa/test_pickles/loss/rf2aa_sm_compl_covale_loss_regression.pt rename to src/modelhub/test_pickles/loss/rf2aa_sm_compl_covale_loss_regression.pt diff --git a/rf2aa/test_pickles/loss/rf2aa_sm_compl_loss_regression.pt b/src/modelhub/test_pickles/loss/rf2aa_sm_compl_loss_regression.pt 
similarity index 100% rename from rf2aa/test_pickles/loss/rf2aa_sm_compl_loss_regression.pt rename to src/modelhub/test_pickles/loss/rf2aa_sm_compl_loss_regression.pt diff --git a/rf2aa/test_pickles/model/legacy_train_na_compl_regression.pt b/src/modelhub/test_pickles/model/legacy_train_na_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/legacy_train_na_compl_regression.pt rename to src/modelhub/test_pickles/model/legacy_train_na_compl_regression.pt diff --git a/rf2aa/test_pickles/model/legacy_train_pdb_regression.pt b/src/modelhub/test_pickles/model/legacy_train_pdb_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/legacy_train_pdb_regression.pt rename to src/modelhub/test_pickles/model/legacy_train_pdb_regression.pt diff --git a/rf2aa/test_pickles/model/legacy_train_rna_regression.pt b/src/modelhub/test_pickles/model/legacy_train_rna_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/legacy_train_rna_regression.pt rename to src/modelhub/test_pickles/model/legacy_train_rna_regression.pt diff --git a/rf2aa/test_pickles/model/legacy_train_sm_compl_covale_regression.pt b/src/modelhub/test_pickles/model/legacy_train_sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/legacy_train_sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/model/legacy_train_sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/model/legacy_train_sm_compl_regression.pt b/src/modelhub/test_pickles/model/legacy_train_sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/legacy_train_sm_compl_regression.pt rename to src/modelhub/test_pickles/model/legacy_train_sm_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf2_deep_layerdropout_na_compl_regression.pt b/src/modelhub/test_pickles/model/rf2_deep_layerdropout_na_compl_regression.pt similarity index 100% rename from 
rf2aa/test_pickles/model/rf2_deep_layerdropout_na_compl_regression.pt rename to src/modelhub/test_pickles/model/rf2_deep_layerdropout_na_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf2_deep_layerdropout_pdb_regression.pt b/src/modelhub/test_pickles/model/rf2_deep_layerdropout_pdb_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2_deep_layerdropout_pdb_regression.pt rename to src/modelhub/test_pickles/model/rf2_deep_layerdropout_pdb_regression.pt diff --git a/rf2aa/test_pickles/model/rf2_deep_layerdropout_rna_regression.pt b/src/modelhub/test_pickles/model/rf2_deep_layerdropout_rna_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2_deep_layerdropout_rna_regression.pt rename to src/modelhub/test_pickles/model/rf2_deep_layerdropout_rna_regression.pt diff --git a/rf2aa/test_pickles/model/rf2_deep_layerdropout_sm_compl_covale_regression.pt b/src/modelhub/test_pickles/model/rf2_deep_layerdropout_sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2_deep_layerdropout_sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/model/rf2_deep_layerdropout_sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/model/rf2_deep_layerdropout_sm_compl_regression.pt b/src/modelhub/test_pickles/model/rf2_deep_layerdropout_sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2_deep_layerdropout_sm_compl_regression.pt rename to src/modelhub/test_pickles/model/rf2_deep_layerdropout_sm_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf2aa_na_compl_regression.pt b/src/modelhub/test_pickles/model/rf2aa_na_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2aa_na_compl_regression.pt rename to src/modelhub/test_pickles/model/rf2aa_na_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf2aa_pdb_regression.pt b/src/modelhub/test_pickles/model/rf2aa_pdb_regression.pt similarity index 100% rename from 
rf2aa/test_pickles/model/rf2aa_pdb_regression.pt rename to src/modelhub/test_pickles/model/rf2aa_pdb_regression.pt diff --git a/rf2aa/test_pickles/model/rf2aa_rna_regression.pt b/src/modelhub/test_pickles/model/rf2aa_rna_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2aa_rna_regression.pt rename to src/modelhub/test_pickles/model/rf2aa_rna_regression.pt diff --git a/rf2aa/test_pickles/model/rf2aa_sm_compl_covale_regression.pt b/src/modelhub/test_pickles/model/rf2aa_sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2aa_sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/model/rf2aa_sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/model/rf2aa_sm_compl_regression.pt b/src/modelhub/test_pickles/model/rf2aa_sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf2aa_sm_compl_regression.pt rename to src/modelhub/test_pickles/model/rf2aa_sm_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf_with_gradients_na_compl_regression.pt b/src/modelhub/test_pickles/model/rf_with_gradients_na_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf_with_gradients_na_compl_regression.pt rename to src/modelhub/test_pickles/model/rf_with_gradients_na_compl_regression.pt diff --git a/rf2aa/test_pickles/model/rf_with_gradients_pdb_regression.pt b/src/modelhub/test_pickles/model/rf_with_gradients_pdb_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf_with_gradients_pdb_regression.pt rename to src/modelhub/test_pickles/model/rf_with_gradients_pdb_regression.pt diff --git a/rf2aa/test_pickles/model/rf_with_gradients_rna_regression.pt b/src/modelhub/test_pickles/model/rf_with_gradients_rna_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf_with_gradients_rna_regression.pt rename to src/modelhub/test_pickles/model/rf_with_gradients_rna_regression.pt diff --git 
a/rf2aa/test_pickles/model/rf_with_gradients_sm_compl_covale_regression.pt b/src/modelhub/test_pickles/model/rf_with_gradients_sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf_with_gradients_sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/model/rf_with_gradients_sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/model/rf_with_gradients_sm_compl_regression.pt b/src/modelhub/test_pickles/model/rf_with_gradients_sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/rf_with_gradients_sm_compl_regression.pt rename to src/modelhub/test_pickles/model/rf_with_gradients_sm_compl_regression.pt diff --git a/rf2aa/test_pickles/model/untied_p2p_na_compl_regression.pt b/src/modelhub/test_pickles/model/untied_p2p_na_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/untied_p2p_na_compl_regression.pt rename to src/modelhub/test_pickles/model/untied_p2p_na_compl_regression.pt diff --git a/rf2aa/test_pickles/model/untied_p2p_pdb_regression.pt b/src/modelhub/test_pickles/model/untied_p2p_pdb_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/untied_p2p_pdb_regression.pt rename to src/modelhub/test_pickles/model/untied_p2p_pdb_regression.pt diff --git a/rf2aa/test_pickles/model/untied_p2p_rna_regression.pt b/src/modelhub/test_pickles/model/untied_p2p_rna_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/untied_p2p_rna_regression.pt rename to src/modelhub/test_pickles/model/untied_p2p_rna_regression.pt diff --git a/rf2aa/test_pickles/model/untied_p2p_sm_compl_covale_regression.pt b/src/modelhub/test_pickles/model/untied_p2p_sm_compl_covale_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/untied_p2p_sm_compl_covale_regression.pt rename to src/modelhub/test_pickles/model/untied_p2p_sm_compl_covale_regression.pt diff --git a/rf2aa/test_pickles/model/untied_p2p_sm_compl_regression.pt 
b/src/modelhub/test_pickles/model/untied_p2p_sm_compl_regression.pt similarity index 100% rename from rf2aa/test_pickles/model/untied_p2p_sm_compl_regression.pt rename to src/modelhub/test_pickles/model/untied_p2p_sm_compl_regression.pt diff --git a/rf2aa/tests/test_AF3_blocks.py b/src/modelhub/tests/test_AF3_blocks.py similarity index 93% rename from rf2aa/tests/test_AF3_blocks.py rename to src/modelhub/tests/test_AF3_blocks.py index 349cddf..476e29a 100644 --- a/rf2aa/tests/test_AF3_blocks.py +++ b/src/modelhub/tests/test_AF3_blocks.py @@ -1,6 +1,6 @@ import torch -from rf2aa.model.AF3_blocks import MsaSubsampleEmbedder +from modelhub.model.AF3_blocks import MsaSubsampleEmbedder def test_msa_module(): @@ -54,12 +54,12 @@ def test_msa_weighting_einsum(): def test_batching_pairformer(): - from rf2aa.model.AF3_structure import PairformerBlock, PairformerBlock_batched + from modelhub.model.AF3_structure import PairformerBlock, PairformerBlock_batched B, I, cs, cz = 1, 100, 128, 128 S_I = torch.randn(I, cs) Z_II = torch.randn(I, I, cz) - from rf2aa.tests.test_conditions import make_deterministic, seed_all + from modelhub.tests.test_conditions import make_deterministic, seed_all seed_all() pairformer = PairformerBlock( diff --git a/rf2aa/tests/test_af3_losses_datahub.py b/src/modelhub/tests/test_af3_losses_datahub.py similarity index 98% rename from rf2aa/tests/test_af3_losses_datahub.py rename to src/modelhub/tests/test_af3_losses_datahub.py index 10c7425..3a05d58 100644 --- a/rf2aa/tests/test_af3_losses_datahub.py +++ b/src/modelhub/tests/test_af3_losses_datahub.py @@ -10,8 +10,8 @@ from tests.datasets.conftest import ( from torch.utils.data import DataLoader, Subset from tqdm import tqdm -from rf2aa.alignment import weighted_rigid_align -from rf2aa.loss.af3_losses import DiffusionLoss +from modelhub.alignment import weighted_rigid_align +from modelhub.loss.af3_losses import DiffusionLoss @pytest.mark.parametrize("pdb_dataset", [AF3_PDB_DATASET]) diff --git 
a/rf2aa/tests/test_align.py b/src/modelhub/tests/test_align.py similarity index 89% rename from rf2aa/tests/test_align.py rename to src/modelhub/tests/test_align.py index c9fb50a..d91c299 100644 --- a/rf2aa/tests/test_align.py +++ b/src/modelhub/tests/test_align.py @@ -1,8 +1,8 @@ import torch from icecream import ic -from rf2aa.alignment import get_rmsd, weighted_rigid_align -from rf2aa.util import kabsch +from modelhub.alignment import get_rmsd, weighted_rigid_align +from modelhub.util import kabsch def pseudobatched_kabsch(xyz1, xyz2): diff --git a/rf2aa/tests/test_benchmark.py b/src/modelhub/tests/test_benchmark.py similarity index 90% rename from rf2aa/tests/test_benchmark.py rename to src/modelhub/tests/test_benchmark.py index ce96b5d..5b10ecb 100644 --- a/rf2aa/tests/test_benchmark.py +++ b/src/modelhub/tests/test_benchmark.py @@ -7,19 +7,19 @@ warnings.filterwarnings("ignore") from functools import partial -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import compose_single_item_dataset -from rf2aa.data.dataloader_adaptor import get_loss_calc_items, prepare_input -from rf2aa.loss.loss_factory import get_loss_and_misc -from rf2aa.set_seed import seed_all -from rf2aa.tests.test_conditions import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.data.compose_dataset import compose_single_item_dataset +from modelhub.data.dataloader_adaptor import get_loss_calc_items, prepare_input +from modelhub.loss.loss_factory import get_loss_and_misc +from modelhub.set_seed import seed_all +from modelhub.tests.test_conditions import ( make_deterministic, setup_benchmark_array, setup_data, ) -from rf2aa.training.recycling import recycle_step_packed -from rf2aa.util_module import XYZConverter +from modelhub.training.recycling import recycle_step_packed +from modelhub.util_module import XYZConverter # goal is to test 
all the configs on a broad set of datasets gpu = "cuda:0" if torch.cuda.is_available() else "cpu" diff --git a/rf2aa/tests/test_conditions.py b/src/modelhub/tests/test_conditions.py similarity index 97% rename from rf2aa/tests/test_conditions.py rename to src/modelhub/tests/test_conditions.py index 2bfa31a..2e7bba9 100644 --- a/rf2aa/tests/test_conditions.py +++ b/src/modelhub/tests/test_conditions.py @@ -6,20 +6,20 @@ import pandas as pd import torch from hydra import compose, initialize -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.data.compose_dataset import set_data_loader_params -from rf2aa.data.data_loader import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.data.compose_dataset import set_data_loader_params +from modelhub.data.data_loader import ( loader_complex, loader_dna_rna, loader_na_complex, loader_pdb, ) -from rf2aa.data.loaders.rcsb_loader import ( +from modelhub.data.loaders.rcsb_loader import ( loader_sm_compl_assembly, loader_sm_compl_assembly_single, ) -from rf2aa.set_seed import seed_all -from rf2aa.trainer_new import trainer_factory +from modelhub.set_seed import seed_all +from modelhub.trainer_new import trainer_factory # examples used in testing pdb_item = { diff --git a/rf2aa/tests/test_configs.py b/src/modelhub/tests/test_configs.py similarity index 89% rename from rf2aa/tests/test_configs.py rename to src/modelhub/tests/test_configs.py index c079f83..b7ee743 100644 --- a/rf2aa/tests/test_configs.py +++ b/src/modelhub/tests/test_configs.py @@ -4,7 +4,7 @@ import pytest import torch from hydra import compose, initialize -from rf2aa.tests.test_conditions import config_pickle_path, configs +from modelhub.tests.test_conditions import config_pickle_path, configs @pytest.mark.parametrize("config_name", configs) diff --git a/rf2aa/tests/test_crop.py b/src/modelhub/tests/test_crop.py similarity index 97% rename from rf2aa/tests/test_crop.py rename to src/modelhub/tests/test_crop.py index 
8e73c6d..b108212 100644 --- a/rf2aa/tests/test_crop.py +++ b/src/modelhub/tests/test_crop.py @@ -1,13 +1,13 @@ import numpy as np import torch -from rf2aa.data.loaders.crop import ( +from modelhub.data.loaders.crop import ( contiguous_crop_index, get_preferred_chain_or_interface, radial_crop_index, select_preferred_token, ) -from rf2aa.util import get_protein_bond_feats +from modelhub.util import get_protein_bond_feats merged_outs = { "ch_letters_poly": ["A", "B", "C"], diff --git a/rf2aa/tests/test_crop_chirals.py b/src/modelhub/tests/test_crop_chirals.py similarity index 93% rename from rf2aa/tests/test_crop_chirals.py rename to src/modelhub/tests/test_crop_chirals.py index 21b2edc..e0c3cf5 100644 --- a/rf2aa/tests/test_crop_chirals.py +++ b/src/modelhub/tests/test_crop_chirals.py @@ -3,8 +3,8 @@ from itertools import product import pytest import torch -from rf2aa.data.chain_crop import crop_chirals -from rf2aa.tensor_util import assert_equal +from modelhub.data.chain_crop import crop_chirals +from modelhub.tensor_util import assert_equal CHIRALS = [ torch.zeros((0, 5)), diff --git a/rf2aa/tests/test_dataset.py b/src/modelhub/tests/test_dataset.py similarity index 93% rename from rf2aa/tests/test_dataset.py rename to src/modelhub/tests/test_dataset.py index 0927987..aa94c32 100644 --- a/rf2aa/tests/test_dataset.py +++ b/src/modelhub/tests/test_dataset.py @@ -5,16 +5,16 @@ from functools import partial import pytest import torch -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.data.compose_dataset import ( compose_single_item_dataset, ) -from rf2aa.data.data_loader import get_crop -from rf2aa.set_seed import seed_all -from rf2aa.tensor_util import assert_shape -from rf2aa.tests.test_conditions import dataset_pickle_path, setup_data -from rf2aa.util 
import is_atom +from modelhub.data.data_loader import get_crop +from modelhub.set_seed import seed_all +from modelhub.tensor_util import assert_shape +from modelhub.tests.test_conditions import dataset_pickle_path, setup_data +from modelhub.util import is_atom data = setup_data() diff --git a/rf2aa/tests/test_diffusion_module.py b/src/modelhub/tests/test_diffusion_module.py similarity index 96% rename from rf2aa/tests/test_diffusion_module.py rename to src/modelhub/tests/test_diffusion_module.py index 1e5e43c..d698688 100644 --- a/rf2aa/tests/test_diffusion_module.py +++ b/src/modelhub/tests/test_diffusion_module.py @@ -8,9 +8,9 @@ from pl_bolts.callbacks.verification.batch_gradient import default_input_mapping from pl_bolts.utils import BatchGradientVerification from torch.nn.functional import one_hot -from rf2aa.debug import pretty_describe_dict -from rf2aa.model.AF3_structure import AtomTransformer, DiffusionModule -from rf2aa.tensor_util import assert_cmp +from modelhub.debug import pretty_describe_dict +from modelhub.model.AF3_structure import AtomTransformer, DiffusionModule +from modelhub.tensor_util import assert_cmp def test_batch_leakage(): diff --git a/rf2aa/tests/test_dropout.py b/src/modelhub/tests/test_dropout.py similarity index 92% rename from rf2aa/tests/test_dropout.py rename to src/modelhub/tests/test_dropout.py index c1a77b3..319375f 100644 --- a/rf2aa/tests/test_dropout.py +++ b/src/modelhub/tests/test_dropout.py @@ -1,6 +1,6 @@ import torch -from rf2aa.util_module import Dropout +from modelhub.util_module import Dropout def test_dropout(): diff --git a/rf2aa/tests/test_flow_matching.py b/src/modelhub/tests/test_flow_matching.py similarity index 86% rename from rf2aa/tests/test_flow_matching.py rename to src/modelhub/tests/test_flow_matching.py index 3bb11b8..2fd93cd 100644 --- a/rf2aa/tests/test_flow_matching.py +++ b/src/modelhub/tests/test_flow_matching.py @@ -5,20 +5,20 @@ import pytest import torch from omegaconf import OmegaConf -from 
rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.dataloader_adaptor import prepare_input_fm -from rf2aa.flow_matching.data_transforms import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.data.dataloader_adaptor import prepare_input_fm +from modelhub.flow_matching.data_transforms import ( center_chain_backbone, convert_dataloader_inputs_to_rigids, rigids_to_xyz, ) -from rf2aa.flow_matching.sampler import Sampler -from rf2aa.loss.loss import calc_crd_rmsd -from rf2aa.tensor_util import assert_equal -from rf2aa.tests.test_conditions import dataset_pickle_path, datasets, model_pickle_path -from rf2aa.training.recycling import unpack_outputs -from rf2aa.util_module import XYZConverter +from modelhub.flow_matching.sampler import Sampler +from modelhub.loss.loss import calc_crd_rmsd +from modelhub.tensor_util import assert_equal +from modelhub.tests.test_conditions import dataset_pickle_path, datasets, model_pickle_path +from modelhub.training.recycling import unpack_outputs +from modelhub.util_module import XYZConverter def mock_model_outputs(dataset): @@ -149,9 +149,9 @@ def test_sampler(dataset): inputs = torch.load(dataset_pickle, map_location=sampler.device) outputs = mock_model_outputs(dataset) with patch( - "rf2aa.flow_matching.sampler.recycle_step_packed", return_value=outputs + "modelhub.flow_matching.sampler.recycle_step_packed", return_value=outputs ) as recycling_fn: - with patch("rf2aa.flow_matching.sampler.Sampler._take_step") as euler_step: + with patch("modelhub.flow_matching.sampler.Sampler._take_step") as euler_step: sampler.sample(inputs) recycling_fn.assert_called_once() diff --git a/rf2aa/tests/test_inference_pipelines.py b/src/modelhub/tests/test_inference_pipelines.py similarity index 63% rename from rf2aa/tests/test_inference_pipelines.py rename to src/modelhub/tests/test_inference_pipelines.py index 
2936aac..4552886 100644 --- a/rf2aa/tests/test_inference_pipelines.py +++ b/src/modelhub/tests/test_inference_pipelines.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest from cifutils import parse -from rf2aa.inference.inference import _build_file_paths_for_prediction +from modelhub.utils.inference import build_file_paths_for_prediction current_file_directory = Path(__file__).parent @@ -12,22 +12,20 @@ current_file_directory = Path(__file__).parent @pytest.mark.parametrize( "file_path", [ - "data/example_from_ccd.cif", "data/nested_examples", - "data/example_from_sdf.json", - "data/example_from_smiles.cif", "data/multiple_examples_from_json.json", ], ) def test_build_file_paths_for_prediction(file_path: PathLike, tmp_path: Path): """Use the inference pipeline to build and parse inputs for prediction.""" - file_path = [current_file_directory / Path(file_path)] + file_path = current_file_directory / Path(file_path) # Call the function with the file path and temporary directory - paths = _build_file_paths_for_prediction(file_path, tmp_path) + paths = build_file_paths_for_prediction(file_path, tmp_path) - # Iterate over the returned paths and parse them + # Iterate over the returned paths and parse them, ensuring the the outputs are reasonable for path in paths: output = parse(path) assert output is not None assert len(output["assemblies"]["1"][0]) > 0 + diff --git a/rf2aa/tests/test_loss.py b/src/modelhub/tests/test_loss.py similarity index 91% rename from rf2aa/tests/test_loss.py rename to src/modelhub/tests/test_loss.py index a745906..944334e 100644 --- a/rf2aa/tests/test_loss.py +++ b/src/modelhub/tests/test_loss.py @@ -5,15 +5,15 @@ import pytest import torch from omegaconf import OmegaConf -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.tests.test_conditions import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.tests.test_conditions import ( config_pickle_path, dataset_pickle_path, loss_pickle_path, 
make_deterministic, random_param_init, ) -from rf2aa.trainer_new import trainer_factory +from modelhub.trainer_new import trainer_factory test_conditions = list( itertools.product( @@ -106,12 +106,12 @@ def test_loss_functions(dataset, model): def test_smooth_lddt_loss(dataset): device = "cuda:0" if torch.cuda.is_available() else "cpu" - import rf2aa - from rf2aa.tests.test_conditions import pdb_item + import modelhub + from modelhub.tests.test_conditions import pdb_item - dataset_inputs = rf2aa.data.loaders.spoofing.spoofed_loader(pdb_item, {}) + dataset_inputs = modelhub.data.loaders.spoofing.spoofed_loader(pdb_item, {}) - from rf2aa.data.dataloader_adaptor_af3 import prepare_input_af3 + from modelhub.data.dataloader_adaptor_af3 import prepare_input_af3 D = 1 # diffusion batch s_trans = 1 # std dev of random translation diff --git a/rf2aa/tests/test_metrics.py b/src/modelhub/tests/test_metrics.py similarity index 86% rename from rf2aa/tests/test_metrics.py rename to src/modelhub/tests/test_metrics.py index 4fe1333..f75453e 100644 --- a/rf2aa/tests/test_metrics.py +++ b/src/modelhub/tests/test_metrics.py @@ -2,9 +2,9 @@ import pytest import torch from hydra import compose, initialize -from rf2aa.metrics.metrics_factory import MetricManager, metrics_factory -from rf2aa.tests.test_conditions import configs, model_pickle_path, setup_array -from rf2aa.tests.test_model import setup_test +from modelhub.metrics.metrics_factory import MetricManager, metrics_factory +from modelhub.tests.test_conditions import configs, model_pickle_path, setup_array +from modelhub.tests.test_model import setup_test test_conditions = setup_array( ["pdb"], [config for config in configs if "legacy" not in config] diff --git a/rf2aa/tests/test_model.py b/src/modelhub/tests/test_model.py similarity index 96% rename from rf2aa/tests/test_model.py rename to src/modelhub/tests/test_model.py index 6c293d2..578c496 100644 --- a/rf2aa/tests/test_model.py +++ b/src/modelhub/tests/test_model.py @@ -6,9 
+6,9 @@ import torch warnings.filterwarnings("ignore") -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.data.dataloader_adaptor import prepare_input -from rf2aa.tests.test_conditions import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.data.dataloader_adaptor import prepare_input +from modelhub.tests.test_conditions import ( dataset_pickle_path, find_device, make_deterministic, @@ -16,9 +16,9 @@ from rf2aa.tests.test_conditions import ( random_param_init, setup_array, ) -from rf2aa.training.recycling import run_model_forward, run_model_forward_legacy -from rf2aa.util import Ls_from_same_chain_2d, is_atom -from rf2aa.util_module import XYZConverter +from modelhub.training.recycling import run_model_forward, run_model_forward_legacy +from modelhub.util import Ls_from_same_chain_2d, is_atom +from modelhub.util_module import XYZConverter # goal is to test all the configs on a broad set of datasets diff --git a/rf2aa/tests/test_msa_featurize.py b/src/modelhub/tests/test_msa_featurize.py similarity index 96% rename from rf2aa/tests/test_msa_featurize.py rename to src/modelhub/tests/test_msa_featurize.py index 3baf5d9..e88da07 100644 --- a/rf2aa/tests/test_msa_featurize.py +++ b/src/modelhub/tests/test_msa_featurize.py @@ -5,14 +5,14 @@ import numpy as np import pytest import torch -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.data_loader import MSAFeaturize, cluster_sum, get_term_feats -from rf2aa.data.parsers import parse_a3m -from rf2aa.tensor_util import assert_equal -from rf2aa.tests.test_conditions import setup_data -from rf2aa.trainer_new import seed_all -from rf2aa.util import is_atom, is_nucleic, is_protein +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.data.data_loader import MSAFeaturize, cluster_sum, get_term_feats +from modelhub.data.parsers import parse_a3m +from 
modelhub.tensor_util import assert_equal +from modelhub.tests.test_conditions import setup_data +from modelhub.trainer_new import seed_all +from modelhub.util import is_atom, is_nucleic, is_protein def OldMSAFeaturize( diff --git a/rf2aa/tests/test_msa_loading.py b/src/modelhub/tests/test_msa_loading.py similarity index 91% rename from rf2aa/tests/test_msa_loading.py rename to src/modelhub/tests/test_msa_loading.py index ede9012..2c6a58b 100644 --- a/rf2aa/tests/test_msa_loading.py +++ b/src/modelhub/tests/test_msa_loading.py @@ -3,11 +3,11 @@ from typing import Dict, List, Optional, Tuple import pytest import torch -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import default_dataloader_params -from rf2aa.data.data_loader import _load_df -from rf2aa.data.loaders.polymer_partners import load_multi_msa -from rf2aa.data.parsers import parse_a3m, parse_fasta +from modelhub.chemical import initialize_chemdata +from modelhub.data.compose_dataset import default_dataloader_params +from modelhub.data.data_loader import _load_df +from modelhub.data.loaders.polymer_partners import load_multi_msa +from modelhub.data.parsers import parse_a3m, parse_fasta initialize_chemdata() diff --git a/rf2aa/tests/test_msa_pairing.py b/src/modelhub/tests/test_msa_pairing.py similarity index 96% rename from rf2aa/tests/test_msa_pairing.py rename to src/modelhub/tests/test_msa_pairing.py index ebdc8a4..ceebfac 100644 --- a/rf2aa/tests/test_msa_pairing.py +++ b/src/modelhub/tests/test_msa_pairing.py @@ -2,8 +2,8 @@ import numpy as np import pytest import torch -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.loaders.polymer_partners import join_msas_by_taxid +from modelhub.chemical import initialize_chemdata +from modelhub.data.loaders.polymer_partners import join_msas_by_taxid initialize_chemdata() diff --git a/rf2aa/tests/test_sampler.py b/src/modelhub/tests/test_sampler.py similarity index 95% rename from rf2aa/tests/test_sampler.py rename 
to src/modelhub/tests/test_sampler.py index db38a66..5f36fef 100644 --- a/rf2aa/tests/test_sampler.py +++ b/src/modelhub/tests/test_sampler.py @@ -4,8 +4,8 @@ import hydra import pytest from scipy.stats import pearsonr -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import compose_dataset +from modelhub.chemical import initialize_chemdata +from modelhub.data.compose_dataset import compose_dataset def get_sampler(config_name: str = "base", rank: int = 0, world_size: int = 1): diff --git a/rf2aa/tests/test_semantics.py b/src/modelhub/tests/test_semantics.py similarity index 93% rename from rf2aa/tests/test_semantics.py rename to src/modelhub/tests/test_semantics.py index 0421d71..498b9f6 100644 --- a/rf2aa/tests/test_semantics.py +++ b/src/modelhub/tests/test_semantics.py @@ -6,22 +6,20 @@ from functools import partial import pytest import torch -import rf2aa.cifutils as cifutils -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.data.compose_dataset import compose_single_item_dataset -from rf2aa.data.dataloader_adaptor import prepare_input -from rf2aa.tensor_util import assert_equal -from rf2aa.tests.test_conditions import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.data.compose_dataset import compose_single_item_dataset +from modelhub.data.dataloader_adaptor import prepare_input +from modelhub.tensor_util import assert_equal +from modelhub.tests.test_conditions import ( make_deterministic, random_param_init, setup_array, ) -from rf2aa.training.recycling import add_recycle_inputs, run_model_forward -from rf2aa.util import is_atom -from rf2aa.util_module import XYZConverter +from modelhub.training.recycling import add_recycle_inputs, run_model_forward +from modelhub.util import is_atom +from modelhub.util_module import XYZConverter -assert "rf2aa" in cifutils.__name__ test_conditions = 
setup_array(["sm_compl"], ["rf2aa", "rf_with_gradients"]) gpu = "cuda:0" if torch.cuda.is_available() else "cpu" diff --git a/rf2aa/tests/test_trainer.py b/src/modelhub/tests/test_trainer.py similarity index 95% rename from rf2aa/tests/test_trainer.py rename to src/modelhub/tests/test_trainer.py index 793fbff..0d77b35 100644 --- a/rf2aa/tests/test_trainer.py +++ b/src/modelhub/tests/test_trainer.py @@ -5,8 +5,8 @@ import pytest import torch from hydra import compose, initialize -from rf2aa.tests.test_conditions import configs -from rf2aa.trainer_new import trainer_factory +from modelhub.tests.test_conditions import configs +from modelhub.trainer_new import trainer_factory @pytest.mark.parametrize("config", configs) diff --git a/rf2aa/tests/test_write_confidence.py b/src/modelhub/tests/test_write_confidence.py similarity index 60% rename from rf2aa/tests/test_write_confidence.py rename to src/modelhub/tests/test_write_confidence.py index ea529a0..d6b6a81 100644 --- a/rf2aa/tests/test_write_confidence.py +++ b/src/modelhub/tests/test_write_confidence.py @@ -1,41 +1,27 @@ from functools import partial +import pytest import numpy as np import torch +from lightning.fabric import seed_everything -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.chemical import initialize_chemdata -from rf2aa.metrics.metric_utils import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.metrics.metric_utils import ( find_bin_midpoints, unbin_logits, ) -from rf2aa.metrics.predicted_error import WriteAF3Confidence -from rf2aa.set_seed import seed_all +from modelhub.utils.predicted_error import compile_af3_confidence_outputs +from omegaconf import DictConfig -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - for key, value in self.items(): - if isinstance(value, dict): - self[key] = AttrDict(value) - - def __getattr__(self, item): - if item in self: - 
return self[item] - raise AttributeError(f"'AttrDict' object has no attribute '{item}'") - - def __setattr__(self, key, value): - self[key] = value - - -def test_write_confidence(): +def test_compile_af3_confidence_outputs(): L = 100 init = partial(initialize_chemdata) init() - bins = 10 - seed_all(42) + # Spoofing the outputs from the model + seed_everything(42) outputs = { "confidence": { "rf2aa_seq": torch.randint(0, 21, (L,)), @@ -47,33 +33,42 @@ def test_write_confidence(): } is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] outputs["confidence"]["is_real_atom"] = is_real_atom - data = AttrDict( - { - "plddt": { - "weight": 1.0, - "n_bins": 50, - "max_value": 1.0, - }, - "pae": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - "pde": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - } - ) - confidence_writer = WriteAF3Confidence(**data) - df = confidence_writer( - None, outputs, {"example_id": "test", "is_real_atom": is_real_atom} + + # Spoof the confidence loss Hydra configuration + cfg = DictConfig({ + "plddt": { + "weight": 1.0, + "n_bins": 50, + "max_value": 1.0, + }, + "pae": { + "weight": 1.0, + "n_bins": 64, + "max_value": 32, + }, + "pde": { + "weight": 1.0, + "n_bins": 64, + "max_value": 32, + }, + }) + + output = compile_af3_confidence_outputs( + plddt_logits=outputs["confidence"]["plddt_logits"], + pae_logits=outputs["confidence"]["pae_logits"], + pde_logits=outputs["confidence"]["pde_logits"], + chain_iid_token_lvl=outputs["confidence"]["chain_iid_token_lvl"], + is_real_atom=is_real_atom, + example_id="test", + confidence_loss_cfg=cfg ) + num_chains = len(np.unique(outputs["confidence"]["chain_iid_token_lvl"])) num_interfaces = num_chains * (num_chains - 1) // 2 num_batches = outputs["confidence"]["plddt_logits"].shape[0] + df = output["confidence_df"] + target_columns = [ "example_id", "chain_chainwise", @@ -103,7 +98,7 @@ def test_unbin_pae_logits(): init = partial(initialize_chemdata) init() - 
seed_all(42) + seed_everything(42) outputs = { "confidence": { "rf2aa_seq": torch.randint(0, 21, (L,)), @@ -113,25 +108,6 @@ def test_unbin_pae_logits(): "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), } } - data = AttrDict( - { - "plddt": { - "weight": 1.0, - "n_bins": 50, - "max_value": 1.0, - }, - "pae": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - "pde": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - } - ) is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] outputs["confidence"]["is_real_atom"] = is_real_atom @@ -152,7 +128,7 @@ def test_unbin_pde_logits(): init = partial(initialize_chemdata) init() - seed_all(42) + seed_everything(42) outputs = { "confidence": { "rf2aa_seq": torch.randint(0, 21, (L,)), @@ -162,25 +138,6 @@ def test_unbin_pde_logits(): "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), } } - data = AttrDict( - { - "plddt": { - "weight": 1.0, - "n_bins": 50, - "max_value": 1.0, - }, - "pae": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - "pde": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - } - ) is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] outputs["confidence"]["is_real_atom"] = is_real_atom @@ -190,7 +147,6 @@ def test_unbin_pde_logits(): num_bins=n_bins, ) - assert torch.allclose(torch.mean(pde_unbinned), pde, atol=1e-2) assert torch.allclose(torch.mean(pde_unbinned), torch.tensor(16.00), atol=1e-2) assert pde_unbinned.shape == (1, L, L) @@ -203,7 +159,7 @@ def test_unbin_plddt_logits(): init = partial(initialize_chemdata) init() - seed_all(42) + seed_everything(42) outputs = { "confidence": { "rf2aa_seq": torch.randint(0, 21, (L,)), @@ -213,25 +169,6 @@ def test_unbin_plddt_logits(): "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), } } - data = AttrDict( - { - "plddt": { - "weight": 1.0, - "n_bins": 50, - "max_value": 1.0, - }, - "pae": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - 
"pde": { - "weight": 1.0, - "n_bins": 64, - "max_value": 32, - }, - } - ) is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] outputs["confidence"]["is_real_atom"] = is_real_atom @@ -241,7 +178,6 @@ def test_unbin_plddt_logits(): n_bins, ) - assert torch.allclose(torch.mean(plddt_unbinned), plddt, atol=1e-2) assert plddt_unbinned.shape == (1, L, ChemData().NHEAVY) @@ -251,3 +187,6 @@ def test_bin_midpoints(): expected_bins = torch.linspace(0.25, 31.75, 64, device="cpu") pae_bins = find_bin_midpoints(max_distance, num_bins) assert torch.allclose(pae_bins, expected_bins) + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/rf2aa/tools/config b/src/modelhub/tools/config similarity index 100% rename from rf2aa/tools/config rename to src/modelhub/tools/config diff --git a/rf2aa/tools/debug_item.py b/src/modelhub/tools/debug_item.py similarity index 95% rename from rf2aa/tools/debug_item.py rename to src/modelhub/tools/debug_item.py index eeed71c..74c9946 100644 --- a/rf2aa/tools/debug_item.py +++ b/src/modelhub/tools/debug_item.py @@ -4,17 +4,17 @@ import unittest import torch from hydra import compose, initialize -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.data.compose_dataset import ( +from modelhub.chemical import ChemicalData as ChemData +from modelhub.data.compose_dataset import ( compose_single_item_dataset, set_data_loader_params, ) -from rf2aa.data.dataloader_adaptor import prepare_input -from rf2aa.data.loaders.rcsb_loader import loader_sm_compl_assembly -from rf2aa.tensor_util import assert_shape -from rf2aa.trainer_new import trainer_factory -from rf2aa.training.recycling import recycle_step_legacy -from rf2aa.util import is_atom, writepdb +from modelhub.data.dataloader_adaptor import prepare_input +from modelhub.data.loaders.rcsb_loader import loader_sm_compl_assembly +from modelhub.tensor_util import assert_shape +from modelhub.trainer_new import trainer_factory +from modelhub.training.recycling 
import recycle_step_legacy +from modelhub.util import is_atom, writepdb #### Setup test case hyperparams @@ -124,7 +124,6 @@ def transfer_tensors_to_device(obj_list, device): def check_inputs(inputs): - pdb.set_trace() ( seq, diff --git a/rf2aa/tools/edit_lengths_for_removed_datasets.py b/src/modelhub/tools/edit_lengths_for_removed_datasets.py similarity index 98% rename from rf2aa/tools/edit_lengths_for_removed_datasets.py rename to src/modelhub/tools/edit_lengths_for_removed_datasets.py index 9cbbd0a..d09435b 100644 --- a/rf2aa/tools/edit_lengths_for_removed_datasets.py +++ b/src/modelhub/tools/edit_lengths_for_removed_datasets.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd import torch -from rf2aa.data.compose_dataset import set_data_loader_params -from rf2aa.data.data_loader import get_train_valid_set +from modelhub.data.compose_dataset import set_data_loader_params +from modelhub.data.data_loader import get_train_valid_set def _compute_name_of_row(row: pd.Series) -> str: diff --git a/rf2aa/tools/generate_sample_lengths.py b/src/modelhub/tools/generate_sample_lengths.py similarity index 98% rename from rf2aa/tools/generate_sample_lengths.py rename to src/modelhub/tools/generate_sample_lengths.py index 977411c..cd668df 100644 --- a/rf2aa/tools/generate_sample_lengths.py +++ b/src/modelhub/tools/generate_sample_lengths.py @@ -10,8 +10,8 @@ import torch from torch.utils import data from tqdm import tqdm -from rf2aa.chemical import load_pdb_ideal_sdf_strings -from rf2aa.data.data_loader import ( +from modelhub.chemical import load_pdb_ideal_sdf_strings +from modelhub.data.data_loader import ( DistilledDataset, default_dataloader_params, get_train_valid_set, diff --git a/rf2aa/tools/minimize_example.py b/src/modelhub/tools/minimize_example.py similarity index 89% rename from rf2aa/tools/minimize_example.py rename to src/modelhub/tools/minimize_example.py index adde4ca..b6137c7 100644 --- a/rf2aa/tools/minimize_example.py +++ 
b/src/modelhub/tools/minimize_example.py @@ -7,9 +7,9 @@ import torch warnings.filterwarnings("ignore") -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.data.compose_dataset import compose_single_item_dataset -from rf2aa.training.recycling import recycle_sampling +from modelhub.chemical import ChemicalData as ChemData +from modelhub.data.compose_dataset import compose_single_item_dataset +from modelhub.training.recycling import recycle_sampling # goal is to test all the configs on a broad set of datasets gpu = "cuda:0" if torch.cuda.is_available() else "cpu" @@ -75,7 +75,7 @@ def test_minimize_example(example, trainer): @hydra.main(config_path="../config/train", config_name="base") def main(config): - from rf2aa.trainer_new import trainer_factory + from modelhub.trainer_new import trainer_factory trainer = trainer_factory[config.experiment.trainer](config) example = data["pdb"] @@ -88,7 +88,7 @@ def main(config): if __name__ == "__main__": - from rf2aa.tests.test_conditions import setup_data + from modelhub.tests.test_conditions import setup_data data = setup_data() main() diff --git a/src/modelhub/train.py b/src/modelhub/train.py new file mode 100755 index 0000000..c10ded6 --- /dev/null +++ b/src/modelhub/train.py @@ -0,0 +1,181 @@ +#!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../scripts/shebang/modelhub_exec.sh" "$0" "$@"' + +import logging + +import hydra +import rootutils +from omegaconf import DictConfig +import os + +# Setup root dir and environment variables (more info: https://github.com/ashleve/rootutils) +# NOTE: Sets the `PROJECT_ROOT` environment variable to the root directory of the project (where `.project-root` is located) +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +# If the user has set `PROJECT_PATH`, use it to build the config path; otherwise, fall back to `PROJECT_ROOT` +_config_path = os.path.join(os.environ.get("PROJECT_PATH", os.environ["PROJECT_ROOT"]), "configs") + 
_spawning_process_logger = logging.getLogger(__name__)


@hydra.main(config_path=_config_path, config_name="train", version_base="1.3")
def train(cfg: DictConfig) -> None:
    """Run a training job described by the resolved Hydra configuration.

    The function (1) lazily imports the heavy dependencies, (2) seeds the run,
    (3) instantiates loggers, callbacks and the trainer, (4) launches the Fabric
    processes and constructs model / optimizer / scheduler, (5) assembles the
    train and (optional) validation loaders, and (6) calls `trainer.fit`.

    Args:
        cfg: Hydra configuration. The keys read here are `seed`, `logger`,
            `callbacks`, `trainer`, `datasets` (`train`, optional `val`, optional
            `subset_to_example_ids`), `dataloader` (`train`, `val`) and the
            optional `ckpt_path`.

    Raises:
        AssertionError: If no non-empty `datasets.train` configuration is given.
    """
    # ==============================================================================
    # Import dependencies and resolve Hydra configuration
    # ==============================================================================

    _spawning_process_logger.info("Importing dependencies...")

    # Lazy imports to make config generation fast
    import torch
    from lightning.fabric import seed_everything
    from lightning.fabric.loggers import Logger

    # If training on DIGS L40, set precision of matrix multiplication to balance speed and accuracy
    # Reference: https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
    torch.set_float32_matmul_precision("medium")

    from modelhub.callbacks.base import BaseCallback  # noqa
    from modelhub.utils.instantiators import instantiate_loggers, instantiate_callbacks  # noqa
    from modelhub.utils.logging import (
        print_config_tree,
        log_hyperparameters_with_all_loggers,
    )  # noqa
    from modelhub.utils.ddp import RankedLogger  # noqa
    from modelhub.utils.ddp import is_rank_zero, set_accelerator_based_on_availability  # noqa
    from modelhub.utils.datasets import (
        recursively_instantiate_datasets_and_samplers,
        assemble_distributed_loader,
        subset_dataset_to_example_ids,
        assemble_val_loader_dict,
    )  # noqa

    set_accelerator_based_on_availability(cfg)

    ranked_logger = RankedLogger(__name__, rank_zero_only=True)
    _spawning_process_logger.info("Completed dependency imports ...")

    # ... print the configuration tree (NOTE: Only prints for rank 0)
    print_config_tree(cfg, resolve=True)

    # ==============================================================================
    # Logging and Callback instantiation
    # ==============================================================================

    # Reduce the logging level for all dataset and sampler loggers (unless rank 0)
    # We will still see messages from Rank 0; they are identical, since all ranks load and sample from the same datasets
    if not is_rank_zero():
        dataset_logger = logging.getLogger("datasets")
        sampler_logger = logging.getLogger("datahub.samplers")
        dataset_logger.setLevel(logging.WARNING)
        sampler_logger.setLevel(logging.ERROR)

    # ... seed everything (NOTE: By setting `workers=True`, we ensure that the dataloaders are seeded as well)
    # (`PL_GLOBAL_SEED` environment variable will be passed to the spawned subprocesses; e.g., through `ddp_spawn` backend)
    # NOTE: Compare against `None` rather than relying on truthiness, so that `seed=0` is honored as a real seed.
    seed = cfg.get("seed")
    if seed is not None:
        ranked_logger.info(f"Seeding everything with seed={seed}...")
        seed_everything(seed, workers=True, verbose=True)
    else:
        ranked_logger.warning("No seed provided - Not seeding anything!")

    ranked_logger.info("Instantiating loggers...")
    loggers: list[Logger] = instantiate_loggers(cfg.get("logger"))

    ranked_logger.info("Instantiating callbacks...")
    callbacks: list[BaseCallback] = instantiate_callbacks(cfg.get("callbacks"))

    # ==============================================================================
    # Trainer and model instantiation
    # ==============================================================================

    # ... instantiate the trainer
    ranked_logger.info("Instantiating trainer...")
    trainer = hydra.utils.instantiate(
        cfg.trainer,
        loggers=loggers or None,
        callbacks=callbacks or None,
        _convert_="partial",
        _recursive_=False,
    )
    # (Store the Hydra configuration in the trainer state)
    trainer.initialize_or_update_trainer_state({"train_cfg": cfg})

    # ... spawn processes for distributed training
    # (We spawn here, rather than within `fit`, so we can use Fabric's `init_module` to efficiently initialize the model on the appropriate device)
    ranked_logger.info(
        f"Spawning {trainer.fabric.world_size} processes from {trainer.fabric.global_rank}..."
    )
    trainer.fabric.launch()

    # ... construct the model
    trainer.construct_model()

    # ... construct the optimizer and schedule (which requires the model to be constructed)
    trainer.construct_optimizer()
    trainer.construct_scheduler()

    # ==============================================================================
    # Dataset instantiation
    # ==============================================================================

    # Number of examples per epoch (across all GPUs)
    # (We must sample this many indices from our sampler)
    n_examples_per_epoch = cfg.trainer.n_examples_per_epoch

    # ... build the train dataset
    assert (
        "train" in cfg.datasets and cfg.datasets.train
    ), "No 'train' dataloader configuration provided! If only performing validation, use `validate.py` instead."
    dataset_and_sampler = recursively_instantiate_datasets_and_samplers(
        cfg.datasets.train
    )
    train_dataset, train_sampler = (
        dataset_and_sampler["dataset"],
        dataset_and_sampler["sampler"],
    )

    # ... compose the train loader
    if "subset_to_example_ids" in cfg.datasets:
        # Backdoor for debugging and overfitting: subset the dataset to a specific set of example IDs
        train_dataset = subset_dataset_to_example_ids(
            train_dataset, cfg.datasets.subset_to_example_ids
        )
        train_sampler = None  # Sampler is no longer valid, since we are using a subset of the dataset

    train_loader = assemble_distributed_loader(
        dataset=train_dataset,
        sampler=train_sampler,
        rank=trainer.fabric.global_rank,
        world_size=trainer.fabric.world_size,
        n_examples_per_epoch=n_examples_per_epoch,
        loader_cfg=cfg.dataloader["train"],
    )

    # ... compose the validation loader(s)
    if "val" in cfg.datasets and cfg.datasets.val:
        val_loaders = assemble_val_loader_dict(
            cfg=cfg.datasets.val,
            rank=trainer.fabric.global_rank,
            world_size=trainer.fabric.world_size,
            loader_cfg=cfg.dataloader["val"],
        )
    else:
        ranked_logger.warning("No validation datasets provided! Skipping validation...")
        val_loaders = None

    ranked_logger.info("Logging hyperparameters...")
    log_hyperparameters_with_all_loggers(
        trainer=trainer, cfg=cfg, model=trainer.state["model"]
    )

    # ... train the model
    ranked_logger.info("Training model...")
    ckpt_path = cfg.ckpt_path if "ckpt_path" in cfg else None
    trainer.fit(
        train_loader=train_loader,
        val_loaders=val_loaders,
        ckpt_path=ckpt_path,
    )


if __name__ == "__main__":
    train()
RosettaFold -from rf2aa.model.RoseTTAFoldModel import LegacyRoseTTAFoldModule -from rf2aa.training.EMA import EMA, count_parameters -from rf2aa.training.optimizer import add_weight_decay -from rf2aa.training.recycling import ( +from modelhub.flow_matching.interpolant import Interpolant +from modelhub.flow_matching.sampler import AllAtomSampler +from modelhub.loss.loss_factory import get_loss_and_misc +from modelhub.model.network import RosettaFold +from modelhub.model.RoseTTAFoldModel import LegacyRoseTTAFoldModule +from modelhub.training.EMA import EMA, count_parameters +from modelhub.training.optimizer import add_weight_decay +from modelhub.training.recycling import ( recycle_sampling, recycle_step_gen, recycle_step_legacy, recycle_step_packed, ) -from rf2aa.training.scheduler import get_stepwise_decay_schedule_with_warmup -from rf2aa.util_module import XYZConverter +from modelhub.training.scheduler import get_stepwise_decay_schedule_with_warmup +from modelhub.util_module import XYZConverter logger = logging.getLogger(__name__) diff --git a/src/modelhub/trainers/af3.py b/src/modelhub/trainers/af3.py new file mode 100644 index 0000000..356be79 --- /dev/null +++ b/src/modelhub/trainers/af3.py @@ -0,0 +1,574 @@ +from modelhub.trainers.fabric import FabricTrainer +from beartype.typing import Any +from modelhub.utils.recycling import get_recycle_schedule +from lightning_utilities import apply_to_collection +import torch + +import hydra +from modelhub.utils.ddp import RankedLogger +from modelhub.utils.torch_utils import assert_no_nans, assert_same_shape +from einops import repeat +from modelhub.utils.ddp import RankedLogger +from modelhub.utils.predicted_error import ( + compute_batch_indices_with_lowest_predicted_error, +) +from modelhub.training.EMA import EMA +from modelhub.metrics.base import MetricManager +from modelhub.loss.af3_losses import Loss as AF3Loss +from modelhub.loss.af3_losses import ( + ResidueSymmetryResolution, + SubunitSymmetryResolution, +) +from 
from omegaconf import DictConfig
from jaxtyping import Float, Int
from modelhub.utils.io import build_stack_from_atom_array_and_batched_coords

ranked_logger = RankedLogger(__name__, rank_zero_only=True)


def _remap_outputs(
    xyz: Float[torch.Tensor, "D L 3"], mapping: Int[torch.Tensor, "D L"]
) -> Float[torch.Tensor, "D L 3"]:
    """Scatter each batch element's coordinates to the positions given by `mapping`.

    For every batch index `i`, row `j` of the original `xyz[i]` is written to
    row `mapping[i][j]`. The update happens in place on `xyz` (each slice is
    cloned first, so the reads are not affected by the writes) and the same
    tensor object is returned.
    """
    for i in range(xyz.shape[0]):
        xyz[i, mapping[i]] = xyz[i].clone()
    return xyz


class AF3Trainer(FabricTrainer):
    """Standard Trainer for AF3-style models"""

    def __init__(
        self,
        *,
        n_recycles_train: int | None = None,
        loss: DictConfig | dict | None = None,
        metrics: DictConfig | dict | None = None,
        **kwargs,
    ):
        """See `FabricTrainer` for the additional initialization arguments.

        Args:
            n_recycles_train: Maximum number of recycles (per-batch), for models that support recycling. During training, the model will be recycled a
                random number of times between 1 and `n_recycles_train`. During inference, we determine the number of recycles from the MSA stack shape. However,
                for training, we must sample the number of recycles upfront, so all GPUs within a distributed batch can sample the same number of recycles.
            loss: Configuration for the loss function. If None, the loss function will not be instantiated.
            metrics: Configuration for the metrics. If None, the metrics will not be instantiated.
        """
        super().__init__(**kwargs)

        # (Initialize recycle schedule upfront so all GPU's can sample the same number of recycles within a batch)
        self.n_recycles_train = n_recycles_train
        self.recycle_schedule = get_recycle_schedule(
            max_cycle=n_recycles_train,
            n_epochs=self.max_epochs,  # Set by FabricTrainer
            n_train=self.n_examples_per_epoch,  # Set by FabricTrainer
            world_size=self.fabric.world_size,
        )  # [n_epochs, n_examples_per_epoch // world_size]

        # Metrics
        # (We could have instantiated loss and metrics recursively, but we prioritize being explicit)
        self.metrics = (
            MetricManager.instantiate_from_hydra(metrics_cfg=metrics)
            if metrics
            else None
        )

        # Loss
        self.loss = AF3Loss(**loss) if loss else None

        # (Symmetry resolution)
        self.subunit_symm_resolve = SubunitSymmetryResolution()
        self.residue_symm_resolve = ResidueSymmetryResolution()

    def construct_model(self):
        """Construct the model and optionally wrap with EMA."""
        # ... instantiate model with Hydra and Fabric
        with self.fabric.init_module():
            ranked_logger.info("Instantiating model...")

            model = hydra.utils.instantiate(
                self.state["train_cfg"].model.net,
                _recursive_=False,
            )

            # Optionally, wrap the model with EMA
            if self.state["train_cfg"].model.ema is not None:
                ranked_logger.info("Wrapping model with EMA...")
                model = EMA(model, **self.state["train_cfg"].model.ema)

        self.initialize_or_update_trainer_state({"model": model})

    def _assemble_network_inputs(self, example: dict) -> dict:
        """Assemble and validate the network inputs.

        Builds the `X_noisy_L` / `t` / `f` input dictionary from `example`. NaNs in
        the noisy coordinates are zero-filled (with a warning) while the model is in
        training mode and re-raised otherwise; NaNs in the features always raise.
        A fixed set of features is cast to bfloat16.
        """
        assert_same_shape(example["coord_atom_lvl_to_be_noised"], example["noise"])
        network_input = {
            "X_noisy_L": example["coord_atom_lvl_to_be_noised"] + example["noise"],
            "t": example["t"],
            "f": example["feats"],
        }

        try:
            assert_no_nans(
                network_input["X_noisy_L"],
                msg=f"network_input (X_noisy_L) for example_id: {example['example_id']}",
            )
        except AssertionError as e:
            if self.state["model"].training:
                # In some cases, we may indeed have NaNs in the noisy coordinates; we can safely replace them with zeros,
                # and begin noising of those coordinates (which will not have their loss computed) from the origin.
                # Such a situation could occur if there was a chain in the crop with no resolved residues (but that contained resolved
                # residues outside the crop); we then would not be able to resolve the missing coordinates to their "closest resolved neighbor"
                # within the same chain.
                network_input["X_noisy_L"] = torch.nan_to_num(
                    network_input["X_noisy_L"]
                )
                ranked_logger.warning(str(e))
            else:
                # During validation, since we do not crop, there should be no NaN's in the coordinates to noise
                # (They were either removed, as is done with fully unresolved chains, or resolved according to our pipeline's rules)
                raise

        assert_no_nans(
            network_input["f"],
            msg=f"NaN detected in `feats` for example_id: {example['example_id']}",
        )

        # Force-cast some features to bfloat16 for mixed precision training
        # TODO: Use Fabric's AMP instead
        for x in [
            "msa_stack",
            "profile",
            "template_distogram",
            "template_restype",
            "template_unit_vector",
        ]:
            if x in network_input["f"]:
                network_input["f"][x] = network_input["f"][x].to(torch.bfloat16)

        return network_input

    def _assemble_loss_extra_info(self, example: dict) -> dict:
        """Assembles metadata arguments to the loss function (incremental to the network inputs and outputs)."""
        # ... reshape
        diffusion_batch_size = example["coord_atom_lvl_to_be_noised"].shape[0]
        X_gt_L = repeat(
            example["ground_truth"]["coord_atom_lvl"],
            "l c -> d l c",
            d=diffusion_batch_size,
        )  # [L, 3] -> [D, L, 3] with broadcasting
        crd_mask_L = repeat(
            example["ground_truth"]["mask_atom_lvl"],
            "l -> d l",
            d=diffusion_batch_size,
        )  # [L] -> [D, L] with broadcasting

        return {
            "X_gt_L": X_gt_L,  # [D, L, 3]
            "crd_mask_L": crd_mask_L,  # [D, L]
            "X_rep_atoms_I": example["ground_truth"]["coord_token_lvl"],  # [D, I, 3]
            "crd_mask_rep_atoms_I": example["ground_truth"]["mask_token_lvl"],  # [D, I]
        }

    def _assemble_metrics_extra_info(self, example: dict, network_output: dict) -> dict:
        """Prepares the extra info for the metrics.

        NOTE: `network_output` is accepted for interface symmetry but is not read here.
        """
        # We need the same information as for the loss...
        metrics_extra_info = self._assemble_loss_extra_info(example)

        # ... and possibly some additional metadata from the example dictionary
        # TODO: Generalize, so we always use the `extra_info` key, rather than unpacking the ground truth as well
        metrics_extra_info.update(
            {
                # TODO: Remove, instead using `extra_info` for all keys
                **{
                    k: example["ground_truth"][k]
                    for k in [
                        "interfaces_to_score",
                        "pn_units_to_score",
                        "chain_iid_token_lvl",
                    ]
                    if k in example["ground_truth"]
                },
                "example_id": example[
                    "example_id"
                ],  # We require the example ID for logging
                # (From the parser)
                **example.get("extra_info", {}),
            }
        )

        # (Create a shallow copy to avoid modifying the original dictionary)
        return {**metrics_extra_info}

    def training_step(
        self,
        batch: Any,
        batch_idx: int,
        is_accumulating: bool,
    ) -> None:
        """Training step, running forward and backward passes.

        Args:
            batch: The current batch; can be of any form.
            batch_idx: The index of the current batch.
            is_accumulating: Whether we are accumulating gradients (i.e., not yet calling optimizer.step()).
                If this is the case, we should skip the synchronization during the backward pass.

        Returns:
            None; we call `loss.backward()` directly, and store the outputs in `self._current_train_return`.
        """
        model = self.state["model"]
        assert model.training, "Model must be training!"

        # Recycling
        # (Number of recycles for the current batch; shared across all GPUs within a distributed batch)
        n_cycle = self.recycle_schedule[self.state["current_epoch"], batch_idx].item()

        with self.fabric.no_backward_sync(model, enabled=is_accumulating):
            # (We assume batch size of 1 for structure predictions)
            example = batch[0] if not isinstance(batch, dict) else batch

            network_input = self._assemble_network_inputs(example)

            # Forward pass (without rollout)
            network_output = model.forward(input=network_input, n_cycle=n_cycle)
            assert_no_nans(
                network_output,
                msg=f"network_output for example_id: {example['example_id']}",
            )

            loss_extra_info = self._assemble_loss_extra_info(example)

            total_loss, loss_dict_batched = self.loss(
                network_input=network_input,
                network_output=network_output,
                # TODO: Rename `loss_input` to `extra_info` to pattern-match metrics
                loss_input=loss_extra_info,
            )

            # Backward pass
            self.fabric.backward(total_loss)

        # ... store the outputs without gradients for use in logging, callbacks, learning rate schedulers, etc.
        self._current_train_return = apply_to_collection(
            {"total_loss": total_loss, "loss_dict": loss_dict_batched},
            dtype=torch.Tensor,
            function=lambda x: x.detach(),
        )

    def validation_step(
        self,
        batch: Any,
        batch_idx: int,
        compute_metrics: bool = True,
    ) -> dict:
        """Validation step, running forward pass and computing validation metrics.

        Args:
            batch: The current batch; can be of any form.
            batch_idx: The index of the current batch.
            compute_metrics: Whether to compute metrics. If False, metrics are skipped and the returned
                `metrics_output` is an empty dict. Set to False during the inference pipeline, where we need
                the network output but cannot compute metrics (since we do not have the ground truth).

        Returns:
            dict: Output dictionary containing the validation metrics and network output.
        """
        model = self.state["model"]
        assert not model.training, "Model must be in evaluation mode during validation!"

        example = batch[0] if not isinstance(batch, dict) else batch

        network_input = self._assemble_network_inputs(example)

        assert_no_nans(
            network_input,
            msg=f"network_input for example_id: {example['example_id']}",
        )

        # ... forward pass (with rollout)
        # (Note that forward() passes to the EMA/shadow model if the model is not training)
        network_output = model.forward(
            input=network_input,
            n_cycle=example["feats"]["msa_stack"].shape[
                0
            ],  # Determine the number of recycles from the MSA stack shape
            coord_atom_lvl_to_be_noised=example["coord_atom_lvl_to_be_noised"],
        )

        assert_no_nans(
            network_output,
            msg=f"network_output for example_id: {example['example_id']}",
        )

        metrics_output = {}
        if compute_metrics:
            assert self.metrics is not None, "Metrics are not defined!"

            metrics_extra_info = self._assemble_metrics_extra_info(example, network_output)

            # Symmetry resolution
            # TODO: Refactor such that symmetry returns the ideal coordinate permutation, we apply permutation, and pass adjusted prediction to metrics
            # (without needing to use `extra_info` as we are now)
            # TODO: Update symmetry resolution to be functional (vs. using class variable), take explicit inputs (vs. all from network_output), and use extra_info for the keys it needs
            metrics_extra_info = self.subunit_symm_resolve(
                network_output,
                metrics_extra_info,
                example["symmetry_resolution"],
            )

            metrics_extra_info = self.residue_symm_resolve(
                network_output,
                metrics_extra_info,
                example["automorphisms"],
            )

            metrics_output = self.metrics(
                network_input=network_input,
                network_output=network_output,
                extra_info=metrics_extra_info,
                # (Uses the permuted ground truth after symmetry resolution)
                ground_truth_atom_array_stack=build_stack_from_atom_array_and_batched_coords(
                    metrics_extra_info["X_gt_L"], example.get("atom_array", None)
                ),
                predicted_atom_array_stack=build_stack_from_atom_array_and_batched_coords(
                    network_output["X_L"], example.get("atom_array", None)
                ),
            )

        # Avoid gradients in stored values to prevent memory leaks
        if metrics_output is not None:
            metrics_output = apply_to_collection(metrics_output, torch.Tensor, lambda x: x.detach())

        if network_output is not None:
            network_output = apply_to_collection(network_output, torch.Tensor, lambda x: x.detach())

        return {"metrics_output": metrics_output, "network_output": network_output}


class AF3TrainerWithConfidence(AF3Trainer):
    """AF-3 trainer with rollout and confidence prediction"""

    def construct_model(self):
        """Construct the model as in `AF3Trainer`, then freeze everything except the confidence head."""
        super().construct_model()

        # Freeze gradients for all modules except the confidence head
        for name, param in self.state["model"].named_parameters():
            if "model.confidence_head" not in name:
                param.requires_grad = False

    def training_step(
        self,
        batch: Any,
        batch_idx: int,
        is_accumulating: bool,
    ) -> None:
        """Perform mini-rollout and assess confidence losses on forward pass"""
        model = self.state["model"]
        assert model.training, "Model must be training!"

        # Recycling
        # (Number of recycles for the current batch; shared across all GPUs within a distributed batch)
        n_cycle = self.recycle_schedule[self.state["current_epoch"], batch_idx].item()

        with self.fabric.no_backward_sync(model, enabled=is_accumulating):
            # (We assume batch size of 1 for structure predictions)
            example = batch[0] if not isinstance(batch, dict) else batch

            # Build the base network inputs and add confidence-specific inputs
            network_input = self._assemble_network_inputs(example)
            network_input.update(
                {
                    "seq": example["confidence_feats"]["rf2aa_seq"],
                    "rep_atom_idxs": example["ground_truth"]["rep_atom_idxs"],
                    "frame_atom_idxs": example["confidence_feats"][
                        "pae_frame_idx_token_lvl_from_atom_lvl"
                    ],
                }
            )

            # Forward pass
            network_output = model.forward(
                input=network_input,
                n_cycle=n_cycle,
                coord_atom_lvl_to_be_noised=example["coord_atom_lvl_to_be_noised"],
            )

            assert_no_nans(
                network_output,
                msg=f"network_output for example_id: {example['example_id']}",
            )

            loss_extra_info = self._assemble_loss_extra_info(example)
            loss_extra_info.update(
                {
                    "seq": example["confidence_feats"]["rf2aa_seq"],
                    "atom_frames": example["confidence_feats"]["atom_frames"],
                    "tok_idx": example["feats"]["atom_to_token_map"],
                    "is_real_atom": example["confidence_feats"]["is_real_atom"],
                    "rep_atom_idxs": example["ground_truth"]["rep_atom_idxs"],
                    "frame_atom_idxs": example["confidence_feats"][
                        "pae_frame_idx_token_lvl_from_atom_lvl"
                    ],
                }
            )

            # Remap X_L to the rollout X_L so ground truth matches rollout batch dimension during the symmetry resolution
            # NOTE: Since `X_L` derives from the rollout, we cannot compute standard training loss and perform gradient updates
            network_output["X_L"] = network_output["X_pred_rollout_L"]

            loss_extra_info = self.subunit_symm_resolve(
                network_output, loss_extra_info, example["symmetry_resolution"]
            )
            loss_extra_info = self.residue_symm_resolve(
                network_output, loss_extra_info, example["automorphisms"]
            )

            # We only assess the confidence loss
            total_loss, loss_dict_batched = self.loss(
                network_input=network_input,
                network_output=network_output,
                # TODO: Rename `loss_input` to `extra_info` to pattern-match metrics
                loss_input=loss_extra_info,
            )

            # Backward pass
            self.fabric.backward(total_loss)

        # ... store the outputs without gradients for use in logging, callbacks, learning rate schedulers, etc.
        self._current_train_return = apply_to_collection(
            {"total_loss": total_loss, "loss_dict": loss_dict_batched},
            dtype=torch.Tensor,
            function=lambda x: x.detach(),
        )

    def validation_step(
        self,
        batch: Any,
        batch_idx: int,
        compute_metrics: bool = True,
    ) -> dict:
        """Validation step, running forward pass and computing validation metrics.

        Args:
            batch: The current batch; can be of any form.
            batch_idx: The index of the current batch.
            compute_metrics: Whether to compute metrics. If False, metrics are skipped and the returned
                `metrics_output` is an empty dict. Set to False during the inference pipeline, where we need
                the network output but cannot compute metrics (since we do not have the ground truth).

        Returns:
            dict: Output dictionary containing the validation metrics and network output.
        """
        model = self.state["model"]
        assert not model.training, "Model must be in evaluation mode during validation!"

        example = batch[0] if not isinstance(batch, dict) else batch

        # Build the base network inputs and add confidence-specific inputs
        network_input = self._assemble_network_inputs(example)
        network_input.update(
            {
                "seq": example["confidence_feats"]["rf2aa_seq"],
                "rep_atom_idxs": example["ground_truth"]["rep_atom_idxs"],
                "frame_atom_idxs": example["confidence_feats"][
                    "pae_frame_idx_token_lvl_from_atom_lvl"
                ],
            }
        )

        # ... forward pass (with FULL rollout)
        # (Note that forward() passes to the EMA/shadow model if the model is not training)
        network_output = model.forward(
            input=network_input,
            n_cycle=example["feats"]["msa_stack"].shape[
                0
            ],  # Determine the number of recycles from the MSA stack shape
            coord_atom_lvl_to_be_noised=example["coord_atom_lvl_to_be_noised"],
        )

        # Remap X_L to the rollout X_L
        network_output["X_L"] = network_output["X_pred_rollout_L"]

        assert_no_nans(
            network_output,
            msg=f"network_output for example_id: {example['example_id']}",
        )

        metrics_output = {}
        if compute_metrics:
            assert self.metrics is not None, "Metrics are not defined!"

            # Assemble the base metrics extra info and add confidence-specific inputs
            # (The helper requires `network_output` positionally; omitting it raises a TypeError)
            metrics_extra_info = self._assemble_metrics_extra_info(example, network_output)
            # TODO: Refactor; we should not need pass confidence log config through metrics extra info, it should be a property of the Metric (e.g., passed at `_init_` using Hydra interpolation from the relevant loss config)
            metrics_extra_info.update(
                {
                    "is_real_atom": example["confidence_feats"]["is_real_atom"],
                    "is_ligand": example["feats"]["is_ligand"],
                    # TODO: Refactor so that we pass the relevant values from the config directly to the Metric upon instantiation (reference in Hydra through interpolation)
                    "confidence_loss": self.state[
                        "train_cfg"
                    ].trainer.loss.confidence_loss,
                }
            )

            # Symmetry resolution
            # TODO: Refactor such that symmetry returns the ideal coordinate permutation, we apply permutation, and pass adjusted prediction to metrics
            # (without needing to use `extra_info` as we are now)
            metrics_extra_info = self.subunit_symm_resolve(
                network_output,
                metrics_extra_info,
                example["symmetry_resolution"],
            )

            metrics_extra_info = self.residue_symm_resolve(
                network_output,
                metrics_extra_info,
                example["automorphisms"],
            )

            # Store in `network_output` details about which structures have the lowest predicted error
            network_output["confidence"] = (
                compute_batch_indices_with_lowest_predicted_error(
                    plddt=network_output["plddt"],
                    is_real_atom=metrics_extra_info["is_real_atom"],
                    pae=network_output["pae"],
                    confidence_loss_cfg=self.state[
                        "train_cfg"
                    ].trainer.loss.confidence_loss,
                    chain_iid_token_lvl=metrics_extra_info["chain_iid_token_lvl"],
                    is_ligand=metrics_extra_info["is_ligand"],
                    interfaces_to_score=metrics_extra_info.get(
                        "interfaces_to_score", []
                    ),
                    pn_units_to_score=metrics_extra_info.get("pn_units_to_score", []),
                )
            )

            metrics_output = self.metrics(
                network_input=network_input,
                network_output=network_output,
                extra_info=metrics_extra_info,
            )

            if "X_gt_index_to_X" in metrics_extra_info:
                # Remap outputs to minimize error with ground truth
                # TODO: Remap before computing metrics, so that we can avoid pass `extra_info` to metrics (we instead just pass the remapped prediction)
                mapping = metrics_extra_info["X_gt_index_to_X"]  # [D, L]
                network_output["X_L"] = _remap_outputs(network_output["X_L"], mapping)

        # Avoid gradients in stored values to prevent memory leaks
        metrics_output = (
            apply_to_collection(metrics_output, torch.Tensor, lambda x: x.detach())
            if metrics_output is not None
            else None
        )

        network_output = (
            apply_to_collection(network_output, torch.Tensor, lambda x: x.detach())
            if network_output is not None
            else None
        )

        return {"metrics_output": metrics_output, "network_output": network_output}
+ +References: + - Pytorch Lightning Trainer Example (https://github.com/Lightning-AI/pytorch-lightning/blob/master/examples/fabric/build_your_own_trainer/trainer.py) + - Lightning Hydra Template (https://github.com/ashleve/lightning-hydra-template) +""" + +import math +from beartype.typing import Mapping +from datetime import timedelta +from pathlib import Path +from typing import cast +from beartype.typing import Any, Literal + +import lightning as L +import torch +from lightning.fabric.accelerators import Accelerator +from lightning.fabric.loggers import Logger +from lightning.fabric.strategies import DDPStrategy, Strategy +from lightning.fabric.wrappers import ( + _FabricDataLoader, + _FabricModule, + _FabricOptimizer, +) + +from modelhub.training.EMA import EMA +from modelhub.callbacks.base import BaseCallback +from modelhub.utils.ddp import RankedLogger +from abc import ABC, abstractmethod +import hydra +from modelhub.training.schedulers import SchedulerConfig +from datahub.samplers import set_sampler_epoch + + +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + + +class FabricTrainer(ABC): + def __init__( + self, + *, + accelerator: str | Accelerator = "auto", + strategy: str | Strategy = "ddp", + devices_per_node: list[int] | int | str = "auto", + num_nodes: int = 1, + precision: str | int = "32-true", + callbacks: BaseCallback | list[BaseCallback] | None = None, + loggers: Logger | list[Logger] | None = None, + max_epochs: int = 1000, + grad_accum_steps: int = 1, + validate_every_n_epochs: int = 1, + n_examples_per_epoch: int = 24_000, + output_dir: Path | str | None = None, + checkpoint_every_n_epochs: int = 1, + clip_grad_max_norm: float | None = None, + limit_train_batches: int | float = float("inf"), + limit_val_batches: int | float = float("inf"), + prevalidate: bool = False, + nccl_timeout: int = 3200, + ) -> None: + """Base Trainer class built around Lightning Fabric. + + Args: + accelerator: The hardware to run on. See (1) for details. 
Possible choices are: + ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``. + strategy: Strategy for how to run across multiple devices. See (1) for details. Possible choices are: + ``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"fsdp"``. + devices_per_node: Number of devices to train on per machine (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``. + See (1) for details. + EXAMPLE: If you run on 2 nodes with 8 GPUs each, you would set ``devices_per_node=8``, not ``16``. + num_nodes: Number of machines (nodes) for distributed training (default: 1). See (1) for details. + precision: Double precision (``"64"``), full precision (``"32"``), half precision AMP (``"16-mixed"``), + or bfloat16 precision AMP (``"bf16-mixed"``). See (2) for details. + callbacks: A single callback or a list of callbacks, each inheriting the BaseCallback Abstract Base Class. + loggers: A single logger or a list of loggers. See (3) for details. + max_epochs: Maximum number of epochs to train for (default: 1000). + grad_accum_steps: Number of batches to process before calling optimizer.step() (default: 1). See (4) for details on gradient accumulation in Fabric. + validate_every_n_epochs: Number of epochs between validation runs (default: 1). + n_examples_per_epoch: Number of examples to sample per epoch, across all GPUs. E.g., number of distinct examples that will + be "seen" by the model in a single epoch. If smaller than the the number implied by the dataloader, we will + alert a warning and use the smaller number. + output_dir: Directory to save checkpoints, metrics, intermediate validation strructures, etc. (default: None). + checkpoint_every_n_epochs: Number of epochs between saving checkpoints (default: 1). + clip_grad_max_norm: Maximum gradient norm to clip to (default: None). If None, no gradient clipping is performed. + limit_train_batches: Limit on the number of training batches per epoch (default: float("inf")). 
+ Helpful for debugging; should NOT be used when training production models. + limit_val_batches: Limit on the number of validation batches per epoch (default: float("inf")). + Helpful for debugging; should NOT be used when training production models. + prevalidate: Whether to run validation before training starts (default: False). + nccl_timeout: Timeout for NCCL operations (default: 3200). Only used with DDP strategy. + + References: + (1) Fabric Arguments (https://lightning.ai/docs/fabric/stable/api/fabric_args.html) + (2) Fabric Precision Documentation (https://lightning.ai/docs/fabric/stable/fundamentals/precision.html) + (3) Fabric Loggers (https://lightning.ai/docs/fabric/2.4.0/api/loggers.html) + (4) Efficient Gradient Accumulation (https://lightning.ai/docs/fabric/2.4.0/advanced/gradient_accumulation.html) + """ + # DDP strategy requires a manual timeout higher than the default + if strategy == "ddp": + strategy = DDPStrategy(timeout=timedelta(seconds=nccl_timeout)) + + # See (1) for initialization arguments for Fabric() + self.fabric = L.Fabric( + accelerator=accelerator, + strategy=strategy, + devices=devices_per_node, + num_nodes=num_nodes, + precision=precision, + callbacks=callbacks, + loggers=loggers, + ) + + # Training + self.clip_grad_max_norm = clip_grad_max_norm + self.grad_accum_steps = grad_accum_steps + + # Stopping + self.max_epochs = max_epochs + self.should_stop = False + self.n_examples_per_epoch = n_examples_per_epoch + self.limit_train_batches = limit_train_batches + self.limit_val_batches = limit_val_batches + + # Validation + self.validate_every_n_epochs = validate_every_n_epochs + self.prevalidate = prevalidate + + # Checkpoints + self.output_dir = Path(output_dir) if output_dir else None + self.checkpoint_every_n_epochs = checkpoint_every_n_epochs + + def initialize_or_update_trainer_state( + self, + updates: dict, + ): + """Initialize or update the state dictionary for the trainer. + + State keys: + model: The model to train. 
+ optimizer: The optimizer to use with the model. May be None for validation/inference. + scheduler_cfg: Learning rate SchedulerConfig (e.g., a LRScheduler with intervals/frequency). May be None for validation/inference or if no scheduler is used. + global_step: Global optimizer step; used by W&B logger, learning rate schedulers, etc. Default is 0. + current_epoch: Global epoch counter; used for validation, learning rate schedulers, checkpointing, etc. Default is 0. + train_cfg: The training configuration dictionary. Used for reinitializing the trainer with the same configuration + (for training or for inference). Default is an empty dictionary. + """ + # Default values for the state + default_state = { + "model": None, + "optimizer": None, + "scheduler_cfg": None, + "global_step": 0, + "current_epoch": 0, + "train_cfg": {}, + } + + # Initialize self.state with default values if it doesn't exist + if not hasattr(self, "state"): + self.state = default_state.copy() + else: + # Ensure existing state has all default keys + for key, value in default_state.items(): + self.state.setdefault(key, value) + + # Merge the updates into the existing state + self.state.update(updates) + + def construct_optimizer(self) -> None: + """Instantiate the optimizer(s) + + We provide a default implementation that instantiates the optimizer(s) from the Hydra configuration. + More complex models (e.g., GANs) may require custom implementations. + """ + assert ( + "model" in self.state and hasattr(self.state["model"], "parameters") + ), "Model not found in state dictionary! You must call `construct_model()` before constructing the optimizer." + + if self.state["train_cfg"].model.optimizer: + # ... 
instantiate the optimizer + optimizer = hydra.utils.instantiate( + self.state["train_cfg"].model.optimizer, + params=self.state["model"].parameters(), + ) + self.initialize_or_update_trainer_state({"optimizer": optimizer}) + + def construct_scheduler(self) -> None: + """Instantiate the learning rate scheduler(s) + + Like optimizers, we provided a default implementation that instantiates the scheduler(s) from the Hydra configuration. + More complex models (e.g., GANs) may require custom implementations. + """ + assert ( + "optimizer" in self.state and self.state["optimizer"] + ), "Optimizer not found in state dictionary! You must call `construct_optimizer()` before constructing the scheduler." + + # ... instantiate the LR scheduler(s) + lr_scheduler = ( + hydra.utils.instantiate( + self.state["train_cfg"].model.lr_scheduler, + optimizer=self.state["optimizer"], + ) + if self.state["train_cfg"].model.lr_scheduler + else None + ) + + if lr_scheduler: + # We assume "interval = step" and "frequency = 1" for the default scheduler; custom implementations may override this method + scheduler_cfg = SchedulerConfig( + scheduler=lr_scheduler, + interval="step", + frequency=1, + ) + self.initialize_or_update_trainer_state({"scheduler_cfg": scheduler_cfg}) + + @abstractmethod + def construct_model(self): + """Instantiate the model, updating the trainer state in-place. + + This method must set the "model" key in the state dictionary using `self.initialize_or_update_trainer_state()`. + For an example, see the `construct_model` method in the `AF3Trainer` + """ + raise NotImplementedError + + def setup_model_optimizers_and_schedulers(self) -> None: + """Setup the model, optimizer(s), and scheduler(s) with Fabric. + + Note that we must call this method after constructing (instantiating) the model, optimizer(s), and scheduler(s). 
+ For details on multi-model and multi-optimizer setups, see: https://lightning.ai/docs/fabric/2.2.3/advanced/multiple_setup.html + """ + assert self.state[ + "model" + ], "You must construct the model before setting up the model, optimizer, and scheduler." + model = self.state["model"] + optimizer = self.state["optimizer"] + + # ... setup the model and optimizer + if optimizer: + model, optimizer = self.fabric.setup(model, optimizer) + else: + model = self.fabric.setup(model) + + # ... update the state dictionary (we avoid updating the state dictionary in-place, which is an anti-pattern) + self.initialize_or_update_trainer_state( + { + "model": model, + "optimizer": optimizer, + } + ) + + def fit( + self, + train_loader: torch.utils.data.DataLoader, + val_loaders: dict[str, torch.utils.data.DataLoader] | None = None, + ckpt_path: Path | str | None = None, + ) -> None: + """Main entry point for training a model. + + Args: + train_loader: Dataloader for training. Must have an iterable returning batches. + val_loaders: Dictionary of dataloaders for validation. The keys are the names of the loaders, and the values are the loaders themselves. + ckpt_path: Path to either: + (a) A previous checkpoint directory from which to resume training from. In this case, we will automatically load + the latest checkpoint using `self.get_latest_checkpoint()`. + (b) A specific checkpoint file to load. In this case, we will load the checkpoint from the specified file. + If None, no checkpoint is loaded, and the model will be trained from scratch. + """ + assert ( + hasattr(self, "state") and "model" in self.state + ), "Model not found in state dictionary! You must call `instantiate_model()` before running fit()." 
+ + # (If we don't have enough examples to sample, we will log a warning and use the smaller number) + if len(train_loader) * self.fabric.world_size < self.n_examples_per_epoch: + ranked_logger.warning( + f"Number of examples per epoch ({self.n_examples_per_epoch}) exceeds the number of examples in the loader: " + f"({len(train_loader) * self.fabric.world_size}). Using the latter." + ) + self.n_examples_per_epoch = len(train_loader) * self.fabric.world_size + self.n_batches_per_epoch = math.ceil( + self.n_examples_per_epoch / self.fabric.world_size + ) + + # ... setup training and validation dataloaders with Fabric + train_loader = self.fabric.setup_dataloaders( + # Our sampler is already distributed, so we don't need to wrap with a DistributedSampler + train_loader, + use_distributed_sampler=False, + ) + + if val_loaders is not None: + for key, loader in val_loaders.items(): + val_loaders[key] = self.fabric.setup_dataloaders( + loader, use_distributed_sampler=False + ) + + self.setup_model_optimizers_and_schedulers() + + if ckpt_path is not None: + ckpt_path = Path(ckpt_path) + if ckpt_path.is_dir(): + # If given a directory, load the latest checkpoint from the directory + ranked_logger.info( + f"Loading latest checkpoint within the directory {ckpt_path}..." 
+ ) + self.load_checkpoint(self.get_latest_checkpoint(ckpt_path)) + else: + # If given a specific checkpoint file, load that checkpoint + self.load_checkpoint(ckpt_path) + + # Increment the global epoch (e.g., if we loaded a checkpoint from [the end of] epoch 5, we should start training at epoch 6) + self.state["current_epoch"] += 1 + # Stopping conditions + if (self.max_epochs is not None and self.state["current_epoch"] >= self.max_epochs): + self.should_stop = True + else: + ranked_logger.info("No checkpoint provided; training from scratch.") + + self.fabric.call("on_fit_start", trainer=self, model=self.state["model"]) + + # Prevalidate + if self.prevalidate and val_loaders: + # Temporarily decrement the current epoch, since we haven't done any training this epoch + self.state["current_epoch"] -= 1 # (Will be -1 if training from scratch) + ranked_logger.info( + f"Prevalidating with epoch {self.state['current_epoch']} before training; to avoid this behavior, set `prevalidate=False` in the Trainer config." + ) + self.validation_loop( + val_loaders=val_loaders, + limit_batches=self.limit_val_batches, + ) + self.state["current_epoch"] += 1 # (Restore the current epoch) + + while not self.should_stop: + # ... train for one epoch + ranked_logger.info( + f"\n+ Starting epoch {self.state['current_epoch']}/{self.max_epochs - 1}\n" + f"+ Total examples per epoch (across all GPU): {self.n_examples_per_epoch}\n" + f"+ Examples per GPU (batches per epoch): {self.n_batches_per_epoch}\n" + f"+ Gradient accumulation steps: {self.grad_accum_steps}\n" + f"+ Expected optimizer steps per epoch: {self.n_batches_per_epoch // self.grad_accum_steps}\n" + ) + + self.train_loop( + train_loader=train_loader, + limit_batches=self.limit_train_batches, + ) + + ranked_logger.info(f"Finished epoch {self.state['current_epoch']}!") + + # ... 
validate, if we're at the validation interval + if self.should_validate and val_loaders: + ranked_logger.info( + f"Starting validation for epoch {self.state['current_epoch']}!" + ) + self.validation_loop( + val_loaders=val_loaders, + limit_batches=self.limit_val_batches, + ) + + # ... step the scheduler, if we're adjusting the learning rate at the epoch-level + self.step_scheduler( + level="epoch", current_value=self.state["current_epoch"] + ) + + # ... save checkpoint, if we've reached the checkpoint interval + if self.state["current_epoch"] % self.checkpoint_every_n_epochs == 0: + self.save_checkpoint() + + # ... increment the epoch + self.state["current_epoch"] += 1 + + # Stopping conditions + if ( + self.max_epochs is not None + and self.state["current_epoch"] >= self.max_epochs + ): + self.should_stop = True + + # Reset for next `fit()` call + self.should_stop = False + + self.fabric.call("on_fit_end", trainer=self) + + def train_loop( + self, + *, + train_loader: _FabricDataLoader, + limit_batches: int | float = float("inf"), + ): + """Train model for a single epoch. + + Args: + train_loader: Dataloader for training. + limit_batches: Limit on the batches during this training epoch. If greater than the number of batches in the + `train_loader`, this argument has no effect. Helpful for debugging; should NOT be used when training production models. 
+ """ + self.fabric.call("on_train_epoch_start", trainer=self) + + assert self.state["model"].training + + # Set sampler epochs + set_sampler_epoch(train_loader.sampler, self.state["current_epoch"]) + + for batch_idx, batch in enumerate(train_loader): + # (End epoch if stopping training completely or maximum desired batches for this epoch reached) + if self.should_stop or batch_idx >= limit_batches: + break + + self.fabric.call( + "on_train_batch_start", batch=batch, batch_idx=batch_idx, trainer=self + ) + + # Optimizer should step if we've accumulated the desired number of gradients + should_optimizer_step = (batch_idx + 1) % self.grad_accum_steps == 0 + + self.training_step( + batch=batch, + batch_idx=batch_idx, + is_accumulating=not should_optimizer_step, + ) + + if should_optimizer_step: + self.fabric.call( + "on_before_optimizer_step", + optimizer=self.state["optimizer"], + trainer=self, + ) + + # ... step the optimizer, clipping gradients and updating EMA parameters if applicable + self.step_optimizer() + + self.fabric.call( + "optimizer_step", optimizer=self.state["optimizer"], trainer=self + ) + + self.fabric.call( + "on_train_batch_end", + outputs=self._current_train_return, + batch=batch, + batch_idx=batch_idx, + trainer=self, + ) + + if should_optimizer_step: + # ... step the scheduler, if we're adjusting the learning rate at the optimizer step-level + self.step_scheduler( + level="step", current_value=self.state["global_step"] + ) + + # ... increment the global step, if optimizer stepped + # NOTE: Each node maintains its own global step + self.state["global_step"] += int(should_optimizer_step) + + self.fabric.call("on_train_epoch_end", trainer=self) + + def validation_loop( + self, + *, + val_loaders: dict[str, _FabricDataLoader], + limit_batches: int | float = float("inf"), + ): + """Run validation loop for a single validation epoch. + + Args: + val_loader: Dictionary of Dataloaders (more precisely, _FabricDataLoader) for validation. 
+ limit_batches: Limit on the batches during this validation epoch. If greater than the number of batches in the + `val_loader`, this argument has no effect. Helpful for debugging; should NOT be used for production. + """ + # ... set model to evaluation mode + self.state["model"].eval() + + with torch.no_grad(): + # ... assert we're in evaluation mode + assert not self.state["model"].training + + self.fabric.call("on_validation_epoch_start", trainer=self) + + # ... iterate over all validation loaders + for val_loader_name, val_loader in val_loaders.items(): + ranked_logger.info( + f"Running validation on dataset: {val_loader_name}, with {len(val_loader)} batches, with world_size={self.fabric.world_size}." + ) + + for batch_idx, batch in enumerate(val_loader): + # ... end validation epoch if stopping training completely or maximum desired batches for this epoch reached + if self.should_stop or batch_idx >= limit_batches: + break + + self.fabric.call( + "on_validation_batch_start", + batch=batch, + batch_idx=batch_idx, + num_batches=len(val_loader), + trainer=self, + dataset_name=val_loader_name, + ) + + validation_result = self.validation_step( + batch=batch, + batch_idx=batch_idx, + ) + + self.fabric.call( + "on_validation_batch_end", + outputs=validation_result, + batch=batch, + batch_idx=batch_idx, + num_batches=len(val_loader), + trainer=self, + dataset_name=val_loader_name, + ) + + self.fabric.call("on_validation_epoch_end", trainer=self) + + # ... reset the model to training mode + self.state["model"].train() + + @abstractmethod + def training_step( + self, + batch: Any, + batch_idx: int, + is_accumulating: bool, + ) -> None: + """Training step, running forward and backward passes. + + Args: + batch: The current batch; can be of any form. + batch_idx: The index of the current batch. + is_accumulating: Whether we are accumulating gradients (i.e., not yet calling optimizer.step()). 
+ If this is the case, we should skip the synchronization during the backward pass. + + Returns: + torch.Tensor | Mapping[str, Any]: The loss tensor or a dictionary containing the loss tensor. + """ + pass + + @abstractmethod + def validation_step( + self, + batch: Any, + batch_idx: int, + val_loader_name: str | None = None, + ) -> dict: + """Validation step, running forward pass. + + Args: + batch: The current batch; can be of any form. + batch_idx: The index of the current batch (within that validation loader). + val_loader_name: The name of the validation loader, if applicable. + + Returns: + dict: A dictionary containing the output of the designated validation metrics. + """ + pass + + def validate( + self, + val_loaders: dict, + ckpt_path: Path | str, + ) -> None: + """Validate a model using the given dataloaders and checkpoint. + + Args: + model: The PyTorch model to validate. + val_loaders: A dictionary of dataloaders for validation, where keys are names and values are dataloaders. + ckpt_path: Path to a specific checkpoint file to load. If None, the model will be validated as is. + """ + assert ( + hasattr(self, "state") and "model" in self.state + ), "Model not found in state dictionary! You must call `instantiate_model()` before running validate()." + + self.setup_model_optimizers_and_schedulers() + + self.load_checkpoint(ckpt_path) + + # Setup validation dataloaders with Fabric + for key, loader in val_loaders.items(): + val_loaders[key] = self.fabric.setup_dataloaders( + loader, use_distributed_sampler=False + ) + + # Run the validation loop + self.validation_loop( + val_loaders=val_loaders, limit_batches=self.limit_val_batches + ) + + def step_optimizer(self): + """Step the optimizer. + + This method must be called only when the optimizer is stepped (i.e., after accumulating the desired number of gradients). + + We then perform following steps: + 1. Clip gradients, if applicable. + 2. Step the optimizer. + 3. Zero the gradients. + 4. 
Update the EMA parameters, if applicable. + """ + assert "optimizer" in self.state and isinstance( + self.state["optimizer"], _FabricOptimizer + ) + assert "model" in self.state and isinstance( + self.state["model"], _FabricModule | EMA + ) + + optimizer = self.state["optimizer"] + model = self.state["model"] + + # ... clip gradients, if applicable + if self.clip_grad_max_norm is not None: + self.fabric.clip_gradients( + module=model, + optimizer=optimizer, + max_norm=self.clip_grad_max_norm, + ) + + # ... step the optimizer + optimizer.step() + + # ... zero gradients + optimizer.zero_grad() + + # ... update EMA parameters, if applicable + if hasattr(model, "update"): + model.update() + + def step_scheduler( + self, + level: Literal["epoch", "step"], + current_value: int, + ): + """Step the learning rate scheduler. + + Args: + level: The level at which to step the scheduler. Either "epoch" or "step". + current_value: The current epoch or step value. + """ + # (No scheduler) + if "scheduler_cfg" not in self.state or self.state["scheduler_cfg"] is None: + return + else: + scheduler_cfg = self.state["scheduler_cfg"] + + # (Wrong interval; e.g., we adjust learning rate every epoch, but we are stepping at the step level) + if scheduler_cfg.interval != level: + return + + # (Right interval, but wrong frequency) + if current_value % cast(int, scheduler_cfg.frequency) != 0: + return + + # ... step the scheduler + scheduler_cfg.scheduler.step() + + def save_checkpoint(self) -> None: + """Saves a checkpoint with current state to `self.output_dir/ckpt`. + + If no output directory is specified, then no checkpoint is saved. + """ + # No checkpoint directory; skip saving + if not self.output_dir: + ranked_logger.warning( + "No output directory specified; skipping model checkpointing of state dictionary." + ) + return + + # (Provide a hook to modify the state before saving) + self.fabric.call("on_save_checkpoint", state=self.state, trainer=self) + + # ... 
construct the checkpoint file path using Path + checkpoint_file = ( + self.output_dir / "ckpt" / f"epoch-{self.state['current_epoch']:04d}.ckpt" + ) + + # NOTE: Fabric's `save()` will call the `state_dict()` method on the model, optimizer, and scheduler_cfg + self.fabric.save(checkpoint_file, self.state) + ranked_logger.info(f"Saved checkpoint to: {checkpoint_file}") + + def _load_optimizer(self, ckpt: Mapping) -> None: + """Loads the optimizer state from the checkpoint.""" + if "optimizer" in ckpt and self.state["optimizer"]: + self.state["optimizer"].load_state_dict(ckpt["optimizer"]) + else: + ranked_logger.warning("Skipping optimizer loading...") + + def _load_scheduler(self, ckpt: Mapping) -> None: + """Loads the learning rate scheduler state from the checkpoint.""" + if "scheduler_cfg" in ckpt and self.state["scheduler_cfg"]: + self.state["scheduler_cfg"].load_state_dict(ckpt["scheduler_cfg"]) + else: + ranked_logger.warning("Skipping scheduler loading...") + + def _load_model(self, ckpt: Mapping) -> None: + """Loads the model state from the checkpoint, handling EMA and size mismatches.""" + + def _subset_state_dict_to_valid_params( + current_dict: Mapping, ckpt_dict: Mapping, log_prefix: str = "" + ) -> dict: + """Subset checkpoint to parameters with matching sizes, warn on mismatches.""" + valid_state_dict = {} + for key, ckpt_tensor in ckpt_dict.items(): + if key not in current_dict: + continue # Let strict=False handle missing keys + + if ckpt_tensor.size() != current_dict[key].size(): + ranked_logger.warning( + f"{log_prefix}Size mismatch for '{key}': " + f"model size {tuple(current_dict[key].size())} vs " + f"checkpoint size {tuple(ckpt_tensor.size())}. " + "Skipping this parameter." + ) + else: + valid_state_dict[key] = ckpt_tensor + + return valid_state_dict + + # ... 
load the model, subsetting to parameters with matching sizes + model = self.state["model"] + model.load_state_dict( + _subset_state_dict_to_valid_params(model.state_dict(), ckpt["model"]), + strict=False, + ) + + def load_checkpoint(self, ckpt_path: Path | str) -> None: + """Loads a checkpoint from the specified path.""" + # ... load the checkpoint (replaces the state dictionary in-place) + ranked_logger.info(f"Loading checkpoint from: {ckpt_path}...") + ckpt = self.fabric.load(ckpt_path) + + try: + # ... optimize, scheduler, model + self._load_optimizer(ckpt) + self._load_scheduler(ckpt) + self._load_model(ckpt) + + # ... stateless keys + # (We do not want to load the `train_cfg` in this instance, as it may contain different configurations) + keys_to_ignore = {"model", "optimizer", "scheduler_cfg", "train_cfg"} + self.state.update( + { + key: value + for key, value in ckpt.items() + if key not in keys_to_ignore and key in self.state + } + ) + + # Log warnings for missing and extra keys + state_keys = set(self.state) - keys_to_ignore + ckpt_keys = set(ckpt) - keys_to_ignore + + if missing := state_keys - ckpt_keys: + ranked_logger.warning( + f"Keys found in STATE but not CKPT: {sorted(missing)}" + ) + if extra := ckpt_keys - state_keys: + ranked_logger.warning( + f"Keys found in CKPT but not STATE: {sorted(extra)}" + ) + + ranked_logger.info( + f"Loaded checkpoint. Current epoch: {self.state['current_epoch']}, global step: {self.state['global_step']}" + ) + except Exception as e: + ranked_logger.error( + f"Error loading checkpoint: {e}. Trying to load with legacy settings..." 
+ ) + self.load_legacy_checkpoint(ckpt) + + def load_legacy_checkpoint(self, ckpt: dict) -> dict: + # TODO: Remove when no longer needed + """Backwards-compatibility function to checkpoints with legacy state formats""" + new_model_state = {} + prefixes = {key.split(".")[0] for key in ckpt["final_state_dict"].keys()} + + if "model" not in prefixes: + # (Model-only checkpoints from training, without confidence head) + model_state_dict = { + f"model.{k}": v for k, v in ckpt["final_state_dict"].items() + } + shadow_state_dict = { + f"shadow.{k}": v for k, v in ckpt["model_state_dict"].items() + } + full_state_dict = {**model_state_dict, **shadow_state_dict} + + elif "confidence" in prefixes: + # (Checkpoints with confidence head) + ranked_logger.info("Detected confidence module in checkpoint...") + + # ... replace confidence head keys with model and shadow prefixes + model_state_dict = { + f"model.confidence_head{key[len('confidence'):]}" + if key.startswith("confidence") + else key: value + for key, value in ckpt["final_state_dict"].items() + } + + shadow_state_dict = { + ( + f"shadow.confidence_head{key[len('confidence'):]}" + if key.startswith("confidence") + else f"shadow{key[len('model'):]}" + if key.startswith("model") + else key + ): value + for key, value in ckpt["model_state_dict"].items() + } + full_state_dict = {**model_state_dict, **shadow_state_dict} + else: + raise ValueError("Unknown checkpoint format") + + # ... check shapes (we only load matching shapes to support fine-tuning or adding channels) + state_dict = self.state["model"].state_dict() + for param in state_dict: + if param not in full_state_dict: + ranked_logger.error(f"missing: {param}") + elif full_state_dict[param].shape == state_dict[param].shape: + new_model_state[param] = full_state_dict[param] + else: + ranked_logger.error( + f"wrong size: {param} {full_state_dict[param].shape} {state_dict[param].shape}" + ) + + # ... 
update the state + self.state["model"].load_state_dict(new_model_state, strict=False) + self.state["current_epoch"] = ckpt["epoch"] + + ranked_logger.info( + f"Loaded internal AF3 clone checkpoint into model. Current epoch: {self.state['current_epoch']}, global step: {self.state['global_step']}" + ) + + @staticmethod + def get_latest_checkpoint(ckpt_load_dir: Path) -> Path: + """Returns the latest checkpoint file from the given directory. + + Assumes that checkpoints are stored with filenames such that a standard string-based + sort will correctly order them by creation time (e.g., with epoch numbers, or timestamps). + + Args: + ckpt_load_dir (Path): The directory to search for checkpoint files. + + Returns: + Path: The path to the latest checkpoint file, or None if no checkpoints are found + or if the directory does not exist. + """ + if not ckpt_load_dir.is_dir(): + return None + + # List all files in the directory and sort them + items = sorted(ckpt_load_dir.iterdir()) + + # Return the last item in the sorted list, if any + return items[-1] if items else None + + @property + def should_validate(self) -> bool: + """Whether to currently run validation.""" + return self.state["current_epoch"] % self.validate_every_n_epochs == 0 diff --git a/rf2aa/training/EMA.py b/src/modelhub/training/EMA.py similarity index 54% rename from rf2aa/training/EMA.py rename to src/modelhub/training/EMA.py index 967c733..496fe64 100644 --- a/rf2aa/training/EMA.py +++ b/src/modelhub/training/EMA.py @@ -7,62 +7,69 @@ import torch.nn as nn class EMA(nn.Module): - def __init__(self, model, decay): + # TODO: Rename shadow to `ema_model` to better match convention + def __init__(self, model: nn.Module, decay: float): + """Initialize the Exponential Moving Average (EMA) module. + + EMA maintains a shadow model that slowly tracks the weight of the original model. + + Args: + model: The original model. + decay: The decay rate of the EMA. 
The shadow model will be updated with the formula: + shadow_variable -= (1 - decay) * (shadow_variable - variable) + """ super().__init__() self.decay = decay self.model = model self.shadow = deepcopy(self.model) + # Detach the shadow model from the computation graph for param in self.shadow.parameters(): param.detach_() @torch.no_grad() def update(self): + """Update the shadow model using the weight of the original model and the decay rate.""" if not self.training: - print( - "EMA update should only be called during training", - file=stderr, - flush=True, - ) - return + raise RuntimeError("EMA update should only be called during training") + # ... get the model and shadow parameters model_params = OrderedDict(self.model.named_parameters()) shadow_params = OrderedDict(self.shadow.named_parameters()) - # check if both model contains the same set of keys + # ... ensure that both models have the same set of keys assert model_params.keys() == shadow_params.keys() for name, param in model_params.items(): - # see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + # Update the shadow model with the formula: # shadow_variable -= (1 - decay) * (shadow_variable - variable) + # Reference: https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage if param.requires_grad: shadow_params[name].sub_( (1.0 - self.decay) * (shadow_params[name] - param) ) + # ... and do the same with the buffers (e.g,. objects that are part of the module state but not trainable parameters) model_buffers = OrderedDict(self.model.named_buffers()) shadow_buffers = OrderedDict(self.shadow.named_buffers()) - # check if both model contains the same set of keys assert model_buffers.keys() == shadow_buffers.keys() for name, buffer in model_buffers.items(): - # buffers are copied + # ... 
copy the buffers from the model to the shadow shadow_buffers[name].copy_(buffer) - # fd A hack to allow non-DDP models to be passed into the Trainer - def no_sync(self): - return contextlib.nullcontext() - def forward(self, *args, **kwargs): + """Dynamic dispatch to the correct model (model or shadow).""" if self.training: return self.model(*args, **kwargs) else: return self.shadow(*args, **kwargs) -def count_parameters(model): +def count_parameters(model: nn.Module) -> int: + """Count the number of trainable parameters in a model.""" return sum(p.numel() for p in model.parameters() if p.requires_grad) diff --git a/rf2aa/training/checkpoint.py b/src/modelhub/training/checkpoint.py similarity index 100% rename from rf2aa/training/checkpoint.py rename to src/modelhub/training/checkpoint.py diff --git a/rf2aa/training/optimizer.py b/src/modelhub/training/optimizer.py similarity index 100% rename from rf2aa/training/optimizer.py rename to src/modelhub/training/optimizer.py diff --git a/rf2aa/training/recycling.py b/src/modelhub/training/recycling.py similarity index 99% rename from rf2aa/training/recycling.py rename to src/modelhub/training/recycling.py index 8e24d28..af56e78 100644 --- a/rf2aa/training/recycling.py +++ b/src/modelhub/training/recycling.py @@ -3,7 +3,7 @@ from contextlib import ExitStack import numpy as np import torch -from rf2aa.chemical import ChemicalData as ChemData +from modelhub.chemical import ChemicalData as ChemData def recycle_step_legacy( diff --git a/rf2aa/training/scheduler.py b/src/modelhub/training/scheduler.py similarity index 100% rename from rf2aa/training/scheduler.py rename to src/modelhub/training/scheduler.py diff --git a/src/modelhub/training/schedulers.py b/src/modelhub/training/schedulers.py new file mode 100755 index 0000000..30f4a89 --- /dev/null +++ b/src/modelhub/training/schedulers.py @@ -0,0 +1,91 @@ +from dataclasses import dataclass + +from torch.optim.lr_scheduler import LRScheduler, _LRScheduler +from 
torch.optim.optimizer import Optimizer + + +class AF3Scheduler(_LRScheduler): + """Implements a two-phase learning rate schedule a-la AF-3: + 1. The base learning rate is 1.8 · 10^−3, which is linearly increased from 0 over the first 1,000 steps. + 2. The learning rate is then decreased by a factor of 0.95 every 50,000 steps. + + From the AF-3 Supplement, Section 5.4: + > "For training we use the Adam optimizer with parameters β1 = 0.9, β2 = 0.95, ϵ = 10^−8. The base learning rate + is 1.8 · 10^−3, which is linearly increased from 0 over the first 1,000 steps. The learning rate is then decreased + by a factor of 0.95 every 5 · 10^4 steps." + + References: + - AF-3 Supplement + """ + + def __init__( + self, + optimizer: Optimizer, + base_lr: float = 1.8e-3, + warmup_steps: int = 1000, + decay_factor: float = 0.95, + decay_steps: int = 50000, + last_epoch: int = -1, + ) -> None: + """Initializes a new instance of AF3LRScheduler. + + Note that the "last_epoch" value is incremented every time we call `scheduler.step()` + method; we name it "epoch" to follow the PyTorch convention. + + Args: + optimizer (Optimizer): Wrapped optimizer. + base_lr (float): The base learning rate after warmup (which will then be decayed). + warmup_steps (int): Number of steps for linear warmup. + decay_factor (float): Factor by which the learning rate is multiplied every decay_steps. + decay_steps (int): Number of steps between each decay. + last_epoch (int): The index of the last epoch. Default: -1. 
+ """ + self.base_lr = base_lr + self.warmup_steps = warmup_steps + self.decay_factor = decay_factor + self.decay_steps = decay_steps + super(AF3Scheduler, self).__init__(optimizer, last_epoch) + + def get_lr(self) -> list[float]: + if self.last_epoch < self.warmup_steps: + # Linear warmup + return [ + self.base_lr * (self.last_epoch / self.warmup_steps) + for _ in self.optimizer.param_groups + ] + else: + # Decay after warmup + num_decays = (self.last_epoch - self.warmup_steps) // self.decay_steps + return [ + self.base_lr * (self.decay_factor**num_decays) + for _ in self.optimizer.param_groups + ] + + +@dataclass +class SchedulerConfig: + """Flexible configuration for a learning rate scheduler. + + Modeled on the PyTorch Lightning scheduler configuration. + + Attributes: + scheduler (LRScheduler): The learning rate scheduler instance. Must inherit from `torch.optim.lr_scheduler.LRScheduler`. + interval (str): The interval at which to apply the scheduler, typically "epoch" or "step". Defaults to "step". + frequency (int): The frequency of applying the scheduler. For example, a frequency of 1 means the scheduler is applied every epoch. Defaults to 1. 
+ """ + + scheduler: LRScheduler = None + interval: str = "step" + frequency: int = 1 + + def state_dict(self) -> dict: + return { + "scheduler": self.scheduler.state_dict(), + "interval": self.interval, + "frequency": self.frequency, + } + + def load_state_dict(self, state_dict: dict) -> None: + self.scheduler.load_state_dict(state_dict["scheduler"]) + self.interval = state_dict["interval"] + self.frequency = state_dict["frequency"] diff --git a/rf2aa/util.py b/src/modelhub/util.py similarity index 99% rename from rf2aa/util.py rename to src/modelhub/util.py index 03a2cb0..23f896f 100644 --- a/rf2aa/util.py +++ b/src/modelhub/util.py @@ -10,9 +10,9 @@ import torch from openbabel import openbabel from scipy.spatial.transform import Rotation -from rf2aa.chemical import ChemicalData as ChemData -from rf2aa.kinematics import generate_Cbeta, get_atomize_protein_chirals -from rf2aa.scoring import * +from modelhub.chemical import ChemicalData as ChemData +from modelhub.kinematics import generate_Cbeta, get_atomize_protein_chirals +from modelhub.scoring import * def replace_missing_with_nearest_neighbors( diff --git a/rf2aa/util_module.py b/src/modelhub/util_module.py similarity index 99% rename from rf2aa/util_module.py rename to src/modelhub/util_module.py index 56f870f..cf3b010 100644 --- a/rf2aa/util_module.py +++ b/src/modelhub/util_module.py @@ -1,6 +1,5 @@ import copy -import dgl import networkx as nx import numpy as np import torch @@ -315,6 +314,7 @@ def make_full_graph(xyz, pair, idx): Output: - G: defined graph """ + import dgl B, L = xyz.shape[:2] device = xyz.device @@ -342,6 +342,7 @@ def make_topk_graph( Output: - G: defined graph """ + import dgl B, L = xyz.shape[:2] device = xyz.device @@ -476,7 +477,7 @@ def make_rot_axis(angs, u, eps=1e-6): # class XYZConverter(nn.Module): def __init__(self): - from rf2aa.chemical import ChemicalData as ChemData + from modelhub.chemical import ChemicalData as ChemData super(XYZConverter, self).__init__() @@ -500,7 
+501,7 @@ class XYZConverter(nn.Module): ) def compute_all_atom(self, seq, xyz, alphas): - from rf2aa.chemical import ChemicalData as ChemData + from modelhub.chemical import ChemicalData as ChemData B, L = xyz.shape[:2] @@ -781,8 +782,8 @@ class XYZConverter(nn.Module): return tors_mask def get_torsions(self, xyz_in, seq, mask_in=None): - from rf2aa.chemical import ChemicalData as ChemData - from rf2aa.chemical import th_ang_v, th_dih + from modelhub.chemical import ChemicalData as ChemData + from modelhub.chemical import th_ang_v, th_dih B, L = xyz_in.shape[:2] diff --git a/src/modelhub/utils/datasets.py b/src/modelhub/utils/datasets.py new file mode 100755 index 0000000..3da5780 --- /dev/null +++ b/src/modelhub/utils/datasets.py @@ -0,0 +1,414 @@ +from beartype.typing import Any + +import hydra +import torch +from datahub.datasets.datasets import ( + ConcatDatasetWithID, + FallbackDatasetWrapper, + get_row_and_index_by_example_id, +) +from datahub.samplers import ( + DistributedMixedSampler, + FallbackSamplerWrapper, + LazyWeightedRandomSampler, + MixedSampler, + LoadBalancedDistributedSampler, +) +from omegaconf import DictConfig, ListConfig, OmegaConf +from modelhub.resolvers import resolve_import +from torch.utils.data import ( + DataLoader, + Dataset, + RandomSampler, + Sampler, + SequentialSampler, + Subset, + WeightedRandomSampler, +) +from torch.utils.data.distributed import DistributedSampler + +from modelhub.utils.ddp import RankedLogger + +ranked_logger = RankedLogger(__name__, rank_zero_only=True) + +# (Custom resolvers) +OmegaConf.register_new_resolver("resolve_import", resolve_import) + + +def wrap_dataset_and_sampler_with_fallbacks( + dataset_to_be_wrapped: Dataset, + sampler_to_be_wrapped: Sampler, + dataset_to_fallback_to: Dataset, + sampler_to_fallback_to: Sampler, + n_fallback_retries: int, +) -> tuple[Dataset, Sampler]: + """Wrap the specified dataset and sampler with fallback dataloading. 
+ + If the provided fallback sampler does not have weights (e.g., a MixedSampler), we will use uniform weights. + + Args: + dataset_to_be_wrapped (Dataset): The main dataset to be wrapped. + sampler_to_be_wrapped (Sampler): The main sampler to be wrapped. + dataset_to_fallback_to (Dataset): The fallback dataset. We will sample from this dataset if the main dataset fails. + sampler_to_fallback_to (Sampler): The fallback sampler. We will sample from this sampler if the main sampler fails. + n_fallback_retries (int): Number of retries for the fallback mechanism before raising an exception. + + Returns: + tuple[Dataset, Sampler]: The wrapped dataset and sampler with fallbacks. + """ + # Instantiate a new fallback sampler to avoid scaling issues + fallback_sampler = LazyWeightedRandomSampler( + weights=sampler_to_fallback_to.weights + if "weights" in sampler_to_fallback_to + else torch.ones(len(dataset_to_fallback_to)), + num_samples=int(1e9), + replacement=True, # replacement for fallback dataloading, so we can draw a huge number of samples + generator=None, + prefetch_buffer_size=4, + ) + + # Wrap the dataset and sampler with fallback mechanisms + wrapped_dataset = FallbackDatasetWrapper( + dataset_to_be_wrapped, fallback_dataset=dataset_to_fallback_to + ) + wrapped_sampler = FallbackSamplerWrapper( + sampler_to_be_wrapped, + fallback_sampler=fallback_sampler, + n_fallback_retries=n_fallback_retries, + ) + + return wrapped_dataset, wrapped_sampler + + +def instantiate_single_dataset_and_sampler(cfg: DictConfig | dict) -> dict[str, Any]: + """Instantiate a dataset and its corresponding sampler from a configuration dictionary. + + Args: + cfg (DictConfig): Configuration dictionary defining the dataset and its parameters. + + Returns: + dict[str, Any]: A dictionary containing the instantiated dataset and sampler. + """ + # ... 
instantiate the dataset + dataset = hydra.utils.instantiate(cfg.dataset) + + # Users may provide only weights, in which case we will use a WeightedRandomSampler, + # or they may provide a sampler directly + + if "weights" in cfg and "sampler" not in cfg: + # ... instantiate the weights and create a WeightedRandomSampler + weights = hydra.utils.instantiate(cfg.weights, dataset_df=dataset.data) + sampler = WeightedRandomSampler( + weights=weights, + num_samples=len(dataset), + replacement=True, + ) + elif "sampler" in cfg and "weights" not in cfg: + # ... instantiate the sampler with the number of samples + sampler = hydra.utils.instantiate(cfg.sampler) + else: + ranked_logger.warning( + "No weights or sampler provided for dataset, using uniform weights with replacement." + ) + sampler = WeightedRandomSampler( + weights=torch.ones(len(dataset)), + num_samples=len(dataset), + replacement=True, + ) + + return {"dataset": dataset, "sampler": sampler} + + +def recursively_instantiate_datasets_and_samplers( + cfg: DictConfig | dict, name: str | None = None +) -> dict[str, Any]: + """Recursively instantiate datasets and samplers from a configuration dictionary. + + We must handle three cases: + (1) A single "leaf" dataset (e.g., "distillation"), specified with the "dataset" key + (2) Multiple sub-datasets that should be concatenated together with their weights (e.g., "interfaces" and "pn_units"), + specified with the "sub_datasets" key + (3) Multiple "leaf" datasets that should be sampled from with a certain probability (e.g., "distillation" and "pdb"), + + Args: + cfg (DictConfig): Configuration dictionary defining datasets and their parameters. + name (str, optional): The name of the dataset, used for reporting. Defaults to None. + + Returns: + dict[str, Any]: A dictionary containing the instantiated dataset and sampler. 
+ """ + # ------- Base case (1): A single "leaf" dataset -------# + if "dataset" in cfg: + return {**instantiate_single_dataset_and_sampler(cfg), "name": name} + + # ------- Recursive case (2): Multiple sub-datasets that must be concatenated together -------# + elif "sub_datasets" in cfg: + # ... create a list of dictionaries for each sub-dataset + datasets_info = [] + for sub_dataset_name, sub_dataset_cfg in cfg.sub_datasets.items(): + datasets_info.append( + recursively_instantiate_datasets_and_samplers( + sub_dataset_cfg, name=sub_dataset_name + ) + ) + + # ... concatenate sub-datasets and weights (e.g., "interfaces" and "pn_units" into one ConcatDataset) + # NOTE: Order of the weights must match the order of the datasets! + concatenated_dataset = ConcatDatasetWithID( + datasets=[info["dataset"] for info in datasets_info] + ) + concatenated_weights = torch.cat( + [info["sampler"].weights for info in datasets_info] + ) + sampler = WeightedRandomSampler( + weights=concatenated_weights, + num_samples=len(concatenated_dataset), + replacement=True, + ) + + return {"dataset": concatenated_dataset, "sampler": sampler, "name": name} + + # ------- Recursive case (3): Multiple datasets that must be sampled from with specified probabilities -------# + else: + datasets_info = [] + for nested_dataset_name, nested_dataset_cfg in cfg.items(): + if nested_dataset_cfg is None: + # (Skip any None training datasets; e.g., those overrode by the experiment config) + continue + + # (To use a MixedSampler, we must provide a "probability" key for each dataset) + assert ( + "probability" in nested_dataset_cfg + ), "Expected 'probability' key in dataset configuration" + datasets_info.append( + { + **recursively_instantiate_datasets_and_samplers( + nested_dataset_cfg, name=nested_dataset_name + ), + "probability": nested_dataset_cfg["probability"], + } + ) + + # ... 
check that the sum of probabilities of all datasets is 1 + assert ( + sum(dataset_info["probability"] for dataset_info in datasets_info) == 1.0 + ), "Sum of probabilities must be 1.0" + + # ... compose the list of datasets into a single dataset + composed_train_dataset = ConcatDatasetWithID( + datasets=[dataset["dataset"] for dataset in datasets_info] + ) + + composed_train_sampler = MixedSampler(datasets_info=datasets_info, shuffle=True) + + return { + "dataset": composed_train_dataset, + "sampler": composed_train_sampler, + "name": name, + } + + +def assemble_distributed_loader( + dataset: Dataset, + sampler: Sampler, + rank: int | None = None, + world_size: int | None = None, + n_examples_per_epoch: int | None = None, + loader_cfg: DictConfig | dict | None = None, + shuffle: bool = True, + drop_last: bool = False, +) -> DataLoader: + """Assembles a distributed DataLoader for training or validation. + + Performs the following steps: + (1) If not already a distributed sampler, wraps the sampler with a DistributedSampler or DistributedMixedSampler + (2) Wraps the dataset and sampler with a fallback mechanism, if needed + (3) Assembles the final DataLoader + + Args: + dataset (Dataset): The dataset to be used for training or validation. + sampler (Sampler): The sampler to be used for training or validation. May already be distributed. + rank (int): The rank of the current process in distributed training. + world_size (int): The total number of processes participating in the distributed training. + n_examples_per_epoch (int): The number of examples to sample per epoch, across all GPUs. + For example, if we have 8 GPUs, with 2 gradient accumulation steps and 10 optimizer + steps per epoch, we would sample 160 examples per epoch (8 * 2 * 10). + loader_cfg (DictConfig or dict, optional): Additional configuration parameters for the + DataLoader, such as `batch_size` and `num_workers`. Defaults to an empty dictionary. 
+ shuffle (bool, optional): Whether to shuffle the dataset. Defaults to True. + drop_last (bool, optional): Whether to drop the last incomplete batch if the dataset size + is not divisible by the number of GPUs. Defaults to False. + + Returns: + DataLoader: A PyTorch DataLoader configured for distributed training, with datasets + concatenated and sampled according to their defined probabilities. + """ + if not loader_cfg: + loader_cfg = {} + + if isinstance(sampler, MixedSampler): + # (If given a MixedSampler, we must convert to a DistributedMixedSampler) + assert ( + rank is not None + and world_size is not None + and n_examples_per_epoch is not None + ), "Rank, world_size, and n_examples_per_epoch must be provided for MixedSampler" + sampler = DistributedMixedSampler( + datasets_info=sampler.datasets_info, + num_replicas=world_size, + rank=rank, + n_examples_per_epoch=n_examples_per_epoch, + shuffle=shuffle, + drop_last=drop_last, + ) + elif isinstance(sampler, (RandomSampler, SequentialSampler)): + # (If given a RandomSampler or SequentialSampler, we must convert to a DistributedSampler) + assert ( + rank is not None and world_size is not None + ), "Rank and world_size must be provided for RandomSampler or SequentialSampler" + sampler = DistributedSampler( + dataset=dataset, + num_replicas=world_size, + rank=rank, + shuffle=shuffle, + drop_last=drop_last, + ) + else: + # (We assume we are already given a DistributedSampler or DistributedMixedSampler) + assert ( + rank is None and world_size is None + ), "Rank and world_size will have no effect on the provided sampler and should be None" + assert isinstance( + sampler, (DistributedSampler, DistributedMixedSampler) + ), "Invalid sampler type for distributed training." + + # ... 
wrap the composed dataset and sampler with a fallback mechanism, if needed + if "n_fallback_retries" in loader_cfg and loader_cfg.n_fallback_retries > 0: + ranked_logger.info( + f"Wrapping train dataset and sampler with {loader_cfg.n_fallback_retries} fallbacks..." + ) + dataset, sampler = wrap_dataset_and_sampler_with_fallbacks( + dataset_to_be_wrapped=dataset, + sampler_to_be_wrapped=sampler, + dataset_to_fallback_to=dataset, + sampler_to_fallback_to=sampler, + n_fallback_retries=loader_cfg.n_fallback_retries, + ) + + # ... assemble the final loader + loader = DataLoader( + dataset=dataset, + sampler=sampler, + collate_fn=lambda x: x, # No collation + **loader_cfg.dataloader_params if "dataloader_params" in loader_cfg else {}, + ) + + return loader + + +def subset_dataset_to_example_ids( + dataset: Dataset, + example_ids: list[str] | ListConfig, +) -> Dataset: + """Subset a dataset to a specific set of example IDs.""" + indices = [] + for example_id in example_ids: + index = get_row_and_index_by_example_id(dataset, example_id)["index"] + indices.append(index) + + return Subset(dataset, indices) + + +def assemble_val_loader_dict( + cfg: DictConfig, + rank: int = 0, + world_size: int = 1, + loader_cfg: DictConfig | dict | None = None, +) -> dict[str, DataLoader]: + """Assemble a dictionary of validation loaders for multiple datasets. + + If a key is provided to balance the dataset, we will use a LoadBalancedDistributedSampler + rather than a DistributedSampler to maintain a balanced example load across processes + (i.e., avoid a situation where one GPU is allocated all small examples and another all large examples). + + Args: + cfg (DictConfig): Configuration dictionary defining the validation datasets. Each key should correspond to a dataset name. + rank (int, optional): The rank of the current process in distributed training. Defaults to 0. + world_size (int, optional): The total number of processes participating in the distributed training. Defaults to 1. 
+ loader_cfg (DictConfig, optional): Additional configuration parameters for the DataLoader, such as `batch_size` and `num_workers`. Defaults to None. + """ + # ... loop through the validation datasets and create a DataLoader for each, preserving the dataset name + val_loaders = {} + for val_dataset_name, val_dataset in cfg.items(): + if not val_dataset: + # (Skip any None validation datasets; e.g., those overrode by the experiment config) + continue + + assert ( + "dataset" in val_dataset + ), f"Expected 'dataset' key in validation dataset config for {val_dataset_name}" + dataset = hydra.utils.instantiate( + val_dataset.dataset + ) # directly instantiate the dataset + + if "key_to_balance" in val_dataset and val_dataset.key_to_balance: + # (If a key is provided to balance the dataset, we will use a LoadBalancedDistributedSampler) + key_to_balance = val_dataset.key_to_balance + ranked_logger.info(f"Balancing dataset with key: {key_to_balance}") + + assert ( + key_to_balance in dataset.data.columns + ), f"Key {key_to_balance} not found in dataset columns!" 
+ + sampler = LoadBalancedDistributedSampler( + dataset=dataset, + num_replicas=world_size, + rank=rank, + key_to_balance=key_to_balance, + ) + else: + # (Otherwise, we will use a DistributedSampler, without regard to sample size) + sampler = DistributedSampler( + dataset, + num_replicas=world_size, + rank=rank, + shuffle=False, + drop_last=False, + ) + + val_loader = assemble_distributed_loader( + dataset=dataset, + sampler=sampler, + loader_cfg=loader_cfg, + ) + + val_loaders[val_dataset_name] = val_loader + + return val_loaders + + +def assemble_distributed_inference_loader_from_list_of_paths( + paths: list[str], rank: int, world_size: int +) -> DataLoader: + """Assemble a distributed inference DataLoader from a list of file paths.""" + dataset = FilePathDataset(paths) + sampler = SequentialSampler(dataset) + return assemble_distributed_loader( + dataset=dataset, + sampler=sampler, + rank=rank, + world_size=world_size, + ) + + +class FilePathDataset(Dataset): + """Lightweight dataset wrapper for file paths""" + + def __init__(self, files): + self.files = files + + def __len__(self): + return len(self.files) + + def __getitem__(self, idx): + return self.files[idx] diff --git a/src/modelhub/utils/ddp.py b/src/modelhub/utils/ddp.py new file mode 100644 index 0000000..7a5b5e4 --- /dev/null +++ b/src/modelhub/utils/ddp.py @@ -0,0 +1,102 @@ +import logging +from beartype.typing import Any + +from lightning_fabric.utilities import rank_zero_only +from lightning_utilities.core.rank_zero import rank_prefixed_message +from omegaconf import DictConfig +import torch + +logger = logging.getLogger(__name__) + + +def get_current_rank() -> int: + """Returns the rank of the current process.""" + return getattr(rank_zero_only, "rank", None) + + +def is_rank_zero() -> bool: + """Returns whether the current process is rank zero.""" + return get_current_rank() == 0 + + +def set_accelerator_based_on_availability(cfg: dict | DictConfig): + """Set training accelerator to CPU if no GPUs 
are available. + + Args: + cfg: Hydra object with trainer settings "accelerator", "devices_per_node", and "num_nodes". + + Returns: + None; modifies the input `cfg` object in place. + """ + if not torch.cuda.is_available(): + logger.error( + "No GPUs available - Setting accelerator to 'cpu'. Are you sure you are using the correct configs?" + ) + assert "trainer" in cfg, "Configuration object must have a 'trainer' key." + for key in ["accelerator", "devices_per_node", "num_nodes"]: + assert ( + key in cfg.trainer + ), f"Configuration object must have a 'trainer.{key}' key." + + # Override accelerator settings + cfg.trainer.accelerator = "cpu" + cfg.trainer.devices_per_node = 1 + cfg.trainer.num_nodes = 1 + else: + cfg.trainer.accelerator = "gpu" + + +class RankedLogger(logging.LoggerAdapter): + """A multi-GPU-friendly python command line logger. + + Modified from https://github.com/ashleve/lightning-hydra-template/blob/main/src/utils/pylogger.py + """ + + def __init__( + self, + name: str = __name__, + rank_zero_only: bool = False, + extra: Any | None = None, + ) -> None: + """Initializes a multi-GPU-friendly python command line logger that logs on all processes + with their rank prefixed in the log message. + + :param name: The name of the logger. Default is ``__name__``. + :param rank_zero_only: Whether to force all logs to only occur on the rank zero process. Default is `False`. + :param extra: (Optional) A dict-like object which provides contextual information. See `logging.LoggerAdapter`. + """ + logger = logging.getLogger(name) + super().__init__(logger=logger, extra=extra) + self.rank_zero_only = rank_zero_only + + def log( + self, level: int, msg: str, rank: int | None = None, *args, **kwargs + ) -> None: + """ + Delegate a log call to the underlying logger, after prefixing its message with the rank + of the process it's being logged from. If `'rank'` is provided, then the log will only + occur on that rank/process. 
+ + Args: + level (int): The level to log at. Look at `logging.__init__.py` for more information. + msg (str): The message to log. + rank (Optional[int]): The rank to log at. + args: Additional args to pass to the underlying logging function. + kwargs: Any additional keyword args to pass to the underlying logging function. + """ + if self.isEnabledFor(level): + msg, kwargs = self.process(msg, kwargs) + current_rank = getattr(rank_zero_only, "rank", None) + if current_rank is None: + raise RuntimeError( + "The `rank_zero_only.rank` needs to be set before use" + ) + msg = rank_prefixed_message(msg, current_rank) + if self.rank_zero_only: + if current_rank == 0: + self.logger.log(level, msg, *args, **kwargs) + else: + if rank is None: + self.logger.log(level, msg, *args, **kwargs) + elif current_rank == rank: + self.logger.log(level, msg, *args, **kwargs) diff --git a/src/modelhub/utils/frames.py b/src/modelhub/utils/frames.py new file mode 100644 index 0000000..16a4add --- /dev/null +++ b/src/modelhub/utils/frames.py @@ -0,0 +1,109 @@ +# TODO: REFACTOR; COPIED FROM RF2AA. 
WE NEED TO REMOVE CHEMDATA, ADD DOCSTRINGS, EXAMPLES, HOPEFULLY TESTS, AND CLEAN UP + +import torch +from modelhub.chemical import ChemicalData as ChemData + + +def is_atom(seq): + return seq > ChemData().NNAPROTAAS + + +def get_frames(xyz_in, xyz_mask, seq, frame_indices, atom_frames=None): + # B,L,natoms = xyz_in.shape[:3] + frames = frame_indices[seq] + atoms = is_atom(seq) + if torch.any(atoms): + frames[:, atoms[0].nonzero().flatten(), 0] = atom_frames + + frame_mask = ~torch.all(frames[..., 0, :] == frames[..., 1, :], axis=-1) + + # frame_mask *= torch.all( + # torch.gather(xyz_mask,2,frames.reshape(B,L,-1)).reshape(B,L,-1,3), + # axis=-1) + + return frames, frame_mask + + +# build a frame from 3 points +# fd - more complicated version splits angle deviations between CA-N and CA-C (giving more accurate CB position) +# fd - makes no assumptions about input dims (other than last 1 is xyz) +def rigid_from_3_points(N, Ca, C, is_na=None, eps=1e-4): + dims = N.shape[:-1] + + v1 = C - Ca + v2 = N - Ca + e1 = v1 / (torch.norm(v1, dim=-1, keepdim=True) + eps) + u2 = v2 - (torch.einsum("...li, ...li -> ...l", e1, v2)[..., None] * e1) + e2 = u2 / (torch.norm(u2, dim=-1, keepdim=True) + eps) + e3 = torch.cross(e1, e2, dim=-1) + R = torch.cat( + [e1[..., None], e2[..., None], e3[..., None]], axis=-1 + ) # [B,L,3,3] - rotation matrix + + v2 = v2 / (torch.norm(v2, dim=-1, keepdim=True) + eps) + cosref = torch.sum(e1 * v2, dim=-1) + + costgt = torch.full(dims, -0.3616, device=N.device) + if is_na is not None: + costgt[is_na] = ChemData().costgtNA + + cos2del = torch.clamp( + cosref * costgt + + torch.sqrt((1 - cosref * cosref) * (1 - costgt * costgt) + eps), + min=-1.0, + max=1.0, + ) + + cosdel = torch.sqrt(0.5 * (1 + cos2del) + eps) + + sindel = torch.sign(costgt - cosref) * torch.sqrt(1 - 0.5 * (1 + cos2del) + eps) + + Rp = torch.eye(3, device=N.device).repeat(*dims, 1, 1) + Rp[..., 0, 0] = cosdel + Rp[..., 0, 1] = -sindel + Rp[..., 1, 0] = sindel + Rp[..., 1, 1] = cosdel 
+ R = torch.einsum("...ij,...jk->...ik", R, Rp) + + return R, Ca + + +def mask_unresolved_frames_batched(frames, frame_mask, atom_mask): + """ + reindex frames tensor from relative indices to absolute indices and masks out frames with atoms that are unresolved + in the structure + Input: + - frames: relative indices for frames (B, L, nframes, 3) + - frame_mask: mask for which frames are valid to compute FAPE/losses (B, L, nframes) + - atom_mask: mask for seen coordinates (B, L, natoms) + Output: + - frames_reindex: absolute indices for frames + - frame_mask_update: updated frame mask with frames with unresolved atoms removed + """ + B, L, natoms = atom_mask.shape + + # reindex frames for flat X + frames_reindex = ( + torch.arange(L, device=frames.device)[None, :, None, None] + frames[..., 0] + ) * natoms + frames[..., 1] + + masked_atom_frames = torch.any( + frames_reindex > L * natoms, dim=-1 + ) # find frames with atoms that aren't resolved + masked_atom_frames *= torch.any(frames_reindex < 0, dim=-1) + # There are currently indices for frames that aren't in the coordinates bc they arent resolved, reset these indices to 0 to avoid + # indexing errors + frames_reindex[masked_atom_frames, :] = 0 + + frame_mask_update = frame_mask.clone() + frame_mask_update *= ~masked_atom_frames + frame_mask_update *= torch.all( + torch.gather( + atom_mask.reshape(B, L * natoms), + 1, + frames_reindex.reshape(B, L * ChemData().NFRAMES * 3), + ).reshape(B, L, -1, 3), + axis=-1, + ) + + return frames_reindex, frame_mask_update diff --git a/src/modelhub/utils/inference.py b/src/modelhub/utils/inference.py new file mode 100644 index 0000000..1c126d5 --- /dev/null +++ b/src/modelhub/utils/inference.py @@ -0,0 +1,128 @@ +from cifutils.tools.inference import ( + build_msa_paths_by_chain_id_from_component_list, + components_to_atom_array, +) +from cifutils.utils.io_utils import to_cif_file +from modelhub.utils.io import find_files_with_extension, create_example_id_extractor, 
DICTIONARY_LIKE_EXTENSIONS, CIF_LIKE_EXTENSIONS + +import json +import pickle +from os import PathLike +from pathlib import Path + +def _spoof_cif_from_dictionary(item: dict, temp_dir: PathLike) -> Path: + """Unpacks a dictionary to create a CIF file from its components. + + Args: + item (dict): A dictionary containing 'name' and 'components', optionally 'bonds'. + temp_dir (Path): Path to the temporary directory for storing CIF files. + + Returns: + Path: The path to the created CIF file, saved in the temporary directory. + + Raises: + NotImplementedError: If 'bonds' is present in the dictionary. + ValueError: If 'name' or 'components' are missing from the dictionary. + """ + # Validate the dictionary structure ("name" and "components" are required, "bonds" is optional) + assert ( + "name" in item and "components" in item + ), "The input dictionary must contain 'name' and 'components' keys." + + # Build components + atom_array, component_list = components_to_atom_array( + item["components"], return_components=True, bonds=item.get("bonds", None) + ) + msa_paths_by_chain_id = build_msa_paths_by_chain_id_from_component_list( + component_list + ) + + # Create a temporary CIF file from the JSON data + cif_path = Path(temp_dir) / f"{item['name']}.cif" + save_path = to_cif_file( + atom_array, + cif_path, + extra_categories={"msa_paths_by_chain_id": msa_paths_by_chain_id} + if msa_paths_by_chain_id + else None, + file_type="cif", # Not zipped for efficiency (as it's a temporary directory anyways) + ) + + return Path(save_path) + + +def build_file_paths_for_prediction( + input: PathLike | list[PathLike], + temp_dir: PathLike, + existing_outputs_dir: PathLike | None = None +) -> list[Path]: + """Prepare files for prediction based on the input paths. + + Input path may be dictionary-like format (e.g., JSON, YAML, Pickle), CIF/PDB files, or a directory containing these files. 
+ Processes directories to find supported file types and converts dictionary-like formats to CIF files. + + Args: + input (PathLike): Input paths (JSON, YAML, Pickle, or CIF/PDB) or a directory containing these files. + temp_dir (Path): Path to the temporary directory for storing CIF files. + existing_outputs_dir(Path): Directory for existing outputs (optional). If provided, we not predict files with matching example_ids. + + Returns: + list[Path]: List of file paths for prediction. + """ + # Collect all files from inputs, handling directories, individual files, and lists of directories/files + input_paths = [input] if not isinstance(input, list) else input + + example_id_extractor = create_example_id_extractor(CIF_LIKE_EXTENSIONS) + + existing_example_ids = None + if existing_outputs_dir: + existing_example_ids = set( + example_id_extractor(path) for path in find_files_with_extension(existing_outputs_dir, CIF_LIKE_EXTENSIONS) + ) + + paths_to_raw_input_files = [] + for _path in input_paths: + if Path(_path).is_dir(): + paths_to_raw_input_files.extend( + find_files_with_extension( + _path, DICTIONARY_LIKE_EXTENSIONS | CIF_LIKE_EXTENSIONS + ) + ) + else: + paths_to_raw_input_files.append(Path(_path)) + + paths_to_cif_like_files = [] + for _path in paths_to_raw_input_files: + if _path.name.endswith(tuple(DICTIONARY_LIKE_EXTENSIONS)): + # Spoof CIF files from dictionary-like formats + with open(_path, "rb" if _path.suffix == ".pkl" else "r") as file: + # Load data based on file extension + if _path.suffix == ".json": + data = json.load(file) + elif _path.suffix in {".yaml", ".yml"}: + raise NotImplementedError("YAML files are not yet supported.") + elif _path.suffix == ".pkl": + data = pickle.load(file) + + if isinstance(data, dict): + data = [ + data + ] # Convert single dictionary to list for uniform processing + + for item in data: + paths_to_cif_like_files.append( + _spoof_cif_from_dictionary(item, temp_dir) + ) + elif 
def instantiate_callbacks(callbacks_cfg: DictConfig | None) -> list[BaseCallback]:
    """Build every callback described in a config group.

    Args:
        callbacks_cfg: Mapping from an arbitrary name to a callback config. Each
            value must be a DictConfig carrying a `_target_` key. A falsy value
            (``None`` or an empty config) yields an empty list.

    Returns:
        The instantiated callbacks, in the iteration order of the config.

    Raises:
        InstantiationError: If any entry is not a DictConfig with a `_target_` key.

    Reference:
        - Lightning Hydra Template (https://github.com/ashleve/lightning-hydra-template/blob/main/src/utils/instantiators.py#L36)
    """
    instantiated: list[BaseCallback] = []

    # Nothing configured -> nothing to build.
    if not callbacks_cfg:
        return instantiated

    for _, entry in callbacks_cfg.items():
        # Guard clause: reject malformed entries before touching Hydra.
        if not _can_be_instantiated(entry):
            raise InstantiationError(
                f"Skipping callback <{entry}> - Not a DictConfig with `_target_` key! Please provide a valid `_target_` for instantiation."
            )
        instantiated.append(hydra.utils.instantiate(entry))

    return instantiated
def dump_structures(
    atom_arrays: AtomArrayStack | list[AtomArray] | AtomArray,
    base_path: PathLike,
    one_model_per_file: bool,
    extra_fields: list[str] | Literal["all"] | None = None,
) -> None:
    """Dump structures to gzipped CIF file(s).

    Args:
        atom_arrays (AtomArrayStack | list[AtomArray] | AtomArray): The structure(s) to write.
        base_path (PathLike): Base path of the output. With `one_model_per_file`, model `i`
            is written using the path `"{base_path}_model_{i}"`; otherwise `base_path` is
            passed to `to_cif_file` unchanged.
        one_model_per_file (bool): If True, write each element of `atom_arrays` separately
            (requires an AtomArrayStack or a list). If False, hand `atom_arrays` to
            `to_cif_file` in a single call.
        extra_fields (list[str] | Literal["all"] | None): Extra fields forwarded to
            `to_cif_file`. ``None`` (the default) is treated as an empty list.
    """
    # A fresh list per call avoids sharing one mutable default between invocations.
    if extra_fields is None:
        extra_fields = []

    base_path = Path(base_path)

    if one_model_per_file:
        assert isinstance(atom_arrays, AtomArrayStack) or isinstance(
            atom_arrays, list
        ), "AtomArrayStack or list of AtomArray required when one_model_per_file is True"
        # One model per file —> loop over the diffusion batch
        for i in range(len(atom_arrays)):
            path = f"{base_path}_model_{i}"
            to_cif_file(
                atom_arrays[i],
                path,
                file_type="cif.gz",
                include_entity_poly=False,
                extra_fields=extra_fields,
            )
    else:
        # Include all models in a single CIF file
        to_cif_file(
            atom_arrays,
            base_path,
            file_type="cif.gz",
            include_entity_poly=False,
            extra_fields=extra_fields,
        )
invert the list, to make the trajectory compatible with PyMol (which builds the bond graph from the first frame) + trajectory_list = trajectory_list[::-1] + + # ... iterate over the range of D (diffusion batch size; e.g., 5 during validation) + # (We want to convert `aligned_trajectory_list` to a list of length D where each item is a tensor of shape [n_steps, L, 3]) + trajectories_split_by_model = [] + for d in range(trajectory_list[0].shape[0]): + trajectory_for_single_model = torch.stack( + [trajectory_list[step][d] for step in range(n_steps)], dim=0 + ) + trajectories_split_by_model.append(trajectory_for_single_model) + + # ... write the trajectories to CIF files, named by epoch, dataset, example_id, and model index (within the diffusion batch) + for i, trajectory in enumerate(trajectories_split_by_model): + if isinstance(trajectory, torch.Tensor): + trajectory = trajectory.cpu().numpy() + atom_array_stack = build_stack_from_atom_array_and_batched_coords( + trajectory, atom_array + ) + + path = f"{base_path}_model_{i}" + to_cif_file( + atom_array_stack, path, file_type="cif.gz", include_entity_poly=False + ) + + +def find_files_with_extension(path: PathLike, supported_file_types: list) -> list[Path]: + """Recursively find all files with the given extensions in the specified path. + + Args: + path (PathLike): Path to the directory containing the files. + supported_file_types (list): List of supported file extensions. + + Returns: + list[Path]: List of files with the given extensions. 
+ """ + files_with_supported_types = [] + path = Path(path) + + # Check if the path is a directory + if path.is_dir(): + # Search for files with each supported extension + for file_type in supported_file_types: + files_with_supported_types.extend(path.glob(f"*{file_type}")) + elif path.is_file() and path.suffix in supported_file_types: + # If it's a file and has a supported extension, add to the list + files_with_supported_types.append(path) + + return files_with_supported_types + +def create_example_id_extractor(extensions: set | list = CIF_LIKE_EXTENSIONS) -> str: + """Create a function with closure that extracts example_ids from file paths with specified extensions. + + Example: + >>> extractor = create_example_id_extractor({".cif", ".cif.gz"}) + >>> extractor("example.path.example_id.cif.gz") + 'example_id' + """ + pattern = re.compile('(' + '|'.join(re.escape(ext) + '$' for ext in extensions) + ')') + + def extract_id(file_path: PathLike) -> str: + """Extract example_id from file path.""" + # Remove extension and get last part after splitting by dots + without_ext = pattern.sub('', Path(file_path).name) + return without_ext.split('.')[-1] + + return extract_id + +def extract_example_id_from_path(file_path: PathLike, extensions: set | list) -> str: + """Extract example_id from file path with specified extensions.""" + extractor = create_example_id_extractor(extensions) + return extractor(file_path) \ No newline at end of file diff --git a/src/modelhub/utils/logging.py b/src/modelhub/utils/logging.py new file mode 100755 index 0000000..25daab9 --- /dev/null +++ b/src/modelhub/utils/logging.py @@ -0,0 +1,170 @@ +from beartype.typing import Any + +from lightning_fabric.utilities import rank_zero_only +from omegaconf import DictConfig, OmegaConf +from rich.console import Console +from rich.syntax import Syntax +from rich.table import Table +from rich.tree import Tree +from torch import nn +from wandb.integration.lightning.fabric import WandbLogger + +from 
@rank_zero_only
def print_config_tree(
    cfg: DictConfig,
    resolve: bool = False,
    console_width: int = 100,
    title: str = "CONFIG",
) -> None:
    """Render a DictConfig as a Rich tree, one branch per top-level field.

    Args:
        cfg (DictConfig): The config to display.
        resolve (bool): Passed to `OmegaConf.to_yaml` for nested DictConfig values.
            Default is False.
        console_width (int): Width of the Rich console used for printing. Default is 100.
        title (str): Label of the tree root. Default is "CONFIG".
    """
    dim = "dim"
    root = Tree(title, style=dim, guide_style=dim)

    # Fields are visited in the config's own iteration order.
    for field_name in cfg:
        node = root.add(field_name, style=dim, guide_style=dim)

        value = cfg[field_name]
        # Nested configs are dumped as YAML; anything else is shown via str().
        rendered = (
            OmegaConf.to_yaml(value, resolve=resolve)
            if isinstance(value, DictConfig)
            else str(value)
        )
        node.add(Syntax(rendered, "yaml", word_wrap=True))

    # Printed straight to a Rich console rather than through a logger.
    Console(width=console_width).print(root)
def condense_count_columns_of_grouped_df(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse the per-metric "count" columns of a grouped DataFrame into one.

    Expects two-level columns whose second level contains "mean" and "count".
    When every row has the same count across all metrics, the result holds the
    mean columns renamed to "<metric> (mean)" plus a single integer "Count"
    column. In every other case (flat columns, missing levels, rows with
    differing counts) the input DataFrame is returned untouched.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        return df

    try:
        counts = df.xs("count", level=1, axis=1)
        means = df.xs("mean", level=1, axis=1)

        # Only condense when each row carries exactly one distinct count value.
        rows_agree = (counts.nunique(axis=1) == 1).all()
        if rows_agree:
            condensed = means.rename(columns=lambda name: f"{name} (mean)")
            condensed["Count"] = counts.iloc[:, 0].astype(int)
            return condensed
    except (KeyError, IndexError):
        # Expected level/label not present -> fall through to the original frame.
        pass

    return df
def convert_batched_losses_to_list_of_dicts(loss_dict: dict[str, torch.Tensor]):
    """Split a dictionary of loss tensors into per-sample rows plus one scalar row.

    Args:
        loss_dict (dict): Maps loss names to tensors. 1D tensors are treated as
            batched (one value per sample), 0D tensors as non-batched. Tensors of
            any other rank are ignored.

    Returns:
        list: One dictionary per batch index (with a 'batch_idx' key and the batched
            losses as Python numbers), followed by a final dictionary holding the
            non-batched losses. The batch size is taken from the first 1D tensor and
            defaults to 1 when there is none.

    Example:
        >>> convert_batched_losses_to_list_of_dicts(
        ...     {
        ...         "diffusion_loss": torch.tensor([0.5, 0.25]),
        ...         "t": torch.tensor([1.0, 2.0]),
        ...         "total_loss": torch.tensor(1.5),
        ...     }
        ... )
        [{'batch_idx': 0, 'diffusion_loss': 0.5, 't': 1.0}, {'batch_idx': 1, 'diffusion_loss': 0.25, 't': 2.0}, {'total_loss': 1.5}]
    """
    # Partition once by rank instead of re-checking inside every loop.
    per_sample = {name: t for name, t in loss_dict.items() if t.dim() == 1}
    scalars = {name: t.item() for name, t in loss_dict.items() if t.dim() == 0}

    n_samples = next((t.size(0) for t in per_sample.values()), 1)

    rows = [
        {"batch_idx": i, **{name: t[i].item() for name, t in per_sample.items()}}
        for i in range(n_samples)
    ]
    # The non-batched losses always come last, even when empty.
    rows.append(scalars)

    return rows
+ + Args: + loss_dict_batched (Dict[str, torch.Tensor]): A dictionary where each key maps to a tensor of losses. + + Returns: + dict: A dictionary with the mean loss for each key (as a tensor). + + Example: + >>> loss_dict_batched = {"loss1": torch.tensor([0.5, 0.7]), "loss2": torch.tensor([1.0])} + >>> mean_losses(loss_dict_batched) + {'loss1': 0.6, 'loss2': 1.0} + """ + loss_dict = {} + for key, batched_loss in loss_dict_batched.items(): + # Compute the mean of the tensor and store it in the dictionary + loss_dict[key] = batched_loss.mean().item() + + return loss_dict diff --git a/src/modelhub/utils/predicted_error.py b/src/modelhub/utils/predicted_error.py new file mode 100644 index 0000000..57d580d --- /dev/null +++ b/src/modelhub/utils/predicted_error.py @@ -0,0 +1,440 @@ +from itertools import combinations, chain +from beartype.typing import Any +from typing import List + +import numpy as np +import pandas as pd +import torch +import tree + +from modelhub.chemical import ChemicalData as ChemData +from modelhub.metrics.metric_utils import ( + compute_mean_over_subsampled_pairs, + create_chainwise_masks_1d, + create_chainwise_masks_2d, + create_interface_masks_2d, + spread_batch_into_dictionary, + unbin_logits, +) +from omegaconf import DictConfig +from biotite.structure import AtomArray, AtomArrayStack + + +def compile_af3_confidence_outputs( + plddt_logits: torch.Tensor, + pae_logits: torch.Tensor, + pde_logits: torch.Tensor, + chain_iid_token_lvl: torch.Tensor, + is_real_atom: torch.Tensor, + example_id: str, + confidence_loss_cfg: DictConfig | dict, +) -> dict[str, Any]: + # TODO: Refactor to accept an AtomArray + + """Given the confidence logits, computes the confidence metrics for the model's predictions. 
+ + Returns: + dict[str, Any]: A dictionary containing the following: + - confidence_df: A DataFrame containing the aggregate confidence metrics at the chain- and interface-level + - plddt: The pLDDT logits + - pae: The pAE logits + - pde: The pDE logits + """ + + # Reorder the input tensors to be in (B, n_bins, ...) format for unbinning + plddt = unbin_logits( + plddt_logits.reshape( + -1, + plddt_logits.shape[1], + ChemData().NHEAVY, + confidence_loss_cfg.plddt.n_bins, + ) + .permute(0, 3, 1, 2) + .float(), + confidence_loss_cfg.plddt.max_value, + confidence_loss_cfg.plddt.n_bins, + ) + + # Unbin the pae and pde logits + pae = unbin_logits( + pae_logits.permute(0, 3, 1, 2).float(), + confidence_loss_cfg.pae.max_value, + confidence_loss_cfg.pae.n_bins, + ) + pde = unbin_logits( + pde_logits.permute(0, 3, 1, 2).float(), + confidence_loss_cfg.pde.max_value, + confidence_loss_cfg.pde.n_bins, + ) + + # Calculate interface metrics + interface_masks = create_interface_masks_2d(chain_iid_token_lvl, device=pae.device) + pae_interface = { + k: spread_batch_into_dictionary(compute_mean_over_subsampled_pairs(pae, v)) + for k, v in interface_masks.items() + } + pde_interface = { + k: spread_batch_into_dictionary(compute_mean_over_subsampled_pairs(pde, v)) + for k, v in interface_masks.items() + } + + # Calculate chainwise metrics + chain_masks_2d = create_chainwise_masks_2d(chain_iid_token_lvl, device=pae.device) + pae_chainwise = { + k: spread_batch_into_dictionary(compute_mean_over_subsampled_pairs(pae, v)) + for k, v in chain_masks_2d.items() + } + pde_chainwise = { + k: spread_batch_into_dictionary(compute_mean_over_subsampled_pairs(pde, v)) + for k, v in chain_masks_2d.items() + } + + chain_masks_1d = create_chainwise_masks_1d( + chain_iid_token_lvl, device=is_real_atom.device + ) + plddt_chainwise = { + k: spread_batch_into_dictionary( + compute_mean_over_subsampled_pairs( + plddt, is_real_atom[..., : ChemData().NHEAVY] * v[:, None] + ) + ) + for k, v in 
chain_masks_1d.items() + } + + # Aggregate confidence data + confidence_data = { + "example_id": example_id, + "mean_plddt": spread_batch_into_dictionary(plddt.mean(dim=(-1, -2))), + "mean_pae": spread_batch_into_dictionary(pae.mean(dim=(-1, -2))), + "mean_pde": spread_batch_into_dictionary(pde.mean(dim=(-1, -2))), + "chain_wise_mean_plddt": plddt_chainwise, + "chain_wise_mean_pae": pae_chainwise, + "chain_wise_mean_pde": pde_chainwise, + "interface_wise_mean_pae": pae_interface, + "interface_wise_mean_pde": pde_interface, + } + + # Generate DataFrame rows + num_batches = plddt.shape[0] + chains = np.unique(chain_iid_token_lvl) + chain_pairs = list(combinations(chains, 2)) + + # For every batch, chain, and interface (chain pair), generate a dataframe row + chain_rows = [ + { + "example_id": example_id, + "chain_chainwise": chain, + "chainwise_plddt": confidence_data["chain_wise_mean_plddt"][chain][ + batch_idx + ], + "chainwise_pde": confidence_data["chain_wise_mean_pde"][chain][batch_idx], + "chainwise_pae": confidence_data["chain_wise_mean_pae"][chain][batch_idx], + "overall_plddt": confidence_data["mean_plddt"][batch_idx], + "overall_pde": confidence_data["mean_pde"][batch_idx], + "overall_pae": confidence_data["mean_pae"][batch_idx], + "batch_idx": batch_idx, + } + for batch_idx in range(num_batches) + for chain in chains + ] + + interface_rows = [ + { + "example_id": example_id, + "chain_i_interface": chain_i, + "chain_j_interface": chain_j, + "pae_interface": confidence_data["interface_wise_mean_pae"][ + (chain_i, chain_j) + ][batch_idx], + "pde_interface": confidence_data["interface_wise_mean_pde"][ + (chain_i, chain_j) + ][batch_idx], + "overall_plddt": confidence_data["mean_plddt"][batch_idx], + "overall_pde": confidence_data["mean_pde"][batch_idx], + "overall_pae": confidence_data["mean_pae"][batch_idx], + "batch_idx": batch_idx, + } + for batch_idx in range(num_batches) + for (chain_i, chain_j) in chain_pairs + ] + + return { + "confidence_df": 
def compute_batch_indices_with_lowest_predicted_error(
    plddt: torch.Tensor,
    is_real_atom: torch.Tensor,
    pae: torch.Tensor,
    confidence_loss_cfg: dict | DictConfig,
    chain_iid_token_lvl: torch.Tensor,
    is_ligand: torch.Tensor,
    interfaces_to_score: list[tuple],
    pn_units_to_score: list[tuple],
    pde: torch.Tensor | None = None,
) -> dict[str, Any]:
    """Given the confidence logits, computes the index within the diffusion batch of the best predicted structure.

    Args:
        plddt: pLDDT logits; reshaped internally to (B, n_bins, L, NHEAVY).
        is_real_atom: Mask of atoms present in the structure; only the first NHEAVY
            entries of the last axis are used.
        pae: pAE logits; permuted internally to (B, n_bins, L, L).
        confidence_loss_cfg: Config providing `max_value` and `n_bins` for `plddt`, `pae` and `pde`.
        chain_iid_token_lvl: Chain identifier of every token.
        is_ligand: Per-token ligand flag.
        interfaces_to_score: Interfaces to rank; the first two entries of each tuple are the chains.
        pn_units_to_score: PN units to rank; the first entry of each tuple is the chain.
        pde: Optional pDE logits with the same layout as `pae`. When omitted, `pde_idx`
            is derived from the pAE logits, which is what this function always did
            before the argument existed. Pass the pDE logits to get a ranking that
            actually reflects pDE.

    Returns:
        dict[str, Any]: A dictionary containing the following keys:
            - pae_idx: The index within the diffusion batch of the structure with the best overall pAE (Predicted Aligned Error)
            - pde_idx: The index within the diffusion batch of the structure with the best overall pDE (Predicted Distance Error)
            - plddt_idx: The index within the diffusion batch of the structure with the best overall pLDDT (Predicted Local Distance
                Difference Test)
            - best_chain_to_all_idx: The index within the diffusion batch of the structure with the best pAE subsampled over any
                pair (i,j) where i == chain or j == chain
            - best_chain_to_self_idx: The index within the diffusion batch of the structure with the best pAE subsampled over any
                pair (i,j) where i == chain and j == chain
            - best_interface_idx: For each interface between two scored PN Units, the index within the diffusion batch of the
                structure with the best mean pAE for all (i,j) where i == interface_chain or j == interface_chain and i != j
            - best_lig_ipae_idx: The index within the diffusion batch for the best pAE subsampled over any pair (i,j)
                where i == chain or j == chain and i != j and i or j is a ligand
    """
    # TODO: Have this function take an `AtomArray` as input so we quickly build masks with much less code
    # TODO: Explore how we can write this function more concisely
    return_dict = {}

    # AF3's ranking metrics work like this, but using ptm instead of ipae:
    scored_chains, interfaces, interface_chains = _select_scored_units(
        interfaces_to_score, pn_units_to_score
    )

    chain_to_all_masks = _create_chain_to_all_masks(chain_iid_token_lvl, scored_chains)
    chain_to_self_masks = _create_chain_to_self_masks(
        chain_iid_token_lvl, scored_chains
    )
    interface_masks, lig_chains = _create_interface_masks(
        chain_iid_token_lvl, interfaces, is_ligand
    )

    # Move every mask onto the device of the logits (non-tensor leaves pass through)
    target_device = plddt.device

    def _to_target_device(x):
        return x.to(target_device) if hasattr(x, "cpu") else x

    chain_to_all_masks = tree.map_structure(_to_target_device, chain_to_all_masks)
    chain_to_self_masks = tree.map_structure(_to_target_device, chain_to_self_masks)
    interface_masks = tree.map_structure(_to_target_device, interface_masks)

    # Reshape logits to B, K, L, NHEAVY
    plddt = (
        plddt.reshape(
            -1,
            plddt.shape[1],
            ChemData().NHEAVY,
            confidence_loss_cfg.plddt.n_bins,
        )
        .permute(0, 3, 1, 2)
        .float()
    )
    # Reshape the pae and pde logits to B, K, L, L
    pae_logits = pae.permute(0, 3, 1, 2).float()
    # NOTE: The pDE ranking used to be computed from the pAE logits unconditionally.
    # That behaviour is kept only as the fallback for callers that do not pass `pde`.
    pde_source = pae if pde is None else pde
    pde_logits = pde_source.permute(0, 3, 1, 2).float()

    pae_logits_unbinned = unbin_logits(
        pae_logits, confidence_loss_cfg.pae.max_value, confidence_loss_cfg.pae.n_bins
    )
    plddt_logits_unbinned = unbin_logits(
        plddt, confidence_loss_cfg.plddt.max_value, confidence_loss_cfg.plddt.n_bins
    )
    pde_logits_unbinned = unbin_logits(
        pde_logits, confidence_loss_cfg.pde.max_value, confidence_loss_cfg.pde.n_bins
    )

    real_heavy_atoms = is_real_atom[..., : ChemData().NHEAVY]
    complex_pae = pae_logits_unbinned.mean(dim=(1, 2))
    complex_pde = pde_logits_unbinned.mean(dim=(1, 2))
    # Masked mean: only atoms that exist contribute to the complex-level pLDDT
    complex_plddt = (plddt_logits_unbinned * real_heavy_atoms).sum(
        dim=(1, 2)
    ) / real_heavy_atoms.sum()

    # Errors are minimised, pLDDT is maximised
    return_dict["pae_idx"] = torch.argmin(complex_pae)
    return_dict["pde_idx"] = torch.argmin(complex_pde)
    return_dict["plddt_idx"] = torch.argmax(complex_plddt)

    chain_to_self_paes = _get_masked_error_per_chain(
        scored_chains, chain_to_self_masks, pae_logits_unbinned
    )
    chain_to_all_paes = _get_masked_error_per_chain(
        scored_chains, chain_to_all_masks, pae_logits_unbinned
    )
    interface_chain_paes = _get_masked_error_per_chain(
        interface_chains, interface_masks, pae_logits_unbinned
    )
    # average over both interfaces
    average_interface_paes = _get_average_error_per_interface(
        interfaces, lig_chains, interface_chain_paes
    )

    return_dict["best_chain_to_all_idx"] = _get_lowest_error_indices(chain_to_all_paes)
    return_dict["best_chain_to_self_idx"] = _get_lowest_error_indices(
        chain_to_self_paes
    )
    return_dict["best_interface_idx"] = _get_lowest_error_indices(
        average_interface_paes
    )
    # for ligands, we don't average the error
    return_dict["best_lig_ipae_idx"] = _get_lowest_error_ligand_indices(
        interface_chain_paes, interfaces, lig_chains
    )

    return return_dict
+ """ + atom_wise_plddt = plddt[:, is_real_atom[..., : ChemData().NHEAVY]] + assert atom_wise_plddt.shape[1] == atom_array.array_length() + atom_array_list = [] + # bitotite's AtomArray does not support setting different values as annotations other than + # the coordinate feature, so we convert atom_array to a list of AtomArrays + if isinstance(atom_array, AtomArrayStack): + for i, aa in enumerate(atom_array): + aa.set_annotation("b_factor", atom_wise_plddt[i].cpu().numpy()) + atom_array_list.append(aa) + else: + assert atom_wise_plddt.shape[0] == 1 + atom_array.set_annotation("b_factor", atom_wise_plddt[0].cpu().numpy()) + atom_array_list.append(atom_array) + + for aa in atom_array_list: + assert np.isnan(aa.b_factor).sum() == 0 + + return atom_array_list + +def _select_scored_units( + interfaces_to_score: list[tuple], pn_units_to_score: list[tuple] +): + scored_chains = [] + interfaces = [] + interface_chains = [] + for k in interfaces_to_score: + interfaces.append(f"{k[0]}-{k[1]}") + interface_chains.append(k[0]) + interface_chains.append(k[1]) + for k in pn_units_to_score: + scored_chains.append(k[0]) + + return scored_chains, interfaces, interface_chains + + +def _create_chain_to_all_masks(ch_label, chains_to_score): + unique_chains = np.unique(ch_label) + I = len(ch_label) + chain_to_all_masks = {} + for chain in unique_chains: + if chain in chains_to_score: + indices = torch.from_numpy((ch_label == chain)) + mask = indices.unsqueeze(0) | indices.unsqueeze(1) + # set the diagonal to false + mask = mask & ~torch.eye(I, device=mask.device, dtype=torch.bool) + chain_to_all_masks[chain] = mask + return chain_to_all_masks + + +def _create_chain_to_self_masks(ch_label, chains_to_score): + unique_chains = np.unique(ch_label) + I = len(ch_label) + chain_to_self_masks = {} + for chain in unique_chains: + if chain in chains_to_score: + indices = torch.from_numpy((ch_label == chain)) + mask = indices.unsqueeze(0) & indices.unsqueeze(1) + # set the diagonal to false + 
mask = mask & ~torch.eye(I, device=mask.device, dtype=torch.bool) + chain_to_self_masks[chain] = mask + return chain_to_self_masks + + +def _create_interface_masks(ch_label, interfaces, is_ligand): + interface_masks = {} + interface_chains = [] + ligand_chains = [] + for interface in interfaces: + interface_chains.append(interface.split("-")[0]) + interface_chains.append(interface.split("-")[1]) + interface_chains = set(interface_chains) + for chain in interface_chains: + chain_indices = torch.from_numpy((ch_label == chain)) + + to_self = chain_indices.unsqueeze(0) & chain_indices.unsqueeze(1) + to_all = chain_indices.unsqueeze(0) | chain_indices.unsqueeze(1) + interface_mask = to_all & ~to_self + interface_masks[chain] = interface_mask + + if torch.all(is_ligand[chain_indices]): + ligand_chains.append(chain) + + return interface_masks, ligand_chains + + +def _get_masked_error_per_chain(chains, masks, unbinned_logits): + error = {} + for chain in chains: + mask = masks[chain] + chain_error = compute_mean_over_subsampled_pairs(unbinned_logits, mask) + error[chain] = chain_error + + return error + + +def _get_average_error_per_interface(interfaces, lig_chains, interface_errors): + average_error = {} + for interface in interfaces: + chain_a = interface.split("-")[0] + chain_b = interface.split("-")[1] + average_error[interface] = ( + interface_errors[chain_a] + interface_errors[chain_b] + ) / 2 + + return average_error + + +def _get_lowest_error_indices(errors): + lowest_error_indices = {} + for k, v in errors.items(): + lowest_error_indices[k] = torch.argmin(v) + + return lowest_error_indices + + +def _get_lowest_error_ligand_indices(errors, interfaces, lig_chains): + # ligands are a special case in AF3, where they only consider the ligand chain's error and not the average for the interface + lowest_error_indices = {} + for interface in interfaces: + chain_a = interface.split("-")[0] + chain_b = interface.split("-")[1] + if chain_a in lig_chains or chain_b in 
lig_chains: + if chain_a in lig_chains: + lig_chain = chain_a + elif chain_b in lig_chains: + lig_chain = chain_b + + lowest_error_indices[interface] = torch.argmin(errors[lig_chain]) + else: + # assign a random value to avoid key errors downstream; sorting ligand interfaces + # from other types is handles in analysis + lowest_error_indices[interface] = 0 + + return lowest_error_indices diff --git a/src/modelhub/utils/recycling.py b/src/modelhub/utils/recycling.py new file mode 100755 index 0000000..cd3fa1e --- /dev/null +++ b/src/modelhub/utils/recycling.py @@ -0,0 +1,39 @@ +import torch +import math +from datahub.utils.rng import create_rng_state_from_seeds, rng_state + + +def get_recycle_schedule( + max_cycle: int, + n_epochs: int, + n_train: int, + world_size: int, + seed: int = 42, +) -> torch.Tensor: + """Generate a schedule for recycling iterations over multiple epochs. + + Used to ensure that each GPU has the same number of recycles within a given batch. + + Args: + max_cycle (int): Maximum number of recycling iterations (n_recycle). + n_epochs (int): Number of training epochs. + n_train (int): The total number of training examples per epoch (across all GPUs). + world_size (int): The number of distributed training processes. + seed (int, optional): The seed for random number generation. Defaults to 42. + + Returns: + torch.Tensor: A tensor containing the recycling schedule for each epoch, + with dimensions `(n_epochs, n_train // world_size)`. 
+ + References: + AF-2 Supplement, Algorithm 31 + """ + # We use a context manager to avoid modifying the global RNG state + with rng_state(create_rng_state_from_seeds(torch_seed=seed)): + # ...generate a recycling schedule for each epoch + recycle_schedule = [] + for i in range(n_epochs): + schedule = torch.randint(1, max_cycle + 1, (math.ceil(n_train / world_size),)) + recycle_schedule.append(schedule) + + return torch.stack(recycle_schedule, dim=0) diff --git a/src/modelhub/utils/torch_utils.py b/src/modelhub/utils/torch_utils.py new file mode 100755 index 0000000..98b3cbd --- /dev/null +++ b/src/modelhub/utils/torch_utils.py @@ -0,0 +1,208 @@ +"""General convenience utilities for PyTorch.""" + +__all__ = ["map_to", "assert_no_nans", "assert_shape", "assert_same_shape"] + +import warnings +from contextlib import contextmanager +from beartype.typing import Any + +import numpy as np +import torch +from beartype.typing import Sequence +from toolz import valmap +from torch import Tensor +from torch._prims_common import DeviceLikeType +from torch.types import _dtype + +from modelhub import should_check_nans +from modelhub.common import at_least_one_exists, do_nothing + + +def map_to( + x: Any, + *, + device: DeviceLikeType | None = None, + dtype: _dtype | None = None, + non_blocking: bool = False, + **to_kwargs, +) -> Any: + """ + Recursively applies the `.to()` method to all tensors in a nested structure. + + This function handles nested structures such as dictionaries and lists, applying the `.to()` method + to any PyTorch tensors while leaving other types unchanged. + + NOTE: If you are instantiating a new tensor, you should use the `device` and `dtype` arguments + instead of calling `map_to()` on the tensor. + (https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#create-tensors-directly-on-the-target-device) + + + Args: + - x (Any): The input structure, which can be a tensor, dictionary, list, or any other type. 
+ - device (DeviceLikeType | None): The target device to move tensors to (e.g., 'cpu', 'cuda'). + - dtype (_dtype | None): The target dtype to cast tensors to. + - non_blocking (bool): Whether to use non-blocking transfers when possible. + - **to_kwargs: Additional keyword arguments to pass to the `.to()` method. + + Returns: + - Any: The input structure with all contained tensors processed by the `.to()` method. + + Example: + >>> data = {"tensor": torch.tensor([1, 2, 3]), "list": [torch.tensor([4, 5]), "string"]} + >>> map_to(data, device="cuda", dtype=torch.float32) + {'tensor': tensor([1., 2., 3.], device='cuda:0', dtype=torch.float32), + 'list': [tensor([4., 5.], device='cuda:0', dtype=torch.float32), 'string']} + """ + torch._assert( + at_least_one_exists(device, dtype), + "Must provide at least one of `device` or `dtype`", + ) + + if isinstance(x, dict): + return valmap( + lambda v: map_to( + v, device=device, dtype=dtype, non_blocking=non_blocking, **to_kwargs + ), + x, + ) + elif isinstance(x, (list, tuple)): + return type(x)( + map( + lambda v: map_to( + v, + device=device, + dtype=dtype, + non_blocking=non_blocking, + **to_kwargs, + ), + x, + ) + ) + elif isinstance(x, Tensor): + return x.to(device=device, dtype=dtype, non_blocking=non_blocking, **to_kwargs) + else: + return x + + +def _assert_no_nans(x: Any, *, msg: str = "", fail_if_not_tensor: bool = False) -> None: + """Recursively checks for NaN values in tensor-like objects. + + Args: + - x (Any): Input to check for NaNs. Can be a tensor, dict, list, tuple, or other type. + - msg (str): Prefix for error messages. + - fail_if_not_tensor (bool): If True, raises error for non-tensor types. 
+ """ + if isinstance(x, Tensor): + torch._assert( + not torch.isnan(x).any(), + ": ".join(filter(bool, [msg, "Tensor contains NaNs!"])), + ) + elif isinstance(x, np.ndarray): + torch._assert( + not np.isnan(x).any(), + ": ".join(filter(bool, [msg, "Numpy array contains NaNs!"])), + ) + elif isinstance(x, float): + torch._assert( + not np.isnan(x), + ": ".join(filter(bool, [msg, "float is NaN!"])), + ) + elif isinstance(x, dict): + for k, v in x.items(): + _assert_no_nans( + v, + msg=".".join(filter(bool, [msg, k])), + fail_if_not_tensor=fail_if_not_tensor, + ) + elif isinstance(x, (list, tuple)): + for idx, v in enumerate(x): + _assert_no_nans( + v, + msg=".".join(filter(bool, [msg, str(idx)])), + fail_if_not_tensor=fail_if_not_tensor, + ) + elif fail_if_not_tensor: + raise ValueError(f"Unsupported type: {type(x)}") + + +assert_no_nans = _assert_no_nans if should_check_nans else do_nothing + + +@contextmanager +def _suppress_tracer_warnings(): + """ + Context manager to temporarily suppress known warnings in torch.jit.trace(). + Note: Cannot use catch_warnings because of https://bugs.python.org/issue29672 + + References: + - https://github.com/NVlabs/edm2/blob/main/torch_utils/misc.py + """ + tracer_warning_filter = ("ignore", None, torch.jit.TracerWarning, None, 0) + warnings.filters.insert(0, tracer_warning_filter) + yield + warnings.filters.remove(tracer_warning_filter) + + +def assert_shape(tensor: Tensor, ref_shape: Sequence[int | None]): + """ + Assert that the shape of a tensor matches the given list of integers. + None indicates that the size of a dimension is allowed to vary. + Performs symbolic assertion when used in torch.jit.trace(). + + Args: + - tensor (Tensor): The tensor to check the shape of. + - ref_shape (Sequence[int | None]): The expected shape of the tensor. 
+ + References: + - https://github.com/NVlabs/edm2/blob/main/torch_utils/misc.py + """ + + if tensor.ndim != len(ref_shape): + raise AssertionError( + f"Wrong number of dimensions: got {tensor.ndim}, expected {len(ref_shape)}" + ) + + for idx, (size, ref_size) in enumerate(zip(tensor.shape, ref_shape)): + if tensor.ndim != len(ref_shape): + raise AssertionError( + f"Wrong number of dimensions: got {tensor.ndim}, expected {len(ref_shape)}" + ) + + for idx, (size, ref_size) in enumerate(zip(tensor.shape, ref_shape)): + if ref_size is None: + pass + elif isinstance(ref_size, torch.Tensor): + with ( + _suppress_tracer_warnings() + ): # as_tensor results are registered as constants + torch._assert( + torch.equal(torch.as_tensor(size), ref_size), + f"Wrong size for dimension {idx}", + ) + elif isinstance(size, torch.Tensor): + with ( + _suppress_tracer_warnings() + ): # as_tensor results are registered as constants + torch._assert( + torch.equal(size, torch.as_tensor(ref_size)), + f"Wrong size for dimension {idx}: expected {ref_size}", + ) + elif size != ref_size: + raise AssertionError( + f"Wrong size for dimension {idx}: got {size}, expected {ref_size}" + ) + + +def assert_same_shape(tensor: Tensor, ref_tensor: Tensor) -> None: + """Assert that two tensors have the same shape.""" + assert_shape(tensor, ref_tensor.shape) + + +def device_of(obj: Any) -> torch.device: + """Get the device of a PyTorch object, e.g. 
a `nn.Module` or a `Tensor`.""" + if hasattr(obj, "device"): + return obj.device + elif hasattr(obj, "parameters"): + return next(obj.parameters()).device + else: + raise ValueError(f"Unsupported type: {type(obj)}") diff --git a/src/modelhub/validate.py b/src/modelhub/validate.py new file mode 100755 index 0000000..4096382 --- /dev/null +++ b/src/modelhub/validate.py @@ -0,0 +1,127 @@ +#!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../scripts/shebang/modelhub_exec.sh" "$0" "$@"' + +import logging + +import hydra +import rootutils +from omegaconf import DictConfig +import os + +# Setup root dir and environment variables (more info: https://github.com/ashleve/rootutils) +# NOTE: Sets the `PROJECT_ROOT` environment variable to the root directory of the project (where `.project-root` is located) +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +# If the user has set `PROJECT_PATH`, use it to build the config path; otherwise, fall back to `PROJECT_ROOT` +_config_path = os.path.join(os.environ.get("PROJECT_PATH", os.environ["PROJECT_ROOT"]), "configs") + +_spawning_process_logger = logging.getLogger(__name__) + + +@hydra.main(config_path=_config_path, config_name="validate", version_base="1.3") +def validate(cfg: DictConfig) -> None: + # ============================================================================== + # Import dependencies and resolve Hydra configuration + # ============================================================================== + + _spawning_process_logger.info("Importing dependencies...") + + # Lazy imports to make config generation fast + import torch + from lightning.fabric import seed_everything + from lightning.fabric.loggers import Logger + + # If training on DIGS L40, set precision of matrix multiplication to balance speed and accuracy + # Reference: https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision + 
torch.set_float32_matmul_precision("medium") + + from modelhub.callbacks.base import BaseCallback # noqa + from modelhub.utils.instantiators import instantiate_loggers, instantiate_callbacks # noqa + from modelhub.utils.logging import print_config_tree # noqa + from modelhub.utils.ddp import RankedLogger, set_accelerator_based_on_availability # noqa + from modelhub.utils.ddp import is_rank_zero # noqa + from modelhub.utils.datasets import assemble_val_loader_dict # noqa + + set_accelerator_based_on_availability(cfg) + + ranked_logger = RankedLogger(__name__, rank_zero_only=True) + _spawning_process_logger.info("Completed dependency imports ...") + + # ... print the configuration tree (NOTE: Only prints for rank 0) + print_config_tree(cfg, resolve=True) + + # ============================================================================== + # Logging and Callback instantiation + # ============================================================================== + + # Reduce the logging level for all dataset and sampler loggers (unless rank 0) + # We will still see messages from Rank 0; they are identical, since all ranks load and sample from the same datasets + if not is_rank_zero(): + dataset_logger = logging.getLogger("datasets") + sampler_logger = logging.getLogger("datahub.samplers") + dataset_logger.setLevel(logging.WARNING) + sampler_logger.setLevel(logging.ERROR) + + # ... 
seed everything (NOTE: By setting `workers=True`, we ensure that the dataloaders are seeded as well) + # (`PL_GLOBAL_SEED` environment varaible will be passed to the spawned subprocessed; e.g., through `ddp_spawn` backend) + if cfg.get("seed"): + ranked_logger.info(f"Seeding everything with seed={cfg.seed}...") + seed_everything(cfg.seed, workers=True, verbose=True) + else: + ranked_logger.warning("No seed provided - Not seeding anything!") + + ranked_logger.info("Instantiating loggers...") + loggers: list[Logger] = instantiate_loggers(cfg.get("logger")) + + ranked_logger.info("Instantiating callbacks...") + callbacks: list[BaseCallback] = instantiate_callbacks(cfg.get("callbacks")) + + # ============================================================================== + # Trainer and model instantiation + # ============================================================================== + + # ... instantiate the trainer + trainer = hydra.utils.instantiate( + cfg.trainer, + loggers=loggers or None, + callbacks=callbacks or None, + _convert_="partial", + _recursive_=False, + ) + # (Store the Hydra configuration in the trainer state) + trainer.initialize_or_update_trainer_state({"train_cfg": cfg}) + + # ... spawn processes for distributed training + # (We spawn here, rather than within `fit`, so we can use Fabric's `init_module` to efficiently initialize the model on the appropriate device) + ranked_logger.info( + f"Spawning {trainer.fabric.world_size} processes from {trainer.fabric.global_rank}..." + ) + trainer.fabric.launch() + + # ... 
construct the model + trainer.construct_model() + + # ============================================================================== + # Dataset instantiation + # ============================================================================== + + # Compose the validation loader(s) + val_loaders = assemble_val_loader_dict( + cfg=cfg.datasets.val, + rank=trainer.fabric.global_rank, + world_size=trainer.fabric.world_size, + loader_cfg=cfg.dataloader["val"], + ) + + # ... validate the model + ranked_logger.info("Validating model...") + assert cfg.ckpt_path is not None, "No checkpoint path provided for validation!" + trainer.validate( + val_loaders=val_loaders, + ckpt_path=cfg.ckpt_path, + ) + + ranked_logger.info("Validation complete!") + + +if __name__ == "__main__": + validate() diff --git a/rf2aa/archived/loaders/__init__.py b/tests/.gitkeep similarity index 100% rename from rf2aa/archived/loaders/__init__.py rename to tests/.gitkeep diff --git a/tests/data/example_with_ncaa.json b/tests/data/example_with_ncaa.json new file mode 100644 index 0000000..8e9b7d0 --- /dev/null +++ b/tests/data/example_with_ncaa.json @@ -0,0 +1,14 @@ +[ + { + "name": "example_with_ncaa", + "components": [ + { + "seq": "SPLEEREEELDRLYEKYKEEFKDLSPEELRAEVVAAIARVAEAFSDVDVWIVVSRYVTPERAAVARELAREAGYPEVLLFHSSRVEEAKAKKKELEAKGKKVKVSVDSSVDLPEADFVIPDKYAKI(PBF)WDGAAAGGTTLDFAALANALAEVLSRLAKEE", + "chain_id": "A" + }, + { + "smiles": "O=C1Nc2ccccc2[C@]34OCC[C@H]3C[C@H]14" + } + ] + } +] \ No newline at end of file diff --git a/tests/data/multiple_examples_from_json.json b/tests/data/multiple_examples_from_json.json new file mode 100644 index 0000000..ec6db7f --- /dev/null +++ b/tests/data/multiple_examples_from_json.json @@ -0,0 +1,26 @@ +[ + { + "name": "multiple_examples_from_json(1)", + "components": [ + { + "seq": "MNAKEIVVHALRLLENGDARGWCDLFHPEGVLEYPYPPPGYKTRFEGRETIWAHMRLFPEYMTIRFTDVQFYETADPDLAIGEFHGDGVHTVSGGKLAADYISVLRTRDGQILLYRLFFNPLRVLEPLGLEHHHHHH", + "chain_id": "A" + }, + { + "smiles": 
"O=C1OCC(=C1)C5C4(C(O)CC3C(CCC2CC(O)CCC23C)C4(O)CC5)C" + } + ] + }, + { + "name": "multiple_examples_from_json(2)", + "components": [ + { + "seq": "GSGVSLGQALLILSVAALLGTTVEEAVKRALWLKTKLGVSLEQAARTLSVAAYLGTTVEEAVKRALKLKTKLGVSLEQALLILFAAAALGTTVEEAVKRALKLKTKLGVSLEQALLILWTAVELGTTVEEAVKRALKLKTKLGVSLGQAQAILVVAAELGTTVEEAVYRALKLKTKLGVSLGQALLILEVAAKLGTTVEEAVKRALKLTTKLG", + "chain_id": "A" + }, + { + "ccd_code": "MG" + } + ] + } +] \ No newline at end of file diff --git a/tests/data/nested_examples/example_from_json.json b/tests/data/nested_examples/example_from_json.json new file mode 100644 index 0000000..8e491c2 --- /dev/null +++ b/tests/data/nested_examples/example_from_json.json @@ -0,0 +1,12 @@ +{ + "name": "example_from_smiles(json)", + "components": [ + { + "seq": "MNAKEIVVHALRLLENGDARGWCDLFHPEGVLEYPYPPPGYKTRFEGRETIWAHMRLFPEYMTIRFTDVQFYETADPDLAIGEFHGDGVHTVSGGKLAADYISVLRTRDGQILLYRLFFNPLRVLEPLGLEHHHHHH", + "chain_id": "A" + }, + { + "smiles": "O=C1OCC(=C1)C5C4(C(O)CC3C(CCC2CC(O)CCC23C)C4(O)CC5)C" + } + ] +} \ No newline at end of file diff --git a/tests/data/nested_examples/example_from_pdb_with_inter_chain_bonds.and.dots.pdb b/tests/data/nested_examples/example_from_pdb_with_inter_chain_bonds.and.dots.pdb new file mode 100644 index 0000000..561c469 --- /dev/null +++ b/tests/data/nested_examples/example_from_pdb_with_inter_chain_bonds.and.dots.pdb @@ -0,0 +1,1946 @@ +ATOM 1 N THR A 1 24.575 -10.198 -0.732 1.00 0.00 +ATOM 2 CA THR A 1 25.088 -8.836 -0.641 1.00 0.00 +ATOM 3 C THR A 1 24.500 -7.952 -1.734 1.00 0.00 +ATOM 4 O THR A 1 24.565 -8.338 -2.908 1.00 0.00 +ATOM 5 N ARG A 2 23.969 -6.868 -1.434 1.00 0.00 +ATOM 6 CA ARG A 2 23.470 -5.886 -2.389 1.00 0.00 +ATOM 7 C ARG A 2 24.354 -4.646 -2.417 1.00 0.00 +ATOM 8 O ARG A 2 24.869 -4.255 -1.361 1.00 0.00 +ATOM 9 N THR A 3 24.543 -4.047 -3.503 1.00 0.00 +ATOM 10 CA THR A 3 25.403 -2.879 -3.648 1.00 0.00 +ATOM 11 C THR A 3 24.757 -1.826 -4.540 1.00 0.00 +ATOM 12 O THR A 3 24.247 -2.158 -5.617 1.00 0.00 +ATOM 13 N VAL A 4 
24.770 -0.613 -4.123 1.00 0.00 +ATOM 14 CA VAL A 4 24.420 0.549 -4.931 1.00 0.00 +ATOM 15 C VAL A 4 25.582 1.530 -5.018 1.00 0.00 +ATOM 16 O VAL A 4 26.105 1.930 -3.970 1.00 0.00 +ATOM 17 N THR A 5 25.984 1.913 -6.140 1.00 0.00 +ATOM 18 CA THR A 5 27.137 2.786 -6.322 1.00 0.00 +ATOM 19 C THR A 5 26.826 3.913 -7.298 1.00 0.00 +ATOM 20 O THR A 5 26.146 3.660 -8.301 1.00 0.00 +ATOM 21 N ILE A 6 27.241 5.021 -7.085 1.00 0.00 +ATOM 22 CA ILE A 6 27.278 6.161 -7.993 1.00 0.00 +ATOM 23 C ILE A 6 28.712 6.553 -8.326 1.00 0.00 +ATOM 24 O ILE A 6 29.422 7.152 -7.509 1.00 0.00 +ATOM 25 N PRO A 7 29.126 6.187 -9.586 1.00 0.00 +ATOM 26 CA PRO A 7 30.484 6.526 -9.994 1.00 0.00 +ATOM 27 C PRO A 7 30.659 8.033 -10.126 1.00 0.00 +ATOM 28 O PRO A 7 29.685 8.728 -10.441 1.00 0.00 +ATOM 29 N ALA A 8 31.802 8.539 -9.909 1.00 0.00 +ATOM 30 CA ALA A 8 32.069 9.964 -10.063 1.00 0.00 +ATOM 31 C ALA A 8 31.708 10.444 -11.463 1.00 0.00 +ATOM 32 O ALA A 8 32.325 10.003 -12.441 1.00 0.00 +ATOM 33 N SER A 9 30.750 11.313 -11.574 1.00 0.00 +ATOM 34 CA SER A 9 30.335 11.901 -12.842 1.00 0.00 +ATOM 35 C SER A 9 30.334 13.423 -12.769 1.00 0.00 +ATOM 36 O SER A 9 29.485 13.974 -12.058 1.00 0.00 +ATOM 37 N GLY A 10 31.174 14.096 -13.415 1.00 0.00 +ATOM 38 CA GLY A 10 31.293 15.549 -13.397 1.00 0.00 +ATOM 39 C GLY A 10 31.552 16.063 -11.987 1.00 0.00 +ATOM 40 O GLY A 10 32.638 15.809 -11.450 1.00 0.00 +ATOM 41 N SER A 11 30.653 16.742 -11.387 1.00 0.00 +ATOM 42 CA SER A 11 30.847 17.324 -10.065 1.00 0.00 +ATOM 43 C SER A 11 30.263 16.430 -8.978 1.00 0.00 +ATOM 44 O SER A 11 30.371 16.739 -7.785 1.00 0.00 +ATOM 45 N LEU A 12 29.638 15.307 -9.402 1.00 0.00 +ATOM 46 CA LEU A 12 29.044 14.363 -8.463 1.00 0.00 +ATOM 47 C LEU A 12 30.091 13.404 -7.910 1.00 0.00 +ATOM 48 O LEU A 12 30.763 12.711 -8.685 1.00 0.00 +ATOM 49 N PRO A 13 30.237 13.354 -6.593 1.00 0.00 +ATOM 50 CA PRO A 13 31.262 12.533 -5.961 1.00 0.00 +ATOM 51 C PRO A 13 30.925 11.051 -6.069 1.00 0.00 +ATOM 52 O PRO A 13 29.764 10.694 -6.304 1.00 
0.00 +ATOM 53 N ALA A 14 31.922 10.196 -5.899 1.00 0.00 +ATOM 54 CA ALA A 14 31.687 8.757 -5.884 1.00 0.00 +ATOM 55 C ALA A 14 31.102 8.308 -4.551 1.00 0.00 +ATOM 56 O ALA A 14 31.510 8.808 -3.495 1.00 0.00 +ATOM 57 N VAL A 15 30.180 7.400 -4.589 1.00 0.00 +ATOM 58 CA VAL A 15 29.538 6.849 -3.402 1.00 0.00 +ATOM 59 C VAL A 15 29.215 5.372 -3.586 1.00 0.00 +ATOM 60 O VAL A 15 28.843 4.960 -4.692 1.00 0.00 +ATOM 61 N SER A 16 29.344 4.599 -2.573 1.00 0.00 +ATOM 62 CA SER A 16 29.037 3.174 -2.624 1.00 0.00 +ATOM 63 C SER A 16 28.371 2.707 -1.336 1.00 0.00 +ATOM 64 O SER A 16 28.837 3.059 -0.245 1.00 0.00 +ATOM 65 N LEU A 17 27.345 1.961 -1.430 1.00 0.00 +ATOM 66 CA LEU A 17 26.683 1.310 -0.306 1.00 0.00 +ATOM 67 C LEU A 17 26.639 -0.201 -0.492 1.00 0.00 +ATOM 68 O LEU A 17 26.039 -0.674 -1.466 1.00 0.00 +ATOM 69 N THR A 18 27.218 -0.930 0.353 1.00 0.00 +ATOM 70 CA THR A 18 27.156 -2.385 0.416 1.00 0.00 +ATOM 71 C THR A 18 26.369 -2.851 1.635 1.00 0.00 +ATOM 72 O THR A 18 26.676 -2.466 2.770 1.00 0.00 +ATOM 73 N LYS A 19 25.350 -3.684 1.386 1.00 0.00 +ATOM 74 CA LYS A 19 24.512 -4.176 2.473 1.00 0.00 +ATOM 75 C LYS A 19 24.489 -5.699 2.505 1.00 0.00 +ATOM 76 O LYS A 19 24.354 -6.348 1.460 1.00 0.00 +ATOM 77 N THR A 20 24.617 -6.249 3.680 1.00 0.00 +ATOM 78 CA THR A 20 24.529 -7.691 3.879 1.00 0.00 +ATOM 79 C THR A 20 23.546 -8.035 4.991 1.00 0.00 +ATOM 80 O THR A 20 23.626 -7.462 6.085 1.00 0.00 +ATOM 81 N LEU A 21 22.651 -8.930 4.735 1.00 0.00 +ATOM 82 CA LEU A 21 21.701 -9.411 5.731 1.00 0.00 +ATOM 83 C LEU A 21 22.101 -10.784 6.257 1.00 0.00 +ATOM 84 O LEU A 21 22.366 -11.704 5.473 1.00 0.00 +ATOM 85 N SER A 22 22.143 -10.913 7.562 1.00 0.00 +ATOM 86 CA SER A 22 22.459 -12.178 8.215 1.00 0.00 +ATOM 87 C SER A 22 21.479 -12.476 9.342 1.00 0.00 +ATOM 88 O SER A 22 20.601 -11.654 9.635 1.00 0.00 +ATOM 89 N THR A 23 21.616 -13.608 9.957 1.00 0.00 +ATOM 90 CA THR A 23 20.731 -13.986 11.052 1.00 0.00 +ATOM 91 C THR A 23 21.525 -14.442 12.269 1.00 0.00 +ATOM 92 O THR A 23 
22.526 -15.158 12.140 1.00 0.00 +ATOM 93 N SER A 24 21.071 -14.023 13.444 1.00 0.00 +ATOM 94 CA SER A 24 21.651 -14.461 14.708 1.00 0.00 +ATOM 95 C SER A 24 20.577 -14.647 15.772 1.00 0.00 +ATOM 96 O SER A 24 19.935 -13.667 16.171 1.00 0.00 +ATOM 97 N GLY A 25 20.369 -15.857 16.232 1.00 0.00 +ATOM 98 CA GLY A 25 19.367 -16.212 17.230 1.00 0.00 +ATOM 99 C GLY A 25 17.971 -15.798 16.781 1.00 0.00 +ATOM 100 O GLY A 25 17.148 -15.310 17.565 1.00 0.00 +ATOM 101 N GLY A 26 17.740 -16.026 15.434 1.00 0.00 +ATOM 102 CA GLY A 26 16.400 -15.831 14.893 1.00 0.00 +ATOM 103 C GLY A 26 16.134 -14.361 14.597 1.00 0.00 +ATOM 104 O GLY A 26 14.992 -13.995 14.292 1.00 0.00 +ATOM 105 N VAL A 27 17.130 -13.535 14.677 1.00 0.00 +ATOM 106 CA VAL A 27 16.971 -12.105 14.443 1.00 0.00 +ATOM 107 C VAL A 27 17.741 -11.661 13.206 1.00 0.00 +ATOM 108 O VAL A 27 18.875 -12.111 12.996 1.00 0.00 +ATOM 109 N THR A 28 17.166 -10.808 12.402 1.00 0.00 +ATOM 110 CA THR A 28 17.834 -10.288 11.215 1.00 0.00 +ATOM 111 C THR A 28 18.807 -9.172 11.575 1.00 0.00 +ATOM 112 O THR A 28 18.430 -8.215 12.263 1.00 0.00 +ATOM 113 N VAL A 29 20.027 -9.288 11.123 1.00 0.00 +ATOM 114 CA VAL A 29 21.064 -8.274 11.276 1.00 0.00 +ATOM 115 C VAL A 29 21.504 -7.728 9.924 1.00 0.00 +ATOM 116 O VAL A 29 21.807 -8.513 9.016 1.00 0.00 +ATOM 117 N ALA A 30 21.544 -6.460 9.775 1.00 0.00 +ATOM 118 CA ALA A 30 22.011 -5.777 8.575 1.00 0.00 +ATOM 119 C ALA A 30 23.372 -5.132 8.802 1.00 0.00 +ATOM 120 O ALA A 30 23.508 -4.279 9.689 1.00 0.00 +ATOM 121 N SER A 31 24.354 -5.513 8.040 1.00 0.00 +ATOM 122 CA SER A 31 25.646 -4.837 8.014 1.00 0.00 +ATOM 123 C SER A 31 25.786 -3.965 6.773 1.00 0.00 +ATOM 124 O SER A 31 25.686 -4.480 5.652 1.00 0.00 +ATOM 125 N ASN A 32 26.004 -2.713 6.933 1.00 0.00 +ATOM 126 CA ASN A 32 26.094 -1.767 5.827 1.00 0.00 +ATOM 127 C ASN A 32 27.426 -1.029 5.840 1.00 0.00 +ATOM 128 O ASN A 32 27.903 -0.623 6.908 1.00 0.00 +ATOM 129 N THR A 33 28.009 -0.858 4.701 1.00 0.00 +ATOM 130 CA THR A 33 29.174 -0.004 
4.503 1.00 0.00 +ATOM 131 C THR A 33 28.886 1.093 3.486 1.00 0.00 +ATOM 132 O THR A 33 28.609 0.780 2.321 1.00 0.00 +ATOM 133 N LEU A 34 28.939 2.301 3.864 1.00 0.00 +ATOM 134 CA LEU A 34 28.806 3.459 2.988 1.00 0.00 +ATOM 135 C LEU A 34 30.145 4.158 2.791 1.00 0.00 +ATOM 136 O LEU A 34 30.748 4.625 3.766 1.00 0.00 +ATOM 137 N THR A 35 30.609 4.234 1.568 1.00 0.00 +ATOM 138 CA THR A 35 31.860 4.906 1.237 1.00 0.00 +ATOM 139 C THR A 35 31.616 6.116 0.344 1.00 0.00 +ATOM 140 O THR A 35 30.979 6.000 -0.711 1.00 0.00 +ATOM 141 N VAL A 36 32.121 7.268 0.767 1.00 0.00 +ATOM 142 CA VAL A 36 31.991 8.489 -0.019 1.00 0.00 +ATOM 143 C VAL A 36 33.354 9.105 -0.309 1.00 0.00 +ATOM 144 O VAL A 36 34.181 9.248 0.601 1.00 0.00 +ATOM 145 N VAL A 37 33.585 9.465 -1.556 1.00 0.00 +ATOM 146 CA VAL A 37 34.837 10.090 -1.967 1.00 0.00 +ATOM 147 C VAL A 37 34.592 11.461 -2.584 1.00 0.00 +ATOM 148 O VAL A 37 33.907 11.581 -3.608 1.00 0.00 +ATOM 149 N THR A 38 35.152 12.481 -1.956 1.00 0.00 +ATOM 150 CA THR A 38 35.077 13.845 -2.466 1.00 0.00 +ATOM 151 C THR A 38 36.462 14.469 -2.578 1.00 0.00 +ATOM 152 O THR A 38 37.444 13.778 -2.275 1.00 0.00 +ATOM 153 N SER A 39 36.597 15.650 -2.968 1.00 0.00 +ATOM 154 CA SER A 39 37.874 16.324 -3.171 1.00 0.00 +ATOM 155 C SER A 39 38.598 16.541 -1.849 1.00 0.00 +ATOM 156 O SER A 39 39.819 16.742 -1.833 1.00 0.00 +ATOM 157 N VAL A 40 37.851 16.500 -0.738 1.00 0.00 +ATOM 158 CA VAL A 40 38.413 16.805 0.572 1.00 0.00 +ATOM 159 C VAL A 40 38.958 15.551 1.243 1.00 0.00 +ATOM 160 O VAL A 40 39.852 15.623 2.096 1.00 0.00 +ATOM 161 N GLY A 41 38.400 14.392 0.841 1.00 0.00 +ATOM 162 CA GLY A 41 38.820 13.125 1.428 1.00 0.00 +ATOM 163 C GLY A 41 37.745 12.058 1.263 1.00 0.00 +ATOM 164 O GLY A 41 36.733 12.281 0.586 1.00 0.00 +ATOM 165 N THR A 42 37.967 10.931 1.869 1.00 0.00 +ATOM 166 CA THR A 42 37.016 9.826 1.842 1.00 0.00 +ATOM 167 C THR A 42 36.473 9.532 3.235 1.00 0.00 +ATOM 168 O THR A 42 37.214 9.576 4.225 1.00 0.00 +ATOM 169 N LEU A 43 35.197 9.237 
3.296 1.00 0.00 +ATOM 170 CA LEU A 43 34.565 8.832 4.546 1.00 0.00 +ATOM 171 C LEU A 43 33.899 7.469 4.410 1.00 0.00 +ATOM 172 O LEU A 43 33.122 7.235 3.475 1.00 0.00 +ATOM 173 N THR A 44 34.204 6.593 5.330 1.00 0.00 +ATOM 174 CA THR A 44 33.572 5.280 5.390 1.00 0.00 +ATOM 175 C THR A 44 32.758 5.118 6.667 1.00 0.00 +ATOM 176 O THR A 44 33.245 5.442 7.758 1.00 0.00 +ATOM 177 N LEU A 45 31.593 4.648 6.551 1.00 0.00 +ATOM 178 CA LEU A 45 30.717 4.359 7.680 1.00 0.00 +ATOM 179 C LEU A 45 30.333 2.885 7.716 1.00 0.00 +ATOM 180 O LEU A 45 29.799 2.359 6.730 1.00 0.00 +ATOM 181 N GLU A 46 30.592 2.233 8.812 1.00 0.00 +ATOM 182 CA GLU A 46 30.152 0.858 9.019 1.00 0.00 +ATOM 183 C GLU A 46 29.006 0.791 10.021 1.00 0.00 +ATOM 184 O GLU A 46 29.184 1.145 11.193 1.00 0.00 +ATOM 185 N THR A 47 27.856 0.349 9.575 1.00 0.00 +ATOM 186 CA THR A 47 26.665 0.304 10.415 1.00 0.00 +ATOM 187 C THR A 47 26.162 -1.124 10.582 1.00 0.00 +ATOM 188 O THR A 47 25.972 -1.812 9.571 1.00 0.00 +ATOM 189 N THR A 48 25.955 -1.562 11.704 1.00 0.00 +ATOM 190 CA THR A 48 25.278 -2.812 12.026 1.00 0.00 +ATOM 191 C THR A 48 23.975 -2.556 12.773 1.00 0.00 +ATOM 192 O THR A 48 23.973 -1.871 13.804 1.00 0.00 +ATOM 193 N VAL A 49 22.896 -3.089 12.270 1.00 0.00 +ATOM 194 CA VAL A 49 21.581 -2.893 12.869 1.00 0.00 +ATOM 195 C VAL A 49 20.856 -4.220 13.054 1.00 0.00 +ATOM 196 O VAL A 49 20.888 -5.059 12.144 1.00 0.00 +ATOM 197 N THR A 50 20.237 -4.429 14.142 1.00 0.00 +ATOM 198 CA THR A 50 19.337 -5.551 14.378 1.00 0.00 +ATOM 199 C THR A 50 17.886 -5.152 14.143 1.00 0.00 +ATOM 200 O THR A 50 17.415 -4.184 14.754 1.00 0.00 +ATOM 201 N TYR A 51 17.190 -5.842 13.309 1.00 0.00 +ATOM 202 CA TYR A 51 15.794 -5.565 12.994 1.00 0.00 +ATOM 203 C TYR A 51 14.858 -6.367 13.889 1.00 0.00 +ATOM 204 O TYR A 51 14.312 -7.379 13.433 1.00 0.00 +ATOM 205 N ALA A 52 14.662 -5.981 15.074 1.00 0.00 +ATOM 206 CA ALA A 52 13.857 -6.689 16.062 1.00 0.00 +ATOM 207 C ALA A 52 13.400 -5.753 17.174 1.00 0.00 +ATOM 208 O ALA A 52 13.801 
-4.584 17.240 1.00 0.00 +ATOM 209 N ALA A 53 12.518 -6.325 18.073 1.00 0.00 +ATOM 210 CA ALA A 53 12.125 -5.538 19.235 1.00 0.00 +ATOM 211 C ALA A 53 13.326 -5.216 20.115 1.00 0.00 +ATOM 212 O ALA A 53 14.173 -6.077 20.390 1.00 0.00 +ATOM 213 N GLY A 54 13.375 -3.930 20.554 1.00 0.00 +ATOM 214 CA GLY A 54 14.466 -3.468 21.404 1.00 0.00 +ATOM 215 C GLY A 54 15.807 -3.584 20.691 1.00 0.00 +ATOM 216 O GLY A 54 16.834 -3.877 21.317 1.00 0.00 +ATOM 217 N GLY A 55 15.786 -3.353 19.382 1.00 0.00 +ATOM 218 CA GLY A 55 16.964 -3.543 18.544 1.00 0.00 +ATOM 219 C GLY A 55 18.052 -2.534 18.887 1.00 0.00 +ATOM 220 O GLY A 55 17.797 -1.491 19.503 1.00 0.00 +ATOM 221 N THR A 56 19.266 -2.878 18.469 1.00 0.00 +ATOM 222 CA THR A 56 20.411 -1.985 18.600 1.00 0.00 +ATOM 223 C THR A 56 20.998 -1.638 17.238 1.00 0.00 +ATOM 224 O THR A 56 20.985 -2.452 16.306 1.00 0.00 +ATOM 225 N ALA A 57 21.497 -0.440 17.151 1.00 0.00 +ATOM 226 CA ALA A 57 22.209 0.030 15.969 1.00 0.00 +ATOM 227 C ALA A 57 23.570 0.606 16.338 1.00 0.00 +ATOM 228 O ALA A 57 23.670 1.345 17.326 1.00 0.00 +ATOM 229 N THR A 58 24.565 0.306 15.614 1.00 0.00 +ATOM 230 CA THR A 58 25.910 0.829 15.824 1.00 0.00 +ATOM 231 C THR A 58 26.491 1.390 14.533 1.00 0.00 +ATOM 232 O THR A 58 26.323 0.800 13.458 1.00 0.00 +ATOM 233 N LEU A 59 27.155 2.499 14.641 1.00 0.00 +ATOM 234 CA LEU A 59 27.790 3.149 13.501 1.00 0.00 +ATOM 235 C LEU A 59 29.215 3.573 13.832 1.00 0.00 +ATOM 236 O LEU A 59 29.441 4.185 14.884 1.00 0.00 +ATOM 237 N LYS A 60 30.138 3.280 13.002 1.00 0.00 +ATOM 238 CA LYS A 60 31.522 3.733 13.080 1.00 0.00 +ATOM 239 C LYS A 60 31.900 4.560 11.858 1.00 0.00 +ATOM 240 O LYS A 60 31.641 4.151 10.719 1.00 0.00 +ATOM 241 N VAL A 61 32.505 5.706 12.094 1.00 0.00 +ATOM 242 CA VAL A 61 32.923 6.593 11.015 1.00 0.00 +ATOM 243 C VAL A 61 34.442 6.680 10.930 1.00 0.00 +ATOM 244 O VAL A 61 35.118 6.836 11.955 1.00 0.00 +ATOM 245 N THR A 62 34.966 6.580 9.732 1.00 0.00 +ATOM 246 CA THR A 62 36.400 6.685 9.488 1.00 0.00 +ATOM 247 C 
THR A 62 36.695 7.640 8.339 1.00 0.00 +ATOM 248 O THR A 62 36.056 7.553 7.283 1.00 0.00 +ATOM 249 N ALA A 63 37.639 8.535 8.528 1.00 0.00 +ATOM 250 CA ALA A 63 38.133 9.393 7.457 1.00 0.00 +ATOM 251 C ALA A 63 39.385 8.808 6.815 1.00 0.00 +ATOM 252 O ALA A 63 40.084 8.028 7.474 1.00 0.00 +ATOM 253 N GLU A 64 39.689 9.133 5.609 1.00 0.00 +ATOM 254 CA GLU A 64 40.849 8.586 4.916 1.00 0.00 +ATOM 255 C GLU A 64 42.135 8.882 5.677 1.00 0.00 +ATOM 256 O GLU A 64 42.375 9.992 6.169 1.00 0.00 +ATOM 257 N GLY A 65 42.994 7.758 5.747 1.00 0.00 +ATOM 258 CA GLY A 65 44.310 7.902 6.358 1.00 0.00 +ATOM 259 C GLY A 65 44.213 7.915 7.878 1.00 0.00 +ATOM 260 O GLY A 65 45.211 8.170 8.564 1.00 0.00 +ATOM 261 N GLN A 66 43.023 7.645 8.404 1.00 0.00 +ATOM 262 CA GLN A 66 42.830 7.642 9.849 1.00 0.00 +ATOM 263 C GLN A 66 42.104 6.383 10.306 1.00 0.00 +ATOM 264 O GLN A 66 41.466 5.700 9.494 1.00 0.00 +ATOM 265 N GLY A 67 42.198 6.084 11.564 1.00 0.00 +ATOM 266 CA GLY A 67 41.432 4.996 12.160 1.00 0.00 +ATOM 267 C GLY A 67 40.013 5.439 12.494 1.00 0.00 +ATOM 268 O GLY A 67 39.634 6.579 12.198 1.00 0.00 +ATOM 269 N VAL A 68 39.229 4.550 13.105 1.00 0.00 +ATOM 270 CA VAL A 68 37.887 4.917 13.543 1.00 0.00 +ATOM 271 C VAL A 68 37.908 6.194 14.373 1.00 0.00 +ATOM 272 O VAL A 68 38.641 6.314 15.363 1.00 0.00 +ATOM 273 N GLN A 69 37.041 7.182 13.911 1.00 0.00 +ATOM 274 CA GLN A 69 37.056 8.497 14.541 1.00 0.00 +ATOM 275 C GLN A 69 35.872 8.670 15.483 1.00 0.00 +ATOM 276 O GLN A 69 36.006 9.322 16.526 1.00 0.00 +ATOM 277 N ALA A 70 34.753 8.117 15.147 1.00 0.00 +ATOM 278 CA ALA A 70 33.545 8.212 15.958 1.00 0.00 +ATOM 279 C ALA A 70 32.766 6.903 15.942 1.00 0.00 +ATOM 280 O ALA A 70 32.682 6.247 14.896 1.00 0.00 +ATOM 281 N GLU A 71 32.222 6.531 17.037 1.00 0.00 +ATOM 282 CA GLU A 71 31.345 5.373 17.165 1.00 0.00 +ATOM 283 C GLU A 71 30.078 5.724 17.935 1.00 0.00 +ATOM 284 O GLU A 71 30.167 6.331 19.009 1.00 0.00 +ATOM 285 N VAL A 72 28.955 5.376 17.440 1.00 0.00 +ATOM 286 CA VAL A 72 27.677 
5.674 18.075 1.00 0.00 +ATOM 287 C VAL A 72 26.819 4.421 18.202 1.00 0.00 +ATOM 288 O VAL A 72 26.959 3.501 17.387 1.00 0.00 +ATOM 289 N THR A 73 25.980 4.367 19.153 1.00 0.00 +ATOM 290 CA THR A 73 25.055 3.259 19.360 1.00 0.00 +ATOM 291 C THR A 73 23.683 3.761 19.791 1.00 0.00 +ATOM 292 O THR A 73 23.599 4.687 20.607 1.00 0.00 +ATOM 293 N TYR A 74 22.664 3.204 19.291 1.00 0.00 +ATOM 294 CA TYR A 74 21.282 3.481 19.664 1.00 0.00 +ATOM 295 C TYR A 74 20.545 2.201 20.035 1.00 0.00 +ATOM 296 O TYR A 74 20.514 1.236 19.260 1.00 0.00 +ATOM 297 N THR A 75 19.953 2.209 21.233 1.00 0.00 +ATOM 298 CA THR A 75 19.153 1.073 21.676 1.00 0.00 +ATOM 299 C THR A 75 17.675 1.434 21.746 1.00 0.00 +ATOM 300 O THR A 75 17.294 2.346 22.491 1.00 0.00 +ATOM 301 N ALA A 76 16.847 0.736 20.986 1.00 0.00 +ATOM 302 CA ALA A 76 15.414 1.002 20.968 1.00 0.00 +ATOM 303 C ALA A 76 14.739 0.467 22.224 1.00 0.00 +ATOM 304 O ALA A 76 15.240 -0.499 22.813 1.00 0.00 +ATOM 305 N PRO A 77 13.665 1.035 22.641 1.00 0.00 +ATOM 306 CA PRO A 77 12.919 0.548 23.795 1.00 0.00 +ATOM 307 C PRO A 77 12.324 -0.828 23.526 1.00 0.00 +ATOM 308 O PRO A 77 12.067 -1.141 22.356 1.00 0.00 +ATOM 309 N ALA A 78 12.101 -1.626 24.483 1.00 0.00 +ATOM 310 CA ALA A 78 11.628 -3.000 24.362 1.00 0.00 +ATOM 311 C ALA A 78 10.261 -3.055 23.692 1.00 0.00 +ATOM 312 O ALA A 78 9.958 -4.082 23.071 1.00 0.00 +ATOM 313 N ASN A 79 9.454 -2.076 23.767 1.00 0.00 +ATOM 314 CA ASN A 79 8.104 -2.105 23.216 1.00 0.00 +ATOM 315 C ASN A 79 8.081 -1.579 21.787 1.00 0.00 +ATOM 316 O ASN A 79 7.009 -1.342 21.216 1.00 0.00 +ATOM 317 N GLU A 80 9.306 -1.394 21.204 1.00 0.00 +ATOM 318 CA GLU A 80 9.415 -0.866 19.849 1.00 0.00 +ATOM 319 C GLU A 80 10.351 -1.716 19.000 1.00 0.00 +ATOM 320 O GLU A 80 11.440 -2.083 19.461 1.00 0.00 +ATOM 321 N THR A 81 9.956 -2.025 17.802 1.00 0.00 +ATOM 322 CA THR A 81 10.803 -2.757 16.868 1.00 0.00 +ATOM 323 C THR A 81 11.357 -1.836 15.789 1.00 0.00 +ATOM 324 O THR A 81 10.651 -0.982 15.236 1.00 0.00 +ATOM 325 N ILE A 
82 12.701 -2.051 15.501 1.00 0.00 +ATOM 326 CA ILE A 82 13.296 -1.303 14.400 1.00 0.00 +ATOM 327 C ILE A 82 12.732 -1.754 13.059 1.00 0.00 +ATOM 328 O ILE A 82 12.678 -2.952 12.752 1.00 0.00 +ATOM 329 N GLN A 83 12.302 -0.729 12.247 1.00 0.00 +ATOM 330 CA GLN A 83 11.662 -1.046 10.976 1.00 0.00 +ATOM 331 C GLN A 83 12.528 -0.614 9.800 1.00 0.00 +ATOM 332 O GLN A 83 12.567 -1.350 8.806 1.00 0.00 +ATOM 333 N ALA A 84 13.177 0.445 9.834 1.00 0.00 +ATOM 334 CA ALA A 84 14.012 0.968 8.759 1.00 0.00 +ATOM 335 C ALA A 84 15.180 1.775 9.312 1.00 0.00 +ATOM 336 O ALA A 84 15.048 2.370 10.389 1.00 0.00 +ATOM 337 N ALA A 85 16.257 1.813 8.651 1.00 0.00 +ATOM 338 CA ALA A 85 17.444 2.562 9.045 1.00 0.00 +ATOM 339 C ALA A 85 18.210 3.060 7.826 1.00 0.00 +ATOM 340 O ALA A 85 18.056 2.512 6.728 1.00 0.00 +ATOM 341 N SER A 86 19.015 4.072 8.009 1.00 0.00 +ATOM 342 CA SER A 86 19.824 4.632 6.933 1.00 0.00 +ATOM 343 C SER A 86 20.969 5.472 7.485 1.00 0.00 +ATOM 344 O SER A 86 20.824 6.022 8.584 1.00 0.00 +ATOM 345 N ALA A 87 22.011 5.591 6.839 1.00 0.00 +ATOM 346 CA ALA A 87 23.164 6.413 7.186 1.00 0.00 +ATOM 347 C ALA A 87 23.699 7.154 5.967 1.00 0.00 +ATOM 348 O ALA A 87 23.670 6.587 4.867 1.00 0.00 +ATOM 349 N VAL A 88 24.154 8.310 6.095 1.00 0.00 +ATOM 350 CA VAL A 88 24.714 9.085 4.994 1.00 0.00 +ATOM 351 C VAL A 88 25.896 9.926 5.458 1.00 0.00 +ATOM 352 O VAL A 88 25.879 10.367 6.614 1.00 0.00 +ATOM 353 N ALA A 89 26.825 10.157 4.720 1.00 0.00 +ATOM 354 CA ALA A 89 27.993 10.975 5.026 1.00 0.00 +ATOM 355 C ALA A 89 28.330 11.908 3.870 1.00 0.00 +ATOM 356 O ALA A 89 28.104 11.538 2.711 1.00 0.00 +ATOM 357 N SER A 90 28.822 13.016 4.120 1.00 0.00 +ATOM 358 CA SER A 90 29.296 13.977 3.131 1.00 0.00 +ATOM 359 C SER A 90 30.583 14.651 3.590 1.00 0.00 +ATOM 360 O SER A 90 30.518 15.514 4.475 1.00 0.00 +ATOM 361 N PRO A 91 31.688 14.321 3.065 1.00 0.00 +ATOM 362 CA PRO A 91 32.987 14.853 3.459 1.00 0.00 +ATOM 363 C PRO A 91 33.051 16.361 3.253 1.00 0.00 +ATOM 364 O PRO A 91 33.661 
17.098 4.037 1.00 0.00 +ATOM 365 N GLU A 92 32.368 16.808 2.120 1.00 0.00 +ATOM 366 CA GLU A 92 32.366 18.236 1.827 1.00 0.00 +ATOM 367 C GLU A 92 31.634 19.019 2.909 1.00 0.00 +ATOM 368 O GLU A 92 32.048 20.130 3.266 1.00 0.00 +ATOM 369 N ALA A 93 30.531 18.434 3.437 1.00 0.00 +ATOM 370 CA ALA A 93 29.742 19.106 4.462 1.00 0.00 +ATOM 371 C ALA A 93 30.222 18.734 5.859 1.00 0.00 +ATOM 372 O ALA A 93 29.842 19.391 6.837 1.00 0.00 +ATOM 373 N GLY A 94 31.013 17.740 5.964 1.00 0.00 +ATOM 374 CA GLY A 94 31.607 17.283 7.214 1.00 0.00 +ATOM 375 C GLY A 94 30.539 16.804 8.189 1.00 0.00 +ATOM 376 O GLY A 94 30.580 17.143 9.378 1.00 0.00 +ATOM 377 N VAL A 95 29.592 16.024 7.691 1.00 0.00 +ATOM 378 CA VAL A 95 28.508 15.498 8.512 1.00 0.00 +ATOM 379 C VAL A 95 28.216 14.042 8.172 1.00 0.00 +ATOM 380 O VAL A 95 28.075 13.702 6.991 1.00 0.00 +ATOM 381 N ALA A 96 28.127 13.214 9.144 1.00 0.00 +ATOM 382 CA ALA A 96 27.577 11.865 9.079 1.00 0.00 +ATOM 383 C ALA A 96 26.291 11.755 9.888 1.00 0.00 +ATOM 384 O ALA A 96 26.260 12.175 11.051 1.00 0.00 +ATOM 385 N GLY A 97 25.273 11.220 9.320 1.00 0.00 +ATOM 386 CA GLY A 97 23.968 11.142 9.966 1.00 0.00 +ATOM 387 C GLY A 97 23.416 9.723 9.923 1.00 0.00 +ATOM 388 O GLY A 97 23.576 9.027 8.912 1.00 0.00 +ATOM 389 N LEU A 98 22.804 9.311 10.951 1.00 0.00 +ATOM 390 CA LEU A 98 22.094 8.040 11.037 1.00 0.00 +ATOM 391 C LEU A 98 20.635 8.249 11.423 1.00 0.00 +ATOM 392 O LEU A 98 20.343 9.072 12.300 1.00 0.00 +ATOM 393 N ALA A 99 19.755 7.552 10.811 1.00 0.00 +ATOM 394 CA ALA A 99 18.332 7.663 11.107 1.00 0.00 +ATOM 395 C ALA A 99 17.683 6.289 11.219 1.00 0.00 +ATOM 396 O ALA A 99 18.084 5.365 10.500 1.00 0.00 +ATOM 397 N LEU A 100 16.761 6.141 12.040 1.00 0.00 +ATOM 398 CA LEU A 100 16.032 4.897 12.258 1.00 0.00 +ATOM 399 C LEU A 100 14.544 5.158 12.451 1.00 0.00 +ATOM 400 O LEU A 100 14.157 6.153 13.078 1.00 0.00 +ATOM 401 N VAL A 101 13.727 4.280 11.922 1.00 0.00 +ATOM 402 CA VAL A 101 12.284 4.294 12.131 1.00 0.00 +ATOM 403 C VAL A 101 
11.818 3.028 12.839 1.00 0.00 +ATOM 404 O VAL A 101 12.355 1.941 12.591 1.00 0.00 +ATOM 405 N THR A 102 10.840 3.162 13.700 1.00 0.00 +ATOM 406 CA THR A 102 10.322 2.027 14.454 1.00 0.00 +ATOM 407 C THR A 102 8.903 1.681 14.022 1.00 0.00 +ATOM 408 O THR A 102 8.262 2.440 13.284 1.00 0.00 +ATOM 409 N ASP A 103 8.427 0.538 14.485 1.00 0.00 +ATOM 410 CA ASP A 103 7.087 0.069 14.152 1.00 0.00 +ATOM 411 C ASP A 103 6.020 0.983 14.742 1.00 0.00 +ATOM 412 O ASP A 103 4.894 1.058 14.234 1.00 0.00 +ATOM 413 N ALA A 104 6.403 1.690 15.845 1.00 0.00 +ATOM 414 CA ALA A 104 5.474 2.617 16.481 1.00 0.00 +ATOM 415 C ALA A 104 5.404 3.935 15.721 1.00 0.00 +ATOM 416 O ALA A 104 4.558 4.783 16.030 1.00 0.00 +ATOM 417 N GLY A 105 6.277 4.116 14.738 1.00 0.00 +ATOM 418 CA GLY A 105 6.207 5.251 13.825 1.00 0.00 +ATOM 419 C GLY A 105 7.131 6.376 14.273 1.00 0.00 +ATOM 420 O GLY A 105 7.104 7.468 13.691 1.00 0.00 +ATOM 421 N THR A 106 7.943 6.124 15.296 1.00 0.00 +ATOM 422 CA THR A 106 8.887 7.127 15.774 1.00 0.00 +ATOM 423 C THR A 106 10.150 7.145 14.922 1.00 0.00 +ATOM 424 O THR A 106 10.681 6.087 14.560 1.00 0.00 +ATOM 425 N LEU A 107 10.620 8.340 14.608 1.00 0.00 +ATOM 426 CA LEU A 107 11.874 8.550 13.895 1.00 0.00 +ATOM 427 C LEU A 107 12.981 8.988 14.845 1.00 0.00 +ATOM 428 O LEU A 107 12.808 9.962 15.589 1.00 0.00 +ATOM 429 N TYR A 108 14.083 8.304 14.829 1.00 0.00 +ATOM 430 CA TYR A 108 15.261 8.662 15.610 1.00 0.00 +ATOM 431 C TYR A 108 16.411 9.090 14.707 1.00 0.00 +ATOM 432 O TYR A 108 16.684 8.425 13.700 1.00 0.00 +ATOM 433 N THR A 109 17.075 10.170 15.044 1.00 0.00 +ATOM 434 CA THR A 109 18.138 10.688 14.191 1.00 0.00 +ATOM 435 C THR A 109 19.329 11.155 15.017 1.00 0.00 +ATOM 436 O THR A 109 19.129 11.609 16.151 1.00 0.00 +ATOM 437 N ARG A 110 20.466 11.072 14.543 1.00 0.00 +ATOM 438 CA ARG A 110 21.699 11.546 15.160 1.00 0.00 +ATOM 439 C ARG A 110 22.698 12.011 14.108 1.00 0.00 +ATOM 440 O ARG A 110 22.498 11.739 12.917 1.00 0.00 +ATOM 441 N MET A 111 23.721 12.676 14.495 
1.00 0.00 +ATOM 442 CA MET A 111 24.703 13.162 13.533 1.00 0.00 +ATOM 443 C MET A 111 26.070 13.339 14.182 1.00 0.00 +ATOM 444 O MET A 111 26.135 13.732 15.353 1.00 0.00 +ATOM 445 N VAL A 112 27.083 13.084 13.519 1.00 0.00 +ATOM 446 CA VAL A 112 28.449 13.444 13.880 1.00 0.00 +ATOM 447 C VAL A 112 28.973 14.571 12.998 1.00 0.00 +ATOM 448 O VAL A 112 29.132 14.403 11.783 1.00 0.00 +ATOM 449 N ALA A 113 29.241 15.733 13.636 1.00 0.00 +ATOM 450 CA ALA A 113 29.800 16.867 12.909 1.00 0.00 +ATOM 451 C ALA A 113 31.322 16.815 12.893 1.00 0.00 +ATOM 452 O ALA A 113 31.954 16.632 13.941 1.00 0.00 +ATOM 453 N PHE A 114 31.893 16.972 11.736 1.00 0.00 +ATOM 454 CA PHE A 114 33.340 17.035 11.564 1.00 0.00 +ATOM 455 C PHE A 114 33.787 18.435 11.163 1.00 0.00 +ATOM 456 O PHE A 114 32.973 19.210 10.645 1.00 0.00 +ATOM 457 N GLU A 115 34.998 18.763 11.379 1.00 0.00 +ATOM 458 CA GLU A 115 35.602 20.010 10.925 1.00 0.00 +ATOM 459 C GLU A 115 36.911 19.752 10.189 1.00 0.00 +ATOM 460 O GLU A 115 37.611 18.770 10.466 1.00 0.00 +ATOM 461 N ASN A 116 37.226 20.642 9.254 1.00 0.00 +ATOM 462 CA ASN A 116 38.463 20.529 8.490 1.00 0.00 +ATOM 463 C ASN A 116 39.537 21.459 9.039 1.00 0.00 +ATOM 464 O ASN A 116 39.324 22.678 9.053 1.00 0.00 +ATOM 465 N VAL A 117 40.605 20.974 9.463 1.00 0.00 +ATOM 466 CA VAL A 117 41.779 21.727 9.888 1.00 0.00 +ATOM 467 C VAL A 117 43.037 21.228 9.189 1.00 0.00 +ATOM 468 O VAL A 117 43.514 20.115 9.445 1.00 0.00 +ATOM 469 N GLY A 118 43.571 22.096 8.280 1.00 0.00 +ATOM 470 CA GLY A 118 44.769 21.762 7.519 1.00 0.00 +ATOM 471 C GLY A 118 44.525 20.570 6.602 1.00 0.00 +ATOM 472 O GLY A 118 45.442 19.773 6.366 1.00 0.00 +ATOM 473 N GLY A 119 43.338 20.443 6.099 1.00 0.00 +ATOM 474 CA GLY A 119 42.998 19.434 5.103 1.00 0.00 +ATOM 475 C GLY A 119 42.656 18.103 5.761 1.00 0.00 +ATOM 476 O GLY A 119 42.387 17.122 5.057 1.00 0.00 +ATOM 477 N VAL A 120 42.663 18.051 7.074 1.00 0.00 +ATOM 478 CA VAL A 120 42.289 16.846 7.805 1.00 0.00 +ATOM 479 C VAL A 120 40.921 16.999 
8.457 1.00 0.00 +ATOM 480 O VAL A 120 40.645 18.032 9.080 1.00 0.00 +ATOM 481 N LEU A 121 40.079 16.007 8.325 1.00 0.00 +ATOM 482 CA LEU A 121 38.756 15.986 8.938 1.00 0.00 +ATOM 483 C LEU A 121 38.813 15.420 10.351 1.00 0.00 +ATOM 484 O LEU A 121 39.306 14.300 10.533 1.00 0.00 +ATOM 485 N THR A 122 38.351 16.117 11.308 1.00 0.00 +ATOM 486 CA THR A 122 38.298 15.682 12.699 1.00 0.00 +ATOM 487 C THR A 122 36.880 15.765 13.249 1.00 0.00 +ATOM 488 O THR A 122 36.172 16.752 13.009 1.00 0.00 +ATOM 489 N VAL A 123 36.462 14.722 13.991 1.00 0.00 +ATOM 490 CA VAL A 123 35.160 14.771 14.645 1.00 0.00 +ATOM 491 C VAL A 123 35.088 15.918 15.645 1.00 0.00 +ATOM 492 O VAL A 123 35.923 16.015 16.553 1.00 0.00 +ATOM 493 N ARG A 124 34.085 16.787 15.471 1.00 0.00 +ATOM 494 CA ARG A 124 33.948 17.963 16.322 1.00 0.00 +ATOM 495 C ARG A 124 32.918 17.731 17.420 1.00 0.00 +ATOM 496 O ARG A 124 33.157 18.111 18.574 1.00 0.00 +ATOM 497 N GLU A 125 31.819 17.138 17.099 1.00 0.00 +ATOM 498 CA GLU A 125 30.753 16.865 18.056 1.00 0.00 +ATOM 499 C GLU A 125 29.849 15.740 17.568 1.00 0.00 +ATOM 500 O GLU A 125 29.466 15.714 16.392 1.00 0.00 +ATOM 501 N ILE A 126 29.510 14.815 18.460 1.00 0.00 +ATOM 502 CA ILE A 126 28.410 13.887 18.226 1.00 0.00 +ATOM 503 C ILE A 126 27.124 14.382 18.875 1.00 0.00 +ATOM 504 O ILE A 126 27.073 14.529 20.103 1.00 0.00 +ATOM 505 N PHE A 127 26.111 14.636 18.095 1.00 0.00 +ATOM 506 CA PHE A 127 24.843 15.132 18.618 1.00 0.00 +ATOM 507 C PHE A 127 24.072 14.029 19.332 1.00 0.00 +ATOM 508 O PHE A 127 24.232 12.848 18.999 1.00 0.00 +ATOM 509 N PRO A 128 23.247 14.395 20.299 1.00 0.00 +ATOM 510 CA PRO A 128 22.421 13.396 20.967 1.00 0.00 +ATOM 511 C PRO A 128 21.312 12.896 20.050 1.00 0.00 +ATOM 512 O PRO A 128 20.848 13.647 19.183 1.00 0.00 +ATOM 513 N TRP A 129 20.884 11.660 20.221 1.00 0.00 +ATOM 514 CA TRP A 129 19.791 11.126 19.418 1.00 0.00 +ATOM 515 C TRP A 129 18.516 11.936 19.615 1.00 0.00 +ATOM 516 O TRP A 129 18.116 12.190 20.758 1.00 0.00 +ATOM 517 N ARG 
A 130 17.890 12.333 18.545 1.00 0.00 +ATOM 518 CA ARG A 130 16.606 13.023 18.582 1.00 0.00 +ATOM 519 C ARG A 130 15.474 12.105 18.140 1.00 0.00 +ATOM 520 O ARG A 130 15.608 11.380 17.145 1.00 0.00 +ATOM 521 N SER A 131 14.361 12.134 18.877 1.00 0.00 +ATOM 522 CA SER A 131 13.234 11.258 18.580 1.00 0.00 +ATOM 523 C SER A 131 11.910 11.998 18.719 1.00 0.00 +ATOM 524 O SER A 131 10.987 11.477 19.359 1.00 0.00 +ATOM 525 N GLY A 132 11.788 13.183 18.148 1.00 0.00 +ATOM 526 CA GLY A 132 10.609 14.011 18.368 1.00 0.00 +ATOM 527 C GLY A 132 9.686 13.988 17.157 1.00 0.00 +ATOM 528 O GLY A 132 8.795 14.844 17.083 1.00 0.00 +ATOM 529 N VAL A 133 9.831 13.098 16.235 1.00 0.00 +ATOM 530 CA VAL A 133 8.988 13.012 15.049 1.00 0.00 +ATOM 531 C VAL A 133 8.345 11.636 14.929 1.00 0.00 +ATOM 532 O VAL A 133 8.944 10.631 15.332 1.00 0.00 +ATOM 533 N THR A 134 7.148 11.587 14.385 1.00 0.00 +ATOM 534 CA THR A 134 6.422 10.335 14.210 1.00 0.00 +ATOM 535 C THR A 134 5.702 10.299 12.868 1.00 0.00 +ATOM 536 O THR A 134 5.317 11.346 12.333 1.00 0.00 +ATOM 537 N SER A 135 5.523 9.104 12.335 1.00 0.00 +ATOM 538 CA SER A 135 4.756 8.901 11.112 1.00 0.00 +ATOM 539 C SER A 135 3.311 8.533 11.423 1.00 0.00 +ATOM 540 O SER A 135 3.067 7.814 12.400 1.00 0.00 +ATOM 541 N THR A 136 2.378 8.984 10.655 1.00 0.00 +ATOM 542 CA THR A 136 0.962 8.702 10.859 1.00 0.00 +ATOM 543 C THR A 136 0.337 8.090 9.612 1.00 0.00 +ATOM 544 O THR A 136 0.464 8.658 8.520 1.00 0.00 +ATOM 545 N ASN A 137 -0.313 6.978 9.748 1.00 0.00 +ATOM 546 CA ASN A 137 -1.057 6.296 8.695 1.00 0.00 +ATOM 547 C ASN A 137 -0.159 5.971 7.508 1.00 0.00 +ATOM 548 O ASN A 137 -0.645 5.808 6.382 1.00 0.00 +ATOM 549 N GLY A 138 1.135 5.880 7.757 1.00 0.00 +ATOM 550 CA GLY A 138 2.077 5.481 6.718 1.00 0.00 +ATOM 551 C GLY A 138 2.461 6.664 5.839 1.00 0.00 +ATOM 552 O GLY A 138 3.217 6.493 4.874 1.00 0.00 +ATOM 553 N GLN A 139 1.959 7.852 6.151 1.00 0.00 +ATOM 554 CA GLN A 139 2.314 9.043 5.389 1.00 0.00 +ATOM 555 C GLN A 139 3.739 9.487 5.693 1.00 
0.00 +ATOM 556 O GLN A 139 4.245 9.166 6.776 1.00 0.00 +ATOM 557 N VAL A 140 4.383 10.179 4.832 1.00 0.00 +ATOM 558 CA VAL A 140 5.778 10.563 5.008 1.00 0.00 +ATOM 559 C VAL A 140 5.912 11.712 5.999 1.00 0.00 +ATOM 560 O VAL A 140 5.155 12.687 5.906 1.00 0.00 +ATOM 561 N ALA A 141 6.810 11.631 6.903 1.00 0.00 +ATOM 562 CA ALA A 141 7.201 12.713 7.798 1.00 0.00 +ATOM 563 C ALA A 141 8.650 13.123 7.566 1.00 0.00 +ATOM 564 O ALA A 141 9.450 12.351 7.024 1.00 0.00 +ATOM 565 N GLY A 142 8.976 14.369 7.992 1.00 0.00 +ATOM 566 CA GLY A 142 10.326 14.871 7.765 1.00 0.00 +ATOM 567 C GLY A 142 10.902 15.495 9.030 1.00 0.00 +ATOM 568 O GLY A 142 10.149 16.061 9.832 1.00 0.00 +ATOM 569 N ASP A 143 12.153 15.409 9.214 1.00 0.00 +ATOM 570 CA ASP A 143 12.894 16.051 10.293 1.00 0.00 +ATOM 571 C ASP A 143 14.194 16.659 9.783 1.00 0.00 +ATOM 572 O ASP A 143 14.725 16.204 8.762 1.00 0.00 +ATOM 573 N ILE A 144 14.698 17.642 10.447 1.00 0.00 +ATOM 574 CA ILE A 144 15.937 18.304 10.055 1.00 0.00 +ATOM 575 C ILE A 144 16.802 18.618 11.269 1.00 0.00 +ATOM 576 O ILE A 144 16.363 19.367 12.151 1.00 0.00 +ATOM 577 N LEU A 145 17.971 18.089 11.337 1.00 0.00 +ATOM 578 CA LEU A 145 18.970 18.459 12.332 1.00 0.00 +ATOM 579 C LEU A 145 20.074 19.307 11.714 1.00 0.00 +ATOM 580 O LEU A 145 20.687 18.873 10.731 1.00 0.00 +ATOM 581 N VAL A 146 20.341 20.448 12.227 1.00 0.00 +ATOM 582 CA VAL A 146 21.355 21.351 11.695 1.00 0.00 +ATOM 583 C VAL A 146 22.618 21.324 12.546 1.00 0.00 +ATOM 584 O VAL A 146 22.560 21.343 13.783 1.00 0.00 +ATOM 585 N ASP A 147 23.764 21.279 11.850 1.00 0.00 +ATOM 586 CA ASP A 147 25.049 21.470 12.511 1.00 0.00 +ATOM 587 C ASP A 147 25.511 22.918 12.408 1.00 0.00 +ATOM 588 O ASP A 147 26.083 23.298 11.378 1.00 0.00 +ATOM 589 N GLU A 148 25.294 23.707 13.397 1.00 0.00 +ATOM 590 CA GLU A 148 25.479 25.153 13.382 1.00 0.00 +ATOM 591 C GLU A 148 26.944 25.519 13.183 1.00 0.00 +ATOM 592 O GLU A 148 27.261 26.572 12.616 1.00 0.00 +ATOM 593 N ALA A 149 27.835 24.637 13.655 1.00 
0.00 +ATOM 594 CA ALA A 149 29.269 24.897 13.607 1.00 0.00 +ATOM 595 C ALA A 149 29.790 24.848 12.176 1.00 0.00 +ATOM 596 O ALA A 149 30.846 25.419 11.875 1.00 0.00 +ATOM 597 N THR A 150 29.043 24.159 11.288 1.00 0.00 +ATOM 598 CA THR A 150 29.525 23.976 9.924 1.00 0.00 +ATOM 599 C THR A 150 28.482 24.420 8.907 1.00 0.00 +ATOM 600 O THR A 150 28.747 24.406 7.698 1.00 0.00 +ATOM 601 N ARG A 151 27.334 24.802 9.371 1.00 0.00 +ATOM 602 CA ARG A 151 26.236 25.284 8.541 1.00 0.00 +ATOM 603 C ARG A 151 25.738 24.195 7.600 1.00 0.00 +ATOM 604 O ARG A 151 25.350 24.497 6.464 1.00 0.00 +ATOM 605 N SER A 152 25.740 22.989 8.027 1.00 0.00 +ATOM 606 CA SER A 152 25.249 21.832 7.288 1.00 0.00 +ATOM 607 C SER A 152 23.927 21.334 7.859 1.00 0.00 +ATOM 608 O SER A 152 23.588 21.630 9.011 1.00 0.00 +ATOM 609 N ALA A 153 23.198 20.591 7.062 1.00 0.00 +ATOM 610 CA ALA A 153 21.884 20.130 7.494 1.00 0.00 +ATOM 611 C ALA A 153 21.702 18.644 7.214 1.00 0.00 +ATOM 612 O ALA A 153 22.026 18.164 6.120 1.00 0.00 +ATOM 613 N PHE A 154 21.193 17.935 8.189 1.00 0.00 +ATOM 614 CA PHE A 154 20.776 16.552 7.994 1.00 0.00 +ATOM 615 C PHE A 154 19.258 16.438 7.931 1.00 0.00 +ATOM 616 O PHE A 154 18.577 16.613 8.949 1.00 0.00 +ATOM 617 N PHE A 155 18.730 16.147 6.746 1.00 0.00 +ATOM 618 CA PHE A 155 17.296 15.947 6.576 1.00 0.00 +ATOM 619 C PHE A 155 16.949 14.465 6.527 1.00 0.00 +ATOM 620 O PHE A 155 17.684 13.660 5.940 1.00 0.00 +ATOM 621 N THR A 156 15.847 14.124 7.135 1.00 0.00 +ATOM 622 CA THR A 156 15.380 12.743 7.135 1.00 0.00 +ATOM 623 C THR A 156 13.900 12.662 6.783 1.00 0.00 +ATOM 624 O THR A 156 13.089 13.372 7.392 1.00 0.00 +ATOM 625 N LEU A 157 13.549 11.868 5.877 1.00 0.00 +ATOM 626 CA LEU A 157 12.170 11.524 5.551 1.00 0.00 +ATOM 627 C LEU A 157 11.880 10.060 5.858 1.00 0.00 +ATOM 628 O LEU A 157 12.707 9.181 5.583 1.00 0.00 +ATOM 629 N PHE A 158 10.687 9.807 6.436 1.00 0.00 +ATOM 630 CA PHE A 158 10.407 8.430 6.824 1.00 0.00 +ATOM 631 C PHE A 158 8.910 8.147 6.812 1.00 0.00 +ATOM 632 
O PHE A 158 8.100 9.079 6.893 1.00 0.00 +ATOM 633 N SER A 159 8.566 6.957 6.717 1.00 0.00 +ATOM 634 CA SER A 159 7.252 6.348 6.885 1.00 0.00 +ATOM 635 C SER A 159 7.368 4.929 7.426 1.00 0.00 +ATOM 636 O SER A 159 8.491 4.573 7.804 1.00 0.00 +ATOM 637 N ASP A 160 6.458 4.164 7.502 1.00 0.00 +ATOM 638 CA ASP A 160 6.515 2.821 8.066 1.00 0.00 +ATOM 639 C ASP A 160 7.314 1.882 7.172 1.00 0.00 +ATOM 640 O ASP A 160 7.030 0.694 7.371 1.00 0.00 +ATOM 641 N GLY A 161 8.086 2.056 6.394 1.00 0.00 +ATOM 642 CA GLY A 161 8.827 1.134 5.541 1.00 0.00 +ATOM 643 C GLY A 161 9.829 1.877 4.667 1.00 0.00 +ATOM 644 O GLY A 161 10.450 1.207 3.833 1.00 0.00 +ATOM 645 N GLU A 162 10.037 3.071 4.756 1.00 0.00 +ATOM 646 CA GLU A 162 10.962 3.871 3.963 1.00 0.00 +ATOM 647 C GLU A 162 11.661 4.917 4.821 1.00 0.00 +ATOM 648 O GLU A 162 10.978 5.702 5.490 1.00 0.00 +ATOM 649 N LEU A 163 12.911 4.956 4.829 1.00 0.00 +ATOM 650 CA LEU A 163 13.731 5.980 5.465 1.00 0.00 +ATOM 651 C LEU A 163 14.740 6.565 4.485 1.00 0.00 +ATOM 652 O LEU A 163 15.586 5.816 3.979 1.00 0.00 +ATOM 653 N VAL A 164 14.695 7.796 4.210 1.00 0.00 +ATOM 654 CA VAL A 164 15.660 8.530 3.400 1.00 0.00 +ATOM 655 C VAL A 164 16.425 9.545 4.239 1.00 0.00 +ATOM 656 O VAL A 164 15.800 10.446 4.814 1.00 0.00 +ATOM 657 N VAL A 165 17.687 9.442 4.327 1.00 0.00 +ATOM 658 CA VAL A 165 18.553 10.410 4.990 1.00 0.00 +ATOM 659 C VAL A 165 19.289 11.276 3.976 1.00 0.00 +ATOM 660 O VAL A 165 19.727 10.769 2.935 1.00 0.00 +ATOM 661 N SER A 166 19.427 12.531 4.252 1.00 0.00 +ATOM 662 CA SER A 166 20.082 13.466 3.345 1.00 0.00 +ATOM 663 C SER A 166 21.037 14.385 4.096 1.00 0.00 +ATOM 664 O SER A 166 20.653 14.951 5.127 1.00 0.00 +ATOM 665 N VAL A 167 22.225 14.542 3.627 1.00 0.00 +ATOM 666 CA VAL A 167 23.171 15.525 4.141 1.00 0.00 +ATOM 667 C VAL A 167 23.408 16.642 3.132 1.00 0.00 +ATOM 668 O VAL A 167 23.683 16.360 1.959 1.00 0.00 +ATOM 669 N VAL A 168 23.309 17.869 3.552 1.00 0.00 +ATOM 670 CA VAL A 168 23.413 19.009 2.649 1.00 0.00 +ATOM 
671 C VAL A 168 24.471 19.995 3.126 1.00 0.00 +ATOM 672 O VAL A 168 24.551 20.261 4.332 1.00 0.00 +ATOM 673 N SER A 169 25.243 20.517 2.272 1.00 0.00 +ATOM 674 CA SER A 169 26.240 21.538 2.569 1.00 0.00 +ATOM 675 C SER A 169 25.777 22.913 2.104 1.00 0.00 +ATOM 676 O SER A 169 25.283 23.060 0.978 1.00 0.00 +ATOM 677 N TYR A 170 25.935 23.910 2.965 1.00 0.00 +ATOM 678 CA TYR A 170 25.672 25.298 2.605 1.00 0.00 +ATOM 679 C TYR A 170 26.969 26.072 2.408 1.00 0.00 +ATOM 680 O TYR A 170 27.845 26.054 3.283 1.00 0.00 +ATOM 681 N ASP A 171 27.096 26.746 1.271 1.00 0.00 +ATOM 682 CA ASP A 171 28.257 27.599 1.044 1.00 0.00 +ATOM 683 C ASP A 171 28.233 28.817 1.959 1.00 0.00 +ATOM 684 O ASP A 171 27.297 28.985 2.751 1.00 0.00 +ATOM 685 N ASP A 172 29.249 29.657 1.852 1.00 0.00 +ATOM 686 CA ASP A 172 29.403 30.807 2.735 1.00 0.00 +ATOM 687 C ASP A 172 28.291 31.825 2.513 1.00 0.00 +ATOM 688 O ASP A 172 27.978 32.623 3.405 1.00 0.00 +ATOM 689 N ASP A 173 27.694 31.789 1.304 1.00 0.00 +ATOM 690 CA ASP A 173 26.642 32.737 0.958 1.00 0.00 +ATOM 691 C ASP A 173 25.266 32.185 1.308 1.00 0.00 +ATOM 692 O ASP A 173 24.238 32.784 0.968 1.00 0.00 +ATOM 693 N GLY A 174 25.261 31.021 1.999 1.00 0.00 +ATOM 694 CA GLY A 174 24.042 30.399 2.502 1.00 0.00 +ATOM 695 C GLY A 174 23.338 29.605 1.410 1.00 0.00 +ATOM 696 O GLY A 174 22.233 29.104 1.658 1.00 0.00 +ATOM 697 N THR A 175 23.890 29.468 0.268 1.00 0.00 +ATOM 698 CA THR A 175 23.322 28.659 -0.804 1.00 0.00 +ATOM 699 C THR A 175 23.699 27.192 -0.645 1.00 0.00 +ATOM 700 O THR A 175 24.860 26.861 -0.373 1.00 0.00 +ATOM 701 N ALA A 176 22.674 26.294 -0.824 1.00 0.00 +ATOM 702 CA ALA A 176 22.988 24.870 -0.814 1.00 0.00 +ATOM 703 C ALA A 176 23.965 24.513 -1.927 1.00 0.00 +ATOM 704 O ALA A 176 23.823 24.979 -3.065 1.00 0.00 +ATOM 705 N LYS A 177 24.966 23.678 -1.586 1.00 0.00 +ATOM 706 CA LYS A 177 26.002 23.370 -2.564 1.00 0.00 +ATOM 707 C LYS A 177 25.955 21.904 -2.974 1.00 0.00 +ATOM 708 O LYS A 177 25.960 21.656 -4.186 1.00 0.00 +ATOM 709 
N THR A 178 25.915 21.004 -2.178 1.00 0.00 +ATOM 710 CA THR A 178 25.882 19.570 -2.438 1.00 0.00 +ATOM 711 C THR A 178 24.920 18.861 -1.493 1.00 0.00 +ATOM 712 O THR A 178 24.653 19.334 -0.381 1.00 0.00 +ATOM 713 N ARG A 179 24.406 17.723 -1.958 1.00 0.00 +ATOM 714 CA ARG A 179 23.522 16.893 -1.148 1.00 0.00 +ATOM 715 C ARG A 179 23.708 15.415 -1.467 1.00 0.00 +ATOM 716 O ARG A 179 23.790 15.053 -2.648 1.00 0.00 +ATOM 717 N VAL A 180 23.775 14.597 -0.504 1.00 0.00 +ATOM 718 CA VAL A 180 23.765 13.144 -0.630 1.00 0.00 +ATOM 719 C VAL A 180 22.581 12.535 0.110 1.00 0.00 +ATOM 720 O VAL A 180 22.370 12.809 1.299 1.00 0.00 +ATOM 721 N SER A 181 21.815 11.709 -0.601 1.00 0.00 +ATOM 722 CA SER A 181 20.663 11.036 -0.014 1.00 0.00 +ATOM 723 C SER A 181 20.805 9.522 -0.104 1.00 0.00 +ATOM 724 O SER A 181 21.203 8.992 -1.149 1.00 0.00 +ATOM 725 N VAL A 182 20.479 8.827 0.990 1.00 0.00 +ATOM 726 CA VAL A 182 20.610 7.375 0.989 1.00 0.00 +ATOM 727 C VAL A 182 19.427 6.715 1.686 1.00 0.00 +ATOM 728 O VAL A 182 18.980 7.192 2.737 1.00 0.00 +ATOM 729 N SER A 183 18.936 5.664 1.132 1.00 0.00 +ATOM 730 CA SER A 183 17.995 4.721 1.725 1.00 0.00 +ATOM 731 C SER A 183 18.552 3.303 1.708 1.00 0.00 +ATOM 732 O SER A 183 18.818 2.773 0.622 1.00 0.00 +ATOM 733 N MET A 184 18.732 2.691 2.830 1.00 0.00 +ATOM 734 CA MET A 184 19.324 1.362 2.930 1.00 0.00 +ATOM 735 C MET A 184 18.250 0.284 3.004 1.00 0.00 +ATOM 736 O MET A 184 18.617 -0.896 2.942 1.00 0.00 +ATOM 737 N GLY A 185 17.047 0.563 3.123 1.00 0.00 +ATOM 738 CA GLY A 185 15.994 -0.446 3.120 1.00 0.00 +ATOM 739 C GLY A 185 15.182 -0.399 4.408 1.00 0.00 +ATOM 740 O GLY A 185 15.682 0.195 5.372 1.00 0.00 +ATOM 741 N GLY A 186 14.141 -0.891 4.506 1.00 0.00 +ATOM 742 CA GLY A 186 13.306 -1.077 5.687 1.00 0.00 +ATOM 743 C GLY A 186 12.962 -2.547 5.893 1.00 0.00 +ATOM 744 O GLY A 186 13.345 -3.368 5.050 1.00 0.00 +ATOM 745 N ALA A 187 12.302 -2.903 6.903 1.00 0.00 +ATOM 746 CA ALA A 187 11.882 -4.273 7.172 1.00 0.00 +ATOM 747 C ALA A 187 
10.922 -4.775 6.101 1.00 0.00 +ATOM 748 O ALA A 187 10.923 -5.957 5.734 1.00 0.00 +ATOM 749 N ALA A 188 10.065 -3.793 5.593 1.00 0.00 +ATOM 750 CA ALA A 188 9.098 -4.165 4.567 1.00 0.00 +ATOM 751 C ALA A 188 9.777 -4.370 3.219 1.00 0.00 +ATOM 752 O ALA A 188 9.267 -5.151 2.405 1.00 0.00 +ATOM 753 N ASN A 189 10.846 -3.739 2.957 1.00 0.00 +ATOM 754 CA ASN A 189 11.605 -3.824 1.715 1.00 0.00 +ATOM 755 C ASN A 189 13.104 -3.826 1.985 1.00 0.00 +ATOM 756 O ASN A 189 13.796 -2.868 1.617 1.00 0.00 +ATOM 757 N PRO A 190 13.619 -4.883 2.620 1.00 0.00 +ATOM 758 CA PRO A 190 14.959 -4.939 3.191 1.00 0.00 +ATOM 759 C PRO A 190 16.024 -4.774 2.114 1.00 0.00 +ATOM 760 O PRO A 190 17.138 -4.349 2.445 1.00 0.00 +ATOM 761 N THR A 191 15.756 -5.075 0.886 1.00 0.00 +ATOM 762 CA THR A 191 16.773 -5.066 -0.159 1.00 0.00 +ATOM 763 C THR A 191 16.686 -3.800 -1.001 1.00 0.00 +ATOM 764 O THR A 191 17.522 -3.562 -1.882 1.00 0.00 +ATOM 765 N TYR A 192 15.640 -2.979 -0.711 1.00 0.00 +ATOM 766 CA TYR A 192 15.475 -1.728 -1.441 1.00 0.00 +ATOM 767 C TYR A 192 16.465 -0.675 -0.961 1.00 0.00 +ATOM 768 O TYR A 192 16.479 -0.298 0.218 1.00 0.00 +ATOM 769 N MET A 193 17.309 -0.200 -1.924 1.00 0.00 +ATOM 770 CA MET A 193 18.291 0.828 -1.599 1.00 0.00 +ATOM 771 C MET A 193 18.279 1.948 -2.632 1.00 0.00 +ATOM 772 O MET A 193 18.167 1.653 -3.829 1.00 0.00 +ATOM 773 N VAL A 194 18.380 3.118 -2.263 1.00 0.00 +ATOM 774 CA VAL A 194 18.446 4.280 -3.141 1.00 0.00 +ATOM 775 C VAL A 194 19.570 5.221 -2.726 1.00 0.00 +ATOM 776 O VAL A 194 19.638 5.623 -1.558 1.00 0.00 +ATOM 777 N LEU A 195 20.426 5.566 -3.638 1.00 0.00 +ATOM 778 CA LEU A 195 21.468 6.567 -3.442 1.00 0.00 +ATOM 779 C LEU A 195 21.332 7.707 -4.443 1.00 0.00 +ATOM 780 O LEU A 195 21.192 7.453 -5.646 1.00 0.00 +ATOM 781 N GLU A 196 21.369 8.912 -3.990 1.00 0.00 +ATOM 782 CA GLU A 196 21.249 10.092 -4.839 1.00 0.00 +ATOM 783 C GLU A 196 22.292 11.141 -4.475 1.00 0.00 +ATOM 784 O GLU A 196 22.609 11.292 -3.288 1.00 0.00 +ATOM 785 N VAL A 197 22.798 
11.820 -5.384 1.00 0.00 +ATOM 786 CA VAL A 197 23.700 12.953 -5.219 1.00 0.00 +ATOM 787 C VAL A 197 23.247 14.144 -6.054 1.00 0.00 +ATOM 788 O VAL A 197 22.854 13.989 -7.217 1.00 0.00 +ATOM 789 N GLN A 198 23.306 15.313 -5.456 1.00 0.00 +ATOM 790 CA GLN A 198 22.928 16.541 -6.146 1.00 0.00 +ATOM 791 C GLN A 198 23.980 17.626 -5.954 1.00 0.00 +ATOM 792 O GLN A 198 24.460 17.811 -4.829 1.00 0.00 +ATOM 793 N THR A 199 24.332 18.309 -6.953 1.00 0.00 +ATOM 794 CA THR A 199 25.231 19.455 -6.898 1.00 0.00 +ATOM 795 C THR A 199 24.534 20.725 -7.368 1.00 0.00 +ATOM 796 O THR A 199 23.762 20.671 -8.334 1.00 0.00 +ATOM 797 N VAL A 200 24.766 21.807 -6.750 1.00 0.00 +ATOM 798 CA VAL A 200 24.146 23.081 -7.096 1.00 0.00 +ATOM 799 C VAL A 200 25.193 24.113 -7.496 1.00 0.00 +ATOM 800 O VAL A 200 26.200 24.271 -6.794 1.00 0.00 +ATOM 801 N THR A 201 24.981 24.795 -8.577 1.00 0.00 +ATOM 802 CA THR A 201 25.840 25.888 -9.017 1.00 0.00 +ATOM 803 C THR A 201 25.063 27.195 -9.114 1.00 0.00 +ATOM 804 O THR A 201 24.095 27.300 -9.878 1.00 0.00 +ATOM 805 N LYS A 202 25.503 28.204 -8.318 1.00 0.00 +ATOM 806 CA LYS A 202 24.906 29.528 -8.445 1.00 0.00 +ATOM 807 C LYS A 202 25.367 30.220 -9.722 1.00 0.00 +ATOM 808 O LYS A 202 26.545 30.580 -9.836 1.00 0.00 +ATOM 809 N LEU A 203 24.472 30.408 -10.668 1.00 0.00 +ATOM 810 CA LEU A 203 24.807 31.051 -11.933 1.00 0.00 +ATOM 811 C LEU A 203 24.761 32.569 -11.809 1.00 0.00 +ATOM 812 O LEU A 203 25.544 33.241 -12.492 1.00 0.00 +ATOM 813 N SER A 204 23.955 33.103 -11.041 1.00 0.00 +ATOM 814 CA SER A 204 23.760 34.511 -10.715 1.00 0.00 +ATOM 815 C SER A 204 22.995 34.673 -9.408 1.00 0.00 +ATOM 816 O SER A 204 22.675 33.674 -8.750 1.00 0.00 +ATOM 817 N ASP A 205 22.701 35.913 -9.025 1.00 0.00 +ATOM 818 CA ASP A 205 21.984 36.167 -7.781 1.00 0.00 +ATOM 819 C ASP A 205 20.541 35.685 -7.868 1.00 0.00 +ATOM 820 O ASP A 205 19.845 35.636 -6.846 1.00 0.00 +ATOM 821 N THR A 206 20.075 35.324 -9.094 1.00 0.00 +ATOM 822 CA THR A 206 18.653 35.039 -9.245 1.00 
0.00 +ATOM 823 C THR A 206 18.426 33.615 -9.737 1.00 0.00 +ATOM 824 O THR A 206 17.271 33.216 -9.933 1.00 0.00 +ATOM 825 N THR A 207 19.504 32.830 -9.946 1.00 0.00 +ATOM 826 CA THR A 207 19.307 31.507 -10.526 1.00 0.00 +ATOM 827 C THR A 207 20.342 30.518 -10.006 1.00 0.00 +ATOM 828 O THR A 207 21.525 30.854 -9.862 1.00 0.00 +ATOM 829 N ASN A 208 19.887 29.312 -9.731 1.00 0.00 +ATOM 830 CA ASN A 208 20.733 28.172 -9.398 1.00 0.00 +ATOM 831 C ASN A 208 20.486 27.006 -10.347 1.00 0.00 +ATOM 832 O ASN A 208 19.320 26.765 -10.687 1.00 0.00 +ATOM 833 N ARG A 209 21.425 26.323 -10.762 1.00 0.00 +ATOM 834 CA ARG A 209 21.323 25.074 -11.508 1.00 0.00 +ATOM 835 C ARG A 209 21.654 23.877 -10.626 1.00 0.00 +ATOM 836 O ARG A 209 22.649 23.905 -9.891 1.00 0.00 +ATOM 837 N THR A 210 20.850 22.853 -10.689 1.00 0.00 +ATOM 838 CA THR A 210 21.030 21.646 -9.890 1.00 0.00 +ATOM 839 C THR A 210 21.224 20.422 -10.776 1.00 0.00 +ATOM 840 O THR A 210 20.424 20.212 -11.697 1.00 0.00 +ATOM 841 N VAL A 211 22.207 19.647 -10.547 1.00 0.00 +ATOM 842 CA VAL A 211 22.416 18.356 -11.192 1.00 0.00 +ATOM 843 C VAL A 211 22.207 17.210 -10.211 1.00 0.00 +ATOM 844 O VAL A 211 22.781 17.190 -9.114 1.00 0.00 +ATOM 845 N SER A 212 21.355 16.240 -10.641 1.00 0.00 +ATOM 846 CA SER A 212 21.048 15.118 -9.762 1.00 0.00 +ATOM 847 C SER A 212 21.250 13.787 -10.476 1.00 0.00 +ATOM 848 O SER A 212 20.865 13.629 -11.641 1.00 0.00 +ATOM 849 N ARG A 213 21.849 12.852 -9.775 1.00 0.00 +ATOM 850 CA ARG A 213 21.957 11.464 -10.208 1.00 0.00 +ATOM 851 C ARG A 213 21.456 10.510 -9.131 1.00 0.00 +ATOM 852 O ARG A 213 21.803 10.673 -7.954 1.00 0.00 +ATOM 853 N ARG A 214 20.671 9.548 -9.501 1.00 0.00 +ATOM 854 CA ARG A 214 20.110 8.591 -8.555 1.00 0.00 +ATOM 855 C ARG A 214 20.334 7.159 -9.022 1.00 0.00 +ATOM 856 O ARG A 214 20.275 6.885 -10.228 1.00 0.00 +ATOM 857 N ALA A 215 20.579 6.288 -8.130 1.00 0.00 +ATOM 858 CA ALA A 215 20.643 4.849 -8.357 1.00 0.00 +ATOM 859 C ALA A 215 19.694 4.102 -7.428 1.00 0.00 +ATOM 860 O 
ALA A 215 19.656 4.404 -6.229 1.00 0.00 +ATOM 861 N VAL A 216 18.962 3.178 -7.921 1.00 0.00 +ATOM 862 CA VAL A 216 18.002 2.416 -7.132 1.00 0.00 +ATOM 863 C VAL A 216 18.246 0.918 -7.264 1.00 0.00 +ATOM 864 O VAL A 216 18.507 0.428 -8.370 1.00 0.00 +ATOM 865 N SER A 217 18.168 0.209 -6.181 1.00 0.00 +ATOM 866 CA SER A 217 18.212 -1.248 -6.148 1.00 0.00 +ATOM 867 C SER A 217 17.009 -1.819 -5.408 1.00 0.00 +ATOM 868 O SER A 217 16.848 -1.513 -4.220 1.00 0.00 +ATOM 869 N ASN A 218 16.207 -2.578 -5.998 1.00 0.00 +ATOM 870 CA ASN A 218 15.046 -3.227 -5.400 1.00 0.00 +ATOM 871 C ASN A 218 15.455 -4.453 -4.593 1.00 0.00 +ATOM 872 O ASN A 218 14.951 -4.854 -3.536 1.00 0.00 +ATOM 873 CB ASN A 218 14.040 -3.616 -6.481 1.00 0.00 +ATOM 874 CG ASN A 218 12.885 -2.640 -6.542 1.00 0.00 +ATOM 875 OD1 ASN A 218 12.559 -2.128 -7.619 1.00 0.00 +ATOM 876 ND2 ASN A 218 12.265 -2.380 -5.420 1.00 0.00 +ATOM 877 N GLY A 219 16.689 -5.067 -5.471 1.00 0.00 +ATOM 878 CA GLY A 219 17.255 -6.302 -4.941 1.00 0.00 +ATOM 879 C GLY A 219 18.730 -6.431 -5.299 1.00 0.00 +ATOM 880 O GLY A 219 19.526 -5.609 -4.827 1.00 0.00 +ATOM 881 N ASN A 220 19.128 -7.379 -6.078 1.00 0.00 +ATOM 882 CA ASN A 220 20.518 -7.629 -6.442 1.00 0.00 +ATOM 883 C ASN A 220 20.912 -6.842 -7.685 1.00 0.00 +ATOM 884 O ASN A 220 22.048 -7.043 -8.135 1.00 0.00 +ATOM 885 N THR A 221 20.182 -6.050 -8.238 1.00 0.00 +ATOM 886 CA THR A 221 20.436 -5.337 -9.484 1.00 0.00 +ATOM 887 C THR A 221 20.519 -3.834 -9.252 1.00 0.00 +ATOM 888 O THR A 221 19.726 -3.296 -8.468 1.00 0.00 +ATOM 889 N LEU A 222 21.391 -3.185 -9.864 1.00 0.00 +ATOM 890 CA LEU A 222 21.571 -1.747 -9.704 1.00 0.00 +ATOM 891 C LEU A 222 21.088 -0.991 -10.935 1.00 0.00 +ATOM 892 O LEU A 222 21.631 -1.193 -12.029 1.00 0.00 +ATOM 893 N THR A 223 20.111 -0.151 -10.789 1.00 0.00 +ATOM 894 CA THR A 223 19.602 0.669 -11.882 1.00 0.00 +ATOM 895 C THR A 223 20.085 2.109 -11.762 1.00 0.00 +ATOM 896 O THR A 223 19.700 2.806 -10.814 1.00 0.00 +ATOM 897 N LEU A 224 20.903 2.561 
-12.683 1.00 0.00 +ATOM 898 CA LEU A 224 21.356 3.947 -12.711 1.00 0.00 +ATOM 899 C LEU A 224 20.389 4.827 -13.494 1.00 0.00 +ATOM 900 O LEU A 224 20.267 4.661 -14.714 1.00 0.00 +ATOM 901 N GLU A 225 19.714 5.737 -12.842 1.00 0.00 +ATOM 902 CA GLU A 225 18.836 6.687 -13.515 1.00 0.00 +ATOM 903 C GLU A 225 19.637 7.706 -14.315 1.00 0.00 +ATOM 904 O GLU A 225 20.779 8.023 -13.958 1.00 0.00 +ATOM 905 N PRO A 226 19.040 8.221 -15.398 1.00 0.00 +ATOM 906 CA PRO A 226 19.713 9.273 -16.150 1.00 0.00 +ATOM 907 C PRO A 226 19.887 10.529 -15.305 1.00 0.00 +ATOM 908 O PRO A 226 19.003 10.861 -14.506 1.00 0.00 +ATOM 909 N GLU A 227 20.989 11.216 -15.466 1.00 0.00 +ATOM 910 CA GLU A 227 21.249 12.462 -14.756 1.00 0.00 +ATOM 911 C GLU A 227 20.218 13.525 -15.114 1.00 0.00 +ATOM 912 O GLU A 227 19.780 13.561 -16.271 1.00 0.00 +ATOM 913 N ARG A 228 19.835 14.335 -14.248 1.00 0.00 +ATOM 914 CA ARG A 228 18.848 15.385 -14.470 1.00 0.00 +ATOM 915 C ARG A 228 19.397 16.750 -14.074 1.00 0.00 +ATOM 916 O ARG A 228 20.007 16.861 -13.003 1.00 0.00 +ATOM 917 N THR A 229 19.219 17.739 -14.846 1.00 0.00 +ATOM 918 CA THR A 229 19.592 19.116 -14.545 1.00 0.00 +ATOM 919 C THR A 229 18.362 20.007 -14.426 1.00 0.00 +ATOM 920 O THR A 229 17.526 20.004 -15.339 1.00 0.00 +ATOM 921 N VAL A 230 18.225 20.732 -13.389 1.00 0.00 +ATOM 922 CA VAL A 230 17.100 21.633 -13.169 1.00 0.00 +ATOM 923 C VAL A 230 17.577 23.048 -12.870 1.00 0.00 +ATOM 924 O VAL A 230 18.527 23.217 -12.095 1.00 0.00 +ATOM 925 N ASP A 231 16.982 24.020 -13.432 1.00 0.00 +ATOM 926 CA ASP A 231 17.222 25.424 -13.119 1.00 0.00 +ATOM 927 C ASP A 231 16.157 25.968 -12.176 1.00 0.00 +ATOM 928 O ASP A 231 14.957 25.806 -12.432 1.00 0.00 +ATOM 929 N VAL A 232 16.575 26.594 -11.118 1.00 0.00 +ATOM 930 CA VAL A 232 15.678 27.180 -10.129 1.00 0.00 +ATOM 931 C VAL A 232 15.882 28.686 -10.024 1.00 0.00 +ATOM 932 O VAL A 232 16.995 29.147 -9.740 1.00 0.00 +ATOM 933 N ALA A 233 14.801 29.459 -10.254 1.00 0.00 +ATOM 934 CA ALA A 233 14.876 30.864 
-9.873 1.00 0.00 +ATOM 935 C ALA A 233 14.960 31.021 -8.360 1.00 0.00 +ATOM 936 O ALA A 233 14.404 30.182 -7.640 1.00 0.00 +ATOM 937 N SER A 234 15.600 32.013 -7.867 1.00 0.00 +ATOM 938 CA SER A 234 15.855 32.181 -6.441 1.00 0.00 +ATOM 939 C SER A 234 14.563 32.441 -5.677 1.00 0.00 +ATOM 940 O SER A 234 14.511 32.282 -4.451 1.00 0.00 +ATOM 941 N SER A 235 13.496 32.852 -6.433 1.00 0.00 +ATOM 942 CA SER A 235 12.252 33.240 -5.778 1.00 0.00 +ATOM 943 C SER A 235 11.195 32.151 -5.912 1.00 0.00 +ATOM 944 O SER A 235 10.221 32.114 -5.150 1.00 0.00 +ATOM 945 N SER A 236 11.410 31.238 -6.923 1.00 0.00 +ATOM 946 CA SER A 236 10.396 30.218 -7.163 1.00 0.00 +ATOM 947 C SER A 236 10.966 29.051 -7.959 1.00 0.00 +ATOM 948 O SER A 236 11.916 29.263 -8.724 1.00 0.00 +ATOM 949 N VAL A 237 10.476 27.919 -7.832 1.00 0.00 +ATOM 950 CA VAL A 237 10.786 26.752 -8.650 1.00 0.00 +ATOM 951 C VAL A 237 9.523 26.155 -9.259 1.00 0.00 +ATOM 952 O VAL A 237 8.585 25.852 -8.511 1.00 0.00 +ATOM 953 N THR A 238 9.459 25.982 -10.516 1.00 0.00 +ATOM 954 CA THR A 238 8.317 25.397 -11.208 1.00 0.00 +ATOM 955 C THR A 238 8.648 24.009 -11.742 1.00 0.00 +ATOM 956 O THR A 238 9.689 23.831 -12.386 1.00 0.00 +ATOM 957 N LEU A 239 7.800 23.044 -11.490 1.00 0.00 +ATOM 958 CA LEU A 239 8.031 21.677 -11.942 1.00 0.00 +ATOM 959 C LEU A 239 6.768 21.076 -12.546 1.00 0.00 +ATOM 960 O LEU A 239 5.668 21.333 -12.041 1.00 0.00 +ATOM 961 N ASP A 240 6.894 20.308 -13.580 1.00 0.00 +ATOM 962 CA ASP A 240 5.770 19.588 -14.167 1.00 0.00 +ATOM 963 C ASP A 240 5.618 18.204 -13.548 1.00 0.00 +ATOM 964 O ASP A 240 6.565 17.409 -13.591 1.00 0.00 +ATOM 965 N ILE A 241 4.495 17.909 -12.995 1.00 0.00 +ATOM 966 CA ILE A 241 4.183 16.590 -12.457 1.00 0.00 +ATOM 967 C ILE A 241 3.446 15.738 -13.482 1.00 0.00 +ATOM 968 O ILE A 241 2.358 16.124 -13.928 1.00 0.00 +ATOM 969 N LYS A 242 3.998 14.610 -13.859 1.00 0.00 +ATOM 970 CA LYS A 242 3.412 13.762 -14.890 1.00 0.00 +ATOM 971 C LYS A 242 2.383 12.807 -14.299 1.00 0.00 +ATOM 972 O 
LYS A 242 2.523 12.382 -13.145 1.00 0.00 +ATOM 973 N ASP A 243 1.375 12.475 -15.059 1.00 0.00 +ATOM 974 CA ASP A 243 0.355 11.533 -14.613 1.00 0.00 +ATOM 975 C ASP A 243 0.858 10.097 -14.696 1.00 0.00 +ATOM 976 O ASP A 243 1.105 9.701 -15.842 1.00 0.00 +ATOM 977 N GLY A 244 1.021 9.391 -13.865 1.00 0.00 +ATOM 978 CA GLY A 244 1.378 7.978 -13.813 1.00 0.00 +ATOM 979 C GLY A 244 2.866 7.776 -14.071 1.00 0.00 +ATOM 980 O GLY A 244 3.563 7.384 -13.127 1.00 0.00 +ATOM 981 N THR A 245 3.372 7.990 -15.173 1.00 0.00 +ATOM 982 CA THR A 245 4.777 7.751 -15.481 1.00 0.00 +ATOM 983 C THR A 245 5.388 8.935 -16.219 1.00 0.00 +ATOM 984 O THR A 245 4.695 9.903 -16.557 1.00 0.00 +ATOM 985 N ALA A 246 6.728 8.835 -16.467 1.00 0.00 +ATOM 986 CA ALA A 246 7.455 9.949 -17.064 1.00 0.00 +ATOM 987 C ALA A 246 6.938 10.257 -18.463 1.00 0.00 +ATOM 988 O ALA A 246 6.972 11.409 -18.914 1.00 0.00 +ATOM 989 N SER A 247 6.450 9.187 -19.156 1.00 0.00 +ATOM 990 CA SER A 247 6.008 9.329 -20.538 1.00 0.00 +ATOM 991 C SER A 247 4.534 9.706 -20.609 1.00 0.00 +ATOM 992 O SER A 247 4.040 9.929 -21.722 1.00 0.00 +ATOM 993 N GLY A 248 3.841 9.787 -19.565 1.00 0.00 +ATOM 994 CA GLY A 248 2.422 10.118 -19.505 1.00 0.00 +ATOM 995 C GLY A 248 2.201 11.620 -19.625 1.00 0.00 +ATOM 996 O GLY A 248 3.168 12.376 -19.466 1.00 0.00 +ATOM 997 N PRO A 249 1.052 12.068 -19.882 1.00 0.00 +ATOM 998 CA PRO A 249 0.720 13.487 -19.937 1.00 0.00 +ATOM 999 C PRO A 249 0.930 14.155 -18.584 1.00 0.00 +ATOM 1000 O PRO A 249 0.943 13.475 -17.550 1.00 0.00 +ATOM 1001 N THR A 250 1.092 15.467 -18.584 1.00 0.00 +ATOM 1002 CA THR A 250 1.282 16.213 -17.346 1.00 0.00 +ATOM 1003 C THR A 250 0.007 16.231 -16.512 1.00 0.00 +ATOM 1004 O THR A 250 -1.100 16.434 -17.025 1.00 0.00 +ATOM 1005 N ALA A 251 0.213 16.002 -15.160 1.00 0.00 +ATOM 1006 CA ALA A 251 -0.907 16.088 -14.231 1.00 0.00 +ATOM 1007 C ALA A 251 -1.188 17.533 -13.840 1.00 0.00 +ATOM 1008 O ALA A 251 -2.350 17.958 -13.875 1.00 0.00 +ATOM 1009 N TYR A 252 -0.214 18.260 -13.488 
1.00 0.00 +ATOM 1010 CA TYR A 252 -0.265 19.666 -13.107 1.00 0.00 +ATOM 1011 C TYR A 252 1.132 20.271 -13.043 1.00 0.00 +ATOM 1012 O TYR A 252 2.126 19.535 -13.042 1.00 0.00 +ATOM 1013 N THR A 253 1.213 21.557 -12.991 1.00 0.00 +ATOM 1014 CA THR A 253 2.444 22.278 -12.691 1.00 0.00 +ATOM 1015 C THR A 253 2.460 22.763 -11.247 1.00 0.00 +ATOM 1016 O THR A 253 1.483 23.370 -10.790 1.00 0.00 +ATOM 1017 N VAL A 254 3.509 22.516 -10.552 1.00 0.00 +ATOM 1018 CA VAL A 254 3.661 22.950 -9.168 1.00 0.00 +ATOM 1019 C VAL A 254 4.726 24.032 -9.044 1.00 0.00 +ATOM 1020 O VAL A 254 5.828 23.869 -9.583 1.00 0.00 +ATOM 1021 N THR A 255 4.442 25.074 -8.385 1.00 0.00 +ATOM 1022 CA THR A 255 5.385 26.138 -8.062 1.00 0.00 +ATOM 1023 C THR A 255 5.577 26.266 -6.556 1.00 0.00 +ATOM 1024 O THR A 255 4.609 26.339 -5.789 1.00 0.00 +ATOM 1025 N PHE A 256 6.854 26.290 -6.156 1.00 0.00 +ATOM 1026 CA PHE A 256 7.197 26.502 -4.755 1.00 0.00 +ATOM 1027 C PHE A 256 7.841 27.866 -4.546 1.00 0.00 +ATOM 1028 O PHE A 256 8.816 28.214 -5.224 1.00 0.00 +ATOM 1029 N THR A 257 7.301 28.618 -3.623 1.00 0.00 +ATOM 1030 CA THR A 257 7.824 29.939 -3.295 1.00 0.00 +ATOM 1031 C THR A 257 7.978 30.112 -1.789 1.00 0.00 +ATOM 1032 O THR A 257 7.046 29.784 -1.045 1.00 0.00 +ATOM 1033 N VAL A 258 9.069 30.592 -1.336 1.00 0.00 +ATOM 1034 CA VAL A 258 9.283 30.935 0.065 1.00 0.00 +ATOM 1035 C VAL A 258 8.934 32.393 0.334 1.00 0.00 +ATOM 1036 O VAL A 258 9.420 33.282 -0.377 1.00 0.00 +ATOM 1037 N ASN A 259 8.134 32.648 1.309 1.00 0.00 +ATOM 1038 CA ASN A 259 7.742 33.997 1.699 1.00 0.00 +ATOM 1039 C ASN A 259 8.258 34.340 3.091 1.00 0.00 +ATOM 1040 O ASN A 259 8.238 33.484 3.984 1.00 0.00 +ATOM 1041 N SER A 260 8.710 35.546 3.283 1.00 0.00 +ATOM 1042 CA SER A 260 9.250 35.954 4.574 1.00 0.00 +ATOM 1043 C SER A 260 8.919 37.410 4.874 1.00 0.00 +ATOM 1044 O SER A 260 8.485 38.149 3.981 1.00 0.00 +ATOM 1045 N GLY A 261 9.122 37.821 6.117 1.00 0.00 +ATOM 1046 CA GLY A 261 8.918 39.221 6.469 1.00 0.00 +ATOM 1047 C GLY A 
261 9.878 40.126 5.708 1.00 0.00 +ATOM 1048 O GLY A 261 10.822 39.631 5.079 1.00 0.00 +ATOM 1049 N SER A 262 9.662 41.404 5.752 1.00 0.00 +ATOM 1050 CA SER A 262 10.500 42.380 5.066 1.00 0.00 +ATOM 1051 C SER A 262 11.910 42.398 5.643 1.00 0.00 +ATOM 1052 O SER A 262 12.067 42.276 6.864 1.00 0.00 +ATOM 1053 N GLY A 263 12.905 42.541 4.833 1.00 0.00 +ATOM 1054 CA GLY A 263 14.290 42.674 5.268 1.00 0.00 +ATOM 1055 C GLY A 263 15.004 41.328 5.253 1.00 0.00 +ATOM 1056 O GLY A 263 16.238 41.296 5.341 1.00 0.00 +ATOM 1057 N ALA A 264 14.288 40.247 5.147 1.00 0.00 +ATOM 1058 CA ALA A 264 14.850 38.906 5.033 1.00 0.00 +ATOM 1059 C ALA A 264 15.079 38.527 3.575 1.00 0.00 +ATOM 1060 O ALA A 264 14.358 39.023 2.701 1.00 0.00 +ATOM 1061 N THR A 265 16.004 37.710 3.300 1.00 0.00 +ATOM 1062 CA THR A 265 16.240 37.148 1.975 1.00 0.00 +ATOM 1063 C THR A 265 15.773 35.700 1.900 1.00 0.00 +ATOM 1064 O THR A 265 16.418 34.811 2.471 1.00 0.00 +ATOM 1065 N ALA A 266 14.681 35.458 1.214 1.00 0.00 +ATOM 1066 CA ALA A 266 14.196 34.105 0.970 1.00 0.00 +ATOM 1067 C ALA A 266 14.734 33.556 -0.345 1.00 0.00 +ATOM 1068 O ALA A 266 14.817 34.284 -1.342 1.00 0.00 +ATOM 1069 N ARG A 267 15.097 32.273 -0.344 1.00 0.00 +ATOM 1070 CA ARG A 267 15.700 31.715 -1.549 1.00 0.00 +ATOM 1071 C ARG A 267 15.284 30.264 -1.752 1.00 0.00 +ATOM 1072 O ARG A 267 15.263 29.490 -0.786 1.00 0.00 +ATOM 1073 N VAL A 268 14.962 29.891 -2.955 1.00 0.00 +ATOM 1074 CA VAL A 268 14.950 28.495 -3.375 1.00 0.00 +ATOM 1075 C VAL A 268 16.324 28.054 -3.862 1.00 0.00 +ATOM 1076 O VAL A 268 16.963 28.812 -4.603 1.00 0.00 +ATOM 1077 N HIS A 269 16.805 26.891 -3.493 1.00 0.00 +ATOM 1078 CA HIS A 269 18.207 26.560 -3.718 1.00 0.00 +ATOM 1079 C HIS A 269 18.347 25.329 -4.605 1.00 0.00 +ATOM 1080 O HIS A 269 19.057 25.410 -5.615 1.00 0.00 +ATOM 1081 N ALA A 270 17.749 24.255 -4.308 1.00 0.00 +ATOM 1082 CA ALA A 270 17.916 23.015 -5.056 1.00 0.00 +ATOM 1083 C ALA A 270 16.588 22.286 -5.218 1.00 0.00 +ATOM 1084 O ALA A 270 15.682 
22.480 -4.398 1.00 0.00 +ATOM 1085 N VAL A 271 16.458 21.504 -6.190 1.00 0.00 +ATOM 1086 CA VAL A 271 15.274 20.693 -6.448 1.00 0.00 +ATOM 1087 C VAL A 271 15.618 19.466 -7.283 1.00 0.00 +ATOM 1088 O VAL A 271 16.565 19.514 -8.078 1.00 0.00 +ATOM 1089 N ARG A 272 14.920 18.440 -7.133 1.00 0.00 +ATOM 1090 CA ARG A 272 15.032 17.207 -7.904 1.00 0.00 +ATOM 1091 C ARG A 272 13.665 16.577 -8.138 1.00 0.00 +ATOM 1092 O ARG A 272 12.907 16.417 -7.172 1.00 0.00 +ATOM 1093 N ILE A 273 13.337 16.235 -9.297 1.00 0.00 +ATOM 1094 CA ILE A 273 12.101 15.544 -9.646 1.00 0.00 +ATOM 1095 C ILE A 273 12.388 14.212 -10.327 1.00 0.00 +ATOM 1096 O ILE A 273 13.286 14.124 -11.175 1.00 0.00 +ATOM 1097 N TRP A 274 11.649 13.204 -9.968 1.00 0.00 +ATOM 1098 CA TRP A 274 11.841 11.879 -10.545 1.00 0.00 +ATOM 1099 C TRP A 274 10.539 11.088 -10.554 1.00 0.00 +ATOM 1100 O TRP A 274 9.637 11.400 -9.766 1.00 0.00 +ATOM 1101 N TYR A 275 10.418 10.155 -11.351 1.00 0.00 +ATOM 1102 CA TYR A 275 9.327 9.188 -11.358 1.00 0.00 +ATOM 1103 C TYR A 275 9.808 7.812 -10.916 1.00 0.00 +ATOM 1104 O TYR A 275 10.842 7.332 -11.398 1.00 0.00 +ATOM 1105 N GLU A 276 9.093 7.193 -10.034 1.00 0.00 +ATOM 1106 CA GLU A 276 9.385 5.846 -9.560 1.00 0.00 +ATOM 1107 C GLU A 276 8.120 5.001 -9.488 1.00 0.00 +ATOM 1108 O GLU A 276 7.151 5.463 -8.871 1.00 0.00 +ATOM 1109 N ASN A 277 8.054 3.914 -10.012 1.00 0.00 +ATOM 1110 CA ASN A 277 6.915 3.005 -10.047 1.00 0.00 +ATOM 1111 C ASN A 277 5.631 3.743 -10.404 1.00 0.00 +ATOM 1112 O ASN A 277 4.592 3.521 -9.770 1.00 0.00 +ATOM 1113 N GLY A 278 5.692 4.610 -11.401 1.00 0.00 +ATOM 1114 CA GLY A 278 4.524 5.263 -11.980 1.00 0.00 +ATOM 1115 C GLY A 278 4.041 6.409 -11.100 1.00 0.00 +ATOM 1116 O GLY A 278 2.923 6.904 -11.288 1.00 0.00 +ATOM 1117 N LYS A 279 4.858 6.829 -10.156 1.00 0.00 +ATOM 1118 CA LYS A 279 4.528 7.940 -9.272 1.00 0.00 +ATOM 1119 C LYS A 279 5.592 9.028 -9.333 1.00 0.00 +ATOM 1120 O LYS A 279 6.779 8.735 -9.527 1.00 0.00 +ATOM 1121 N PRO A 280 5.168 10.273 -9.170 
1.00 0.00 +ATOM 1122 CA PRO A 280 6.086 11.405 -9.197 1.00 0.00 +ATOM 1123 C PRO A 280 6.628 11.709 -7.806 1.00 0.00 +ATOM 1124 O PRO A 280 5.872 11.655 -6.827 1.00 0.00 +ATOM 1125 N PHE A 281 7.862 12.011 -7.705 1.00 0.00 +ATOM 1126 CA PHE A 281 8.525 12.455 -6.484 1.00 0.00 +ATOM 1127 C PHE A 281 9.304 13.743 -6.717 1.00 0.00 +ATOM 1128 O PHE A 281 9.973 13.862 -7.752 1.00 0.00 +ATOM 1129 N VAL A 282 9.249 14.662 -5.845 1.00 0.00 +ATOM 1130 CA VAL A 282 9.945 15.939 -5.948 1.00 0.00 +ATOM 1131 C VAL A 282 10.524 16.361 -4.603 1.00 0.00 +ATOM 1132 O VAL A 282 9.797 16.318 -3.602 1.00 0.00 +ATOM 1133 N GLU A 283 11.716 16.740 -4.534 1.00 0.00 +ATOM 1134 CA GLU A 283 12.373 17.267 -3.344 1.00 0.00 +ATOM 1135 C GLU A 283 12.919 18.667 -3.591 1.00 0.00 +ATOM 1136 O GLU A 283 13.595 18.889 -4.604 1.00 0.00 +ATOM 1137 N VAL A 284 12.652 19.593 -2.715 1.00 0.00 +ATOM 1138 CA VAL A 284 13.099 20.972 -2.872 1.00 0.00 +ATOM 1139 C VAL A 284 13.731 21.496 -1.588 1.00 0.00 +ATOM 1140 O VAL A 284 13.255 21.170 -0.493 1.00 0.00 +ATOM 1141 N LEU A 285 14.757 22.272 -1.697 1.00 0.00 +ATOM 1142 CA LEU A 285 15.415 22.909 -0.563 1.00 0.00 +ATOM 1143 C LEU A 285 15.403 24.426 -0.699 1.00 0.00 +ATOM 1144 O LEU A 285 15.587 24.961 -1.800 1.00 0.00 +ATOM 1145 N MET A 286 15.189 25.098 0.399 1.00 0.00 +ATOM 1146 CA MET A 286 15.143 26.555 0.442 1.00 0.00 +ATOM 1147 C MET A 286 15.803 27.090 1.706 1.00 0.00 +ATOM 1148 O MET A 286 16.049 26.339 2.659 1.00 0.00 +ATOM 1149 N THR A 287 16.082 28.366 1.702 1.00 0.00 +ATOM 1150 CA THR A 287 16.698 29.005 2.859 1.00 0.00 +ATOM 1151 C THR A 287 16.204 30.436 3.026 1.00 0.00 +ATOM 1152 O THR A 287 16.059 31.150 2.026 1.00 0.00 +ATOM 1153 N ILE A 288 15.956 30.852 4.192 1.00 0.00 +ATOM 1154 CA ILE A 288 15.689 32.241 4.547 1.00 0.00 +ATOM 1155 C ILE A 288 16.754 32.783 5.492 1.00 0.00 +ATOM 1156 O ILE A 288 16.961 32.203 6.565 1.00 0.00 +ATOM 1157 N THR A 289 17.408 33.825 5.147 1.00 0.00 +ATOM 1158 CA THR A 289 18.422 34.471 5.973 1.00 0.00 +ATOM 
1159 C THR A 289 18.019 35.897 6.325 1.00 0.00 +ATOM 1160 O THR A 289 17.500 36.612 5.459 1.00 0.00 +ATOM 1161 N GLY A 290 18.230 36.314 7.511 1.00 0.00 +ATOM 1162 CA GLY A 290 17.903 37.669 7.940 1.00 0.00 +ATOM 1163 C GLY A 290 18.215 37.870 9.418 1.00 0.00 +ATOM 1164 O GLY A 290 18.674 36.946 10.102 1.00 0.00 +ATOM 1165 N LYS A 291 17.959 39.086 9.894 1.00 0.00 +ATOM 1166 CA LYS A 291 18.163 39.395 11.304 1.00 0.00 +ATOM 1167 C LYS A 291 16.884 39.183 12.105 1.00 0.00 +ATOM 1168 O LYS A 291 16.957 38.831 13.289 1.00 0.00 +ATOM 1169 N SER A 292 15.810 39.372 11.539 1.00 0.00 +ATOM 1170 CA SER A 292 14.490 39.168 12.125 1.00 0.00 +ATOM 1171 C SER A 292 13.473 38.765 11.065 1.00 0.00 +ATOM 1172 O SER A 292 13.329 39.453 10.046 1.00 0.00 +ATOM 1173 N ILE A 293 12.783 37.676 11.293 1.00 0.00 +ATOM 1174 CA ILE A 293 11.739 37.187 10.401 1.00 0.00 +ATOM 1175 C ILE A 293 10.408 37.055 11.131 1.00 0.00 +ATOM 1176 O ILE A 293 10.247 36.103 11.905 1.00 0.00 +ATOM 1177 N SER A 294 9.487 37.921 10.934 1.00 0.00 +ATOM 1178 CA SER A 294 8.215 37.938 11.646 1.00 0.00 +ATOM 1179 C SER A 294 7.201 37.015 10.983 1.00 0.00 +ATOM 1180 O SER A 294 6.304 36.483 11.649 1.00 0.00 +ATOM 1181 N SER A 295 7.349 36.828 9.658 1.00 0.00 +ATOM 1182 CA SER A 295 6.464 35.948 8.905 1.00 0.00 +ATOM 1183 C SER A 295 7.249 35.081 7.929 1.00 0.00 +ATOM 1184 O SER A 295 8.047 35.618 7.150 1.00 0.00 +ATOM 1185 N ALA A 296 7.061 33.813 7.943 1.00 0.00 +ATOM 1186 CA ALA A 296 7.734 32.875 7.052 1.00 0.00 +ATOM 1187 C ALA A 296 6.817 31.718 6.676 1.00 0.00 +ATOM 1188 O ALA A 296 6.058 31.253 7.535 1.00 0.00 +ATOM 1189 N SER A 297 6.850 31.265 5.512 1.00 0.00 +ATOM 1190 CA SER A 297 6.000 30.179 5.037 1.00 0.00 +ATOM 1191 C SER A 297 6.421 29.717 3.648 1.00 0.00 +ATOM 1192 O SER A 297 7.222 30.385 2.981 1.00 0.00 +ATOM 1193 N VAL A 298 5.903 28.622 3.232 1.00 0.00 +ATOM 1194 CA VAL A 298 6.063 28.099 1.880 1.00 0.00 +ATOM 1195 C VAL A 298 4.734 28.081 1.135 1.00 0.00 +ATOM 1196 O VAL A 298 3.774 27.464 
1.614 1.00 0.00 +ATOM 1197 N THR A 299 4.657 28.714 0.024 1.00 0.00 +ATOM 1198 CA THR A 299 3.470 28.679 -0.822 1.00 0.00 +ATOM 1199 C THR A 299 3.644 27.698 -1.975 1.00 0.00 +ATOM 1200 O THR A 299 4.601 27.785 -2.754 1.00 0.00 +ATOM 1201 N LEU A 300 2.696 26.768 -2.058 1.00 0.00 +ATOM 1202 CA LEU A 300 2.637 25.842 -3.183 1.00 0.00 +ATOM 1203 C LEU A 300 1.530 26.227 -4.156 1.00 0.00 +ATOM 1204 O LEU A 300 0.344 26.094 -3.829 1.00 0.00 +ATOM 1205 N THR A 301 1.898 26.694 -5.324 1.00 0.00 +ATOM 1206 CA THR A 301 0.924 27.069 -6.342 1.00 0.00 +ATOM 1207 C THR A 301 0.743 25.957 -7.367 1.00 0.00 +ATOM 1208 O THR A 301 1.679 25.654 -8.118 1.00 0.00 +ATOM 1209 N PHE A 302 -0.409 25.361 -7.411 1.00 0.00 +ATOM 1210 CA PHE A 302 -0.748 24.365 -8.420 1.00 0.00 +ATOM 1211 C PHE A 302 -1.444 25.006 -9.614 1.00 0.00 +ATOM 1212 O PHE A 302 -2.385 25.791 -9.447 1.00 0.00 +ATOM 1213 N SER A 303 -0.986 24.674 -10.793 1.00 0.00 +ATOM 1214 CA SER A 303 -1.513 25.263 -12.018 1.00 0.00 +ATOM 1215 C SER A 303 -1.803 24.193 -13.063 1.00 0.00 +ATOM 1216 O SER A 303 -1.067 23.206 -13.181 1.00 0.00 +ATOM 1217 N ASN A 304 -2.878 24.403 -13.813 1.00 0.00 +ATOM 1218 CA ASN A 304 -3.242 23.562 -14.947 1.00 0.00 +ATOM 1219 C ASN A 304 -3.376 22.103 -14.530 1.00 0.00 +ATOM 1220 O ASN A 304 -2.761 21.197 -15.108 1.00 0.00 +ATOM 1221 N ILE A 305 -4.236 21.913 -13.465 1.00 0.00 +ATOM 1222 CA ILE A 305 -4.473 20.543 -13.027 1.00 0.00 +ATOM 1223 C ILE A 305 -5.251 19.757 -14.075 1.00 0.00 +ATOM 1224 O ILE A 305 -6.427 20.050 -14.322 1.00 0.00 +ATOM 1225 N ASN A 306 -4.616 18.776 -14.684 1.00 0.00 +ATOM 1226 CA ASN A 306 -5.246 18.018 -15.759 1.00 0.00 +ATOM 1227 C ASN A 306 -5.981 16.799 -15.216 1.00 0.00 +ATOM 1228 O ASN A 306 -6.921 16.299 -15.847 1.00 0.00 +ATOM 1229 N THR A 307 -5.556 16.333 -14.063 1.00 0.00 +ATOM 1230 CA THR A 307 -6.189 15.209 -13.384 1.00 0.00 +ATOM 1231 C THR A 307 -6.512 15.552 -11.935 1.00 0.00 +ATOM 1232 O THR A 307 -5.729 16.264 -11.293 1.00 0.00 +ATOM 1233 N ALA A 308 
-7.607 15.084 -11.413 1.00 0.00 +ATOM 1234 CA ALA A 308 -7.915 15.297 -10.004 1.00 0.00 +ATOM 1235 C ALA A 308 -6.868 14.650 -9.107 1.00 0.00 +ATOM 1236 O ALA A 308 -6.459 13.513 -9.373 1.00 0.00 +ATOM 1237 N ARG A 309 -6.437 15.314 -8.090 1.00 0.00 +ATOM 1238 CA ARG A 309 -5.417 14.789 -7.190 1.00 0.00 +ATOM 1239 C ARG A 309 -5.804 15.006 -5.733 1.00 0.00 +ATOM 1240 O ARG A 309 -6.166 16.129 -5.359 1.00 0.00 +ATOM 1241 N THR A 310 -5.740 13.996 -4.928 1.00 0.00 +ATOM 1242 CA THR A 310 -5.860 14.147 -3.483 1.00 0.00 +ATOM 1243 C THR A 310 -4.502 14.400 -2.841 1.00 0.00 +ATOM 1244 O THR A 310 -3.548 13.647 -3.074 1.00 0.00 +ATOM 1245 N ILE A 311 -4.417 15.463 -2.030 1.00 0.00 +ATOM 1246 CA ILE A 311 -3.157 15.831 -1.395 1.00 0.00 +ATOM 1247 C ILE A 311 -3.276 15.799 0.123 1.00 0.00 +ATOM 1248 O ILE A 311 -4.255 16.313 0.678 1.00 0.00 +ATOM 1249 N THR A 312 -2.330 15.225 0.774 1.00 0.00 +ATOM 1250 CA THR A 312 -2.156 15.258 2.221 1.00 0.00 +ATOM 1251 C THR A 312 -0.852 15.946 2.603 1.00 0.00 +ATOM 1252 O THR A 312 0.167 15.728 1.935 1.00 0.00 +ATOM 1253 N THR A 313 -0.850 16.741 3.619 1.00 0.00 +ATOM 1254 CA THR A 313 0.333 17.487 4.032 1.00 0.00 +ATOM 1255 C THR A 313 0.755 17.110 5.446 1.00 0.00 +ATOM 1256 O THR A 313 -0.061 17.163 6.375 1.00 0.00 +ATOM 1257 N THR A 314 1.996 16.739 5.610 1.00 0.00 +ATOM 1258 CA THR A 314 2.642 16.584 6.908 1.00 0.00 +ATOM 1259 C THR A 314 3.709 17.650 7.123 1.00 0.00 +ATOM 1260 O THR A 314 4.615 17.797 6.294 1.00 0.00 +ATOM 1261 N THR A 315 3.611 18.379 8.207 1.00 0.00 +ATOM 1262 CA THR A 315 4.549 19.458 8.495 1.00 0.00 +ATOM 1263 C THR A 315 5.329 19.183 9.774 1.00 0.00 +ATOM 1264 O THR A 315 4.766 18.703 10.766 1.00 0.00 +ATOM 1265 N SER A 316 6.623 19.490 9.743 1.00 0.00 +ATOM 1266 CA SER A 316 7.480 19.319 10.910 1.00 0.00 +ATOM 1267 C SER A 316 8.411 20.511 11.089 1.00 0.00 +ATOM 1268 O SER A 316 8.938 21.025 10.095 1.00 0.00 +ATOM 1269 N HIS A 317 8.618 20.945 12.267 1.00 0.00 +ATOM 1270 CA HIS A 317 9.516 22.046 12.595 
1.00 0.00 +ATOM 1271 C HIS A 317 10.440 21.678 13.749 1.00 0.00 +ATOM 1272 O HIS A 317 9.969 21.284 14.823 1.00 0.00 +ATOM 1273 N ARG A 318 11.730 21.802 13.536 1.00 0.00 +ATOM 1274 CA ARG A 318 12.721 21.664 14.597 1.00 0.00 +ATOM 1275 C ARG A 318 13.224 23.025 15.062 1.00 0.00 +ATOM 1276 O ARG A 318 13.837 23.764 14.281 1.00 0.00 +ATOM 1277 N ASN A 319 12.969 23.353 16.316 1.00 0.00 +ATOM 1278 CA ASN A 319 13.455 24.579 16.939 1.00 0.00 +ATOM 1279 C ASN A 319 14.714 24.320 17.756 1.00 0.00 +ATOM 1280 O ASN A 319 14.630 23.644 18.789 1.00 0.00 +ATOM 1281 N LEU A 320 15.820 24.803 17.360 1.00 0.00 +ATOM 1282 CA LEU A 320 17.105 24.546 17.999 1.00 0.00 +ATOM 1283 C LEU A 320 17.202 25.253 19.345 1.00 0.00 +ATOM 1284 O LEU A 320 17.828 24.771 20.297 1.00 0.00 +ATOM 1285 N ALA A 321 16.516 26.484 19.376 1.00 0.00 +ATOM 1286 CA ALA A 321 16.586 27.292 20.587 1.00 0.00 +ATOM 1287 C ALA A 321 15.802 26.650 21.724 1.00 0.00 +ATOM 1288 O ALA A 321 16.198 26.664 22.896 1.00 0.00 +ATOM 1289 N SER A 322 14.580 26.044 21.273 1.00 0.00 +ATOM 1290 CA SER A 322 13.742 25.412 22.286 1.00 0.00 +ATOM 1291 C SER A 322 13.971 23.907 22.329 1.00 0.00 +ATOM 1292 O SER A 322 13.371 23.208 23.156 1.00 0.00 +ATOM 1293 N ASN A 323 14.809 23.421 21.468 1.00 0.00 +ATOM 1294 CA ASN A 323 15.139 22.005 21.360 1.00 0.00 +ATOM 1295 C ASN A 323 13.881 21.153 21.244 1.00 0.00 +ATOM 1296 O ASN A 323 13.795 20.104 21.895 1.00 0.00 +ATOM 1297 N THR A 324 12.952 21.552 20.475 1.00 0.00 +ATOM 1298 CA THR A 324 11.683 20.853 20.312 1.00 0.00 +ATOM 1299 C THR A 324 11.378 20.601 18.841 1.00 0.00 +ATOM 1300 O THR A 324 11.583 21.481 17.996 1.00 0.00 +ATOM 1301 N THR A 325 10.901 19.426 18.549 1.00 0.00 +ATOM 1302 CA THR A 325 10.362 19.081 17.239 1.00 0.00 +ATOM 1303 C THR A 325 8.841 19.004 17.272 1.00 0.00 +ATOM 1304 O THR A 325 8.268 18.437 18.211 1.00 0.00 +ATOM 1305 N THR A 326 8.202 19.552 16.283 1.00 0.00 +ATOM 1306 CA THR A 326 6.748 19.507 16.181 1.00 0.00 +ATOM 1307 C THR A 326 6.307 18.924 
14.845 1.00 0.00 +ATOM 1308 O THR A 326 6.871 19.257 13.795 1.00 0.00 +ATOM 1309 N THR A 327 5.314 18.067 14.891 1.00 0.00 +ATOM 1310 CA THR A 327 4.798 17.435 13.683 1.00 0.00 +ATOM 1311 C THR A 327 3.285 17.584 13.587 1.00 0.00 +ATOM 1312 O THR A 327 2.613 17.648 14.624 1.00 0.00 +ATOM 1313 N SER A 328 2.748 17.640 12.413 1.00 0.00 +ATOM 1314 CA SER A 328 1.309 17.760 12.215 1.00 0.00 +ATOM 1315 C SER A 328 0.883 17.145 10.888 1.00 0.00 +ATOM 1316 O SER A 328 1.533 17.378 9.862 1.00 0.00 +ATOM 1317 N LEU A 329 -0.181 16.378 10.896 1.00 0.00 +ATOM 1318 CA LEU A 329 -0.756 15.821 9.677 1.00 0.00 +ATOM 1319 C LEU A 329 -2.054 16.528 9.308 1.00 0.00 +ATOM 1320 O LEU A 329 -2.998 16.501 10.108 1.00 0.00 +ATOM 1321 N ALA A 330 -2.142 17.131 8.185 1.00 0.00 +ATOM 1322 CA ALA A 330 -3.325 17.864 7.749 1.00 0.00 +ATOM 1323 C ALA A 330 -4.333 16.936 7.082 1.00 0.00 +ATOM 1324 O ALA A 330 -3.954 15.837 6.658 1.00 0.00 +ATOM 1325 N PRO A 331 -5.574 17.330 6.977 1.00 0.00 +ATOM 1326 CA PRO A 331 -6.568 16.527 6.275 1.00 0.00 +ATOM 1327 C PRO A 331 -6.299 16.504 4.776 1.00 0.00 +ATOM 1328 O PRO A 331 -5.550 17.347 4.266 1.00 0.00 +ATOM 1329 N ALA A 332 -6.897 15.555 4.073 1.00 0.00 +ATOM 1330 CA ALA A 332 -6.716 15.441 2.631 1.00 0.00 +ATOM 1331 C ALA A 332 -7.460 16.547 1.893 1.00 0.00 +ATOM 1332 O ALA A 332 -8.599 16.850 2.270 1.00 0.00 +ATOM 1333 N VAL A 333 -6.917 17.123 0.933 1.00 0.00 +ATOM 1334 CA VAL A 333 -7.529 18.104 0.045 1.00 0.00 +ATOM 1335 C VAL A 333 -7.644 17.566 -1.376 1.00 0.00 +ATOM 1336 O VAL A 333 -6.703 16.916 -1.850 1.00 0.00 +ATOM 1337 N THR A 334 -8.711 17.799 -2.044 1.00 0.00 +ATOM 1338 CA THR A 334 -8.917 17.379 -3.425 1.00 0.00 +ATOM 1339 C THR A 334 -8.736 18.545 -4.388 1.00 0.00 +ATOM 1340 O THR A 334 -9.545 19.482 -4.370 1.00 0.00 +ATOM 1341 N LEU A 335 -7.721 18.516 -5.210 1.00 0.00 +ATOM 1342 CA LEU A 335 -7.545 19.486 -6.284 1.00 0.00 +ATOM 1343 C LEU A 335 -8.189 19.000 -7.576 1.00 0.00 +ATOM 1344 O LEU A 335 -7.627 18.130 -8.252 1.00 0.00 
+ATOM 1345 N ALA A 336 -9.336 19.533 -7.927 1.00 0.00 +ATOM 1346 CA ALA A 336 -10.060 19.109 -9.119 1.00 0.00 +ATOM 1347 C ALA A 336 -9.381 19.616 -10.385 1.00 0.00 +ATOM 1348 O ALA A 336 -8.609 20.581 -10.323 1.00 0.00 +ATOM 1349 N PRO A 337 -9.644 19.006 -11.494 1.00 0.00 +ATOM 1350 CA PRO A 337 -9.110 19.433 -12.782 1.00 0.00 +ATOM 1351 C PRO A 337 -9.579 20.839 -13.134 1.00 0.00 +ATOM 1352 O PRO A 337 -10.785 21.116 -13.141 1.00 0.00 +ATOM 1353 N SER A 338 -8.625 21.720 -13.424 1.00 0.00 +ATOM 1354 CA SER A 338 -8.964 23.109 -13.710 1.00 0.00 +ATOM 1355 C SER A 338 -7.796 23.838 -14.362 1.00 0.00 +ATOM 1356 O SER A 338 -6.664 23.337 -14.380 1.00 0.00 +ATOM 1357 N SER A 339 -8.086 25.015 -14.891 1.00 0.00 +ATOM 1358 CA SER A 339 -7.072 25.847 -15.526 1.00 0.00 +ATOM 1359 C SER A 339 -6.599 26.951 -14.589 1.00 0.00 +ATOM 1360 O SER A 339 -5.509 27.498 -14.797 1.00 0.00 +ATOM 1361 N SER A 340 -7.368 27.290 -13.586 1.00 0.00 +ATOM 1362 CA SER A 340 -7.024 28.365 -12.662 1.00 0.00 +ATOM 1363 C SER A 340 -6.001 27.901 -11.633 1.00 0.00 +ATOM 1364 O SER A 340 -5.855 26.694 -11.404 1.00 0.00 +ATOM 1365 N ASP A 341 -5.301 28.836 -11.020 1.00 0.00 +ATOM 1366 CA ASP A 341 -4.265 28.526 -10.042 1.00 0.00 +ATOM 1367 C ASP A 341 -4.864 28.287 -8.662 1.00 0.00 +ATOM 1368 O ASP A 341 -5.762 29.039 -8.261 1.00 0.00 +ATOM 1369 N THR A 342 -4.436 27.334 -7.961 1.00 0.00 +ATOM 1370 CA THR A 342 -4.756 27.076 -6.562 1.00 0.00 +ATOM 1371 C THR A 342 -3.536 27.276 -5.672 1.00 0.00 +ATOM 1372 O THR A 342 -2.436 26.840 -6.033 1.00 0.00 +ATOM 1373 N THR A 343 -3.703 27.913 -4.536 1.00 0.00 +ATOM 1374 CA THR A 343 -2.577 28.237 -3.668 1.00 0.00 +ATOM 1375 C THR A 343 -2.743 27.605 -2.292 1.00 0.00 +ATOM 1376 O THR A 343 -3.811 27.730 -1.679 1.00 0.00 +ATOM 1377 N LEU A 344 -1.743 26.956 -1.819 1.00 0.00 +ATOM 1378 CA LEU A 344 -1.675 26.412 -0.468 1.00 0.00 +ATOM 1379 C LEU A 344 -0.514 27.016 0.311 1.00 0.00 +ATOM 1380 O LEU A 344 0.577 27.158 -0.256 1.00 0.00 +ATOM 1381 N THR A 345 
-0.685 27.359 1.519 1.00 0.00 +ATOM 1382 CA THR A 345 0.356 27.961 2.343 1.00 0.00 +ATOM 1383 C THR A 345 0.694 27.077 3.537 1.00 0.00 +ATOM 1384 O THR A 345 -0.177 26.779 4.364 1.00 0.00 +ATOM 1385 N LEU A 346 1.954 26.660 3.622 1.00 0.00 +ATOM 1386 CA LEU A 346 2.462 25.959 4.795 1.00 0.00 +ATOM 1387 C LEU A 346 3.262 26.895 5.693 1.00 0.00 +ATOM 1388 O LEU A 346 4.461 27.084 5.455 1.00 0.00 +ATOM 1389 N PRO A 347 2.655 27.468 6.685 1.00 0.00 +ATOM 1390 CA PRO A 347 3.293 28.461 7.542 1.00 0.00 +ATOM 1391 C PRO A 347 4.212 27.800 8.562 1.00 0.00 +ATOM 1392 O PRO A 347 3.935 26.694 9.043 1.00 0.00 +ATOM 1393 N LEU A 348 5.325 28.505 8.888 1.00 0.00 +ATOM 1394 CA LEU A 348 6.158 28.032 9.987 1.00 0.00 +ATOM 1395 C LEU A 348 5.402 28.073 11.309 1.00 0.00 +ATOM 1396 O LEU A 348 4.502 28.905 11.480 1.00 0.00 +ATOM 1397 N SER A 349 5.750 27.198 12.233 1.00 0.00 +ATOM 1398 CA SER A 349 5.088 27.128 13.530 1.00 0.00 +ATOM 1399 C SER A 349 5.294 28.412 14.323 1.00 0.00 +ATOM 1400 O SER A 349 6.265 29.136 14.071 1.00 0.00 +ATOM 1401 N SER A 350 4.428 28.705 15.251 1.00 0.00 +ATOM 1402 CA SER A 350 4.442 29.953 16.005 1.00 0.00 +ATOM 1403 C SER A 350 5.709 30.076 16.842 1.00 0.00 +ATOM 1404 O SER A 350 6.258 31.177 16.978 1.00 0.00 +ATOM 1405 N ALA A 351 6.181 28.950 17.408 1.00 0.00 +ATOM 1406 CA ALA A 351 7.386 28.946 18.228 1.00 0.00 +ATOM 1407 C ALA A 351 8.607 29.357 17.415 1.00 0.00 +ATOM 1408 O ALA A 351 9.495 30.066 17.907 1.00 0.00 +ATOM 1409 N MET A 352 8.630 28.884 16.130 1.00 0.00 +ATOM 1410 CA MET A 352 9.740 29.218 15.246 1.00 0.00 +ATOM 1411 C MET A 352 9.721 30.694 14.870 1.00 0.00 +ATOM 1412 O MET A 352 10.768 31.349 14.805 1.00 0.00 +ATOM 1413 N LEU A 353 8.485 31.214 14.621 1.00 0.00 +ATOM 1414 CA LEU A 353 8.315 32.615 14.255 1.00 0.00 +ATOM 1415 C LEU A 353 8.722 33.536 15.398 1.00 0.00 +ATOM 1416 O LEU A 353 9.348 34.579 15.171 1.00 0.00 +ATOM 1417 N ALA A 354 8.367 33.148 16.624 1.00 0.00 +ATOM 1418 CA ALA A 354 8.733 33.922 17.804 1.00 0.00 +ATOM 
1419 C ALA A 354 10.245 33.980 17.978 1.00 0.00 +ATOM 1420 O ALA A 354 10.789 35.024 18.357 1.00 0.00 +ATOM 1421 N THR A 355 10.926 32.864 17.702 1.00 0.00 +ATOM 1422 CA THR A 355 12.374 32.798 17.864 1.00 0.00 +ATOM 1423 C THR A 355 13.084 33.649 16.819 1.00 0.00 +ATOM 1424 O THR A 355 14.056 34.351 17.126 1.00 0.00 +ATOM 1425 N LEU A 356 12.584 33.575 15.580 1.00 0.00 +ATOM 1426 CA LEU A 356 13.187 34.296 14.465 1.00 0.00 +ATOM 1427 C LEU A 356 12.978 35.799 14.603 1.00 0.00 +ATOM 1428 O LEU A 356 13.877 36.596 14.308 1.00 0.00 +ATOM 1429 N ALA A 357 11.757 36.177 15.062 1.00 0.00 +ATOM 1430 CA ALA A 357 11.421 37.583 15.252 1.00 0.00 +ATOM 1431 C ALA A 357 12.261 38.206 16.360 1.00 0.00 +ATOM 1432 O ALA A 357 12.505 39.419 16.345 1.00 0.00 +ATOM 1433 N ALA A 358 12.708 37.385 17.324 1.00 0.00 +ATOM 1434 CA ALA A 358 13.363 37.909 18.516 1.00 0.00 +ATOM 1435 C ALA A 358 14.879 37.839 18.387 1.00 0.00 +ATOM 1436 O ALA A 358 15.611 38.313 19.265 1.00 0.00 +ATOM 1437 N SER A 359 15.353 37.230 17.259 1.00 0.00 +ATOM 1438 CA SER A 359 16.782 36.999 17.088 1.00 0.00 +ATOM 1439 C SER A 359 17.519 38.299 16.792 1.00 0.00 +ATOM 1440 O SER A 359 17.153 39.066 15.892 1.00 0.00 +ATOM 1441 N THR A 360 18.625 38.524 17.628 1.00 0.00 +ATOM 1442 CA THR A 360 19.348 39.780 17.467 1.00 0.00 +ATOM 1443 C THR A 360 20.527 39.619 16.516 1.00 0.00 +ATOM 1444 O THR A 360 21.062 40.630 16.044 1.00 0.00 +ATOM 1445 N SER A 361 20.933 38.448 16.231 1.00 0.00 +ATOM 1446 CA SER A 361 22.001 38.120 15.295 1.00 0.00 +ATOM 1447 C SER A 361 21.440 37.574 13.988 1.00 0.00 +ATOM 1448 O SER A 361 20.238 37.288 13.914 1.00 0.00 +ATOM 1449 N THR A 362 22.245 37.429 12.992 1.00 0.00 +ATOM 1450 CA THR A 362 21.835 36.864 11.712 1.00 0.00 +ATOM 1451 C THR A 362 21.449 35.397 11.855 1.00 0.00 +ATOM 1452 O THR A 362 22.223 34.628 12.439 1.00 0.00 +ATOM 1453 N VAL A 363 20.359 35.002 11.380 1.00 0.00 +ATOM 1454 CA VAL A 363 19.837 33.645 11.485 1.00 0.00 +ATOM 1455 C VAL A 363 19.533 33.064 10.110 1.00 
0.00 +ATOM 1456 O VAL A 363 19.557 33.782 9.102 1.00 0.00 +ATOM 1457 N ALA A 364 19.259 31.817 10.079 1.00 0.00 +ATOM 1458 CA ALA A 364 18.909 31.088 8.866 1.00 0.00 +ATOM 1459 C ALA A 364 17.809 30.069 9.134 1.00 0.00 +ATOM 1460 O ALA A 364 17.854 29.364 10.150 1.00 0.00 +ATOM 1461 N VAL A 365 16.864 29.986 8.273 1.00 0.00 +ATOM 1462 CA VAL A 365 15.843 28.945 8.249 1.00 0.00 +ATOM 1463 C VAL A 365 16.004 28.043 7.032 1.00 0.00 +ATOM 1464 O VAL A 365 15.761 28.471 5.896 1.00 0.00 +ATOM 1465 N VAL A 366 16.413 26.802 7.273 1.00 0.00 +ATOM 1466 CA VAL A 366 16.515 25.804 6.215 1.00 0.00 +ATOM 1467 C VAL A 366 15.246 24.967 6.123 1.00 0.00 +ATOM 1468 O VAL A 366 14.743 24.470 7.139 1.00 0.00 +ATOM 1469 N ILE A 367 14.742 24.819 4.921 1.00 0.00 +ATOM 1470 CA ILE A 367 13.518 24.061 4.688 1.00 0.00 +ATOM 1471 C ILE A 367 13.719 23.007 3.606 1.00 0.00 +ATOM 1472 O ILE A 367 14.367 23.265 2.584 1.00 0.00 +ATOM 1473 N ARG A 368 13.181 21.867 3.835 1.00 0.00 +ATOM 1474 CA ARG A 368 13.121 20.789 2.855 1.00 0.00 +ATOM 1475 C ARG A 368 11.681 20.381 2.571 1.00 0.00 +ATOM 1476 O ARG A 368 10.969 19.953 3.488 1.00 0.00 +ATOM 1477 N VAL A 369 11.260 20.503 1.368 1.00 0.00 +ATOM 1478 CA VAL A 369 9.935 20.073 0.938 1.00 0.00 +ATOM 1479 C VAL A 369 10.016 18.826 0.067 1.00 0.00 +ATOM 1480 O VAL A 369 10.788 18.771 -0.899 1.00 0.00 +ATOM 1481 N PHE A 370 9.206 17.828 0.428 1.00 0.00 +ATOM 1482 CA PHE A 370 9.147 16.591 -0.342 1.00 0.00 +ATOM 1483 C PHE A 370 7.729 16.306 -0.820 1.00 0.00 +ATOM 1484 O PHE A 370 6.811 16.187 0.001 1.00 0.00 +ATOM 1485 N LEU A 371 7.547 16.200 -2.093 1.00 0.00 +ATOM 1486 CA LEU A 371 6.269 15.806 -2.672 1.00 0.00 +ATOM 1487 C LEU A 371 6.334 14.395 -3.242 1.00 0.00 +ATOM 1488 O LEU A 371 7.017 14.165 -4.249 1.00 0.00 +ATOM 1489 N ASP A 372 5.647 13.466 -2.623 1.00 0.00 +ATOM 1490 CA ASP A 372 5.559 12.095 -3.111 1.00 0.00 +ATOM 1491 C ASP A 372 4.132 11.743 -3.510 1.00 0.00 +ATOM 1492 O ASP A 372 3.274 11.422 -2.677 1.00 0.00 +ATOM 1493 N ASP A 373 
3.917 11.824 -4.877 1.00 0.00 +ATOM 1494 CA ASP A 373 2.584 11.598 -5.423 1.00 0.00 +ATOM 1495 C ASP A 373 1.582 12.599 -4.862 1.00 0.00 +ATOM 1496 O ASP A 373 1.443 13.716 -5.377 1.00 0.00 +ATOM 1497 N GLY A 374 0.880 12.175 -3.787 1.00 0.00 +ATOM 1498 CA GLY A 374 -0.159 13.041 -3.242 1.00 0.00 +ATOM 1499 C GLY A 374 0.189 13.499 -1.831 1.00 0.00 +ATOM 1500 O GLY A 374 -0.684 14.061 -1.158 1.00 0.00 +ATOM 1501 N GLN A 375 1.363 13.298 -1.368 1.00 0.00 +ATOM 1502 CA GLN A 375 1.831 13.685 -0.042 1.00 0.00 +ATOM 1503 C GLN A 375 2.879 14.787 -0.127 1.00 0.00 +ATOM 1504 O GLN A 375 3.934 14.609 -0.750 1.00 0.00 +ATOM 1505 N ILE A 376 2.584 15.908 0.494 1.00 0.00 +ATOM 1506 CA ILE A 376 3.547 16.987 0.675 1.00 0.00 +ATOM 1507 C ILE A 376 4.064 17.029 2.107 1.00 0.00 +ATOM 1508 O ILE A 376 3.293 17.299 3.037 1.00 0.00 +ATOM 1509 N THR A 377 5.333 16.770 2.291 1.00 0.00 +ATOM 1510 CA THR A 377 5.981 16.836 3.595 1.00 0.00 +ATOM 1511 C THR A 377 6.964 17.998 3.664 1.00 0.00 +ATOM 1512 O THR A 377 7.738 18.227 2.726 1.00 0.00 +ATOM 1513 N VAL A 378 6.924 18.711 4.758 1.00 0.00 +ATOM 1514 CA VAL A 378 7.803 19.858 4.955 1.00 0.00 +ATOM 1515 C VAL A 378 8.536 19.768 6.287 1.00 0.00 +ATOM 1516 O VAL A 378 7.917 19.488 7.321 1.00 0.00 +ATOM 1517 N ALA A 379 9.806 19.995 6.268 1.00 0.00 +ATOM 1518 CA ALA A 379 10.613 20.094 7.478 1.00 0.00 +ATOM 1519 C ALA A 379 11.388 21.405 7.517 1.00 0.00 +ATOM 1520 O ALA A 379 12.018 21.782 6.521 1.00 0.00 +ATOM 1521 N SER A 380 11.349 22.081 8.622 1.00 0.00 +ATOM 1522 CA SER A 380 12.032 23.362 8.761 1.00 0.00 +ATOM 1523 C SER A 380 12.904 23.388 10.010 1.00 0.00 +ATOM 1524 O SER A 380 12.519 22.791 11.023 1.00 0.00 +ATOM 1525 N ALA A 381 13.979 24.008 9.983 1.00 0.00 +ATOM 1526 CA ALA A 381 14.867 24.218 11.120 1.00 0.00 +ATOM 1527 C ALA A 381 15.499 25.604 11.077 1.00 0.00 +ATOM 1528 O ALA A 381 15.641 26.201 10.003 1.00 0.00 +ATOM 1529 N ASN A 382 15.870 26.100 12.242 1.00 0.00 +ATOM 1530 CA ASN A 382 16.469 27.425 12.357 1.00 0.00 
+ATOM 1531 C ASN A 382 17.842 27.354 13.013 1.00 0.00 +ATOM 1532 O ASN A 382 18.088 26.462 13.835 1.00 0.00 +ATOM 1533 N PHE A 383 18.723 28.264 12.668 1.00 0.00 +ATOM 1534 CA PHE A 383 20.056 28.277 13.258 1.00 0.00 +ATOM 1535 C PHE A 383 20.636 29.685 13.279 1.00 0.00 +ATOM 1536 O PHE A 383 20.358 30.466 12.359 1.00 0.00 +ATOM 1537 N THR A 384 21.392 30.023 14.243 1.00 0.00 +ATOM 1538 CA THR A 384 22.071 31.312 14.307 1.00 0.00 +ATOM 1539 C THR A 384 23.401 31.272 13.565 1.00 0.00 +ATOM 1540 O THR A 384 24.130 30.275 13.652 1.00 0.00 +ATOM 1541 N LEU A 385 23.718 32.331 12.846 1.00 0.00 +ATOM 1542 CA LEU A 385 24.982 32.394 12.122 1.00 0.00 +ATOM 1543 C LEU A 385 26.053 33.100 12.944 1.00 0.00 +ATOM 1544 O LEU A 385 25.823 34.135 13.516 1.00 0.00 +HETATM 1545 CME L:G B 406 10.544 -6.508 -5.047 1.00 0.00 +HETATM 1546 C1 L:G B 406 10.919 -1.897 -5.422 1.00 0.00 +HETATM 1547 C2 L:G B 406 10.155 -2.691 -4.335 1.00 0.00 +HETATM 1548 C2N L:G B 406 10.889 -5.009 -4.499 1.00 0.00 +HETATM 1549 N2 L:G B 406 9.952 -4.157 -4.506 1.00 0.00 +HETATM 1550 O2N L:G B 406 12.047 -4.655 -4.383 1.00 0.00 +HETATM 1551 C3 L:G B 406 8.730 -2.024 -4.312 1.00 0.00 +HETATM 1552 O3 L:G B 406 7.807 -2.588 -3.358 1.00 0.00 +HETATM 1553 C4 L:G B 406 8.724 -0.448 -4.074 1.00 0.00 +HETATM 1554 O4 L:G B 406 7.464 0.335 -4.184 1.00 0.00 +HETATM 1555 C5 L:G B 406 9.634 0.201 -5.153 1.00 0.00 +HETATM 1556 O5 L:G B 406 10.980 -0.445 -5.194 1.00 0.00 +HETATM 1557 C6 L:G B 406 9.860 1.710 -4.980 1.00 0.00 +HETATM 1558 O6 L:G B 406 10.584 2.238 -6.071 1.00 0.00 +HETATM 1559 CME1 L:G B 406 7.262 4.375 -2.561 1.00 0.00 +HETATM 1560 C11 L:G B 406 6.205 -0.030 -3.401 1.00 0.00 +HETATM 1561 C21 L:G B 406 5.256 1.197 -3.327 1.00 0.00 +HETATM 1562 C2N1 L:G B 406 6.325 3.419 -3.277 1.00 0.00 +HETATM 1563 N21 L:G B 406 6.091 2.270 -2.696 1.00 0.00 +HETATM 1564 O2N1 L:G B 406 5.866 3.726 -4.369 1.00 0.00 +HETATM 1565 C31 L:G B 406 3.898 0.931 -2.580 1.00 0.00 +HETATM 1566 O31 L:G B 406 2.957 2.097 -2.518 1.00 
0.00 +HETATM 1567 C41 L:G B 406 3.161 -0.377 -3.008 1.00 0.00 +HETATM 1568 O41 L:G B 406 2.195 -0.885 -2.002 1.00 0.00 +HETATM 1569 C51 L:G B 406 4.144 -1.513 -3.293 1.00 0.00 +HETATM 1570 O51 L:G B 406 5.376 -1.031 -4.080 1.00 0.00 +HETATM 1571 C61 L:G B 406 3.498 -2.709 -4.037 1.00 0.00 +HETATM 1572 O61 L:G B 406 4.489 -3.740 -4.213 1.00 0.00 +HETATM 1573 C12 L:G B 406 0.750 -0.791 -2.328 1.00 0.00 +HETATM 1574 C22 L:G B 406 -0.017 -1.616 -1.249 1.00 0.00 +HETATM 1575 O2 L:G B 406 0.224 -1.145 0.183 1.00 0.00 +HETATM 1576 C32 L:G B 406 -1.537 -1.388 -1.509 1.00 0.00 +HETATM 1577 O32 L:G B 406 -2.452 -1.959 -0.459 1.00 0.00 +HETATM 1578 C42 L:G B 406 -1.922 0.107 -1.617 1.00 0.00 +HETATM 1579 O42 L:G B 406 -3.311 0.294 -2.056 1.00 0.00 +HETATM 1580 C52 L:G B 406 -0.986 0.932 -2.622 1.00 0.00 +HETATM 1581 O52 L:G B 406 0.404 0.661 -2.274 1.00 0.00 +HETATM 1582 C62 L:G B 406 -1.221 2.478 -2.523 1.00 0.00 +HETATM 1583 O62 L:G B 406 -0.809 2.935 -1.215 1.00 0.00 +HETATM 1584 C13 L:G B 406 -1.202 4.292 -0.768 1.00 0.00 +HETATM 1585 C23 L:G B 406 0.073 5.039 -0.229 1.00 0.00 +HETATM 1586 O21 L:G B 406 -0.247 6.408 0.002 1.00 0.00 +HETATM 1587 C33 L:G B 406 0.532 4.244 1.052 1.00 0.00 +HETATM 1588 O33 L:G B 406 1.784 4.938 1.587 1.00 0.00 +HETATM 1589 C43 L:G B 406 -0.476 4.117 2.199 1.00 0.00 +HETATM 1590 O43 L:G B 406 0.142 3.213 3.210 1.00 0.00 +HETATM 1591 C53 L:G B 406 -1.742 3.470 1.568 1.00 0.00 +HETATM 1592 O53 L:G B 406 -2.143 4.202 0.340 1.00 0.00 +HETATM 1593 C63 L:G B 406 -3.017 3.448 2.438 1.00 0.00 +HETATM 1594 O63 L:G B 406 -2.711 2.838 3.789 1.00 0.00 +HETATM 1595 C14 L:G B 406 -3.875 2.669 4.656 1.00 0.00 +HETATM 1596 C24 L:G B 406 -3.272 2.376 6.075 1.00 0.00 +HETATM 1597 O22 L:G B 406 -4.354 2.520 7.074 1.00 0.00 +HETATM 1598 C34 L:G B 406 -2.699 1.023 6.218 1.00 0.00 +HETATM 1599 O34 L:G B 406 -2.272 0.801 7.611 1.00 0.00 +HETATM 1600 C44 L:G B 406 -3.679 -0.122 5.753 1.00 0.00 +HETATM 1601 O44 L:G B 406 -3.039 -1.371 5.697 1.00 0.00 +HETATM 1602 C54 
L:G B 406 -4.344 0.124 4.347 1.00 0.00 +HETATM 1603 O54 L:G B 406 -4.731 1.556 4.145 1.00 0.00 +HETATM 1604 C64 L:G B 406 -5.470 -0.904 4.040 1.00 0.00 +HETATM 1605 O64 L:G B 406 -5.873 -0.799 2.677 1.00 0.00 +HETATM 1606 C15 L:G B 406 -4.192 3.598 8.091 1.00 0.00 +HETATM 1607 C25 L:G B 406 -4.831 3.306 9.443 1.00 0.00 +HETATM 1608 O23 L:G B 406 -4.465 4.336 10.461 1.00 0.00 +HETATM 1609 C35 L:G B 406 -6.264 3.022 9.285 1.00 0.00 +HETATM 1610 O35 L:G B 406 -6.844 2.535 10.590 1.00 0.00 +HETATM 1611 C45 L:G B 406 -6.950 4.375 8.840 1.00 0.00 +HETATM 1612 O45 L:G B 406 -8.400 4.162 8.736 1.00 0.00 +HETATM 1613 C55 L:G B 406 -6.302 4.798 7.503 1.00 0.00 +HETATM 1614 O55 L:G B 406 -4.833 4.802 7.538 1.00 0.00 +HETATM 1615 C65 L:G B 406 -6.839 6.114 6.901 1.00 0.00 +HETATM 1616 O65 L:G B 406 -6.783 7.137 7.789 1.00 0.00 +HETATM 1617 C16 L:G B 406 3.061 4.366 1.113 1.00 0.00 +HETATM 1618 C26 L:G B 406 4.129 4.889 2.116 1.00 0.00 +HETATM 1619 O24 L:G B 406 5.296 4.007 1.906 1.00 0.00 +HETATM 1620 C36 L:G B 406 4.506 6.382 1.804 1.00 0.00 +HETATM 1621 O36 L:G B 406 5.618 6.615 2.629 1.00 0.00 +HETATM 1622 C46 L:G B 406 4.864 6.670 0.310 1.00 0.00 +HETATM 1623 O46 L:G B 406 5.064 8.067 0.152 1.00 0.00 +HETATM 1624 C56 L:G B 406 3.676 6.157 -0.516 1.00 0.00 +HETATM 1625 O56 L:G B 406 3.334 4.716 -0.250 1.00 0.00 +HETATM 1626 C66 L:G B 406 3.943 6.312 -2.080 1.00 0.00 +HETATM 1627 O66 L:G B 406 2.693 6.379 -2.785 1.00 0.00 +HETATM 1628 C17 L:G B 406 5.216 2.829 2.738 1.00 0.00 +HETATM 1629 C27 L:G B 406 6.642 2.091 2.709 1.00 0.00 +HETATM 1630 O25 L:G B 406 6.749 1.112 3.832 1.00 0.00 +HETATM 1631 C37 L:G B 406 6.921 1.357 1.367 1.00 0.00 +HETATM 1632 O37 L:G B 406 8.212 0.614 1.326 1.00 0.00 +HETATM 1633 C47 L:G B 406 5.729 0.373 1.094 1.00 0.00 +HETATM 1634 O47 L:G B 406 5.986 -0.281 -0.165 1.00 0.00 +HETATM 1635 C57 L:G B 406 4.352 1.126 1.073 1.00 0.00 +HETATM 1636 O57 L:G B 406 4.142 1.857 2.388 1.00 0.00 +HETATM 1637 C67 L:G B 406 3.216 0.099 0.885 1.00 0.00 +HETATM 
1638 O67 L:G B 406 2.082 0.775 0.293 1.00 0.00 +HETATM 1639 C18 L:G B 406 -3.030 -3.230 -0.870 1.00 0.00 +HETATM 1640 C28 L:G B 406 -4.273 -3.559 0.022 1.00 0.00 +HETATM 1641 O26 L:G B 406 -4.926 -4.791 -0.451 1.00 0.00 +HETATM 1642 C38 L:G B 406 -3.851 -3.802 1.472 1.00 0.00 +HETATM 1643 O38 L:G B 406 -4.985 -4.141 2.258 1.00 0.00 +HETATM 1644 C48 L:G B 406 -2.689 -4.818 1.591 1.00 0.00 +HETATM 1645 O48 L:G B 406 -2.149 -4.836 2.992 1.00 0.00 +HETATM 1646 C58 L:G B 406 -1.448 -4.390 0.640 1.00 0.00 +HETATM 1647 O58 L:G B 406 -1.968 -4.227 -0.766 1.00 0.00 +HETATM 1648 C68 L:G B 406 -0.299 -5.486 0.765 1.00 0.00 +HETATM 1649 O68 L:G B 406 0.801 -5.102 -0.077 1.00 0.00 +HETATM 1650 C19 L:G B 406 -5.894 -4.660 -1.509 1.00 0.00 +HETATM 1651 C29 L:G B 406 -6.824 -5.915 -1.250 1.00 0.00 +HETATM 1652 O27 L:G B 406 -7.994 -5.793 -2.108 1.00 0.00 +HETATM 1653 C39 L:G B 406 -6.084 -7.208 -1.513 1.00 0.00 +HETATM 1654 O39 L:G B 406 -6.968 -8.348 -1.313 1.00 0.00 +HETATM 1655 C49 L:G B 406 -5.372 -7.262 -2.909 1.00 0.00 +HETATM 1656 O49 L:G B 406 -4.490 -8.392 -2.995 1.00 0.00 +HETATM 1657 C59 L:G B 406 -4.478 -6.019 -3.115 1.00 0.00 +HETATM 1658 O59 L:G B 406 -5.250 -4.759 -2.801 1.00 0.00 +HETATM 1659 C69 L:G B 406 -3.846 -5.931 -4.552 1.00 0.00 +HETATM 1660 O69 L:G B 406 -4.707 -5.527 -5.582 1.00 0.00 +HETATM 1661 C110 L:G B 406 -9.134 -4.994 -1.672 1.00 0.00 +HETATM 1662 C210 L:G B 406 -10.447 -5.320 -2.563 1.00 0.00 +HETATM 1663 O28 L:G B 406 -11.539 -4.561 -1.884 1.00 0.00 +HETATM 1664 C310 L:G B 406 -10.225 -4.674 -4.006 1.00 0.00 +HETATM 1665 O310 L:G B 406 -11.405 -5.004 -4.784 1.00 0.00 +HETATM 1666 C410 L:G B 406 -9.904 -3.150 -4.005 1.00 0.00 +HETATM 1667 O410 L:G B 406 -9.530 -2.706 -5.341 1.00 0.00 +HETATM 1668 C510 L:G B 406 -8.592 -2.955 -3.150 1.00 0.00 +HETATM 1669 O510 L:G B 406 -8.800 -3.499 -1.726 1.00 0.00 +HETATM 1670 C610 L:G B 406 -8.136 -1.473 -3.043 1.00 0.00 +HETATM 1671 O610 L:G B 406 -6.808 -1.330 -2.393 1.00 0.00 +CONECT 876 1546 +CONECT 1545 
1548 +CONECT 1546 1547 +CONECT 1546 1556 +CONECT 1547 1546 +CONECT 1547 1549 +CONECT 1547 1551 +CONECT 1548 1545 +CONECT 1548 1549 +CONECT 1548 1550 +CONECT 1549 1547 +CONECT 1549 1548 +CONECT 1550 1548 +CONECT 1551 1547 +CONECT 1551 1552 +CONECT 1551 1553 +CONECT 1552 1551 +CONECT 1553 1551 +CONECT 1553 1554 +CONECT 1553 1555 +CONECT 1554 1553 +CONECT 1554 1560 +CONECT 1555 1553 +CONECT 1555 1556 +CONECT 1555 1557 +CONECT 1556 1546 +CONECT 1556 1555 +CONECT 1557 1555 +CONECT 1557 1558 +CONECT 1558 1557 +CONECT 1559 1562 +CONECT 1560 1554 +CONECT 1560 1561 +CONECT 1560 1570 +CONECT 1561 1560 +CONECT 1561 1563 +CONECT 1561 1565 +CONECT 1562 1559 +CONECT 1562 1563 +CONECT 1562 1564 +CONECT 1563 1561 +CONECT 1563 1562 +CONECT 1564 1562 +CONECT 1565 1561 +CONECT 1565 1566 +CONECT 1565 1567 +CONECT 1566 1565 +CONECT 1567 1565 +CONECT 1567 1568 +CONECT 1567 1569 +CONECT 1568 1567 +CONECT 1568 1573 +CONECT 1569 1567 +CONECT 1569 1570 +CONECT 1569 1571 +CONECT 1570 1560 +CONECT 1570 1569 +CONECT 1571 1569 +CONECT 1571 1572 +CONECT 1572 1571 +CONECT 1573 1568 +CONECT 1573 1574 +CONECT 1573 1581 +CONECT 1574 1573 +CONECT 1574 1575 +CONECT 1574 1576 +CONECT 1575 1574 +CONECT 1576 1574 +CONECT 1576 1577 +CONECT 1576 1578 +CONECT 1577 1576 +CONECT 1577 1639 +CONECT 1578 1576 +CONECT 1578 1579 +CONECT 1578 1580 +CONECT 1579 1578 +CONECT 1580 1578 +CONECT 1580 1581 +CONECT 1580 1582 +CONECT 1581 1573 +CONECT 1581 1580 +CONECT 1582 1580 +CONECT 1582 1583 +CONECT 1583 1582 +CONECT 1583 1584 +CONECT 1584 1583 +CONECT 1584 1585 +CONECT 1584 1592 +CONECT 1585 1584 +CONECT 1585 1586 +CONECT 1585 1587 +CONECT 1586 1585 +CONECT 1587 1585 +CONECT 1587 1588 +CONECT 1587 1589 +CONECT 1588 1587 +CONECT 1588 1617 +CONECT 1589 1587 +CONECT 1589 1590 +CONECT 1589 1591 +CONECT 1590 1589 +CONECT 1591 1589 +CONECT 1591 1592 +CONECT 1591 1593 +CONECT 1592 1584 +CONECT 1592 1591 +CONECT 1593 1591 +CONECT 1593 1594 +CONECT 1594 1593 +CONECT 1594 1595 +CONECT 1595 1594 +CONECT 1595 1596 +CONECT 1595 
1603 +CONECT 1596 1595 +CONECT 1596 1597 +CONECT 1596 1598 +CONECT 1597 1596 +CONECT 1597 1606 +CONECT 1598 1596 +CONECT 1598 1599 +CONECT 1598 1600 +CONECT 1599 1598 +CONECT 1600 1598 +CONECT 1600 1601 +CONECT 1600 1602 +CONECT 1601 1600 +CONECT 1602 1600 +CONECT 1602 1603 +CONECT 1602 1604 +CONECT 1603 1595 +CONECT 1603 1602 +CONECT 1604 1602 +CONECT 1604 1605 +CONECT 1605 1604 +CONECT 1606 1597 +CONECT 1606 1607 +CONECT 1606 1614 +CONECT 1607 1606 +CONECT 1607 1608 +CONECT 1607 1609 +CONECT 1608 1607 +CONECT 1609 1607 +CONECT 1609 1610 +CONECT 1609 1611 +CONECT 1610 1609 +CONECT 1611 1609 +CONECT 1611 1612 +CONECT 1611 1613 +CONECT 1612 1611 +CONECT 1613 1611 +CONECT 1613 1614 +CONECT 1613 1615 +CONECT 1614 1606 +CONECT 1614 1613 +CONECT 1615 1613 +CONECT 1615 1616 +CONECT 1616 1615 +CONECT 1617 1588 +CONECT 1617 1618 +CONECT 1617 1625 +CONECT 1618 1617 +CONECT 1618 1619 +CONECT 1618 1620 +CONECT 1619 1618 +CONECT 1619 1628 +CONECT 1620 1618 +CONECT 1620 1621 +CONECT 1620 1622 +CONECT 1621 1620 +CONECT 1622 1620 +CONECT 1622 1623 +CONECT 1622 1624 +CONECT 1623 1622 +CONECT 1624 1622 +CONECT 1624 1625 +CONECT 1624 1626 +CONECT 1625 1617 +CONECT 1625 1624 +CONECT 1626 1624 +CONECT 1626 1627 +CONECT 1627 1626 +CONECT 1628 1619 +CONECT 1628 1629 +CONECT 1628 1636 +CONECT 1629 1628 +CONECT 1629 1630 +CONECT 1629 1631 +CONECT 1630 1629 +CONECT 1631 1629 +CONECT 1631 1632 +CONECT 1631 1633 +CONECT 1632 1631 +CONECT 1633 1631 +CONECT 1633 1634 +CONECT 1633 1635 +CONECT 1634 1633 +CONECT 1635 1633 +CONECT 1635 1636 +CONECT 1635 1637 +CONECT 1636 1628 +CONECT 1636 1635 +CONECT 1637 1635 +CONECT 1637 1638 +CONECT 1638 1637 +CONECT 1639 1577 +CONECT 1639 1640 +CONECT 1639 1647 +CONECT 1640 1639 +CONECT 1640 1641 +CONECT 1640 1642 +CONECT 1641 1640 +CONECT 1641 1650 +CONECT 1642 1640 +CONECT 1642 1643 +CONECT 1642 1644 +CONECT 1643 1642 +CONECT 1644 1642 +CONECT 1644 1645 +CONECT 1644 1646 +CONECT 1645 1644 +CONECT 1646 1644 +CONECT 1646 1647 +CONECT 1646 1648 +CONECT 1647 
1639 +CONECT 1647 1646 +CONECT 1648 1646 +CONECT 1648 1649 +CONECT 1649 1648 +CONECT 1650 1641 +CONECT 1650 1651 +CONECT 1650 1658 +CONECT 1651 1650 +CONECT 1651 1652 +CONECT 1651 1653 +CONECT 1652 1651 +CONECT 1652 1661 +CONECT 1653 1651 +CONECT 1653 1654 +CONECT 1653 1655 +CONECT 1654 1653 +CONECT 1655 1653 +CONECT 1655 1656 +CONECT 1655 1657 +CONECT 1656 1655 +CONECT 1657 1655 +CONECT 1657 1658 +CONECT 1657 1659 +CONECT 1658 1650 +CONECT 1658 1657 +CONECT 1659 1657 +CONECT 1659 1660 +CONECT 1660 1659 +CONECT 1661 1652 +CONECT 1661 1662 +CONECT 1661 1669 +CONECT 1662 1661 +CONECT 1662 1663 +CONECT 1662 1664 +CONECT 1663 1662 +CONECT 1664 1662 +CONECT 1664 1665 +CONECT 1664 1666 +CONECT 1665 1664 +CONECT 1666 1664 +CONECT 1666 1667 +CONECT 1666 1668 +CONECT 1667 1666 +CONECT 1668 1666 +CONECT 1668 1669 +CONECT 1668 1670 +CONECT 1669 1661 +CONECT 1669 1668 +CONECT 1670 1668 +CONECT 1670 1671 +CONECT 1671 1670 diff --git a/tests/test_inference_pipelines.py b/tests/test_inference_pipelines.py new file mode 100644 index 0000000..4552886 --- /dev/null +++ b/tests/test_inference_pipelines.py @@ -0,0 +1,31 @@ +from os import PathLike +from pathlib import Path + +import pytest +from cifutils import parse + +from modelhub.utils.inference import build_file_paths_for_prediction + +current_file_directory = Path(__file__).parent + + +@pytest.mark.parametrize( + "file_path", + [ + "data/nested_examples", + "data/multiple_examples_from_json.json", + ], +) +def test_build_file_paths_for_prediction(file_path: PathLike, tmp_path: Path): + """Use the inference pipeline to build and parse inputs for prediction.""" + file_path = current_file_directory / Path(file_path) + + # Call the function with the file path and temporary directory + paths = build_file_paths_for_prediction(file_path, tmp_path) + + # Iterate over the returned paths and parse them, ensuring the outputs are reasonable + for path in paths: + output = parse(path) + assert output is not None + assert
len(output["assemblies"]["1"][0]) > 0 + diff --git a/tests/test_torch_utils.py b/tests/test_torch_utils.py new file mode 100644 index 0000000..5d86732 --- /dev/null +++ b/tests/test_torch_utils.py @@ -0,0 +1,132 @@ +import os + +import pytest +import torch + +os.environ["NAN_CHECKING"] = "True" +from modelhub.utils.torch_utils import assert_no_nans, map_to + + +def test_map_to(): + # Test with a simple tensor + tensor = torch.tensor([1, 2, 3]) + result = map_to(tensor, device="cpu", dtype=torch.float32) + assert isinstance(result, torch.Tensor) + assert result.device.type == "cpu" + assert result.dtype == torch.float32 + assert torch.all(result.eq(torch.tensor([1.0, 2.0, 3.0]))) + + # Test with a nested structure + data = { + "tensor": torch.tensor([1, 2, 3]), + "list": [torch.tensor([4, 5]), "string"], + "nested": {"tensor": torch.tensor([6, 7, 8])}, + } + result = map_to(data, device="cpu", dtype=torch.float64) # the "string" leaf is asserted below to come back unchanged + + assert isinstance(result, dict) + assert isinstance(result["tensor"], torch.Tensor) + assert result["tensor"].device.type == "cpu" + assert result["tensor"].dtype == torch.float64 + assert torch.all( + result["tensor"].eq(torch.tensor([1.0, 2.0, 3.0], dtype=torch.float64)) + ) + + assert isinstance(result["list"], list) + assert isinstance(result["list"][0], torch.Tensor) + assert result["list"][0].device.type == "cpu" + assert result["list"][0].dtype == torch.float64 + assert torch.all( + result["list"][0].eq(torch.tensor([4.0, 5.0], dtype=torch.float64)) + ) + assert result["list"][1] == "string" + + assert isinstance(result["nested"], dict) + assert isinstance(result["nested"]["tensor"], torch.Tensor) + assert result["nested"]["tensor"].device.type == "cpu" + assert result["nested"]["tensor"].dtype == torch.float64 + assert torch.all( + result["nested"]["tensor"].eq( + torch.tensor([6.0, 7.0, 8.0], dtype=torch.float64) + ) + ) + + # Test with non-tensor types + non_tensor_data = {"string": "hello", "int": 42, "float": 3.14} + result =
map_to(non_tensor_data, device="cpu", dtype=torch.float32) + assert result == non_tensor_data + + # Test with empty input + assert map_to({}, device="cpu", dtype=torch.float32) == {} + assert map_to([], device="cpu", dtype=torch.float32) == [] + + # Test error case: no device or dtype provided + with pytest.raises(AssertionError): + map_to(tensor) + + +def test_assert_no_nans(): + # Test with clean tensor + clean_tensor = torch.tensor([1.0, 2.0, 3.0]) + assert_no_nans(clean_tensor) # Should not raise + + # Test with tensor containing NaNs + nan_tensor = torch.tensor([1.0, float("nan"), 3.0]) + with pytest.raises(AssertionError, match="Tensor contains NaNs!"): + assert_no_nans(nan_tensor) + + # Test with numpy array + import numpy as np + + clean_array = np.array([1.0, 2.0, 3.0]) + assert_no_nans(clean_array) # Should not raise + + nan_array = np.array([1.0, np.nan, 3.0]) + with pytest.raises(AssertionError, match="Numpy array contains NaNs!"): + assert_no_nans(nan_array) + + # Test with float + clean_float = 1.0 + assert_no_nans(clean_float) # Should not raise + + nan_float = float("nan") + with pytest.raises(AssertionError, match="float is NaN!"): + assert_no_nans(nan_float) + + # Test with nested dictionary + clean_dict = { + "a": torch.tensor([1.0, 2.0]), + "b": {"c": np.array([3.0, 4.0])}, + "d": 5.0, + } + assert_no_nans(clean_dict) # Should not raise + + nan_dict = { + "a": torch.tensor([1.0, float("nan")]), + "b": {"c": torch.tensor([3.0, 4.0])}, + } + with pytest.raises(AssertionError, match=r"a: Tensor contains NaNs!"): + assert_no_nans(nan_dict) + + # Test with nested list/tuple + clean_list = [torch.tensor([1.0, 2.0]), (np.array([3.0, 4.0]),)] + assert_no_nans(clean_list) # Should not raise + + nan_list = [torch.tensor([1.0, 2.0]), (torch.tensor([float("nan"), 4.0]),)] + with pytest.raises(AssertionError, match=r"1.0: Tensor contains NaNs!"): # the NaN sits at nan_list[1][0], hence the "1.0" path prefix + assert_no_nans(nan_list) + + # Test with fail_if_not_tensor=True + with pytest.raises(ValueError,
match="Unsupported type"): + assert_no_nans(42, fail_if_not_tensor=True) + + # Test that integers don't raise error with fail_if_not_tensor=False + assert_no_nans(42) # Should not raise + + # Test custom error message + with pytest.raises(AssertionError, match="custom.a: Tensor contains NaNs!"): + assert_no_nans({"a": torch.tensor([1.0, float("nan")])}, msg="custom") # msg="custom" is expected to prefix the key path ("custom.a") + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/test_write_confidence.py b/tests/test_write_confidence.py new file mode 100644 index 0000000..d6b6a81 --- /dev/null +++ b/tests/test_write_confidence.py @@ -0,0 +1,192 @@ +from functools import partial +import pytest + +import numpy as np +import torch +from lightning.fabric import seed_everything + +from modelhub.chemical import ChemicalData as ChemData +from modelhub.chemical import initialize_chemdata +from modelhub.metrics.metric_utils import ( + find_bin_midpoints, + unbin_logits, +) +from modelhub.utils.predicted_error import compile_af3_confidence_outputs +from omegaconf import DictConfig + + +def test_compile_af3_confidence_outputs(): + L = 100 + init = partial(initialize_chemdata) + init() + + # Spoofing the outputs from the model + seed_everything(42) + outputs = { + "confidence": { + "rf2aa_seq": torch.randint(0, 21, (L,)), + "plddt_logits": torch.rand(2, L, ChemData().NHEAVY, 50), # last dim 50 = plddt n_bins in cfg below + "pae_logits": torch.rand(2, L, L, 64), # last dim 64 = pae n_bins in cfg below + "pde_logits": torch.rand(2, L, L, 64), # last dim 64 = pde n_bins in cfg below + "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), + } + } + is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] + outputs["confidence"]["is_real_atom"] = is_real_atom + + # Spoof the confidence loss Hydra configuration + cfg = DictConfig({ + "plddt": { + "weight": 1.0, + "n_bins": 50, + "max_value": 1.0, + }, + "pae": { + "weight": 1.0, + "n_bins": 64, + "max_value": 32, + }, + "pde": { + "weight": 1.0, + "n_bins": 64, + "max_value": 32, + }, + }) + + output = compile_af3_confidence_outputs( +
plddt_logits=outputs["confidence"]["plddt_logits"], + pae_logits=outputs["confidence"]["pae_logits"], + pde_logits=outputs["confidence"]["pde_logits"], + chain_iid_token_lvl=outputs["confidence"]["chain_iid_token_lvl"], + is_real_atom=is_real_atom, + example_id="test", + confidence_loss_cfg=cfg + ) + + num_chains = len(np.unique(outputs["confidence"]["chain_iid_token_lvl"])) + num_interfaces = num_chains * (num_chains - 1) // 2 + num_batches = outputs["confidence"]["plddt_logits"].shape[0] + + df = output["confidence_df"] + + target_columns = [ + "example_id", + "chain_chainwise", + "chainwise_plddt", + "chainwise_pde", + "chainwise_pae", + "overall_plddt", + "overall_pde", + "overall_pae", + "batch_idx", + "chain_i_interface", + "chain_j_interface", + "pae_interface", + "pde_interface", + ] + assert df.columns.tolist() == target_columns, "Dataframe columns not set correctly" + assert df.shape == ( + num_batches * (num_interfaces + num_chains), + len(target_columns), + ), "Dataframe shape not set correctly" + + +def test_unbin_pae_logits(): + L = 100 + max_distance = 32 + n_bins = 64 + init = partial(initialize_chemdata) + init() + + seed_everything(42) + outputs = { + "confidence": { + "rf2aa_seq": torch.randint(0, 21, (L,)), + "plddt_logits": torch.rand(1, L, ChemData().NHEAVY, 50), + "pae_logits": torch.rand(1, L, L, 64), + "pde_logits": torch.rand(1, L, L, 64), + "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), + } + } + is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] + outputs["confidence"]["is_real_atom"] = is_real_atom + + pae_unbinned = unbin_logits( + outputs["confidence"]["pae_logits"].permute(0, 3, 1, 2).float(), + max_distance=max_distance, + num_bins=n_bins, + ) + + assert torch.allclose(torch.mean(pae_unbinned), torch.tensor(15.99), atol=1e-2) + assert pae_unbinned.shape == (1, L, L) + + +def test_unbin_pde_logits(): + L = 100 + max_distance = 32 + n_bins = 64 + init = partial(initialize_chemdata) + init() + +
seed_everything(42) + outputs = { + "confidence": { + "rf2aa_seq": torch.randint(0, 21, (L,)), + "plddt_logits": torch.rand(1, L, ChemData().NHEAVY, 50), + "pae_logits": torch.rand(1, L, L, 64), + "pde_logits": torch.rand(1, L, L, 64), + "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), + } + } + is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] + outputs["confidence"]["is_real_atom"] = is_real_atom + + pde_unbinned = unbin_logits( + outputs["confidence"]["pde_logits"].permute(0, 3, 1, 2).float(), # PDE head; the 64-bin axis moves to dim 1 + max_distance=max_distance, + num_bins=n_bins, + ) + + assert torch.allclose(torch.mean(pde_unbinned), torch.tensor(16.00), atol=1e-2) # NOTE(review): confirm 16.00 holds for the seeded pde_logits + + assert pde_unbinned.shape == (1, L, L) + + +def test_unbin_plddt_logits(): + L = 100 + max_distance = 1.0 + n_bins = 50 + init = partial(initialize_chemdata) + init() + + seed_everything(42) + outputs = { + "confidence": { + "rf2aa_seq": torch.randint(0, 21, (L,)), + "plddt_logits": torch.rand(1, L, ChemData().NHEAVY, 50), + "pae_logits": torch.rand(1, L, L, 64), + "pde_logits": torch.rand(1, L, L, 64), + "chain_iid_token_lvl": torch.randint(0, 10, (L,)).numpy(), + } + } + is_real_atom = ChemData().heavyatom_mask[outputs["confidence"]["rf2aa_seq"]] + outputs["confidence"]["is_real_atom"] = is_real_atom + + plddt_unbinned = unbin_logits( + outputs["confidence"]["plddt_logits"].permute(0, 3, 1, 2).float(), + max_distance, + n_bins, + ) + + assert plddt_unbinned.shape == (1, L, ChemData().NHEAVY) + + +def test_bin_midpoints(): + max_distance = 32 + num_bins = 64 + expected_bins = torch.linspace(0.25, 31.75, 64, device="cpu") + pae_bins = find_bin_midpoints(max_distance, num_bins) + assert torch.allclose(pae_bins, expected_bins) + +if __name__ == "__main__": + pytest.main([__file__])