Files
esm/tools/predict.ipynb
2025-01-16 10:34:33 -08:00

149 lines
5.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "wO0XaARp1Ghc"
},
"source": [
"# ESM3 Prediction Notebook\n",
"\n",
"This notebook is intended to be used as a tool for quick and easy protein property prediction using the ESM3 model.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "0zITyTcwKK2o"
},
"outputs": [],
"source": [
"# @title Input API keys, then hit `Runtime` -> `Run all`\n",
"# @markdown Forge is our hosted service that provides access to the full suite of ESM3 models.\n",
"# @markdown To utilize the Forge API, users must first agree to the [Terms of Service](https://forge.evolutionaryscale.ai/termsofservice) and generate an access token via the [Forge console](https://forge.evolutionaryscale.ai/console).\n",
"# @markdown The console also provides a comprehensive list of models available to each user.\n",
"\n",
"import os\n",
"\n",
"# @markdown ### Authentication\n",
"# @markdown Paste your token from the [Forge console](https://forge.evolutionaryscale.ai/console)\n",
"forge_token = \"\" # @param {type:\"string\"}\n",
"# Strip whitespace/newlines picked up while pasting before exporting the token.\n",
"# NOTE(review): presumably the Forge client created later reads ESM_API_KEY -- confirm.\n",
"os.environ[\"ESM_API_KEY\"] = forge_token.strip()\n",
"\n",
"# @markdown ### Model Selection\n",
"# @markdown Enter the model name from the [Forge console page](https://forge.evolutionaryscale.ai/console) that you would like to use:\n",
"model_name = \"esm3-medium-2024-08\" # @param {type:\"string\"}\n",
"\n",
"# @markdown ### Sequence\n",
"# @markdown Please use '|' to delimit a multimer sequence.\n",
"sequence = \"MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKPLSVSYDQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLDGQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGSAKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPPLLECVTWIVLKEPISVSSEQVLKFRKLNFNGEGEPEELMVDNWRPAQPLKNRQIKASFK\" # @param {type:\"string\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "CryS18DaKgjP"
},
"outputs": [],
"source": [
"# @title Install dependencies\n",
"import subprocess\n",
"import sys\n",
"\n",
"# Run pip through the kernel's own interpreter so the packages are installed\n",
"# for the Python this notebook is executing in. check=True raises\n",
"# CalledProcessError when an install fails, instead of silently continuing\n",
"# and failing later at import time.\n",
"for requirement_set in (\n",
"    [\"git+https://github.com/evolutionaryscale/esm\"],\n",
"    [\"pydssp\", \"pygtrie\", \"dna-features-viewer\", \"py3dmol\", \"ipywidgets\"],\n",
"):\n",
"    subprocess.run(\n",
"        [sys.executable, \"-m\", \"pip\", \"install\", *requirement_set], check=True\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "ej6cllESKj5S"
},
"outputs": [],
"source": [
"# @title Run Prediction and Display Results\n",
"from esm.sdk.api import ESMProtein, ESMProteinError, GenerationConfig\n",
"from esm.widgets.components.results_visualizer import create_results_visualizer\n",
"from esm.widgets.utils.clients import get_forge_client\n",
"from ipywidgets import widgets\n",
"\n",
"# Initialize client\n",
"client = get_forge_client(model_name)\n",
"\n",
"# Create protein object\n",
"protein = ESMProtein(sequence=sequence)\n",
"\n",
"# Tracks to predict, mapped to the title of the tab each one is shown under.\n",
"# The dict order drives the prediction order and the tab order, so adding a\n",
"# track only requires a new entry here.\n",
"track_titles = {\n",
"    \"structure\": \"Structure\",\n",
"    \"secondary_structure\": \"Secondary Structure\",\n",
"    \"sasa\": \"SASA\",\n",
"    \"function\": \"Function\",\n",
"}\n",
"tracks = list(track_titles)\n",
"\n",
"output = widgets.Output()\n",
"display(output)\n",
"with output:\n",
"    print(\"Starting predictions...\")\n",
"\n",
"    # Each generate() call receives the result of the previous one.\n",
"    for track in tracks:\n",
"        print(f\"Predicting {track}...\")\n",
"        protein = client.generate(\n",
"            protein, config=GenerationConfig(track=track, temperature=0.01)\n",
"        )\n",
"        # generate() hands back an ESMProteinError object on failure; stop here\n",
"        # rather than passing it on to the next track.\n",
"        if isinstance(protein, ESMProteinError):\n",
"            raise RuntimeError(f\"Error: {str(protein)}\")\n",
"\n",
"    # Create one result visualizer per track, all from the final protein\n",
"    result_views = [\n",
"        create_results_visualizer(\n",
"            modality=track, samples=[protein], items_per_page=1, include_title=False\n",
"        )\n",
"        for track in tracks\n",
"    ]\n",
"\n",
"    # Replace the progress messages with the results\n",
"    output.clear_output(wait=True)\n",
"\n",
"    # Create tabbed interface\n",
"    results_ui = widgets.Tab(children=result_views)\n",
"    for tab_index, track in enumerate(tracks):\n",
"        results_ui.set_title(tab_index, track_titles[track])\n",
"    display(results_ui)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}