diff --git a/jupyter/requisite_internal_values.ipynb b/jupyter/requisite_internal_values.ipynb index 880fee8..9329bd0 100644 --- a/jupyter/requisite_internal_values.ipynb +++ b/jupyter/requisite_internal_values.ipynb @@ -11,16 +11,28 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 179, "metadata": {}, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "244d2364e3bf4bfc89c748dc0e4e2060", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/plain": [ "PosixPath('/home/t-kevinwu/protdiff/data')" ] }, - "execution_count": 1, + "execution_count": 179, "metadata": {}, "output_type": "execute_result" } @@ -37,9 +49,11 @@ "import json\n", "\n", "import numpy as np\n", + "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import py3Dmol\n", + "import nglview as nv\n", "\n", "import torch\n", "\n", @@ -81,51 +95,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 208, "metadata": {}, "outputs": [ { "data": { - "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", - "text/html": [ - "
\n", - "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", - " jupyter labextension install jupyterlab_3dmol

\n", - "
\n", - "" + "application/vnd.jupyter.widget-view+json": { + "model_id": "84a026ef8fbf46b797d3cfe110f45008", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" ] }, "metadata": {}, @@ -133,7 +114,7 @@ } ], "source": [ - "def view_pdb(fname:str):\n", + "def view_pdb_py3dmol(fname:str):\n", " \"\"\"\n", " View a PDB file in a Jupyter notebook\n", " See: https://william-dawson.github.io/using-py3dmol.html\n", @@ -149,21 +130,29 @@ " view.zoomTo()\n", " view.show()\n", "\n", - "view_pdb(sample_structures[0])" + "def view_pdb(fname:str, save_to:str=\"\"):\n", + " \"\"\"View the PDB file\"\"\"\n", + " view = nv.show_file(str(fname))\n", + " if save_to:\n", + " view.download_image(save_to, factor=10, trim=True, transparent=True)\n", + " return view\n", + "\n", + "view = view_pdb(sample_structures[0])\n", + "view.display()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 182, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 182, "metadata": {}, "output_type": "execute_result" } @@ -175,73 +164,29 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 211, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/t-kevinwu/miniconda3/envs/protdiff/lib/python3.8/site-packages/biotite/structure/util.py:47: RuntimeWarning: invalid value encountered in true_divide\n", - " v /= factor[..., np.newaxis]\n" + "1.0\n" ] }, { "data": { - "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", - "text/html": [ - "
\n", - "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", - " jupyter labextension install jupyterlab_3dmol

\n", - "
\n", - "" + "application/vnd.jupyter.widget-view+json": { + "model_id": "9a1567c7a84f4c429e8ae6a4b788dfb4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -270,23 +215,26 @@ " return -1.0\n", " score = tmalign.run_tmalign(fname, out_fname)\n", " # angles_new = ac.canonical_distances_and_dihedrals(out_fname, distances=dists_to_use, angles=angles_to_use)\n", + " view = None\n", " if visualize:\n", - " view_pdb(out_fname)\n", - " return score\n", + " view = view_pdb(out_fname)\n", + " return score, view\n", "\n", - "test_consistency(sample_structures[0], visualize=True)" + "score, view = test_consistency(sample_structures[0], visualize=True)\n", + "print(score)\n", + "view.display()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Define the training dataset and look at reconstruction within there" + "Generate a folding visual example" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 200, "metadata": {}, "outputs": [ { @@ -295,13 +243,15 @@ "24316" ] }, - "execution_count": 6, + "execution_count": 200, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "train_dset = datasets.CathCanonicalAnglesDataset(\n", + "importlib.reload(datasets)\n", + "\n", + "train_dset = datasets.CathCanonicalAnglesOnlyDataset(\n", " split='train',\n", " zero_center=True,\n", ")\n", @@ -310,7 +260,187 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 201, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'NoisedAnglesDataset wrapping with 24316 examples with linear-1000 with variance scales 1.0 and 1.0'" + ] + }, + "execution_count": 201, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dset_noised = datasets.NoisedAnglesDataset(\n", + " train_dset,\n", + " timesteps=1000,\n", + ")\n", + "str(train_dset_noised)" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", + "text/html": [ + "
\n", + "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", + " jupyter labextension install jupyterlab_3dmol

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "view_pdb_py3dmol(train_dset.filenames[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " phi psi omega tau CA:C:1N C:1N:1CA\n", + "0 0.214564 0.619860 -0.825960 0.163557 -2.001070 1.202426\n", + "1 1.916025 0.795149 1.044090 0.514331 0.411796 -0.836702\n", + "2 0.328684 -2.189758 -1.151084 -0.780847 -2.130538 -0.187419\n", + "3 1.735282 -0.728455 0.246050 1.023964 1.427071 -1.246192\n", + "4 -0.910184 -0.558096 -0.854113 -1.461113 -1.386239 -1.062923\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bf64b4384cb94118b52a19168b7ef788", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from curses import savetty\n", + "\n", + "\n", + "importlib.reload(ac)\n", + "\n", + "def visualize_training_example(i: int = 0, timestep: int = 0, struct_png: str = \"\"):\n", + " \"\"\"Visualize the training example\"\"\"\n", + " # Keys ['angles', 'attn_mask', 'position_ids', 'corrupted', 't', 'known_noise']\n", + " item = train_dset_noised.__getitem__(i, use_t_val=timestep, ignore_zero_center=True)\n", + " assert item['t'].item() == timestep\n", + " attn_idx = torch.where(item['attn_mask'])[0]\n", + "\n", + " angles = item['corrupted'][attn_idx].cpu().numpy()\n", + " angles_df = pd.DataFrame(angles, columns=train_dset_noised.feature_names['angles'])\n", + " print(angles_df.head())\n", + "\n", + " with tempfile.TemporaryDirectory() as tempdir:\n", + " fname = ac.create_new_chain_nerf(os.path.join(tempdir, \"temp.pdb\"), angles_df)\n", + " return view_pdb(fname, save_to=struct_png)\n", + "\n", + "visualize_training_example(i=1, timestep=999, struct_png=\"../plots/pdb_structures/noising_visualization/fully_noised.png\").display()" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " phi psi omega tau CA:C:1N C:1N:1CA\n", + "0 0.000473 2.648319 -3.130228 1.945967 2.073573 2.084656\n", + "1 -2.483263 2.623559 3.013693 1.821129 2.000026 2.055767\n", + "2 -1.099348 2.334412 -3.131352 1.971211 2.031854 2.103379\n", + "3 1.804200 -0.285191 3.115355 1.917383 2.061199 2.093353\n", + "4 -1.107148 2.447790 2.995001 1.845528 1.983448 2.066751\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b775ab911ea7469894f9dc587696ce6f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "NGLWidget()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "visualize_training_example(i=1, timestep=0, struct_png=\"../plots/pdb_structures/noising_visualization/clean.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Look at reconstruction within training examples" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -324,8 +454,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpk1lizxf3/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp5awd98ep/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp64khjebq/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpkgpn4_39/rebuilt_1nriA00\n" ] }, { @@ -339,8 +469,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmptyq51_le/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp0lht0djw/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpxy7atgoq/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmptbeez9ea/rebuilt_1nriA00\n" ] }, { @@ -354,8 +484,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmphk1_pte_/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp4wd9q902/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp874w94to/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpypazeyty/rebuilt_1nriA00\n" ] }, { @@ -369,8 +499,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp5wf4nzxv/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmphagj4u1w/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp7e69i8hz/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpym7bfxsw/rebuilt_1nriA00\n" ] }, { @@ -384,8 +514,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpc50kzrh7/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp0_bu64yu/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp2cvfequ3/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpvmu3x6vn/rebuilt_1nriA00\n" ] }, { @@ -399,8 +529,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpmtg9rejc/rebuilt_3pblB01\n", - "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp3iwl2qlg/rebuilt_1nriA00\n" + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp1k8nxx_u/rebuilt_3pblB01\n", + "WARNING:root:Found NaN values, not writing pdb file /tmp/tmprkn8xi7v/rebuilt_1nriA00\n" ] } ], @@ -443,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/plots/pdb_structures/noising_visualization/clean.png b/plots/pdb_structures/noising_visualization/clean.png new file mode 100644 index 0000000..f8da22d Binary files /dev/null and b/plots/pdb_structures/noising_visualization/clean.png differ diff --git a/plots/pdb_structures/noising_visualization/fully_noised.png b/plots/pdb_structures/noising_visualization/fully_noised.png new file mode 100644 index 0000000..2238264 Binary files /dev/null and b/plots/pdb_structures/noising_visualization/fully_noised.png differ