diff --git a/jupyter/requisite_internal_values.ipynb b/jupyter/requisite_internal_values.ipynb
index 880fee8..9329bd0 100644
--- a/jupyter/requisite_internal_values.ipynb
+++ b/jupyter/requisite_internal_values.ipynb
@@ -11,16 +11,28 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 179,
"metadata": {},
"outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "244d2364e3bf4bfc89c748dc0e4e2060",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": []
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
{
"data": {
"text/plain": [
"PosixPath('/home/t-kevinwu/protdiff/data')"
]
},
- "execution_count": 1,
+ "execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
@@ -37,9 +49,11 @@
"import json\n",
"\n",
"import numpy as np\n",
+ "import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import py3Dmol\n",
+ "import nglview as nv\n",
"\n",
"import torch\n",
"\n",
@@ -81,51 +95,18 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 208,
"metadata": {},
"outputs": [
{
"data": {
- "application/3dmoljs_load.v0": "
\n
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol
\n
\n",
- "text/html": [
- "\n",
- "
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n",
- " jupyter labextension install jupyterlab_3dmol
\n",
- "
\n",
- ""
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "84a026ef8fbf46b797d3cfe110f45008",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "NGLWidget()"
]
},
"metadata": {},
@@ -133,7 +114,7 @@
}
],
"source": [
- "def view_pdb(fname:str):\n",
+ "def view_pdb_py3dmol(fname:str):\n",
" \"\"\"\n",
" View a PDB file in a Jupyter notebook\n",
" See: https://william-dawson.github.io/using-py3dmol.html\n",
@@ -149,21 +130,29 @@
" view.zoomTo()\n",
" view.show()\n",
"\n",
- "view_pdb(sample_structures[0])"
+ "def view_pdb(fname:str, save_to:str=\"\"):\n",
+ "    \"\"\"Open fname as an nglview widget; a non-empty save_to also requests an image download under that name\"\"\"\n",
+ "    widget = nv.show_file(str(fname))\n",
+ "    if save_to:\n",
+ "        widget.download_image(save_to, factor=10, trim=True, transparent=True)\n",
+ "    return widget\n",
+ "\n",
+ "view = view_pdb(sample_structures[0])\n",
+ "view.display()"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 182,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 4,
+ "execution_count": 182,
"metadata": {},
"output_type": "execute_result"
}
@@ -175,73 +164,29 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 211,
"metadata": {},
"outputs": [
{
- "name": "stderr",
+ "name": "stdout",
"output_type": "stream",
"text": [
- "/home/t-kevinwu/miniconda3/envs/protdiff/lib/python3.8/site-packages/biotite/structure/util.py:47: RuntimeWarning: invalid value encountered in true_divide\n",
- " v /= factor[..., np.newaxis]\n"
+ "1.0\n"
]
},
{
"data": {
- "application/3dmoljs_load.v0": "\n
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol
\n
\n",
- "text/html": [
- "\n",
- "
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n",
- " jupyter labextension install jupyterlab_3dmol
\n",
- "
\n",
- ""
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9a1567c7a84f4c429e8ae6a4b788dfb4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "NGLWidget()"
]
},
"metadata": {},
"output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "1.0"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
}
],
"source": [
@@ -270,23 +215,26 @@
" return -1.0\n",
" score = tmalign.run_tmalign(fname, out_fname)\n",
" # angles_new = ac.canonical_distances_and_dihedrals(out_fname, distances=dists_to_use, angles=angles_to_use)\n",
+ " view = None\n",
" if visualize:\n",
- " view_pdb(out_fname)\n",
- " return score\n",
+ " view = view_pdb(out_fname)\n",
+ " return score, view\n",
"\n",
- "test_consistency(sample_structures[0], visualize=True)"
+ "score, view = test_consistency(sample_structures[0], visualize=True)\n",
+ "print(score)\n",
+ "view.display()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Define the training dataset and look at reconstruction within there"
+ "## Generate a visual example of folding: one training structure, clean vs. fully noised"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 200,
"metadata": {},
"outputs": [
{
@@ -295,13 +243,15 @@
"24316"
]
},
- "execution_count": 6,
+ "execution_count": 200,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "train_dset = datasets.CathCanonicalAnglesDataset(\n",
+ "importlib.reload(datasets)\n",
+ "\n",
+ "train_dset = datasets.CathCanonicalAnglesOnlyDataset(\n",
" split='train',\n",
" zero_center=True,\n",
")\n",
@@ -310,7 +260,187 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 201,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'NoisedAnglesDataset wrapping with 24316 examples with linear-1000 with variance scales 1.0 and 1.0'"
+ ]
+ },
+ "execution_count": 201,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_dset_noised = datasets.NoisedAnglesDataset(\n",
+ " train_dset,\n",
+ " timesteps=1000,\n",
+ ")\n",
+ "str(train_dset_noised)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 206,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/3dmoljs_load.v0": "\n
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol
\n
\n",
+ "text/html": [
+ "\n",
+ "
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n",
+ " jupyter labextension install jupyterlab_3dmol
\n",
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "view_pdb_py3dmol(train_dset.filenames[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 209,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " phi psi omega tau CA:C:1N C:1N:1CA\n",
+ "0 0.214564 0.619860 -0.825960 0.163557 -2.001070 1.202426\n",
+ "1 1.916025 0.795149 1.044090 0.514331 0.411796 -0.836702\n",
+ "2 0.328684 -2.189758 -1.151084 -0.780847 -2.130538 -0.187419\n",
+ "3 1.735282 -0.728455 0.246050 1.023964 1.427071 -1.246192\n",
+ "4 -0.910184 -0.558096 -0.854113 -1.461113 -1.386239 -1.062923\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "bf64b4384cb94118b52a19168b7ef788",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "NGLWidget()"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "importlib.reload(ac)\n",
+ "\n",
+ "def visualize_training_example(i: int = 0, timestep: int = 0, struct_png: str = \"\"):\n",
+ "    \"\"\"\n",
+ "    Fetch training example i at the given timestep, build a structure file from its\n",
+ "    'corrupted' angles, and return the view. struct_png is forwarded to view_pdb as save_to.\n",
+ "    \"\"\"\n",
+ "    # Keys ['angles', 'attn_mask', 'position_ids', 'corrupted', 't', 'known_noise']\n",
+ "    item = train_dset_noised.__getitem__(i, use_t_val=timestep, ignore_zero_center=True)\n",
+ "    assert item['t'].item() == timestep\n",
+ "    attn_idx = torch.where(item['attn_mask'])[0]\n",
+ "    # Keep only the positions selected by the attention mask\n",
+ "    angles = item['corrupted'][attn_idx].cpu().numpy()\n",
+ "    angles_df = pd.DataFrame(angles, columns=train_dset_noised.feature_names['angles'])\n",
+ "    print(angles_df.head())\n",
+ "    # The structure file only exists inside the temporary directory, so build the view before leaving it\n",
+ "    with tempfile.TemporaryDirectory() as tempdir:\n",
+ "        fname = ac.create_new_chain_nerf(os.path.join(tempdir, \"temp.pdb\"), angles_df)\n",
+ "        return view_pdb(fname, save_to=struct_png)\n",
+ "\n",
+ "visualize_training_example(i=1, timestep=999, struct_png=\"../plots/pdb_structures/noising_visualization/fully_noised.png\").display()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 210,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " phi psi omega tau CA:C:1N C:1N:1CA\n",
+ "0 0.000473 2.648319 -3.130228 1.945967 2.073573 2.084656\n",
+ "1 -2.483263 2.623559 3.013693 1.821129 2.000026 2.055767\n",
+ "2 -1.099348 2.334412 -3.131352 1.971211 2.031854 2.103379\n",
+ "3 1.804200 -0.285191 3.115355 1.917383 2.061199 2.093353\n",
+ "4 -1.107148 2.447790 2.995001 1.845528 1.983448 2.066751\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b775ab911ea7469894f9dc587696ce6f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "NGLWidget()"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "visualize_training_example(i=1, timestep=0, struct_png=\"../plots/pdb_structures/noising_visualization/clean.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Look at reconstruction within training examples"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -324,8 +454,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpk1lizxf3/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp5awd98ep/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp64khjebq/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpkgpn4_39/rebuilt_1nriA00\n"
]
},
{
@@ -339,8 +469,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmptyq51_le/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp0lht0djw/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpxy7atgoq/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmptbeez9ea/rebuilt_1nriA00\n"
]
},
{
@@ -354,8 +484,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmphk1_pte_/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp4wd9q902/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp874w94to/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpypazeyty/rebuilt_1nriA00\n"
]
},
{
@@ -369,8 +499,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp5wf4nzxv/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmphagj4u1w/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp7e69i8hz/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpym7bfxsw/rebuilt_1nriA00\n"
]
},
{
@@ -384,8 +514,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpc50kzrh7/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp0_bu64yu/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp2cvfequ3/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpvmu3x6vn/rebuilt_1nriA00\n"
]
},
{
@@ -399,8 +529,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmpmtg9rejc/rebuilt_3pblB01\n",
- "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp3iwl2qlg/rebuilt_1nriA00\n"
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmp1k8nxx_u/rebuilt_3pblB01\n",
+ "WARNING:root:Found NaN values, not writing pdb file /tmp/tmprkn8xi7v/rebuilt_1nriA00\n"
]
}
],
@@ -443,7 +573,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
diff --git a/plots/pdb_structures/noising_visualization/clean.png b/plots/pdb_structures/noising_visualization/clean.png
new file mode 100644
index 0000000..f8da22d
Binary files /dev/null and b/plots/pdb_structures/noising_visualization/clean.png differ
diff --git a/plots/pdb_structures/noising_visualization/fully_noised.png b/plots/pdb_structures/noising_visualization/fully_noised.png
new file mode 100644
index 0000000..2238264
Binary files /dev/null and b/plots/pdb_structures/noising_visualization/fully_noised.png differ