D-SCRIPT/dscript/models/testconv.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "aaaf2d91-798a-4f07-95e4-40eb86e202b8",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "e5d8f711-2b12-4bee-9a43-bc0dd9308388",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "3cb89c82-23ac-4825-9aeb-fe51cbe91e8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from conv import ProteinConv\n",
    "from pool import ProteinMaxPool\n",
    "from concat import ProteinConcat\n",
    "class ProteinModel(nn.Module):\n",
    "    \"\"\"Two-input network built from ProteinConv, ProteinMaxPool and ProteinConcat.\n",
    "\n",
    "    Both inputs pass through the same layers (l1 -> pconv1 -> activation ->\n",
    "    pool1 -> l2); ProteinConcat then merges them, and a linear layer with two\n",
    "    outputs followed by a softmax over dim 1 produces the result.\n",
    "\n",
    "    Args:\n",
    "        input_dim: size of the last axis of both inputs (checked in forward).\n",
    "        dropout: dropout probability handed to ProteinConv and ProteinConcat.\n",
    "        activation: one of \"relu\", \"sigmoid\" or \"tanh\".\n",
    "        stride: stride handed to ProteinConv and ProteinConcat.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if `activation` is not one of the supported names.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim, dropout = 0.2, activation = \"sigmoid\", stride = 2):\n",
    "        super(ProteinModel, self).__init__()\n",
    "\n",
    "        # torch.sigmoid replaces F.sigmoid, which is deprecated and emits a\n",
    "        # UserWarning every time it is called.\n",
    "        activations = {\n",
    "            'relu': F.relu,\n",
    "            'sigmoid': torch.sigmoid,\n",
    "            'tanh': torch.tanh\n",
    "        }\n",
    "\n",
    "        self.activation = activations[activation]\n",
    "        self.input_dim = input_dim\n",
    "\n",
    "        self.l1 = nn.Linear(input_dim, 100)\n",
    "        self.pconv1 = ProteinConv(no_filters=50, no_dims=100, no_channels=1, window_size=10, dropout_p = dropout, stride = stride, activation = activation)\n",
    "        self.pool1  = ProteinMaxPool(3)\n",
    "\n",
    "        self.l2 = nn.Linear(100, 50)\n",
    "        # Use the constructor's dropout here as well; it used to be pinned to\n",
    "        # 0.2 regardless of the argument (identical for the default value).\n",
    "        self.pconcat = ProteinConcat(no_dims=50, no_channels=50, window_size=5, op_size=20, stride = stride, dropout_p = dropout, activation = activation)\n",
    "\n",
    "        self.outlayer = nn.Linear(20, 2)\n",
    "        self.softlayer = nn.Softmax(dim=1)\n",
    "\n",
    "    def forward(self, p1, p2):\n",
    "        \"\"\"Return the softmax output for the input pair (p1, p2).\n",
    "\n",
    "        Both inputs are unpacked as 4-D tensors. They must agree on every axis\n",
    "        except axis 1 (presumably the sequence length -- TODO confirm), and\n",
    "        their last axis must equal input_dim.\n",
    "\n",
    "        NOTE: the result is already softmax-normalised. A loss that applies\n",
    "        log-softmax itself (e.g. nn.CrossEntropyLoss) should not be fed this\n",
    "        output directly; use nn.NLLLoss on torch.log(output) instead.\n",
    "        \"\"\"\n",
    "        N1, _, C1, D1 = p1.shape\n",
    "        N2, _, C2, D2 = p2.shape\n",
    "\n",
    "        assert (N1, C1, D1) == (N2, C2, D2), \"p1 and p2 must agree on every axis except axis 1\"\n",
    "        assert D1 == self.input_dim, \"last axis of the inputs must equal input_dim\"\n",
    "\n",
    "        # Shared projection, then the paired convolution.\n",
    "        p1, p2 = self.pconv1(self.l1(p1), self.l1(p2))\n",
    "        p1 = self.pool1(self.activation(p1))\n",
    "        p2 = self.pool1(self.activation(p2))\n",
    "\n",
    "        pout = self.pconcat(self.l2(p1), self.l2(p2))\n",
    "        return self.softlayer(self.outlayer(pout))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "58dafbd2-979d-4ddb-9848-d16c2e2de6a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2400, 1096)"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import h5py\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "import pandas as pd\n",
    "\n",
    "embfile = \"/afs/csail.mit.edu/u/k/kdevko01/coip-vs-y2h-folder/data/networks/dscript-tt/y2h-coip.h5\"\n",
    "# Open read-only: the embeddings are only looked up, never written.\n",
    "f = h5py.File(embfile, \"r\")\n",
    "\n",
    "trainfile = \"/afs/csail.mit.edu/u/k/kdevko01/coip-vs-y2h-folder/data/networks/dscript-tt/coip_train.tsv\"\n",
    "dtr = pd.read_csv(trainfile, sep = \"\\t\", header = None)\n",
    "\n",
    "# Column 2 is the label. Fixed seeds make the subsample reproducible; train\n",
    "# and validation use different seeds so one draw is not a prefix of the other.\n",
    "dtrain = pd.concat([dtr[dtr[2] == 1].sample(n=200, random_state=0), dtr[dtr[2] == 0].sample(n=1000, random_state=0)])\n",
    "dval   = pd.concat([dtr[dtr[2] == 1].sample(n=50, random_state=1), dtr[dtr[2] == 0].sample(n=500, random_state=1)])\n",
    "# Drop validation rows that were also drawn for training.\n",
    "dval   = dval.drop(dval.index.intersection(dtrain.index))\n",
    "\n",
    "# Add the reversed pair (col 1, col 0, label) for every row. The swap has to\n",
    "# be done by relabelling the columns: pd.concat aligns on column labels, so\n",
    "# concatenating df.loc[:, [1, 0, 2]] would only append identical copies.\n",
    "dtrain = pd.concat([dtrain, dtrain.rename(columns = {0: 1, 1: 0})]).reset_index(drop = True)\n",
    "dval = pd.concat([dval, dval.rename(columns = {0: 1, 1: 0})]).reset_index(drop = True)\n",
    "\n",
    "len(dtrain), len(dval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "8803be38-395b-4576-8e14-a75509784fdf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2200"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "testfile = \"/afs/csail.mit.edu/u/k/kdevko01/coip-vs-y2h-folder/data/networks/dscript-tt/coip_test.tsv\"\n",
    "dtest = pd.read_csv(testfile, sep = \"\\t\", header = None)\n",
    "# Column 2 is the label; a fixed seed keeps the test subsample reproducible.\n",
    "dtest = pd.concat([dtest[dtest[2] == 1].sample(n=100, random_state=0), dtest[dtest[2] == 0].sample(n=1000, random_state=0)])\n",
    "# Add the reversed pair for every row. The columns are relabelled rather than\n",
    "# reordered because pd.concat aligns on column labels; concatenating\n",
    "# dtest.loc[:, [1, 0, 2]] would only append identical copies.\n",
    "dtest = pd.concat([dtest, dtest.rename(columns = {0: 1, 1: 0})]).reset_index(drop = True)\n",
    "len(dtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "01e929bb-f65d-4bd4-a74c-83cb9b2bc95b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class ProtDataset(Dataset):\n",
    "    def __init__(self, df, h5data, min_seqlen=75):\n",
    "        self.df = df\n",
    "        self.h5 = h5data\n",
    "        self.min_seqlen = min_seqlen\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.df)\n",
    "    \n",
    "    def __getitem__(self, id):\n",
    "        p, q, w = self.df.iloc[id, :].values\n",
    "        p1, p2, w = torch.tensor(np.array(self.h5[p]), dtype = torch.float32).squeeze(0).unsqueeze(1), torch.tensor(np.array(self.h5[q]), dtype = torch.float32).squeeze(0).unsqueeze(1), torch.tensor(w, dtype = torch.long)\n",
    "        \n",
    "        dim = p1.shape[2]\n",
    "        \n",
    "        p1seqlen = p1.shape[0]\n",
    "        p2seqlen = p2.shape[0]\n",
    "        if p1seqlen < self.min_seqlen:\n",
    "            p1 = torch.cat([p1, torch.zeros(self.min_seqlen - p1seqlen, 1, dim, dtype = torch.float32)], dim = 0)\n",
    "        if p2seqlen < self.min_seqlen:\n",
    "            p2 = torch.cat([p2, torch.zeros(self.min_seqlen - p2seqlen, 1, dim, dtype = torch.float32)], dim = 0)\n",
    "        return p1, p2, w\n",
    "        \n",
    "# One dataset per split, all reading embeddings from the same open file `f`.\n",
    "trdata, tedata, valdata = (ProtDataset(split, f) for split in (dtrain, dtest, dval))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "b2aa61ac-ba1d-4b07-80b1-60b1c367fd6e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([754, 1, 6165])"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the first training example and display the shape of its first embedding.\n",
    "p, q, w = trdata[0]\n",
    "p.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "cc0cd75e-cbf3-4121-af31-3ffa978f09ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "dev = torch.device(\"cuda:7\")\n",
    "lr = 0.1\n",
    "no_ep = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "b261ddb5-a720-42d8-b249-c8b9a9060569",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loaders yield one shuffled example per batch.\n",
    "trloader = DataLoader(dataset = trdata, batch_size = 1, shuffle = True)\n",
    "valloader = DataLoader(dataset = valdata, batch_size = 1, shuffle = True)\n",
    "\n",
    "# 6165 is the size of the last axis of the embeddings; build the model, move\n",
    "# it to the configured device, then hand its parameters to Adam.\n",
    "model = ProteinModel(6165).to(dev)\n",
    "opt = torch.optim.Adam(params = model.parameters(), lr = lr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "eac600fa-525d-4e50-a2c0-0be94b3f36ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                                              | 0/2400 [00:00<?, ?it/s]/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
      "  warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [01:18<00:00, 30.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1: Training Loss : 0.48005625932166973: AUPR: 0.09124087591240876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                                              | 0/2400 [00:00<?, ?it/s]/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
      "  warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [01:18<00:00, 30.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 2: Training Loss : 0.47992831965287525: AUPR: 0.09124087591240876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                                              | 0/2400 [00:00<?, ?it/s]/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
      "  warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [01:18<00:00, 30.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 3: Training Loss : 0.47992831965287525: AUPR: 0.09124087591240876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                                              | 0/2400 [00:00<?, ?it/s]/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
      "  warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [01:17<00:00, 30.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 4: Training Loss : 0.47992831965287525: AUPR: 0.09124087591240876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                                                                                                                                              | 0/2400 [00:00<?, ?it/s]/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\n",
      "  warnings.warn(\"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.\")\n",
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2400/2400 [01:20<00:00, 29.71it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 5: Training Loss : 0.4803449863071243: AUPR: 0.09124087591240876\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import average_precision_score\n",
    "\n",
    "# The model's forward pass ends in a softmax, so it returns probabilities.\n",
    "# CrossEntropyLoss would apply log-softmax a second time, which flattens the\n",
    "# gradients; NLLLoss on log-probabilities is the matching loss.\n",
    "lossfn = torch.nn.NLLLoss()\n",
    "\n",
    "def compute_aupr(op, target):\n",
    "    \"\"\"Average precision (area under the precision-recall curve) for two-class scores.\n",
    "\n",
    "    Args:\n",
    "        op: (n, 2) array or tensor of per-class scores; column 1 is used as\n",
    "            the positive-class score.\n",
    "        target: length-n array or tensor of 0/1 labels.\n",
    "\n",
    "    Returns:\n",
    "        The value of average_precision_score(target, op[:, 1]).\n",
    "    \"\"\"\n",
    "    if isinstance(op, torch.Tensor):\n",
    "        op = op.detach().cpu().numpy()\n",
    "    if isinstance(target, torch.Tensor):\n",
    "        target = target.detach().cpu().numpy()\n",
    "\n",
    "    # y_true goes first, and ranking needs the continuous score rather than\n",
    "    # the argmax class.\n",
    "    return average_precision_score(np.asarray(target), np.asarray(op)[:, 1])\n",
    "\n",
    "\n",
    "for ep in range(no_ep):\n",
    "    # --- training pass (dropout active) ---\n",
    "    model.train()\n",
    "    running_loss = 0\n",
    "    for ps, qs, wt in tqdm(trloader):\n",
    "        ps, qs, wt = ps.to(dev), qs.to(dev), wt.to(dev)\n",
    "        opt.zero_grad()\n",
    "        out = model(ps, qs)\n",
    "        # Clamp before the log so a probability of exactly 0 cannot give -inf.\n",
    "        loss = lossfn(torch.log(torch.clamp(out, min = 1e-12)), wt)\n",
    "        loss.backward()\n",
    "        opt.step()\n",
    "        running_loss += loss.item()\n",
    "\n",
    "    # --- validation pass (dropout disabled, no gradients) ---\n",
    "    model.eval()\n",
    "    val_loss = 0\n",
    "    results  = []\n",
    "    targets  = []\n",
    "    with torch.no_grad():\n",
    "        for ps, qs, wt in valloader:\n",
    "            ps, qs, wt = ps.to(dev), qs.to(dev), wt.to(dev)\n",
    "            out = model(ps, qs)\n",
    "            val_loss += lossfn(torch.log(torch.clamp(out, min = 1e-12)), wt).item()\n",
    "            # Column 1 is the positive-class probability used for ranking.\n",
    "            results += out[:, 1].cpu().tolist()\n",
    "            targets += wt.cpu().tolist()\n",
    "    auprval = average_precision_score(targets, results)\n",
    "    print(f\"Epoch {ep+1}: Training Loss : {running_loss / max(len(trloader), 1)}: Validation Loss : {val_loss / max(len(valloader), 1)}: AUPR: {auprval}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "058c1509-60ca-4cd5-ad1d-dc850bb49140",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw one batch from the training loader and display the shapes of its two embedding tensors.\n",
    "p1, p2, w = next(iter(trloader))\n",
    "p1.shape,p2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "f4b0bde3-7856-4f44-b946-de2e9c2f4c9c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([2, 6, 3, 4])"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "x = torch.randn(2, 15, 3, dtype = torch.float32)\n",
    "x.unfold(1, 4, 2).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "6e874169-e8b9-4a21-89bd-61c42f8f2dfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# Toy sizes for exercising the layers on random inputs.\n",
    "no_batch = 2\n",
    "no_height = 20\n",
    "no_dims = 10\n",
    "no_channels = 4\n",
    "window_size = 5\n",
    "op_size = 20\n",
    "\n",
    "# Two independent random inputs laid out as (batch, height, channels, dims).\n",
    "p1 = torch.randn((no_batch, no_height, no_channels, no_dims), dtype = torch.float32)\n",
    "p2 = torch.randn((no_batch, no_height, no_channels, no_dims), dtype = torch.float32)\n",
    "\n",
    "pconcat = ProteinConcat(no_dims, no_channels, window_size, op_size, stride = 1, dropout_p = 0.2, activation = \"tanh\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "1abb2033-35a0-432f-8dca-b6fad6d5cb23",
   "metadata": {},
   "outputs": [],
   "source": [
    "from conv import ProteinConv\n",
    "# Build a ProteinConv with 3 filters; no_dims, no_channels and window_size\n",
    "# are not defined in this cell and must already exist in the kernel.\n",
    "no_filters = 3\n",
    "pconv = ProteinConv(no_filters, no_dims, no_channels, window_size, stride = 1, dropout_p = 0.2, activation = \"tanh\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "c9766858-42eb-4883-97bf-25ae0faf92a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 16, 1, 4, 10, 5])\n"
     ]
    }
   ],
   "source": [
    "# Apply the ProteinConv instance to the input pair; it returns two outputs.\n",
    "o1, o2 = pconv(p1, p2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "780e07e0-2cb8-49e2-bfcf-6611bb1194dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([2, 16, 3, 10]), torch.Size([2, 16, 3, 10]))"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the shapes of the two ProteinConv outputs.\n",
    "o1.shape, o2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "ee7ba055-801e-4299-be74-5d7e07a71fea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([2, 5, 4, 10])"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pool import ProteinMaxPool\n",
    "# Apply ProteinMaxPool(4) to p1 and display the shape of the result.\n",
    "pool = ProteinMaxPool(4)\n",
    "o = pool(p1)\n",
    "o.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "19104c5a-a4f4-43ad-a452-860094d0ed8b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from Bio import SeqIO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "1cce5dd2-544b-4671-9a7f-186bd21a0319",
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd ..; ln -s /afs/csail.mit.edu/u/r/rsingh/work/corals/data-scratch1/STRING_foldseek_embeddings foldseek_emb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "abad1c03-06e0-4d92-8ffe-a4c735d1da91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the foldseek FASTA file; `fasta` is iterated record by record below.\n",
    "# NOTE(review): SeqIO.parse presumably returns a single-pass iterator, so\n",
    "# re-run this cell before iterating a second time -- confirm.\n",
    "fasta = SeqIO.parse(\"../foldseek_emb/r1_foldseekrep_seq.fa\", \"fasta\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "a65b05cc-47bb-4cff-8166-6e013aa331f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ID: 9606.ENSP00000386340\n",
      "Name: 9606.ENSP00000386340\n",
      "Description: 9606.ENSP00000386340 AF2:AF-P63255-F1-model_v2.pdb.gz 9606.ENSP00000386340\n",
      "Number of features: 0\n",
      "Seq('DFAQLQPRDDDDPVQWQQAPNGTHGQCQQAAPPPRHRDDRHQWDDDNSHTHGPP...DDD')\n"
     ]
    }
   ],
   "source": [
    "# Print only the first record yielded by `fasta`, then stop.\n",
    "# NOTE(review): `vocab` and `i` are initialised here but never updated in\n",
    "# this cell.\n",
    "vocab = {}\n",
    "i = 0\n",
    "for rec in fasta:\n",
    "    print(rec)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "1b43ac2a-f3c3-47ae-97d1-638a7595add4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import json\n",
    "# json.dump(vocab, open(\"../foldseek_vocab.json\", \"w\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "2fc3d947-f7f7-44a4-b753-03a48209aa89",
   "metadata": {},
   "outputs": [
    {
     "ename": "NotImplementedError",
     "evalue": "SeqRecord comparison is deliberately not implemented. Explicitly compare the attributes of interest.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNotImplementedError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m/tmp/ipykernel_3218513/2270489430.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m\"9606.ENSP00000386340\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfasta\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/scratch2/kdevko01/conda/.conda/envs/dscript/lib/python3.7/site-packages/Bio/SeqRecord.py\u001b[0m in \u001b[0;36m__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    792\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__eq__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    793\u001b[0m         \u001b[0;34m\"\"\"Define the equal-to operand (not implemented).\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 794\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_NO_SEQRECORD_COMPARISON\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    795\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    796\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__ne__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNotImplementedError\u001b[0m: SeqRecord comparison is deliberately not implemented. Explicitly compare the attributes of interest."
     ]
    }
   ],
   "source": [
    "# `\"...\" in fasta` compares each SeqRecord with the string, which Biopython\n",
    "# rejects with NotImplementedError; compare the record ids instead. The file\n",
    "# is parsed afresh so records consumed from `fasta` earlier are not skipped.\n",
    "any(rec.id == \"9606.ENSP00000386340\" for rec in SeqIO.parse(\"../foldseek_emb/r1_foldseekrep_seq.fa\", \"fasta\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dac1e71f-4967-49da-9f7a-4e9d16853d38",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dscript",
   "language": "python",
   "name": "dscript"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}