Merged LookupModal

2026-06-04 15:04:24 +08:00 · 2021-11-07 23:22:30 -05:00
parent 54f2df9847 1ed94e854e
commit af2c3a28a1
14 changed files with 187 additions and 20483 deletions
--- a/dscript/commands/train.py
+++ b/dscript/commands/train.py
@@ -5,6 +5,7 @@ Train a new model.
 import argparse
 import datetime
 import gzip as gz
+import os
 import subprocess as sp
 import sys

@@ -17,10 +18,8 @@ import torch.nn.functional as F
 import torch.optim as optim
 from sklearn.metrics import average_precision_score as average_precision
 from torch.autograd import Variable
-from torch.utils.data import DataLoader, IterableDataset
 from tqdm import tqdm

-import dscript
 from dscript.models.contact import ContactCNN
 from dscript.models.embedding import FullyConnectedEmbed, IdentityEmbed
 from dscript.models.interaction import ModelInteraction
@@ -343,7 +342,7 @@ def main(args):
    else:
        output = open(output, "w")

-    print(f'# Called as: {" ".join(sys.argv)}', file=output)
+    print(f'Called as: {" ".join(sys.argv)}', file=output)
    if output is not sys.stdout:
        print(f'Called as: {" ".join(sys.argv)}')

@@ -351,6 +350,7 @@ def main(args):
    device = args.device
    use_cuda = (device >= 0) and torch.cuda.is_available()
    if use_cuda:
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(device)  # must be str
        torch.cuda.set_device(device)
        print(
            f"# Using CUDA device {device} - {torch.cuda.get_device_name(device)}",
--- a/server/frontend/package-lock.json
+++ b/server/frontend/package-lock.json
--- a/server/frontend/src/assets/styles_home.css
+++ b/server/frontend/src/assets/styles_home.css
@@ -38,6 +38,11 @@ a {
    font-family: "Source Sans Pro", sans-serif;
 }

+.half_center {
+    max-width: 75%;
+    margin: 0 auto;
+}
+
 .subtitle {
    text-align: center;
    font-size: 22px;
--- a/server/frontend/src/components/modules/LookupInput.js
+++ b/server/frontend/src/components/modules/LookupInput.js
@@ -7,6 +7,7 @@ axios.defaults.xsrfCookieName = 'csrftoken'
 axios.defaults.xsrfHeaderName = 'X-CSRFToken'

 export default function LookupInput() {
+    const BASE_URL = process.env.REACT_APP_BASE_URL;
    const [input, setInput] = useState('');
    const [jobStatus, setJobStatus] = useState(null);
    const [lookupValid, setLookupValid] = useState(false)
@@ -22,7 +23,7 @@ export default function LookupInput() {
    const handleLookup = () => {
        setJobStatus(null)
        axios
-            .get(`http://localhost:8000/api/position/${input}/`)
+            .get(`${BASE_URL}/api/position/${input}/`)
            .then((res) => {
                if (res.status === 200) {
                    setLookupValid(true)
--- a/server/frontend/src/components/modules/LookupModal.js
+++ b/server/frontend/src/components/modules/LookupModal.js
@@ -22,7 +22,6 @@ export default function LookupModal(props) {
    const [counter, setCounter] = useState(0);
    const [status, setJobStatus] = useState(props.status);
    const [processed, setProcessed] = useState(false);
-    const [_, setLookupValid] = useState(true);
    const [viewPath, setViewPath] = useState(null);
    const [filePath, setFilePath] = useState('');

@@ -43,26 +42,26 @@ export default function LookupModal(props) {
    }, [props.status])

    useEffect(() => {
+      const BASE_URL = process.env.REACT_APP_BASE_URL;
      if (counter > 0) {
        setTimeout(() => {
          setCounter(counter - 1)
        }, 1000);
      } else {
        axios
-          .get(`http://localhost:8000/api/position/${props.id}/`)
+          .get(`${BASE_URL}/api/position/${props.id}/`)
          .then((res) => {
            if (res.status === 200) {
-                setLookupValid(true)
                setJobStatus(res.data.status)
-                if (res.data.status == 'PENDING') {
+                if (res.data.status === 'PENDING') {
                  setProcessed(false)
                  setCounter(10)
-                } else if (res.data.status == 'STARTED') {
+                } else if (res.data.status === 'STARTED') {
                  setProcessed(false)
                  setCounter(10)
-                } else if (res.data.status == 'SUCCESS') {
+                } else if (res.data.status === 'SUCCESS') {
                  setProcessed(true)
-                  setViewPath(`http://localhost:8000/view/${props.id}`)
+                  setViewPath(`${BASE_URL}/analysis/${props.id}`)
                  axios
                    .get(`http://localhost:8000/api/download_loc/${props.id}/`)
                    .then((res) => {
@@ -70,11 +69,10 @@ export default function LookupModal(props) {
                      setFilePath(res)
                    })
                    .catch((err) => console.log(err))
-                } else if (res.data.status == 'FAILURE') {
+                } else if (res.data.status === 'FAILURE') {
                  setProcessed(true)
                }
            } else {
-                setLookupValid(false)
                setJobStatus(null)
            }
          })
--- a/server/frontend/src/components/modules/PredictInput.js
+++ b/server/frontend/src/components/modules/PredictInput.js
@@ -71,6 +71,7 @@ export default function PredictInput() {
        console.log(item)
        const csrftoken = Cookies.get('csrftoken');
        const uploadData = new FormData()
+        const BASE_URL = process.env.REACT_APP_BASE_URL;
        uploadData.append('title', item.title)
        uploadData.append('email', item.email)
        uploadData.append('pairsIndex', item.pairsIndex)
@@ -111,7 +112,7 @@ export default function PredictInput() {

        axios
            .post(
-                "http://localhost:8000/api/predict/",
+                `${BASE_URL}/api/predict/`,
                uploadData,
                {
                    headers: {'X-CSRFToken': csrftoken}
@@ -121,7 +122,7 @@ export default function PredictInput() {
                console.log(res)
                setJobId(res.data.id)
                axios
-                    .get(`http://localhost:8000/api/position/${res.data.id}/`)
+                    .get(`${BASE_URL}/api/position/${res.data.id}/`)
                    .then((res) => {
                        console.log(res)
                        setJobId(res.data.id)
@@ -139,10 +140,24 @@ export default function PredictInput() {
                            setJobStatus('FAILURE')
                        }
                    })
-                    .catch((err) => console.log(err))
+                    .catch((error) => {
+                        console.log(error)
+                        if (error.response) {
+                            alert(`Server side error (${error.response.status}): ${error.response.data.error}`)
+                        }
+                        setJobStatus(null)
+                    })

            })
-            .catch((err) => console.log(err))
+            .catch((error) => {
+                console.log(error)
+                if (error.response) {
+                    console.log(error.response.status)
+                    console.log(error.response.data)
+                    alert(`Server side error (${error.response.status}): ${error.response.data.error}`)
+                }
+                setJobStatus(null)
+            })

    }

@@ -152,6 +167,19 @@ export default function PredictInput() {
            <h2>PREDICT PROTEIN INTERACTIONS</h2>
            <form autoComplete="off">
                <h3>1. Provide Protein Sequences</h3>
+                <p>
+                    Proteins sequences should be provided in .fasta format, where each sequence is defined
+                    with a {">"} character followed immediately by the protein name. The name terminates after
+                    the first space, and the rest is interpreted as metadata. The sequence for the given protein
+                    is defined on the following line. Example:
+                </p>
+                <br/>
+                <p>{'>'}362663.ecp:ECP_0003 species:e.coli length:20</p>
+                <p>MVKVYAPASSANMSVGFDVL</p>
+                <p>{'>'}362663.ECP_0005 species:e.coli length:28</p>
+                <p>MQPGFFYEENMEKNDREKGEILNKCGNL</p>
+                <br/>
+
                <SequenceInput
                    index={item.seqsIndex}
                    handleIndexChange={handleSeqsIndexChange}
@@ -160,6 +188,14 @@ export default function PredictInput() {
                    handleInputChange={handleSeqsInputChange}
                ></SequenceInput>
                <h3>2. Specify Protein Pairs</h3>
+                <p>
+                    Candidate pairs are defined using the protein names, with a single pair of proteins
+                    comma-separated per line. Example:
+                </p>
+                <br/>
+                <p>362663.ecp:ECP_0003,362663.ECP_0005</p>
+                <br/>
+
                <PairInput
                    index={item.pairsIndex}
                    handleIndexChange={handlePairsIndexChange}
@@ -190,6 +226,14 @@ export default function PredictInput() {
                </TextField>
                <Button variant='contained' onClick={handleSubmit}>Compute Interaction Probability</Button>
                {/* <Button variant='contained' onClick={testSubmit}>Submit</Button> */}
+                <h3>5. Interpreting Results</h3>
+                <p>
+                    D-SCRIPT returns a predicted confidence between 0 and 1 that each pair of proteins
+                    interacts. Confidence values greater than or equal to <b>0.5</b> can be interpreted as having
+                    a high likelihood of interaction. Confidence values greater than or equal to <b>0.05</b> can be
+                    interpreted as potentially interacting. Confidence values lower than <b>0.05</b> are not predicted
+                    as interacting by D-SCRIPT.
+                </p>
            </form>
            { (modalOpen && jobStatus != null && jobId != null)  && <SubmissionModal open={modalOpen} id={jobId} status={jobStatus} email={item.email}></SubmissionModal>}
        </div>
--- a/server/frontend/src/components/modules/SubmissionModal.js
+++ b/server/frontend/src/components/modules/SubmissionModal.js
@@ -20,9 +20,9 @@ const useStyles = makeStyles((theme) => ({
 export default function SubmissionModal(props) {
    const classes = useStyles();

+    const [backoff_i, setBackoffI] = useState(0);
    const [status, setStatus] = useState(props.status);
    const [processed, setProcessed] = useState(false);
-    const [backoff_i, setBackoffI] = useState(0)
    const [counter, setCounter] = useState(Math.min(128, 2 ** backoff_i));

    const protectEmail = (email) => {
@@ -36,13 +36,14 @@ export default function SubmissionModal(props) {
    }

    useEffect(() => {
+      const BASE_URL = process.env.REACT_APP_BASE_URL;
      if (counter > 0) {
        setTimeout(() => {
          setCounter(counter - 1)
        }, 1000);
      } else {
        axios
-          .get(`http://localhost:8000/api/position/${props.id}/`)
+          .get(`${BASE_URL}/api/position/${props.id}/`)
          .then((res) => {
              setBackoffI(backoff_i + 1)
              if (res.data.status === 'PENDING') {
--- a/server/frontend/src/components/pages/Home.js
+++ b/server/frontend/src/components/pages/Home.js
@@ -6,26 +6,46 @@ import architecture from '../../assets/dscript_architecture1.png'


 export default function Home() {
+    const citation = `@article{
+        Sledzieski_Sequencebased_prediction_of_2021,
+        author = {Sledzieski, Samuel and Singh, Rohit and Cowen, Lenore and Berger, Bonnie},
+        doi = {10.1101/2021.01.22.427866},
+        journal = {bioRxiv},
+        month = {1},
+        title = {{Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model}},
+        year = {2021}
+    }`
    return (
        <div className="Home-Container">
            <NavBar></NavBar>
-            <img src={architecture} alt="D-SCIPRT Architecture"/><br/>

-            <div class="about sans_font">
-            <p class="home_body">
-                D-SCRIPT is a deep learning method for predicting a physical interaction between two proteins given just their sequences.   It generalizes well to new species and is robust to limitations in training data size.  Its design reflects the intuition that for two proteins to physically interact, a subset of amino acids from each protein should be in con-tact with the other.  The intermediate stages of D-SCRIPT directly implement this intuition, with the penultimate stage in D-SCRIPT being a rough estimate of the inter-protein contact map of the protein dimer.  This structurally-motivated design enhances the interpretability of the results and, since structure is more conserved evolutionarily than sequence, improves generalizability across species.
-            <br />
-            <br />
-            D-SCRIPT is described in the paper <a href="https://www.biorxiv.org/content/10.1101/2021.01.22.427866v1">&ldquo;Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model&rdquo;</a> by <a href="http://people.csail.mit.edu/samsl">Sam Sledzieski</a>, <a href="http://people.csail.mit.edu/rsingh/">Rohit Singh</a>, <a href="http://www.cs.tufts.edu/~cowen/"> Lenore Cowen</a> and <a href="http://people.csail.mit.edu/bab/">Bonnie Berger</a>.
-            </p>
+            <div className="half_center">
+                <img className="half_center" src={architecture} alt="D-SCRIPT Architecture"/><br/>
+
+                <div className="nav sans_font">
+                    <a href="./predict"><b>Make Predictions Online (InDev!)</b></a>
+                </div>
+
+                <br/>
+                <br/>
+
+                <div className="about sans_font">
+                <p className="home_body">
+                    D-SCRIPT is an interpretable deep learning method for predicting a physical interaction between two proteins given just their sequences.   It generalizes well to new species and is robust to limitations in training data size.  Its design reflects the intuition that for two proteins to physically interact, a subset of amino acids from each protein should be in contact with the other.  The intermediate stages of D-SCRIPT directly implement this intuition, with the penultimate stage in D-SCRIPT being a rough estimate of the inter-protein contact map of the protein dimer.  This structurally-motivated design enhances the interpretability of the results and, since structure is more conserved evolutionarily than sequence, improves generalizability across species.
+                <br/>
+                <br/>
+                D-SCRIPT is described in the paper <a href="https://www.biorxiv.org/content/10.1101/2021.01.22.427866v1">&ldquo;Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model&rdquo;</a> by <a href="http://people.csail.mit.edu/samsl">Sam Sledzieski</a>, <a href="http://people.csail.mit.edu/rsingh/">Rohit Singh</a>, <a href="http://www.cs.tufts.edu/~cowen/"> Lenore Cowen</a> and <a href="http://people.csail.mit.edu/bab/">Bonnie Berger</a>.
+                </p>
+                <p className="title_font">
+                    {citation}
+                </p>
+                </div>
+                <br/>
+
+                <div className="about home_body">
+                <p className="sans_font">Installation:</p> <p className="title_font">pip install dscript</p>
+                </div>
            </div>
-
-            <br />
-
-            <div class="about home_body">
-            <div class="sans_font">Installation:</div> <div class="title_font">pip install dscript</div>
-            </div>
-
        </div>
    )
 }
--- a/server/frontend/src/components/pages/_Home.scss
+++ b/server/frontend/src/components/pages/_Home.scss
@@ -4,6 +4,13 @@
    flex-flow: column wrap;

    div {
+        min-width: 480px;
+        > * {
+            margin: 8px;
+        }
+    }
+
+    form {
        width: 50%;
        min-width: 480px;

@@ -11,4 +18,10 @@
            margin: 8px;
        }
    }
+
+
+    img {
+        max-width: 100%;
+        height: auto;
+    }
 }
--- a/server/predict/api/dscript.py
+++ b/server/predict/api/dscript.py
@@ -16,7 +16,7 @@ import torch
 from django.conf import settings
 from dotenv import load_dotenv

-from dscript.fasta import parse_input
+from dscript.fasta import parse
 from dscript.language_model import lm_embed
 from dscript.pretrained import get_pretrained

@@ -24,6 +24,13 @@ from ..models import Job

 load_dotenv()

+if settings.DSCRIPT_DEPLOY_ENV:
+    outgoing_mail_server = "outgoing.csail.mit.edu"
+    outgoing_mail_port = 25
+else:
+    outgoing_mail_server = "smtp.gmail.com"
+    outgoing_mail_port = 465
+

 def predict_pairs(
    uuid,
@@ -80,8 +87,8 @@ def predict_pairs(
    # Load Sequences
    logging.info("# Loading Sequences...")
    with open(seq_file, "r") as f:
-        names, sequences = parse_input(f.read())
-    seqDict = {n: s for n, s in zip(names, sequences)}
+        names, sequences = parse(f)
+    seqDict = {n.split()[0]: s for n, s in zip(names, sequences)}
    logging.info(seqDict)

    # Load Pairs
@@ -90,10 +97,10 @@ def predict_pairs(
    all_prots = set(pairs_array.iloc[:, 0]).union(pairs_array.iloc[:, 1])

    # Generate Embeddings
-    logging.info("# Generating Embeddings...")
-    embeddings = {}
-    for n in all_prots:
-        embeddings[n] = lm_embed(seqDict[n], use_cuda)
+    # logging.info("# Generating Embeddings...")
+    # embeddings = {}
+    # for n in all_prots:
+    #     embeddings[n] = lm_embed(seqDict[n], use_cuda)

    # Make Predictions
    logging.info("# Making Predictions...")
@@ -108,8 +115,10 @@ def predict_pairs(
                    job.save()
                    f.flush()
                n_complete += 1
-                p0 = embeddings[n0]
-                p1 = embeddings[n1]
+                # p0 = embeddings[n0]
+                # p1 = embeddings[n1]
+                p0 = lm_embed(seqDict[n0], use_cuda)
+                p1 = lm_embed(seqDict[n1], use_cuda)
                if use_cuda:
                    p0 = p0.cuda()
                    p1 = p1.cuda()
@@ -170,6 +179,23 @@ def create_message(
    return text


+def send_message(sender_email, receiver_email, text):
+    """Send `text` through the mail relay configured for this deployment."""
+    if settings.DSCRIPT_DEPLOY_ENV:
+        # NOTE(review): a bare SSLContext skips certificate verification.
+        context = ssl.SSLContext(ssl.PROTOCOL_TLS)
+        with smtplib.SMTP(outgoing_mail_server, outgoing_mail_port) as server:
+            server.starttls(context=context)
+            server.sendmail(sender_email, receiver_email, text)
+    else:
+        context = ssl.create_default_context()
+        with smtplib.SMTP_SSL(
+            outgoing_mail_server, outgoing_mail_port, context=context
+        ) as server:
+            server.login(sender_email, os.getenv("EMAIL_PWD"))
+            server.sendmail(sender_email, receiver_email, text)
+
+
 def email_results(
    uuid,
    sender_email=settings.DSCRIPT_SENDER_EMAIL,
@@ -191,17 +217,12 @@ def email_results(
    else:
        subject = f"D-SCRIPT Results for {title} ({uuid})"
    body = f"These are the results of your D-SCRIPT prediction on job {uuid}"
-    password = os.getenv("EMAIL_PWD")

    text = create_message(
        sender_email, receiver_email, subject, body, uuid, filename
    )

-    # Log in to server using secure context and send email
-    context = ssl.create_default_context()
-    with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
-        server.login(sender_email, password)
-        server.sendmail(sender_email, receiver_email, text)
+    send_message(sender_email, receiver_email, text)


 def email_confirmation(
@@ -224,12 +245,7 @@ def email_confirmation(
    else:
        subject = f"D-SCRIPT Job {title} ({uuid}) Submission"
    body = f"You have successfully submitted a job with id {uuid} for D-SCRIPT prediction. Keep track of this id to monitor your job status."
-    password = os.getenv("EMAIL_PWD")

    text = create_message(sender_email, receiver_email, subject, body, uuid)

-    # Log in to server using secure context and send email
-    context = ssl.create_default_context()
-    with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
-        server.login(sender_email, password)
-        server.sendmail(sender_email, receiver_email, text)
+    send_message(sender_email, receiver_email, text)
--- a/server/predict/apps.py
+++ b/server/predict/apps.py
@@ -15,4 +15,4 @@ class PredictConfig(AppConfig):
        from .tasks import sweep_incomplete_jobs

        rslt = sweep_incomplete_jobs.delay()
-        rslt.get()
+        rslt.forget()
--- a/server/predict/views.py
+++ b/server/predict/views.py
@@ -18,7 +18,7 @@ from rest_framework import status
 from rest_framework.decorators import api_view
 from rest_framework.response import Response

-from dscript.fasta import parse_input
+from dscript.fasta import parse

 from .models import Job
 from .tasks import process_job
@@ -67,7 +67,7 @@ def upload_stream_to_local(in_file, out_file):

 def get_all_pairs(seq_file):
    with open(seq_file, "r") as f:
-        nam, _ = parse_input(f.read())
+        nam, _ = parse(f)
        pairs = "\n".join("\t".join(p) for p in itertools.combinations(nam, 2))
        return pairs

@@ -87,7 +87,7 @@ class PredictionServerException(Exception):
 def validate_inputs(seq_path, pair_path):
    try:
        with open(seq_path, "r") as f:
-            nam, _ = parse_input(f.read())
+            nam, _ = parse(f)
        assert len(nam), "You must provide at least one sequence."
        assert (
            len(nam) < settings.DSCRIPT_MAX_SEQS
@@ -104,13 +104,15 @@ def validate_inputs(seq_path, pair_path):
        assert (
            df.shape[0] < settings.DSCRIPT_MAX_PAIRS
        ), f"Number of pairs {df.shape[0]} is larger than the maximum allowed ({settings.DSCRIPT_MAX_PAIRS})."
-    except AssertionError as err:
+    except (AssertionError, pd.errors.ParserError) as err:
        raise PredictionServerException(
            status.HTTP_406_NOT_ACCEPTABLE, f"Pairs parse error: {str(err)}"
        )

    names_in_pairs = set(df.iloc[:, 0]).union(df.iloc[:, 1])
-    names_in_seqs = set(nam)
+    names_in_seqs = set([i.split()[0] for i in nam])
+    logging.debug(names_in_pairs)
+    logging.debug(names_in_seqs)
    if len(names_in_pairs.difference(names_in_seqs)):
        raise PredictionServerException(
            status.HTTP_406_NOT_ACCEPTABLE,
@@ -162,9 +164,6 @@ def predict(request):
            # Validate inputs are properly formatted and allowed
            n_seqs, n_pairs = validate_inputs(seq_path, pair_path)

-            logging.debug(n_seqs, seq_path)
-            logging.debug(n_pairs, pair_path)
-
        except PredictionServerException as err:
            logging.debug(err)
            data = {"id": job_id, "submitted": False, "error": err.message}
--- a/server/server/settings.py
+++ b/server/server/settings.py
@@ -40,7 +40,7 @@ logging.basicConfig(
    ],
 )

-ALLOWED_HOSTS = []
+ALLOWED_HOSTS = ["dscript-predict.csail.mit.edu", "localhost"]


 # Application definition
@@ -162,7 +162,14 @@ DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
 # D-SCRIPT Specific Default Variables
 DSCRIPT_MODEL_VERSION = "human_v1"
 DSCRIPT_DEVICE = -1
-DSCRIPT_SENDER_EMAIL = "dscript.results@gmail.com"
+DSCRIPT_DEPLOY_ENV = False
+if DSCRIPT_DEPLOY_ENV:
+    DSCRIPT_SENDER_EMAIL = "no-reply@dscript-predict.csail.mit.edu"
+else:
+    DSCRIPT_SENDER_EMAIL = "dscript.results@gmail.com"
 DSCRIPT_MAX_SEQS = 500
 DSCRIPT_MAX_PAIRS = 100000
 DSCRIPT_CONFIRM_SUBMISSION_EMAIL = True
+SECRET_KEY = (
+    "django-insecure-x7ncjt3_(-q1qcph92&8zx7f9_g(yc0t#cd!vgx3uw40813n62"
+)
--- a/server/server/urls.py
+++ b/server/server/urls.py
@@ -19,7 +19,7 @@ from predict import views

 urlpatterns = [
    path("admin/", admin.site.urls),
-    re_path("(^(?!(api|admin|view)).*$)", views.FrontendAppView.as_view()),
+    re_path("(^(?!(api|admin)).*$)", views.FrontendAppView.as_view()),
    path("api/predict/", views.predict),
    path("api/position/<uuid:uuid>/", views.get_position),
    path("api/download/<uuid:uuid>/", views.get_download),