Merged LookupModal

This commit is contained in:
Jesse Yang
2021-11-07 23:22:30 -05:00
14 changed files with 187 additions and 20483 deletions

View File

@@ -5,6 +5,7 @@ Train a new model.
import argparse
import datetime
import gzip as gz
import os
import subprocess as sp
import sys
@@ -17,10 +18,8 @@ import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import average_precision_score as average_precision
from torch.autograd import Variable
from torch.utils.data import DataLoader, IterableDataset
from tqdm import tqdm
import dscript
from dscript.models.contact import ContactCNN
from dscript.models.embedding import FullyConnectedEmbed, IdentityEmbed
from dscript.models.interaction import ModelInteraction
@@ -343,7 +342,7 @@ def main(args):
else:
output = open(output, "w")
print(f'# Called as: {" ".join(sys.argv)}', file=output)
print(f'Called as: {" ".join(sys.argv)}', file=output)
if output is not sys.stdout:
print(f'Called as: {" ".join(sys.argv)}')
@@ -351,6 +350,7 @@ def main(args):
device = args.device
use_cuda = (device >= 0) and torch.cuda.is_available()
if use_cuda:
os.environ["CUDA_VISIBLE_DEVICES"] = device
torch.cuda.set_device(device)
print(
f"# Using CUDA device {device} - {torch.cuda.get_device_name(device)}",

File diff suppressed because it is too large Load Diff

View File

@@ -38,6 +38,11 @@ a {
font-family: "Source Sans Pro", sans-serif;
}
/* Caps the element at 75% of its container's width; the auto left/right
   margins center it horizontally when it is laid out as a block. */
.half_center {
max-width: 75%;
margin: 0 auto;
}
.subtitle {
text-align: center;
font-size: 22px;

View File

@@ -7,6 +7,7 @@ axios.defaults.xsrfCookieName = 'csrftoken'
axios.defaults.xsrfHeaderName = 'X-CSRFToken'
export default function LookupInput() {
const BASE_URL = process.env.REACT_APP_BASE_URL;
const [input, setInput] = useState('');
const [jobStatus, setJobStatus] = useState(null);
const [lookupValid, setLookupValid] = useState(false)
@@ -22,7 +23,7 @@ export default function LookupInput() {
const handleLookup = () => {
setJobStatus(null)
axios
.get(`http://localhost:8000/api/position/${input}/`)
.get(`${BASE_URL}/api/position/${input}/`)
.then((res) => {
if (res.status === 200) {
setLookupValid(true)

View File

@@ -22,7 +22,6 @@ export default function LookupModal(props) {
const [counter, setCounter] = useState(0);
const [status, setJobStatus] = useState(props.status);
const [processed, setProcessed] = useState(false);
const [_, setLookupValid] = useState(true);
const [viewPath, setViewPath] = useState(null);
const [filePath, setFilePath] = useState('');
@@ -43,26 +42,26 @@ export default function LookupModal(props) {
}, [props.status])
useEffect(() => {
const BASE_URL = process.env.REACT_APP_BASE_URL;
if (counter > 0) {
setTimeout(() => {
setCounter(counter - 1)
}, 1000);
} else {
axios
.get(`http://localhost:8000/api/position/${props.id}/`)
.get(`${BASE_URL}/api/position/${props.id}/`)
.then((res) => {
if (res.status === 200) {
setLookupValid(true)
setJobStatus(res.data.status)
if (res.data.status == 'PENDING') {
if (res.data.status === 'PENDING') {
setProcessed(false)
setCounter(10)
} else if (res.data.status == 'STARTED') {
} else if (res.data.status === 'STARTED') {
setProcessed(false)
setCounter(10)
} else if (res.data.status == 'SUCCESS') {
} else if (res.data.status === 'SUCCESS') {
setProcessed(true)
setViewPath(`http://localhost:8000/view/${props.id}`)
setViewPath(`${BASE_URL}/analysis/${props.id}`)
axios
.get(`http://localhost:8000/api/download_loc/${props.id}/`)
.then((res) => {
@@ -70,11 +69,10 @@ export default function LookupModal(props) {
setFilePath(res)
})
.catch((err) => console.log(err))
} else if (res.data.status == 'FAILURE') {
} else if (res.data.status === 'FAILURE') {
setProcessed(true)
}
} else {
setLookupValid(false)
setJobStatus(null)
}
})

View File

@@ -71,6 +71,7 @@ export default function PredictInput() {
console.log(item)
const csrftoken = Cookies.get('csrftoken');
const uploadData = new FormData()
const BASE_URL = process.env.REACT_APP_BASE_URL;
uploadData.append('title', item.title)
uploadData.append('email', item.email)
uploadData.append('pairsIndex', item.pairsIndex)
@@ -111,7 +112,7 @@ export default function PredictInput() {
axios
.post(
"http://localhost:8000/api/predict/",
`${BASE_URL}/api/predict/`,
uploadData,
{
headers: {'X-CSRFToken': csrftoken}
@@ -121,7 +122,7 @@ export default function PredictInput() {
console.log(res)
setJobId(res.data.id)
axios
.get(`http://localhost:8000/api/position/${res.data.id}/`)
.get(`${BASE_URL}/api/position/${res.data.id}/`)
.then((res) => {
console.log(res)
setJobId(res.data.id)
@@ -139,10 +140,24 @@ export default function PredictInput() {
setJobStatus('FAILURE')
}
})
.catch((err) => console.log(err))
.catch((error) => {
console.log(error)
if (error.response) {
alert(`Server side error (${error.response.status}): ${error.response.data.error}`)
}
setJobStatus(null)
})
})
.catch((err) => console.log(err))
.catch((error) => {
console.log(error)
if (error.response) {
console.log(error.response.status)
console.log(error.response.data)
alert(`Server side error (${error.response.status}): ${error.response.data.error}`)
}
setJobStatus(null)
})
}
@@ -152,6 +167,19 @@ export default function PredictInput() {
<h2>PREDICT PROTEIN INTERACTIONS</h2>
<form autoComplete="off">
<h3>1. Provide Protein Sequences</h3>
<p>
Proteins sequences should be provided in .fasta format, where each sequence is defined
with a {">"} character followed immediately by the protein name. The name terminates after
the first space, and the rest is interpreted as metadata. The sequence for the given protein
is defined on the following line. Example:
</p>
<br/>
<p>{'>'}362663.ecp:ECP_0003 species:e.coli length:20</p>
<p>MVKVYAPASSANMSVGFDVL</p>
<p>{'>'}362663.ECP_0005 species:e.coli length:28</p>
<p>MQPGFFYEENMEKNDREKGEILNKCGNL</p>
<br/>
<SequenceInput
index={item.seqsIndex}
handleIndexChange={handleSeqsIndexChange}
@@ -160,6 +188,14 @@ export default function PredictInput() {
handleInputChange={handleSeqsInputChange}
></SequenceInput>
<h3>2. Specify Protein Pairs</h3>
<p>
Candidate pairs are defined using the protein names, with a single pair of proteins
comma-separated per line. Example:
</p>
<br/>
<p>362663.ecp:ECP_0003,362663.ECP_0005</p>
<br/>
<PairInput
index={item.pairsIndex}
handleIndexChange={handlePairsIndexChange}
@@ -190,6 +226,14 @@ export default function PredictInput() {
</TextField>
<Button variant='contained' onClick={handleSubmit}>Compute Interaction Probability</Button>
{/* <Button variant='contained' onClick={testSubmit}>Submit</Button> */}
<h3>5. Interpreting Results</h3>
<p>
D-SCRIPT returns a predicted confidence between 0 and 1 that each pair of proteins
interacts. Confidence values greater than or equal to <b>0.5</b> can be interpreted as having
a high likelihood of interaction. Confidence values greater than or equal to <b>0.05</b> can be
interpreted as potentially interacting. Confidence values lower than <b>0.05</b> are not predicted
as interacting by D-SCRIPT.
</p>
</form>
{ (modalOpen && jobStatus != null && jobId != null) && <SubmissionModal open={modalOpen} id={jobId} status={jobStatus} email={item.email}></SubmissionModal>}
</div>

View File

@@ -20,9 +20,9 @@ const useStyles = makeStyles((theme) => ({
export default function SubmissionModal(props) {
const classes = useStyles();
const [backoff_i, setBackoffI] = useState(0);
const [status, setStatus] = useState(props.status);
const [processed, setProcessed] = useState(false);
const [backoff_i, setBackoffI] = useState(0)
const [counter, setCounter] = useState(Math.min(128, 2 ** backoff_i));
const protectEmail = (email) => {
@@ -36,13 +36,14 @@ export default function SubmissionModal(props) {
}
useEffect(() => {
const BASE_URL = process.env.REACT_APP_BASE_URL;
if (counter > 0) {
setTimeout(() => {
setCounter(counter - 1)
}, 1000);
} else {
axios
.get(`http://localhost:8000/api/position/${props.id}/`)
.get(`${BASE_URL}/api/position/${props.id}/`)
.then((res) => {
setBackoffI(backoff_i + 1)
if (res.data.status === 'PENDING') {

View File

@@ -6,26 +6,46 @@ import architecture from '../../assets/dscript_architecture1.png'
export default function Home() {
const citation = `@article{
Sledzieski_Sequencebased_prediction_of_2021,
author = {Sledzieski, Samuel and Singh, Rohit and Cowen, Lenore and Berger, Bonnie},
doi = {10.1101/2021.01.22.427866},
journal = {bioRxiv},
month = {1},
title = {{Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model}},
year = {2021}
}`
return (
<div className="Home-Container">
<NavBar></NavBar>
<img src={architecture} alt="D-SCIPRT Architecture"/><br/>
<div class="about sans_font">
<p class="home_body">
D-SCRIPT is a deep learning method for predicting a physical interaction between two proteins given just their sequences. It generalizes well to new species and is robust to limitations in training data size. Its design reflects the intuition that for two proteins to physically interact, a subset of amino acids from each protein should be in con-tact with the other. The intermediate stages of D-SCRIPT directly implement this intuition, with the penultimate stage in D-SCRIPT being a rough estimate of the inter-protein contact map of the protein dimer. This structurally-motivated design enhances the interpretability of the results and, since structure is more conserved evolutionarily than sequence, improves generalizability across species.
<br />
<br />
D-SCRIPT is described in the paper <a href="https://www.biorxiv.org/content/10.1101/2021.01.22.427866v1">&ldquo;Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model&rdquo;</a> by <a href="http://people.csail.mit.edu/samsl">Sam Sledzieski</a>, <a href="http://people.csail.mit.edu/rsingh/">Rohit Singh</a>, <a href="http://www.cs.tufts.edu/~cowen/"> Lenore Cowen</a> and <a href="http://people.csail.mit.edu/bab/">Bonnie Berger</a>.
</p>
<div class="half_center">
<img class="half_center" src={architecture} alt="D-SCIPRT Architecture"/><br/>
<div class="nav sans_font">
<a href="./predict"><b>Make Predictions Online (InDev!)</b></a>
</div>
<br/>
<br/>
<div class="about sans_font">
<p class="home_body">
D-SCRIPT is an interpretable deep learning method for predicting a physical interaction between two proteins given just their sequences. It generalizes well to new species and is robust to limitations in training data size. Its design reflects the intuition that for two proteins to physically interact, a subset of amino acids from each protein should be in con-tact with the other. The intermediate stages of D-SCRIPT directly implement this intuition, with the penultimate stage in D-SCRIPT being a rough estimate of the inter-protein contact map of the protein dimer. This structurally-motivated design enhances the interpretability of the results and, since structure is more conserved evolutionarily than sequence, improves generalizability across species.
<br/>
<br/>
D-SCRIPT is described in the paper <a href="https://www.biorxiv.org/content/10.1101/2021.01.22.427866v1">&ldquo;Sequence-based prediction of protein-protein interactions: a structure-aware interpretable deep learning model&rdquo;</a> by <a href="http://people.csail.mit.edu/samsl">Sam Sledzieski</a>, <a href="http://people.csail.mit.edu/rsingh/">Rohit Singh</a>, <a href="http://www.cs.tufts.edu/~cowen/"> Lenore Cowen</a> and <a href="http://people.csail.mit.edu/bab/">Bonnie Berger</a>.
</p>
<p class="title_font">
{citation}
</p>
</div>
<br/>
<div class="about home_body">
<p class="sans_font">Installation:</p> <p class="title_font">pip install dscript</p>
</div>
</div>
<br />
<div class="about home_body">
<div class="sans_font">Installation:</div> <div class="title_font">pip install dscript</div>
</div>
</div>
)
}

View File

@@ -4,6 +4,13 @@
flex-flow: column wrap;
div {
min-width: 480px;
> * {
margin: 8px;
}
}
form {
width: 50%;
min-width: 480px;
@@ -11,4 +18,10 @@
margin: 8px;
}
}
img {
max-width: 100%;
height: auto;
}
}

View File

@@ -16,7 +16,7 @@ import torch
from django.conf import settings
from dotenv import load_dotenv
from dscript.fasta import parse_input
from dscript.fasta import parse
from dscript.language_model import lm_embed
from dscript.pretrained import get_pretrained
@@ -24,6 +24,13 @@ from ..models import Job
load_dotenv()
# SMTP host and port for outgoing mail, chosen once at import time from the
# DSCRIPT_DEPLOY_ENV setting: outgoing.csail.mit.edu:25 when it is truthy,
# smtp.gmail.com:465 otherwise.
outgoing_mail_server, outgoing_mail_port = (
    ("outgoing.csail.mit.edu", 25)
    if settings.DSCRIPT_DEPLOY_ENV
    else ("smtp.gmail.com", 465)
)
def predict_pairs(
uuid,
@@ -80,8 +87,8 @@ def predict_pairs(
# Load Sequences
logging.info("# Loading Sequences...")
with open(seq_file, "r") as f:
names, sequences = parse_input(f.read())
seqDict = {n: s for n, s in zip(names, sequences)}
names, sequences = parse(f)
seqDict = {n.split()[0]: s for n, s in zip(names, sequences)}
logging.info(seqDict)
# Load Pairs
@@ -90,10 +97,10 @@ def predict_pairs(
all_prots = set(pairs_array.iloc[:, 0]).union(pairs_array.iloc[:, 1])
# Generate Embeddings
logging.info("# Generating Embeddings...")
embeddings = {}
for n in all_prots:
embeddings[n] = lm_embed(seqDict[n], use_cuda)
# logging.info("# Generating Embeddings...")
# embeddings = {}
# for n in all_prots:
# embeddings[n] = lm_embed(seqDict[n], use_cuda)
# Make Predictions
logging.info("# Making Predictions...")
@@ -108,8 +115,10 @@ def predict_pairs(
job.save()
f.flush()
n_complete += 1
p0 = embeddings[n0]
p1 = embeddings[n1]
# p0 = embeddings[n0]
# p1 = embeddings[n1]
p0 = lm_embed(seqDict[n0], use_cuda)
p1 = lm_embed(seqDict[n1], use_cuda)
if use_cuda:
p0 = p0.cuda()
p1 = p1.cuda()
@@ -170,6 +179,23 @@ def create_message(
return text
def send_message(sender_email, receiver_email, text):
    """Send an already-formatted email through the configured SMTP server.

    The transport depends on ``settings.DSCRIPT_DEPLOY_ENV``:

    * truthy -- open a plain SMTP connection to ``outgoing_mail_server`` and
      upgrade it with STARTTLS; no login is performed.
    * falsy -- open an implicit-TLS (SMTP_SSL) connection to
      ``outgoing_mail_server`` and log in as ``sender_email`` with the
      password read from the ``EMAIL_PWD`` environment variable.

    Args:
        sender_email: Address used as the envelope sender (and as the login
            name in the non-deploy branch).
        receiver_email: Address the message is delivered to.
        text: The complete message, passed unchanged to ``sendmail``.

    Raises:
        smtplib.SMTPException: Propagated from connecting, STARTTLS, login,
            or sending.
        ssl.SSLError: If the TLS handshake or certificate check fails.
    """
    # create_default_context() turns on certificate and hostname verification.
    # A bare ssl.SSLContext(ssl.PROTOCOL_TLS) verifies neither, which would
    # leave the STARTTLS session open to interception.
    context = ssl.create_default_context()

    if settings.DSCRIPT_DEPLOY_ENV:
        with smtplib.SMTP(outgoing_mail_server, outgoing_mail_port) as server:
            server.starttls(context=context)
            server.sendmail(sender_email, receiver_email, text)
    else:
        password = os.getenv("EMAIL_PWD")
        # Use the module-level host/port rather than repeating the literals,
        # so the endpoint is defined in exactly one place.
        with smtplib.SMTP_SSL(
            outgoing_mail_server, outgoing_mail_port, context=context
        ) as server:
            server.login(sender_email, password)
            server.sendmail(sender_email, receiver_email, text)
def email_results(
uuid,
sender_email=settings.DSCRIPT_SENDER_EMAIL,
@@ -191,17 +217,12 @@ def email_results(
else:
subject = f"D-SCRIPT Results for {title} ({uuid})"
body = f"These are the results of your D-SCRIPT prediction on job {uuid}"
password = os.getenv("EMAIL_PWD")
text = create_message(
sender_email, receiver_email, subject, body, uuid, filename
)
# Log in to server using secure context and send email
context = ssl.create_default_context()
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
server.login(sender_email, password)
server.sendmail(sender_email, receiver_email, text)
send_message(sender_email, receiver_email, text)
def email_confirmation(
@@ -224,12 +245,7 @@ def email_confirmation(
else:
subject = f"D-SCRIPT Job {title} ({uuid}) Submission"
body = f"You have successfully submitted a job with id {uuid} for D-SCRIPT prediction. Keep track of this id to monitor your job status."
password = os.getenv("EMAIL_PWD")
text = create_message(sender_email, receiver_email, subject, body, uuid)
# Log in to server using secure context and send email
context = ssl.create_default_context()
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
server.login(sender_email, password)
server.sendmail(sender_email, receiver_email, text)
send_message(sender_email, receiver_email, text)

View File

@@ -15,4 +15,4 @@ class PredictConfig(AppConfig):
from .tasks import sweep_incomplete_jobs
rslt = sweep_incomplete_jobs.delay()
rslt.get()
rslt.forget()

View File

@@ -18,7 +18,7 @@ from rest_framework import status
from rest_framework.decorators import api_view
from rest_framework.response import Response
from dscript.fasta import parse_input
from dscript.fasta import parse
from .models import Job
from .tasks import process_job
@@ -67,7 +67,7 @@ def upload_stream_to_local(in_file, out_file):
def get_all_pairs(seq_file):
with open(seq_file, "r") as f:
nam, _ = parse_input(f.read())
nam, _ = parse(f)
pairs = "\n".join("\t".join(p) for p in itertools.combinations(nam, 2))
return pairs
@@ -87,7 +87,7 @@ class PredictionServerException(Exception):
def validate_inputs(seq_path, pair_path):
try:
with open(seq_path, "r") as f:
nam, _ = parse_input(f.read())
nam, _ = parse(f)
assert len(nam), "You must provide at least one sequence."
assert (
len(nam) < settings.DSCRIPT_MAX_SEQS
@@ -104,13 +104,15 @@ def validate_inputs(seq_path, pair_path):
assert (
df.shape[0] < settings.DSCRIPT_MAX_PAIRS
), f"Number of pairs {df.shape[0]} is larger than the maximum allowed ({settings.DSCRIPT_MAX_PAIRS})."
except AssertionError as err:
except (AssertionError, pd.errors.ParserError) as err:
raise PredictionServerException(
status.HTTP_406_NOT_ACCEPTABLE, f"Pairs parse error: {str(err)}"
)
names_in_pairs = set(df.iloc[:, 0]).union(df.iloc[:, 1])
names_in_seqs = set(nam)
names_in_seqs = set([i.split()[0] for i in nam])
logging.debug(names_in_pairs)
logging.debug(names_in_seqs)
if len(names_in_pairs.difference(names_in_seqs)):
raise PredictionServerException(
status.HTTP_406_NOT_ACCEPTABLE,
@@ -162,9 +164,6 @@ def predict(request):
# Validate inputs are properly formatted and allowed
n_seqs, n_pairs = validate_inputs(seq_path, pair_path)
logging.debug(n_seqs, seq_path)
logging.debug(n_pairs, pair_path)
except PredictionServerException as err:
logging.debug(err)
data = {"id": job_id, "submitted": False, "error": err.message}

View File

@@ -40,7 +40,7 @@ logging.basicConfig(
],
)
ALLOWED_HOSTS = []
ALLOWED_HOSTS = ["dscript-predict.csail.mit.edu", "localhost"]
# Application definition
@@ -162,7 +162,14 @@ DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
# D-SCRIPT Specific Default Variables
DSCRIPT_MODEL_VERSION = "human_v1"
DSCRIPT_DEVICE = -1
DSCRIPT_SENDER_EMAIL = "dscript.results@gmail.com"
# Deployment switch; it decides which address is used as the sender of
# outgoing mail.
DSCRIPT_DEPLOY_ENV = False
DSCRIPT_SENDER_EMAIL = (
    "no-reply@dscript-predict.csail.mit.edu"
    if DSCRIPT_DEPLOY_ENV
    else "dscript.results@gmail.com"
)
DSCRIPT_MAX_SEQS = 500
DSCRIPT_MAX_PAIRS = 100000
DSCRIPT_CONFIRM_SUBMISSION_EMAIL = True
SECRET_KEY = (
"django-insecure-x7ncjt3_(-q1qcph92&8zx7f9_g(yc0t#cd!vgx3uw40813n62"
)

View File

@@ -19,7 +19,7 @@ from predict import views
urlpatterns = [
path("admin/", admin.site.urls),
re_path("(^(?!(api|admin|view)).*$)", views.FrontendAppView.as_view()),
re_path("(^(?!(api|admin)).*$)", views.FrontendAppView.as_view()),
path("api/predict/", views.predict),
path("api/position/<uuid:uuid>/", views.get_position),
path("api/download/<uuid:uuid>/", views.get_download),