Resolve merge conflict

2026-06-04 15:04:24 +08:00 · 2023-05-26 08:47:11 -07:00
parent ca04e122a8 2b673b6dc4
commit e70ee3d7dc
5 changed files with 8674 additions and 46454 deletions
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -21,5 +21,5 @@ sphinx:
 #  configuration: mkdocs.yml

 # Optionally build your docs in additional formats such as PDF
-formats:
-  - pdf
+# formats:
+#   - pdf
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - Update pretrained API and docs to include Topsy-Turvy
 - Add retry decorator to get_pretrained if download fails
 - Add ability to set a random seed for training
+- Update `evaluate` code to also store metrics (AUPR and AUROC) in a `_metrics.txt` file

 ### v0.2.1: 2022-06-28 -- Bug fixes
 - Add biopython to setup.py
--- a/data/seqs/ecoli.fasta
+++ b/data/seqs/ecoli.fasta
--- a/dscript/commands/evaluate.py
+++ b/dscript/commands/evaluate.py
@@ -262,6 +262,14 @@ def main(args):

    phats = np.array(phats)
    labels = np.array(labels)
+
+    with open(outPath + "_metrics.txt", "w+") as f:
+        aupr = average_precision_score(labels, phats)
+        auroc = roc_auc_score(labels, phats)
+
+        log(f"AUPR: {aupr}", file=f)
+        log(f"AUROC: {auroc}", file=f)
+
    plot_eval_predictions(labels, phats, outPath)

    outFile.close()
--- a/dscript/commands/train.py
+++ b/dscript/commands/train.py
@@ -601,7 +601,6 @@ def train_model(args, output):
    no_augment = args.no_augment

    embedding_h5 = args.embedding
-    # h5fi = h5py.File(embedding_h5, "r")

    ########## Foldseek code #########################3
    allow_foldseek = args.allow_foldseek
@@ -664,11 +663,14 @@ def train_model(args, output):
    log("Loading embeddings...", file=output)
    output.flush()

-    # embeddings = {}
+    
    all_proteins = set(train_p1).union(train_p2).union(test_p1).union(test_p2)
-    # for prot_name in tqdm(all_proteins):
-    #     embeddings[prot_name] = torch.from_numpy(h5fi[prot_name][:, :])
-    embeddings = load_hdf5_parallel(embedding_h5, all_proteins)
+    
+    embeddings = {}
+    with h5py.File(embedding_h5, "r") as h5fi:
+        for prot_name in tqdm(all_proteins):
+            embeddings[prot_name] = torch.from_numpy(h5fi[prot_name][:, :])
+    # embeddings = load_hdf5_parallel(embedding_h5, all_proteins)

    # Topsy-Turvy
    run_tt = args.run_tt