Improve CPU mode

2026-06-04 15:04:24 +08:00 · 2025-07-21 18:58:51 -04:00
parent cd10a97607
commit f13926a718
3 changed files with 46 additions and 47 deletions
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -49,31 +49,38 @@ Blocked, Multi-GPU Prediction

 .. code-block:: bash

-    usage: dscript predict [-h] [--proteins PROTEINS] [--pairs PAIRS] [--model MODEL] --embeddings EMBEDDINGS [--foldseek_fasta FOLDSEEK_FASTA] [-o OUTFILE] [-d DEVICE] [--store_cmaps] [--thresh THRESH] [--load_proc LOAD_PROC] [--blocks BLOCKS] [--sparse_loading]
+    usage: dscript predict [-h] [--proteins PROTEINS] [--pairs PAIRS] [--model MODEL] --embeddings EMBEDDINGS [--foldseek_fasta FOLDSEEK_FASTA] [-o OUTFILE] [-d DEVICE]
+                       [--store_cmaps] [--thresh THRESH] [--load_proc LOAD_PROC] [--blocks BLOCKS] [--sparse_loading]

    Make new predictions with a pre-trained model using blocked, multi-GPU pariwise inference. One of --proteins and --pairs is required.
-    
+
    options:
      -h, --help            show this help message and exit
      --proteins PROTEINS   File with protein IDs for which to predict all pairs, one per line; specify one of proteins or pairs
      --pairs PAIRS         File with candidate protein pairs to predict, one pair per line; specify one of proteins or pairs
-      --model MODEL         Pretrained Model. If this is a `.sav` or `.pt` file, it will be loaded. Otherwise, we will try to load `[model]` from HuggingFace hub [default: samsl/topsy_turvy_human_v1]
+      --model MODEL         Pretrained Model. If this is a `.sav` or `.pt` file, it will be loaded. Otherwise, we will try to load `[model]` from HuggingFace hub
+                            [default: samsl/topsy_turvy_human_v1]
      --embeddings EMBEDDINGS
                            h5 file with (a superset of) pre-embedded sequences. Generate with dscript embed.
      --foldseek_fasta FOLDSEEK_FASTA
-                            3di sequences in .fasta format. Can be generated using `dscript extract-3di. Default is None. If provided, TT3D will be run, otherwise default D-SCRIPT/TT will be run.
+                            3di sequences in .fasta format. Can be generated using `dscript extract-3di`. Default is None. If provided, TT3D will be run, otherwise default
+                            D-SCRIPT/TT will be run.
      -o OUTFILE, --outfile OUTFILE
                            File for predictions
      -d DEVICE, --device DEVICE
-                            The index of a compute device (GPU) to use, or -1 to use all. To use more than one but less than all available GPUs, set CUDA_VISIBLE_DEVICES beforehand and then set d=-1.
+                            Compute device to use. Options: 'cpu', 'all' (all GPUs), or GPU index (0, 1, 2, etc.). To use specific GPUs, set CUDA_VISIBLE_DEVICES
+                            beforehand and use 'all'. [default: all]
      --store_cmaps         Store contact maps for predicted pairs above `--thresh` in an h5 file
      --thresh THRESH       Positive prediction threshold - used to store contact maps and predictions in a separate file. [default: 0.5]
      --load_proc LOAD_PROC
-                            Number of processes to use when loading embeddings (-1 = # of available CPUs, default=16). Because loading is IO-bound, values larger that the # of CPUs are allowed.
-      --blocks BLOCKS       Number of equal-sized blocks to split proteins into. In the multi-block case, maximum (embedding) memory usage should be 3 blocks' worth. When multiple GPUs are used, memory usage may briefly be higher when different GPUs are working on tasks from different
-                            blocks. And, small blocks may lead to occasional brief hangs with multiple GPUs. Default 1.
-      --sparse_loading      Load only the proteins required from each block, but do not reuse loaded blocks in memory. Recommented when predicting with many blocks on sparse pairs, such that many pairs of blocks might contain no pairs of proteins of interest. Only available when blocks >
-                            1 and pairs specified. Maximum (embedding) memory usage with this option is 4 blocks' worth.
+                            Number of processes to use when loading embeddings (-1 = # of available CPUs, default=16). Because loading is IO-bound, values larger than the
+                            # of CPUs are allowed.
+      --blocks BLOCKS       Number of equal-sized blocks to split proteins into. In the multi-block case, maximum (embedding) memory usage should be 3 blocks' worth. When
+                            multiple GPUs are used, memory usage may briefly be higher when different GPUs are working on tasks from different blocks. And, small blocks
+                            may lead to occasional brief hangs with multiple GPUs. Default 1.
+      --sparse_loading      Load only the proteins required from each block, but do not reuse loaded blocks in memory. Recommended when predicting with many blocks on
+                            sparse pairs, such that many pairs of blocks might contain no pairs of proteins of interest. Only available when blocks > 1 and pairs
+                            specified. Maximum (embedding) memory usage with this option is 4 blocks' worth.

 Bipartite, Multi-GPU Prediction
 ~~~~~~~~~~
--- a/dscript/commands/par_worker.py
+++ b/dscript/commands/par_worker.py
@@ -26,12 +26,14 @@ def _predict(
            file=None,  # If None, will be printed
            print_also=True,
        )
+        use_cuda = False
    else:
        log(
            f"Using CUDA device {device.index} - {torch.cuda.get_device_name(device)}",
            file=None,  # If None, will be printed
            print_also=True,
        )
+        use_cuda = True
    # Load Model
    try:
        if modelPath.endswith(".sav") or modelPath.endswith(".pt"):
@@ -39,14 +41,14 @@ def _predict(
            model = torch.load(
                modelPath, map_location=torch.device(device), weights_only=False
            )  # Check moved to main
-            model.use_cuda = True
+            model.use_cuda = use_cuda
        else:
            logger.debug(f"Loading model from {modelPath} on device {device}.")
            # Safe to call concurrently - see https://github.com/huggingface/huggingface_hub/pull/2534
            # Prefer to download here (will only download once) for concurrency
            model = DSCRIPTModel.from_pretrained(modelPath, use_cuda=True)
            model = model.to(device=device)
-            model.use_cuda = True
+            model.use_cuda = use_cuda
    except Exception as e:
        log(f"Model {modelPath} failed: {e}", file=None, print_also=True)
        sys.exit(7)
--- a/dscript/commands/predict_block.py
+++ b/dscript/commands/predict_block.py
@@ -153,31 +153,38 @@ def main(args):
    if device_arg.lower() == "cpu":
        device = "cpu"
        use_cuda = False
+        n_gpu = 1
    elif device_arg.lower() == "all":
        device = -1  # Use all GPUs
        use_cuda = True
+    elif device_arg.isdigit(): #Allow only nonnegative integers
+        device = int(device_arg)
+        use_cuda = True
    else:
-        try:
-            device = int(device_arg)
-            use_cuda = True
-        except ValueError:
-            log(
-                f"Invalid device argument: {device_arg}. Use 'cpu', 'all', or a GPU index.",
-                file=logFile,
-                print_also=True,
-            )
-            logFile.close()
-            sys.exit(7)
-
-    # Validate CUDA availability if GPU requested
-    if use_cuda and not torch.cuda.is_available():
        log(
-            "CUDA not available but GPU requested. Use --device cpu for CPU execution.",
+            f"Invalid device argument: {device_arg}. Use 'cpu', 'all', or a GPU index.",
            file=logFile,
            print_also=True,
        )
        logFile.close()
        sys.exit(1)
+    # Validate CUDA availability and device index if GPU requested
+    if use_cuda:
+        if not torch.cuda.is_available():
+            log(
+                "CUDA not available but GPU requested. Use --device cpu for CPU execution.",
+                file=logFile,
+                print_also=True,
+            )
+            logFile.close()
+            sys.exit(1)
+        n_gpu = torch.cuda.device_count()
+        if device >= n_gpu:
+            log(
+                f"Invalid device argument: {device_arg} exceeds the number of GPUs available, which is {n_gpu}. Please specify a valid GPU, or use --device cpu for CPU execution.", file=logFile, 
+                print_also=True,
+            )
+

    threshold = args.thresh
    foldseek_fasta = args.foldseek_fasta
@@ -298,8 +305,7 @@ def main(args):
    # This uses the pytorch spawn function to start a bunch of processes using spawn
    # Apparently, spawn (method) is required when using CUDA in the processes

-    if use_cuda and device < 0:  # Use all GPUs
-        n_gpu = torch.cuda.device_count()
+    if device == -1:  # Use all GPUs
        _ = mp.spawn(
            _predict,
            args=(
@@ -313,27 +319,11 @@ def main(args):
            nprocs=n_gpu,
            join=False,
        )
-    elif use_cuda:  # Use specific GPU
+    else:  # Use CPU or specific GPU
        p = mp.Process(
            target=_predict,
            args=(
-                device,
-                modelPath,
-                input_queue,
-                output_queue,
-                args.store_cmaps,
-                use_fs,
-                pair_done_queue,
-            ),
-        )
-        p.start()
-        n_gpu = 1
-
-    if not use_cuda:  # CPU execution
-        p = mp.Process(
-            target=_predict,
-            args=(
-                "cpu",
+                device, #"cpu" for CPU, or an index for a GPU
                modelPath,
                input_queue,
                output_queue,