Improve CPU mode

This commit is contained in:
Daniel E. Schaffer
2025-07-21 18:58:51 -04:00
parent cd10a97607
commit f13926a718
3 changed files with 46 additions and 47 deletions

View File

@@ -49,31 +49,38 @@ Blocked, Multi-GPU Prediction
.. code-block:: bash
usage: dscript predict [-h] [--proteins PROTEINS] [--pairs PAIRS] [--model MODEL] --embeddings EMBEDDINGS [--foldseek_fasta FOLDSEEK_FASTA] [-o OUTFILE] [-d DEVICE] [--store_cmaps] [--thresh THRESH] [--load_proc LOAD_PROC] [--blocks BLOCKS] [--sparse_loading]
usage: dscript predict [-h] [--proteins PROTEINS] [--pairs PAIRS] [--model MODEL] --embeddings EMBEDDINGS [--foldseek_fasta FOLDSEEK_FASTA] [-o OUTFILE] [-d DEVICE]
[--store_cmaps] [--thresh THRESH] [--load_proc LOAD_PROC] [--blocks BLOCKS] [--sparse_loading]
Make new predictions with a pre-trained model using blocked, multi-GPU pairwise inference. One of --proteins and --pairs is required.
options:
-h, --help show this help message and exit
--proteins PROTEINS File with protein IDs for which to predict all pairs, one per line; specify one of proteins or pairs
--pairs PAIRS File with candidate protein pairs to predict, one pair per line; specify one of proteins or pairs
--model MODEL Pretrained Model. If this is a `.sav` or `.pt` file, it will be loaded. Otherwise, we will try to load `[model]` from HuggingFace hub [default: samsl/topsy_turvy_human_v1]
--model MODEL Pretrained Model. If this is a `.sav` or `.pt` file, it will be loaded. Otherwise, we will try to load `[model]` from HuggingFace hub
[default: samsl/topsy_turvy_human_v1]
--embeddings EMBEDDINGS
h5 file with (a superset of) pre-embedded sequences. Generate with dscript embed.
--foldseek_fasta FOLDSEEK_FASTA
3di sequences in .fasta format. Can be generated using `dscript extract-3di`. Default is None. If provided, TT3D will be run, otherwise default D-SCRIPT/TT will be run.
3di sequences in .fasta format. Can be generated using `dscript extract-3di`. Default is None. If provided, TT3D will be run, otherwise default
D-SCRIPT/TT will be run.
-o OUTFILE, --outfile OUTFILE
File for predictions
-d DEVICE, --device DEVICE
The index of a compute device (GPU) to use, or -1 to use all. To use more than one but less than all available GPUs, set CUDA_VISIBLE_DEVICES beforehand and then set d=-1.
Compute device to use. Options: 'cpu', 'all' (all GPUs), or GPU index (0, 1, 2, etc.). To use specific GPUs, set CUDA_VISIBLE_DEVICES
beforehand and use 'all'. [default: all]
--store_cmaps Store contact maps for predicted pairs above `--thresh` in an h5 file
--thresh THRESH Positive prediction threshold - used to store contact maps and predictions in a separate file. [default: 0.5]
--load_proc LOAD_PROC
Number of processes to use when loading embeddings (-1 = # of available CPUs, default=16). Because loading is IO-bound, values larger than the # of CPUs are allowed.
--blocks BLOCKS Number of equal-sized blocks to split proteins into. In the multi-block case, maximum (embedding) memory usage should be 3 blocks' worth. When multiple GPUs are used, memory usage may briefly be higher when different GPUs are working on tasks from different
blocks. And, small blocks may lead to occasional brief hangs with multiple GPUs. Default 1.
--sparse_loading Load only the proteins required from each block, but do not reuse loaded blocks in memory. Recommended when predicting with many blocks on sparse pairs, such that many pairs of blocks might contain no pairs of proteins of interest. Only available when blocks >
1 and pairs specified. Maximum (embedding) memory usage with this option is 4 blocks' worth.
Number of processes to use when loading embeddings (-1 = # of available CPUs, default=16). Because loading is IO-bound, values larger than the
# of CPUs are allowed.
--blocks BLOCKS Number of equal-sized blocks to split proteins into. In the multi-block case, maximum (embedding) memory usage should be 3 blocks' worth. When
multiple GPUs are used, memory usage may briefly be higher when different GPUs are working on tasks from different blocks. And, small blocks
may lead to occasional brief hangs with multiple GPUs. Default 1.
--sparse_loading Load only the proteins required from each block, but do not reuse loaded blocks in memory. Recommended when predicting with many blocks on
sparse pairs, such that many pairs of blocks might contain no pairs of proteins of interest. Only available when blocks > 1 and pairs
specified. Maximum (embedding) memory usage with this option is 4 blocks' worth.
Bipartite, Multi-GPU Prediction
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -26,12 +26,14 @@ def _predict(
file=None, # If None, will be printed
print_also=True,
)
use_cuda = False
else:
log(
f"Using CUDA device {device.index} - {torch.cuda.get_device_name(device)}",
file=None, # If None, will be printed
print_also=True,
)
use_cuda = True
# Load Model
try:
if modelPath.endswith(".sav") or modelPath.endswith(".pt"):
@@ -39,14 +41,14 @@ def _predict(
model = torch.load(
modelPath, map_location=torch.device(device), weights_only=False
) # Check moved to main
model.use_cuda = True
model.use_cuda = use_cuda
else:
logger.debug(f"Loading model from {modelPath} on device {device}.")
# Safe to call concurrently - see https://github.com/huggingface/huggingface_hub/pull/2534
# Prefer to download here (will only download once) for concurrency
model = DSCRIPTModel.from_pretrained(modelPath, use_cuda=True)
model = model.to(device=device)
model.use_cuda = True
model.use_cuda = use_cuda
except Exception as e:
log(f"Model {modelPath} failed: {e}", file=None, print_also=True)
sys.exit(7)

View File

@@ -153,31 +153,38 @@ def main(args):
if device_arg.lower() == "cpu":
device = "cpu"
use_cuda = False
n_gpu = 1
elif device_arg.lower() == "all":
device = -1 # Use all GPUs
use_cuda = True
elif device_arg.isdigit(): #Allow only nonnegative integers
device = int(device_arg)
use_cuda = True
else:
try:
device = int(device_arg)
use_cuda = True
except ValueError:
log(
f"Invalid device argument: {device_arg}. Use 'cpu', 'all', or a GPU index.",
file=logFile,
print_also=True,
)
logFile.close()
sys.exit(7)
# Validate CUDA availability if GPU requested
if use_cuda and not torch.cuda.is_available():
log(
"CUDA not available but GPU requested. Use --device cpu for CPU execution.",
f"Invalid device argument: {device_arg}. Use 'cpu', 'all', or a GPU index.",
file=logFile,
print_also=True,
)
logFile.close()
sys.exit(1)
# Validate CUDA availability and device index if GPU requested
if use_cuda:
if not torch.cuda.is_available():
log(
"CUDA not available but GPU requested. Use --device cpu for CPU execution.",
file=logFile,
print_also=True,
)
logFile.close()
sys.exit(1)
n_gpu = torch.cuda.device_count()
if device >= n_gpu:
log(
f"Invalid device argument: {device_arg} exceeds the number of GPUs available, which is {n_gpu}. Please specify a valid GPU, or use --device cpu for CPU execution.", file=logFile,
print_also=True,
)
threshold = args.thresh
foldseek_fasta = args.foldseek_fasta
@@ -298,8 +305,7 @@ def main(args):
# This uses the pytorch spawn function to start a bunch of processes using spawn
# Apparently, spawn (method) is required when using CUDA in the processes
if use_cuda and device < 0: # Use all GPUs
n_gpu = torch.cuda.device_count()
if device == -1: # Use all GPUs
_ = mp.spawn(
_predict,
args=(
@@ -313,27 +319,11 @@ def main(args):
nprocs=n_gpu,
join=False,
)
elif use_cuda: # Use specific GPU
else: # Use CPU or specific GPU
p = mp.Process(
target=_predict,
args=(
device,
modelPath,
input_queue,
output_queue,
args.store_cmaps,
use_fs,
pair_done_queue,
),
)
p.start()
n_gpu = 1
if not use_cuda: # CPU execution
p = mp.Process(
target=_predict,
args=(
"cpu",
device, #"cpu" for CPU, or an index for a GPU
modelPath,
input_queue,
output_queue,