mirror of
https://github.com/dmlc/dgl.git
synced 2026-06-04 19:44:23 +08:00
[GraphBolt][CUDA] Optimize CopyTo performance. (#7634)
This commit is contained in:
committed by
GitHub
parent
5c902cd59e
commit
dfd491568d
@@ -369,9 +369,6 @@ class CopyTo(IterDataPipe):
|
||||
|
||||
def __iter__(self):
|
||||
for data in self.datapipe:
|
||||
if self.non_blocking:
|
||||
# The copy is non-blocking only if the contents of data are pinned.
|
||||
assert data.is_pinned(), f"{data} should be pinned."
|
||||
yield recursive_apply(
|
||||
data, apply_to, self.device, self.non_blocking
|
||||
)
|
||||
|
||||
@@ -231,11 +231,10 @@ class DataLoader(torch_data.DataLoader):
|
||||
datapipe_graph = dp_utils.replace_dp(
|
||||
datapipe_graph,
|
||||
copier,
|
||||
copier.datapipe.transform(
|
||||
lambda x: x.pin_memory()
|
||||
).prefetch(2)
|
||||
# After the data gets pinned, we can copy non_blocking.
|
||||
.copy_to(copier.device, non_blocking=True),
|
||||
# Add prefetch so that CPU and GPU can run concurrently.
|
||||
copier.datapipe.prefetch(2).copy_to(
|
||||
copier.device, non_blocking=True
|
||||
),
|
||||
)
|
||||
|
||||
# The stages after feature fetching are still done in the main process.
|
||||
|
||||
Reference in New Issue
Block a user