[TestFailure] Resolving exceptions thrown across multiple GraphBolt tests. (#7852)

2026-06-03 19:34:33 +08:00 · 2025-01-07 22:05:34 -08:00
parent 275183b16b
commit 540dd2ba4d
53 changed files with 301 additions and 248 deletions
--- a/dglgo/dglgo/apply_pipeline/graphpred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/graphpred/gen.py
@@ -63,7 +63,7 @@ class ApplyGraphpredPipeline(PipelineBase):
            cpt: str = typer.Option(..., help="input checkpoint file path"),
        ):
            # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
            if data is None:
                print("data is not specified, use the training dataset")
                data = train_cfg["data_name"]
@@ -119,7 +119,9 @@ class ApplyGraphpredPipeline(PipelineBase):
        cls.user_cfg_cls(**user_cfg_dict)

        # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]

        # Dict for code rendering
        render_cfg = deepcopy(user_cfg_dict)
--- a/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/graphpred/graphpred.jinja-py
@@ -57,7 +57,7 @@ def main():
        data to have the same number of input edge features, got {:d} and {:d}'.format(model_edge_feat_size, data_edge_feat_size)

    model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    pred = infer(device, data_loader, model).detach().cpu()

    # Dump the results
--- a/dglgo/dglgo/apply_pipeline/nodepred/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred/gen.py
@@ -48,7 +48,7 @@ class ApplyNodepredPipeline(PipelineBase):
            cpt: str = typer.Option(..., help="input checkpoint file path"),
        ):
            # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
            if data is None:
                print("data is not specified, use the training dataset")
                data = train_cfg["data_name"]
@@ -101,7 +101,9 @@ class ApplyNodepredPipeline(PipelineBase):
        cls.user_cfg_cls(**user_cfg_dict)

        # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]

        # Dict for code rendering
        render_cfg = deepcopy(user_cfg_dict)
--- a/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/nodepred/nodepred.jinja-py
@@ -48,7 +48,7 @@ def main():
                features, got {:d} and {:d}'.format(model_in_size, data_in_size)

    model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    logits = infer(device, data, model)
    pred = logits.argmax(dim=1).cpu()

--- a/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
+++ b/dglgo/dglgo/apply_pipeline/nodepred_sample/gen.py
@@ -48,7 +48,7 @@ class ApplyNodepredNsPipeline(PipelineBase):
            cpt: str = typer.Option(..., help="input checkpoint file path"),
        ):
            # Training configuration
-            train_cfg = torch.load(cpt)["cfg"]
+            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
            if data is None:
                print("data is not specified, use the training dataset")
                data = train_cfg["data_name"]
@@ -101,7 +101,9 @@ class ApplyNodepredNsPipeline(PipelineBase):
        cls.user_cfg_cls(**user_cfg_dict)

        # Training configuration
-        train_cfg = torch.load(user_cfg_dict["cpt_path"])["cfg"]
+        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
+            "cfg"
+        ]

        # Dict for code rendering
        render_cfg = deepcopy(user_cfg_dict)
--- a/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py
+++ b/dglgo/dglgo/apply_pipeline/nodepred_sample/nodepred-ns.jinja-py
@@ -48,7 +48,7 @@ def main():
                features, got {:d} and {:d}'.format(model_in_size, data_in_size)

    model = {{ model_class_name }}(**cfg['model'])
-    model.load_state_dict(torch.load(cfg['cpt_path'], map_location='cpu')['model'])
+    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    logits = infer(device, data, model)
    pred = logits.argmax(dim=1).cpu()

--- a/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py
+++ b/dglgo/dglgo/pipeline/graphpred/graphpred.jinja-py
@@ -101,7 +101,7 @@ def main(run, cfg, data):
        else:
            lr_scheduler.step()

-    model.load_state_dict(torch.load(tmp_cpt_path))
+    model.load_state_dict(torch.load(tmp_cpt_path, weights_only=False))
    os.remove(tmp_cpt_path)
    test_metric = evaluate(device, test_loader, model)
    print('Test Metric: {:.4f}'.format(test_metric))
--- a/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py
+++ b/dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py
@@ -42,7 +42,7 @@ class EarlyStopping:
        torch.save(model.state_dict(), self.checkpoint_path)

    def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))

    def close(self):
        os.remove(self.checkpoint_path)
--- a/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py
+++ b/dglgo/dglgo/pipeline/nodepred_sample/nodepred-ns.jinja-py
@@ -42,7 +42,7 @@ class EarlyStopping:
        torch.save(model.state_dict(), self.checkpoint_path)

    def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(torch.load(self.checkpoint_path, weights_only=False))

    def close(self):
        os.remove(self.checkpoint_path)
--- a/dglgo/dglgo/utils/early_stop.py
+++ b/dglgo/dglgo/utils/early_stop.py
@@ -34,4 +34,6 @@ class EarlyStopping:
        torch.save(model.state_dict(), self.checkpoint_path)

    def load_checkpoint(self, model):
-        model.load_state_dict(torch.load(self.checkpoint_path))
+        model.load_state_dict(
+            torch.load(self.checkpoint_path, weights_only=False)
+        )
--- a/examples/pytorch/GNN-FiLM/main.py
+++ b/examples/pytorch/GNN-FiLM/main.py
@@ -194,7 +194,9 @@ def main(args):
    model.eval()
    test_loss = []
    test_f1 = []
-    model.load_state_dict(torch.load(os.path.join(args.save_dir, args.name)))
+    model.load_state_dict(
+        torch.load(os.path.join(args.save_dir, args.name), weights_only=False)
+    )
    with torch.no_grad():
        for batch in test_set:
            g = batch.graph
--- a/examples/pytorch/TAHIN/main.py
+++ b/examples/pytorch/TAHIN/main.py
@@ -111,7 +111,9 @@ def main(args):
    # test use the best model
    model.eval()
    with torch.no_grad():
-        model.load_state_dict(torch.load("TAHIN" + "_" + args.dataset))
+        model.load_state_dict(
+            torch.load("TAHIN" + "_" + args.dataset, weights_only=False)
+        )
        test_loss = []
        test_acc = []
        test_auc = []
--- a/examples/pytorch/argo/main.py
+++ b/examples/pytorch/argo/main.py
@@ -180,7 +180,7 @@ def train(

    PATH = "model.pt"
    if counter[0] != 0:
-        checkpoint = torch.load(PATH)
+        checkpoint = torch.load(PATH, weights_only=False)
        model.load_state_dict(checkpoint["model_state_dict"])
        opt.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
--- a/examples/pytorch/correct_and_smooth/main.py
+++ b/examples/pytorch/correct_and_smooth/main.py
@@ -66,7 +66,9 @@ def main():
    if args.pretrain:
        print("---------- Before ----------")
        model.load_state_dict(
-            torch.load(f"base/{args.dataset}-{args.model}.pt")
+            torch.load(
+                f"base/{args.dataset}-{args.model}.pt", weights_only=False
+            )
        )
        model.eval()

--- a/examples/pytorch/dgi/train.py
+++ b/examples/pytorch/dgi/train.py
@@ -126,7 +126,7 @@ def main(args):

    # train classifier
    print("Loading {}th epoch".format(best_t))
-    dgi.load_state_dict(torch.load("best_dgi.pkl"))
+    dgi.load_state_dict(torch.load("best_dgi.pkl", weights_only=False))
    embeds = dgi.encoder(features, corrupt=False)
    embeds = embeds.detach()
    mean = 0
--- a/examples/pytorch/diffpool/train.py
+++ b/examples/pytorch/diffpool/train.py
@@ -223,7 +223,8 @@ def graph_classify_task(prog_args):
                + "/"
                + prog_args.dataset
                + "/model.iter-"
-                + str(prog_args.load_epoch)
+                + str(prog_args.load_epoch),
+                weights_only=False,
            )
        )

@@ -334,7 +335,8 @@ def evaluate(dataloader, model, prog_args, logger=None):
                + "/"
                + prog_args.dataset
                + "/model.iter-"
-                + str(logger["best_epoch"])
+                + str(logger["best_epoch"]),
+                weights_only=False,
            )
        )
    model.eval()
--- a/examples/pytorch/dimenet/main.py
+++ b/examples/pytorch/dimenet/main.py
@@ -238,7 +238,9 @@ def main(model_cnf):
    if pretrain_params["flag"]:
        torch_path = pretrain_params["path"]
        target = model_params["targets"][0]
-        model.load_state_dict(torch.load(f"{torch_path}/{target}.pt"))
+        model.load_state_dict(
+            torch.load(f"{torch_path}/{target}.pt", weights_only=False)
+        )

        logger.info("Testing with Pretrained model")
        predictions, labels = evaluate(device, model, test_loader)
--- a/examples/pytorch/gatv2/train.py
+++ b/examples/pytorch/gatv2/train.py
@@ -178,7 +178,9 @@ def main(args):

    print()
    if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt"))
+        model.load_state_dict(
+            torch.load("es_checkpoint.pt", weights_only=False)
+        )
    acc = evaluate(g, model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

--- a/examples/pytorch/graphsaint/train_sampling.py
+++ b/examples/pytorch/graphsaint/train_sampling.py
@@ -214,7 +214,10 @@ def main(args, task):
    # test
    if args.use_val:
        model.load_state_dict(
-            torch.load(os.path.join(log_dir, "best_model_{}.pkl".format(task)))
+            torch.load(
+                os.path.join(log_dir, "best_model_{}.pkl".format(task)),
+                weights_only=False,
+            )
        )
    if cpu_flag and cuda:
        model = model.to("cpu")
--- a/examples/pytorch/graphwriter/train.py
+++ b/examples/pytorch/graphwriter/train.py
@@ -161,7 +161,7 @@ def main(args):
    model = GraphWriter(args)
    model.to(args.device)
    if args.test:
-        model = torch.load(args.save_model)
+        model = torch.load(args.save_model, weights_only=False)
        model.args = args
        print(model)
        test(model, test_dataloader, args)
--- a/examples/pytorch/han/utils.py
+++ b/examples/pytorch/han/utils.py
@@ -304,4 +304,4 @@ class EarlyStopping(object):

    def load_checkpoint(self, model):
        """Load the latest checkpoint."""
-        model.load_state_dict(torch.load(self.filename))
+        model.load_state_dict(torch.load(self.filename, weights_only=False))
--- a/examples/pytorch/hardgat/train.py
+++ b/examples/pytorch/hardgat/train.py
@@ -154,7 +154,9 @@ def main(args):

    print()
    if args.early_stop:
-        model.load_state_dict(torch.load("es_checkpoint.pt"))
+        model.load_state_dict(
+            torch.load("es_checkpoint.pt", weights_only=False)
+        )
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))

--- a/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
+++ b/examples/pytorch/hilander/PSS/Smooth_AP/src/netlib.py
@@ -71,7 +71,9 @@ def networkselect(opt):
        raise Exception("Network {} not available!".format(opt.arch))

    if opt.resume:
-        weights = torch.load(os.path.join(opt.save_path, opt.resume))
+        weights = torch.load(
+            os.path.join(opt.save_path, opt.resume), weights_only=False
+        )
        weights_state_dict = weights["state_dict"]

        if torch.cuda.device_count() > 1:
--- a/examples/pytorch/hilander/PSS/test_subg_inat.py
+++ b/examples/pytorch/hilander/PSS/test_subg_inat.py
@@ -173,7 +173,7 @@ if not args.use_gt:
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
    model = model.to(device)
    model.eval()

--- a/examples/pytorch/hilander/test.py
+++ b/examples/pytorch/hilander/test.py
@@ -83,7 +83,7 @@ if not args.use_gt:
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
    model = model.to(device)
    model.eval()

--- a/examples/pytorch/hilander/test_subg.py
+++ b/examples/pytorch/hilander/test_subg.py
@@ -104,7 +104,7 @@ if not args.use_gt:
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
-    model.load_state_dict(torch.load(args.model_filename))
+    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
    model = model.to(device)
    model.eval()

--- a/examples/pytorch/jtnn/vaetrain_dgl.py
+++ b/examples/pytorch/jtnn/vaetrain_dgl.py
@@ -54,7 +54,7 @@ lr = float(opts.lr)
 model = DGLJTNNVAE(vocab, hidden_size, latent_size, depth)

 if opts.model_path is not None:
-    model.load_state_dict(torch.load(opts.model_path))
+    model.load_state_dict(torch.load(opts.model_path, weights_only=False))
 else:
    for param in model.parameters():
        if param.dim() == 1:
--- a/examples/pytorch/lda/lda_model.py
+++ b/examples/pytorch/lda/lda_model.py
@@ -496,6 +496,6 @@ if __name__ == "__main__":
    with io.BytesIO() as f:
        model.save(f)
        f.seek(0)
-        print(torch.load(f))
+        print(torch.load(f, weights_only=False))

    print("Testing LatentDirichletAllocation passed!")
--- a/examples/pytorch/ogb/ngnn_seal/main.py
+++ b/examples/pytorch/ogb/ngnn_seal/main.py
@@ -625,8 +625,12 @@ if __name__ == "__main__":
                        args.res_dir,
                        f"run{run+1}_optimizer_checkpoint{epoch}.pth",
                    )
-                    model.load_state_dict(torch.load(model_name))
-                    optimizer.load_state_dict(torch.load(optimizer_name))
+                    model.load_state_dict(
+                        torch.load(model_name, weights_only=False)
+                    )
+                    optimizer.load_state_dict(
+                        torch.load(optimizer_name, weights_only=False)
+                    )
                    tested[epoch] = (
                        test(final_val_loader, dataset.eval_metric)[
                            dataset.eval_metric
--- a/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py
+++ b/examples/pytorch/ogb/ogbn-arxiv/correct_and_smooth.py
@@ -179,7 +179,7 @@ def main():

    for pred_file in glob.iglob(args.pred_files):
        print("load:", pred_file)
-        pred = torch.load(pred_file)
+        pred = torch.load(pred_file, weights_only=False)
        val_acc, test_acc = run(
            args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator
        )
--- a/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py
+++ b/examples/pytorch/ogb/ogbn-proteins/main_proteins_full_dgl.py
@@ -168,7 +168,7 @@ def main(args):
        if num_patient_epochs == args["patience"]:
            break

-    model.load_state_dict(torch.load(model_path))
+    model.load_state_dict(torch.load(model_path, weights_only=False))
    train_score, val_score, test_score = run_an_eval_epoch(
        graph, splitted_idx, model, evaluator
    )
--- a/examples/pytorch/ogb_lsc/MAG240M/train.py
+++ b/examples/pytorch/ogb_lsc/MAG240M/train.py
@@ -247,7 +247,7 @@ def test(args, dataset, g, feats, paper_offset):
        0.5,
        "paper",
    ).cuda()
-    model.load_state_dict(torch.load(args.model_path))
+    model.load_state_dict(torch.load(args.model_path, weights_only=False))

    model.eval()
    correct = total = 0
--- a/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py
+++ b/examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py
@@ -304,7 +304,7 @@ def test(args, dataset, g, feats, paper_offset):
    ).cuda()

    # load ddp's model parameters, we need to remove the name of 'module.'
-    state_dict = torch.load(args.model_path)
+    state_dict = torch.load(args.model_path, weights_only=False)
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]
--- a/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py
+++ b/examples/pytorch/ogb_lsc/PCQM4M/test_inference.py
@@ -206,7 +206,7 @@ def main():
        raise RuntimeError(f"Checkpoint file not found at {checkpoint_path}")

    ## reading in checkpoint
-    checkpoint = torch.load(checkpoint_path)
+    checkpoint = torch.load(checkpoint_path, weights_only=False)
    model.load_state_dict(checkpoint["model_state_dict"])

    print("Predicting on test data...")
--- a/examples/pytorch/pointcloud/bipointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/bipointnet/train_cls.py
@@ -136,7 +136,9 @@ elif args.model == "bipointnet2_ssg":

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)

--- a/examples/pytorch/pointcloud/edgeconv/main.py
+++ b/examples/pytorch/pointcloud/edgeconv/main.py
@@ -115,7 +115,9 @@ dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = Model(20, [64, 64, 128, 256], [512, 512, 256], 40)
 model = model.to(dev)
 if args.load_model_path:
-    model.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    model.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

--- a/examples/pytorch/pointcloud/pct/train_cls.py
+++ b/examples/pytorch/pointcloud/pct/train_cls.py
@@ -138,7 +138,9 @@ net = PointTransformerCLS()

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )


 opt = torch.optim.SGD(
--- a/examples/pytorch/pointcloud/pct/train_partseg.py
+++ b/examples/pytorch/pointcloud/pct/train_partseg.py
@@ -181,7 +181,9 @@ net = PointTransformerSeg()

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 opt = torch.optim.SGD(
    net.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9
--- a/examples/pytorch/pointcloud/point_transformer/train_cls.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_cls.py
@@ -139,7 +139,9 @@ net = PointTransformerCLS(40, batch_size, feature_dim=6)

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 if args.opt == "sgd":
    # The optimizer strategy described in paper:
--- a/examples/pytorch/pointcloud/point_transformer/train_partseg.py
+++ b/examples/pytorch/pointcloud/point_transformer/train_partseg.py
@@ -185,7 +185,9 @@ net = PointTransformerSeg(50, batch_size)

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 if args.opt == "sgd":
    # The optimizer strategy described in paper:
--- a/examples/pytorch/pointcloud/pointnet/train_cls.py
+++ b/examples/pytorch/pointcloud/pointnet/train_cls.py
@@ -140,7 +140,9 @@ elif args.model == "pointnet2_msg":

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4)

--- a/examples/pytorch/pointcloud/pointnet/train_partseg.py
+++ b/examples/pytorch/pointcloud/pointnet/train_partseg.py
@@ -187,7 +187,9 @@ elif args.model == "pointnet2_msg":

 net = net.to(dev)
 if args.load_model_path:
-    net.load_state_dict(torch.load(args.load_model_path, map_location=dev))
+    net.load_state_dict(
+        torch.load(args.load_model_path, weights_only=False, map_location=dev)
+    )

 opt = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-4)
 scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.5)
--- a/examples/pytorch/rgcn/link.py
+++ b/examples/pytorch/rgcn/link.py
@@ -336,7 +336,7 @@ if __name__ == "__main__":

    # testing
    print("Testing...")
-    checkpoint = torch.load(model_state_file)
+    checkpoint = torch.load(model_state_file, weights_only=False)
    model = model.cpu()  # test on CPU
    model.eval()
    model.load_state_dict(checkpoint["state_dict"])
--- a/examples/pytorch/rrn/sudoku_solver.py
+++ b/examples/pytorch/rrn/sudoku_solver.py
@@ -25,7 +25,9 @@ def solve_sudoku(puzzle):
        urllib.request.urlretrieve(url, model_filename)

    model = SudokuNN(num_steps=64, edge_drop=0.0)
-    model.load_state_dict(torch.load(model_filename, map_location="cpu"))
+    model.load_state_dict(
+        torch.load(model_filename, weights_only=False, map_location="cpu")
+    )
    model.eval()

    g = _basic_sudoku_graph()
--- a/examples/pytorch/rrn/train_sudoku.py
+++ b/examples/pytorch/rrn/train_sudoku.py
@@ -79,7 +79,7 @@ def main(args):
        if not os.path.exists(model_path):
            raise FileNotFoundError("Saved model not Found!")

-        model.load_state_dict(torch.load(model_path))
+        model.load_state_dict(torch.load(model_path, weights_only=False))
        model.to(device)

        test_dataloader = sudoku_dataloader(args.batch_size, segment="test")
--- a/examples/pytorch/stgcn_wave/main.py
+++ b/examples/pytorch/stgcn_wave/main.py
@@ -1,183 +1,180 @@
-import argparse
-import random
-
-import numpy as np
-import pandas as pd
-import scipy.sparse as sp
-import torch
-import torch.nn as nn
-from load_data import *
-from model import *
-from sensors2graph import *
-from sklearn.preprocessing import StandardScaler
-from utils import *
-
-import dgl
-
-parser = argparse.ArgumentParser(description="STGCN_WAVE")
-parser.add_argument("--lr", default=0.001, type=float, help="learning rate")
-parser.add_argument("--disablecuda", action="store_true", help="Disable CUDA")
-parser.add_argument(
-    "--batch_size",
-    type=int,
-    default=50,
-    help="batch size for training and validation (default: 50)",
-)
-parser.add_argument(
-    "--epochs", type=int, default=50, help="epochs for training  (default: 50)"
-)
-parser.add_argument(
-    "--num_layers", type=int, default=9, help="number of layers"
-)
-parser.add_argument("--window", type=int, default=144, help="window length")
-parser.add_argument(
-    "--sensorsfilepath",
-    type=str,
-    default="./data/sensor_graph/graph_sensor_ids.txt",
-    help="sensors file path",
-)
-parser.add_argument(
-    "--disfilepath",
-    type=str,
-    default="./data/sensor_graph/distances_la_2012.csv",
-    help="distance file path",
-)
-parser.add_argument(
-    "--tsfilepath", type=str, default="./data/metr-la.h5", help="ts file path"
-)
-parser.add_argument(
-    "--savemodelpath",
-    type=str,
-    default="stgcnwavemodel.pt",
-    help="save model path",
-)
-parser.add_argument(
-    "--pred_len",
-    type=int,
-    default=5,
-    help="how many steps away we want to predict",
-)
-parser.add_argument(
-    "--control_str",
-    type=str,
-    default="TNTSTNTST",
-    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
-)
-parser.add_argument(
-    "--channels",
-    type=int,
-    nargs="+",
-    default=[1, 16, 32, 64, 32, 128],
-    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
-)
-args = parser.parse_args()
-
-device = (
-    torch.device("cuda")
-    if torch.cuda.is_available() and not args.disablecuda
-    else torch.device("cpu")
-)
-
-with open(args.sensorsfilepath) as f:
-    sensor_ids = f.read().strip().split(",")
-
-distance_df = pd.read_csv(args.disfilepath, dtype={"from": "str", "to": "str"})
-
-adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
-sp_mx = sp.coo_matrix(adj_mx)
-G = dgl.from_scipy(sp_mx)
-
-
-df = pd.read_hdf(args.tsfilepath)
-num_samples, num_nodes = df.shape
-
-tsdata = df.to_numpy()
-
-
-n_his = args.window
-
-save_path = args.savemodelpath
-
-
-n_pred = args.pred_len
-n_route = num_nodes
-blocks = args.channels
-# blocks = [1, 16, 32, 64, 32, 128]
-drop_prob = 0
-num_layers = args.num_layers
-
-batch_size = args.batch_size
-epochs = args.epochs
-lr = args.lr
-
-
-W = adj_mx
-len_val = round(num_samples * 0.1)
-len_train = round(num_samples * 0.7)
-train = df[:len_train]
-val = df[len_train : len_train + len_val]
-test = df[len_train + len_val :]
-
-scaler = StandardScaler()
-train = scaler.fit_transform(train)
-val = scaler.transform(val)
-test = scaler.transform(test)
-
-
-x_train, y_train = data_transform(train, n_his, n_pred, device)
-x_val, y_val = data_transform(val, n_his, n_pred, device)
-x_test, y_test = data_transform(test, n_his, n_pred, device)
-
-train_data = torch.utils.data.TensorDataset(x_train, y_train)
-train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
-val_data = torch.utils.data.TensorDataset(x_val, y_val)
-val_iter = torch.utils.data.DataLoader(val_data, batch_size)
-test_data = torch.utils.data.TensorDataset(x_test, y_test)
-test_iter = torch.utils.data.DataLoader(test_data, batch_size)
-
-
-loss = nn.MSELoss()
-G = G.to(device)
-model = STGCN_WAVE(
-    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
-).to(device)
-optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
-
-scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7)
-
-min_val_loss = np.inf
-for epoch in range(1, epochs + 1):
-    l_sum, n = 0.0, 0
-    model.train()
-    for x, y in train_iter:
-        y_pred = model(x).view(len(x), -1)
-        l = loss(y_pred, y)
-        optimizer.zero_grad()
-        l.backward()
-        optimizer.step()
-        l_sum += l.item() * y.shape[0]
-        n += y.shape[0]
-    scheduler.step()
-    val_loss = evaluate_model(model, loss, val_iter)
-    if val_loss < min_val_loss:
-        min_val_loss = val_loss
-        torch.save(model.state_dict(), save_path)
-    print(
-        "epoch",
-        epoch,
-        ", train loss:",
-        l_sum / n,
-        ", validation loss:",
-        val_loss,
-    )
-
-
-best_model = STGCN_WAVE(
-    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
-).to(device)
-best_model.load_state_dict(torch.load(save_path))
-
-
-l = evaluate_model(best_model, loss, test_iter)
-MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler)
-print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE)
+import argparse
+import random
+
+import numpy as np
+import pandas as pd
+import scipy.sparse as sp
+import torch
+import torch.nn as nn
+from load_data import *
+from model import *
+from sensors2graph import *
+from sklearn.preprocessing import StandardScaler
+from utils import *
+
+import dgl
+
+parser = argparse.ArgumentParser(description="STGCN_WAVE")
+parser.add_argument("--lr", default=0.001, type=float, help="learning rate")
+parser.add_argument("--disablecuda", action="store_true", help="Disable CUDA")
+parser.add_argument(
+    "--batch_size",
+    type=int,
+    default=50,
+    help="batch size for training and validation (default: 50)",
+)
+parser.add_argument(
+    "--epochs", type=int, default=50, help="epochs for training  (default: 50)"
+)
+parser.add_argument(
+    "--num_layers", type=int, default=9, help="number of layers"
+)
+parser.add_argument("--window", type=int, default=144, help="window length")
+parser.add_argument(
+    "--sensorsfilepath",
+    type=str,
+    default="./data/sensor_graph/graph_sensor_ids.txt",
+    help="sensors file path",
+)
+parser.add_argument(
+    "--disfilepath",
+    type=str,
+    default="./data/sensor_graph/distances_la_2012.csv",
+    help="distance file path",
+)
+parser.add_argument(
+    "--tsfilepath", type=str, default="./data/metr-la.h5", help="ts file path"
+)
+parser.add_argument(
+    "--savemodelpath",
+    type=str,
+    default="stgcnwavemodel.pt",
+    help="save model path",
+)
+parser.add_argument(
+    "--pred_len",
+    type=int,
+    default=5,
+    help="how many steps away we want to predict",
+)
+parser.add_argument(
+    "--control_str",
+    type=str,
+    default="TNTSTNTST",
+    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
+)
+parser.add_argument(
+    "--channels",
+    type=int,
+    nargs="+",
+    default=[1, 16, 32, 64, 32, 128],
+    help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer",
+)
+args = parser.parse_args()
+
+device = (
+    torch.device("cuda")
+    if torch.cuda.is_available() and not args.disablecuda
+    else torch.device("cpu")
+)
+
+with open(args.sensorsfilepath) as f:
+    sensor_ids = f.read().strip().split(",")
+distance_df = pd.read_csv(args.disfilepath, dtype={"from": "str", "to": "str"})
+
+adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
+sp_mx = sp.coo_matrix(adj_mx)
+G = dgl.from_scipy(sp_mx)
+
+
+df = pd.read_hdf(args.tsfilepath)
+num_samples, num_nodes = df.shape
+
+tsdata = df.to_numpy()
+
+
+n_his = args.window
+
+save_path = args.savemodelpath
+
+
+n_pred = args.pred_len
+n_route = num_nodes
+blocks = args.channels
+# blocks = [1, 16, 32, 64, 32, 128]
+drop_prob = 0
+num_layers = args.num_layers
+
+batch_size = args.batch_size
+epochs = args.epochs
+lr = args.lr
+
+
+W = adj_mx
+len_val = round(num_samples * 0.1)
+len_train = round(num_samples * 0.7)
+train = df[:len_train]
+val = df[len_train : len_train + len_val]
+test = df[len_train + len_val :]
+
+scaler = StandardScaler()
+train = scaler.fit_transform(train)
+val = scaler.transform(val)
+test = scaler.transform(test)
+
+
+x_train, y_train = data_transform(train, n_his, n_pred, device)
+x_val, y_val = data_transform(val, n_his, n_pred, device)
+x_test, y_test = data_transform(test, n_his, n_pred, device)
+
+train_data = torch.utils.data.TensorDataset(x_train, y_train)
+train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
+val_data = torch.utils.data.TensorDataset(x_val, y_val)
+val_iter = torch.utils.data.DataLoader(val_data, batch_size)
+test_data = torch.utils.data.TensorDataset(x_test, y_test)
+test_iter = torch.utils.data.DataLoader(test_data, batch_size)
+
+
+loss = nn.MSELoss()
+G = G.to(device)
+model = STGCN_WAVE(
+    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
+).to(device)
+optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
+
+scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7)
+
+min_val_loss = np.inf
+for epoch in range(1, epochs + 1):
+    l_sum, n = 0.0, 0
+    model.train()
+    for x, y in train_iter:
+        y_pred = model(x).view(len(x), -1)
+        l = loss(y_pred, y)
+        optimizer.zero_grad()
+        l.backward()
+        optimizer.step()
+        l_sum += l.item() * y.shape[0]
+        n += y.shape[0]
+    scheduler.step()
+    val_loss = evaluate_model(model, loss, val_iter)
+    if val_loss < min_val_loss:
+        min_val_loss = val_loss
+        torch.save(model.state_dict(), save_path)
+    print(
+        "epoch",
+        epoch,
+        ", train loss:",
+        l_sum / n,
+        ", validation loss:",
+        val_loss,
+    )
+best_model = STGCN_WAVE(
+    blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str
+).to(device)
+best_model.load_state_dict(torch.load(save_path, weights_only=False))
+
+
+l = evaluate_model(best_model, loss, test_iter)
+MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler)
+print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE)
--- a/python/dgl/distributed/partition.py
+++ b/python/dgl/distributed/partition.py
@@ -371,7 +371,7 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False):
        os.path.getsize(partition_path),
    )
    graph = (
-        torch.load(partition_path)
+        torch.load(partition_path, weights_only=False)
        if use_graphbolt
        else load_graphs(partition_path)[0][0]
    )
--- a/python/dgl/graphbolt/impl/ondisk_dataset.py
+++ b/python/dgl/graphbolt/impl/ondisk_dataset.py
@@ -852,7 +852,7 @@ class OnDiskDataset(Dataset):
        if graph_topology is None:
            return None
        if graph_topology.type == "FusedCSCSamplingGraph":
-            return torch.load(graph_topology.path)
+            return torch.load(graph_topology.path, weights_only=False)
        raise NotImplementedError(
            f"Graph topology type {graph_topology.type} is not supported."
        )
--- a/python/dgl/graphbolt/impl/torch_based_feature_store.py
+++ b/python/dgl/graphbolt/impl/torch_based_feature_store.py
@@ -615,7 +615,7 @@ class TorchBasedFeatureStore(BasicFeatureStore):
                    f"but the feature {key} is loaded on disk."
                )
                features[key] = TorchBasedFeature(
-                    torch.load(spec.path), metadata=metadata
+                    torch.load(spec.path, weights_only=False), metadata=metadata
                )
            elif spec.format == "numpy":
                if spec.in_memory:
--- a/python/dgl/graphbolt/internal/utils.py
+++ b/python/dgl/graphbolt/internal/utils.py
@@ -28,7 +28,7 @@ def numpy_save_aligned(*args, **kwargs):


 def _read_torch_data(path):
-    return torch.load(path)
+    return torch.load(path, weights_only=False)


 def _read_numpy_data(path, in_memory=True):
--- a/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py
+++ b/tests/python/pytorch/graphbolt/impl/test_fused_csc_sampling_graph.py
@@ -374,7 +374,7 @@ def test_load_save_homo_graph(
    with tempfile.TemporaryDirectory() as test_dir:
        filename = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
        torch.save(graph, filename)
-        graph2 = torch.load(filename)
+        graph2 = torch.load(filename, weights_only=False)

    assert graph.total_num_nodes == graph2.total_num_nodes
    assert graph.total_num_edges == graph2.total_num_edges
@@ -459,7 +459,7 @@ def test_load_save_hetero_graph(
    with tempfile.TemporaryDirectory() as test_dir:
        filename = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
        torch.save(graph, filename)
-        graph2 = torch.load(filename)
+        graph2 = torch.load(filename, weights_only=False)

    assert graph.total_num_nodes == graph2.total_num_nodes
    assert graph.total_num_edges == graph2.total_num_edges
--- a/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
+++ b/tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
@@ -38,6 +38,13 @@ def write_yaml_and_load_dataset(yaml_content, dir, force_preprocess=False):
    )


+def load_sampling_graph(test_dir, processed_dataset):
+    """Load the graph file named by the dataset's ``graph_topology`` entry."""
+    topology_path = processed_dataset["graph_topology"]["path"]
+    graph_file = os.path.join(test_dir, topology_path)
+    return torch.load(graph_file, weights_only=False)
+
+
 def test_OnDiskDataset_TVTSet_exceptions():
    """Test excpetions thrown when parsing TVTSet."""
    with tempfile.TemporaryDirectory() as test_dir:
@@ -1167,8 +1174,8 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
        assert "graph" not in processed_dataset
        assert "graph_topology" in processed_dataset

-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
        )
        assert fused_csc_sampling_graph.total_num_nodes == num_nodes
        assert fused_csc_sampling_graph.total_num_edges == num_edges
@@ -1220,8 +1227,8 @@ def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
        )
        with open(output_file, "rb") as f:
            processed_dataset = yaml.load(f, Loader=yaml.Loader)
-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
        )
        assert (
            fused_csc_sampling_graph.edge_attributes is not None
@@ -1365,8 +1372,8 @@ def test_OnDiskDataset_preprocess_homogeneous_hardcode(
        assert "graph" not in processed_dataset
        assert "graph_topology" in processed_dataset

-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
        )
        assert fused_csc_sampling_graph.total_num_nodes == num_nodes
        assert fused_csc_sampling_graph.total_num_edges == num_edges
@@ -1575,8 +1582,8 @@ def test_OnDiskDataset_preprocess_heterogeneous_hardcode(
        assert "graph" not in processed_dataset
        assert "graph_topology" in processed_dataset

-        fused_csc_sampling_graph = torch.load(
-            os.path.join(test_dir, processed_dataset["graph_topology"]["path"])
+        fused_csc_sampling_graph = load_sampling_graph(
+            test_dir, processed_dataset
        )
        assert fused_csc_sampling_graph.total_num_nodes == 5
        assert fused_csc_sampling_graph.total_num_edges == 10
--- a/tests/python/pytorch/graphbolt/internal/test_utils.py
+++ b/tests/python/pytorch/graphbolt/internal/test_utils.py
@@ -77,7 +77,7 @@ def test_save_data(data_fmt, save_fmt, contiguous):

        # Step2. Load the data.
        if save_fmt == "torch":
-            loaded_data = torch.load(save_file_name)
+            loaded_data = torch.load(save_file_name, weights_only=False)
            assert loaded_data.is_contiguous()
            assert torch.equal(tensor_data, loaded_data)
        elif save_fmt == "numpy":