diff --git a/examples/graphbolt/pyg/hetero/node_classification.py b/examples/graphbolt/pyg/hetero/node_classification.py index 032b84d82c..4166805c3f 100644 --- a/examples/graphbolt/pyg/hetero/node_classification.py +++ b/examples/graphbolt/pyg/hetero/node_classification.py @@ -58,12 +58,12 @@ def create_dataloader( datapipe = datapipe.copy_to(device=device) need_copy = False + node_feature_keys = {"paper": ["feat"], "author": ["feat"]} if args.dataset == "ogb-lsc-mag240m": - node_feature_keys = { - "paper": ["feat"], - "author": ["feat"], - "institution": ["feat"], - } + node_feature_keys["institution"] = ["feat"] + if "igb-het" in args.dataset: + node_feature_keys["institute"] = ["feat"] + node_feature_keys["fos"] = ["feat"] # Fetch node features for the sampled subgraph. datapipe = datapipe.fetch_feature(features, node_feature_keys) @@ -335,8 +335,13 @@ def parse_args(): "--dataset", type=str, default="ogb-lsc-mag240m", - choices=["ogb-lsc-mag240m"], - help="Dataset name. Possible values: ogb-lsc-mag240m", + choices=[ + "ogb-lsc-mag240m", + "igb-het-tiny", + "igb-het-small", + "igb-het-medium", + ], + help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het-[tiny|small|medium].", ) parser.add_argument( "--fanout", diff --git a/python/dgl/graphbolt/impl/ondisk_dataset.py b/python/dgl/graphbolt/impl/ondisk_dataset.py index 6783bb296a..303a423a85 100644 --- a/python/dgl/graphbolt/impl/ondisk_dataset.py +++ b/python/dgl/graphbolt/impl/ondisk_dataset.py @@ -990,6 +990,16 @@ class BuiltinDataset(OnDiskDataset): Self edges are added to the original graph. Node features are stored as float32. + **igb-het-[tiny|small|medium]** + The igb-het-[tiny|small|medium] dataset is a heterogeneous citation network, + which is designed for developers to train and evaluate GNN models with + high fidelity. See more details in `igb-het-[tiny|small|medium] + `_. + + .. note:: + Four reverse edge types are added to the original graph. + Node features are stored as float32. 
+ Parameters ---------- name : str @@ -1019,6 +1029,10 @@ class BuiltinDataset(OnDiskDataset): "igb-hom-tiny-seeds", "igb-hom-small", "igb-hom-small-seeds", + "igb-het-tiny", + "igb-het-tiny-seeds", + "igb-het-small", + "igb-het-small-seeds", ] _large_datasets = [ "ogb-lsc-mag240m", @@ -1031,6 +1045,8 @@ class BuiltinDataset(OnDiskDataset): "igb-hom-large-seeds", "igb-hom", "igb-hom-seeds", + "igb-het-medium", + "igb-het-medium-seeds", ] _all_datasets = _datasets + _large_datasets