[GraphBolt][Dataset] Merging part of #7708, igb-het small datasets by Bowen Yao's (#7788)

Co-authored-by: Bowen Yao <112051015+BowenYao18@users.noreply.github.com>
Co-authored-by: BowenYao18 <by18@rice.edu>
This commit is contained in:
Muhammed Fatih BALIN
2024-09-07 15:43:19 -04:00
committed by GitHub
parent 8772c02db7
commit bbe00c04ba
2 changed files with 28 additions and 7 deletions

View File

@@ -58,12 +58,12 @@ def create_dataloader(
datapipe = datapipe.copy_to(device=device)
need_copy = False
node_feature_keys = {"paper": ["feat"], "author": ["feat"]}
if args.dataset == "ogb-lsc-mag240m":
node_feature_keys = {
"paper": ["feat"],
"author": ["feat"],
"institution": ["feat"],
}
node_feature_keys["institution"] = ["feat"]
if "igb-het" in args.dataset:
node_feature_keys["institute"] = ["feat"]
node_feature_keys["fos"] = ["feat"]
# Fetch node features for the sampled subgraph.
datapipe = datapipe.fetch_feature(features, node_feature_keys)
@@ -335,8 +335,13 @@ def parse_args():
"--dataset",
type=str,
default="ogb-lsc-mag240m",
choices=["ogb-lsc-mag240m"],
help="Dataset name. Possible values: ogb-lsc-mag240m",
choices=[
"ogb-lsc-mag240m",
"igb-het-tiny",
"igb-het-small",
"igb-het-medium",
],
help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het-[tiny|small|medium].",
)
parser.add_argument(
"--fanout",

View File

@@ -990,6 +990,16 @@ class BuiltinDataset(OnDiskDataset):
Self edges are added to the original graph.
Node features are stored as float32.
**igb-het-[tiny|small|medium]**
The igb-het-[tiny|small|medium] dataset is a heterogeneous citation network,
which is designed for developers to train and evaluate GNN models with
high fidelity. See more details in `igb-het-[tiny|small|medium]
<https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_.
.. note::
Four reverse edge types are added to the original graph.
Node features are stored as float32.
Parameters
----------
name : str
@@ -1019,6 +1029,10 @@ class BuiltinDataset(OnDiskDataset):
"igb-hom-tiny-seeds",
"igb-hom-small",
"igb-hom-small-seeds",
"igb-het-tiny",
"igb-het-tiny-seeds",
"igb-het-small",
"igb-het-small-seeds",
]
_large_datasets = [
"ogb-lsc-mag240m",
@@ -1031,6 +1045,8 @@ class BuiltinDataset(OnDiskDataset):
"igb-hom-large-seeds",
"igb-hom",
"igb-hom-seeds",
"igb-het-medium",
"igb-het-medium-seeds",
]
_all_datasets = _datasets + _large_datasets