Skip to content

Commit

Permalink
Update GNN reference implementation: add DGL backend (#1903)
Browse files Browse the repository at this point in the history
* Update GNN reference implementation: add DGL backend

* [Automated Commit] Format Codebase

* Update README.md
  • Loading branch information
pgmpablo157321 authored Nov 6, 2024
1 parent b0686db commit d3c01ed
Show file tree
Hide file tree
Showing 22 changed files with 1,618 additions and 45 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MLPerf™ Inference Benchmarks for Graph Neural Networks (R-GAT)

This is the reference implementation for MLPerf Inference text to image
This is the reference implementation for the MLPerf Inference graph neural network (R-GAT) benchmark. Two implementations are currently supported, GraphLearn for PyTorch (GLT) and Deep Graph Library (DGL), both using PyTorch as the backbone of the model.

## Supported Models

Expand Down Expand Up @@ -47,14 +47,21 @@ Install loadgen:
cd $LOADGEN_FOLDER
CFLAGS="-std=c++14" python setup.py install
```
### Install graphlearn for pytorch

Install pytorch geometric:
### Install pytorch geometric

```bash
export TORCH_VERSION=$(python -c "import torch; print(torch.__version__)")
pip install torch-geometric torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-${TORCH_VERSION}.html
```

### Install DGL
```bash
pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html
```

### Install graphlearn for pytorch (Only for GLT implementation)

Follow the installation instructions at: https://github.com/alibaba/graphlearn-for-pytorch.git

### Download model
Expand All @@ -80,7 +87,7 @@ cd $GRAPH_FOLDER
python3 tools/split_seeds.py --path igbh --dataset_size tiny
```

**Compress graph (optional)**
**Compress graph (optional, only for GLT implementation)**
```bash
cd $GRAPH_FOLDER
python3 tools/compress_graph.py --path igbh --dataset_size tiny --layout <CSC or CSR>
Expand All @@ -99,7 +106,7 @@ cd $GRAPH_FOLDER
python3 tools/split_seeds.py --path igbh --dataset_size full
```

**Compress graph (optional)**
**Compress graph (optional, only for GLT implementation)**
```bash
cd $GRAPH_FOLDER
python3 tools/compress_graph.py --path igbh --dataset_size full --layout <CSC or CSR>
Expand All @@ -114,16 +121,22 @@ TODO
```bash
# Go to the benchmark folder
cd $GRAPH_FOLDER
# Run the benchmark
python3 main.py --dataset igbh-tiny --dataset-path igbh/ --profile debug [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>] [--layout <COO, CSC or CSR>]
# Run the benchmark GLT
python3 main.py --dataset igbh-glt-tiny --dataset-path igbh/ --profile debug-glt [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>] [--layout <COO, CSC or CSR>]

# Run the benchmark DGL
python3 main.py --dataset igbh-dgl-tiny --dataset-path igbh/ --profile debug-dgl [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>]
```

#### Local run
```bash
# Go to the benchmark folder
cd $GRAPH_FOLDER
# Run the benchmark
python3 main.py --dataset igbh --dataset-path igbh/ [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>] [--layout <COO, CSC or CSR>]
# Run the benchmark GLT
python3 main.py --dataset igbh-glt --dataset-path igbh/ --profile rgat-glt-full [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>] [--layout <COO, CSC or CSR>]

# Run the benchmark DGL
python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full [--model-path <path_to_ckpt>] [--in-memory] [--device <cpu or gpu>] [--dtype <fp16 or fp32>] [--scenario <SingleStream, MultiStream, Server or Offline>]
```
#### Run using docker

Expand Down
File renamed without changes.
96 changes: 96 additions & 0 deletions graph/R-GAT/backend_dgl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@

from typing import Optional, List, Union, Any
from dgl_utilities.feature_fetching import IGBHeteroGraphStructure, Features, IGBH
from dgl_utilities.components import build_graph, get_loader, RGAT
from dgl_utilities.pyg_sampler import PyGSampler
import os
import torch
import logging
import backend
from typing import Literal

# Module-level logger used by this backend. Obtaining it before configuring
# the root logger is safe: the two calls do not depend on each other.
log = logging.getLogger("backend-dgl")
# Ask the root logger for INFO-level output (basicConfig does nothing if the
# root logger already has handlers configured).
logging.basicConfig(level=logging.INFO)


class BackendDGL(backend.Backend):
    """Backend that runs the RGAT model on a graph built with the DGL backend.

    Construction builds the feature store and the graph from the supplied
    IGBH wrapper, creates the neighbor sampler and the model, and optionally
    restores model weights from a checkpoint.
    """

    def __init__(
        self,
        model_type="rgat",
        type: Literal["fp16", "fp32"] = "fp16",
        device: Literal["cpu", "gpu"] = "gpu",
        ckpt_path: Optional[str] = None,
        igbh: Optional[IGBH] = None,
        batch_size: int = 1,
        layout: Literal["CSC", "CSR", "COO"] = "COO",
        edge_dir: str = "in",
    ):
        """Build the graph, sampler and model, then load the checkpoint.

        Args:
            model_type: Accepted but not read by this constructor.
            type: "fp32" selects ``torch.float32``; any other value selects
                ``torch.float16``.
            device: "gpu" selects the CUDA device; any other value selects
                the CPU.
            ckpt_path: Path to a checkpoint containing a
                ``"model_state_dict"`` entry. ``None`` skips loading.
            igbh: Dataset wrapper exposing ``igbh_dataset``. Required.
            batch_size: Accepted but not read by this constructor.
            layout: Accepted but not read by this constructor.
            edge_dir: Accepted but not read by this constructor.

        Raises:
            ValueError: If ``igbh`` is not provided.
            FileNotFoundError: If ``ckpt_path`` does not exist.
        """
        super().__init__()
        if igbh is None:
            raise ValueError("BackendDGL requires an `igbh` dataset wrapper")

        # Set device and type
        self.device = torch.device("cuda" if device == "gpu" else "cpu")
        self.type = torch.float32 if type == "fp32" else torch.float16

        # Create feature store, graph and neighbor sampler
        self.fan_out = [5, 10, 15]
        self.igbh_graph_structure = igbh.igbh_dataset
        self.feature_store = Features(
            self.igbh_graph_structure.dir,
            self.igbh_graph_structure.dataset_size,
            self.igbh_graph_structure.in_memory,
            use_fp16=self.igbh_graph_structure.use_fp16,
        )
        self.feature_store.build_features(use_journal_conference=True)
        self.graph = build_graph(
            self.igbh_graph_structure,
            "dgl",
            features=self.feature_store,
        )
        # Single source of truth for the fan-out: the sampler and the model's
        # layer count below are both derived from self.fan_out.
        self.neighbor_loader = PyGSampler(self.fan_out)

        # Load model architecture
        self.model = RGAT(
            backend="dgl",
            device=device,
            graph=self.graph,
            in_feats=1024,
            h_feats=512,
            num_classes=2983,
            num_layers=len(self.fan_out),
            n_heads=4,
        ).to(self.type).to(self.device)
        self.model.eval()

        # Load model checkpoint
        if ckpt_path is not None:
            try:
                ckpt = torch.load(ckpt_path, map_location=self.device)
            except FileNotFoundError as e:
                # __init__ must not return a value (doing so raises TypeError
                # and hides the real cause), so report and propagate instead.
                log.error("Checkpoint file not found: %s", e)
                raise
            self.model.load_state_dict(ckpt["model_state_dict"])

    def version(self):
        """Return the installed torch version string."""
        return torch.__version__

    def name(self):
        """Return the name this backend reports for itself."""
        return "pytorch-SUT"

    def image_format(self):
        """Return the format string "NCHW".

        NOTE(review): presumably kept only to satisfy the common backend
        interface -- confirm against ``backend.Backend``.
        """
        return "NCHW"

    def load(self):
        """Return this backend; all setup already happened in ``__init__``."""
        return self

    def predict(self, inputs: torch.Tensor):
        """Sample a batch around ``inputs`` and return the model predictions.

        Args:
            inputs: Seed nodes, passed to the sampler under the "paper" key.

        Returns:
            The first element of the pair returned by the model (the second
            element, the batch labels, is discarded).
        """
        with torch.no_grad():
            # Get batch
            batch = self.neighbor_loader.sample(self.graph, {"paper": inputs})
            batch_preds, _ = self.model(
                batch, self.device, self.feature_store)
        return batch_preds
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import graphlearn_torch as glt

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("backend-pytorch")
log = logging.getLogger("backend-glt")


class CustomNeighborLoader(NodeLoader):
Expand Down Expand Up @@ -114,20 +114,19 @@ def get_neighbors(self, seeds: torch.Tensor):
return result


class BackendPytorch(backend.Backend):
class BackendGLT(backend.Backend):
def __init__(
self,
model_type="rgat",
type: Literal["fp16", "fp32"] = "fp16",
device: Literal["cpu", "gpu"] = "gpu",
ckpt_path: str = None,
igbh_dataset: IGBHeteroDataset = None,
igbh: IGBH = None,
batch_size: int = 1,
layout: Literal["CSC", "CSR", "COO"] = "COO",
edge_dir: str = "in",
):
super(BackendPytorch, self).__init__()
self.i = 0
super(BackendGLT, self).__init__()
# Set device and type
if device == "gpu":
self.device = torch.device("cuda")
Expand All @@ -140,6 +139,7 @@ def __init__(
self.type = torch.float16
# Create Node and neighbor loade
self.glt_dataset = glt.data.Dataset(edge_dir=edge_dir)
igbh_dataset = igbh.igbh_dataset
self.glt_dataset.init_node_features(
node_feature_data=igbh_dataset.feat_dict,
with_gpu=(device == "gpu"),
Expand Down
File renamed without changes.
Loading

0 comments on commit d3c01ed

Please sign in to comment.