Commit

lint fixes
jainapurva committed Feb 20, 2025
1 parent 91c9be1 commit b5add28
Showing 3 changed files with 71 additions and 52 deletions.
23 changes: 15 additions & 8 deletions benchmarks/microbenchmarks/bench_inference_quant.py
@@ -2,16 +2,14 @@

import argparse
from copy import deepcopy
from typing import List

import torch
from utils import (
benchmark_model_inference_in_microseconds,
benchmark_model_op_with_profiler_in_microseconds,
clean_caches,
create_model_and_input,
get_default_device,
quantize_model,
clean_caches,
)


@@ -29,11 +27,16 @@ def run(
# TODO: Add more model types here
clean_caches()
base_model, input_data = create_model_and_input(
model_type, m, k, n,
model_type,
m,
k,
n,
dtype=precision,
device=device,
)
print(f"Starting benchmarking for model: {base_model.__class__.__name__} for quantization: {quantization}")
print(
f"Starting benchmarking for model: {base_model.__class__.__name__} for quantization: {quantization}"
)
# Use quantize_ to apply each quantization function to the model
m_copy = deepcopy(base_model).eval().to(device)
m_copy = quantize_model(m_copy, quantization)
@@ -45,8 +48,12 @@ def run(

# Run benchmarks
# 1. Benchmark time to run an inference call for quantized model
model_time = benchmark_model_inference_in_microseconds(model=m_copy, input_data=input_data)
print(f"Time to run a {base_model.__class__.__name__}: {model_time * 1e6:.2f} microseconds quantized with {quantization}")
model_time = benchmark_model_inference_in_microseconds(
model=m_copy, input_data=input_data
)
print(
f"Time to run a {base_model.__class__.__name__}: {model_time * 1e6:.2f} microseconds quantized with {quantization}"
)

# 2. Benchmark time using profiler
# Profile dtype model evaluation
@@ -104,7 +111,7 @@ def run(
parser.add_argument(
"--compile",
type=str,
nargs='?',
nargs="?",
const="default",
default=None,
help="Whether to compile the model and optionally specify compile mode (default: max-autotune)",
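Note: run() is also driven by config_parser.py (next file), which calls run(**config.to_dict()). A minimal sketch of an equivalent direct call, using assumed placeholder values for the quantization name and the m/k/n shape (these values are not taken from this commit):

    # Illustrative only: keyword names mirror BenchmarkConfig.to_dict() in config_parser.py
    run(
        quantization="baseline",   # assumed placeholder; real names come from the YAML "quantizations" list
        m=1024, k=1024, n=1024,    # assumed example shape for the linear model
        precision=torch.bfloat16,  # torch is already imported at the top of this file
        compile=False,
        device="cuda",
        model_type="linear",
    )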
73 changes: 42 additions & 31 deletions benchmarks/microbenchmarks/config_parser.py
@@ -1,82 +1,93 @@
import yaml
import torch
from typing import Dict, List, Any, Tuple
from pathlib import Path
from itertools import product
from utils import get_name_to_shapes_iter # Import the shape utility
from typing import Any, Dict, List, Tuple

import torch
import yaml


class BenchmarkConfig:
def __init__(self, quantization: str, params: Dict[str, Any], shape_name: str, shape: List[int]):
def __init__(
self,
quantization: str,
params: Dict[str, Any],
shape_name: str,
shape: List[int],
):
self.quantization = quantization
self.m, self.k, self.n = shape
self.shape_name = shape_name
self.precision = self._parse_precision(params['precision'])
self.compile = params.get('compile', False)
self.device = params.get('device', 'cuda')
self.model_type = params.get('model_type', 'linear')
self.name = f'benchmark_{self.quantization}_{self.shape_name}_m{self.m}_k{self.k}_n{self.n}'
self.precision = self._parse_precision(params["precision"])
self.compile = params.get("compile", False)
self.device = params.get("device", "cuda")
self.model_type = params.get("model_type", "linear")
self.name = f"benchmark_{self.quantization}_{self.shape_name}_m{self.m}_k{self.k}_n{self.n}"

@staticmethod
def _parse_precision(precision_str: str) -> torch.dtype:
"""Convert string precision to torch dtype"""
return getattr(torch, precision_str.split('.')[-1])
return getattr(torch, precision_str.split(".")[-1])

def to_dict(self) -> Dict[str, Any]:
"""Convert config to dictionary for main function"""
return {
'quantization': self.quantization,
'm': self.m,
'k': self.k,
'n': self.n,
'precision': self.precision,
'compile': self.compile,
'device': self.device,
'model_type': self.model_type,
"quantization": self.quantization,
"m": self.m,
"k": self.k,
"n": self.n,
"precision": self.precision,
"compile": self.compile,
"device": self.device,
"model_type": self.model_type,
}


def get_shapes_for_config(shape_config: Dict[str, Any]) -> List[Tuple[str, List[int]]]:
"""Get shapes for a given configuration"""
name = shape_config['name']
name = shape_config["name"]
if name == "custom":
return [(name, shape) for shape in shape_config['shapes']]
return [(name, shape) for shape in shape_config["shapes"]]
# else:
# return [(name, shape) for shape in get_name_to_shapes_iter(name, None, None, None)]


def load_benchmark_configs(config_path: str) -> List[BenchmarkConfig]:
"""Load benchmark configurations from YAML file"""
with open(config_path, 'r') as f:
with open(config_path, "r") as f:
config_data = yaml.safe_load(f)

quantizations = config_data['quantizations']
params = config_data['model_params']
quantizations = config_data["quantizations"]
params = config_data["model_params"]

configs = []
# Process each shape configuration
for shape_config in params['matrix_shapes']:
for shape_config in params["matrix_shapes"]:
shapes = get_shapes_for_config(shape_config)
# Generate combinations for each shape
for quant, (shape_name, shape) in product(quantizations, shapes):
configs.append(BenchmarkConfig(quant, params, shape_name, shape))

return configs


def run_benchmarks_from_config(config_path: str) -> None:
"""Run benchmarks using configurations from YAML file"""
from bench_inference_quant import run

configs = load_benchmark_configs(config_path)
for config in configs:
print(f"\nRunning benchmark: {config.name}")
run(**config.to_dict())


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(description="Run benchmarks from config file")
parser.add_argument(
"--config",
type=str,
default="configs/benchmark_config.yml",
help="Path to benchmark configuration file"
help="Path to benchmark configuration file",
)
args = parser.parse_args()
run_benchmarks_from_config(args.config)
run_benchmarks_from_config(args.config)
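For reference, a sketch of the structure load_benchmark_configs() expects from the YAML file, written here as the Python dict that yaml.safe_load() would return; the quantization names and shape values are illustrative assumptions, not part of this commit:

    config_data = {
        "quantizations": ["baseline", "int8wo"],  # assumed example names
        "model_params": {
            "matrix_shapes": [
                # name "custom" takes explicit [m, k, n] triples (see get_shapes_for_config)
                {"name": "custom", "shapes": [[1024, 1024, 1024], [2048, 4096, 1024]]},
            ],
            "precision": "torch.bfloat16",  # parsed by _parse_precision into torch.bfloat16
            "compile": False,               # optional; defaults to False
            "device": "cuda",               # optional; defaults to "cuda"
            "model_type": "linear",         # optional; defaults to "linear"
        },
    }

With such a file saved as configs/benchmark_config.yml, the __main__ block above runs every (quantization, shape) combination via:

    python benchmarks/microbenchmarks/config_parser.py --config configs/benchmark_config.yml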
27 changes: 14 additions & 13 deletions benchmarks/microbenchmarks/utils.py
@@ -1,5 +1,5 @@
import time
from typing import List, Optional
from typing import Optional

import torch
from torch.profiler import ProfilerActivity, profile
@@ -245,13 +245,13 @@ def benchmark_model_inference_in_microseconds(model, input_data):
# Returns model run time in seconds
if torch.cuda.is_available():
torch.cuda.synchronize()

# warm up
for _ in range(2):
model(input_data)
if torch.cuda.is_available():
torch.cuda.synchronize()

num_iters = 5
start_time = time.perf_counter()
with torch.no_grad():
@@ -266,15 +266,15 @@ def benchmark_model_inference_in_microseconds(model, input_data):

def benchmark_model_op_with_profiler_in_microseconds(model, input_data, op_name: str):
"""Benchmarks model inference using PyTorch profiler to measure GPU kernel execution times.
This function profiles the model execution and measures the time spent in specific GPU operations
versus overhead time. It performs warmup runs before profiling to ensure accurate measurements.
Args:
model (torch.nn.Module): PyTorch model to benchmark
input_data (torch.Tensor): Input tensor to run through the model
op_name (str): Name of the GPU operation to measure time for
Returns:
tuple[float, float]: A tuple containing:
- gpu_op_time (float): Time spent in the specified GPU operation in microseconds
@@ -283,11 +283,10 @@ def benchmark_model_op_with_profiler_in_microseconds(model, input_data, op_name:
# Warm up
for _ in range(2):
model(input_data)

# Profile model execution
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
record_shapes=True
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True
) as prof:
with torch.no_grad():
_ = model(input_data)
@@ -307,7 +306,7 @@ def benchmark_model_op_with_profiler_in_microseconds(model, input_data, op_name:
gpu_op_time += event[1]
else:
gpu_overhead_time += event[1]

return gpu_op_time, gpu_overhead_time
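A minimal usage sketch for the profiler helper above; the op_name substring "gemm" is an assumed example, since the actual strings passed by the benchmark scripts are not visible in this diff:

    # Hypothetical call: returns (time in the matching GPU op, other GPU time), both in microseconds
    gpu_op_time, gpu_overhead_time = benchmark_model_op_with_profiler_in_microseconds(
        model, input_data, op_name="gemm"
    )
    print(f"GPU op time: {gpu_op_time:.2f} us, GPU overhead: {gpu_overhead_time:.2f} us")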


@@ -339,7 +338,6 @@ def create_model_and_input(
return model, input_data



@torch.no_grad()
def benchmark_op_with_cuda_graph(op, *args, **kwargs):
"""
@@ -390,16 +388,19 @@ def _is_interpolate_mode(mode):
return True
return False


def clean_caches():
import gc

# Clear everything before starting
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
gc.collect()

if compile:
torch._dynamo.reset()


def get_name_to_shapes_iter(
shape_gen_name: str,
M: Optional[int],
@@ -462,4 +463,4 @@ def get_name_to_shapes_iter(
}
return name_to_shapes.items()

raise AssertionError(f"unknown shape_gen_name {shape_gen_name}")
raise AssertionError(f"unknown shape_gen_name {shape_gen_name}")
