3 changes: 2 additions & 1 deletion .github/workflows/run_microbenchmarks.yml
@@ -12,6 +12,7 @@ on:
jobs:
benchmark:
runs-on: linux.aws.h100
timeout-minutes: 480 # 8 hours (increased from default 6 hours)
strategy:
matrix:
torch-spec:
@@ -22,7 +23,7 @@ jobs:
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: "3.9"
python-version: "3.10"

- name: Run benchmark
shell: bash
@@ -17,4 +17,3 @@ model_params:
torch_compile_mode: "max-autotune"
device: "cuda"
model_type: "linear"
enable_memory_profiler: true
9 changes: 5 additions & 4 deletions benchmarks/microbenchmarks/README.md
@@ -52,17 +52,18 @@ model_params:
device: "cuda" # Options: "cuda", "mps", "xpu", "cpu"
model_type: "linear" # Options: "linear", "ln_linear_sigmoid"
enable_profiler: true # Enable standard profiling
enable_memory_profiler: true # Enable CUDA memory profiling
# enable_memory_visualizer: true # Enable HTML memory visualization (slow)
```

## Configuration Options

### Profiling Options
- `enable_profiler`: Enable standard PyTorch profiling (default: false)
- `enable_memory_profiler`: Enable CUDA memory profiling (default: false)
- `enable_memory_visualizer`: Enable HTML memory visualization (default: false)
- Memory profiling (pickle snapshots + peak stats) ALWAYS runs automatically
- Only works when device is set to "cuda"
- Generates memory snapshots before and after inference
- Creates visualizations of memory usage
- Generates HTML visualizations from memory snapshots (can be slow for large models)
- Set to `true` only when debugging memory issues
- Outputs are saved in the `memory_profiler` subdirectory (see the snapshot-inspection sketch below)
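
Because the pickle snapshots are always written, they can be inspected offline without the HTML step. A minimal sketch follows; the snapshot layout is `torch.cuda.memory._dump_snapshot`'s private format, so the field names may vary across PyTorch versions, and the file path is illustrative:

```python
# Minimal sketch: summarize a saved memory snapshot offline.
# The pickle layout follows torch.cuda.memory._dump_snapshot (a private
# format; keys like "segments"/"total_size" may differ across versions).
import pickle

snapshot_file = "results/memory_profiler/pickle/demo_memory_profile.pickle"  # illustrative path
with open(snapshot_file, "rb") as f:
    snapshot = pickle.load(f)

# "segments" lists the caching allocator's reserved CUDA memory regions.
reserved = sum(seg["total_size"] for seg in snapshot["segments"])
print(f"{len(snapshot['segments'])} segments, {reserved / 1024**2:.1f} MB reserved")
```

Snapshots can also be dragged into the interactive viewer at https://pytorch.org/memory_viz, which avoids generating HTML locally.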

### Quantization Methods
62 changes: 31 additions & 31 deletions benchmarks/microbenchmarks/benchmark_inference.py
@@ -213,7 +213,7 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
ndigits=2,
)

# Run profiler if enabled
# Run performance profiler if enabled
if config.enable_profiler:
print("Running profiler...")
try:
@@ -230,42 +230,42 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
except Exception as e:
print(f"Error running profiler: {e}")

# Run memory profiler if enabled
if config.enable_memory_profiler:
print("Running memory profiler...")
try:
# Create memory profiler directory if it doesn't exist
memory_profiler_dir = os.path.join(
config.output_dir, "memory_profiler/pickle"
)
os.makedirs(memory_profiler_dir, exist_ok=True)

# Save memory profile with .pickle extension
result.memory_profile_path, result.memory_stats = (
generate_memory_profile(
model=m_copy,
input_data=input_data,
profile_file_path=os.path.join(
memory_profiler_dir,
f"{config._file_name}_memory_profile.pickle",
),
)
)
# Always run memory profiler to get peak stats and save pickle snapshot (fast)
print("Running memory profiler...")
try:
# Create memory profiler directory if it doesn't exist
memory_profiler_dir = os.path.join(
config.output_dir, "memory_profiler/pickle"
)
os.makedirs(memory_profiler_dir, exist_ok=True)

# Save memory profile with .pickle extension
result.memory_profile_path, result.memory_stats = generate_memory_profile(
model=m_copy,
input_data=input_data,
profile_file_path=os.path.join(
memory_profiler_dir,
f"{config._file_name}_memory_profile.pickle",
),
)

# Generate HTML visualization ONLY if explicitly enabled (slow: minutes to hours)
if config.enable_memory_visualizer:
print("Generating HTML visualization (this may take a while)...")
if result.memory_profile_path:
result.memory_visualization_path = visualize_memory_profile(
result.memory_profile_path
)
except ValueError as e:
if "not enough values to unpack" in str(e):
print(
"Memory profile generation failed due to a known bug; re-run the benchmark to regenerate it. Please raise an issue if the failure persists."
)
except Exception as e:
print(f"Error running memory profiler: {e}")
import traceback
except ValueError as e:
if "not enough values to unpack" in str(e):
print(
"Memory profile generation failed due to a known bug; re-run the benchmark to regenerate it. Please raise an issue if the failure persists."
)
except Exception as e:
print(f"Error running memory profiler: {e}")
import traceback

traceback.print_exc()
traceback.print_exc()

return result
except Exception as e:
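For reference, here is a standalone sketch of the two-step flow this diff implements: the snapshot and peak-stats pass always runs, while HTML rendering stays opt-in. The import path and the toy model are assumptions for illustration, not part of the PR:

```python
# Hedged sketch of the always-snapshot / opt-in-visualize flow above.
import os

import torch

from benchmarks.microbenchmarks.profiler import (  # assumed import path
    generate_memory_profile,
    visualize_memory_profile,
)

model = torch.nn.Linear(1024, 1024).to("cuda")
input_data = torch.randn(16, 1024, device="cuda")

out_dir = "out/memory_profiler/pickle"  # illustrative output location
os.makedirs(out_dir, exist_ok=True)

# Fast path: always runs, writes the pickle snapshot, returns peak stats.
snapshot_path, memory_stats = generate_memory_profile(
    model=model,
    input_data=input_data,
    profile_file_path=os.path.join(out_dir, "demo_memory_profile.pickle"),
)
print(memory_stats)

# Slow path: HTML rendering, gated behind enable_memory_visualizer.
if snapshot_path:
    html_path = visualize_memory_profile(snapshot_path)
```
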
12 changes: 8 additions & 4 deletions benchmarks/microbenchmarks/profiler.py
@@ -73,19 +73,23 @@ def generate_model_profile(model, input_data, profile_file_path):


def generate_memory_profile(model, input_data, profile_file_path):
"""Function to generate CUDA memory profile.
"""Generate CUDA memory profile with snapshot and peak statistics.

This function generates a memory snapshot pickle file and collects peak
memory statistics. HTML visualization is done separately via visualize_memory_profile().

Args:
model: The model to profile
input_data: Input data for the model
profile_file_path: Path to save the memory profile (.pickle)

Returns:
str: Path to the saved profile file.
tuple: (profile_file_path, memory_stats) where memory_stats contains
peak memory usage in MB
"""
if not torch.cuda.is_available():
print("Warning: CUDA is not available. Memory profiling requires CUDA.")
return None
return None, {}
if model is None or input_data is None:
raise ValueError("Model and input_data must not be None.")

@@ -120,7 +124,7 @@ def generate_memory_profile(model, input_data, profile_file_path):
torch.cuda.synchronize()

# Take memory snapshot after inference and save it to the pickle file
torch.cuda.memory._dump_snapshot(profile_file_path)

if _validate_pickle_file(profile_file_path):
print(f"Saved memory profile to {profile_file_path}")
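Under the hood, this helper builds on PyTorch's allocator-history hooks. A minimal sketch of that mechanism in isolation (these are private `torch.cuda.memory` APIs, present in recent releases but not a stable contract):

```python
# Sketch of the underlying snapshot mechanism the helper relies on.
import torch

torch.cuda.memory._record_memory_history(max_entries=100_000)  # start tracking allocations
torch.cuda.reset_peak_memory_stats()

a = torch.randn(4096, 4096, device="cuda")
b = a @ a  # some CUDA work for the history to capture
torch.cuda.synchronize()

torch.cuda.memory._dump_snapshot("snapshot.pickle")  # what the helper persists
torch.cuda.memory._record_memory_history(enabled=None)  # stop tracking

peak_mb = torch.cuda.max_memory_allocated() / (1024**2)
print(f"peak allocated: {peak_mb:.1f} MB")  # the kind of stat returned in memory_stats
```
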
8 changes: 4 additions & 4 deletions benchmarks/microbenchmarks/test/benchmark_config.yml
@@ -17,7 +17,7 @@ model_params:
device: "cuda"
model_type: "linear"
enable_profiler: true # Enable profiling for this model
enable_memory_profiler: true # Enable memory profiling for this model
enable_memory_visualizer: true # Enable memory visualization for this model

- name: "ln_linear_sigmoid_cuda"
matrix_shapes:
@@ -30,7 +30,7 @@ model_params:
device: "cuda"
model_type: "ln_linear_sigmoid"
enable_profiler: true
enable_memory_profiler: true
enable_memory_visualizer: true

- name: "bf16_transformer_block"
matrix_shapes:
@@ -43,7 +43,7 @@ model_params:
device: "cuda"
model_type: "transformer_block" # TODO: Add a custom model (Figure out how to do this, maybe pass a .py file with model definition)
enable_profiler: true
enable_memory_profiler: true
enable_memory_visualizer: true

- name: "large_bf16_ln_linear"
matrix_shapes:
@@ -59,4 +59,4 @@ model_params:
device: "cuda"
model_type: "linear"
enable_profiler: true
enable_memory_profiler: true
enable_memory_visualizer: true
3 changes: 0 additions & 3 deletions benchmarks/microbenchmarks/test/test_benchmark_profiler.py
@@ -162,7 +162,6 @@ def test_memory_profiler_enabled(self):
quantization=None,
sparsity=None,
params={
"enable_memory_profiler": True,
"device": "cuda",
},
shape_name="test",
@@ -201,7 +200,6 @@ def test_memory_profiler_visualization(self):
quantization=None,
sparsity=None,
params={
"enable_memory_profiler": True,
"device": "cuda",
},
shape_name="test",
@@ -255,7 +253,6 @@ def test_memory_profiler_cuda_unavailable(self):
quantization=None,
sparsity=None,
params={
"enable_memory_profiler": True,
"device": "cpu", # Force CPU to test CUDA unavailable case
},
shape_name="test",
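With the `enable_memory_profiler` knob gone, the CUDA-unavailable path is covered by the helper's new return contract. A hedged test sketch against the `(None, {})` fallback shown in profiler.py above (the import path is an assumption):

```python
# Hedged sketch: verify the (None, {}) fallback when CUDA is unavailable.
import unittest
from unittest import mock

from benchmarks.microbenchmarks.profiler import generate_memory_profile  # assumed path


class TestMemoryProfilerFallback(unittest.TestCase):
    def test_returns_none_and_empty_stats_without_cuda(self):
        # Force the CUDA-unavailable branch regardless of the host machine.
        with mock.patch("torch.cuda.is_available", return_value=False):
            path, stats = generate_memory_profile(
                model=object(), input_data=object(), profile_file_path="x.pickle"
            )
        self.assertIsNone(path)
        self.assertEqual(stats, {})


if __name__ == "__main__":
    unittest.main()
```
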
6 changes: 4 additions & 2 deletions benchmarks/microbenchmarks/utils.py
@@ -82,7 +82,9 @@ def __init__(
f"benchmark_{self.quantization}_{self.model_type}_m{self.m}_k{self.k}_n{self.n}{'_compile'}",
)
self.enable_profiler = bool(params.get("enable_profiler", False))
self.enable_memory_profiler = bool(params.get("enable_memory_profiler", False))
self.enable_memory_visualizer = bool(
params.get("enable_memory_visualizer", False)
)
# Create profiler directory path without leading slash
profiler_dir = os.path.join(self.output_dir, "profiler")
os.makedirs(profiler_dir, exist_ok=True)
@@ -108,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]:
"model_type": self.model_type,
"output_dir": self.output_dir,
"enable_profiler": self.enable_profiler,
"enable_memory_profiler": self.enable_memory_profiler,
"enable_memory_visualizer": self.enable_memory_visualizer,
}


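A quick sketch of how the new flag flows out of a params dict, mirroring the `params.get(...)` pattern above; the dict contents are illustrative:

```python
# Illustrative params dict -> flag parsing, mirroring BenchmarkConfig above.
params = {
    "enable_profiler": True,
    # "enable_memory_visualizer" left out: HTML rendering defaults to off
}

enable_profiler = bool(params.get("enable_profiler", False))  # -> True
enable_memory_visualizer = bool(params.get("enable_memory_visualizer", False))  # -> False
```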