Skip to content

Commit 85f79ff

Browse files
committed
Disable memory visualization
1 parent 365b648 commit 85f79ff

File tree

7 files changed

+51
-48
lines changed

7 files changed

+51
-48
lines changed

benchmarks/dashboard/microbenchmark_quantization_config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,3 @@ model_params:
1717
torch_compile_mode: "max-autotune"
1818
device: "cuda"
1919
model_type: "linear"
20-
enable_memory_profiler: false

benchmarks/microbenchmarks/README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,18 @@ model_params:
5252
device: "cuda" # Options: "cuda", "mps", "xpu", "cpu"
5353
model_type: "linear" # Options: "linear", "ln_linear_sigmoid"
5454
enable_profiler: true # Enable standard profiling
55-
enable_memory_profiler: true # Enable CUDA memory profiling
55+
# enable_memory_visualizer: true # Enable HTML memory visualization (slow)
5656
```
5757

5858
## Configuration Options
5959

6060
### Profiling Options
6161
- `enable_profiler`: Enable standard PyTorch profiling (default: false)
62-
- `enable_memory_profiler`: Enable CUDA memory profiling (default: false)
62+
- `enable_memory_visualizer`: Enable HTML memory visualization (default: false)
63+
- Memory profiling (pickle snapshots + peak stats) ALWAYS runs automatically
6364
- Only works when device is set to "cuda"
64-
- Generates memory snapshots before and after inference
65-
- Creates visualizations of memory usage
65+
- Generates HTML visualizations from memory snapshots (can be slow for large models)
66+
- Set to `true` only when debugging memory issues
6667
- Outputs are saved in the memory_profiler subdirectory
6768

6869
### Quantization Methods

benchmarks/microbenchmarks/benchmark_inference.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
213213
ndigits=2,
214214
)
215215

216-
# Run profiler if enabled
216+
# Run performance profiler if enabled
217217
if config.enable_profiler:
218218
print("Running profiler...")
219219
try:
@@ -230,42 +230,42 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
230230
except Exception as e:
231231
print(f"Error running profiler: {e}")
232232

233-
# Run memory profiler if enabled
234-
if config.enable_memory_profiler:
235-
print("Running memory profiler...")
236-
try:
237-
# Create memory profiler directory if it doesn't exist
238-
memory_profiler_dir = os.path.join(
239-
config.output_dir, "memory_profiler/pickle"
240-
)
241-
os.makedirs(memory_profiler_dir, exist_ok=True)
242-
243-
# Save memory profile with .pickle extension
244-
result.memory_profile_path, result.memory_stats = (
245-
generate_memory_profile(
246-
model=m_copy,
247-
input_data=input_data,
248-
profile_file_path=os.path.join(
249-
memory_profiler_dir,
250-
f"{config._file_name}_memory_profile.pickle",
251-
),
252-
)
253-
)
233+
# Always run memory profiler to get peak stats and save pickle snapshot (fast)
234+
print("Running memory profiler...")
235+
try:
236+
# Create memory profiler directory if it doesn't exist
237+
memory_profiler_dir = os.path.join(
238+
config.output_dir, "memory_profiler/pickle"
239+
)
240+
os.makedirs(memory_profiler_dir, exist_ok=True)
241+
242+
# Save memory profile with .pickle extension
243+
result.memory_profile_path, result.memory_stats = generate_memory_profile(
244+
model=m_copy,
245+
input_data=input_data,
246+
profile_file_path=os.path.join(
247+
memory_profiler_dir,
248+
f"{config._file_name}_memory_profile.pickle",
249+
),
250+
)
254251

252+
# Generate HTML visualization ONLY if explicitly enabled (slow: minutes to hours)
253+
if config.enable_memory_visualizer:
254+
print("Generating HTML visualization (this may take a while)...")
255255
if result.memory_profile_path:
256256
result.memory_visualization_path = visualize_memory_profile(
257257
result.memory_profile_path
258258
)
259-
except ValueError as e:
260-
if "not enough values to unpack" in str(e):
261-
print(
262-
"Failed due to existing bugs, re-run the code to generate memory profile. Please raise an issue if it persists."
263-
)
264-
except Exception as e:
265-
print(f"Error running memory profiler: {e}")
266-
import traceback
259+
except ValueError as e:
260+
if "not enough values to unpack" in str(e):
261+
print(
262+
"Failed due to existing bugs, re-run the code to generate memory profile. Please raise an issue if it persists."
263+
)
264+
except Exception as e:
265+
print(f"Error running memory profiler: {e}")
266+
import traceback
267267

268-
traceback.print_exc()
268+
traceback.print_exc()
269269

270270
return result
271271
except Exception as e:

benchmarks/microbenchmarks/profiler.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,19 +73,23 @@ def generate_model_profile(model, input_data, profile_file_path):
7373

7474

7575
def generate_memory_profile(model, input_data, profile_file_path):
76-
"""Function to generate CUDA memory profile.
76+
"""Generate CUDA memory profile with snapshot and peak statistics.
77+
78+
This function generates a memory snapshot pickle file and collects peak
79+
memory statistics. HTML visualization is done separately via visualize_memory_profile().
7780
7881
Args:
7982
model: The model to profile
8083
input_data: Input data for the model
8184
profile_file_path: Path to save the memory profile (.pickle)
8285
8386
Returns:
84-
str: Path to the saved profile file.
87+
tuple: (profile_file_path, memory_stats) where memory_stats contains
88+
peak memory usage in MB
8589
"""
8690
if not torch.cuda.is_available():
8791
print("Warning: CUDA is not available. Memory profiling requires CUDA.")
88-
return None
92+
return None, {}
8993
if model is None or input_data is None:
9094
raise ValueError("Model and input_data must not be None.")
9195

benchmarks/microbenchmarks/test/benchmark_config.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ model_params:
1717
device: "cuda"
1818
model_type: "linear"
1919
enable_profiler: true # Enable profiling for this model
20-
enable_memory_profiler: true # Enable memory profiling for this model
20+
enable_memory_visualizer: true # Enable memory visualization for this model
2121

2222
- name: "ln_linear_sigmoid_cuda"
2323
matrix_shapes:
@@ -30,7 +30,7 @@ model_params:
3030
device: "cuda"
3131
model_type: "ln_linear_sigmoid"
3232
enable_profiler: true
33-
enable_memory_profiler: true
33+
enable_memory_visualizer: true
3434

3535
- name: "bf16_transformer_block"
3636
matrix_shapes:
@@ -43,7 +43,7 @@ model_params:
4343
device: "cuda"
4444
model_type: "transformer_block" # TODO: Add a custom model (Figure out how to do this, maybe pass a .py file with model definition)
4545
enable_profiler: true
46-
enable_memory_profiler: true
46+
enable_memory_visualizer: true
4747

4848
- name: "large_bf16_ln_linear"
4949
matrix_shapes:
@@ -59,4 +59,4 @@ model_params:
5959
device: "cuda"
6060
model_type: "linear"
6161
enable_profiler: true
62-
enable_memory_profiler: true
62+
enable_memory_visualizer: true

benchmarks/microbenchmarks/test/test_benchmark_profiler.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ def test_memory_profiler_enabled(self):
162162
quantization=None,
163163
sparsity=None,
164164
params={
165-
"enable_memory_profiler": True,
166165
"device": "cuda",
167166
},
168167
shape_name="test",
@@ -201,7 +200,6 @@ def test_memory_profiler_visualization(self):
201200
quantization=None,
202201
sparsity=None,
203202
params={
204-
"enable_memory_profiler": True,
205203
"device": "cuda",
206204
},
207205
shape_name="test",
@@ -255,7 +253,6 @@ def test_memory_profiler_cuda_unavailable(self):
255253
quantization=None,
256254
sparsity=None,
257255
params={
258-
"enable_memory_profiler": True,
259256
"device": "cpu", # Force CPU to test CUDA unavailable case
260257
},
261258
shape_name="test",

benchmarks/microbenchmarks/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ def __init__(
8282
f"benchmark_{self.quantization}_{self.model_type}_m{self.m}_k{self.k}_n{self.n}{'_compile'}",
8383
)
8484
self.enable_profiler = bool(params.get("enable_profiler", False))
85-
self.enable_memory_profiler = bool(params.get("enable_memory_profiler", False))
85+
self.enable_memory_visualizer = bool(
86+
params.get("enable_memory_visualizer", False)
87+
)
8688
# Create profiler directory path without leading slash
8789
profiler_dir = os.path.join(self.output_dir, "profiler")
8890
os.makedirs(profiler_dir, exist_ok=True)
@@ -108,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]:
108110
"model_type": self.model_type,
109111
"output_dir": self.output_dir,
110112
"enable_profiler": self.enable_profiler,
111-
"enable_memory_profiler": self.enable_memory_profiler,
113+
"enable_memory_visualizer": self.enable_memory_visualizer,
112114
}
113115

114116

0 commit comments

Comments (0)