From 9ab6dbe7cd306956ac31abd0d3558debf61bf6dc Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Fri, 2 Jan 2026 15:17:43 -0500 Subject: [PATCH 01/12] Fix kernel/dispatch filtering in GUI * Disallow --kernel and --dispatch filtering in analyze --gui mode since GUI frontend offers dropdown menu for kernel and dispatch filtering * Update CHANGELOG and documentation * Gracefully handle N/A values * Ensure workload path is valid before using it in GUI --- projects/rocprofiler-compute/CHANGELOG.md | 2 + .../docs/how-to/analyze/standalone-gui.rst | 10 ++++- .../rocprof_compute_analyze/analysis_base.py | 38 +++++++++++-------- .../rocprof_compute_analyze/analysis_webui.py | 3 +- projects/rocprofiler-compute/src/utils/gui.py | 8 ++-- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 5b4c63c36a3..785faaee87a 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -49,6 +49,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes +* Disallow using kernel/dispatch (--kernel or --dispatch options) filtering with --gui analyze mode since filtering is supported via dropdown meny in GUI frontend. + ### Removed * Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter. 
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst index e7cedb3fa12..bf0ae48868a 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst @@ -28,7 +28,15 @@ Launch the standalone GUI analyzer ---------------------------------- To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your -desired analysis command. For example: +desired analysis command. + +.. note:: + + Do not use the ``-k/--kernel`` or ``-d/--dispatch`` command-line filtering + options when launching the standalone GUI. Instead, use the dropdown menus in + the web interface to filter by kernel or dispatch. + +For example: .. code-block:: shell-session diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index 7cfe42403da..445a48e93aa 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -386,25 +386,33 @@ def sanitize(self) -> None: sys.exit(0) # Ensure analysis output does not overwrite existing files - if not args.output_name: - return + if args.output_name: + if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name): + console_error( + "analysis", + "Analysis output file/folder name must " + "contain only alphanumeric characters " + "or underscores (_), hyphens (-).", + ) - if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name): - console_error( - "analysis", - "Analysis output file/folder name must " - "contain only alphanumeric characters " - "or underscores (_), hyphens (-).", - ) + path_to_check = args.output_name + if args.output_format in ("txt", "db"): + path_to_check += f".{args.output_format}" - path_to_check = args.output_name - if 
args.output_format in ("txt", "db"): - path_to_check += f".{args.output_format}" + if Path(path_to_check).exists(): + console_error( + f"Analysis output file/folder {path_to_check} already exists. " + "Please choose a different name." + ) - if Path(path_to_check).exists(): + # Prevent --dispatch or --kernel with --gui, + # since gui handles it in the frontend + if args.gui and (args.gpu_kernel or args.gpu_dispatch_id): console_error( - f"Analysis output file/folder {path_to_check} already exists. " - "Please choose a different name." + "analysis", + "The --gui option cannot be used with " + "--dispatch (-d) or --kernel (-k) filters. " + "Use the dropdown menus in the GUI frontend to filter by dispatch or kernel.", ) # Check if any kernel's counters are missing due to iteration multiplexing diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 40df31abc45..0dc9ce6d281 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -51,7 +51,6 @@ def __init__( self.app = dash.Dash( __name__, title=PROJECT_NAME, external_stylesheets=[dbc.themes.CYBORG] ) - self.dest_dir = str(Path(args.path[0][0]).absolute().resolve()) self.arch: Optional[str] = None self.__hidden_sections = ["Memory Chart"] @@ -339,6 +338,8 @@ def pre_processing(self) -> None: args = self.get_args() + self.dest_dir = str(Path(args.path[0][0]).absolute().resolve()) + # create 'mega dataframe' self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc( self.dest_dir, diff --git a/projects/rocprofiler-compute/src/utils/gui.py b/projects/rocprofiler-compute/src/utils/gui.py index a03ab297b40..503e160e4f1 100644 --- a/projects/rocprofiler-compute/src/utils/gui.py +++ b/projects/rocprofiler-compute/src/utils/gui.py @@ -61,7 +61,7 @@ def multi_bar_chart( def 
create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0) + display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0.0) return px.bar( display_df, @@ -78,7 +78,7 @@ def create_multi_bar_charts( display_df: pd.DataFrame, table_id: int, df_unit: str ) -> list[px.bar]: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0) + display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0.0) nested_bar = multi_bar_chart(table_id, display_df) charts = [] @@ -103,7 +103,7 @@ def create_multi_bar_charts( def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "" else 0.0) + display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "N/A" else 0.0) charts = [] @@ -144,7 +144,7 @@ def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]: elif table_id == 1101: # Special formatting reference 'Pct of Peak' value display_df["Pct of Peak"] = display_df["Pct of Peak"].apply( - lambda x: float(x) if x != "" else 0.0 + lambda x: float(x) if x != "N/A" else 0.0 ) charts.append( px.bar( From 58feae046c5e2beeeb0e92f4c473f2a44e58b77f Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 5 Jan 2026 12:40:30 -0500 Subject: [PATCH 02/12] Ignore kernel filters if dispatch filters provided --- .../rocprof_compute_analyze/analysis_base.py | 3 ++- projects/rocprofiler-compute/src/utils/gui.py | 4 +++- .../rocprofiler-compute/src/utils/parser.py | 17 +++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index 
445a48e93aa..e171c0ad02d 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -412,7 +412,8 @@ def sanitize(self) -> None: "analysis", "The --gui option cannot be used with " "--dispatch (-d) or --kernel (-k) filters. " - "Use the dropdown menus in the GUI frontend to filter by dispatch or kernel.", + "Use the dropdown menus in the GUI frontend " + "to filter by dispatch or kernel.", ) # Check if any kernel's counters are missing due to iteration multiplexing diff --git a/projects/rocprofiler-compute/src/utils/gui.py b/projects/rocprofiler-compute/src/utils/gui.py index 503e160e4f1..61315b6faec 100644 --- a/projects/rocprofiler-compute/src/utils/gui.py +++ b/projects/rocprofiler-compute/src/utils/gui.py @@ -103,7 +103,9 @@ def create_multi_bar_charts( def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "N/A" else 0.0) + display_df["Avg"] = display_df["Avg"].apply( + lambda x: float(x) if x != "N/A" else 0.0 + ) charts = [] diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 4ae6c9759a7..294bd08ca78 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -1215,17 +1215,18 @@ def apply_filters( if filtered_df.empty: console_error("analysis", f"{workload.filter_gpu_ids} is an invalid gpu-id") - # Apply kernel filter - # NB: - # Kernel id is unique! - # We pick up kernel names from kerne ids first. - # Then filter valid entries with kernel names. 
- if workload.filter_kernel_ids: - filtered_df = apply_kernel_filter(filtered_df, workload, dir_path) - # Apply dispatch filter if workload.filter_dispatch_ids: filtered_df = apply_dispatch_filter(filtered_df, workload) + elif workload.filter_kernel_ids: + # Do not apply kernel filter if dispatch filter is already specified, + # because each dispatch id has a unique kernel + # Apply kernel filter + # NB: + # Kernel id is unique! + # We pick up kernel names from kerne ids first. + # Then filter valid entries with kernel names. + filtered_df = apply_kernel_filter(filtered_df, workload, dir_path) if debug: print("~" * 40, "\nraw pmc df info:\n") From e2c579e64f58e626851c2a51fb10d96884e43319 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 5 Jan 2026 12:50:26 -0500 Subject: [PATCH 03/12] Add documentation for dispatch filtering overriding kernel filtering --- projects/rocprofiler-compute/CHANGELOG.md | 2 ++ .../rocprofiler-compute/docs/how-to/analyze/cli.rst | 5 +++++ .../docs/how-to/analyze/standalone-gui.rst | 5 +++++ projects/rocprofiler-compute/src/argparser.py | 10 ++++++++-- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 785faaee87a..81ea9f89b62 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -51,6 +51,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Disallow using kernel/dispatch (--kernel or --dispatch options) filtering with --gui analyze mode since filtering is supported via dropdown meny in GUI frontend. +* Prevent conflicts between kernel and dispatch filters by ignoring kernel filters if dispatch filters are provided since each dispatch id has a unique kernel. + ### Removed * Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter. 
diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index 984d226aaad..d794aacc935 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -346,6 +346,11 @@ Show System Speed-of-Light and CS_Busy blocks only this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for GPU Busy Cycles metric. +.. note:: + + Any dispatch filters selected in GUI will override kernel filters, since each + dispatch id corresponds to a unique kernel. + Filter kernels First, list the top kernels in your application using `--list-stats`. diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst index bf0ae48868a..fd8fef4538f 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst @@ -36,6 +36,11 @@ desired analysis command. options when launching the standalone GUI. Instead, use the dropdown menus in the web interface to filter by kernel or dispatch. +.. note:: + + Any dispatch filters selected in GUI will override kernel filters, since each + dispatch id corresponds to a unique kernel. + For example: .. code-block:: shell-session diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index 39abcd8bd4d..c2006dc483f 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -595,7 +595,10 @@ def omniarg_parser( dest="gpu_kernel", nargs="+", action="append", - help="\t\tSpecify kernel id(s) from --list-stats for filtering.", + help=( + "\t\tSpecify kernel id(s) from --list-stats for filtering.\n" + "\t\tIgnored if dispatch filtering is used." 
+ ), ) analyze_group.add_argument( "-d", @@ -604,7 +607,10 @@ def omniarg_parser( metavar="", nargs="+", action="append", - help="\t\tSpecify dispatch id(s) for filtering.", + help=( + "\t\tSpecify dispatch id(s) for filtering.\n" + "\t\tOverrides kernel filtering if both are used." + ), ) analyze_group.add_argument( "-b", From bb36b17ddf5a969aba2f929ab0a8658209241af2 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 5 Jan 2026 12:53:59 -0500 Subject: [PATCH 04/12] Fix typo --- projects/rocprofiler-compute/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 81ea9f89b62..d30095b7d44 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -49,7 +49,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes -* Disallow using kernel/dispatch (--kernel or --dispatch options) filtering with --gui analyze mode since filtering is supported via dropdown meny in GUI frontend. +* Disallow using kernel/dispatch (--kernel or --dispatch options) filtering with --gui analyze mode since filtering is supported via dropdown menu in GUI frontend. * Prevent conflicts between kernel and dispatch filters by ignoring kernel filters if dispatch filters are provided since each dispatch id has a unique kernel. 
From 4f497b934955c90d7f0aa1c9ff683987bf71d0f9 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Wed, 7 Jan 2026 09:40:07 -0500 Subject: [PATCH 05/12] Fix documentation --- projects/rocprofiler-compute/docs/how-to/analyze/cli.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index d794aacc935..a5d325c2067 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -348,8 +348,8 @@ Show System Speed-of-Light and CS_Busy blocks only .. note:: - Any dispatch filters selected in GUI will override kernel filters, since each - dispatch id corresponds to a unique kernel. + Any dispatch filters selected (using --dispatch) will override any selected kernel filters (using --kernel) + since each dispatch id corresponds to a unique kernel. Filter kernels First, list the top kernels in your application using `--list-stats`. 
From d3bec000f6efcb8c7c017435ca0f04a4fa319e1a Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Wed, 7 Jan 2026 09:43:25 -0500 Subject: [PATCH 06/12] remove unnecessary whitespace --- .../src/rocprof_compute_analyze/analysis_webui.py | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 0dc9ce6d281..eeab329f5f1 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -337,7 +337,6 @@ def pre_processing(self) -> None: ) args = self.get_args() - self.dest_dir = str(Path(args.path[0][0]).absolute().resolve()) # create 'mega dataframe' From 6484a47d0842e56bbfff83f29adc9f8d1256a661 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Wed, 7 Jan 2026 09:58:21 -0500 Subject: [PATCH 07/12] Address review comments --- projects/rocprofiler-compute/src/utils/gui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-compute/src/utils/gui.py b/projects/rocprofiler-compute/src/utils/gui.py index 61315b6faec..27083655159 100644 --- a/projects/rocprofiler-compute/src/utils/gui.py +++ b/projects/rocprofiler-compute/src/utils/gui.py @@ -61,7 +61,7 @@ def multi_bar_chart( def create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0.0) + display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0) return px.bar( display_df, @@ -78,7 +78,7 @@ def create_multi_bar_charts( display_df: pd.DataFrame, table_id: int, df_unit: str ) -> list[px.bar]: display_df = display_df.copy() - display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0.0) + display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) 
if x != "N/A" else 0) nested_bar = multi_bar_chart(table_id, display_df) charts = [] From a329e22976d35dcb03b2a2acf3102f99331f6454 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Wed, 7 Jan 2026 14:07:20 -0500 Subject: [PATCH 08/12] Allow kernel/dispatch filtering with --gui --- projects/rocprofiler-compute/CHANGELOG.md | 2 -- .../docs/how-to/analyze/standalone-gui.rst | 6 ------ .../rocprof_compute_analyze/analysis_base.py | 13 +++---------- .../rocprof_compute_analyze/analysis_webui.py | 6 ++++++ .../rocprofiler-compute/src/utils/parser.py | 17 ++++++++--------- 5 files changed, 17 insertions(+), 27 deletions(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index d30095b7d44..aa1a4db9182 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -49,8 +49,6 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes -* Disallow using kernel/dispatch (--kernel or --dispatch options) filtering with --gui analyze mode since filtering is supported via dropdown menu in GUI frontend. - * Prevent conflicts between kernel and dispatch filters by ignoring kernel filters if dispatch filters are provided since each dispatch id has a unique kernel. ### Removed diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst index fd8fef4538f..4ddddf6c553 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst @@ -30,12 +30,6 @@ Launch the standalone GUI analyzer To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your desired analysis command. -.. 
note:: - - Do not use the ``-k/--kernel`` or ``-d/--dispatch`` command-line filtering - options when launching the standalone GUI. Instead, use the dropdown menus in - the web interface to filter by kernel or dispatch. - .. note:: Any dispatch filters selected in GUI will override kernel filters, since each diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index e171c0ad02d..57bbdb69706 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -405,16 +405,9 @@ def sanitize(self) -> None: "Please choose a different name." ) - # Prevent --dispatch or --kernel with --gui, - # since gui handles it in the frontend - if args.gui and (args.gpu_kernel or args.gpu_dispatch_id): - console_error( - "analysis", - "The --gui option cannot be used with " - "--dispatch (-d) or --kernel (-k) filters. 
" - "Use the dropdown menus in the GUI frontend " - "to filter by dispatch or kernel.", - ) + if args.gpu_kernel and args.gpu_dispatch_id: + console_warning(f"Ignoring kernel filter {args.gpu_kernel} since dispatch filter {args.gpu_dispatch_id} is already provided") + args.gpu_kernel = [] # Check if any kernel's counters are missing due to iteration multiplexing if ( diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index eeab329f5f1..9b88b64f889 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -89,6 +89,7 @@ def build_layout( kernel_top_df = base_data.dfs[1] for kernel_id in base_data.filter_kernel_ids: filt_kernel_names.append(str(kernel_top_df.loc[kernel_id, "Kernel_Name"])) + input_filters["kernel"] = filt_kernel_names # setup app layout from utils.gui_components.header import get_header @@ -151,6 +152,11 @@ def generate_from_filter( console_debug("analysis", f"gui gpu filter is {gcd_filter}") console_debug("analysis", f"gui top-n filter is {top_n_filt}") + # Ignore kernel filtering if dispatch filtering is provided + if kernel_filter and disp_filt: + console_warning(f"Ignoring kernel filter {kernel_filter} since dispatch filter {disp_filt} is already provided") + kernel_filter = [] + base_data[base_run].filter_kernel_ids = ( [str(k) for k in kernel_filter] if kernel_filter else [] ) diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 294bd08ca78..4ae6c9759a7 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -1215,18 +1215,17 @@ def apply_filters( if filtered_df.empty: console_error("analysis", f"{workload.filter_gpu_ids} is an invalid gpu-id") + # Apply kernel filter + # NB: + # Kernel id is 
unique! + # We pick up kernel names from kerne ids first. + # Then filter valid entries with kernel names. + if workload.filter_kernel_ids: + filtered_df = apply_kernel_filter(filtered_df, workload, dir_path) + # Apply dispatch filter if workload.filter_dispatch_ids: filtered_df = apply_dispatch_filter(filtered_df, workload) - elif workload.filter_kernel_ids: - # Do not apply kernel filter if dispatch filter is already specified, - # because each dispatch id has a unique kernel - # Apply kernel filter - # NB: - # Kernel id is unique! - # We pick up kernel names from kerne ids first. - # Then filter valid entries with kernel names. - filtered_df = apply_kernel_filter(filtered_df, workload, dir_path) if debug: print("~" * 40, "\nraw pmc df info:\n") From 6c76d939543365d2a25a8dc7218d0a1acd17f578 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 12 Jan 2026 19:54:17 -0500 Subject: [PATCH 09/12] Address review comments --- projects/rocprofiler-compute/CHANGELOG.md | 2 -- .../rocprofiler-compute/docs/how-to/analyze/cli.rst | 8 ++------ .../docs/how-to/analyze/standalone-gui.rst | 5 ----- projects/rocprofiler-compute/src/argparser.py | 10 ++-------- projects/rocprofiler-compute/src/utils/parser.py | 10 +++++----- 5 files changed, 9 insertions(+), 26 deletions(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index aa1a4db9182..5b4c63c36a3 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -49,8 +49,6 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes -* Prevent conflicts between kernel and dispatch filters by ignoring kernel filters if dispatch filters are provided since each dispatch id has a unique kernel. 
- ### Removed * Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter. diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index a5d325c2067..44fa9441853 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -15,7 +15,7 @@ This section provides an overview of ROCm Compute Profiler's CLI analysis featur * :ref:`Metric customization `: Isolate a subset of built-in metrics or build your own profiling configuration. * :ref:`Filtering `: Hone in on a particular kernel, GPU ID, or dispatch ID via post-process filtering. - + * :ref:`Per-kernel roofline analysis `: Detailed arithmetic intensity and performance analysis for individual kernels. Run ``rocprof-compute analyze -h`` for more details. @@ -346,10 +346,6 @@ Show System Speed-of-Light and CS_Busy blocks only this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for GPU Busy Cycles metric. -.. note:: - - Any dispatch filters selected (using --dispatch) will override any selected kernel filters (using --kernel) - since each dispatch id corresponds to a unique kernel. Filter kernels First, list the top kernels in your application using `--list-stats`. 
@@ -571,4 +567,4 @@ Analysis database example DEBUG [analysis] generating analysis DEBUG SQLite database initialized with name: test.db DEBUG Initialized database: test.db - DEBUG Completed writing database \ No newline at end of file + DEBUG Completed writing database diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst index 4ddddf6c553..41b2176411c 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/standalone-gui.rst @@ -30,11 +30,6 @@ Launch the standalone GUI analyzer To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your desired analysis command. -.. note:: - - Any dispatch filters selected in GUI will override kernel filters, since each - dispatch id corresponds to a unique kernel. - For example: .. code-block:: shell-session diff --git a/projects/rocprofiler-compute/src/argparser.py b/projects/rocprofiler-compute/src/argparser.py index c2006dc483f..39abcd8bd4d 100644 --- a/projects/rocprofiler-compute/src/argparser.py +++ b/projects/rocprofiler-compute/src/argparser.py @@ -595,10 +595,7 @@ def omniarg_parser( dest="gpu_kernel", nargs="+", action="append", - help=( - "\t\tSpecify kernel id(s) from --list-stats for filtering.\n" - "\t\tIgnored if dispatch filtering is used." - ), + help="\t\tSpecify kernel id(s) from --list-stats for filtering.", ) analyze_group.add_argument( "-d", @@ -607,10 +604,7 @@ def omniarg_parser( metavar="", nargs="+", action="append", - help=( - "\t\tSpecify dispatch id(s) for filtering.\n" - "\t\tOverrides kernel filtering if both are used." 
- ), + help="\t\tSpecify dispatch id(s) for filtering.", ) analyze_group.add_argument( "-b", diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 4ae6c9759a7..3d1a24647c8 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -580,8 +580,7 @@ def gen_counter_list(formula: str) -> tuple[bool, list[str]]: return visited, counters try: tree = ast.parse( - formula - .replace("$normUnit", "SQ_WAVES") + formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") .replace( "$numActiveCUs", @@ -1290,6 +1289,8 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat # NB: support ignoring the 1st n dispatched execution by '> n' # The better way may be parsing python slice string for dispatch_id in workload.filter_dispatch_ids: + if isinstance(dispatch_id, str) and ">" in dispatch_id: + dispatch_id = re.match(r"\>\s*(\d+)", dispatch_id).group(1) if int(dispatch_id) >= len(df): # subtract 2 bc of the two header rows console_error("analysis", f"{dispatch_id} is an invalid dispatch id.") @@ -1297,7 +1298,7 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat isinstance(workload.filter_dispatch_ids[0], str) and ">" in workload.filter_dispatch_ids[0] ): - dispatch_match = re.match(r"\> (\d+)", workload.filter_dispatch_ids[0]) + dispatch_match = re.match(r"\>\s*(\d+)", workload.filter_dispatch_ids[0]) df = df[ df[schema.PMC_PERF_FILE_PREFIX]["Dispatch_ID"] > int(dispatch_match.group(1)) @@ -1722,8 +1723,7 @@ def load_pc_sampling_data( # Group by Instruction_Comment and aggregate grouped_counts = ( - merged_df - .groupby("Instruction_Comment") + merged_df.groupby("Instruction_Comment") .agg( count=("Instruction_Comment", "count"), instruction=("Instruction", "first"), From 994b1904a30b17bd205d00e26c8db0afe66a53f9 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 12 Jan 2026 
19:56:46 -0500 Subject: [PATCH 10/12] Address review comments --- .../src/rocprof_compute_analyze/analysis_base.py | 4 ---- .../src/rocprof_compute_analyze/analysis_webui.py | 5 ----- 2 files changed, 9 deletions(-) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py index 57bbdb69706..b4edad07fde 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py @@ -405,10 +405,6 @@ def sanitize(self) -> None: "Please choose a different name." ) - if args.gpu_kernel and args.gpu_dispatch_id: - console_warning(f"Ignoring kernel filter {args.gpu_kernel} since dispatch filter {args.gpu_dispatch_id} is already provided") - args.gpu_kernel = [] - # Check if any kernel's counters are missing due to iteration multiplexing if ( profiling_config.get("iteration_multiplexing") is not None diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py index 9b88b64f889..745b7febd0e 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_webui.py @@ -152,11 +152,6 @@ def generate_from_filter( console_debug("analysis", f"gui gpu filter is {gcd_filter}") console_debug("analysis", f"gui top-n filter is {top_n_filt}") - # Ignore kernel filtering if dispatch filtering is provided - if kernel_filter and disp_filt: - console_warning(f"Ignoring kernel filter {kernel_filter} since dispatch filter {disp_filt} is already provided") - kernel_filter = [] - base_data[base_run].filter_kernel_ids = ( [str(k) for k in kernel_filter] if kernel_filter else [] ) From 6688d8a07762cde8f68255c064d54376dcd1bbca Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Mon, 12 Jan 2026 20:00:02 
-0500 Subject: [PATCH 11/12] Update CHANGELOG --- projects/rocprofiler-compute/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 5b4c63c36a3..3761f78ff32 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -49,6 +49,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes +* Fix issue where dispatch filtering in a range (e.g. >2) was not working + ### Removed * Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter. From a2a098cf45e9e6c0d8a63d7dac0787679e209153 Mon Sep 17 00:00:00 2001 From: Vignesh Edithal Date: Thu, 15 Jan 2026 13:22:24 -0500 Subject: [PATCH 12/12] Fix formatting --- projects/rocprofiler-compute/.pre-commit-config.yaml | 4 ++-- projects/rocprofiler-compute/src/utils/parser.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/projects/rocprofiler-compute/.pre-commit-config.yaml b/projects/rocprofiler-compute/.pre-commit-config.yaml index 10c643321fe..e47ccf4aa00 100644 --- a/projects/rocprofiler-compute/.pre-commit-config.yaml +++ b/projects/rocprofiler-compute/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -12,7 +12,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility # for the latest ruff version supported by the hook. 
- rev: v0.12.12 + rev: v0.14.11 hooks: - id: ruff-check args: [--fix] diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 3d1a24647c8..68b56396c5d 100755 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -580,7 +580,8 @@ def gen_counter_list(formula: str) -> tuple[bool, list[str]]: return visited, counters try: tree = ast.parse( - formula.replace("$normUnit", "SQ_WAVES") + formula + .replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") .replace( "$numActiveCUs", @@ -1723,7 +1724,8 @@ def load_pc_sampling_data( # Group by Instruction_Comment and aggregate grouped_counts = ( - merged_df.groupby("Instruction_Comment") + merged_df + .groupby("Instruction_Comment") .agg( count=("Instruction_Comment", "count"), instruction=("Instruction", "first"),