From 5e38611c50e6ab0faf188c7a1821f234ac65ec35 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 12:13:55 +0530
Subject: [PATCH 01/21] Add validation tests for `amd-smi` CLI output

---
 .../fetch_test_configurations.py              |  11 +
 .../test_amdsmi_cli.py                        | 268 ++++++++++++++++++
 2 files changed, 279 insertions(+)
 create mode 100644 build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index dab223872e6..65e49a27721 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -153,6 +153,17 @@ def _get_script_path(script_name: str) -> str:
             "windows": 1,
         },
     },
+    
+    "amdsmi_cli": {
+        "job_name": "amdsmi_cli",
+        "fetch_artifact_args": "--tests",
+        "timeout_minutes": 15,
+        "test_script": f"pytest {_get_script_path('test_amdsmi_cli.py')} -s",
+        "platform": ["linux"],
+        "total_shards_dict": {
+            "linux": 1,
+        },
+    },
     "hipcub": {
         "job_name": "hipcub",
         "fetch_artifact_args": "--prim --tests",
diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
new file mode 100644
index 00000000000..518d25562aa
--- /dev/null
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: MIT
+
+"""
+validation of the `amd-smi` CLI output.
+
+This test expects `THEROCK_BIN_DIR` to point to the TheRock `bin/` directory
+containing the `amd-smi` binary (CI sets this via the setup action).
+"""
+
+import os
+import re
+import json
+import csv
+import subprocess
+from pathlib import Path
+
+import pytest
+
+
+def _amd_smi_path() -> Path:
+    """Return the path to the `amd-smi` binary from `THEROCK_BIN_DIR`.
+
+    Skips the test via pytest if `THEROCK_BIN_DIR` is not set. Asserts that
+    the expected `amd-smi` binary exists at the resolved path.
+
+    Args:
+        None
+
+    Returns:
+        pathlib.Path: Path to the `amd-smi` binary.
+    """
+    th = os.getenv("THEROCK_BIN_DIR")
+    if not th:
+        pytest.skip("THEROCK_BIN_DIR not set; skipping amdsmi tests")
+    p = Path(th) / "amd-smi"
+    assert p.exists(), f"amd-smi not found at {p}"
+    return p
+
+
+def _run_amd_smi(amd_smi: Path, args: list[str]) -> tuple[int, str, str]:
+    """Run `amd-smi list` with the given `args` and return (rc, stdout, stderr).
+
+    The function invokes the binary via subprocess.run and captures text
+    output for assertions in the tests.
+
+    Args:
+        amd_smi (pathlib.Path): Path to the `amd-smi` binary.
+        args (list[str]): Arguments to pass after `amd-smi list`.
+
+    Returns:
+        tuple[int, str, str]: Return code, stdout text, stderr text.
+    """
+    cmd = [str(amd_smi), "list"] + args
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _parse_gpu_blocks(output: str) -> list[str]:
+    """Parse human-readable `amd-smi list` output into GPU text blocks.
+
+    Returns a list where each element is the multiline block describing a
+    single GPU. The parser looks for lines that start GPU markers like
+    "GPU: <n>" or "GPU <n>:" and groups subsequent lines until the next
+    GPU marker.
+
+    Args:
+        output (str): The human-readable stdout from `amd-smi list`.
+
+    Returns:
+        list[str]: List of multiline GPU description blocks.
+    """
+    blocks = []
+    current = None
+    for line in output.splitlines():
+        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
+            if current is not None:
+                blocks.append("\n".join(current))
+            current = [line]
+            continue
+        if current is not None:
+            current.append(line)
+    if current is not None:
+        blocks.append("\n".join(current))
+    return blocks
+
+
+def _validate_human_block(block_text: str) -> list[str]:
+    """Validate a single human-readable GPU block.
+
+    Returns a list of missing field names (empty if all required fields
+    appear). The function checks for BDF, UUID, KFD_ID, NODE_ID and
+    PARTITION_ID in the block_text.
+
+    Args:
+        block_text (str): Multiline text block describing a single GPU.
+
+    Returns:
+        list[str]: Missing field names (empty if validation passes).
+    """
+    missing = []
+    if not re.search(r"\s*BDF:\s*.+", block_text):
+        missing.append("BDF")
+    if not re.search(r"\s*UUID:\s*.+", block_text):
+        missing.append("UUID")
+    if not re.search(r"\s*KFD_ID:\s*\d+", block_text):
+        missing.append("KFD_ID")
+    if not re.search(r"\s*NODE_ID:\s*\d+", block_text):
+        missing.append("NODE_ID")
+    if not re.search(r"\s*PARTITION_ID:\s*\d+", block_text):
+        missing.append("PARTITION_ID")
+    return missing
+
+
+def _validate_json(obj: dict) -> list[str]:
+    """Validate a JSON GPU entry from `amd-smi --json`.
+
+    Returns a list of missing or incorrectly-typed fields. Expected fields
+    include `gpu` (int), `bdf` (str), `uuid` (str), `kfd_id` (int),
+    `node_id` (int) and `partition_id` (int).
+
+    Args:
+        obj (dict): Parsed JSON object representing a GPU entry.
+
+    Returns:
+        list[str]: Missing or invalid field names.
+    """
+    missing = []
+    # required keys mapping
+    if "gpu" not in obj or not isinstance(obj.get("gpu"), int):
+        missing.append("gpu")
+    if "bdf" not in obj or not isinstance(obj.get("bdf"), str):
+        missing.append("bdf")
+    if "uuid" not in obj or not isinstance(obj.get("uuid"), str):
+        missing.append("uuid")
+    if "kfd_id" not in obj or not isinstance(obj.get("kfd_id"), int):
+        missing.append("kfd_id")
+    if "node_id" not in obj or not isinstance(obj.get("node_id"), int):
+        missing.append("node_id")
+    if "partition_id" not in obj or not isinstance(obj.get("partition_id"), int):
+        missing.append("partition_id")
+    return missing
+
+
+def _validate_csv_row(row: dict) -> list[str]:
+    """Validate a CSV row parsed from `amd-smi --csv` output.
+
+    Expected header names are: `gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id`.
+    Returns a list of missing or invalid fields.
+
+    Args:
+        row (dict): Mapping of CSV headers to values as returned by
+            `csv.DictReader`.
+
+    Returns:
+        list[str]: Missing or invalid field names.
+    """
+    # expected header names: gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id
+    missing = []
+    try:
+        if "gpu" not in row or int(row.get("gpu", "")) < 0:
+            missing.append("gpu")
+    except Exception:
+        missing.append("gpu")
+    if not row.get("gpu_bdf"):
+        missing.append("gpu_bdf")
+    if not row.get("gpu_uuid"):
+        missing.append("gpu_uuid")
+    try:
+        if "kfd_id" not in row or int(row.get("kfd_id", "")) < 0:
+            missing.append("kfd_id")
+    except Exception:
+        missing.append("kfd_id")
+    try:
+        if "node_id" not in row or int(row.get("node_id", "")) < 0:
+            missing.append("node_id")
+    except Exception:
+        missing.append("node_id")
+    try:
+        if "partition_id" not in row or int(row.get("partition_id", "")) < 0:
+            missing.append("partition_id")
+    except Exception:
+        missing.append("partition_id")
+    return missing
+
+
+@pytest.mark.parametrize(
+    "mod_args",
+    [
+        ([], None),  # human readable on stdout
+        (["--json"], None),
+        (["--csv"], None),
+        (["--file"], "human"),
+        (["--json", "--file"], "json"),
+        (["--csv", "--file"], "csv"),
+    ],
+)
+def test_amd_smi_list(mod_args, tmp_path):
+    """End-to-end test of `amd-smi list` covering output modes.
+
+    The test runs `amd-smi list` with multiple modifier combinations (human,
+    JSON, CSV, and file-output variants), parses the output and validates
+    required fields for each GPU entry.
+
+    Args:
+        mod_args (tuple[list[str], Optional[str]]): Parameterized tuple where
+            the first element is a list of modifier args and the second
+            element indicates the expected parsed mode when `--file` is
+            used.
+        tmp_path (pathlib.Path): pytest temporary directory fixture.
+
+    Returns:
+        None
+    """
+    args, expected_mode = mod_args
+    amd_smi = _amd_smi_path()
+
+    file_path = None
+    run_args = list(args)
+    if "--file" in run_args:
+        # supply output file
+        file_path = tmp_path / "amdsmi_out.txt"
+        run_args = [a for a in run_args if a != "--file"]
+        run_args.extend(["--file", str(file_path)])
+
+    rc, out, err = _run_amd_smi(amd_smi, run_args)
+    assert rc == 0, f"amd-smi failed rc={rc} stderr={err} stdout={out}"
+
+    # If file was requested, stdout should be empty
+    if file_path is not None:
+        assert out.strip() == "", f"Expected no stdout when using --file, got: {out}"
+        assert file_path.exists(), "Expected output file to be created"
+        content = file_path.read_text(encoding="utf-8", errors="replace")
+    else:
+        content = out
+
+    # Validate based on mode
+    if expected_mode == "json" or ("--json" in args and expected_mode is None):
+        # JSON array expected
+        try:
+            data = json.loads(content)
+        except Exception as e:
+            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content}")
+        assert isinstance(data, list) and data, "Expected non-empty JSON array"
+        for idx, obj in enumerate(data):
+            missing = _validate_json(obj)
+            assert not missing, f"JSON GPU entry {idx} missing fields: {missing}"
+
+    elif expected_mode == "csv" or ("--csv" in args and expected_mode is None):
+        # CSV expected
+        try:
+            reader = csv.DictReader(content.splitlines())
+            rows = list(reader)
+        except Exception as e:
+            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content}")
+        assert rows, "Expected at least one CSV row"
+        for idx, row in enumerate(rows):
+            missing = _validate_csv_row(row)
+            assert not missing, f"CSV row {idx} missing fields: {missing}"
+
+    else:
+        # human readable output
+        blocks = _parse_gpu_blocks(content)
+        assert blocks, "No GPU blocks found in amd-smi human output"
+        for idx, block_text in enumerate(blocks):
+            missing = _validate_human_block(block_text)
+            assert not missing, f"Human GPU block {idx} missing fields: {missing}\nBlock:\n{block_text}"

From 7edbb8d427a5f6ccebbc09328f54f64723283bf0 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 17:52:20 +0530
Subject: [PATCH 02/21] Refactor amd-smi path resolution and update function
 signatures for clarity

---
 .../test_amdsmi_cli.py                        | 223 +++++++++---------
 1 file changed, 114 insertions(+), 109 deletions(-)

diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index 518d25562aa..ce61861f00c 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -31,33 +31,38 @@ def _amd_smi_path() -> Path:
     Returns:
         pathlib.Path: Path to the `amd-smi` binary.
     """
-    th = os.getenv("THEROCK_BIN_DIR")
-    if not th:
+    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
+    if not therock_bin_dir_env:
         pytest.skip("THEROCK_BIN_DIR not set; skipping amdsmi tests")
-    p = Path(th) / "amd-smi"
-    assert p.exists(), f"amd-smi not found at {p}"
-    return p
 
+    # Resolve the path to an absolute canonical path to avoid cwd-dependent
+    # failures (e.g., if a prior step changes directory). Also check that the
+    # binary exists and is executable.
+    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
+    assert amd_smi_bin_path.exists(), f"amd-smi not found at {amd_smi_bin_path}"
+    assert os.access(amd_smi_bin_path, os.X_OK), f"amd-smi is not executable: {amd_smi_bin_path}"
+    return amd_smi_bin_path
 
-def _run_amd_smi(amd_smi: Path, args: list[str]) -> tuple[int, str, str]:
-    """Run `amd-smi list` with the given `args` and return (rc, stdout, stderr).
+
+def _run_amd_smi(amd_smi_path: Path, modifiers: list[str]) -> tuple[int, str, str]:
+    """Run `amd-smi list` with the given `modifiers` and return (rc, stdout, stderr).
 
     The function invokes the binary via subprocess.run and captures text
     output for assertions in the tests.
 
     Args:
-        amd_smi (pathlib.Path): Path to the `amd-smi` binary.
-        args (list[str]): Arguments to pass after `amd-smi list`.
+        amd_smi_path (pathlib.Path): Path to the `amd-smi` binary.
+        modifiers (list[str]): Arguments to pass after `amd-smi list`.
 
     Returns:
         tuple[int, str, str]: Return code, stdout text, stderr text.
     """
-    cmd = [str(amd_smi), "list"] + args
+    cmd = [str(amd_smi_path), "list"] + modifiers
     proc = subprocess.run(cmd, capture_output=True, text=True)
     return proc.returncode, proc.stdout, proc.stderr
 
 
-def _parse_gpu_blocks(output: str) -> list[str]:
+def _parse_gpu_blocks(text_output: str) -> list[str]:
     """Parse human-readable `amd-smi list` output into GPU text blocks.
 
     Returns a list where each element is the multiline block describing a
@@ -71,22 +76,22 @@ def _parse_gpu_blocks(output: str) -> list[str]:
     Returns:
         list[str]: List of multiline GPU description blocks.
     """
-    blocks = []
-    current = None
-    for line in output.splitlines():
+    gpu_blocks = []
+    current_block_lines = None
+    for line in text_output.splitlines():
         if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
-            if current is not None:
-                blocks.append("\n".join(current))
-            current = [line]
+            if current_block_lines is not None:
+                gpu_blocks.append("\n".join(current_block_lines))
+            current_block_lines = [line]
             continue
-        if current is not None:
-            current.append(line)
-    if current is not None:
-        blocks.append("\n".join(current))
-    return blocks
+        if current_block_lines is not None:
+            current_block_lines.append(line)
+    if current_block_lines is not None:
+        gpu_blocks.append("\n".join(current_block_lines))
+    return gpu_blocks
 
 
-def _validate_human_block(block_text: str) -> list[str]:
+def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
     """Validate a single human-readable GPU block.
 
     Returns a list of missing field names (empty if all required fields
@@ -94,26 +99,26 @@ def _validate_human_block(block_text: str) -> list[str]:
     PARTITION_ID in the block_text.
 
     Args:
-        block_text (str): Multiline text block describing a single GPU.
+        human_readable_gpu_block_text (str): Multiline text block describing a single GPU.
 
     Returns:
         list[str]: Missing field names (empty if validation passes).
     """
-    missing = []
-    if not re.search(r"\s*BDF:\s*.+", block_text):
-        missing.append("BDF")
-    if not re.search(r"\s*UUID:\s*.+", block_text):
-        missing.append("UUID")
-    if not re.search(r"\s*KFD_ID:\s*\d+", block_text):
-        missing.append("KFD_ID")
-    if not re.search(r"\s*NODE_ID:\s*\d+", block_text):
-        missing.append("NODE_ID")
-    if not re.search(r"\s*PARTITION_ID:\s*\d+", block_text):
-        missing.append("PARTITION_ID")
-    return missing
-
-
-def _validate_json(obj: dict) -> list[str]:
+    missing_fields = []
+    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("BDF")
+    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("UUID")
+    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("KFD_ID")
+    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("NODE_ID")
+    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("PARTITION_ID")
+    return missing_fields
+
+
+def _validate_json(gpu_obj: dict) -> list[str]:
     """Validate a JSON GPU entry from `amd-smi --json`.
 
     Returns a list of missing or incorrectly-typed fields. Expected fields
@@ -126,24 +131,24 @@ def _validate_json(obj: dict) -> list[str]:
     Returns:
         list[str]: Missing or invalid field names.
     """
-    missing = []
+    missing_fields = []
     # required keys mapping
-    if "gpu" not in obj or not isinstance(obj.get("gpu"), int):
-        missing.append("gpu")
-    if "bdf" not in obj or not isinstance(obj.get("bdf"), str):
-        missing.append("bdf")
-    if "uuid" not in obj or not isinstance(obj.get("uuid"), str):
-        missing.append("uuid")
-    if "kfd_id" not in obj or not isinstance(obj.get("kfd_id"), int):
-        missing.append("kfd_id")
-    if "node_id" not in obj or not isinstance(obj.get("node_id"), int):
-        missing.append("node_id")
-    if "partition_id" not in obj or not isinstance(obj.get("partition_id"), int):
-        missing.append("partition_id")
-    return missing
-
-
-def _validate_csv_row(row: dict) -> list[str]:
+    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
+        missing_fields.append("gpu")
+    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
+        missing_fields.append("bdf")
+    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
+        missing_fields.append("uuid")
+    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
+        missing_fields.append("kfd_id")
+    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
+        missing_fields.append("node_id")
+    if "partition_id" not in gpu_obj or not isinstance(gpu_obj.get("partition_id"), int):
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
+def _validate_csv_row(csv_row: dict) -> list[str]:
     """Validate a CSV row parsed from `amd-smi --csv` output.
 
     Expected header names are: `gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id`.
@@ -157,32 +162,32 @@ def _validate_csv_row(row: dict) -> list[str]:
         list[str]: Missing or invalid field names.
     """
     # expected header names: gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id
-    missing = []
+    missing_fields = []
     try:
-        if "gpu" not in row or int(row.get("gpu", "")) < 0:
-            missing.append("gpu")
+        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
+            missing_fields.append("gpu")
     except Exception:
-        missing.append("gpu")
-    if not row.get("gpu_bdf"):
-        missing.append("gpu_bdf")
-    if not row.get("gpu_uuid"):
-        missing.append("gpu_uuid")
+        missing_fields.append("gpu")
+    if not csv_row.get("gpu_bdf"):
+        missing_fields.append("gpu_bdf")
+    if not csv_row.get("gpu_uuid"):
+        missing_fields.append("gpu_uuid")
     try:
-        if "kfd_id" not in row or int(row.get("kfd_id", "")) < 0:
-            missing.append("kfd_id")
+        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
+            missing_fields.append("kfd_id")
     except Exception:
-        missing.append("kfd_id")
+        missing_fields.append("kfd_id")
     try:
-        if "node_id" not in row or int(row.get("node_id", "")) < 0:
-            missing.append("node_id")
+        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
+            missing_fields.append("node_id")
     except Exception:
-        missing.append("node_id")
+        missing_fields.append("node_id")
     try:
-        if "partition_id" not in row or int(row.get("partition_id", "")) < 0:
-            missing.append("partition_id")
+        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
+            missing_fields.append("partition_id")
     except Exception:
-        missing.append("partition_id")
-    return missing
+        missing_fields.append("partition_id")
+    return missing_fields
 
 
 @pytest.mark.parametrize(
@@ -213,56 +218,56 @@ def test_amd_smi_list(mod_args, tmp_path):
     Returns:
         None
     """
-    args, expected_mode = mod_args
-    amd_smi = _amd_smi_path()
+    modifiers, expected_output_mode = mod_args
+    amd_smi_bin = _amd_smi_path()
 
-    file_path = None
-    run_args = list(args)
-    if "--file" in run_args:
+    output_file_path = None
+    invocation_args = list(modifiers)
+    if "--file" in invocation_args:
         # supply output file
-        file_path = tmp_path / "amdsmi_out.txt"
-        run_args = [a for a in run_args if a != "--file"]
-        run_args.extend(["--file", str(file_path)])
+        output_file_path = tmp_path / "amdsmi_out.txt"
+        invocation_args = [a for a in invocation_args if a != "--file"]
+        invocation_args.extend(["--file", str(output_file_path)])
 
-    rc, out, err = _run_amd_smi(amd_smi, run_args)
-    assert rc == 0, f"amd-smi failed rc={rc} stderr={err} stdout={out}"
+    return_code, stdout_text, stderr_text = _run_amd_smi(amd_smi_bin, invocation_args)
+    assert return_code == 0, f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
 
     # If file was requested, stdout should be empty
-    if file_path is not None:
-        assert out.strip() == "", f"Expected no stdout when using --file, got: {out}"
-        assert file_path.exists(), "Expected output file to be created"
-        content = file_path.read_text(encoding="utf-8", errors="replace")
+    if output_file_path is not None:
+        assert stdout_text.strip() == "", f"Expected no stdout when using --file, got: {stdout_text}"
+        assert output_file_path.exists(), "Expected output file to be created"
+        content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
     else:
-        content = out
+        content_text = stdout_text
 
     # Validate based on mode
-    if expected_mode == "json" or ("--json" in args and expected_mode is None):
+    if expected_output_mode == "json" or ("--json" in modifiers and expected_output_mode is None):
         # JSON array expected
         try:
-            data = json.loads(content)
+            json_data = json.loads(content_text)
         except Exception as e:
-            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content}")
-        assert isinstance(data, list) and data, "Expected non-empty JSON array"
-        for idx, obj in enumerate(data):
-            missing = _validate_json(obj)
-            assert not missing, f"JSON GPU entry {idx} missing fields: {missing}"
+            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
+        assert isinstance(json_data, list) and json_data, "Expected non-empty JSON array"
+        for index, gpu_obj in enumerate(json_data):
+            missing_fields = _validate_json(gpu_obj)
+            assert not missing_fields, f"JSON GPU entry {index} missing fields: {missing_fields}"
 
-    elif expected_mode == "csv" or ("--csv" in args and expected_mode is None):
+    elif expected_output_mode == "csv" or ("--csv" in modifiers and expected_output_mode is None):
         # CSV expected
         try:
-            reader = csv.DictReader(content.splitlines())
-            rows = list(reader)
+            csv_reader = csv.DictReader(content_text.splitlines())
+            csv_rows = list(csv_reader)
         except Exception as e:
-            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content}")
-        assert rows, "Expected at least one CSV row"
-        for idx, row in enumerate(rows):
-            missing = _validate_csv_row(row)
-            assert not missing, f"CSV row {idx} missing fields: {missing}"
+            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
+        assert csv_rows, "Expected at least one CSV row"
+        for index, csv_row in enumerate(csv_rows):
+            missing_fields = _validate_csv_row(csv_row)
+            assert not missing_fields, f"CSV row {index} missing fields: {missing_fields}"
 
     else:
         # human readable output
-        blocks = _parse_gpu_blocks(content)
-        assert blocks, "No GPU blocks found in amd-smi human output"
-        for idx, block_text in enumerate(blocks):
-            missing = _validate_human_block(block_text)
-            assert not missing, f"Human GPU block {idx} missing fields: {missing}\nBlock:\n{block_text}"
+        gpu_blocks = _parse_gpu_blocks(content_text)
+        assert gpu_blocks, "No GPU blocks found in amd-smi human output"
+        for index, human_readable_gpu_block in enumerate(gpu_blocks):
+            missing_fields = _validate_human_readable_gpu_block(human_readable_gpu_block)
+            assert not missing_fields, f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From bac9e22449ec8f7f8d8e72f2fdd6286da3aa4a4d Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 18:27:11 +0530
Subject: [PATCH 03/21] Add logging for amd-smi command execution and output

---
 .../test_executable_scripts/test_amdsmi_cli.py            | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index ce61861f00c..589d22efa00 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -15,10 +15,14 @@
 import csv
 import subprocess
 from pathlib import Path
+import logging
 
 import pytest
 
 
+logger = logging.getLogger(__name__)
+
+
 def _amd_smi_path() -> Path:
     """Return the path to the `amd-smi` binary from `THEROCK_BIN_DIR`.
 
@@ -58,7 +62,11 @@ def _run_amd_smi(amd_smi_path: Path, modifiers: list[str]) -> tuple[int, str, st
         tuple[int, str, str]: Return code, stdout text, stderr text.
     """
     cmd = [str(amd_smi_path), "list"] + modifiers
+    logger.debug("Running amd-smi: %s", cmd)
     proc = subprocess.run(cmd, capture_output=True, text=True)
+    logger.debug("amd-smi returncode=%s", proc.returncode)
+    logger.debug("amd-smi stdout:\n%s", proc.stdout)
+    logger.debug("amd-smi stderr:\n%s", proc.stderr)
     return proc.returncode, proc.stdout, proc.stderr
 
 

From e7bebb2056b440caeaeae82b4fc08113158b5477 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 18:38:44 +0530
Subject: [PATCH 04/21] Add test IDs to parameterized `amd-smi` output-mode cases

---
 .../test_executable_scripts/test_amdsmi_cli.py            | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index 589d22efa00..d1a4880718d 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -208,6 +208,14 @@ def _validate_csv_row(csv_row: dict) -> list[str]:
         (["--json", "--file"], "json"),
         (["--csv", "--file"], "csv"),
     ],
+    ids=[
+        "human-stdout",
+        "json-stdout",
+        "csv-stdout",
+        "human-file",
+        "json-file",
+        "csv-file",
+    ],
 )
 def test_amd_smi_list(mod_args, tmp_path):
     """End-to-end test of `amd-smi list` covering output modes.

From 9c80073aef24e4a108b8f4af1e5bee7a78c8b2a0 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 22:51:42 +0530
Subject: [PATCH 05/21] Raise amd-smi command logging from DEBUG to INFO in
 test_amdsmi_cli.py

---
 build_tools/github_actions/fetch_test_configurations.py   | 1 -
 .../test_executable_scripts/test_amdsmi_cli.py            | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index b25ecfce421..5d6feca049d 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -156,7 +156,6 @@ def _get_script_path(script_name: str) -> str:
             "windows": 1,
         },
     },
-    
     "amdsmi_cli": {
         "job_name": "amdsmi_cli",
         "fetch_artifact_args": "--tests",
diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index d1a4880718d..f2255d7d968 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -62,11 +62,11 @@ def _run_amd_smi(amd_smi_path: Path, modifiers: list[str]) -> tuple[int, str, st
         tuple[int, str, str]: Return code, stdout text, stderr text.
     """
     cmd = [str(amd_smi_path), "list"] + modifiers
-    logger.debug("Running amd-smi: %s", cmd)
+    logger.info("Running amd-smi: %s", cmd)
     proc = subprocess.run(cmd, capture_output=True, text=True)
-    logger.debug("amd-smi returncode=%s", proc.returncode)
-    logger.debug("amd-smi stdout:\n%s", proc.stdout)
-    logger.debug("amd-smi stderr:\n%s", proc.stderr)
+    logger.info("amd-smi returncode=%s", proc.returncode)
+    logger.info("amd-smi stdout:\n%s", proc.stdout)
+    logger.info("amd-smi stderr:\n%s", proc.stderr)
     return proc.returncode, proc.stdout, proc.stderr
 
 

From 30c2a1d3a3af82d0d48bc3228ba81221f00a0e30 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 23:03:41 +0530
Subject: [PATCH 06/21] Enable live INFO log output in the amdsmi_cli pytest command

---
 build_tools/github_actions/fetch_test_configurations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index 5d6feca049d..e9a8fd8e7dc 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -160,7 +160,7 @@ def _get_script_path(script_name: str) -> str:
         "job_name": "amdsmi_cli",
         "fetch_artifact_args": "--tests",
         "timeout_minutes": 15,
-        "test_script": f"pytest {_get_script_path('test_amdsmi_cli.py')} -s",
+        "test_script": f"pytest {_get_script_path('test_amdsmi_cli.py')} -o log_cli=true --log-cli-level=INFO",
         "platform": ["linux"],
         "total_shards_dict": {
             "linux": 1,

From 7a9f5db857a79ebfa7b4aad865ec242aa5891af0 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 6 Mar 2026 23:37:59 +0530
Subject: [PATCH 07/21] Refactor assertions for clarity and consistency in
 test_amdsmi_cli.py

---
 .../test_amdsmi_cli.py                        | 44 ++++++++++++++-----
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index f2255d7d968..8e81918658a 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -44,7 +44,9 @@ def _amd_smi_path() -> Path:
     # binary exists and is executable.
     amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
     assert amd_smi_bin_path.exists(), f"amd-smi not found at {amd_smi_bin_path}"
-    assert os.access(amd_smi_bin_path, os.X_OK), f"amd-smi is not executable: {amd_smi_bin_path}"
+    assert os.access(
+        amd_smi_bin_path, os.X_OK
+    ), f"amd-smi is not executable: {amd_smi_bin_path}"
     return amd_smi_bin_path
 
 
@@ -151,7 +153,9 @@ def _validate_json(gpu_obj: dict) -> list[str]:
         missing_fields.append("kfd_id")
     if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
         missing_fields.append("node_id")
-    if "partition_id" not in gpu_obj or not isinstance(gpu_obj.get("partition_id"), int):
+    if "partition_id" not in gpu_obj or not isinstance(
+        gpu_obj.get("partition_id"), int
+    ):
         missing_fields.append("partition_id")
     return missing_fields
 
@@ -246,29 +250,41 @@ def test_amd_smi_list(mod_args, tmp_path):
         invocation_args.extend(["--file", str(output_file_path)])
 
     return_code, stdout_text, stderr_text = _run_amd_smi(amd_smi_bin, invocation_args)
-    assert return_code == 0, f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+    assert (
+        return_code == 0
+    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
 
     # If file was requested, stdout should be empty
     if output_file_path is not None:
-        assert stdout_text.strip() == "", f"Expected no stdout when using --file, got: {stdout_text}"
+        assert (
+            stdout_text.strip() == ""
+        ), f"Expected no stdout when using --file, got: {stdout_text}"
         assert output_file_path.exists(), "Expected output file to be created"
         content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
     else:
         content_text = stdout_text
 
     # Validate based on mode
-    if expected_output_mode == "json" or ("--json" in modifiers and expected_output_mode is None):
+    if expected_output_mode == "json" or (
+        "--json" in modifiers and expected_output_mode is None
+    ):
         # JSON array expected
         try:
             json_data = json.loads(content_text)
         except Exception as e:
             pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
-        assert isinstance(json_data, list) and json_data, "Expected non-empty JSON array"
+        assert (
+            isinstance(json_data, list) and json_data
+        ), "Expected non-empty JSON array"
         for index, gpu_obj in enumerate(json_data):
             missing_fields = _validate_json(gpu_obj)
-            assert not missing_fields, f"JSON GPU entry {index} missing fields: {missing_fields}"
+            assert (
+                not missing_fields
+            ), f"JSON GPU entry {index} missing fields: {missing_fields}"
 
-    elif expected_output_mode == "csv" or ("--csv" in modifiers and expected_output_mode is None):
+    elif expected_output_mode == "csv" or (
+        "--csv" in modifiers and expected_output_mode is None
+    ):
         # CSV expected
         try:
             csv_reader = csv.DictReader(content_text.splitlines())
@@ -278,12 +294,18 @@ def test_amd_smi_list(mod_args, tmp_path):
         assert csv_rows, "Expected at least one CSV row"
         for index, csv_row in enumerate(csv_rows):
             missing_fields = _validate_csv_row(csv_row)
-            assert not missing_fields, f"CSV row {index} missing fields: {missing_fields}"
+            assert (
+                not missing_fields
+            ), f"CSV row {index} missing fields: {missing_fields}"
 
     else:
         # human readable output
         gpu_blocks = _parse_gpu_blocks(content_text)
         assert gpu_blocks, "No GPU blocks found in amd-smi human output"
         for index, human_readable_gpu_block in enumerate(gpu_blocks):
-            missing_fields = _validate_human_readable_gpu_block(human_readable_gpu_block)
-            assert not missing_fields, f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"
+            missing_fields = _validate_human_readable_gpu_block(
+                human_readable_gpu_block
+            )
+            assert (
+                not missing_fields
+            ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From 3f23ba4c75e6a50dba5e5937a434c846ddb83f60 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Wed, 11 Mar 2026 10:57:14 +0530
Subject: [PATCH 08/21] Update fetch_test_configurations.py to use --base-only
 for amdsmi_cli and refactor test_amdsmi_cli.py for improved clarity and
 functionality

---
 .../fetch_test_configurations.py              |  2 +-
 .../test_amdsmi_cli.py                        | 39 ++++---------------
 2 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index e9a8fd8e7dc..8f325f0923c 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -158,7 +158,7 @@ def _get_script_path(script_name: str) -> str:
     },
     "amdsmi_cli": {
         "job_name": "amdsmi_cli",
-        "fetch_artifact_args": "--tests",
+        "fetch_artifact_args": "--base-only",
         "timeout_minutes": 15,
         "test_script": f"pytest {_get_script_path('test_amdsmi_cli.py')} -o log_cli=true --log-cli-level=INFO",
         "platform": ["linux"],
diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
index 8e81918658a..86abdf28137 100644
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
@@ -24,24 +24,10 @@
 
 
 def _amd_smi_path() -> Path:
-    """Return the path to the `amd-smi` binary from `THEROCK_BIN_DIR`.
-
-    Skips the test via pytest if `THEROCK_BIN_DIR` is not set. Asserts that
-    the expected `amd-smi` binary exists at the resolved path.
-
-    Args:
-        None
-
-    Returns:
-        pathlib.Path: Path to the `amd-smi` binary.
-    """
     therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
     if not therock_bin_dir_env:
         pytest.skip("THEROCK_BIN_DIR not set; skipping amdsmi tests")
 
-    # Resolve the path to an absolute canonical path to avoid cwd-dependent
-    # failures (e.g., if a prior step changes directory). Also check that the
-    # binary exists and is executable.
     amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
     assert amd_smi_bin_path.exists(), f"amd-smi not found at {amd_smi_bin_path}"
     assert os.access(
@@ -50,20 +36,9 @@ def _amd_smi_path() -> Path:
     return amd_smi_bin_path
 
 
-def _run_amd_smi(amd_smi_path: Path, modifiers: list[str]) -> tuple[int, str, str]:
-    """Run `amd-smi list` with the given `modifiers` and return (rc, stdout, stderr).
-
-    The function invokes the binary via subprocess.run and captures text
-    output for assertions in the tests.
-
-    Args:
-        amd_smi_path (pathlib.Path): Path to the `amd-smi` binary.
-        modifiers (list[str]): Arguments to pass after `amd-smi list`.
-
-    Returns:
-        tuple[int, str, str]: Return code, stdout text, stderr text.
-    """
-    cmd = [str(amd_smi_path), "list"] + modifiers
+def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
+    amd_smi_bin = _amd_smi_path()
+    cmd = [str(amd_smi_bin)] + list(subcommands)
     logger.info("Running amd-smi: %s", cmd)
     proc = subprocess.run(cmd, capture_output=True, text=True)
     logger.info("amd-smi returncode=%s", proc.returncode)
@@ -73,7 +48,7 @@ def _run_amd_smi(amd_smi_path: Path, modifiers: list[str]) -> tuple[int, str, st
 
 
 def _parse_gpu_blocks(text_output: str) -> list[str]:
-    """Parse human-readable `amd-smi list` output into GPU text blocks.
+    """Parse human-readable `amd-smi` output into GPU text blocks.
 
     Returns a list where each element is the multiline block describing a
     single GPU. The parser looks for lines that start GPU markers like
@@ -81,7 +56,7 @@ def _parse_gpu_blocks(text_output: str) -> list[str]:
     GPU marker.
 
     Args:
-        output (str): The human-readable stdout from `amd-smi list`.
+        output (str): The human-readable stdout from `amd-smi`.
 
     Returns:
         list[str]: List of multiline GPU description blocks.
@@ -239,7 +214,6 @@ def test_amd_smi_list(mod_args, tmp_path):
         None
     """
     modifiers, expected_output_mode = mod_args
-    amd_smi_bin = _amd_smi_path()
 
     output_file_path = None
     invocation_args = list(modifiers)
@@ -249,7 +223,8 @@ def test_amd_smi_list(mod_args, tmp_path):
         invocation_args = [a for a in invocation_args if a != "--file"]
         invocation_args.extend(["--file", str(output_file_path)])
 
-    return_code, stdout_text, stderr_text = _run_amd_smi(amd_smi_bin, invocation_args)
+    # subcommands: run `amd-smi list` with the invocation args
+    return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
     assert (
         return_code == 0
     ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"

From 443bca2285cc50e8d2a8719cf05d77968721efa4 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:14:11 +0530
Subject: [PATCH 09/21] Add AMDSMI tests and update sanity check workflow

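- Registered the `amd_smi` and `amdsmi_tests_default_unblocking_for_sanity` pytest markers in a new top-level conftest.py.
- Added an `amdsmi_tests_default_unblocking_for_sanity_blocking` input to test_sanity_check.yml and split the sanity run into two steps: a blocking step that excludes the marked AMDSMI tests, and an AMDSMI step that continues on error unless the new input is true.
- Moved the AMDSMI CLI output validation (human-readable, JSON, CSV, and `--file` variants) into tests/test_rocm_sanity.py and added a lightweight blocking `amd-smi list` check.
- Removed the now-obsolete build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py file.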
---
 .github/workflows/test_sanity_check.yml       |  26 +-
 .../test_amdsmi_cli.py                        | 286 ------------------
 conftest.py                                   |   9 +
 tests/test_rocm_sanity.py                     | 215 ++++++++++++-
 4 files changed, 244 insertions(+), 292 deletions(-)
 delete mode 100644 build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
 create mode 100644 conftest.py

diff --git a/.github/workflows/test_sanity_check.yml b/.github/workflows/test_sanity_check.yml
index ee60887beb3..3d66655f22a 100644
--- a/.github/workflows/test_sanity_check.yml
+++ b/.github/workflows/test_sanity_check.yml
@@ -21,6 +21,10 @@ on:
         type: string
       platform:
         type: string
+      amdsmi_tests_default_unblocking_for_sanity_blocking:
+        type: boolean
+        description: 'If true, treat amdsmi default-unblocking tests as blockers (do not continue on error)'
+        default: false
   workflow_call:
     inputs:
       artifact_group:
@@ -38,6 +42,10 @@ on:
         type: string
       platform:
         type: string
+      amdsmi_tests_default_unblocking_for_sanity_blocking:
+        type: boolean
+        description: 'If true, treat amdsmi default-unblocking tests as blockers (do not continue on error)'
+        default: false
   push:
     branches:
       - ADHOCBUILD
@@ -138,15 +146,27 @@ jobs:
         run: |
           python ./build_tools/print_driver_gpu_info.py
 
-      - name: Run ROCm Sanity Tests
-        timeout-minutes: 5
+      - name: Run ROCm Blocking Sanity Tests
+        timeout-minutes: 10
+        env:
+          # Enable verbose logging, see
+          # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
+          AMD_LOG_LEVEL: 4
+          ROCM_KPACK_DEBUG: "1"
+        run: |
+          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=300
+
+      - name: Run ROCm AMDSMI Default-Unblocking-For-Sanity Tests
+        id: amdsmi_tests_default_unblocking_for_sanity_tests
+        continue-on-error: ${{ inputs.amdsmi_tests_default_unblocking_for_sanity_blocking == false }}
+        timeout-minutes: 10
         env:
           # Enable verbose logging, see
           # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
           AMD_LOG_LEVEL: 4
           ROCM_KPACK_DEBUG: "1"
         run: |
-          pytest tests/ --log-cli-level=info --timeout=300
+          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=300
 
       - name: Post-job cleanup processes on Windows
         if: ${{ always() && runner.os == 'Windows' }}
diff --git a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py b/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
deleted file mode 100644
index 86abdf28137..00000000000
--- a/build_tools/github_actions/test_executable_scripts/test_amdsmi_cli.py
+++ /dev/null
@@ -1,286 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Advanced Micro Devices, Inc.
-# SPDX-License-Identifier: MIT
-
-"""
-validation of the `amd-smi` CLI output.
-
-This test expects `THEROCK_BIN_DIR` to point to the TheRock `bin/` directory
-containing the `amd-smi` binary (CI sets this via the setup action).
-"""
-
-import os
-import re
-import json
-import csv
-import subprocess
-from pathlib import Path
-import logging
-
-import pytest
-
-
-logger = logging.getLogger(__name__)
-
-
-def _amd_smi_path() -> Path:
-    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
-    if not therock_bin_dir_env:
-        pytest.skip("THEROCK_BIN_DIR not set; skipping amdsmi tests")
-
-    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
-    assert amd_smi_bin_path.exists(), f"amd-smi not found at {amd_smi_bin_path}"
-    assert os.access(
-        amd_smi_bin_path, os.X_OK
-    ), f"amd-smi is not executable: {amd_smi_bin_path}"
-    return amd_smi_bin_path
-
-
-def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
-    amd_smi_bin = _amd_smi_path()
-    cmd = [str(amd_smi_bin)] + list(subcommands)
-    logger.info("Running amd-smi: %s", cmd)
-    proc = subprocess.run(cmd, capture_output=True, text=True)
-    logger.info("amd-smi returncode=%s", proc.returncode)
-    logger.info("amd-smi stdout:\n%s", proc.stdout)
-    logger.info("amd-smi stderr:\n%s", proc.stderr)
-    return proc.returncode, proc.stdout, proc.stderr
-
-
-def _parse_gpu_blocks(text_output: str) -> list[str]:
-    """Parse human-readable `amd-smi` output into GPU text blocks.
-
-    Returns a list where each element is the multiline block describing a
-    single GPU. The parser looks for lines that start GPU markers like
-    "GPU: <n>" or "GPU <n>:" and groups subsequent lines until the next
-    GPU marker.
-
-    Args:
-        output (str): The human-readable stdout from `amd-smi`.
-
-    Returns:
-        list[str]: List of multiline GPU description blocks.
-    """
-    gpu_blocks = []
-    current_block_lines = None
-    for line in text_output.splitlines():
-        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
-            if current_block_lines is not None:
-                gpu_blocks.append("\n".join(current_block_lines))
-            current_block_lines = [line]
-            continue
-        if current_block_lines is not None:
-            current_block_lines.append(line)
-    if current_block_lines is not None:
-        gpu_blocks.append("\n".join(current_block_lines))
-    return gpu_blocks
-
-
-def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
-    """Validate a single human-readable GPU block.
-
-    Returns a list of missing field names (empty if all required fields
-    appear). The function checks for BDF, UUID, KFD_ID, NODE_ID and
-    PARTITION_ID in the block_text.
-
-    Args:
-        human_readable_gpu_block_text (str): Multiline text block describing a single GPU.
-
-    Returns:
-        list[str]: Missing field names (empty if validation passes).
-    """
-    missing_fields = []
-    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("BDF")
-    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("UUID")
-    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("KFD_ID")
-    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("NODE_ID")
-    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("PARTITION_ID")
-    return missing_fields
-
-
-def _validate_json(gpu_obj: dict) -> list[str]:
-    """Validate a JSON GPU entry from `amd-smi --json`.
-
-    Returns a list of missing or incorrectly-typed fields. Expected fields
-    include `gpu` (int), `bdf` (str), `uuid` (str), `kfd_id` (int),
-    `node_id` (int) and `partition_id` (int).
-
-    Args:
-        obj (dict): Parsed JSON object representing a GPU entry.
-
-    Returns:
-        list[str]: Missing or invalid field names.
-    """
-    missing_fields = []
-    # required keys mapping
-    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
-        missing_fields.append("gpu")
-    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
-        missing_fields.append("bdf")
-    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
-        missing_fields.append("uuid")
-    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
-        missing_fields.append("kfd_id")
-    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
-        missing_fields.append("node_id")
-    if "partition_id" not in gpu_obj or not isinstance(
-        gpu_obj.get("partition_id"), int
-    ):
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
-def _validate_csv_row(csv_row: dict) -> list[str]:
-    """Validate a CSV row parsed from `amd-smi --csv` output.
-
-    Expected header names are: `gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id`.
-    Returns a list of missing or invalid fields.
-
-    Args:
-        row (dict): Mapping of CSV headers to values as returned by
-            `csv.DictReader`.
-
-    Returns:
-        list[str]: Missing or invalid field names.
-    """
-    # expected header names: gpu,gpu_bdf,gpu_uuid,kfd_id,node_id,partition_id
-    missing_fields = []
-    try:
-        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
-            missing_fields.append("gpu")
-    except Exception:
-        missing_fields.append("gpu")
-    if not csv_row.get("gpu_bdf"):
-        missing_fields.append("gpu_bdf")
-    if not csv_row.get("gpu_uuid"):
-        missing_fields.append("gpu_uuid")
-    try:
-        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
-            missing_fields.append("kfd_id")
-    except Exception:
-        missing_fields.append("kfd_id")
-    try:
-        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
-            missing_fields.append("node_id")
-    except Exception:
-        missing_fields.append("node_id")
-    try:
-        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
-            missing_fields.append("partition_id")
-    except Exception:
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
-@pytest.mark.parametrize(
-    "mod_args",
-    [
-        ([], None),  # human readable on stdout
-        (["--json"], None),
-        (["--csv"], None),
-        (["--file"], "human"),
-        (["--json", "--file"], "json"),
-        (["--csv", "--file"], "csv"),
-    ],
-    ids=[
-        "human-stdout",
-        "json-stdout",
-        "csv-stdout",
-        "human-file",
-        "json-file",
-        "csv-file",
-    ],
-)
-def test_amd_smi_list(mod_args, tmp_path):
-    """End-to-end test of `amd-smi list` covering output modes.
-
-    The test runs `amd-smi list` with multiple modifier combinations (human,
-    JSON, CSV, and file-output variants), parses the output and validates
-    required fields for each GPU entry.
-
-    Args:
-        mod_args (tuple[list[str], Optional[str]]): Parameterized tuple where
-            the first element is a list of modifier args and the second
-            element indicates the expected parsed mode when `--file` is
-            used.
-        tmp_path (pathlib.Path): pytest temporary directory fixture.
-
-    Returns:
-        None
-    """
-    modifiers, expected_output_mode = mod_args
-
-    output_file_path = None
-    invocation_args = list(modifiers)
-    if "--file" in invocation_args:
-        # supply output file
-        output_file_path = tmp_path / "amdsmi_out.txt"
-        invocation_args = [a for a in invocation_args if a != "--file"]
-        invocation_args.extend(["--file", str(output_file_path)])
-
-    # subcommands: run `amd-smi list` with the invocation args
-    return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
-    assert (
-        return_code == 0
-    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
-
-    # If file was requested, stdout should be empty
-    if output_file_path is not None:
-        assert (
-            stdout_text.strip() == ""
-        ), f"Expected no stdout when using --file, got: {stdout_text}"
-        assert output_file_path.exists(), "Expected output file to be created"
-        content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
-    else:
-        content_text = stdout_text
-
-    # Validate based on mode
-    if expected_output_mode == "json" or (
-        "--json" in modifiers and expected_output_mode is None
-    ):
-        # JSON array expected
-        try:
-            json_data = json.loads(content_text)
-        except Exception as e:
-            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
-        assert (
-            isinstance(json_data, list) and json_data
-        ), "Expected non-empty JSON array"
-        for index, gpu_obj in enumerate(json_data):
-            missing_fields = _validate_json(gpu_obj)
-            assert (
-                not missing_fields
-            ), f"JSON GPU entry {index} missing fields: {missing_fields}"
-
-    elif expected_output_mode == "csv" or (
-        "--csv" in modifiers and expected_output_mode is None
-    ):
-        # CSV expected
-        try:
-            csv_reader = csv.DictReader(content_text.splitlines())
-            csv_rows = list(csv_reader)
-        except Exception as e:
-            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
-        assert csv_rows, "Expected at least one CSV row"
-        for index, csv_row in enumerate(csv_rows):
-            missing_fields = _validate_csv_row(csv_row)
-            assert (
-                not missing_fields
-            ), f"CSV row {index} missing fields: {missing_fields}"
-
-    else:
-        # human readable output
-        gpu_blocks = _parse_gpu_blocks(content_text)
-        assert gpu_blocks, "No GPU blocks found in amd-smi human output"
-        for index, human_readable_gpu_block in enumerate(gpu_blocks):
-            missing_fields = _validate_human_readable_gpu_block(
-                human_readable_gpu_block
-            )
-            assert (
-                not missing_fields
-            ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000000..918b04e5e36
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,9 @@
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers",
+        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
+    )
+    config.addinivalue_line(
+        "markers",
+        "amd_smi: marks tests that exercise the amd-smi CLI"
+    )
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 233ec8d3abd..eb788e9be14 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -1,6 +1,7 @@
 # Copyright Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: MIT
-
+import json
+import csv
 from pathlib import Path
 from pytest_check import check
 import logging
@@ -15,9 +16,7 @@
 THIS_DIR = Path(__file__).resolve().parent
 
 logger = logging.getLogger(__name__)
-
 THEROCK_BIN_DIR = Path(os.getenv("THEROCK_BIN_DIR")).resolve()
-
 AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
 
 # Importing is_asan from github_actions_utils.py
@@ -25,6 +24,108 @@
 from github_actions_utils import is_asan
 
 
+def _amd_smi_path() -> Path:
+    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
+    if not therock_bin_dir_env:
+        pytest.fail("THEROCK_BIN_DIR not set; failing amdsmi tests")
+
+    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
+    if not amd_smi_bin_path.exists():
+        pytest.fail(f"amd-smi not found at {amd_smi_bin_path}")
+    if not os.access(amd_smi_bin_path, os.X_OK):
+        pytest.fail(f"amd-smi is not executable: {amd_smi_bin_path}")
+    return amd_smi_bin_path
+
+
+def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
+    amd_smi_bin = _amd_smi_path()
+    cmd = [str(amd_smi_bin)] + list(subcommands)
+    logger.info("Running amd-smi: %s", cmd)
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    logger.info("amd-smi returncode=%s", proc.returncode)
+    logger.info("amd-smi stdout:\n%s", proc.stdout)
+    logger.info("amd-smi stderr:\n%s", proc.stderr)
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _parse_gpu_blocks(text_output: str) -> list[str]:
+    gpu_blocks = []
+    current_block_lines = None
+    for line in text_output.splitlines():
+        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
+            if current_block_lines is not None:
+                gpu_blocks.append("\n".join(current_block_lines))
+            current_block_lines = [line]
+            continue
+        if current_block_lines is not None:
+            current_block_lines.append(line)
+    if current_block_lines is not None:
+        gpu_blocks.append("\n".join(current_block_lines))
+    return gpu_blocks
+
+
+def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
+    missing_fields = []
+    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("BDF")
+    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("UUID")
+    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("KFD_ID")
+    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("NODE_ID")
+    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("PARTITION_ID")
+    return missing_fields
+
+
+def _validate_json(gpu_obj: dict) -> list[str]:
+    missing_fields = []
+    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
+        missing_fields.append("gpu")
+    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
+        missing_fields.append("bdf")
+    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
+        missing_fields.append("uuid")
+    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
+        missing_fields.append("kfd_id")
+    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
+        missing_fields.append("node_id")
+    if "partition_id" not in gpu_obj or not isinstance(
+        gpu_obj.get("partition_id"), int
+    ):
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
+def _validate_csv_row(csv_row: dict) -> list[str]:
+    missing_fields = []
+    try:
+        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
+            missing_fields.append("gpu")
+    except Exception:
+        missing_fields.append("gpu")
+    if not csv_row.get("gpu_bdf"):
+        missing_fields.append("gpu_bdf")
+    if not csv_row.get("gpu_uuid"):
+        missing_fields.append("gpu_uuid")
+    try:
+        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
+            missing_fields.append("kfd_id")
+    except Exception:
+        missing_fields.append("kfd_id")
+    try:
+        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
+            missing_fields.append("node_id")
+    except Exception:
+        missing_fields.append("node_id")
+    try:
+        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
+            missing_fields.append("partition_id")
+    except Exception:
+        missing_fields.append("partition_id")
+    return missing_fields
+
 def is_windows():
     return "windows" == platform.system().lower()
 
@@ -229,3 +330,111 @@ def test_amdsmi_suite(self):
                 print(f"[amdsmitst-summary] {line}")
 
         check.equal(process.returncode, 0)
+
+    @pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
+    @pytest.mark.skipif(
+        AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
+    )
+    @pytest.mark.amd_smi
+    def test_amd_smi_blocks(self):
+        """Blocking check: `amd-smi list` prints GPU blocks and they are non-empty.
+
+        This is a lightweight blocking gate: it only asserts that GPU blocks
+        exist and contain some text, without validating specific fields.
+        """
+        return_code, stdout_text, stderr_text = _run_amd_smi(["list"])
+        assert (
+            return_code == 0
+        ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+
+        gpu_blocks = _parse_gpu_blocks(stdout_text)
+        assert gpu_blocks, "No GPU blocks found in amd-smi output"
+        for index, block in enumerate(gpu_blocks):
+            assert block.strip(), f"GPU block {index} is empty"
+
+    @pytest.mark.amd_smi
+    @pytest.mark.amdsmi_tests_default_unblocking_for_sanity
+    @pytest.mark.parametrize(
+        "mod_args",
+        [
+            ([], None),  # human readable on stdout
+            (["--json"], None),
+            (["--csv"], None),
+            (["--file"], "human"),
+            (["--json", "--file"], "json"),
+            (["--csv", "--file"], "csv"),
+        ],
+        ids=[
+            "human-stdout",
+            "json-stdout",
+            "csv-stdout",
+            "human-file",
+            "json-file",
+            "csv-file",
+        ],
+    )
+    def test_amd_smi_list(self, mod_args, tmp_path):
+        modifiers, expected_output_mode = mod_args
+
+        output_file_path = None
+        invocation_args = list(modifiers)
+        if "--file" in invocation_args:
+            output_file_path = tmp_path / "amdsmi_out.txt"
+            invocation_args = [a for a in invocation_args if a != "--file"]
+            invocation_args.extend(["--file", str(output_file_path)])
+
+        return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
+        assert (
+            return_code == 0
+        ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+
+        if output_file_path is not None:
+            assert (
+                stdout_text.strip() == ""
+            ), f"Expected no stdout when using --file, got: {stdout_text}"
+            assert output_file_path.exists(), "Expected output file to be created"
+            content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
+        else:
+            content_text = stdout_text
+
+        if expected_output_mode == "json" or (
+            "--json" in modifiers and expected_output_mode is None
+        ):
+            try:
+                json_data = json.loads(content_text)
+            except Exception as e:
+                pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
+            assert (
+                isinstance(json_data, list) and json_data
+            ), "Expected non-empty JSON array"
+            for index, gpu_obj in enumerate(json_data):
+                missing_fields = _validate_json(gpu_obj)
+                assert (
+                    not missing_fields
+                ), f"JSON GPU entry {index} missing fields: {missing_fields}"
+
+        elif expected_output_mode == "csv" or (
+            "--csv" in modifiers and expected_output_mode is None
+        ):
+            try:
+                csv_reader = csv.DictReader(content_text.splitlines())
+                csv_rows = list(csv_reader)
+            except Exception as e:
+                pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
+            assert csv_rows, "Expected at least one CSV row"
+            for index, csv_row in enumerate(csv_rows):
+                missing_fields = _validate_csv_row(csv_row)
+                assert (
+                    not missing_fields
+                ), f"CSV row {index} missing fields: {missing_fields}"
+
+        else:
+            gpu_blocks = _parse_gpu_blocks(content_text)
+            assert gpu_blocks, "No GPU blocks found in amd-smi human output"
+            for index, human_readable_gpu_block in enumerate(gpu_blocks):
+                missing_fields = _validate_human_readable_gpu_block(
+                    human_readable_gpu_block
+                )
+                assert (
+                    not missing_fields
+                ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From 4775e8397fd9b5a736415ce3c5829396aef47376 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:19:32 +0530
Subject: [PATCH 10/21] Reformat pytest.fail calls in JSON and CSV parsing
 and marker registration in conftest.py to improve readability

---
 conftest.py               | 6 ++----
 tests/test_rocm_sanity.py | 8 ++++++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/conftest.py b/conftest.py
index 918b04e5e36..ec241064ca2 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,9 +1,7 @@
 def pytest_configure(config):
     config.addinivalue_line(
-        "markers",
-        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
+        "markers", "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
     )
     config.addinivalue_line(
-        "markers",
-        "amd_smi: marks tests that exercise the amd-smi CLI"
+        "markers", "amd_smi: marks tests that exercise the amd-smi CLI"
     )
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index eb788e9be14..92300900e46 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -403,7 +403,9 @@ def test_amd_smi_list(self, mod_args, tmp_path):
             try:
                 json_data = json.loads(content_text)
             except Exception as e:
-                pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
+                pytest.fail(
+                    f"Failed to parse JSON output: {e}\nContent:\n{content_text}"
+                )
             assert (
                 isinstance(json_data, list) and json_data
             ), "Expected non-empty JSON array"
@@ -420,7 +422,9 @@ def test_amd_smi_list(self, mod_args, tmp_path):
                 csv_reader = csv.DictReader(content_text.splitlines())
                 csv_rows = list(csv_reader)
             except Exception as e:
-                pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
+                pytest.fail(
+                    f"Failed to parse CSV output: {e}\nContent:\n{content_text}"
+                )
             assert csv_rows, "Expected at least one CSV row"
             for index, csv_row in enumerate(csv_rows):
                 missing_fields = _validate_csv_row(csv_row)

From 58c355ea1674dd8c5751396c8e702da3654c0e9d Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:23:09 +0530
Subject: [PATCH 11/21] Wrap long lines in conftest.py and
 test_rocm_sanity.py for readability

---
 conftest.py               | 3 ++-
 tests/test_rocm_sanity.py | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/conftest.py b/conftest.py
index ec241064ca2..810541c123c 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,6 +1,7 @@
 def pytest_configure(config):
     config.addinivalue_line(
-        "markers", "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
+        "markers", 
+        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
     )
     config.addinivalue_line(
         "markers", "amd_smi: marks tests that exercise the amd-smi CLI"
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 92300900e46..63f4a87b99a 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -393,7 +393,9 @@ def test_amd_smi_list(self, mod_args, tmp_path):
                 stdout_text.strip() == ""
             ), f"Expected no stdout when using --file, got: {stdout_text}"
             assert output_file_path.exists(), "Expected output file to be created"
-            content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
+            content_text = output_file_path.read_text(
+                encoding="utf-8", errors="replace"
+            )
         else:
             content_text = stdout_text
 

From e45209f1e8ababc975dad5f0d4ccdb048f325987 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:26:18 +0530
Subject: [PATCH 12/21] Add missing blank line in test_rocm_sanity.py and add
 trailing comma to marker in conftest.py

---
 conftest.py               | 2 +-
 tests/test_rocm_sanity.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/conftest.py b/conftest.py
index 810541c123c..c6cdf7a9109 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,7 +1,7 @@
 def pytest_configure(config):
     config.addinivalue_line(
         "markers", 
-        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)"
+        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)",
     )
     config.addinivalue_line(
         "markers", "amd_smi: marks tests that exercise the amd-smi CLI"
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 63f4a87b99a..36a4f946fc5 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -126,6 +126,7 @@ def _validate_csv_row(csv_row: dict) -> list[str]:
         missing_fields.append("partition_id")
     return missing_fields
 
+
 def is_windows():
     return "windows" == platform.system().lower()
 

From 8ccca35b942ada777dae80f52db8c3241fa9abc7 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:28:44 +0530
Subject: [PATCH 13/21] Remove trailing whitespace in conftest.py markers

---
 conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conftest.py b/conftest.py
index c6cdf7a9109..60d88e647a7 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,6 +1,6 @@
 def pytest_configure(config):
     config.addinivalue_line(
-        "markers", 
+        "markers",
         "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)",
     )
     config.addinivalue_line(

From 31f476b47aa2d376a5c058ecc2a84ceedf346832 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:31:43 +0530
Subject: [PATCH 14/21] Remove timeout parameter from pytest commands in
 test_sanity_check.yml

---
 .github/workflows/test_sanity_check.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test_sanity_check.yml b/.github/workflows/test_sanity_check.yml
index 3d66655f22a..57ad349dcd0 100644
--- a/.github/workflows/test_sanity_check.yml
+++ b/.github/workflows/test_sanity_check.yml
@@ -154,7 +154,7 @@ jobs:
           AMD_LOG_LEVEL: 4
           ROCM_KPACK_DEBUG: "1"
         run: |
-          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=300
+          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info
 
       - name: Run ROCm AMDSMI Default-Unblocking-For-Sanity Tests
         id: amdsmi_tests_default_unblocking_for_sanity_tests
@@ -166,7 +166,7 @@ jobs:
           AMD_LOG_LEVEL: 4
           ROCM_KPACK_DEBUG: "1"
         run: |
-          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=300
+          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info
 
       - name: Post-job cleanup processes on Windows
         if: ${{ always() && runner.os == 'Windows' }}

From f39a95e88027bdebe2f7b27f6f1b0fb1da64cd30 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Fri, 13 Mar 2026 18:36:43 +0530
Subject: [PATCH 15/21] Add timeout parameter to pytest commands in
 test_sanity_check.yml

---
 .github/workflows/test_sanity_check.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test_sanity_check.yml b/.github/workflows/test_sanity_check.yml
index 57ad349dcd0..708a8e23d29 100644
--- a/.github/workflows/test_sanity_check.yml
+++ b/.github/workflows/test_sanity_check.yml
@@ -154,7 +154,7 @@ jobs:
           AMD_LOG_LEVEL: 4
           ROCM_KPACK_DEBUG: "1"
         run: |
-          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info
+          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=600
 
       - name: Run ROCm AMDSMI Default-Unblocking-For-Sanity Tests
         id: amdsmi_tests_default_unblocking_for_sanity_tests
@@ -166,7 +166,7 @@ jobs:
           AMD_LOG_LEVEL: 4
           ROCM_KPACK_DEBUG: "1"
         run: |
-          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info
+          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=600
 
       - name: Post-job cleanup processes on Windows
         if: ${{ always() && runner.os == 'Windows' }}

From f2eec65f1353a47fb9603ef2c59aeeaee1e63751 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 00:55:34 +0530
Subject: [PATCH 16/21] Move amd-smi CLI tests to tests/test_amdsmi_cli.py
 and remove test_sanity_check.yml

---
 .github/workflows/test_sanity_check.yml       | 175 --------------
 .../fetch_test_configurations.py              |   2 +-
 .../test_executable_scripts/test_sanity.py    |  11 +-
 conftest.py                                   |   9 +-
 tests/test_amdsmi_cli.py                      | 218 +++++++++++++++++
 tests/test_rocm_sanity.py                     | 219 ------------------
 6 files changed, 234 insertions(+), 400 deletions(-)
 delete mode 100644 .github/workflows/test_sanity_check.yml
 create mode 100644 tests/test_amdsmi_cli.py

diff --git a/.github/workflows/test_sanity_check.yml b/.github/workflows/test_sanity_check.yml
deleted file mode 100644
index 708a8e23d29..00000000000
--- a/.github/workflows/test_sanity_check.yml
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright Advanced Micro Devices, Inc.
-# SPDX-License-Identifier: MIT
-
-name: TheRock Sanity Check
-
-on:
-  workflow_dispatch:
-    inputs:
-      artifact_group:
-        type: string
-      artifact_run_id:
-        type: string
-        default: ""
-      amdgpu_families:
-        type: string
-        default: ""
-      amdgpu_targets:
-        type: string
-        default: ""
-      test_runs_on:
-        type: string
-      platform:
-        type: string
-      amdsmi_tests_default_unblocking_for_sanity_blocking:
-        type: boolean
-        description: 'If true, treat amdsmi default-unblocking tests as blockers (do not continue on error)'
-        default: false
-  workflow_call:
-    inputs:
-      artifact_group:
-        type: string
-      artifact_run_id:
-        type: string
-        default: ""
-      amdgpu_families:
-        type: string
-        default: ""
-      amdgpu_targets:
-        type: string
-        default: ""
-      test_runs_on:
-        type: string
-      platform:
-        type: string
-      amdsmi_tests_default_unblocking_for_sanity_blocking:
-        type: boolean
-        description: 'If true, treat amdsmi default-unblocking tests as blockers (do not continue on error)'
-        default: false
-  push:
-    branches:
-      - ADHOCBUILD
-
-permissions:
-  contents: read
-
-jobs:
-  test_sanity_check:
-    name: "Sanity ROCM Test (${{ inputs.amdgpu_families }})"
-    runs-on: ${{ inputs.test_runs_on }}
-    # Running docker with cap-add and -v /lib/modiles, by recommendation of Github: https://rocm.docs.amd.com/projects/amdsmi/en/amd-staging/how-to/setup-docker-container.html
-    container:
-      image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:4150afe4759d14822f0e3f8930e1124f26e11f68b5c7b91ec9a02b20b1ebbb98' || null }}
-      # --ulimit memlock=-1:-1 - Prevents memory allocation issues with ROCm inside container
-      # --security-opt seccomp=unconfined - enables memory mapping, and is recommended for containers running in HPC environments
-      # --env-file /etc/podinfo/gha-gpu-isolation-settings - Required for GPU isolation on OSSCI MIXXX runners
-      # --user 0:0 - Running as root, by recommendation of GitHub: https://docs.github.com/en/actions/reference/workflows-and-actions/dockerfile-support#user
-      options: --ipc host
-        --group-add video
-        --device /dev/kfd
-        --device /dev/dri
-        --group-add 992
-        --group-add 110
-        --cap-add SYS_MODULE
-        -v /lib/modules:/lib/modules
-        --ulimit memlock=-1:-1
-        --security-opt seccomp=unconfined
-        --env-file /etc/podinfo/gha-gpu-isolation-settings
-        --user 0:0
-    defaults:
-      run:
-        shell: bash
-    env:
-      VENV_DIR: ${{ github.workspace }}/.venv
-      ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
-      OUTPUT_ARTIFACTS_DIR: ${{ github.workspace }}/build
-      THEROCK_BIN_DIR: ${{ github.workspace }}/build/bin
-      AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
-      AMDGPU_TARGETS: ${{ inputs.amdgpu_targets }}
-      ARTIFACT_GROUP: ${{ inputs.artifact_group }}
-    steps:
-      - name: "Fetch 'build_tools' from repository"
-        if: ${{ runner.os == 'Windows' }}
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          sparse-checkout: build_tools
-          path: prejob
-
-      - name: Pre-job cleanup processes on Windows
-        if: ${{ runner.os == 'Windows' }}
-        timeout-minutes: 5
-        shell: powershell
-        run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1'
-
-      - name: Checkout Repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          repository: "ROCm/TheRock"
-
-      - name: Pre-job cleanup Docker containers on Linux
-        if: ${{ runner.os == 'Linux' }}
-        timeout-minutes: 5
-        shell: bash
-        run: |
-          # Remove any stopped containers
-          docker container prune -f || true
-          # Remove dangling networks
-          docker network prune -f || true
-
-      - name: Run setup test environment workflow
-        timeout-minutes: 15
-        uses: './.github/actions/setup_test_environment'
-        with:
-          ARTIFACT_GROUP: ${{ inputs.artifact_group }}
-          AMDGPU_TARGETS: ${{ inputs.amdgpu_targets }}
-          ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }}
-          OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }}
-          VENV_DIR: ${{ env.VENV_DIR }}
-          FETCH_ARTIFACT_ARGS: "--base-only"
-          IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }}
-
-      # The sanity checks run tools like 'offload-arch' which may search for
-      # DLLs on multiple search paths (PATH, CWD, system32, etc.).
-      # For typical "installs" of ROCm, the rocm/bin/ dir can be expected to be
-      # added to PATH, so we do that here. If we don't do this, DLLs on test
-      # runners in system32 may be picked up instead and the tests may not be
-      # representative, see https://github.com/ROCm/TheRock/issues/2019 and
-      # https://github.com/ROCm/TheRock/pull/3230#issuecomment-3844854922.
-      - name: Set PATH and HIP_CLANG_PATH for windows
-        if: ${{ runner.os == 'Windows' }}
-        run: |
-          echo "HIP_CLANG_PATH=${OUTPUT_ARTIFACTS_DIR}\lib\llvm\bin" >> $GITHUB_ENV
-          echo "${OUTPUT_ARTIFACTS_DIR}\bin" >> $GITHUB_PATH
-
-      - name: Driver / GPU sanity check
-        timeout-minutes: 3
-        run: |
-          python ./build_tools/print_driver_gpu_info.py
-
-      - name: Run ROCm Blocking Sanity Tests
-        timeout-minutes: 10
-        env:
-          # Enable verbose logging, see
-          # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
-          AMD_LOG_LEVEL: 4
-          ROCM_KPACK_DEBUG: "1"
-        run: |
-          pytest tests/ -m "not amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=600
-
-      - name: Run ROCm AMDSMI Default-Unblocking-For-Sanity Tests
-        id: amdsmi_tests_default_unblocking_for_sanity_tests
-        continue-on-error: ${{ inputs.amdsmi_tests_default_unblocking_for_sanity_blocking == false }}
-        timeout-minutes: 10
-        env:
-          # Enable verbose logging, see
-          # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
-          AMD_LOG_LEVEL: 4
-          ROCM_KPACK_DEBUG: "1"
-        run: |
-          pytest tests/ -m "amdsmi_tests_default_unblocking_for_sanity" --log-cli-level=info --timeout=600
-
-      - name: Post-job cleanup processes on Windows
-        if: ${{ always() && runner.os == 'Windows' }}
-        timeout-minutes: 5
-        shell: powershell
-        run: . '${{ github.workspace }}\build_tools\github_actions\cleanup_processes.ps1'
diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index bcb6d65be84..dd002c05f02 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -183,7 +183,7 @@ def _get_script_path(script_name: str) -> str:
         "job_name": "amdsmi_cli",
         "fetch_artifact_args": "--base-only",
         "timeout_minutes": 15,
-        "test_script": f"pytest {_get_script_path('test_amdsmi_cli.py')} -o log_cli=true --log-cli-level=INFO",
+        "test_script": "pytest tests/test_amdsmi_cli.py -m not_sanity -o log_cli=true --log-cli-level=INFO",
         "platform": ["linux"],
         "total_shards_dict": {
             "linux": 1,
diff --git a/build_tools/github_actions/test_executable_scripts/test_sanity.py b/build_tools/github_actions/test_executable_scripts/test_sanity.py
index e54a0e20607..eb154db1785 100644
--- a/build_tools/github_actions/test_executable_scripts/test_sanity.py
+++ b/build_tools/github_actions/test_executable_scripts/test_sanity.py
@@ -12,6 +12,11 @@
 SCRIPT_DIR = Path(__file__).resolve().parent
 THEROCK_DIR = SCRIPT_DIR.parent.parent.parent
 
+
+def _run_pytest(cmd: list[str], *, cwd: Path, env: dict[str, str], check: bool) -> subprocess.CompletedProcess[str]:
+    logging.info("++ Exec [%s]$ %s", cwd, " ".join(cmd))
+    return subprocess.run(cmd, cwd=cwd, env=env, check=check, text=True)
+
 env = os.environ.copy()
 # Enable verbose ROCm logging, see
 # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
@@ -39,6 +44,6 @@
     "--timeout=300",
 ]
 
-logging.info(f"++ Exec [{THEROCK_DIR}]$ {' '.join(cmd)}")
-
-subprocess.run(cmd, cwd=THEROCK_DIR, env=env, check=True)
+# Default sanity behavior: run everything except tests marked as not_sanity.
+phase_cmd = cmd + ["-m", "not not_sanity"]
+_run_pytest(phase_cmd, cwd=THEROCK_DIR, env=env, check=True)
diff --git a/conftest.py b/conftest.py
index 60d88e647a7..a2aeb752037 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,8 +1,13 @@
 def pytest_configure(config):
     config.addinivalue_line(
         "markers",
-        "amdsmi_tests_default_unblocking_for_sanity: marks tests as default-unblocking for amdsmi sanity (amdsmi_tests_default_unblocking_for_sanity)",
+        "not_sanity: marks tests that must not run in sanity gating",
     )
     config.addinivalue_line(
-        "markers", "amd_smi: marks tests that exercise the amd-smi CLI"
+        "markers",
+        "amd_smi: marks tests that exercise amd-smi",
+    )
+    config.addinivalue_line(
+        "markers",
+        "amd_smi_cli: marks amd-smi CLI tests",
     )
diff --git a/tests/test_amdsmi_cli.py b/tests/test_amdsmi_cli.py
new file mode 100644
index 00000000000..61a78a55130
--- /dev/null
+++ b/tests/test_amdsmi_cli.py
@@ -0,0 +1,218 @@
+# Copyright Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: MIT
+
+"""amd-smi CLI tests."""
+
+import csv
+import json
+import logging
+import os
+import platform
+import re
+import subprocess
+from pathlib import Path
+
+import pytest
+
+logger = logging.getLogger(__name__)
+
+
+def is_windows() -> bool:
+    return platform.system().lower() == "windows"
+
+
+def _amd_smi_path() -> Path:
+    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
+    if not therock_bin_dir_env:
+        pytest.fail("THEROCK_BIN_DIR not set; failing amd-smi CLI tests")
+
+    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
+    if not amd_smi_bin_path.exists():
+        pytest.fail(f"amd-smi not found at {amd_smi_bin_path}")
+    if not os.access(amd_smi_bin_path, os.X_OK):
+        pytest.fail(f"amd-smi is not executable: {amd_smi_bin_path}")
+    return amd_smi_bin_path
+
+
+def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
+    amd_smi_bin = _amd_smi_path()
+    cmd = [str(amd_smi_bin)] + list(subcommands)
+    logger.info("Running amd-smi: %s", cmd)
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _parse_gpu_blocks(text_output: str) -> list[str]:
+    gpu_blocks: list[str] = []
+    current_block_lines: list[str] | None = None
+    for line in text_output.splitlines():
+        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
+            if current_block_lines is not None:
+                gpu_blocks.append("\n".join(current_block_lines))
+            current_block_lines = [line]
+            continue
+        if current_block_lines is not None:
+            current_block_lines.append(line)
+    if current_block_lines is not None:
+        gpu_blocks.append("\n".join(current_block_lines))
+    return gpu_blocks
+
+
+def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
+    missing_fields: list[str] = []
+    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("BDF")
+    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("UUID")
+    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("KFD_ID")
+    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("NODE_ID")
+    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("PARTITION_ID")
+    return missing_fields
+
+
+def _validate_json(gpu_obj: dict) -> list[str]:
+    missing_fields: list[str] = []
+    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
+        missing_fields.append("gpu")
+    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
+        missing_fields.append("bdf")
+    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
+        missing_fields.append("uuid")
+    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
+        missing_fields.append("kfd_id")
+    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
+        missing_fields.append("node_id")
+    if "partition_id" not in gpu_obj or not isinstance(gpu_obj.get("partition_id"), int):
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
+def _validate_csv_row(csv_row: dict) -> list[str]:
+    missing_fields: list[str] = []
+    try:
+        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
+            missing_fields.append("gpu")
+    except Exception:
+        missing_fields.append("gpu")
+    if not csv_row.get("gpu_bdf"):
+        missing_fields.append("gpu_bdf")
+    if not csv_row.get("gpu_uuid"):
+        missing_fields.append("gpu_uuid")
+    try:
+        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
+            missing_fields.append("kfd_id")
+    except Exception:
+        missing_fields.append("kfd_id")
+    try:
+        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
+            missing_fields.append("node_id")
+    except Exception:
+        missing_fields.append("node_id")
+    try:
+        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
+            missing_fields.append("partition_id")
+    except Exception:
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
+AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
+
+
+# Module-wide: these are amd-smi CLI tests.
+pytestmark = [pytest.mark.amd_smi, pytest.mark.amd_smi_cli]
+
+
+@pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
+@pytest.mark.skipif(
+    AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
+)
+def test_amd_smi_blocks() -> None:
+    """Sanity-gating check: amd-smi list succeeds and reports at least one GPU."""
+    return_code, stdout_text, stderr_text = _run_amd_smi(["list"])
+    assert (
+        return_code == 0
+    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+
+    gpu_blocks = _parse_gpu_blocks(stdout_text)
+    assert gpu_blocks, "No GPU blocks found in amd-smi output"
+
+
+@pytest.mark.not_sanity
+@pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
+@pytest.mark.skipif(
+    AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
+)
+@pytest.mark.parametrize(
+    "mod_args",
+    [
+        ([], None),
+        (["--json"], None),
+        (["--csv"], None),
+        (["--file"], "human"),
+        (["--json", "--file"], "json"),
+        (["--csv", "--file"], "csv"),
+    ],
+    ids=[
+        "human-stdout",
+        "json-stdout",
+        "csv-stdout",
+        "human-file",
+        "json-file",
+        "csv-file",
+    ],
+)
+def test_amd_smi_list(mod_args, tmp_path: Path) -> None:
+    modifiers, expected_output_mode = mod_args
+
+    output_file_path: Path | None = None
+    invocation_args = list(modifiers)
+    if "--file" in invocation_args:
+        output_file_path = tmp_path / "amdsmi_out.txt"
+        invocation_args = [a for a in invocation_args if a != "--file"]
+        invocation_args.extend(["--file", str(output_file_path)])
+
+    return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
+    assert (
+        return_code == 0
+    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+
+    if output_file_path is not None:
+        assert stdout_text.strip() == "", f"Expected no stdout with --file, got: {stdout_text}"
+        assert output_file_path.exists(), "Expected output file to be created"
+        content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
+    else:
+        content_text = stdout_text
+
+    if expected_output_mode == "json" or ("--json" in modifiers and expected_output_mode is None):
+        try:
+            json_data = json.loads(content_text)
+        except Exception as e:
+            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
+        assert isinstance(json_data, list) and json_data, "Expected non-empty JSON array"
+        for index, gpu_obj in enumerate(json_data):
+            missing_fields = _validate_json(gpu_obj)
+            assert not missing_fields, f"JSON GPU entry {index} missing fields: {missing_fields}"
+
+    elif expected_output_mode == "csv" or ("--csv" in modifiers and expected_output_mode is None):
+        try:
+            csv_reader = csv.DictReader(content_text.splitlines())
+            csv_rows = list(csv_reader)
+        except Exception as e:
+            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
+        assert csv_rows, "Expected at least one CSV row"
+        for index, csv_row in enumerate(csv_rows):
+            missing_fields = _validate_csv_row(csv_row)
+            assert not missing_fields, f"CSV row {index} missing fields: {missing_fields}"
+
+    else:
+        gpu_blocks = _parse_gpu_blocks(content_text)
+        assert gpu_blocks, "No GPU blocks found in amd-smi human output"
+        for index, human_readable_gpu_block in enumerate(gpu_blocks):
+            missing_fields = _validate_human_readable_gpu_block(human_readable_gpu_block)
+            assert (
+                not missing_fields
+            ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 36a4f946fc5..47c41f03e55 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -1,7 +1,5 @@
 # Copyright Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: MIT
-import json
-import csv
 from pathlib import Path
 from pytest_check import check
 import logging
@@ -24,109 +22,6 @@
 from github_actions_utils import is_asan
 
 
-def _amd_smi_path() -> Path:
-    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
-    if not therock_bin_dir_env:
-        pytest.fail("THEROCK_BIN_DIR not set; failing amdsmi tests")
-
-    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
-    if not amd_smi_bin_path.exists():
-        pytest.fail(f"amd-smi not found at {amd_smi_bin_path}")
-    if not os.access(amd_smi_bin_path, os.X_OK):
-        pytest.fail(f"amd-smi is not executable: {amd_smi_bin_path}")
-    return amd_smi_bin_path
-
-
-def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
-    amd_smi_bin = _amd_smi_path()
-    cmd = [str(amd_smi_bin)] + list(subcommands)
-    logger.info("Running amd-smi: %s", cmd)
-    proc = subprocess.run(cmd, capture_output=True, text=True)
-    logger.info("amd-smi returncode=%s", proc.returncode)
-    logger.info("amd-smi stdout:\n%s", proc.stdout)
-    logger.info("amd-smi stderr:\n%s", proc.stderr)
-    return proc.returncode, proc.stdout, proc.stderr
-
-
-def _parse_gpu_blocks(text_output: str) -> list[str]:
-    gpu_blocks = []
-    current_block_lines = None
-    for line in text_output.splitlines():
-        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
-            if current_block_lines is not None:
-                gpu_blocks.append("\n".join(current_block_lines))
-            current_block_lines = [line]
-            continue
-        if current_block_lines is not None:
-            current_block_lines.append(line)
-    if current_block_lines is not None:
-        gpu_blocks.append("\n".join(current_block_lines))
-    return gpu_blocks
-
-
-def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
-    missing_fields = []
-    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("BDF")
-    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("UUID")
-    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("KFD_ID")
-    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("NODE_ID")
-    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("PARTITION_ID")
-    return missing_fields
-
-
-def _validate_json(gpu_obj: dict) -> list[str]:
-    missing_fields = []
-    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
-        missing_fields.append("gpu")
-    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
-        missing_fields.append("bdf")
-    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
-        missing_fields.append("uuid")
-    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
-        missing_fields.append("kfd_id")
-    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
-        missing_fields.append("node_id")
-    if "partition_id" not in gpu_obj or not isinstance(
-        gpu_obj.get("partition_id"), int
-    ):
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
-def _validate_csv_row(csv_row: dict) -> list[str]:
-    missing_fields = []
-    try:
-        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
-            missing_fields.append("gpu")
-    except Exception:
-        missing_fields.append("gpu")
-    if not csv_row.get("gpu_bdf"):
-        missing_fields.append("gpu_bdf")
-    if not csv_row.get("gpu_uuid"):
-        missing_fields.append("gpu_uuid")
-    try:
-        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
-            missing_fields.append("kfd_id")
-    except Exception:
-        missing_fields.append("kfd_id")
-    try:
-        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
-            missing_fields.append("node_id")
-    except Exception:
-        missing_fields.append("node_id")
-    try:
-        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
-            missing_fields.append("partition_id")
-    except Exception:
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
 def is_windows():
     return "windows" == platform.system().lower()
 
@@ -331,117 +226,3 @@ def test_amdsmi_suite(self):
                 print(f"[amdsmitst-summary] {line}")
 
         check.equal(process.returncode, 0)
-
-    @pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
-    @pytest.mark.skipif(
-        AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
-    )
-    @pytest.mark.amd_smi
-    def test_amd_smi_blocks(self):
-        """Blocking check: `amd-smi list` prints GPU blocks and they are non-empty.
-
-        This is a lightweight blocking gate: it only asserts that GPU blocks
-        exist and contain some text, without validating specific fields.
-        """
-        return_code, stdout_text, stderr_text = _run_amd_smi(["list"])
-        assert (
-            return_code == 0
-        ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
-
-        gpu_blocks = _parse_gpu_blocks(stdout_text)
-        assert gpu_blocks, "No GPU blocks found in amd-smi output"
-        for index, block in enumerate(gpu_blocks):
-            assert block.strip(), f"GPU block {index} is empty"
-
-    @pytest.mark.amd_smi
-    @pytest.mark.amdsmi_tests_default_unblocking_for_sanity
-    @pytest.mark.parametrize(
-        "mod_args",
-        [
-            ([], None),  # human readable on stdout
-            (["--json"], None),
-            (["--csv"], None),
-            (["--file"], "human"),
-            (["--json", "--file"], "json"),
-            (["--csv", "--file"], "csv"),
-        ],
-        ids=[
-            "human-stdout",
-            "json-stdout",
-            "csv-stdout",
-            "human-file",
-            "json-file",
-            "csv-file",
-        ],
-    )
-    def test_amd_smi_list(self, mod_args, tmp_path):
-        modifiers, expected_output_mode = mod_args
-
-        output_file_path = None
-        invocation_args = list(modifiers)
-        if "--file" in invocation_args:
-            output_file_path = tmp_path / "amdsmi_out.txt"
-            invocation_args = [a for a in invocation_args if a != "--file"]
-            invocation_args.extend(["--file", str(output_file_path)])
-
-        return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
-        assert (
-            return_code == 0
-        ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
-
-        if output_file_path is not None:
-            assert (
-                stdout_text.strip() == ""
-            ), f"Expected no stdout when using --file, got: {stdout_text}"
-            assert output_file_path.exists(), "Expected output file to be created"
-            content_text = output_file_path.read_text(
-                encoding="utf-8", errors="replace"
-            )
-        else:
-            content_text = stdout_text
-
-        if expected_output_mode == "json" or (
-            "--json" in modifiers and expected_output_mode is None
-        ):
-            try:
-                json_data = json.loads(content_text)
-            except Exception as e:
-                pytest.fail(
-                    f"Failed to parse JSON output: {e}\nContent:\n{content_text}"
-                )
-            assert (
-                isinstance(json_data, list) and json_data
-            ), "Expected non-empty JSON array"
-            for index, gpu_obj in enumerate(json_data):
-                missing_fields = _validate_json(gpu_obj)
-                assert (
-                    not missing_fields
-                ), f"JSON GPU entry {index} missing fields: {missing_fields}"
-
-        elif expected_output_mode == "csv" or (
-            "--csv" in modifiers and expected_output_mode is None
-        ):
-            try:
-                csv_reader = csv.DictReader(content_text.splitlines())
-                csv_rows = list(csv_reader)
-            except Exception as e:
-                pytest.fail(
-                    f"Failed to parse CSV output: {e}\nContent:\n{content_text}"
-                )
-            assert csv_rows, "Expected at least one CSV row"
-            for index, csv_row in enumerate(csv_rows):
-                missing_fields = _validate_csv_row(csv_row)
-                assert (
-                    not missing_fields
-                ), f"CSV row {index} missing fields: {missing_fields}"
-
-        else:
-            gpu_blocks = _parse_gpu_blocks(content_text)
-            assert gpu_blocks, "No GPU blocks found in amd-smi human output"
-            for index, human_readable_gpu_block in enumerate(gpu_blocks):
-                missing_fields = _validate_human_readable_gpu_block(
-                    human_readable_gpu_block
-                )
-                assert (
-                    not missing_fields
-                ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From 8bfba375f46553133796fde40e7fa73fb18eab69 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 01:01:36 +0530
Subject: [PATCH 17/21] Clean up: remove rocprofiler_systems test configuration
 and add blank lines in test_rocm_sanity.py

---
 .../github_actions/fetch_test_configurations.py       | 11 -----------
 tests/test_rocm_sanity.py                             |  3 +++
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index dd002c05f02..4f17329db38 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -168,17 +168,6 @@ def _get_script_path(script_name: str) -> str:
             "windows": 2,
         },
     },
-    "rocprofiler_systems": {
-        "job_name": "rocprofiler_systems",
-        "fetch_artifact_args": "--rocprofiler-systems --rocprofiler-sdk --tests",
-        "timeout_minutes": 15,
-        "test_script": f"python {_get_script_path('test_rocprofiler_systems.py')}",
-        "platform": ["linux"],
-        "total_shards_dict": {
-            "linux": 1,
-            "windows": 1,
-        },
-    },
     "amdsmi_cli": {
         "job_name": "amdsmi_cli",
         "fetch_artifact_args": "--base-only",
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 47c41f03e55..233ec8d3abd 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -1,5 +1,6 @@
 # Copyright Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: MIT
+
 from pathlib import Path
 from pytest_check import check
 import logging
@@ -14,7 +15,9 @@
 THIS_DIR = Path(__file__).resolve().parent
 
 logger = logging.getLogger(__name__)
+
 THEROCK_BIN_DIR = Path(os.getenv("THEROCK_BIN_DIR")).resolve()
+
 AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
 
 # Importing is_asan from github_actions_utils.py

From 69cc2848173be5da0d5ddc1583823b9f8af6d049 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 01:06:09 +0530
Subject: [PATCH 18/21] Refactor code for improved readability and consistency
 in test scripts

---
 .../test_executable_scripts/test_sanity.py    |  5 ++-
 tests/test_amdsmi_cli.py                      | 32 ++++++++++++++-----
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/build_tools/github_actions/test_executable_scripts/test_sanity.py b/build_tools/github_actions/test_executable_scripts/test_sanity.py
index eb154db1785..823b29c7ec7 100644
--- a/build_tools/github_actions/test_executable_scripts/test_sanity.py
+++ b/build_tools/github_actions/test_executable_scripts/test_sanity.py
@@ -13,10 +13,13 @@
 THEROCK_DIR = SCRIPT_DIR.parent.parent.parent
 
 
-def _run_pytest(cmd: list[str], *, cwd: Path, env: dict[str, str], check: bool) -> subprocess.CompletedProcess[str]:
+def _run_pytest(
+    cmd: list[str], *, cwd: Path, env: dict[str, str], check: bool
+) -> subprocess.CompletedProcess[str]:
     logging.info("++ Exec [%s]$ %s", cwd, " ".join(cmd))
     return subprocess.run(cmd, cwd=cwd, env=env, check=check, text=True)
 
+
 env = os.environ.copy()
 # Enable verbose ROCm logging, see
 # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
diff --git a/tests/test_amdsmi_cli.py b/tests/test_amdsmi_cli.py
index 61a78a55130..e176073f41e 100644
--- a/tests/test_amdsmi_cli.py
+++ b/tests/test_amdsmi_cli.py
@@ -85,7 +85,9 @@ def _validate_json(gpu_obj: dict) -> list[str]:
         missing_fields.append("kfd_id")
     if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
         missing_fields.append("node_id")
-    if "partition_id" not in gpu_obj or not isinstance(gpu_obj.get("partition_id"), int):
+    if "partition_id" not in gpu_obj or not isinstance(
+        gpu_obj.get("partition_id"), int
+    ):
         missing_fields.append("partition_id")
     return missing_fields
 
@@ -181,23 +183,33 @@ def test_amd_smi_list(mod_args, tmp_path: Path) -> None:
     ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
 
     if output_file_path is not None:
-        assert stdout_text.strip() == "", f"Expected no stdout with --file, got: {stdout_text}"
+        assert (
+            stdout_text.strip() == ""
+        ), f"Expected no stdout with --file, got: {stdout_text}"
         assert output_file_path.exists(), "Expected output file to be created"
         content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
     else:
         content_text = stdout_text
 
-    if expected_output_mode == "json" or ("--json" in modifiers and expected_output_mode is None):
+    if expected_output_mode == "json" or (
+        "--json" in modifiers and expected_output_mode is None
+    ):
         try:
             json_data = json.loads(content_text)
         except Exception as e:
             pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
-        assert isinstance(json_data, list) and json_data, "Expected non-empty JSON array"
+        assert (
+            isinstance(json_data, list) and json_data
+        ), "Expected non-empty JSON array"
         for index, gpu_obj in enumerate(json_data):
             missing_fields = _validate_json(gpu_obj)
-            assert not missing_fields, f"JSON GPU entry {index} missing fields: {missing_fields}"
+            assert (
+                not missing_fields
+            ), f"JSON GPU entry {index} missing fields: {missing_fields}"
 
-    elif expected_output_mode == "csv" or ("--csv" in modifiers and expected_output_mode is None):
+    elif expected_output_mode == "csv" or (
+        "--csv" in modifiers and expected_output_mode is None
+    ):
         try:
             csv_reader = csv.DictReader(content_text.splitlines())
             csv_rows = list(csv_reader)
@@ -206,13 +218,17 @@ def test_amd_smi_list(mod_args, tmp_path: Path) -> None:
         assert csv_rows, "Expected at least one CSV row"
         for index, csv_row in enumerate(csv_rows):
             missing_fields = _validate_csv_row(csv_row)
-            assert not missing_fields, f"CSV row {index} missing fields: {missing_fields}"
+            assert (
+                not missing_fields
+            ), f"CSV row {index} missing fields: {missing_fields}"
 
     else:
         gpu_blocks = _parse_gpu_blocks(content_text)
         assert gpu_blocks, "No GPU blocks found in amd-smi human output"
         for index, human_readable_gpu_block in enumerate(gpu_blocks):
-            missing_fields = _validate_human_readable_gpu_block(human_readable_gpu_block)
+            missing_fields = _validate_human_readable_gpu_block(
+                human_readable_gpu_block
+            )
             assert (
                 not missing_fields
             ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From 053d2bb9b3d0541016c2aefb9d5d27caae97cc6c Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 01:33:41 +0530
Subject: [PATCH 19/21] Enhance logging in _run_amd_smi function to include
 return code and output

---
 tests/test_amdsmi_cli.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_amdsmi_cli.py b/tests/test_amdsmi_cli.py
index e176073f41e..31eadd22e7c 100644
--- a/tests/test_amdsmi_cli.py
+++ b/tests/test_amdsmi_cli.py
@@ -39,6 +39,11 @@ def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
     cmd = [str(amd_smi_bin)] + list(subcommands)
     logger.info("Running amd-smi: %s", cmd)
     proc = subprocess.run(cmd, capture_output=True, text=True)
+    logger.info("amd-smi returncode=%s", proc.returncode)
+    if proc.stdout:
+        logger.info("amd-smi stdout:\n%s", proc.stdout)
+    if proc.stderr:
+        logger.info("amd-smi stderr:\n%s", proc.stderr)
     return proc.returncode, proc.stdout, proc.stderr
 
 

From d3aa5f61be9820d98f5f2e8bb36aa9cce53dddcf Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 11:19:37 +0530
Subject: [PATCH 20/21] Move amd-smi CLI list test into ROCm sanity tests;
 remove the amdsmi_cli job configuration, conftest.py, and test_amdsmi_cli.py

---
 .../fetch_test_configurations.py              |  10 -
 conftest.py                                   |  13 -
 tests/test_amdsmi_cli.py                      | 239 ------------------
 tests/test_rocm_sanity.py                     | 208 +++++++++++++++
 4 files changed, 208 insertions(+), 262 deletions(-)
 delete mode 100644 conftest.py
 delete mode 100644 tests/test_amdsmi_cli.py

diff --git a/build_tools/github_actions/fetch_test_configurations.py b/build_tools/github_actions/fetch_test_configurations.py
index 4f17329db38..b82a1a219b9 100644
--- a/build_tools/github_actions/fetch_test_configurations.py
+++ b/build_tools/github_actions/fetch_test_configurations.py
@@ -168,16 +168,6 @@ def _get_script_path(script_name: str) -> str:
             "windows": 2,
         },
     },
-    "amdsmi_cli": {
-        "job_name": "amdsmi_cli",
-        "fetch_artifact_args": "--base-only",
-        "timeout_minutes": 15,
-        "test_script": "pytest tests/test_amdsmi_cli.py -m not_sanity -o log_cli=true --log-cli-level=INFO",
-        "platform": ["linux"],
-        "total_shards_dict": {
-            "linux": 1,
-        },
-    },
     "hipcub": {
         "job_name": "hipcub",
         "fetch_artifact_args": "--prim --tests",
diff --git a/conftest.py b/conftest.py
deleted file mode 100644
index a2aeb752037..00000000000
--- a/conftest.py
+++ /dev/null
@@ -1,13 +0,0 @@
-def pytest_configure(config):
-    config.addinivalue_line(
-        "markers",
-        "not_sanity: marks tests that must not run in sanity gating",
-    )
-    config.addinivalue_line(
-        "markers",
-        "amd_smi: marks tests that exercise amd-smi",
-    )
-    config.addinivalue_line(
-        "markers",
-        "amd_smi_cli: marks amd-smi CLI tests",
-    )
diff --git a/tests/test_amdsmi_cli.py b/tests/test_amdsmi_cli.py
deleted file mode 100644
index 31eadd22e7c..00000000000
--- a/tests/test_amdsmi_cli.py
+++ /dev/null
@@ -1,239 +0,0 @@
-# Copyright Advanced Micro Devices, Inc.
-# SPDX-License-Identifier: MIT
-
-"""amd-smi CLI tests."""
-
-import csv
-import json
-import logging
-import os
-import platform
-import re
-import subprocess
-from pathlib import Path
-
-import pytest
-
-logger = logging.getLogger(__name__)
-
-
-def is_windows() -> bool:
-    return platform.system().lower() == "windows"
-
-
-def _amd_smi_path() -> Path:
-    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
-    if not therock_bin_dir_env:
-        pytest.fail("THEROCK_BIN_DIR not set; failing amd-smi CLI tests")
-
-    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
-    if not amd_smi_bin_path.exists():
-        pytest.fail(f"amd-smi not found at {amd_smi_bin_path}")
-    if not os.access(amd_smi_bin_path, os.X_OK):
-        pytest.fail(f"amd-smi is not executable: {amd_smi_bin_path}")
-    return amd_smi_bin_path
-
-
-def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
-    amd_smi_bin = _amd_smi_path()
-    cmd = [str(amd_smi_bin)] + list(subcommands)
-    logger.info("Running amd-smi: %s", cmd)
-    proc = subprocess.run(cmd, capture_output=True, text=True)
-    logger.info("amd-smi returncode=%s", proc.returncode)
-    if proc.stdout:
-        logger.info("amd-smi stdout:\n%s", proc.stdout)
-    if proc.stderr:
-        logger.info("amd-smi stderr:\n%s", proc.stderr)
-    return proc.returncode, proc.stdout, proc.stderr
-
-
-def _parse_gpu_blocks(text_output: str) -> list[str]:
-    gpu_blocks: list[str] = []
-    current_block_lines: list[str] | None = None
-    for line in text_output.splitlines():
-        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
-            if current_block_lines is not None:
-                gpu_blocks.append("\n".join(current_block_lines))
-            current_block_lines = [line]
-            continue
-        if current_block_lines is not None:
-            current_block_lines.append(line)
-    if current_block_lines is not None:
-        gpu_blocks.append("\n".join(current_block_lines))
-    return gpu_blocks
-
-
-def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
-    missing_fields: list[str] = []
-    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("BDF")
-    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
-        missing_fields.append("UUID")
-    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("KFD_ID")
-    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("NODE_ID")
-    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
-        missing_fields.append("PARTITION_ID")
-    return missing_fields
-
-
-def _validate_json(gpu_obj: dict) -> list[str]:
-    missing_fields: list[str] = []
-    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
-        missing_fields.append("gpu")
-    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
-        missing_fields.append("bdf")
-    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
-        missing_fields.append("uuid")
-    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
-        missing_fields.append("kfd_id")
-    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
-        missing_fields.append("node_id")
-    if "partition_id" not in gpu_obj or not isinstance(
-        gpu_obj.get("partition_id"), int
-    ):
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
-def _validate_csv_row(csv_row: dict) -> list[str]:
-    missing_fields: list[str] = []
-    try:
-        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
-            missing_fields.append("gpu")
-    except Exception:
-        missing_fields.append("gpu")
-    if not csv_row.get("gpu_bdf"):
-        missing_fields.append("gpu_bdf")
-    if not csv_row.get("gpu_uuid"):
-        missing_fields.append("gpu_uuid")
-    try:
-        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
-            missing_fields.append("kfd_id")
-    except Exception:
-        missing_fields.append("kfd_id")
-    try:
-        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
-            missing_fields.append("node_id")
-    except Exception:
-        missing_fields.append("node_id")
-    try:
-        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
-            missing_fields.append("partition_id")
-    except Exception:
-        missing_fields.append("partition_id")
-    return missing_fields
-
-
-AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
-
-
-# Module-wide: these are amd-smi CLI tests.
-pytestmark = [pytest.mark.amd_smi, pytest.mark.amd_smi_cli]
-
-
-@pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
-@pytest.mark.skipif(
-    AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
-)
-def test_amd_smi_blocks() -> None:
-    """Sanity-gating check: amd-smi list succeeds and reports at least one GPU."""
-    return_code, stdout_text, stderr_text = _run_amd_smi(["list"])
-    assert (
-        return_code == 0
-    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
-
-    gpu_blocks = _parse_gpu_blocks(stdout_text)
-    assert gpu_blocks, "No GPU blocks found in amd-smi output"
-
-
-@pytest.mark.not_sanity
-@pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
-@pytest.mark.skipif(
-    AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
-)
-@pytest.mark.parametrize(
-    "mod_args",
-    [
-        ([], None),
-        (["--json"], None),
-        (["--csv"], None),
-        (["--file"], "human"),
-        (["--json", "--file"], "json"),
-        (["--csv", "--file"], "csv"),
-    ],
-    ids=[
-        "human-stdout",
-        "json-stdout",
-        "csv-stdout",
-        "human-file",
-        "json-file",
-        "csv-file",
-    ],
-)
-def test_amd_smi_list(mod_args, tmp_path: Path) -> None:
-    modifiers, expected_output_mode = mod_args
-
-    output_file_path: Path | None = None
-    invocation_args = list(modifiers)
-    if "--file" in invocation_args:
-        output_file_path = tmp_path / "amdsmi_out.txt"
-        invocation_args = [a for a in invocation_args if a != "--file"]
-        invocation_args.extend(["--file", str(output_file_path)])
-
-    return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
-    assert (
-        return_code == 0
-    ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
-
-    if output_file_path is not None:
-        assert (
-            stdout_text.strip() == ""
-        ), f"Expected no stdout with --file, got: {stdout_text}"
-        assert output_file_path.exists(), "Expected output file to be created"
-        content_text = output_file_path.read_text(encoding="utf-8", errors="replace")
-    else:
-        content_text = stdout_text
-
-    if expected_output_mode == "json" or (
-        "--json" in modifiers and expected_output_mode is None
-    ):
-        try:
-            json_data = json.loads(content_text)
-        except Exception as e:
-            pytest.fail(f"Failed to parse JSON output: {e}\nContent:\n{content_text}")
-        assert (
-            isinstance(json_data, list) and json_data
-        ), "Expected non-empty JSON array"
-        for index, gpu_obj in enumerate(json_data):
-            missing_fields = _validate_json(gpu_obj)
-            assert (
-                not missing_fields
-            ), f"JSON GPU entry {index} missing fields: {missing_fields}"
-
-    elif expected_output_mode == "csv" or (
-        "--csv" in modifiers and expected_output_mode is None
-    ):
-        try:
-            csv_reader = csv.DictReader(content_text.splitlines())
-            csv_rows = list(csv_reader)
-        except Exception as e:
-            pytest.fail(f"Failed to parse CSV output: {e}\nContent:\n{content_text}")
-        assert csv_rows, "Expected at least one CSV row"
-        for index, csv_row in enumerate(csv_rows):
-            missing_fields = _validate_csv_row(csv_row)
-            assert (
-                not missing_fields
-            ), f"CSV row {index} missing fields: {missing_fields}"
-
-    else:
-        gpu_blocks = _parse_gpu_blocks(content_text)
-        assert gpu_blocks, "No GPU blocks found in amd-smi human output"
-        for index, human_readable_gpu_block in enumerate(gpu_blocks):
-            missing_fields = _validate_human_readable_gpu_block(
-                human_readable_gpu_block
-            )
-            assert (
-                not missing_fields
-            ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index 233ec8d3abd..7a1bdf57d52 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -1,6 +1,8 @@
 # Copyright Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: MIT
 
+import csv
+import json
 from pathlib import Path
 from pytest_check import check
 import logging
@@ -25,6 +27,117 @@
 from github_actions_utils import is_asan
 
 
+def _amd_smi_path() -> Path:
+    therock_bin_dir_env = os.getenv("THEROCK_BIN_DIR")
+    if not therock_bin_dir_env:
+        pytest.fail("THEROCK_BIN_DIR not set; failing amd-smi CLI tests")
+
+    amd_smi_bin_path = (Path(therock_bin_dir_env).expanduser().resolve()) / "amd-smi"
+    if not amd_smi_bin_path.exists():
+        pytest.fail(f"amd-smi not found at {amd_smi_bin_path}")
+    if not os.access(amd_smi_bin_path, os.X_OK):
+        pytest.fail(f"amd-smi is not executable: {amd_smi_bin_path}")
+    return amd_smi_bin_path
+
+
+def _run_amd_smi(subcommands: list[str]) -> tuple[int, str, str]:
+    amd_smi_bin = _amd_smi_path()
+    cmd = [str(amd_smi_bin)] + list(subcommands)
+    logger.info("Running amd-smi: %s", cmd)
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    logger.info("amd-smi returncode=%s", proc.returncode)
+    if proc.returncode != 0:
+        if proc.stdout:
+            logger.error("amd-smi stdout:\n%s", proc.stdout)
+        if proc.stderr:
+            logger.error("amd-smi stderr:\n%s", proc.stderr)
+    else:
+        if proc.stdout:
+            logger.info("amd-smi stdout:\n%s", proc.stdout)
+        if proc.stderr:
+            logger.error("amd-smi stderr (unexpected on success):\n%s", proc.stderr)
+    return proc.returncode, proc.stdout, proc.stderr
+
+
+def _parse_gpu_blocks(text_output: str) -> list[str]:
+    gpu_blocks: list[str] = []
+    current_block_lines: list[str] | None = None
+    for line in text_output.splitlines():
+        if re.search(r"GPU:\s+(\d+)", line) or re.search(r"GPU\s+(\d+):", line):
+            if current_block_lines is not None:
+                gpu_blocks.append("\n".join(current_block_lines))
+            current_block_lines = [line]
+            continue
+        if current_block_lines is not None:
+            current_block_lines.append(line)
+    if current_block_lines is not None:
+        gpu_blocks.append("\n".join(current_block_lines))
+    return gpu_blocks
+
+
+def _validate_human_readable_gpu_block(human_readable_gpu_block_text: str) -> list[str]:
+    missing_fields: list[str] = []
+    if not re.search(r"\s*BDF:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("BDF")
+    if not re.search(r"\s*UUID:\s*.+", human_readable_gpu_block_text):
+        missing_fields.append("UUID")
+    if not re.search(r"\s*KFD_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("KFD_ID")
+    if not re.search(r"\s*NODE_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("NODE_ID")
+    if not re.search(r"\s*PARTITION_ID:\s*\d+", human_readable_gpu_block_text):
+        missing_fields.append("PARTITION_ID")
+    return missing_fields
+
+
+def _validate_json(gpu_obj: dict) -> list[str]:
+    missing_fields: list[str] = []
+    if "gpu" not in gpu_obj or not isinstance(gpu_obj.get("gpu"), int):
+        missing_fields.append("gpu")
+    if "bdf" not in gpu_obj or not isinstance(gpu_obj.get("bdf"), str):
+        missing_fields.append("bdf")
+    if "uuid" not in gpu_obj or not isinstance(gpu_obj.get("uuid"), str):
+        missing_fields.append("uuid")
+    if "kfd_id" not in gpu_obj or not isinstance(gpu_obj.get("kfd_id"), int):
+        missing_fields.append("kfd_id")
+    if "node_id" not in gpu_obj or not isinstance(gpu_obj.get("node_id"), int):
+        missing_fields.append("node_id")
+    if "partition_id" not in gpu_obj or not isinstance(
+        gpu_obj.get("partition_id"), int
+    ):
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
+def _validate_csv_row(csv_row: dict) -> list[str]:
+    missing_fields: list[str] = []
+    try:
+        if "gpu" not in csv_row or int(csv_row.get("gpu", "")) < 0:
+            missing_fields.append("gpu")
+    except Exception:
+        missing_fields.append("gpu")
+    if not csv_row.get("gpu_bdf"):
+        missing_fields.append("gpu_bdf")
+    if not csv_row.get("gpu_uuid"):
+        missing_fields.append("gpu_uuid")
+    try:
+        if "kfd_id" not in csv_row or int(csv_row.get("kfd_id", "")) < 0:
+            missing_fields.append("kfd_id")
+    except Exception:
+        missing_fields.append("kfd_id")
+    try:
+        if "node_id" not in csv_row or int(csv_row.get("node_id", "")) < 0:
+            missing_fields.append("node_id")
+    except Exception:
+        missing_fields.append("node_id")
+    try:
+        if "partition_id" not in csv_row or int(csv_row.get("partition_id", "")) < 0:
+            missing_fields.append("partition_id")
+    except Exception:
+        missing_fields.append("partition_id")
+    return missing_fields
+
+
 def is_windows():
     return "windows" == platform.system().lower()
 
@@ -229,3 +342,98 @@ def test_amdsmi_suite(self):
                 print(f"[amdsmitst-summary] {line}")
 
         check.equal(process.returncode, 0)
+
+    @pytest.mark.skipif(is_windows(), reason="amd-smi CLI not supported on Windows")
+    @pytest.mark.skipif(
+        AMDGPU_FAMILIES == "gfx1151", reason="Linux gfx1151 does not support amdsmi yet"
+    )
+    @pytest.mark.parametrize(
+        "mod_args",
+        [
+            ([], None),
+            (["--json"], None),
+            (["--csv"], None),
+            (["--file"], "human"),
+            (["--json", "--file"], "json"),
+            (["--csv", "--file"], "csv"),
+        ],
+        ids=[
+            "human-stdout",
+            "json-stdout",
+            "csv-stdout",
+            "human-file",
+            "json-file",
+            "csv-file",
+        ],
+    )
+    def test_amd_smi_list(self, mod_args, tmp_path: Path) -> None:
+        modifiers, expected_output_mode = mod_args
+
+        output_file_path: Path | None = None
+        invocation_args = list(modifiers)
+        if "--file" in invocation_args:
+            output_file_path = tmp_path / "amdsmi_out.txt"
+            invocation_args = [a for a in invocation_args if a != "--file"]
+            invocation_args.extend(["--file", str(output_file_path)])
+
+        return_code, stdout_text, stderr_text = _run_amd_smi(["list"] + invocation_args)
+        assert (
+            return_code == 0
+        ), f"amd-smi failed rc={return_code} stderr={stderr_text} stdout={stdout_text}"
+
+        if output_file_path is not None:
+            assert (
+                stdout_text.strip() == ""
+            ), f"Expected no stdout with --file, got: {stdout_text}"
+            assert output_file_path.exists(), "Expected output file to be created"
+            content_text = output_file_path.read_text(
+                encoding="utf-8", errors="replace"
+            )
+        else:
+            content_text = stdout_text
+
+        if expected_output_mode == "json" or (
+            "--json" in modifiers and expected_output_mode is None
+        ):
+            try:
+                json_data = json.loads(content_text)
+            except Exception as e:
+                pytest.fail(
+                    f"Failed to parse JSON output: {e}\nContent:\n{content_text}"
+                )
+            assert (
+                isinstance(json_data, list) and json_data
+            ), "Expected non-empty JSON array"
+            for index, gpu_obj in enumerate(json_data):
+                missing_fields = _validate_json(gpu_obj)
+                assert (
+                    not missing_fields
+                ), f"JSON GPU entry {index} missing fields: {missing_fields}"
+
+        elif expected_output_mode == "csv" or (
+            "--csv" in modifiers and expected_output_mode is None
+        ):
+            try:
+                csv_reader = csv.DictReader(content_text.splitlines())
+                csv_rows = list(csv_reader)
+            except Exception as e:
+                pytest.fail(
+                    f"Failed to parse CSV output: {e}\nContent:\n{content_text}"
+                )
+            assert csv_rows, "Expected at least one CSV row"
+            for index, csv_row in enumerate(csv_rows):
+                missing_fields = _validate_csv_row(csv_row)
+                assert (
+                    not missing_fields
+                ), f"CSV row {index} missing fields: {missing_fields}"
+
+        else:
+            gpu_blocks = _parse_gpu_blocks(content_text)
+            assert gpu_blocks, "No GPU blocks found in amd-smi human output"
+            for index, human_readable_gpu_block in enumerate(gpu_blocks):
+                missing_fields = _validate_human_readable_gpu_block(
+                    human_readable_gpu_block
+                )
+                assert (
+                    not missing_fields
+                ), f"Human-readable GPU block {index} missing fields: {missing_fields}\nBlock:\n{human_readable_gpu_block}"

From a794b26c62d0265c60be89eccbe4aebe7475d0a1 Mon Sep 17 00:00:00 2001
From: HRISHIKESH THULA <you@example.com>
Date: Sat, 14 Mar 2026 11:33:01 +0530
Subject: [PATCH 21/21] Refactor test_sanity.py by removing the _run_pytest
 function and directly executing the pytest command

---
 .../test_executable_scripts/test_sanity.py         | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/build_tools/github_actions/test_executable_scripts/test_sanity.py b/build_tools/github_actions/test_executable_scripts/test_sanity.py
index 823b29c7ec7..e54a0e20607 100644
--- a/build_tools/github_actions/test_executable_scripts/test_sanity.py
+++ b/build_tools/github_actions/test_executable_scripts/test_sanity.py
@@ -12,14 +12,6 @@
 SCRIPT_DIR = Path(__file__).resolve().parent
 THEROCK_DIR = SCRIPT_DIR.parent.parent.parent
 
-
-def _run_pytest(
-    cmd: list[str], *, cwd: Path, env: dict[str, str], check: bool
-) -> subprocess.CompletedProcess[str]:
-    logging.info("++ Exec [%s]$ %s", cwd, " ".join(cmd))
-    return subprocess.run(cmd, cwd=cwd, env=env, check=check, text=True)
-
-
 env = os.environ.copy()
 # Enable verbose ROCm logging, see
 # https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/debugging.html
@@ -47,6 +39,6 @@ def _run_pytest(
     "--timeout=300",
 ]
 
-# Default sanity behavior: run everything except tests marked as not_sanity.
-phase_cmd = cmd + ["-m", "not not_sanity"]
-_run_pytest(phase_cmd, cwd=THEROCK_DIR, env=env, check=True)
+logging.info(f"++ Exec [{THEROCK_DIR}]$ {' '.join(cmd)}")
+
+subprocess.run(cmd, cwd=THEROCK_DIR, env=env, check=True)