From b53a22de244a64dc23b5f664ecb4f40890d8f2f5 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Wed, 4 Mar 2026 19:20:31 +0000 Subject: [PATCH 1/3] Add fusilli-provider to dnn-providers Copy the fusilli hipDNN plugin from iree-libs/fusilli/plugins/hipdnn-plugin/ to rocm-libraries/dnn-providers/fusilli-provider/ so it lives alongside the other hipDNN providers (miopen-provider, hipblaslt-provider). Co-Authored-By: Claude Opus 4.6 --- dnn-providers/fusilli-provider/.clang-format | 1 + dnn-providers/fusilli-provider/.gitignore | 8 + dnn-providers/fusilli-provider/CMakeLists.txt | 129 ++++ dnn-providers/fusilli-provider/README.md | 7 + .../build_tools/FusilliPluginTestUtils.cmake | 134 ++++ .../fusilli-provider/build_tools/ThePebble.py | 574 ++++++++++++++++++ .../build_tools/thepebble_config.toml | 9 + dnn-providers/fusilli-provider/exports.map | 8 + .../fusilli-provider/include/graph_import.h | 429 +++++++++++++ .../hipdnn_engine_plugin_execution_context.h | 39 ++ .../include/hipdnn_engine_plugin_handle.h | 82 +++ .../fusilli-provider/include/utils.h | 255 ++++++++ .../fusilli-provider/src/fusilli_plugin.cpp | 557 +++++++++++++++++ .../fusilli-provider/test/CMakeLists.txt | 43 ++ .../test/integration/CMakeLists.txt | 156 +++++ .../conv_fprop_parameterized_full.cpp | 307 ++++++++++ ...conv_fprop_parameterized_stream_device.cpp | 218 +++++++ .../simple_conv_fprop_with_relu_and_bias.cpp | 282 +++++++++ .../convolution/simple_conv_wgrad.cpp | 152 +++++ .../matmul/batched_matmul_parameterized.cpp | 255 ++++++++ .../matmul/matmul_parameterized.cpp | 236 +++++++ .../test/integration/matmul/simple_matmul.cpp | 127 ++++ .../simple_scaled_matmul_accumulate.cpp | 190 ++++++ .../test/integration/plugin_load.cpp | 88 +++ .../pointwise/simple_pointwise.cpp | 206 +++++++ .../test/test_fusilli_plugin_api.cpp | 536 ++++++++++++++++ .../test/test_graph_import.cpp | 47 ++ dnn-providers/fusilli-provider/test/utils.h | 35 ++ 28 files changed, 5110 insertions(+) create mode 100644 dnn-providers/fusilli-provider/.clang-format create mode 100644 dnn-providers/fusilli-provider/.gitignore create mode 100644 dnn-providers/fusilli-provider/CMakeLists.txt create mode 100644 dnn-providers/fusilli-provider/README.md create mode 100644 dnn-providers/fusilli-provider/build_tools/FusilliPluginTestUtils.cmake create mode 100644 dnn-providers/fusilli-provider/build_tools/ThePebble.py create mode 100644 dnn-providers/fusilli-provider/build_tools/thepebble_config.toml create mode 100644 dnn-providers/fusilli-provider/exports.map create mode 100644 dnn-providers/fusilli-provider/include/graph_import.h create mode 100644 dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_execution_context.h create mode 100644 dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_handle.h create mode 100644 dnn-providers/fusilli-provider/include/utils.h create mode 100644 dnn-providers/fusilli-provider/src/fusilli_plugin.cpp create mode 100644 dnn-providers/fusilli-provider/test/CMakeLists.txt create mode 100644 dnn-providers/fusilli-provider/test/integration/CMakeLists.txt create mode 100644 dnn-providers/fusilli-provider/test/integration/convolution/conv_fprop_parameterized_full.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_parameterized_stream_device.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_with_relu_and_bias.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_wgrad.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/matmul/batched_matmul_parameterized.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/matmul/matmul_parameterized.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/matmul/simple_matmul.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/matmul/simple_scaled_matmul_accumulate.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/plugin_load.cpp create mode 100644 dnn-providers/fusilli-provider/test/integration/pointwise/simple_pointwise.cpp create mode 100644 dnn-providers/fusilli-provider/test/test_fusilli_plugin_api.cpp create mode 100644 dnn-providers/fusilli-provider/test/test_graph_import.cpp create mode 100644 dnn-providers/fusilli-provider/test/utils.h diff --git a/dnn-providers/fusilli-provider/.clang-format b/dnn-providers/fusilli-provider/.clang-format new file mode 100644 index 00000000000..9b3aa8b7213 --- /dev/null +++ b/dnn-providers/fusilli-provider/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: LLVM diff --git a/dnn-providers/fusilli-provider/.gitignore b/dnn-providers/fusilli-provider/.gitignore new file mode 100644 index 00000000000..29b3bd70db9 --- /dev/null +++ b/dnn-providers/fusilli-provider/.gitignore @@ -0,0 +1,8 @@ +# CMake build +build/ + +# clangd intellisense cache +.cache/ + +# CMake presets +CMakeUserPresets.json diff --git a/dnn-providers/fusilli-provider/CMakeLists.txt b/dnn-providers/fusilli-provider/CMakeLists.txt new file mode 100644 index 00000000000..79d262fb0fb --- /dev/null +++ b/dnn-providers/fusilli-provider/CMakeLists.txt @@ -0,0 +1,129 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required(VERSION 3.24) + +project(fusilli-plugin + VERSION 0.1.0 + DESCRIPTION "Fusilli-Plugin: A Fusilli/IREE powered hipDNN plugin for graph JIT compilation." + LANGUAGES C CXX) + +# Set C++ standard +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + +# CMake includes +include(GNUInstallDirs) +include(FetchContent) + +# IREE Runtime Dependency - build from source, same pattern as fusilli/CMakeLists.txt +if(NOT IREE_SOURCE_DIR) + message(FATAL_ERROR "IREE_SOURCE_DIR must be provided. Set -DIREE_SOURCE_DIR= to the IREE source directory.") +endif() +message(STATUS "Using existing IREE sources: ${IREE_SOURCE_DIR}") +# Set IREE build flags +set(IREE_VISIBILITY_HIDDEN OFF) +set(IREE_BUILD_COMPILER OFF) +set(IREE_BUILD_TESTS OFF) +set(IREE_BUILD_SAMPLES OFF) +set(IREE_ERROR_ON_MISSING_SUBMODULES OFF) +set(IREE_HAL_DRIVER_DEFAULTS OFF) +set(IREE_HAL_DRIVER_HIP ON) +set(IREE_HIP_TEST_TARGET_CHIP "" CACHE STRING "") +# Build IREERuntime as part of plugin +FetchContent_Declare( + IREERuntime + SOURCE_DIR ${IREE_SOURCE_DIR} + # Fusilli's config file requires IREE runtime via + # find_dependency(IREERuntime), which triggers find_package(IREERuntime). The + # dependency can only be fulfilled via source build: FetchContent_Declare or + # add_subdirectory. Of those options, only FetchContent_Declare provides a way + # of intercepting the find_package call and redirecting to the source build. + OVERRIDE_FIND_PACKAGE + SYSTEM +) +FetchContent_MakeAvailable(IREERuntime) + +# Other dependencies +find_package(hip CONFIG REQUIRED) +find_package(hipdnn_data_sdk CONFIG REQUIRED) +find_package(hipdnn_plugin_sdk CONFIG REQUIRED) +find_package(hipdnn_frontend CONFIG REQUIRED) +find_package(hipdnn_test_sdk CONFIG REQUIRED) +find_package(Fusilli CONFIG REQUIRED) +# Fetch GTest if system install isn't available +find_package(GTest CONFIG QUIET) +if(NOT GTest_FOUND) + message(STATUS "GTest not found on system: Fetching from GitHub") + FetchContent_Declare( + googletest + QUIET + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.16.0 + ) + FetchContent_MakeAvailable(googletest) +endif() + + +# Global constants +set(FUSILLI_PLUGIN_TARGET fusilli_plugin) +# Relative path from build/install prefix to plugin location. +# HIPDNN_PLUGIN_ENGINE_SUBDIR is provided by hipdnn_sdk. +set(FUSILLI_PLUGIN_RELATIVE_PATH "${CMAKE_INSTALL_LIBDIR}/${HIPDNN_PLUGIN_ENGINE_SUBDIR}") + +# Useful compile warnings +list(PREPEND FUSILLI_PLUGIN_WARNING_COMPILE_OPTIONS + -Werror # Treat all warnings as errors + -Wall # Enable most common warnings + -Wextra # Enable additional warnings not covered by -Wall + -Wpedantic # Enforce strict ISO C++ compliance + -Wshadow # Warn about variable shadowing + -Wnon-virtual-dtor # Warn if a class with virtual functions has a non-virtual destructor + -Wold-style-cast # Warn about C-style casts + -Wcast-align # Warn about potential performance issues with misaligned casts + -Woverloaded-virtual # Warn if a base class function is hidden by a derived class function with the same name + -Wconversion # Warn about implicit type conversions that may alter a value + -Wsign-conversion # Warn about implicit conversions between signed and unsigned types + -Wnull-dereference # Warn about dereferencing null pointers + -Wdouble-promotion # Warn when a float is implicitly promoted to a double + -Wformat=2 # Enable stricter format string checks + -Winit-self # Warn about variables initialized with itself + -Wunreachable-code # Warn about unreachable code + -Wno-error=unused-command-line-argument # Disable error for unused command line arguments + -Wno-gnu-statement-expression # Disable gnu error for statement expression, this will need to change on Windows. + -Wswitch-default # Warn if a switch statement does not have a default case +) + +# Includes +include_directories(include) + +# Plugin definition +add_library(${FUSILLI_PLUGIN_TARGET} SHARED + src/fusilli_plugin.cpp +) +target_compile_options(${FUSILLI_PLUGIN_TARGET} PRIVATE ${FUSILLI_PLUGIN_WARNING_COMPILE_OPTIONS}) +target_link_libraries(${FUSILLI_PLUGIN_TARGET} PRIVATE hipdnn_plugin_sdk hipdnn_data_sdk hip::host fusilli::fusilli) +set_target_properties(${FUSILLI_PLUGIN_TARGET} PROPERTIES + C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${FUSILLI_PLUGIN_RELATIVE_PATH}" +) +# Version script hides all symbol definitions not staring with "hipdnn" +target_link_options( + ${FUSILLI_PLUGIN_TARGET} PRIVATE "LINKER:--version-script=${CMAKE_SOURCE_DIR}/exports.map" +) + +# Installation +install(TARGETS ${FUSILLI_PLUGIN_TARGET} + LIBRARY DESTINATION "${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +# Tests +enable_testing() +add_subdirectory(test) diff --git a/dnn-providers/fusilli-provider/README.md b/dnn-providers/fusilli-provider/README.md new file mode 100644 index 00000000000..7a3af43e789 --- /dev/null +++ b/dnn-providers/fusilli-provider/README.md @@ -0,0 +1,7 @@ +# Fusilli Plugin + +Fusilli-Plugin: A Fusilli/IREE powered hipDNN plugin for graph JIT compilation. + +:construction: **This project is under active development, many things don't work yet** :construction: + +The plugin builds as a shared library (`fusilli_plugin.so`) providing a `hipDNN` [kernel engine plugin](https://github.com/ROCm/hipDNN/blob/develop/docs/PluginDevelopment.md#creating-a-kernel-engine-plugin) [API](https://github.com/ROCm/hipDNN/blob/839cf6c4bc6fe403d0ef72cb5d7df004e2004743/sdk/include/hipdnn_sdk/plugin/EnginePluginApi.h). diff --git a/dnn-providers/fusilli-provider/build_tools/FusilliPluginTestUtils.cmake b/dnn-providers/fusilli-provider/build_tools/FusilliPluginTestUtils.cmake new file mode 100644 index 00000000000..df364df14c8 --- /dev/null +++ b/dnn-providers/fusilli-provider/build_tools/FusilliPluginTestUtils.cmake @@ -0,0 +1,134 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#===------------------------------------------------------------------------===# +# +# This file contains test utilities for Fusilli's hipDNN plugin. +# +# TheRock CI generates test, lib, dbg, etc. artifacts from the build - CI tests +# are run on separate runners using a downloaded version of a project's test +# artifacts. This approach allows the build to use a cheaper CPU-only machine, +# and the tests for each project to run in parallel on more expensive GPU +# machines (after the build has completed). +# +# In TheRock the pattern is to install tests as well as a generated CTest script +# in the test artifact. When the artifact is downloaded to an isolated runner, +# CTest can simply be pointed at the generated script + pre-built tests. +# +# This file provides `add_fusilli_plugin_test()` to wrap up boilerplate for test +# setup + test installation + test script generation. +# +#===------------------------------------------------------------------------===# + +include(GoogleTest) + +# Set up the installed CTestTestfile.cmake. We add a ".install" postfix to +# differentiate from CTestTestfile.cmake generated by GTest during the build. +# Note: we can't simply use the GTest version as the paths are absolute, the +# test script will need to be installed (and therefore relocated) so must use +# relative paths. +set(_FUSILLI_PLUGIN_INSTALLED_CTEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake.install") + +file(WRITE "${_FUSILLI_PLUGIN_INSTALLED_CTEST_FILE}" + "# Autogenerated CTestTestfile for installed fusilli-plugin tests\n" + "# Generated by fusilli-plugin build system\n\n" +) + +install( + FILES "${_FUSILLI_PLUGIN_INSTALLED_CTEST_FILE}" + DESTINATION "${CMAKE_INSTALL_BINDIR}/fusilli_plugin_test_infra" + RENAME CTestTestfile.cmake +) + +# Creates a fusilli plugin test. +# +# To support build configurations (such as TheRock) that require tests to run on +# a different machine from where they were built, add_fusilli_plugin_test builds +# and _installs_ tests + CTest runner scripts +# +# add_fusilli_plugin_test( +# NAME +# SRCS [ ...] +# DEPS [ ...] +# [COMPILE_DEFS [ ...]] +# ) +# +# NAME +# The name of the test executable (required). +# +# SRCS +# Source files to compile into the executable (required). +# +# DEPS +# Library dependencies to be linked to this target. +# +# COMPILE_DEFS +# Compile definitions to add to the target. +function(add_fusilli_plugin_test) + cmake_parse_arguments( + ARG # prefix + "" # options + "NAME" # one value keywords + "SRCS;DEPS;COMPILE_DEFS" # multi-value keywords + ${ARGN} # extra arguments + ) + + if(NOT DEFINED ARG_NAME) + message(FATAL_ERROR "add_fusilli_plugin_test: NAME is required") + endif() + + if(NOT DEFINED ARG_SRCS) + message(FATAL_ERROR "add_fusilli_plugin_test: SRCS is required") + endif() + + # Create executable + add_executable(${ARG_NAME} ${ARG_SRCS}) + + # When installed all tests are at the same level in the file hierarchy, ensure + # tests are at the same level in the build directory as well + set_target_properties(${ARG_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test" + ) + + # Set compile options + target_compile_options(${ARG_NAME} PRIVATE ${FUSILLI_PLUGIN_WARNING_COMPILE_OPTIONS}) + + # Link dependencies + target_link_libraries(${ARG_NAME} PRIVATE ${ARG_DEPS}) + + # Add compile definitions if provided + if(DEFINED ARG_COMPILE_DEFS) + target_compile_definitions(${ARG_NAME} PRIVATE ${ARG_COMPILE_DEFS}) + endif() + + # Register with CTest + gtest_discover_tests(${ARG_NAME} + PROPERTIES + ENVIRONMENT "HIPDNN_LOG_LEVEL=info" + ENVIRONMENT "FUSILLI_LOG_INFO=1" + ENVIRONMENT "FUSILLI_LOG_FILE=stdout" + ) + + # Install test executable + install(TARGETS ${ARG_NAME} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ) + + # Append to installed CTestTestfile.cmake. Paths are relative as tests get + # relocated on install. + file(APPEND "${_FUSILLI_PLUGIN_INSTALLED_CTEST_FILE}" + "add_test(${ARG_NAME} \"../${ARG_NAME}\")\n" + ) + file(APPEND "${_FUSILLI_PLUGIN_INSTALLED_CTEST_FILE}" + "\n# Set environment variables\n" + "set_tests_properties(${ARG_NAME}\n" + " PROPERTIES\n" + " ENVIRONMENT \"HIPDNN_LOG_LEVEL=info\"\n" + " ENVIRONMENT \"FUSILLI_LOG_INFO=1\"\n" + " ENVIRONMENT \"FUSILLI_LOG_FILE=stdout\"\n" + ")\n" + ) +endfunction() diff --git a/dnn-providers/fusilli-provider/build_tools/ThePebble.py b/dnn-providers/fusilli-provider/build_tools/ThePebble.py new file mode 100644 index 00000000000..1aa95576665 --- /dev/null +++ b/dnn-providers/fusilli-provider/build_tools/ThePebble.py @@ -0,0 +1,574 @@ +""" +ThePebble - A simulacrum of TheRock for fusilli's hipDNN plugin dev/CI +environment setup. + +ThePebble composes build artifacts into one distribution directory at +$HOME/.cache/ThePebble/dist. The dist directory resembles what TheRock provides +when building fusilli's hipDNN plugin - namely a composition of the installed +build artifacts for the plugin's declared dependencies. + +TODO: when multi-stage build lands in TheRock investigate simply running the +iree-libs phase of TheRock. + +usage: + --setup + Installs plugin dependencies and creates a CMakeUserPresets.json for local + development. After running --setup, configure the plugin with: + cmake --preset thepebble + + Dependencies installed: + - Hip. For the hip dependency ThePebble takes an approach suggested in + TheRock's RELEASES.md and uses TheRock's CI scripts `fetch_artifacts.py` + and `install_rocm_from_artifacts.py` to install from artifacts built by + TheRock. We use the more granular artifacts (used primarily by tests) + rather than the monolithic tarball. The granular artifacts allow us to + install hip without ending up with duplicate copies of fusilli-plugin - + ThePebble and TheRock both build one. The granular approach also allows + us to compose final installed artifacts from multiple builds - we can use + hip from an older build if there was a regression, and hipDNN from a + newer build if there was an API update. + + TheRock's scripts require a github run ID. This is configured in + thepebble_config.toml as `versions.hip_run_id`. To find a run ID go to + https://github.com/ROCm/TheRock/actions - each action has the run id in + the URL. A run ID can come from a PR or a nightly release build from + `main`. Note: Filtering https://github.com/ROCm/TheRock/actions by + "scheduled" events will display only nightly release builds, if that's + what you're looking for. + + - HipDNN. Currently ThePebble builds + installs hipDNN from source in + rocm-libraries. But, the source approach can and should be augmented with + an alternative option to fetch hipDNN artifacts from TheRock like we do + for hip (with an independent run id). Currently hipDNN has a CMake bug + that crashes the build when using TheRock's artifacts; it contains a + hardcoded path that won't exist if outside of build machine for TheRock. + + - IREE runtime. This is the black sheep. As fusilli and the plugin each build + IREE from source internally this dependency isn't built and installed to + dist, it just exists at a path where fusilli + the plugin can find it. + ThePebble provides IREE's path to fusilli and the plugin through + -DIREE_SOURCE_DIR CMake Cache variable (via build flags and cmake preset + respectively). + + - Fusilli. The plugin builds independently from fusilli itself - the plugin + may not live in the fusilli repo long term, so is designed to be easily + relocatable. The current version of fusilli is built + installed into dist + using IREE runtime fetched by ThePebble. + + --ci-install-and-test-fusilli-plugin + Builds and installs the fusilli plugin into the dist directory, then runs + TheRock's test script for fusilli plugin. + + Note: This installs the plugin into the same dist directory as dependencies + created with --setup, which may be annoying for local development - you'll + end up with two versions of fusilli_pluggin.so running around (one in dist + and one in your local build folder). For local development it's probably + easiest to run the tests from your local build. TheRock doesn't run tests + using ctest on build folder, but bugs due to test environment setup should + be rare. +""" + +import argparse +import json +import os +import shutil +import subprocess +import sys +import tempfile +import tomllib +import venv +from pathlib import Path + +PEBBLE_DIR = Path.home() / ".cache" / "ThePebble" +INSTALL_DIR = PEBBLE_DIR / "dist" +CACHED_CONFIG = PEBBLE_DIR / "_copy_of_thepebble_config_for_cache_invalidation.toml" +THEROCK_REPO = "https://github.com/ROCm/TheRock.git" +THEROCK_DIR = PEBBLE_DIR / "TheRock" +ROCM_LIBRARIES_REPO = "https://github.com/ROCm/rocm-libraries.git" +IREE_REPO = "https://github.com/iree-org/iree.git" +IREE_DIR = PEBBLE_DIR / "iree" +IREE_SUBMODULES = ["third_party/flatcc", "third_party/benchmark"] +HIPDNN_SRC_DIR = PEBBLE_DIR / "rocm-libraries" + +# ============================================================================== +# Utils +# ============================================================================== + + +def load_config() -> dict: + """Load configuration from thepebble_config.toml.""" + config_path = Path(__file__).parent / "thepebble_config.toml" + with open(config_path, "rb") as f: + return tomllib.load(f) + + +def get_fusilli_dir() -> Path: + """Get the fusilli source directory (parent of hipdnn-plugin).""" + # ThePebble.py is in fusilli/plugins/hipdnn-plugin/build_tools/ + return Path(__file__).parent.parent.parent.parent + + +def get_iree_git_tag() -> str: + """Read IREE version from the root version.json (single source of truth).""" + version_json = get_fusilli_dir() / "version.json" + with open(version_json) as f: + data = json.load(f) + return data["iree-version"] + + +def get_plugin_dir() -> Path: + """Get the hipdnn-plugin directory.""" + # ThePebble.py is in fusilli/plugins/hipdnn-plugin/build_tools/ + return Path(__file__).parent.parent + + +def validate_config(): + """Check cache exists and config matches. Error if mismatch.""" + if not CACHED_CONFIG.exists(): + sys.exit("Error: No cached config. Run --setup first.") + + current_config = load_config() + with open(CACHED_CONFIG, "rb") as f: + cached_config = tomllib.load(f) + + if current_config != cached_config: + sys.exit("Error: Config mismatch. Re-run --setup to update.") + + +# ============================================================================== +# Setup +# ============================================================================== + + +def setup_therock(git_ref: str): + """Clone TheRock and set up venv (ThePebble only uses python scripts)""" + print(f"Cloning TheRock at {git_ref}...") + subprocess.run( + [ + "git", + "clone", + "--depth=1", + "--branch", + git_ref, + THEROCK_REPO, + str(THEROCK_DIR), + ], + check=True, + ) + + # Set up venv + print("Setting up TheRock venv...") + venv_dir = THEROCK_DIR / ".venv" + subprocess.run(["python3", "-m", "venv", str(venv_dir)], check=True) + pip = venv_dir / "bin" / "pip" + subprocess.run( + [str(pip), "install", "-r", str(THEROCK_DIR / "requirements.txt")], + check=True, + ) + subprocess.run( + [str(pip), "install", "-r", str(THEROCK_DIR / "requirements-test.txt")], + check=True, + ) + + +def install_hip(run_id: str): + """Download and install Hip artifacts using install_rocm_from_artifacts.py.""" + venv_python = THEROCK_DIR / ".venv" / "bin" / "python" + + # Use TheRock's install_rocm_from_artifacts.py + # --run-github-repo is needed to override GITHUB_REPOSITORY env var in CI + cmd = [ + str(venv_python), + str(THEROCK_DIR / "build_tools" / "install_rocm_from_artifacts.py"), + "--run-id", + run_id, + "--run-github-repo", + "ROCm/TheRock", + "--artifact-group", + "generic", + "--output-dir", + str(INSTALL_DIR), + "--base-only", + ] + print(f"Fetching Hip artifacts from run {run_id}...") + subprocess.run(cmd, check=True) + + # Fetch amd-llvm_dev (not sure why this isn't included in "base") + cmd = [ + str(venv_python), + str(THEROCK_DIR / "build_tools" / "fetch_artifacts.py"), + "--run-id", + run_id, + "--run-github-repo", + "ROCm/TheRock", + "--artifact-group", + "generic", + "--output-dir", + str(INSTALL_DIR), + "--flatten", + "amd-llvm_dev", + ] + print(f"Fetching amd-llvm_dev artifact...") + subprocess.run(cmd, check=True) + + +def build_hipdnn(git_ref: str): + """Build and install hipDNN from rocm-libraries sparse checkout.""" + # Sparse checkout of rocm-libraries + print(f"Sparse checkout of rocm-libraries at {git_ref}...") + subprocess.run( + [ + "git", + "clone", + "--no-checkout", + "--filter=blob:none", + ROCM_LIBRARIES_REPO, + str(HIPDNN_SRC_DIR), + ], + check=True, + ) + subprocess.run( + ["git", "sparse-checkout", "init", "--cone"], + cwd=HIPDNN_SRC_DIR, + check=True, + ) + subprocess.run( + ["git", "sparse-checkout", "set", "projects/hipdnn"], + cwd=HIPDNN_SRC_DIR, + check=True, + ) + subprocess.run(["git", "checkout", git_ref], cwd=HIPDNN_SRC_DIR, check=True) + + # Build inside projects/hipdnn so IDEs auto-discover compile_commands.json + hipdnn_project_dir = HIPDNN_SRC_DIR / "projects" / "hipdnn" + hipdnn_build_dir = hipdnn_project_dir / "build" + print(f"Building hipDNN from {hipdnn_project_dir}...") + + cmake_args = [ + "cmake", + "-G", + "Ninja", + "-S", + str(hipdnn_project_dir), + "-B", + str(hipdnn_build_dir), + f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", + f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", + "-DCMAKE_BUILD_TYPE=Debug", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DHIP_PLATFORM=amd", + "-DHIP_DNN_BUILD_PLUGINS=OFF", + # Headers are already checked into git, no need to re-generate them + # unless you're changing the schema. + "-DHIP_DNN_GENERATE_SDK_HEADERS=OFF", + "-DENABLE_CLANG_TIDY=OFF", + "-DENABLE_CLANG_FORMAT=OFF", + "-DHIPDNN_FRONTEND_SKIP_JSON_LIB=ON", + ] + subprocess.run(cmake_args, check=True) + + # Build and install + subprocess.run(["cmake", "--build", str(hipdnn_build_dir)], check=True) + subprocess.run(["cmake", "--install", str(hipdnn_build_dir)], check=True) + + +def setup_iree(tag: str): + """Clone IREE at a tag and fetch required submodules""" + print(f"Cloning IREE at tag {tag}...") + subprocess.run( + ["git", "clone", "--depth=1", "--branch", tag, IREE_REPO, str(IREE_DIR)], + check=True, + ) + + # Fetch only required submodules + print(f"Fetching IREE submodules: {IREE_SUBMODULES}") + for submodule in IREE_SUBMODULES: + subprocess.run( + ["git", "submodule", "update", "--init", "--depth=1", submodule], + cwd=IREE_DIR, + check=True, + ) + + +def build_fusilli(): + """Build and install fusilli from source.""" + fusilli_src = get_fusilli_dir() + + with tempfile.TemporaryDirectory() as tmpdir: + fusilli_build = Path(tmpdir) + print(f"Building fusilli from {fusilli_src}...") + + # Configure fusilli - based on TheRock's CMake args + cmake_args = [ + "cmake", + "-G", + "Ninja", + "-S", + str(fusilli_src), + "-B", + str(fusilli_build), + f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", + f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", + "-DCMAKE_BUILD_TYPE=Release", + "-DFUSILLI_BUILD_TESTS=OFF", + "-DFUSILLI_BUILD_BENCHMARKS=OFF", + "-DFUSILLI_SYSTEMS_AMDGPU=ON", + "-DFUSILLI_CODE_COVERAGE=OFF", + "-DFUSILLI_ENABLE_LOGGING=OFF", + "-DFUSILLI_ENABLE_CLANG_TIDY=OFF", + f"-DIREE_SOURCE_DIR={IREE_DIR}", + "-DHIP_PLATFORM=amd", + "-DIREE_USE_SYSTEM_DEPS=ON", + ] + subprocess.run(cmake_args, check=True) + + # Build and install + subprocess.run(["cmake", "--build", str(fusilli_build)], check=True) + subprocess.run(["cmake", "--install", str(fusilli_build)], check=True) + + +def generate_cmake_user_presets(): + """Generate CMakeUserPresets.json in the hipdnn-plugin directory.""" + plugin_dir = get_plugin_dir() + llvm_bin = INSTALL_DIR / "lib" / "llvm" / "bin" + + presets = { + "version": 6, + "configurePresets": [ + { + "name": "thepebble", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build", + "cacheVariables": { + "CMAKE_C_COMPILER": str(llvm_bin / "clang"), + "CMAKE_CXX_COMPILER": str(llvm_bin / "clang++"), + "CMAKE_PREFIX_PATH": str(INSTALL_DIR), + "IREE_SOURCE_DIR": str(IREE_DIR), + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "IREE_USE_SYSTEM_DEPS": "ON", + "HIP_PLATFORM": "amd", + }, + } + ], + } + + presets_path = plugin_dir / "CMakeUserPresets.json" + print(f"Writing {presets_path}...") + with open(presets_path, "w") as f: + json.dump(presets, f, indent=2) + f.write("\n") + + +def provide_iree_tools(iree_version: str): + """Pip install iree-base-compiler and symlink IREE tools into dist/. + + TheRock builds libIREECompiler.so and installs it to dist/lib/; ThePebble + gets it from pip's iree-base-compiler instead and symlinks it into dist/ + so TheRock's test scripts can find it.""" + # Create venv and pip install iree-base-compiler + venv_dir = PEBBLE_DIR / ".venv" + print(f"Creating venv at {venv_dir}...") + venv.EnvBuilder(with_pip=True, prompt="ThePebble").create(venv_dir) + + pip = venv_dir / "bin" / "pip" + print(f"Installing iree-base-compiler=={iree_version}...") + subprocess.run( + [ + str(pip), + "install", + "--find-links", + "https://iree.dev/pip-release-links.html", + f"iree-base-compiler=={iree_version}", + ], + check=True, + ) + + # Symlink libIREECompiler.so into dist/lib/ + venv_python = venv_dir / "bin" / "python" + result = subprocess.run( + [ + str(venv_python), + "-c", + "import pathlib, iree.compiler._mlir_libs;" + " print(pathlib.Path(iree.compiler._mlir_libs.__file__).parent" + " / 'libIREECompiler.so')", + ], + capture_output=True, + text=True, + check=True, + ) + iree_compiler_lib = Path(result.stdout.strip()) + lib_symlink = INSTALL_DIR / "lib" / "libIREECompiler.so" + lib_symlink.unlink(missing_ok=True) + print(f"Symlinking {lib_symlink} -> {iree_compiler_lib}") + lib_symlink.symlink_to(iree_compiler_lib) + + # Symlink iree-compile binary into dist/bin/ + iree_compile_src = venv_dir / "bin" / "iree-compile" + bin_symlink = INSTALL_DIR / "bin" / "iree-compile" + bin_symlink.unlink(missing_ok=True) + print(f"Symlinking {bin_symlink} -> {iree_compile_src}") + bin_symlink.symlink_to(iree_compile_src) + + +def generate_local_environment_setup(): + """Generate an 'activate' script to set up the local machine with correct + $PATH and $LD_LIBRARY_PATH to use ThePebble installed programs.""" + bin_dir = INSTALL_DIR / "bin" + lib_dir = INSTALL_DIR / "lib" + script_content = f"""#!/bin/bash +# ThePebble environment activation script +# Usage: source {PEBBLE_DIR}/activate + +if [[ "${{BASH_SOURCE[0]}}" == "${{0}}" ]]; then + echo "Error: This script must be sourced, not executed." + echo "Usage: source {PEBBLE_DIR}/activate" + exit 1 +fi + +export PATH="{bin_dir}:$PATH" +export LD_LIBRARY_PATH="{lib_dir}:$LD_LIBRARY_PATH" + +echo "ThePebble environment activated." +""" + + activate_path = PEBBLE_DIR / "activate" + print(f"Writing {activate_path}...") + with open(activate_path, "w") as f: + f.write(script_content) + + +# ============================================================================== +# CI install and test fusilli-plugin +# ============================================================================== + + +def build_fusilli_plugin(): + """Build and install fusilli plugin to dist.""" + plugin_src = get_plugin_dir() + llvm_bin = INSTALL_DIR / "lib" / "llvm" / "bin" + + with tempfile.TemporaryDirectory() as tmpdir: + plugin_build = Path(tmpdir) + print(f"Building fusilli plugin from {plugin_src}...") + + cmake_args = [ + "cmake", + "-G", + "Ninja", + "-S", + str(plugin_src), + "-B", + str(plugin_build), + f"-DCMAKE_C_COMPILER={llvm_bin / 'clang'}", + f"-DCMAKE_CXX_COMPILER={llvm_bin / 'clang++'}", + f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", + f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", + "-DCMAKE_BUILD_TYPE=Release", + f"-DIREE_SOURCE_DIR={IREE_DIR}", + "-DIREE_USE_SYSTEM_DEPS=ON", + "-DHIP_PLATFORM=amd", + ] + subprocess.run(cmake_args, check=True) + subprocess.run(["cmake", "--build", str(plugin_build)], check=True) + subprocess.run(["cmake", "--install", str(plugin_build)], check=True) + + +def test_fusilli_plugin(): + """Run test_fusilli_plugin.py from TheRock.""" + # The test script expects THEROCK_BIN_DIR to point to the bin/ directory + bin_dir = INSTALL_DIR / "bin" + + # Create iree_tag_for_pip.txt. + # TheRock/iree-libs/post_hook_fusilliprovider.cmake would create this file + # when building in TheRock. + iree_version = get_iree_git_tag() + iree_tag_file = bin_dir / "fusilli_plugin_test_infra" / "iree_tag_for_pip.txt" + iree_tag_file.write_text(iree_version) + print(f"Created {iree_tag_file} with version {iree_version}") + + # Run TheRock's test_fusilliprovider.py + therock_dir = PEBBLE_DIR / "TheRock" + test_script = ( + therock_dir + / "build_tools" + / "github_actions" + / "test_executable_scripts" + / "test_fusilliprovider.py" + ) + + env = os.environ.copy() + env["THEROCK_BIN_DIR"] = str(bin_dir) + + # iree-libs/post_hook_fusilliprovider.cmake sets up RPATHs so that a .so in + # "lib/hipdnn_plugins/engines" will be found by tests that + # fusilli_plugin.so, and so that + # "lib/hipdnn_plugins/engines/fusilli_plugin.so" can find hip .so's in lib. + # In ThePebble we just use LD_LIBRARY_PATH. + lib_dir = INSTALL_DIR / "lib" + plugin_lib_dir = lib_dir / "hipdnn_plugins" / "engines" + ld_path = f"{lib_dir}:{plugin_lib_dir}" + if "LD_LIBRARY_PATH" in env: + ld_path = f"{ld_path}:{env['LD_LIBRARY_PATH']}" + env["LD_LIBRARY_PATH"] = ld_path + + print(f"Running {test_script}...") + subprocess.run(["python3", str(test_script)], env=env, check=True) + + +def main(): + parser = argparse.ArgumentParser( + description="ThePebble a simulacrum of TheRock for fusilli plugin dev environment setup" + ) + parser.add_argument( + "--setup", + action="store_true", + help="Setup deps as TheRock would, and crate CMake preset for local dev", + ) + parser.add_argument( + "--ci-install-and-test-fusilli-plugin", + action="store_true", + help="Build + install + test the plugin using TheRock's test script", + ) + args = parser.parse_args() + + if not args.setup and not args.ci_install_and_test_fusilli_plugin: + parser.print_help() + sys.exit(1) + + if args.setup: + config = load_config() + versions = config["versions"] + + # Start fresh + if PEBBLE_DIR.exists(): + print(f"Removing previous setup {PEBBLE_DIR}...") + shutil.rmtree(PEBBLE_DIR) + + # Run setup + PEBBLE_DIR.mkdir(parents=True, exist_ok=True) + setup_therock(versions["therock_git_ref"]) + install_hip(versions["hip_run_id"]) + build_hipdnn(versions["hipdnn_git_ref"]) + setup_iree(f"iree-{get_iree_git_tag()}") + build_fusilli() + generate_cmake_user_presets() + provide_iree_tools(get_iree_git_tag()) + generate_local_environment_setup() + + # Copy config to cache for validation checks + config_src = Path(__file__).parent / "thepebble_config.toml" + shutil.copy(config_src, CACHED_CONFIG) + + print(f"\nSetup complete.") + print(f"To activate the ThePebble local dev environment, run:") + print(f" source {PEBBLE_DIR}/activate") + + if args.ci_install_and_test_fusilli_plugin: + validate_config() + build_fusilli_plugin() + test_fusilli_plugin() + + +if __name__ == "__main__": + main() diff --git a/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml b/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml new file mode 100644 index 00000000000..513ab9f2707 --- /dev/null +++ b/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml @@ -0,0 +1,9 @@ +[versions] +# Git ref (commit/tag/branch) for TheRock tooling +therock_git_ref = "main" + +# GitHub CI run ID for Hip artifacts (from ROCm/TheRock actions) +hip_run_id = "19089392286" + +# Git ref (commit/tag/branch) for rocm-libraries hipDNN +hipdnn_git_ref = "develop" diff --git a/dnn-providers/fusilli-provider/exports.map b/dnn-providers/fusilli-provider/exports.map new file mode 100644 index 00000000000..b8b178be5a3 --- /dev/null +++ b/dnn-providers/fusilli-provider/exports.map @@ -0,0 +1,8 @@ +/* + * Version script for fusilli_plugin + * Export only the hipDNN plugin interface symbols + */ +{ + global: hipdnn*; + local: *; +}; diff --git a/dnn-providers/fusilli-provider/include/graph_import.h b/dnn-providers/fusilli-provider/include/graph_import.h new file mode 100644 index 00000000000..7cf0e68f6ed --- /dev/null +++ b/dnn-providers/fusilli-provider/include/graph_import.h @@ -0,0 +1,429 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file contains facilities for converting hipDNN serialized graphs to +// fusilli graphs. +// +//===----------------------------------------------------------------------===// + +#ifndef FUSILLI_PLUGIN_SRC_GRAPH_IMPORT_H +#define FUSILLI_PLUGIN_SRC_GRAPH_IMPORT_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "hipdnn_engine_plugin_execution_context.h" + +// Convert from hipDNN DataType to fusilli DataType. +inline fusilli::ErrorOr hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType hipdnnType) { + switch (hipdnnType) { + case hipdnn_data_sdk::data_objects::DataType::HALF: + return ok(fusilli::DataType::Half); + case hipdnn_data_sdk::data_objects::DataType::BFLOAT16: + return ok(fusilli::DataType::BFloat16); + case hipdnn_data_sdk::data_objects::DataType::FLOAT: + return ok(fusilli::DataType::Float); + case hipdnn_data_sdk::data_objects::DataType::DOUBLE: + return ok(fusilli::DataType::Double); + case hipdnn_data_sdk::data_objects::DataType::UINT8: + return ok(fusilli::DataType::Uint8); + case hipdnn_data_sdk::data_objects::DataType::INT32: + return ok(fusilli::DataType::Int32); + case hipdnn_data_sdk::data_objects::DataType::UNSET: + return ok(fusilli::DataType::NotSet); + default: + return error(fusilli::ErrorCode::RuntimeFailure, + "Unknown type in hipdnn -> fusilli graph translation."); + } +} + +#define FUSILLI_PLUGIN_POINTWISE_CASE(CASE) \ + case hipdnn_data_sdk::data_objects::PointwiseMode::CASE: \ + return fusilli::PointwiseAttr::Mode::CASE; + +// Convert from hipDNN PointwiseMode to fusilli PointwiseAttr::Mode. +inline fusilli::ErrorOr +hipDnnPointwiseModeToFusilliMode( + hipdnn_data_sdk::data_objects::PointwiseMode hipdnnMode) { + switch (hipdnnMode) { + FUSILLI_POINTWISE_OPS(FUSILLI_PLUGIN_POINTWISE_CASE) + default: + return error(fusilli::ErrorCode::NotImplemented, + "Unsupported pointwise mode."); + } +} + +// Graph import is done through importGraph function, this class exists for +// organization and is used by importGraph. +// +// Graph import is designed around individual Node import functions (such as +// importConvFPropAttr) which convert a given node type, and track input and +// output tensors in shared state (via importNodeInput and importNodeOutput +// functions). Graph nodes are processed in topological order to ensure that +// outputs of producer nodes are tracked and available for consuming nodes. +// +// NOTE: inputs should already be topologically sorted, hipDNN's +// Graph::validate() includes a topological sort. +class GraphImport { +private: + friend fusilli::ErrorOr + importGraph(const hipdnnPluginConstData_t *opGraph); + + // The imported graph. + fusilli::Graph fusilliGraph; + + // Maps hipDNN tensor UIDs to fusilli::TensorAttrs for graph boundary tensors + // (inputs and outputs). Used by hipdnnEnginePluginExecuteOpGraph to match + // incoming device buffers (identified by UID) to their corresponding + // fusilli::TensorAttr. + std::unordered_map> + uidToIOTensor; + + // Maps hipDNN tensor UIDs to fusilli::TensorAttrs for intermediate (virtual) + // tensors. These are outputs of one node that serve as inputs to another. + std::unordered_map> + uidToVirtualTensor; + + // Helper class for reading from flatbuffer. + hipdnn_plugin_sdk::GraphWrapper opGraphWrapper; + + GraphImport(const hipdnnPluginConstData_t *opGraph) + : opGraphWrapper(opGraph->ptr, opGraph->size) {} + + fusilli::ErrorObject importGraph() { + const hipdnn_data_sdk::data_objects::Graph &hipDnnGraph = + opGraphWrapper.getGraph(); + + // Import graph level properties. + fusilli::DataType ioDataType; + FUSILLI_ASSIGN_OR_RETURN(ioDataType, hipDnnDataTypeToFusilliDataType( + hipDnnGraph.io_data_type())); + fusilli::DataType intermediateDataType; + FUSILLI_ASSIGN_OR_RETURN( + intermediateDataType, + hipDnnDataTypeToFusilliDataType(hipDnnGraph.intermediate_data_type())); + fusilli::DataType computeDataType; + FUSILLI_ASSIGN_OR_RETURN( + computeDataType, + hipDnnDataTypeToFusilliDataType(hipDnnGraph.compute_data_type())); + fusilliGraph.setName(hipDnnGraph.name()->str()) + .setIODataType(ioDataType) + .setIntermediateDataType(intermediateDataType) + .setComputeDataType(computeDataType); + + return importNodes(); + } + + // Import all graph nodes. + fusilli::ErrorObject importNodes() { + for (uint32_t i = 0; i < opGraphWrapper.nodeCount(); ++i) { + const hipdnn_data_sdk::data_objects::Node &node = + opGraphWrapper.getNode(i); + FUSILLI_CHECK_ERROR(importNode(node)); + } + + return fusilli::ok(); + } + + // Import single graph node. + fusilli::ErrorObject + importNode(const hipdnn_data_sdk::data_objects::Node &node) { + switch (node.attributes_type()) { + case hipdnn_data_sdk::data_objects::NodeAttributes:: + ConvolutionFwdAttributes: + FUSILLI_CHECK_ERROR( + importConvFPropAttr(node.attributes_as_ConvolutionFwdAttributes())); + break; + case hipdnn_data_sdk::data_objects::NodeAttributes:: + ConvolutionWrwAttributes: + FUSILLI_CHECK_ERROR( + importConvWGradAttr(node.attributes_as_ConvolutionWrwAttributes())); + break; + case hipdnn_data_sdk::data_objects::NodeAttributes::PointwiseAttributes: + FUSILLI_CHECK_ERROR( + importPointwiseAttr(node.attributes_as_PointwiseAttributes())); + break; + case hipdnn_data_sdk::data_objects::NodeAttributes::MatmulAttributes: + FUSILLI_CHECK_ERROR( + importMatmulAttr(node.attributes_as_MatmulAttributes())); + break; + default: + return fusilli::error(fusilli::ErrorCode::NotImplemented, + "Unsupported node type."); + } + return fusilli::ok(); + } + + fusilli::ErrorObject importConvFPropAttr( + const hipdnn_data_sdk::data_objects::ConvolutionFwdAttributes + *hipDnnConvFwdAttr) { + // Import node inputs. + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr x, + importNodeInput(hipDnnConvFwdAttr->x_tensor_uid(), "x")); + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr w, + importNodeInput(hipDnnConvFwdAttr->w_tensor_uid(), "w")); + + // Fusilli only supports symmetric padding. + if (!std::ranges::equal(*hipDnnConvFwdAttr->pre_padding(), + *hipDnnConvFwdAttr->post_padding())) // C++20 + return fusilli::error(fusilli::ErrorCode::AttributeNotSet, + "Conv node with asymmetric padding found."); + // Import node. + auto fusilliConvFwdAttr = + fusilli::ConvFPropAttr() + .setPadding(*hipDnnConvFwdAttr->post_padding()) + .setStride(*hipDnnConvFwdAttr->stride()) + .setDilation(*hipDnnConvFwdAttr->dilation()); + std::shared_ptr y = + fusilliGraph.convFProp(x, w, fusilliConvFwdAttr); + + // Import node output. + FUSILLI_CHECK_ERROR( + importNodeOutput(hipDnnConvFwdAttr->y_tensor_uid(), "y", y)); + + return fusilli::ok(); + } + + fusilli::ErrorObject importConvWGradAttr( + const hipdnn_data_sdk::data_objects::ConvolutionWrwAttributes + *hipDnnConvWrwAttr) { + // Import node inputs. + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr dy, + importNodeInput(hipDnnConvWrwAttr->dy_tensor_uid(), "dy")); + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr x, + importNodeInput(hipDnnConvWrwAttr->x_tensor_uid(), "x")); + + // Fusilli only supports symmetric padding. + if (!std::ranges::equal(*hipDnnConvWrwAttr->pre_padding(), + *hipDnnConvWrwAttr->post_padding())) // C++20 + return fusilli::error(fusilli::ErrorCode::AttributeNotSet, + "Conv wgrad node with asymmetric padding found."); + // Import node. + auto fusilliConvWGradAttr = + fusilli::ConvWGradAttr() + .setPadding(*hipDnnConvWrwAttr->post_padding()) + .setStride(*hipDnnConvWrwAttr->stride()) + .setDilation(*hipDnnConvWrwAttr->dilation()); + std::shared_ptr dw = + fusilliGraph.convWGrad(dy, x, fusilliConvWGradAttr); + + // Import node output. + FUSILLI_CHECK_ERROR( + importNodeOutput(hipDnnConvWrwAttr->dw_tensor_uid(), "dw", dw)); + + return fusilli::ok(); + } + + fusilli::ErrorObject importPointwiseAttr( + const hipdnn_data_sdk::data_objects::PointwiseAttributes *hipDnnPwAttr) { + // Get mode and determine input count. + FUSILLI_ASSIGN_OR_RETURN( + fusilli::PointwiseAttr::Mode mode, + hipDnnPointwiseModeToFusilliMode(hipDnnPwAttr->operation())); + int requiredInputs = + fusilli::PointwiseAttr::kModeToRequiredInputCount.at(mode); + + // Import first input (always present). + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr in0, + importNodeInput(hipDnnPwAttr->in_0_tensor_uid(), "in0")); + + // Build fusilli pointwise node. + std::shared_ptr out; + auto fusilliPwAttr = fusilli::PointwiseAttr().setMode(mode); + + switch (requiredInputs) { + case 1: + // Unary op (e.g., RELU_FWD). + out = fusilliGraph.pointwise(in0, fusilliPwAttr); + break; + case 2: { + // Binary op (e.g., ADD, MUL, SUB, DIV). + auto in1Uid = hipDnnPwAttr->in_1_tensor_uid(); + if (!in1Uid.has_value()) + return fusilli::error(fusilli::ErrorCode::AttributeNotSet, + "Binary pointwise op missing second input."); + FUSILLI_ASSIGN_OR_RETURN(std::shared_ptr in1, + importNodeInput(in1Uid.value(), "in1")); + out = fusilliGraph.pointwise(in0, in1, fusilliPwAttr); + break; + } + default: + return fusilli::error(fusilli::ErrorCode::RuntimeFailure, + "Unexpected number of inputs to pointwise op."); + } + + // Import node output. + FUSILLI_CHECK_ERROR( + importNodeOutput(hipDnnPwAttr->out_0_tensor_uid(), "out0", out)); + + return fusilli::ok(); + } + + fusilli::ErrorObject importMatmulAttr( + const hipdnn_data_sdk::data_objects::MatmulAttributes *hipDnnMatmulAttr) { + // Import node inputs. + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr a, + importNodeInput(hipDnnMatmulAttr->a_tensor_uid(), "a")); + FUSILLI_ASSIGN_OR_RETURN( + std::shared_ptr b, + importNodeInput(hipDnnMatmulAttr->b_tensor_uid(), "b")); + + // Import node - matmul has no extra attributes. + auto fusilliMatmulAttr = fusilli::MatmulAttr(); + std::shared_ptr c = + fusilliGraph.matmul(a, b, fusilliMatmulAttr); + + // Import node output. + FUSILLI_CHECK_ERROR( + importNodeOutput(hipDnnMatmulAttr->c_tensor_uid(), "c", c)); + + return fusilli::ok(); + } + + // Import, and track, node input tensor. Node input tensor is created in the + // case of a boundary tensor, and read from shared state otherwise. + fusilli::ErrorOr> + importNodeInput(int64_t uid, const char *name) { + // Get hipDNN tensor. TensorMap is created from the graph that uid variable + // is read from, so .at() call should be safe. + const hipdnn_data_sdk::data_objects::TensorAttributes *hipDnnTensorAttr = + opGraphWrapper.getTensorMap().at(uid); + + // A virtual tensor indicates an intermediate (non-boundary) tensor. + if (hipDnnTensorAttr->virtual_()) { + // Look up the output of a previously imported node. + if (!uidToVirtualTensor.contains(uid)) + return fusilli::error(fusilli::ErrorCode::RuntimeFailure, + "Virtual tensor not found - graph may not be " + "topologically sorted."); + return ok(uidToVirtualTensor.at(uid)); + } + + // Import new tensor. + fusilli::TensorAttr fusilliTensorAttr; + if (isPassByValue(hipDnnTensorAttr)) { // handle scalar tensors + switch (hipDnnTensorAttr->value_type()) { + case hipdnn_data_sdk::data_objects::TensorValue::Float32Value: + fusilliTensorAttr = fusilli::TensorAttr( + hipDnnTensorAttr->value_as_Float32Value()->value()); + break; + case hipdnn_data_sdk::data_objects::TensorValue::Float64Value: + fusilliTensorAttr = fusilli::TensorAttr( + hipDnnTensorAttr->value_as_Float64Value()->value()); + break; + case hipdnn_data_sdk::data_objects::TensorValue::Int32Value: + fusilliTensorAttr = fusilli::TensorAttr( + hipDnnTensorAttr->value_as_Int32Value()->value()); + break; + default: + return fusilli::error( + fusilli::ErrorCode::NotImplemented, + "Unsupported scalar type in hipdnn -> fusilli graph translation."); + } + } + fusilliTensorAttr.setName(std::format("{}_{}", name, uid)); // C++20 + FUSILLI_CHECK_ERROR(importAttrs(fusilliTensorAttr, hipDnnTensorAttr)); + std::shared_ptr graphInput = + fusilliGraph.tensor(fusilliTensorAttr); + + // Scalar constants are embedded in the MLIR IR and don't need device + // buffers, so exclude them from the IO tensor map that drives variant + // pack construction at execution time. + if (!graphInput->isScalar()) + uidToIOTensor[uid] = graphInput; + + return ok(graphInput); + }; + + // Import and track node output tensor. + fusilli::ErrorObject + importNodeOutput(int64_t uid, const char *name, + const std::shared_ptr &nodeOutput) { + // Get hipDNN tensor. TensorMap is created from the graph that uid variable + // is read from, so .at() call should be safe. + const hipdnn_data_sdk::data_objects::TensorAttributes *hipDnnTensorAttr = + opGraphWrapper.getTensorMap().at(uid); + + // Import attrs. + nodeOutput->setName(std::format("{}_{}", name, uid)); // C++20 + FUSILLI_CHECK_ERROR(importAttrs(*nodeOutput, hipDnnTensorAttr)); + + // A virtual tensor indicates an intermediate (non-boundary) tensor. + if (hipDnnTensorAttr->virtual_()) { + // Check for duplicate UIDs. + if (uidToVirtualTensor.contains(uid)) + return fusilli::error( + fusilli::ErrorCode::RuntimeFailure, + "Duplicate virtual tensor UID - UIDs must be unique."); + // Track for use by downstream nodes. + uidToVirtualTensor[uid] = nodeOutput; + return fusilli::ok(); + } + + // Track boundary tensor. + uidToIOTensor[uid] = nodeOutput; + + return fusilli::ok(); + }; + + // Whether the hipDNN tensor carries a pass-by-value scalar (equivalent to + // hipDNN frontend's TensorAttributes::get_pass_by_value()). + static bool + isPassByValue(const hipdnn_data_sdk::data_objects::TensorAttributes *src) { + return src->value_type() != + hipdnn_data_sdk::data_objects::TensorValue::NONE; + } + + // Import tensor attrs (dims, strides, datatype) from hipDNN to fusilli. + fusilli::ErrorObject + importAttrs(fusilli::TensorAttr &dest, + const hipdnn_data_sdk::data_objects::TensorAttributes *src) { + FUSILLI_ASSIGN_OR_RETURN(auto dataType, + hipDnnDataTypeToFusilliDataType(src->data_type())); + dest.setIsVirtual(src->virtual_()) + .setDim(*src->dims()) + .setStride(*src->strides()) + .setDataType(dataType); + return fusilli::ok(); + } +}; + +// Given a hipDNN serialized graph, return imported fusilli::Graph and UID -> +// fusilli::TensorAttr map for IO tensors. +// +// NOTE: HipdnnEnginePluginExecutionContext used as return type because it +// contains (only) the exact required fields. If it requires more members in +// the future it's probably worth creating a new data transmission type. +inline fusilli::ErrorOr +importGraph(const hipdnnPluginConstData_t *opGraph) { + auto gc = GraphImport(opGraph); + FUSILLI_CHECK_ERROR(gc.importGraph()); + FUSILLI_CHECK_ERROR(gc.fusilliGraph.validate()); + return HipdnnEnginePluginExecutionContext{.graph = std::move(gc.fusilliGraph), + .uidToFusilliTensorAttr = + std::move(gc.uidToIOTensor)}; +} + +#endif // FUSILLI_PLUGIN_SRC_GRAPH_IMPORT_H diff --git a/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_execution_context.h b/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_execution_context.h new file mode 100644 index 00000000000..d5a38da68b0 --- /dev/null +++ b/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_execution_context.h @@ -0,0 +1,39 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file contains the fusilli plugin's definition of +// HipdnnEnginePluginExecutionContext. To hipDNN this type is opaque, it deals +// in hipdnnEnginePluginExecutionContext_t which is a pointer to the undefined +// HipdnnEnginePluginExecutionContext. Each plugin must define +// HipdnnEnginePluginExecutionContext in order to create something when hipDNN +// asks for an execution context. +// +// The execution context should store what's needed to execute a given kernel +// (plan in hipDNN parlance) in a hot loop without any overhead. For fusilli +// plugin, that maps to constructing and storing a fusilli::Graph based on +// hipDNN graph. When an execution is requested, it should be a simple lookup +// for UID -> tensor attribute, then a graph execution. +// +//===----------------------------------------------------------------------===// + +#ifndef FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_EXECUTION_CONTEXT_H +#define FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_EXECUTION_CONTEXT_H + +#include + +struct HipdnnEnginePluginExecutionContext { + // Fusilli graph. + fusilli::Graph graph; + + // Map from hipDNN tensor UID to fusilli::TensorAttrs for graph boundary + // tensors (inputs and outputs). + std::unordered_map> + uidToFusilliTensorAttr; +}; + +#endif // FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_EXECUTION_CONTEXT_H diff --git a/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_handle.h b/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_handle.h new file mode 100644 index 00000000000..61ccb87012c --- /dev/null +++ b/dnn-providers/fusilli-provider/include/hipdnn_engine_plugin_handle.h @@ -0,0 +1,82 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file contains the fusilli plugin's definition of +// HipdnnEnginePluginHandle. To hipDNN this type is opaque, it deals in +// hipdnnEnginePluginHandle_t which is a pointer to the undefined +// HipdnnEnginePluginHandle. Each plugin must define HipdnnEnginePluginHandle in +// order to create something when hipDNN asks for an plugin handle. +// +// HipdnnEnginePluginHandle stores any persistent data associated with a +// particular engine plugin. In fusilli plugin that's the fusilli::Handle, and +// some temporary buffers that higher level APIs create and destroy at different +// times. +// +//===----------------------------------------------------------------------===// + +#ifndef FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_HANDLE_H +#define FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_HANDLE_H + +#include +#include +#include + +#include +#include +#include +#include + +struct HipdnnEnginePluginHandle { +public: + const int deviceId; + + HipdnnEnginePluginHandle(int deviceId) : deviceId(deviceId) {} + + // Take ownership of a flatbuffers::DetachedBuffer and store it associated + // with its memory address. + void storeEngineDetailsBuffer( + const void *ptr, std::unique_ptr &&buffer) { + _engineDetailsBuffers[ptr] = std::move(buffer); + } + + // Destroy the flatbuffers::DetachedBuffer associated with ptr. + void eraseEngineDetailsBuffer(const void *ptr) { + _engineDetailsBuffers.erase(ptr); + } + + // Get or create fusilli::Handle just in time. As the engine API may set the + // stream (through `hipdnnEnginePluginSetStream`) after initial handle + // creation (in `hipdnnEnginePluginCreate`) we defer the fusilli::Handle + // creation until we know if a stream has been set. + fusilli::ErrorOr> getFusilliHandle() { + if (!_fusilliHandle.has_value()) { + FUSILLI_ASSIGN_OR_RETURN( + auto handle, + fusilli::Handle::create(fusilli::Backend::AMDGPU, deviceId, + reinterpret_cast(_stream))); + _fusilliHandle = std::move(handle); + } + return fusilli::ok( + std::reference_wrapper(*_fusilliHandle)); + } + + void setStream(hipStream_t stream) { _stream = stream; } + +private: + // Default to creating a handle on the null (default) stream. + hipStream_t _stream = 0; + + // Fusilli handle, will be created on the first call to `getFusilliHandle`. + std::optional _fusilliHandle; + + // Storage for engine details. + std::unordered_map> + _engineDetailsBuffers; +}; + +#endif // FUSILLI_PLUGIN_SRC_HIPDNN_ENGINE_PLUGIN_HANDLE_H diff --git a/dnn-providers/fusilli-provider/include/utils.h b/dnn-providers/fusilli-provider/include/utils.h new file mode 100644 index 00000000000..eaca757a644 --- /dev/null +++ b/dnn-providers/fusilli-provider/include/utils.h @@ -0,0 +1,255 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file contains the fusilli plugin utils and macros. +// +//===----------------------------------------------------------------------===// + +#ifndef FUSILLI_PLUGIN_SRC_UTILS_H +#define FUSILLI_PLUGIN_SRC_UTILS_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace fusilli_plugin { + +// Checks for null, sets the plugin last error manager and returns error if +// null. +// +// SIDE EFFECT: any util function returning an `hipdnnPluginStatus_t` is +// intended for error checking and reporting, and therefore sets +// PluginLastErrorManager::setLastError to an appropriate error on the unhappy +// path. +template hipdnnPluginStatus_t isNull(T *value) { + if (value == nullptr) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_BAD_PARAM, + std::string(typeid(T).name()) + " is nullptr"); + } + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +// Find deviceBuffer with UID. +inline fusilli::ErrorOr +findDeviceBuffer(int64_t uid, const hipdnnPluginDeviceBuffer_t *deviceBuffers, + uint32_t numDeviceBuffers) { + for (uint32_t i = 0; i < numDeviceBuffers; ++i) { + if (uid == deviceBuffers[i].uid) { + return fusilli::ok(deviceBuffers[i]); + } + } + + return fusilli::error(fusilli::ErrorCode::AttributeNotSet, + "Device buffer with the uid: " + std::to_string(uid) + + " not found in the provided device buffers."); +} + +// If null, set plugin error manager last error to +// HIPDNN_PLUGIN_STATUS_BAD_PARAM and return said error from the enclosing +// scope. +#define FUSILLI_PLUGIN_CHECK_NULL(X) \ + do { \ + if (hipdnnPluginStatus_t status = isNull(X); \ + status != HIPDNN_PLUGIN_STATUS_SUCCESS) { \ + return status; \ + } \ + } while (false) + +// LOG_API_SUCCESS from hipDNN, but deducing the enclosing function rather than +// passing the function name. +#define LOG_API_SUCCESS_AUTO(msg) LOG_API_SUCCESS(__func__, msg) + +// Unwrap the value returned from an expression that evaluates to a +// fusilli::ErrorOr. In the unhappy path set plugin error manager last error to +// HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR and return said error from the enclosing +// scope. +// +// Usage: +// fusilli::ErrorOr getString(); +// +// hipdnnPluginStatus_t processString() { +// // Either gets the string or returns error. +// FUSILLI_PLUGIN_ASSIGN_OR_RETURN(std::string str, getString()); +// doSomethingImportant(str); +// return HIPDNN_PLUGIN_STATUS_SUCCESS; +// } +#define FUSILLI_PLUGIN_ASSIGN_OR_RETURN_IMPL(errorOr, var, expr) \ + auto errorOr = (expr); \ + if (fusilli::isError(errorOr)) { \ + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( \ + HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR, \ + fusilli::ErrorObject(errorOr).getMessage()); \ + } \ + var = std::move(*errorOr); + +#define FUSILLI_PLUGIN_ASSIGN_OR_RETURN(varDecl, expr) \ + FUSILLI_DISABLE_COUNTER_WARNING \ + FUSILLI_PLUGIN_ASSIGN_OR_RETURN_IMPL(FUSILLI_ERROR_VAR(_errorOr), varDecl, \ + expr) \ + FUSILLI_RESTORE_COUNTER_WARNING + +template fusilli::ErrorObject convertToErrorObject(T &&error) { + using DecayT = std::decay_t; + if constexpr (std::is_convertible_v) { + return std::forward(error); + } else if constexpr (std::is_same_v) { + // Convert HIP error to fusilli ErrorObject + if (error != hipSuccess) { + return fusilli::error(fusilli::ErrorCode::InternalError, + hipGetErrorString(error)); + } + return fusilli::ok(); + } else { + static_assert( + std::is_convertible_v || + std::is_same_v, + "convertToErrorObject requires fusilli::ErrorObject or hipError_t"); + // Unreachable + return fusilli::error(fusilli::ErrorCode::InternalError, + "Unknown error type"); + } +} + +// Set plugin error manager last error and return failed status from enclosing +// scope if expression evaluates to a fusilli::ErrorObject in an error state; or +// in the case of fusilli::ErrorOr is convertible to an fusilli::ErrorObject +// in an error state. +// +// Usage: +// fusilli::ErrorObject doBar(); +// +// hipdnnPluginStatus_t doFoo() { +// // Returns error if doBar() fails +// FUSILLI_PLUGIN_CHECK_ERROR(doBar()); +// return HIPDNN_PLUGIN_STATUS_SUCCESS; +// } +#define FUSILLI_PLUGIN_CHECK_ERROR(expr) \ + do { \ + fusilli::ErrorObject err = convertToErrorObject(expr); \ + if (isError(err)) { \ + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( \ + HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR, err.getMessage()); \ + } \ + } while (false) + +// Convert from fusilli DataType to iree hal data type. +inline fusilli::ErrorOr +fusilliDataTypeToIreeHalDataType(fusilli::DataType fusilliDataType) { + switch (fusilliDataType) { + case fusilli::DataType::Half: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_FLOAT_16); + case fusilli::DataType::BFloat16: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_BFLOAT_16); + case fusilli::DataType::Float: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_FLOAT_32); + case fusilli::DataType::Double: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_FLOAT_64); + case fusilli::DataType::Uint8: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_UINT_8); + case fusilli::DataType::Int8: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_INT_8); + case fusilli::DataType::Int16: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_INT_16); + case fusilli::DataType::Int32: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_INT_32); + case fusilli::DataType::Int64: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_INT_64); + case fusilli::DataType::Boolean: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_BOOL_8); + case fusilli::DataType::FP8E5M2: + return fusilli::ok(IREE_HAL_ELEMENT_TYPE_FLOAT_8_E5M2); + case fusilli::DataType::NotSet: + default: + return fusilli::error( + fusilli::ErrorCode::InvalidAttribute, + "unknown data type in fusilli -> iree runtime data type conversion"); + } +} + +// Import a HIP device pointer into the IREE runtime as a fusilli::Buffer. +// +// No allocations are done, this is making an existing allocation available to +// the IREE runtime. +inline fusilli::ErrorOr> +importDevicePointer(iree_hal_allocator_t *deviceAllocator, void *devicePtr, + size_t sizeBytes, + std::span shape, // C++20 + iree_hal_element_type_t elementType) { + iree_allocator_t hostAllocator = iree_allocator_system(); + + // Import external buffer into IREE runtime. + iree_hal_buffer_params_t bufferParams = { + .usage = IREE_HAL_BUFFER_USAGE_DEFAULT, + .access = IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE, + .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL, + .queue_affinity = IREE_HAL_QUEUE_AFFINITY_ANY, + // As we are importing a buffer rather than allocating, this param should + // be ignored. + .min_alignment = 0, + }; + iree_hal_external_buffer_t externalBuffer = { + .type = IREE_HAL_EXTERNAL_BUFFER_TYPE_DEVICE_ALLOCATION, + .flags = 0, + .size = static_cast(sizeBytes), + .handle = + { + .device_allocation = + { + .ptr = reinterpret_cast(devicePtr), + }, + }, + }; + iree_hal_buffer_t *importedBuffer = nullptr; + FUSILLI_CHECK_ERROR(iree_hal_allocator_import_buffer( + /*allocator=*/deviceAllocator, /*params=*/bufferParams, + /*external_buffer=*/&externalBuffer, + /*release_callback=*/iree_hal_buffer_release_callback_null(), + /*out_buffer=*/&importedBuffer)); + + // Create a buffer view for external buffer. + iree_hal_buffer_view_t *outBufferView = nullptr; + FUSILLI_CHECK_ERROR(iree_hal_buffer_view_create( + /*buffer=*/importedBuffer, + /*shape_rank=*/shape.size(), + /*shape=*/shape.data(), + /*element_type=*/elementType, + /*encoding_type=*/IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, + /*host_allocator=*/hostAllocator, + /*out_buffer_view=*/&outBufferView)); + + // Release our reference to buffer. The buffer view holds a reference to + // buffer and will handle release + possible destruction when it's destroyed. + iree_hal_buffer_release(importedBuffer); + + // Create fusilli::Buffer from buffer view. fusilli::Buffer is a RAII type + // that retains the buffer view on construction (incrementing its reference + // count) and releases the buffer view on destruction. + FUSILLI_ASSIGN_OR_RETURN(auto fusilliBuffer, + fusilli::Buffer::import(outBufferView)); + std::shared_ptr result = + std::make_shared(std::move(fusilliBuffer)); + + // Release our reference to buffer view. The buffer view and buffer is tied to + // fusilli::Buffer's lifetime as it holds the only reference to the buffer + // view. + iree_hal_buffer_view_release(outBufferView); + + return fusilli::ok(std::move(result)); +} + +} // namespace fusilli_plugin + +#endif // FUSILLI_PLUGIN_SRC_UTILS_H diff --git a/dnn-providers/fusilli-provider/src/fusilli_plugin.cpp b/dnn-providers/fusilli-provider/src/fusilli_plugin.cpp new file mode 100644 index 00000000000..3dd4a42b26d --- /dev/null +++ b/dnn-providers/fusilli-provider/src/fusilli_plugin.cpp @@ -0,0 +1,557 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file is the main entry point for fusilli-plugin, implementations for all +// required hipDNN engine plugin API functions live here. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "graph_import.h" +#include "hipdnn_engine_plugin_execution_context.h" +#include "hipdnn_engine_plugin_handle.h" +#include "utils.h" + +using namespace hipdnn_plugin_sdk; +using namespace fusilli_plugin; + +// TODO(#2317): ensure single source of truth for plugin version +static const char *fusilliPluginVersion = "0.0.1"; + +// s_lastError is thread_local static so can't be initialized in the header file +// as the header file is included in many context. Clear the string here. +thread_local char + PluginLastErrorManager::s_lastError[HIPDNN_PLUGIN_ERROR_STRING_MAX_LENGTH] = + ""; + +extern "C" { + +// ---------------------------------------------------------------------- +// Implementations for the basic plugin API defined in +// hipDNN/sdk/include/hipdnn_sdk/plugin/PluginApi.h +// ---------------------------------------------------------------------- + +hipdnnPluginStatus_t hipdnnPluginGetName(const char **name) { + LOG_API_ENTRY("name_ptr=" << static_cast(name)); + FUSILLI_PLUGIN_CHECK_NULL(name); + + *name = hipdnn_data_sdk::utilities::FUSILLI_ENGINE_NAME; + + LOG_API_SUCCESS_AUTO("pluginName=" << *name); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnPluginGetVersion(const char **version) { + LOG_API_ENTRY("version_ptr=" << static_cast(version)); + FUSILLI_PLUGIN_CHECK_NULL(version); + + *version = fusilliPluginVersion; + + LOG_API_SUCCESS_AUTO("version=" << *version); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnPluginGetType(hipdnnPluginType_t *type) { + LOG_API_ENTRY("type_ptr=" << static_cast(type)); + FUSILLI_PLUGIN_CHECK_NULL(type); + + *type = HIPDNN_PLUGIN_TYPE_ENGINE; + + LOG_API_SUCCESS_AUTO("type=" << *type); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +void hipdnnPluginGetLastErrorString(const char **error_str) { + if (error_str) { + *error_str = hipdnn_plugin_sdk::PluginLastErrorManager::getLastError(); + } +} + +// Once plugins are loaded via plugin manager then logging will work for them +hipdnnPluginStatus_t hipdnnPluginSetLoggingCallback(hipdnnCallback_t callback) { + // No LOG_API_ENTRY as logging won't be wired up yet. + FUSILLI_PLUGIN_CHECK_NULL(callback); + + hipdnn_plugin_sdk::logging::initializeCallbackLogging( + hipdnn_data_sdk::utilities::FUSILLI_ENGINE_NAME, callback); + + LOG_API_SUCCESS_AUTO("logging callback initialized"); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +// ---------------------------------------------------------------------- +// Implementations for engine plugin API defined in +// hipDNN/sdk/include/hipdnn_sdk/plugin/EnginePluginApi.h +// ---------------------------------------------------------------------- + +hipdnnPluginStatus_t hipdnnEnginePluginGetAllEngineIds(int64_t *engineIds, + uint32_t maxEngines, + uint32_t *numEngines) { + LOG_API_ENTRY("engineIds=" << static_cast(engineIds) + << ", maxEngines=" << maxEngines << ", numEngines=" + << static_cast(numEngines)); + FUSILLI_PLUGIN_CHECK_NULL(numEngines); + if (maxEngines != 0) { + FUSILLI_PLUGIN_CHECK_NULL(engineIds); + } + + // Set `numEngines` regardless of how many engines are actually returned. + // The backend queries this function twice: + // - First call: engineIds=NULL, maxEngines=0 to get the count + // - Second call: engineIds allocated based on numEngines from first pass + *numEngines = 1; + + if (maxEngines >= 1) { + engineIds[0] = hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID; + } + + LOG_API_SUCCESS_AUTO("numEngines=" << *numEngines); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginCreate(hipdnnEnginePluginHandle_t *handle) { + LOG_API_ENTRY("handle_ptr=" << static_cast(handle)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + + // Get device id. + int deviceId; + FUSILLI_PLUGIN_CHECK_ERROR(hipGetDevice(&deviceId)); + + // Create handle. + *handle = new HipdnnEnginePluginHandle(deviceId); + + LOG_API_SUCCESS_AUTO("createdHandle=" << static_cast(*handle)); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginDestroy(hipdnnEnginePluginHandle_t handle) { + LOG_API_ENTRY("handle=" << static_cast(handle)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + + delete handle; + + LOG_API_SUCCESS_AUTO(""); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginSetStream(hipdnnEnginePluginHandle_t handle, + hipStream_t stream) { + LOG_API_ENTRY("handle=" << static_cast(handle) + << ", stream_id=" << static_cast(stream)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + + // Get device associated with stream. + hipDevice_t deviceId; + FUSILLI_PLUGIN_CHECK_ERROR(hipStreamGetDevice(stream, &deviceId)); + + // This should never happen, check so that when it does we get a nice error + // message. + if (deviceId != handle->deviceId) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_BAD_PARAM, + "Stream is associated with different device. Device reported " + "through `hipStreamGetDevice` does not match active " + "device reported through `hipGetDevice`."); + } + + // Set stream, it will be used to create fusilli::Handle later. + handle->setStream(stream); + + LOG_API_SUCCESS_AUTO(""); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnEnginePluginGetApplicableEngineIds( + hipdnnEnginePluginHandle_t handle, const hipdnnPluginConstData_t *opGraph, + int64_t *engineIds, uint32_t maxEngines, uint32_t *numEngines) { + LOG_API_ENTRY("handle=" << static_cast(handle) + << ", opGraph=" << static_cast(opGraph) + << ", engineIds=" << static_cast(engineIds) + << ", maxEngines=" << maxEngines << ", numEngines=" + << static_cast(numEngines)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(opGraph); + if (maxEngines != 0) { + FUSILLI_PLUGIN_CHECK_NULL(engineIds); + } + FUSILLI_PLUGIN_CHECK_NULL(numEngines); + + *numEngines = 0; + if (maxEngines < 1) { + HIPDNN_PLUGIN_LOG_INFO( + "Maximum number of engines reached (" + << maxEngines + << "), ignoring additional engines, numEngines count: " << *numEngines); + LOG_API_SUCCESS_AUTO("numEngines=" << *numEngines); + return HIPDNN_PLUGIN_STATUS_SUCCESS; + } + + // Use the graph import translation layer to determine if this graph is + // supported. If import succeeds, the graph is composed of ops fusilli can + // handle. + // + // NOTE: If a translatable graph should not be claimed (e.g. numerical + // issues), one can gate particular ops in the translation layer + // (graph_import.h), or filter out very specific graph types here - gates + // should check environment variables so it's easy to run the problematic + // graphs during development. + auto result = importGraph(opGraph); + if (fusilli::isError(static_cast(result))) { + HIPDNN_PLUGIN_LOG_INFO( + "Graph not supported: " + << static_cast(result).getMessage()); + return HIPDNN_PLUGIN_STATUS_SUCCESS; + } + + // Graph passes all checks, the fusilli engine is applicable. + engineIds[0] = hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID; + *numEngines = 1; + + LOG_API_SUCCESS_AUTO("numEngines=" << *numEngines); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginGetEngineDetails(hipdnnEnginePluginHandle_t handle, + int64_t engineId, + const hipdnnPluginConstData_t *opGraph, + hipdnnPluginConstData_t *engineDetails) { + // ---------------------------------------------------------------------- + // Plugin API call flow for engine configuration and execution. + // + // hipDNN Plugin + // ====================================================================== + // hipdnnEnginePluginGetEngineDetails -> populates engineDetails + // (flatbuffer object) with + // behavioral notes + knob + // definitions that are available + // to the higher level API. + // Return populated engineDetails + // <- (hipdnnPluginConstData_t). + // + // Decides final configuration, populating ~~ + // engineConfig flatbuffer + // (hipdnnPluginConstData_t) based on info + // provided in engineDetails. + // + // hipdnnEnginePluginCreateExecutionContext -> Creates execution context + // (hipdnnEnginePluginExecutionContext_t) + // <- based on engineConfig. + // + // Uses returned execution context to ~~ + // invoke kernels + // + // hipdnnEnginePluginDestroyEngineDetails -> cleans up engine details. + // + // hipdnnEnginePluginDestroyExecutionContext -> cleans up execution context. + // ---------------------------------------------------------------------- + + LOG_API_ENTRY( + "handle=" << static_cast(handle) << ", engineId=" << engineId + << ", opGraph=" << static_cast(opGraph) + << ", engineDetails=" << static_cast(engineDetails)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(opGraph); + FUSILLI_PLUGIN_CHECK_NULL(engineDetails); + + if (engineId != hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_BAD_PARAM, "unexpected engine id"); + } + + // Build engine details object, we're only storing the engine id for the time + // being. + flatbuffers::FlatBufferBuilder builder; + auto engineDetailsObj = + hipdnn_data_sdk::data_objects::CreateEngineDetails(builder, engineId); + builder.Finish(engineDetailsObj); + + // Populate out parameter. + auto detachedBuffer = + std::make_unique(builder.Release()); + engineDetails->ptr = detachedBuffer->data(); + engineDetails->size = detachedBuffer->size(); + + // Store owning pointer in handle, hipdnnEnginePluginDestroyEngineDetails will + // inform us when it's safe to clean this up. + handle->storeEngineDetailsBuffer(engineDetails->ptr, + std::move(detachedBuffer)); + + LOG_API_SUCCESS_AUTO("engineDetails->ptr=" << engineDetails->ptr); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginDestroyEngineDetails(hipdnnEnginePluginHandle_t handle, + hipdnnPluginConstData_t *engineDetails) { + // See comment in hipdnnEnginePluginGetEngineDetails for more about how this + // function fits into the flow. + + LOG_API_ENTRY("handle=" << static_cast(handle) << ", engineDetails=" + << static_cast(engineDetails)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(engineDetails); + FUSILLI_PLUGIN_CHECK_NULL(engineDetails->ptr); + + // Deallocate engine details. + handle->eraseEngineDetailsBuffer(engineDetails->ptr); + engineDetails->ptr = nullptr; + engineDetails->size = 0; + + LOG_API_SUCCESS_AUTO("engineDetails->ptr=" << engineDetails->ptr); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t +hipdnnEnginePluginGetWorkspaceSize(hipdnnEnginePluginHandle_t handle, + const hipdnnPluginConstData_t *engineConfig, + const hipdnnPluginConstData_t *opGraph, + size_t *workspaceSize) { + LOG_API_ENTRY( + "handle=" << static_cast(handle) + << ", engineConfig=" << static_cast(engineConfig) + << ", opGraph=" << static_cast(opGraph) + << ", workspaceSize=" << static_cast(workspaceSize)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(engineConfig); + FUSILLI_PLUGIN_CHECK_NULL(opGraph); + FUSILLI_PLUGIN_CHECK_NULL(workspaceSize); + + // TODO(#197): Create a heuristic to estimate workspace size from the op graph + // without requiring full compilation. For now, return 0 — the actual + // workspace size will be reported by GetWorkspaceSizeFromExecutionContext + // after the graph is compiled. + *workspaceSize = 0; + + LOG_API_SUCCESS_AUTO("workspaceSize=" << *workspaceSize); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnEnginePluginCreateExecutionContext( + hipdnnEnginePluginHandle_t handle, + const hipdnnPluginConstData_t *engineConfig, + const hipdnnPluginConstData_t *opGraph, + hipdnnEnginePluginExecutionContext_t *executionContext) { + // See comment in hipdnnEnginePluginGetEngineDetails for more about how this + // function fits into the flow. + + LOG_API_ENTRY("handle=" << static_cast(handle) << ", engineConfig=" + << static_cast(engineConfig) + << ", opGraph=" << static_cast(opGraph) + << ", executionContext=" + << static_cast(executionContext)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(engineConfig); + FUSILLI_PLUGIN_CHECK_NULL(opGraph); + FUSILLI_PLUGIN_CHECK_NULL(executionContext); + + // Ensure that config contains expected engine id. + hipdnn_plugin_sdk::EngineConfigWrapper engineConfigWrapper( + engineConfig->ptr, engineConfig->size); + if (engineConfigWrapper.engineId() != + hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_BAD_PARAM, "unexpected engine id"); + } + + auto importAndCompile = [&handle](const hipdnnPluginConstData_t *opGraph) + -> fusilli::ErrorOr { + // Import fusilli::Graph and compute UID -> fusilli::TensorAttr map for + // graph boundary tensors. + FUSILLI_ASSIGN_OR_RETURN(HipdnnEnginePluginExecutionContext graphImport, + importGraph(opGraph)); + + // Compile graph + FUSILLI_ASSIGN_OR_RETURN(auto fusilliHandle, handle->getFusilliHandle()); + FUSILLI_CHECK_ERROR(graphImport.graph.compile(fusilliHandle)); + + return fusilli::ok(std::move(graphImport)); + }; + + FUSILLI_PLUGIN_ASSIGN_OR_RETURN(auto importedGraph, + importAndCompile(opGraph)); + *executionContext = + new HipdnnEnginePluginExecutionContext(std::move(importedGraph)); + + LOG_API_SUCCESS_AUTO( + "created_execution_context=" << static_cast(*executionContext)); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnEnginePluginDestroyExecutionContext( + hipdnnEnginePluginHandle_t handle, + hipdnnEnginePluginExecutionContext_t executionContext) { + LOG_API_ENTRY("handle=" << static_cast(handle) + << ", executionContext=" + << static_cast(executionContext)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(executionContext); + + delete executionContext; + + LOG_API_SUCCESS_AUTO("destroyed executionContext"); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnEnginePluginGetWorkspaceSizeFromExecutionContext( + hipdnnEnginePluginHandle_t handle, + hipdnnEnginePluginExecutionContext_t executionContext, + size_t *workspaceSize) { + LOG_API_ENTRY( + "handle=" << static_cast(handle) << ", executionContext=" + << static_cast(executionContext) + << ", workspaceSize=" << static_cast(workspaceSize)); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(executionContext); + FUSILLI_PLUGIN_CHECK_NULL(workspaceSize); + + // This should never happen. When it does we'll at least get a sane error + // message. + std::optional maybeSize = executionContext->graph.getWorkspaceSize(); + if (!maybeSize.has_value()) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR, + "Workspace size not available — graph may not be compiled"); + } + *workspaceSize = *maybeSize; + + LOG_API_SUCCESS_AUTO("workspaceSize=" << *workspaceSize); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +hipdnnPluginStatus_t hipdnnEnginePluginExecuteOpGraph( + hipdnnEnginePluginHandle_t handle, + hipdnnEnginePluginExecutionContext_t executionContext, void *workspacePtr, + const hipdnnPluginDeviceBuffer_t *deviceBuffers, + uint32_t numDeviceBuffers) { + // See comment in hipdnnEnginePluginGetEngineDetails for more about how this + // function fits into the flow. + + LOG_API_ENTRY( + "handle=" << static_cast(handle) << ", executionContext=" + << static_cast(executionContext) + << ", workspace=" << workspacePtr << ", deviceBuffers=" + << static_cast(deviceBuffers) + << ", numDeviceBuffers=" << numDeviceBuffers); + FUSILLI_PLUGIN_CHECK_NULL(handle); + FUSILLI_PLUGIN_CHECK_NULL(executionContext); + FUSILLI_PLUGIN_CHECK_NULL(deviceBuffers); + + // Get device allocator for buffer imports below. + FUSILLI_PLUGIN_ASSIGN_OR_RETURN(auto fusilliHandle, + handle->getFusilliHandle()); + iree_hal_allocator_t *deviceAllocator = + iree_hal_device_allocator(fusilliHandle.get()); + + // Fill variant pack for graph execution. Fusilli expects a variant pack to + // map from fusilli::TensorAttr -> fusilli::Buffer for all boundary tensors. + // + // The execution context (created by hipdnnEnginePluginCreateExecutionContext) + // holds a UID -> fusilli::TensorAttr mapping for all boundary tensors + // already. To build the mapping we need to: + // 1. Find the external HIP-allocated device buffer in `deviceBuffers` + // associated with UID. + // 2. Import buffer from 1) into IREE runtime and create fusilli::Buffer. + // + // We may want to cache all of this in the future. As long as the device + // pointers + UIDs haven't changed it should be possible to re-use an already + // imported buffer + buffer view + the call that fusilli::Graph::execute + // builds internally. + std::unordered_map, + std::shared_ptr> + variantPack; + for (auto &[uid, tensorAttr] : executionContext->uidToFusilliTensorAttr) { + // 1. Find associated buffer. + FUSILLI_PLUGIN_ASSIGN_OR_RETURN( + hipdnnPluginDeviceBuffer_t hipMallocedBuffer, + findDeviceBuffer(uid, deviceBuffers, numDeviceBuffers)); + + // 2. Import external buffer into IREE runtime and create fusilli::Buffer. + FUSILLI_PLUGIN_ASSIGN_OR_RETURN( + auto elementType, + fusilliDataTypeToIreeHalDataType(tensorAttr->getDataType())); + size_t sizeBytes = iree_hal_element_dense_byte_count(elementType) * + static_cast(tensorAttr->getVolume()); + std::vector dims = tensorAttr->getPhysicalDim(); + std::vector shape(dims.begin(), dims.end()); + FUSILLI_PLUGIN_ASSIGN_OR_RETURN( + auto fusilliBuffer, + importDevicePointer(/*deviceAllocator=*/deviceAllocator, + /*devicePtr=*/hipMallocedBuffer.ptr, + /*sizeBytes=*/sizeBytes, + /*shape=*/shape, + /*elementType=*/elementType)); + variantPack[tensorAttr] = std::move(fusilliBuffer); + } + + // Import workspace buffer if the compiled graph requires transient storage. + std::shared_ptr workspace = nullptr; + std::optional maybeWorkspaceSize = + executionContext->graph.getWorkspaceSize(); + if (!maybeWorkspaceSize.has_value()) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR, + "Workspace size not available — graph may not be compiled"); + } + size_t workspaceSize = *maybeWorkspaceSize; + if (workspaceSize > 0) { + if (workspacePtr == nullptr) { + return hipdnn_plugin_sdk::PluginLastErrorManager::setLastError( + HIPDNN_PLUGIN_STATUS_BAD_PARAM, + "Workspace of size " + std::to_string(workspaceSize) + + " bytes required but workspace pointer is null"); + } + // Workspace is opaque 1D array of bytes. + iree_hal_dim_t workspaceShape[1] = {workspaceSize}; + FUSILLI_PLUGIN_ASSIGN_OR_RETURN( + workspace, + importDevicePointer(/*deviceAllocator=*/deviceAllocator, + /*devicePtr=*/workspacePtr, + /*sizeBytes=*/workspaceSize, + /*shape=*/workspaceShape, + /*elementType=*/IREE_HAL_ELEMENT_TYPE_UINT_8)); + } + + FUSILLI_PLUGIN_CHECK_ERROR( + executionContext->graph.execute(fusilliHandle, variantPack, workspace)); + + LOG_API_SUCCESS_AUTO("executed graph"); + return HIPDNN_PLUGIN_STATUS_SUCCESS; +} + +} // extern "C" diff --git a/dnn-providers/fusilli-provider/test/CMakeLists.txt b/dnn-providers/fusilli-provider/test/CMakeLists.txt new file mode 100644 index 00000000000..4b5160982d5 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +find_package(Threads REQUIRED) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../build_tools") +include(FusilliPluginTestUtils) + +# Plugin API tests +add_fusilli_plugin_test( + NAME fusilli_plugin_api_test + SRCS + test_fusilli_plugin_api.cpp + utils.h + DEPS + hip::host + fusilli_plugin # Link plugin .so directly + fusilli::fusilli + GTest::gtest_main + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads +) + +# Graph import tests +add_fusilli_plugin_test( + NAME fusilli_plugin_graph_import_test + SRCS + test_graph_import.cpp + utils.h + DEPS + fusilli::fusilli + GTest::gtest_main + hipdnn_plugin_sdk + hipdnn_data_sdk +) + +# Integration tests +add_subdirectory(integration) diff --git a/dnn-providers/fusilli-provider/test/integration/CMakeLists.txt b/dnn-providers/fusilli-provider/test/integration/CMakeLists.txt new file mode 100644 index 00000000000..97a3db491db --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/CMakeLists.txt @@ -0,0 +1,156 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +find_package(Threads REQUIRED) + +# Integration tests +add_fusilli_plugin_test( + NAME fusilli_plugin_plugin_load_test + SRCS plugin_load.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" + FUSILLI_PLUGIN_TARGET="${FUSILLI_PLUGIN_TARGET}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_conv_fprop_parameterized_full_test + SRCS convolution/conv_fprop_parameterized_full.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_conv_fprop_with_relu_and_bias_test + SRCS convolution/simple_conv_fprop_with_relu_and_bias.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_conv_fprop_parameterized_stream_device_test + SRCS convolution/simple_conv_fprop_parameterized_stream_device.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_matmul_test + SRCS matmul/simple_matmul.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_matmul_parameterized_test + SRCS matmul/matmul_parameterized.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_batched_matmul_parameterized_test + SRCS matmul/batched_matmul_parameterized.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_scaled_matmul_accumulate_test + SRCS matmul/simple_scaled_matmul_accumulate.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_pointwise_test + SRCS pointwise/simple_pointwise.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) + +add_fusilli_plugin_test( + NAME fusilli_plugin_simple_conv_wgrad_test + SRCS convolution/simple_conv_wgrad.cpp + DEPS + GTest::gtest_main + hip::host + hipdnn_frontend + hipdnn_plugin_sdk + hipdnn_data_sdk + hipdnn_test_sdk + Threads::Threads + COMPILE_DEFS + FUSILLI_PLUGIN_PATH="../${FUSILLI_PLUGIN_RELATIVE_PATH}" +) diff --git a/dnn-providers/fusilli-provider/test/integration/convolution/conv_fprop_parameterized_full.cpp b/dnn-providers/fusilli-provider/test/integration/convolution/conv_fprop_parameterized_full.cpp new file mode 100644 index 00000000000..70e4f2487fb --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/convolution/conv_fprop_parameterized_full.cpp @@ -0,0 +1,307 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +namespace { + +struct ConvTestCase { + std::vector xDims_; // Input tensor dims [N, C, H, W] + std::vector wDims_; // Weight tensor dims [K, C/groups, R, S] + std::vector yDims_; // Output tensor dims (computed) + std::vector padding_; + std::vector stride_; + std::vector dilation_; + unsigned seed_; + std::string layoutName_; + + ConvTestCase(std::vector xDims, std::vector wDims, + std::vector padding, std::vector stride, + std::vector dilation, unsigned seed, + std::string layoutName) + : xDims_(std::move(xDims)), wDims_(std::move(wDims)), + padding_(std::move(padding)), stride_(std::move(stride)), + dilation_(std::move(dilation)), seed_(seed), + layoutName_(std::move(layoutName)) { + + // Indices: + // input (x) dimensions + // n - Batch size, always at index 0 + // ci - Channels, always at index 1 + // d - Depth (for 3D convolutions), always at index 2 if present + // h - Height, at index 2 for 2D conv and index 3 for 3D conv + // w - Width, at index 3 for 2D conv and index 4 for 3D conv + // weight (w) dimensions + // k - Number of output channels, always at index 0 + // cw - Weight channels, always at index 1 + // - for grouped convs groups = ci / cw + // t - Filter Depth (for 3D convolutions), always at index 2 if present + // r - Filter height, at index 2 for 2D conv and index 3 for 3D conv + // s - Filter width, at index 3 for 2D conv and index 4 for 3D conv + // output (y) dimensions + // n - Batch size, always at index 0 + // k - Output channels, always at index 1 + // o - Output depth (for 3D convolutions), always at index 2 if present + // p - Output height, at index 2 for 2D conv and index 3 for 3D conv + // q - Output width, at index 3 for 2D conv and index 4 for 3D conv + constexpr int kBatchSizeIndex = 0; + constexpr int kWeightOutputChannelIndex = 0; + constexpr int kChannelIndex = 1; + constexpr int kSpatialStartIndex = 2; + + int64_t n = xDims_[kBatchSizeIndex]; + int64_t k = wDims_[kWeightOutputChannelIndex]; + size_t numSpatialDims = xDims_.size() - kSpatialStartIndex; + + // Validate that the convolution parameter vectors match the number of + // spatial dimensions + if (padding_.size() != numSpatialDims || + dilation_.size() != numSpatialDims || + stride_.size() != numSpatialDims) { + throw std::invalid_argument("Convolution parameter vectors must match " + "the number of spatial dimensions."); + } + + yDims_.resize(xDims_.size()); + yDims_[kBatchSizeIndex] = n; + yDims_[kChannelIndex] = k; + + // Formula: + // YDim = ((XDim + 2*padding - dilation*(WDim-1) - 1) / stride) + 1 + for (size_t i = 0; i < numSpatialDims; ++i) { + int64_t inDim = xDims_[i + kSpatialStartIndex]; + int64_t kernelDim = wDims_[i + kSpatialStartIndex]; + int64_t effectiveKernelSize = dilation_[i] * (kernelDim - 1) + 1; + int64_t paddedInputSize = inDim + 2 * padding_[i]; + yDims_[i + kSpatialStartIndex] = + ((paddedInputSize - effectiveKernelSize) / stride_[i]) + 1; + } + } + + friend std::ostream &operator<<(std::ostream &ss, const ConvTestCase &tc) { + // Format: N{n}C{c}H{h}W{w}_K{k}R{r}S{s}_Pad{p}_Str{s}_Dil{d}_{layout} + ss << "N" << tc.xDims_[0] << "C" << tc.xDims_[1] << "H" << tc.xDims_[2] + << "W" << tc.xDims_[3]; + ss << "_K" << tc.wDims_[0] << "R" << tc.wDims_[2] << "S" << tc.wDims_[3]; + ss << "_Pad" << tc.padding_[0]; + ss << "_Str" << tc.stride_[0]; + ss << "_Dil" << tc.dilation_[0]; + ss << "_" << tc.layoutName_; + return ss; + } +}; + +std::vector getConvTestCases(std::string layoutName) { + unsigned seed = 42; + return { + // Filter 1x1 + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{1, 16, 1, 1}, + /*padding=*/{0, 0}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Filter 3x3, no padding + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{1, 16, 3, 3}, + /*padding=*/{0, 0}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Padding = 1 + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{1, 16, 3, 3}, + /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Stride = 2 + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{1, 16, 3, 3}, + /*padding=*/{1, 1}, /*stride=*/{2, 2}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Dilation = 2 + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{1, 16, 3, 3}, + /*padding=*/{2, 2}, /*stride=*/{1, 1}, /*dilation=*/{2, 2}, seed, + layoutName}, + // Batched convolution + {/*xDims=*/{8, 16, 16, 16}, /*wDims=*/{1, 16, 1, 1}, + /*padding=*/{0, 0}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Non-square spatial dims + {/*xDims=*/{1, 16, 16, 8}, /*wDims=*/{1, 16, 3, 3}, + /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Grouped convolution - 2 groups + {/*xDims=*/{1, 16, 16, 16}, /*wDims=*/{2, 8, 3, 3}, + /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, seed, + layoutName}, + // Grouped convolution - 2 batches, 4 groups, stride, padding, dilation + {/*xDims=*/{2, 32, 16, 16}, /*wDims=*/{4, 8, 3, 3}, + /*padding=*/{1, 1}, /*stride=*/{2, 2}, /*dilation=*/{2, 2}, seed, + layoutName}, + }; +} + +} // namespace + +class ConvFpropParameterizedTest + : public ::testing::TestWithParam {}; + +TEST_P(ConvFpropParameterizedTest, Correctness) { + const ::testing::TestInfo *const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + + // Get the name of the individual test + const char *test_name = test_info->name(); + + const ConvTestCase &tc = GetParam(); + + // Load only the fusilli plugin + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Setup hipdnn and set handle + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + hipdnnHandle_t handle = nullptr; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // See comment on INSTANTIATE_TEST_SUITE_P below + TensorLayout layout; + if (tc.layoutName_ == "NCHW") { + layout = TensorLayout::NCHW; + } else if (tc.layoutName_ == "NHWC") { + layout = TensorLayout::NHWC; + } else { + throw std::invalid_argument("Unknown layout: " + tc.layoutName_); + } + + // UIDs + const int64_t xUID = 0; + const int64_t wUID = 1; + const int64_t yUID = 2; + + // Create tensors + PinnedTensor xTensor(tc.xDims_, layout); + PinnedTensor wTensor(tc.wDims_, layout); + PinnedTensor yTensor(tc.yDims_, layout); + PinnedTensor expectedOutput(tc.yDims_, layout); + + // Initialize with random values + std::mt19937 gen(tc.seed_); + std::uniform_real_distribution dist(-1.0f, 1.0f); + + auto *xData = xTensor.memory().hostData(); + auto *wData = wTensor.memory().hostData(); + size_t xSize = 1, wSize = 1; + for (long d : tc.xDims_) + xSize *= static_cast(d); + for (long d : tc.wDims_) + wSize *= static_cast(d); + + for (size_t i = 0; i < xSize; ++i) + xData[i] = dist(gen); + for (size_t i = 0; i < wSize; ++i) + wData[i] = dist(gen); + + xTensor.memory().markHostModified(); + wTensor.memory().markHostModified(); + + // Compute CPU reference + CpuFpReferenceConvolution::fprop(xTensor, wTensor, expectedOutput, tc.stride_, + tc.dilation_, tc.padding_); + + // Create graph + auto graph = std::make_shared(); + graph->set_name(test_name); + graph->set_io_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes + auto xAttr = std::make_shared( + graph::makeTensorAttributes("input", DataType_t::FLOAT, xTensor)); + xAttr->set_uid(xUID); + auto wAttr = std::make_shared( + graph::makeTensorAttributes("filter", DataType_t::FLOAT, wTensor)); + wAttr->set_uid(wUID); + + // Create convolution attributes + graph::ConvFpropAttributes convAttr; + convAttr.set_name("conv_fprop") + .set_padding(tc.padding_) + .set_stride(tc.stride_) + .set_dilation(tc.dilation_); + + // Build graph + auto yAttr = graph->conv_fprop(xAttr, wAttr, convAttr); + yAttr->set_uid(yUID); + yAttr->set_dim(yTensor.dims()).set_stride(yTensor.strides()).set_output(true); + + // Validate and build plans + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack + std::unordered_map variantPack; + variantPack[xUID] = xTensor.memory().deviceData(); + variantPack[wUID] = wTensor.memory().deviceData(); + variantPack[yUID] = yTensor.memory().deviceData(); + + // Execute graph + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Mark device data as modified so validation reads from device + yTensor.memory().markDeviceModified(); + + // Validate results + CpuFpReferenceValidation validator(1e-5f, 1e-5f); + EXPECT_TRUE(validator.allClose(expectedOutput, yTensor)); + + // Cleanup + hipdnnDestroy(handle); + (void)hipStreamDestroy(stream); +} + +// Use string literals instead of TensorLayout::NCHW/NHWC/etc. to avoid static +// initialization order fiasco. TensorLayout is defined in a shared library with +// members that require runtime initialization. INSTANTIATE_TEST_SUITE_P +// creates static registrations, and if those run before the library's statics +// initialize, we'll get a segfault or similar. The solution would be to +// constexpr-ize TensorLayout::NCHW/NHWC. +INSTANTIATE_TEST_SUITE_P(NCHW, ConvFpropParameterizedTest, + ::testing::ValuesIn(getConvTestCases("NCHW"))); + +INSTANTIATE_TEST_SUITE_P(NHWC, ConvFpropParameterizedTest, + ::testing::ValuesIn(getConvTestCases("NHWC"))); diff --git a/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_parameterized_stream_device.cpp b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_parameterized_stream_device.cpp new file mode 100644 index 00000000000..0fd66da1f19 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_parameterized_stream_device.cpp @@ -0,0 +1,218 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// ---------------------------------------------------------------------- +// Test for simple NCHW KCRS convfprop with parameterization for +// implicit/explicitly set stream, and device. +// +// | Device | Stream | Test Name | +// |--------|--------|---------------------------| +// | 0 | No | Device0_WithoutStream | +// | 0 | Yes | Device0_WithStream | +// | N-1* | No | Device{N-1}_WithoutStream | +// | N-1* | Yes | Device{N-1}_WithStream | +// +// *Device N-1 tests only run if deviceCount > 1 +// ---------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +struct TestParams { + bool shouldSetStream; + hipDevice_t deviceId; + + // GTest uses the << operator to update the parameterized test name. + friend std::ostream &operator<<(std::ostream &ss, const TestParams &p) { + ss << "Device" << p.deviceId << "_"; + ss << (p.shouldSetStream ? "WithStream" : "WithoutStream"); + return ss; + } +}; + +class ConvFpropIntegrationTest : public ::testing::TestWithParam {}; + +TEST_P(ConvFpropIntegrationTest, Basic1x1Convolution) { + TestParams params = GetParam(); + // Uncomment to enable debug logging + // setenv("HIPDNN_LOG_LEVEL", "info", 1); + + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + + // Set device. + ASSERT_EQ(hipSetDevice(params.deviceId), hipSuccess); + + // Create stream. + hipStream_t stream = nullptr; + if (params.shouldSetStream) { + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + } + + // Set plugin paths. + // + // FUSILLI_PLUGIN_PATH is a relative from to executable directory where this + // test lives e.g. "../lib/hipdnn_plugins/engines/fusilli_plugin". It needs to + // be relative as the tests will be installed (and therefore located) in some + // build configurations (`TheRock` for example). + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + + // Check that loading the plugin didn't change the active device. + hipDevice_t deviceId = -1; + ASSERT_EQ(hipGetDevice(&deviceId), hipSuccess); + ASSERT_EQ(deviceId, params.deviceId); + + if (params.shouldSetStream) { + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + } + + // Dimensions. + const int64_t n = 16; // batch + const int64_t c = 128; // in channels + const int64_t h = 64; // image height + const int64_t w = 64; // image width + const int64_t k = 256; // out channels + const int64_t r = 1; // filter height + const int64_t s = 1; // filter width + + // UIDs. + const int64_t xUID = 0; + const int64_t wUID = 1; + const int64_t yUID = 2; + + // Initialize tensors. + PinnedTensor xTensor({n, c, h, w}); + PinnedTensor wTensor({k, c, r, s}); + PinnedTensor yTensor({n, k, h, w}); + xTensor.fillWithValue(1.0f); + wTensor.fillWithValue(1.0f); + yTensor.fillWithValue(-100.0f); + + // Expected output. + PinnedTensor expectedOutput({n, k, h, w}); + expectedOutput.fillWithValue(128.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("conv_1x1_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto xAttr = std::make_shared( + graph::makeTensorAttributes("input", DataType_t::FLOAT, xTensor)); + xAttr->set_uid(xUID); + auto wAttr = std::make_shared( + graph::makeTensorAttributes("filter", DataType_t::FLOAT, wTensor)); + wAttr->set_uid(wUID); + + // Create convolution attributes. + graph::ConvFpropAttributes convAttr; + convAttr.set_name("conv_fprop") + .set_padding({0, 0}) + .set_stride({1, 1}) + .set_dilation({1, 1}); + + // Create graph. + auto yAttr = graph->conv_fprop(xAttr, wAttr, convAttr); + yAttr->set_uid(yUID); + yAttr->set_dim(yTensor.dims()).set_stride(yTensor.strides()).set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[xUID] = xTensor.memory().deviceData(); + variantPack[wUID] = wTensor.memory().deviceData(); + variantPack[yUID] = yTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + // Mark hipDNN tensor CPU cache ask stale, data must be read from device. + yTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, yTensor)); + + // Clean up. + if (params.shouldSetStream) { + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + } + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} + +static std::vector generateTestParams() { + std::vector params; + int deviceCount = -1; + assert(hipGetDeviceCount(&deviceCount) == hipSuccess); + + // Always test with device 0. + params.push_back({ + .shouldSetStream = false, + .deviceId = 0, + }); + params.push_back({ + .shouldSetStream = true, + .deviceId = 0, + }); + + // Test on last device if multiple devices are available. + if (deviceCount > 1) { + params.push_back({ + .shouldSetStream = false, + .deviceId = deviceCount - 1, + }); + params.push_back({ + .shouldSetStream = true, + .deviceId = deviceCount - 1, + }); + } + + return params; +} + +INSTANTIATE_TEST_SUITE_P(, ConvFpropIntegrationTest, + ::testing::ValuesIn(generateTestParams())); diff --git a/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_with_relu_and_bias.cpp b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_with_relu_and_bias.cpp new file mode 100644 index 00000000000..2a5920a0287 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_fprop_with_relu_and_bias.cpp @@ -0,0 +1,282 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +// Test: Convolution 1x1 fused with ReLU activation +// Pattern: conv_fprop -> pointwise(RELU_FWD) +TEST(ConvFpropFusedIntegrationTest, ConvolutionWithRelu) { + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // Dimensions. + const int64_t n = 16; // batch + const int64_t c = 128; // in channels + const int64_t h = 64; // image height + const int64_t w = 64; // image width + const int64_t k = 256; // out channels + const int64_t r = 1; // filter height + const int64_t s = 1; // filter width + + // UIDs. + const int64_t xUID = 0; + const int64_t wUID = 1; + const int64_t yUID = 2; + + // Initialize tensors. + PinnedTensor xTensor({n, c, h, w}); + PinnedTensor wTensor({k, c, r, s}); + PinnedTensor yTensor({n, k, h, w}); + xTensor.fillWithValue(1.0f); + wTensor.fillWithValue(1.0f); + yTensor.fillWithValue(-100.0f); + + // Expected output: conv produces 128.0, ReLU passes through (128 > 0). + PinnedTensor expectedOutput({n, k, h, w}); + expectedOutput.fillWithValue(128.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("conv_1x1_relu_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto xAttr = std::make_shared( + graph::makeTensorAttributes("input", DataType_t::FLOAT, xTensor)); + xAttr->set_uid(xUID); + auto wAttr = std::make_shared( + graph::makeTensorAttributes("filter", DataType_t::FLOAT, wTensor)); + wAttr->set_uid(wUID); + + // Create convolution attributes. + graph::ConvFpropAttributes convAttr; + convAttr.set_name("conv_fprop") + .set_padding({0, 0}) + .set_stride({1, 1}) + .set_dilation({1, 1}); + + // Create conv node - mark as non-output (intermediate). + auto convOutAttr = graph->conv_fprop(xAttr, wAttr, convAttr); + convOutAttr->set_dim(yTensor.dims()) + .set_stride(yTensor.strides()) + .set_output(false); + + // Create pointwise ReLU activation. + graph::PointwiseAttributes reluAttr; + reluAttr.set_name("relu").set_mode(PointwiseMode_t::RELU_FWD); + + auto yAttr = graph->pointwise(convOutAttr, reluAttr); + yAttr->set_uid(yUID); + yAttr->set_dim(yTensor.dims()).set_stride(yTensor.strides()).set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[xUID] = xTensor.memory().deviceData(); + variantPack[wUID] = wTensor.memory().deviceData(); + variantPack[yUID] = yTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + yTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, yTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} + +// Test: Convolution 1x1 fused with Bias (ADD) and ReLU activation +// Pattern: conv_fprop -> pointwise(ADD) -> pointwise(RELU_FWD) +TEST(ConvFpropFusedIntegrationTest, ConvolutionWithBiasAndRelu) { + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // Dimensions. + const int64_t n = 16; // batch + const int64_t c = 128; // in channels + const int64_t h = 64; // image height + const int64_t w = 64; // image width + const int64_t k = 256; // out channels + const int64_t r = 1; // filter height + const int64_t s = 1; // filter width + + // UIDs. + const int64_t xUID = 0; + const int64_t wUID = 1; + const int64_t biasUID = 2; + const int64_t yUID = 3; + + // Initialize tensors. + PinnedTensor xTensor({n, c, h, w}); + PinnedTensor wTensor({k, c, r, s}); + // Bias is per-channel: shape (1, k, 1, 1). + PinnedTensor biasTensor({1, k, 1, 1}); + PinnedTensor yTensor({n, k, h, w}); + xTensor.fillWithValue(1.0f); + wTensor.fillWithValue(1.0f); + biasTensor.fillWithValue(2.0f); + yTensor.fillWithValue(-100.0f); + + // Expected output: conv produces 128.0, + bias 2.0 = 130.0, ReLU passes. + PinnedTensor expectedOutput({n, k, h, w}); + expectedOutput.fillWithValue(130.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("conv_1x1_bias_relu_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto xAttr = std::make_shared( + graph::makeTensorAttributes("input", DataType_t::FLOAT, xTensor)); + xAttr->set_uid(xUID); + auto wAttr = std::make_shared( + graph::makeTensorAttributes("filter", DataType_t::FLOAT, wTensor)); + wAttr->set_uid(wUID); + auto biasAttr = std::make_shared( + graph::makeTensorAttributes("bias", DataType_t::FLOAT, biasTensor)); + biasAttr->set_uid(biasUID); + + // Create convolution attributes. + graph::ConvFpropAttributes convAttr; + convAttr.set_name("conv_fprop") + .set_padding({0, 0}) + .set_stride({1, 1}) + .set_dilation({1, 1}); + + // Create conv node - mark as non-output (intermediate). + auto convOutAttr = graph->conv_fprop(xAttr, wAttr, convAttr); + convOutAttr->set_dim(yTensor.dims()) + .set_stride(yTensor.strides()) + .set_output(false); + + // Create pointwise ADD for bias. + graph::PointwiseAttributes biasAddAttr; + biasAddAttr.set_name("bias_add").set_mode(PointwiseMode_t::ADD); + + auto biasOutAttr = graph->pointwise(convOutAttr, biasAttr, biasAddAttr); + biasOutAttr->set_dim(yTensor.dims()) + .set_stride(yTensor.strides()) + .set_output(false); + + // Create pointwise ReLU activation. + graph::PointwiseAttributes reluAttr; + reluAttr.set_name("relu").set_mode(PointwiseMode_t::RELU_FWD); + + auto yAttr = graph->pointwise(biasOutAttr, reluAttr); + yAttr->set_uid(yUID); + yAttr->set_dim(yTensor.dims()).set_stride(yTensor.strides()).set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[xUID] = xTensor.memory().deviceData(); + variantPack[wUID] = wTensor.memory().deviceData(); + variantPack[biasUID] = biasTensor.memory().deviceData(); + variantPack[yUID] = yTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + yTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, yTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} diff --git a/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_wgrad.cpp b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_wgrad.cpp new file mode 100644 index 00000000000..641c67a7d67 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/convolution/simple_conv_wgrad.cpp @@ -0,0 +1,152 @@ +// Copyright 2026 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +// Test: ConvWGrad 1x1. +TEST(ConvWgradIntegrationTest, ConvWgrad) { + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin path. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + const int64_t n = 16; // batch + const int64_t c = 128; // input channels + const int64_t h = 64; // spatial height + const int64_t w = 32; // spatial width + const int64_t k = 256; // output channels (gradient channels) + const int64_t r = 1; // filter height + const int64_t s = 1; // filter width + + // UIDs. + const int64_t dyUID = 0; + const int64_t xUID = 1; + const int64_t dwUID = 2; + + // Initialize tensors. + PinnedTensor dyTensor({n, k, h, w}, {k * h * w, 1, k * w, k}); + PinnedTensor xTensor({n, c, h, w}, {c * h * w, 1, c * w, c}); + PinnedTensor dwTensor({k, c, r, s}); + dyTensor.fillWithValue(1.0f); + xTensor.fillWithValue(1.0f); + dwTensor.fillWithValue(-100.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("workspace_conv_wgrad_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes with NHWC strides. + auto dyAttr = std::make_shared( + graph::makeTensorAttributes("dy", DataType_t::FLOAT, dyTensor)); + dyAttr->set_uid(dyUID); + auto xAttr = std::make_shared( + graph::makeTensorAttributes("x", DataType_t::FLOAT, xTensor)); + xAttr->set_uid(xUID); + + // Create conv wgrad attributes. + graph::ConvWgradAttributes convWgradAttr; + convWgradAttr.set_name("conv_wgrad") + .set_padding({0, 0}) + .set_stride({1, 1}) + .set_dilation({1, 1}); + + // Create conv wgrad node. + auto dwAttr = graph->conv_wgrad(dyAttr, xAttr, convWgradAttr); + dwAttr->set_uid(dwUID); + dwAttr->set_dim(dwTensor.dims()) + .set_stride(dwTensor.strides()) + .set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Query workspace size. + int64_t workspaceSize = 0; + result = graph->get_workspace_size(workspaceSize); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + // Changes in IREE may invalidate this assertion in the future, at the time of + // this commit this is the only test that validates workspaces sizes. If you + // change this please ensure there's another test that uses a non-zero + // workspace. + ASSERT_GT(workspaceSize, 0) + << "Conv wgrad should require non-zero workspace for multi-dispatch " + "execution"; + + // Allocate workspace. + void *workspace = nullptr; + ASSERT_EQ(hipMalloc(&workspace, static_cast(workspaceSize)), + hipSuccess); + + // Create variant pack. + std::unordered_map variantPack; + variantPack[dyUID] = dyTensor.memory().deviceData(); + variantPack[xUID] = xTensor.memory().deviceData(); + variantPack[dwUID] = dwTensor.memory().deviceData(); + + // Execute graph with workspace. + result = graph->execute(handle, variantPack, workspace); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + dwTensor.memory().markDeviceModified(); + + // Validate results. For 1x1 conv wgrad with all-ones inputs, stride=1, + // no padding: dw[k,c,0,0] = sum_{n,h,w} dy * x = n * h * w. + const float expected = static_cast(n * h * w); + auto *dwData = dwTensor.memory().hostData(); + for (size_t i = 0; i < static_cast(k * c * r * s); ++i) { + EXPECT_FLOAT_EQ(dwData[i], expected) << "mismatch at index " << i; + } + + // Clean up. + ASSERT_EQ(hipFree(workspace), hipSuccess); + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} diff --git a/dnn-providers/fusilli-provider/test/integration/matmul/batched_matmul_parameterized.cpp b/dnn-providers/fusilli-provider/test/integration/matmul/batched_matmul_parameterized.cpp new file mode 100644 index 00000000000..d9d96a35039 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/matmul/batched_matmul_parameterized.cpp @@ -0,0 +1,255 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +namespace { + +// Batched matmul: A[B,M,K] x B[B,K,N] = C[B,M,N] +constexpr int64_t B = 2; +constexpr int64_t M = 4; +constexpr int64_t K = 8; +constexpr int64_t N = 5; + +// Bias modes for batched matmul with output shape {B, M, N}. +enum class BiasMode { + NoBias, + BiasFullBroadcast, // Bias shape {1, 1, N} - broadcasts across B batches and M + // rows + BiasRowBroadcast, // Bias shape {B, 1, N} - different per batch, broadcasts + // across M rows + BiasNoBroadcast // Bias shape {B, M, N} - no broadcasting +}; + +struct BatchedMatmulTestCase { + // When true, B tensor is stored physically transposed (though we still + // describe it to the API as a row-major tensor). B will have logical dims + // {B, K, N} (row-major), but stride {N*K, 1, K} (column major). + bool transposeB; + + BiasMode biasMode; + + friend std::ostream &operator<<(std::ostream &os, + const BatchedMatmulTestCase &tc) { + os << "B" << B << "M" << M << "K" << K << "N" << N; + if (tc.transposeB) { + os << "_TransposeB"; + } + switch (tc.biasMode) { + case BiasMode::BiasFullBroadcast: + os << "_BiasFullBroadcast"; + break; + case BiasMode::BiasRowBroadcast: + os << "_BiasRowBroadcast"; + break; + case BiasMode::BiasNoBroadcast: + os << "_BiasNoBroad"; + break; + default: + break; + } + return os; + } +}; + +std::vector getBatchedMatmulTestCases() { + return { + // transposeB=false + {false, BiasMode::NoBias}, + {false, BiasMode::BiasFullBroadcast}, + {false, BiasMode::BiasRowBroadcast}, + {false, BiasMode::BiasNoBroadcast}, + // transposeB=true + {true, BiasMode::NoBias}, + {true, BiasMode::BiasFullBroadcast}, + {true, BiasMode::BiasRowBroadcast}, + {true, BiasMode::BiasNoBroadcast}, + }; +} + +} // namespace + +class BatchedMatmulParameterizedTest + : public ::testing::TestWithParam {}; + +TEST_P(BatchedMatmulParameterizedTest, Correctness) { + const BatchedMatmulTestCase &tc = GetParam(); + + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // UIDs. + const int64_t aUID = 0; + const int64_t bUID = 1; + const int64_t biasUID = 2; + const int64_t outUID = 3; + + // Initialize tensors. + PinnedTensor aTensor({B, M, K}); + // B tensor - logical dims are {B, K, N}, stride encodes the physical layout + // {K*N, N, 1} - (row major) when in standard layout + // {K*N, 1, K} - (column major) when transposed. + PinnedTensor bTensor(/*dims=*/{B, K, N}, + /*strides=*/tc.transposeB + ? std::vector{K * N, 1, K} + : std::vector{K * N, N, 1}); + PinnedTensor outTensor({B, M, N}); + + aTensor.fillWithValue(1.0f); + bTensor.fillWithValue(1.0f); + outTensor.fillWithValue(-100.0f); + + // Bias tensor + std::optional> biasTensor; + if (tc.biasMode != BiasMode::NoBias) { + std::vector biasDims; + switch (tc.biasMode) { + case BiasMode::BiasFullBroadcast: + biasDims = {1, 1, N}; + break; + case BiasMode::BiasRowBroadcast: + biasDims = {B, 1, N}; + break; + case BiasMode::BiasNoBroadcast: + biasDims = {B, M, N}; + break; + default: + break; + } + biasTensor.emplace(std::move(biasDims)); + biasTensor->fillWithValue(2.0f); + } + + // Expected output: each element = K (dot product of K ones) + bias if + // present. + PinnedTensor expectedOutput({B, M, N}); + float expectedValue = static_cast(K); + if (tc.biasMode != BiasMode::NoBias) { + expectedValue += 2.0f; + } + expectedOutput.fillWithValue(expectedValue); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("batched_matmul_parameterized_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Input tensors. + auto aAttr = std::make_shared( + graph::makeTensorAttributes("A", DataType_t::FLOAT, aTensor)); + aAttr->set_uid(aUID); + auto bAttr = std::make_shared( + graph::makeTensorAttributes("B", DataType_t::FLOAT, bTensor)); + bAttr->set_uid(bUID); + + // Create matmul node. + graph::MatmulAttributes matmulAttr; + matmulAttr.set_name("batched_matmul"); + auto matmul = graph->matmul(aAttr, bAttr, matmulAttr); + matmul->set_dim(outTensor.dims()).set_stride(outTensor.strides()); + + // Maybe add bias. + if (tc.biasMode != BiasMode::NoBias) { + // Matmul output is intermediate, bias is final. + matmul->set_output(false); + + // Create bias tensor attributes. + auto biasAttr = std::make_shared( + graph::makeTensorAttributes("bias", DataType_t::FLOAT, *biasTensor)); + biasAttr->set_uid(biasUID); + + // Add bias. + graph::PointwiseAttributes biasAddAttr; + biasAddAttr.set_name("bias_add").set_mode(PointwiseMode_t::ADD); + auto biasAdd = graph->pointwise(matmul, biasAttr, biasAddAttr); + biasAdd->set_uid(outUID) + .set_dim(outTensor.dims()) + .set_stride(outTensor.strides()) + .set_output(true); + } else { + // No bias: matmul output is the final output. + matmul->set_uid(outUID).set_output(true); + } + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[aUID] = aTensor.memory().deviceData(); + variantPack[bUID] = bTensor.memory().deviceData(); + variantPack[outUID] = outTensor.memory().deviceData(); + if (tc.biasMode != BiasMode::NoBias) { + variantPack[biasUID] = biasTensor->memory().deviceData(); + } + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + outTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, outTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} + +INSTANTIATE_TEST_SUITE_P(BatchedMatmul, BatchedMatmulParameterizedTest, + ::testing::ValuesIn(getBatchedMatmulTestCases())); diff --git a/dnn-providers/fusilli-provider/test/integration/matmul/matmul_parameterized.cpp b/dnn-providers/fusilli-provider/test/integration/matmul/matmul_parameterized.cpp new file mode 100644 index 00000000000..9c9342875a0 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/matmul/matmul_parameterized.cpp @@ -0,0 +1,236 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +namespace { + +// 2D matmul: A[M,K] x B[K,N] = C[M,N] +constexpr int64_t M = 4; +constexpr int64_t K = 8; +constexpr int64_t N = 5; + +// Bias modes +enum class BiasMode { + NoBias, // No bias + BiasFullBroadcast, // Bias shape {1, N} - broadcasts across M rows + BiasNoBroadcast // Bias shape {M, N} - no broadcasting +}; + +struct MatmulTestCase { + // When true, B tensor is stored physically transposed (though we still + // describe it to the API as a row-major tensor). B will have logical dims + // {K, N} (row-major), but stride {N*K, 1, K} (column major). + bool transposeB; + + BiasMode biasMode; + + friend std::ostream &operator<<(std::ostream &os, const MatmulTestCase &tc) { + os << "M" << M << "K" << K << "N" << N; + if (tc.transposeB) { + os << "_TransposeB"; + } + switch (tc.biasMode) { + case BiasMode::BiasFullBroadcast: + os << "_BiasBroadcast"; + break; + case BiasMode::BiasNoBroadcast: + os << "_BiasFull"; + break; + default: + break; + } + return os; + } +}; + +std::vector getMatmulTestCases() { + return { + // transposeB=false + {false, BiasMode::NoBias}, + {false, BiasMode::BiasFullBroadcast}, + {false, BiasMode::BiasNoBroadcast}, + // transposeB=true + {true, BiasMode::NoBias}, + {true, BiasMode::BiasFullBroadcast}, + {true, BiasMode::BiasNoBroadcast}, + }; +} + +} // namespace + +class MatmulParameterizedTest + : public ::testing::TestWithParam {}; + +TEST_P(MatmulParameterizedTest, Correctness) { + const MatmulTestCase &tc = GetParam(); + + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // UIDs. + const int64_t aUID = 0; + const int64_t bUID = 1; + const int64_t biasUID = 2; + const int64_t outUID = 3; + + // Initialize tensors. + PinnedTensor aTensor({M, K}); + + // B tensor - logical dims are {K, N}, stride encodes the physical layout + // {N, 1} - (row major) when in standard layout. + // {1, K} - (column major) when transposed. + PinnedTensor bTensor(/*dims=*/{K, N}, + /*strides=*/tc.transposeB + ? std::vector{1, K} + : std::vector{N, 1}); + + PinnedTensor outTensor({M, N}); + + aTensor.fillWithValue(1.0f); + bTensor.fillWithValue(1.0f); + outTensor.fillWithValue(-100.0f); + + // Bias. + std::optional> biasTensor; + if (tc.biasMode != BiasMode::NoBias) { + std::vector biasDims = (tc.biasMode == BiasMode::BiasFullBroadcast) + ? std::vector{1, N} + : std::vector{M, N}; + biasTensor.emplace(std::move(biasDims)); + biasTensor->fillWithValue(2.0f); + } + + // Expected output: each element = K (dot product of K ones) + bias if + // present. + PinnedTensor expectedOutput({M, N}); + float expectedValue = static_cast(K); + if (tc.biasMode != BiasMode::NoBias) { + expectedValue += 2.0f; + } + expectedOutput.fillWithValue(expectedValue); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("matmul_parameterized_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Input tensors. + auto aAttr = std::make_shared( + graph::makeTensorAttributes("A", DataType_t::FLOAT, aTensor)); + aAttr->set_uid(aUID); + auto bAttr = std::make_shared( + graph::makeTensorAttributes("B", DataType_t::FLOAT, bTensor)); + bAttr->set_uid(bUID); + + // Create matmul node. + graph::MatmulAttributes matmulAttr; + matmulAttr.set_name("matmul"); + auto matmul = graph->matmul(aAttr, bAttr, matmulAttr); + matmul->set_dim(outTensor.dims()).set_stride(outTensor.strides()); + + // Maybe add bias. + if (tc.biasMode != BiasMode::NoBias) { + // Matmul output is intermediate, bias is final. + matmul->set_output(false); + + // Create bias tensor attributes. + auto biasAttr = std::make_shared( + graph::makeTensorAttributes("bias", DataType_t::FLOAT, *biasTensor)); + biasAttr->set_uid(biasUID); + + // Add bias. + graph::PointwiseAttributes biasAddAttr; + biasAddAttr.set_name("bias_add").set_mode(PointwiseMode_t::ADD); + auto biasAdd = graph->pointwise(matmul, biasAttr, biasAddAttr); + biasAdd->set_uid(outUID) + .set_dim(outTensor.dims()) + .set_stride(outTensor.strides()) + .set_output(true); + } else { + // No bias: matmul output is the final output. + matmul->set_uid(outUID).set_output(true); + } + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[aUID] = aTensor.memory().deviceData(); + variantPack[bUID] = bTensor.memory().deviceData(); + variantPack[outUID] = outTensor.memory().deviceData(); + if (tc.biasMode != BiasMode::NoBias) { + variantPack[biasUID] = biasTensor->memory().deviceData(); + } + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + outTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, outTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} + +INSTANTIATE_TEST_SUITE_P(Matmul, MatmulParameterizedTest, + ::testing::ValuesIn(getMatmulTestCases())); diff --git a/dnn-providers/fusilli-provider/test/integration/matmul/simple_matmul.cpp b/dnn-providers/fusilli-provider/test/integration/matmul/simple_matmul.cpp new file mode 100644 index 00000000000..5d241beb38f --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/matmul/simple_matmul.cpp @@ -0,0 +1,127 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +// Test: Simple 2D matrix multiplication +// A[M,K] x B[K,N] = C[M,N] +TEST(MatmulIntegrationTest, SimpleMatmul) { + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // Dimensions: A[4,8] x B[8,5] = C[4,5] + const int64_t M = 4; + const int64_t K = 8; + const int64_t N = 5; + + // UIDs. + const int64_t aUID = 0; + const int64_t bUID = 1; + const int64_t cUID = 2; + + // Initialize tensors. + PinnedTensor aTensor({M, K}); + PinnedTensor bTensor({K, N}); + PinnedTensor cTensor({M, N}); + aTensor.fillWithValue(1.0f); + bTensor.fillWithValue(1.0f); + cTensor.fillWithValue(-100.0f); + + // Expected output: each element of C = K (dot product of K ones). + PinnedTensor expectedOutput({M, N}); + expectedOutput.fillWithValue(static_cast(K)); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("simple_matmul_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto aAttr = std::make_shared( + graph::makeTensorAttributes("A", DataType_t::FLOAT, aTensor)); + aAttr->set_uid(aUID); + auto bAttr = std::make_shared( + graph::makeTensorAttributes("B", DataType_t::FLOAT, bTensor)); + bAttr->set_uid(bUID); + + // Create matmul attributes. + graph::MatmulAttributes matmulAttr; + matmulAttr.set_name("matmul"); + + // Create matmul node. + auto cAttr = graph->matmul(aAttr, bAttr, matmulAttr); + cAttr->set_uid(cUID); + cAttr->set_dim(cTensor.dims()).set_stride(cTensor.strides()).set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[aUID] = aTensor.memory().deviceData(); + variantPack[bUID] = bTensor.memory().deviceData(); + variantPack[cUID] = cTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + cTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, cTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} diff --git a/dnn-providers/fusilli-provider/test/integration/matmul/simple_scaled_matmul_accumulate.cpp b/dnn-providers/fusilli-provider/test/integration/matmul/simple_scaled_matmul_accumulate.cpp new file mode 100644 index 00000000000..e8c49487542 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/matmul/simple_scaled_matmul_accumulate.cpp @@ -0,0 +1,190 @@ +// Copyright 2026 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +// Test: Scaled matrix multiplication with accumulation +// Computes: result = alpha * (A @ B) + beta * C +// +// Graph structure: +// matmul_out = matmul(A[M,K], B[K,N]) -> [M,N] +// scaled = pointwise_mul(matmul_out, alpha) -> alpha * matmul_out +// accum = pointwise_mul(C, beta) -> beta * C +// result = pointwise_add(scaled, accum) -> alpha*(A@B) + beta*C +// +// alpha and beta are pass-by-value scalars, NOT device buffers. +TEST(MatmulIntegrationTest, ScaledMatmulAccumulate) { + // Initialize HIP. + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + + hipStream_t stream = nullptr; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set plugin paths. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + // Create handle. + hipdnnHandle_t handle; + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + + // Dimensions: A[4,8] x B[8,5] = matmul_out[4,5] + const int64_t M = 4; + const int64_t K = 8; + const int64_t N = 5; + + // Scalar values. + const float alphaVal = 2.0f; + const float betaVal = 3.0f; + + // UIDs for device-backed tensors. + const int64_t aUID = 0; + const int64_t bUID = 1; + const int64_t cUID = 2; + const int64_t resultUID = 3; + + // UIDs for scalars (pass-by-value, NOT in variant pack). + const int64_t alphaUID = 4; + const int64_t betaUID = 5; + + // Initialize device tensors. + PinnedTensor aTensor({M, K}); + PinnedTensor bTensor({K, N}); + PinnedTensor cTensor({M, N}); + PinnedTensor resultTensor({M, N}); + aTensor.fillWithValue(1.0f); + bTensor.fillWithValue(1.0f); + cTensor.fillWithValue(5.0f); + resultTensor.fillWithValue(-100.0f); + + // Expected: alpha * (A@B) + beta * C + // = 2.0 * (K * 1.0 * 1.0) + 3.0 * 5.0 + // = 2.0 * 8.0 + 15.0 + // = 31.0 + PinnedTensor expectedOutput({M, N}); + expectedOutput.fillWithValue(alphaVal * static_cast(K) + + betaVal * 5.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("scaled_matmul_accumulate_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create device-backed tensor attributes. + auto aAttr = std::make_shared( + graph::makeTensorAttributes("A", DataType_t::FLOAT, aTensor)); + aAttr->set_uid(aUID); + + auto bAttr = std::make_shared( + graph::makeTensorAttributes("B", DataType_t::FLOAT, bTensor)); + bAttr->set_uid(bUID); + + auto cAttr = std::make_shared( + graph::makeTensorAttributes("C", DataType_t::FLOAT, cTensor)); + cAttr->set_uid(cUID); + + // Create scalar attributes (pass-by-value, NOT device buffers). + auto alphaAttr = std::make_shared(); + alphaAttr->set_name("alpha").set_value(alphaVal).set_uid(alphaUID); + + auto betaAttr = std::make_shared(); + betaAttr->set_name("beta").set_value(betaVal).set_uid(betaUID); + + // Build graph: result = alpha * (A @ B) + beta * C + + // matmul_out = A @ B + graph::MatmulAttributes matmulAttr; + matmulAttr.set_name("matmul"); + auto matmulOutAttr = graph->matmul(aAttr, bAttr, matmulAttr); + matmulOutAttr->set_dim(resultTensor.dims()) + .set_stride(resultTensor.strides()) + .set_output(false); + + // scaled = matmul_out * alpha + graph::PointwiseAttributes mulAlphaAttr; + mulAlphaAttr.set_name("mul_alpha").set_mode(PointwiseMode_t::MUL); + auto scaledAttr = graph->pointwise(matmulOutAttr, alphaAttr, mulAlphaAttr); + scaledAttr->set_dim(resultTensor.dims()) + .set_stride(resultTensor.strides()) + .set_output(false); + + // accum = C * beta + graph::PointwiseAttributes mulBetaAttr; + mulBetaAttr.set_name("mul_beta").set_mode(PointwiseMode_t::MUL); + auto accumAttr = graph->pointwise(cAttr, betaAttr, mulBetaAttr); + accumAttr->set_dim(resultTensor.dims()) + .set_stride(resultTensor.strides()) + .set_output(false); + + // result = scaled + accum + graph::PointwiseAttributes addAttr; + addAttr.set_name("add").set_mode(PointwiseMode_t::ADD); + auto resultAttr = graph->pointwise(scaledAttr, accumAttr, addAttr); + resultAttr->set_uid(resultUID); + resultAttr->set_dim(resultTensor.dims()) + .set_stride(resultTensor.strides()) + .set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack for device-backed tensors, not scalars. + std::unordered_map variantPack; + variantPack[aUID] = aTensor.memory().deviceData(); + variantPack[bUID] = bTensor.memory().deviceData(); + variantPack[cUID] = cTensor.memory().deviceData(); + variantPack[resultUID] = resultTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + resultTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, resultTensor)); + + // Clean up. + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} diff --git a/dnn-providers/fusilli-provider/test/integration/plugin_load.cpp b/dnn-providers/fusilli-provider/test/integration/plugin_load.cpp new file mode 100644 index 00000000000..4b28f7c42a0 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/plugin_load.cpp @@ -0,0 +1,88 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include + +#include +#include +#include +#include + +using hipdnn_data_sdk::utilities::getCurrentExecutableDirectory; + +static std::vector getLoadedPlugins(hipdnnHandle_t handle) { + size_t numPlugins = 0; + size_t maxPathLength = 0; + auto status = hipdnnGetLoadedEnginePluginPaths_ext(handle, &numPlugins, + nullptr, &maxPathLength); + + if (status != HIPDNN_STATUS_SUCCESS) { + throw std::runtime_error("Failed to get loaded plugin paths"); + } + + if (numPlugins == 0) { + return {}; + } + + std::vector> pathBuffers(numPlugins, + std::vector(maxPathLength)); + std::vector pluginPathsC(numPlugins); + for (size_t i = 0; i < numPlugins; ++i) { + pluginPathsC[i] = pathBuffers[i].data(); + } + + status = hipdnnGetLoadedEnginePluginPaths_ext( + handle, &numPlugins, pluginPathsC.data(), &maxPathLength); + if (status != HIPDNN_STATUS_SUCCESS) { + throw std::runtime_error("Failed to get loaded plugin paths"); + } + + std::vector pluginPaths; + pluginPaths.reserve(numPlugins); + for (size_t i = 0; i < numPlugins; ++i) { + pluginPaths.emplace_back(pluginPathsC[i]); + } + return pluginPaths; +} + +TEST(IntegrationTests, PluginLoad) { + // Set plugin paths. + // + // FUSILLI_PLUGIN_PATH is a relative path from the executable directory where + // this test lives to the location of the plugin's .so e.g. + // "../lib/hipdnn_plugins/engines/fusilli_plugin". The tests will be + // installed, and therefore re-located, in some build configurations + // (`TheRock` for example) so we must use a relative path. + auto pluginPath = std::filesystem::canonical(getCurrentExecutableDirectory() / + FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + hipdnnStatus_t status = hipdnnSetEnginePluginPaths_ext( + paths.size(), paths.data(), HIPDNN_PLUGIN_LOADING_ABSOLUTE); + EXPECT_EQ(status, HIPDNN_STATUS_SUCCESS); + + // Stand up enough of hipDNN to load plugins. + hipdnnHandle_t handle = nullptr; + status = hipdnnCreate(&handle); + ASSERT_EQ(status, HIPDNN_STATUS_SUCCESS); + ASSERT_NE(handle, nullptr); + + // If fusilli plugin fails to define a required method it will fail to load. + auto loadedPlugins = getLoadedPlugins(handle); + EXPECT_EQ(loadedPlugins.size(), 1); + + // Check that fusilli plugin did load. + auto expectedPath = + pluginPath / std::format("lib{}.so", FUSILLI_PLUGIN_TARGET); + EXPECT_TRUE(std::ranges::any_of( + loadedPlugins, [&expectedPath](const std::string &loadedPluginPath) { + return std::filesystem::canonical(loadedPluginPath) == + std::filesystem::canonical(expectedPath); + })); + + EXPECT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); +} diff --git a/dnn-providers/fusilli-provider/test/integration/pointwise/simple_pointwise.cpp b/dnn-providers/fusilli-provider/test/integration/pointwise/simple_pointwise.cpp new file mode 100644 index 00000000000..e0cd3434431 --- /dev/null +++ b/dnn-providers/fusilli-provider/test/integration/pointwise/simple_pointwise.cpp @@ -0,0 +1,206 @@ +// Copyright 2026 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace hipdnn_frontend; +using namespace hipdnn_data_sdk::utilities; +using namespace hipdnn_test_sdk::utilities; + +class PointwiseIntegrationTest : public ::testing::Test { +protected: + void SetUp() override { + ASSERT_EQ(hipInit(0), hipSuccess); + ASSERT_EQ(hipSetDevice(0), hipSuccess); + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + auto pluginPath = std::filesystem::canonical( + getCurrentExecutableDirectory() / FUSILLI_PLUGIN_PATH); + const std::array paths = {pluginPath.c_str()}; + ASSERT_EQ(hipdnnSetEnginePluginPaths_ext(paths.size(), paths.data(), + HIPDNN_PLUGIN_LOADING_ABSOLUTE), + HIPDNN_STATUS_SUCCESS); + + ASSERT_EQ(hipdnnCreate(&handle), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnSetStream(handle, stream), HIPDNN_STATUS_SUCCESS); + } + + void TearDown() override { + ASSERT_EQ(hipStreamDestroy(stream), HIPDNN_STATUS_SUCCESS); + ASSERT_EQ(hipdnnDestroy(handle), HIPDNN_STATUS_SUCCESS); + } + + hipStream_t stream = nullptr; + hipdnnHandle_t handle; +}; + +// Test: Standalone pointwise ReLU forward (unary) +// Graph: input[4,8] -> pointwise(RELU_FWD) -> output[4,8] +TEST_F(PointwiseIntegrationTest, SimpleReluFwd) { + // Dimensions. + const int64_t M = 4; + const int64_t N = 8; + + // UIDs. + const int64_t inputUID = 0; + const int64_t outputUID = 1; + + // Initialize tensors. + PinnedTensor inputTensor({M, N}); + PinnedTensor outputTensor({M, N}); + inputTensor.fillWithValue(-3.0f); + outputTensor.fillWithValue(-100.0f); + + // Expected output: ReLU clamps negatives to 0. + PinnedTensor expectedOutput({M, N}); + expectedOutput.fillWithValue(0.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("simple_relu_fwd_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto inputAttr = std::make_shared( + graph::makeTensorAttributes("input", DataType_t::FLOAT, inputTensor)); + inputAttr->set_uid(inputUID); + + // Create pointwise ReLU attributes. + graph::PointwiseAttributes reluAttr; + reluAttr.set_name("relu").set_mode(PointwiseMode_t::RELU_FWD); + + // Create pointwise node (unary). + auto outputAttr = graph->pointwise(inputAttr, reluAttr); + outputAttr->set_uid(outputUID); + outputAttr->set_dim(outputTensor.dims()) + .set_stride(outputTensor.strides()) + .set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[inputUID] = inputTensor.memory().deviceData(); + variantPack[outputUID] = outputTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + outputTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, outputTensor)); +} + +// Test: Standalone pointwise ADD (binary) +// Graph: in0[4,8] + in1[4,8] -> output[4,8] +TEST_F(PointwiseIntegrationTest, SimpleAdd) { + // Dimensions. + const int64_t M = 4; + const int64_t N = 8; + + // UIDs. + const int64_t in0UID = 0; + const int64_t in1UID = 1; + const int64_t outputUID = 2; + + // Initialize tensors. + PinnedTensor in0Tensor({M, N}); + PinnedTensor in1Tensor({M, N}); + PinnedTensor outputTensor({M, N}); + in0Tensor.fillWithValue(3.0f); + in1Tensor.fillWithValue(5.0f); + outputTensor.fillWithValue(-100.0f); + + // Expected output: 3.0 + 5.0 = 8.0. + PinnedTensor expectedOutput({M, N}); + expectedOutput.fillWithValue(8.0f); + + // Create graph. + auto graph = std::make_shared(); + graph->set_name("simple_add_test"); + graph->set_io_data_type(DataType_t::FLOAT) + .set_intermediate_data_type(DataType_t::FLOAT) + .set_compute_data_type(DataType_t::FLOAT); + + // Create tensor attributes. + auto in0Attr = std::make_shared( + graph::makeTensorAttributes("in0", DataType_t::FLOAT, in0Tensor)); + in0Attr->set_uid(in0UID); + auto in1Attr = std::make_shared( + graph::makeTensorAttributes("in1", DataType_t::FLOAT, in1Tensor)); + in1Attr->set_uid(in1UID); + + // Create pointwise ADD attributes. + graph::PointwiseAttributes addAttr; + addAttr.set_name("add").set_mode(PointwiseMode_t::ADD); + + // Create pointwise node (binary). + auto outputAttr = graph->pointwise(in0Attr, in1Attr, addAttr); + outputAttr->set_uid(outputUID); + outputAttr->set_dim(outputTensor.dims()) + .set_stride(outputTensor.strides()) + .set_output(true); + + // Build + validate + build plans for graph. + auto result = graph->validate(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_operation_graph(handle); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->create_execution_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->check_support(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + result = graph->build_plans(); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + + // Create variant pack. + std::unordered_map variantPack; + variantPack[in0UID] = in0Tensor.memory().deviceData(); + variantPack[in1UID] = in1Tensor.memory().deviceData(); + variantPack[outputUID] = outputTensor.memory().deviceData(); + + // Execute graph. + result = graph->execute(handle, variantPack, nullptr); + ASSERT_EQ(result.code, error_code_t::OK) << result.err_msg; + outputTensor.memory().markDeviceModified(); + + // Check results. + CpuFpReferenceValidation validator(1e-6f, 1e-6f); + EXPECT_TRUE(validator.allClose(expectedOutput, outputTensor)); +} diff --git a/dnn-providers/fusilli-provider/test/test_fusilli_plugin_api.cpp b/dnn-providers/fusilli-provider/test/test_fusilli_plugin_api.cpp new file mode 100644 index 00000000000..832c129a62d --- /dev/null +++ b/dnn-providers/fusilli-provider/test/test_fusilli_plugin_api.cpp @@ -0,0 +1,536 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "graph_import.h" +#include "hipdnn_engine_plugin_execution_context.h" +#include "utils.h" + +bool loggingCallbackCalled = false; +std::vector capturedLogMessages; +std::vector capturedLogSeverities; +std::mutex logMutex; +std::condition_variable logConditionVariable; + +void testLoggingCallback(hipdnnSeverity_t severity, const char *msg) { + // hipDNN sets spdlog up to log in a separate thread, so we need to put our + // mutual exclusion gloves on before touching any variables the main thread + // does. + std::scoped_lock lock(logMutex); + + loggingCallbackCalled = true; + if (msg) { + capturedLogMessages.push_back(std::string(msg)); + capturedLogSeverities.push_back(severity); + } + logConditionVariable.notify_one(); +} + +// Build matmul + pointwise graph using frontend API. +flatbuffers::DetachedBuffer +buildMatmulActivGraph(const std::vector &aDims, + const std::vector &bDims, + const std::vector &cDims, + hipdnn_frontend::PointwiseMode activMode) { + hipdnn_frontend::graph::Graph graph; + graph.set_name("MatmulActivTest") + .set_io_data_type(hipdnn_frontend::DataType::FLOAT) + .set_compute_data_type(hipdnn_frontend::DataType::FLOAT) + .set_intermediate_data_type(hipdnn_frontend::DataType::FLOAT); + + int64_t uid = 1; + + // Input A: [M, K] + auto aAttr = std::make_shared(); + aAttr->set_uid(uid++) + .set_name("A") + .set_data_type(hipdnn_frontend::DataType::FLOAT) + .set_dim(aDims) + .set_stride({aDims[1], 1}); + + // Input B: [K, N] + auto bAttr = std::make_shared(); + bAttr->set_uid(uid++) + .set_name("B") + .set_data_type(hipdnn_frontend::DataType::FLOAT) + .set_dim(bDims) + .set_stride({bDims[1], 1}); + + // Matmul: A x B -> C_matmul (virtual) + hipdnn_frontend::graph::MatmulAttributes matmulAttrs; + matmulAttrs.set_name("Matmul"); + auto cMatmul = graph.matmul(aAttr, bAttr, matmulAttrs); + + // Pointwise activation: C_matmul -> C + hipdnn_frontend::graph::PointwiseAttributes pointwiseAttrs; + pointwiseAttrs.set_mode(activMode); + auto cOut = graph.pointwise(cMatmul, pointwiseAttrs); + cOut->set_uid(uid++) + .set_name("Activ") + .set_data_type(hipdnn_frontend::DataType::FLOAT) + .set_dim(cDims) + .set_stride({cDims[1], 1}) + .set_output(true); + + auto result = graph.validate(); + if (result.is_bad()) { + throw std::runtime_error("Graph validation failed: " + + result.get_message()); + } + + return graph.buildFlatbufferOperationGraph(); +} + +TEST(TestFusilliPluginApi, Logging) { + // Set tracking variables + { + std::scoped_lock lock(logMutex); + loggingCallbackCalled = false; + capturedLogMessages.clear(); + capturedLogSeverities.clear(); + } + + // Set up logging callback + ASSERT_EQ(hipdnnPluginSetLoggingCallback(testLoggingCallback), + HIPDNN_PLUGIN_STATUS_SUCCESS); + + std::unique_lock lock(logMutex); + + // Wait for the logging callback to signal that it has been called. + auto timeout = std::chrono::steady_clock::now() + std::chrono::seconds(5); + EXPECT_TRUE(logConditionVariable.wait_until( + lock, timeout, [&]() { return loggingCallbackCalled; })); + + EXPECT_TRUE(loggingCallbackCalled); + EXPECT_FALSE(capturedLogMessages.empty()); + EXPECT_TRUE(capturedLogMessages.front().find( + "logging callback initialized") != std::string::npos); +}; + +TEST(TestFusilliPluginApi, GetNameSuccess) { + const char *name = nullptr; + EXPECT_EQ(hipdnnPluginGetName(&name), HIPDNN_PLUGIN_STATUS_SUCCESS); + EXPECT_STREQ(name, hipdnn_data_sdk::utilities::FUSILLI_ENGINE_NAME); +} + +TEST(TestFusilliPluginApi, GetNameNullptr) { + EXPECT_EQ(hipdnnPluginGetName(nullptr), HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); +} + +TEST(TestFusilliPluginApi, GetVersionSuccess) { + const char *version = nullptr; + EXPECT_EQ(hipdnnPluginGetVersion(&version), HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(version, nullptr); + // TODO(#2317): check returned version against single source of truth. +} + +TEST(TestFusilliPluginApi, GetVersionNullptr) { + EXPECT_EQ(hipdnnPluginGetVersion(nullptr), HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); +} + +TEST(TestFusilliPluginApi, GetTypeSuccess) { + hipdnnPluginType_t type; + EXPECT_EQ(hipdnnPluginGetType(&type), HIPDNN_PLUGIN_STATUS_SUCCESS); + EXPECT_EQ(type, HIPDNN_PLUGIN_TYPE_ENGINE); +} + +TEST(TestFusilliPluginApi, GetTypeNullptr) { + EXPECT_EQ(hipdnnPluginGetType(nullptr), HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); +} + +TEST(TestFusilliPluginApi, GetLastErrorStringSuccess) { + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); + // Initially should be empty or contain a previous error + EXPECT_GE(strlen(errorStr), 0); +} + +TEST(TestFusilliPluginApi, GetLastErrorStringNullptr) { + // This should not crash even with nullptr + EXPECT_NO_THROW(hipdnnPluginGetLastErrorString(nullptr)); +} + +TEST(TestFusilliPluginApi, SetLoggingCallbackNullptr) { + // Setting nullptr should return BAD_PARAM + EXPECT_EQ(hipdnnPluginSetLoggingCallback(nullptr), + HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); +} + +TEST(TestFusilliPluginApi, GetAllEngineIds) { + // First call with null buffer to get count + uint32_t numEngines = 0; + EXPECT_EQ(hipdnnEnginePluginGetAllEngineIds(nullptr, 0, &numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + EXPECT_EQ(numEngines, 1); + + // Second call to get actual engine IDs + std::vector engineIds(numEngines); + EXPECT_EQ(hipdnnEnginePluginGetAllEngineIds(engineIds.data(), numEngines, + &numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + EXPECT_EQ(numEngines, 1); + EXPECT_EQ(engineIds[0], hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID); +} + +TEST(TestFusilliPluginApi, GetAllEngineIdsNullNumEngines) { + EXPECT_EQ(hipdnnEnginePluginGetAllEngineIds(nullptr, 0, nullptr), + HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); + EXPECT_GT(strlen(errorStr), 0u); +} + +TEST(TestFusilliPluginApi, GetApplicableEngineIds) { + // Create plugin handle. + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(handle, nullptr); + + // Create a serialized hipDNN batch norm graph. + auto builder = hipdnn_test_sdk::utilities::createValidBatchnormBwdGraph(); + hipdnnPluginConstData_t opGraph; + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + // Fusilli plugin should not offer to compile and execute bach norm (yet). + std::array engineIDs; + uint32_t numEngines = 10; + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + /*handle=*/handle, /*op_graph=*/&opGraph, + /*engine_ids=*/engineIDs.data(), /*max_engines=*/5, + /*num_engines=*/&numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_EQ(numEngines, 0); + + // Create a serialized hipDNN conv_fprop graph with symmetric padding. + builder = hipdnn_test_sdk::utilities::createValidConvFwdGraph(); + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + // Fusilli plugin should offer to compile and execute single node conv_fprop. + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + /*handle=*/handle, /*op_graph=*/&opGraph, + /*engine_ids=*/engineIDs.data(), /*max_engines=*/5, + /*num_engines=*/&numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_EQ(numEngines, 1); + ASSERT_EQ(engineIDs[0], hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID); + + // Create a serialized hipDNN conv_fprop graph with asymmetric padding. + builder = hipdnn_test_sdk::utilities::createValidConvFwdGraph( + /*xDims=*/{4, 4, 4, 4}, /*xStrides=*/{64, 16, 4, 1}, + /*wDims=*/{4, 4, 1, 1}, /*wStrides=*/{4, 1, 1, 1}, + /*yDims=*/{4, 4, 4, 4}, /*yStrides=*/{64, 16, 4, 1}, + /*convPrePadding=*/{1, 0}, // asymmetric: pre doesn't match post + /*convPostPadding=*/{2, 1}, // asymmetric: pre doesn't match post + /*convStrides=*/{1, 1}, /*convDilation=*/{1, 1}); + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + // Fusilli plugin should not offer to compile and execute single node + // conv_fprop with asymmetric padding. + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + /*handle=*/handle, /*op_graph=*/&opGraph, + /*engine_ids=*/engineIDs.data(), /*max_engines=*/5, + /*num_engines=*/&numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_EQ(numEngines, 0); +} + +TEST(TestFusilliPluginApi, GetApplicableEngineIdsConvPointwise) { + // Create plugin handle. + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(handle, nullptr); + + std::array engineIDs; + uint32_t numEngines = 0; + + // Test conv + unary pointwise activation for various modes. + // (conv -> binary -> pointwise covered in + // GetApplicableEngineIdsConvBiasActiv) + for (auto mode : {hipdnn_data_sdk::data_objects::PointwiseMode::RELU_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::SIGMOID_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::TANH_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::GELU_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::ELU_FWD}) { + auto builder = hipdnn_test_sdk::utilities::createValidConvFwdActivGraph( + /*xDims=*/{4, 4, 4, 4}, /*xStrides=*/{64, 16, 4, 1}, + /*wDims=*/{4, 4, 1, 1}, /*wStrides=*/{4, 1, 1, 1}, + /*yDims=*/{4, 4, 4, 4}, /*yStrides=*/{64, 16, 4, 1}, + /*convPrePadding=*/{0, 0}, /*convPostPadding=*/{0, 0}, + /*convStrides=*/{1, 1}, /*convDilation=*/{1, 1}, + /*activMode=*/mode); + hipdnnPluginConstData_t opGraph; + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + handle, &opGraph, engineIDs.data(), 5, &numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + + // If the translation (hipDNN -> fusilli) is supported, the graph should be + // supported. + bool modeSupported = + !fusilli::isError(hipDnnPointwiseModeToFusilliMode(mode)); + uint32_t expectedEngines = modeSupported ? 1 : 0; + ASSERT_EQ(numEngines, expectedEngines); + } + + EXPECT_EQ(hipdnnEnginePluginDestroy(handle), HIPDNN_PLUGIN_STATUS_SUCCESS); +} + +TEST(TestFusilliPluginApi, GetApplicableEngineIdsConvBiasActiv) { + // Create plugin handle. + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(handle, nullptr); + + std::array engineIDs; + uint32_t numEngines = 0; + + // Graph structure: conv -> bias (ADD) -> activation + for (auto activMode : + {hipdnn_data_sdk::data_objects::PointwiseMode::RELU_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::SIGMOID_FWD, + hipdnn_data_sdk::data_objects::PointwiseMode::TANH_FWD}) { + auto builder = hipdnn_test_sdk::utilities::createValidConvFwdBiasActivGraph( + /*xDims=*/{4, 4, 4, 4}, /*xStrides=*/{64, 16, 4, 1}, + /*wDims=*/{4, 4, 1, 1}, /*wStrides=*/{4, 1, 1, 1}, + /*yDims=*/{4, 4, 4, 4}, /*yStrides=*/{64, 16, 4, 1}, + /*convPrePadding=*/{0, 0}, /*convPostPadding=*/{0, 0}, + /*convStrides=*/{1, 1}, /*convDilation=*/{1, 1}, + /*activMode=*/activMode); + hipdnnPluginConstData_t opGraph; + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + handle, &opGraph, engineIDs.data(), 5, &numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + + // Graph should be supported if translation for activation is supported. + bool activSupported = + !fusilli::isError(hipDnnPointwiseModeToFusilliMode(activMode)); + uint32_t expectedEngines = activSupported ? 1 : 0; + ASSERT_EQ(numEngines, expectedEngines); + } + + EXPECT_EQ(hipdnnEnginePluginDestroy(handle), HIPDNN_PLUGIN_STATUS_SUCCESS); +} + +TEST(TestFusilliPluginApi, GetApplicableEngineIdsMatmulPointwise) { + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + + std::array engineIDs; + uint32_t numEngines = 0; + + // Test matmul + unary pointwise for various activation modes. + for (auto mode : {hipdnn_frontend::PointwiseMode::RELU_FWD, + hipdnn_frontend::PointwiseMode::SIGMOID_FWD, + hipdnn_frontend::PointwiseMode::TANH_FWD, + hipdnn_frontend::PointwiseMode::GELU_FWD}) { + auto flatbufferGraph = buildMatmulActivGraph( + /*aDims=*/{4, 8}, /*bDims=*/{8, 5}, /*cDims=*/{4, 5}, mode); + + hipdnnPluginConstData_t opGraph; + opGraph.ptr = flatbufferGraph.data(); + opGraph.size = flatbufferGraph.size(); + + ASSERT_EQ(hipdnnEnginePluginGetApplicableEngineIds( + handle, &opGraph, engineIDs.data(), 5, &numEngines), + HIPDNN_PLUGIN_STATUS_SUCCESS); + + // Graph supported if pointwise mode translates to fusilli. + auto sdkMode = hipdnn_frontend::toSdkType(mode); + bool modeSupported = + !fusilli::isError(hipDnnPointwiseModeToFusilliMode(sdkMode)); + uint32_t expectedEngines = modeSupported ? 1 : 0; + ASSERT_EQ(numEngines, expectedEngines); + } + + EXPECT_EQ(hipdnnEnginePluginDestroy(handle), HIPDNN_PLUGIN_STATUS_SUCCESS); +} + +TEST(TestFusilliPluginApi, CreateExecutionContext) { + // Create plugin handle. + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(handle, nullptr); + + // UIDs. + int64_t xUID = 1; + int64_t wUID = 2; + int64_t yUID = 3; + + // Dims and strides. + const std::vector expectedXDims = {4, 4, 4, 4}; + const std::vector expectedXStrides = {64, 16, 4, 1}; + const std::vector expectedWDims = {4, 4, 1, 1}; + const std::vector expectedWStrides = {4, 1, 1, 1}; + const std::vector expectedYDims = {4, 4, 4, 4}; + const std::vector expectedYStrides = {64, 16, 4, 1}; + const hipdnn_data_sdk::data_objects::DataType dataType = + hipdnn_data_sdk::data_objects::DataType::FLOAT; + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN(fusilli::DataType expectedDataType, + hipDnnDataTypeToFusilliDataType(dataType)); + + // Create a serialized hipDNN conv_fprop. + // Note: createValidConvFwdGraph uses hardcoded UIDs 1, 2, 3 for x, w, y + auto builder = hipdnn_test_sdk::utilities::createValidConvFwdGraph( + expectedXDims, expectedXStrides, expectedWDims, expectedWStrides, + expectedYDims, expectedYStrides, /*convPrePadding=*/{0, 0}, + /*convPostPadding=*/{0, 0}, /*convStrides=*/{1, 1}, + /*convDilation=*/{1, 1}, dataType); + hipdnnPluginConstData_t opGraph; + opGraph.ptr = builder.GetBufferPointer(); + opGraph.size = builder.GetSize(); + + // Create engine config. + flatbuffers::FlatBufferBuilder configBuilder; + auto engineConfig = hipdnn_data_sdk::data_objects::CreateEngineConfig( + configBuilder, hipdnn_data_sdk::utilities::FUSILLI_ENGINE_ID); + configBuilder.Finish(engineConfig); + hipdnnPluginConstData_t engineConfigData; + engineConfigData.ptr = configBuilder.GetBufferPointer(); + engineConfigData.size = configBuilder.GetSize(); + + // The function we're actually testing. + hipdnnEnginePluginExecutionContext_t executionContext = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreateExecutionContext( + handle, &engineConfigData, &opGraph, &executionContext), + HIPDNN_PLUGIN_STATUS_SUCCESS); + ASSERT_NE(executionContext, nullptr); + + auto *ctx = + static_cast(executionContext); + + // Check that we have 3 tensors tracked (x, w, y). + EXPECT_EQ(ctx->uidToFusilliTensorAttr.size(), 3); + + // Check x tensor properties. + ASSERT_TRUE(ctx->uidToFusilliTensorAttr.contains(xUID)); // C++20 + std::shared_ptr xTensor = + ctx->uidToFusilliTensorAttr[xUID]; + EXPECT_EQ(xTensor->getDim(), expectedXDims); + EXPECT_EQ(xTensor->getStride(), expectedXStrides); + EXPECT_EQ(xTensor->getDataType(), expectedDataType); + EXPECT_FALSE(xTensor->isVirtual()); + + // Check w tensor properties. + ASSERT_TRUE(ctx->uidToFusilliTensorAttr.contains(wUID)); // C++20 + std::shared_ptr wTensor = + ctx->uidToFusilliTensorAttr[wUID]; + EXPECT_EQ(wTensor->getDim(), expectedWDims); + EXPECT_EQ(wTensor->getStride(), expectedWStrides); + EXPECT_EQ(wTensor->getDataType(), expectedDataType); + EXPECT_FALSE(wTensor->isVirtual()); + + // Check y tensor properties. + ASSERT_TRUE(ctx->uidToFusilliTensorAttr.contains(wUID)); // C++20 + std::shared_ptr yTensor = + ctx->uidToFusilliTensorAttr[yUID]; + EXPECT_EQ(yTensor->getDim(), expectedYDims); + EXPECT_EQ(yTensor->getStride(), expectedYStrides); + EXPECT_EQ(yTensor->getDataType(), expectedDataType); + EXPECT_FALSE(yTensor->isVirtual()); + + // Verify graph properties. + EXPECT_EQ(ctx->graph.context.getIODataType(), expectedDataType); + EXPECT_EQ(ctx->graph.context.getIntermediateDataType(), expectedDataType); + EXPECT_EQ(ctx->graph.context.getComputeDataType(), expectedDataType); + + // Clean up. + EXPECT_EQ(hipdnnEnginePluginDestroyExecutionContext(handle, executionContext), + HIPDNN_PLUGIN_STATUS_SUCCESS); + EXPECT_EQ(hipdnnEnginePluginDestroy(handle), HIPDNN_PLUGIN_STATUS_SUCCESS); +} + +TEST(TestFusilliPluginApi, SetStreamSuccess) { + // Create plugin handle. + hipdnnEnginePluginHandle_t handle = nullptr; + ASSERT_EQ(hipdnnEnginePluginCreate(&handle), HIPDNN_PLUGIN_STATUS_SUCCESS); + + // Create a HIP stream. + hipStream_t stream; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Set the stream on the handle. + EXPECT_EQ(hipdnnEnginePluginSetStream(handle, stream), + HIPDNN_PLUGIN_STATUS_SUCCESS); + + // Clean up. + EXPECT_EQ(hipStreamDestroy(stream), hipSuccess); + EXPECT_EQ(hipdnnEnginePluginDestroy(handle), HIPDNN_PLUGIN_STATUS_SUCCESS); +} + +TEST(TestFusilliPluginApi, SetStreamNullHandle) { + // Create a HIP stream. + hipStream_t stream; + ASSERT_EQ(hipStreamCreate(&stream), hipSuccess); + + // Attempt to set stream with null handle should fail. + EXPECT_EQ(hipdnnEnginePluginSetStream(nullptr, stream), + HIPDNN_PLUGIN_STATUS_BAD_PARAM); + + // Verify error was set. + const char *errorStr = nullptr; + hipdnnPluginGetLastErrorString(&errorStr); + ASSERT_NE(errorStr, nullptr); + EXPECT_GT(strlen(errorStr), 0u); + + // Clean up. + EXPECT_EQ(hipStreamDestroy(stream), hipSuccess); +} diff --git a/dnn-providers/fusilli-provider/test/test_graph_import.cpp b/dnn-providers/fusilli-provider/test/test_graph_import.cpp new file mode 100644 index 00000000000..bea2370a9be --- /dev/null +++ b/dnn-providers/fusilli-provider/test/test_graph_import.cpp @@ -0,0 +1,47 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "graph_import.h" +#include "utils.h" + +#include +#include +#include + +TEST(TestGraphImport, ConvertHipDnnToFusilli) { + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto halfDt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::HALF)); + EXPECT_EQ(halfDt, fusilli::DataType::Half); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto bfloat16Dt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::BFLOAT16)); + EXPECT_EQ(bfloat16Dt, fusilli::DataType::BFloat16); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto floatDt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::FLOAT)); + EXPECT_EQ(floatDt, fusilli::DataType::Float); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto doubleDt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::DOUBLE)); + EXPECT_EQ(doubleDt, fusilli::DataType::Double); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto uint8Dt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::UINT8)); + EXPECT_EQ(uint8Dt, fusilli::DataType::Uint8); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto int32Dt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::INT32)); + EXPECT_EQ(int32Dt, fusilli::DataType::Int32); + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN( + auto unsetDt, hipDnnDataTypeToFusilliDataType( + hipdnn_data_sdk::data_objects::DataType::UNSET)); + EXPECT_EQ(unsetDt, fusilli::DataType::NotSet); + + auto invalidResult = hipDnnDataTypeToFusilliDataType( + static_cast(42)); + EXPECT_TRUE(isError(invalidResult)); +} diff --git a/dnn-providers/fusilli-provider/test/utils.h b/dnn-providers/fusilli-provider/test/utils.h new file mode 100644 index 00000000000..9c9d1d8370d --- /dev/null +++ b/dnn-providers/fusilli-provider/test/utils.h @@ -0,0 +1,35 @@ +// Copyright 2025 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===----------------------------------------------------------------------===// +// +// This file contains utilities for fusilli plugin tests. +// +//===----------------------------------------------------------------------===// + +#ifndef FUSILLI_PLUGIN_TESTS_UTILS_H +#define FUSILLI_PLUGIN_TESTS_UTILS_H + +#define FUSILLI_PLUGIN_CONCAT_IMPL(a, b, c) a##_##b##_##c +#define FUSILLI_PLUGIN_ERROR_VAR(name, line, counter) \ + FUSILLI_PLUGIN_CONCAT_IMPL(name, line, counter) + +#define FUSILLI_PLUGIN_EXPECT_OR_ASSIGN_IMPL(errorOr, var, expr) \ + auto errorOr = (expr); \ + EXPECT_TRUE(!isError(errorOr)); \ + var = std::move(*errorOr); + +// Assigns the result of an expression that evaluates to an ErrorOr to a +// variable, or returns the error from the enclosing function if the result is +// an error. +#define FUSILLI_PLUGIN_EXPECT_OR_ASSIGN(varDecl, expr) \ + FUSILLI_DISABLE_COUNTER_WARNING \ + FUSILLI_PLUGIN_EXPECT_OR_ASSIGN_IMPL( \ + FUSILLI_PLUGIN_ERROR_VAR(_errorOr, __LINE__, __COUNTER__), varDecl, \ + expr) \ + FUSILLI_RESTORE_COUNTER_WARNING + +#endif // FUSILLI_PLUGIN_TESTS_UTILS_H From 01550af38744b565217bc41567495dc957bd48de Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Wed, 4 Mar 2026 21:39:55 +0000 Subject: [PATCH 2/3] Remove obsolete ThePebble dev environment scripts ThePebble was a standalone dev/CI setup tool for when the plugin lived inside the fusilli repo. Now that the plugin is built through TheRock, these files serve no purpose and contain stale relative path assumptions. Co-Authored-By: Claude Opus 4.6 --- .../fusilli-provider/build_tools/ThePebble.py | 574 ------------------ .../build_tools/thepebble_config.toml | 9 - 2 files changed, 583 deletions(-) delete mode 100644 dnn-providers/fusilli-provider/build_tools/ThePebble.py delete mode 100644 dnn-providers/fusilli-provider/build_tools/thepebble_config.toml diff --git a/dnn-providers/fusilli-provider/build_tools/ThePebble.py b/dnn-providers/fusilli-provider/build_tools/ThePebble.py deleted file mode 100644 index 1aa95576665..00000000000 --- a/dnn-providers/fusilli-provider/build_tools/ThePebble.py +++ /dev/null @@ -1,574 +0,0 @@ -""" -ThePebble - A simulacrum of TheRock for fusilli's hipDNN plugin dev/CI -environment setup. - -ThePebble composes build artifacts into one distribution directory at -$HOME/.cache/ThePebble/dist. The dist directory resembles what TheRock provides -when building fusilli's hipDNN plugin - namely a composition of the installed -build artifacts for the plugin's declared dependencies. - -TODO: when multi-stage build lands in TheRock investigate simply running the -iree-libs phase of TheRock. - -usage: - --setup - Installs plugin dependencies and creates a CMakeUserPresets.json for local - development. After running --setup, configure the plugin with: - cmake --preset thepebble - - Dependencies installed: - - Hip. For the hip dependency ThePebble takes an approach suggested in - TheRock's RELEASES.md and uses TheRock's CI scripts `fetch_artifacts.py` - and `install_rocm_from_artifacts.py` to install from artifacts built by - TheRock. We use the more granular artifacts (used primarily by tests) - rather than the monolithic tarball. The granular artifacts allow us to - install hip without ending up with duplicate copies of fusilli-plugin - - ThePebble and TheRock both build one. The granular approach also allows - us to compose final installed artifacts from multiple builds - we can use - hip from an older build if there was a regression, and hipDNN from a - newer build if there was an API update. - - TheRock's scripts require a github run ID. This is configured in - thepebble_config.toml as `versions.hip_run_id`. To find a run ID go to - https://github.com/ROCm/TheRock/actions - each action has the run id in - the URL. A run ID can come from a PR or a nightly release build from - `main`. Note: Filtering https://github.com/ROCm/TheRock/actions by - "scheduled" events will display only nightly release builds, if that's - what you're looking for. - - - HipDNN. Currently ThePebble builds + installs hipDNN from source in - rocm-libraries. But, the source approach can and should be augmented with - an alternative option to fetch hipDNN artifacts from TheRock like we do - for hip (with an independent run id). Currently hipDNN has a CMake bug - that crashes the build when using TheRock's artifacts; it contains a - hardcoded path that won't exist if outside of build machine for TheRock. - - - IREE runtime. This is the black sheep. As fusilli and the plugin each build - IREE from source internally this dependency isn't built and installed to - dist, it just exists at a path where fusilli + the plugin can find it. - ThePebble provides IREE's path to fusilli and the plugin through - -DIREE_SOURCE_DIR CMake Cache variable (via build flags and cmake preset - respectively). - - - Fusilli. The plugin builds independently from fusilli itself - the plugin - may not live in the fusilli repo long term, so is designed to be easily - relocatable. The current version of fusilli is built + installed into dist - using IREE runtime fetched by ThePebble. - - --ci-install-and-test-fusilli-plugin - Builds and installs the fusilli plugin into the dist directory, then runs - TheRock's test script for fusilli plugin. - - Note: This installs the plugin into the same dist directory as dependencies - created with --setup, which may be annoying for local development - you'll - end up with two versions of fusilli_pluggin.so running around (one in dist - and one in your local build folder). For local development it's probably - easiest to run the tests from your local build. TheRock doesn't run tests - using ctest on build folder, but bugs due to test environment setup should - be rare. -""" - -import argparse -import json -import os -import shutil -import subprocess -import sys -import tempfile -import tomllib -import venv -from pathlib import Path - -PEBBLE_DIR = Path.home() / ".cache" / "ThePebble" -INSTALL_DIR = PEBBLE_DIR / "dist" -CACHED_CONFIG = PEBBLE_DIR / "_copy_of_thepebble_config_for_cache_invalidation.toml" -THEROCK_REPO = "https://github.com/ROCm/TheRock.git" -THEROCK_DIR = PEBBLE_DIR / "TheRock" -ROCM_LIBRARIES_REPO = "https://github.com/ROCm/rocm-libraries.git" -IREE_REPO = "https://github.com/iree-org/iree.git" -IREE_DIR = PEBBLE_DIR / "iree" -IREE_SUBMODULES = ["third_party/flatcc", "third_party/benchmark"] -HIPDNN_SRC_DIR = PEBBLE_DIR / "rocm-libraries" - -# ============================================================================== -# Utils -# ============================================================================== - - -def load_config() -> dict: - """Load configuration from thepebble_config.toml.""" - config_path = Path(__file__).parent / "thepebble_config.toml" - with open(config_path, "rb") as f: - return tomllib.load(f) - - -def get_fusilli_dir() -> Path: - """Get the fusilli source directory (parent of hipdnn-plugin).""" - # ThePebble.py is in fusilli/plugins/hipdnn-plugin/build_tools/ - return Path(__file__).parent.parent.parent.parent - - -def get_iree_git_tag() -> str: - """Read IREE version from the root version.json (single source of truth).""" - version_json = get_fusilli_dir() / "version.json" - with open(version_json) as f: - data = json.load(f) - return data["iree-version"] - - -def get_plugin_dir() -> Path: - """Get the hipdnn-plugin directory.""" - # ThePebble.py is in fusilli/plugins/hipdnn-plugin/build_tools/ - return Path(__file__).parent.parent - - -def validate_config(): - """Check cache exists and config matches. Error if mismatch.""" - if not CACHED_CONFIG.exists(): - sys.exit("Error: No cached config. Run --setup first.") - - current_config = load_config() - with open(CACHED_CONFIG, "rb") as f: - cached_config = tomllib.load(f) - - if current_config != cached_config: - sys.exit("Error: Config mismatch. Re-run --setup to update.") - - -# ============================================================================== -# Setup -# ============================================================================== - - -def setup_therock(git_ref: str): - """Clone TheRock and set up venv (ThePebble only uses python scripts)""" - print(f"Cloning TheRock at {git_ref}...") - subprocess.run( - [ - "git", - "clone", - "--depth=1", - "--branch", - git_ref, - THEROCK_REPO, - str(THEROCK_DIR), - ], - check=True, - ) - - # Set up venv - print("Setting up TheRock venv...") - venv_dir = THEROCK_DIR / ".venv" - subprocess.run(["python3", "-m", "venv", str(venv_dir)], check=True) - pip = venv_dir / "bin" / "pip" - subprocess.run( - [str(pip), "install", "-r", str(THEROCK_DIR / "requirements.txt")], - check=True, - ) - subprocess.run( - [str(pip), "install", "-r", str(THEROCK_DIR / "requirements-test.txt")], - check=True, - ) - - -def install_hip(run_id: str): - """Download and install Hip artifacts using install_rocm_from_artifacts.py.""" - venv_python = THEROCK_DIR / ".venv" / "bin" / "python" - - # Use TheRock's install_rocm_from_artifacts.py - # --run-github-repo is needed to override GITHUB_REPOSITORY env var in CI - cmd = [ - str(venv_python), - str(THEROCK_DIR / "build_tools" / "install_rocm_from_artifacts.py"), - "--run-id", - run_id, - "--run-github-repo", - "ROCm/TheRock", - "--artifact-group", - "generic", - "--output-dir", - str(INSTALL_DIR), - "--base-only", - ] - print(f"Fetching Hip artifacts from run {run_id}...") - subprocess.run(cmd, check=True) - - # Fetch amd-llvm_dev (not sure why this isn't included in "base") - cmd = [ - str(venv_python), - str(THEROCK_DIR / "build_tools" / "fetch_artifacts.py"), - "--run-id", - run_id, - "--run-github-repo", - "ROCm/TheRock", - "--artifact-group", - "generic", - "--output-dir", - str(INSTALL_DIR), - "--flatten", - "amd-llvm_dev", - ] - print(f"Fetching amd-llvm_dev artifact...") - subprocess.run(cmd, check=True) - - -def build_hipdnn(git_ref: str): - """Build and install hipDNN from rocm-libraries sparse checkout.""" - # Sparse checkout of rocm-libraries - print(f"Sparse checkout of rocm-libraries at {git_ref}...") - subprocess.run( - [ - "git", - "clone", - "--no-checkout", - "--filter=blob:none", - ROCM_LIBRARIES_REPO, - str(HIPDNN_SRC_DIR), - ], - check=True, - ) - subprocess.run( - ["git", "sparse-checkout", "init", "--cone"], - cwd=HIPDNN_SRC_DIR, - check=True, - ) - subprocess.run( - ["git", "sparse-checkout", "set", "projects/hipdnn"], - cwd=HIPDNN_SRC_DIR, - check=True, - ) - subprocess.run(["git", "checkout", git_ref], cwd=HIPDNN_SRC_DIR, check=True) - - # Build inside projects/hipdnn so IDEs auto-discover compile_commands.json - hipdnn_project_dir = HIPDNN_SRC_DIR / "projects" / "hipdnn" - hipdnn_build_dir = hipdnn_project_dir / "build" - print(f"Building hipDNN from {hipdnn_project_dir}...") - - cmake_args = [ - "cmake", - "-G", - "Ninja", - "-S", - str(hipdnn_project_dir), - "-B", - str(hipdnn_build_dir), - f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", - f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", - "-DCMAKE_BUILD_TYPE=Debug", - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", - "-DHIP_PLATFORM=amd", - "-DHIP_DNN_BUILD_PLUGINS=OFF", - # Headers are already checked into git, no need to re-generate them - # unless you're changing the schema. - "-DHIP_DNN_GENERATE_SDK_HEADERS=OFF", - "-DENABLE_CLANG_TIDY=OFF", - "-DENABLE_CLANG_FORMAT=OFF", - "-DHIPDNN_FRONTEND_SKIP_JSON_LIB=ON", - ] - subprocess.run(cmake_args, check=True) - - # Build and install - subprocess.run(["cmake", "--build", str(hipdnn_build_dir)], check=True) - subprocess.run(["cmake", "--install", str(hipdnn_build_dir)], check=True) - - -def setup_iree(tag: str): - """Clone IREE at a tag and fetch required submodules""" - print(f"Cloning IREE at tag {tag}...") - subprocess.run( - ["git", "clone", "--depth=1", "--branch", tag, IREE_REPO, str(IREE_DIR)], - check=True, - ) - - # Fetch only required submodules - print(f"Fetching IREE submodules: {IREE_SUBMODULES}") - for submodule in IREE_SUBMODULES: - subprocess.run( - ["git", "submodule", "update", "--init", "--depth=1", submodule], - cwd=IREE_DIR, - check=True, - ) - - -def build_fusilli(): - """Build and install fusilli from source.""" - fusilli_src = get_fusilli_dir() - - with tempfile.TemporaryDirectory() as tmpdir: - fusilli_build = Path(tmpdir) - print(f"Building fusilli from {fusilli_src}...") - - # Configure fusilli - based on TheRock's CMake args - cmake_args = [ - "cmake", - "-G", - "Ninja", - "-S", - str(fusilli_src), - "-B", - str(fusilli_build), - f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", - f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", - "-DCMAKE_BUILD_TYPE=Release", - "-DFUSILLI_BUILD_TESTS=OFF", - "-DFUSILLI_BUILD_BENCHMARKS=OFF", - "-DFUSILLI_SYSTEMS_AMDGPU=ON", - "-DFUSILLI_CODE_COVERAGE=OFF", - "-DFUSILLI_ENABLE_LOGGING=OFF", - "-DFUSILLI_ENABLE_CLANG_TIDY=OFF", - f"-DIREE_SOURCE_DIR={IREE_DIR}", - "-DHIP_PLATFORM=amd", - "-DIREE_USE_SYSTEM_DEPS=ON", - ] - subprocess.run(cmake_args, check=True) - - # Build and install - subprocess.run(["cmake", "--build", str(fusilli_build)], check=True) - subprocess.run(["cmake", "--install", str(fusilli_build)], check=True) - - -def generate_cmake_user_presets(): - """Generate CMakeUserPresets.json in the hipdnn-plugin directory.""" - plugin_dir = get_plugin_dir() - llvm_bin = INSTALL_DIR / "lib" / "llvm" / "bin" - - presets = { - "version": 6, - "configurePresets": [ - { - "name": "thepebble", - "generator": "Ninja", - "binaryDir": "${sourceDir}/build", - "cacheVariables": { - "CMAKE_C_COMPILER": str(llvm_bin / "clang"), - "CMAKE_CXX_COMPILER": str(llvm_bin / "clang++"), - "CMAKE_PREFIX_PATH": str(INSTALL_DIR), - "IREE_SOURCE_DIR": str(IREE_DIR), - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "IREE_USE_SYSTEM_DEPS": "ON", - "HIP_PLATFORM": "amd", - }, - } - ], - } - - presets_path = plugin_dir / "CMakeUserPresets.json" - print(f"Writing {presets_path}...") - with open(presets_path, "w") as f: - json.dump(presets, f, indent=2) - f.write("\n") - - -def provide_iree_tools(iree_version: str): - """Pip install iree-base-compiler and symlink IREE tools into dist/. - - TheRock builds libIREECompiler.so and installs it to dist/lib/; ThePebble - gets it from pip's iree-base-compiler instead and symlinks it into dist/ - so TheRock's test scripts can find it.""" - # Create venv and pip install iree-base-compiler - venv_dir = PEBBLE_DIR / ".venv" - print(f"Creating venv at {venv_dir}...") - venv.EnvBuilder(with_pip=True, prompt="ThePebble").create(venv_dir) - - pip = venv_dir / "bin" / "pip" - print(f"Installing iree-base-compiler=={iree_version}...") - subprocess.run( - [ - str(pip), - "install", - "--find-links", - "https://iree.dev/pip-release-links.html", - f"iree-base-compiler=={iree_version}", - ], - check=True, - ) - - # Symlink libIREECompiler.so into dist/lib/ - venv_python = venv_dir / "bin" / "python" - result = subprocess.run( - [ - str(venv_python), - "-c", - "import pathlib, iree.compiler._mlir_libs;" - " print(pathlib.Path(iree.compiler._mlir_libs.__file__).parent" - " / 'libIREECompiler.so')", - ], - capture_output=True, - text=True, - check=True, - ) - iree_compiler_lib = Path(result.stdout.strip()) - lib_symlink = INSTALL_DIR / "lib" / "libIREECompiler.so" - lib_symlink.unlink(missing_ok=True) - print(f"Symlinking {lib_symlink} -> {iree_compiler_lib}") - lib_symlink.symlink_to(iree_compiler_lib) - - # Symlink iree-compile binary into dist/bin/ - iree_compile_src = venv_dir / "bin" / "iree-compile" - bin_symlink = INSTALL_DIR / "bin" / "iree-compile" - bin_symlink.unlink(missing_ok=True) - print(f"Symlinking {bin_symlink} -> {iree_compile_src}") - bin_symlink.symlink_to(iree_compile_src) - - -def generate_local_environment_setup(): - """Generate an 'activate' script to set up the local machine with correct - $PATH and $LD_LIBRARY_PATH to use ThePebble installed programs.""" - bin_dir = INSTALL_DIR / "bin" - lib_dir = INSTALL_DIR / "lib" - script_content = f"""#!/bin/bash -# ThePebble environment activation script -# Usage: source {PEBBLE_DIR}/activate - -if [[ "${{BASH_SOURCE[0]}}" == "${{0}}" ]]; then - echo "Error: This script must be sourced, not executed." - echo "Usage: source {PEBBLE_DIR}/activate" - exit 1 -fi - -export PATH="{bin_dir}:$PATH" -export LD_LIBRARY_PATH="{lib_dir}:$LD_LIBRARY_PATH" - -echo "ThePebble environment activated." -""" - - activate_path = PEBBLE_DIR / "activate" - print(f"Writing {activate_path}...") - with open(activate_path, "w") as f: - f.write(script_content) - - -# ============================================================================== -# CI install and test fusilli-plugin -# ============================================================================== - - -def build_fusilli_plugin(): - """Build and install fusilli plugin to dist.""" - plugin_src = get_plugin_dir() - llvm_bin = INSTALL_DIR / "lib" / "llvm" / "bin" - - with tempfile.TemporaryDirectory() as tmpdir: - plugin_build = Path(tmpdir) - print(f"Building fusilli plugin from {plugin_src}...") - - cmake_args = [ - "cmake", - "-G", - "Ninja", - "-S", - str(plugin_src), - "-B", - str(plugin_build), - f"-DCMAKE_C_COMPILER={llvm_bin / 'clang'}", - f"-DCMAKE_CXX_COMPILER={llvm_bin / 'clang++'}", - f"-DCMAKE_INSTALL_PREFIX={INSTALL_DIR}", - f"-DCMAKE_PREFIX_PATH={INSTALL_DIR}", - "-DCMAKE_BUILD_TYPE=Release", - f"-DIREE_SOURCE_DIR={IREE_DIR}", - "-DIREE_USE_SYSTEM_DEPS=ON", - "-DHIP_PLATFORM=amd", - ] - subprocess.run(cmake_args, check=True) - subprocess.run(["cmake", "--build", str(plugin_build)], check=True) - subprocess.run(["cmake", "--install", str(plugin_build)], check=True) - - -def test_fusilli_plugin(): - """Run test_fusilli_plugin.py from TheRock.""" - # The test script expects THEROCK_BIN_DIR to point to the bin/ directory - bin_dir = INSTALL_DIR / "bin" - - # Create iree_tag_for_pip.txt. - # TheRock/iree-libs/post_hook_fusilliprovider.cmake would create this file - # when building in TheRock. - iree_version = get_iree_git_tag() - iree_tag_file = bin_dir / "fusilli_plugin_test_infra" / "iree_tag_for_pip.txt" - iree_tag_file.write_text(iree_version) - print(f"Created {iree_tag_file} with version {iree_version}") - - # Run TheRock's test_fusilliprovider.py - therock_dir = PEBBLE_DIR / "TheRock" - test_script = ( - therock_dir - / "build_tools" - / "github_actions" - / "test_executable_scripts" - / "test_fusilliprovider.py" - ) - - env = os.environ.copy() - env["THEROCK_BIN_DIR"] = str(bin_dir) - - # iree-libs/post_hook_fusilliprovider.cmake sets up RPATHs so that a .so in - # "lib/hipdnn_plugins/engines" will be found by tests that - # fusilli_plugin.so, and so that - # "lib/hipdnn_plugins/engines/fusilli_plugin.so" can find hip .so's in lib. - # In ThePebble we just use LD_LIBRARY_PATH. - lib_dir = INSTALL_DIR / "lib" - plugin_lib_dir = lib_dir / "hipdnn_plugins" / "engines" - ld_path = f"{lib_dir}:{plugin_lib_dir}" - if "LD_LIBRARY_PATH" in env: - ld_path = f"{ld_path}:{env['LD_LIBRARY_PATH']}" - env["LD_LIBRARY_PATH"] = ld_path - - print(f"Running {test_script}...") - subprocess.run(["python3", str(test_script)], env=env, check=True) - - -def main(): - parser = argparse.ArgumentParser( - description="ThePebble a simulacrum of TheRock for fusilli plugin dev environment setup" - ) - parser.add_argument( - "--setup", - action="store_true", - help="Setup deps as TheRock would, and crate CMake preset for local dev", - ) - parser.add_argument( - "--ci-install-and-test-fusilli-plugin", - action="store_true", - help="Build + install + test the plugin using TheRock's test script", - ) - args = parser.parse_args() - - if not args.setup and not args.ci_install_and_test_fusilli_plugin: - parser.print_help() - sys.exit(1) - - if args.setup: - config = load_config() - versions = config["versions"] - - # Start fresh - if PEBBLE_DIR.exists(): - print(f"Removing previous setup {PEBBLE_DIR}...") - shutil.rmtree(PEBBLE_DIR) - - # Run setup - PEBBLE_DIR.mkdir(parents=True, exist_ok=True) - setup_therock(versions["therock_git_ref"]) - install_hip(versions["hip_run_id"]) - build_hipdnn(versions["hipdnn_git_ref"]) - setup_iree(f"iree-{get_iree_git_tag()}") - build_fusilli() - generate_cmake_user_presets() - provide_iree_tools(get_iree_git_tag()) - generate_local_environment_setup() - - # Copy config to cache for validation checks - config_src = Path(__file__).parent / "thepebble_config.toml" - shutil.copy(config_src, CACHED_CONFIG) - - print(f"\nSetup complete.") - print(f"To activate the ThePebble local dev environment, run:") - print(f" source {PEBBLE_DIR}/activate") - - if args.ci_install_and_test_fusilli_plugin: - validate_config() - build_fusilli_plugin() - test_fusilli_plugin() - - -if __name__ == "__main__": - main() diff --git a/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml b/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml deleted file mode 100644 index 513ab9f2707..00000000000 --- a/dnn-providers/fusilli-provider/build_tools/thepebble_config.toml +++ /dev/null @@ -1,9 +0,0 @@ -[versions] -# Git ref (commit/tag/branch) for TheRock tooling -therock_git_ref = "main" - -# GitHub CI run ID for Hip artifacts (from ROCm/TheRock actions) -hip_run_id = "19089392286" - -# Git ref (commit/tag/branch) for rocm-libraries hipDNN -hipdnn_git_ref = "develop" From 291495d0818335126e0a2b90ec222f02d7cda6a1 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Tue, 10 Mar 2026 22:04:42 +0000 Subject: [PATCH 3/3] Address doc review --- dnn-providers/fusilli-provider/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dnn-providers/fusilli-provider/README.md b/dnn-providers/fusilli-provider/README.md index 7a3af43e789..595f4b2ea25 100644 --- a/dnn-providers/fusilli-provider/README.md +++ b/dnn-providers/fusilli-provider/README.md @@ -1,7 +1,7 @@ # Fusilli Plugin -Fusilli-Plugin: A Fusilli/IREE powered hipDNN plugin for graph JIT compilation. +Fusilli-Plugin: A Fusilli/IREE-powered hipDNN plugin for graph JIT compilation. :construction: **This project is under active development, many things don't work yet** :construction: -The plugin builds as a shared library (`fusilli_plugin.so`) providing a `hipDNN` [kernel engine plugin](https://github.com/ROCm/hipDNN/blob/develop/docs/PluginDevelopment.md#creating-a-kernel-engine-plugin) [API](https://github.com/ROCm/hipDNN/blob/839cf6c4bc6fe403d0ef72cb5d7df004e2004743/sdk/include/hipdnn_sdk/plugin/EnginePluginApi.h). +The plugin builds as a shared library (`fusilli_plugin.so`), providing a `hipDNN` [kernel engine plugin](https://github.com/ROCm/hipDNN/blob/develop/docs/PluginDevelopment.md#creating-a-kernel-engine-plugin) [API](https://github.com/ROCm/hipDNN/blob/839cf6c4bc6fe403d0ef72cb5d7df004e2004743/sdk/include/hipdnn_sdk/plugin/EnginePluginApi.h).