diff --git a/.github/actions/build-tt-mlir-action/action.yaml b/.github/actions/build-tt-mlir-action/action.yaml index ed88f6291b..700e970756 100644 --- a/.github/actions/build-tt-mlir-action/action.yaml +++ b/.github/actions/build-tt-mlir-action/action.yaml @@ -10,6 +10,9 @@ inputs: enable-emitc: description: "Enable EmitC tests" required: true + enable-async: + description: "Enable async ttnn tests" + required: true build-name: description: "A unique name for this build (e.g., 'run' or 'perf')" required: true @@ -109,7 +112,7 @@ runs: - name: Upload Test Report uses: actions/upload-artifact@v4 with: - name: test-reports-${{ inputs.runs-on }}-perf-${{ inputs.enable-perf }}-op_model-${{ inputs.enable-op-model }}-emitc-${{ inputs.enable-emitc }} + name: test-reports-${{ inputs.runs-on }}-perf-${{ inputs.enable-perf }}-op_model-${{ inputs.enable-op-model }}-emitc-${{ inputs.enable-emitc }}-async-${{ inputs.enable-async }} path: ${{ inputs.test_report_path }} - name: Upload ttrt .whl diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b55fd101e7..3e6b524bd7 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -97,9 +97,10 @@ jobs: fail-fast: false matrix: build: [ - {runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, enable_emitc: OFF, name: "run", ttrt_flags: ""}, - {runs-on: ubuntu-latest, enable_perf: ON, enable_op_model: OFF, enable_emitc: OFF, name: "perf", ttrt_flags: ""}, - {runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, enable_emitc: ON, name: "emitc", ttrt_flags: ""}, + {runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, enable_emitc: OFF, enable_async: OFF, name: "run", ttrt_flags: ""}, + {runs-on: ubuntu-latest, enable_perf: ON, enable_op_model: OFF, enable_emitc: OFF, enable_async: OFF, name: "perf", ttrt_flags: ""}, + {runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, enable_emitc: ON, enable_async: OFF, name: 
"emitc", ttrt_flags: ""}, + {runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, enable_emitc: OFF, enable_async: ON, name: "async", ttrt_flags: ""}, ] name: Build and test tt-mlir (compute machine) @@ -118,7 +119,7 @@ jobs: id: strings shell: bash env: - job-name: "Build tt-mlir (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_op_model }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.name }})" + job-name: "Build tt-mlir (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_op_model }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.enable_async }}, ${{ matrix.build.name }})" run: | echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT" echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT" @@ -140,7 +141,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2 with: create-symlink: true - key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-${{ env.SDK_VERSION }} + key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-emitc-${{ matrix.build.enable_emitc }}-async-${{ matrix.build.enable_async }}-${{ env.SDK_VERSION }} - name: Run build and test tt-mlir uses: ./.github/actions/build-tt-mlir-action @@ -148,6 +149,7 @@ jobs: enable-perf: ${{ matrix.build.enable_perf }} enable-op-model: ${{ matrix.build.enable_op_model }} enable-emitc: ${{ matrix.build.enable_emitc }} + enable-async: ${{ matrix.build.enable_async }} build-name: ${{ matrix.build.name }} build-output-dir: ${{ steps.strings.outputs.build-output-dir }} install-output-dir: ${{ steps.strings.outputs.install-output-dir }} @@ -166,13 +168,15 @@ jobs: fail-fast: false matrix: build: [ - {runs-on: n150, enable_perf: OFF, enable_emitc: OFF, name: "run", build_name: "run", ttrt_flags: "--non-zero"}, - {runs-on: n150, enable_perf: ON, enable_emitc: OFF, name: "perf", build_name: "perf"}, - {runs-on: n150, 
enable_perf: OFF, enable_emitc: ON, name: "run", build_name: "emitc", ttrt_flags: "--emitc"}, - {runs-on: n300, enable_perf: OFF, enable_emitc: OFF, name: "run", build_name: "run", ttrt_flags: "--non-zero"}, - {runs-on: n300, enable_perf: ON, enable_emitc: OFF, name: "perf", build_name: "perf"}, + {runs-on: n150, enable_perf: OFF, enable_emitc: OFF, enable_async: OFF, name: "run", build_name: "run", ttrt_flags: "--non-zero"}, + {runs-on: n150, enable_perf: ON, enable_emitc: OFF, enable_async: OFF, name: "perf", build_name: "perf"}, + {runs-on: n150, enable_perf: OFF, enable_emitc: ON, enable_async: OFF, name: "run", build_name: "emitc", ttrt_flags: "--emitc"}, + {runs-on: n150, enable_perf: OFF, enable_emitc: OFF, enable_async: ON, name: "run", build_name: "async", ttrt_flags: "--non-zero --enable-async-ttnn"}, + {runs-on: n300, enable_perf: OFF, enable_emitc: OFF, enable_async: OFF, name: "run", build_name: "run", ttrt_flags: "--non-zero"}, + {runs-on: n300, enable_perf: ON, enable_emitc: OFF, enable_async: OFF, name: "perf", build_name: "perf"}, + {runs-on: n300, enable_perf: OFF, enable_emitc: OFF, enable_async: ON, name: "run", build_name: "async", ttrt_flags: "--non-zero --enable-async-ttnn"}, ] - name: "run-tests (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.build_name }})" + name: "run-tests (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.enable_async }}, ${{ matrix.build.build_name }})" runs-on: - in-service @@ -197,7 +201,7 @@ jobs: id: strings shell: bash env: - job-name: "run-tests (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.build_name }})" + job-name: "run-tests (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.enable_async }}, ${{ matrix.build.build_name }})" run: | echo 
"work-dir=$(pwd)" >> "$GITHUB_OUTPUT" echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT" @@ -273,11 +277,18 @@ jobs: - name: Run functional tests shell: bash - if: matrix.build.enable_perf == 'OFF' && matrix.build.enable_emitc == 'OFF' + if: matrix.build.enable_perf == 'OFF' && matrix.build.enable_emitc == 'OFF' && matrix.build.enable_async == 'OFF' run: | source env/activate ttrt ${{ matrix.build.name }} ${{ matrix.build.ttrt_flags }} ${{ steps.strings.outputs.build-output-dir }}/test/ttmlir/Silicon + - name: Run async TTNN tests + shell: bash + if: matrix.build.enable_async == 'ON' + run: | + source env/activate + ttrt ${{ matrix.build.name }} ${{ matrix.build.ttrt_flags }} ${{ steps.strings.outputs.build-output-dir }}/test/ttmlir/Silicon/TTNN + - name: Run perf tests shell: bash if: matrix.build.enable_perf == 'ON' @@ -297,9 +308,8 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: ${{ matrix.build.runs-on }}_${{ matrix.build.enable_perf }}_${{ matrix.build.enable_emitc }}_${{ matrix.build.build_name }}_results.json + name: ${{ matrix.build.runs-on }}_${{ matrix.build.enable_perf }}_${{ matrix.build.enable_emitc }}_${{ matrix.build.enable_async }}_${{ matrix.build.build_name }}_results.json path: ${{ matrix.build.build_name }}_results.json - - name: Upload Test Report xml uses: actions/upload-artifact@v4 if: success() || failure() @@ -616,7 +626,7 @@ jobs: fail-fast: false matrix: build: [ - {runs-on: n300, enable_perf: OFF, enable_op_model: ON, enable_emitc: OFF, name: "op_model", build_name: "op_model", ttrt_flags: ""} + {runs-on: n300, enable_perf: OFF, enable_op_model: ON, enable_emitc: OFF, enable_async: OFF, name: "op_model", build_name: "op_model", ttrt_flags: ""} ] name: Run build and test tt-mlir (TT machine) @@ -642,7 +652,7 @@ jobs: id: strings shell: bash env: - job-name: "Build tt-mlir (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_op_model }}, ${{ matrix.build.enable_emitc }}, ${{ 
matrix.build.build_name }})" + job-name: "Build tt-mlir (${{ matrix.build.runs-on }}, ${{ matrix.build.enable_perf }}, ${{ matrix.build.enable_op_model }}, ${{ matrix.build.enable_emitc }}, ${{ matrix.build.enable_async }}, ${{ matrix.build.build_name }})" run: | echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT" echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT" @@ -664,7 +674,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2 with: create-symlink: true - key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-${{ matrix.build.enable_emitc }}-${{ env.SDK_VERSION }} + key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-${{ matrix.build.enable_emitc }}-async-${{ matrix.build.enable_async }}-${{ env.SDK_VERSION }} # Build project - name: Run build and test tt-mlir @@ -673,6 +683,7 @@ jobs: enable-perf: ${{ matrix.build.enable_perf }} enable-op-model: ${{ matrix.build.enable_op_model }} enable-emitc: ${{ matrix.build.enable_emitc }} + enable-async: ${{ matrix.build.enable_async }} build-name: ${{ matrix.build.build_name }} build-output-dir: ${{ steps.strings.outputs.build-output-dir }} install-output-dir: ${{ steps.strings.outputs.install-output-dir }} diff --git a/runtime/include/tt/runtime/detail/debug.h b/runtime/include/tt/runtime/detail/debug.h index 8d598864c9..bdaa81fbaf 100644 --- a/runtime/include/tt/runtime/detail/debug.h +++ b/runtime/include/tt/runtime/detail/debug.h @@ -19,28 +19,25 @@ struct Env { #else constexpr static Env #endif - get(bool loadKernelsFromDisk = false, bool enableAsyncTTNN = false) + get(bool loadKernelsFromDisk = false) #if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1 ; #else { - return Env(false, false); + return Env(false); } #endif bool loadKernelsFromDisk; - bool enableAsyncTTNN; private: - constexpr Env(bool loadKernelsFromDisk, bool enableAsyncTTNN) - : loadKernelsFromDisk(loadKernelsFromDisk), 
- enableAsyncTTNN(enableAsyncTTNN) {} + constexpr Env(bool loadKernelsFromDisk) + : loadKernelsFromDisk(loadKernelsFromDisk) {} }; inline std::ostream &operator<<(std::ostream &os, Env const &env) { os << "debug::Env{\n" - << "\t" << "loadKernelsFromDisk: " << env.loadKernelsFromDisk << ",\n" - << "\t" << "enableAsyncTTNN: " << env.enableAsyncTTNN << "\n" + << "\t" << "loadKernelsFromDisk: " << env.loadKernelsFromDisk << "\n" << "}"; return os; } diff --git a/runtime/include/tt/runtime/detail/ttmetal.h b/runtime/include/tt/runtime/detail/ttmetal.h index cfb779f9ff..6dbd9aa1b8 100644 --- a/runtime/include/tt/runtime/detail/ttmetal.h +++ b/runtime/include/tt/runtime/detail/ttmetal.h @@ -38,7 +38,8 @@ size_t getNumAvailableDevices(); Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs = 1, std::optional l1SmallSize = std::nullopt, - std::optional dispatchCoreType = std::nullopt); + std::optional dispatchCoreType = std::nullopt, + [[maybe_unused]] std::optional enableAsyncTTNN = std::nullopt); void closeDevice(Device device); diff --git a/runtime/include/tt/runtime/detail/ttnn.h b/runtime/include/tt/runtime/detail/ttnn.h index 99e5cf1e6b..74c29948c7 100644 --- a/runtime/include/tt/runtime/detail/ttnn.h +++ b/runtime/include/tt/runtime/detail/ttnn.h @@ -92,7 +92,8 @@ size_t getNumAvailableDevices(); Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs = 1, std::optional l1SmallSize = std::nullopt, - std::optional dispatchCoreType = std::nullopt); + std::optional dispatchCoreType = std::nullopt, + std::optional enableAsyncTTNN = std::nullopt); void closeDevice(Device device); diff --git a/runtime/include/tt/runtime/runtime.h b/runtime/include/tt/runtime/runtime.h index 6cad5293a1..de57f83424 100644 --- a/runtime/include/tt/runtime/runtime.h +++ b/runtime/include/tt/runtime/runtime.h @@ -87,7 +87,8 @@ size_t getNumAvailableDevices(); Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs = 1, std::optional l1SmallSize = std::nullopt, - 
std::optional dispatchCoreType = std::nullopt); + std::optional dispatchCoreType = std::nullopt, + std::optional enableAsyncTTNN = std::nullopt); void closeDevice(Device device); diff --git a/runtime/lib/common/debug.cpp b/runtime/lib/common/debug.cpp index 34a274ca87..35be42f158 100644 --- a/runtime/lib/common/debug.cpp +++ b/runtime/lib/common/debug.cpp @@ -8,8 +8,8 @@ namespace tt::runtime::debug { -Env const &Env::get(bool loadKernelsFromDisk, bool enableAsyncTTNN) { - static Env config(loadKernelsFromDisk, enableAsyncTTNN); +Env const &Env::get(bool loadKernelsFromDisk) { + static Env config(loadKernelsFromDisk); return config; } diff --git a/runtime/lib/runtime.cpp b/runtime/lib/runtime.cpp index e7e990bd31..2d1574188a 100644 --- a/runtime/lib/runtime.cpp +++ b/runtime/lib/runtime.cpp @@ -235,18 +235,19 @@ size_t getNumAvailableDevices() { Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs, std::optional l1SmallSize, - std::optional dispatchCoreType) { + std::optional dispatchCoreType, + std::optional enableAsyncTTNN) { #if defined(TT_RUNTIME_ENABLE_TTNN) if (getCurrentRuntime() == DeviceRuntime::TTNN) { return ::tt::runtime::ttnn::openDevice(deviceIds, numHWCQs, l1SmallSize, - dispatchCoreType); + dispatchCoreType, enableAsyncTTNN); } #endif #if defined(TT_RUNTIME_ENABLE_TTMETAL) if (getCurrentRuntime() == DeviceRuntime::TTMetal) { - return ::tt::runtime::ttmetal::openDevice(deviceIds, numHWCQs, l1SmallSize, - dispatchCoreType); + return ::tt::runtime::ttmetal::openDevice( + deviceIds, numHWCQs, l1SmallSize, dispatchCoreType, enableAsyncTTNN); } #endif LOG_FATAL("runtime is not enabled"); diff --git a/runtime/lib/ttmetal/runtime.cpp b/runtime/lib/ttmetal/runtime.cpp index 3e1084babd..9c3a4bbcb8 100644 --- a/runtime/lib/ttmetal/runtime.cpp +++ b/runtime/lib/ttmetal/runtime.cpp @@ -82,7 +82,8 @@ size_t getNumAvailableDevices() { Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs, std::optional l1SmallSize, - std::optional 
dispatchCoreType) { + std::optional dispatchCoreType, + [[maybe_unused]] std::optional enableAsyncTTNN) { LOG_ASSERT(deviceIds.size(), "No devices specified"); ::tt::tt_metal::DispatchCoreType type = diff --git a/runtime/lib/ttnn/runtime.cpp b/runtime/lib/ttnn/runtime.cpp index 5fce15329e..ac741d6b83 100644 --- a/runtime/lib/ttnn/runtime.cpp +++ b/runtime/lib/ttnn/runtime.cpp @@ -189,7 +189,8 @@ size_t getNumAvailableDevices() { Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs, std::optional l1SmallSize, - std::optional dispatchCoreType) { + std::optional dispatchCoreType, + std::optional enableAsyncTTNN) { ::tt::tt_metal::DispatchCoreType type = tt::runtime::common::getDispatchCoreType(dispatchCoreType); @@ -205,9 +206,9 @@ Device openDevice(DeviceIds const &deviceIds, size_t numHWCQs, LOG_INFO("Grid size = { ", logical_grid_size.x, ", ", logical_grid_size.y, "}"); - bool enableAsync = debug::Env::get().enableAsyncTTNN; + bool enableAsyncValue = enableAsyncTTNN.value_or(false); for (::ttnn::IDevice *device : meshDevice->get_devices()) { - device->enable_async(enableAsync); + device->enable_async(enableAsyncValue); } return Device(std::static_pointer_cast(meshDevice), diff --git a/runtime/tools/python/ttrt/common/run.py b/runtime/tools/python/ttrt/common/run.py index 624610bc42..1bf2e8ed03 100644 --- a/runtime/tools/python/ttrt/common/run.py +++ b/runtime/tools/python/ttrt/common/run.py @@ -385,9 +385,7 @@ def _execute(binaries): self.logging.warning(f"no binaries found to run - returning early") return - debug_env = ttrt.runtime.DebugEnv.get( - self["--load-kernels-from-disk"], self["--enable-async-ttnn"] - ) + debug_env = ttrt.runtime.DebugEnv.get(self["--load-kernels-from-disk"]) self.logging.debug(f"setting tt runtime debug env={debug_env}") workaround_env = ttrt.runtime.WorkaroundEnv.get( not self["--disable-maxpool2d-preshard"], @@ -401,7 +399,9 @@ def _execute(binaries): ttrt.runtime.set_compatible_runtime(binaries[0].fbb) current_runtime = 
ttrt.runtime.get_current_runtime() self.logging.debug(f"opening devices={self.query.device_ids}") - device = ttrt.runtime.open_device(self.query.device_ids) + device = ttrt.runtime.open_device( + self.query.device_ids, enable_async_ttnn=self["--enable-async-ttnn"] + ) callback_runtime_config = CallbackRuntimeConfig( device, diff --git a/runtime/tools/python/ttrt/runtime/module.cpp b/runtime/tools/python/ttrt/runtime/module.cpp index 1189258eb5..3bb1d88299 100644 --- a/runtime/tools/python/ttrt/runtime/module.cpp +++ b/runtime/tools/python/ttrt/runtime/module.cpp @@ -123,6 +123,7 @@ PYBIND11_MODULE(_C, m) { py::arg("num_hw_cqs") = size_t{1}, py::arg("l1_small_size") = py::none(), py::arg("dispatch_core_type") = py::none(), + py::arg("enable_async_ttnn") = py::none(), "Open a mesh of devices for execution"); m.def("close_device", &tt::runtime::closeDevice, "Close a mesh device"); m.def("to_host", &tt::runtime::toHost, py::arg("tensor"),