13 changes: 13 additions & 0 deletions backends/cuda/cuda_partitioner.py
@@ -16,6 +16,7 @@
PartitionResult,
)
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
from torch.export.exported_program import ExportedProgram


@@ -56,6 +57,18 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
tag_constant_data(exported_program)
tag_mutated_buffer(exported_program)

# Tag constant placeholders that have no users.
# tag_constant_data only tags constants whose users already carry a
# delegation_tag, but every constant must be tagged for this partition.
for node in exported_program.graph.nodes:
if node.op == "placeholder" and (
is_param(exported_program, node)
or is_buffer(exported_program, node)
or is_lifted_tensor_constant(exported_program, node)
):
if "delegation_tag" not in node.meta:
node.meta["delegation_tag"] = tag

return PartitionResult(
tagged_exported_program=exported_program, partition_tags=partition_tags
)
97 changes: 97 additions & 0 deletions examples/models/whisper/CMakeLists.txt
@@ -0,0 +1,97 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.24)
project(whisper_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Let files say "include <executorch/path/to/header.h>"
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# gflags is built in the ExecuTorch tree under third-party; point gflags_DIR there so find_package can locate it
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
executorch_target_link_options_shared_lib(executorch)

set(link_libraries executorch gflags)
set(_srcs runner.cpp main.cpp)

list(
APPEND
link_libraries
optimized_native_cpu_ops_lib
quantized_ops_lib
custom_ops
cpublas
eigen_blas
)
executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
executorch_target_link_options_shared_lib(quantized_ops_lib)
executorch_target_link_options_shared_lib(custom_ops)

# XNNPACK
if(TARGET xnnpack_backend)
set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
if(TARGET kleidiai)
list(APPEND xnnpack_backend_libs kleidiai)
endif()
list(APPEND link_libraries ${xnnpack_backend_libs})
executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

# Add LLM runner and extension module
if(NOT TARGET extension_llm_runner)
message(
FATAL_ERROR
"ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
)
endif()

# Needed for cpuinfo where it uses android specific log lib
if(ANDROID)
list(APPEND link_libraries log)
endif()

# Add the required ExecuTorch extensions for multimodal LLM runner
list(
APPEND
link_libraries
extension_llm_runner
extension_module
extension_data_loader
extension_tensor
extension_flat_tensor
)

# Link CUDA backend
if(EXECUTORCH_BUILD_CUDA)
find_package(CUDAToolkit REQUIRED)
list(APPEND link_libraries aoti_cuda)
executorch_target_link_options_shared_lib(aoti_cuda)
endif()

if(EXECUTORCH_BUILD_METAL)
list(APPEND link_libraries metal_backend)
executorch_target_link_options_shared_lib(metal_backend)
endif()

# Add tokenizers
list(APPEND link_libraries tokenizers::tokenizers)

add_executable(whisper_runner ${_srcs})

target_include_directories(whisper_runner PUBLIC ${_common_include_directories})

target_link_libraries(whisper_runner PUBLIC ${link_libraries})
target_compile_options(whisper_runner PUBLIC ${_common_compile_options})
69 changes: 69 additions & 0 deletions examples/models/whisper/README.md
@@ -0,0 +1,69 @@
# Whisper Runner

This directory hosts a lightweight C++ helper that drives Whisper models
exported to ExecuTorch. The `WhisperRunner` owns the `Module` instance that
wraps a bundled `.pte` program and optional `.ptd` weight file, loads the
`encoder` and `text_decoder` methods, and exposes a `transcribe()` loop that
streams decoded text pieces through a callback.

The runner assumes (see the verification sketch after this list):
- `model.pte` contains both Whisper encoder and decoder entry points named
`encoder` and `text_decoder`.
- External parameters (for example KV cache blocks) are stored in a companion
`model.ptd`.
- A tokenizer JSON compatible with the ExecuTorch tokenizers shim is available.
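
A quick way to confirm an exported `.pte` satisfies these assumptions is to list
its entry points with the ExecuTorch `Module` API. The sketch below is
illustrative only; the file name is a placeholder.

```cpp
#include <executorch/extension/module/module.h>

#include <iostream>

int main() {
  // The constructor does not load anything yet; method_names() loads the
  // program and reports its exported entry points.
  executorch::extension::Module program("model.pte");

  // The runner expects both "encoder" and "text_decoder" to be present.
  auto names = program.method_names();
  if (!names.ok()) {
    std::cerr << "Failed to read method names from model.pte\n";
    return 1;
  }
  for (const auto& name : *names) {
    std::cout << name << "\n";
  }
  return 0;
}
```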

Audio preprocessing is not part of the runner itself. To transform raw audio
into the mel features expected by the encoder, reuse the pattern in
`examples/models/voxtral/multimodal.cpp`, which loads a `preprocessor.pte`
module to generate the spectrogram tensor.
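
For reference, a minimal sketch of that pattern using the ExecuTorch `Module`
and `TensorPtr` APIs is shown below. The `forward` method name, the mono
float32 input layout, and the helper name are assumptions modeled on the
voxtral example, not part of this runner.

```cpp
#include <executorch/extension/module/module.h>
#include <executorch/extension/tensor/tensor_ptr.h>

#include <vector>

using executorch::extension::Module;
using executorch::extension::from_blob;

// Sketch: turn raw PCM samples into the mel-spectrogram tensor the encoder
// expects by running a separately exported preprocessor.pte.
executorch::runtime::Result<std::vector<executorch::runtime::EValue>>
compute_mel_features(Module& preprocessor, std::vector<float>& pcm) {
  // Wrap the audio buffer in a 1-D float tensor without copying it.
  auto audio = from_blob(
      pcm.data(),
      {static_cast<executorch::aten::SizesType>(pcm.size())});
  // The first output EValue holds the spectrogram tensor to pass to
  // WhisperRunner::transcribe() as `features`.
  return preprocessor.execute("forward", *audio);
}
```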

## Build

```bash
cmake -G Ninja \
-B cmake-out/examples/models/whisper \
-S examples/models/whisper
cmake --build cmake-out/examples/models/whisper -j
```

The build produces a `whisper_runner` executable that links in the standard
ExecuTorch runtime libraries and the tokenizer target (`tokenizers::tokenizers`).
To embed the runner in your own application, compile `runner.cpp`/`runner.h`
into it and link the same dependencies.

## Usage

```cpp
#include <executorch/examples/models/whisper/runner.h>
#include <executorch/extension/tensor/tensor_ptr.h>

using example::WhisperRunner;
using example::WhisperTranscribeConfig;

WhisperRunner runner("model.pte", "model.ptd", "tokenizer.json");
ET_CHECK_OK(runner.load());

// `features` is the mel spectrogram tensor produced by the preprocessor.
executorch::aten::Tensor features = load_features_somehow();

WhisperTranscribeConfig config;
config.max_new_tokens = 128; // stop after 128 generated tokens
config.temperature = 0.7f; // optional: enable stochastic sampling

auto tokens_result = runner.transcribe(
features,
config,
[](const std::string& piece) {
std::cout << piece;
});

if (!tokens_result.ok()) {
ET_LOG(Error, "Transcription failed: %d", static_cast<int>(tokens_result.error()));
}
```

`transcribe()` returns the full token history (prompt + generated tokens) and
invokes the callback every time a new token is emitted. Provide a non-empty
`decoder_input_ids` vector if you want to seed the decoder with a custom prompt,
and override `WhisperTranscribeConfig::eos_token_ids` when the model exposes
custom termination ids.
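
As a rough illustration (the field names follow the description above, but
their exact types and the token ids shown are assumptions):

```cpp
WhisperTranscribeConfig config;

// Placeholder ids; look up the real special-token ids in tokenizer.json.
config.decoder_input_ids = {1, 2, 3};  // e.g. start-of-transcript, language, task
config.eos_token_ids = {4};            // e.g. end-of-text
```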