4 changes: 2 additions & 2 deletions backends/openvino/README.md
@@ -109,9 +109,9 @@ Follow the steps below to setup your build environment:
```bash
./openvino_build.sh --cpp_runtime
```
**Build C++ Llama Runner**: First, ensure the C++ runtime libraries are built by following the earlier instructions. Then, run the `openvino_build.sh` script with the `--llama_runner flag` to compile the LlaMA runner as shown the below command, which enables executing inference with models exported using export_llama. The resulting binary is located at: `<executorch_root>/cmake-out/examples/models/llama/llama_main`
**Build C++ Runtime Libraries with LLM Extension**: Run the `openvino_build.sh` script with the `--cpp_runtime_llm` flag to build the C++ runtime libraries with the LLM extension, as shown in the command below. Use this option instead of `--cpp_runtime` when LLM extension support is required, as it is by the LLM examples.
```bash
./openvino_build.sh --llama_runner
./openvino_build.sh --cpp_runtime_llm
```

For more information about ExecuTorch environment setup, refer to the [Environment Setup](https://pytorch.org/executorch/main/getting-started-setup#environment-setup) guide.
57 changes: 22 additions & 35 deletions backends/openvino/scripts/openvino_build.sh
@@ -16,54 +16,42 @@ install_requirements() {
build_cpp_runtime() {
echo "Building C++ Runtime Libraries"

local llm_enabled=${1:-0}

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"
rm -rf "${build_dir}"

CMAKE_ARGS=(
"-DCMAKE_BUILD_TYPE=Release"
"-DEXECUTORCH_BUILD_OPENVINO=ON"
"-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON"
"-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON"
"-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON"
"-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON"
"-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON"
"-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
)

if [[ "$llm_enabled" -eq 1 ]]; then
CMAKE_ARGS+=("-DEXECUTORCH_BUILD_EXTENSION_LLM=ON" "-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON")
fi

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPENVINO=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
${CMAKE_ARGS[@]} \
-B"${build_dir}"


# Build the project
cmake --build ${build_dir} --target install --config Release -j$(nproc)
}

build_llama_runner() {
echo "Building Export Llama Runner"

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${build_dir}"/examples/models/llama \
examples/models/llama
# Build the export llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
}

build_python_enabled() {
echo "Building Python Package with Pybinding"

@@ -92,16 +80,15 @@ main() {
install_requirements
build_python_enabled
build_cpp_runtime
build_llama_runner

# If the first argument is --cpp_runtime, build libraries for C++ runtime
elif [[ "$build_type" == "--cpp_runtime" ]]; then
build_cpp_runtime

# If the first arguments is --llama_runner, build export llama runner binary
# Note: c++ runtime with openvino backend should be built before building export llama runner
elif [[ "$build_type" == "--llama_runner" ]]; then
build_llama_runner
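# If the first argument is --cpp_runtime_llm, build C++ runtime libraries with the LLM extension (needed by the LLM examples)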
elif [[ "$build_type" == "--cpp_runtime_llm" ]]; then
build_cpp_runtime 1

# If the first argument is --enable_python, build python package with python bindings
elif [[ "$build_type" == "--enable_python" ]]; then
14 changes: 3 additions & 11 deletions examples/models/yolo12/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5)

project(Yolo12DetectionDemo VERSION 0.1)

option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" ON)
option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" OFF)
option(USE_XNNPACK_BACKEND "Build the tutorial with the XNNPACK backend" OFF)

set(CMAKE_INCLUDE_CURRENT_DIR ON)
@@ -38,21 +38,13 @@ list(APPEND link_libraries portable_ops_lib portable_kernels)
executorch_target_link_options_shared_lib(portable_ops_lib)

if(USE_XNNPACK_BACKEND)
set(xnnpack_backend_libs xnnpack_backend XNNPACK microkernels-prod)
set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
list(APPEND link_libraries ${xnnpack_backend_libs})
executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

if(USE_OPENVINO_BACKEND)
add_subdirectory(${EXECUTORCH_ROOT}/backends/openvino openvino_backend)

target_include_directories(
openvino_backend
INTERFACE
${CMAKE_CURRENT_BINARY_DIR}/../../include
${CMAKE_CURRENT_BINARY_DIR}/../../include/executorch/runtime/core/portable_type/c10
${CMAKE_CURRENT_BINARY_DIR}/../../lib
)
find_package(OpenVINO REQUIRED)
list(APPEND link_libraries openvino_backend)
executorch_target_link_options_shared_lib(openvino_backend)
endif()
2 changes: 1 addition & 1 deletion examples/models/yolo12/requirements.txt
@@ -1 +1 @@
ultralytics==8.3.97
ultralytics==8.3.196
14 changes: 10 additions & 4 deletions examples/openvino/llama/README.md
@@ -28,13 +28,19 @@ python -m executorch.extension.llm.export.export_llm \
The OpenVINO backend also offers quantization support for Llama models when exporting the model. The available modes are INT4 group-wise and per-channel weight compression, and INT8 per-channel weight compression. Quantization is enabled by setting the `pt2e_quantize` option under `quantization` in the `llama3_2_ov_4wo.yaml` file: use `openvino_4wo` for INT4 or `openvino_8wo` for INT8 weight compression. It is set to `openvino_4wo` in `llama3_2_ov_4wo.yaml` by default. To change the group size, set the `group_size` option under `quantization` in the same file; the default group size of 128 achieves optimal performance with the NPU.
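For reference, a minimal sketch of the relevant section of `llama3_2_ov_4wo.yaml` is shown below. The `quantization`, `pt2e_quantize`, and `group_size` keys come from the description above; the exact surrounding structure of the config file is an assumption.
```yaml
# Illustrative sketch only: the keys below follow the options described above,
# but the full layout of llama3_2_ov_4wo.yaml may differ.
quantization:
  pt2e_quantize: openvino_4wo   # openvino_4wo = INT4, openvino_8wo = INT8 weight compression
  group_size: 128               # group size for INT4 group-wise compression; 128 is the default
```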

## Build OpenVINO C++ Runtime with Llama Runner:
First, build the backend libraries by executing the script below in `<executorch_root>/backends/openvino/scripts` folder:
First, build the backend libraries with the LLM extension by executing the script below in the `<executorch_root>/backends/openvino/scripts` folder:
```bash
./openvino_build.sh --cpp_runtime
./openvino_build.sh --cpp_runtime_llm
```
Then, build the llama runner by executing the script below (with `--llama_runner` argument) also in `<executorch_root>/backends/openvino/scripts` folder:
Then, build the llama runner by executing the commands below in the `<executorch_root>` folder:
```bash
./openvino_build.sh --llama_runner
# Configure the project with CMake
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/examples/models/llama \
examples/models/llama
# Build the llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
```
The executable is saved in `<executorch_root>/cmake-out/examples/models/llama/llama_main`
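As a quick sanity check, the runner can then be invoked with an exported model. The flag names below (`--model_path`, `--tokenizer_path`, `--prompt`) are assumptions based on the usual `llama_main` options and are shown for illustration only; consult the runner's help output for the exact flags.
```bash
# Illustrative invocation (flag names are assumptions; verify with --help).
./cmake-out/examples/models/llama/llama_main \
    --model_path=<path_to_exported_model>.pte \
    --tokenizer_path=<path_to_tokenizer> \
    --prompt="Once upon a time"
```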
