4 changes: 2 additions & 2 deletions backends/openvino/README.md
@@ -109,9 +109,9 @@ Follow the steps below to setup your build environment:
```bash
./openvino_build.sh --cpp_runtime
```
**Build C++ Llama Runner**: First, ensure the C++ runtime libraries are built by following the earlier instructions. Then, run the `openvino_build.sh` script with the `--llama_runner flag` to compile the LlaMA runner as shown the below command, which enables executing inference with models exported using export_llama. The resulting binary is located at: `<executorch_root>/cmake-out/examples/models/llama/llama_main`
**Build C++ Runtime Libraries with LLM Extension**: Run the `openvino_build.sh` script with the `--cpp_runtime_llm` flag to build the C++ runtime libraries with the LLM extension, as shown in the command below. Use this option instead of `--cpp_runtime` when LLM extension support is needed, as it is for the LLM examples.
```bash
./openvino_build.sh --llama_runner
./openvino_build.sh --cpp_runtime_llm
```

For more information about ExecuTorch environment setup, refer to the [Environment Setup](https://pytorch.org/executorch/main/getting-started-setup#environment-setup) guide.
1 change: 0 additions & 1 deletion backends/openvino/requirements.txt
@@ -1,2 +1 @@
transformers
git+https://github.com/openvinotoolkit/nncf@3d753ac#egg=nncf
76 changes: 31 additions & 45 deletions backends/openvino/scripts/openvino_build.sh
@@ -7,69 +7,55 @@ set -e
EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../../..")
echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT}

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"

install_requirements() {
echo "Installing Requirements For OpenVINO Backend"
cd "$EXECUTORCH_ROOT"
pip install -r backends/openvino/requirements.txt
}

build_cpp_runtime() {
echo "Building C++ Runtime Libraries"

local llm_enabled=${1:-0}

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"
rm -rf "${build_dir}"

CMAKE_ARGS=(
"-DCMAKE_BUILD_TYPE=Release"
"-DEXECUTORCH_BUILD_OPENVINO=ON"
"-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON"
"-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON"
"-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON"
"-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON"
"-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON"
"-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
)

if [[ "$llm_enabled" -eq 1 ]]; then
CMAKE_ARGS+=("-DEXECUTORCH_BUILD_EXTENSION_LLM=ON" "-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON")
fi

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPENVINO=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
"${CMAKE_ARGS[@]}" \
-B"${build_dir}"


# Build the project
cmake --build ${build_dir} --target install --config Release -j$(nproc)
}

build_llama_runner() {
echo "Building Export Llama Runner"

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${build_dir}"/examples/models/llama \
examples/models/llama
# Build the export llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
}

build_python_enabled() {
echo "Building Python Package with Pybinding"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"
./install_executorch.sh --clean

# Set parameters to configure the project with CMake
# Note: Add any additional configuration options you need here
@@ -87,23 +73,23 @@ build_python_enabled() {
main() {
build_type=${1:-"--build_all"}

# If the first arguments is --build_all (default), build python package, C++ runtime, and llama runner binary
# If the first argument is --build_all (default), build the python package and C++ runtime
if [[ -z "$build_type" || "$build_type" == "--build_all" ]]; then
./install_executorch.sh --clean
install_requirements
build_python_enabled
build_cpp_runtime
build_llama_runner

# If the first arguments is --cpp_runtime, build libraries for C++ runtime
# If the first argument is --cpp_runtime, build libraries for C++ runtime
elif [[ "$build_type" == "--cpp_runtime" ]]; then
build_cpp_runtime

# If the first arguments is --llama_runner, build export llama runner binary
# Note: c++ runtime with openvino backend should be built before building export llama runner
elif [[ "$build_type" == "--llama_runner" ]]; then
build_llama_runner
# If the first argument is --cpp_runtime_llm, build C++ runtime with llm extension
# Note: c++ runtime with openvino backend should be built before building llama runner
elif [[ "$build_type" == "--cpp_runtime_llm" ]]; then
build_cpp_runtime 1

# If the first arguments is --enable_python, build python package with python bindings
# If the first argument is --enable_python, build python package with python bindings
elif [[ "$build_type" == "--enable_python" ]]; then
install_requirements
build_python_enabled
14 changes: 3 additions & 11 deletions examples/models/yolo12/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5)

project(Yolo12DetectionDemo VERSION 0.1)

option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" ON)
option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" OFF)
option(USE_XNNPACK_BACKEND "Build the tutorial with the XNNPACK backend" OFF)

set(CMAKE_INCLUDE_CURRENT_DIR ON)
@@ -38,21 +38,13 @@ list(APPEND link_libraries portable_ops_lib portable_kernels)
executorch_target_link_options_shared_lib(portable_ops_lib)

if(USE_XNNPACK_BACKEND)
set(xnnpack_backend_libs xnnpack_backend XNNPACK microkernels-prod)
set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
list(APPEND link_libraries ${xnnpack_backend_libs})
executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

if(USE_OPENVINO_BACKEND)
add_subdirectory(${EXECUTORCH_ROOT}/backends/openvino openvino_backend)

target_include_directories(
openvino_backend
INTERFACE
${CMAKE_CURRENT_BINARY_DIR}/../../include
${CMAKE_CURRENT_BINARY_DIR}/../../include/executorch/runtime/core/portable_type/c10
${CMAKE_CURRENT_BINARY_DIR}/../../lib
)
find_package(OpenVINO REQUIRED)
list(APPEND link_libraries openvino_backend)
executorch_target_link_options_shared_lib(openvino_backend)
endif()
2 changes: 1 addition & 1 deletion examples/models/yolo12/requirements.txt
@@ -1 +1 @@
ultralytics==8.3.97
ultralytics==8.3.196
14 changes: 10 additions & 4 deletions examples/openvino/llama/README.md
@@ -28,13 +28,19 @@ python -m executorch.extension.llm.export.export_llm \
The OpenVINO backend also offers quantization support for Llama models when exporting the model. The available modes are INT4 group-wise and per-channel weight compression and INT8 per-channel weight compression. Select a mode by setting the `pt2e_quantize` option under `quantization` in the `llama3_2_ov_4wo.yaml` file: use `openvino_4wo` for INT4 or `openvino_8wo` for INT8 weight compression (the file defaults to `openvino_4wo`). To change the group size, set the `group_size` option under `quantization`; the default of 128 is chosen for optimal performance with the NPU.
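For reference, a minimal sketch of the relevant `quantization` section of `llama3_2_ov_4wo.yaml` is shown below; only the `pt2e_quantize` and `group_size` keys come from the description above, and the surrounding structure of the file is an assumption.
```yaml
# Hypothetical excerpt of llama3_2_ov_4wo.yaml; other sections omitted.
quantization:
  pt2e_quantize: openvino_4wo   # or openvino_8wo for INT8 per-channel weight compression
  group_size: 128               # group size for INT4 group-wise compression; 128 suits the NPU
```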

## Build OpenVINO C++ Runtime with Llama Runner:
First, build the backend libraries by executing the script below in `<executorch_root>/backends/openvino/scripts` folder:
First, build the backend libraries with the LLM extension by executing the script below in the `<executorch_root>/backends/openvino/scripts` folder:
```bash
./openvino_build.sh --cpp_runtime
./openvino_build.sh --cpp_runtime_llm
```
Then, build the llama runner by executing the script below (with `--llama_runner` argument) also in `<executorch_root>/backends/openvino/scripts` folder:
Then, build the llama runner by executing the commands below in the `<executorch_root>` folder:
```bash
./openvino_build.sh --llama_runner
# Configure the project with CMake
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/examples/models/llama \
examples/models/llama
# Build the llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
```
The executable is saved at `<executorch_root>/cmake-out/examples/models/llama/llama_main`.
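As a rough usage sketch (the flag names below are assumptions based on typical `llama_main` usage and are not taken from this PR; run `llama_main --help` to confirm the exact interface), inference with an exported model could then be launched as follows:
```bash
# Hypothetical invocation; verify flag names with ./llama_main --help
./cmake-out/examples/models/llama/llama_main \
    --model_path=<exported_model>.pte \
    --tokenizer_path=<tokenizer_file> \
    --prompt="Once upon a time"
```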

@@ -1 +1,2 @@
transformers
diffusers>=0.29.0
2 changes: 1 addition & 1 deletion examples/openvino/stable_diffusion/README.md
@@ -11,7 +11,7 @@ Follow the [instructions](../../../backends/openvino/README.md) of **Prerequisit

### Install dependencies
```bash
pip install -r requirements.txt
pip install -r ../requirements.txt
```

## Export the Model