4 changes: 2 additions & 2 deletions backends/openvino/README.md
@@ -109,9 +109,9 @@ Follow the steps below to setup your build environment:
```bash
./openvino_build.sh --cpp_runtime
```
**Build C++ Llama Runner**: First, ensure the C++ runtime libraries are built by following the earlier instructions. Then, run the `openvino_build.sh` script with the `--llama_runner flag` to compile the LlaMA runner as shown the below command, which enables executing inference with models exported using export_llama. The resulting binary is located at: `<executorch_root>/cmake-out/examples/models/llama/llama_main`
**Build C++ Runtime Libraries with LLM Extension**: Run the `openvino_build.sh` script with the `--cpp_runtime_llm` flag to build the C++ runtime libraries with the LLM extension, as shown in the command below. Use this option instead of `--cpp_runtime` when LLM extension support is required, as it is by the LLM examples.
```bash
./openvino_build.sh --llama_runner
./openvino_build.sh --cpp_runtime_llm
```

For more information about ExecuTorch environment setup, refer to the [Environment Setup](https://pytorch.org/executorch/main/getting-started-setup#environment-setup) guide.
57 changes: 22 additions & 35 deletions backends/openvino/scripts/openvino_build.sh
@@ -16,54 +16,42 @@ install_requirements() {
build_cpp_runtime() {
echo "Building C++ Runtime Libraries"

local llm_enabled=${1:-0}

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"
rm -rf "${build_dir}"

CMAKE_ARGS=(
"-DCMAKE_BUILD_TYPE=Release"
"-DEXECUTORCH_BUILD_OPENVINO=ON"
"-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON"
"-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON"
"-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON"
"-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON"
"-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
"-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON"
"-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
)

if [[ "$llm_enabled" -eq 1 ]]; then
CMAKE_ARGS+=("-DEXECUTORCH_BUILD_EXTENSION_LLM=ON" "-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON")
fi

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPENVINO=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
${CMAKE_ARGS[@]} \
-B"${build_dir}"


# Build the project
cmake --build ${build_dir} --target install --config Release -j$(nproc)
}

build_llama_runner() {
echo "Building Export Llama Runner"

# Set build directory
local build_dir="cmake-out"

# Enter the Executorch root directory
cd "$EXECUTORCH_ROOT"

# Configure the project with CMake
# Note: Add any additional configuration options you need here
cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${build_dir}"/examples/models/llama \
examples/models/llama
# Build the export llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
}

build_python_enabled() {
echo "Building Python Package with Pybinding"

@@ -92,16 +80,15 @@ main() {
install_requirements
build_python_enabled
build_cpp_runtime
build_llama_runner

# If the first argument is --cpp_runtime, build libraries for C++ runtime
elif [[ "$build_type" == "--cpp_runtime" ]]; then
build_cpp_runtime

# If the first arguments is --llama_runner, build export llama runner binary
# Note: c++ runtime with openvino backend should be built before building export llama runner
elif [[ "$build_type" == "--llama_runner" ]]; then
build_llama_runner
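# If the first argument is --cpp_runtime_llm, build C++ runtime libraries with the LLM extension (needed by the LLM examples)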
elif [[ "$build_type" == "--cpp_runtime_llm" ]]; then
build_cpp_runtime 1

# If the first argument is --enable_python, build python package with python bindings
elif [[ "$build_type" == "--enable_python" ]]; then
14 changes: 3 additions & 11 deletions examples/models/yolo12/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5)

project(Yolo12DetectionDemo VERSION 0.1)

option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" ON)
option(USE_OPENVINO_BACKEND "Build the tutorial with the OPENVINO backend" OFF)
option(USE_XNNPACK_BACKEND "Build the tutorial with the XNNPACK backend" OFF)

set(CMAKE_INCLUDE_CURRENT_DIR ON)
@@ -38,21 +38,13 @@ list(APPEND link_libraries portable_ops_lib portable_kernels)
executorch_target_link_options_shared_lib(portable_ops_lib)

if(USE_XNNPACK_BACKEND)
set(xnnpack_backend_libs xnnpack_backend XNNPACK microkernels-prod)
set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
list(APPEND link_libraries ${xnnpack_backend_libs})
executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

if(USE_OPENVINO_BACKEND)
add_subdirectory(${EXECUTORCH_ROOT}/backends/openvino openvino_backend)

target_include_directories(
openvino_backend
INTERFACE
${CMAKE_CURRENT_BINARY_DIR}/../../include
${CMAKE_CURRENT_BINARY_DIR}/../../include/executorch/runtime/core/portable_type/c10
${CMAKE_CURRENT_BINARY_DIR}/../../lib
)
find_package(OpenVINO REQUIRED)
list(APPEND link_libraries openvino_backend)
executorch_target_link_options_shared_lib(openvino_backend)
endif()
2 changes: 1 addition & 1 deletion examples/models/yolo12/requirements.txt
@@ -1 +1 @@
ultralytics==8.3.97
ultralytics==8.3.196
14 changes: 10 additions & 4 deletions examples/openvino/llama/README.md
@@ -28,13 +28,19 @@ python -m executorch.extension.llm.export.export_llm \
The OpenVINO backend also offers quantization support for Llama models when exporting the model. The available modes are INT4 group-wise and per-channel weight compression, and INT8 per-channel weight compression. Quantization is enabled by setting the `pt2e_quantize` option under `quantization` in the `llama3_2_ov_4wo.yaml` file: use `openvino_4wo` for INT4 or `openvino_8wo` for INT8 weight compression. It is set to `openvino_4wo` in `llama3_2_ov_4wo.yaml` by default. To change the group size, set the `group_size` option under `quantization` in the same file; the default group size of 128 achieves optimal performance with the NPU.
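For reference, a minimal sketch of the relevant section of `llama3_2_ov_4wo.yaml` is shown below. The `quantization`, `pt2e_quantize`, and `group_size` keys come from the description above; the exact surrounding structure of the config file is an assumption.
```yaml
# Illustrative sketch only: the keys below follow the options described above,
# but the full layout of llama3_2_ov_4wo.yaml may differ.
quantization:
  pt2e_quantize: openvino_4wo   # openvino_4wo = INT4, openvino_8wo = INT8 weight compression
  group_size: 128               # group size for INT4 group-wise compression; 128 is the default
```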

## Build OpenVINO C++ Runtime with Llama Runner:
First, build the backend libraries by executing the script below in `<executorch_root>/backends/openvino/scripts` folder:
First, build the backend libraries with the LLM extension by executing the script below in the `<executorch_root>/backends/openvino/scripts` folder:
```bash
./openvino_build.sh --cpp_runtime
./openvino_build.sh --cpp_runtime_llm
```
Then, build the llama runner by executing the script below (with `--llama_runner` argument) also in `<executorch_root>/backends/openvino/scripts` folder:
Then, build the llama runner by executing the commands below in the `<executorch_root>` folder:
```bash
./openvino_build.sh --llama_runner
# Configure the project with CMake
cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/examples/models/llama \
examples/models/llama
# Build the llama runner
cmake --build cmake-out/examples/models/llama -j$(nproc) --config Release
```
The executable is saved in `<executorch_root>/cmake-out/examples/models/llama/llama_main`
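As a quick sanity check, the runner can then be invoked with an exported model. The flag names below (`--model_path`, `--tokenizer_path`, `--prompt`) are assumptions based on the usual `llama_main` options and are shown for illustration only; consult the runner's help output for the exact flags.
```bash
# Illustrative invocation (flag names are assumptions; verify with --help).
./cmake-out/examples/models/llama/llama_main \
    --model_path=<path_to_exported_model>.pte \
    --tokenizer_path=<path_to_tokenizer> \
    --prompt="Once upon a time"
```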
