Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ option(onnxruntime_DISABLE_SPARSE_TENSORS "Disable sparse tensors data types" OF
option(onnxruntime_DISABLE_OPTIONAL_TYPE "Disable optional type" OFF)
option(onnxruntime_DISABLE_FLOAT8_TYPES "Disable float 8 types" OFF)
option(onnxruntime_MINIMAL_BUILD "Exclude as much as possible from the build. Support ORT format models. No support for ONNX format models." OFF)
option(onnxruntime_CLIENT_PACKAGE_BUILD "Enables default settings that are more appropriate for client/on-device workloads." OFF)
cmake_dependent_option(onnxruntime_DISABLE_RTTI "Disable RTTI" ON "NOT onnxruntime_ENABLE_PYTHON;NOT onnxruntime_USE_CUDA" OFF)
# For now onnxruntime_DISABLE_EXCEPTIONS will only work with onnxruntime_MINIMAL_BUILD, more changes (ONNX, non-CPU EP, ...) are required to run this standalone
cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handling. Requires onnxruntime_MINIMAL_BUILD currently." ON "onnxruntime_MINIMAL_BUILD;NOT onnxruntime_ENABLE_PYTHON" OFF)
Expand Down
5 changes: 5 additions & 0 deletions cmake/adjust_global_compile_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ if (onnxruntime_MINIMAL_BUILD)
endif()
endif()

# ORT build with default settings more appropriate for client/on-device workloads.
if (onnxruntime_CLIENT_PACKAGE_BUILD)
add_compile_definitions(ORT_CLIENT_PACKAGE_BUILD)
endif()

if (onnxruntime_ENABLE_LTO)
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_enabled OUTPUT ipo_output)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "

// Configure whether to allow the inter_op/intra_op threads spinning a number of times before blocking
// "0": thread will block if it finds no job to run
// "1": default, thread will spin a number of times before blocking
// "1": thread will spin a number of times before blocking
// The default is "0" when ORT is built with "ORT_CLIENT_PACKAGE_BUILD" and "1" otherwise.
// Thread spinning is disabled by default for client/on-device workloads to reduce cpu utilization and improve power efficiency.
static const char* const kOrtSessionOptionsConfigAllowInterOpSpinning = "session.inter_op.allow_spinning";
static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session.intra_op.allow_spinning";

Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,13 @@ void InferenceSession::ConstructorCommon(const SessionOptions& session_options,
{
if (!external_intra_op_thread_pool_) {
bool allow_intra_op_spinning =
#if !defined(ORT_CLIENT_PACKAGE_BUILD)
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsConfigAllowIntraOpSpinning, "1") == "1";
#else
// default kOrtSessionOptionsConfigAllowIntraOpSpinning to "0" for ORT builds targeting client/on-device workloads,
// to reduce CPU utilization and improve power efficiency.
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0") == "1";
#endif
OrtThreadPoolParams to = session_options_.intra_op_param;
std::basic_stringstream<ORTCHAR_T> ss;
if (to.name) {
Expand Down Expand Up @@ -461,7 +467,13 @@ void InferenceSession::ConstructorCommon(const SessionOptions& session_options,
if (session_options_.execution_mode == ExecutionMode::ORT_PARALLEL) {
if (!external_inter_op_thread_pool_) {
bool allow_inter_op_spinning =
#if !defined(ORT_CLIENT_PACKAGE_BUILD)
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsConfigAllowInterOpSpinning, "1") == "1";
#else
// default kOrtSessionOptionsConfigAllowInterOpSpinning to "0" for ORT builds targeting client/on-device workloads,
// to reduce CPU utilization and improve power efficiency.
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsConfigAllowInterOpSpinning, "0") == "1";
#endif
OrtThreadPoolParams to = session_options_.inter_op_param;
to.auto_set_affinity = to.thread_pool_size == 0 && session_options_.execution_mode == ExecutionMode::ORT_SEQUENTIAL;
std::basic_stringstream<ORTCHAR_T> ss;
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/util/thread_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,13 @@ struct OrtThreadPoolParams {
bool auto_set_affinity = false;

// If it is true, the thread pool will spin a while after the queue became empty.
#if !defined(ORT_CLIENT_PACKAGE_BUILD)
bool allow_spinning = true;
#else
// default allow_spinning to false for ORT builds targeting client/on-device workloads,
// to reduce CPU utilization and improve power efficiency.
bool allow_spinning = false;
#endif

// If it is non-negative, thread pool will split a task by a decreasing block size
// of remaining_of_total_iterations / (num_of_threads * dynamic_block_base_)
Expand Down
1 change: 1 addition & 0 deletions tools/ci_build/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ def generate_build_tree(
else "OFF"
),
"-Donnxruntime_REDUCED_OPS_BUILD=" + ("ON" if is_reduced_ops_build(args) else "OFF"),
"-Donnxruntime_CLIENT_PACKAGE_BUILD=" + ("ON" if args.client_package_build else "OFF"),
"-Donnxruntime_BUILD_MS_EXPERIMENTAL_OPS=" + ("ON" if args.ms_experimental else "OFF"),
"-Donnxruntime_ENABLE_LTO=" + ("ON" if args.enable_lto else "OFF"),
"-Donnxruntime_USE_ACL=" + ("ON" if args.use_acl else "OFF"),
Expand Down
10 changes: 10 additions & 0 deletions tools/ci_build/build_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,15 @@ def add_size_reduction_args(parser: argparse.ArgumentParser) -> None:
)


def add_client_package_args(parser: argparse.ArgumentParser) -> None:
    """Registers the client package build option on the given parser.

    Adds a single boolean switch, ``--client_package_build``, which is False
    unless the flag is present on the command line.
    """
    flag_name = "--client_package_build"
    flag_help = "Create ORT package with default settings more appropriate for client/on-device workloads."
    parser.add_argument(flag_name, action="store_true", help=flag_help)


def add_python_binding_args(parser: argparse.ArgumentParser) -> None:
"""Adds arguments for Python bindings."""
parser.add_argument("--enable_pybind", action="store_true", help="Enable Python bindings.")
Expand Down Expand Up @@ -833,6 +842,7 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]: # Use list[str]
add_dependency_args(parser)
add_extension_args(parser)
add_size_reduction_args(parser)
add_client_package_args(parser)

# Language Bindings
add_python_binding_args(parser)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ stages:
name: ${{ parameters.qnn_ep_build_pool_name }}
variables:
OrtPackageId: ${{ parameters.OrtNugetPackageId }}
commonBuildArgs: '--compile_no_warning_as_error --skip_submodule_sync --build_shared_lib --cmake_generator "Visual Studio 17 2022" --config ${{ parameters.build_config }} --parallel --use_vcpkg --use_vcpkg_ms_internal_asset_cache --use_binskim_compliant_compile_flags '
commonBuildArgs: '--compile_no_warning_as_error --skip_submodule_sync --build_shared_lib --client_package_build --cmake_generator "Visual Studio 17 2022" --config ${{ parameters.build_config }} --parallel --use_vcpkg --use_vcpkg_ms_internal_asset_cache --use_binskim_compliant_compile_flags '

steps:
- template: set-version-number-variables-step.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
matrix:
SHARED_LIB:
QnnLibKind: 'shared_lib'
ExtraQnnBuildArgs: ''
ExtraQnnBuildArgs: '--client_package_build'
STATIC_LIB:
QnnLibKind: 'static_lib'
ExtraQnnBuildArgs: ''
Expand Down
Loading