diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index e7a8fc0335ade..987064b973107 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -767,17 +767,235 @@ if(onnxruntime_USE_CUDA) endif() endif() +FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) +FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) + + + +set(ORT_WARNING_FLAGS) +if (WIN32) + add_definitions(-DPLATFORM_WINDOWS -DNOGDI -DNOMINMAX -D_USE_MATH_DEFINES) + if(onnxruntime_ENABLE_MEMLEAK_CHECKER) + add_definitions(-DONNXRUNTIME_ENABLE_MEMLEAK_CHECK) + endif() + # parallel build + # These compiler options cannot be forwarded to NVCC, so cannot use add_compiler_options + string(APPEND CMAKE_CXX_FLAGS " /MP") + #Compiler bug, we shouldn't get such warnings. It will be fixed in a new VC release + list(APPEND ORT_WARNING_FLAGS "/wd4127") + # class needs to have dll-interface to be used by clients + list(APPEND ORT_WARNING_FLAGS "/wd4251") + # issued by thrust nonstandard extension used: nameless struct/union + list(APPEND ORT_WARNING_FLAGS "/wd4201") + if (onnxruntime_ENABLE_STATIC_ANALYSIS) + list(APPEND ORT_WARNING_FLAGS "/analyze:stacksize 131072") + list(APPEND ORT_WARNING_FLAGS "/wd6326") # potential comparison of a constant with another constant + if(onnxruntime_USE_OPENMP) + list(APPEND ORT_WARNING_FLAGS "/wd6993") # Code analysis ignores OpenMP constructs + endif() + endif() + # Treat warning as error if onnxruntime_DEV_MODE is ON + # For cross-compiled ARM64 binaries, there are too many warnings to fix, hence ignore warnings for now + if (onnxruntime_DEV_MODE AND NOT CMAKE_CROSSCOMPILING) + # treat warnings as errors + list(APPEND ORT_WARNING_FLAGS "/WX") + foreach(type EXE STATIC SHARED) + set(CMAKE_${type}_LINKER_FLAGS "${CMAKE_${type}_LINKER_FLAGS} /WX") + endforeach() + endif() + + # set linker flags to minimize the binary size. 
+ if (MSVC) + foreach(type EXE STATIC SHARED) + if (NOT type MATCHES STATIC) + # The WinML internal toolchain does not allow link's "additional options" to contain optimization + # flags (/OPT#); these are already specified using msbuild properties. + if (NOT DEFINED onnxruntime_DISABLE_LINKER_OPT_FLAGS) + set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /OPT:REF,ICF,LBR") + set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /INCREMENTAL:NO") + set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /OPT:REF,ICF,LBR") + set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /INCREMENTAL:NO") + set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /OPT:REF,ICF,LBR") + set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") + endif() + endif() + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) + set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") + set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") + set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") + endif() + endforeach() + endif() + +else() + add_definitions(-DPLATFORM_POSIX) + # Enable warning + list(APPEND ORT_WARNING_FLAGS -Wall -Wextra) + + if(onnxruntime_DEV_MODE) + list(APPEND ORT_WARNING_FLAGS -Werror) + endif() + check_cxx_compiler_flag(-Wunused-but-set-variable HAS_UNUSED_BUT_SET_VARIABLE) + check_cxx_compiler_flag(-Wunused-parameter HAS_UNUSED_PARAMETER) + check_cxx_compiler_flag(-Wunused-variable HAS_UNUSED_VARIABLE) + check_cxx_compiler_flag(-Wcast-function-type HAS_CAST_FUNCTION_TYPE) + check_cxx_compiler_flag(-Wparentheses HAS_PARENTHESES) + check_cxx_compiler_flag(-Wuseless-cast HAS_USELESS_CAST) + check_cxx_compiler_flag(-Wnonnull-compare HAS_NONNULL_COMPARE) + 
check_cxx_compiler_flag(-Wtautological-pointer-compare HAS_TAUTOLOGICAL_POINTER_COMPARE) + check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE) + check_cxx_compiler_flag(-Wmissing-braces HAS_MISSING_BRACES) + check_cxx_compiler_flag(-Wignored-attributes HAS_IGNORED_ATTRIBUTES) + check_cxx_compiler_flag(-Wdeprecated-copy HAS_DEPRECATED_COPY) + check_cxx_compiler_flag(-Wdeprecated-declarations HAS_DEPRECATED_DECLARATIONS) + check_cxx_compiler_flag(-Wclass-memaccess HAS_CLASS_MEMACCESS) + check_cxx_compiler_flag(-Wmaybe-uninitialized HAS_MAYBE_UNINITIALIZED) + check_cxx_compiler_flag(-Wstrict-aliasing HAS_STRICT_ALIASING) + + if(HAS_TAUTOLOGICAL_POINTER_COMPARE) + #we may have extra null pointer checks in debug build, it's not an issue + list(APPEND ORT_WARNING_FLAGS -Wno-tautological-pointer-compare) + endif() + if(HAS_NONNULL_COMPARE) + #we may have extra null pointer checks in debug build, it's not an issue + list(APPEND ORT_WARNING_FLAGS -Wno-nonnull-compare) + endif() + if(HAS_PARENTHESES) + list(APPEND ORT_WARNING_FLAGS -Wno-parentheses) + endif() +endif() + +#names in this var must match the directory names under onnxruntime/core/providers +#The list of legacy providers that have not been converted to dynamic loading +#But DNNL is an exception, it still needs to be here. 
+set(ONNXRUNTIME_PROVIDER_NAMES cpu) + +set(ORT_PROVIDER_FLAGS) +set(ORT_PROVIDER_CMAKE_FLAGS) + +if (onnxruntime_USE_CUDA) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) +endif() +if (onnxruntime_USE_VITISAI) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai) +endif() +if (onnxruntime_USE_DNNL) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_DNNL=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1) +endif() +if (onnxruntime_USE_OPENVINO) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1) +endif() +if (onnxruntime_USE_TENSORRT) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_TENSORRT=1) +endif() +if (onnxruntime_USE_RKNPU) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_RKNPU=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_RKNPU=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES rknpu) +endif() +if (onnxruntime_USE_NNAPI_BUILTIN) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_NNAPI=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_NNAPI_BUILTIN=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES nnapi) +endif() +if (onnxruntime_USE_NUPHAR) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_NUPHAR=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_NUPHAR=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES nuphar) +endif() +if(onnxruntime_USE_WINML) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_WINML=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WINML=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES winml) +endif() +if (onnxruntime_USE_ACL) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_ACL=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_ACL=1) + list(APPEND 
ONNXRUNTIME_PROVIDER_NAMES acl) +endif() +if (onnxruntime_USE_DML) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_DML=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DML=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES dml) +endif() +if(onnxruntime_USE_MIGRAPHX) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_MIGRAPHX=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_MIGRAPHX=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES migraphx) +endif() + +if (onnxruntime_USE_ARMNN) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_ARMNN=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_ARMNN=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES armnn) +endif() +if (onnxruntime_USE_ROCM) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_ROCM=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_ROCM=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES rocm) +endif() +if (onnxruntime_USE_COREML) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_COREML=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_COREML=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES coreml) +endif() +function(onnxruntime_set_compile_flags target_name) + if (MSVC) + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>") + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /sdl>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/sdl>") + set_target_properties(${target_name} + PROPERTIES VS_GLOBAL_CAExcludePath "${ORT_BINARY_DIR};${ORT_SOURCE_DIR}") + endif() + if(NOT onnxruntime_BUILD_WEBASSEMBLY AND NOT WIN32) + target_compile_definitions(${target_name} PUBLIC -DNSYNC_ATOMIC_CPP11) + target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") + endif() + foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) + target_compile_definitions(${target_name} PRIVATE ${ORT_FLAG}) + endforeach() + if(HAS_DEPRECATED_COPY) + #too many such errors in eigen + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options -Wno-deprecated-copy>" "$<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-copy>") + endif() + 
if(onnxruntime_USE_CUDA) + if(HAS_STRICT_ALIASING AND ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Wno-strict-aliasing>") + endif() + endif() + if(onnxruntime_USE_CUDA) + if(HAS_UNUSED_PARAMETER AND ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Wno-unused-parameter>") + endif() + endif() + foreach(ORT_FLAG ${ORT_WARNING_FLAGS}) + target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options ${ORT_FLAG}>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:${ORT_FLAG}>") + endforeach() +endfunction() + function(onnxruntime_add_shared_library target_name) add_library(${target_name} SHARED ${ARGN}) + target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) + onnxruntime_set_compile_flags(${target_name}) + target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) + if(onnxruntime_ENABLE_LTO) + set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) + set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE) + set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_MINSIZEREL TRUE) + endif() +endfunction() + +function(onnxruntime_add_static_library target_name) + add_library(${target_name} ${ARGN}) target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) - if (MSVC) - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>") - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /sdl>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/sdl>") - set_target_properties(${target_name} PROPERTIES VS_CA_EXCLUDE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") - elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) - target_compile_definitions(${target_name} PUBLIC -DNSYNC_ATOMIC_CPP11) - target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") - endif() + 
onnxruntime_set_compile_flags(${target_name}) target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) if(onnxruntime_ENABLE_LTO) set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) @@ -788,16 +1006,15 @@ endfunction() #For plugins that are not linked into other targets but may be loaded dynamically at runtime using dlopen-like functionality. function(onnxruntime_add_shared_library_module target_name) - add_library(${target_name} MODULE ${ARGN}) - target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) - if (MSVC) - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>") - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /sdl>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/sdl>") - set_target_properties(${target_name} PROPERTIES VS_CA_EXCLUDE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") - elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) - target_compile_definitions(${target_name} PUBLIC -DNSYNC_ATOMIC_CPP11) - target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") + if ((${CMAKE_SYSTEM_NAME} MATCHES "Darwin") OR (${CMAKE_SYSTEM_NAME} MATCHES "iOSCross")) + add_library(${target_name} SHARED ${ARGN}) + else() + #On Windows, this target shouldn't generate an import lib, but I don't know how to disable it. 
+ add_library(${target_name} MODULE ${ARGN}) endif() + + target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) + onnxruntime_set_compile_flags(${target_name}) target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) if(onnxruntime_ENABLE_LTO) set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) @@ -810,14 +1027,7 @@ endfunction() function(onnxruntime_add_executable target_name) add_executable(${target_name} ${ARGN}) target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) - if (MSVC) - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>") - target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /sdl>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/sdl>") - set_target_properties(${target_name} PROPERTIES VS_CA_EXCLUDE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") - elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) - target_compile_definitions(${target_name} PUBLIC -DNSYNC_ATOMIC_CPP11) - target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") - endif() + onnxruntime_set_compile_flags(${target_name}) target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) if(onnxruntime_ENABLE_LTO) set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) @@ -910,7 +1120,6 @@ if (onnxruntime_USE_ARMNN) endif() if (onnxruntime_USE_DNNL) - add_definitions(-DUSE_DNNL=1) include(dnnl) endif() @@ -1019,108 +1228,7 @@ if(onnxruntime_USE_CUDA) else() set_msvc_c_cpp_compiler_warning_level(4) endif() -if (WIN32) - set(ORT_WARNING_FLAGS) - add_definitions(-DPLATFORM_WINDOWS -DNOGDI -DNOMINMAX -D_USE_MATH_DEFINES) - if(onnxruntime_ENABLE_MEMLEAK_CHECKER) - add_definitions(-DONNXRUNTIME_ENABLE_MEMLEAK_CHECK) - endif() - # parallel build - # These compiler opitions cannot be forwarded to NVCC, so cannot use add_compiler_options - string(APPEND 
CMAKE_CXX_FLAGS " /MP") - #Compiler bug, we should get such warnings. It will be fixed in a new VC release - list(APPEND ORT_WARNING_FLAGS "/wd4127") - # class needs to have dll-interface to be used by clients - list(APPEND ORT_WARNING_FLAGS "/wd4251") - # issued by thrust nonstandard extension used: nameless struct/union - list(APPEND ORT_WARNING_FLAGS "/wd4201") - if (onnxruntime_ENABLE_STATIC_ANALYSIS) - list(APPEND ORT_WARNING_FLAGS "/analyze:stacksize 131072") - list(APPEND ORT_WARNING_FLAGS "/wd6326") # potential comparison of a constant with another constant - if(onnxruntime_USE_OPENMP) - list(APPEND ORT_WARNING_FLAGS "/wd6993") # Code analysis ignores OpenMP constructs - endif() - endif() - # Treat warning as error if onnxruntime_DEV_MODE is ON - # For cross-compiled ARM64 binaries, there are too many warnings to fix, hence ignore warnings for now - if (onnxruntime_DEV_MODE AND NOT CMAKE_CROSSCOMPILING) - # treat warnings as errors - list(APPEND ORT_WARNING_FLAGS "/WX") - foreach(type EXE STATIC SHARED) - set(CMAKE_${type}_LINKER_FLAGS "${CMAKE_${type}_LINKER_FLAGS} /WX") - endforeach() - endif() - - # set linker flags to minimize the binary size. - if (MSVC) - foreach(type EXE STATIC SHARED) - if (NOT type MATCHES STATIC) - # The WinML internal toolchain does not allow link's "additional options" to contain optimization - # flags (/OPT#); these are already specified using msbuild properties. 
- if (NOT DEFINED onnxruntime_DISABLE_LINKER_OPT_FLAGS) - set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /OPT:REF,ICF,LBR") - set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /INCREMENTAL:NO") - set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /OPT:REF,ICF,LBR") - set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /INCREMENTAL:NO") - set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /OPT:REF,ICF,LBR") - set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") - endif() - endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) - set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") - set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") - set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") - endif() - endforeach() - endif() - foreach(ORT_FLAG ${ORT_WARNING_FLAGS}) - string(APPEND CMAKE_CXX_FLAGS " ${ORT_FLAG}") - string(APPEND CMAKE_C_FLAGS " ${ORT_FLAG}") - endforeach() -else() - add_definitions(-DPLATFORM_POSIX) - # Enable warning - string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra") - string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") - - if(onnxruntime_DEV_MODE) - string(APPEND CMAKE_CXX_FLAGS " -Werror") - string(APPEND CMAKE_C_FLAGS " -Werror") - endif() - check_cxx_compiler_flag(-Wunused-but-set-variable HAS_UNUSED_BUT_SET_VARIABLE) - check_cxx_compiler_flag(-Wunused-parameter HAS_UNUSED_PARAMETER) - check_cxx_compiler_flag(-Wunused-variable HAS_UNUSED_VARIABLE) - check_cxx_compiler_flag(-Wcast-function-type HAS_CAST_FUNCTION_TYPE) - check_cxx_compiler_flag(-Wparentheses HAS_PARENTHESES) - check_cxx_compiler_flag(-Wuseless-cast HAS_USELESS_CAST) - check_cxx_compiler_flag(-Wnonnull-compare HAS_NONNULL_COMPARE) - 
check_cxx_compiler_flag(-Wtautological-pointer-compare HAS_TAUTOLOGICAL_POINTER_COMPARE) - check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE) - check_cxx_compiler_flag(-Wmissing-braces HAS_MISSING_BRACES) - check_cxx_compiler_flag(-Wignored-attributes HAS_IGNORED_ATTRIBUTES) - check_cxx_compiler_flag(-Wdeprecated-copy HAS_DEPRECATED_COPY) - check_cxx_compiler_flag(-Wdeprecated-declarations HAS_DEPRECATED_DECLARATIONS) - check_cxx_compiler_flag(-Wclass-memaccess HAS_CLASS_MEMACCESS) - check_cxx_compiler_flag(-Wmaybe-uninitialized HAS_MAYBE_UNINITIALIZED) - if(HAS_TAUTOLOGICAL_POINTER_COMPARE) - #we may have extra null pointer checkings in debug build, it's not an issue - string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Wno-tautological-pointer-compare") - string(APPEND CMAKE_C_FLAGS_DEBUG " -Wno-tautological-pointer-compare") - endif() - if(HAS_NONNULL_COMPARE) - #we may have extra null pointer checkings in debug build, it's not an issue - string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Wno-nonnull-compare") - string(APPEND CMAKE_C_FLAGS_DEBUG " -Wno-nonnull-compare") - endif() - if(HAS_DEPRECATED_COPY) - #too many such errors in eigen - string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated-copy") - endif() - if(HAS_PARENTHESES) - string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses") - endif() -endif() set(onnxruntime_DELAYLOAD_FLAGS "") include_directories( @@ -1200,7 +1308,6 @@ if (onnxruntime_USE_VITISAI) if(WIN32) message(FATAL_ERROR "Vitis-AI execution provider is not supported on Windows.") else() - add_definitions(-DUSE_VITISAI=1) include(pyxir) list(APPEND onnxruntime_EXTERNAL_LIBRARIES pyxir) list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES pyxir) @@ -1210,7 +1317,6 @@ endif() configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h) if (onnxruntime_USE_CUDA) - add_definitions(-DUSE_CUDA=1) #The following 6 lines are copied from https://gitlab.kitware.com/cmake/cmake/issues/17559 set( CMAKE_CUDA_FLAGS "" CACHE STRING "" ) if ( CMAKE_CUDA_FLAGS ) @@ 
-1224,6 +1330,7 @@ if (onnxruntime_USE_CUDA) endif() enable_language(CUDA) message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") + if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11) set(CMAKE_CUDA_STANDARD 14) else() @@ -1284,7 +1391,6 @@ if (onnxruntime_USE_MIGRAPHX) message(FATAL_ERROR "MIGraphX does not support build in Windows!") endif() set(AMD_MIGRAPHX_HOME ${onnxruntime_MIGRAPHX_HOME}) - add_definitions(-DUSE_MIGRAPHX=1) endif() if (onnxruntime_USE_ROCM) @@ -1328,7 +1434,6 @@ if (onnxruntime_USE_DML) message(FATAL_ERROR "The DirectML execution provider is only supported when building for Windows.") endif() - add_definitions(-DUSE_DML=1) include(dml) endif() @@ -1454,32 +1559,11 @@ if (onnxruntime_ENABLE_TRAINING) list(APPEND onnxruntime_EXTERNAL_LIBRARIES tensorboard) endif() -#names in this var must match the directory names under onnxruntime/core/providers -set(ONNXRUNTIME_PROVIDER_NAMES cpu) foreach(target_name onnxruntime_common onnxruntime_graph onnxruntime_framework onnxruntime_util onnxruntime_providers onnxruntime_optimizer onnxruntime_session onnxruntime_mlas onnxruntime_flatbuffers) - include(${target_name}.cmake) - if (MSVC) - target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") - target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /sdl>" "$<$>:/sdl>") - set_target_properties(${target_name} PROPERTIES VS_CA_EXCLUDE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") - elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) - target_compile_definitions(${target_name} PUBLIC -DNSYNC_ATOMIC_CPP11) - target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") - endif() - target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) + include(${target_name}.cmake) endforeach() -foreach(provider_name ${ONNXRUNTIME_PROVIDER_NAMES}) - if(NOT provider_name STREQUAL "cpu" AND NOT provider_name STREQUAL 
"winml") - if (MSVC) - target_compile_options(onnxruntime_providers_${provider_name} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") - target_compile_options(onnxruntime_providers_${provider_name} PRIVATE "$<$:SHELL:--compiler-options /sdl>" "$<$>:/sdl>") - elseif(NOT onnxruntime_BUILD_WEBASSEMBLY) - target_compile_definitions(onnxruntime_providers_${provider_name} PUBLIC -DNSYNC_ATOMIC_CPP11) - target_include_directories(onnxruntime_providers_${provider_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public") - endif() - endif() -endforeach() + diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index ee2fb29ab0661..86aee98efa76a 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -8,7 +8,7 @@ if(NOT TARGET pybind11::module) set(pybind11_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/pybind11/src/pybind11/include) set(pybind11_URL https://github.com/pybind/pybind11.git) - set(pybind11_TAG v2.6.1) + set(pybind11_TAG v2.6.2) ExternalProject_Add(pybind11 PREFIX pybind11 diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 15a31de6a8fd6..2db1e43f2a0d5 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -98,19 +98,19 @@ endif() target_link_libraries(onnxruntime PRIVATE onnxruntime_session ${onnxruntime_libs} + ${PROVIDERS_ACL} + ${PROVIDERS_ARMNN} + ${PROVIDERS_COREML} ${PROVIDERS_CUDA} - ${PROVIDERS_NNAPI} - ${PROVIDERS_RKNPU} + ${PROVIDERS_DML} ${PROVIDERS_MIGRAPHX} + ${PROVIDERS_NNAPI} ${PROVIDERS_NUPHAR} + ${PROVIDERS_RKNPU} + ${PROVIDERS_ROCM} ${PROVIDERS_VITISAI} - ${PROVIDERS_DML} - ${PROVIDERS_ACL} - ${PROVIDERS_ARMNN} ${PROVIDERS_INTERNAL_TESTING} ${onnxruntime_winml} - ${PROVIDERS_ROCM} - ${PROVIDERS_COREML} onnxruntime_optimizer onnxruntime_providers onnxruntime_util diff --git a/cmake/onnxruntime_codegen.cmake b/cmake/onnxruntime_codegen.cmake index 112fad0ce9999..b0bdc79ce515b 100644 --- 
a/cmake/onnxruntime_codegen.cmake +++ b/cmake/onnxruntime_codegen.cmake @@ -16,7 +16,7 @@ file(GLOB_RECURSE onnxruntime_codegen_tvm_srcs CONFIGURE_DEPENDS source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_codegen_common_srcs} ${onnxruntime_codegen_tvm_srcs}) #onnxruntime_codegen_tvm depends on onnxruntime framework -add_library(onnxruntime_codegen_tvm ${onnxruntime_codegen_common_srcs} ${onnxruntime_codegen_tvm_srcs}) +onnxruntime_add_static_library(onnxruntime_codegen_tvm ${onnxruntime_codegen_common_srcs} ${onnxruntime_codegen_tvm_srcs}) set_target_properties(onnxruntime_codegen_tvm PROPERTIES FOLDER "ONNXRuntime") target_include_directories(onnxruntime_codegen_tvm PRIVATE ${ONNXRUNTIME_ROOT} ${TVM_INCLUDES} ${MKLML_INCLUDE_DIR} ${eigen_INCLUDE_DIRS}) onnxruntime_add_include_to_target(onnxruntime_codegen_tvm onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index fd65893355edf..89b7ae43d31c5 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -94,7 +94,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_common_src}) -add_library(onnxruntime_common ${onnxruntime_common_src}) +onnxruntime_add_static_library(onnxruntime_common ${onnxruntime_common_src}) if (onnxruntime_USE_CUDA) target_include_directories(onnxruntime_common PUBLIC ${onnxruntime_CUDA_HOME}/include ${onnxruntime_CUDA_HOME}/extras/CUPTI/include) diff --git a/cmake/onnxruntime_flatbuffers.cmake b/cmake/onnxruntime_flatbuffers.cmake index 9d6f3a063ba95..49302e92f5a66 100644 --- a/cmake/onnxruntime_flatbuffers.cmake +++ b/cmake/onnxruntime_flatbuffers.cmake @@ -8,7 +8,7 @@ file(GLOB onnxruntime_flatbuffers_srcs CONFIGURE_DEPENDS source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_flatbuffers_srcs}) -add_library(onnxruntime_flatbuffers ${onnxruntime_flatbuffers_srcs}) +onnxruntime_add_static_library(onnxruntime_flatbuffers 
${onnxruntime_flatbuffers_srcs}) onnxruntime_add_include_to_target(onnxruntime_flatbuffers onnx flatbuffers) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_flatbuffers PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT) diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index fbfacafc1dd79..dbe083c9d50b6 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -27,7 +27,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_framework_srcs}) -add_library(onnxruntime_framework ${onnxruntime_framework_srcs}) +onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_srcs}) if(onnxruntime_ENABLE_INSTRUMENT) target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT) endif() diff --git a/cmake/onnxruntime_graph.cmake b/cmake/onnxruntime_graph.cmake index 37ab9ac9ffc4c..25ee83d15f448 100644 --- a/cmake/onnxruntime_graph.cmake +++ b/cmake/onnxruntime_graph.cmake @@ -72,7 +72,7 @@ if (onnxruntime_ENABLE_TRAINING) list(APPEND onnxruntime_graph_lib_src ${orttraining_graph_src}) endif() -add_library(onnxruntime_graph ${onnxruntime_graph_lib_src}) +onnxruntime_add_static_library(onnxruntime_graph ${onnxruntime_graph_lib_src}) add_dependencies(onnxruntime_graph onnx_proto flatbuffers) onnxruntime_add_include_to_target(onnxruntime_graph onnxruntime_common onnx onnx_proto protobuf::libprotobuf flatbuffers) diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index 8d60966c5b3db..ea3368c578ab6 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -9,7 +9,6 @@ find_package(Java REQUIRED) include(UseJava) if (NOT CMAKE_SYSTEM_NAME STREQUAL "Android") find_package(JNI REQUIRED) - include_directories(${JNI_INCLUDE_DIRS}) endif() set(JAVA_ROOT ${REPO_ROOT}/java) @@ -47,9 +46,7 @@ elseif (CMAKE_SYSTEM_NAME STREQUAL "Android") # it is better to not keep a daemon running set(GRADLE_ARGS ${GRADLE_ARGS} 
--no-daemon) endif() -if(onnxruntime_USE_CUDA) - set(GRADLE_ARGS ${GRADLE_ARGS} -DUSE_CUDA=1) -endif() + add_custom_command(OUTPUT ${JAVA_OUTPUT_JAR} COMMAND ${GRADLE_EXECUTABLE} ${GRADLE_ARGS} WORKING_DIRECTORY ${JAVA_ROOT} DEPENDS ${onnxruntime4j_gradle_files} ${onnxruntime4j_src}) add_custom_target(onnxruntime4j DEPENDS ${JAVA_OUTPUT_JAR}) set_source_files_properties(${JAVA_OUTPUT_JAR} PROPERTIES GENERATED TRUE) @@ -62,49 +59,15 @@ file(GLOB onnxruntime4j_native_src "${REPO_ROOT}/include/onnxruntime/core/session/*.h" ) # Build the JNI library -add_library(onnxruntime4j_jni SHARED ${onnxruntime4j_native_src}) +onnxruntime_add_shared_library_module(onnxruntime4j_jni ${onnxruntime4j_native_src}) set_property(TARGET onnxruntime4j_jni PROPERTY CXX_STANDARD 11) -# Tell the JNI code about the requested providers -if (onnxruntime_USE_CUDA) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_CUDA=1) -endif() -if (onnxruntime_USE_DNNL) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_DNNL=1) -endif() -if (onnxruntime_USE_OPENVINO) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_OPENVINO=1) -endif() -if (onnxruntime_USE_TENSORRT) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_TENSORRT=1) -endif() -if (onnxruntime_USE_NNAPI_BUILTIN) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_NNAPI=1) -endif() -if (onnxruntime_USE_NUPHAR) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_NUPHAR=1) -endif() -if (onnxruntime_USE_ACL) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_ACL=1) -endif() -if (onnxruntime_USE_DML) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_DIRECTML=1) -endif() -if (onnxruntime_USE_ARMNN) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_ARMNN=1) -endif() -if (onnxruntime_USE_ROCM) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_ROCM=1) -endif() -if (onnxruntime_USE_COREML) - target_compile_definitions(onnxruntime4j_jni PRIVATE USE_COREML=1) -endif() # depend 
on java sources. if they change, the JNI should recompile add_dependencies(onnxruntime4j_jni onnxruntime4j) onnxruntime_add_include_to_target(onnxruntime4j_jni onnxruntime_session) # the JNI headers are generated in the onnxruntime4j target -target_include_directories(onnxruntime4j_jni PRIVATE ${REPO_ROOT}/include ${JAVA_ROOT}/build/headers) +target_include_directories(onnxruntime4j_jni PRIVATE ${REPO_ROOT}/include ${JAVA_ROOT}/build/headers ${JNI_INCLUDE_DIRS}) target_link_libraries(onnxruntime4j_jni PUBLIC onnxruntime) set(JAVA_PACKAGE_OUTPUT_DIR ${JAVA_OUTPUT_DIR}/build) @@ -199,9 +162,10 @@ elseif (CMAKE_SYSTEM_NAME STREQUAL "Android") # it is better to not keep a daemon running set(GRADLE_ARGS ${GRADLE_ARGS} --no-daemon) endif() -if(onnxruntime_USE_CUDA) - set(GRADLE_ARGS ${GRADLE_ARGS} -DUSE_CUDA=1) -endif() +string(JOIN " " GRADLE_EP_FLAGS ${ORT_PROVIDER_FLAGS}) +set(GRADLE_ARGS ${GRADLE_ARGS} ${GRADLE_EP_FLAGS}) + +message(STATUS "GRADLE_ARGS: ${GRADLE_ARGS}") add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${GRADLE_EXECUTABLE} ${GRADLE_ARGS} WORKING_DIRECTORY ${JAVA_ROOT}) if (CMAKE_SYSTEM_NAME STREQUAL "Android") add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${GRADLE_EXECUTABLE} -b build-android.gradle -c settings-android.gradle build -DjniLibsDir=${ANDROID_PACKAGE_JNILIBS_DIR} -DbuildDir=${ANDROID_PACKAGE_OUTPUT_DIR} WORKING_DIRECTORY ${JAVA_ROOT}) diff --git a/cmake/onnxruntime_language_interop_ops.cmake b/cmake/onnxruntime_language_interop_ops.cmake index 62f568848adc2..241ddb3305ec1 100644 --- a/cmake/onnxruntime_language_interop_ops.cmake +++ b/cmake/onnxruntime_language_interop_ops.cmake @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
include(onnxruntime_pyop.cmake) file (GLOB onnxruntime_language_interop_ops_src "${ONNXRUNTIME_ROOT}/core/language_interop_ops/language_interop_ops.cc") -add_library(onnxruntime_language_interop ${onnxruntime_language_interop_ops_src}) +onnxruntime_add_static_library(onnxruntime_language_interop ${onnxruntime_language_interop_ops_src}) add_dependencies(onnxruntime_language_interop onnxruntime_pyop) onnxruntime_add_include_to_target(onnxruntime_language_interop onnxruntime_common onnxruntime_graph onnxruntime_framework onnxruntime_pyop onnx onnx_proto protobuf::libprotobuf flatbuffers) target_include_directories(onnxruntime_language_interop PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS}) \ No newline at end of file diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index 398c14442d568..cc39659d2fa5e 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -351,7 +351,7 @@ else() endif() endif() -add_library(onnxruntime_mlas STATIC ${mlas_common_srcs} ${mlas_platform_srcs}) +onnxruntime_add_static_library(onnxruntime_mlas ${mlas_common_srcs} ${mlas_platform_srcs}) target_include_directories(onnxruntime_mlas PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}/core/mlas/lib) set_target_properties(onnxruntime_mlas PROPERTIES FOLDER "ONNXRuntime") if (WIN32) diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake index 42ff2684e00b0..0176350242b12 100644 --- a/cmake/onnxruntime_nodejs.cmake +++ b/cmake/onnxruntime_nodejs.cmake @@ -26,6 +26,7 @@ if(had_error) message(FATAL_ERROR "Failed to find NPM: " ${had_error}) endif() +if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS) # add custom target add_custom_target(nodejs_binding_wrapper ALL COMMAND ${NPM_CLI} ci --ort-skip-build @@ -33,3 +34,4 @@ add_custom_target(nodejs_binding_wrapper ALL WORKING_DIRECTORY ${NODEJS_BINDING_ROOT} COMMENT "Using cmake-js to build OnnxRuntime Node.js binding") add_dependencies(nodejs_binding_wrapper onnxruntime) +endif() \ 
No newline at end of file diff --git a/cmake/onnxruntime_nuphar_extern.cmake b/cmake/onnxruntime_nuphar_extern.cmake index 3dee498209f44..fb5a9b45615f5 100644 --- a/cmake/onnxruntime_nuphar_extern.cmake +++ b/cmake/onnxruntime_nuphar_extern.cmake @@ -22,7 +22,7 @@ set(nuphar_extern_srcs ${extern_avx2_srcs} ) -add_library(onnxruntime_nuphar_extern ${nuphar_extern_srcs}) +onnxruntime_add_static_library(onnxruntime_nuphar_extern ${nuphar_extern_srcs}) if (onnxruntime_USE_MKLML) add_definitions(-DNUPHAR_USE_MKL) diff --git a/cmake/onnxruntime_optimizer.cmake b/cmake/onnxruntime_optimizer.cmake index 1486e5e19e2c8..c63739bd0d8c4 100644 --- a/cmake/onnxruntime_optimizer.cmake +++ b/cmake/onnxruntime_optimizer.cmake @@ -28,7 +28,7 @@ endif() source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_optimizer_srcs}) -add_library(onnxruntime_optimizer ${onnxruntime_optimizer_srcs}) +onnxruntime_add_static_library(onnxruntime_optimizer ${onnxruntime_optimizer_srcs}) install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/optimizer DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core) onnxruntime_add_include_to_target(onnxruntime_optimizer onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 0498bd673c9a9..86454df59b89e 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -57,60 +57,40 @@ file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS ) if(onnxruntime_USE_NUPHAR) - set(PROVIDERS_NUPHAR onnxruntime_providers_nuphar) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES nuphar) + set(PROVIDERS_NUPHAR onnxruntime_providers_nuphar) endif() if(onnxruntime_USE_VITISAI) - set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai) + set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) endif() if(onnxruntime_USE_CUDA) set(PROVIDERS_CUDA onnxruntime_providers_cuda) - 
list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) -endif() -if(onnxruntime_USE_TENSORRT) - set(PROVIDERS_TENSORRT onnxruntime_providers_tensorrt) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt) endif() if(onnxruntime_USE_COREML) set(PROVIDERS_COREML onnxruntime_providers_coreml) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES coreml) endif() if(onnxruntime_USE_NNAPI_BUILTIN) set(PROVIDERS_NNAPI onnxruntime_providers_nnapi) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES nnapi) endif() if(onnxruntime_USE_RKNPU) set(PROVIDERS_RKNPU onnxruntime_providers_rknpu) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES rknpu) endif() if(onnxruntime_USE_DML) - set(PROVIDERS_DML onnxruntime_providers_dml) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES dml) + set(PROVIDERS_DML onnxruntime_providers_dml) endif() if(onnxruntime_USE_MIGRAPHX) set(PROVIDERS_MIGRAPHX onnxruntime_providers_migraphx) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES migraphx) -endif() -if(onnxruntime_USE_OPENVINO) - set(PROVIDERS_OPENVINO onnxruntime_providers_openvino) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino) endif() if(onnxruntime_USE_WINML) set(PROVIDERS_WINML onnxruntime_providers_winml) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES winml) endif() if(onnxruntime_USE_ACL) - set(PROVIDERS_ACL onnxruntime_providers_acl) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES acl) + set(PROVIDERS_ACL onnxruntime_providers_acl) endif() if(onnxruntime_USE_ARMNN) - set(PROVIDERS_ARMNN onnxruntime_providers_armnn) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES armnn) + set(PROVIDERS_ARMNN onnxruntime_providers_armnn) endif() if(onnxruntime_USE_ROCM) - set(PROVIDERS_ROCM onnxruntime_providers_rocm) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES rocm) + set(PROVIDERS_ROCM onnxruntime_providers_rocm) endif() source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs}) @@ -172,7 +152,7 @@ if (onnxruntime_ENABLE_TRAINING) list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs}) endif() 
-add_library(onnxruntime_providers ${onnxruntime_providers_src}) +onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src}) if (MSVC) target_compile_options(onnxruntime_providers PRIVATE "/bigobj") if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -278,7 +258,7 @@ if (onnxruntime_USE_CUDA) list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs}) endif() - add_library(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src}) + onnxruntime_add_static_library(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src}) #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler \"/analyze:stacksize 131072\">") if (HAS_GUARD_CF) @@ -382,8 +362,6 @@ if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_DNNL OR onnxruntime_USE_OPENVINO endif() if (onnxruntime_USE_DNNL) - list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl) - file(GLOB_RECURSE onnxruntime_providers_dnnl_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.h" "${ONNXRUNTIME_ROOT}/core/providers/dnnl/*.cc" @@ -535,7 +513,7 @@ if (onnxruntime_USE_NUPHAR) endif() source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nuphar_cc_srcs}) - add_library(onnxruntime_providers_nuphar ${onnxruntime_providers_nuphar_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_nuphar ${onnxruntime_providers_nuphar_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_nuphar onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) set_target_properties(onnxruntime_providers_nuphar PROPERTIES FOLDER "ONNXRuntime") target_include_directories(onnxruntime_providers_nuphar PRIVATE ${ONNXRUNTIME_ROOT} ${TVM_INCLUDES} ${eigen_INCLUDE_DIRS}) @@ -552,7 +530,7 @@ if (onnxruntime_USE_VITISAI) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vitisai_cc_srcs}) - add_library(onnxruntime_providers_vitisai 
${onnxruntime_providers_vitisai_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_vitisai ${onnxruntime_providers_vitisai_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_vitisai onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) add_dependencies(onnxruntime_providers_vitisai ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_providers_vitisai PROPERTIES FOLDER "ONNXRuntime") @@ -625,7 +603,7 @@ if (onnxruntime_USE_COREML) file(GLOB coreml_proto_srcs "${COREML_PROTO_ROOT}/*.proto" ) - add_library(onnxruntime_coreml_proto ${coreml_proto_srcs}) + onnxruntime_add_static_library(onnxruntime_coreml_proto ${coreml_proto_srcs}) target_include_directories(onnxruntime_coreml_proto PUBLIC $ "${CMAKE_CURRENT_BINARY_DIR}") target_compile_definitions(onnxruntime_coreml_proto PUBLIC $) set_target_properties(onnxruntime_coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") @@ -678,7 +656,7 @@ if (onnxruntime_USE_COREML) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_coreml_cc_srcs}) - add_library(onnxruntime_providers_coreml ${onnxruntime_providers_coreml_cc_srcs} ${onnxruntime_providers_coreml_objcc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_coreml ${onnxruntime_providers_coreml_cc_srcs} ${onnxruntime_providers_coreml_objcc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_coreml onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers) onnxruntime_add_include_to_target(onnxruntime_providers_coreml onnxruntime_coreml_proto) target_link_libraries(onnxruntime_providers_coreml PRIVATE onnxruntime_coreml_proto "-framework Foundation" "-framework CoreML") @@ -750,7 +728,7 @@ if (onnxruntime_USE_NNAPI_BUILTIN) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nnapi_cc_srcs}) - add_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs}) + 
onnxruntime_add_static_library(onnxruntime_providers_nnapi ${onnxruntime_providers_nnapi_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_nnapi onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers) target_link_libraries(onnxruntime_providers_nnapi) add_dependencies(onnxruntime_providers_nnapi onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) @@ -786,7 +764,7 @@ if (onnxruntime_USE_RKNPU) "${ONNXRUNTIME_ROOT}/core/providers/rknpu/*.cc" ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_rknpu_cc_srcs}) - add_library(onnxruntime_providers_rknpu ${onnxruntime_providers_rknpu_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_rknpu ${onnxruntime_providers_rknpu_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_rknpu onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers) target_link_libraries(onnxruntime_providers_rknpu PRIVATE -lrknpu_ddk) add_dependencies(onnxruntime_providers_rknpu onnx ${onnxruntime_EXTERNAL_DEPENDENCIES}) @@ -805,7 +783,7 @@ if (onnxruntime_USE_DML) "${ONNXRUNTIME_ROOT}/core/providers/dml/*.cc" ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dml_cc_srcs}) - add_library(onnxruntime_providers_dml ${onnxruntime_providers_dml_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_dml ${onnxruntime_providers_dml_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_dml onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) add_dependencies(onnxruntime_providers_dml ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_providers_dml PRIVATE ${ONNXRUNTIME_ROOT} ${ONNXRUNTIME_ROOT}/../cmake/external/wil/include) @@ -878,7 +856,7 @@ if (onnxruntime_USE_MIGRAPHX) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_migraphx_cc_srcs}) - add_library(onnxruntime_providers_migraphx 
${onnxruntime_providers_migraphx_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_migraphx ${onnxruntime_providers_migraphx_cc_srcs}) target_link_libraries(onnxruntime_providers_migraphx PRIVATE ${migraphx_libs}) set_target_properties(onnxruntime_providers_migraphx PROPERTIES FOLDER "ONNXRuntime") target_compile_options(onnxruntime_providers_migraphx PRIVATE -Wno-error=sign-compare) @@ -897,7 +875,7 @@ if (onnxruntime_USE_ACL) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_acl_cc_srcs}) - add_library(onnxruntime_providers_acl ${onnxruntime_providers_acl_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_acl ${onnxruntime_providers_acl_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_acl onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) target_link_libraries(onnxruntime_providers_acl -L$ENV{LD_LIBRARY_PATH}) add_dependencies(onnxruntime_providers_acl ${onnxruntime_EXTERNAL_DEPENDENCIES}) @@ -915,7 +893,7 @@ if (onnxruntime_USE_ARMNN) ) source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_armnn_cc_srcs}) - add_library(onnxruntime_providers_armnn ${onnxruntime_providers_armnn_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_armnn ${onnxruntime_providers_armnn_cc_srcs}) onnxruntime_add_include_to_target(onnxruntime_providers_armnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) add_dependencies(onnxruntime_providers_armnn ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_providers_armnn PROPERTIES FOLDER "ONNXRuntime") diff --git a/cmake/onnxruntime_pyop.cmake b/cmake/onnxruntime_pyop.cmake index 5401883c4d35d..8c4fba3999652 100644 --- a/cmake/onnxruntime_pyop.cmake +++ b/cmake/onnxruntime_pyop.cmake @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
file(GLOB onnxruntime_pyop_srcs "${ONNXRUNTIME_ROOT}/core/language_interop_ops/pyop/pyop.cc") -add_library(onnxruntime_pyop ${onnxruntime_pyop_srcs}) +onnxruntime_add_static_library(onnxruntime_pyop ${onnxruntime_pyop_srcs}) add_dependencies(onnxruntime_pyop onnxruntime_graph) onnxruntime_add_include_to_target(onnxruntime_pyop onnxruntime_common onnxruntime_graph onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) target_include_directories(onnxruntime_pyop PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS}) diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 45b464fc756f8..9b9124f72c708 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -52,6 +52,7 @@ file(GLOB onnxruntime_pybind_srcs CONFIGURE_DEPENDS onnxruntime_add_shared_library_module(onnxruntime_pybind11_state ${onnxruntime_pybind_srcs}) if(MSVC) target_compile_options(onnxruntime_pybind11_state PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") + target_compile_options(onnxruntime_pybind11_state PRIVATE "$<$:SHELL:--compiler-options /bigobj>" "$<$>:/bigobj>") endif() if(HAS_CAST_FUNCTION_TYPE) target_compile_options(onnxruntime_pybind11_state PRIVATE "-Wno-cast-function-type") @@ -61,9 +62,6 @@ if(onnxruntime_PYBIND_EXPORT_OPSCHEMA) target_compile_definitions(onnxruntime_pybind11_state PRIVATE onnxruntime_PYBIND_EXPORT_OPSCHEMA) endif() -if (onnxruntime_USE_DNNL) - target_compile_definitions(onnxruntime_pybind11_state PRIVATE USE_DNNL=1) -endif() if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) #TODO: fix the warnings target_compile_options(onnxruntime_pybind11_state PRIVATE "/wd4244") @@ -94,16 +92,18 @@ endif() set(onnxruntime_pybind11_state_libs onnxruntime_session ${onnxruntime_libs} + ${PROVIDERS_ACL} + ${PROVIDERS_ARMNN} + ${PROVIDERS_COREML} ${PROVIDERS_CUDA} + ${PROVIDERS_DML} ${PROVIDERS_MIGRAPHX} - ${PROVIDERS_NUPHAR} - ${PROVIDERS_VITISAI} ${PROVIDERS_NNAPI} + ${PROVIDERS_NUPHAR} ${PROVIDERS_RKNPU} - 
${PROVIDERS_DML} - ${PROVIDERS_ACL} - ${PROVIDERS_ARMNN} ${PROVIDERS_ROCM} + ${PROVIDERS_VITISAI} + ${PROVIDERS_INTERNAL_TESTING} onnxruntime_optimizer onnxruntime_providers onnxruntime_util @@ -276,191 +276,191 @@ file(GLOB onnxruntime_python_datasets_data CONFIGURE_DEPENDS ) set(build_output_target onnxruntime_common) - -add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/backend - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/capi - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/capi/training - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/datasets - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools/featurizer_ops - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/longformer - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/operators - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/CalTableFlatBuffers - COMMAND ${CMAKE_COMMAND} -E make_directory $/checkpoint - COMMAND ${CMAKE_COMMAND} -E make_directory $/dhp_parallel - COMMAND ${CMAKE_COMMAND} -E make_directory $/quantization - COMMAND ${CMAKE_COMMAND} -E copy - ${ONNXRUNTIME_ROOT}/__init__.py - $/onnxruntime/ - COMMAND ${CMAKE_COMMAND} -E copy - ${REPO_ROOT}/ThirdPartyNotices.txt - $/onnxruntime/ - COMMAND ${CMAKE_COMMAND} -E copy - ${REPO_ROOT}/docs/Privacy.md - $/onnxruntime/ - COMMAND ${CMAKE_COMMAND} -E copy - ${REPO_ROOT}/LICENSE - $/onnxruntime/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_backend_srcs} - $/onnxruntime/backend/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_srcs} - $/onnxruntime/capi/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_capi_training_srcs} - 
$/onnxruntime/capi/training/ - COMMAND ${CMAKE_COMMAND} -E copy - $ - $/onnxruntime/capi/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_datasets_srcs} - $/onnxruntime/datasets/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_datasets_data} - $/onnxruntime/datasets/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_tools_srcs} - $/onnxruntime/tools/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_tools_featurizers_src} - $/onnxruntime/tools/featurizer_ops/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_quantization_src} - $/onnxruntime/quantization/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_quantization_operators_src} - $/onnxruntime/quantization/operators/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_quantization_cal_table_flatbuffers_src} - $/onnxruntime/quantization/CalTableFlatBuffers/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_transformers_src} - $/onnxruntime/transformers/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_transformers_longformer_src} - $/onnxruntime/transformers/longformer/ - COMMAND ${CMAKE_COMMAND} -E copy - ${REPO_ROOT}/VERSION_NUMBER - $ -) - -if (onnxruntime_BUILD_UNIT_TESTS) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_test_srcs} - $ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_quantization_test_srcs} - $/quantization/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_checkpoint_test_srcs} - $/checkpoint/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_dhp_parallel_test_srcs} - $/dhp_parallel/ - ) -endif() - -if (onnxruntime_ENABLE_TRAINING) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/amp - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/optim - 
COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_capi_training_srcs} - $/onnxruntime/capi/training/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_root_srcs} - $/onnxruntime/training/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_amp_srcs} - $/onnxruntime/training/amp/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_optim_srcs} - $/onnxruntime/training/optim/ - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_train_tools_srcs} - $/onnxruntime/training/ - ) -endif() - -if (onnxruntime_USE_DNNL) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${DNNL_DLL_PATH} $ - $ - $/onnxruntime/capi/ - ) -endif() - -if (onnxruntime_USE_TENSORRT) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $ - $ - $/onnxruntime/capi/ - ) -endif() - -if (onnxruntime_USE_OPENVINO) +if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/backend + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/capi + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/capi/training + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/datasets + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools/featurizer_ops + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/longformer + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/operators + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/CalTableFlatBuffers + COMMAND ${CMAKE_COMMAND} -E make_directory $/checkpoint + COMMAND ${CMAKE_COMMAND} -E make_directory $/dhp_parallel + COMMAND ${CMAKE_COMMAND} -E 
make_directory $/quantization + COMMAND ${CMAKE_COMMAND} -E copy + ${ONNXRUNTIME_ROOT}/__init__.py + $/onnxruntime/ + COMMAND ${CMAKE_COMMAND} -E copy + ${REPO_ROOT}/ThirdPartyNotices.txt + $/onnxruntime/ + COMMAND ${CMAKE_COMMAND} -E copy + ${REPO_ROOT}/docs/Privacy.md + $/onnxruntime/ + COMMAND ${CMAKE_COMMAND} -E copy + ${REPO_ROOT}/LICENSE + $/onnxruntime/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_backend_srcs} + $/onnxruntime/backend/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_srcs} + $/onnxruntime/capi/ COMMAND ${CMAKE_COMMAND} -E copy - $ - $ + ${onnxruntime_python_capi_training_srcs} + $/onnxruntime/capi/training/ + COMMAND ${CMAKE_COMMAND} -E copy + $ $/onnxruntime/capi/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_datasets_srcs} + $/onnxruntime/datasets/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_datasets_data} + $/onnxruntime/datasets/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_tools_srcs} + $/onnxruntime/tools/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_tools_featurizers_src} + $/onnxruntime/tools/featurizer_ops/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_quantization_src} + $/onnxruntime/quantization/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_quantization_operators_src} + $/onnxruntime/quantization/operators/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_quantization_cal_table_flatbuffers_src} + $/onnxruntime/quantization/CalTableFlatBuffers/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_transformers_src} + $/onnxruntime/transformers/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_transformers_longformer_src} + $/onnxruntime/transformers/longformer/ + COMMAND ${CMAKE_COMMAND} -E copy + ${REPO_ROOT}/VERSION_NUMBER + $ ) -endif() -if (onnxruntime_USE_TVM) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $ $ - $/onnxruntime/capi/ - ) -endif() + if 
(onnxruntime_BUILD_UNIT_TESTS) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_test_srcs} + $ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_quantization_test_srcs} + $/quantization/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_checkpoint_test_srcs} + $/checkpoint/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_dhp_parallel_test_srcs} + $/dhp_parallel/ + ) + endif() -if (onnxruntime_USE_NUPHAR) - file(GLOB onnxruntime_python_nuphar_python_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/nuphar/scripts/*" - ) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/nuphar - COMMAND ${CMAKE_COMMAND} -E copy - ${onnxruntime_python_nuphar_python_srcs} - $/onnxruntime/nuphar/ - ) -endif() + if (onnxruntime_ENABLE_TRAINING) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/amp + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/optim + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_capi_training_srcs} + $/onnxruntime/capi/training/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_root_srcs} + $/onnxruntime/training/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_amp_srcs} + $/onnxruntime/training/amp/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_optim_srcs} + $/onnxruntime/training/optim/ + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_train_tools_srcs} + $/onnxruntime/training/ + ) + endif() -if (onnxruntime_USE_DML) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${DML_SHARED_LIB} - $/onnxruntime/capi/ - ) -endif() + if (onnxruntime_USE_DNNL) 
+ add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${DNNL_DLL_PATH} $ + $ + $/onnxruntime/capi/ + ) + endif() -if (onnxruntime_USE_NNAPI_BUILTIN) - add_custom_command( - TARGET onnxruntime_pybind11_state POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - $ - $/onnxruntime/capi/ - ) -endif() + if (onnxruntime_USE_TENSORRT) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + $/onnxruntime/capi/ + ) + endif() + + if (onnxruntime_USE_OPENVINO) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $ + $/onnxruntime/capi/ + ) + endif() + if (onnxruntime_USE_TVM) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ $ + $/onnxruntime/capi/ + ) + endif() + + if (onnxruntime_USE_NUPHAR) + file(GLOB onnxruntime_python_nuphar_python_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/nuphar/scripts/*" + ) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/nuphar + COMMAND ${CMAKE_COMMAND} -E copy + ${onnxruntime_python_nuphar_python_srcs} + $/onnxruntime/nuphar/ + ) + endif() + + if (onnxruntime_USE_DML) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${DML_SHARED_LIB} + $/onnxruntime/capi/ + ) + endif() + + if (onnxruntime_USE_NNAPI_BUILTIN) + add_custom_command( + TARGET onnxruntime_pybind11_state POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ + $/onnxruntime/capi/ + ) + endif() +endif() if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) include(onnxruntime_language_interop_ops.cmake) endif() diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake index df7eebf5a2b01..564dc53dfe8dd 100644 --- a/cmake/onnxruntime_session.cmake +++ 
b/cmake/onnxruntime_session.cmake @@ -9,7 +9,7 @@ file(GLOB onnxruntime_session_srcs CONFIGURE_DEPENDS source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs}) -add_library(onnxruntime_session ${onnxruntime_session_srcs}) +onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs}) install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/session DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core) onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf flatbuffers) if(onnxruntime_ENABLE_INSTRUMENT) diff --git a/cmake/onnxruntime_training.cmake b/cmake/onnxruntime_training.cmake index cc7d865490781..1d12d17c0ce81 100644 --- a/cmake/onnxruntime_training.cmake +++ b/cmake/onnxruntime_training.cmake @@ -17,7 +17,7 @@ file(GLOB_RECURSE onnxruntime_training_srcs "${ORTTRAINING_SOURCE_DIR}/core/agent/*.cc" ) -add_library(onnxruntime_training ${onnxruntime_training_srcs}) +onnxruntime_add_static_library(onnxruntime_training ${onnxruntime_training_srcs}) add_dependencies(onnxruntime_training onnx tensorboard ${onnxruntime_EXTERNAL_DEPENDENCIES}) onnxruntime_add_include_to_target(onnxruntime_training onnxruntime_common onnx onnx_proto tensorboard protobuf::libprotobuf flatbuffers) @@ -54,7 +54,7 @@ else () "${onnxruntime_perf_test_src_dir}/posix/utils.cc") endif() -add_library(onnxruntime_training_runner ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src}) +onnxruntime_add_static_library(onnxruntime_training_runner ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src}) add_dependencies(onnxruntime_training_runner ${onnxruntime_EXTERNAL_DEPENDENCIES} onnx onnxruntime_providers) onnxruntime_add_include_to_target(onnxruntime_training_runner onnxruntime_training onnxruntime_framework onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers) diff --git a/cmake/onnxruntime_unittests.cmake 
b/cmake/onnxruntime_unittests.cmake index 3a289a3a07b8d..48d937bf1fb0c 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -522,7 +522,7 @@ file(GLOB onnxruntime_test_framework_src CONFIGURE_DEPENDS ) #without auto initialize onnxruntime -add_library(onnxruntime_test_utils ${onnxruntime_test_utils_src}) +onnxruntime_add_static_library(onnxruntime_test_utils ${onnxruntime_test_utils_src}) if(MSVC) target_compile_options(onnxruntime_test_utils PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") @@ -533,9 +533,7 @@ elseif (NOT onnxruntime_BUILD_WEBASSEMBLY) endif() onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_common onnxruntime_framework onnxruntime_session GTest::gtest GTest::gmock onnx onnx_proto flatbuffers) -if (onnxruntime_USE_DNNL) - target_compile_definitions(onnxruntime_test_utils PUBLIC USE_DNNL=1) -endif() + if (onnxruntime_USE_DML) target_add_dml(onnxruntime_test_utils) endif() @@ -551,7 +549,7 @@ file(GLOB onnx_test_runner_common_srcs CONFIGURE_DEPENDS list(REMOVE_ITEM onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/main.cc) -add_library(onnx_test_runner_common ${onnx_test_runner_common_srcs}) +onnxruntime_add_static_library(onnx_test_runner_common ${onnx_test_runner_common_srcs}) if(MSVC) target_compile_options(onnx_test_runner_common PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$>:/utf-8>") @@ -580,7 +578,7 @@ if(NOT TARGET onnxruntime AND NOT onnxruntime_BUILD_WEBASSEMBLY) endif() if (onnxruntime_USE_CUDA) - add_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu) + onnxruntime_add_static_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu) list(APPEND onnxruntime_test_common_libs onnxruntime_test_cuda_ops_lib) endif() @@ -697,7 +695,7 @@ if(WIN32) endif() endif() -add_library(onnx_test_data_proto ${TEST_SRC_DIR}/proto/tml.proto) +onnxruntime_add_static_library(onnx_test_data_proto 
${TEST_SRC_DIR}/proto/tml.proto) add_dependencies(onnx_test_data_proto onnx_proto ${onnxruntime_EXTERNAL_DEPENDENCIES}) #onnx_proto target should mark this definition as public, instead of private target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=") @@ -722,7 +720,7 @@ onnxruntime_protobuf_generate(APPEND_PATH IMPORT_DIRS external/onnx TARGET onnx_ if(WIN32) set(wide_get_opt_src_dir ${TEST_SRC_DIR}/win_getopt/wide) - add_library(win_getopt_wide ${wide_get_opt_src_dir}/getopt.cc ${wide_get_opt_src_dir}/include/getopt.h) + onnxruntime_add_static_library(win_getopt_wide ${wide_get_opt_src_dir}/getopt.cc ${wide_get_opt_src_dir}/include/getopt.h) target_include_directories(win_getopt_wide INTERFACE ${wide_get_opt_src_dir}/include) set_target_properties(win_getopt_wide PROPERTIES FOLDER "ONNXRuntimeTest") set(onnx_test_runner_common_srcs ${onnx_test_runner_common_srcs}) @@ -885,7 +883,7 @@ endif() # shared lib if (onnxruntime_BUILD_SHARED_LIB) - add_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc) + onnxruntime_add_static_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc) target_include_directories(onnxruntime_mocked_allocator PUBLIC ${TEST_SRC_DIR}/util/include) set_target_properties(onnxruntime_mocked_allocator PROPERTIES FOLDER "ONNXRuntimeTest") @@ -1000,7 +998,7 @@ if (onnxruntime_BUILD_WEBASSEMBLY) set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1") endif() -add_library(custom_op_library SHARED ${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.cc) +onnxruntime_add_shared_library_module(custom_op_library ${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.cc) target_include_directories(custom_op_library PRIVATE ${REPO_ROOT}/include) if(UNIX) if (APPLE) @@ -1033,6 +1031,7 @@ if (onnxruntime_BUILD_JAVA) -DGRADLE_EXECUTABLE=${GRADLE_EXECUTABLE} -DBIN_DIR=${CMAKE_CURRENT_BINARY_DIR} -DREPO_ROOT=${REPO_ROOT} + ${ORT_PROVIDER_CMAKE_FLAGS} 
-P ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime_java_unittests.cmake) else() add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E create_symlink $ diff --git a/cmake/onnxruntime_util.cmake b/cmake/onnxruntime_util.cmake index 6b22ac9693957..ddc8428c657f7 100644 --- a/cmake/onnxruntime_util.cmake +++ b/cmake/onnxruntime_util.cmake @@ -10,7 +10,7 @@ file(GLOB_RECURSE onnxruntime_util_srcs CONFIGURE_DEPENDS source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_util_srcs}) -add_library(onnxruntime_util ${onnxruntime_util_srcs}) +onnxruntime_add_static_library(onnxruntime_util ${onnxruntime_util_srcs}) target_include_directories(onnxruntime_util PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${eigen_INCLUDE_DIRS}) if (onnxruntime_USE_CUDA) target_include_directories(onnxruntime_util PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) diff --git a/cmake/winml.cmake b/cmake/winml.cmake index 436ef5ca45629..c2ee74e7998c1 100644 --- a/cmake/winml.cmake +++ b/cmake/winml.cmake @@ -151,7 +151,7 @@ add_dependencies(winml_api_native_internal RESTORE_NUGET_PACKAGES) ########################### # Add static library that will be archived/linked for both static/dynamic library -add_library(winml_lib_telemetry STATIC +onnxruntime_add_static_library(winml_lib_telemetry ${winml_lib_telemetry_dir}/inc/TelemetryEvent.h ${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows/TraceLoggingConfig.h ${winml_lib_common_dir}/inc/WinMLTelemetryHelper.h @@ -222,7 +222,7 @@ if (onnxruntime_USE_DML) endif() # Add static library that will be archived/linked for both static/dynamic library -add_library(winml_lib_ort STATIC ${winml_lib_api_ort_files}) +onnxruntime_add_static_library(winml_lib_ort ${winml_lib_api_ort_files}) # Compiler options target_compile_features(winml_lib_ort PRIVATE cxx_std_17) @@ -297,7 +297,7 @@ if (onnxruntime_USE_DML) ) endif() -add_library(winml_adapter ${winml_adapter_files}) +onnxruntime_add_static_library(winml_adapter ${winml_adapter_files}) if 
(onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows") target_compile_definitions(winml_adapter PRIVATE "BUILD_INBOX=1") @@ -339,7 +339,7 @@ list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES winml_adapter) ########################### # Add static library that will be archived/linked for both static/dynamic library -add_library(winml_lib_image STATIC +onnxruntime_add_static_library(winml_lib_image ${winml_lib_api_image_dir}/inc/ConverterResourceStore.h ${winml_lib_api_image_dir}/inc/D3DDeviceCache.h ${winml_lib_api_image_dir}/inc/DeviceHelpers.h @@ -426,7 +426,7 @@ endif(onnxruntime_USE_DML) ########################### # Add static library that will be archived/linked for both static/dynamic library -add_library(winml_lib_api STATIC +onnxruntime_add_static_library(winml_lib_api ${winml_lib_api_dir}/impl/FeatureCompatibility.h ${winml_lib_api_dir}/impl/IData.h ${winml_lib_api_dir}/impl/IMapFeatureValue.h @@ -540,7 +540,7 @@ endif(onnxruntime_USE_DML) ########################### # Add static library that will be archived/linked for both static/dynamic library -add_library(winml_lib_api_experimental STATIC +onnxruntime_add_static_library(winml_lib_api_experimental ${winml_lib_api_experimental_dir}/LearningModelBuilder.cpp ${winml_lib_api_experimental_dir}/LearningModelBuilder.h ${winml_lib_api_experimental_dir}/LearningModelInputs.cpp @@ -630,7 +630,7 @@ endif(onnxruntime_USE_DML) # Add winml_lib_common ########################### -add_library(winml_lib_common STATIC +onnxruntime_add_static_library(winml_lib_common ${winml_lib_common_dir}/inc/common.h ${winml_lib_common_dir}/inc/CommonDeviceHelpers.h ${winml_lib_common_dir}/inc/cppwinrt_onnx.h @@ -692,7 +692,7 @@ set_source_files_properties( TRUE) # Add library -add_library(winml_dll SHARED +onnxruntime_add_shared_library(winml_dll ${CMAKE_CURRENT_BINARY_DIR}/winml_api/comp_generated/module.g.excl.cpp ${winml_dll_dir}/winml.def ${winml_dll_dir}/winml.rc diff --git a/cmake/winml_unittests.cmake 
b/cmake/winml_unittests.cmake index a92503c64f433..e5461c647c426 100644 --- a/cmake/winml_unittests.cmake +++ b/cmake/winml_unittests.cmake @@ -173,7 +173,7 @@ endfunction() file(GLOB winml_test_common_src CONFIGURE_DEPENDS "${WINML_TEST_SRC_DIR}/common/*.h" "${WINML_TEST_SRC_DIR}/common/*.cpp") -add_library(winml_test_common STATIC ${winml_test_common_src}) +onnxruntime_add_static_library(winml_test_common ${winml_test_common_src}) target_compile_options(winml_test_common PRIVATE /wd5205) # workaround cppwinrt SDK bug https://github.com/microsoft/cppwinrt/issues/584 if (onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows") target_compile_definitions(winml_test_common PRIVATE "BUILD_INBOX=1") @@ -184,7 +184,7 @@ add_dependencies(winml_test_common winml_dll ) onnxruntime_add_include_to_target(winml_test_common onnx_proto) -add_library(winml_google_test_lib STATIC ${WINML_TEST_SRC_DIR}/common/googletest/main.cpp) +onnxruntime_add_static_library(winml_google_test_lib ${WINML_TEST_SRC_DIR}/common/googletest/main.cpp) set_winml_target_properties(winml_google_test_lib) set_winml_target_properties(winml_test_common) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 0df4c77404898..0b2fc65dd3c30 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -14,17 +14,19 @@ public struct OrtApiBase }; // NOTE: The order of the APIs in this struct should match exactly that in - // OrtApi ort_api_1_to_4 (onnxruntime_c_api.cc) + // OrtApi ort_api_1_to_8 (onnxruntime_c_api.cc) [StructLayout(LayoutKind.Sequential)] public struct OrtApi { public IntPtr CreateStatus; public IntPtr GetErrorCode; public IntPtr GetErrorMessage; + public IntPtr CreateEnv; public IntPtr CreateEnvWithCustomLogger; public IntPtr EnableTelemetryEvents; public IntPtr DisableTelemetryEvents; + public IntPtr CreateSession; public IntPtr CreateSessionFromArray; 
public IntPtr Run; @@ -60,6 +62,7 @@ public struct OrtApi public IntPtr SessionGetInputName; public IntPtr SessionGetOutputName; public IntPtr SessionGetOverridableInitializerName; + public IntPtr CreateRunOptions; public IntPtr RunOptionsSetRunLogVerbosityLevel; public IntPtr RunOptionsSetRunLogSeverityLevel; @@ -129,6 +132,8 @@ public struct OrtApi public IntPtr ReleaseTensorTypeAndShapeInfo; public IntPtr ReleaseSessionOptions; public IntPtr ReleaseCustomOpDomain; + // End of Version 1 - DO NOT MODIFY ABOVE (see above text for more information) + public IntPtr GetDenotationFromTypeInfo; public IntPtr CastTypeInfoToMapTypeInfo; public IntPtr CastTypeInfoToSequenceTypeInfo; @@ -138,7 +143,6 @@ public struct OrtApi public IntPtr ReleaseMapTypeInfo; public IntPtr ReleaseSequenceTypeInfo; public IntPtr SessionEndProfiling; - public IntPtr SessionGetModelMetadata; public IntPtr ModelMetadataGetProducerName; public IntPtr ModelMetadataGetGraphName; @@ -147,6 +151,7 @@ public struct OrtApi public IntPtr ModelMetadataLookupCustomMetadataMap; public IntPtr ModelMetadataGetVersion; public IntPtr ReleaseModelMetadata; + // End of Version 2 - DO NOT MODIFY ABOVE (see above text for more information) public IntPtr CreateEnvWithGlobalThreadPools; public IntPtr DisablePerSessionThreads; @@ -154,14 +159,19 @@ public struct OrtApi public IntPtr ReleaseThreadingOptions; public IntPtr ModelMetadataGetCustomMetadataMapKeys; public IntPtr AddFreeDimensionOverrideByName; + // End of Version 3 - DO NOT MODIFY ABOVE (see above text for more information) public IntPtr GetAvailableProviders; public IntPtr ReleaseAvailableProviders; + // End of Version 4 - DO NOT MODIFY ABOVE (see above text for more information) + public IntPtr GetStringTensorElementLength; public IntPtr GetStringTensorElement; public IntPtr FillStringTensorElement; public IntPtr AddSessionConfigEntry; + // IoBinding and above are propagated in the same order to C# API + // Do not move public IntPtr CreateAllocator; 
public IntPtr ReleaseAllocator; public IntPtr RunWithBinding; @@ -181,6 +191,8 @@ public struct OrtApi public IntPtr SetGlobalIntraOpNumThreads; public IntPtr SetGlobalInterOpNumThreads; public IntPtr SetGlobalSpinControl; + // End of Version 5 - DO NOT MODIFY ABOVE (see above text for more information) + public IntPtr AddInitializer; public IntPtr CreateEnvWithCustomLoggerAndGlobalThreadPools; public IntPtr SessionOptionsAppendExecutionProvider_CUDA; @@ -189,7 +201,17 @@ public struct OrtApi public IntPtr SetGlobalDenormalAsZero; public IntPtr CreateArenaCfg; public IntPtr ReleaseArenaCfg; + // End of Version 6 - DO NOT MODIFY ABOVE (see above text for more information) + public IntPtr ModelMetadataGetGraphDescription; + public IntPtr SessionOptionsAppendExecutionProvider_TensorRT; + public IntPtr SetCurrentGpuDeviceId; + public IntPtr GetCurrentGpuDeviceId; + // End of Version 7 - DO NOT MODIFY ABOVE (see above text for more information) + + // Version 8 - In development, feel free to add/remove/rearrange here + public IntPtr KernelInfoGetAttributeArray_float; + public IntPtr KernelInfoGetAttributeArray_int64; } internal static class NativeMethods @@ -567,13 +589,6 @@ IntPtr[] outputValues /* An array of output value pointers. 
Array must be alloca [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_DML(IntPtr /*(OrtSessionOptions*) */ options, int device_id); - [DllImport(nativeLib, CharSet = charSet)] - public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_OpenVINO( - IntPtr /*(OrtSessionOptions*)*/ options, IntPtr /*(const char*)*/ device_id); - - [DllImport(nativeLib, CharSet = charSet)] - public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Tensorrt(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); - [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_MIGraphX(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 6bc48a0d704da..bf7e8d8b874c3 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -155,30 +155,7 @@ public void AppendExecutionProvider_CUDA(int deviceId) public void AppendExecutionProvider_DML(int deviceId) { NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_DML(handle, deviceId)); - } - - - /// - /// Use only if you have the onnxruntime package specific to this Execution Provider. 
- /// - /// device identification, default empty string - public void AppendExecutionProvider_OpenVINO(string deviceId = "") - { - var deviceIdPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(deviceId), GCHandleType.Pinned); - using (var pinnedDeviceIdName = new PinnedGCHandle(deviceIdPinned)) - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_OpenVINO(handle, pinnedDeviceIdName.Pointer)); - } - } - - /// - /// Use only if you have the onnxruntime package specific to this Execution Provider. - /// - /// device identification - public void AppendExecutionProvider_Tensorrt(int deviceId) - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(handle, deviceId)); - } + } /// /// Use only if you have the onnxruntime package specific to this Execution Provider. diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 3deb62bddd577..793c327685716 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -112,12 +112,6 @@ public void TestSessionOptions() SetDllDirectory(null); #endif -#if USE_OPENVINO - opt.AppendExecutionProvider_OpenVINO(); -#endif -#if USE_TENSORRT - opt.AppendExecutionProvider_Tensorrt(0); -#endif #if USE_MIGRAPHX opt.AppendExecutionProvider_MIGraphX(0); #endif @@ -2356,12 +2350,6 @@ private void VerifyNativeMethodsExist() #if USE_DML ,"OrtSessionOptionsAppendExecutionProvider_DML" #endif -#if USE_OPENVINO - ,"OrtSessionOptionsAppendExecutionProvider_OpenVINO" -#endif -#if USE_TENSORRT - ,"OrtSessionOptionsAppendExecutionProvider_Tensorrt" -#endif #if USE_MIGRAPHX ,"OrtSessionOptionsAppendExecutionProvider_MIGraphX" #endif diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OnnxMl.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OnnxMl.cs index 701ee8fe13853..dac978a8d7d42 
100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OnnxMl.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OnnxMl.cs @@ -151,7 +151,7 @@ public enum Version { /// /// The version field is always serialized and we will use it to store the /// version that the graph is generated from. This helps us set up version - /// control. + /// control. /// For the IR, we are using simple numbers starting with 0x00000001, /// which was the version we published on Oct 10, 2017. /// @@ -1028,7 +1028,7 @@ public void MergeFrom(pb::CodedInputStream input) { /// Computation graphs are made up of a DAG of nodes, which represent what is /// commonly called a "layer" or "pipeline stage" in machine learning frameworks. /// - /// For example, it can be a node of type "Conv" that takes in an image, a filter + /// For example, it can be a node of type "Conv" that takes in an image, a filter /// tensor and a bias tensor, and produces the convolved output. /// public sealed partial class NodeProto : pb::IMessage { @@ -1437,7 +1437,7 @@ public TrainingInfoProto Clone() { /// /// Notice that an input of a node in the "algorithm" graph may reference the /// output of a node in the inference graph (but not the other way round). Also, inference - /// node cannot reference inputs of "algorithm". With these restrictions, inference graph + /// node cannot reference inputs of "algorithm". With these restrictions, inference graph /// can always be run independently without training information. /// /// By default, this field is an empty graph and its evaluation does not @@ -1459,7 +1459,7 @@ public TrainingInfoProto Clone() { private readonly pbc::RepeatedField initializationBinding_ = new pbc::RepeatedField(); /// /// This field specifies the bindings from the outputs of "initialization" to - /// some initializers in "ModelProto.graph.initializer" and + /// some initializers in "ModelProto.graph.initializer" and /// the "algorithm.initializer" in the same TrainingInfoProto. 
/// See "update_binding" below for details. /// @@ -2398,7 +2398,7 @@ public void MergeFrom(pb::CodedInputStream input) { /// /// Graphs /// - /// A graph defines the computational logic of a model and is comprised of a parameterized + /// A graph defines the computational logic of a model and is comprised of a parameterized /// list of nodes that form a directed acyclic graph based on their inputs and outputs. /// This is the equivalent of the "network" or "graph" in many deep learning /// frameworks. @@ -4201,8 +4201,8 @@ public TypeProto Clone() { public const int DenotationFieldNumber = 6; private string denotation_ = ""; /// - /// An optional denotation can be used to denote the whole - /// type with a standard semantic description as to what is + /// An optional denotation can be used to denote the whole + /// type with a standard semantic description as to what is /// stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition /// for pre-defined type denotations. /// @@ -4970,9 +4970,9 @@ public SparseTensor Clone() { public const int ElemTypeFieldNumber = 1; private int elemType_; /// - /// This field MUST NOT have the value of UNDEFINED + /// This field MUST NOT have the value of UNDEFINED /// This field MUST have a valid TensorProto.DataType value - /// This field MUST be present for this version of the IR. + /// This field MUST be present for this version of the IR. 
/// [global::System.Diagnostics.DebuggerNonUserCodeAttribute] public int ElemType { diff --git a/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs b/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs index 701ee8fe13853..dac978a8d7d42 100644 --- a/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs +++ b/csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/OnnxMl.cs @@ -151,7 +151,7 @@ public enum Version { /// /// The version field is always serialized and we will use it to store the /// version that the graph is generated from. This helps us set up version - /// control. + /// control. /// For the IR, we are using simple numbers starting with 0x00000001, /// which was the version we published on Oct 10, 2017. /// @@ -1028,7 +1028,7 @@ public void MergeFrom(pb::CodedInputStream input) { /// Computation graphs are made up of a DAG of nodes, which represent what is /// commonly called a "layer" or "pipeline stage" in machine learning frameworks. /// - /// For example, it can be a node of type "Conv" that takes in an image, a filter + /// For example, it can be a node of type "Conv" that takes in an image, a filter /// tensor and a bias tensor, and produces the convolved output. /// public sealed partial class NodeProto : pb::IMessage { @@ -1437,7 +1437,7 @@ public TrainingInfoProto Clone() { /// /// Notice that an input of a node in the "algorithm" graph may reference the /// output of a node in the inference graph (but not the other way round). Also, inference - /// node cannot reference inputs of "algorithm". With these restrictions, inference graph + /// node cannot reference inputs of "algorithm". With these restrictions, inference graph /// can always be run independently without training information. 
/// /// By default, this field is an empty graph and its evaluation does not @@ -1459,7 +1459,7 @@ public TrainingInfoProto Clone() { private readonly pbc::RepeatedField initializationBinding_ = new pbc::RepeatedField(); /// /// This field specifies the bindings from the outputs of "initialization" to - /// some initializers in "ModelProto.graph.initializer" and + /// some initializers in "ModelProto.graph.initializer" and /// the "algorithm.initializer" in the same TrainingInfoProto. /// See "update_binding" below for details. /// @@ -2398,7 +2398,7 @@ public void MergeFrom(pb::CodedInputStream input) { /// /// Graphs /// - /// A graph defines the computational logic of a model and is comprised of a parameterized + /// A graph defines the computational logic of a model and is comprised of a parameterized /// list of nodes that form a directed acyclic graph based on their inputs and outputs. /// This is the equivalent of the "network" or "graph" in many deep learning /// frameworks. @@ -4201,8 +4201,8 @@ public TypeProto Clone() { public const int DenotationFieldNumber = 6; private string denotation_ = ""; /// - /// An optional denotation can be used to denote the whole - /// type with a standard semantic description as to what is + /// An optional denotation can be used to denote the whole + /// type with a standard semantic description as to what is /// stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition /// for pre-defined type denotations. /// @@ -4970,9 +4970,9 @@ public SparseTensor Clone() { public const int ElemTypeFieldNumber = 1; private int elemType_; /// - /// This field MUST NOT have the value of UNDEFINED + /// This field MUST NOT have the value of UNDEFINED /// This field MUST have a valid TensorProto.DataType value - /// This field MUST be present for this version of the IR. + /// This field MUST be present for this version of the IR. 
/// [global::System.Diagnostics.DebuggerNonUserCodeAttribute] public int ElemType { diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index c066e4bd57c5e..aecdaefd26cb7 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -56,6 +56,7 @@ * com.microsoft.Trilu * com.microsoft.Unique * com.microsoft.WordConvEmbedding + * experimental com.microsoft.IsAllFinite * com.microsoft.nchwc * com.microsoft.nchwc.AveragePool * com.microsoft.nchwc.Conv @@ -2673,6 +2674,46 @@ This version of the operator has been available since version 1 of the 'com.micr +### experimental **com.microsoft.IsAllFinite** + + IsAllFinite + +#### Version + +No versioning maintained for experimental ops. +#### Attributes + +
+
isinf_only : int
+
If true, check only for Inf, -Inf.
+
isnan_only : int
+
If true, check only for NaN.
+
+ +#### Inputs (1 - ∞) + +
+
input (variadic) : V
+
Input tensors to check.
+
+ +#### Outputs + +
+
output : T
+
The output scalar. Its value is true if all input tensors are finite. Otherwise, the output value would be false.
+
+ +#### Type Constraints + +
+
V : tensor(float16), tensor(float), tensor(double), tensor(bfloat16)
+
Constrain input and output types to float tensors.
+
T : tensor(bool)
+
Constrain the output to a boolean tensor.
+
+ + ## com.microsoft.nchwc ### **com.microsoft.nchwc.AveragePool** diff --git a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h index 12784dbd258db..e36bfbe6e4a9a 100644 --- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h +++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h @@ -715,9 +715,7 @@ class ThreadPoolTempl : public onnxruntime::concurrency::ExtendedThreadPoolInter worker_data_(num_threads), all_coprimes_(num_threads), blocked_(0), - done_(false), - cancelled_(false) { - + done_(false) { // Calculate coprimes of all numbers [1, num_threads]. // Coprimes are used for random walks over all threads in Steal // and NonEmptyQueueIndex. Iteration is based on the fact that if we take @@ -748,15 +746,7 @@ class ThreadPoolTempl : public onnxruntime::concurrency::ExtendedThreadPoolInter // Now if all threads block without work, they will start exiting. // But note that threads can continue to work arbitrary long, // block, submit new work, unblock and otherwise live full life. - if (!cancelled_) { - WakeAllWorkersForExit(); - } else { - // Since we were cancelled, there might be entries in the queues. - // Empty them to prevent their destructor from asserting. - for (size_t i = 0; i < worker_data_.size(); i++) { - worker_data_[i].queue.Flush(); - } - } + WakeAllWorkersForExit(); // Join threads explicitly (by destroying) to avoid destruction order within // this class. for (size_t i = 0; i < worker_data_.size(); ++i) worker_data_[i].thread.reset(); @@ -1104,22 +1094,6 @@ void RunInParallel(std::function fn, unsigned n, std::ptrdif profiler_.LogEnd(ThreadPoolProfiler::WAIT); } -void Cancel() override { - cancelled_ = true; - // If done_ is true, which means this object is being destructing. - // Therefore worker_data_[i].thread could be NULL. - if (!done_) { - done_ = true; - // Let each thread know it's been cancelled. 
- for (size_t i = 0; i < worker_data_.size(); i++) { - assert(worker_data_[i].thread != nullptr); - worker_data_[i].thread->OnCancel(); - } - } - - // Wake up the threads without work to let them exit on their own. - WakeAllWorkersForExit(); -} int NumThreads() const EIGEN_FINAL { return num_threads_; @@ -1290,7 +1264,6 @@ int CurrentThreadId() const EIGEN_FINAL { Eigen::MaxSizeVector> all_coprimes_; std::atomic blocked_; // Count of blocked workers, used as a termination condition std::atomic done_; - std::atomic cancelled_; // Allow control over how many bits to use in each entry in good_worker_hints_. // We reduce this below the full 64-bit word size for two reasons. First, it @@ -1302,8 +1275,7 @@ int CurrentThreadId() const EIGEN_FINAL { unsigned num_hint_words_; std::unique_ptr[]> good_worker_hints_; - // Wake any blocked workers so that they can cleanly exit WorkerLoop(). For an - // abrupt exit, cancelled_==true and threads will exit their worker loops. For + // Wake any blocked workers so that they can cleanly exit WorkerLoop(). For // a clean exit, each thread will observe (1) done_ set, indicating that the // destructor has been called, (2) all threads blocked, and (3) no // items in the work queues. @@ -1334,82 +1306,80 @@ int CurrentThreadId() const EIGEN_FINAL { SetDenormalAsZero(set_denormal_as_zero_); profiler_.LogThreadId(thread_id); - while (!cancelled_ && !should_exit) { - Task t = q.PopFront(); + while (!should_exit) { + Task t = q.PopFront(); + if (!t) { + // Spin waiting for work. We indicate, via SetGOodWorkerHint that we are + // spinning. This will bias other threads toward pushing work to our queue. + // In addition, priodically make a best-effort attempt to steal from other + // threads which are not themselves spinning. + + SetGoodWorkerHint(thread_id, true); + for (int i = 0; i < spin_count && !t && !done_; i++) { + t = ((i + 1) % steal_count == 0) ? 
TrySteal() : q.PopFront(); + onnxruntime::concurrency::SpinPause(); + } + SetGoodWorkerHint(thread_id, false); + if (!t) { - // Spin waiting for work. We indicate, via SetGOodWorkerHint that we are - // spinning. This will bias other threads toward pushing work to our queue. - // In addition, priodically make a best-effort attempt to steal from other - // threads which are not themselves spinning. - - SetGoodWorkerHint(thread_id, true); - for (int i = 0; i < spin_count && !t && !cancelled_ && !done_; i++) { - t = ((i + 1) % steal_count == 0) ? TrySteal() : q.PopFront(); - onnxruntime::concurrency::SpinPause(); + // No work passed to us while spinning; make a further full attempt to + // steal work from other threads prior to blocking. + if (num_threads_ != 1) { + t = Steal(true /* true => check all queues */); } - SetGoodWorkerHint(thread_id, false); - if (!t) { - // No work passed to us while spinning; make a further full attempt to - // steal work from other threads prior to blocking. - if (num_threads_ != 1) { - t = Steal(true /* true => check all queues */); - } - if (!t) { - td.SetBlocked( - // Pre-block test - [&]() -> bool { - bool should_block = true; - // We already did a best-effort emptiness check when stealing; now - // do a full check prior to blocking. - int victim = NonEmptyQueueIndex(); - if (victim != -1) { + td.SetBlocked( + // Pre-block test + [&]() -> bool { + bool should_block = true; + // We already did a best-effort emptiness check when stealing; now + // do a full check prior to blocking. + int victim = NonEmptyQueueIndex(); + if (victim != -1) { + should_block = false; + t = worker_data_[victim].queue.PopBack(); + } + // Number of blocked threads is used as termination condition. + // If we are shutting down and all worker threads blocked without work, + // that's we are done. 
+ if (should_block) { + blocked_++; + if (done_ && blocked_ == static_cast(num_threads_)) { should_block = false; - if (!cancelled_) { - t = worker_data_[victim].queue.PopBack(); + // Almost done, but need to re-check queues. + // Consider that all queues are empty and all worker threads are preempted + // right after incrementing blocked_ above. Now a free-standing thread + // submits work and calls destructor (which sets done_). If we don't + // re-check queues, we will exit leaving the work unexecuted. + if (NonEmptyQueueIndex() != -1) { + // Note: we must not pop from queues before we decrement blocked_, + // otherwise the following scenario is possible. Consider that instead + // of checking for emptiness we popped the only element from queues. + // Now other worker threads can start exiting, which is bad if the + // work item submits other work. So we just check emptiness here, + // which ensures that all worker threads exit at the same time. + blocked_--; + } else { + should_exit = true; } } - // Number of blocked threads is used as termination condition. - // If we are shutting down and all worker threads blocked without work, - // that's we are done. - if (should_block) { - blocked_++; - if (done_ && blocked_ == static_cast(num_threads_)) { - should_block = false; - // Almost done, but need to re-check queues. - // Consider that all queues are empty and all worker threads are preempted - // right after incrementing blocked_ above. Now a free-standing thread - // submits work and calls destructor (which sets done_). If we don't - // re-check queues, we will exit leaving the work unexecuted. - if (NonEmptyQueueIndex() != -1) { - // Note: we must not pop from queues before we decrement blocked_, - // otherwise the following scenario is possible. Consider that instead - // of checking for emptiness we popped the only element from queues. - // Now other worker threads can start exiting, which is bad if the - // work item submits other work. 
So we just check emptiness here, - // which ensures that all worker threads exit at the same time. - blocked_--; - } else { - should_exit = true; - } - } - } - return should_block; - }, - // Post-block update (executed only if we blocked) - [&]() { - blocked_--; - }); - } + } + return should_block; + }, + // Post-block update (executed only if we blocked) + [&]() { + blocked_--; + }); } } - if (t) { - td.SetActive(); - t(); - profiler_.LogRun(thread_id); - td.SetSpinning(); - } } + if (t) { + td.SetActive(); + t(); + profiler_.LogRun(thread_id); + td.SetSpinning(); + } + } // Whichever thread(s) observe the termination conditions are responsible for waking // any other threads that have remained blocked. diff --git a/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h b/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h index 7646644c688ab..6f94da7f37eb6 100644 --- a/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h +++ b/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h @@ -2,22 +2,10 @@ // Licensed under the MIT License #include "onnxruntime_c_api.h" - #ifdef __cplusplus +#include +#include struct ProviderInfo_OpenVINO { virtual std::vector GetAvailableDevices() const = 0; }; - -extern "C" { -#endif - -/** - * \param device_type openvino device type and precision. Could be any of - * CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32. - */ -ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_OpenVINO, - _In_ OrtSessionOptions* options, _In_ const char* device_type); - -#ifdef __cplusplus -} #endif diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h deleted file mode 100644 index 44debc901cb77..0000000000000 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) Microsoft Corporation. 
All rights reserved. -// Licensed under the MIT License. - -#include "onnxruntime_c_api.h" - -#ifdef __cplusplus -extern "C" { -#endif - -ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); - -#ifdef __cplusplus -} -#endif diff --git a/java/src/main/java/ai/onnxruntime/OrtSession.java b/java/src/main/java/ai/onnxruntime/OrtSession.java index 286159b20730e..2f974d0498b10 100644 --- a/java/src/main/java/ai/onnxruntime/OrtSession.java +++ b/java/src/main/java/ai/onnxruntime/OrtSession.java @@ -790,28 +790,6 @@ public void addDnnl(boolean useArena) throws OrtException { addDnnl(OnnxRuntime.ortApiHandle, nativeHandle, useArena ? 1 : 0); } - /** - * Adds OpenVINO as an execution backend. - * - * @param deviceId The id of the OpenVINO execution device. - * @throws OrtException If there was an error in native code. - */ - public void addOpenVINO(String deviceId) throws OrtException { - checkClosed(); - addOpenVINO(OnnxRuntime.ortApiHandle, nativeHandle, deviceId); - } - - /** - * Adds Nvidia's TensorRT as an execution backend. - * - * @param deviceNum The id of the CUDA device. - * @throws OrtException If there was an error in native code. - */ - public void addTensorrt(int deviceNum) throws OrtException { - checkClosed(); - addTensorrt(OnnxRuntime.ortApiHandle, nativeHandle, deviceNum); - } - /** * Adds Android's NNAPI as an execution backend. Uses the default empty flag. 
* @@ -984,12 +962,6 @@ private native void addCUDA(long apiHandle, long nativeHandle, int deviceNum) private native void addDnnl(long apiHandle, long nativeHandle, int useArena) throws OrtException; - private native void addOpenVINO(long apiHandle, long nativeHandle, String deviceId) - throws OrtException; - - private native void addTensorrt(long apiHandle, long nativeHandle, int deviceNum) - throws OrtException; - private native void addNnapi(long apiHandle, long nativeHandle, int nnapiFlags) throws OrtException; diff --git a/java/src/main/native/OrtJniUtil.c b/java/src/main/native/OrtJniUtil.c index 7e5229ee69a41..dd42a63f0cba3 100644 --- a/java/src/main/native/OrtJniUtil.c +++ b/java/src/main/native/OrtJniUtil.c @@ -507,6 +507,7 @@ size_t copyJavaToTensor(JNIEnv *jniEnv, ONNXTensorElementDataType onnxType, uint size_t copyPrimitiveArrayToJava(JNIEnv *jniEnv, ONNXTensorElementDataType onnxType, uint8_t* tensor, jarray output) { uint32_t outputLength = (*jniEnv)->GetArrayLength(jniEnv,output); + if (outputLength == 0) return 0; size_t consumedSize = outputLength * onnxTypeSize(onnxType); switch (onnxType) { case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: // maps to c type uint8_t @@ -534,7 +535,10 @@ size_t copyPrimitiveArrayToJava(JNIEnv *jniEnv, ONNXTensorElementDataType onnxTy return consumedSize; } case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: { // stored as a uint16_t - float *floatArr = malloc(sizeof(float) * outputLength); + jfloat *floatArr = malloc(sizeof(jfloat) * outputLength); + if(floatArr == NULL) { + throwOrtException(jniEnv, 1, "Not enough memory"); + } uint16_t *halfArr = (uint16_t *) tensor; for (uint32_t i = 0; i < outputLength; i++) { floatArr[i] = convertHalfToFloat(halfArr[i]); @@ -647,6 +651,7 @@ void copyStringTensorToArray(JNIEnv *jniEnv, const OrtApi * api, OrtAllocator* a checkOrtStatus(jniEnv,api,api->AllocatorAlloc(allocator,curSize,(void**)&tempBuffer)); bufferSize = curSize; } + if(tempBuffer == NULL) throwOrtException(jniEnv, 1, "Not enough 
memory"); memcpy(tempBuffer,characterBuffer+offsets[i],curSize); tempBuffer[curSize-1] = '\0'; jobject tempString = (*jniEnv)->NewStringUTF(jniEnv,tempBuffer); @@ -991,29 +996,29 @@ jint throwOrtException(JNIEnv *jniEnv, int messageId, const char *message) { jint convertErrorCode(OrtErrorCode code) { switch (code) { case ORT_OK: - return 0; + return 0; case ORT_FAIL: - return 1; + return 1; case ORT_INVALID_ARGUMENT: - return 2; + return 2; case ORT_NO_SUCHFILE: - return 3; + return 3; case ORT_NO_MODEL: - return 4; + return 4; case ORT_ENGINE_ERROR: - return 5; + return 5; case ORT_RUNTIME_EXCEPTION: - return 6; + return 6; case ORT_INVALID_PROTOBUF: - return 7; + return 7; case ORT_MODEL_LOADED: - return 8; + return 8; case ORT_NOT_IMPLEMENTED: - return 9; + return 9; case ORT_INVALID_GRAPH: - return 10; + return 10; case ORT_EP_FAIL: - return 11; + return 11; default: return -1; // Unknown error code } @@ -1024,6 +1029,9 @@ void checkOrtStatus(JNIEnv *jniEnv, const OrtApi * api, OrtStatus * status) { const char* message = api->GetErrorMessage(status); size_t len = strlen(message)+1; char* copy = malloc(sizeof(char)*len); + if (copy == NULL) { + throwOrtException(jniEnv, 1, "Not enough memory"); + } memcpy(copy,message,len); int messageId = convertErrorCode(api->GetErrorCode(status)); api->ReleaseStatus(status); diff --git a/java/src/main/native/ai_onnxruntime_OrtSession.c b/java/src/main/native/ai_onnxruntime_OrtSession.c index b439c8542e2fd..b7a8ce03a868b 100644 --- a/java/src/main/native/ai_onnxruntime_OrtSession.c +++ b/java/src/main/native/ai_onnxruntime_OrtSession.c @@ -23,6 +23,7 @@ JNIEXPORT jlong JNICALL Java_ai_onnxruntime_OrtSession_createSession__JJLjava_la const jchar* cPath = (*jniEnv)->GetStringChars(jniEnv, modelPath, NULL); size_t stringLength = (*jniEnv)->GetStringLength(jniEnv, modelPath); wchar_t* newString = (wchar_t*)calloc(stringLength+1,sizeof(jchar)); + if(newString == NULL) throwOrtException(jniEnv, 1, "Not enough memory"); 
wcsncpy_s(newString, stringLength+1, (const wchar_t*) cPath, stringLength); checkOrtStatus(jniEnv,api,api->CreateSession((OrtEnv*)envHandle, (const wchar_t*)newString, (OrtSessionOptions*)optsHandle, &session)); free(newString); diff --git a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c index fc5cab8c8006c..35021c0139734 100644 --- a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c +++ b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c @@ -19,14 +19,12 @@ #include "onnxruntime/core/providers/dnnl/dnnl_provider_factory.h" #include "onnxruntime/core/providers/nnapi/nnapi_provider_factory.h" #include "onnxruntime/core/providers/nuphar/nuphar_provider_factory.h" -#include "onnxruntime/core/providers/openvino/openvino_provider_factory.h" -#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h" #include "onnxruntime/core/providers/migraphx/migraphx_provider_factory.h" #include "onnxruntime/core/providers/acl/acl_provider_factory.h" #include "onnxruntime/core/providers/armnn/armnn_provider_factory.h" #include "onnxruntime/core/providers/coreml/coreml_provider_factory.h" #include "onnxruntime/core/providers/rocm/rocm_provider_factory.h" -#ifdef USE_DIRECTML +#ifdef USE_DML #include "onnxruntime/core/providers/dml/dml_provider_factory.h" #endif @@ -92,6 +90,7 @@ JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_setOpt const jchar* path = (*jniEnv)->GetStringChars(jniEnv, pathString, NULL); size_t stringLength = (*jniEnv)->GetStringLength(jniEnv, pathString); wchar_t* newString = (wchar_t*)calloc(stringLength+1,sizeof(jchar)); + if(newString == NULL) throwOrtException(jniEnv, 1, "Not enough memory"); wcsncpy_s(newString, stringLength+1, (const wchar_t*) path, stringLength); checkOrtStatus(jniEnv,(const OrtApi*)apiHandle,api->SetOptimizedModelFilePath((OrtSessionOptions*) handle, (const wchar_t*) newString)); 
free(newString); @@ -162,6 +161,7 @@ JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_enable const jchar* path = (*jniEnv)->GetStringChars(jniEnv, pathString, NULL); size_t stringLength = (*jniEnv)->GetStringLength(jniEnv, pathString); wchar_t* newString = (wchar_t*)calloc(stringLength+1,sizeof(jchar)); + if(newString == NULL) throwOrtException(jniEnv, 1, "Not enough memory"); wcsncpy_s(newString, stringLength+1, (const wchar_t*) path, stringLength); checkOrtStatus(jniEnv,(const OrtApi*)apiHandle,api->EnableProfiling(options, (const wchar_t*) newString)); free(newString); @@ -390,40 +390,6 @@ JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_addDnn #endif } -/* - * Class: ai_onnxruntime_OrtSession_SessionOptions - * Method: addOpenVINO - * Signature: (JJLjava/lang/String;)V - */ -JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_addOpenVINO - (JNIEnv * jniEnv, jobject jobj, jlong apiHandle, jlong handle, jstring deviceIDString) { - (void)jobj; - #ifdef USE_OPENVINO - const char* deviceID = (*jniEnv)->GetStringUTFChars(jniEnv, deviceIDString, NULL); - checkOrtStatus(jniEnv,(const OrtApi*)apiHandle,OrtSessionOptionsAppendExecutionProvider_OpenVINO((OrtSessionOptions*) handle, deviceID)); - (*jniEnv)->ReleaseStringUTFChars(jniEnv,deviceIDString,deviceID); - #else - (void)apiHandle;(void)handle;(void)deviceIDString; // Parameters used when OpenVINO is defined. 
- throwOrtException(jniEnv,convertErrorCode(ORT_INVALID_ARGUMENT),"This binary was not compiled with OpenVINO support."); - #endif -} - -/* - * Class: ai_onnxruntime_OrtSession_SessionOptions - * Method: addTensorrt - * Signature: (JJI)V - */ -JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_addTensorrt - (JNIEnv * jniEnv, jobject jobj, jlong apiHandle, jlong handle, jint deviceNum) { - (void)jobj; - #ifdef USE_TENSORRT - checkOrtStatus(jniEnv,(const OrtApi*)apiHandle,OrtSessionOptionsAppendExecutionProvider_Tensorrt((OrtSessionOptions*) handle, deviceNum)); - #else - (void)apiHandle;(void)handle;(void)deviceNum; // Parameters used when TensorRT is defined. - throwOrtException(jniEnv,convertErrorCode(ORT_INVALID_ARGUMENT),"This binary was not compiled with TensorRT support."); - #endif -} - /* * Class: ai_onnxruntime_OrtSession_SessionOptions * Method: addNnapi diff --git a/onnxruntime/core/framework/provider_bridge_ort.cc b/onnxruntime/core/framework/provider_bridge_ort.cc index 4943d11c27ca7..1d56c374bba7d 100644 --- a/onnxruntime/core/framework/provider_bridge_ort.cc +++ b/onnxruntime/core/framework/provider_bridge_ort.cc @@ -17,7 +17,7 @@ #include "core/session/inference_session.h" #include "core/session/abi_session_options_impl.h" #include "core/session/ort_apis.h" - +#include "core/providers/openvino/openvino_provider_factory.h" #ifdef USE_TENSORRT #include "core/providers/cuda/cuda_allocator.h" #include "core/providers/cuda/gpu_data_transfer.h" @@ -43,8 +43,6 @@ using IndexedSubGraph_MetaDef = IndexedSubGraph::MetaDef; #include "core/providers/shared_library/provider_interfaces.h" #include "core/providers/dnnl/dnnl_provider_factory.h" -#include "core/providers/tensorrt/tensorrt_provider_factory.h" -#include "core/providers/openvino/openvino_provider_factory.h" // The filename extension for a shared library is different per platform #ifdef _WIN32 @@ -693,16 +691,6 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, 
_In_ OrtSessi return nullptr; } -ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id) { - auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(device_id); - if (!factory) { - return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_Tensorrt: Failed to load shared library"); - } - - options->provider_factories.push_back(factory); - return nullptr; -} - ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options) { auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(tensorrt_options); if (!factory) { @@ -722,15 +710,3 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In options->provider_factories.push_back(factory); return nullptr; } - -ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const char* device_type) { - OrtOpenVINOProviderOptions provider_options; - provider_options.device_type = device_type; - auto factory = onnxruntime::CreateExecutionProviderFactory_OpenVINO(&provider_options); - if (!factory) { - return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_OpenVINO: Failed to load shared library"); - } - - options->provider_factories.push_back(factory); - return nullptr; -} diff --git a/onnxruntime/core/platform/env.h b/onnxruntime/core/platform/env.h index 268f729acf811..8f412dd84ae69 100644 --- a/onnxruntime/core/platform/env.h +++ b/onnxruntime/core/platform/env.h @@ -48,7 +48,6 @@ using FileOffsetType = off_t; class EnvThread { public: - virtual void OnCancel() = 0; virtual ~EnvThread() = default; }; diff --git a/onnxruntime/core/platform/posix/env.cc b/onnxruntime/core/platform/posix/env.cc index 4714d735151ca..e48a364c354ad 100644 --- a/onnxruntime/core/platform/posix/env.cc +++ 
b/onnxruntime/core/platform/posix/env.cc @@ -154,11 +154,6 @@ class PosixThread : public EnvThread { #endif } - // This function is called when the threadpool is cancelled. - // TODO: Find a way to avoid calling TerminateThread - void OnCancel() override { - } - private: static void* ThreadMain(void* param) { std::unique_ptr p((Param*)param); @@ -167,7 +162,7 @@ class PosixThread : public EnvThread { p->start_address(p->index, p->param); } ORT_CATCH(const std::exception&) { - p->param->Cancel(); + //ignore any exceptions } return nullptr; } diff --git a/onnxruntime/core/platform/windows/debug_alloc.cc b/onnxruntime/core/platform/windows/debug_alloc.cc index d8f1d092ec820..c459645b0423d 100644 --- a/onnxruntime/core/platform/windows/debug_alloc.cc +++ b/onnxruntime/core/platform/windows/debug_alloc.cc @@ -77,7 +77,7 @@ struct SymbolHelper { return; } - _snprintf_s(buffer, _TRUNCATE, "%s(%d): %s", line.FileName, line.LineNumber, symbol.Name); + _snprintf_s(buffer, _TRUNCATE, "%s(%d): %s", line.FileName, static_cast(line.LineNumber), symbol.Name); string.append(buffer); } @@ -233,7 +233,7 @@ Memory_LeakCheck::~Memory_LeakCheck() { std::string string; char buffer[1024]; - _snprintf_s(buffer, _TRUNCATE, "%d bytes of memory leaked in %d allocations", leaked_bytes, leak_count); + _snprintf_s(buffer, _TRUNCATE, "%d bytes of memory leaked in %d allocations", static_cast(leaked_bytes), static_cast(leak_count)); string.append(buffer); std::cout << "\n----- MEMORY LEAKS: " << string.c_str() << "\n"; diff --git a/onnxruntime/core/platform/windows/env.cc b/onnxruntime/core/platform/windows/env.cc index ef9d8920c1fb1..f33be154443e6 100644 --- a/onnxruntime/core/platform/windows/env.cc +++ b/onnxruntime/core/platform/windows/env.cc @@ -64,16 +64,12 @@ class WindowsThread : public EnvThread { FAIL_FAST_LAST_ERROR_IF(waitStatus == WAIT_FAILED); } - // This function is called when the threadpool is cancelled. 
- // TODO: Find a way to avoid calling TerminateThread - void OnCancel() { -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) - TerminateThread(hThread.get(), 1); -#endif - } private: typedef HRESULT(WINAPI* SetThreadDescriptionFunc)(HANDLE hThread, PCWSTR lpThreadDescription); + +#pragma warning(push) +#pragma warning(disable : 6387) static unsigned __stdcall ThreadMain(void* param) { std::unique_ptr p((Param*)param); // TODO: should I try to use SetThreadSelectedCpuSets? @@ -82,9 +78,11 @@ class WindowsThread : public EnvThread { #if WINVER >= _WIN32_WINNT_WIN10 constexpr SetThreadDescriptionFunc pSetThrDesc = SetThreadDescription; #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + HMODULE kernelModule = GetModuleHandle(TEXT("kernel32.dll")); // kernel32.dll is always loaded + assert(kernelModule != nullptr); auto pSetThrDesc = - (SetThreadDescriptionFunc)GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetThreadDescription"); + (SetThreadDescriptionFunc)GetProcAddress(kernelModule, "SetThreadDescription"); #else constexpr SetThreadDescriptionFunc pSetThrDesc = nullptr; #endif @@ -107,6 +105,8 @@ class WindowsThread : public EnvThread { } return ret; } +#pragma warning(pop) + unsigned threadID = 0; wil::unique_handle hThread; }; diff --git a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu index a73a3e75980a9..73728f5182959 100644 --- a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu @@ -782,9 +782,8 @@ void ResizeImpl( reinterpret_cast(dims_mapping)); return; } - + ORT_THROW("Only bilinear/trilinear and bicubic modes are supported in Resize"); break; - case UpsampleMode::CUBIC: if (is_2D) { _ResizeCubicCoordinateMapping<<>>( @@ -804,6 +803,9 @@ void ResizeImpl( reinterpret_cast(dims_mapping)); return; } + ORT_THROW("Only bilinear/trilinear and bicubic modes are supported in Resize"); + case UpsampleMode::NN: + 
ORT_THROW("Only bilinear/trilinear and bicubic modes are supported in Resize"); } } diff --git a/onnxruntime/core/providers/cuda/tensor/where_impl.cu b/onnxruntime/core/providers/cuda/tensor/where_impl.cu index 91d143ddc2053..440a805d9a9b9 100644 --- a/onnxruntime/core/providers/cuda/tensor/where_impl.cu +++ b/onnxruntime/core/providers/cuda/tensor/where_impl.cu @@ -1,6 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#ifdef __GNUC__ +#include "onnxruntime_config.h" +#pragma GCC diagnostic ignored "-Wswitch" +#endif #include #include "core/providers/cuda/shared_inc/cuda_utils.h" #include "core/providers/cuda/cu_inc/common.cuh" diff --git a/onnxruntime/core/providers/dnnl/symbols.def b/onnxruntime/core/providers/dnnl/symbols.def index 4ec2f7914c208..ab8ff7bbd15b5 100644 --- a/onnxruntime/core/providers/dnnl/symbols.def +++ b/onnxruntime/core/providers/dnnl/symbols.def @@ -1,2 +1,2 @@ EXPORTS - GetProvider + GetProvider PRIVATE diff --git a/onnxruntime/core/providers/openvino/symbols.def b/onnxruntime/core/providers/openvino/symbols.def index 4ec2f7914c208..ab8ff7bbd15b5 100644 --- a/onnxruntime/core/providers/openvino/symbols.def +++ b/onnxruntime/core/providers/openvino/symbols.def @@ -1,2 +1,2 @@ EXPORTS - GetProvider + GetProvider PRIVATE diff --git a/onnxruntime/core/providers/openvino/symbols.txt b/onnxruntime/core/providers/openvino/symbols.txt deleted file mode 100644 index 86725efe5be89..0000000000000 --- a/onnxruntime/core/providers/openvino/symbols.txt +++ /dev/null @@ -1 +0,0 @@ -OrtSessionOptionsAppendExecutionProvider_OpenVINO diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 78fcc2b0424ac..1bab852728708 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -8,8 +8,17 @@ #include 
#include "core/providers/shared/common.h" +#ifndef _Ret_notnull_ +#define _Ret_notnull_ +#endif + + +#ifndef _Post_writable_byte_size_ +#define _Post_writable_byte_size_(n) +#endif + // Override default new/delete so that we match the host's allocator -void* operator new(size_t n) { return Provider_GetHost()->HeapAllocate(n); } +_Ret_notnull_ _Post_writable_byte_size_(n) void* operator new(size_t n) { return Provider_GetHost()->HeapAllocate(n); } void operator delete(void* p) { return Provider_GetHost()->HeapFree(p); } void operator delete(void* p, size_t /*size*/) { return Provider_GetHost()->HeapFree(p); } diff --git a/onnxruntime/core/providers/tensorrt/symbols.def b/onnxruntime/core/providers/tensorrt/symbols.def index 4ec2f7914c208..ab8ff7bbd15b5 100644 --- a/onnxruntime/core/providers/tensorrt/symbols.def +++ b/onnxruntime/core/providers/tensorrt/symbols.def @@ -1,2 +1,2 @@ EXPORTS - GetProvider + GetProvider PRIVATE diff --git a/onnxruntime/core/providers/tensorrt/symbols.txt b/onnxruntime/core/providers/tensorrt/symbols.txt deleted file mode 100644 index 47950c476c5e8..0000000000000 --- a/onnxruntime/core/providers/tensorrt/symbols.txt +++ /dev/null @@ -1 +0,0 @@ -OrtSessionOptionsAppendExecutionProvider_Tensorrt diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 64b57dfbe6bb0..b689704491226 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
#include "core/providers/shared_library/provider_api.h" -#include "core/providers/tensorrt/tensorrt_provider_factory.h" #include #include "tensorrt_execution_provider.h" diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 64071a89bc47d..5412be17e5331 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -165,9 +165,6 @@ size_t gpu_mem_limit = std::numeric_limits::max(); onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; #endif -#ifdef USE_TENSORRT -#include "core/providers/tensorrt/tensorrt_provider_factory.h" -#endif #ifdef USE_MIGRAPHX #include "core/providers/migraphx/migraphx_provider_factory.h" #endif diff --git a/onnxruntime/test/platform/threadpool_test.cc b/onnxruntime/test/platform/threadpool_test.cc index ba6e8d96ddfa4..95e124a6a7eab 100644 --- a/onnxruntime/test/platform/threadpool_test.cc +++ b/onnxruntime/test/platform/threadpool_test.cc @@ -382,6 +382,8 @@ TEST(ThreadPoolTest, TestMultiLoopSections_4Thread_100Loop) { } #ifdef _WIN32 +#pragma warning(push) +#pragma warning(disable : 6387) TEST(ThreadPoolTest, TestStackSize) { ThreadOptions to; // For ARM, x86 and x64 machines, the default stack size is 1 MB @@ -408,6 +410,7 @@ TEST(ThreadPoolTest, TestStackSize) { if (has_thread_limit_info) ASSERT_EQ(high_limit - low_limit, to.stack_size); } +#pragma warning(pop) #endif } // namespace onnxruntime diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 06073e528f6de..3c5985dac789f 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -788,7 +788,8 @@ TEST(CApiTest, io_binding) { binding.ClearBoundOutputs(); } -#if defined(USE_CUDA) || defined(USE_TENSORRT) +//TODO: Replace the function call OrtSessionOptionsAppendExecutionProvider_CUDA +#if defined(USE_CUDA) 
TEST(CApiTest, io_binding_cuda) { struct CudaMemoryDeleter { explicit CudaMemoryDeleter(const Ort::Allocator* alloc) { @@ -802,11 +803,7 @@ TEST(CApiTest, io_binding_cuda) { }; Ort::SessionOptions session_options; -#ifdef USE_TENSORRT - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0)); -#else Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); -#endif Ort::Session session(*ort_env, MODEL_URI, session_options); Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); diff --git a/onnxruntime/test/util/include/providers.h b/onnxruntime/test/util/include/providers.h index 2f6a62b9a7f81..9a9b6ad12879a 100644 --- a/onnxruntime/test/util/include/providers.h +++ b/onnxruntime/test/util/include/providers.h @@ -13,9 +13,6 @@ #ifdef USE_NUPHAR #include "core/providers/nuphar/nuphar_provider_factory.h" #endif -#ifdef USE_TENSORRT -#include "core/providers/tensorrt/tensorrt_provider_factory.h" -#endif #ifdef USE_OPENVINO #include "core/providers/openvino/openvino_provider_factory.h" #endif diff --git a/server/environment.cc b/server/environment.cc index 59d8e71764006..d6dc0329b1d6f 100644 --- a/server/environment.cc +++ b/server/environment.cc @@ -17,11 +17,6 @@ #endif -#ifdef USE_OPENVINO - -#include "core/providers/openvino/openvino_provider_factory.h" - -#endif namespace onnxruntime { namespace server { @@ -68,12 +63,6 @@ void ServerEnvironment::RegisterExecutionProviders(){ #ifdef USE_NUPHAR Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nuphar(options_, 1, "")); #endif - - #ifdef USE_OPENVINO - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(options_, "")); - #endif - - } void ServerEnvironment::InitializeModel(const std::string& model_path, const std::string& model_name, const std::string& model_version) { diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index ad72044afde7c..918b18852417f 100644 --- 
a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1698,26 +1698,26 @@ def build_protoc_for_host(cmake_path, source_dir, build_dir, args): def generate_documentation(source_dir, build_dir, configs): + # Randomly choose one build config + config = next(iter(configs)) + cwd = get_config_build_dir(build_dir, config) + if is_windows(): + cwd = os.path.join(cwd, config) operator_doc_path = os.path.join(source_dir, 'docs', 'ContribOperators.md') opkernel_doc_path = os.path.join(source_dir, 'docs', 'OperatorKernels.md') - for config in configs: - # Copy the gen_contrib_doc.py. - shutil.copy( - os.path.join(source_dir, 'tools', 'python', 'gen_contrib_doc.py'), - os.path.join(build_dir, config)) - shutil.copy( - os.path.join(source_dir, 'tools', 'python', 'gen_opkernel_doc.py'), - os.path.join(build_dir, config)) - run_subprocess( - [sys.executable, - 'gen_contrib_doc.py', - '--output_path', operator_doc_path], - cwd=os.path.join(build_dir, config)) - run_subprocess( - [sys.executable, - 'gen_opkernel_doc.py', - '--output_path', opkernel_doc_path], - cwd=os.path.join(build_dir, config)) + shutil.copy( + os.path.join(source_dir, 'tools', 'python', 'gen_contrib_doc.py'), cwd) + shutil.copy( + os.path.join(source_dir, 'tools', 'python', 'gen_opkernel_doc.py'), + cwd) + run_subprocess( + [sys.executable, + 'gen_contrib_doc.py', + '--output_path', operator_doc_path], cwd=cwd) + run_subprocess( + [sys.executable, + 'gen_opkernel_doc.py', + '--output_path', opkernel_doc_path], cwd=cwd) docdiff = '' try: docdiff = subprocess.check_output(['git', 'diff', opkernel_doc_path], cwd=source_dir) diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml index a920e0feb5cec..b4c063ef3807e 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml +++ 
b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml @@ -14,7 +14,7 @@ jobs: - template: templates/run-docker-build-steps.yml parameters: RunDockerBuildArgs: | - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -o ubuntu18.04 -d gpu -r $(Build.BinariesDirectory) \ -t onnxruntime_ortmodule_distributed_tests_image \ -x " \ --config RelWithDebInfo \ diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml index 5dc7c3923f674..95162e57ccbfe 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml @@ -14,7 +14,7 @@ jobs: - template: templates/run-docker-build-steps.yml parameters: RunDockerBuildArgs: | - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -o ubuntu18.04 -d gpu -r $(Build.BinariesDirectory) \ -t onnxruntime_ortmodule_tests_image \ -x " \ --config RelWithDebInfo \ diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index be9d469ac3ee9..99be609ebd8e9 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -1,37 +1,67 @@ +parameters: +- name: RunOnnxRuntimeTests + displayName: Run Tests? 
+ type: boolean + default: true + jobs: - job: 'build' pool: 'Win-CPU-2019' strategy: - maxParallel: 2 matrix: - debug: + x64_debug: BuildConfig: 'Debug' UseOmp: '--use_openmp' - release: + EnvSetupScript: setup_env.bat + buildArch: x64 + additionalBuildFlags: --use_dnnl --build_java --build_nodejs --gen_doc + msbuildPlatform: x64 + isX86: false + x64_release: + BuildConfig: 'RelWithDebInfo' + UseOmp: '' + EnvSetupScript: setup_env.bat + buildArch: x64 + additionalBuildFlags: --use_dnnl --build_java --build_nodejs --gen_doc + msbuildPlatform: x64 + isX86: false + x86_release: BuildConfig: 'RelWithDebInfo' UseOmp: '' + EnvSetupScript: setup_env_x86.bat + buildArch: x86 + additionalBuildFlags: --x86 + msbuildPlatform: Win32 + isX86: true variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' - MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' + MsbuildArguments: '-maxcpucount' OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)' - DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true - EnvSetupScript: setup_env.bat - buildArch: x64 + DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true setVcvars: true ALLOW_RELEASED_ONNX_OPSET_ONLY: '0' timeoutInMinutes: 120 workspace: clean: all - steps: + steps: - task: UsePythonVersion@0 - inputs: - versionSpec: '3.7' - addToPath: true + inputs: + versionSpec: '3.7' + addToPath: true architecture: $(buildArch) - task: NodeTool@0 inputs: versionSpec: '12.x' + force32bit: $(isX86) + + - task: JavaToolInstaller@0 + #Our build machine doesn't have java x86 + condition: and(succeeded(), eq(variables['buildArch'], 'x64')) + inputs: + versionSpec: '11' + jdkArchitectureOption: $(buildArch) + jdkSourceOption: 'PreInstalled' - task: BatchScript@1 displayName: 'setup env' @@ -61,7 +91,7 @@ jobs: $Env:CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=OFF -DProtobuf_USE_STATIC_LIBS=ON -DONNX_USE_LITE_PROTO=ON -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=$(buildArch)-windows-static" python 
setup.py bdist_wheel python -m pip uninstall -y onnx -qq - Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname} + Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname} workingDirectory: '$(Build.SourcesDirectory)\cmake\external\onnx' displayName: 'Install ONNX' @@ -83,14 +113,14 @@ jobs: displayName: 'Generate cmake config' inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--gen_doc --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) $(UseOmp) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_dnnl --use_winml --build_shared_lib --enable_onnx_tests --enable_wcos --build_java --build_nodejs' + arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) $(UseOmp) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --use_winml --build_shared_lib --enable_onnx_tests --enable_wcos $(additionalBuildFlags)' workingDirectory: '$(Build.BinariesDirectory)' - task: VSBuild@1 displayName: 'Build' inputs: solution: '$(Build.BinariesDirectory)\$(BuildConfig)\onnxruntime.sln' - platform: 'x64' + platform: $(msbuildPlatform) configuration: $(BuildConfig) msbuildArgs: $(MsbuildArguments) msbuildArchitecture: $(buildArch) @@ -127,7 +157,7 @@ jobs: - task: DotNetCoreCLI@2 displayName: 'Test C#' - condition: and(succeeded(), eq(variables['BuildConfig'], 'RelWithDebInfo')) + condition: and(and(succeeded(), eq(variables['BuildConfig'], 'RelWithDebInfo')),eq('${{ parameters.RunOnnxRuntimeTests}}', true)) inputs: command: test projects: '$(Build.SourcesDirectory)\csharp\test\Microsoft.ML.OnnxRuntime.Tests\Microsoft.ML.OnnxRuntime.Tests.csproj' @@ -135,150 +165,55 @@ jobs: arguments: '--configuration $(BuildConfig) -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" 
-p:OrtPackageId=$(OrtPackageId) --blame' workingDirectory: '$(Build.SourcesDirectory)\csharp' - - script: | - mklink /D /J $(Build.BinariesDirectory)\$(BuildConfig)\models $(Build.BinariesDirectory)\models - DIR dist\ /S /B > wheel_filename_file - set /p WHEEL_FILENAME= wheel_filename_file - set /p WHEEL_FILENAME= wheel_filename_file - set /p WHEEL_FILENAME=