[pull] main from microsoft:main #152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

TedThemistokleous merged 6 commits into ROCm:main from microsoft:main

Jul 10, 2025

cmake/external/onnxruntime_external_deps.cmake

-Original file line number
+Diff line change
@@ Expand Up / @@ -774,13 +774,24 @@ if (onnxruntime_USE_WEBGPU) @@
       endif()
       if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_WGSL_TEMPLATE STREQUAL "dynamic")
-        onnxruntime_fetchcontent_declare(
-          duktape
-          URL ${DEP_URL_duktape}
-          URL_HASH SHA1=${DEP_SHA1_duktape}
-          EXCLUDE_FROM_ALL
-        )
-        onnxruntime_fetchcontent_makeavailable(duktape)
+        if(onnxruntime_USE_VCPKG)
+          find_package(unofficial-duktape CONFIG REQUIRED)
+          add_library(duktape_static ALIAS unofficial::duktape::duktape)
+        else()
+          onnxruntime_fetchcontent_declare(
+            duktape
+            URL ${DEP_URL_duktape}
+            URL_HASH SHA1=${DEP_SHA1_duktape}
+            EXCLUDE_FROM_ALL
+          )
+          onnxruntime_fetchcontent_makeavailable(duktape)
+          if(NOT TARGET duktape_static)
+            add_library(duktape_static STATIC "${duktape_SOURCE_DIR}/src/duktape.c")
+            target_compile_features(duktape_static PRIVATE c_std_99)
+            target_include_directories(duktape_static INTERFACE $<BUILD_INTERFACE:${duktape_SOURCE_DIR}/src>)
+          endif()
+        endif()
       endif()
     endif()
@@ Expand Down @@

cmake/onnxruntime_providers_tensorrt.cmake

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -72,26 +72,21 @@
  
      endif()

      # TensorRT 10 GA onwards, the TensorRT libraries will have major version appended to the end on Windows,

      # for example, nvinfer_10.dll, nvinfer_plugin_10.dll, nvonnxparser_10.dll ...

      # for example, nvinfer_10.dll, nvonnxparser_10.dll ...

      if (WIN32 AND TRT_GREATER_OR_EQUAL_TRT_10_GA)

        set(NVINFER_LIB "nvinfer_${NV_TENSORRT_MAJOR}")

        set(NVINFER_PLUGIN_LIB "nvinfer_plugin_${NV_TENSORRT_MAJOR}")

        set(PARSER_LIB "nvonnxparser_${NV_TENSORRT_MAJOR}")

      endif()

      if (NOT NVINFER_LIB)

         set(NVINFER_LIB "nvinfer")

      endif()

      if (NOT NVINFER_PLUGIN_LIB)

         set(NVINFER_PLUGIN_LIB "nvinfer_plugin")

      endif()

      if (NOT PARSER_LIB)

         set(PARSER_LIB "nvonnxparser")

      endif()

      MESSAGE(STATUS "Looking for ${NVINFER_LIB} and ${NVINFER_PLUGIN_LIB}")

      MESSAGE(STATUS "Looking for ${NVINFER_LIB}")

      find_library(TENSORRT_LIBRARY_INFER ${NVINFER_LIB}

        HINTS ${TENSORRT_ROOT}

    @@ -101,14 +96,6 @@
  
        MESSAGE(STATUS "Can't find ${NVINFER_LIB}")

      endif()

      find_library(TENSORRT_LIBRARY_INFER_PLUGIN ${NVINFER_PLUGIN_LIB}

        HINTS  ${TENSORRT_ROOT}

        PATH_SUFFIXES lib lib64 lib/x64)

      if (NOT TENSORRT_LIBRARY_INFER_PLUGIN)

        MESSAGE(STATUS "Can't find ${NVINFER_PLUGIN_LIB}")

      endif()

      if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)

        MESSAGE(STATUS "Looking for ${PARSER_LIB}")

    @@ -120,7 +107,7 @@
  
          MESSAGE(STATUS "Can't find ${PARSER_LIB}")

        endif()

        set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_NVONNXPARSER})

        set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_NVONNXPARSER})

        MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")

      else()

        if (TRT_GREATER_OR_EQUAL_TRT_10_GA)

    @@ -153,15 +140,15 @@
  
        endif()

        # Static libraries are just nvonnxparser_static on all platforms

        set(onnxparser_link_libs nvonnxparser_static)

        set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN})

        set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER})

        MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")

      endif()

      # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.

      # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt

      # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121

      # However, starting from TRT 10 GA, nvonnxparser_static doesn't link against tensorrt libraries.

      # Therefore, the above code finds ${TENSORRT_LIBRARY_INFER} and ${TENSORRT_LIBRARY_INFER_PLUGIN}.

      # Therefore, the above code finds ${TENSORRT_LIBRARY_INFER}.

      if(onnxruntime_CUDA_MINIMAL)

        set(trt_link_libs ${CMAKE_DL_LIBS} ${TENSORRT_LIBRARY})

      else()

cmake/onnxruntime_providers_webgpu.cmake

-Original file line number
+Diff line change
@@ Expand Up / @@ -172,10 +172,12 @@ @@
         file(MAKE_DIRECTORY ${WGSL_GENERATED_DIR})
         # Find all WGSL template input files
-        file(GLOB_RECURSE WGSL_TEMPLATE_FILES "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.wgsl.template")
+        file(GLOB_RECURSE WGSL_TEMPLATE_FILES
+          "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.wgsl.template"
+          "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.wgsl.template")
         # Set wgsl-gen command line options as a list
-        set(WGSL_GEN_OPTIONS "-i" "../" "--output" "${WGSL_GENERATED_DIR}" "-I" "wgsl_template_gen/" "--preserve-code-ref" "--verbose")
+        set(WGSL_GEN_OPTIONS "-i" "${ONNXRUNTIME_ROOT}/core/providers/webgpu/" "-i" "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/" "--output" "${WGSL_GENERATED_DIR}" "-I" "wgsl_template_gen/" "--preserve-code-ref" "--verbose")
         if (onnxruntime_WGSL_TEMPLATE STREQUAL "static")
           if (CMAKE_BUILD_TYPE STREQUAL "Debug")
             list(APPEND WGSL_GEN_OPTIONS "--generator" "static-cpp-literal")
@@ Expand Down Expand Up / @@ -207,10 +209,9 @@ @@
           # Add the generated directory to include paths
           target_include_directories(onnxruntime_providers_webgpu PRIVATE ${WGSL_GENERATED_ROOT})
         elseif(onnxruntime_WGSL_TEMPLATE STREQUAL "dynamic")
-          add_library(duktape_static STATIC "${duktape_SOURCE_DIR}/src/duktape.c")
-          target_compile_features(duktape_static PRIVATE c_std_99)
           target_link_libraries(onnxruntime_providers_webgpu duktape_static)
-          target_include_directories(onnxruntime_providers_webgpu PRIVATE ${duktape_SOURCE_DIR}/src)
+          onnxruntime_add_include_to_target(onnxruntime_providers_webgpu duktape_static)
           # Define the path to the generated templates.js file
           target_compile_definitions(onnxruntime_providers_webgpu PRIVATE
             "ORT_WGSL_TEMPLATES_JS_PATH=\"${WGSL_GENERATED_TEMPLATES_JS}\"")
@@ Expand Down @@

cmake/vcpkg.json

-Original file line number
+Diff line change
@@ Expand Up / @@ -93,6 +93,10 @@ @@
         "webgpu-ep": {
           "description": "Build with WebGPU EP",
           "dependencies": []
+        },
+        "webgpu-ep-wgsl-template-dynamic": {
+          "description": "Build with WebGPU EP with dynamic WGSL template code generator",
+          "dependencies": ["duktape"]
         }
       },
       "overrides": [
@@ Expand All / @@ -103,6 +107,10 @@ @@
         {
           "name": "flatbuffers",
           "version": "23.5.26"
+        },
+        {
+          "name": "duktape",
+          "version": "2.7.0#2"
         }
       ]
     }

include/onnxruntime/core/graph/graph.h

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -952,9 +952,12 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
  
        return const_cast<Graph*>(this)->GetNodeArg(name);

      }

      // search this and up through any parent_graph_ instance for a NodeArg

      // Searches for a NodeArg in the current graph and its parent graphs, and returns the corresponding mutable NodeArg

      NodeArg* GetNodeArgIncludingParentGraphs(const std::string& node_arg_name);

      // Searches for a NodeArg in the current graph and its parent graphs, and returns the corresponding const NodeArg

      const NodeArg* GetNodeArgIncludingParentGraphs(const std::string& node_arg_name) const;

      /** Gets a mutable NodeArg by name. Creates a new NodeArg that is owned by this Graph if not found.

      @param name The NodeArg name.

      @param[in] p_arg_type Optional TypeProto to use if the NodeArg needs to be created.

include/onnxruntime/core/session/onnxruntime_c_api.h

-Original file line number
+Diff line change
@@ Expand Up / @@ -5748,6 +5748,24 @@ struct OrtApi { @@
        */
       ORT_API2_STATUS(Graph_GetParentNode, _In_ const OrtGraph* graph, _Outptr_result_maybenull_ const OrtNode** node);
+      /** \brief Returns an OrtGraph that contains a subset of nodes in the source OrtGraph.
+       *
+       * Note:
+       * The lifetime of "dst_graph" is tied to that of "src_graph", as they both internally reference
+       * the same underlying graph.
+       *
+       * \param[in] src_graph The source OrtGraph instance.
+       * \param[in] nodes A subset of the nodes/OrtNodes in 'src_graph'.
+       * \param[in] num_nodes Number of nodes.
+       * \param[out] dst_graph An OrtGraph created from a given set of nodes. Must be released by calling ReleaseGraph.
+       *
+       * \snippet{doc} snippets.dox OrtStatus Return Value
+       *
+       * \since Version 1.23.
+       */
+      ORT_API2_STATUS(Graph_GetGraphView, _In_ const OrtGraph* src_graph, _In_ const OrtNode** nodes,
+                      _In_ size_t num_nodes, _Outptr_ OrtGraph** dst_graph);
       /// @}
       /// \name OrtNode
@@ Expand Down @@

onnxruntime/core/graph/ep_api_types.cc

-Original file line number
+Diff line change
@@ Expand Up @@
     EpGraph::EpGraph(const GraphViewer& graph_viewer, PrivateTag)
         : OrtGraph(OrtGraphIrApi::kEpApi), graph_viewer_(graph_viewer) {}
+    EpGraph::EpGraph(std::unique_ptr<GraphViewer> graph_viewer,
+                     std::unique_ptr<IndexedSubGraph> indexed_sub_graph,
+                     PrivateTag)
+        : OrtGraph(OrtGraphIrApi::kEpApi),
+          graph_viewer_(*graph_viewer.get()),
+          owned_graph_viewer_(std::move(graph_viewer)),
+          owned_indexed_sub_graph_(std::move(indexed_sub_graph)) {}
     // Static class function to create a std::unique_ptr<EpGraph>.
     Status EpGraph::Create(const GraphViewer& graph_viewer, /*out*/ std::unique_ptr<EpGraph>& result) {
       auto ep_graph = std::make_unique<EpGraph>(graph_viewer, PrivateTag{});
+      return CreateImpl(std::move(ep_graph), graph_viewer, result);
+    }
+    // Static class function to create a std::unique_ptr<EpGraph>.
+    Status EpGraph::Create(std::unique_ptr<GraphViewer> src_graph_viewer,
+                           std::unique_ptr<IndexedSubGraph> src_indexed_sub_graph,
+                           /*out*/ std::unique_ptr<EpGraph>& result) {
+      auto& graph_viewer = *src_graph_viewer.get();
+      auto ep_graph = std::make_unique<EpGraph>(std::move(src_graph_viewer),
+                                                std::move(src_indexed_sub_graph),
+                                                PrivateTag{});
+      return CreateImpl(std::move(ep_graph), graph_viewer, result);
+    }
+    Status EpGraph::CreateImpl(std::unique_ptr<EpGraph> ep_graph, const GraphViewer& graph_viewer, /*out*/ std::unique_ptr<EpGraph>& result) {
       AllocatorPtr initializer_allocator = CPUAllocator::DefaultInstance();
       std::unordered_map<std::string, std::unique_ptr<EpValueInfo>> value_infos_map;
@@ Expand Down @@

onnxruntime/core/graph/ep_api_types.h

-Original file line number
+Diff line change
@@ Expand Up / @@ -251,15 +251,32 @@ struct EpGraph : public OrtGraph { @@
      public:
       EpGraph(const GraphViewer& graph_viewer, PrivateTag);
+      EpGraph(std::unique_ptr<GraphViewer> graph_viewer,
+              std::unique_ptr<IndexedSubGraph> indexed_sub_graph,
+              PrivateTag);
       /// <summary>
       /// Creates an instance of EpGraph, which wraps a GraphViewer.
+      /// This call is used when creating an EpGraph from a GraphViewer instance. The GraphViewer instance is not owned by this EpGraph.
       /// </summary>
       /// <param name="graph_viewer"></param>
       /// <param name="result"></param>
       /// <returns></returns>
       static Status Create(const GraphViewer& graph_viewer, /*out*/ std::unique_ptr<EpGraph>& result);
+      /// <summary>
+      /// Creates an instance of EpGraph, which wraps a GraphViewer.
+      /// This call is used when creating an EpGraph from a subset of nodes in another EpGraph.
+      /// In this case, due to the implementation of OrtApis::Graph_GetGraphView, the new EpGraph instance
+      /// must take ownership of both the GraphViewer and IndexedSubGraph.
+      /// </summary>
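+      /// <param name="graph_viewer">The GraphViewer to wrap. Ownership is transferred to the new EpGraph.</param>
+      /// <param name="indexed_sub_graph">The IndexedSubGraph associated with the GraphViewer. Ownership is transferred to the new EpGraph.</param>
+      /// <param name="result">Output parameter that receives the created EpGraph.</param>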
+      /// <returns></returns>
+      static Status Create(std::unique_ptr<GraphViewer> graph_viewer,
+                           std::unique_ptr<IndexedSubGraph> indexed_sub_graph,
+                           /*out*/ std::unique_ptr<EpGraph>& result);
       // Defines ToExternal() and ToInternal() functions to convert between OrtGraph and EpGraph.
       DEFINE_ORT_GRAPH_IR_TO_EXTERNAL_INTERNAL_FUNCS(OrtGraph, EpGraph, OrtGraphIrApi::kEpApi)
@@ Expand Down Expand Up / @@ -331,9 +348,22 @@ struct EpGraph : public OrtGraph { @@
       const OrtValue* GetInitializerValue(std::string_view name) const;
      private:
+      /// <summary>
+      /// The real implementation of creating an EpGraph instance.
+      /// Please use one of the above 'Create' functions that internally call this function, and avoid calling this function directly.
+      /// </summary>
+      /// <param name="ep_graph"></param>
+      /// <param name="graph_viewer"></param>
+      /// <param name="result"></param>
+      /// <returns></returns>
+      static Status CreateImpl(std::unique_ptr<EpGraph> ep_graph, const GraphViewer& graph_viewer, /*out*/ std::unique_ptr<EpGraph>& result);
       const GraphViewer& graph_viewer_;
       const EpNode* parent_node_ = nullptr;
+      std::unique_ptr<GraphViewer> owned_graph_viewer_ = nullptr;
+      std::unique_ptr<IndexedSubGraph> owned_indexed_sub_graph_ = nullptr;
       std::vector<std::unique_ptr<EpNode>> nodes_;
       IndexToEpNodeMap index_to_ep_node_;
@@ Expand Down @@

onnxruntime/core/graph/graph.cc

-Original file line number
+Diff line change
@@ Expand Up @@
       return node_arg;
     }
+    const NodeArg* Graph::GetNodeArgIncludingParentGraphs(const std::string& node_arg_name) const {
+      return const_cast<Graph*>(this)->GetNodeArgIncludingParentGraphs(node_arg_name);
+    }
     void Graph::ReverseDFSFrom(gsl::span<NodeIndex const> from,
                                const std::function<void(const Node*)>& enter,
                                const std::function<void(const Node*)>& leave,
@@ Expand Down @@

onnxruntime/core/graph/graph_viewer.cc

-Original file line number
+Diff line change
@@ Expand Up @@
         filtered_node_inputs_including_initializers_.reserve(metadef->inputs.size());
         for (const auto& input : metadef->inputs) {
-          const auto* nodearg = graph.GetNodeArg(input);
+          // NodeArgs from the current scope or any outer scopes should be handled correctly.
+          //
+          // There is an edge case where the model consists of a graph with subgraphs nested across three levels.
+          // In this scenario, a third-layer subgraph consumes an input from the first-layer graph (not an initializer).
+          // When constructing a new GraphViewer for the second- and third-layer subgraphs,
+          // the second-layer graph may not have the corresponding value_info for that first-layer input,
+          // because the second-layer graph itself doesn't consume it.
+          // Therefore, when working within the second-layer graph, we need to search outer scopes for the missing value_info.
+          const auto* nodearg = graph.GetNodeArgIncludingParentGraphs(input);
           ORT_ENFORCE(nodearg, "Mismatch between Graph and IndexedSubGraph. Input not found:", input);
           filtered_node_inputs_including_initializers_.push_back(nodearg);
           if (!graph.IsInitializedTensor(input)) {
@@ Expand All @@
         }
         for (const auto& output : metadef->outputs) {
-          const auto* nodearg = graph.GetNodeArg(output);
+          const auto* nodearg = graph.GetNodeArgIncludingParentGraphs(output);
           ORT_ENFORCE(nodearg, "Mismatch between Graph and IndexedSubGraph. Output not found:", output);
           filtered_node_outputs_.push_back(nodearg);
         }
@@ Expand Down @@

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc

-Original file line number
+Diff line change
@@ Expand Up @@
       AllocatorCreationInfo pinned_allocator_info(
           [](OrtDevice::DeviceId device_id) {
-            return std::make_unique<CUDAPinnedAllocator>(device_id, CUDA_PINNED);
+            return std::make_unique<CUDAPinnedAllocator>(CUDA_PINNED, device_id);
           },
           narrow<OrtDevice::DeviceId>(device_id_));
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[pull] main from microsoft:main #152

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!