diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 2f50fd8051b9c..7cb6a852e8d7e 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -628,17 +628,15 @@ void WebGpuContext::CollectProfilingData(profiling::Events& events) { for (size_t i = 0; i < pending_kernels.size(); i++) { const PendingKernelInfo& pending_kernel_info = pending_kernels[i]; - const auto& inputs = pending_kernel_info.inputs; - const auto& outputs = pending_kernel_info.outputs; + const auto& input_shapes = pending_kernel_info.input_shapes; + const auto& output_shapes = pending_kernel_info.output_shapes; SS(shapes, 128); - for (size_t s = 0; s < inputs.size(); s++) { - const auto& input = inputs[s]; - shapes << "inputs[" << s << "] = " << input.override_shape.ToString() << " "; + for (size_t s = 0; s < input_shapes.size(); s++) { + shapes << "inputs[" << s << "] = " << input_shapes[s].ToString() << " "; } - for (size_t s = 0; s < outputs.size(); s++) { - const auto& output = outputs[s]; - shapes << "outputs[" << s << "] = " << output.override_shape.ToString() << " "; + for (size_t s = 0; s < output_shapes.size(); s++) { + shapes << "outputs[" << s << "] = " << output_shapes[s].ToString() << " "; } if (gpu_timestamp_offset_ == 0) { diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.h b/onnxruntime/core/providers/webgpu/webgpu_context.h index 8cc513680142d..9feea69f3702b 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.h +++ b/onnxruntime/core/providers/webgpu/webgpu_context.h @@ -268,7 +268,17 @@ class WebGpuContext final { std::string_view cache_key, const std::vector& inputs, const std::vector& outputs) - : name{absl::StrJoin({kernel_name, kernel_type, program_name}, "&")}, cache_key{cache_key}, inputs{inputs}, outputs{outputs} {} + : name{absl::StrJoin({kernel_name, kernel_type, program_name}, "&")}, cache_key{cache_key} { + // Store shape information instead of tensor pointers to avoid accessing released tensors + input_shapes.reserve(inputs.size()); + for (const auto& input : inputs) { + input_shapes.emplace_back(input.use_override_shape ? input.override_shape : input.tensor->Shape()); + } + output_shapes.reserve(outputs.size()); + for (const auto& output : outputs) { + output_shapes.emplace_back(output.use_override_shape ? output.override_shape : output.tensor->Shape()); + } + } PendingKernelInfo(PendingKernelInfo&&) = default; PendingKernelInfo& operator=(PendingKernelInfo&&) = default; @@ -276,8 +286,8 @@ class WebGpuContext final { std::string name; std::string cache_key; - std::vector inputs; - std::vector outputs; + std::vector input_shapes; + std::vector output_shapes; }; struct PendingQueryInfo {