diff --git a/include/onnxruntime/core/framework/op_kernel.h b/include/onnxruntime/core/framework/op_kernel.h index 8ec94c67cc0a4..42e8e9c5e3cbe 100644 --- a/include/onnxruntime/core/framework/op_kernel.h +++ b/include/onnxruntime/core/framework/op_kernel.h @@ -105,9 +105,9 @@ class OpKernel { return Status::OK(); } - // Note: New implementations should override OpKernel::UseSharedPrePackedBuffers_V2 instead. // Override this function to use provided pre-packed weight. // Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + // gsl::span prepacked_buffer_sizes, // int input_idx, // /*out*/ bool& used_shared_buffers) { // used_shared_buffers = true; @@ -121,37 +121,18 @@ class OpKernel { // and must use the same order for retrieval in UseSharedPrePackedBuffers(). Though each element // of this vector is a BufferUniquePtr, the deleter of the BufferUniquePtr is NULL. So actually they // are raw pointers. + // @param prepacked_buffer_sizes: The sizes (in bytes) of each buffer in prepacked_buffers. // @param input_idx: The input index of the tensor in this kernel // @param used_shared_buffers: Boolean flag set by the kernel implementation indicating // that the provided weight has been used by the kernel. virtual Status UseSharedPrePackedBuffers(std::vector& /*prepacked_buffers*/, + gsl::span /*prepacked_buffer_sizes*/, int /*input_idx*/, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; return Status::OK(); } - /// - /// Version 2 of OpKernel::UseSharedPrePackedBuffers() that additionally accepts the buffer sizes as a parameter. - /// The default implementation of this function just calls directly to OpKernel::UseSharedPrePackedBuffers() - /// to avoid the need to update all existing kernel-based provider-bridge EPs. - /// - /// TODO: Consolidate UseSharedPrePackedBuffers and UseSharedPrePackedBuffers_V2 into a single function, - /// which will require updating kernel-based provider-bridge EPs (cpu, cuda, webgpu). - /// - /// - /// - /// - /// - /// - /// - virtual Status UseSharedPrePackedBuffers_V2(std::vector& prepacked_buffers, - gsl::span /*prepacked_buffer_sizes*/, - int input_idx, - /*out*/ bool& used_shared_buffers) { - return UseSharedPrePackedBuffers(prepacked_buffers, input_idx, used_shared_buffers); - } - const OrtDevice GetDevice(OrtMemType mem_type) const; const OpKernelInfo& Info() const { return *op_kernel_info_; diff --git a/onnxruntime/contrib_ops/cpu/bert/attention.cc b/onnxruntime/contrib_ops/cpu/bert/attention.cc index 7268b32623b95..e1981fb5c2442 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/cpu/bert/attention.cc @@ -34,6 +34,7 @@ class Attention : public OpKernel, public AttentionCPUBase { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; @@ -176,6 +177,7 @@ Status Attention::PrePack(const Tensor& weights, int input_idx, AllocatorPtr template Status Attention::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { if (1 != input_idx) { diff --git a/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.cc b/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.cc index ca2c3ab001da6..a674d05b6daae 100644 --- a/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.cc +++ b/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.cc @@ -578,10 +578,10 @@ Status QMoECPU::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr all } template -Status QMoECPU::UseSharedPrePackedBuffers_V2(std::vector& prepacked_buffers, - gsl::span /*prepacked_buffer_sizes*/, - int input_idx, - /*out*/ bool& used_shared_buffers) { +Status QMoECPU::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, + int input_idx, + /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; if (expert_weight_bits_ != 4) { @@ -1577,11 +1577,11 @@ template QMoECPU::QMoECPU(const OpKernelInfo& op_kernel_info); template Status QMoECPU::Compute(OpKernelContext* context) const; template Status QMoECPU::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, bool& is_packed, PrePackedWeights* prepacked_weights); -template Status QMoECPU::UseSharedPrePackedBuffers_V2(std::vector& prepacked_buffers, gsl::span prepacked_buffer_sizes, int input_idx, bool& used_shared_buffers); +template Status QMoECPU::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, gsl::span prepacked_buffer_sizes, int input_idx, bool& used_shared_buffers); template QMoECPU::QMoECPU(const OpKernelInfo& op_kernel_info); template Status QMoECPU::Compute(OpKernelContext* context) const; template Status QMoECPU::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, bool& is_packed, PrePackedWeights* prepacked_weights); -template Status QMoECPU::UseSharedPrePackedBuffers_V2(std::vector& prepacked_buffers, gsl::span prepacked_buffer_sizes, int input_idx, bool& used_shared_buffers); +template Status QMoECPU::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, gsl::span prepacked_buffer_sizes, int input_idx, bool& used_shared_buffers); // Kernel Registration ONNX_OPERATOR_TYPED_KERNEL_EX( diff --git a/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.h b/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.h index f678a27190c90..c5e6904ae48c2 100644 --- a/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.h +++ b/onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.h @@ -32,10 +32,10 @@ class QMoECPU final : public OpKernel, public MoEBaseCPU { /*out*/ bool& is_packed, /*out*/ PrePackedWeights* prepacked_weights) override; - Status UseSharedPrePackedBuffers_V2(std::vector& prepacked_buffers, - gsl::span prepacked_buffer_sizes, - int input_idx, - /*out*/ bool& used_shared_buffers) override; + Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span prepacked_buffer_sizes, + int input_idx, + /*out*/ bool& used_shared_buffers) override; void ApplyActivationVectorized(float* data, int64_t size) const; diff --git a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc index b30fa1e5e618a..931677582d469 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc @@ -28,6 +28,7 @@ class QAttention : public OpKernel, public AttentionCPUBase { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; @@ -117,6 +118,7 @@ Status QAttention::PrePack(const Tensor& weights, int input_idx, AllocatorPtr template Status QAttention::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { if (1 != input_idx) { diff --git a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_lstm.cc b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_lstm.cc index f55e66f9c5d81..2094af78f40b7 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_lstm.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_lstm.cc @@ -17,6 +17,7 @@ class DynamicQuantizeLSTM : public OpKernel, public LSTMBase { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; @@ -117,6 +118,7 @@ Status DynamicQuantizeLSTM::PrePack(const Tensor& tensor, int input_idx, Allocat } Status DynamicQuantizeLSTM::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc index d2996b122c5f7..3da0ee19d4cde 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc @@ -135,7 +135,9 @@ class MatMulNBits final : public OpKernel { /*out*/ bool& is_packed, /*out*/ PrePackedWeights* prepacked_weights) override; - Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, int input_idx, + Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, + int input_idx, /*out*/ bool& used_shared_buffers) override; private: @@ -557,7 +559,9 @@ Status MatMulNBits::PrePack(const Tensor& tensor, int input_idx, /*ou #endif // end !MLAS_F16VEC_INTRINSICS_SUPPORTED || !MLAS_TARGET_ARM64 template -Status MatMulNBits::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, int input_idx, +Status MatMulNBits::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, + int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 5c33a621cf514..84521af2d8532 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -436,8 +436,8 @@ static Status KernelUseSharedPrePackedBuffers(OpKernel& kernel, int input_idx, } bool used_shared_buffers = false; - ORT_RETURN_IF_ERROR(kernel.UseSharedPrePackedBuffers_V2(shared_prepacked_buffers, shared_prepacked_buffer_sizes, - input_idx, used_shared_buffers)); + ORT_RETURN_IF_ERROR(kernel.UseSharedPrePackedBuffers(shared_prepacked_buffers, shared_prepacked_buffer_sizes, + input_idx, used_shared_buffers)); // BUG CHECK: Ensure that the kernel used the provided shared buffers // Mostly a debug check to ensure that the kernel has an overridden implementation of the diff --git a/onnxruntime/core/providers/acl/math/matmul.cc b/onnxruntime/core/providers/acl/math/matmul.cc index 468b394471c13..029a9ebe2768a 100644 --- a/onnxruntime/core/providers/acl/math/matmul.cc +++ b/onnxruntime/core/providers/acl/math/matmul.cc @@ -269,6 +269,7 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, } Status MatMul::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; if (input_idx != 1) { diff --git a/onnxruntime/core/providers/acl/math/matmul.h b/onnxruntime/core/providers/acl/math/matmul.h index b137e33833de9..783e15585ebf5 100644 --- a/onnxruntime/core/providers/acl/math/matmul.h +++ b/onnxruntime/core/providers/acl/math/matmul.h @@ -34,6 +34,7 @@ class MatMul : public OpKernel { bool& is_packed, PrePackedWeights*) override; Status UseSharedPrePackedBuffers(std::vector&, + gsl::span, int, bool&) override; Status Compute(OpKernelContext* context) const override; diff --git a/onnxruntime/core/providers/acl/nn/conv.cc b/onnxruntime/core/providers/acl/nn/conv.cc index a62158f1c26ee..5cc10f7cfd2a8 100644 --- a/onnxruntime/core/providers/acl/nn/conv.cc +++ b/onnxruntime/core/providers/acl/nn/conv.cc @@ -370,6 +370,7 @@ Status Conv::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, } Status Conv::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; if (isQuantized ? (input_idx != 3) : (input_idx != 1)) { diff --git a/onnxruntime/core/providers/acl/nn/conv.h b/onnxruntime/core/providers/acl/nn/conv.h index b05ba5363542f..7af086a410857 100644 --- a/onnxruntime/core/providers/acl/nn/conv.h +++ b/onnxruntime/core/providers/acl/nn/conv.h @@ -36,6 +36,7 @@ class Conv : public onnxruntime::OpKernel { bool& is_packed, PrePackedWeights*) override; Status UseSharedPrePackedBuffers(std::vector&, + gsl::span, int, bool&) override; Status Compute(OpKernelContext* context) const override; diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc b/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc index 790b1543bbd74..08dbc46213f65 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc +++ b/onnxruntime/core/providers/cpu/fp16/fp16_conv.cc @@ -54,6 +54,7 @@ class FusedConvFp16 final : public OpKernel { /*out*/ bool& is_packed, /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; @@ -211,6 +212,7 @@ Status FusedConvFp16::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr } Status FusedConvFp16::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { if (input_idx != 1) { diff --git a/onnxruntime/core/providers/cpu/math/gemm.cc b/onnxruntime/core/providers/cpu/math/gemm.cc index ac931c76ee3ae..c0da9aec1e1b1 100644 --- a/onnxruntime/core/providers/cpu/math/gemm.cc +++ b/onnxruntime/core/providers/cpu/math/gemm.cc @@ -296,6 +296,7 @@ Status Gemm::PrePack(const Tensor& tensor, int input_idx, template Status Gemm::UseSharedPrePackedBuffers(std::vector& /*prepacked_buffers*/, + gsl::span /*prepacked_buffer_sizes*/, int /*input_idx*/, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; @@ -304,6 +305,7 @@ Status Gemm::UseSharedPrePackedBuffers(std::vector& /*prepac template <> Status Gemm::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/math/gemm.h b/onnxruntime/core/providers/cpu/math/gemm.h index c65f3eb96f62e..d9e66df4bee7c 100644 --- a/onnxruntime/core/providers/cpu/math/gemm.h +++ b/onnxruntime/core/providers/cpu/math/gemm.h @@ -37,6 +37,7 @@ class Gemm : protected GemmBase, public OpKernel { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; diff --git a/onnxruntime/core/providers/cpu/math/matmul.cc b/onnxruntime/core/providers/cpu/math/matmul.cc index 8a7795a81027d..8dea41e3488e2 100644 --- a/onnxruntime/core/providers/cpu/math/matmul.cc +++ b/onnxruntime/core/providers/cpu/math/matmul.cc @@ -220,6 +220,7 @@ Status MatMul::PrePack(const Tensor& tensor, int input_idx, /*out*/ Alloc } Status MatMul::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/math/matmul.h b/onnxruntime/core/providers/cpu/math/matmul.h index 7f2d2ee400b63..9e6ef1a486235 100644 --- a/onnxruntime/core/providers/cpu/math/matmul.h +++ b/onnxruntime/core/providers/cpu/math/matmul.h @@ -47,7 +47,9 @@ class MatMul final : public OpKernel { /*out*/ bool& is_packed, /*out*/ PrePackedWeights* prepacked_weights) override; - Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, int input_idx, + Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, + int input_idx, /*out*/ bool& used_shared_buffers) override; Status Compute(OpKernelContext* context) const override; diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc index 6ebd12a525371..bbb530d037cec 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc @@ -102,6 +102,7 @@ Status ConvTranspose::PrePack(const Tensor& tensor, int input_idx, Alloca template Status ConvTranspose::UseSharedPrePackedBuffers(std::vector& /*prepacked_buffers*/, + gsl::span /*prepacked_buffer_sizes*/, int /*input_idx*/, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; @@ -110,6 +111,7 @@ Status ConvTranspose::UseSharedPrePackedBuffers(std::vector& template <> Status ConvTranspose::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.h b/onnxruntime/core/providers/cpu/nn/conv_transpose.h index fd6021e65670e..96e3ecf912f32 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.h +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.h @@ -35,6 +35,7 @@ class ConvTranspose : public OpKernel { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; diff --git a/onnxruntime/core/providers/cpu/quantization/matmul_integer_base.h b/onnxruntime/core/providers/cpu/quantization/matmul_integer_base.h index fb86e9731035c..9916c426a54fe 100644 --- a/onnxruntime/core/providers/cpu/quantization/matmul_integer_base.h +++ b/onnxruntime/core/providers/cpu/quantization/matmul_integer_base.h @@ -80,6 +80,7 @@ class MatMulIntegerBase : public OpKernel { } Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc index 24c8b0d57294e..a5e3d4b04a1e3 100644 --- a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc +++ b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc @@ -30,6 +30,7 @@ class QLinearConv : public OpKernel { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; @@ -495,6 +496,7 @@ Status QLinearConv::PrePack(const Tensor& tensor, int input_idx, Alloca template Status QLinearConv::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { if (input_idx != 3) { diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc index d1ddd04a953ef..d5be6bd29592e 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc @@ -322,6 +322,7 @@ Status DeepCpuGruOp::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr a } Status DeepCpuGruOp::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.h b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.h index 881adf9efb376..fa233cc6f9cde 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.h +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.h @@ -69,6 +69,7 @@ class DeepCpuGruOp final : public OpKernel { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc index 4b3ea672c0812..d2520804bb64c 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc @@ -260,6 +260,7 @@ Status DeepCpuLstmOp::PrePack(const Tensor& tensor, int input_idx, } Status DeepCpuLstmOp::UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) { used_shared_buffers = false; diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.h b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.h index c949b62ce7186..487e2a3fb8129 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.h +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.h @@ -24,6 +24,7 @@ class DeepCpuLstmOp final : public OpKernel, public LSTMBase { /*out*/ PrePackedWeights* prepacked_weights) override; Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override; diff --git a/onnxruntime/core/session/plugin_ep/ep_kernel_registration.cc b/onnxruntime/core/session/plugin_ep/ep_kernel_registration.cc index 625645e71cfec..6f29361502a73 100644 --- a/onnxruntime/core/session/plugin_ep/ep_kernel_registration.cc +++ b/onnxruntime/core/session/plugin_ep/ep_kernel_registration.cc @@ -126,9 +126,9 @@ class PluginEpOpKernel final : public controlflow::IControlFlowKernel { return Status::OK(); } - Status UseSharedPrePackedBuffers_V2(std::vector& buffer_unique_ptrs, - gsl::span buffer_sizes, - int input_idx, /*out*/ bool& used_shared_buffers) override { + Status UseSharedPrePackedBuffers(std::vector& buffer_unique_ptrs, + gsl::span buffer_sizes, + int input_idx, /*out*/ bool& used_shared_buffers) override { assert(kernel_impl_ != nullptr); // Should be ensured by PluginEpOpKernel::Create(). if (kernel_impl_->ort_version_supported < 24 || kernel_impl_->SetSharedPrePackedWeight == nullptr) { diff --git a/onnxruntime/test/framework/session_state_test.cc b/onnxruntime/test/framework/session_state_test.cc index 656b0ef86289d..418bb2a809259 100644 --- a/onnxruntime/test/framework/session_state_test.cc +++ b/onnxruntime/test/framework/session_state_test.cc @@ -662,6 +662,7 @@ class PrePackingTestOpKernel : public OpKernel { } Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, + gsl::span /*prepacked_buffer_sizes*/, int input_idx, /*out*/ bool& used_shared_buffers) override { ORT_UNUSED_PARAMETER(input_idx);