diff --git a/onnxruntime/contrib_ops/cuda/bert/gqa_unfused_attention.cu b/onnxruntime/contrib_ops/cuda/bert/gqa_unfused_attention.cu index f65a041a80630..8aac549aeba01 100644 --- a/onnxruntime/contrib_ops/cuda/bert/gqa_unfused_attention.cu +++ b/onnxruntime/contrib_ops/cuda/bert/gqa_unfused_attention.cu @@ -4,7 +4,7 @@ // GQA-capable unfused CUDA attention kernel. See header for contract. #include -#include +#include "core/providers/cuda/cu_inc/cub.cuh" #include #include #include "core/common/safeint.h"