Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

error: One or more operations with large vector sizes (16384 bytes) were found #19058

Open
pdhirajkumarprasad opened this issue Nov 7, 2024 · 2 comments
Assignees
Labels
bug 🐞 Something isn't working

Comments

@pdhirajkumarprasad
Copy link

What happened?

For the given IR

module {
  func.func @torch_jit(%arg0: !torch.vtensor<[1,3,224,224],f32>,  %arg2: !torch.vtensor<[?,768,?],f32>) -> !torch.vtensor<[?,1000],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 21 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.12.1"} {
    %2 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.0> : tensor<384x192x2x2xf32>} : () -> !torch.vtensor<[384,192,2,2],f32> 
    %3 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.0> : tensor<384xf32>} : () -> !torch.vtensor<[384],f32> 
    %4 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.0> : tensor<1000x768xf32>} : () -> !torch.vtensor<[1000,768],f32> 
    %5 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.0> : tensor<1000xf32>} : () -> !torch.vtensor<[1000],f32> 
    %1 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
    %6 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_onnx__Sub_3847> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %none = torch.constant.none
    %7 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %8 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__1> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %9 = torch.operator "onnx.Mod"(%7, %8) {torch.onnx.fmod = 0 : si64} : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %10 = torch.operator "onnx.Concat"(%1, %1) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[2],si64> 
    %11 = torch.operator "onnx.Unsqueeze"(%9, %1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %12 = torch.operator "onnx.Concat"(%1, %11) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[2],si64> 
    %13 = torch.operator "onnx.Shape"(%10) : (!torch.vtensor<[2],si64>) -> !torch.vtensor<[1],si64> 
    %14 = torch.operator "onnx.Gather"(%13, %1) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %15 = torch.operator "onnx.Cast"(%12) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[2],si64>) -> !torch.vtensor<[2],si64> 
    %16 = torch.operator "onnx.ConstantOfShape"(%1) {torch.onnx.value = dense_resource<__7> : tensor<1xsi64>} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[6],si64> 
    %17 = torch.operator "onnx.Concat"(%15, %16) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[6],si64>) -> !torch.vtensor<[8],si64> 
    %18 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__8> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> 
    %19 = torch.operator "onnx.Reshape"(%17, %18) : (!torch.vtensor<[8],si64>, !torch.vtensor<[2],si64>) -> !torch.vtensor<[4,2],si64> 
    %20 = torch.operator "onnx.Slice"(%19, %1, %1, %1, %1) : (!torch.vtensor<[4,2],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4,2],si64> 
    %21 = torch.operator "onnx.Transpose"(%20) {torch.onnx.perm = [1 : si64, 0 : si64]} : (!torch.vtensor<[4,2],si64>) -> !torch.vtensor<[2,4],si64> 
    %22 = torch.operator "onnx.Reshape"(%21, %1) : (!torch.vtensor<[2,4],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[8],si64> 
    %23 = torch.operator "onnx.Cast"(%22) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[8],si64>) -> !torch.vtensor<[8],si64> 
    %24 = torch.operator "onnx.Pad"(%arg0, %23, %none) {torch.onnx.mode = "constant"} : (!torch.vtensor<[1,3,224,224],f32>, !torch.vtensor<[8],si64>, !torch.none) -> !torch.vtensor<[?,?,?,?],f32> 
    %25 = torch.operator "onnx.Conv"(%24, %2, %3) {torch.onnx.dilations = [1 : si64, 1 : si64], torch.onnx.group = 1 : si64, torch.onnx.kernel_shape = [2 : si64, 2 : si64], torch.onnx.pads = [0 : si64, 0 : si64, 0 : si64, 0 : si64], torch.onnx.strides = [2 : si64, 2 : si64]} : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[384,192,2,2],f32>, !torch.vtensor<[384],f32>) -> !torch.vtensor<[?,384,?,?],f32> 
    %26 = torch.operator "onnx.Shape"(%25) : (!torch.vtensor<[?,384,?,?],f32>) -> !torch.vtensor<[4],si64> 
    %27 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__364> : tensor<si64>} : () -> !torch.vtensor<[],si64> 
    %28 = torch.operator "onnx.Gather"(%26, %27) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
    %29 = torch.operator "onnx.Unsqueeze"(%28, %1) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
    %30 = torch.operator "onnx.Concat"(%29, %1, %1, %29) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> 
    %31 = torch.operator "onnx.Reshape"(%arg2, %30) : (!torch.vtensor<[?,768,?],f32>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,768,?,?],f32> 
    %32 = torch.operator "onnx.GlobalAveragePool"(%31) : (!torch.vtensor<[?,768,?,?],f32>) -> !torch.vtensor<[?,768,1,1],f32> 
    %33 = torch.operator "onnx.Flatten"(%32) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[?,768,1,1],f32>) -> !torch.vtensor<[?,768],f32> 
    %34 = torch.operator "onnx.Gemm"(%33, %4, %5) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[?,768],f32>, !torch.vtensor<[1000,768],f32>, !torch.vtensor<[1000],f32>) -> !torch.vtensor<[?,1000],f32> 
    return %34 : !torch.vtensor<[?,1000],f32>
  }
}

{-#
  dialect_resources: {
    builtin: {
      _onnx__Sub_3847: "0x080000000800000000000000",
      __364: "0x080000000000000000000000",
      __7: "0x080000000000000000000000",
      __8: "0x08000000FFFFFFFFFFFFFFFF0200000000000000",
      __1: "0x080000000400000000000000",
      _: "0x08000000E000000000000000"
    }
  }
#-}

Getting the following error:

model.torch_onnx.mlir:37:11: error: One or more operations with large vector sizes (16384 bytes) were found:

    %34 = torch.operator "onnx.Gemm"(%33, %4, %5) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[?,768],f32>, !torch.vtensor<[1000,768],f32>, !torch.vtensor<[1000],f32>) -> !torch.vtensor<[?,1000],f32> 

iree version:

IREE (https://iree.dev):
IREE compiler version 20241105.1069 @ 17bba14
LLVM version 20.0.0git
Optimized build

Steps to reproduce your issue

command:

iree-compile --iree-hal-target-backends=llvm-cpu model.torch_onnx.mlir -o abc.vmfb --iree-llvmcpu-target-cpu=host

dump with '--mlir-print-ir-after-all --mlir-print-ir-before-all --mlir-disable-threading --mlir-elide-elementsattrs-if-larger=4'

dump.zip

What component(s) does this issue relate to?

Compiler

Version information

No response

Additional context

No response

@pashu123
Copy link
Contributor

pashu123 commented Nov 7, 2024

Fails with a different error in the latest main

<unknown>:0: error: invalid size -9223372036854775808 for !torch.tensor type
iree-compile: iree/third_party/llvm-project/mlir/include/mlir/IR/StorageUniquerSupport.h:180: static ConcreteT mlir::detail::StorageUserBase<mlir::torch::Torch::ValueTensorType, mlir::torch::Torch::BaseTensorType, mlir::torch::Torch::detail::ValueTensorTypeStorage, mlir::detail::TypeUniquer>::get(MLIRContext *, Args &&...) [ConcreteT = mlir::torch::Torch::ValueTensorType, BaseT = mlir::torch::Torch::BaseTensorType, StorageT = mlir::torch::Torch::detail::ValueTensorTypeStorage, UniquerT = mlir::detail::TypeUniquer, Traits = <>, Args = <std::optional<llvm::ArrayRef<long>> &, mlir::Type &, mlir::Attribute &>]: Assertion `succeeded( ConcreteT::verifyInvariants(getDefaultDiagnosticEmitFn(ctx), args...))' failed.
Please report issues to https://github.com/iree-org/iree/issues and include the crash backtrace.
Stack dump:
0.	Program arguments: iree-compile --iree-hal-target-backends=llvm-cpu new_onnx.mlir -o abc.vmfb --iree-llvmcpu-target-cpu=host
 #0 0x000076aef4672168 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/prashant/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13
 #1 0x000076aef4670380 llvm::sys::RunSignalHandlers() /home/prashant/iree/third_party/llvm-project/llvm/lib/Support/Signals.cpp:106:18
 #2 0x000076aef4672808 SignalHandler(int) /home/prashant/iree/third_party/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x000076aeee842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x000076aeee8969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x000076aeee8969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x000076aeee8969fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x000076aeee842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x000076aeee8287f3 abort ./stdlib/abort.c:81:7
 #9 0x000076aeee82871b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x000076aeee839e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#11 0x000076aef5669f3c (/home/prashant/iree-build/lib/libIREECompiler.so+0x6669f3c)
#12 0x000076aef5666bb1 mlir::torch::Torch::ValueTensorType::get(mlir::MLIRContext*, std::optional<llvm::ArrayRef<long>>, mlir::Type, mlir::Attribute) /home/prashant/iree-build/compiler/plugins/input/Torch/torch-mlir/Dialect/Torch/IR/TorchTypes.cpp.inc:382:3
#13 0x000076aef5434fb9 mlir::TypeStorage::getAbstractType() /home/prashant/iree/third_party/llvm-project/mlir/include/mlir/IR/TypeSupport.h:173:5
#14 0x000076aef5434fb9 mlir::Type::getTypeID() /home/prashant/iree/third_party/llvm-project/mlir/include/mlir/IR/Types.h:117:37
#15 0x000076aef5434fb9 bool mlir::detail::StorageUserBase<mlir::torch::Torch::NonValueTensorType, mlir::torch::Torch::BaseTensorType, mlir::torch::Torch::detail::NonValueTensorTypeStorage, mlir::detail::TypeUniquer>::classof<mlir::Type>(mlir::Type) /home/prashant/iree/third_party/llvm-project/mlir/include/mlir/IR/StorageUniquerSupport.h:113:16
#16 0x000076aef5434fb9 llvm::CastInfo<mlir::torch::Torch::NonValueTensorType, mlir::Type const, void>::isPossible(mlir::Type) /home/prashant/iree/third_party/llvm-project/mlir/include/mlir/IR/Types.h:423:14
#17 0x000076aef5434fb9 bool llvm::isa<mlir::torch::Torch::NonValueTensorType, mlir::Type>(mlir::Type const&) /home/prashant/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:549:10
#18 0x000076aef5434fb9 bool llvm::isa<mlir::torch::Torch::NonValueTensorType, mlir::torch::Torch::ValueTensorType, mlir::Type>(mlir::Type const&) /home/prashant/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:554:10
#19 0x000076aef5434fb9 mlir::torch::Torch::BaseTensorType::classof(mlir::Type) /home/prashant/iree/third_party/torch-mlir/include/torch-mlir/Dialect/Torch/IR/TorchTypes.h:163:10
#20 0x000076aef5434fb9 llvm::CastInfo<mlir::torch::Torch::BaseTensorType, mlir::Type const, void>::isPossible(mlir::Type) /home/prashant/iree/third_party/llvm-project/mlir/include/mlir/IR/Types.h:423:14
#21 0x000076aef5434fb9 bool llvm::isa<mlir::torch::Torch::BaseTensorType, mlir::Type>(mlir::Type const&) /home/prashant/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:549:10
#22 0x000076aef5434fb9 decltype(auto) llvm::cast<mlir::torch::Torch::BaseTensorType, mlir::Type>(mlir::Type const&) /home/prashant/iree/third_party/llvm-project/llvm/include/llvm/Support/Casting.h:566:3
#23 0x000076aef5434fb9 refineShapeCalculateResult(mlir::torch::Torch

@pdhirajkumarprasad
Copy link
Author

@pashu123 this issue still reproduces even with today's build, so the different error you see is likely caused by some local change in your sandbox?

IREE (https://iree.dev):
IREE compiler version 20241107.1071 @ e4f3960
LLVM version 20.0.0git
Optimized build

pdhirajkumarprasad pushed a commit to nod-ai/SHARK-TestSuite that referenced this issue Nov 7, 2024
1. Three `jx_nest_*` models will newly pass with basic optimizations. 
2. Three `twins_svt_*` models will now pass iree inference invocation,
but fail numerics quite terribly.
3. The `bat_resnext` model will still fail compile due to a similar
error as in <iree-org/iree#19058>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug 🐞 Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants