diff --git a/benchmarks/cutlass_kernel/python_main.cpp b/benchmarks/cutlass_kernel/python_main.cpp index 5211735581..9f1e54c59e 100644 --- a/benchmarks/cutlass_kernel/python_main.cpp +++ b/benchmarks/cutlass_kernel/python_main.cpp @@ -139,19 +139,17 @@ static auto gemm_run(const at::Tensor &A, const at::Tensor &B, at::Tensor &C, size_t workspace_size = Gemm::get_workspace_size(arguments); cutlass::device_memory::allocation workspace(workspace_size); - { - CUTLASS_CHECK(gemm_op.can_implement(arguments)); - } - { - CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get())); - } - { - CUTLASS_CHECK(gemm_op.run()); - } + CUTLASS_CHECK(gemm_op.can_implement(arguments)); + CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get())); + CUTLASS_CHECK(gemm_op.run()); + + syclcompat::wait(); + } catch (std::exception &e) { std::cerr << "Runtime error: " << e.what() << std::endl; return -1; } catch (...) { + std::cerr << "Unexpected error" << std::endl; return -1; }