diff --git a/.github/workflows/ci-gpu.yaml b/.github/workflows/ci-gpu.yaml index 5ac34c1fc..bf766f12e 100644 --- a/.github/workflows/ci-gpu.yaml +++ b/.github/workflows/ci-gpu.yaml @@ -24,7 +24,7 @@ concurrency: env: LLVM_SHA_FILE: llvm-sha.txt - LLVM_CACHE_NUMBER: 1 # Increase to reset cache + LLVM_CACHE_NUMBER: 2 # Increase to reset cache jobs: # Water uses its own LLVM version independent of the one used by IREE and diff --git a/water/lib/Dialect/Wave/IR/WaveOps.cpp b/water/lib/Dialect/Wave/IR/WaveOps.cpp index 79dfd7626..0715338ca 100644 --- a/water/lib/Dialect/Wave/IR/WaveOps.cpp +++ b/water/lib/Dialect/Wave/IR/WaveOps.cpp @@ -112,7 +112,7 @@ bool wave::IterateOp::areTypesCompatible(mlir::Type lhs, mlir::Type rhs) { } mlir::OperandRange -wave::IterateOp::getEntrySuccessorOperands(mlir::RegionBranchPoint point) { +wave::IterateOp::getEntrySuccessorOperands(mlir::RegionSuccessor successor) { return getIterArgs(); } @@ -120,7 +120,7 @@ void wave::IterateOp::getSuccessorRegions( mlir::RegionBranchPoint point, ::llvm::SmallVectorImpl<::mlir::RegionSuccessor> &regions) { // May branch into the region or bypass it regardless of the source. 
- regions.emplace_back(mlir::RegionSuccessor(getResults())); + regions.emplace_back(mlir::RegionSuccessor(getOperation(), getResults())); regions.emplace_back( mlir::RegionSuccessor(&getBody(), getBody().front().getArguments())); } @@ -544,6 +544,6 @@ LogicalResult WriteOp::verify() { //----------------------------------------------------------------------------- mlir::MutableOperandRange -wave::YieldOp::getMutableSuccessorOperands(mlir::RegionBranchPoint) { +wave::YieldOp::getMutableSuccessorOperands(mlir::RegionSuccessor) { return getValuesMutable(); } diff --git a/water/llvm-sha.txt b/water/llvm-sha.txt index f94b55e73..b3e9aa173 100644 --- a/water/llvm-sha.txt +++ b/water/llvm-sha.txt @@ -1 +1 @@ -ec3cf67434ba361124cfbb548e93589acd0d3cf2 +1e3a1ce911d1e5e3804b63e3ba3059c36eb697e5 diff --git a/water/python/CMakeLists.txt b/water/python/CMakeLists.txt index fcd7ddb25..1cc77432e 100644 --- a/water/python/CMakeLists.txt +++ b/water/python/CMakeLists.txt @@ -9,6 +9,7 @@ if (WATER_ENABLE_PYTHON) # Avoid clashing with IREE Python bindings set(MLIR_BINDINGS_PYTHON_NB_DOMAIN "water_mlir") + set(MLIR_BINDINGS_PYTHON_INSTALL_PREFIX "python_packages/water/water_mlir" CACHE STRING "" FORCE) add_compile_definitions("MLIR_PYTHON_PACKAGE_PREFIX=water_mlir.") declare_mlir_python_sources(WaterPythonSources) diff --git a/water/test/Transforms/lowered_gemm_pipelined.mlir b/water/test/Transforms/lowered_gemm_pipelined.mlir index 0fed3fb1c..7cecc9522 100644 --- a/water/test/Transforms/lowered_gemm_pipelined.mlir +++ b/water/test/Transforms/lowered_gemm_pipelined.mlir @@ -55,8 +55,8 @@ module attributes {transform.with_named_sequence} { %58 = vector.load %view[%5, %7] : memref<64x36xbf16, #gpu.address_space>, vector<8xbf16> %59 = vector.load %view_4[%8, %6] : memref<64x36xbf16, #gpu.address_space>, vector<8xbf16> %60 = vector.load %view_4[%8, %7] : memref<64x36xbf16, #gpu.address_space>, vector<8xbf16> - %61 = amdgpu.mfma %59 * %57 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 32 : 
i32, n = 32 : i32} blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> - %62 = amdgpu.mfma %60 * %58 + %61 {blocks = 1 : i32, k = 16 : i32, m = 32 : i32, n = 32 : i32} blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> + %61 = amdgpu.mfma 32x32x16 %59 * %57 + %arg4 {blocks = 1 : i32} blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> + %62 = amdgpu.mfma 32x32x16 %60 * %58 + %61 {blocks = 1 : i32} blgp = none : vector<8xbf16>, vector<8xbf16>, vector<16xf32> scf.yield %62, %arg7, %arg8, %55, %56 : vector<16xf32>, vector<8xbf16>, vector<8xbf16>, vector<8xbf16>, vector<8xbf16> } %16 = vector.extract_strided_slice %15#0 {offsets = [0], sizes = [1], strides = [1]} : vector<16xf32> to vector<1xf32>