Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_tools/llvm_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e00853859e89114d8db24aa0b863b618175f79c7
eb6da944af31dd684be3ab2f93f453a3837a72c6
1 change: 0 additions & 1 deletion include/TPP/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
add_subdirectory(Check)
add_subdirectory(Perf)
add_subdirectory(Tune)
add_subdirectory(Xsmm)
9 changes: 0 additions & 9 deletions include/TPP/Dialect/Tune/CMakeLists.txt

This file was deleted.

26 changes: 0 additions & 26 deletions include/TPP/Dialect/Tune/TuneDialect.h

This file was deleted.

35 changes: 0 additions & 35 deletions include/TPP/Dialect/Tune/TuneDialect.td

This file was deleted.

17 changes: 0 additions & 17 deletions include/TPP/Dialect/Tune/TuneTransformOps.h

This file was deleted.

25 changes: 0 additions & 25 deletions include/TPP/Dialect/Tune/TuneTransformOps.td

This file was deleted.

1 change: 0 additions & 1 deletion lib/TPP-CAPI/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ add_mlir_public_c_api_library(TPPCAPI
LINK_LIBS PUBLIC
TPPCheckDialect
TPPPerfDialect
TPPTuneDialect
TPPXsmmDialect
TPPTransforms
TPPPipeline
Expand Down
1 change: 0 additions & 1 deletion lib/TPP/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
add_subdirectory(Check)
add_subdirectory(Perf)
add_subdirectory(Tune)
add_subdirectory(Xsmm)
13 changes: 0 additions & 13 deletions lib/TPP/Dialect/Tune/CMakeLists.txt

This file was deleted.

13 changes: 0 additions & 13 deletions lib/TPP/Dialect/Tune/TransformOps/CMakeLists.txt

This file was deleted.

49 changes: 0 additions & 49 deletions lib/TPP/Dialect/Tune/TransformOps/TuneTransformOps.cpp

This file was deleted.

21 changes: 0 additions & 21 deletions lib/TPP/Dialect/Tune/TuneDialect.cpp

This file was deleted.

6 changes: 3 additions & 3 deletions lib/TPP/GPU/GpuDataTransfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,19 @@ static FailureOr<Value> transferMemref(RewriterBase &rewriter,
return failure();
gpuBuffer = *newBuffer;
}
rewriter.create<gpu::MemcpyOp>(loc, std::nullopt, ValueRange{}, gpuBuffer,
rewriter.create<gpu::MemcpyOp>(loc, ValueRange{}, ValueRange{}, gpuBuffer,
hostBuffer);

// If requested, copy data back to the host.
if (copyDataBack) {
rewriter.setInsertionPointAfter(launchFuncOp);
rewriter.create<gpu::MemcpyOp>(loc, std::nullopt, ValueRange{}, hostBuffer,
rewriter.create<gpu::MemcpyOp>(loc, ValueRange{}, ValueRange{}, hostBuffer,
gpuBuffer);
}

// Cleanup device buffer.
rewriter.setInsertionPoint(block.getTerminator());
rewriter.create<gpu::DeallocOp>(loc, std::nullopt, gpuAlloc.getMemref());
rewriter.create<gpu::DeallocOp>(loc, ValueRange{}, gpuAlloc.getMemref());

return gpuBuffer;
}
Expand Down
5 changes: 3 additions & 2 deletions lib/TPP/GPU/LinalgToXeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,10 @@ static std::optional<Value> lowerGenericOp(linalg::GenericOp genericOp,
if (isa<FloatType>(eltType)) {
auto floatType = cast<FloatType>(eltType);
zeroConst = rewriter.create<arith::ConstantFloatOp>(
loc, APFloat::getZero(floatType.getFloatSemantics()), floatType);
loc, floatType, APFloat::getZero(floatType.getFloatSemantics()));
} else if (isa<IntegerType>(eltType)) {
zeroConst = rewriter.create<arith::ConstantIntOp>(loc, 0, eltType);
auto intType = llvm::dyn_cast<mlir::IntegerType>(eltType);
zeroConst = rewriter.create<arith::ConstantIntOp>(loc, 0, intType.getWidth());
} else {
// Unhandled type. Bail out.
return std::nullopt;
Expand Down
12 changes: 6 additions & 6 deletions lib/TPP/Runner/MLIRBench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,13 @@ Value MLIRBench::registerOnGpu(Value buf, MemRefType memRefTy) {
if (backend == "intel") {
memcpy = builder.create<memref::CopyOp>(unkLoc, buf, gpuBuf);
} else {
memcpy = builder.create<gpu::MemcpyOp>(unkLoc, /*asyncToken=*/std::nullopt,
memcpy = builder.create<gpu::MemcpyOp>(unkLoc, /*asyncToken=*/ValueRange{},
ValueRange{}, gpuBuf, buf);
}

// Dealloc the arg buffer at the end of program
builder.setInsertionPointToEnd(&getMainBlock());
builder.create<gpu::DeallocOp>(unkLoc, /*asyncToken=*/std::nullopt, gpuBuf);
builder.create<gpu::DeallocOp>(unkLoc, /*asyncToken=*/ValueRange{}, gpuBuf);

// Continue inserting ops after the created kernel arg
builder.setInsertionPointAfter(memcpy);
Expand Down Expand Up @@ -232,7 +232,7 @@ LogicalResult MLIRBench::createKernelArgs() {
memrefType, seed);
data = registerOnGpu(data, memrefType);
return builder.create<bufferization::ToTensorOp>(
unkLoc, data, /*restrict=*/true, /*writable=*/true);
unkLoc, tensorTy, data, /*restrict=*/true, /*writable=*/true);
})
.Default([&](auto t) { return std::nullopt; });

Expand Down Expand Up @@ -267,7 +267,7 @@ Value MLIRBench::createTimerLoop(unsigned iters) {
auto count = getConstInt(builder, iters, 64);

// Create perf benchmarking region, set insertion to inside the body
auto bench = builder.create<perf::BenchOp>(unkLoc, count);
auto bench = builder.create<perf::BenchOp>(unkLoc, count, ValueRange{});
builder.setInsertionPointToStart(bench.getBody());

// Call the kernel, ignore output
Expand Down Expand Up @@ -305,7 +305,7 @@ void MLIRBench::printVector(Value vector) {
if (vectorValue.getElementType().isBF16()) {
VectorType vecType =
VectorType::get(vectorValue.getShape(), builder.getF32Type());
op = builder.create<arith::ExtFOp>(unkLoc, vecType, vector, std::nullopt);
op = builder.create<arith::ExtFOp>(unkLoc, vecType, vector, arith::FastMathFlagsAttr{});
}
builder.create<vector::PrintOp>(unkLoc, op);
}
Expand Down Expand Up @@ -398,7 +398,7 @@ LogicalResult MLIRBench::printResult(Operation *kernelCall) {
memcpy = builder.create<memref::CopyOp>(unkLoc, result, outBuf);
} else {
memcpy = builder.create<gpu::MemcpyOp>(
unkLoc, /*asyncToken=*/std::nullopt, ValueRange{}, outBuf, result);
unkLoc, /*asyncToken=*/ValueRange{}, ValueRange{}, outBuf, result);
}

// Dealloc the output buffer at the end of program.
Expand Down
9 changes: 8 additions & 1 deletion lib/TPP/Transforms/LinalgVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ struct VectorizationPattern : public RewritePattern {
if (isa<tensor::InsertSliceOp>(op))
return rewriter.notifyMatchFailure(op,
"Insert slice vectorization disabled");
return linalg::vectorize(rewriter, op);

auto vectorizeResult = linalg::vectorize(rewriter, op);
if (failed(vectorizeResult))
return failure();

rewriter.replaceOp(op, vectorizeResult->replacements);

return success();
}
};

Expand Down
2 changes: 1 addition & 1 deletion lib/TPP/Transforms/SCFParallelLoopTiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ void tileParallelLoop(ParallelOp op, ArrayRef<unsigned> tileSizes,
b.setInsertionPointToStart(innerLoop.getBody());
// Insert in-bound check
Value inbound =
b.create<arith::ConstantIntOp>(op.getLoc(), 1, b.getIntegerType(1));
b.create<arith::ConstantIntOp>(op.getLoc(), 1, 1);
for (auto [outerUpperBound, outerIV, innerIV, innerStep] :
llvm::zip(outerLoop.getUpperBound(), outerLoop.getInductionVars(),
innerLoop.getInductionVars(), innerLoop.getStep())) {
Expand Down
7 changes: 4 additions & 3 deletions lib/TPP/Transforms/VectorContractToAMX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ struct VectorContractToAMXPattern
// Create sequence of Read, Up-Convert and Write
auto readC = rewriter.create<vector::TransferReadOp>(
loc, VectorType::get({16}, accType.getElementType()),
accSubview, ValueRange{iv, innerIv}, ArrayRef{true});
accSubview, ValueRange{iv, innerIv}, std::nullopt /* padding */,
ArrayRef{true});
auto bitcastLoad = rewriter.create<vector::BitCastOp>(
loc, VectorType::get({16}, rewriter.getI16Type()), readC);

Expand Down Expand Up @@ -586,8 +587,8 @@ struct VectorContractToAMXPattern
auto elementType = bufferType.getElementType();
FloatType floatType = cast<FloatType>(elementType);
Value f0 = rewriter.create<arith::ConstantFloatOp>(
loc, APFloat::getZero(floatType.getFloatSemantics()),
floatType);
loc, floatType,
APFloat::getZero(floatType.getFloatSemantics()));

// Read
auto readC = rewriter.create<vector::TransferReadOp>(
Expand Down
2 changes: 1 addition & 1 deletion lib/TPP/Transforms/VectorContractToOuterproduct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ struct VectorContractToOuterproductPattern
auto elementType = lhsType.getElementType();
FloatType floatType = cast<FloatType>(elementType);
Value f0 = rewriter.create<arith::ConstantFloatOp>(
loc, APFloat::getZero(floatType.getFloatSemantics()), floatType);
loc, floatType, APFloat::getZero(floatType.getFloatSemantics()));

// Create the outer scf.for loop
auto forOp = rewriter.create<scf::ForOp>(
Expand Down
Loading