Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,7 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
if (auto *AtomicTy = Ty->getAs<AtomicType>())
Ty = AtomicTy->getValueType();

if (Ty->isExtVectorBoolType()) {
if (Ty->isExtVectorBoolType() || Ty->isConstantMatrixBoolType()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious: I thought there would be some kind of HLSL-specific handling for the vector case?

Second, C/C++ does not support boolean matrix types, so this is correct as far as I can tell. However, if they ever do, or if some other C dialect comes along that does and wants to treat bools as i1, will this code still be correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The vector case isn't HLSL-specific: boolean vectors are packed in other languages too, so they also need to be converted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as (boolean) matrices remain represented as vectors in memory, this logic should remain correct if C/C++ or some other C dialect adds boolean matrix types.

llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
if (StoreTy->isVectorTy() && StoreTy->getScalarSizeInBits() >
Value->getType()->getScalarSizeInBits())
Expand Down
13 changes: 11 additions & 2 deletions clang/lib/CodeGen/CodeGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
llvm::Type *IRElemTy = ConvertType(MT->getElementType());
if (Context.getLangOpts().HLSL && T->isConstantMatrixBoolType())
IRElemTy = ConvertTypeForMem(Context.BoolTy);
return llvm::ArrayType::get(IRElemTy,
MT->getNumRows() * MT->getNumColumns());
return llvm::ArrayType::get(IRElemTy, MT->getNumElementsFlattened());
}

llvm::Type *R = ConvertType(T);
Expand Down Expand Up @@ -180,6 +179,16 @@ llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
return llvm::IntegerType::get(getLLVMContext(),
(unsigned)Context.getTypeSize(T));

if (T->isConstantMatrixBoolType()) {
Copy link
Contributor Author

@Icohedron Icohedron Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that when CodeGenTypes::convertTypeForLoadStore is called with a T for which T->isConstantMatrixBoolType() is true, the LLVMTy passed in is of type <N x i1>.
Normally LLVMTy is returned unchanged, so no ZExt occurs, because the value being stored is already a <N x i1>.
This change therefore returns a <N x i32> when T->isConstantMatrixBoolType(), in order to reuse the existing logic that ZExts boolean vectors from <N x i1> to <N x i32>.

// Matrices are loaded and stored atomically as vectors. Therefore we
// construct a FixedVectorType here instead of returning
// ConvertTypeForMem(T) which would return an ArrayType instead.
const Type *Ty = Context.getCanonicalType(T).getTypePtr();
const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty);
llvm::Type *IRElemTy = ConvertTypeForMem(MT->getElementType());
return llvm::FixedVectorType::get(IRElemTy, MT->getNumElementsFlattened());
}

if (T->isExtVectorBoolType())
return ConvertTypeForMem(T);

Expand Down
43 changes: 30 additions & 13 deletions clang/test/CodeGenHLSL/BoolMatrix.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ struct S {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
// CHECK-NEXT: [[B:%.*]] = alloca [4 x i32], align 4
// CHECK-NEXT: store <4 x i1> splat (i1 true), ptr [[B]], align 4
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[B]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[B]], align 4
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
Expand Down Expand Up @@ -40,11 +40,12 @@ bool fn1() {
// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x i1> [[VECINIT]], i1 [[LOADEDV1]], i32 1
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i1> [[VECINIT2]], i1 true, i32 2
// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <4 x i1> [[VECINIT3]], i1 false, i32 3
// CHECK-NEXT: store <4 x i1> [[VECINIT4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[A]], align 4
// CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[RETVAL]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
// CHECK-NEXT: ret <4 x i1> [[TMP3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[VECINIT4]] to <4 x i32>
// CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[A]], align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we storing a vector into an array here? Is this okay? I would think this isn't okay.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it wasn't OK either, but @farzonl told me it's not an issue. It also occurs in non-HLSL tests.

See this C++ test, for example:
https://github.com/llvm/llvm-project/blob/main/clang/test/CodeGenCXX/matrix-type.cpp#L29-L31

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is likely to change when we do per-element updates of vector elements to fix the data race issue.

// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
// CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[RETVAL]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
// CHECK-NEXT: ret <4 x i1> [[TMP4]]
//
bool2x2 fn2(bool V) {
bool2x2 A = {V, true, V, false};
Expand All @@ -57,7 +58,7 @@ bool2x2 fn2(bool V) {
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
// CHECK-NEXT: store <4 x i1> <i1 true, i1 false, i1 true, i1 false>, ptr [[BM]], align 1
// CHECK-NEXT: store <4 x i32> <i32 1, i32 0, i32 1, i32 0>, ptr [[BM]], align 1
// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
// CHECK-NEXT: store float 1.000000e+00, ptr [[F]], align 1
// CHECK-NEXT: [[BM1:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
Expand All @@ -77,9 +78,9 @@ bool fn3() {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [4 x i32]], align 4
// CHECK-NEXT: store <4 x i1> splat (i1 true), ptr [[ARR]], align 4
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i32 1
// CHECK-NEXT: store <4 x i1> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [4 x i32]], ptr [[ARR]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
Expand All @@ -96,7 +97,7 @@ bool fn4() {
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M:%.*]] = alloca [4 x i32], align 4
// CHECK-NEXT: store <4 x i1> splat (i1 true), ptr [[M]], align 4
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[M]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[M]], align 4
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[TMP0]], i32 0, i32 3
// CHECK-NEXT: store <4 x i32> [[MATINS]], ptr [[M]], align 4
Expand All @@ -114,7 +115,7 @@ void fn5() {
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
// CHECK-NEXT: store i32 0, ptr [[V]], align 4
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
// CHECK-NEXT: store <4 x i1> <i1 true, i1 false, i1 true, i1 false>, ptr [[BM]], align 1
// CHECK-NEXT: store <4 x i32> <i32 1, i32 0, i32 1, i32 0>, ptr [[BM]], align 1
// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
// CHECK-NEXT: store float 1.000000e+00, ptr [[F]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V]], align 4
Expand All @@ -136,9 +137,9 @@ void fn6() {
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [4 x i32]], align 4
// CHECK-NEXT: store <4 x i1> splat (i1 true), ptr [[ARR]], align 4
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i32 1
// CHECK-NEXT: store <4 x i1> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [4 x i32]], ptr [[ARR]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[TMP0]], i32 0, i32 1
Expand All @@ -149,3 +150,19 @@ void fn7() {
bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
Arr[0][1][0] = false;
}

// fn8 takes a bool4x4 by value and returns it unchanged. The expected IR below
// has the <16 x i1> argument zero-extended to <16 x i32> before it is stored
// into the [16 x i32] alloca for the parameter, and then reloaded as
// <16 x i32> on the way to the return slot.
// CHECK-LABEL: define hidden noundef <16 x i1> @_Z3fn8u11matrix_typeILm4ELm4EbE(
// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i1>, align 4
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x i32], align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
// CHECK-NEXT: store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i1>, ptr [[RETVAL]], align 4
// CHECK-NEXT: ret <16 x i1> [[TMP2]]
//
bool4x4 fn8(bool4x4 m) {
return m;
}