1 change: 1 addition & 0 deletions include/triton/Dialect/Triton/IR/TritonTypes.td
@@ -158,6 +158,7 @@ def TT_TensorDescType : TritonTypeDef<"TensorDesc", "tensordesc", [TT_TensorDesc

let hasCustomAssemblyFormat = 1;
let skipDefaultBuilders = 1;
+ let genVerifyDecl = 1;
}

#endif
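
Setting genVerifyDecl = 1 asks ODS to declare a static verify hook on the generated TensorDescType class; the hook itself is defined in lib/Dialect/Triton/IR/Types.cpp below. As a rough, non-authoritative sketch (parameter names mirror that definition; the exact generated code varies by MLIR version), the emitted declaration looks like:

// Sketch of the declaration ODS emits when genVerifyDecl = 1 is set
// (assumed shape, mirroring the definition in Types.cpp below).
static LogicalResult verify(function_ref<InFlightDiagnostic()> emitError,
                            ArrayRef<int64_t> shape, Type elementType,
                            Attribute sharedLayout);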
2 changes: 1 addition & 1 deletion lib/Dialect/Triton/IR/Ops.cpp
@@ -1418,7 +1418,7 @@ static LogicalResult verifyGatherScatterResultType(Operation *op,
LogicalResult verifyGatherScatterOp(Operation *op, ShapedType blockType,
ShapedType resultType,
ShapedType indicesType) {
- // Gather from `!tt.tensordesc<tensor<1xMxdtype>>`.
+ // Gather from `!tt.tensordesc<1xMxdtype>`.
if (blockType.getRank() != 2) {
return op->emitOpError("descriptor block must be a 2D tensor, but got ")
<< blockType;
16 changes: 15 additions & 1 deletion lib/Dialect/Triton/IR/Types.cpp
@@ -27,6 +27,7 @@ void TritonDialect::registerTypes() {
// Format: !tt.tensordesc<128x64xf16>
// !tt.tensordesc<128x64xf16, #shared>
Type TensorDescType::parse(AsmParser &parser) {
+ Location loc = parser.getEncodedSourceLoc(parser.getCurrentLocation());
if (failed(parser.parseLess()))
return Type();

@@ -47,7 +48,8 @@ Type TensorDescType::parse(AsmParser &parser) {
if (failed(parser.parseGreater()))
return Type();

- return TensorDescType::get(shape, elementType, sharedLayout);
+ return TensorDescType::getChecked(loc, parser.getContext(), shape,
+                                   elementType, sharedLayout);
}

void TensorDescType::print(AsmPrinter &printer) const {
@@ -88,6 +90,18 @@ void PointerType::print(AsmPrinter &printer) const {
}
}

+ LogicalResult
+ TensorDescType::verify(function_ref<InFlightDiagnostic()> emitError,
+                        ArrayRef<int64_t> shape, Type elementType,
+                        Attribute sharedLayout) {
+   if (isa<RankedTensorType>(elementType)) {
+     return emitError()
+            << "tensor descriptors must not wrap tensor types; use "
+               "!tt.tensordesc<shape x element-type[, layout]> instead";
+   }
+   return success();
+ }
+
LogicalResult PointerType::verify(function_ref<InFlightDiagnostic()> emitError,
Type pointeeType, int addressSpace) {
if (isa<RankedTensorType>(pointeeType)) {
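Switching the parser from get to getChecked makes the new verifier run at parse time: getChecked invokes TensorDescType::verify with a diagnostic bound to the source location captured above, and yields a null Type when verification fails, so the parser reports the error instead of constructing an invalid type. A minimal sketch of that flow, assuming the usual semantics of the ODS-generated getChecked (the real helper also threads the MLIRContext through):

// Hedged sketch only; getCheckedSketch is a hypothetical stand-in for the
// generated TensorDescType::getChecked, not the actual generated code.
static TensorDescType getCheckedSketch(Location loc, ArrayRef<int64_t> shape,
                                       Type elementType,
                                       Attribute sharedLayout) {
  // Bind diagnostics to the location recorded at the start of parsing.
  auto emitDiag = [&]() -> InFlightDiagnostic { return mlir::emitError(loc); };
  // Run the verifier first; construct the type only on success.
  if (failed(TensorDescType::verify(emitDiag, shape, elementType,
                                    sharedLayout)))
    return {}; // null Type, which parse() returns as-is to signal failure
  return TensorDescType::get(shape, elementType, sharedLayout);
}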
6 changes: 3 additions & 3 deletions test/TritonGPU/amd/amd-update-async-wait-count.mlir
@@ -647,16 +647,16 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
// CHECK-LABEL: tdm_partitioned_shared_waitcnt
tt.func public @tdm_partitioned_shared_waitcnt(
%memDesc: !ttg.memdesc<128x16xf16, #partitioned, #smem, mutable>,
- %tensorDesc: !tt.tensordesc<tensor<128x16xf16>>,
+ %tensorDesc: !tt.tensordesc<128x16xf16>,
%mask: i32
) {
%c0_i32 = arith.constant 0 : i32

// numLogicalPieces = numPartitions * numGroups = 2 * 4 = 8
// warpsAlongPartition = gcd(numWarps=4, numLogicalPieces=8) = 4
// Each async_tdm_copy emits divideCeil(8, 4) = 2 instructions
- %1 = amdg.async_tdm_copy_global_to_local %tensorDesc[%c0_i32, %c0_i32] into %memDesc, pred = %mask : !tt.tensordesc<tensor<128x16xf16>> -> !ttg.memdesc<128x16xf16, #partitioned, #smem, mutable>
- %2 = amdg.async_tdm_copy_global_to_local %tensorDesc[%c0_i32, %c0_i32] into %memDesc, pred = %mask : !tt.tensordesc<tensor<128x16xf16>> -> !ttg.memdesc<128x16xf16, #partitioned, #smem, mutable>
+ %1 = amdg.async_tdm_copy_global_to_local %tensorDesc[%c0_i32, %c0_i32] into %memDesc, pred = %mask : !tt.tensordesc<128x16xf16> -> !ttg.memdesc<128x16xf16, #partitioned, #smem, mutable>
+ %2 = amdg.async_tdm_copy_global_to_local %tensorDesc[%c0_i32, %c0_i32] into %memDesc, pred = %mask : !tt.tensordesc<128x16xf16> -> !ttg.memdesc<128x16xf16, #partitioned, #smem, mutable>

// Skip second copy (2 instructions) => count = 2
// CHECK: amdg.async_tdm_intrinsic_wait {{.*}} {count = 2
4 changes: 2 additions & 2 deletions test/TritonGPU/consan.mlir
@@ -264,7 +264,7 @@ module attributes {"ttg.num-ctas" = 2 : i32, "ttg.num-warps" = 1 : i32, ttg.shar
// CHECK: arith.shrui
// CHECK-LABEL: @outstanding_commits_multicast_tma_recipients
tt.func public @outstanding_commits_multicast_tma_recipients(
- %desc: !tt.tensordesc<tensor<32x32xf32, #shared>>,
+ %desc: !tt.tensordesc<32x32xf32, #shared>,
%ptr: tensor<32x32x!tt.ptr<f32>, #blocked>) {
%true = arith.constant true
%c0_i32 = arith.constant 0 : i32
@@ -286,7 +286,7 @@ module attributes {"ttg.num-ctas" = 2 : i32, "ttg.num-warps" = 1 : i32, ttg.shar
// CHECK: %[[RECIPIENTS:.*]] = arith.shli %[[PATTERN]],
// CHECK: tt.call @__triton_consan_check_outstanding_commits{{.*}}({{.*}}, %[[RECIPIENTS]])
// CHECK: ttng.async_tma_copy_global_to_local
- ttng.async_tma_copy_global_to_local %desc[%c0_i32, %c0_i32] %shmem, %bar, %true {multicast} : !tt.tensordesc<tensor<32x32xf32, #shared>>, !ttg.memdesc<2xi64, #shared1, #smem, mutable> -> !ttg.memdesc<32x32xf32, #shared, #smem, mutable>
+ ttng.async_tma_copy_global_to_local %desc[%c0_i32, %c0_i32] %shmem, %bar, %true {multicast} : !tt.tensordesc<32x32xf32, #shared>, !ttg.memdesc<2xi64, #shared1, #smem, mutable> -> !ttg.memdesc<32x32xf32, #shared, #smem, mutable>
tt.return
}
}
10 changes: 10 additions & 0 deletions test/TritonGPU/invalid.mlir
@@ -30,6 +30,16 @@ module {

// -----

+ #shared = #ttg.swizzled_shared<{vec = 1, perPhase = 1, maxPhase = 1, order = [0, 1]}>
+ module {
+ // expected-error @+1 {{tensor descriptors must not wrap tensor types; use !tt.tensordesc<shape x element-type[, layout]> instead}}
+ tt.func public @nested_tensordesc(%arg0: !tt.tensordesc<tensor<8x16xf32, #shared>>) {
+ tt.return
+ }
+ }
+
+ // -----
+
#shared = #ttg.swizzled_shared<{vec = 1, perPhase = 1, maxPhase = 1, order = [1, 0], CGALayout = [[0, 1]]}>
#smem = #ttg.shared_memory
module attributes {"ttg.num-ctas" = 2 : i32} {