 * \file intrin_rule_cuda.cc
 * \brief CUDA intrinsic rules.
 */
#include <tvm/tir/builtin.h>
#include <tvm/tir/op_attr_types.h>

#include "target/intrin_rule.h"
9-
10- namespace tvm {
11- namespace codegen {
12- namespace intrin {
13- // Add float suffix to the intrinsics, CUDA fast math.
14- using tir::FLowerIntrinsic;
15-
16- struct CUDAMath {
17- std::string operator ()(DataType t, std::string name) const {
18- if (t.is_float ()) {
19- switch (t.bits ()) {
20- case 64 :
21- return name;
22- case 32 :
23- return name + ' f' ;
24- case 16 : {
25- if (name == " fabs" ) {
26- return " __habs" ;
27- } else if (name == " round" ) {
28- return " hrint" ;
29- } else {
30- return " h" + name;
31- }
32- }
33- default :
34- return " " ;
35- }
36- } else if (t.is_bfloat16 ()) {
37- if (name == " fabs" ) {
38- return " __habs" ;
39- } else if (name == " round" ) {
40- return " hrint" ;
41- } else {
42- return " h" + name;
43- }
44- } else if (t.is_int () || t.is_uint ()) {
45- switch (t.bits ()) {
46- case 32 :
47- return " __" + name;
48- case 64 :
49- return " __" + name + " ll" ;
50- default :
51- return " " ;
52- }
53- }
54- return " " ;
55- }
56- };
57-
58- struct CUDAFastMath : public CUDAMath {
59- std::string operator ()(DataType t, std::string name) const {
60- if (t.is_float () && t.bits () == 32 ) {
61- return " __" + name + ' f' ;
62- } else {
63- return CUDAMath::operator ()(t, name);
64- }
65- return " " ;
66- }
67- };
68-
69- struct CUDAFastMathTan : public CUDAMath {
70- std::string operator ()(DataType t, std::string name) const {
71- if (t.is_float ()) {
72- switch (t.bits ()) {
73- case 64 :
74- return name;
75- // `__tanf` seems to produce some values too deviant from numpy tan version.
76- // So, let's use just `tanf` instead.
77- case 32 :
78- return name + ' f' ;
79- case 16 :
80- return ' h' + name;
81- default :
82- return " " ;
83- }
84- }
85- return " " ;
86- }
87- };
88-
89- struct CUDAPopcount {
90- std::string operator ()(DataType t, std::string name) const {
91- if (t.is_uint ()) {
92- switch (t.bits ()) {
93- case 32 :
94- return " __popc" ;
95- case 64 :
96- return " __popcll" ;
97- default :
98- return " " ;
99- }
100- }
101- return " " ;
102- }
103- };
104-
105- struct CUDAWarpIntrinsic {
106- const Op operator ()(DataType t, const Op& orig_op) const {
107- if (orig_op.same_as (builtin::tvm_warp_shuffle ())) {
108- return Op::Get (" tir.cuda.__shfl_sync" );
109- } else if (orig_op.same_as (builtin::tvm_warp_shuffle_up ())) {
110- return Op::Get (" tir.cuda.__shfl_up_sync" );
111- } else {
112- ICHECK (orig_op.same_as (builtin::tvm_warp_shuffle_down ()));
113- return Op::Get (" tir.cuda.__shfl_down_sync" );
114- }
115- }
116- };
117-
118- static PrimExpr DispatchCUDAWarpActiveMask (const PrimExpr& e) {
119- const CallNode* call = e.as <CallNode>();
120- return Call (call->dtype , Op::Get (" tir.cuda.__activemask" ), call->args );
121- }
122-
123- template <typename T>
124- static PrimExpr DispatchCUDAShuffle ( const PrimExpr& e) {
125- const CallNode* call = e. as <CallNode>( );
126- ICHECK (call != nullptr );
127- ICHECK_EQ (call-> args . size (), 5 ); // mask, value, warp_id, width, warp_size
128- Array<PrimExpr> cuda_args{ {call->args [0 ], call->args [1 ], call->args [2 ], call->args [3 ]}};
129- return Call (call->dtype , T ()(call->dtype , Downcast<Op>(call->op )), cuda_args);
130- }
131-
132- TVM_REGISTER_OP (" tir.rsqrt" )
133- .set_attr<FLowerIntrinsic>(" cuda.FLowerIntrinsic" , DispatchPureExtern<CUDAMath>);
134-
135- } // namespace intrin
136- } // namespace codegen
137- } // namespace tvm
138-
#include <tvm/tir/builtin.h>
#include <tvm/tir/op_attr_types.h>

#include "target/intrin_rule.h"
9+
10+ namespace tvm {
11+ namespace codegen {
12+ namespace intrin {
13+ // Add float suffix to the intrinsics, CUDA fast math.
14+ using tir::FLowerIntrinsic;
15+
16+ struct CUDAMath {
17+ std::string operator ()(DataType t, std::string name) const {
18+ if (t.is_float ()) {
19+ switch (t.bits ()) {
20+ case 64 :
21+ return name;
22+ case 32 :
23+ return name + ' f' ;
24+ case 16 : {
25+ if (name == " fabs" ) {
26+ return " __habs" ;
27+ } else if (name == " round" ) {
28+ return " hrint" ;
29+ } else {
30+ return " h" + name;
31+ }
32+ }
33+ default :
34+ return " " ;
35+ }
36+ } else if (t.is_bfloat16 ()) {
37+ if (name == " fabs" ) {
38+ return " __habs" ;
39+ } else if (name == " round" ) {
40+ return " hrint" ;
41+ } else {
42+ return " h" + name;
43+ }
44+ } else if (t.is_int () || t.is_uint ()) {
45+ switch (t.bits ()) {
46+ case 32 :
47+ return " __" + name;
48+ case 64 :
49+ return " __" + name + " ll" ;
50+ default :
51+ return " " ;
52+ }
53+ }
54+ return " " ;
55+ }
56+ };
57+
58+ struct CUDAFastMath : public CUDAMath {
59+ std::string operator ()(DataType t, std::string name) const {
60+ if (t.is_float () && t.bits () == 32 ) {
61+ return " __" + name + ' f' ;
62+ } else {
63+ return CUDAMath::operator ()(t, name);
64+ }
65+ return " " ;
66+ }
67+ };
68+
69+ struct CUDAFastMathTan : public CUDAMath {
70+ std::string operator ()(DataType t, std::string name) const {
71+ if (t.is_float ()) {
72+ switch (t.bits ()) {
73+ case 64 :
74+ return name;
75+ // `__tanf` seems to produce some values too deviant from numpy tan
76+ // version. So, let's use just `tanf` instead.
77+ case 32 :
78+ return name + ' f' ;
79+ case 16 :
80+ return ' h' + name;
81+ default :
82+ return " " ;
83+ }
84+ }
85+ return " " ;
86+ }
87+ };
88+
89+ struct CUDAPopcount {
90+ std::string operator ()(DataType t, std::string name) const {
91+ if (t.is_uint ()) {
92+ switch (t.bits ()) {
93+ case 32 :
94+ return " __popc" ;
95+ case 64 :
96+ return " __popcll" ;
97+ default :
98+ return " " ;
99+ }
100+ }
101+ return " " ;
102+ }
103+ };
104+
105+ struct CUDAWarpIntrinsic {
106+ const Op operator ()(DataType t, const Op & orig_op) const {
107+ if (orig_op.same_as (builtin::tvm_warp_shuffle ())) {
108+ return Op::Get (" tir.cuda.__shfl_sync" );
109+ } else if (orig_op.same_as (builtin::tvm_warp_shuffle_up ())) {
110+ return Op::Get (" tir.cuda.__shfl_up_sync" );
111+ } else {
112+ ICHECK (orig_op.same_as (builtin::tvm_warp_shuffle_down ()));
113+ return Op::Get (" tir.cuda.__shfl_down_sync" );
114+ }
115+ }
116+ };
117+
118+ static PrimExpr DispatchCUDAWarpActiveMask (const PrimExpr & e) {
119+ const CallNode * call = e.as <CallNode>();
120+ return Call (call->dtype , Op::Get (" tir.cuda.__activemask" ), call->args );
121+ }
122+
123+ template <typename T> static PrimExpr DispatchCUDAShuffle ( const PrimExpr &e) {
124+ const CallNode *call = e. as <CallNode>();
125+ ICHECK ( call != nullptr );
126+ ICHECK_EQ (call-> args . size (), 5 ); // mask, value, warp_id, width, warp_size
127+ Array<PrimExpr> cuda_args{
128+ {call->args [0 ], call->args [1 ], call->args [2 ], call->args [3 ]}};
129+ return Call (call->dtype , T ()(call->dtype , Downcast<Op>(call->op )), cuda_args);
130+ }
131+
132+ TVM_REGISTER_OP (" tir.rsqrt" )
133+ .set_attr<FLowerIntrinsic>(" cuda.FLowerIntrinsic" ,
134+ DispatchPureExtern<CUDAMath>);
135+
136+ } // namespace intrin
137+ } // namespace codegen
138+ } // namespace tvm
0 commit comments