Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 183 additions & 1 deletion src/target/codegen_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,188 @@ CodeGenTileLangCUDA::CodeGenTileLangCUDA() {
runtime::symbol::tvm_global_barrier_state);
}

void CodeGenTileLangCUDA::ReserveKeywordsAsUnique_() {
CodeGenC::ReserveKeywordsAsUnique();
name_supply_->ReserveName("max");
name_supply_->ReserveName("min");
name_supply_->ReserveName("isfinite");
name_supply_->ReserveName("isinf");
name_supply_->ReserveName("isnan");

// skip single precision mathematical functions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix misleading comments.

Lines 221 and 313 say "skip" but the code actually reserves these function names to prevent collisions. The comments should read something like "reserve single/double precision mathematical functions" instead.

🔎 Proposed fix
-  // skip single precision mathematical functions
+  // reserve single precision mathematical functions
   name_supply_->ReserveName("acosf");
-  // skip double precision mathematical functions
+  // reserve double precision mathematical functions
   name_supply_->ReserveName("acos");

Also applies to: 313-313

🤖 Prompt for AI Agents
In src/target/codegen_cuda.cc around lines 221 and 313, the comments currently
say "skip single/double precision mathematical functions" but the code actually
reserves those function names to avoid collisions; update the comments to read
something like "reserve single-precision mathematical function names to prevent
collisions" at line 221 and "reserve double-precision mathematical function
names to prevent collisions" at line 313 (or equivalent wording clarifying
reservation rather than skipping).

name_supply_->ReserveName("acosf");
name_supply_->ReserveName("acoshf");
name_supply_->ReserveName("asinf");
name_supply_->ReserveName("asinhf");
name_supply_->ReserveName("atan2f");
name_supply_->ReserveName("atanf");
name_supply_->ReserveName("atanhf");
name_supply_->ReserveName("cbrtf");
name_supply_->ReserveName("ceilf");
name_supply_->ReserveName("copysignf");
name_supply_->ReserveName("cosf");
name_supply_->ReserveName("coshf");
name_supply_->ReserveName("cospif");
name_supply_->ReserveName("cyl_bessel_i0f");
name_supply_->ReserveName("cyl_bessel_i1f");
name_supply_->ReserveName("erfcf");
name_supply_->ReserveName("erfcinvf");
name_supply_->ReserveName("erfcxf");
name_supply_->ReserveName("erff");
name_supply_->ReserveName("erfinvf");
name_supply_->ReserveName("exp10f");
name_supply_->ReserveName("exp2f");
name_supply_->ReserveName("expf");
name_supply_->ReserveName("expm1f");
name_supply_->ReserveName("fabsf");
name_supply_->ReserveName("fdimf");
name_supply_->ReserveName("fdividef");
name_supply_->ReserveName("floorf");
name_supply_->ReserveName("fmaf");
name_supply_->ReserveName("fmaxf");
name_supply_->ReserveName("fminf");
name_supply_->ReserveName("fmodf");
name_supply_->ReserveName("frexpf");
name_supply_->ReserveName("hypotf");
name_supply_->ReserveName("ilogbf");
name_supply_->ReserveName("j0f");
name_supply_->ReserveName("j1f");
name_supply_->ReserveName("jnf");
name_supply_->ReserveName("ldexpf");
name_supply_->ReserveName("lgammaf");
name_supply_->ReserveName("llrintf");
name_supply_->ReserveName("llroundf");
name_supply_->ReserveName("log10f");
name_supply_->ReserveName("log1pf");
name_supply_->ReserveName("log2f");
name_supply_->ReserveName("logbf");
name_supply_->ReserveName("logf");
name_supply_->ReserveName("lrintf");
name_supply_->ReserveName("lroundf");
name_supply_->ReserveName("modff");
name_supply_->ReserveName("nanf");
name_supply_->ReserveName("nearbyintf");
name_supply_->ReserveName("nextafterf");
name_supply_->ReserveName("norm3df");
name_supply_->ReserveName("norm4df");
name_supply_->ReserveName("normcdff");
name_supply_->ReserveName("normcdfinvf");
name_supply_->ReserveName("normf");
name_supply_->ReserveName("powf");
name_supply_->ReserveName("rcbrtf");
name_supply_->ReserveName("remainderf");
name_supply_->ReserveName("remquof");
name_supply_->ReserveName("rhypotf");
name_supply_->ReserveName("rintf");
name_supply_->ReserveName("rnorm3df");
name_supply_->ReserveName("rnorm4df");
name_supply_->ReserveName("rnormf");
name_supply_->ReserveName("roundf");
name_supply_->ReserveName("rsqrtf");
name_supply_->ReserveName("scalblnf");
name_supply_->ReserveName("scalbnf");
name_supply_->ReserveName("signbit");
name_supply_->ReserveName("sincosf");
name_supply_->ReserveName("sincospif");
name_supply_->ReserveName("sinf");
name_supply_->ReserveName("sinhf");
name_supply_->ReserveName("sinpif");
name_supply_->ReserveName("sqrtf");
name_supply_->ReserveName("tanf");
name_supply_->ReserveName("tanhf");
name_supply_->ReserveName("tgammaf");
name_supply_->ReserveName("truncf");
name_supply_->ReserveName("y0f");
name_supply_->ReserveName("y1f");
name_supply_->ReserveName("ynf");

// skip double precision mathematical functions
name_supply_->ReserveName("acos");
name_supply_->ReserveName("acosh");
name_supply_->ReserveName("asin");
name_supply_->ReserveName("asinh");
name_supply_->ReserveName("atan");
name_supply_->ReserveName("atan2");
name_supply_->ReserveName("atanh");
name_supply_->ReserveName("cbrt");
name_supply_->ReserveName("ceil");
name_supply_->ReserveName("copysign");
name_supply_->ReserveName("cos");
name_supply_->ReserveName("cosh");
name_supply_->ReserveName("cospi");
name_supply_->ReserveName("cyl_bessel_i0");
name_supply_->ReserveName("cyl_bessel_i1");
name_supply_->ReserveName("erf");
name_supply_->ReserveName("erfc");
name_supply_->ReserveName("erfcinv");
name_supply_->ReserveName("erfcx");
name_supply_->ReserveName("erfinv");
name_supply_->ReserveName("exp");
name_supply_->ReserveName("exp10");
name_supply_->ReserveName("exp2");
name_supply_->ReserveName("expm1");
name_supply_->ReserveName("fabs");
name_supply_->ReserveName("fdim");
name_supply_->ReserveName("floor");
name_supply_->ReserveName("fma");
name_supply_->ReserveName("fmax");
name_supply_->ReserveName("fmin");
name_supply_->ReserveName("fmod");
name_supply_->ReserveName("frexp");
name_supply_->ReserveName("hypot");
name_supply_->ReserveName("ilogb");
name_supply_->ReserveName("j0");
name_supply_->ReserveName("j1");
name_supply_->ReserveName("jn");
name_supply_->ReserveName("ldexp");
name_supply_->ReserveName("lgamma");
name_supply_->ReserveName("llrint");
name_supply_->ReserveName("llround");
name_supply_->ReserveName("log");
name_supply_->ReserveName("log10");
name_supply_->ReserveName("log1p");
name_supply_->ReserveName("log2");
name_supply_->ReserveName("logb");
name_supply_->ReserveName("lrint");
name_supply_->ReserveName("lround");
name_supply_->ReserveName("modf");
name_supply_->ReserveName("nan");
name_supply_->ReserveName("nearbyint");
name_supply_->ReserveName("nextafter");
name_supply_->ReserveName("norm");
name_supply_->ReserveName("norm3d");
name_supply_->ReserveName("norm4d");
name_supply_->ReserveName("normcdf");
name_supply_->ReserveName("normcdfinv");
name_supply_->ReserveName("pow");
name_supply_->ReserveName("rcbrt");
name_supply_->ReserveName("remainder");
name_supply_->ReserveName("remquo");
name_supply_->ReserveName("rhypot");
name_supply_->ReserveName("rint");
name_supply_->ReserveName("rnorm");
name_supply_->ReserveName("rnorm3d");
name_supply_->ReserveName("rnorm4d");
name_supply_->ReserveName("round");
name_supply_->ReserveName("rsqrt");
name_supply_->ReserveName("scalbln");
name_supply_->ReserveName("scalbn");
name_supply_->ReserveName("signbit");
name_supply_->ReserveName("sin");
name_supply_->ReserveName("sincos");
name_supply_->ReserveName("sincospi");
name_supply_->ReserveName("sinh");
name_supply_->ReserveName("sinpi");
name_supply_->ReserveName("sqrt");
name_supply_->ReserveName("tan");
name_supply_->ReserveName("tanh");
name_supply_->ReserveName("tgamma");
name_supply_->ReserveName("trunc");
name_supply_->ReserveName("y0");
name_supply_->ReserveName("y1");
name_supply_->ReserveName("yn");
}
Comment on lines 219 to 399
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, let's verify the file exists and check the exact lines mentioned
cd /tmp/repo && wc -l src/target/codegen_cuda.cc 2>/dev/null || echo "File not found, searching for it..."
find . -name "codegen_cuda.cc" -type f 2>/dev/null | head -5

Repository: tile-ai/tilelang

Length of output: 201


🏁 Script executed:

# Once we locate the file, examine lines 219-311
sed -n '219,311p' src/target/codegen_cuda.cc 2>/dev/null | cat -n

Repository: tile-ai/tilelang

Length of output: 4326


🏁 Script executed:

# Search for context around this function and any related comments
rg -A 5 "ReserveKeywordsAsUnique_" src/target/codegen_cuda.cc 2>/dev/null || echo "Pattern not found"

Repository: tile-ai/tilelang

Length of output: 566


🏁 Script executed:

# Look for issue #1561 or related comments about naming conflicts
rg "1561|naming.*conflict|ReserveName" src/target/codegen_cuda.cc | head -20

Repository: tile-ai/tilelang

Length of output: 850


🏁 Script executed:

# Search for issue #1561 or related discussions in the codebase
rg -i "1561|sincosf|naming conflict|reserve" --type cc --type h src/target/ | grep -i "issue\|1561\|sincosf\|reserve" | head -20

Repository: tile-ai/tilelang

Length of output: 86


🏁 Script executed:

# Check if there are any comments or documentation about these reserved names
rg -B 5 -A 2 "ReserveKeywordsAsUnique" src/target/codegen_cuda.cc | head -40

Repository: tile-ai/tilelang

Length of output: 642


🏁 Script executed:

# Look for related CUDA function reservations in CodeGenC
find . -name "*.cc" -o -name "*.h" | xargs grep -l "ReserveKeywordsAsUnique" | head -5

Repository: tile-ai/tilelang

Length of output: 197


🏁 Script executed:

# Check the parent class CodeGenC to understand what's already reserved
rg -A 50 "void.*CodeGenC::ReserveKeywordsAsUnique" src/target/codegen_c.cc 2>/dev/null | head -80

Repository: tile-ai/tilelang

Length of output: 42


🌐 Web query:

CUDA math API sincosf sincospif voidsincosf function names documentation

💡 Result:

Summary of CUDA sin/cos combined functions and docs

  • Prototypes (single-precision, device):

    • void sincosf(float x, float *sptr, float *cptr) — computes sin(x) and cos(x) into *sptr and *cptr. [1]
    • void sincospif(float x, float sptr, float cptr) — computes sin(pix) and cos(pix) into *sptr and *cptr. [1][2]
  • libdevice/internal name:

    • __nv_sincosf(float x, float* sptr, float* cptr) — same functionality; documented in the libdevice user guide. [3]
  • Notes:

    • These are device functions (for device code). [1]
    • Results/accuracy and behavior can be affected by the --use_fast_math compiler flag; see the CUDA C++ Programming Guide for accuracy tables. [1][2]
    • See related single-precision functions sinf(), sinpif(), cospif() in the CUDA Math API docs. [1][2]

Sources: NVIDIA CUDA Math API and libdevice docs. [1][2][3]

References
[1] NVIDIA CUDA Math API (single-precision functions).
[2] NVIDIA CUDA Math API (sincospif / sinpif entries).
[3] NVIDIA libdevice User’s Guide: __nv_sincosf.


🏁 Script executed:

# Check if there are TODOs or issues related to this in the code
rg -i "todo|fixme|issue|precision|sincosf" src/target/codegen_cuda.cc

Repository: tile-ai/tilelang

Length of output: 1135


🏁 Script executed:

# Look at HIP codegen to see if it reserves similar functions
rg "ReserveKeywordsAsUnique_|ReserveName" src/target/codegen_hip.cc | head -20

Repository: tile-ai/tilelang

Length of output: 42


🏁 Script executed:

# Check git history or comments about why these specific functions were chosen
git log --oneline -n 20 -- src/target/codegen_cuda.cc 2>/dev/null || echo "Git not available"

Repository: tile-ai/tilelang

Length of output: 131


🌐 Web query:

CUDA math API double-precision functions sin cos sqrt exp log asin acos atan

💡 Result:

Short answer: CUDA provides full double-precision math functions for sin, cos, sqrt, exp, log, asin, acos, atan (and many others) as device-callable functions (e.g. device double sin(double), cos(double), sqrt(double), exp(double), log(double), asin(double), acos(double), atan(double)). sincos/sincospi and sinpi/cospi variants are also available. You do not need extra headers to call them from device code; see NVIDIA’s CUDA Math API (double-precision section) for details and accuracy notes. [1][2][3]

References:

  • NVIDIA CUDA Math API — Double Precision Mathematical Functions. [1]
  • NVIDIA CUDA Math API (log, sqrt, etc., entries). [2]
  • CUDA C++ Best Practices note on sincos / fast-math mapping. [3]

🌐 Web query:

CUDA half-precision math functions hsin hcos hsqrt half precision device functions

💡 Result:

Short answer: CUDA provides half-precision device math functions such as hsin, hcos, hsqrt (and many others) as device intrinsics returning __half; they are declared in cuda_fp16.h and operate in round-to-nearest-even mode. See the CUDA Math API reference for full list and semantics. [1][2]

Key points / prototypes

  • Header: #include <cuda_fp16.h>. [1][2]
  • Example prototypes (device-only):
    • device __half hsin(const __half a);
    • device __half hcos(const __half a);
    • device __half hsqrt(const __half a);
    • device __half hrsqrt(const __half a);
    • (plus many others: hexp, hlog, hrcp, hceil, hfloor, hrint, htrunc, atomicAdd for __half, etc.). [1][2]

Behavior notes

  • Operations return __half and use round-to-nearest-even rounding. [1][2]
  • NaN/Inf and special-case behavior (e.g., hsin(±∞) → NaN, hsqrt(x<0) → NaN, hrsqrt(±0) → ±∞) are documented per-function in the CUDA Math API. [2]

If you want, I can:

  • paste a minimal device-kernel example using hsin/hsqrt, or
  • list the full set of half math intrinsics and their per-function special-case rules.

Fix incorrect CUDA function names and consider double/half-precision functions.

Lines 80-81 contain incorrect function names: voidsincosf and voidsincospif should be sincosf and sincospif. The current typos prevent actual naming conflicts with the real CUDA functions.

Additionally, the implementation reserves only single-precision math functions. Consider also reserving:

  • Double-precision equivalents (e.g., sin, cos, sqrt, asin, acos, atan) which exist as CUDA device functions
  • Half-precision functions (e.g., hsin, hcos, hsqrt, hexp) from cuda_fp16.h

These would provide comprehensive coverage of CUDA math API intrinsics that could collide with user-defined identifiers.

🤖 Prompt for AI Agents
In src/target/codegen_cuda.cc around lines 219 to 311, the reserved keyword list
contains typos "voidsincosf" and "voidsincospif" (should be "sincosf" and
"sincospif"); replace those entries with the correct CUDA function names and add
reservations for common double-precision and half-precision CUDA math intrinsics
(e.g., sin, cos, sqrt, asin, acos, atan, exp, log and their f- and half-
counterparts where applicable, including names from cuda_fp16.h like hsin, hcos,
hsqt/hsqrt-style names) by calling name_supply_->ReserveName(...) for each new
identifier so the name supply prevents collisions with device math functions.


void CodeGenTileLangCUDA::PrintFuncPrefix(std::ostream &os) {
os << "extern \"C\" __global__ ";
}
Expand Down Expand Up @@ -3431,7 +3613,7 @@ void CodeGenTileLangCUDA::AddFunction(const GlobalVar &gvar,
// clear previous generated state.
this->InitFuncState(f);
// reserve keywords
ReserveKeywordsAsUnique();
ReserveKeywordsAsUnique_();

auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol);
ICHECK(global_symbol)
Expand Down
1 change: 1 addition & 0 deletions src/target/codegen_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class CodeGenTileLangCUDA final : public CodeGenC {
const PrimFunc &func, std::ostream &os);

protected:
void ReserveKeywordsAsUnique_();
virtual std::string GetBufferRef(DataType t, const BufferNode *buffer,
PrimExpr index) final;
void PrintCallExtern(Type ret_type, ffi::String global_symbol,
Expand Down
Loading