-
Notifications
You must be signed in to change notification settings - Fork 450
[Bugfix] Mangle Single Precision Mathematical Functions of cuda math api #1602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
24eaa00
e76ac1d
4348da7
5ea73df
0065aaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -216,6 +216,188 @@ CodeGenTileLangCUDA::CodeGenTileLangCUDA() { | |
| runtime::symbol::tvm_global_barrier_state); | ||
| } | ||
|
|
||
| void CodeGenTileLangCUDA::ReserveKeywordsAsUnique_() { | ||
| CodeGenC::ReserveKeywordsAsUnique(); | ||
| name_supply_->ReserveName("max"); | ||
| name_supply_->ReserveName("min"); | ||
| name_supply_->ReserveName("isfinite"); | ||
| name_supply_->ReserveName("isinf"); | ||
| name_supply_->ReserveName("isnan"); | ||
|
|
||
| // skip single precision mathematical functions | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix misleading comments. Lines 221 and 313 say "skip" but the code actually reserves these function names to prevent collisions. The comments should read something like "reserve single/double precision mathematical functions" instead. 🔎 Proposed fix- // skip single precision mathematical functions
+ // reserve single precision mathematical functions
name_supply_->ReserveName("acosf");- // skip double precision mathematical functions
+ // reserve double precision mathematical functions
name_supply_->ReserveName("acos");Also applies to: 313-313 🤖 Prompt for AI Agents |
||
| name_supply_->ReserveName("acosf"); | ||
| name_supply_->ReserveName("acoshf"); | ||
| name_supply_->ReserveName("asinf"); | ||
| name_supply_->ReserveName("asinhf"); | ||
| name_supply_->ReserveName("atan2f"); | ||
| name_supply_->ReserveName("atanf"); | ||
| name_supply_->ReserveName("atanhf"); | ||
| name_supply_->ReserveName("cbrtf"); | ||
| name_supply_->ReserveName("ceilf"); | ||
| name_supply_->ReserveName("copysignf"); | ||
| name_supply_->ReserveName("cosf"); | ||
| name_supply_->ReserveName("coshf"); | ||
| name_supply_->ReserveName("cospif"); | ||
| name_supply_->ReserveName("cyl_bessel_i0f"); | ||
| name_supply_->ReserveName("cyl_bessel_i1f"); | ||
| name_supply_->ReserveName("erfcf"); | ||
| name_supply_->ReserveName("erfcinvf"); | ||
| name_supply_->ReserveName("erfcxf"); | ||
| name_supply_->ReserveName("erff"); | ||
| name_supply_->ReserveName("erfinvf"); | ||
| name_supply_->ReserveName("exp10f"); | ||
| name_supply_->ReserveName("exp2f"); | ||
| name_supply_->ReserveName("expf"); | ||
| name_supply_->ReserveName("expm1f"); | ||
| name_supply_->ReserveName("fabsf"); | ||
| name_supply_->ReserveName("fdimf"); | ||
| name_supply_->ReserveName("fdividef"); | ||
| name_supply_->ReserveName("floorf"); | ||
| name_supply_->ReserveName("fmaf"); | ||
| name_supply_->ReserveName("fmaxf"); | ||
| name_supply_->ReserveName("fminf"); | ||
| name_supply_->ReserveName("fmodf"); | ||
| name_supply_->ReserveName("frexpf"); | ||
| name_supply_->ReserveName("hypotf"); | ||
| name_supply_->ReserveName("ilogbf"); | ||
| name_supply_->ReserveName("j0f"); | ||
| name_supply_->ReserveName("j1f"); | ||
| name_supply_->ReserveName("jnf"); | ||
| name_supply_->ReserveName("ldexpf"); | ||
| name_supply_->ReserveName("lgammaf"); | ||
| name_supply_->ReserveName("llrintf"); | ||
| name_supply_->ReserveName("llroundf"); | ||
| name_supply_->ReserveName("log10f"); | ||
| name_supply_->ReserveName("log1pf"); | ||
| name_supply_->ReserveName("log2f"); | ||
| name_supply_->ReserveName("logbf"); | ||
| name_supply_->ReserveName("logf"); | ||
| name_supply_->ReserveName("lrintf"); | ||
| name_supply_->ReserveName("lroundf"); | ||
| name_supply_->ReserveName("modff"); | ||
| name_supply_->ReserveName("nanf"); | ||
| name_supply_->ReserveName("nearbyintf"); | ||
| name_supply_->ReserveName("nextafterf"); | ||
| name_supply_->ReserveName("norm3df"); | ||
| name_supply_->ReserveName("norm4df"); | ||
| name_supply_->ReserveName("normcdff"); | ||
| name_supply_->ReserveName("normcdfinvf"); | ||
| name_supply_->ReserveName("normf"); | ||
| name_supply_->ReserveName("powf"); | ||
| name_supply_->ReserveName("rcbrtf"); | ||
| name_supply_->ReserveName("remainderf"); | ||
| name_supply_->ReserveName("remquof"); | ||
| name_supply_->ReserveName("rhypotf"); | ||
| name_supply_->ReserveName("rintf"); | ||
| name_supply_->ReserveName("rnorm3df"); | ||
| name_supply_->ReserveName("rnorm4df"); | ||
| name_supply_->ReserveName("rnormf"); | ||
| name_supply_->ReserveName("roundf"); | ||
| name_supply_->ReserveName("rsqrtf"); | ||
| name_supply_->ReserveName("scalblnf"); | ||
| name_supply_->ReserveName("scalbnf"); | ||
| name_supply_->ReserveName("signbit"); | ||
| name_supply_->ReserveName("sincosf"); | ||
| name_supply_->ReserveName("sincospif"); | ||
| name_supply_->ReserveName("sinf"); | ||
| name_supply_->ReserveName("sinhf"); | ||
| name_supply_->ReserveName("sinpif"); | ||
| name_supply_->ReserveName("sqrtf"); | ||
| name_supply_->ReserveName("tanf"); | ||
| name_supply_->ReserveName("tanhf"); | ||
| name_supply_->ReserveName("tgammaf"); | ||
| name_supply_->ReserveName("truncf"); | ||
| name_supply_->ReserveName("y0f"); | ||
| name_supply_->ReserveName("y1f"); | ||
| name_supply_->ReserveName("ynf"); | ||
|
|
||
| // skip double precision mathematical functions | ||
| name_supply_->ReserveName("acos"); | ||
| name_supply_->ReserveName("acosh"); | ||
| name_supply_->ReserveName("asin"); | ||
| name_supply_->ReserveName("asinh"); | ||
| name_supply_->ReserveName("atan"); | ||
| name_supply_->ReserveName("atan2"); | ||
| name_supply_->ReserveName("atanh"); | ||
| name_supply_->ReserveName("cbrt"); | ||
| name_supply_->ReserveName("ceil"); | ||
| name_supply_->ReserveName("copysign"); | ||
| name_supply_->ReserveName("cos"); | ||
| name_supply_->ReserveName("cosh"); | ||
| name_supply_->ReserveName("cospi"); | ||
| name_supply_->ReserveName("cyl_bessel_i0"); | ||
| name_supply_->ReserveName("cyl_bessel_i1"); | ||
| name_supply_->ReserveName("erf"); | ||
| name_supply_->ReserveName("erfc"); | ||
| name_supply_->ReserveName("erfcinv"); | ||
| name_supply_->ReserveName("erfcx"); | ||
| name_supply_->ReserveName("erfinv"); | ||
| name_supply_->ReserveName("exp"); | ||
| name_supply_->ReserveName("exp10"); | ||
| name_supply_->ReserveName("exp2"); | ||
| name_supply_->ReserveName("expm1"); | ||
| name_supply_->ReserveName("fabs"); | ||
| name_supply_->ReserveName("fdim"); | ||
| name_supply_->ReserveName("floor"); | ||
| name_supply_->ReserveName("fma"); | ||
| name_supply_->ReserveName("fmax"); | ||
| name_supply_->ReserveName("fmin"); | ||
| name_supply_->ReserveName("fmod"); | ||
| name_supply_->ReserveName("frexp"); | ||
| name_supply_->ReserveName("hypot"); | ||
| name_supply_->ReserveName("ilogb"); | ||
| name_supply_->ReserveName("j0"); | ||
| name_supply_->ReserveName("j1"); | ||
| name_supply_->ReserveName("jn"); | ||
| name_supply_->ReserveName("ldexp"); | ||
| name_supply_->ReserveName("lgamma"); | ||
| name_supply_->ReserveName("llrint"); | ||
| name_supply_->ReserveName("llround"); | ||
| name_supply_->ReserveName("log"); | ||
| name_supply_->ReserveName("log10"); | ||
| name_supply_->ReserveName("log1p"); | ||
| name_supply_->ReserveName("log2"); | ||
| name_supply_->ReserveName("logb"); | ||
| name_supply_->ReserveName("lrint"); | ||
| name_supply_->ReserveName("lround"); | ||
| name_supply_->ReserveName("modf"); | ||
| name_supply_->ReserveName("nan"); | ||
| name_supply_->ReserveName("nearbyint"); | ||
| name_supply_->ReserveName("nextafter"); | ||
| name_supply_->ReserveName("norm"); | ||
| name_supply_->ReserveName("norm3d"); | ||
| name_supply_->ReserveName("norm4d"); | ||
| name_supply_->ReserveName("normcdf"); | ||
| name_supply_->ReserveName("normcdfinv"); | ||
| name_supply_->ReserveName("pow"); | ||
| name_supply_->ReserveName("rcbrt"); | ||
| name_supply_->ReserveName("remainder"); | ||
| name_supply_->ReserveName("remquo"); | ||
| name_supply_->ReserveName("rhypot"); | ||
| name_supply_->ReserveName("rint"); | ||
| name_supply_->ReserveName("rnorm"); | ||
| name_supply_->ReserveName("rnorm3d"); | ||
| name_supply_->ReserveName("rnorm4d"); | ||
| name_supply_->ReserveName("round"); | ||
| name_supply_->ReserveName("rsqrt"); | ||
| name_supply_->ReserveName("scalbln"); | ||
| name_supply_->ReserveName("scalbn"); | ||
| name_supply_->ReserveName("signbit"); | ||
| name_supply_->ReserveName("sin"); | ||
| name_supply_->ReserveName("sincos"); | ||
| name_supply_->ReserveName("sincospi"); | ||
| name_supply_->ReserveName("sinh"); | ||
| name_supply_->ReserveName("sinpi"); | ||
| name_supply_->ReserveName("sqrt"); | ||
| name_supply_->ReserveName("tan"); | ||
| name_supply_->ReserveName("tanh"); | ||
| name_supply_->ReserveName("tgamma"); | ||
| name_supply_->ReserveName("trunc"); | ||
| name_supply_->ReserveName("y0"); | ||
| name_supply_->ReserveName("y1"); | ||
| name_supply_->ReserveName("yn"); | ||
| } | ||
|
Comment on lines
219
to
399
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's verify the file exists and check the exact lines mentioned
cd /tmp/repo && wc -l src/target/codegen_cuda.cc 2>/dev/null || echo "File not found, searching for it..."
find . -name "codegen_cuda.cc" -type f 2>/dev/null | head -5Repository: tile-ai/tilelang Length of output: 201 🏁 Script executed: # Once we locate the file, examine lines 219-311
sed -n '219,311p' src/target/codegen_cuda.cc 2>/dev/null | cat -nRepository: tile-ai/tilelang Length of output: 4326 🏁 Script executed: # Search for context around this function and any related comments
rg -A 5 "ReserveKeywordsAsUnique_" src/target/codegen_cuda.cc 2>/dev/null || echo "Pattern not found"Repository: tile-ai/tilelang Length of output: 566 🏁 Script executed: # Look for issue #1561 or related comments about naming conflicts
rg "1561|naming.*conflict|ReserveName" src/target/codegen_cuda.cc | head -20Repository: tile-ai/tilelang Length of output: 850 🏁 Script executed: # Search for issue #1561 or related discussions in the codebase
rg -i "1561|sincosf|naming conflict|reserve" --type cc --type h src/target/ | grep -i "issue\|1561\|sincosf\|reserve" | head -20Repository: tile-ai/tilelang Length of output: 86 🏁 Script executed: # Check if there are any comments or documentation about these reserved names
rg -B 5 -A 2 "ReserveKeywordsAsUnique" src/target/codegen_cuda.cc | head -40Repository: tile-ai/tilelang Length of output: 642 🏁 Script executed: # Look for related CUDA function reservations in CodeGenC
find . -name "*.cc" -o -name "*.h" | xargs grep -l "ReserveKeywordsAsUnique" | head -5Repository: tile-ai/tilelang Length of output: 197 🏁 Script executed: # Check the parent class CodeGenC to understand what's already reserved
rg -A 50 "void.*CodeGenC::ReserveKeywordsAsUnique" src/target/codegen_c.cc 2>/dev/null | head -80Repository: tile-ai/tilelang Length of output: 42 🌐 Web query:
💡 Result: Summary of CUDA sin/cos combined functions and docs
Sources: NVIDIA CUDA Math API and libdevice docs. [1][2][3] References 🏁 Script executed: # Check if there are TODOs or issues related to this in the code
rg -i "todo|fixme|issue|precision|sincosf" src/target/codegen_cuda.ccRepository: tile-ai/tilelang Length of output: 1135 🏁 Script executed: # Look at HIP codegen to see if it reserves similar functions
rg "ReserveKeywordsAsUnique_|ReserveName" src/target/codegen_hip.cc | head -20Repository: tile-ai/tilelang Length of output: 42 🏁 Script executed: # Check git history or comments about why these specific functions were chosen
git log --oneline -n 20 -- src/target/codegen_cuda.cc 2>/dev/null || echo "Git not available"Repository: tile-ai/tilelang Length of output: 131 🌐 Web query:
💡 Result: Short answer: CUDA provides full double-precision math functions for sin, cos, sqrt, exp, log, asin, acos, atan (and many others) as device-callable functions (e.g. device double sin(double), cos(double), sqrt(double), exp(double), log(double), asin(double), acos(double), atan(double)). sincos/sincospi and sinpi/cospi variants are also available. You do not need extra headers to call them from device code; see NVIDIA’s CUDA Math API (double-precision section) for details and accuracy notes. [1][2][3] References:
🌐 Web query:
💡 Result: Short answer: CUDA provides half-precision device math functions such as hsin, hcos, hsqrt (and many others) as device intrinsics returning __half; they are declared in cuda_fp16.h and operate in round-to-nearest-even mode. See the CUDA Math API reference for full list and semantics. [1][2] Key points / prototypes
Behavior notes
If you want, I can:
Fix incorrect CUDA function names and consider double/half-precision functions. Lines 80-81 contain incorrect function names: Additionally, the implementation reserves only single-precision math functions. Consider also reserving:
These would provide comprehensive coverage of CUDA math API intrinsics that could collide with user-defined identifiers. 🤖 Prompt for AI Agents |
||
|
|
||
| void CodeGenTileLangCUDA::PrintFuncPrefix(std::ostream &os) { | ||
| os << "extern \"C\" __global__ "; | ||
| } | ||
|
|
@@ -3431,7 +3613,7 @@ void CodeGenTileLangCUDA::AddFunction(const GlobalVar &gvar, | |
| // clear previous generated state. | ||
| this->InitFuncState(f); | ||
| // reserve keywords | ||
| ReserveKeywordsAsUnique(); | ||
| ReserveKeywordsAsUnique_(); | ||
|
|
||
| auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol); | ||
| ICHECK(global_symbol) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.