File tree
55 files changed
+3499
-3435
lines changed- 3rdparty
- examples/jax/encoder
- tests
- cpp/operator
- jax
- pytorch
- distributed
- transformer_engine
- common
- fused_attn
- fused_rope
- include/transformer_engine
- normalization
- layernorm
- rmsnorm
- triton
- jax
- cpp_extensions
- csrc/extensions
- flax
- pytorch
- attention
- dot_product_attention
- cpp_extensions
- csrc
- extensions
- module
- tensor
- triton
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
55 files changed
+3499
-3435
lines changedSubmodule cudnn-frontend updated 87 files
- .github/ISSUE_TEMPLATE/bug_report.md+1-1
- CMakeLists.txt+1-1
- benchmark/sdpa_benchmark/benchmark_flash_attention.py+4-4
- benchmark/sdpa_benchmark_training/README.md+4-4
- benchmark/sdpa_benchmark_training/benchmark_single_sdpa.py+53-96
- dlpack_version.txt+1-1
- include/cudnn_frontend/cudnn_interface.h+1-1
- include/cudnn_frontend/graph_interface.h+39
- include/cudnn_frontend/graph_properties.h+51-4
- include/cudnn_frontend/node/moe_grouped_matmul.h+196
- include/cudnn_frontend/node/scaled_dot_product_flash_attention.h+90-5
- include/cudnn_frontend/node/sdpa_support_surface.h+38-17
- include/cudnn_frontend/node_interface.h+11
- include/cudnn_frontend/utils/serialize.h+14
- include/cudnn_frontend_EngineConfigGenerator.h+4-2
- include/cudnn_frontend_get_plan.h+10-4
- include/cudnn_frontend_utils.h+92-2
- include/cudnn_frontend_version.h+1-1
- pyproject.toml+9-2
- python/cudnn/README.md+41
- python/cudnn/__init__.py+51-1
- python/cudnn/api_base.py+238
- python/cudnn/datatypes.py+75
- python/cudnn/gemm_amax/__init__.py+9
- python/cudnn/gemm_amax/api.py+842
- python/cudnn/gemm_amax/dense_blockscaled_gemm_persistent_amax.py+1.9k
- python/cudnn/gemm_swiglu/__init__.py+9
- python/cudnn/gemm_swiglu/api.py+626
- python/cudnn/gemm_swiglu/dense_gemm_persistent_swiglu.py+1.7k
- python/cudnn/wrapper.py+50-18
- python/properties.cpp+15-1
- python/pygraph/pygraph.cpp+88
- python/pygraph/pygraph.h+21
- python/pygraph/sdpa.cpp+11-1
- samples/cpp/CMakeLists.txt+3
- samples/cpp/sdpa/fp16_bwd_with_flexible_graphs.cpp+6
- samples/cpp/sdpa/fp16_fwd_with_block_mask.cpp+267
- samples/cpp/sdpa/fp8_bwd.cpp+10
- samples/cpp/sdpa/fp8_fwd.cpp+5
- samples/cpp/sdpa/fp8_fwd_bottom_right_causal_mask.cpp+5
- samples/cpp/sdpa/fp8_fwd_current_scaling.cpp+165
- samples/cpp/utils/helpers.h+6
- samples/legacy_samples/fp8_sample.cpp+2-4
- samples/python/00_introduction.ipynb+1-1
- samples/python/01_matmul_bias.ipynb-230
- samples/python/02_low_level_api.ipynb+2-2
- samples/python/02_sdpa_graph_serialization.ipynb-296
- samples/python/03_mixed_precision_matmul.ipynb-234
- samples/python/20_layernorm.ipynb-374
- samples/python/24_rmsnorm.ipynb-481
- samples/python/25_batchnorm.ipynb-321
- samples/python/25_layernorm_forward_training_and_backward_with_relu_bitmask.ipynb+2-2
- samples/python/27_instancenorm.ipynb-326
- samples/python/28_instancenorm_fusion.ipynb-357
- samples/python/29_layernorm_zero_centered_gamma_forward_training_and_backward.ipynb-587
- samples/python/30_layernorm_zero_centered_gamma_inference.ipynb-407
- samples/python/31_adaptive_layernorm_forward_training_and_backward.ipynb-541
- samples/python/32_adaptive_layernorm_inference.ipynb-494
- samples/python/33_layernorm_forward_training_and_backward_with_relu_bitmask.ipynb-583
- samples/python/50_scaled_dot_product_attention.ipynb-258
- samples/python/50_sdpa_forward.ipynb+1-1
- samples/python/51_scaled_dot_product_attention_backward.ipynb-370
- samples/python/51_sdpa_backward.ipynb+1-1
- samples/python/52_scaled_dot_product_attention_with_paged_caches.ipynb-517
- samples/python/52_sdpa_with_paged_caches.ipynb+3-1
- samples/python/53_scaled_dot_product_attention_decode_with_paged_caches.ipynb-486
- samples/python/53_sdpa_decode_with_paged_caches.ipynb+3-1
- setup.py+1-1
- test/python/conftest.py+59-51
- test/python/fe_api/test_gemm_amax.py+189
- test/python/fe_api/test_gemm_amax_utils.py+316
- test/python/fe_api/test_gemm_swiglu.py+184
- test/python/fe_api/test_gemm_swiglu_utils.py+231
- test/python/test_batchnorm.py+24-6
- test/python/test_block_scale_quantize.py+353
- test/python/test_conv_genstats.py+8-2
- test/python/test_flexible_sdpa.py+33-3
- test/python/test_instancenorm.py+8-2
- test/python/test_kernel_cache.py+11-1
- test/python/test_layernorm.py+14-2
- test/python/test_low_precision_matmul.py+34
- test/python/test_matmul_bias_relu.py+22-2
- test/python/test_mhas.py+25-9
- test/python/test_rmsnorm.py+8-2
- test/python/test_sdpa_with_caching.py+1-1
- test/python/test_silu_and_mul.py+12-3
- test/python/test_slice.py+11-1
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
672 | 672 | | |
673 | 673 | | |
674 | 674 | | |
675 | | - | |
| 675 | + | |
676 | 676 | | |
677 | 677 | | |
678 | 678 | | |
| |||
710 | 710 | | |
711 | 711 | | |
712 | 712 | | |
713 | | - | |
| 713 | + | |
714 | 714 | | |
715 | 715 | | |
716 | 716 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
114 | 114 | | |
115 | 115 | | |
116 | 116 | | |
| 117 | + | |
117 | 118 | | |
118 | | - | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
| 128 | + | |
119 | 129 | | |
120 | 130 | | |
121 | 131 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
45 | 45 | | |
46 | 46 | | |
47 | 47 | | |
48 | | - | |
49 | 48 | | |
50 | 49 | | |
51 | 50 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
389 | 389 | | |
390 | 390 | | |
391 | 391 | | |
| 392 | + | |
392 | 393 | | |
393 | 394 | | |
394 | 395 | | |
| |||
417 | 418 | | |
418 | 419 | | |
419 | 420 | | |
| 421 | + | |
420 | 422 | | |
421 | 423 | | |
422 | 424 | | |
| |||
Lines changed: 66 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
11 | 11 | | |
12 | 12 | | |
13 | 13 | | |
14 | | - | |
| 14 | + | |
15 | 15 | | |
16 | 16 | | |
17 | 17 | | |
| |||
22 | 22 | | |
23 | 23 | | |
24 | 24 | | |
| 25 | + | |
25 | 26 | | |
26 | 27 | | |
27 | 28 | | |
| |||
32 | 33 | | |
33 | 34 | | |
34 | 35 | | |
| 36 | + | |
| 37 | + | |
35 | 38 | | |
36 | 39 | | |
37 | 40 | | |
38 | 41 | | |
39 | 42 | | |
| 43 | + | |
| 44 | + | |
40 | 45 | | |
41 | 46 | | |
42 | 47 | | |
| |||
253 | 258 | | |
254 | 259 | | |
255 | 260 | | |
| 261 | + | |
| 262 | + | |
| 263 | + | |
| 264 | + | |
| 265 | + | |
| 266 | + | |
| 267 | + | |
| 268 | + | |
| 269 | + | |
| 270 | + | |
| 271 | + | |
| 272 | + | |
| 273 | + | |
| 274 | + | |
| 275 | + | |
| 276 | + | |
| 277 | + | |
| 278 | + | |
| 279 | + | |
| 280 | + | |
| 281 | + | |
| 282 | + | |
| 283 | + | |
| 284 | + | |
| 285 | + | |
| 286 | + | |
| 287 | + | |
| 288 | + | |
| 289 | + | |
| 290 | + | |
| 291 | + | |
| 292 | + | |
| 293 | + | |
| 294 | + | |
| 295 | + | |
| 296 | + | |
| 297 | + | |
| 298 | + | |
| 299 | + | |
| 300 | + | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
| 305 | + | |
| 306 | + | |
| 307 | + | |
| 308 | + | |
| 309 | + | |
| 310 | + | |
| 311 | + | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
| 319 | + | |
| 320 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
364 | 364 | | |
365 | 365 | | |
366 | 366 | | |
367 | | - | |
| 367 | + | |
368 | 368 | | |
369 | | - | |
| 369 | + | |
370 | 370 | | |
371 | 371 | | |
372 | 372 | | |
| |||
1035 | 1035 | | |
1036 | 1036 | | |
1037 | 1037 | | |
1038 | | - | |
| 1038 | + | |
1039 | 1039 | | |
1040 | 1040 | | |
1041 | 1041 | | |
1042 | 1042 | | |
1043 | 1043 | | |
1044 | 1044 | | |
1045 | | - | |
| 1045 | + | |
1046 | 1046 | | |
1047 | 1047 | | |
1048 | 1048 | | |
| |||
1199 | 1199 | | |
1200 | 1200 | | |
1201 | 1201 | | |
1202 | | - | |
| 1202 | + | |
1203 | 1203 | | |
1204 | 1204 | | |
1205 | 1205 | | |
1206 | 1206 | | |
1207 | 1207 | | |
1208 | 1208 | | |
1209 | | - | |
| 1209 | + | |
1210 | 1210 | | |
1211 | 1211 | | |
1212 | 1212 | | |
| |||
0 commit comments