Commit 5364bac
committed
[ARITH][BUGFIX] Fix a bug of iter map floormod(x,2) simplify
This PR fixes a bug that was previously introduced in iter map detection.
Specifically, y - (x % 2) was simplified to y + (x % 2) - 1,
which is wrong. The correct rule is y + ((x + 1) % 2) - 1,
but that rule would change the base iterator, which is not desirable here.
We also removed the rule that simplifies (x + 1) % 2 => 1 - x % 2,
as the benefit is minimal and it introduces extra negative coefficients
that hurt analysis in general (as negative coefficients are
harder to handle in many cases).
1 parent 4e07a8e commit 5364bac
File tree
7 files changed
+87
-66
lines changed- 3rdparty
- src/arith
- tests/python/unittest
7 files changed
+87
-66
lines changed- docs/annotated.html+1-1
- docs/default__mma__core__simt_8h_source.html+1-1
- docs/hierarchy.html+1-1
- docs/namespacecutlass_1_1transform.html+1-1
- docs/pitch__linear__thread__map_8h.html+1-1
- docs/pitch__linear__thread__map_8h_source.html+1-1
- docs/structcutlass_1_1transform_1_1TransposePitchLinearThreadMap2DThreadTile.html+1-1
- examples/41_fused_multi_head_attention/debug_utils.h+46-7
- examples/41_fused_multi_head_attention/default_fmha_grouped.h+6-6
- examples/41_fused_multi_head_attention/epilogue/epilogue_pipelined.h
- examples/41_fused_multi_head_attention/epilogue/epilogue_rescale_output.h
- examples/41_fused_multi_head_attention/epilogue/epilogue_thread_apply_logsumexp.h
- examples/41_fused_multi_head_attention/fmha_grouped.h+132-11
- examples/41_fused_multi_head_attention/fused_multihead_attention_fixed_seqlen.cu+7-2
- examples/41_fused_multi_head_attention/gemm/find_default_mma.h+2
- examples/41_fused_multi_head_attention/gemm/mma_accum_lambda_iterator.h+17-152
- examples/41_fused_multi_head_attention/gemm/mma_from_smem.h+263-23
- examples/41_fused_multi_head_attention/kernel_forward.h+437-52
- examples/41_fused_multi_head_attention/transform/tile_smem_loader.h+88
- include/cutlass/gemm/device/base_grouped.h+1-1
- include/cutlass/gemm/kernel/gemm_universal_streamk.h+52-126
- include/cutlass/gemm/threadblock/threadblock_swizzle_streamk.h+2-9
- include/cutlass/transform/pitch_linear_thread_map.h+1-1
- test/unit/common/cutlass_unit_test.h+1-1
- tools/library/scripts/generator.py+36-16
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
898 | 898 | | |
899 | 899 | | |
900 | 900 | | |
901 | | - | |
902 | | - | |
903 | | - | |
904 | | - | |
905 | | - | |
906 | 901 | | |
907 | 902 | | |
908 | 903 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
306 | 306 | | |
307 | 307 | | |
308 | 308 | | |
| 309 | + | |
| 310 | + | |
| 311 | + | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
309 | 318 | | |
310 | 319 | | |
311 | 320 | | |
| |||
1018 | 1027 | | |
1019 | 1028 | | |
1020 | 1029 | | |
1021 | | - | |
1022 | | - | |
1023 | | - | |
1024 | | - | |
| 1030 | + | |
| 1031 | + | |
| 1032 | + | |
| 1033 | + | |
1025 | 1034 | | |
1026 | 1035 | | |
1027 | 1036 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
415 | 415 | | |
416 | 416 | | |
417 | 417 | | |
| 418 | + | |
| 419 | + | |
| 420 | + | |
| 421 | + | |
| 422 | + | |
| 423 | + | |
| 424 | + | |
418 | 425 | | |
419 | 426 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
199 | 199 | | |
200 | 200 | | |
201 | 201 | | |
202 | | - | |
| 202 | + | |
203 | 203 | | |
204 | 204 | | |
205 | 205 | | |
206 | | - | |
207 | | - | |
208 | | - | |
209 | | - | |
| 206 | + | |
| 207 | + | |
| 208 | + | |
| 209 | + | |
210 | 210 | | |
211 | 211 | | |
212 | 212 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
392 | 392 | | |
393 | 393 | | |
394 | 394 | | |
395 | | - | |
396 | | - | |
| 395 | + | |
| 396 | + | |
397 | 397 | | |
398 | 398 | | |
399 | 399 | | |
| |||
554 | 554 | | |
555 | 555 | | |
556 | 556 | | |
557 | | - | |
558 | 557 | | |
559 | 558 | | |
560 | 559 | | |
561 | 560 | | |
562 | 561 | | |
563 | 562 | | |
| 563 | + | |
| 564 | + | |
| 565 | + | |
564 | 566 | | |
565 | 567 | | |
566 | 568 | | |
| |||
574 | 576 | | |
575 | 577 | | |
576 | 578 | | |
| 579 | + | |
| 580 | + | |
| 581 | + | |
577 | 582 | | |
578 | 583 | | |
579 | 584 | | |
580 | 585 | | |
581 | 586 | | |
582 | | - | |
583 | | - | |
584 | 587 | | |
585 | 588 | | |
586 | 589 | | |
| |||
592 | 595 | | |
593 | 596 | | |
594 | 597 | | |
595 | | - | |
596 | | - | |
| 598 | + | |
| 599 | + | |
| 600 | + | |
| 601 | + | |
| 602 | + | |
| 603 | + | |
| 604 | + | |
597 | 605 | | |
598 | 606 | | |
599 | 607 | | |
| |||
Lines changed: 46 additions & 44 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
139 | 139 | | |
140 | 140 | | |
141 | 141 | | |
142 | | - | |
143 | | - | |
| 142 | + | |
| 143 | + | |
144 | 144 | | |
145 | 145 | | |
146 | 146 | | |
| |||
202 | 202 | | |
203 | 203 | | |
204 | 204 | | |
205 | | - | |
206 | | - | |
| 205 | + | |
| 206 | + | |
207 | 207 | | |
208 | 208 | | |
209 | 209 | | |
| |||
266 | 266 | | |
267 | 267 | | |
268 | 268 | | |
269 | | - | |
| 269 | + | |
270 | 270 | | |
271 | 271 | | |
272 | 272 | | |
| |||
278 | 278 | | |
279 | 279 | | |
280 | 280 | | |
281 | | - | |
| 281 | + | |
282 | 282 | | |
283 | 283 | | |
284 | 284 | | |
| |||
291 | 291 | | |
292 | 292 | | |
293 | 293 | | |
294 | | - | |
| 294 | + | |
295 | 295 | | |
296 | 296 | | |
297 | 297 | | |
| |||
391 | 391 | | |
392 | 392 | | |
393 | 393 | | |
394 | | - | |
395 | | - | |
| 394 | + | |
| 395 | + | |
396 | 396 | | |
397 | 397 | | |
398 | | - | |
399 | | - | |
| 398 | + | |
| 399 | + | |
400 | 400 | | |
401 | 401 | | |
402 | 402 | | |
| |||
475 | 475 | | |
476 | 476 | | |
477 | 477 | | |
478 | | - | |
| 478 | + | |
479 | 479 | | |
480 | 480 | | |
481 | 481 | | |
482 | | - | |
483 | | - | |
| 482 | + | |
| 483 | + | |
484 | 484 | | |
485 | 485 | | |
486 | 486 | | |
| |||
491 | 491 | | |
492 | 492 | | |
493 | 493 | | |
494 | | - | |
495 | | - | |
496 | | - | |
497 | | - | |
| 494 | + | |
| 495 | + | |
| 496 | + | |
| 497 | + | |
498 | 498 | | |
499 | 499 | | |
500 | 500 | | |
| |||
516 | 516 | | |
517 | 517 | | |
518 | 518 | | |
519 | | - | |
520 | | - | |
| 519 | + | |
| 520 | + | |
521 | 521 | | |
522 | 522 | | |
523 | 523 | | |
| |||
603 | 603 | | |
604 | 604 | | |
605 | 605 | | |
606 | | - | |
| 606 | + | |
607 | 607 | | |
608 | 608 | | |
609 | 609 | | |
610 | | - | |
611 | | - | |
| 610 | + | |
| 611 | + | |
612 | 612 | | |
613 | 613 | | |
614 | 614 | | |
615 | 615 | | |
616 | 616 | | |
617 | 617 | | |
618 | | - | |
619 | | - | |
620 | | - | |
621 | | - | |
| 618 | + | |
| 619 | + | |
| 620 | + | |
| 621 | + | |
622 | 622 | | |
623 | 623 | | |
624 | 624 | | |
625 | 625 | | |
626 | 626 | | |
627 | | - | |
| 627 | + | |
628 | 628 | | |
629 | | - | |
| 629 | + | |
630 | 630 | | |
631 | 631 | | |
632 | 632 | | |
| |||
640 | 640 | | |
641 | 641 | | |
642 | 642 | | |
643 | | - | |
644 | | - | |
| 643 | + | |
| 644 | + | |
645 | 645 | | |
646 | 646 | | |
647 | 647 | | |
| |||
768 | 768 | | |
769 | 769 | | |
770 | 770 | | |
771 | | - | |
772 | | - | |
| 771 | + | |
| 772 | + | |
773 | 773 | | |
774 | 774 | | |
775 | 775 | | |
| |||
799 | 799 | | |
800 | 800 | | |
801 | 801 | | |
802 | | - | |
803 | | - | |
| 802 | + | |
| 803 | + | |
804 | 804 | | |
805 | 805 | | |
806 | 806 | | |
| |||
929 | 929 | | |
930 | 930 | | |
931 | 931 | | |
932 | | - | |
933 | | - | |
| 932 | + | |
| 933 | + | |
| 934 | + | |
| 935 | + | |
934 | 936 | | |
935 | 937 | | |
936 | 938 | | |
937 | 939 | | |
938 | 940 | | |
939 | 941 | | |
940 | | - | |
| 942 | + | |
941 | 943 | | |
942 | 944 | | |
943 | 945 | | |
944 | | - | |
| 946 | + | |
945 | 947 | | |
946 | | - | |
| 948 | + | |
947 | 949 | | |
948 | | - | |
| 950 | + | |
949 | 951 | | |
950 | | - | |
| 952 | + | |
951 | 953 | | |
952 | 954 | | |
953 | 955 | | |
| |||
961 | 963 | | |
962 | 964 | | |
963 | 965 | | |
964 | | - | |
965 | | - | |
| 966 | + | |
| 967 | + | |
966 | 968 | | |
967 | 969 | | |
968 | 970 | | |
| |||
0 commit comments