Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0adc241
Refactors CustomSchedule.py for scale out of schedules (#2574)
talumbau Nov 11, 2025
7641aa7
add 192x256x64TN
jfactory07 Nov 12, 2025
8f47b79
typo
jfactory07 Nov 12, 2025
1b39b51
add test
jfactory07 Nov 10, 2025
485c703
more test
jfactory07 Nov 12, 2025
c1b8911
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 14, 2025
5a943a5
typo
jfactory07 Nov 14, 2025
461fb4f
fix random fail
jfactory07 Nov 17, 2025
634b8ae
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 20, 2025
ba7d50b
refine
jfactory07 Nov 20, 2025
366831f
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 20, 2025
3cebdbf
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 20, 2025
564ae51
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 21, 2025
4d3c807
remove typo
jfactory07 Nov 21, 2025
5a79662
typo
jfactory07 Nov 21, 2025
9f33c6f
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 24, 2025
f0865c6
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 24, 2025
3abb511
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 25, 2025
6c7223e
typo
jfactory07 Nov 25, 2025
f9f9b0c
typo
jfactory07 Nov 25, 2025
1c4f0c0
typo
jfactory07 Nov 25, 2025
84c1192
rename
jfactory07 Nov 25, 2025
eed850a
refine
jfactory07 Nov 26, 2025
5a125e0
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 26, 2025
efda36c
typo
jfactory07 Nov 28, 2025
0dca66c
Merge branch 'hipblaslt_common_cms_dev' into users/jzhou/cms-192x256x…
jfactory07 Nov 28, 2025
c4b1d4a
disable LDSTrInst in test
jfactory07 Nov 28, 2025
77c3ebd
refine
jfactory07 Dec 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,49 @@ def _get_schedule_192x256x64_16bit(kernel, useLDSTr, TLDS):
SBarrier(comment=""),
SWaitCnt(dscnt=0, vlcnt=-1, vscnt=-1, comment="Wait for LRB0 to complete"),]
nglshift = nllshift = 14 # vmcnt shift for ngl and nll
elif isTN(kernel) and not useLDSTr and TLDS == 1:
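# syncTable is a flat list of alternating (index, code) pairs: even slots feed optSchedule['SYNC'], odd slots become syncCode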
syncTable = [-1, SWaitCnt(dscnt=7, vlcnt=-1, vscnt=-1, comment="for LRB1-0"),
5, SWaitCnt(dscnt=6+4, vlcnt=-1, vscnt=-1, comment="for LRB1-1"),
8, SBarrier(comment="for GRA start"),
11, SWaitCnt(dscnt=5+8, vlcnt=-1, vscnt=-1, comment="for LRB1-2"),
17, SWaitCnt(dscnt=4+11, vlcnt=-1, vscnt=-1, comment="for LRB1-3"),
23, SWaitCnt(dscnt=15, vlcnt=-1, vscnt=-1, comment="for LRB1-4:6"),
41, SWaitCnt(dscnt=14, vlcnt=-1, vscnt=-1, comment="for LRB1-7"),
46, SWaitCnt(dscnt=-1, vlcnt=14, vscnt=-1, comment="for LRA1"),
47, SWaitCnt(dscnt=7, vlcnt=-1, vscnt=-1, comment="for LRB0-0"),
48, SBarrier(comment="for LRA1 start"),
51, SWaitCnt(dscnt=1, vlcnt=-1, vscnt=-1, comment="for LRB1"),
78, SWaitCnt(dscnt=-1, vlcnt=14, vscnt=-1, comment="for LRB0"),
78, SBarrier(comment="for LRB1 start"),]
optSchedule = {
'SYNC' : [syncTable[::2]],
'GRIncA': [[0,1,2,3,4,5,6,7,8]],
'GRIncB': [[9,10,11,12,13,14,15,16,17]],

'LRA0' : [[0, 2, 3, 4, 5, 6]],
'LRB0' : [[7, 9, 11, 13, 15, 17, 19, 21],
[8, 10, 12, 14, 16, 18, 20, 22]],
'GRA' : [[8,8, 10,10, 12,12, 14,14, 26,26, 31,31],
[9,9, 11,11, 13,13, 15,15, 27,27, 32,32]],
Comment thread
jfactory07 marked this conversation as resolved.
Outdated

'GRB' : [[46,46, 50,50, 54,54, 58,58, 62,62, 66,66, 70,70, 76,76],
Comment thread
jfactory07 marked this conversation as resolved.
[47,47, 51,51, 55,55, 59,59, 63,63, 67,67, 71,71, 77,77]],
'LRA1' : [[48, 52, 56, 58, 60, 64],
[49, 53, 57, 59, 61, 65]],
# 0 1 2 3 4 5 6 7
'LRB1' : [[78, 80, 82, 84, 86, 90, 92, 94],
[79, 81, 83, 85, 87, 91, 93, 95]],

'LRSA' : [[22]],
'LRSB' : [[23]],

'LWSA' : [[20]],
'LWSB' : [[78]],
'LCC' : [[95, 95]],
}
syncCode = syncTable[1::2]
nglshift = nllshift = 14 # vmcnt shift for ngl and nll
elif isNT(kernel) and not useLDSTr and TLDS == 0:
kernel["UsePLRPack"] = True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,42 @@ BenchmarkProblems:
- Range: [[8192], [7168], [1], [64, 64, 256]]
- Exact: [4096, 3584, 1, 8192]
- BiasTypeArgs: ['b']
- # BenchmarkProblemSizeGroup - Standard - All problem
InitialSolutionParameters:
BenchmarkCommonParameters:
- KernelLanguage: ["Assembly"]
ForkParameters:
- MatrixInstruction:
- [16, 16,32, 1, 1, 8, 6, 2,2 ]
- PrefetchGlobalRead: [2]
- PrefetchLocalRead: [1]
- DepthU: [64]
- ScheduleIterAlg: [3]
- ExpandPointerSwap: [0]
- TransposeLDS: [1] #0,1
- LocalReadVectorWidth: [8]
- GlobalReadVectorWidthA: [8]
- GlobalReadVectorWidthB: [8]
- DirectToLds: [1]
- StreamK: [3]
- LdsPadA: [8] #[-1]
- LdsPadB: [8] #[-1]
- StaggerU: [0]
- WorkGroupMapping: [16]
- WorkGroupMappingXCC: [2]
- 1LDSBuffer: [0]
- NonTemporalD: [4]
- SourceSwap: [1]
- UseSgprForGRO: [0]
- UseCustomMainLoopSchedule: [0, 1]
BenchmarkJoinParameters:
BenchmarkFinalParameters:
- ProblemSizes:
- Range: [[256], [192], [1], [64, 64, 256]]
Comment thread
jfactory07 marked this conversation as resolved.
- Range: [[256], [192], [1], [1, 1, 64]]
- Range: [[256], [192], [1], [32, 64, 256]]
- Exact: [4096, 3072, 1, 8192]
- BiasTypeArgs: ['b']

- # BenchmarkProblemSizeGroup - Standard - All problem
InitialSolutionParameters:
Expand Down
Loading