diff --git a/projects/hipblaslt/clients/scripts/performance/problems/matmul_example_bench.yaml b/projects/hipblaslt/clients/scripts/performance/problems/matmul_example_bench.yaml new file mode 100755 index 000000000000..6bbb296d99bd --- /dev/null +++ b/projects/hipblaslt/clients/scripts/performance/problems/matmul_example_bench.yaml @@ -0,0 +1,17 @@ +--- +include: ../../../tests/data/hipblaslt_common.yaml +include: ../../../tests/data/matmul_common.yaml + +Tests: +- name: matmul_example + function: + matmul: *real_precisions + M: 128 + N: 128 + K: 128 + alpha_beta: *alpha_beta_range + transA_transB: *transA_transB_range + iters: 1000 + cold_iters: 1000 + rotating: 512 +... diff --git a/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset3_bench.yaml b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset3_bench.yaml new file mode 100755 index 000000000000..15b9d14a93c3 --- /dev/null +++ b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset3_bench.yaml @@ -0,0 +1,74 @@ +--- +include: ../../../tests/data/hipblaslt_common.yaml +include: ../../../tests/data/matmul_common.yaml + +Definitions: +Tests: +- name: hpa_half_precision_rcmt_mphb_nn + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 8192, N: 320, K: 320 } + - { M: 2048, N: 640, K: 640 } + - { M: 512, N: 1280, K: 1280 } + - { M: 8192, N: 320 , K: 320 } + - { M: 512, N: 10240, K: 1280 } + - { M: 2048, N: 5120, K: 640 } + - { M: 8192, N: 2560, K: 320 } + - { M: 512, N: 1280, K: 5120 } + - { M: 2048, N: 640, K: 2560 } + - { M: 154, N: 320, K: 768 } + - { M: 154, N: 1280, K: 768 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: hpa_half_precision_rcmt_mphb_b_nn + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 4096, N: 40, K: 4096 } + - { M: 1024, N: 80, K: 1024 } + - { M: 1024, N: 88, K: 77 } + initialization: trig_float + transA: N + transB: N + batch_count: 16 + 
iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: hpa_half_precision_rcmt_mphb_b_nt + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 4096, N: 4096, K: 40 } + - { M: 1024, N: 1024, K: 80 } + - { M: 4096, N: 77, K: 40 } + - { M: 256, N: 77, K: 160 } + - { M: 1024, N: 77, K: 160 } + initialization: trig_float + transA: N + transB: T + batch_count: 16 + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: hpa_half_precision_rcmt_mhb_tn + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 10240, N: 16 , K: 64640 } + initialization: trig_float + transA: T + transB: N + splitk: 1 + iters: 1000 + cold_iters: 4000 + rotating: 512 + api_method: 2 +... diff --git a/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset4_bench.yaml b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset4_bench.yaml new file mode 100755 index 000000000000..7e781e853626 --- /dev/null +++ b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset4_bench.yaml @@ -0,0 +1,165 @@ +--- +include: ../../../tests/data/hipblaslt_common.yaml +include: ../../../tests/data/matmul_common.yaml + +Definitions: + +test precision: &hpc_gemm_shapes +- *single_precision +- *hpa_bf16_precision +- *hpa_half_precision +- *f8_fnuz_precision_dst_f8_fnuz +- *f8_fnuz_precision_dst_fp16 + +Tests: +- name: hpa_half_precision_mad_summary + function: + matmul: *hpc_gemm_shapes + matrix_size: + - { M: 1, N: 10240, K: 8192 } + - { M: 1, N: 12288, K: 4096 } + - { M: 1, N: 12288, K: 12288 } + - { M: 1, N: 15360, K: 5120 } + - { M: 1, N: 16384, K: 2048 } + - { M: 1, N: 18432, K: 6144 } + - { M: 1, N: 20480, K: 5120 } + - { M: 1, N: 24576, K: 5120 } + - { M: 1, N: 25600, K: 6400 } + - { M: 1, N: 3072, K: 2048 } + - { M: 1, N: 3584, K: 3584 } + - { M: 1, N: 4096, K: 4096 } + - { M: 1, N: 4096, K: 11008 } + - { M: 1, N: 4096, K: 22016 } + - { M: 1, N: 4608, K: 3584 } + - { M: 1, N: 5120, K: 1536 } + - { M: 1, N: 5120, K: 12288 } + - { M: 1, N: 
57344, K: 8192 } + - { M: 1, N: 6144, K: 4096 } + - { M: 1, N: 8192, K: 8192 } + - { M: 2, N: 3072, K: 5120 } + - { M: 2, N: 5120, K: 12288 } + - { M: 4, N: 12288, K: 4096 } + - { M: 4, N: 4096, K: 4096 } + - { M: 4, N: 5120, K: 12288 } + - { M: 4, N: 6144, K: 4096 } + - { M: 4, N: 2560, K: 10240 } + - { M: 4, N: 44032, K: 4096 } + - { M: 4, N: 5120, K: 1536 } + - { M: 8, N: 10240, K: 8192 } + - { M: 8, N: 11008, K: 4096 } + - { M: 8, N: 12288, K: 4096 } + - { M: 8, N: 15360, K: 5120 } + - { M: 8, N: 16384, K: 2048 } + - { M: 8, N: 18432, K: 6144 } + - { M: 8, N: 19200, K: 6400 } + - { M: 8, N: 20480, K: 2560 } + - { M: 8, N: 24576, K: 5120 } + - { M: 8, N: 2560, K: 2560 } + - { M: 8, N: 28672, K: 4096 } + - { M: 8, N: 3072, K: 3072 } + - { M: 8, N: 3584, K: 3584 } + - { M: 8, N: 3840, K: 2560 } + - { M: 8, N: 4096, K: 13696 } + - { M: 8, N: 4608, K: 4608 } + - { M: 8, N: 5120, K: 5120 } + - { M: 8, N: 5760, K: 5120 } + - { M: 8, N: 6144, K: 4096 } + - { M: 8, N: 7680, K: 5120 } + - { M: 8, N: 8192, K: 24576 } + - { M: 16, N: 5120, K: 1536 } + - { M: 32, N: 10240, K: 8192 } + - { M: 32, N: 11008, K: 4096 } + - { M: 32, N: 12288, K: 4096 } + - { M: 32, N: 128256,K: 4096 } + - { M: 32, N: 13696, K: 4096 } + - { M: 32, N: 15360, K: 5120 } + - { M: 32, N: 16384, K: 4096 } + - { M: 32, N: 18432, K: 6144 } + - { M: 32, N: 20480, K: 5120 } + - { M: 32, N: 24576, K: 6144 } + - { M: 32, N: 2560, K: 10240 } + - { M: 32, N: 28672, K: 4096 } + - { M: 32, N: 3072, K: 3072 } + - { M: 32, N: 4096, K: 14336 } + - { M: 32, N: 44032, K: 4096 } + - { M: 32, N: 49152, K: 4096 } + - { M: 32, N: 5120, K: 1536 } + - { M: 32, N: 5760, K: 5120 } + - { M: 32, N: 6144, K: 4096 } + - { M: 32, N: 7168, K: 5120 } + - { M: 32, N: 7680, K: 5120 } + - { M: 32, N: 8192, K: 24576 } + - { M: 48, N: 10240, K: 10240 } + - { M: 64, N: 5120, K: 1536 } + - { M: 76, N: 32768, K: 4096 } + - { M: 77, N: 12288, K: 4096 } + - { M: 96, N: 35840, K: 6656 } + - { M: 96, N: 6656, K: 6656 } + - { M: 128, N: 5120, 
K: 12288 } + - { M: 128, N: 8192, K: 6144 } + - { M: 154, N: 2304, K: 768 } + - { M: 154, N: 3072, K: 1024 } + - { M: 197, N: 2304, K: 768 } + - { M: 256, N: 3072, K: 5120 } + - { M: 308, N: 2304, K: 768 } + - { M: 384, N: 2304, K: 768 } + - { M: 512, N: 3072, K: 5120 } + - { M: 1024, N: 1024, K: 4608 } + - { M: 1024, N: 16384, K: 4096 } + - { M: 1024, N: 20480, K: 5120 } + - { M: 1024, N: 2560, K: 10240 } + - { M: 1024, N: 3072, K: 2048 } + - { M: 1024, N: 4096, K: 1024 } + - { M: 1024, N: 5120, K: 13696 } + - { M: 1024, N: 768, K: 768 } + - { M: 1232, N: 2304, K: 768 } + - { M: 1536, N: 3072, K: 1024 } + - { M: 2048, N: 2048, K: 8192 } + - { M: 2048, N: 21504, K: 7168 } + - { M: 2048, N: 27648, K: 5120 } + - { M: 2048, N: 3072, K: 8192 } + - { M: 2048, N: 5120, K: 13824 } + - { M: 2048, N: 768, K: 768 } + - { M: 2464, N: 2304, K: 768 } + - { M: 3072, N: 2560, K: 2560 } + - { M: 3072, N: 49152, K: 12288 } + - { M: 4096, N: 1024, K: 4096 } + - { M: 4096, N: 16384, K: 2048 } + - { M: 4096, N: 2560, K: 10240 } + - { M: 4096, N: 3072, K: 1024 } + - { M: 4096, N: 5120, K: 5120 } + - { M: 4096, N: 768, K: 768 } + - { M: 8192, N: 20480, K: 2560 } + - { M: 8192, N: 3072, K: 1024 } + - { M: 8192, N: 4096, K: 1024 } + - { M: 8192, N: 768, K: 768 } + - { M: 10240, N: 5120, K: 5120 } + - { M: 12288, N: 1024, K: 4096 } + - { M: 14336, N: 16384, K: 6144 } + - { M: 16384, N: 1024, K: 4096 } + - { M: 16384, N: 16384, K: 4096 } + - { M: 16384, N: 2560, K: 2560 } + - { M: 16384, N: 3072, K: 1024 } + - { M: 16384, N: 4096, K: 13696 } + - { M: 16384, N: 768, K: 768 } + - { M: 20480, N: 128, K: 1 } + - { M: 25216, N: 2304, K: 768 } + - { M: 32768, N: 2048, K: 2048 } + - { M: 32768, N: 4544, K: 4544 } + - { M: 32768, N: 768, K: 768 } + - { M: 49152, N: 10240, K: 10240 } + - { M: 57344, N: 10240, K: 8192 } + - { M: 65536, N: 3840, K: 1280 } + - { M: 65536, N: 768, K: 768 } + - { M: 73728, N: 2304, K: 768 } + - { M: 77824, N: 6656, K: 6656 } + - { M: 802816, N: 16, K: 32 } + - { M: 
8000000, N: 16, K: 32 } + - { M: 10000000, N: 1, K: 2 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 +... diff --git a/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset5_bench.yaml b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset5_bench.yaml new file mode 100755 index 000000000000..e9e709e81680 --- /dev/null +++ b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset5_bench.yaml @@ -0,0 +1,41 @@ +--- +include: ../../../tests/data/hipblaslt_common.yaml +include: ../../../tests/data/matmul_common.yaml + +Definitions: +Tests: +- name: test_rvs + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 1024, N: 1024, K: 1024 } + - { M: 2048, N: 2048, K: 2048 } + - { M: 3072, N: 3072, K: 3072 } + - { M: 4096, N: 8192, K: 4096 } + - { M: 4864, N: 8192, K: 4096 } + - { M: 8096, N: 8096, K: 8096 } + - { M: 8192, N: 8192, K: 8192 } + - { M: 8192, N: 16384, K: 8192 } + - { M: 8640, N: 8640, K: 8640 } + - { M: 16182, N: 16182, K: 16182 } + - { M: 28000, N: 28000, K: 28000 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: test_rvs_batched + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 8640, N: 8640, K: 8640 } + initialization: trig_float + transA: N + transB: N + batch_count: 96 + iters: 1000 + cold_iters: 1000 + rotating: 512 +... 
diff --git a/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset6_bench.yaml b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset6_bench.yaml new file mode 100755 index 000000000000..bf7a62bf1ccb --- /dev/null +++ b/projects/hipblaslt/clients/scripts/performance/problems/matmul_probset6_bench.yaml @@ -0,0 +1,86 @@ +--- +include: ../../../tests/data/hipblaslt_common.yaml +include: ../../../tests/data/matmul_common.yaml + +Definitions: +Tests: +- name: real_precisions_agfhc + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 4096, N: 8192, K: 4096 } + - { M: 8192, N: 8192, K: 8192 } + - { M: 8192, N: 8192, K: 8960 } + - { M: 8192, N: 4096, K: 12288 } + - { M: 8192, N: 16384, K: 8192 } + - { M: 8640, N: 8640, K: 8640 } + - { M: 28000, N: 28000, K: 28000 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: real_precisions_agfhc_batched + function: + matmul: *hpa_half_precision + matrix_size: + - { M: 8640, N: 8640, K: 8640 } + initialization: trig_float + transA: N + transB: N + batch_count: 96 + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: f8_fnuz_precision_dst_f8_fnuz_agfhc + function: + matmul: *f8_fnuz_precision_dst_f8_fnuz + matrix_size: + - { M: 8192, N: 8192, K: 8192 } + - { M: 8192, N: 16384,K: 8192 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: i8_precision_dst_i8_agfhc + function: + matmul: *i8_precision_dst_i8 + matrix_size: + - { M: 8192, N: 17792,K: 13312 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: mxf6_mxf6_precision_dst_fp32_agfhc + function: + matmul: *mxf6_mxf6_precision_dst_fp32 + matrix_size: + - { M: 2048, N: 2048,K: 2048 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 + +- name: mxf4_mxf4_precision_dst_fp32_agfhc + function: + 
matmul: *mxf4_mxf4_precision_dst_fp32 + matrix_size: + - { M: 2048, N: 2048,K: 2048 } + initialization: trig_float + transA: N + transB: N + iters: 1000 + cold_iters: 1000 + rotating: 512 +... diff --git a/projects/hipblaslt/clients/scripts/performance/suites.py b/projects/hipblaslt/clients/scripts/performance/suites.py index c330136d59ba..068b241a1640 100644 --- a/projects/hipblaslt/clients/scripts/performance/suites.py +++ b/projects/hipblaslt/clients/scripts/performance/suites.py @@ -86,6 +86,12 @@ def amax_set_1(): yield ProblemSet(benchType="amax", name="benchset_1", problems=problemlist) +def matmul_examples(): + """gemm examples""" + + problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_example_bench.yaml"})] + yield ProblemSet(benchType="matmul", name="example", problems=problemlist) + def matmul_set_1(): """gemm benchset 1""" @@ -98,6 +104,30 @@ def matmul_set_2(): problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_probset2_bench.yaml"})] yield ProblemSet(benchType="matmul", name="benchset_2", problems=problemlist) +def matmul_set_3(): + """gemm benchset 3""" + + problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_probset3_bench.yaml"})] + yield ProblemSet(benchType="matmul", name="benchset_3", problems=problemlist) + +def matmul_set_4(): + """gemm benchset 4""" + + problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_probset4_bench.yaml"})] + yield ProblemSet(benchType="matmul", name="benchset_4", problems=problemlist) + +def matmul_set_5(): + """gemm benchset 5""" + + problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_probset5_bench.yaml"})] + yield ProblemSet(benchType="matmul", name="benchset_5", problems=problemlist) + +def matmul_set_6(): + """gemm benchset 6""" + + problemlist = [Problem(args={"--log_function_name" : "" , "--yaml" : "matmul_probset6_bench.yaml"})] + yield ProblemSet(benchType="matmul", name="benchset_6", 
problems=problemlist) + def ci_perf_job(): """run basic job for PR-CI""" @@ -121,10 +151,26 @@ def all(): yield from api_overhead() yield from amax_set_1() - if "942" in target_arch: + if "gfx942" in target_arch: + # Focused test sets for gfx942 yield from matmul_set_1() # this problemset is an initial test example for gfx942 yield from matmul_set_2() - # Can put any other interested set for gfx942 - elif "950" in target_arch: + yield from matmul_set_3() + yield from matmul_set_4() + yield from matmul_set_5() + yield from matmul_set_6() + elif "gfx950" in target_arch: + # Focused test set for gfx950 yield from matmul_set_2() - # Can put any other interested set for gfx950 + elif "gfx90a" in target_arch: + # FIXME: replace these example sets with the real problems of interest for gfx90a + yield from matmul_set_3() + yield from matmul_set_4() + yield from matmul_set_5() + elif "gfx110" in target_arch: + # FIXME: replace these example sets with the real problems of interest for the gfx110 family + yield from matmul_set_3() + elif "gfx120" in target_arch: + # FIXME: replace these example sets with the real problems of interest for the gfx120 family + yield from matmul_set_3() +