Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23,851 changes: 23,646 additions & 205 deletions library/src/blas3/Tensile/Logic/archive/vega20_Cijk_Ailk_Bjlk_SB.yaml

Large diffs are not rendered by default.

11,785 changes: 11,234 additions & 551 deletions library/src/blas3/Tensile/Logic/archive/vega20_Cijk_Ailk_Bljk_SB.yaml

Large diffs are not rendered by default.

12,830 changes: 12,657 additions & 173 deletions library/src/blas3/Tensile/Logic/archive/vega20_Cijk_Alik_Bljk_SB.yaml

Large diffs are not rendered by default.

64,568 changes: 55,725 additions & 8,843 deletions library/src/blas3/Tensile/Logic/asm_ci/vega20_Cijk_Ailk_Bjlk_SB.yaml

Large diffs are not rendered by default.

68,626 changes: 44,996 additions & 23,630 deletions library/src/blas3/Tensile/Logic/asm_ci/vega20_Cijk_Ailk_Bljk_SB.yaml

Large diffs are not rendered by default.

54,380 changes: 39,674 additions & 14,706 deletions library/src/blas3/Tensile/Logic/asm_ci/vega20_Cijk_Alik_Bljk_SB.yaml

Large diffs are not rendered by default.

64,568 changes: 55,725 additions & 8,843 deletions library/src/blas3/Tensile/Logic/asm_full/vega20_Cijk_Ailk_Bjlk_SB.yaml

Large diffs are not rendered by default.

68,626 changes: 44,996 additions & 23,630 deletions library/src/blas3/Tensile/Logic/asm_full/vega20_Cijk_Ailk_Bljk_SB.yaml

Large diffs are not rendered by default.

54,380 changes: 39,674 additions & 14,706 deletions library/src/blas3/Tensile/Logic/asm_full/vega20_Cijk_Alik_Bljk_SB.yaml

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions scripts/performance/sgemm_bert.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Runs a fixed list of single-precision (f32_r) GEMM cases through the
# rocblas-bench binary located in the current working directory.
# Every case uses alpha = -1.0 and beta = 1.0; only the transpose flags,
# the problem sizes and the leading dimensions vary.
# The script does not stop when an individual run fails; its exit status
# is that of the last run.

#######################################
# Launch one rocblas-bench GEMM run.
# Arguments:
#   $1 - value for --transposeA
#   $2 - value for --transposeB
#   $3 - value for -m
#   $4 - value for -n
#   $5 - value for -k
#   $6 - value for --lda
#   $7 - value for --ldb
#   $8 - value for --ldc
# Returns:
#   Exit status of rocblas-bench.
#######################################
bench_sgemm() {
  ./rocblas-bench -f gemm -r f32_r \
    --transposeA "$1" --transposeB "$2" \
    -m "$3" -n "$4" -k "$5" \
    --alpha -1.0 --lda "$6" --ldb "$7" \
    --beta 1.0 --ldc "$8"
}

#           tA tB    m    n    k  lda  ldb  ldc
# A not transposed, B not transposed
bench_sgemm N N 1600 512 1024 1600 1024 1600
bench_sgemm N N 1024 512 1 1024 1 1024
bench_sgemm N N 1024 512 64 1024 64 1024
bench_sgemm N N 64 512 1 64 1 64
bench_sgemm N N 2048 512 1 2048 1 2048
bench_sgemm N N 100 512 2048 100 2048 100
bench_sgemm N N 1 512 1 1 1 1
bench_sgemm N N 512 200 32 512 32 512
bench_sgemm N N 560 200 1024 560 1024 560
bench_sgemm N N 32 200 1 32 1 32
bench_sgemm N N 1024 200 1 1024 1 1024
bench_sgemm N N 1 200 1 1 1 1
bench_sgemm N N 512 200 1 512 1 512

# A not transposed, B transposed
bench_sgemm N T 1024 64 512 1024 64 1024
bench_sgemm N T 100 2048 512 100 2048 100
bench_sgemm N T 2048 1 512 2048 1 2048
bench_sgemm N T 2048 2048 512 2048 2048 2048
bench_sgemm N T 1600 1024 512 1600 1024 1600
bench_sgemm N T 1024 1024 512 1024 1024 1024
bench_sgemm N T 560 1024 200 560 1024 560
bench_sgemm N T 1024 1024 200 1024 1024 1024
bench_sgemm N T 1024 1 200 1024 1 1024
bench_sgemm N T 512 32 200 512 32 512
bench_sgemm N T 512 512 200 512 512 512

# A transposed, B not transposed
bench_sgemm T N 1024 512 1600 1600 1600 1024
bench_sgemm T N 2048 512 100 100 100 2048
bench_sgemm T N 1024 200 560 560 560 1024
29 changes: 29 additions & 0 deletions scripts/performance/sgemm_dlrm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Runs a fixed list of single-precision (f32_r) GEMM cases through the
# rocblas-bench binary located in the current working directory.
# Every case uses alpha = -1.0 and beta = 1.0; only the transpose flags,
# the problem sizes and the leading dimensions vary.
# The script does not stop when an individual run fails; its exit status
# is that of the last run.
# NOTE(review): this case list is identical to the one added as
# scripts/performance/sgemm_bert.sh in the same change - confirm that
# is intended.

#######################################
# Launch one rocblas-bench GEMM run.
# Arguments:
#   $1 - value for --transposeA
#   $2 - value for --transposeB
#   $3 - value for -m
#   $4 - value for -n
#   $5 - value for -k
#   $6 - value for --lda
#   $7 - value for --ldb
#   $8 - value for --ldc
# Returns:
#   Exit status of rocblas-bench.
#######################################
bench_sgemm() {
  ./rocblas-bench -f gemm -r f32_r \
    --transposeA "$1" --transposeB "$2" \
    -m "$3" -n "$4" -k "$5" \
    --alpha -1.0 --lda "$6" --ldb "$7" \
    --beta 1.0 --ldc "$8"
}

#           tA tB    m    n    k  lda  ldb  ldc
# A not transposed, B not transposed
bench_sgemm N N 1600 512 1024 1600 1024 1600
bench_sgemm N N 1024 512 1 1024 1 1024
bench_sgemm N N 1024 512 64 1024 64 1024
bench_sgemm N N 64 512 1 64 1 64
bench_sgemm N N 2048 512 1 2048 1 2048
bench_sgemm N N 100 512 2048 100 2048 100
bench_sgemm N N 1 512 1 1 1 1
bench_sgemm N N 512 200 32 512 32 512
bench_sgemm N N 560 200 1024 560 1024 560
bench_sgemm N N 32 200 1 32 1 32
bench_sgemm N N 1024 200 1 1024 1 1024
bench_sgemm N N 1 200 1 1 1 1
bench_sgemm N N 512 200 1 512 1 512

# A not transposed, B transposed
bench_sgemm N T 1024 64 512 1024 64 1024
bench_sgemm N T 100 2048 512 100 2048 100
bench_sgemm N T 2048 1 512 2048 1 2048
bench_sgemm N T 2048 2048 512 2048 2048 2048
bench_sgemm N T 1600 1024 512 1600 1024 1600
bench_sgemm N T 1024 1024 512 1024 1024 1024
bench_sgemm N T 560 1024 200 560 1024 560
bench_sgemm N T 1024 1024 200 1024 1024 1024
bench_sgemm N T 1024 1 200 1024 1 1024
bench_sgemm N T 512 32 200 512 32 512
bench_sgemm N T 512 512 200 512 512 512

# A transposed, B not transposed
bench_sgemm T N 1024 512 1600 1600 1600 1024
bench_sgemm T N 2048 512 100 100 100 2048
bench_sgemm T N 1024 200 560 560 560 1024
Loading