-
Notifications
You must be signed in to change notification settings - Fork 9.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mulmat-tune-tool: support RoPE dimensions (N/K of 128, N/K=M)
- Loading branch information
Showing
54 changed files
with
2,674 additions
and
1,498 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
N=5120,K=5120 | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.001, 0.003, 0.006, 0.011, 0.027, 0.044, 0.092, 0.180, 0.426, 0.901 | ||
#0_1_CPU_nth=1, 1.865, 2.792, 5.318, 10.243, 20.234, 40.552, 82.861, 163.494, 327.287, 656.334 | ||
#0_total_nth=1, 1.866, 2.795, 5.324, 10.254, 20.261, 40.596, 82.953, 163.674, 327.713, 657.235 | ||
#1_1_GPU_nth=1, 25.473, 27.273, 28.110, 27.216, 25.473, 30.464, 28.960, 37.230, 53.190, 104.783 | ||
#1_total_nth=1, 25.473, 27.273, 28.110, 27.216, 25.473, 30.464, 28.960, 37.230, 53.190, 104.783 | ||
#2_0_CPU_nth=1, 17.471, 17.374, 17.341, 16.926, 16.926, 17.043, 16.791, 16.867, 16.885, 16.795 | ||
#2_1_GPU_nth=1, 3.963, 9.189, 7.681, 7.504, 8.406, 8.686, 10.945, 18.892, 36.241, 79.932 | ||
#2_total_nth=1, 21.434, 26.563, 25.022, 24.430, 25.332, 25.729, 27.736, 35.759, 53.126, 96.727 | ||
|
||
#0_1_CPU_nth=2, 0.932, 1.396, 2.659, 5.121, 10.117, 20.276, 41.430, 81.747, 163.643, 328.167 | ||
#0_total_nth=2, 0.932, 1.399, 2.665, 5.132, 10.144, 20.320, 41.522, 81.927, 164.069, 329.068 | ||
#2_0_CPU_nth=2, 8.735, 8.687, 8.670, 8.463, 8.463, 8.521, 8.395, 8.433, 8.442, 8.397 | ||
#2_total_nth=2, 12.698, 17.876, 16.351, 15.967, 16.869, 17.207, 19.340, 27.325, 44.683, 88.329 | ||
|
||
#0_1_CPU_nth=4, 0.466, 0.698, 1.329, 2.560, 5.058, 10.138, 20.715, 40.873, 81.821, 164.083 | ||
#0_total_nth=4, 0.466, 0.701, 1.335, 2.571, 5.085, 10.182, 20.807, 41.053, 82.247, 164.984 | ||
#2_0_CPU_nth=4, 4.367, 4.343, 4.335, 4.231, 4.231, 4.260, 4.197, 4.216, 4.221, 4.198 | ||
#2_total_nth=4, 8.330, 13.532, 12.016, 11.735, 12.637, 12.946, 15.142, 23.108, 40.462, 84.130 | ||
|
||
#0_1_CPU_nth=8, 0.233, 0.349, 0.664, 1.280, 2.529, 5.069, 10.357, 20.436, 40.910, 82.041 | ||
#0_total_nth=8, 0.233, 0.352, 0.670, 1.291, 2.556, 5.113, 10.449, 20.616, 41.336, 82.942 | ||
#2_0_CPU_nth=8, 2.183, 2.171, 2.167, 2.115, 2.115, 2.130, 2.098, 2.108, 2.110, 2.099 | ||
#2_total_nth=8, 6.146, 11.360, 9.848, 9.619, 10.521, 10.816, 13.043, 21.000, 38.351, 82.031 | ||
|
||
N=5120,K=13824 | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.004, 0.008, 0.015, 0.036, 0.062, 0.138, 0.274, 0.656, 1.261, 2.505 | ||
#0_1_CPU_nth=1, 4.134, 8.043, 13.618, 27.683, 54.559, 111.237, 220.435, 441.494, 876.869,2086.429 | ||
#0_total_nth=1, 4.138, 8.051, 13.633, 27.719, 54.621, 111.375, 220.709, 442.150, 878.130,2088.934 | ||
#1_1_GPU_nth=1, 57.853, 60.208, 60.265, 61.098, 62.087, 65.072, 72.668, 83.500, 124.821, 247.048 | ||
#1_total_nth=1, 57.853, 60.208, 60.265, 61.098, 62.087, 65.072, 72.668, 83.500, 124.821, 247.048 | ||
#2_0_CPU_nth=1, 45.824, 46.040, 46.237, 45.706, 46.960, 46.344, 46.489, 45.963, 46.154, 47.247 | ||
#2_1_GPU_nth=1, 10.868, 14.136, 13.913, 14.035, 16.470, 20.092, 25.479, 38.612, 85.573, 200.631 | ||
#2_total_nth=1, 56.692, 60.176, 60.150, 59.741, 63.430, 66.436, 71.968, 84.575, 131.727, 247.878 | ||
|
||
#0_1_CPU_nth=2, 2.067, 4.021, 6.809, 13.841, 27.279, 55.618, 110.217, 220.747, 438.434,1043.214 | ||
#0_total_nth=2, 2.067, 4.029, 6.824, 13.877, 27.341, 55.756, 110.491, 221.403, 439.695,1045.719 | ||
#2_0_CPU_nth=2, 22.912, 23.020, 23.118, 22.853, 23.480, 23.172, 23.244, 22.981, 23.077, 23.623 | ||
#2_total_nth=2, 33.780, 37.156, 37.031, 36.888, 39.950, 43.264, 48.723, 61.593, 108.650, 224.254 | ||
|
||
#0_1_CPU_nth=4, 1.033, 2.010, 3.404, 6.920, 13.639, 27.809, 55.108, 110.373, 219.217, 521.607 | ||
#0_total_nth=4, 1.033, 2.018, 3.419, 6.956, 13.701, 27.947, 55.382, 111.029, 220.478, 524.112 | ||
#2_0_CPU_nth=4, 11.456, 11.510, 11.559, 11.426, 11.740, 11.586, 11.622, 11.490, 11.538, 11.811 | ||
#2_total_nth=4, 22.324, 25.646, 25.472, 25.461, 28.210, 31.678, 37.101, 50.102, 97.111, 212.442 | ||
|
||
#0_1_CPU_nth=8, 0.516, 1.005, 1.702, 3.460, 6.819, 13.904, 27.554, 55.186, 109.608, 260.803 | ||
#0_total_nth=8, 0.516, 1.013, 1.717, 3.496, 6.881, 14.042, 27.828, 55.842, 110.869, 263.308 | ||
#2_0_CPU_nth=8, 5.728, 5.755, 5.779, 5.713, 5.870, 5.793, 5.811, 5.745, 5.769, 5.905 | ||
#2_total_nth=8, 16.596, 19.891, 19.692, 19.748, 22.340, 25.885, 31.290, 44.357, 91.342, 206.536 | ||
|
||
N=13824,K=5120 | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.001, 0.002, 0.006, 0.011, 0.022, 0.044, 0.092, 0.209, 0.517, 0.927 | ||
#0_1_CPU_nth=1, 4.253, 7.677, 13.841, 27.808, 54.106, 107.527, 222.913, 445.720, 879.513,1810.754 | ||
#0_total_nth=1, 4.254, 7.679, 13.847, 27.819, 54.128, 107.571, 223.005, 445.929, 880.030,1811.681 | ||
#1_1_GPU_nth=1, 57.567, 76.545, 74.841, 75.060, 79.525, 81.015, 85.740, 103.517, 141.373, 229.593 | ||
#1_total_nth=1, 57.567, 76.545, 74.841, 75.060, 79.525, 81.015, 85.740, 103.517, 141.373, 229.593 | ||
#2_0_CPU_nth=1, 46.789, 46.680, 46.584, 46.350, 46.347, 45.930, 47.050, 46.310, 45.846, 45.996 | ||
#2_1_GPU_nth=1, 10.666, 30.947, 29.037, 29.520, 30.463, 34.035, 40.673, 59.049, 96.992, 190.396 | ||
#2_total_nth=1, 57.455, 77.627, 75.621, 75.870, 76.810, 79.965, 87.723, 105.359, 142.838, 236.392 | ||
|
||
#0_1_CPU_nth=2, 2.126, 3.838, 6.920, 13.904, 27.053, 53.763, 111.456, 222.860, 439.756, 905.377 | ||
#0_total_nth=2, 2.126, 3.840, 6.926, 13.915, 27.075, 53.807, 111.548, 223.069, 440.273, 906.304 | ||
#2_0_CPU_nth=2, 23.394, 23.340, 23.292, 23.175, 23.173, 22.965, 23.525, 23.155, 22.923, 22.998 | ||
#2_total_nth=2, 34.060, 54.287, 52.329, 52.695, 53.636, 57.000, 64.198, 82.204, 119.915, 213.394 | ||
|
||
#0_1_CPU_nth=4, 1.063, 1.919, 3.460, 6.952, 13.526, 26.881, 55.728, 111.430, 219.878, 452.688 | ||
#0_total_nth=4, 1.063, 1.921, 3.466, 6.963, 13.548, 26.925, 55.820, 111.639, 220.395, 453.615 | ||
#2_0_CPU_nth=4, 11.697, 11.670, 11.646, 11.587, 11.586, 11.482, 11.762, 11.577, 11.461, 11.499 | ||
#2_total_nth=4, 22.363, 42.617, 40.683, 41.107, 42.049, 45.517, 52.435, 70.626, 108.453, 201.895 | ||
|
||
#0_1_CPU_nth=8, 0.531, 0.959, 1.730, 3.476, 6.763, 13.440, 27.864, 55.715, 109.939, 226.344 | ||
#0_total_nth=8, 0.531, 0.961, 1.736, 3.487, 6.785, 13.484, 27.956, 55.924, 110.456, 227.271 | ||
#2_0_CPU_nth=8, 5.848, 5.835, 5.823, 5.793, 5.793, 5.741, 5.881, 5.788, 5.730, 5.749 | ||
#2_total_nth=8, 16.514, 36.782, 34.860, 35.313, 36.256, 39.776, 46.554, 64.837, 102.722, 196.145 | ||
|
||
N=32000,K=5120 | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.001, 0.002, 0.005, 0.013, 0.022, 0.045, 0.092, 0.219, 0.523, 1.070 | ||
#0_1_CPU_nth=1, 11.220, 17.901, 32.707, 63.996, 126.191, 247.205, 519.326,1029.476,2048.717,4178.401 | ||
#0_total_nth=1, 11.221, 17.903, 32.712, 64.009, 126.213, 247.250, 519.418,1029.695,2049.240,4179.471 | ||
#1_1_GPU_nth=1, 133.042, 180.445, 181.255, 180.258, 181.677, 187.155, 203.563, 244.199, 322.144, 516.456 | ||
#1_total_nth=1, 133.042, 180.445, 181.255, 180.258, 181.677, 187.155, 203.563, 244.199, 322.144, 516.456 | ||
#2_0_CPU_nth=1, 105.843, 106.249, 106.652, 107.003, 105.882, 105.918, 106.720, 106.509, 105.890, 106.880 | ||
#2_1_GPU_nth=1, 24.019, 78.692, 78.512, 80.839, 81.402, 86.450, 101.041, 135.251, 215.908, 416.035 | ||
#2_total_nth=1, 129.862, 184.941, 185.164, 187.842, 187.284, 192.368, 207.761, 241.760, 321.798, 522.915 | ||
|
||
#0_1_CPU_nth=2, 5.610, 8.950, 16.353, 31.998, 63.095, 123.602, 259.663, 514.738,1024.358,2089.200 | ||
#0_total_nth=2, 5.610, 8.952, 16.358, 32.011, 63.117, 123.647, 259.755, 514.957,1024.881,2090.270 | ||
#2_0_CPU_nth=2, 52.921, 53.124, 53.326, 53.501, 52.941, 52.959, 53.360, 53.254, 52.945, 53.440 | ||
#2_total_nth=2, 76.940, 131.816, 131.838, 134.340, 134.343, 139.409, 154.401, 188.505, 268.853, 469.475 | ||
|
||
#0_1_CPU_nth=4, 2.805, 4.475, 8.176, 15.999, 31.547, 61.801, 129.831, 257.369, 512.179,1044.600 | ||
#0_total_nth=4, 2.805, 4.477, 8.181, 16.012, 31.569, 61.846, 129.923, 257.588, 512.702,1045.670 | ||
#2_0_CPU_nth=4, 26.460, 26.562, 26.663, 26.750, 26.470, 26.479, 26.680, 26.627, 26.472, 26.720 | ||
#2_total_nth=4, 50.479, 105.254, 105.175, 107.589, 107.872, 112.929, 127.721, 161.878, 242.380, 442.755 | ||
|
||
#0_1_CPU_nth=8, 1.402, 2.237, 4.088, 7.999, 15.773, 30.900, 64.915, 128.684, 256.089, 522.300 | ||
#0_total_nth=8, 1.402, 2.239, 4.093, 8.012, 15.795, 30.945, 65.007, 128.903, 256.612, 523.370 | ||
#2_0_CPU_nth=8, 13.230, 13.281, 13.331, 13.375, 13.235, 13.239, 13.340, 13.313, 13.236, 13.360 | ||
#2_total_nth=8, 37.249, 91.973, 91.843, 94.214, 94.637, 99.689, 114.381, 148.564, 229.144, 429.395 | ||
|
||
N=128,K=M | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002, 0.010, 0.040, 0.162 | ||
#0_1_CPU_nth=1, 0.002, 0.002, 0.004, 0.010, 0.028, 0.021, 0.052, 0.201, 0.766, 2.892 | ||
#0_total_nth=1, 0.002, 0.002, 0.004, 0.010, 0.028, 0.021, 0.054, 0.211, 0.806, 3.054 | ||
#1_1_GPU_nth=1, 0.000, 0.000, 0.001, 0.001, 0.003, 0.007, 0.038, 0.112, 0.144, 0.287 | ||
#1_total_nth=1, 0.000, 0.000, 0.001, 0.001, 0.003, 0.007, 0.038, 0.112, 0.144, 0.287 | ||
|
||
#0_1_CPU_nth=2, 0.001, 0.001, 0.002, 0.005, 0.014, 0.010, 0.026, 0.100, 0.383, 1.446 | ||
#0_total_nth=2, 0.001, 0.001, 0.002, 0.005, 0.014, 0.010, 0.028, 0.110, 0.423, 1.608 | ||
|
||
#0_1_CPU_nth=4, 0.000, 0.000, 0.001, 0.002, 0.007, 0.005, 0.013, 0.050, 0.191, 0.723 | ||
#0_total_nth=4, 0.000, 0.000, 0.001, 0.002, 0.007, 0.005, 0.015, 0.060, 0.231, 0.885 | ||
|
||
#0_1_CPU_nth=8, 0.000, 0.000, 0.000, 0.001, 0.003, 0.002, 0.006, 0.025, 0.095, 0.361 | ||
#0_total_nth=8, 0.000, 0.000, 0.000, 0.001, 0.003, 0.002, 0.008, 0.035, 0.135, 0.523 | ||
|
||
N=M,K=128 | ||
|
||
#M,1,2,4,8,16,32,64,128,256,512 | ||
#0_0_CPU_nth=1, 0.000, 0.000, 0.000, 0.001, 0.001, 0.002, 0.005, 0.010, 0.017, 0.039 | ||
#0_1_CPU_nth=1, 0.000, 0.000, 0.000, 0.001, 0.003, 0.010, 0.051, 0.163, 0.759, 2.837 | ||
#0_total_nth=1, 0.000, 0.000, 0.000, 0.002, 0.004, 0.012, 0.056, 0.173, 0.776, 2.876 | ||
#1_1_GPU_nth=1, 0.000, 0.000, 0.000, 0.000, 0.002, 0.007, 0.016, 0.094, 0.171, 0.321 | ||
#1_total_nth=1, 0.000, 0.000, 0.000, 0.000, 0.002, 0.007, 0.016, 0.094, 0.171, 0.321 | ||
|
||
#0_1_CPU_nth=2, 0.000, 0.000, 0.000, 0.000, 0.001, 0.005, 0.025, 0.081, 0.379, 1.418 | ||
#0_total_nth=2, 0.000, 0.000, 0.000, 0.001, 0.002, 0.007, 0.030, 0.091, 0.396, 1.457 | ||
|
||
#0_1_CPU_nth=4, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002, 0.012, 0.040, 0.189, 0.709 | ||
#0_total_nth=4, 0.000, 0.000, 0.000, 0.001, 0.001, 0.004, 0.017, 0.050, 0.206, 0.748 | ||
|
||
#0_1_CPU_nth=8, 0.000, 0.000, 0.000, 0.000, 0.000, 0.001, 0.006, 0.020, 0.094, 0.354 | ||
#0_total_nth=8, 0.000, 0.000, 0.000, 0.001, 0.001, 0.003, 0.011, 0.030, 0.111, 0.393 |
Oops, something went wrong.