diff --git a/aiter/ops/triton/configs/gemm/MI350X-GEMM-AFP4WFP4_PRESHUFFLED-N=57344-K=8192.json b/aiter/ops/triton/configs/gemm/MI350X-GEMM-AFP4WFP4_PRESHUFFLED-N=57344-K=8192.json index dea39ddd30..909689ce23 100644 --- a/aiter/ops/triton/configs/gemm/MI350X-GEMM-AFP4WFP4_PRESHUFFLED-N=57344-K=8192.json +++ b/aiter/ops/triton/configs/gemm/MI350X-GEMM-AFP4WFP4_PRESHUFFLED-N=57344-K=8192.json @@ -83,4 +83,4 @@ "cache_modifier": null, "NUM_KSPLIT": 1 } -} +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/README.md b/aiter/ops/triton/configs/gemm/aot/README.md index 2111d4e57c..f320237595 100644 --- a/aiter/ops/triton/configs/gemm/aot/README.md +++ b/aiter/ops/triton/configs/gemm/aot/README.md @@ -2,3 +2,5 @@ This is the first iteration of aot compilation for triton All compiled modules are FP4 preshuffled GEMMs with TN layout and bf16 dtype + +All binary files are generated using triton==3.5.0+gitc172d539 \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..9a721b6230 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..218b217517 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "e90d4ba9cf14219bef1bca72767ed05991913eb79484a5b706cb25d9f2f71474", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 43520, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..a76261d24b Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..4485032426 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "e80a3f3a19a5da27236f25e468c4b22caa88c28f65793d17c3d2045fe972817c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..19aa40e784 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..1e5bb1dfae --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "cb12dc32b0ed1a5ac880a6dd3bee50fb59d11e1a8eeccc3ae8153c968e7f2c75", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 13056, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..8dcc5280de Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..fca09fa225 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=1-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "347e0c55794ac0ca235e8b969a4b5a5268100a128f24dcce30fe2005b2bc21b1", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 16896, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index db7cf95ec1..febfd8cf3b 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 4e0e1d3a42..84e66f815f 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "292fe75724049bbaf3301c4cee1b466a851b9b909d8b4f451314204b47ce9517", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 26112, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "a729967cd59e3c39a6f61dd259cc2b7cd9768909003d37d03d9dc7dae7280b9e", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 52224, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 2d602201d2..1620f0f01d 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 072c5c9474..56a2dfd70c 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "381e9e3775074f0d3d4dabc7198238d3ffd5c3781c53f69b3fb265d1e01d550a", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 6528, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "a38121d8f5709315553f0016ca0e08c77bfd16fd57e336ed676b85615be00762", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 13056, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 0236f06479..cfdd8d48cc 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 1efb591495..fc4ab35831 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "d592667547642151ddbcce925b29b550695b44a7943ffcfc0021ac6adc78f72b", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 5376, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "4af84e6c0b5acb21f71e7f71ab43f43a465dd74734d7c6def0d9fc859c471c1f", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index db7cf95ec1..febfd8cf3b 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 4e0e1d3a42..84e66f815f 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=16-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "292fe75724049bbaf3301c4cee1b466a851b9b909d8b4f451314204b47ce9517", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 26112, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "a729967cd59e3c39a6f61dd259cc2b7cd9768909003d37d03d9dc7dae7280b9e", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 52224, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..d5d5dde2c6 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..046da07114 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "41f673542e895bf56edb8e6a137febf789c28a9da5b4693a1065490a62336656", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 43520, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..6ce75e81b2 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..7b2c5ab8de --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "1b29a972364a81e3844504157096f1a0ca2164836cee9758c885f562921d6f0c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..a43f1de3b1 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..ac8df07cd5 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "96e931c980f9bd1b0d7ba209973d637dae985113c68e23d0476ea6a3789b77f4", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 13056, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco new file mode 100644 index 0000000000..bdcde11a55 Binary files /dev/null and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json new file mode 100644 index 0000000000..3c5b94ec80 --- /dev/null +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=2-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -0,0 +1 @@ +{"hash": "3a4dedb1720cc3cc439e59bf26afb541bb9591e17d932fe1696e4ac46fe1c376", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 16896, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 68845753a5..ba18fd23df 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 545b2b9bee..8dc6746eff 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "6fd582dbab816b4fbda10fb925852f0f721f023a9875736b8c80ff9f4a311d59", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 4864, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "518af245b3686a62c8aae8b677a2e83177124a639e544e12c11c00b9797474fd", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 9728, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 39b514d7d8..cf679ceb39 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 07aed71e55..62c83cddea 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "331e02d8526830cf9036ea78b8a0a98d7daa7d8507856b5b907a87e08e84e7c3", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 5376, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "1737c0a38627fe5406a6244d0c66b46e3b98dfd8daf99c31b2c2ab219ffd8249", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index ebe0228680..b34288dac8 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 85cd089c80..859c3bf7e2 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "05c0143f077ccdc178d6c09fa0de340121c786052781a1940f2534aa51aba683", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 9728, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "84f327b5729d25ec4ad344f8a9b211f9c9786815df9873b33e1a44d2cdf8e580", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 19456, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index edf52f8cc5..b7504c5898 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index b989df5afb..8b0d6ebf34 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=32-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "153391689950fdc22278ea6edf0032c3da45c8ceb7ca23d63a80442d0fef4f9f", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 21504, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "4f511f2573c219ee1928e586a5facd24ea5ddbd2f6314d14387f45c2ca36905b", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 43008, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index c8bb2626f7..d5d5dde2c6 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 593f73e607..046da07114 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "57eaa2ef293000ca6cabd569c875cbd34f86b2062387d6e97c7734016226654f", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 21760, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "41f673542e895bf56edb8e6a137febf789c28a9da5b4693a1065490a62336656", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 43520, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 23c5bbf565..6ce75e81b2 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 6e8aea0074..7b2c5ab8de 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "4d1219a8922b637286054f7a68829a70c916ceaf8e9622ece79e70647b91772c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 5376, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "1b29a972364a81e3844504157096f1a0ca2164836cee9758c885f562921d6f0c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 92321b547f..a43f1de3b1 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 1b7681386e..ac8df07cd5 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "a41d293cf0d1b74495ed5d2e9eb2d8d128dabd63575aa2dc91d7ab09505d0391", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 6528, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "96e931c980f9bd1b0d7ba209973d637dae985113c68e23d0476ea6a3789b77f4", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 13056, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index a3dfa82120..bdcde11a55 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 3a3168129a..3c5b94ec80 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=4-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "7579180edc1b69865076ed8b2dd22a48ac1780a4dc1fd8916e1b69388b8fc292", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 8448, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "3a4dedb1720cc3cc439e59bf26afb541bb9591e17d932fe1696e4ac46fe1c376", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 16896, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 471f21afa6..36a56dbea1 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index a6efe572f4..a6f0809dde 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "e28e4e16e7f368052f9462fa623339d677b142e4fde62f9883543b2087eaf1f0", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 9728, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "f22922a7294924d71ca6c72a6b4ac34c07ff79ccf09d45e9fea0fcec2660ee0c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 19456, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index ffdea1be9f..b5e58ce97c 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 5bef3c3ca9..9f7f4d9500 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "21e1d18446fc1eeb25e49906165b921b062b7c9c3a3a5c28497bf91bc0f60736", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 19456, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "7ee2132b54aabbbef6a1f5cc7a99ad94d8c6ee8420e5a6fb8702168c0df06a5d", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 38912, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index d8667026ac..4240460535 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 01cd5311b7..5373477c35 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "762bb8fc6091588b6b0125da3c61db1073b62d383d315f99fbcbb62f2d3f149f", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 19456, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "1007fa9b77c1c41ab0d7f1875b4474e4c8e58481c2f80bfcfbe0ee0131caa0e5", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 38912, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 679141676f..ec1d731f5a 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 9ca1db9736..b0be146e7f 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=64-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "a87e429545937d63139268da7c6dc436f82c7fda1d042a65210cebaab750ff13", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 38912, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "e9a4e058b4c9508aa7b4c8c5c8ff9bba7f3a3c069f2492dbac912115e7a4108a", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 77824, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index c8bb2626f7..d5d5dde2c6 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 593f73e607..046da07114 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=10240-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "57eaa2ef293000ca6cabd569c875cbd34f86b2062387d6e97c7734016226654f", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 21760, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "41f673542e895bf56edb8e6a137febf789c28a9da5b4693a1065490a62336656", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 43520, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 23c5bbf565..6ce75e81b2 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 6e8aea0074..7b2c5ab8de 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=57344-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "4d1219a8922b637286054f7a68829a70c916ceaf8e9622ece79e70647b91772c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 5376, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "1b29a972364a81e3844504157096f1a0ca2164836cee9758c885f562921d6f0c", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 10752, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index 92321b547f..a43f1de3b1 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 1b7681386e..ac8df07cd5 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=28672/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "a41d293cf0d1b74495ed5d2e9eb2d8d128dabd63575aa2dc91d7ab09505d0391", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 6528, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "96e931c980f9bd1b0d7ba209973d637dae985113c68e23d0476ea6a3789b77f4", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 2, "waves_per_eu": 4, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 13056, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco index a3dfa82120..bdcde11a55 100644 Binary files a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco and b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.hsaco differ diff --git a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json index 3a3168129a..3c5b94ec80 100644 --- a/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json +++ b/aiter/ops/triton/configs/gemm/aot/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales_M=8-N=8192-K=8192/_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.json @@ -1 +1 @@ -{"hash": "7579180edc1b69865076ed8b2dd22a48ac1780a4dc1fd8916e1b69388b8fc292", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 8448, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file +{"hash": "3a4dedb1720cc3cc439e59bf26afb541bb9591e17d932fe1696e4ac46fe1c376", "target": {"backend": "hip", "arch": "gfx950", "warp_size": 64}, "num_warps": 4, "waves_per_eu": 2, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "/app/triton-tot/python/triton/backends/amd/lib/ocml.bc"], ["ockl", "/app/triton-tot/python/triton/backends/amd/lib/ockl.bc"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx950", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": ["fp8e4b8", "fp8e5b16"], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "bf16x3", "bf16x6"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 16, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "AMDGCN_USE_BUFFER_OPS": "true", "TRITON_HIP_USE_ASYNC_COPY": "true", "TRITON_HIP_USE_BLOCK_PINGPONG": "true", "triton_version": "3.5.0", "shared": 16896, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "_gemm_afp4_wfp4_kernel_preshuffled_weight_scales"} \ No newline at end of file diff --git a/aiter/ops/triton/fused_mul_add.py b/aiter/ops/triton/fused_mul_add.py index ff7bb36a1f..c756ffc94c 100644 --- a/aiter/ops/triton/fused_mul_add.py +++ b/aiter/ops/triton/fused_mul_add.py @@ -30,8 +30,7 @@ def fused_mul_add( - out: same shape as x """ _LOGGER.info( - f"FUSED_MUL_ADD: x={tuple(x.shape)} a={tuple(a.shape) if isinstance(a, torch.Tensor) else a} " - + f"b={tuple(b.shape) if isinstance(b, torch.Tensor) else b}" + f"FUSED_MUL_ADD: x={tuple(x.shape)} a={tuple(a.shape) if isinstance(a, torch.Tensor) else a} b={tuple(b.shape) if isinstance(b, torch.Tensor) else b}" ) N = x.numel() diff --git a/aiter/ops/triton/gemm_afp4wfp4.py b/aiter/ops/triton/gemm_afp4wfp4.py index e72760f6ac..4011501965 100644 --- a/aiter/ops/triton/gemm_afp4wfp4.py +++ b/aiter/ops/triton/gemm_afp4wfp4.py @@ -403,7 +403,10 @@ def gemm_afp4wfp4_preshuffled_weight_scales( ), ) - metadata_pth = f"{AITER_TRITON_CONFIGS_PATH}/gemm/aot/{_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.fn.__name__}_M={M}-N={N}-K={K*2}" + M_POW2 = triton.next_power_of_2(M) + if M < 32 and M_POW2 > 16: + M_POW2 = 16 + metadata_pth = f"{AITER_TRITON_CONFIGS_PATH}/gemm/aot/{_gemm_afp4_wfp4_kernel_preshuffled_weight_scales.fn.__name__}_M={M_POW2}-N={N}-K={K*2}" if os.path.exists(metadata_pth): with AOTMetadataContext( _gemm_afp4_wfp4_kernel_preshuffled_weight_scales.fn.__name__, diff --git a/op_tests/op_benchmarks/triton/bench_gemm_afp4wfp4.py b/op_tests/op_benchmarks/triton/bench_gemm_afp4wfp4.py index 88f91841f1..45f1a7fce0 100644 --- a/op_tests/op_benchmarks/triton/bench_gemm_afp4wfp4.py +++ b/op_tests/op_benchmarks/triton/bench_gemm_afp4wfp4.py @@ -18,7 +18,6 @@ get_model_benchmark_object, get_shape_benchmark_object, print_vgpr, - get_caller_name_no_ext, ) import aiter.ops.triton.utils._triton.arch_info as arch_info diff --git a/op_tests/triton_tests/test_gemm_afp4wfp4.py b/op_tests/triton_tests/test_gemm_afp4wfp4.py index bddccfb135..1ae27efbe8 100644 --- a/op_tests/triton_tests/test_gemm_afp4wfp4.py +++ b/op_tests/triton_tests/test_gemm_afp4wfp4.py @@ -83,7 +83,7 @@ def generate_gemm_afp4wfp4_inputs( if M >= 32: x_scales_shuffled = shuffle_scales(x_scales) else: - x_scales_shuffled = x_scales + x_scales_shuffled = x_scales.contiguous() w_scales_shuffled = shuffle_scales(w_scales) else: x_scales_shuffled = x_scales @@ -219,8 +219,7 @@ def run_torch(x, w, x_scales, w_scales, dtype): @pytest.mark.parametrize("output", [True, False]) @pytest.mark.parametrize( "shuffle_scales_fg, shuffle_weight_fg", - # [(False, False), (True, False), (True, True)], - [(True, True)], + [(False, False), (True, False), (True, True)], ) def test_gemm_afp4_wfp4( M: int, N: int, K: int, dtype, layout, output, shuffle_scales_fg, shuffle_weight_fg diff --git a/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/kernel.py b/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/kernel.py new file mode 100644 index 0000000000..b737c23b7e --- /dev/null +++ b/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/kernel.py @@ -0,0 +1,8 @@ +import triton +import triton.language as tl + + +@triton.jit +def empty_kernel(x_ptr, SIZE: tl.constexpr): + # Add implementation here + return diff --git a/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/test_metadata_redirect.py b/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/test_metadata_redirect.py new file mode 100644 index 0000000000..ea41db4c89 --- /dev/null +++ b/op_tests/triton_tests/triton_metadata_redirect/triton_metadata_redirect/test_metadata_redirect.py @@ -0,0 +1,5872 @@ +import os +from pathlib import Path +import torch +import tempfile + +import triton +import triton.language as tl +from triton.backends.compiler import GPUTarget +from triton.tools.compile import compile_kernel, CompileArgs + +from aiter.utility.triton.triton_metadata_redirect import ( + AOTMetadataContext, + with_custom_metadata_path, +) + +triton_path = triton.__path__[0] +kernel_path = os.path.join(Path(__file__).parent, "kernel.py") + +ocml_path = os.path.join(triton_path, "backends", "amd", "lib", "ocml.bc") +ockl_path = os.path.join(triton_path, "backends", "amd", "lib", "ockl.bc") +aot_kernel0_json = f'{{"hash": "6bcc39a06793f9046cfac6c1e43d525df901f199afd42416bdc907df0b6194cf", "target": {{"backend": "hip", "arch": "gfx942", "warp_size": 64}}, "num_warps": 4, "waves_per_eu": 1, "num_stages": 2, "num_ctas": 1, "extern_libs": [["ocml", "{ocml_path}"], ["ockl", "{ockl_path}"]], "cluster_dims": [1, 1, 1], "debug": false, "sanitize_overflow": true, "arch": "gfx942", "supported_fp8_dtypes": ["fp8e4b8", "fp8e4nv", "fp8e5", "fp8e5b16"], "deprecated_fp8_dot_operand_dtypes": [], "default_dot_input_precision": "ieee", "allowed_dot_input_precisions": ["ieee", "tf32"], "enable_fp_fusion": true, "launch_cooperative_grid": false, "matrix_instr_nonkdim": 0, "kpack": 1, "allow_flush_denorm": false, "max_num_imprecise_acc_default": 0, "backend_name": "hip", "instrumentation_mode": "", "schedule_hint": "none", "warp_size": 64, "triton_version": "3.5.0", "shared": 0, "profile_scratch_size": 0, "profile_scratch_align": 1, "name": "empty_kernel"}}' + +aot_kernel0_hsaco = [ + 0x7F, + 0x45, + 0x4C, + 0x46, + 0x02, + 0x01, + 0x01, + 0x40, + 0x03, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0xE0, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x30, + 0x11, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4C, + 0x05, + 0x00, + 0x00, + 0x40, + 0x00, + 0x38, + 0x00, + 0x08, + 0x00, + 0x40, + 0x00, + 0x15, + 0x00, + 0x13, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC0, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC0, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x10, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x10, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x40, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x70, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC0, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x10, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x40, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x70, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x70, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x52, + 0xE5, + 0x74, + 0x64, + 0x04, + 0x00, + 0x00, + 0x00, + 0x40, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x70, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC0, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x51, + 0xE5, + 0x74, + 0x64, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4C, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4C, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x07, + 0x00, + 0x00, + 0x00, + 0x37, + 0x02, + 0x00, + 0x00, + 0x20, + 0x00, + 0x00, + 0x00, + 0x41, + 0x4D, + 0x44, + 0x47, + 0x50, + 0x55, + 0x00, + 0x00, + 0x83, + 0xAE, + 0x61, + 0x6D, + 0x64, + 0x68, + 0x73, + 0x61, + 0x2E, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x73, + 0x91, + 0x8F, + 0xAB, + 0x2E, + 0x61, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x63, + 0x6F, + 0x75, + 0x6E, + 0x74, + 0x00, + 0xA5, + 0x2E, + 0x61, + 0x72, + 0x67, + 0x73, + 0x93, + 0x84, + 0xAE, + 0x2E, + 0x61, + 0x64, + 0x64, + 0x72, + 0x65, + 0x73, + 0x73, + 0x5F, + 0x73, + 0x70, + 0x61, + 0x63, + 0x65, + 0xA6, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0xA7, + 0x2E, + 0x6F, + 0x66, + 0x66, + 0x73, + 0x65, + 0x74, + 0x00, + 0xA5, + 0x2E, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x08, + 0xAB, + 0x2E, + 0x76, + 0x61, + 0x6C, + 0x75, + 0x65, + 0x5F, + 0x6B, + 0x69, + 0x6E, + 0x64, + 0xAD, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0x5F, + 0x62, + 0x75, + 0x66, + 0x66, + 0x65, + 0x72, + 0x84, + 0xAE, + 0x2E, + 0x61, + 0x64, + 0x64, + 0x72, + 0x65, + 0x73, + 0x73, + 0x5F, + 0x73, + 0x70, + 0x61, + 0x63, + 0x65, + 0xA6, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0xA7, + 0x2E, + 0x6F, + 0x66, + 0x66, + 0x73, + 0x65, + 0x74, + 0x08, + 0xA5, + 0x2E, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x08, + 0xAB, + 0x2E, + 0x76, + 0x61, + 0x6C, + 0x75, + 0x65, + 0x5F, + 0x6B, + 0x69, + 0x6E, + 0x64, + 0xAD, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0x5F, + 0x62, + 0x75, + 0x66, + 0x66, + 0x65, + 0x72, + 0x84, + 0xAE, + 0x2E, + 0x61, + 0x64, + 0x64, + 0x72, + 0x65, + 0x73, + 0x73, + 0x5F, + 0x73, + 0x70, + 0x61, + 0x63, + 0x65, + 0xA6, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0xA7, + 0x2E, + 0x6F, + 0x66, + 0x66, + 0x73, + 0x65, + 0x74, + 0x10, + 0xA5, + 0x2E, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x08, + 0xAB, + 0x2E, + 0x76, + 0x61, + 0x6C, + 0x75, + 0x65, + 0x5F, + 0x6B, + 0x69, + 0x6E, + 0x64, + 0xAD, + 0x67, + 0x6C, + 0x6F, + 0x62, + 0x61, + 0x6C, + 0x5F, + 0x62, + 0x75, + 0x66, + 0x66, + 0x65, + 0x72, + 0xB9, + 0x2E, + 0x67, + 0x72, + 0x6F, + 0x75, + 0x70, + 0x5F, + 0x73, + 0x65, + 0x67, + 0x6D, + 0x65, + 0x6E, + 0x74, + 0x5F, + 0x66, + 0x69, + 0x78, + 0x65, + 0x64, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x00, + 0xB6, + 0x2E, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x61, + 0x72, + 0x67, + 0x5F, + 0x73, + 0x65, + 0x67, + 0x6D, + 0x65, + 0x6E, + 0x74, + 0x5F, + 0x61, + 0x6C, + 0x69, + 0x67, + 0x6E, + 0x08, + 0xB5, + 0x2E, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x61, + 0x72, + 0x67, + 0x5F, + 0x73, + 0x65, + 0x67, + 0x6D, + 0x65, + 0x6E, + 0x74, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x18, + 0xB8, + 0x2E, + 0x6D, + 0x61, + 0x78, + 0x5F, + 0x66, + 0x6C, + 0x61, + 0x74, + 0x5F, + 0x77, + 0x6F, + 0x72, + 0x6B, + 0x67, + 0x72, + 0x6F, + 0x75, + 0x70, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0xCD, + 0x01, + 0x00, + 0xA5, + 0x2E, + 0x6E, + 0x61, + 0x6D, + 0x65, + 0xAC, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0xBB, + 0x2E, + 0x70, + 0x72, + 0x69, + 0x76, + 0x61, + 0x74, + 0x65, + 0x5F, + 0x73, + 0x65, + 0x67, + 0x6D, + 0x65, + 0x6E, + 0x74, + 0x5F, + 0x66, + 0x69, + 0x78, + 0x65, + 0x64, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x00, + 0xAB, + 0x2E, + 0x73, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x63, + 0x6F, + 0x75, + 0x6E, + 0x74, + 0x0E, + 0xB1, + 0x2E, + 0x73, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x73, + 0x70, + 0x69, + 0x6C, + 0x6C, + 0x5F, + 0x63, + 0x6F, + 0x75, + 0x6E, + 0x74, + 0x00, + 0xA7, + 0x2E, + 0x73, + 0x79, + 0x6D, + 0x62, + 0x6F, + 0x6C, + 0xAF, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6B, + 0x64, + 0xB3, + 0x2E, + 0x75, + 0x73, + 0x65, + 0x73, + 0x5F, + 0x64, + 0x79, + 0x6E, + 0x61, + 0x6D, + 0x69, + 0x63, + 0x5F, + 0x73, + 0x74, + 0x61, + 0x63, + 0x6B, + 0xC2, + 0xAB, + 0x2E, + 0x76, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x63, + 0x6F, + 0x75, + 0x6E, + 0x74, + 0x00, + 0xB1, + 0x2E, + 0x76, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x73, + 0x70, + 0x69, + 0x6C, + 0x6C, + 0x5F, + 0x63, + 0x6F, + 0x75, + 0x6E, + 0x74, + 0x00, + 0xAF, + 0x2E, + 0x77, + 0x61, + 0x76, + 0x65, + 0x66, + 0x72, + 0x6F, + 0x6E, + 0x74, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x40, + 0xAD, + 0x61, + 0x6D, + 0x64, + 0x68, + 0x73, + 0x61, + 0x2E, + 0x74, + 0x61, + 0x72, + 0x67, + 0x65, + 0x74, + 0xB9, + 0x61, + 0x6D, + 0x64, + 0x67, + 0x63, + 0x6E, + 0x2D, + 0x61, + 0x6D, + 0x64, + 0x2D, + 0x61, + 0x6D, + 0x64, + 0x68, + 0x73, + 0x61, + 0x2D, + 0x2D, + 0x67, + 0x66, + 0x78, + 0x39, + 0x34, + 0x32, + 0xAE, + 0x61, + 0x6D, + 0x64, + 0x68, + 0x73, + 0x61, + 0x2E, + 0x76, + 0x65, + 0x72, + 0x73, + 0x69, + 0x6F, + 0x6E, + 0x92, + 0x01, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x12, + 0x03, + 0x07, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0E, + 0x00, + 0x00, + 0x00, + 0x11, + 0x00, + 0x06, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x1A, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x10, + 0x00, + 0x00, + 0x00, + 0x02, + 0x02, + 0x01, + 0x00, + 0x00, + 0x00, + 0x14, + 0xB0, + 0x18, + 0x3B, + 0x71, + 0x79, + 0xA6, + 0xE4, + 0x03, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6B, + 0x64, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x18, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x11, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x20, + 0x03, + 0xAF, + 0x00, + 0x90, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x80, + 0x00, + 0x06, + 0xC0, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x0A, + 0xC0, + 0x08, + 0x00, + 0x00, + 0x00, + 0x7F, + 0xC0, + 0x8C, + 0xBF, + 0x3A, + 0x00, + 0x82, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x81, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x00, + 0x00, + 0x80, + 0xBF, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x50, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x18, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xDC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0A, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x1E, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF5, + 0xFE, + 0xFF, + 0x6F, + 0x00, + 0x00, + 0x00, + 0x00, + 0x98, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xBC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x11, + 0x00, + 0x25, + 0x0E, + 0x13, + 0x05, + 0x03, + 0x0E, + 0x10, + 0x17, + 0x1B, + 0x0E, + 0x11, + 0x01, + 0x12, + 0x06, + 0x00, + 0x00, + 0x00, + 0x26, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x5A, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x07, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x01, + 0x00, + 0x00, + 0x74, + 0x72, + 0x69, + 0x74, + 0x6F, + 0x6E, + 0x00, + 0x2F, + 0x6D, + 0x6E, + 0x74, + 0x2F, + 0x72, + 0x61, + 0x69, + 0x64, + 0x30, + 0x2F, + 0x6A, + 0x69, + 0x61, + 0x6E, + 0x2E, + 0x77, + 0x75, + 0x2F, + 0x77, + 0x6F, + 0x72, + 0x6B, + 0x2F, + 0x61, + 0x69, + 0x74, + 0x65, + 0x72, + 0x2F, + 0x61, + 0x69, + 0x74, + 0x65, + 0x72, + 0x2F, + 0x6F, + 0x70, + 0x5F, + 0x74, + 0x65, + 0x73, + 0x74, + 0x73, + 0x2F, + 0x74, + 0x72, + 0x69, + 0x74, + 0x6F, + 0x6E, + 0x5F, + 0x74, + 0x65, + 0x73, + 0x74, + 0x73, + 0x2F, + 0x74, + 0x72, + 0x69, + 0x74, + 0x6F, + 0x6E, + 0x5F, + 0x6D, + 0x65, + 0x74, + 0x61, + 0x64, + 0x61, + 0x74, + 0x61, + 0x5F, + 0x72, + 0x65, + 0x64, + 0x69, + 0x72, + 0x65, + 0x63, + 0x74, + 0x00, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x70, + 0x79, + 0x00, + 0x95, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x74, + 0x00, + 0x00, + 0x00, + 0x04, + 0x01, + 0x01, + 0xFB, + 0x0E, + 0x0D, + 0x00, + 0x01, + 0x01, + 0x01, + 0x01, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x01, + 0x2F, + 0x6D, + 0x6E, + 0x74, + 0x2F, + 0x72, + 0x61, + 0x69, + 0x64, + 0x30, + 0x2F, + 0x6A, + 0x69, + 0x61, + 0x6E, + 0x2E, + 0x77, + 0x75, + 0x2F, + 0x77, + 0x6F, + 0x72, + 0x6B, + 0x2F, + 0x61, + 0x69, + 0x74, + 0x65, + 0x72, + 0x2F, + 0x61, + 0x69, + 0x74, + 0x65, + 0x72, + 0x2F, + 0x6F, + 0x70, + 0x5F, + 0x74, + 0x65, + 0x73, + 0x74, + 0x73, + 0x2F, + 0x74, + 0x72, + 0x69, + 0x74, + 0x6F, + 0x6E, + 0x5F, + 0x74, + 0x65, + 0x73, + 0x74, + 0x73, + 0x2F, + 0x74, + 0x72, + 0x69, + 0x74, + 0x6F, + 0x6E, + 0x5F, + 0x6D, + 0x65, + 0x74, + 0x61, + 0x64, + 0x61, + 0x74, + 0x61, + 0x5F, + 0x72, + 0x65, + 0x64, + 0x69, + 0x72, + 0x65, + 0x63, + 0x74, + 0x00, + 0x00, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x70, + 0x79, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x0A, + 0x00, + 0x09, + 0x02, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x06, + 0x02, + 0x40, + 0x0D, + 0x05, + 0x02, + 0x06, + 0x18, + 0x02, + 0x90, + 0x02, + 0x00, + 0x01, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0C, + 0x00, + 0x00, + 0x00, + 0xFF, + 0xFF, + 0xFF, + 0xFF, + 0x04, + 0x00, + 0x08, + 0x00, + 0x04, + 0x04, + 0x10, + 0x00, + 0x14, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4C, + 0x69, + 0x6E, + 0x6B, + 0x65, + 0x72, + 0x3A, + 0x20, + 0x4C, + 0x4C, + 0x44, + 0x20, + 0x32, + 0x32, + 0x2E, + 0x30, + 0x2E, + 0x30, + 0x20, + 0x28, + 0x68, + 0x74, + 0x74, + 0x70, + 0x73, + 0x3A, + 0x2F, + 0x2F, + 0x67, + 0x69, + 0x74, + 0x68, + 0x75, + 0x62, + 0x2E, + 0x63, + 0x6F, + 0x6D, + 0x2F, + 0x6C, + 0x6C, + 0x76, + 0x6D, + 0x2F, + 0x6C, + 0x6C, + 0x76, + 0x6D, + 0x2D, + 0x70, + 0x72, + 0x6F, + 0x6A, + 0x65, + 0x63, + 0x74, + 0x20, + 0x30, + 0x36, + 0x34, + 0x66, + 0x30, + 0x32, + 0x64, + 0x61, + 0x63, + 0x30, + 0x63, + 0x38, + 0x31, + 0x63, + 0x31, + 0x39, + 0x33, + 0x35, + 0x30, + 0x61, + 0x37, + 0x34, + 0x34, + 0x31, + 0x35, + 0x62, + 0x33, + 0x32, + 0x34, + 0x35, + 0x66, + 0x34, + 0x32, + 0x66, + 0x65, + 0x64, + 0x30, + 0x39, + 0x64, + 0x63, + 0x29, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x17, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x2D, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x48, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x66, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x7C, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x9B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xBC, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xD7, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF6, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0A, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x1E, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF1, + 0xFF, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4F, + 0x01, + 0x00, + 0x00, + 0x00, + 0x02, + 0x08, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x32, + 0x01, + 0x00, + 0x00, + 0x12, + 0x03, + 0x07, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x3F, + 0x01, + 0x00, + 0x00, + 0x11, + 0x00, + 0x06, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x2E, + 0x6E, + 0x6F, + 0x74, + 0x65, + 0x00, + 0x2E, + 0x64, + 0x79, + 0x6E, + 0x73, + 0x79, + 0x6D, + 0x00, + 0x2E, + 0x67, + 0x6E, + 0x75, + 0x2E, + 0x68, + 0x61, + 0x73, + 0x68, + 0x00, + 0x2E, + 0x68, + 0x61, + 0x73, + 0x68, + 0x00, + 0x2E, + 0x64, + 0x79, + 0x6E, + 0x73, + 0x74, + 0x72, + 0x00, + 0x2E, + 0x72, + 0x6F, + 0x64, + 0x61, + 0x74, + 0x61, + 0x00, + 0x2E, + 0x74, + 0x65, + 0x78, + 0x74, + 0x00, + 0x2E, + 0x64, + 0x79, + 0x6E, + 0x61, + 0x6D, + 0x69, + 0x63, + 0x00, + 0x2E, + 0x72, + 0x65, + 0x6C, + 0x72, + 0x6F, + 0x5F, + 0x70, + 0x61, + 0x64, + 0x64, + 0x69, + 0x6E, + 0x67, + 0x00, + 0x2E, + 0x41, + 0x4D, + 0x44, + 0x47, + 0x50, + 0x55, + 0x2E, + 0x63, + 0x73, + 0x64, + 0x61, + 0x74, + 0x61, + 0x00, + 0x2E, + 0x41, + 0x4D, + 0x44, + 0x47, + 0x50, + 0x55, + 0x2E, + 0x67, + 0x70, + 0x72, + 0x5F, + 0x6D, + 0x61, + 0x78, + 0x69, + 0x6D, + 0x75, + 0x6D, + 0x73, + 0x00, + 0x2E, + 0x64, + 0x65, + 0x62, + 0x75, + 0x67, + 0x5F, + 0x61, + 0x62, + 0x62, + 0x72, + 0x65, + 0x76, + 0x00, + 0x2E, + 0x64, + 0x65, + 0x62, + 0x75, + 0x67, + 0x5F, + 0x69, + 0x6E, + 0x66, + 0x6F, + 0x00, + 0x2E, + 0x64, + 0x65, + 0x62, + 0x75, + 0x67, + 0x5F, + 0x73, + 0x74, + 0x72, + 0x00, + 0x2E, + 0x64, + 0x65, + 0x62, + 0x75, + 0x67, + 0x5F, + 0x6C, + 0x69, + 0x6E, + 0x65, + 0x00, + 0x2E, + 0x64, + 0x65, + 0x62, + 0x75, + 0x67, + 0x5F, + 0x66, + 0x72, + 0x61, + 0x6D, + 0x65, + 0x00, + 0x2E, + 0x63, + 0x6F, + 0x6D, + 0x6D, + 0x65, + 0x6E, + 0x74, + 0x00, + 0x2E, + 0x73, + 0x79, + 0x6D, + 0x74, + 0x61, + 0x62, + 0x00, + 0x2E, + 0x73, + 0x68, + 0x73, + 0x74, + 0x72, + 0x74, + 0x61, + 0x62, + 0x00, + 0x2E, + 0x73, + 0x74, + 0x72, + 0x74, + 0x61, + 0x62, + 0x00, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6E, + 0x75, + 0x6D, + 0x5F, + 0x76, + 0x67, + 0x70, + 0x72, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6E, + 0x75, + 0x6D, + 0x5F, + 0x61, + 0x67, + 0x70, + 0x72, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6E, + 0x75, + 0x6D, + 0x62, + 0x65, + 0x72, + 0x65, + 0x64, + 0x5F, + 0x73, + 0x67, + 0x70, + 0x72, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x70, + 0x72, + 0x69, + 0x76, + 0x61, + 0x74, + 0x65, + 0x5F, + 0x73, + 0x65, + 0x67, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x75, + 0x73, + 0x65, + 0x73, + 0x5F, + 0x76, + 0x63, + 0x63, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x75, + 0x73, + 0x65, + 0x73, + 0x5F, + 0x66, + 0x6C, + 0x61, + 0x74, + 0x5F, + 0x73, + 0x63, + 0x72, + 0x61, + 0x74, + 0x63, + 0x68, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x68, + 0x61, + 0x73, + 0x5F, + 0x64, + 0x79, + 0x6E, + 0x5F, + 0x73, + 0x69, + 0x7A, + 0x65, + 0x64, + 0x5F, + 0x73, + 0x74, + 0x61, + 0x63, + 0x6B, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x68, + 0x61, + 0x73, + 0x5F, + 0x72, + 0x65, + 0x63, + 0x75, + 0x72, + 0x73, + 0x69, + 0x6F, + 0x6E, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x68, + 0x61, + 0x73, + 0x5F, + 0x69, + 0x6E, + 0x64, + 0x69, + 0x72, + 0x65, + 0x63, + 0x74, + 0x5F, + 0x63, + 0x61, + 0x6C, + 0x6C, + 0x00, + 0x61, + 0x6D, + 0x64, + 0x67, + 0x70, + 0x75, + 0x2E, + 0x6D, + 0x61, + 0x78, + 0x5F, + 0x6E, + 0x75, + 0x6D, + 0x5F, + 0x76, + 0x67, + 0x70, + 0x72, + 0x00, + 0x61, + 0x6D, + 0x64, + 0x67, + 0x70, + 0x75, + 0x2E, + 0x6D, + 0x61, + 0x78, + 0x5F, + 0x6E, + 0x75, + 0x6D, + 0x5F, + 0x61, + 0x67, + 0x70, + 0x72, + 0x00, + 0x61, + 0x6D, + 0x64, + 0x67, + 0x70, + 0x75, + 0x2E, + 0x6D, + 0x61, + 0x78, + 0x5F, + 0x6E, + 0x75, + 0x6D, + 0x5F, + 0x73, + 0x67, + 0x70, + 0x72, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x00, + 0x65, + 0x6D, + 0x70, + 0x74, + 0x79, + 0x5F, + 0x6B, + 0x65, + 0x72, + 0x6E, + 0x65, + 0x6C, + 0x2E, + 0x6B, + 0x64, + 0x00, + 0x5F, + 0x44, + 0x59, + 0x4E, + 0x41, + 0x4D, + 0x49, + 0x43, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x07, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4C, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x07, + 0x00, + 0x00, + 0x00, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x50, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x50, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x48, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x18, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0F, + 0x00, + 0x00, + 0x00, + 0xF6, + 0xFF, + 0xFF, + 0x6F, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x98, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x98, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x24, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x19, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xBC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xBC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x20, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x1F, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xDC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xDC, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x1E, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x27, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x2F, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x16, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x35, + 0x00, + 0x00, + 0x00, + 0x06, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x40, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x70, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x05, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x10, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x3E, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB0, + 0x2B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB0, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x50, + 0x04, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x4D, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB0, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x5C, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB0, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x71, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB0, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x14, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x7F, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC4, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x2A, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x8B, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x30, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xEE, + 0x0B, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x64, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x96, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x52, + 0x0C, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x99, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xA2, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xF0, + 0x0C, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x28, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xAF, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x30, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x18, + 0x0D, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x63, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xB8, + 0x00, + 0x00, + 0x00, + 0x02, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x80, + 0x0D, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x80, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x14, + 0x00, + 0x00, + 0x00, + 0x0E, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x18, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xC0, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x0F, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xD2, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xCA, + 0x00, + 0x00, + 0x00, + 0x03, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0xD2, + 0x0F, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x58, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, +] + + +def test_f32_kernel(): + with tempfile.TemporaryDirectory() as temp_dir: + print("temp_dir:", temp_dir) + + # write hsaco + hsaco_file = os.path.join(temp_dir, "empty_kernel.hsaco") + with open(hsaco_file, "wb") as f: + f.write(bytes(aot_kernel0_hsaco)) + + # write json + json_file = os.path.join(temp_dir, "empty_kernel.json") + with open(json_file, "w") as f: + f.write(aot_kernel0_json) + + compile_args = CompileArgs( + path=kernel_path, + kernel_name="empty_kernel", + signature=f"*fp32:16,1", + grid="1,1,1", + num_warps=4, + num_stages=2, + out_name="kernel_f32", + ) + + with AOTMetadataContext("empty_kernel", temp_dir): + triton_kernel0, output_files0 = compile_kernel(compile_args) + + +def test_jit(): + with tempfile.TemporaryDirectory() as temp_dir: + print("temp_dir:", temp_dir) + + # write hsaco + hsaco_file = os.path.join(temp_dir, "empty_kernel.hsaco") + with open(hsaco_file, "wb") as f: + f.write(bytes(aot_kernel0_hsaco)) + + # write json + json_file = os.path.join(temp_dir, "empty_kernel.json") + with open(json_file, "w") as f: + f.write(aot_kernel0_json) + + @with_custom_metadata_path(temp_dir) + @triton.jit + def empty_kernel(x_ptr, SIZE: tl.constexpr): + return + + x = torch.zeros((1, 1, 2), dtype=torch.float32, device="cuda") + kernel = empty_kernel[(1, 1, 1)](x, SIZE=64) + + # check + assert kernel.metadata.hash in aot_kernel0_json, "hash not found in json" + + +def test_separate_compile_and_run(): + with tempfile.TemporaryDirectory() as temp_dir: + print("temp_dir:", temp_dir) + + # write hsaco + hsaco_file = os.path.join(temp_dir, "empty_kernel.hsaco") + with open(hsaco_file, "wb") as f: + f.write(bytes(aot_kernel0_hsaco)) + + # write json + json_file = os.path.join(temp_dir, "empty_kernel.json") + with open(json_file, "w") as f: + f.write(aot_kernel0_json) + + @triton.jit + def empty_kernel(x_ptr, SIZE: tl.constexpr): + return + + # compile for kernel0 + target = GPUTarget("hip", "gfx942", 64) + src = triton.compiler.ASTSource( + fn=empty_kernel, + signature={ + "x_ptr": "*fp32", + }, + constexprs={"SIZE": 64}, + ) + with AOTMetadataContext("empty_kernel", temp_dir): + kernel = triton.compile(src, target=target) + + # run kernel0 + x = torch.zeros((1, 1, 2), dtype=torch.float32, device="cuda") + kernel[(1, 1, 1)](x) + + # check + assert kernel.metadata.hash in aot_kernel0_json, "hash not found in json" + + # compile and run for kernel1 + # ... + + # compile and run for kernel2 + # ... + + +if __name__ == "__main__": + test_f32_kernel() + test_jit() + test_separate_compile_and_run()