From 22f8f37cf708d81ff21642e501ebddfa61de0ea5 Mon Sep 17 00:00:00 2001 From: xudoyuan Date: Tue, 28 Apr 2026 07:51:19 +0000 Subject: [PATCH 1/4] [FLYDSL]: 0.1.5 --- pyproject.toml | 2 +- requirements.txt | 3 ++- setup.py | 11 ++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 082b4c37cc..239f1d91c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ requires = [ "psutil", "ninja", "pandas", - "flydsl==0.1.4.2" + "flydsl>=0.1.5.dev0" ] [tool.setuptools_scm] diff --git a/requirements.txt b/requirements.txt index 8a863d7694..e44b0f52c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +--extra-index-url https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/ pandas pytest psutil @@ -6,4 +7,4 @@ pyyaml einops pybind11>=3.0.1 ninja -flydsl==0.1.4.2 +flydsl==0.1.5.dev20260428+2c868c8 diff --git a/setup.py b/setup.py index bf78f9b177..d03f41609e 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,8 @@ OPT_COMPILER_CONFIG = os.path.join(this_dir, "aiter", "jit", "optCompilerConfig.json") PACKAGE_NAME = "amd-aiter" -FLYDSL_VERSION = "flydsl==0.1.4.2" +FLYDSL_NIGHTLY_INDEX = "https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/" +FLYDSL_VERSION = "flydsl==0.1.5.dev20260428+2c868c8" BUILD_TARGET = os.environ.get("BUILD_TARGET", "auto") PREBUILD_KERNELS = int(os.environ.get("PREBUILD_KERNELS", 0)) @@ -58,7 +59,9 @@ def is_develop_mode(): try: from importlib.metadata import version as pkg_version - if pkg_version("flydsl") != FLYDSL_VERSION.split("==")[1]: + _installed = pkg_version("flydsl") + _expected = FLYDSL_VERSION.split("==")[1] + if _installed != _expected and not _installed.startswith(_expected.split("+")[0]): raise ImportError("version mismatch") except Exception: subprocess.check_call( @@ -67,6 +70,8 @@ def is_develop_mode(): "-m", "pip", "install", + "--extra-index-url", + FLYDSL_NIGHTLY_INDEX, FLYDSL_VERSION, ] ) @@ -398,7 +403,7 @@ def has_ext_modules(self): "einops", "psutil", "packaging", - FLYDSL_VERSION, + "flydsl>=0.1.5.dev0", ] setup( From 8567d970812ab1961a695e3a68c0519cfe692fe5 Mon Sep 17 00:00:00 2001 From: xudoyuan Date: Tue, 28 Apr 2026 07:57:14 +0000 Subject: [PATCH 2/4] [FLYDSL: black] --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d03f41609e..9f50fef97c 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,9 @@ OPT_COMPILER_CONFIG = os.path.join(this_dir, "aiter", "jit", "optCompilerConfig.json") PACKAGE_NAME = "amd-aiter" -FLYDSL_NIGHTLY_INDEX = "https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/" +FLYDSL_NIGHTLY_INDEX = ( + "https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/" +) FLYDSL_VERSION = "flydsl==0.1.5.dev20260428+2c868c8" BUILD_TARGET = os.environ.get("BUILD_TARGET", "auto") @@ -61,7 +63,9 @@ def is_develop_mode(): _installed = pkg_version("flydsl") _expected = FLYDSL_VERSION.split("==")[1] - if _installed != _expected and not _installed.startswith(_expected.split("+")[0]): + if _installed != _expected and not _installed.startswith( + _expected.split("+")[0] + ): raise ImportError("version mismatch") except Exception: subprocess.check_call( From f0a8ce8131b31464083fccf860f5cddfb0100a24 Mon Sep 17 00:00:00 2001 From: xudoyuan Date: Tue, 28 Apr 2026 08:17:56 +0000 Subject: [PATCH 3/4] test: use flydsl==0.1.5.dev504 from PyPI for CI validation Co-Authored-By: Claude Opus 4 --- pyproject.toml | 2 +- requirements.txt | 3 +-- setup.py | 15 +++------------ 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 239f1d91c8..7fdb43384d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ requires = [ "psutil", "ninja", "pandas", - "flydsl>=0.1.5.dev0" + "flydsl==0.1.5.dev504" ] [tool.setuptools_scm] diff --git a/requirements.txt b/requirements.txt index e44b0f52c9..b986f2909d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ ---extra-index-url https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/ pandas pytest psutil @@ -7,4 +6,4 @@ pyyaml einops pybind11>=3.0.1 ninja -flydsl==0.1.5.dev20260428+2c868c8 +flydsl==0.1.5.dev504 diff --git a/setup.py b/setup.py index 9f50fef97c..6a296edfb1 100644 --- a/setup.py +++ b/setup.py @@ -13,10 +13,7 @@ OPT_COMPILER_CONFIG = os.path.join(this_dir, "aiter", "jit", "optCompilerConfig.json") PACKAGE_NAME = "amd-aiter" -FLYDSL_NIGHTLY_INDEX = ( - "https://rocm.frameworks-devreleases.amd.com/whl-staging/gfx942-gfx950/" -) -FLYDSL_VERSION = "flydsl==0.1.5.dev20260428+2c868c8" +FLYDSL_VERSION = "flydsl==0.1.5.dev504" BUILD_TARGET = os.environ.get("BUILD_TARGET", "auto") PREBUILD_KERNELS = int(os.environ.get("PREBUILD_KERNELS", 0)) @@ -61,11 +58,7 @@ def is_develop_mode(): try: from importlib.metadata import version as pkg_version - _installed = pkg_version("flydsl") - _expected = FLYDSL_VERSION.split("==")[1] - if _installed != _expected and not _installed.startswith( - _expected.split("+")[0] - ): + if pkg_version("flydsl") != FLYDSL_VERSION.split("==")[1]: raise ImportError("version mismatch") except Exception: subprocess.check_call( @@ -74,8 +67,6 @@ def is_develop_mode(): "-m", "pip", "install", - "--extra-index-url", - FLYDSL_NIGHTLY_INDEX, FLYDSL_VERSION, ] ) @@ -407,7 +398,7 @@ def has_ext_modules(self): "einops", "psutil", "packaging", - "flydsl>=0.1.5.dev0", + FLYDSL_VERSION, ] setup( From 6883c310e19ada0d4ab259a1663abce76fb51247 Mon Sep 17 00:00:00 2001 From: xudoyuan Date: Tue, 28 Apr 2026 02:33:41 +0000 Subject: [PATCH 4/4] [FLYDSL]: if/else adaptation --- .../ops/flydsl/kernels/mixed_moe_gemm_2stage.py | 16 ++++++++-------- aiter/ops/flydsl/kernels/preshuffle_gemm.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/aiter/ops/flydsl/kernels/mixed_moe_gemm_2stage.py b/aiter/ops/flydsl/kernels/mixed_moe_gemm_2stage.py index 81d7bfeb4e..75089a0286 100644 --- a/aiter/ops/flydsl/kernels/mixed_moe_gemm_2stage.py +++ b/aiter/ops/flydsl/kernels/mixed_moe_gemm_2stage.py @@ -520,7 +520,7 @@ def moe_gemm1( by = gpu.block_id("x") # tile along inter_dim (N) bx_persist = gpu.block_id("y") # persistent WG index - if xcd_swizzle > 0: + if const_expr(xcd_swizzle > 0): _NUM_XCDS_S1 = 8 _c1_sw = arith.constant(1, index=True) _c_tn_sw = arith.constant(tile_n, index=True) @@ -579,7 +579,7 @@ def moe_gemm1( _lds_out_elem_type = ( T.f32 if _need_quant else (T.bf16 if out_is_bf16 else T.f16) ) - if _split_lds_out and _use_cshuffle_epilog: + if const_expr(_split_lds_out and _use_cshuffle_epilog): _half_out_elems = int(tile_m) * (int(tile_n) // 2) lds_out = SmemPtr( base_ptr_pong, @@ -1296,7 +1296,7 @@ def pack_i64x4_to_i32x8(x0, x1, x2, x3): ) for ikxdl in range_constexpr(pack_K): k_idx = ku128 * pack_K + ikxdl - if k_idx < ku_count: + if const_expr(k_idx < ku_count): gate_bp0, gate_bp1 = gate_b_tile_in[k_idx] if const_expr(not _single_b): up_bp0, up_bp1 = up_b_tile_in[k_idx] @@ -1863,7 +1863,7 @@ def _interleaved_half( prefetch_x_to_lds(k_tail1, lds_x_ping) else: x_regs_ping = load_x_tile(k_tail1) - if _pad_ku_skip > 0: + if const_expr(_pad_ku_skip > 0): gate_w_ping, up_w_ping = load_b_tile( k_tail1 // arith.constant(2, index=True), ku_limit=_tail_ku, @@ -1893,7 +1893,7 @@ def _interleaved_half( store_x_tile_to_lds(x_regs_ping, lds_x_ping) rocdl.s_waitcnt(0) _barrier() - if _pad_ku_skip > 0: + if const_expr(_pad_ku_skip > 0): a_tile_ping = prefetch_full_a_from_lds( lds_x_ping, ku_limit=_tail_ku ) @@ -1974,7 +1974,7 @@ def _swiglu_mul_vec4(gate_v4, up_v4): def _act_vec4(gate_v4, up_v4): """Dispatch activation based on `act` parameter.""" - if act == "swiglu": + if const_expr(act == "swiglu"): return _swiglu_mul_vec4(gate_v4, up_v4) else: return _silu_mul_vec4(gate_v4, up_v4) @@ -2320,7 +2320,7 @@ def store_pair(*, row_local, row, row_ctx, col_pair0, col_g0, frag): _w, ) out_ptr_v = _idx_to_llvm_ptr(ptr_addr_idx) - if _e_vec == 2: + if const_expr(_e_vec == 2): store_val = arith.TruncIOp(T.i16, packed_i32) store_raw = ( store_val._value @@ -4512,7 +4512,7 @@ def launch_mixed_moe_gemm2( gx = ( n_in - _model_dim_pad_idx + _tile_n_idx - arith.constant(1, index=True) ) / _tile_n_idx - if _persistent: + if const_expr(_persistent): gy = arith.constant(_cu_num, index=True) else: _c_pm_l = arith.constant(persist_m, index=True) diff --git a/aiter/ops/flydsl/kernels/preshuffle_gemm.py b/aiter/ops/flydsl/kernels/preshuffle_gemm.py index 932b9c001a..e043ac088c 100644 --- a/aiter/ops/flydsl/kernels/preshuffle_gemm.py +++ b/aiter/ops/flydsl/kernels/preshuffle_gemm.py @@ -907,7 +907,7 @@ def pack_i64x4_to_i32x8(x0, x1, x2, x3): for imxdl in range_constexpr(_fp4_pack_M): mi_idx = mi_p * _fp4_pack_M + imxdl curr_row_a_lds = row_a_lds + (mi_idx * 16) - if ( + if const_expr( (a0_prefetch is not None) and (k_idx == 0) and (mi_idx == 0)