Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
93 commits
Select commit Hold shift + click to select a range
ef5a634
upda for deepgemm
FlamingoPg Aug 4, 2025
313b362
upd
FlamingoPg Aug 4, 2025
1268ba7
upd
FlamingoPg Aug 5, 2025
77196a0
Merge branch 'main' into deepgemm-update
FlamingoPg Aug 5, 2025
596f2ad
Update CMakeLists.txt
FlamingoPg Aug 5, 2025
57bb109
Update CMakeLists.txt
FlamingoPg Aug 5, 2025
6916191
Merge branch 'main' into deepgemm-update
FlamingoPg Aug 5, 2025
c50d71e
Merge remote-tracking branch 'origin' into deepgemm-update
BBuf Aug 7, 2025
ed8c049
Merge branch 'main' into deepgemm-update
zhyncs Aug 15, 2025
426cfc5
Merge branch 'main' into deepgemm-update
zhyncs Aug 15, 2025
b8cd466
fix
FlamingoPg Aug 19, 2025
1dcfe71
Merge remote-tracking branch 'origin/deepgemm-update' into deepgemm-u…
FlamingoPg Aug 19, 2025
cdb055d
Merge branch 'main' into deepgemm-update
FlamingoPg Aug 19, 2025
ce125b7
fix lint
FlamingoPg Aug 19, 2025
617918d
Merge branch 'main' into deepgemm-update2
zhyncs Aug 19, 2025
0d6006c
Merge branch 'main' into deepgemm-update2
zhyncs Aug 23, 2025
c10d88a
Merge remote-tracking branch 'origin/main' into deepgemm-update
FlamingoPg Aug 23, 2025
c243027
more
fzyzcjy Aug 14, 2025
53ab93d
more
fzyzcjy Aug 14, 2025
4d5d0e7
more
fzyzcjy Aug 14, 2025
fc5de46
more
fzyzcjy Aug 14, 2025
4fabb3d
more
fzyzcjy Aug 14, 2025
559e737
more
fzyzcjy Aug 14, 2025
3bca059
more
fzyzcjy Aug 14, 2025
ae38bbd
more
fzyzcjy Aug 14, 2025
a2628eb
more
fzyzcjy Aug 14, 2025
2ae1219
more
fzyzcjy Aug 14, 2025
e67bc0c
more
fzyzcjy Aug 14, 2025
811e6ed
more
fzyzcjy Aug 14, 2025
bcfa4c6
more
fzyzcjy Aug 14, 2025
f4c1a83
more
fzyzcjy Aug 14, 2025
990146b
more
fzyzcjy Aug 14, 2025
bd1722f
more
fzyzcjy Aug 14, 2025
955275e
more
fzyzcjy Aug 14, 2025
b5e20f8
more
fzyzcjy Aug 14, 2025
24f4a36
more
fzyzcjy Aug 14, 2025
3ecc5c0
more
fzyzcjy Aug 14, 2025
1699b11
more
fzyzcjy Aug 14, 2025
bfdfd69
more
fzyzcjy Aug 14, 2025
b9c4b52
more
fzyzcjy Aug 14, 2025
183d137
more
fzyzcjy Aug 14, 2025
22da01d
more
fzyzcjy Aug 14, 2025
d618da4
more
fzyzcjy Aug 14, 2025
ea77b8d
more
fzyzcjy Aug 14, 2025
6adf309
more
fzyzcjy Aug 14, 2025
95e6dbf
more
fzyzcjy Aug 14, 2025
a87ad24
more
fzyzcjy Aug 14, 2025
d61abf1
more
fzyzcjy Aug 14, 2025
d037a10
more
fzyzcjy Aug 14, 2025
a0625cb
more
fzyzcjy Aug 14, 2025
d5a1338
more
fzyzcjy Aug 14, 2025
9ab122e
more
fzyzcjy Aug 14, 2025
1e8adae
fmt
fzyzcjy Aug 14, 2025
ecb053b
more
fzyzcjy Aug 14, 2025
bf42c6d
more
fzyzcjy Aug 14, 2025
6801209
more
fzyzcjy Aug 14, 2025
b7ad7fe
more
fzyzcjy Aug 14, 2025
ea26b75
more
fzyzcjy Aug 14, 2025
1c7e19b
more
fzyzcjy Aug 14, 2025
66f74d8
more
fzyzcjy Aug 14, 2025
9184596
more
fzyzcjy Aug 14, 2025
69b537d
more
fzyzcjy Aug 14, 2025
61d5aa8
more
fzyzcjy Aug 14, 2025
9beee5d
more
fzyzcjy Aug 14, 2025
b0bec03
more
fzyzcjy Aug 14, 2025
a06e8e6
more
fzyzcjy Aug 14, 2025
9f437b0
fmt
fzyzcjy Aug 14, 2025
ee9e699
upd
zhyncs Aug 15, 2025
ef0c175
more
fzyzcjy Aug 14, 2025
cbb2285
more
fzyzcjy Aug 14, 2025
bf710d0
chore: cherry-pick PR #9167 (squash)
FlamingoPg Aug 23, 2025
f2ceca3
Merge branch 'main' into deepgemm-update2
zhyncs Aug 24, 2025
305a72a
upd
zhyncs Aug 24, 2025
7d4accb
fix transform_sf_into_required_layout import
FlamingoPg Aug 25, 2025
580634f
Merge branch 'main' into deepgemm-update2
FlamingoPg Aug 25, 2025
5d2d0aa
Merge remote-tracking branch 'origin/deepgemm-update2' into deepgemm-…
FlamingoPg Aug 25, 2025
e95ff2b
Merge branch 'main' into deepgemm-update2
zhyncs Aug 25, 2025
1171b28
upd
zhyncs Aug 26, 2025
c46b9cf
upd
zhyncs Aug 26, 2025
c3d95b8
upd
zhyncs Aug 26, 2025
1c449d4
upd
zhyncs Aug 26, 2025
d43ad87
upd
zhyncs Aug 26, 2025
ed94c18
Merge branch 'main' into deepgemm-update2
zhyncs Aug 26, 2025
34f8554
Merge branch 'main' into deepgemm-update2
zhyncs Aug 26, 2025
7ff8b43
upd
zhyncs Aug 27, 2025
051eff8
upd
zhyncs Aug 27, 2025
6ae91af
Merge branch 'main' into deepgemm-update2
zhyncs Aug 27, 2025
a535425
Merge branch 'main' into deepgemm-update2
zhyncs Aug 27, 2025
38920b2
Merge branch 'main' into deepgemm-update2
zhyncs Aug 27, 2025
f6808ca
upd
zhyncs Aug 27, 2025
0c60431
Revert "use fast math for per_token_group_quant_8bit. (#9177)"
zhyncs Aug 27, 2025
018800b
Merge branch 'main' into deepgemm-update2
zhyncs Aug 27, 2025
c199d3f
upd
zhyncs Aug 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pr-test-sgl-kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jobs:
include:
- python-version: "3.10"
cuda-version: "12.4"
- python-version: "3.10"
cuda-version: "12.8"
- python-version: "3.10"
cuda-version: "12.9"
name: Build Wheel (CUDA ${{ matrix.cuda-version }})
Expand Down
8 changes: 1 addition & 7 deletions python/sglang/srt/layers/moe/ep_moe/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ def forward_deepgemm(
gateup_output,
masked_m,
expected_m,
recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
del gateup_input
del gateup_input_fp8
Expand Down Expand Up @@ -304,7 +303,6 @@ def forward_deepgemm(
down_output,
masked_m,
expected_m,
recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
del down_input
del down_input_fp8
Expand Down Expand Up @@ -667,7 +665,6 @@ def forward_deepgemm_masked(
gateup_output,
masked_m,
expected_m,
recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
dispose_tensor(hidden_states_fp8[0])

Expand Down Expand Up @@ -708,9 +705,7 @@ def forward_deepgemm_masked(
(
down_input_scale
if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
else deep_gemm_wrapper.get_col_major_tma_aligned_tensor(
down_input_scale
)
else deep_gemm_wrapper.get_mn_major_tma_aligned_tensor(down_input_scale)
),
)
down_output = torch.empty(
Expand All @@ -722,7 +717,6 @@ def forward_deepgemm_masked(
down_output,
masked_m,
expected_m,
recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)

return down_output
Expand Down
Loading
Loading