Skip to content

Commit d4c0b53

Browse files
authored
remove vllm fuse_moe kernel and add moe_sum_reduce, moe_silu_and_mul kernel. (#688)
1 parent 709b973 commit d4c0b53

File tree

95 files changed

+881
-10490
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+881
-10490
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 4}, "8192": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "64": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 8, "num_stages": 3}, "128": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 2}, "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "8192": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 5}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 16}, "8": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 8}, "64": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 8}, "128": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 8}, "256": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 2}, "512": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 1}, "1024": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 4}, "2048": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}, "4096": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "8192": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 8}, "8": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 16}, "64": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 8}, "128": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "256": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 16}, "512": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "1024": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 8}, "2048": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 4}, "4096": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 4}, "8192": {"BLOCK_M": 8, "BLOCK_N": 128, "num_warps": 4}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 8}, "8": {"BLOCK_M": 2, "BLOCK_N": 128, "num_warps": 8}, "64": {"BLOCK_M": 4, "BLOCK_N": 512, "num_warps": 16}, "128": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 4}, "256": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 2}, "512": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "1024": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 4}, "2048": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "4096": {"BLOCK_M": 8, "BLOCK_N": 128, "num_warps": 1}, "8192": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 4}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 128, "num_warps": 4}, "8": {"BLOCK_M": 1, "BLOCK_N": 256, "num_warps": 16}, "64": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 8}, "128": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 8}, "256": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 8}, "512": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "1024": {"BLOCK_M": 8, "BLOCK_N": 128, "num_warps": 8}, "2048": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 8}, "4096": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 4}, "8192": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 8}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 4}, "8": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 16}, "64": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 4}, "128": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "256": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "512": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 4}, "1024": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 1}, "2048": {"BLOCK_M": 32, "BLOCK_N": 64, "num_warps": 2}, "4096": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 1}, "8192": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 1, "BLOCK_N": 256, "num_warps": 2}, "8": {"BLOCK_M": 2, "BLOCK_N": 128, "num_warps": 4}, "64": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 16}, "128": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 4}, "256": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 8}, "512": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 4}, "1024": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 2}, "2048": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 4}, "4096": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 4}, "8192": {"BLOCK_M": 8, "BLOCK_N": 256, "num_warps": 2}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 8}, "8": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 16}, "64": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 2}, "128": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 4}, "256": {"BLOCK_M": 16, "BLOCK_N": 64, "num_warps": 8}, "512": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 2}, "1024": {"BLOCK_M": 8, "BLOCK_N": 512, "num_warps": 4}, "2048": {"BLOCK_M": 2, "BLOCK_N": 256, "num_warps": 4}, "4096": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 1}, "8192": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "8": {"BLOCK_M": 4, "BLOCK_N": 128, "num_warps": 8}, "64": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 4}, "128": {"BLOCK_M": 8, "BLOCK_N": 128, "num_warps": 8}, "256": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "512": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 2}, "1024": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 1}, "2048": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 4}, "4096": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 8}, "8192": {"BLOCK_M": 8, "BLOCK_N": 64, "num_warps": 2}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_N": 512, "num_warps": 8}, "8": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "64": {"BLOCK_M": 1, "BLOCK_N": 512, "num_warps": 4}, "128": {"BLOCK_M": 2, "BLOCK_N": 1024, "num_warps": 4}, "256": {"BLOCK_M": 1, "BLOCK_N": 1024, "num_warps": 1}, "512": {"BLOCK_M": 32, "BLOCK_N": 128, "num_warps": 4}, "1024": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}, "2048": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}, "4096": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}, "8192": {"BLOCK_M": 4, "BLOCK_N": 256, "num_warps": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 8, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 2}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 5}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 3}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 4, "BLOCK_DIM": 128, "num_warps": 4, "NUM_STAGE": 3}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 256, "num_warps": 4, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 16, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 4}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 3}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 2}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 5}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 4}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 3}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 16, "BLOCK_DIM": 256, "num_warps": 4, "NUM_STAGE": 1}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 8, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "512": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 32, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 4}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 4}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 2}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 2}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 5}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 2}, "4096": {"BLOCK_M": 2, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 2}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 8, "BLOCK_DIM": 512, "num_warps": 8, "NUM_STAGE": 1}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 256, "num_warps": 2, "NUM_STAGE": 1}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 1}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 4}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 4}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 5}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 4}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 3}, "2048": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 5}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 2}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 3}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 1, "BLOCK_DIM": 128, "num_warps": 4, "NUM_STAGE": 5}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 4}, "256": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 2, "NUM_STAGE": 1}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "2048": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 4, "NUM_STAGE": 1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 16, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 2}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "128": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 1}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 8, "NUM_STAGE": 5}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 4}, "2048": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 2, "BLOCK_DIM": 1024, "num_warps": 1, "NUM_STAGE": 4}, "8192": {"BLOCK_M": 4, "BLOCK_DIM": 1024, "num_warps": 2, "NUM_STAGE": 3}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"1": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "8": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 4, "NUM_STAGE": 1}, "64": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "128": {"BLOCK_M": 2, "BLOCK_DIM": 256, "num_warps": 1, "NUM_STAGE": 1}, "256": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "512": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "1024": {"BLOCK_M": 1, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "2048": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "4096": {"BLOCK_M": 4, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}, "8192": {"BLOCK_M": 2, "BLOCK_DIM": 512, "num_warps": 1, "NUM_STAGE": 1}}

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight.py

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from lightllm.common.quantization import vLLMFP8w8a8QuantizationMethod
77

88
from lightllm.common.vllm_kernel import _custom_ops as ops
9-
from lightllm.common.fused_moe import fused_experts
109
from lightllm.utils.device_utils import get_current_device_id
1110

1211

lightllm/common/fused_moe/__init__.py

-11
Original file line numberDiff line numberDiff line change
@@ -1,11 +0,0 @@
1-
from lightllm.utils.log_utils import init_logger
2-
3-
logger = init_logger(__name__)
4-
5-
try:
6-
from vllm.model_executor.layers.fused_moe import *
7-
except ImportError:
8-
try:
9-
from lightllm.common.fused_moe.fused_moe import *
10-
except ImportError:
11-
logger.error("vllm or lightllm_kernel is not installed, you can't use fused_moe")

0 commit comments

Comments
 (0)