
Commit

move to weight_loading test to deal with CI cuda memory issues
dsikka committed Aug 21, 2024
1 parent 70ce46e commit 20b5940
Showing 2 changed files with 1 addition and 7 deletions.
7 changes: 0 additions & 7 deletions tests/quantization/test_compressed_tensors.py
@@ -161,10 +161,3 @@ def test_compressed_tensors_kv_cache(vllm_runner):
     with vllm_runner(model_path, kv_cache_dtype="fp8") as llm:
         output = llm.generate_greedy("Hello world!", max_tokens=20)
         assert output
-
-
-def test_compressed_tensors_fused_moe(vllm_runner):
-    model_path = "nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized"
-    with vllm_runner(model_path) as llm:
-        output = llm.generate_greedy("Hello world!", max_tokens=20)
-        assert output
1 change: 1 addition & 0 deletions tests/weight_loading/models.txt
@@ -14,5 +14,6 @@ compressed-tensors, nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test, main
 compressed-tensors, nm-testing/Phi-3-mini-128k-instruct-FP8, main
 compressed-tensors, neuralmagic/Phi-3-medium-128k-instruct-quantized.w4a16, main
 compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-quantized, main
+compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized, main
 awq, casperhansen/mixtral-instruct-awq, main
 awq_marlin, casperhansen/mixtral-instruct-awq, main

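For context, each line in tests/weight_loading/models.txt encodes a quantization method, a model identifier, and a revision, so the fused-MoE check removed from the pytest file above survives as the new entry in this list. Below is a minimal sketch of how such a file could drive a parametrized weight-loading smoke test; the read_model_entries helper and the keyword arguments passed to the vllm_runner fixture are illustrative assumptions, not the actual vLLM CI harness.

```python
# Hypothetical sketch: parametrize a weight-loading smoke test from models.txt.
# The "quantization, model, revision" line format matches the diff above; the
# parsing helper and fixture arguments are assumptions, not vLLM's real harness.
from pathlib import Path

import pytest


def read_model_entries(path: str = "tests/weight_loading/models.txt"):
    entries = []
    for line in Path(path).read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        quantization, model, revision = (part.strip() for part in line.split(","))
        entries.append((quantization, model, revision))
    return entries


@pytest.mark.parametrize("quantization, model, revision", read_model_entries())
def test_weight_loading(vllm_runner, quantization, model, revision):
    # Load each listed checkpoint and run a tiny greedy generation,
    # mirroring the assertions in test_compressed_tensors.py.
    with vllm_runner(model, quantization=quantization, revision=revision) as llm:
        output = llm.generate_greedy("Hello world!", max_tokens=20)
        assert output
```

Running the heavy Mixtral load through this list rather than the in-process quantization suite keeps the large checkpoint out of the CI job that was hitting CUDA memory limits.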