File tree (Expand / Collapse) — 5 files changed: +39 −10 lines changed
lines changed Original file line number Diff line number Diff line change @@ -45,6 +45,10 @@ microsoft/Phi-3.5-mini-instruct:
4545 - accuracy : 31.354
4646microsoft/Phi-4-mini-instruct :
4747 - accuracy : 32.921
48+ bigcode/starcoder2-7b :
49+ - accuracy : 26.611
50+ - quant_algo : FP8
51+ accuracy : 26.611
4852mistralai/Codestral-22B-v0.1 :
4953 - accuracy : 30.316
5054 - quant_algo : FP8
Original file line number Diff line number Diff line change @@ -229,6 +229,10 @@ nvidia/Nemotron-H-56B-Base-8K:
229229 accuracy : 83.82
230230microsoft/Phi-4-mini-instruct :
231231 - accuracy : 68.98
232+ bigcode/starcoder2-7b :
233+ - accuracy : 41.35
234+ - quant_algo : FP8
235+ accuracy : 41.35
232236mistralai/Codestral-22B-v0.1 :
233237 - accuracy : 61.72
234238 - quant_algo : FP8
Original file line number Diff line number Diff line change @@ -435,6 +435,32 @@ def test_auto_dtype(self):
435435 task .evaluate (llm )
436436
437437
class TestStarCoder2_7B(LlmapiAccuracyTestHarness):
    """Accuracy regression tests for bigcode/starcoder2-7b.

    Covers the unquantized (auto-dtype) checkpoint and an FP8-quantized
    variant, each evaluated on CNN/DailyMail and MMLU.
    """
    MODEL_NAME = "bigcode/starcoder2-7b"
    MODEL_PATH = f"{llm_models_root()}/starcoder2-7b"
    # Limit KV-cache growth so the model fits in GPU memory during eval.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

    def _evaluate_tasks(self, llm):
        # Both test variants run the same two benchmarks in the same order.
        for task in (CnnDailymail(self.MODEL_NAME), MMLU(self.MODEL_NAME)):
            task.evaluate(llm)

    @pytest.mark.skip_less_device_memory(70000)
    def test_auto_dtype(self):
        """Evaluate the unquantized checkpoint."""
        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
            self._evaluate_tasks(llm)

    @skip_pre_ada
    @pytest.mark.skip_less_device_memory(70000)
    def test_fp8(self):
        """Evaluate the FP8-quantized model (requires Ada or newer)."""
        quant_config = QuantConfig(QuantAlgo.FP8)
        with LLM(self.MODEL_PATH,
                 quant_config=quant_config,
                 kv_cache_config=self.kv_cache_config) as llm:
            self._evaluate_tasks(llm)
438464class TestCodestral_22B_V01 (LlmapiAccuracyTestHarness ):
439465 MODEL_NAME = "mistralai/Codestral-22B-v0.1"
440466 MODEL_PATH = f"{ llm_models_root ()} /Codestral-22B-v0.1"
Original file line number Diff line number Diff line change @@ -2426,16 +2426,6 @@ def test_auto_dtype(self):
24262426 task .evaluate (llm )
24272427
24282428
2429- class TestStarCoder2_7B (LlmapiAccuracyTestHarness ):
2430- MODEL_NAME = "bigcode/starcoder2-7b"
2431- MODEL_PATH = f"{ llm_models_root ()} /starcoder2-7b"
2432-
2433- def test_auto_dtype (self ):
2434- with LLM (self .MODEL_PATH ) as llm :
2435- task = MMLU (self .MODEL_NAME )
2436- task .evaluate (llm )
2437-
2438-
24392429class TestCodestral_22B_V01 (LlmapiAccuracyTestHarness ):
24402430 MODEL_NAME = "mistralai/Codestral-22B-v0.1"
24412431 MODEL_PATH = f"{ llm_models_root ()} /Codestral-22B-v0.1"
Original file line number Diff line number Diff line change @@ -21,3 +21,8 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
2121accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
2222accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
2323accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
24+ accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
25+ accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
26+ accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
27+ accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8
28+ accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype
You can’t perform that action at this time.
0 commit comments