torch._inductor.config.force_fuse_int_mm_with_mul = True
torch._inductor.config.fx_graph_cache = True

-def run_evaluation(repo_id, task_list, limit, device, precision, quantization, compile, batch_size, max_length):
+def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):

    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(repo_id).to(device="cpu", dtype=precision)
@@ -41,7 +41,7 @@ def run_evaluation(repo_id, task_list, limit, device, precision, quantization, c
            tokenizer=tokenizer,
            batch_size=batch_size,
            max_length=max_length),
-        get_task_dict(task_list),
+        get_task_dict(tasks),
        limit=limit,
    )
    for task, res in result["results"].items():
@@ -52,7 +52,7 @@ def run_evaluation(repo_id, task_list, limit, device, precision, quantization, c
import argparse
parser = argparse.ArgumentParser(description='Run HF Model Evaluation')
parser.add_argument('--repo_id', type=str, default="meta-llama/Meta-Llama-3-8B", help='Repository ID to download from HF.')
-parser.add_argument('--task_list', nargs='+', type=str, default=["wikitext"], help='List of lm-eluther tasks to evaluate usage: --tasks task1 task2')
+parser.add_argument('--tasks', nargs='+', type=str, default=["wikitext"], help='List of lm-eluther tasks to evaluate usage: --tasks task1 task2')
parser.add_argument('--limit', type=int, default=None, help='Number of eval samples to evaluate')
parser.add_argument('--precision', type=lambda x: getattr(torch, x.split(".")[-1]), default=torch.bfloat16, help='dtype precision to use')
parser.add_argument('--device', type=str, default="cuda", help='Device to use for evaluation')
@@ -62,4 +62,4 @@ def run_evaluation(repo_id, task_list, limit, device, precision, quantization, c
parser.add_argument('--max_length', type=int, default=None, help='Length of text to process at one time')

args = parser.parse_args()
-run_evaluation(args.repo_id, args.task_list, args.limit, args.device, args.precision, args.quantization, args.compile, args.batch_size, args.max_length)
+run_evaluation(args.repo_id, args.tasks, args.limit, args.device, args.precision, args.quantization, args.compile, args.batch_size, args.max_length)
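The hunks above show only fragments of the surrounding evaluation code. As a rough orientation, a minimal sketch of how the renamed tasks argument flows through the script could look like the following; the lm-eval imports (HFLM, evaluate, get_task_dict) and the pretrained=model.to(device) wiring are assumptions inferred from the visible keyword arguments, not part of this diff, and the quantization/compile handling is omitted because it is not shown here.

# Minimal sketch, not the file itself: how `tasks` reaches the eval harness.
# Assumes lm-eval 0.4-style imports; anything not visible in the diff is a guess.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from lm_eval.evaluator import evaluate
from lm_eval.models.huggingface import HFLM
from lm_eval.tasks import get_task_dict

def run_evaluation(repo_id, tasks, limit, device, precision, batch_size, max_length):
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    # Load on CPU in the requested dtype, as in the diff above.
    model = AutoModelForCausalLM.from_pretrained(repo_id).to(device="cpu", dtype=precision)

    result = evaluate(
        HFLM(
            pretrained=model.to(device),  # assumption: move to the eval device when wrapping
            tokenizer=tokenizer,
            batch_size=batch_size,
            max_length=max_length),
        get_task_dict(tasks),             # `tasks` is the list collected by --tasks
        limit=limit,
    )
    for task, res in result["results"].items():
        print(f"{task}: {res}")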
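After the rename, the flag finally matches the usage hint already printed in its own help string; for a script saved as, say, hf_eval.py (the file name here is illustrative), an invocation would look like: python hf_eval.py --tasks wikitext hellaswag --limit 100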