| 
34 | 34 | from lighteval.metrics import (  | 
35 | 35 |     apply_generative_logprob_metric,  | 
36 | 36 |     apply_generative_metric,  | 
37 |  | -    apply_generative_multi_turn_metric,  | 
 | 37 | +    apply_llm_as_judge_metric,  | 
38 | 38 |     apply_multichoice_metric,  | 
39 | 39 |     apply_multichoice_metric_one_token,  | 
40 | 40 |     apply_perplexity_metric,  | 
@@ -412,8 +412,10 @@ def get_request_type(self) -> list[RequestType]:  | 
412 | 412 |             request_types.append(RequestType.LOGLIKELIHOOD_ROLLING)  | 
413 | 413 |         if self.has_metric_category[MetricCategory.GENERATIVE]:  | 
414 | 414 |             request_types.append(RequestType.GREEDY_UNTIL)  | 
415 |  | -        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:  | 
 | 415 | +        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]:  | 
416 | 416 |             request_types.append(RequestType.GREEDY_UNTIL_MULTI_TURN)  | 
 | 417 | +        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE]:  | 
 | 418 | +            request_types.append(RequestType.GREEDY_UNTIL)  | 
417 | 419 |         if self.has_metric_category[MetricCategory.GENERATIVE_LOGPROB]:  | 
418 | 420 |             request_types.append(RequestType.GREEDY_UNTIL_WITH_LOGITS)  | 
419 | 421 |         if self.has_metric_category[MetricCategory.MULTICHOICE]:  | 
@@ -504,7 +506,7 @@ def construct_requests(  | 
504 | 506 |                     choices=formatted_doc.choices,  | 
505 | 507 |                 )  | 
506 | 508 |             ]  | 
507 |  | -        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:  | 
 | 509 | +        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]:  | 
508 | 510 |             requests[RequestType.GREEDY_UNTIL_MULTI_TURN] += [  | 
509 | 511 |                 GreedyUntilMultiTurnRequest(  | 
510 | 512 |                     task_name=current_task_name,  | 
@@ -561,8 +563,11 @@ def process_results(self, formatted_doc: Doc, results: list[ModelReturn]) -> dic  | 
561 | 563 |                 results=results, formatted_doc=formatted_doc, metrics=self.metrics  | 
562 | 564 |             )  | 
563 | 565 |             outputs.update(cur_outputs)  | 
564 |  | -        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:  | 
565 |  | -            results, cur_outputs = apply_generative_multi_turn_metric(  | 
 | 566 | +        if (  | 
 | 567 | +            self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]  | 
 | 568 | +            or self.has_metric_category[MetricCategory.LLM_AS_JUDGE]  | 
 | 569 | +        ):  | 
 | 570 | +            results, cur_outputs = apply_llm_as_judge_metric(  | 
566 | 571 |                 results=results, formatted_doc=formatted_doc, metrics=self.metrics  | 
567 | 572 |             )  | 
568 | 573 |             outputs.update(cur_outputs)  | 
 | 
0 commit comments