@@ -34,7 +34,7 @@
 from lighteval.metrics import (
     apply_generative_logprob_metric,
     apply_generative_metric,
-    apply_generative_multi_turn_metric,
+    apply_llm_as_judge_metric,
     apply_multichoice_metric,
     apply_multichoice_metric_one_token,
     apply_perplexity_metric,
@@ -412,8 +412,10 @@ def get_request_type(self) -> list[RequestType]:
             request_types.append(RequestType.LOGLIKELIHOOD_ROLLING)
         if self.has_metric_category[MetricCategory.GENERATIVE]:
             request_types.append(RequestType.GREEDY_UNTIL)
-        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:
+        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]:
             request_types.append(RequestType.GREEDY_UNTIL_MULTI_TURN)
+        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE]:
+            request_types.append(RequestType.GREEDY_UNTIL)
         if self.has_metric_category[MetricCategory.GENERATIVE_LOGPROB]:
             request_types.append(RequestType.GREEDY_UNTIL_WITH_LOGITS)
         if self.has_metric_category[MetricCategory.MULTICHOICE]:
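Worth noting in this hunk: the new single-turn `LLM_AS_JUDGE` category reuses the existing `GREEDY_UNTIL` request type rather than introducing a new one, since the judge only scores completed generations. A minimal, self-contained sketch of that dispatch pattern (the enums, the flags dict, and the dedup step are illustrative stand-ins, not lighteval's actual definitions):

```python
# Minimal sketch of category -> request-type dispatch; all names below are
# illustrative stand-ins, not lighteval's real enums or method signatures.
from enum import Enum, auto


class MetricCategory(Enum):
    GENERATIVE = auto()
    LLM_AS_JUDGE = auto()
    LLM_AS_JUDGE_MULTI_TURN = auto()


class RequestType(Enum):
    GREEDY_UNTIL = auto()
    GREEDY_UNTIL_MULTI_TURN = auto()


def get_request_types(has_category: dict[MetricCategory, bool]) -> list[RequestType]:
    request_types = []
    if has_category[MetricCategory.GENERATIVE]:
        request_types.append(RequestType.GREEDY_UNTIL)
    if has_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]:
        request_types.append(RequestType.GREEDY_UNTIL_MULTI_TURN)
    # Single-turn judging reuses plain greedy generation: the judge model
    # scores the finished output, so no dedicated request type is needed.
    if has_category[MetricCategory.LLM_AS_JUDGE]:
        request_types.append(RequestType.GREEDY_UNTIL)
    # Deduplicate, since GENERATIVE and LLM_AS_JUDGE both map to GREEDY_UNTIL.
    return list(dict.fromkeys(request_types))


flags = {c: False for c in MetricCategory}
flags[MetricCategory.GENERATIVE] = True
flags[MetricCategory.LLM_AS_JUDGE] = True
assert get_request_types(flags) == [RequestType.GREEDY_UNTIL]
```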
@@ -504,7 +506,7 @@ def construct_requests(
                     choices=formatted_doc.choices,
                 )
             ]
-        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:
+        if self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]:
             requests[RequestType.GREEDY_UNTIL_MULTI_TURN] += [
                 GreedyUntilMultiTurnRequest(
                     task_name=current_task_name,
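The surrounding `construct_requests` logic accumulates requests into a dict keyed by request type via `+=`, so several metric categories can queue work for the same document. A rough standalone sketch of that accumulation pattern (the dataclass and its fields are hypothetical, not lighteval's real `GreedyUntilMultiTurnRequest` signature):

```python
# Standalone sketch of per-request-type accumulation; the dataclass and its
# fields are hypothetical, not lighteval's actual request classes.
from collections import defaultdict
from dataclasses import dataclass


@dataclass
class MultiTurnRequest:
    task_name: str
    turns: list[str]  # one prompt per conversation turn


requests: dict[str, list] = defaultdict(list)

# `+=` extends whatever is already queued under this request type, so every
# document of a multi-turn judge task lands in the same bucket.
requests["greedy_until_multi_turn"] += [
    MultiTurnRequest(task_name="my_task", turns=["turn 1", "turn 2"])
]
assert len(requests["greedy_until_multi_turn"]) == 1
```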
@@ -561,8 +563,11 @@ def process_results(self, formatted_doc: Doc, results: list[ModelReturn]) -> dic
                 results=results, formatted_doc=formatted_doc, metrics=self.metrics
             )
             outputs.update(cur_outputs)
-        if self.has_metric_category[MetricCategory.GENERATIVE_MULTI_TURN]:
-            results, cur_outputs = apply_generative_multi_turn_metric(
+        if (
+            self.has_metric_category[MetricCategory.LLM_AS_JUDGE_MULTI_TURN]
+            or self.has_metric_category[MetricCategory.LLM_AS_JUDGE]
+        ):
+            results, cur_outputs = apply_llm_as_judge_metric(
                 results=results, formatted_doc=formatted_doc, metrics=self.metrics
             )
             outputs.update(cur_outputs)
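Side note on this last hunk: both judge categories now flow through a single `apply_llm_as_judge_metric` call, so the single- vs. multi-turn split is presumably resolved inside the metric itself. A toy, self-contained version of that merged dispatch (the function body and result shapes are invented for illustration, not lighteval's implementation):

```python
# Toy illustration of merging two metric categories into one apply function;
# everything here is a stand-in, not lighteval's actual implementation.
from typing import Callable


def apply_llm_as_judge_metric(
    predictions: list[str],
    judge: Callable[[str], float],
) -> dict[str, float]:
    # One code path serves both single-turn and multi-turn judge tasks: the
    # judge callable scores whatever transcript(s) it is handed.
    scores = [judge(pred) for pred in predictions]
    return {"judge_score": sum(scores) / len(scores)}


outputs: dict[str, float] = {}
is_multi_turn = False
is_single_turn = True

# Mirrors the combined `if (... or ...)` from the diff: either category
# triggers the same metric application.
if is_multi_turn or is_single_turn:
    outputs.update(
        apply_llm_as_judge_metric(
            predictions=["model answer"],
            judge=lambda text: float(len(text) > 0),  # dummy judge
        )
    )

print(outputs)  # {'judge_score': 1.0}
```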