
Commit f8ebe64

add cot_prompt in vllm (#654)

1 parent: dceb39d

File tree: 4 files changed, +20 −2 lines

src/lighteval/main_vllm.py
src/lighteval/pipeline.py
src/lighteval/tasks/lighteval_task.py
src/lighteval/tasks/prompt_manager.py

src/lighteval/main_vllm.py (4 additions, 0 deletions)

@@ -52,6 +52,9 @@ def vllm(
     system_prompt: Annotated[
         Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
+    cot_prompt: Annotated[
+        Optional[str], Option(help="Use chain of thought prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
+    ] = None,
     dataset_loading_processes: Annotated[
         int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
@@ -128,6 +131,7 @@ def vllm(
         max_samples=max_samples,
         use_chat_template=use_chat_template,
         system_prompt=system_prompt,
+        cot_prompt=cot_prompt,
         load_responses_from_details_date_id=load_responses_from_details_date_id,
    )
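
Because cot_prompt is declared as a typer Option, typer's default naming exposes it on the command line as --cot-prompt. A minimal sketch of exercising the new parameter follows; the model and task values are invented, and the positional parameter names (model_args, tasks) are assumed from lighteval's CLI shape, not taken from this commit:

# Sketch, not part of this commit. A typer command is still a plain Python
# function, so the new parameter can be exercised directly. The model/task
# values and positional parameter names below are assumptions.
from lighteval.main_vllm import vllm

vllm(
    model_args="pretrained=HuggingFaceH4/zephyr-7b-beta",
    tasks="lighteval|gsm8k|5|0",
    use_chat_template=True,
    cot_prompt="\nLet's think step by step.",  # surfaces as --cot-prompt on the CLI
)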

src/lighteval/pipeline.py (2 additions, 0 deletions)

@@ -107,6 +107,7 @@ class PipelineParameters:
     max_samples: int | None = None
     use_chat_template: bool = False
     system_prompt: str | None = None
+    cot_prompt: str | None = None
     load_responses_from_details_date_id: str | None = None
 
     def __post_init__(self):  # noqa C901
@@ -236,6 +237,7 @@ def _init_tasks_and_requests(self, tasks: str):
             evaluation_tracker=self.evaluation_tracker,
             use_chat_template=self.pipeline_parameters.use_chat_template,
             system_prompt=self.pipeline_parameters.system_prompt,
+            cot_prompt=self.pipeline_parameters.cot_prompt,
         )
 
         self.task_names_list = task_names_list
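
For programmatic runs the new field rides along with the other prompt-shaping options on the PipelineParameters dataclass. A minimal sketch, assuming any constructor arguments not shown (launcher type, parallelism settings, and so on) either have defaults or are supplied separately for your version:

# Sketch only: just the fields this commit touches. Any other required
# PipelineParameters arguments are omitted and depend on the lighteval version.
from lighteval.pipeline import PipelineParameters

params = PipelineParameters(
    use_chat_template=True,
    system_prompt="You are a meticulous problem solver.",  # hypothetical text
    cot_prompt="\nLet's think step by step.",              # new in this commit
)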

src/lighteval/tasks/lighteval_task.py (4 additions, 0 deletions)

@@ -582,6 +582,7 @@ def create_requests_from_tasks(  # noqa: C901
     evaluation_tracker: "EvaluationTracker",
     use_chat_template: bool,
     system_prompt: str | None,
+    cot_prompt: str | None,
 ) -> Tuple[dict[RequestType, list[Request]], dict[SampleUid, Doc]]:
     """
     Takes a task dict and a fewshot dict and returns a dict of requests, a dict
@@ -599,6 +600,8 @@ def create_requests_from_tasks(  # noqa: C901
         max_samples (int): maximum number of samples.
         evaluation_tracker (EvaluationTracker): evaluation tracker.
         use_chat_template (bool): Whether to use the chat template.
+        system_prompt (str): System prompt
+        cot_prompt (str): Chain of thought prompt
 
     Raises:
         NotImplementedError: If the request type is not implemented for the
@@ -646,6 +649,7 @@ def create_requests_from_tasks(  # noqa: C901
             truncate_few_shots=truncate_few_shots,
             use_chat_template=use_chat_template,
             system_prompt=system_prompt,
+            cot_prompt=cot_prompt,
         )
 
         # Constructing the requests
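
This file only threads the value one level further; nothing here consumes it. Tracing the hunks across the four files gives the full path of the flag:

# How --cot-prompt travels, with names taken from the hunks in this commit
# (the add_context_to_doc step is inferred from the matching signatures):
#
#   vllm(cot_prompt=...)                                 # src/lighteval/main_vllm.py
#     -> PipelineParameters.cot_prompt                   # src/lighteval/pipeline.py
#       -> create_requests_from_tasks(cot_prompt=...)    # src/lighteval/tasks/lighteval_task.py
#         -> add_context_to_doc(cot_prompt=...)          # src/lighteval/tasks/prompt_manager.py
#           -> _single_turn_context -> get_examples(...) # where the text is appended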

src/lighteval/tasks/prompt_manager.py (10 additions, 2 deletions)

@@ -107,6 +107,7 @@ def add_context_to_doc(
         truncate_few_shots: bool = False,
         use_chat_template=False,
         system_prompt: str = None,
+        cot_prompt: str = None,
     ) -> Doc:
         is_multi_turn = doc.specific is not None and len(doc.specific.get("multi_turn_queries", [])) > 0
         if is_multi_turn:
@@ -121,6 +122,7 @@ def add_context_to_doc(
                 sampler=sampler,
                 use_chat_template=use_chat_template,
                 system_prompt=system_prompt,
+                cot_prompt=cot_prompt,
             )
         doc.num_effective_few_shots = num_effective_few_shots
         doc.num_asked_few_shots = num_fewshot
@@ -175,6 +177,7 @@ def _single_turn_context(
         truncate_few_shots: bool = False,
         use_chat_template=False,
         system_prompt: str = None,
+        cot_prompt: str = None,
     ):
         """Returns a fewshot context string that is made up of a prepended description
         (if provided), the `num_fewshot` number of examples, and an appended prompt example.
@@ -206,6 +209,7 @@ def _single_turn_context(
             fewshot_ex=fewshot_ex,
             system_prompt=system_prompt,
             use_chat_template=use_chat_template,
+            cot_prompt=cot_prompt,
         )
         if not use_chat_template:
             toks = self.model.tok_encode(output)
@@ -228,6 +232,7 @@ def _single_turn_context(
                 fewshot_ex=fewshot_ex[:num_effective_fewshots],
                 system_prompt=system_prompt,
                 use_chat_template=use_chat_template,
+                cot_prompt=cot_prompt,
             )
         if not use_chat_template:
             toks = self.model.tok_encode(output)
@@ -252,6 +257,7 @@ def get_examples(
         fewshot_ex: list[str],
         system_prompt: Union[str | None],
         use_chat_template: bool,
+        cot_prompt: Union[str | None],
     ):
         examples = []
         # Few shot examples
@@ -263,10 +269,12 @@ def get_examples(
             examples.append(self.doc_to_text(ex, return_instructions=False) + self.doc_to_target(ex))
 
         # Actual example
+        content = example + cot_prompt if cot_prompt is not None else example
+
         if use_chat_template:
-            examples.append({"role": "user", "content": example})
+            examples.append({"role": "user", "content": content})
         else:
-            examples.append(example)
+            examples.append(content)
 
         # System prompt and instruction
         if use_chat_template:
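
The behavioral change lives in the last hunk: the chain-of-thought text is appended only to the final query, never to the few-shot examples, and it is concatenated verbatim. A standalone illustration of that expression, with invented strings:

# Reproduces the expression added in get_examples(); all inputs are invented.
example = "Question: A train covers 60 km in 45 minutes. What is its speed in km/h?\nAnswer:"
cot_prompt = " Let's think step by step."

# The exact line introduced by this commit:
content = example + cot_prompt if cot_prompt is not None else example

# Chat-template path: the suffix ends up inside the last user turn.
message = {"role": "user", "content": content}

# Plain-text path: the suffix is part of the prompt string directly.
print(content)
# Question: A train covers 60 km in 45 minutes. What is its speed in km/h?
# Answer: Let's think step by step.

Since the concatenation is verbatim, any separating space or newline has to be included in the value passed to --cot-prompt.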
