Merged
4 changes: 4 additions & 0 deletions src/lighteval/main_vllm.py
@@ -52,6 +52,9 @@ def vllm(
system_prompt: Annotated[
Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
] = None,
+ cot_prompt: Annotated[
+ Optional[str], Option(help="Use chain of thought prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
+ ] = None,
dataset_loading_processes: Annotated[
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
@@ -128,6 +131,7 @@ def vllm(
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
+ cot_prompt=cot_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)

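Usage note (not part of the diff): with Typer's standard kebab-case conversion of parameter names, the new option should surface on the CLI as --cot-prompt; the model and task arguments below are placeholders, not confirmed by this diff:

lighteval vllm "pretrained=<model>" "<suite|task|num_fewshot|truncate>" --cot-prompt "Let's think step by step."

The string is threaded from here through PipelineParameters (next file) down to prompt construction in prompt_manager.py.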
2 changes: 2 additions & 0 deletions src/lighteval/pipeline.py
@@ -107,6 +107,7 @@ class PipelineParameters:
max_samples: int | None = None
use_chat_template: bool = False
system_prompt: str | None = None
+ cot_prompt: str | None = None
load_responses_from_details_date_id: str | None = None

def __post_init__(self): # noqa C901
@@ -236,6 +237,7 @@ def _init_tasks_and_requests(self, tasks: str):
evaluation_tracker=self.evaluation_tracker,
use_chat_template=self.pipeline_parameters.use_chat_template,
system_prompt=self.pipeline_parameters.system_prompt,
+ cot_prompt=self.pipeline_parameters.cot_prompt,
)

self.task_names_list = task_names_list
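For orientation, a minimal sketch of setting the new field when driving the pipeline programmatically (hypothetical values; the ParallelismManager import and the launcher_type field are assumptions about lighteval's public API, not shown in this diff; all other fields keep their dataclass defaults):

from lighteval.pipeline import ParallelismManager, PipelineParameters

params = PipelineParameters(
    launcher_type=ParallelismManager.VLLM,  # assumption: not part of this diff
    use_chat_template=True,
    system_prompt="You are a helpful assistant.",
    cot_prompt="\nLet's think step by step.",  # appended verbatim to the final query (see prompt_manager.py below)
)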
4 changes: 4 additions & 0 deletions src/lighteval/tasks/lighteval_task.py
@@ -582,6 +582,7 @@ def create_requests_from_tasks( # noqa: C901
evaluation_tracker: "EvaluationTracker",
use_chat_template: bool,
system_prompt: str | None,
+ cot_prompt: str | None,
) -> Tuple[dict[RequestType, list[Request]], dict[SampleUid, Doc]]:
"""
Takes a task dict and a fewshot dict and returns a dict of requests, a dict
@@ -599,6 +600,8 @@ def create_requests_from_tasks( # noqa: C901
max_samples (int): maximum number of samples.
evaluation_tracker (EvaluationTracker): evaluation tracker.
use_chat_template (bool): Whether to use the chat template.
+ system_prompt (str): System prompt
+ cot_prompt (str): Chain of thought prompt

Raises:
NotImplementedError: If the request type is not implemented for the
@@ -646,6 +649,7 @@ def create_requests_from_tasks( # noqa: C901
truncate_few_shots=truncate_few_shots,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
+ cot_prompt=cot_prompt,
)

# Constructing the requests
12 changes: 10 additions & 2 deletions src/lighteval/tasks/prompt_manager.py
@@ -107,6 +107,7 @@ def add_context_to_doc(
truncate_few_shots: bool = False,
use_chat_template=False,
system_prompt: str = None,
+ cot_prompt: str = None,
) -> Doc:
is_multi_turn = doc.specific is not None and len(doc.specific.get("multi_turn_queries", [])) > 0
if is_multi_turn:
@@ -121,6 +122,7 @@
sampler=sampler,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
+ cot_prompt=cot_prompt,
)
doc.num_effective_few_shots = num_effective_few_shots
doc.num_asked_few_shots = num_fewshot
@@ -175,6 +177,7 @@ def _single_turn_context(
truncate_few_shots: bool = False,
use_chat_template=False,
system_prompt: str = None,
+ cot_prompt: str = None,
):
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
@@ -206,6 +209,7 @@
fewshot_ex=fewshot_ex,
system_prompt=system_prompt,
use_chat_template=use_chat_template,
+ cot_prompt=cot_prompt,
)
if not use_chat_template:
toks = self.model.tok_encode(output)
@@ -228,6 +232,7 @@
fewshot_ex=fewshot_ex[:num_effective_fewshots],
system_prompt=system_prompt,
use_chat_template=use_chat_template,
+ cot_prompt=cot_prompt,
)
if not use_chat_template:
toks = self.model.tok_encode(output)
@@ -252,6 +257,7 @@ def get_examples(
fewshot_ex: list[str],
system_prompt: Union[str | None],
use_chat_template: bool,
+ cot_prompt: Union[str | None],
):
examples = []
# Few shot examples
@@ -263,10 +269,12 @@
examples.append(self.doc_to_text(ex, return_instructions=False) + self.doc_to_target(ex))

# Actual example
+ content = example + cot_prompt if cot_prompt is not None else example
+
if use_chat_template:
- examples.append({"role": "user", "content": example})
+ examples.append({"role": "user", "content": content})
else:
- examples.append(example)
+ examples.append(content)

# System prompt and instruction
if use_chat_template:
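Taken together, the last hunk appends cot_prompt verbatim to the final query only (few-shot examples are left untouched), on both the chat-template and plain-prompt paths. A standalone sketch of that behavior (illustrative strings, not lighteval code):

example = "Question: What is 2 + 2?\nAnswer:"
cot_prompt = " Let's think step by step."

content = example + cot_prompt if cot_prompt is not None else example
# content == "Question: What is 2 + 2?\nAnswer: Let's think step by step."

chat_message = {"role": "user", "content": content}  # use_chat_template=True path
plain_prompt = content  # plain-prompt path

Because the concatenation inserts no separator, callers should include any leading space or newline in the cot_prompt string itself.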