Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/trufflehog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ jobs:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main

7 changes: 2 additions & 5 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
choices=valid_keys_arabic, # Return only valid choices (Arabic keys)
gold_index=answer_index, # Correct index in the valid Arabic keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form
)


Expand Down Expand Up @@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number
)


Expand Down Expand Up @@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
choices=LETTER_INDICES_AR[:3],
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
)


Expand Down Expand Up @@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
def alghafa_pfn(line, task_name: str = None):
question = line["query"]
answer_index = int(line["label"])
choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
allowed_keys = [f"sol{i}" for i in range(1, 6)]
choices = [line[key] for key in allowed_keys if key in line]

instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
Expand Down Expand Up @@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
choices=choices,
gold_index=answer_index, # Correct index in the valid keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form
)


Expand Down
1 change: 0 additions & 1 deletion docs/source/adding-a-new-metric.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,3 @@ if __name__ == "__main__":

You can then give your custom metric to lighteval by using `--custom-tasks
path_to_your_file` when launching it.

12 changes: 6 additions & 6 deletions docs/source/contributing-to-multilingual-evaluations.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ We welcome translations in your language!

To contribute, you'll need to:
1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
2. Edit the file to add or expand the literal for your language of interest.
2. Edit the file to add or expand the literal for your language of interest.

```python
Language.ENGLISH: TranslationLiterals(
Expand Down Expand Up @@ -42,7 +42,7 @@ To contribute, you'll need to

## Contributing a new multilingual task

You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.

Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations, the `prompt_function` should be implemented by a language-adapted template. The template takes care of correct formatting, and of correct and consistent usage of language-adjusted prompt anchors (e.g. Question/Answer) and punctuation.

Expand All @@ -58,7 +58,7 @@ your_tasks = [
LightevalTaskConfig(
# Name of your evaluation
name=f"evalname_{language.value}_{formulation.name.lower()}",
# The evaluation is community contributed
# The evaluation is community contributed
suite=["community"],
# This will automatically get the correct metrics for your chosen formulation
metric=get_metrics_for_formulation(
Expand All @@ -72,7 +72,7 @@ your_tasks = [
# In this function, you choose which template to follow and for which language and formulation
prompt_function=get_template_prompt_function(
language=language,
# then use the adapter to define the mapping between the
# then use the adapter to define the mapping between the
# keys of the template (left), and the keys of your dataset
# (right)
# To know which template keys are required and available,
Expand All @@ -83,9 +83,9 @@ your_tasks = [
},
formulation=formulation,
),
# You can also add specific filters to remove irrelevant samples
# You can also add specific filters to remove irrelevant samples
hf_filter=lambda line: line["label"] in <condition>,
# You then select your huggingface dataset as well as
# You then select your huggingface dataset as well as
# the splits available for evaluation
hf_repo=<dataset>,
hf_subset=<subset>,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/using-the-python-api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def main():
env_config=EnvConfig(cache_dir="tmp/"),
# Remove the 2 parameters below once your configuration is tested
override_batch_size=1,
max_samples=10
max_samples=10
)

model_config = VLLMModelConfig(
Expand Down
Loading