Skip to content

Commit dd3e8fc

Browse files
alielfilali01, clefourrier, and NathanHB
authored
Update arabic_evals.py: Fix custom arabic tasks [2nd attempt] (#444)
Fix the alghafa prompt function by explicitly determining the list of choices based on task_name, since not all subsets of AlGhafa Native share the same columns.

Co-authored-by: Clémentine Fourrier <[email protected]>
Co-authored-by: Nathan Habib <[email protected]>
1 parent d9f9b81 commit dd3e8fc

File tree

5 files changed

+9
-14
lines changed

5 files changed

+9
-14
lines changed

.github/workflows/trufflehog.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,3 @@ jobs:
1616
fetch-depth: 0
1717
- name: Secret Scanning
1818
uses: trufflesecurity/trufflehog@main
19-

community_tasks/arabic_evals.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
8686
choices=valid_keys_arabic, # Return only valid choices (Arabic keys)
8787
gold_index=answer_index, # Correct index in the valid Arabic keys
8888
instruction=instruction,
89-
target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form
9089
)
9190

9291

@@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
149148
choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints
150149
gold_index=answer_index,
151150
instruction=instruction,
152-
target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number
153151
)
154152

155153

@@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
328326
choices=LETTER_INDICES_AR[:3],
329327
gold_index=answer_index,
330328
instruction=instruction,
331-
target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
332329
)
333330

334331

@@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
413410
def alghafa_pfn(line, task_name: str = None):
414411
question = line["query"]
415412
answer_index = int(line["label"])
416-
choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
413+
allowed_keys = [f"sol{i}" for i in range(1, 6)]
414+
choices = [line[key] for key in allowed_keys if key in line]
417415

418416
instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
419417
query = f"{instruction}السؤال: {question}\n"
@@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
802800
choices=choices,
803801
gold_index=answer_index, # Correct index in the valid keys
804802
instruction=instruction,
805-
target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form
806803
)
807804

808805

docs/source/adding-a-new-metric.mdx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,3 @@ if __name__ == "__main__":
9292

9393
You can then give your custom metric to lighteval by using `--custom-tasks
9494
path_to_your_file` when launching it.
95-

docs/source/contributing-to-multilingual-evaluations.mdx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ We welcome translations in your language!
88

99
To contribute, you'll need to
1010
1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
11-
2. Edit the file to add or expand the literal for your language of interest.
11+
2. Edit the file to add or expand the literal for your language of interest.
1212

1313
```python
1414
Language.ENGLISH: TranslationLiterals(
@@ -42,7 +42,7 @@ To contribute, you'll need to
4242

4343
## Contributing a new multilingual task
4444

45-
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
45+
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
4646

4747
Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations the `prompt_function` should be implemented by language-adapted template. The template will take care of correct formatting, correct and consistent usage of language adjusted prompt anchors (e.g Question/Answer) and punctuation.
4848

@@ -58,7 +58,7 @@ your_tasks = [
5858
LightevalTaskConfig(
5959
# Name of your evaluation
6060
name=f"evalname_{language.value}_{formulation.name.lower()}",
61-
# The evaluation is community contributed
61+
# The evaluation is community contributed
6262
suite=["community"],
6363
# This will automatically get the correct metrics for your chosen formulation
6464
metric=get_metrics_for_formulation(
@@ -72,7 +72,7 @@ your_tasks = [
7272
# In this function, you choose which template to follow and for which language and formulation
7373
prompt_function=get_template_prompt_function(
7474
language=language,
75-
# then use the adapter to define the mapping between the
75+
# then use the adapter to define the mapping between the
7676
# keys of the template (left), and the keys of your dataset
7777
# (right)
7878
# To know which template keys are required and available,
@@ -83,9 +83,9 @@ your_tasks = [
8383
},
8484
formulation=formulation,
8585
),
86-
# You can also add specific filters to remove irrelevant samples
86+
# You can also add specific filters to remove irrelevant samples
8787
hf_filter=lambda line: line["label"] in <condition>,
88-
# You then select your huggingface dataset as well as
88+
# You then select your huggingface dataset as well as
8989
# the splits available for evaluation
9090
hf_repo=<dataset>,
9191
hf_subset=<subset>,

docs/source/using-the-python-api.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def main():
3535
env_config=EnvConfig(cache_dir="tmp/"),
3636
# Remove the 2 parameters below once your configuration is tested
3737
override_batch_size=1,
38-
max_samples=10
38+
max_samples=10
3939
)
4040

4141
model_config = VLLMModelConfig(

0 commit comments

Comments (0)