Fix custom arabic tasks (#440)

clefourrier · web-flow · commit 0135c2e6dc7a · 2024-12-12T13:07:54.000+01:00
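* removed unused `output_regex` and `frozen` params from task configs, examples and docs

* fix `alghafa_pfn` to read choices from the fixed `sol1`–`sol4` keys instead of inferring them from the line's keys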
diff --git a/community_tasks/_template.py b/community_tasks/_template.py
@@ -99,8 +99,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
         )
 
 
diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
@@ -109,8 +109,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -174,8 +172,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -241,8 +237,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -299,8 +293,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -361,8 +353,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=[],
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -423,9 +413,7 @@ def arabic_exams_pfn(line, task_name: str = None):
 def alghafa_pfn(line, task_name: str = None):
     question = line["query"]
     answer_index = int(line["label"])
-    # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label'
-    choices_keys = [key for key in line.keys() if key not in ["query", "label", "__few_shots"]]
-    choices = [line[key] for key in choices_keys]
+    choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
 
     instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
     query = f"{instruction}السؤال: {question}\n"
@@ -461,8 +449,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -839,8 +825,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
diff --git a/docs/source/adding-a-custom-task.mdx b/docs/source/adding-a-custom-task.mdx
@@ -107,8 +107,6 @@ class CustomSubsetTask(LightevalTaskConfig):
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
         )
 SUBSET_TASKS = [CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset) for subset in SAMPLE_SUBSETS]
 ```
@@ -154,13 +152,6 @@ Here is a list of the parameters and their meaning:
   for your generation
 - `metric` (list), the metrics you want to use for your evaluation (see next
   section for a detailed explanation)
-- `output_regex` (str), A regex string that will be used to filter your
-  generation. (Generative metrics will only select tokens that are between the
-  first and the second sequence matched by the regex. For example, for a regex
-  matching `\n` and a generation `\nModel generation output\nSome other text`
-  the metric will only be fed with `Model generation output`)
-- `frozen` (bool), for now, is set to False, but we will steadily pass all
-  stable tasks to True.
 - `trust_dataset` (bool), set to True if you trust the dataset.
 
 
diff --git a/docs/source/saving-and-reading-results.mdx b/docs/source/saving-and-reading-results.mdx
@@ -170,9 +170,7 @@ The detail file contains the following columns:
       "stop_sequence": [
         "Question="
       ],
-      "output_regex": null,
       "num_samples": null,
-      "frozen": false,
       "suite": [
         "lighteval"
       ],
diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py
@@ -267,8 +267,6 @@ def __init__(
         generation_size=40,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -282,8 +280,6 @@ def __init__(
             few_shots_select=few_shots_select,
             suite=suite,
             generation_size=generation_size,
-            output_regex=output_regex,
-            frozen=frozen,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
         )
@@ -370,8 +366,6 @@ def __init__(
         generation_size=-1,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -387,8 +381,6 @@ def __init__(
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )
 
 
@@ -487,8 +479,6 @@ def __init__(
         generation_size=4,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -504,8 +494,6 @@ def __init__(
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )
 
 
@@ -623,8 +611,6 @@ def __init__(
         generation_size=-1,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -640,8 +626,6 @@ def __init__(
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )
 
 
diff --git a/examples/nanotron/custom_task.py b/examples/nanotron/custom_task.py
@@ -82,8 +82,6 @@ def mmlu_anatomy(line):
         generation_size=5,
         metric=[Metrics.loglikelihood_acc_single_token],
         stop_sequence=["\n"],
-        output_regex=None,
-        frozen=False,
     ),
     LightevalTaskConfig(
         name="mmlu:anatomy_signs",
@@ -98,7 +96,5 @@ def mmlu_anatomy(line):
         generation_size=5,
         metric=[Metrics.loglikelihood_acc_single_token],
         stop_sequence=["\n"],
-        output_regex=None,
-        frozen=False,
     ),
 ]

Original file line number	Diff line number	Diff line change
`@@ -99,8 +99,6 @@ def __init__(`
`99`	`99`	`suite=["community"],`
`100`	`100`	`generation_size=-1,`
`101`	`101`	`stop_sequence=None,`
`102`		`- output_regex=None,`
`103`		`- frozen=False,`
`104`	`102`	`)`
`105`	`103`
`106`	`104`