Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/lighteval/tasks/multilingual/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,89 @@
]
]

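# Global-MMLU: MMLU translated by both professional and non-professional translators.
# Each sample carries a cultural sensitivity label, which the tasks below can filter on:
# CA: Culturally Agnostic samples only
# CS: Culturally Specific samples only
# ALL: no filtering on the label (every sample of the subject is kept)
# https://huggingface.co/papers/2412.03304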
# One task config per (subset, language, formulation, sensitivity label) combination.
# The loop nesting below fixes the order of the resulting list: `subset` varies slowest,
# `sensitivity_label` varies fastest.
global_mmlu_tasks = [
    LightevalTaskConfig(
        name=f"global_mmlu_{sensitivity_label.lower()}_{language.value}_{formulation.name.lower()}:{subset}",
        prompt_function=get_mcq_prompt_function(
            language,
            # Adapter from a dataset row to the dict handed to get_mcq_prompt_function.
            lambda row: {
                "question": row["question"],
                "choices": [row[key] for key in ("option_a", "option_b", "option_c", "option_d")],
                # The gold answer is stored as a letter; convert it to its position in LETTER_INDICES.
                "gold_idx": LETTER_INDICES.index(row["answer"]),
            },
            formulation=formulation,
        ),
        suite=("lighteval",),
        hf_repo="CohereForAI/Global-MMLU",
        hf_subset=standardize_tag(language.value),
        evaluation_splits=("test",),
        few_shots_split="dev",
        # `partial` freezes the current subset / label into the filter, so each task keeps
        # its own values instead of whatever the loop variables hold at call time.
        hf_filter=partial(
            lambda wanted_subject, wanted_label, row: row["subject"].lower() == wanted_subject
            and (wanted_label == "ALL" or wanted_label in row["cultural_sensitivity_label"]),
            subset,
            sensitivity_label,
        ),
        metric=get_metrics_for_formulation(
            formulation,
            [
                loglikelihood_acc_metric(normalization=normalization())
                for normalization in (LogProbTokenNorm, LogProbCharNorm, LogProbPMINorm)
            ],
        ),
    )
    for subset in MMLU_SUBSETS
    for language in (
        Language.AMHARIC,
        Language.ARABIC,
        Language.BENGALI,
        Language.CHINESE,
        Language.CZECH,
        Language.GERMAN,
        Language.ENGLISH,
        Language.SPANISH,
        Language.FRENCH,
        Language.HEBREW,
        Language.HINDI,
        Language.INDONESIAN,
        Language.ITALIAN,
        Language.JAPANESE,
        Language.KOREAN,
        Language.MALAY,
        Language.DUTCH,
        Language.NORWEGIAN,
        Language.POLISH,
        Language.PORTUGUESE,
        Language.ROMANIAN,
        Language.RUSSIAN,
        Language.SERBIAN,
        Language.SWEDISH,
        Language.SWAHILI,
        Language.TAMIL,
        Language.TELUGU,
        Language.THAI,
        Language.TURKISH,
        Language.UKRAINIAN,
        Language.URDU,
        Language.VIETNAMESE,
        Language.YORUBA,
        Language.ZULU,
    )
    for formulation in (
        MCFFormulation(),
        CFFormulation(),
        HybridFormulation(),
    )
    for sensitivity_label in ("ALL", "CA", "CS")
]


# There are only these subsets in the African MMLU
AFRI_MMLU_SUBSETS = [
"elementary_mathematics",
Expand Down Expand Up @@ -2088,6 +2171,7 @@
*arabic_mmlu_tasks,
*turkish_mmlu_tasks,
*afri_mmlu_tasks,
*global_mmlu_tasks,
]
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,4 +1007,5 @@ def __getattribute__(self, name: str) -> str:
Language.WESTERN_FRISIAN: TranslationLiterals(language=Language.WESTERN_FRISIAN),
Language.YIDDISH: TranslationLiterals(language=Language.YIDDISH),
Language.YORUBA: TranslationLiterals(language=Language.YORUBA),
Language.ZULU: TranslationLiterals(language=Language.ZULU),
}
1 change: 1 addition & 0 deletions src/lighteval/utils/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class Language(Enum):
WAR = "war"
SHAN = "shn"
UDMURT = "udm"
ZULU = "zul"


# This mapping was created for Belebele; it converts ISO 639-3 individual language codes to ISO 639-3 macrolanguage codes
Expand Down
Loading