diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py index c66922de3..7537c38f0 100644 --- a/src/lighteval/tasks/templates/utils/translation_literals.py +++ b/src/lighteval/tasks/templates/utils/translation_literals.py @@ -668,6 +668,29 @@ def __getattribute__(self, name: str) -> str: semicolon=";", ), Language.SERBOCROATIAN: TranslationLiterals(language=Language.SERBOCROATIAN), # Deprecated + Language.SHAN: TranslationLiterals( + language=Language.SHAN, + question_word="ၶေႃႈထၢမ်", + answer="ၶေႃႈတွပ်ႇ", + confirmation_word="ၸွင်ႇၸႂ်ႈ", + yes="ၸႂ်ႈ", + no="ဢမ်ႇ", + also="လႄႈ", + cause_word="ၵွပ်ႈပိူဝ်ႈ", + effect_word="ၵွပ်ႈၼၼ်", + true="တႄႉ", + false="ဢမ်ႇတႄႉ", + neither="ဢမ်ႇၸႂ်ႈတင်းသွင်ဢၼ်", + or_word="ဢမ်ႇၼၼ်", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space="", + sentence_space=" ", + colon=":", + indices=["ၵ", "ၶ", "င", "ၸ", "သ", "ၺ"], + ), Language.SINDHI: TranslationLiterals(language=Language.SINDHI), Language.SINHALA: TranslationLiterals(language=Language.SINHALA), Language.SLOVAK: TranslationLiterals( diff --git a/src/lighteval/utils/language.py b/src/lighteval/utils/language.py index 6fb444933..1e9707a3e 100644 --- a/src/lighteval/utils/language.py +++ b/src/lighteval/utils/language.py @@ -120,6 +120,7 @@ class Language(Enum): SORANI = "ckb" CEBUANO = "ceb" WAR = "war" + SHAN = "shn" # This mapping was created for beleble, it converts iso_639_3 individual codes to iso_639_3 macro codes @@ -211,7 +212,7 @@ class Language(Enum): # 'lin': Language.LINGALA, # 'mri': Language.MAORI, "pan": Language.PUNJABI, - # 'shn': Language.SHAN, + "shn": Language.SHAN, "spa": Language.SPANISH, "fil": Language.TAGALOG, "uzn": Language.UZBEK,