# Lookup tables that map language names to language codes (and, in one table, codes back to names).

# Display name -> language code for NLLB-style models.
# Every code has the shape "<3-letter language>_<4-letter script>", e.g. "eng_Latn".
# A language written in more than one script has one entry per script, with the
# script spelled out in the display name (e.g. "Acehnese (Arabic script)").
# NOTE(review): names and codes look like the FLORES-200 list - confirm against
# the model card before adding or renaming entries.
nllb_language_codes: dict[str, str] = {
    "Acehnese (Arabic script)": "ace_Arab",
    "Acehnese (Latin script)": "ace_Latn",
    "Mesopotamian Arabic": "acm_Arab",
    "Ta’izzi-Adeni Arabic": "acq_Arab",  # key uses a typographic apostrophe (U+2019), not "'"
    "Tunisian Arabic": "aeb_Arab",
    "Afrikaans": "afr_Latn",
    "South Levantine Arabic": "ajp_Arab",
    "Akan": "aka_Latn",
    "Amharic": "amh_Ethi",
    "North Levantine Arabic": "apc_Arab",
    "Modern Standard Arabic": "arb_Arab",
    "Modern Standard Arabic (Romanized)": "arb_Latn",
    "Najdi Arabic": "ars_Arab",
    "Moroccan Arabic": "ary_Arab",
    "Egyptian Arabic": "arz_Arab",
    "Assamese": "asm_Beng",
    "Asturian": "ast_Latn",
    "Awadhi": "awa_Deva",
    "Central Aymara": "ayr_Latn",
    "South Azerbaijani": "azb_Arab",
    "North Azerbaijani": "azj_Latn",
    "Bashkir": "bak_Cyrl",
    "Bambara": "bam_Latn",
    "Balinese": "ban_Latn",
    "Belarusian": "bel_Cyrl",
    "Bemba": "bem_Latn",
    "Bengali": "ben_Beng",
    "Bhojpuri": "bho_Deva",
    "Banjar (Arabic script)": "bjn_Arab",
    "Banjar (Latin script)": "bjn_Latn",
    "Standard Tibetan": "bod_Tibt",
    "Bosnian": "bos_Latn",
    "Buginese": "bug_Latn",
    "Bulgarian": "bul_Cyrl",
    "Catalan": "cat_Latn",
    "Cebuano": "ceb_Latn",
    "Czech": "ces_Latn",
    "Chokwe": "cjk_Latn",
    "Central Kurdish": "ckb_Arab",
    "Crimean Tatar": "crh_Latn",
    "Welsh": "cym_Latn",
    "Danish": "dan_Latn",
    "German": "deu_Latn",
    "Southwestern Dinka": "dik_Latn",
    "Dyula": "dyu_Latn",
    "Dzongkha": "dzo_Tibt",
    "Greek": "ell_Grek",
    "English": "eng_Latn",
    "Esperanto": "epo_Latn",
    "Estonian": "est_Latn",
    "Basque": "eus_Latn",
    "Ewe": "ewe_Latn",
    "Faroese": "fao_Latn",
    "Fijian": "fij_Latn",
    "Finnish": "fin_Latn",
    "Fon": "fon_Latn",
    "French": "fra_Latn",
    "Friulian": "fur_Latn",
    "Nigerian Fulfulde": "fuv_Latn",
    "Scottish Gaelic": "gla_Latn",
    "Irish": "gle_Latn",
    "Galician": "glg_Latn",
    "Guarani": "grn_Latn",
    "Gujarati": "guj_Gujr",
    "Haitian Creole": "hat_Latn",
    "Hausa": "hau_Latn",
    "Hebrew": "heb_Hebr",
    "Hindi": "hin_Deva",
    "Chhattisgarhi": "hne_Deva",
    "Croatian": "hrv_Latn",
    "Hungarian": "hun_Latn",
    "Armenian": "hye_Armn",
    "Igbo": "ibo_Latn",
    "Ilocano": "ilo_Latn",
    "Indonesian": "ind_Latn",
    "Icelandic": "isl_Latn",
    "Italian": "ita_Latn",
    "Javanese": "jav_Latn",
    "Japanese": "jpn_Jpan",
    "Kabyle": "kab_Latn",
    "Jingpho": "kac_Latn",
    "Kamba": "kam_Latn",
    "Kannada": "kan_Knda",
    "Kashmiri (Arabic script)": "kas_Arab",
    "Kashmiri (Devanagari script)": "kas_Deva",
    "Georgian": "kat_Geor",
    "Central Kanuri (Arabic script)": "knc_Arab",
    "Central Kanuri (Latin script)": "knc_Latn",
    "Kazakh": "kaz_Cyrl",
    "Kabiyè": "kbp_Latn",
    "Kabuverdianu": "kea_Latn",
    "Khmer": "khm_Khmr",
    "Kikuyu": "kik_Latn",
    "Kinyarwanda": "kin_Latn",
    "Kyrgyz": "kir_Cyrl",
    "Kimbundu": "kmb_Latn",
    "Northern Kurdish": "kmr_Latn",
    "Kikongo": "kon_Latn",
    "Korean": "kor_Hang",
    "Lao": "lao_Laoo",
    "Ligurian": "lij_Latn",
    "Limburgish": "lim_Latn",
    "Lingala": "lin_Latn",
    "Lithuanian": "lit_Latn",
    "Lombard": "lmo_Latn",
    "Latgalian": "ltg_Latn",
    "Luxembourgish": "ltz_Latn",
    "Luba-Kasai": "lua_Latn",
    "Ganda": "lug_Latn",
    "Luo": "luo_Latn",
    "Mizo": "lus_Latn",
    "Standard Latvian": "lvs_Latn",
    "Magahi": "mag_Deva",
    "Maithili": "mai_Deva",
    "Malayalam": "mal_Mlym",
    "Marathi": "mar_Deva",
    "Minangkabau (Arabic script)": "min_Arab",
    "Minangkabau (Latin script)": "min_Latn",
    "Macedonian": "mkd_Cyrl",
    "Plateau Malagasy": "plt_Latn",
    "Maltese": "mlt_Latn",
    "Meitei (Bengali script)": "mni_Beng",
    "Halh Mongolian": "khk_Cyrl",
    "Mossi": "mos_Latn",
    "Maori": "mri_Latn",
    "Burmese": "mya_Mymr",
    "Dutch": "nld_Latn",
    "Norwegian Nynorsk": "nno_Latn",
    "Norwegian Bokmål": "nob_Latn",
    "Nepali": "npi_Deva",
    "Northern Sotho": "nso_Latn",
    "Nuer": "nus_Latn",
    "Nyanja": "nya_Latn",
    "Occitan": "oci_Latn",
    "West Central Oromo": "gaz_Latn",
    "Odia": "ory_Orya",
    "Pangasinan": "pag_Latn",
    "Eastern Panjabi": "pan_Guru",
    "Papiamento": "pap_Latn",
    "Western Persian": "pes_Arab",
    "Polish": "pol_Latn",
    "Portuguese": "por_Latn",
    "Dari": "prs_Arab",
    "Southern Pashto": "pbt_Arab",
    "Ayacucho Quechua": "quy_Latn",
    "Romanian": "ron_Latn",
    "Rundi": "run_Latn",
    "Russian": "rus_Cyrl",
    "Sango": "sag_Latn",
    "Sanskrit": "san_Deva",
    "Santali": "sat_Olck",
    "Sicilian": "scn_Latn",
    "Shan": "shn_Mymr",
    "Sinhala": "sin_Sinh",
    "Slovak": "slk_Latn",
    "Slovenian": "slv_Latn",
    "Samoan": "smo_Latn",
    "Shona": "sna_Latn",
    "Sindhi": "snd_Arab",
    "Somali": "som_Latn",
    "Southern Sotho": "sot_Latn",
    "Spanish": "spa_Latn",
    "Tosk Albanian": "als_Latn",
    "Sardinian": "srd_Latn",
    "Serbian": "srp_Cyrl",
    "Swati": "ssw_Latn",
    "Sundanese": "sun_Latn",
    "Swedish": "swe_Latn",
    "Swahili": "swh_Latn",
    "Silesian": "szl_Latn",
    "Tamil": "tam_Taml",
    "Tatar": "tat_Cyrl",
    "Telugu": "tel_Telu",
    "Tajik": "tgk_Cyrl",
    "Tagalog": "tgl_Latn",
    "Thai": "tha_Thai",
    "Tigrinya": "tir_Ethi",
    "Tamasheq (Latin script)": "taq_Latn",
    "Tamasheq (Tifinagh script)": "taq_Tfng",
    "Tok Pisin": "tpi_Latn",
    "Tswana": "tsn_Latn",
    "Tsonga": "tso_Latn",
    "Turkmen": "tuk_Latn",
    "Tumbuka": "tum_Latn",
    "Turkish": "tur_Latn",
    "Twi": "twi_Latn",
    "Central Atlas Tamazight": "tzm_Tfng",
    "Uyghur": "uig_Arab",
    "Ukrainian": "ukr_Cyrl",
    "Umbundu": "umb_Latn",
    "Urdu": "urd_Arab",
    "Northern Uzbek": "uzn_Latn",
    "Venetian": "vec_Latn",
    "Vietnamese": "vie_Latn",
    "Waray": "war_Latn",
    "Wolof": "wol_Latn",
    "Xhosa": "xho_Latn",
    "Eastern Yiddish": "ydd_Hebr",
    "Yoruba": "yor_Latn",
    "Yue Chinese": "yue_Hant",
    "Chinese (Simplified)": "zho_Hans",
    "Chinese (Traditional)": "zho_Hant",
    "Standard Malay": "zsm_Latn",
    "Zulu": "zul_Latn",
}

# Language name -> language code for mBART-style models.
# Every code has the shape "ll_CC": a two-letter language code, an underscore,
# then a two-letter suffix that is either a region (e.g. "de_DE") or the
# placeholder "XX" (e.g. "en_XX").
# NOTE(review): the set of codes looks like the mBART-50 language list - confirm
# against the tokenizer actually in use before adding entries.
mbart_large_languages: dict[str, str] = {
    'Arabic': 'ar_AR',
    'Czech': 'cs_CZ',
    'German': 'de_DE',
    'English': 'en_XX',
    'Spanish': 'es_XX',
    'Estonian': 'et_EE',
    'Finnish': 'fi_FI',
    'French': 'fr_XX',
    'Gujarati': 'gu_IN',
    'Hindi': 'hi_IN',
    'Italian': 'it_IT',
    'Japanese': 'ja_XX',
    'Kazakh': 'kk_KZ',
    'Korean': 'ko_KR',
    'Lithuanian': 'lt_LT',
    'Latvian': 'lv_LV',
    'Burmese': 'my_MM',
    'Nepali': 'ne_NP',
    'Dutch': 'nl_XX',
    'Romanian': 'ro_RO',
    'Russian': 'ru_RU',
    'Sinhala': 'si_LK',
    'Turkish': 'tr_TR',
    'Vietnamese': 'vi_VN',
    'Chinese': 'zh_CN',
    'Afrikaans': 'af_ZA',
    'Azerbaijani': 'az_AZ',
    'Bengali': 'bn_IN',
    'Persian': 'fa_IR',
    'Hebrew': 'he_IL',
    'Croatian': 'hr_HR',
    'Indonesian': 'id_ID',
    'Georgian': 'ka_GE',
    'Khmer': 'km_KH',
    'Macedonian': 'mk_MK',
    'Malayalam': 'ml_IN',
    'Mongolian': 'mn_MN',
    'Marathi': 'mr_IN',
    'Polish': 'pl_PL',
    'Pashto': 'ps_AF',
    'Portuguese': 'pt_XX',
    'Swedish': 'sv_SE',
    'Swahili': 'sw_KE',
    'Tamil': 'ta_IN',
    'Telugu': 'te_IN',
    'Thai': 'th_TH',
    'Tagalog': 'tl_XX',
    'Ukrainian': 'uk_UA',
    'Urdu': 'ur_PK',
    'Xhosa': 'xh_ZA',
    'Galician': 'gl_ES',
    'Slovene': 'sl_SI',
}

# Language display name -> short language code.
# Most codes are two letters (e.g. "en"); a few are three letters ("ceb", "haw")
# and Chinese additionally has region-qualified variants ("zh-CN", "zh-TW")
# alongside the bare "zh". Some display names carry an alternative name in
# parentheses, e.g. "Myanmar (Burmese)" - look-ups must use the exact key.
# NOTE(review): the naming and code choices resemble a translation-service
# language list rather than a strict ISO 639-1 table - confirm the consumer.
iso_languages: dict[str, str] = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Basque": "eu",
    "Belarusian": "be",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Chinese (Simplified)": "zh-CN",
    "Chinese (Traditional)": "zh-TW",
    "Chinese": "zh",
    "Corsican": "co",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Finnish": "fi",
    "French": "fr",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "he",
    "Hindi": "hi",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Igbo": "ig",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kinyarwanda": "rw",
    "Korean": "ko",
    "Kurdish (Kurmanji)": "ku",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Maori": "mi",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nepali": "ne",
    "Norwegian": "no",
    "Nyanja (Chichewa)": "ny",
    "Odia (Oriya)": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Samoan": "sm",
    "Scots Gaelic": "gd",
    "Serbian": "sr",
    "Sesotho": "st",
    "Shona": "sn",
    "Sindhi": "sd",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tagalog (Filipino)": "tl",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu",
}

# Language name -> (two-letter code, three-letter code, three-letter code).
# The two three-letter codes are identical for most languages and differ for a
# handful, e.g. German ('ger' / 'deu') or French ('fre' / 'fra'); those pairs
# correspond to the ISO 639-2 bibliographic and terminologic forms respectively.
# The sibling table `iso_languages_byiso1` holds the same rows keyed by the
# two-letter code, so any edit here should be mirrored there.
iso_languages_byname: dict[str, tuple[str, str, str]] = {
    'Abkhazian': ('ab', 'abk', 'abk'),
    'Afar': ('aa', 'aar', 'aar'),
    'Afrikaans': ('af', 'afr', 'afr'),
    'Akan': ('ak', 'aka', 'aka'),
    'Albanian': ('sq', 'alb', 'sqi'),
    'Amharic': ('am', 'amh', 'amh'),
    'Arabic': ('ar', 'ara', 'ara'),
    'Aragonese': ('an', 'arg', 'arg'),
    'Armenian': ('hy', 'arm', 'hye'),
    'Assamese': ('as', 'asm', 'asm'),
    'Avaric': ('av', 'ava', 'ava'),
    'Avestan': ('ae', 'ave', 'ave'),
    'Aymara': ('ay', 'aym', 'aym'),
    'Azerbaijani': ('az', 'aze', 'aze'),
    'Bambara': ('bm', 'bam', 'bam'),
    'Bashkir': ('ba', 'bak', 'bak'),
    'Basque': ('eu', 'baq', 'eus'),
    'Belarusian': ('be', 'bel', 'bel'),
    'Bengali': ('bn', 'ben', 'ben'),
    'Bislama': ('bi', 'bis', 'bis'),
    'Bosnian': ('bs', 'bos', 'bos'),
    'Breton': ('br', 'bre', 'bre'),
    'Bulgarian': ('bg', 'bul', 'bul'),
    'Burmese': ('my', 'bur', 'mya'),
    'Catalan': ('ca', 'cat', 'cat'),
    'Chamorro': ('ch', 'cha', 'cha'),
    'Chechen': ('ce', 'che', 'che'),
    'Chichewa': ('ny', 'nya', 'nya'),
    'Chinese': ('zh', 'chi', 'zho'),
    'Church Slavic': ('cu', 'chu', 'chu'),
    'Chuvash': ('cv', 'chv', 'chv'),
    'Cornish': ('kw', 'cor', 'cor'),
    'Corsican': ('co', 'cos', 'cos'),
    'Cree': ('cr', 'cre', 'cre'),
    'Croatian': ('hr', 'hrv', 'hrv'),
    'Czech': ('cs', 'cze', 'ces'),
    'Danish': ('da', 'dan', 'dan'),
    'Divehi': ('dv', 'div', 'div'),
    'Dutch': ('nl', 'dut', 'nld'),
    'Dzongkha': ('dz', 'dzo', 'dzo'),
    'English': ('en', 'eng', 'eng'),
    'Esperanto': ('eo', 'epo', 'epo'),
    'Estonian': ('et', 'est', 'est'),
    'Ewe': ('ee', 'ewe', 'ewe'),
    'Faroese': ('fo', 'fao', 'fao'),
    'Fijian': ('fj', 'fij', 'fij'),
    'Finnish': ('fi', 'fin', 'fin'),
    'French': ('fr', 'fre', 'fra'),
    'Fulah': ('ff', 'ful', 'ful'),
    'Galician': ('gl', 'glg', 'glg'),
    'Ganda': ('lg', 'lug', 'lug'),
    'Georgian': ('ka', 'geo', 'kat'),
    'German': ('de', 'ger', 'deu'),
    'Greek': ('el', 'gre', 'ell'),
    'Guarani': ('gn', 'grn', 'grn'),
    'Gujarati': ('gu', 'guj', 'guj'),
    'Haitian': ('ht', 'hat', 'hat'),
    'Hausa': ('ha', 'hau', 'hau'),
    'Hebrew': ('he', 'heb', 'heb'),
    'Herero': ('hz', 'her', 'her'),
    'Hindi': ('hi', 'hin', 'hin'),
    'Hiri Motu': ('ho', 'hmo', 'hmo'),
    'Hungarian': ('hu', 'hun', 'hun'),
    'Icelandic': ('is', 'ice', 'isl'),
    'Ido': ('io', 'ido', 'ido'),
    'Igbo': ('ig', 'ibo', 'ibo'),
    'Indonesian': ('id', 'ind', 'ind'),
    'Interlingua': ('ia', 'ina', 'ina'),
    'Interlingue': ('ie', 'ile', 'ile'),
    'Inuktitut': ('iu', 'iku', 'iku'),
    'Inupiaq': ('ik', 'ipk', 'ipk'),
    'Irish': ('ga', 'gle', 'gle'),
    'Italian': ('it', 'ita', 'ita'),
    'Japanese': ('ja', 'jpn', 'jpn'),
    'Javanese': ('jv', 'jav', 'jav'),
    'Kalaallisut': ('kl', 'kal', 'kal'),
    'Kannada': ('kn', 'kan', 'kan'),
    'Kanuri': ('kr', 'kau', 'kau'),
    'Kashmiri': ('ks', 'kas', 'kas'),
    'Kazakh': ('kk', 'kaz', 'kaz'),
    'Khmer': ('km', 'khm', 'khm'),
    'Kikuyu': ('ki', 'kik', 'kik'),
    'Kinyarwanda': ('rw', 'kin', 'kin'),
    'Kirghiz': ('ky', 'kir', 'kir'),
    'Komi': ('kv', 'kom', 'kom'),
    'Kongo': ('kg', 'kon', 'kon'),
    'Korean': ('ko', 'kor', 'kor'),
    'Kuanyama': ('kj', 'kua', 'kua'),
    'Kurdish': ('ku', 'kur', 'kur'),
    'Lao': ('lo', 'lao', 'lao'),
    'Latin': ('la', 'lat', 'lat'),
    'Latvian': ('lv', 'lav', 'lav'),
    'Limburgan': ('li', 'lim', 'lim'),
    'Lingala': ('ln', 'lin', 'lin'),
    'Lithuanian': ('lt', 'lit', 'lit'),
    'Luba-Katanga': ('lu', 'lub', 'lub'),
    'Luxembourgish': ('lb', 'ltz', 'ltz'),
    'Macedonian': ('mk', 'mac', 'mkd'),
    'Malagasy': ('mg', 'mlg', 'mlg'),
    'Malay': ('ms', 'may', 'msa'),
    'Malayalam': ('ml', 'mal', 'mal'),
    'Maltese': ('mt', 'mlt', 'mlt'),
    'Manx': ('gv', 'glv', 'glv'),
    'Maori': ('mi', 'mao', 'mri'),
    'Marathi': ('mr', 'mar', 'mar'),
    'Marshallese': ('mh', 'mah', 'mah'),
    'Mongolian': ('mn', 'mon', 'mon'),
    'Nauru': ('na', 'nau', 'nau'),
    'Navajo': ('nv', 'nav', 'nav'),
    'Ndonga': ('ng', 'ndo', 'ndo'),
    'Nepali': ('ne', 'nep', 'nep'),
    'North Ndebele': ('nd', 'nde', 'nde'),
    'Northern Sami': ('se', 'sme', 'sme'),
    'Norwegian': ('no', 'nor', 'nor'),
    'Norwegian Bokmål': ('nb', 'nob', 'nob'),
    'Norwegian Nynorsk': ('nn', 'nno', 'nno'),
    'Occitan': ('oc', 'oci', 'oci'),
    'Ojibwa': ('oj', 'oji', 'oji'),
    'Oriya': ('or', 'ori', 'ori'),
    'Oromo': ('om', 'orm', 'orm'),
    'Ossetian': ('os', 'oss', 'oss'),
    'Pali': ('pi', 'pli', 'pli'),
    'Panjabi': ('pa', 'pan', 'pan'),
    'Persian': ('fa', 'per', 'fas'),
    'Polish': ('pl', 'pol', 'pol'),
    'Portuguese': ('pt', 'por', 'por'),
    'Pushto': ('ps', 'pus', 'pus'),
    'Quechua': ('qu', 'que', 'que'),
    'Romanian': ('ro', 'rum', 'ron'),
    'Romansh': ('rm', 'roh', 'roh'),
    'Rundi': ('rn', 'run', 'run'),
    'Russian': ('ru', 'rus', 'rus'),
    'Samoan': ('sm', 'smo', 'smo'),
    'Sango': ('sg', 'sag', 'sag'),
    'Sanskrit': ('sa', 'san', 'san'),
    'Sardinian': ('sc', 'srd', 'srd'),
    'Scottish Gaelic': ('gd', 'gla', 'gla'),
    'Serbian': ('sr', 'srp', 'srp'),
    'Shona': ('sn', 'sna', 'sna'),
    'Sichuan Yi': ('ii', 'iii', 'iii'),
    'Sindhi': ('sd', 'snd', 'snd'),
    'Sinhala': ('si', 'sin', 'sin'),
    'Slovak': ('sk', 'slo', 'slk'),
    'Slovenian': ('sl', 'slv', 'slv'),
    'Somali': ('so', 'som', 'som'),
    'South Ndebele': ('nr', 'nbl', 'nbl'),
    'Southern Sotho': ('st', 'sot', 'sot'),
    'Spanish': ('es', 'spa', 'spa'),
    'Sundanese': ('su', 'sun', 'sun'),
    'Swahili': ('sw', 'swa', 'swa'),
    'Swati': ('ss', 'ssw', 'ssw'),
    'Swedish': ('sv', 'swe', 'swe'),
    'Tagalog': ('tl', 'tgl', 'tgl'),
    'Tahitian': ('ty', 'tah', 'tah'),
    'Tajik': ('tg', 'tgk', 'tgk'),
    'Tamil': ('ta', 'tam', 'tam'),
    'Tatar': ('tt', 'tat', 'tat'),
    'Telugu': ('te', 'tel', 'tel'),
    'Thai': ('th', 'tha', 'tha'),
    'Tibetan': ('bo', 'tib', 'bod'),
    'Tigrinya': ('ti', 'tir', 'tir'),
    'Tonga': ('to', 'ton', 'ton'),
    'Tsonga': ('ts', 'tso', 'tso'),
    'Tswana': ('tn', 'tsn', 'tsn'),
    'Turkish': ('tr', 'tur', 'tur'),
    'Turkmen': ('tk', 'tuk', 'tuk'),
    'Twi': ('tw', 'twi', 'twi'),
    'Uighur': ('ug', 'uig', 'uig'),
    'Ukrainian': ('uk', 'ukr', 'ukr'),
    'Urdu': ('ur', 'urd', 'urd'),
    'Uzbek': ('uz', 'uzb', 'uzb'),
    'Venda': ('ve', 'ven', 'ven'),
    'Vietnamese': ('vi', 'vie', 'vie'),
    'Volapük': ('vo', 'vol', 'vol'),
    'Walloon': ('wa', 'wln', 'wln'),
    'Welsh': ('cy', 'wel', 'cym'),
    'Western Frisian': ('fy', 'fry', 'fry'),
    'Wolof': ('wo', 'wol', 'wol'),
    'Xhosa': ('xh', 'xho', 'xho'),
    'Yiddish': ('yi', 'yid', 'yid'),
    'Yoruba': ('yo', 'yor', 'yor'),
    'Zhuang': ('za', 'zha', 'zha'),
    'Zulu': ('zu', 'zul', 'zul'),
}

# Two-letter language code -> (language name, three-letter code, three-letter code).
# The two three-letter codes are identical for most languages and differ for a
# handful, e.g. 'de' -> ('German', 'ger', 'deu'); those pairs correspond to the
# ISO 639-2 bibliographic and terminologic forms respectively.
# The sibling table `iso_languages_byname` holds the same rows keyed by language
# name, so any edit here should be mirrored there.
iso_languages_byiso1: dict[str, tuple[str, str, str]] = {
    'ab': ('Abkhazian', 'abk', 'abk'),
    'aa': ('Afar', 'aar', 'aar'),
    'af': ('Afrikaans', 'afr', 'afr'),
    'ak': ('Akan', 'aka', 'aka'),
    'sq': ('Albanian', 'alb', 'sqi'),
    'am': ('Amharic', 'amh', 'amh'),
    'ar': ('Arabic', 'ara', 'ara'),
    'an': ('Aragonese', 'arg', 'arg'),
    'hy': ('Armenian', 'arm', 'hye'),
    'as': ('Assamese', 'asm', 'asm'),
    'av': ('Avaric', 'ava', 'ava'),
    'ae': ('Avestan', 'ave', 'ave'),
    'ay': ('Aymara', 'aym', 'aym'),
    'az': ('Azerbaijani', 'aze', 'aze'),
    'bm': ('Bambara', 'bam', 'bam'),
    'ba': ('Bashkir', 'bak', 'bak'),
    'eu': ('Basque', 'baq', 'eus'),
    'be': ('Belarusian', 'bel', 'bel'),
    'bn': ('Bengali', 'ben', 'ben'),
    'bi': ('Bislama', 'bis', 'bis'),
    'bs': ('Bosnian', 'bos', 'bos'),
    'br': ('Breton', 'bre', 'bre'),
    'bg': ('Bulgarian', 'bul', 'bul'),
    'my': ('Burmese', 'bur', 'mya'),
    'ca': ('Catalan', 'cat', 'cat'),
    'ch': ('Chamorro', 'cha', 'cha'),
    'ce': ('Chechen', 'che', 'che'),
    'ny': ('Chichewa', 'nya', 'nya'),
    'zh': ('Chinese', 'chi', 'zho'),
    'cu': ('Church Slavic', 'chu', 'chu'),
    'cv': ('Chuvash', 'chv', 'chv'),
    'kw': ('Cornish', 'cor', 'cor'),
    'co': ('Corsican', 'cos', 'cos'),
    'cr': ('Cree', 'cre', 'cre'),
    'hr': ('Croatian', 'hrv', 'hrv'),
    'cs': ('Czech', 'cze', 'ces'),
    'da': ('Danish', 'dan', 'dan'),
    'dv': ('Divehi', 'div', 'div'),
    'nl': ('Dutch', 'dut', 'nld'),
    'dz': ('Dzongkha', 'dzo', 'dzo'),
    'en': ('English', 'eng', 'eng'),
    'eo': ('Esperanto', 'epo', 'epo'),
    'et': ('Estonian', 'est', 'est'),
    'ee': ('Ewe', 'ewe', 'ewe'),
    'fo': ('Faroese', 'fao', 'fao'),
    'fj': ('Fijian', 'fij', 'fij'),
    'fi': ('Finnish', 'fin', 'fin'),
    'fr': ('French', 'fre', 'fra'),
    'ff': ('Fulah', 'ful', 'ful'),
    'gl': ('Galician', 'glg', 'glg'),
    'lg': ('Ganda', 'lug', 'lug'),
    'ka': ('Georgian', 'geo', 'kat'),
    'de': ('German', 'ger', 'deu'),
    'el': ('Greek', 'gre', 'ell'),
    'gn': ('Guarani', 'grn', 'grn'),
    'gu': ('Gujarati', 'guj', 'guj'),
    'ht': ('Haitian', 'hat', 'hat'),
    'ha': ('Hausa', 'hau', 'hau'),
    'he': ('Hebrew', 'heb', 'heb'),
    'hz': ('Herero', 'her', 'her'),
    'hi': ('Hindi', 'hin', 'hin'),
    'ho': ('Hiri Motu', 'hmo', 'hmo'),
    'hu': ('Hungarian', 'hun', 'hun'),
    'is': ('Icelandic', 'ice', 'isl'),
    'io': ('Ido', 'ido', 'ido'),
    'ig': ('Igbo', 'ibo', 'ibo'),
    'id': ('Indonesian', 'ind', 'ind'),
    'ia': ('Interlingua', 'ina', 'ina'),
    'ie': ('Interlingue', 'ile', 'ile'),
    'iu': ('Inuktitut', 'iku', 'iku'),
    'ik': ('Inupiaq', 'ipk', 'ipk'),
    'ga': ('Irish', 'gle', 'gle'),
    'it': ('Italian', 'ita', 'ita'),
    'ja': ('Japanese', 'jpn', 'jpn'),
    'jv': ('Javanese', 'jav', 'jav'),
    'kl': ('Kalaallisut', 'kal', 'kal'),
    'kn': ('Kannada', 'kan', 'kan'),
    'kr': ('Kanuri', 'kau', 'kau'),
    'ks': ('Kashmiri', 'kas', 'kas'),
    'kk': ('Kazakh', 'kaz', 'kaz'),
    'km': ('Khmer', 'khm', 'khm'),
    'ki': ('Kikuyu', 'kik', 'kik'),
    'rw': ('Kinyarwanda', 'kin', 'kin'),
    'ky': ('Kirghiz', 'kir', 'kir'),
    'kv': ('Komi', 'kom', 'kom'),
    'kg': ('Kongo', 'kon', 'kon'),
    'ko': ('Korean', 'kor', 'kor'),
    'kj': ('Kuanyama', 'kua', 'kua'),
    'ku': ('Kurdish', 'kur', 'kur'),
    'lo': ('Lao', 'lao', 'lao'),
    'la': ('Latin', 'lat', 'lat'),
    'lv': ('Latvian', 'lav', 'lav'),
    'li': ('Limburgan', 'lim', 'lim'),
    'ln': ('Lingala', 'lin', 'lin'),
    'lt': ('Lithuanian', 'lit', 'lit'),
    'lu': ('Luba-Katanga', 'lub', 'lub'),
    'lb': ('Luxembourgish', 'ltz', 'ltz'),
    'mk': ('Macedonian', 'mac', 'mkd'),
    'mg': ('Malagasy', 'mlg', 'mlg'),
    'ms': ('Malay', 'may', 'msa'),
    'ml': ('Malayalam', 'mal', 'mal'),
    'mt': ('Maltese', 'mlt', 'mlt'),
    'gv': ('Manx', 'glv', 'glv'),
    'mi': ('Maori', 'mao', 'mri'),
    'mr': ('Marathi', 'mar', 'mar'),
    'mh': ('Marshallese', 'mah', 'mah'),
    'mn': ('Mongolian', 'mon', 'mon'),
    'na': ('Nauru', 'nau', 'nau'),
    'nv': ('Navajo', 'nav', 'nav'),
    'ng': ('Ndonga', 'ndo', 'ndo'),
    'ne': ('Nepali', 'nep', 'nep'),
    'nd': ('North Ndebele', 'nde', 'nde'),
    'se': ('Northern Sami', 'sme', 'sme'),
    'no': ('Norwegian', 'nor', 'nor'),
    'nb': ('Norwegian Bokmål', 'nob', 'nob'),
    'nn': ('Norwegian Nynorsk', 'nno', 'nno'),
    'oc': ('Occitan', 'oci', 'oci'),
    'oj': ('Ojibwa', 'oji', 'oji'),
    'or': ('Oriya', 'ori', 'ori'),
    'om': ('Oromo', 'orm', 'orm'),
    'os': ('Ossetian', 'oss', 'oss'),
    'pi': ('Pali', 'pli', 'pli'),
    'pa': ('Panjabi', 'pan', 'pan'),
    'fa': ('Persian', 'per', 'fas'),
    'pl': ('Polish', 'pol', 'pol'),
    'pt': ('Portuguese', 'por', 'por'),
    'ps': ('Pushto', 'pus', 'pus'),
    'qu': ('Quechua', 'que', 'que'),
    'ro': ('Romanian', 'rum', 'ron'),
    'rm': ('Romansh', 'roh', 'roh'),
    'rn': ('Rundi', 'run', 'run'),
    'ru': ('Russian', 'rus', 'rus'),
    'sm': ('Samoan', 'smo', 'smo'),
    'sg': ('Sango', 'sag', 'sag'),
    'sa': ('Sanskrit', 'san', 'san'),
    'sc': ('Sardinian', 'srd', 'srd'),
    'gd': ('Scottish Gaelic', 'gla', 'gla'),
    'sr': ('Serbian', 'srp', 'srp'),
    'sn': ('Shona', 'sna', 'sna'),
    'ii': ('Sichuan Yi', 'iii', 'iii'),
    'sd': ('Sindhi', 'snd', 'snd'),
    'si': ('Sinhala', 'sin', 'sin'),
    'sk': ('Slovak', 'slo', 'slk'),
    'sl': ('Slovenian', 'slv', 'slv'),
    'so': ('Somali', 'som', 'som'),
    'nr': ('South Ndebele', 'nbl', 'nbl'),
    'st': ('Southern Sotho', 'sot', 'sot'),
    'es': ('Spanish', 'spa', 'spa'),
    'su': ('Sundanese', 'sun', 'sun'),
    'sw': ('Swahili', 'swa', 'swa'),
    'ss': ('Swati', 'ssw', 'ssw'),
    'sv': ('Swedish', 'swe', 'swe'),
    'tl': ('Tagalog', 'tgl', 'tgl'),
    'ty': ('Tahitian', 'tah', 'tah'),
    'tg': ('Tajik', 'tgk', 'tgk'),
    'ta': ('Tamil', 'tam', 'tam'),
    'tt': ('Tatar', 'tat', 'tat'),
    'te': ('Telugu', 'tel', 'tel'),
    'th': ('Thai', 'tha', 'tha'),
    'bo': ('Tibetan', 'tib', 'bod'),
    'ti': ('Tigrinya', 'tir', 'tir'),
    'to': ('Tonga', 'ton', 'ton'),
    'ts': ('Tsonga', 'tso', 'tso'),
    'tn': ('Tswana', 'tsn', 'tsn'),
    'tr': ('Turkish', 'tur', 'tur'),
    'tk': ('Turkmen', 'tuk', 'tuk'),
    'tw': ('Twi', 'twi', 'twi'),
    'ug': ('Uighur', 'uig', 'uig'),
    'uk': ('Ukrainian', 'ukr', 'ukr'),
    'ur': ('Urdu', 'urd', 'urd'),
    'uz': ('Uzbek', 'uzb', 'uzb'),
    've': ('Venda', 'ven', 'ven'),
    'vi': ('Vietnamese', 'vie', 'vie'),
    'vo': ('Volapük', 'vol', 'vol'),
    'wa': ('Walloon', 'wln', 'wln'),
    'cy': ('Welsh', 'wel', 'cym'),
    'fy': ('Western Frisian', 'fry', 'fry'),
    'wo': ('Wolof', 'wol', 'wol'),
    'xh': ('Xhosa', 'xho', 'xho'),
    'yi': ('Yiddish', 'yid', 'yid'),
    'yo': ('Yoruba', 'yor', 'yor'),
    'za': ('Zhuang', 'zha', 'zha'),
    'zu': ('Zulu', 'zul', 'zul'),
}