Module:Wikt-lang/data and Module:Wikt-lang/data/sandbox: Difference between pages
Appearance
(Difference between pages)
Content deleted Content added
+ilo; |
No edit summary |
||
Line 18: | Line 18: | ||
--[[ |
--[[ |
||
This is a table of Wiktionary language codes with data belonging to them. |
|||
* "Wiktionary_name" is the "canonical name" used on Wiktionary. Should be set only if different from the name from Module:Lang. |
|||
This is a table of Wiktionary language codes with data belonging to them. |
|||
Name is the "canonical name" used on Wiktionary. |
|||
Article is the Wikipedia article. |
|||
Script is the ISO 15924 code. |
|||
]] |
]] |
||
local data = { |
local data = { |
||
["languages"] = { |
["languages"] = { |
||
["aaq"] = { |
["aaq"] = { |
||
[" |
["Wiktionary_name"] = "Penobscot", |
||
}, |
|||
["ab"] = { |
|||
["name"] = "Abkhaz", |
|||
}, |
}, |
||
["abe"] = { |
["abe"] = { |
||
[" |
["Wiktionary_name"] = "Abenaki", |
||
}, |
}, |
||
[" |
["ajp"] = { |
||
[" |
["Wiktionary_name"] = "South Levantine Arabic", |
||
["article"] = {"Old English"}, |
|||
-- Remove macrons, acutes, and overdots |
|||
["replacements"] = { |
|||
decompose = true, |
|||
from = { "[" .. macron .. acute .. dot .. "]" }, |
|||
}, |
|||
}, |
|||
["ar"] = { |
|||
["name"] = "Arabic", |
|||
["article"] = "Arabic language", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 59: | Line 42: | ||
}, |
}, |
||
}, |
}, |
||
[" |
["ang"] = { |
||
-- Remove macrons, acutes, and overdots |
|||
["name"] = "Arabic", |
|||
["article"] = "Arabic language", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
|||
-- ālif with wasla is replaced by ālif; |
|||
from = { "[" .. macron .. acute .. dot .. "]" }, |
|||
[U(0x0671)] = U(0x0627), |
|||
-- taṭwīl, fatḥatan, ḍammatan, kasratan, |
|||
-- fatḥa, ḍamma, kasra, |
|||
-- shadda, sukūn, and superscript (dagger) ālif are removed. |
|||
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D) |
|||
..U(0x064E)..U(0x064F)..U(0x0650) |
|||
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "", |
|||
}, |
}, |
||
}, |
}, |
||
[" |
["apc"] = { |
||
[" |
["Wiktionary_name"] = "North Levantine Arabic", |
||
["article"] = "Modern Standard Arabic", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 89: | Line 62: | ||
}, |
}, |
||
}, |
}, |
||
[" |
["ar"] = { |
||
["name"] = "North Levantine Arabic", |
|||
["article"] = "North Levantine Arabic", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 104: | Line 74: | ||
}, |
}, |
||
}, |
}, |
||
[" |
["arb"] = { |
||
[" |
["Wiktionary_name"] = "Modern Standard Arabic", |
||
["article"] = "South Levantine Arabic", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 120: | Line 88: | ||
}, |
}, |
||
["arz"] = { |
["arz"] = { |
||
["name"] = "Egyptian Arabic", |
|||
["article"] = "Egyptian Arabic", |
|||
["direction"] = "rtl", -- Should be in the script data module. |
|||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 133: | Line 98: | ||
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "", |
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "", |
||
}, |
}, |
||
}, |
|||
["av"] = { |
|||
["name"] = "Avar" |
|||
}, |
}, |
||
["be"] = { |
["be"] = { |
||
[" |
["replacements"] = { |
||
[acute] = "", |
|||
}, |
|||
}, |
}, |
||
[" |
["cel-x-bryproto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Brythonic", |
||
["article"] = "Bengali language", |
|||
}, |
|||
["bua"] = { |
|||
["name"] = "Buryat", |
|||
}, |
|||
["cel-pro"] = { -- Incorrect tag |
|||
["name"] = "Proto-Celtic", |
|||
["Wikipedia_code"] = "cel-x-proto", |
|||
}, |
|||
["cel-x-proto"] = { |
|||
["name"] = "Proto-Celtic", |
|||
}, |
|||
["cel-bry-pro"] = { -- Incorrect tag |
|||
["name"] = "Proto-Brythonic", |
|||
["article"] = "Common Brittonic", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["com"] = { |
|||
["name"] = "Comanche", |
|||
["article"] = "Comanche language", |
|||
}, |
|||
["cu"] = { |
["cu"] = { |
||
[" |
["Wiktionary_name"] = "Old Church Slavonic", |
||
["article"] = "Old Church Slavonic", |
|||
}, |
|||
["de"] = { |
|||
["name"] = "German", |
|||
["article"] = "German language", |
|||
}, |
|||
["en"] = { |
|||
["name"] = "English", |
|||
["article"] = "English language", |
|||
}, |
|||
["es"] = { |
|||
["name"] = "Spanish", |
|||
["article"] = "Spanish language", |
|||
}, |
}, |
||
["egy"] = { |
["egy"] = { |
||
[" |
["Wiktionary_name"] = "Egyptian", |
||
}, |
|||
["evn"] = { |
|||
["name"] = "Evenki", |
|||
["article"] = "Evenki language", |
|||
}, |
|||
["fr"] = { |
|||
["name"] = "French", |
|||
["article"] = "French language", |
|||
}, |
|||
["frm"] = { |
|||
["name"] = "Middle French", |
|||
["article"] = "Middle French", |
|||
}, |
}, |
||
["frp"] = { |
["frp"] = { |
||
[" |
["Wiktionary_name"] = "Franco-Provençal", |
||
}, |
|||
["ff"] = { |
|||
["name"] = "Fula", |
|||
}, |
|||
["gem-pro"] = { -- Incorrect tag |
|||
["name"] = "Proto-Germanic", |
|||
["article"] = "Proto-Germanic language", |
|||
["type"] = "reconstructed", |
|||
["replacements"] = {}, |
|||
["Wikipedia_code"] = "gem-x-proto", |
|||
}, |
}, |
||
["gem-x-proto"] = { |
["gem-x-proto"] = { |
||
["name"] = "Proto-Germanic", |
|||
["article"] = "Proto-Germanic language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = {}, |
|||
}, |
|||
["gml"] = { |
|||
["name"] = "Middle Low German", |
|||
}, |
|||
["gmw-ecg"] = { |
|||
["name"] = "East Central German", |
|||
}, |
}, |
||
["gmw-x-proto"] = { |
["gmw-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-West Germanic", |
||
["article"] = "Proto-West Germanic language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = {}, |
|||
}, |
|||
["gmq-x-gut"] = { |
|||
["name"] = "Gutnish", |
|||
["article"] = "Gutnish", |
|||
}, |
}, |
||
["goh"] = { |
["goh"] = { |
||
Line 239: | Line 133: | ||
}, |
}, |
||
["got"] = { |
["got"] = { |
||
["name"] = "Gothic", |
|||
["article"] = "Gothic language", |
|||
["replacements"] = { |
["replacements"] = { |
||
-- Latin to Gothic since people will not want to have to copy |
-- Latin to Gothic since people will not want to have to copy |
||
Line 270: | Line 162: | ||
["[OoŌō]"] = "𐍉", |
["[OoŌō]"] = "𐍉", |
||
}, |
}, |
||
}, |
|||
["gsw"] = { |
|||
["name"] = "Alemannic German", |
|||
}, |
}, |
||
["grc"] = { |
["grc"] = { |
||
["name"] = "Ancient Greek", |
|||
["article"] = "Ancient Greek", |
|||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 289: | Line 176: | ||
} |
} |
||
}, |
}, |
||
}, |
|||
["grk-pro"] = { -- Incorrect tag |
|||
["name"] = "Proto-Hellenic", |
|||
["Wikipedia_name"] = "Proto-Greek", |
|||
["article"] = "Proto-Greek language", |
|||
["type"] = "reconstructed", |
|||
["replacements"] = {}, |
|||
["Wikipedia_code"] = "grk-x-proto", |
|||
}, |
}, |
||
["grk-x-proto"] = { |
["grk-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Hellenic", |
||
["Wikipedia_name"] = "Proto-Greek", |
|||
["article"] = "Proto-Greek language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = {}, |
["replacements"] = {}, |
||
}, |
|||
["grt"] = { |
|||
["name"] = "Garo", |
|||
}, |
}, |
||
["ha"] = { |
["ha"] = { |
||
["name"] = "Hausa", |
|||
-- remove tilde, grave, acute, macron, circumflex |
-- remove tilde, grave, acute, macron, circumflex |
||
["replacements"] = { |
["replacements"] = { |
||
Line 316: | Line 189: | ||
}, |
}, |
||
}, |
}, |
||
[" |
["ine-x-bsproto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Balto-Slavic", |
||
["article"] = "Hindi", |
|||
}, |
|||
["ilo"] = { |
|||
["name"] = "Ilocano", |
|||
["article"] = "Ilocano language", |
|||
}, |
|||
["ine-bsl-pro"] = { |
|||
["name"] = "Proto-Balto-Slavic", |
|||
["article"] = "Proto-Balto-Slavic language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
|||
["ine-pro"] = { -- Incorrect tag |
|||
["name"] = "Proto-Indo-European", |
|||
["article"] = "Proto-Indo-European language", |
|||
["type"] = "reconstructed", |
|||
["replacements"] = {}, |
|||
["Wikipedia_code"] = "ine-x-proto", |
|||
}, |
}, |
||
["ine-x-proto"] = { |
["ine-x-proto"] = { |
||
["name"] = "Proto-Indo-European", |
|||
["article"] = "Proto-Indo-European language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = {}, |
|||
}, |
|||
["ja"] = { |
|||
["name"] = "Japanese", |
|||
["article"] = "Japanese language", |
|||
}, |
}, |
||
["jbo"] = { -- Lojban |
["jbo"] = { -- Lojban |
||
["type"] = "appendix", |
["type"] = "appendix", |
||
}, |
|||
["ket"] = { |
|||
["name"] = "Ket", |
|||
["article"] = "Ket language", |
|||
}, |
|||
["ksk"] = { |
|||
["name"] = "Kansa", |
|||
["article"] = "Kansa language", |
|||
}, |
}, |
||
["la"] = { |
["la"] = { |
||
["name"] = "Latin", |
|||
["article"] = "Latin", |
|||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 366: | Line 206: | ||
}, |
}, |
||
["lt"] = { |
["lt"] = { |
||
["name"] = "Lithuanian", |
|||
-- remove acute, tilde, grave |
-- remove acute, tilde, grave |
||
["replacements"] = { |
["replacements"] = { |
||
Line 372: | Line 211: | ||
from = { "[" .. acute .. tilde .. grave .. "]" }, |
from = { "[" .. acute .. tilde .. grave .. "]" }, |
||
}, |
}, |
||
}, |
|||
["mkh-mvi"] = { |
|||
["name"] = "Middle Vietnamese", |
|||
}, |
}, |
||
["moe"] = { |
["moe"] = { |
||
[" |
["Wiktionary_name"] = "Cree", |
||
}, |
}, |
||
["mul"] = { |
["mul"] = { |
||
[" |
["Wiktionary_name"] = "Translingual", |
||
["article"] = "", |
|||
}, |
}, |
||
["nci"] = { |
["nci"] = { |
||
["name"] = "Classical Nahuatl", |
|||
["article"] = "Classical Nahuatl", |
|||
-- Remove macrons, acutes, circumflexes and graves |
-- Remove macrons, acutes, circumflexes and graves |
||
["replacements"] = { |
["replacements"] = { |
||
Line 395: | Line 228: | ||
}, |
}, |
||
["nds-de"] = { |
["nds-de"] = { |
||
[" |
["Wiktionary_name"] = "German Low German", |
||
}, |
|||
["non"] = { |
|||
["name"] = "Old Norse", |
|||
}, |
}, |
||
["non-x-proto"] = { |
["non-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Norse", |
||
}, |
|||
["odt"] = { |
|||
["name"] = "Old Dutch", |
|||
}, |
|||
["oge"] = { |
|||
["name"] = "Old Georgian", |
|||
}, |
|||
["oj"] = { |
|||
["name"] = "Ojibwe", |
|||
}, |
}, |
||
["orv"] = { |
["orv"] = { |
||
["name"] = "Old East Slavic", |
|||
["article"] = "Old East Slavic", |
|||
["replacements"] = { |
["replacements"] = { |
||
[U(0x484)] = "", |
[U(0x484)] = "", |
||
}, |
}, |
||
}, |
}, |
||
[" |
["poz-x-polproto"] = { -- is this even in use? |
||
[" |
["Wiktionary_name"] = "Proto-Nuclear Polynesian", |
||
}, |
|||
["pt"] = { |
|||
["name"] = "Portuguese", |
|||
["article"] = "Portuguese language", |
|||
-- ["scripts"] = { "Latn" }, |
|||
}, |
|||
["pa"] = { |
|||
["name"] = "Punjabi", |
|||
["article"] = "Punjabi language", |
|||
}, |
|||
["pgl"] = { |
|||
["name"] = "Primitive Irish", |
|||
["article"] = "Primitive Irish", |
|||
}, |
|||
["pis"] = { |
|||
["name"] = "Pijin", |
|||
["article"] = "Pijin language", |
|||
}, |
|||
["poz-x-poly-proto"] = { |
|||
["name"] = "Proto-Nuclear Polynesian", |
|||
["article"] = "Proto-Polynesian language", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
|||
["rap"] = { |
|||
["name"] = "Rapa Nui", |
|||
["article"] = "Rapa Nui language", |
|||
}, |
}, |
||
["ru"] = { |
["ru"] = { |
||
[" |
["replacements"] = { |
||
[ |
[acute] = "", |
||
}, |
|||
["replacements"] = { [acute] = "", }, |
|||
}, |
}, |
||
["rw"] = { |
["rw"] = { |
||
[" |
["Wiktionary_name"] = "Rwanda-Rundi", |
||
}, |
}, |
||
["se"] = { |
["se"] = { |
||
Line 461: | Line 255: | ||
}, |
}, |
||
}, |
}, |
||
["sem- |
["sem-x-proto"] = { |
||
["name"] = "Proto-Semitic", |
|||
["article"] = "Proto-Semitic", |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["sh"] = { |
["sh"] = { |
||
["article"] = "Serbo-Croatian language", |
|||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave |
from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave |
||
.. grave .. invbreve .. acute .. macron .. tilde .. "]" }, |
.. grave .. invbreve .. acute .. macron .. tilde .. "]" }, |
||
to = { |
to = {"%1"}, |
||
}, |
}, |
||
}, |
}, |
||
["sl"] = { |
["sl"] = { |
||
["name"] = "Slovene", |
|||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 484: | Line 274: | ||
}, |
}, |
||
}, |
}, |
||
["sla- |
["sla-x-proto"] = { |
||
["name"] = "Proto-Slavic", -- also Common Slavic |
|||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = { |
["replacements"] = { |
||
Line 508: | Line 297: | ||
}, |
}, |
||
["tts"] = { |
["tts"] = { |
||
[" |
["Wiktionary_name"] = "Isan", -- also "Northeastern Thai" |
||
["article"] = "Isan language", |
|||
}, |
|||
["tzo"] = { |
|||
["name"] = "Tzotzil", |
|||
["article"] = "Tzotzil language", |
|||
}, |
|||
["ug"] = { |
|||
["name"] = "Uyghur", --also less commonly "Uighur" |
|||
["article"] = "Uyghur language", |
|||
}, |
}, |
||
["uk"] = { |
["uk"] = { |
||
[" |
["replacements"] = { |
||
[acute] = "", |
|||
}, |
}, |
||
["ur"] = { |
|||
["name"] = "Urdu", |
|||
["article"] = "Urdu", |
|||
}, |
}, |
||
["xcl"] = { |
["xcl"] = { |
||
["name"] = "Old Armenian", |
|||
["article"] = "Classical Armenian", |
|||
["replacements"] = { |
["replacements"] = { |
||
["[՞՜՛՟]"] = "", |
["[՞՜՛՟]"] = "", |
||
Line 536: | Line 311: | ||
}, |
}, |
||
["xgf"] = { |
["xgf"] = { |
||
["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño" |
|||
["article"] = "Tongva language", |
|||
["replacements"] = { |
["replacements"] = { |
||
["['`ʔ]"] = "ʼ", |
["['`ʔ]"] = "ʼ", |
||
Line 543: | Line 316: | ||
}, |
}, |
||
["xlu"] = { |
["xlu"] = { |
||
[" |
["Wiktionary_name"] = "Luwian", -- not name "Cuneiform Luwian" |
||
["article"] = "Cuneiform Luwian" |
|||
}, |
}, |
||
[" |
["zle-x-ort"] = { |
||
[" |
["Wiktionary_name"] = "Old Ruthenian", |
||
["replacements"] = { |
|||
}, |
|||
[ |
[acute] = "", |
||
}, |
|||
["name"] = "Tambora", |
|||
["article"] = "Tambora language", |
|||
}, |
|||
["xvn"] = { |
|||
["name"] = "Vandalic", |
|||
["article"] = "Vandalic language", |
|||
}, |
|||
["yua"] = { |
|||
["name"] = "Yucatec Maya", |
|||
["article"] = "Yucatec Maya language", |
|||
}, |
|||
["zh"] = { |
|||
["name"] = "Chinese", |
|||
["article"] = "Chinese language", |
|||
-- ["scripts"] = { "Hani" }, |
|||
}, |
|||
["zle-ort"] = { |
|||
["name"] = "Old Ruthenian", |
|||
["article"] = "Old Ruthenian", |
|||
["replacements"] = { [acute] = "", }, |
|||
}, |
}, |
||
}, |
|||
-- Here, keys (for example, "gem") are Wikipedia language codes used in |
|||
-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary |
|||
-- code. |
|||
-- Subtags are not currently supported. |
|||
["redirects"] = { |
|||
["aae"] = "sq", |
|||
["aiq"] = "fa", |
|||
["aln"] = "sq", |
|||
["als"] = "sq", |
|||
["azb"] = "az", |
|||
["azj"] = "az", |
|||
["bgn"] = "bal", |
|||
["bs"] = "sh", |
|||
["bxr"] = "bua", |
|||
["ciw"] = "oj", |
|||
["cnr"] = "sh", |
|||
["fil"] = "tl", |
|||
["fuf"] = "ff", |
|||
["gem"] = "gem-pro", -- Not correct, but is commonly used. |
|||
["hak"] = "zh", |
|||
["hbo"] = "he", |
|||
["hr"] = "sh", |
|||
["ine"] = "ine-pro", -- Not correct, but might be commonly used. |
|||
["kjv"] = "sh", |
|||
["nan"] = "zh", |
|||
["prs"] = "fa", |
|||
["rn"] = "rw", |
|||
["sli"] = "gmw-ecg", |
|||
["sr"] = "sh", |
|||
["src"] = "sc", |
|||
["sro"] = "sc", |
|||
["tw"] = "ak", |
|||
["wae"] = "gsw", |
|||
["wep"] = "nds-de", |
|||
["yue"] = "zh", |
|||
["xno"] = "fro", |
|||
}, |
}, |
||
} |
} |