Jump to content

Module:Wikt-lang/data and Module:Wikt-lang/data/sandbox: Difference between pages

(Difference between pages)
Page 1
Page 2
Content deleted Content added
+ilo;
 
No edit summary
 
Line 18: Line 18:


--[[
--[[
This is a table of Wiktionary language codes with data belonging to them.
* "Wiktionary_name" is the "canonical name" used on Wiktionary. Should be set only if different from the name from Module:Lang.
This is a table of Wiktionary language codes with data belonging to them.
Name is the "canonical name" used on Wiktionary.
Article is the Wikipedia article.
Script is the ISO 15924 code.
]]
]]
local data = {
local data = {
["languages"] = {
["languages"] = {
["aaq"] = {
["aaq"] = {
["name"] = "Penobscot",
["Wiktionary_name"] = "Penobscot",
},
["ab"] = {
["name"] = "Abkhaz",
},
},
["abe"] = {
["abe"] = {
["name"] = "Abenaki",
["Wiktionary_name"] = "Abenaki",
},
},
["ang"] = {
["ajp"] = {
["name"] = "Old English",
["Wiktionary_name"] = "South Levantine Arabic",
["article"] = {"Old English"},
-- Remove macrons, acutes, and overdots
["replacements"] = {
decompose = true,
from = { "[" .. macron .. acute .. dot .. "]" },
},
},
["ar"] = {
["name"] = "Arabic",
["article"] = "Arabic language",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
-- ālif with wasla is replaced by ālif;
Line 59: Line 42:
},
},
},
},
["ara"] = {
["ang"] = {
-- Remove macrons, acutes, and overdots
["name"] = "Arabic",
["article"] = "Arabic language",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
decompose = true,
-- ālif with wasla is replaced by ālif;
from = { "[" .. macron .. acute .. dot .. "]" },
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
},
},
["arb"] = {
["apc"] = {
["name"] = "Modern Standard Arabic",
["Wiktionary_name"] = "North Levantine Arabic",
["article"] = "Modern Standard Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
-- ālif with wasla is replaced by ālif;
Line 89: Line 62:
},
},
},
},
["apc"] = {
["ar"] = {
["name"] = "North Levantine Arabic",
["article"] = "North Levantine Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
-- ālif with wasla is replaced by ālif;
Line 104: Line 74:
},
},
},
},
["ajp"] = {
["arb"] = {
["name"] = "South Levantine Arabic",
["Wiktionary_name"] = "Modern Standard Arabic",
["article"] = "South Levantine Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
-- ālif with wasla is replaced by ālif;
Line 120: Line 88:
},
},
["arz"] = {
["arz"] = {
["name"] = "Egyptian Arabic",
["article"] = "Egyptian Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
-- ālif with wasla is replaced by ālif;
Line 133: Line 98:
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
},
["av"] = {
["name"] = "Avar"
},
},
["be"] = {
["be"] = {
["article"] = "Belarusian language",
["replacements"] = {
["replacements"] = { [acute] = "", },
[acute] = "",
},
},
},
["bn"] = {
["cel-x-bryproto"] = {
["name"] = "Bengali",
["Wiktionary_name"] = "Proto-Brythonic",
["article"] = "Bengali language",
},
["bua"] = {
["name"] = "Buryat",
},
["cel-pro"] = { -- Incorrect tag
["name"] = "Proto-Celtic",
["Wikipedia_code"] = "cel-x-proto",
},
["cel-x-proto"] = {
["name"] = "Proto-Celtic",
},
["cel-bry-pro"] = { -- Incorrect tag
["name"] = "Proto-Brythonic",
["article"] = "Common Brittonic",
["type"] = "reconstructed",
["type"] = "reconstructed",
},
},
["com"] = {
["name"] = "Comanche",
["article"] = "Comanche language",
},
["cu"] = {
["cu"] = {
["name"] = "Old Church Slavonic",
["Wiktionary_name"] = "Old Church Slavonic",
["article"] = "Old Church Slavonic",
},
["de"] = {
["name"] = "German",
["article"] = "German language",
},
["en"] = {
["name"] = "English",
["article"] = "English language",
},
["es"] = {
["name"] = "Spanish",
["article"] = "Spanish language",
},
},
["egy"] = {
["egy"] = {
["name"] = "Egyptian",
["Wiktionary_name"] = "Egyptian",
},
["evn"] = {
["name"] = "Evenki",
["article"] = "Evenki language",
},
["fr"] = {
["name"] = "French",
["article"] = "French language",
},
["frm"] = {
["name"] = "Middle French",
["article"] = "Middle French",
},
},
["frp"] = {
["frp"] = {
["name"] = "Franco-Provençal",
["Wiktionary_name"] = "Franco-Provençal",
},
["ff"] = {
["name"] = "Fula",
},
["gem-pro"] = { -- Incorrect tag
["name"] = "Proto-Germanic",
["article"] = "Proto-Germanic language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "gem-x-proto",
},
},
["gem-x-proto"] = {
["gem-x-proto"] = {
["name"] = "Proto-Germanic",
["article"] = "Proto-Germanic language",
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {},
},
["gml"] = {
["name"] = "Middle Low German",
},
["gmw-ecg"] = {
["name"] = "East Central German",
},
},
["gmw-x-proto"] = {
["gmw-x-proto"] = {
["name"] = "Proto-West Germanic",
["Wiktionary_name"] = "Proto-West Germanic",
["article"] = "Proto-West Germanic language",
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {},
},
["gmq-x-gut"] = {
["name"] = "Gutnish",
["article"] = "Gutnish",
},
},
["goh"] = {
["goh"] = {
Line 239: Line 133:
},
},
["got"] = {
["got"] = {
["name"] = "Gothic",
["article"] = "Gothic language",
["replacements"] = {
["replacements"] = {
-- Latin to Gothic since people will not want to have to copy
-- Latin to Gothic since people will not want to have to copy
Line 270: Line 162:
["[OoŌō]"] = "𐍉",
["[OoŌō]"] = "𐍉",
},
},
},
["gsw"] = {
["name"] = "Alemannic German",
},
},
["grc"] = {
["grc"] = {
["name"] = "Ancient Greek",
["article"] = "Ancient Greek",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 289: Line 176:
}
}
},
},
},
["grk-pro"] = { -- Incorrect tag
["name"] = "Proto-Hellenic",
["Wikipedia_name"] = "Proto-Greek",
["article"] = "Proto-Greek language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "grk-x-proto",
},
},
["grk-x-proto"] = {
["grk-x-proto"] = {
["name"] = "Proto-Hellenic",
["Wiktionary_name"] = "Proto-Hellenic",
["Wikipedia_name"] = "Proto-Greek",
["article"] = "Proto-Greek language",
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {},
["replacements"] = {},
},
["grt"] = {
["name"] = "Garo",
},
},
["ha"] = {
["ha"] = {
["name"] = "Hausa",
-- remove tilde, grave, acute, macron, circumflex
-- remove tilde, grave, acute, macron, circumflex
["replacements"] = {
["replacements"] = {
Line 316: Line 189:
},
},
},
},
["hi"] = {
["ine-x-bsproto"] = {
["name"] = "Hindi",
["Wiktionary_name"] = "Proto-Balto-Slavic",
["article"] = "Hindi",
},
["ilo"] = {
["name"] = "Ilocano",
["article"] = "Ilocano language",
},
["ine-bsl-pro"] = {
["name"] = "Proto-Balto-Slavic",
["article"] = "Proto-Balto-Slavic language",
["type"] = "reconstructed",
["type"] = "reconstructed",
},
["ine-pro"] = { -- Incorrect tag
["name"] = "Proto-Indo-European",
["article"] = "Proto-Indo-European language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "ine-x-proto",
},
},
["ine-x-proto"] = {
["ine-x-proto"] = {
["name"] = "Proto-Indo-European",
["article"] = "Proto-Indo-European language",
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {},
},
["ja"] = {
["name"] = "Japanese",
["article"] = "Japanese language",
},
},
["jbo"] = { -- Lojban
["jbo"] = { -- Lojban
["type"] = "appendix",
["type"] = "appendix",
},
["ket"] = {
["name"] = "Ket",
["article"] = "Ket language",
},
["ksk"] = {
["name"] = "Kansa",
["article"] = "Kansa language",
},
},
["la"] = {
["la"] = {
["name"] = "Latin",
["article"] = "Latin",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 366: Line 206:
},
},
["lt"] = {
["lt"] = {
["name"] = "Lithuanian",
-- remove acute, tilde, grave
-- remove acute, tilde, grave
["replacements"] = {
["replacements"] = {
Line 372: Line 211:
from = { "[" .. acute .. tilde .. grave .. "]" },
from = { "[" .. acute .. tilde .. grave .. "]" },
},
},
},
["mkh-mvi"] = {
["name"] = "Middle Vietnamese",
},
},
["moe"] = {
["moe"] = {
["name"] = "Cree",
["Wiktionary_name"] = "Cree",
},
},
["mul"] = {
["mul"] = {
["name"] = "Translingual",
["Wiktionary_name"] = "Translingual",
["article"] = "",
},
},
["nci"] = {
["nci"] = {
["name"] = "Classical Nahuatl",
["article"] = "Classical Nahuatl",
-- Remove macrons, acutes, circumflexes and graves
-- Remove macrons, acutes, circumflexes and graves
["replacements"] = {
["replacements"] = {
Line 395: Line 228:
},
},
["nds-de"] = {
["nds-de"] = {
["name"] = "German Low German",
["Wiktionary_name"] = "German Low German",
},
["non"] = {
["name"] = "Old Norse",
},
},
["non-x-proto"] = {
["non-x-proto"] = {
["name"] = "Proto-Norse",
["Wiktionary_name"] = "Proto-Norse",
},
["odt"] = {
["name"] = "Old Dutch",
},
["oge"] = {
["name"] = "Old Georgian",
},
["oj"] = {
["name"] = "Ojibwe",
},
},
["orv"] = {
["orv"] = {
["name"] = "Old East Slavic",
["article"] = "Old East Slavic",
["replacements"] = {
["replacements"] = {
[U(0x484)] = "",
[U(0x484)] = "",
},
},
},
},
["osx"] = {
["poz-x-polproto"] = { -- is this even in use?
["name"] = "Old Saxon",
["Wiktionary_name"] = "Proto-Nuclear Polynesian",
},
["pt"] = {
["name"] = "Portuguese",
["article"] = "Portuguese language",
-- ["scripts"] = { "Latn" },
},
["pa"] = {
["name"] = "Punjabi",
["article"] = "Punjabi language",
},
["pgl"] = {
["name"] = "Primitive Irish",
["article"] = "Primitive Irish",
},
["pis"] = {
["name"] = "Pijin",
["article"] = "Pijin language",
},
["poz-x-poly-proto"] = {
["name"] = "Proto-Nuclear Polynesian",
["article"] = "Proto-Polynesian language",
["type"] = "reconstructed",
["type"] = "reconstructed",
},
["rap"] = {
["name"] = "Rapa Nui",
["article"] = "Rapa Nui language",
},
},
["ru"] = {
["ru"] = {
["name"] = "Russian",
["replacements"] = {
["article"] = "Russian language",
[acute] = "",
},
["replacements"] = { [acute] = "", },
},
},
["rw"] = {
["rw"] = {
["name"] = "Rwanda-Rundi",
["Wiktionary_name"] = "Rwanda-Rundi",
},
},
["se"] = {
["se"] = {
Line 461: Line 255:
},
},
},
},
["sem-pro"] = {
["sem-x-proto"] = {
["name"] = "Proto-Semitic",
["article"] = "Proto-Semitic",
["type"] = "reconstructed",
["type"] = "reconstructed",
},
},
["sh"] = {
["sh"] = {
["article"] = "Serbo-Croatian language",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
to = { "%1" },
to = {"%1"},
},
},
},
},
["sl"] = {
["sl"] = {
["name"] = "Slovene",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 484: Line 274:
},
},
},
},
["sla-pro"] = {
["sla-x-proto"] = {
["name"] = "Proto-Slavic", -- also Common Slavic
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {
["replacements"] = {
Line 508: Line 297:
},
},
["tts"] = {
["tts"] = {
["name"] = "Isan", -- also "Northeastern Thai"
["Wiktionary_name"] = "Isan", -- also "Northeastern Thai"
["article"] = "Isan language",
},
["tzo"] = {
["name"] = "Tzotzil",
["article"] = "Tzotzil language",
},
["ug"] = {
["name"] = "Uyghur", --also less commonly "Uighur"
["article"] = "Uyghur language",
},
},
["uk"] = {
["uk"] = {
["article"] = "Ukrainian language",
["replacements"] = {
["replacements"] = { [acute] = "", }
[acute] = "",
},
},
["ur"] = {
["name"] = "Urdu",
["article"] = "Urdu",
},
},
["xcl"] = {
["xcl"] = {
["name"] = "Old Armenian",
["article"] = "Classical Armenian",
["replacements"] = {
["replacements"] = {
["[՞՜՛՟]"] = "",
["[՞՜՛՟]"] = "",
Line 536: Line 311:
},
},
["xgf"] = {
["xgf"] = {
["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
["article"] = "Tongva language",
["replacements"] = {
["replacements"] = {
["['`ʔ]"] = "ʼ",
["['`ʔ]"] = "ʼ",
Line 543: Line 316:
},
},
["xlu"] = {
["xlu"] = {
["name"] = "Luwian", -- not ISO name "Cuneiform Luwian"
["Wiktionary_name"] = "Luwian", -- not name "Cuneiform Luwian"
["article"] = "Cuneiform Luwian"
},
},
["xpq"] = {
["zle-x-ort"] = {
["name"] = "Mohegan-Pequot",
["Wiktionary_name"] = "Old Ruthenian",
["replacements"] = {
},
["xxt"] = {
[acute] = "",
},
["name"] = "Tambora",
["article"] = "Tambora language",
},
["xvn"] = {
["name"] = "Vandalic",
["article"] = "Vandalic language",
},
["yua"] = {
["name"] = "Yucatec Maya",
["article"] = "Yucatec Maya language",
},
["zh"] = {
["name"] = "Chinese",
["article"] = "Chinese language",
-- ["scripts"] = { "Hani" },
},
["zle-ort"] = {
["name"] = "Old Ruthenian",
["article"] = "Old Ruthenian",
["replacements"] = { [acute] = "", },
},
},
},

-- Here, keys (for example, "gem") are Wikipedia language codes used in
-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
-- code.
-- Subtags are not currently supported.
["redirects"] = {
["aae"] = "sq",
["aiq"] = "fa",
["aln"] = "sq",
["als"] = "sq",
["azb"] = "az",
["azj"] = "az",
["bgn"] = "bal",
["bs"] = "sh",
["bxr"] = "bua",
["ciw"] = "oj",
["cnr"] = "sh",
["fil"] = "tl",
["fuf"] = "ff",
["gem"] = "gem-pro", -- Not correct, but is commonly used.
["hak"] = "zh",
["hbo"] = "he",
["hr"] = "sh",
["ine"] = "ine-pro", -- Not correct, but might be commonly used.
["kjv"] = "sh",
["nan"] = "zh",
["prs"] = "fa",
["rn"] = "rw",
["sli"] = "gmw-ecg",
["sr"] = "sh",
["src"] = "sc",
["sro"] = "sc",
["tw"] = "ak",
["wae"] = "gsw",
["wep"] = "nds-de",
["yue"] = "zh",
["xno"] = "fro",
},
},
}
}