Module:Wikt-lang/data/sandbox: Difference between revisions
Appearance
< Module:Wikt-lang | data
Content deleted Content added
No edit summary |
No edit summary |
||
Line 19: | Line 19: | ||
--[[ |
--[[ |
||
This is a table of Wiktionary language codes with data belonging to them. |
This is a table of Wiktionary language codes with data belonging to them. |
||
* " |
* "Wiktionary_name" is the "canonical name" used on Wiktionary. Should be set only if different from the name from Module:Lang. |
||
]] |
]] |
||
local data = { |
local data = { |
||
["languages"] = { |
["languages"] = { |
||
["aaq"] = { |
["aaq"] = { |
||
[" |
["wWiktionary_name"] = "Penobscot", |
||
}, |
}, |
||
["abe"] = { |
["abe"] = { |
||
[" |
["Wiktionary_name"] = "Abenaki", |
||
}, |
}, |
||
["ang"] = { |
["ang"] = { |
||
[" |
["Wiktionary_name"] = "Old English", |
||
-- Remove macrons, acutes, and overdots |
-- Remove macrons, acutes, and overdots |
||
["replacements"] = { |
["replacements"] = { |
||
Line 38: | Line 38: | ||
}, |
}, |
||
["ar"] = { |
["ar"] = { |
||
[" |
["Wiktionary_name"] = "Arabic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 51: | Line 51: | ||
}, |
}, |
||
["arb"] = { |
["arb"] = { |
||
[" |
["Wiktionary_name"] = "Modern Standard Arabic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 64: | Line 64: | ||
}, |
}, |
||
["apc"] = { |
["apc"] = { |
||
[" |
["Wiktionary_name"] = "North Levantine Arabic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 77: | Line 77: | ||
}, |
}, |
||
["ajp"] = { |
["ajp"] = { |
||
[" |
["Wiktionary_name"] = "South Levantine Arabic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 90: | Line 90: | ||
}, |
}, |
||
["arz"] = { |
["arz"] = { |
||
[" |
["Wiktionary_name"] = "Egyptian Arabic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- ālif with wasla is replaced by ālif; |
-- ālif with wasla is replaced by ālif; |
||
Line 108: | Line 108: | ||
}, |
}, |
||
["cel-x-bryproto"] = { |
["cel-x-bryproto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Brythonic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["cu"] = { |
["cu"] = { |
||
[" |
["Wiktionary_name"] = "Old Church Slavonic", |
||
}, |
}, |
||
["egy"] = { |
["egy"] = { |
||
[" |
["Wiktionary_name"] = "Egyptian", |
||
}, |
}, |
||
["frp"] = { |
["frp"] = { |
||
[" |
["Wiktionary_name"] = "Franco-Provençal", |
||
}, |
}, |
||
["gem-x-proto"] = { |
["gem-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Germanic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["gmw-x-proto"] = { |
["gmw-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-West Germanic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["gmq-x-gut"] = { |
["gmq-x-gut"] = { |
||
[" |
["Wiktionary_name"] = "Gutnish", |
||
}, |
}, |
||
["goh"] = { |
["goh"] = { |
||
Line 140: | Line 140: | ||
}, |
}, |
||
["got"] = { |
["got"] = { |
||
[" |
["Wiktionary_name"] = "Gothic", |
||
["replacements"] = { |
["replacements"] = { |
||
-- Latin to Gothic since people will not want to have to copy |
-- Latin to Gothic since people will not want to have to copy |
||
Line 172: | Line 172: | ||
}, |
}, |
||
["grc"] = { |
["grc"] = { |
||
[" |
["Wiktionary_name"] = "Ancient Greek", |
||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 187: | Line 187: | ||
}, |
}, |
||
["grk-x-proto"] = { |
["grk-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Hellenic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = {}, |
["replacements"] = {}, |
||
}, |
}, |
||
["ha"] = { |
["ha"] = { |
||
[" |
["Wiktionary_name"] = "Hausa", |
||
-- remove tilde, grave, acute, macron, circumflex |
-- remove tilde, grave, acute, macron, circumflex |
||
["replacements"] = { |
["replacements"] = { |
||
Line 200: | Line 200: | ||
}, |
}, |
||
["ine-x-bsproto"] = { |
["ine-x-bsproto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Balto-Slavic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["ine-x-proto"] = { |
["ine-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Indo-European", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
Line 211: | Line 211: | ||
}, |
}, |
||
["la"] = { |
["la"] = { |
||
[" |
["Wiktionary_name"] = "Latin", |
||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 218: | Line 218: | ||
}, |
}, |
||
["lt"] = { |
["lt"] = { |
||
[" |
["Wiktionary_name"] = "Lithuanian", |
||
-- remove acute, tilde, grave |
-- remove acute, tilde, grave |
||
["replacements"] = { |
["replacements"] = { |
||
Line 226: | Line 226: | ||
}, |
}, |
||
["moe"] = { |
["moe"] = { |
||
[" |
["Wiktionary_name"] = "Cree", |
||
}, |
}, |
||
["mul"] = { |
["mul"] = { |
||
[" |
["Wiktionary_name"] = "Translingual", |
||
}, |
}, |
||
["nci"] = { |
["nci"] = { |
||
[" |
["Wiktionary_name"] = "Classical Nahuatl", |
||
-- Remove macrons, acutes, circumflexes and graves |
-- Remove macrons, acutes, circumflexes and graves |
||
["replacements"] = { |
["replacements"] = { |
||
Line 242: | Line 242: | ||
}, |
}, |
||
["nds-de"] = { |
["nds-de"] = { |
||
[" |
["Wiktionary_name"] = "German Low German", |
||
}, |
}, |
||
["non-x-proto"] = { |
["non-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Norse", |
||
}, |
}, |
||
["orv"] = { |
["orv"] = { |
||
[" |
["Wiktionary_name"] = "Old East Slavic", |
||
["replacements"] = { |
["replacements"] = { |
||
[U(0x484)] = "", |
[U(0x484)] = "", |
||
Line 254: | Line 254: | ||
}, |
}, |
||
["poz-x-polproto"] = { -- is this even in use? |
["poz-x-polproto"] = { -- is this even in use? |
||
[" |
["Wiktionary_name"] = "Proto-Nuclear Polynesian", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
["ru"] = { |
["ru"] = { |
||
[" |
["Wiktionary_name"] = "Russian", |
||
["replacements"] = { |
["replacements"] = { |
||
[acute] = "", |
[acute] = "", |
||
Line 264: | Line 264: | ||
}, |
}, |
||
["rw"] = { |
["rw"] = { |
||
[" |
["Wiktionary_name"] = "Rwanda-Rundi", |
||
}, |
}, |
||
["se"] = { |
["se"] = { |
||
Line 272: | Line 272: | ||
}, |
}, |
||
["sem-x-proto"] = { |
["sem-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Semitic", |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
}, |
}, |
||
Line 284: | Line 284: | ||
}, |
}, |
||
["sl"] = { |
["sl"] = { |
||
[" |
["Wiktionary_name"] = "Slovene", |
||
["replacements"] = { |
["replacements"] = { |
||
decompose = true, |
decompose = true, |
||
Line 293: | Line 293: | ||
}, |
}, |
||
["sla-x-proto"] = { |
["sla-x-proto"] = { |
||
[" |
["Wiktionary_name"] = "Proto-Slavic", -- also Common Slavic |
||
["type"] = "reconstructed", |
["type"] = "reconstructed", |
||
["replacements"] = { |
["replacements"] = { |
||
Line 316: | Line 316: | ||
}, |
}, |
||
["tts"] = { |
["tts"] = { |
||
[" |
["Wiktionary_name"] = "Isan", -- also "Northeastern Thai" |
||
}, |
}, |
||
["uk"] = { |
["uk"] = { |
||
Line 322: | Line 322: | ||
}, |
}, |
||
["xcl"] = { |
["xcl"] = { |
||
[" |
["Wiktionary_name"] = "Old Armenian", |
||
["replacements"] = { |
["replacements"] = { |
||
["[՞՜՛՟]"] = "", |
["[՞՜՛՟]"] = "", |
||
Line 329: | Line 329: | ||
}, |
}, |
||
["xgf"] = { |
["xgf"] = { |
||
[" |
["Wiktionary_name"] = "Tongva", -- not ISO Wiktionary_name "Gabrielino-Fernandeño" |
||
["replacements"] = { |
["replacements"] = { |
||
["['`ʔ]"] = "ʼ", |
["['`ʔ]"] = "ʼ", |
||
Line 335: | Line 335: | ||
}, |
}, |
||
["xlu"] = { |
["xlu"] = { |
||
[" |
["Wiktionary_name"] = "Luwian", -- not ISO Wiktionary_name "Cuneiform Luwian" |
||
}, |
}, |
||
["zle-x-ort"] = { |
["zle-x-ort"] = { |
||
[" |
["Wiktionary_name"] = "Old Ruthenian", |
||
["replacements"] = { |
["replacements"] = { |
||
[acute] = "", |
[acute] = "", |
Revision as of 11:49, 9 December 2024
![]() | This is the module sandbox page for Module:Wikt-lang/data (diff). |
Language templates |
---|
Language names (ISO 639) |
|
Interwiki links |
Foreign-language text |
|
Other |
|
The redirects table in Module:Wikt-lang/data connects Wikipedia language codes to the corresponding codes used on the English Wiktionary. wikt:Wiktionary:Language treatment records this relationship for ISO codes. For instance, bs (Bosnian), hr (Croatian), sr (Serbian), cnr (Montenegrin), and kjv (Kajkavian) are all placed under the header for sh (Serbo-Croatian) in Wiktionary entries. See, for instance, wikt:kaj#Serbo-Croatian, the word that Kajkavian is named after. The subsumed codes should still be used in language-tagging on Wikipedia.
-- Shorthand: builds a string from one or more Unicode code points.
local U = mw.ustring.char

-- Combining marks from the [[Combining Diacritical Marks]] block, named so
-- that the replacement patterns in the data table stay readable.
local grave        = U(0x0300) -- U+0300 COMBINING GRAVE ACCENT
local acute        = U(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local circumflex   = U(0x0302) -- U+0302 COMBINING CIRCUMFLEX ACCENT
local tilde        = U(0x0303) -- U+0303 COMBINING TILDE
local macron       = U(0x0304) -- U+0304 COMBINING MACRON
local breve        = U(0x0306) -- U+0306 COMBINING BREVE
local dot          = U(0x0307) -- U+0307 COMBINING DOT ABOVE
local diaeresis    = U(0x0308) -- U+0308 COMBINING DIAERESIS
local double_acute = U(0x030B) -- U+030B COMBINING DOUBLE ACUTE ACCENT
local caron        = U(0x030C) -- U+030C COMBINING CARON
local double_grave = U(0x030F) -- U+030F COMBINING DOUBLE GRAVE ACCENT
local invbreve     = U(0x0311) -- U+0311 COMBINING INVERTED BREVE
local dot_below    = U(0x0323) -- U+0323 COMBINING DOT BELOW
local undertie     = U(0x035C) -- U+035C COMBINING DOUBLE BREVE BELOW
--[[
This is a table of Wiktionary language codes with data belonging to them.

Each entry under "languages" is keyed by a language code and may contain:
* "Wiktionary_name" is the "canonical name" used on Wiktionary. Should be set only if different from the name from Module:Lang.
* "type" is a string; the values used in this table are "reconstructed" and "appendix".
* "replacements" describes text substitutions for the language. Two shapes are used in this table:
	- a map from a pattern to its replacement string;
	- a table with a "from" list of patterns, an optional "to" list of
	  replacements and an optional "decompose = true" flag. Judging by the
	  comments on the entries, "from" and "to" are paired by position and a
	  pattern without a "to" counterpart is removed.
  NOTE(review): the code that applies "replacements" is not part of this
  module; confirm the exact semantics against the consuming module.
]]
local data = {
	["languages"] = {
		["aaq"] = {
			["Wiktionary_name"] = "Penobscot",
		},
		["abe"] = {
			["Wiktionary_name"] = "Abenaki",
		},
		["ang"] = {
			["Wiktionary_name"] = "Old English",
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["Wiktionary_name"] = "Arabic",
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["arb"] = {
			["Wiktionary_name"] = "Modern Standard Arabic",
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["apc"] = {
			["Wiktionary_name"] = "North Levantine Arabic",
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["ajp"] = {
			["Wiktionary_name"] = "South Levantine Arabic",
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["arz"] = {
			["Wiktionary_name"] = "Egyptian Arabic",
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["be"] = {
			["replacements"] = {
				[acute] = "",
			},
		},
		["cel-x-bryproto"] = {
			["Wiktionary_name"] = "Proto-Brythonic",
			["type"] = "reconstructed",
		},
		["cu"] = {
			["Wiktionary_name"] = "Old Church Slavonic",
		},
		["egy"] = {
			["Wiktionary_name"] = "Egyptian",
		},
		["frp"] = {
			["Wiktionary_name"] = "Franco-Provençal",
		},
		["gem-x-proto"] = {
			["Wiktionary_name"] = "Proto-Germanic",
			["type"] = "reconstructed",
		},
		["gmw-x-proto"] = {
			["Wiktionary_name"] = "Proto-West Germanic",
			["type"] = "reconstructed",
		},
		["gmq-x-gut"] = {
			["Wiktionary_name"] = "Gutnish",
		},
		["goh"] = {
			-- Remove macrons, circumflexes, and diaereses
			["replacements"] = {
				decompose = true,
				from = {
					"[" .. macron .. circumflex .. diaeresis .. "]",
				},
			},
		},
		["got"] = {
			["Wiktionary_name"] = "Gothic",
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["grc"] = {
			["Wiktionary_name"] = "Ancient Greek",
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties.
					"[" .. macron .. breve .. undertie .. "]"
				},
				to = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				}
			},
		},
		["grk-x-proto"] = {
			["Wiktionary_name"] = "Proto-Hellenic",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ha"] = {
			["Wiktionary_name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["ine-x-bsproto"] = {
			["Wiktionary_name"] = "Proto-Balto-Slavic",
			["type"] = "reconstructed",
		},
		["ine-x-proto"] = {
			["Wiktionary_name"] = "Proto-Indo-European",
			["type"] = "reconstructed",
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["la"] = {
			["Wiktionary_name"] = "Latin",
			-- Remove macrons, breves, and diaereses
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["Wiktionary_name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["Wiktionary_name"] = "Cree",
		},
		["mul"] = {
			["Wiktionary_name"] = "Translingual",
		},
		["nci"] = {
			["Wiktionary_name"] = "Classical Nahuatl",
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["Wiktionary_name"] = "German Low German",
		},
		["non-x-proto"] = {
			["Wiktionary_name"] = "Proto-Norse",
		},
		["orv"] = {
			["Wiktionary_name"] = "Old East Slavic",
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["poz-x-polproto"] = { -- is this even in use?
			["Wiktionary_name"] = "Proto-Nuclear Polynesian",
			["type"] = "reconstructed",
		},
		["ru"] = {
			["Wiktionary_name"] = "Russian",
			["replacements"] = {
				[acute] = "",
			},
		},
		["rw"] = {
			["Wiktionary_name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sem-x-proto"] = {
			["Wiktionary_name"] = "Proto-Semitic",
			["type"] = "reconstructed",
		},
		["sh"] = {
			["replacements"] = {
				decompose = true,
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to = { "%1" },
			},
		},
		["sl"] = {
			["Wiktionary_name"] = "Slovene",
			["replacements"] = {
				decompose = true,
				-- remove tonal orthography
				from = {"ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]"},
				to = {"l"},
			},
		},
		["sla-x-proto"] = {
			["Wiktionary_name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["tts"] = {
			["Wiktionary_name"] = "Isan", -- also "Northeastern Thai"
		},
		["uk"] = {
			["replacements"] = { [acute] = "", }
		},
		["xcl"] = {
			["Wiktionary_name"] = "Old Armenian",
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xgf"] = {
			["Wiktionary_name"] = "Tongva", -- not the ISO name "Gabrielino-Fernandeño"
			["replacements"] = {
				["['`ʔ]"] = "ʼ",
			},
		},
		["xlu"] = {
			["Wiktionary_name"] = "Luwian", -- not the ISO name "Cuneiform Luwian"
		},
		["zle-x-ort"] = {
			["Wiktionary_name"] = "Old Ruthenian",
			["replacements"] = {
				[acute] = "",
			},
		},
	},
}

return data