Module:Wikt-lang/sandbox: Difference between revisions
No edit summary Tag: Reverted |
Undid revision 1262068387 by Gonnym (talk) |
||
Line 9: | Line 9: | ||
internal = { |
internal = { |
||
["get_clean_Wiktionary_page_name"] = "The function get_clean_Wiktionary_page_name requires a page_name.", |
["get_clean_Wiktionary_page_name"] = "The function get_clean_Wiktionary_page_name requires a page_name.", |
||
["language_code_missing"] = "Name for the language code %q could not be retrieved. Add it to [[Module: |
["language_code_missing"] = "Name for the language code %q could not be retrieved. Add it to [[Module:Wikt-lang/data]].", |
||
["make_Wiktionary_link"] = "The function make_Wiktionary_link needs a Wiktionary page name, display text and language code.", |
["make_Wiktionary_link"] = "The function make_Wiktionary_link needs a Wiktionary page name, display text and language code.", |
||
}, |
}, |
Revision as of 12:29, 9 December 2024
![]() | This is the module sandbox page for Module:Wikt-lang (diff). See also the companion subpage for test cases (run). |
![]() | This module is rated as beta, and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected. |
![]() | This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
Language templates |
---|
Language names (ISO 639) |
|
Interwiki links |
Foreign-language text |
|
Other |
|
![]() | This module depends on the following other modules: |
This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry, and applies correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.
The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's Wikt-lang data modules. For language codes that do not have a `name` value in this module, the language name is fetched with `mw.language.fetchLanguage`. When `mw.language.fetchLanguage` does not fetch the correct language name (or any language name at all), please add it to Module:Wikt-lang/data, and similarly when the correct entry name is not generated, please add the entry name replacements to the module.
Examples
{{#invoke:Wikt-lang|wiktlang|en|language}} -> language
{{#invoke:Wikt-lang|wiktlang|en|language|languages}} -> languages
{{#invoke:Wikt-lang|wiktlang|fr|bourguignon}} -> bourguignon
{{#invoke:Wikt-lang|wiktlang|la|homō}} -> homō
{{#invoke:Wikt-lang|wiktlang|grc|δημοκρατίᾱ}} -> δημοκρατίᾱ
{{#invoke:Wikt-lang|wiktlang|ru|язы́к}} -> язы́к
{{#invoke:Wikt-lang|wiktlang|ar|العَرَبِيَّة}} -> العَرَبِيَّة
{{#invoke:Wikt-lang|wiktlang|fa|فارسی}} -> فارسی
{{#invoke:Wikt-lang|wiktlang|ine-x-proto|*h₂enǵʰ-}} -> *h₂enǵʰ-
Invalid codes
{{#invoke:Wikt-lang|wiktlang|EN|language}} -> language
{{#invoke:Wikt-lang|wiktlang|En|language|languages}} -> languages
{{#invoke:Wikt-lang|wiktlang|La|homō}} -> homō
{{#invoke:Wikt-lang|wiktlang|Grc|δημοκρατίᾱ}} -> δημοκρατίᾱ
{{#invoke:Wikt-lang|wiktlang|Ru|язы́к}} -> язы́к
{{#invoke:Wikt-lang|wiktlang|Ar|العَرَبِيَّة}} -> العَرَبِيَّة
{{#invoke:Wikt-lang|wiktlang|Fa|فارسی}} -> فارسی
Errors
{{#invoke:Wikt-lang|wiktlang|en-Latin|language}} -> language [Latin is not a valid script code.]
Comparison of codes
Language code | Wiktionary name | English Wikipedia name |
---|---|---|
aaq | Penobscot | Eastern Abnaki |
abe | Abenaki | Western Abnaki |
ajp | South Levantine Arabic | South Levantine Arabic |
apc | North Levantine Arabic | Levantine Arabic |
arb | Modern Standard Arabic | Standard Arabic |
cel-x-bryproto | Proto-Brythonic | Error: unrecognized private tag: bryproto |
cu | Old Church Slavonic | Church Slavonic |
egy | Egyptian | Ancient Egyptian |
frp | Franco-Provençal | Arpitan |
gmw-x-proto | Proto-West Germanic | Error: unrecognized private tag: proto |
grk-x-proto | Proto-Hellenic | Proto-Greek |
ine-x-bsproto | Proto-Balto-Slavic | Error: unrecognized private tag: bsproto |
moe | Cree | Innu |
mul | Translingual | multiple |
nds-de | German Low German | Low German |
non-x-proto | Proto-Norse | Error: unrecognized private tag: proto |
poz-x-polproto | Proto-Nuclear Polynesian | Error: unrecognized private tag: polproto |
rw | Rwanda-Rundi | Kinyarwanda |
tts | Isan | Northeastern Thai |
xlu | Luwian | Cuneiform Luwian |
zle-x-ort | Old Ruthenian | Error: unrecognized private tag: ort |
Tracking categories
-- Load the strict library (in Scribunto it makes use of undeclared variables an error).
require('strict')
-- Language data for this sandbox version of the module, loaded from the /data/sandbox subpage.
local m_data = mw.loadData("Module:Wikt-lang/data/sandbox")
-- Use the "languages" subtable when the data module provides one;
-- otherwise treat the whole data table as the per-language table.
local lang_data = m_data.languages or m_data
-- Table of exported functions; returned at the end of the module.
local p = {}
-- Error message texts used by this module.
-- Top-level entries describe problems with the template call itself;
-- the "internal" entries describe failures inside this module's helper functions.
-- Entries containing a format specifier (%s, %q) are meant to be filled in with string.format.
local error_msgs = {
	no_text = "A Wiktionary entry is required.",
	invalid_parameters = 'Invalid parameter: |%s=',
	internal = {
		get_clean_Wiktionary_page_name = "The function get_clean_Wiktionary_page_name requires a page_name.",
		language_code_missing = "Name for the language code %q could not be retrieved. Add it to [[Module:Wikt-lang/data]].",
		make_Wiktionary_link = "The function make_Wiktionary_link needs a Wiktionary page name, display text and language code.",
	},
}
-- Static configuration for the module.
local cfg = {
	-- Template name and error category inserted into error output.
	template = "Wikt-lang",
	category = "Wikt-lang template errors",

	-- Error markup; filled with the template name, the message and the category (in that order).
	error_msg = '<span style="color: #d33;">Error: {{%s}}: %s</span>[[Category:%s]]',

	-- Parameters the template accepts; anything else is reported as invalid.
	valid_parameters = {
		[1] = true,
		[2] = true,
		[3] = true,
		italic = true,
	},

	-- Wiktionary namespaces that can be prefixed to a page name.
	-- "name" is filled with the language name and the page name;
	-- "data_type" is the value compared against a language's "type" in the data module.
	namespace = {
		reconstruction = {
			data_type = "reconstructed",
			name = "Reconstruction:%s/%s",
		},
		appendix = {
			data_type = "appendix",
			name = "Appendix:%s/%s",
		},
	},
}
--[[--------------------------< M A K E _ E R R O R >--------------------------------------------------

Builds the error output for this template: the message wrapped in the error markup
from cfg.error_msg, labelled with the template name and followed by the error category.

]]
local function make_error(msg)
	local template_name, error_category = cfg.template, cfg.category
	return cfg.error_msg:format(template_name, msg, error_category)
end
--[[--------------------------< A D D _ N A M E S P A C E _ T O _ L I N K >--------------------------------------------------

Prefixes page_name with a Wiktionary namespace when one applies.

* A page_name starting with "*" is placed in "Reconstruction:<language_name>/", with the asterisk dropped.
* Otherwise, when the language data has a "type", a "reconstructed" type yields the "Reconstruction:" prefix
  and an "appendix" type yields the "Appendix:" prefix.
* In every other case the page_name is returned unaltered.

]]
local function add_namespace_to_link(data, language_name, page_name)
	local reconstruction = cfg.namespace.reconstruction

	-- A leading asterisk marks a reconstructed term regardless of the language data.
	if page_name:sub(1, 1) == "*" then
		local without_asterisk = page_name:sub(2)
		return string.format(reconstruction.name, language_name, without_asterisk)
	end

	local data_type = data and data.type
	if data_type then
		if data_type == reconstruction.data_type then
			return string.format(reconstruction.name, language_name, page_name)
		end

		local appendix = cfg.namespace.appendix
		if data_type == appendix.data_type then
			return string.format(appendix.name, language_name, page_name)
		end
	end

	-- No namespace applies: the page lives in the main namespace.
	return page_name
end
--[[--------------------------< G E T _ W I K T I O N A R Y _ L A N G U A G E _ N A M E >--------------------------------------------------

Picks the language name to use on Wiktionary.

data is the language's entry from the data module (may be nil). When it has a Wiktionary_name,
that name overrides the supplied language_name; otherwise language_name is returned as is
(which may itself be nil).

]]
local function get_Wiktionary_language_name(data, language_name)
	local override = data and data.Wiktionary_name
	return override or language_name
end
--[[--------------------------< M A K E _ W I K T I O N A R Y _ L I N K >--------------------------------------------------

Creates a Wiktionary link of the form [[wikt:<page>#<language name>|<display text>]].

Parameters:
* page_name – Wiktionary page name (required)
* display_text – text shown for the link (required)
* language_code – code used to look up the language in the data module (required)
* language_name – fallback language name; may be nil when the data module supplies a Wiktionary_name

Returns the link on success.
On failure returns nil followed by an error message: when any required argument is missing,
or when no language name can be determined for the language code.

]]
local function make_Wiktionary_link(page_name, display_text, language_code, language_name)
	-- Each of the three is required, so a single missing one is already an error.
	if not page_name or not display_text or not language_code then
		return nil, make_error(error_msgs.internal.make_Wiktionary_link)
	end

	local data = lang_data[language_code]
	language_name = get_Wiktionary_language_name(data, language_name)
	if not language_name then
		-- The message lives under error_msgs.internal and has a %q placeholder for the code.
		-- tostring() guards %q, which only accepts strings.
		local msg = string.format(error_msgs.internal.language_code_missing, tostring(language_code))
		return nil, make_error(msg)
	end

	page_name = add_namespace_to_link(data, language_name, page_name)

	local link = "[[wikt:%s#%s|%s]]"
	return string.format(link, page_name, language_name, display_text)
end
--[[--------------------------< R E P L A C E _ C H A R A C T E R S >--------------------------------------------------

Applies the character replacements listed in a language's "replacements" value to text.

Two layouts of "replacements" are handled:
* with "decompose" set: text is decomposed (NFD) and each pattern in the "from" list is replaced by the
  entry at the same index in the "to" list, or removed when there is no such entry;
* otherwise: every key is used as a pattern and its value as the replacement.

When data is nil or has no "replacements", text is returned as is.

]]
local function replace_characters(data, text)
	local rules = data and data["replacements"]
	if rules == nil then
		-- Nothing to replace for this language.
		return text
	end

	if not rules.decompose then
		for pattern, substitute in pairs(rules) do
			text = mw.ustring.gsub(text, pattern, substitute)
		end
		return text
	end

	-- Decomposing first lets the diacritics of characters such as á be removed in one go.
	-- The text is not recomposed afterwards; the original author notes that
	-- the MediaWiki software will handle that.
	text = mw.ustring.toNFD(text)
	for index, pattern in ipairs(rules.from) do
		text = mw.ustring.gsub(text, pattern, rules.to and rules.to[index] or "")
	end
	return text
end
--[[--------------------------< R E M O V E _ B O L D _ I T A L I C >--------------------------------------------------

Strips wiki bold (''') and italic ('') markup from text, so that words containing
bolding or emphasis can be linked without piping.
A nil (or false) text is returned unchanged.

]]
local function remove_bold_italic(text)
	if text then
		-- Triple apostrophes go first so that no stray apostrophe from bold markup is left behind.
		local stripped = text:gsub("'''", ""):gsub("''", "")
		return stripped
	end
	return text
end
--[[--------------------------< G E T _ C L E A N _ W I K T I O N A R Y _ P A G E _ N A M E >--------------------------------------------------

Returns a clean Wiktionary page name: bold and italic markup is removed, and the
language-specific character replacements from the data module are applied.

Parameters:
* page_name – the raw page name (required)
* language_code – code used to look up the language in the data module

Returns the cleaned page name on success.
When page_name is nil or empty, returns nil followed by an error message.

]]
local function get_clean_Wiktionary_page_name(page_name, language_code)
	-- Validate before converting: tostring(nil) is the non-empty string "nil",
	-- which would slip past the check and be linked as a page name.
	if page_name == nil or page_name == "" then
		return nil, make_error(error_msgs.internal.get_clean_Wiktionary_page_name)
	end

	page_name = remove_bold_italic(tostring(page_name))

	local data = lang_data[language_code]
	if data == nil then
		-- No language specific data in module; use text as is.
		return page_name
	end

	return replace_characters(data, page_name)
end
--[[--------------------------< C H E C K _ F O R _ U N K N O W N _ P A R A M E T E R S >--------------------------------------------------

Verifies that every parameter in args is listed in cfg.valid_parameters.
Returns an error message for the first unlisted parameter encountered;
returns nothing when all parameters are valid.

]]
local function check_for_unknown_parameters(args)
	local allowed = cfg.valid_parameters
	local offending

	for name in pairs(args) do
		if not allowed[name] then
			offending = name
			break
		end
	end

	if offending ~= nil then
		return make_error(string.format(error_msgs.invalid_parameters, offending))
	end
end
--[[--------------------------< M A I N >--------------------------------------------------------------------

Entry point for {{Wikt-lang}}.

Parameters are received from the template's frame (parent frame).
* |1= – language code
* |2= – Wiktionary page name
* |3= – display text
* |italic= – "no" to disable

Returns the Wiktionary link followed by any categories and maintenance output supplied by
the language module, or an error message when the input is invalid.

]]
function p.main(frame)
	local getArgs = require('Module:Arguments').getArgs
	local args = getArgs(frame)

	-- Keep the page name in its own variable: args[2] is overwritten below with the
	-- display text, and the link must still point at the page that was asked for.
	local wiktionary_page = args[2]
	if not wiktionary_page then
		-- A Wiktionary page name is required.
		return make_error(error_msgs.no_text)
	end

	local parameter_error = check_for_unknown_parameters(args)
	if parameter_error then
		return parameter_error
	end

	-- For the display text, use args[3] if supplied, if not, use the Wiktionary page name.
	-- It is handed to the language module through args[2].
	args[2] = args[3] or wiktionary_page

	-- To allow the errors to be associated with this template.
	args.template = cfg.template
	args.error_category = cfg.category

	-- Handle the display text html tag.
	--TODO: replace /sandbox when live.
	local lang = require("Module:Lang/sandbox2")
	local result = lang._lang(args)

	-- An error returned, stop here.
	if type(result) == "string" and string.find(result, "Error") then
		return result
	end

	--TODO: Do we need the result to return with a <span title=""> tag?
	local page_name, page_error = get_clean_Wiktionary_page_name(wiktionary_page, result.code)
	if page_error then
		return page_error
	end

	local link, link_error = make_Wiktionary_link(page_name, result.html, result.code, result.name)
	if link_error then
		return link_error
	end

	-- Treat absent categories/maintenance output as empty instead of failing on the concatenation.
	return link .. (result.language_categories or "") .. (result.maintenance or "")
end
return p