-- Module:Utilities
--
-- Frae Wikipedia, the free beuk o knawledge
-- Language data, indexed by language code. The functions in this module read
-- the `scripts`, `sort_key` and `names` fields of its entries.
local m_languages = mw.loadData("Module:languages")
-- Table holding the functions this module returns to its callers.
local export = {}

-- Detect which script a piece of text is written in.
--
-- Parameters:
--   text  the string to examine
--   lang  a language code; must be a key of Module:languages
--
-- Returns two values:
--   1. a script code (as listed for the language, or a key of Module:scripts
--      when the fallback search is used)
--   2. scFix, a boolean that is true only when none of the language's own
--      scripts matched and the language's first listed script is neither
--      "Zyyy" nor "None"
--
-- Raises an error if the language code is unknown, or if one of the
-- language's script codes has no entry in Module:scripts.
function export.detect_script(text, lang)
    local langinfo = m_languages[lang]
        or error("The language code \"" .. tostring(lang) .. "\" is not valid.")
    local scripts = langinfo.scripts
    local primary = scripts[1]
    local scFix = false

    -- Does this language have more than one script?
    -- If not, we can bypass the detection for a speed bonus.
    -- But always do the detection if the script is "None" or "Zyyy".
    if not (scripts[2] or primary == "None" or primary == "Zyyy") then
        return primary, scFix
    end

    -- Only load the script data once we know detection is really needed.
    local m_scripts = mw.loadData("Module:scripts")

    for _, script in ipairs(scripts) do
        -- Remove the language code from the script name, e.g. "nv-Latn" > "Latn".
        -- (Assigning to a single local keeps only gsub's first result.)
        local script2check = script:gsub(".-%-", "")
        if script2check == "Latf" or script2check == "Latinx" or script2check == "unicode" then
            script2check = "Latn"
        elseif script2check == "Hans" or script2check == "Hant" then
            script2check = "Hani"
        end

        local scriptinfo = m_scripts[script2check] or error("The script code \"" .. script .. "\" is not valid.")
        if scriptinfo.characters and mw.ustring.match(text, scriptinfo.characters) then
            return script, scFix
        end
    end

    scFix = (primary ~= "Zyyy" and primary ~= "None")

    -- Not written in native script(s); check for all scripts.
    -- TODO: This is slow; we really shouldn't be doing this!
    -- NOTE(review): the pattern below is not anchored with "^", so its lazy
    -- "[%[%d%p%s]-" prefix may match nothing at any position; in effect this
    -- tests whether the text contains a character of the script anywhere.
    -- Because pairs() order is unspecified, which script is returned for
    -- mixed-script text is not defined. Behaviour kept as-is; confirm intent.
    for script, scriptinfo in pairs(m_scripts) do
        if scriptinfo.characters and mw.ustring.match(text, "[%[%d%p%s]-" .. scriptinfo.characters) then
            return script, scFix
        end
    end

    return primary, scFix
end

-- Languages for which a manual sort key that differs from the generated one
-- is not tracked at all.
local sort_key_tracking_exempt = {
    ["cmn"] = true, ["ja"] = true, ["zu"] = true, ["nan"] = true, ["yue"] = true,
}

-- Languages that get their own "Sort key tracking/needed/<code>" category
-- instead of the shared "Sort key tracking/needed" one.
local sort_key_tracking_per_lang = {
    ["ga"] = true, ["gv"] = true, ["nv"] = true, ["roa-jer"] = true,
    ["fr"] = true, ["rm"] = true, ["prg"] = true, ["gd"] = true,
    ["twf"] = true, ["en"] = true, ["ro"] = true, ["egl"] = true,
    ["roa-tar"] = true, ["gl"] = true, ["ast"] = true, ["br"] = true,
}

-- Format the categories with the appropriate sort key.
--
-- Parameters:
--   categories  sequence of category names (without the "Category:" prefix);
--               it is read but not modified
--   lang        optional language code; when given it must be a key of
--               Module:languages, and that language's sort_key rules (if any)
--               are applied to the generated key
--   sort_key    optional explicit sort key
--   sort_base   optional text to derive the default key from; defaults to the
--               current page's subpage text
--
-- Returns the concatenated "[[Category:...]]" wikitext, or "" when the
-- current page is not in the main or "Appendix" namespace.
--
-- Raises an error if `lang` is given but unknown.
function export.format_categories(categories, lang, sort_key, sort_base)
    -- Fetch the title once and keep everything local, so no globals are created.
    local title = mw.title.getCurrentTitle()
    local namespace = title.nsText

    if namespace ~= "" and namespace ~= "Appendix" then
        return ""
    end

    local pagename = title.text

    -- Generate a default language-independent sort key
    sort_base = mw.ustring.lower(sort_base or title.subpageText)

    -- Remove initial hyphens and *
    sort_base = mw.ustring.gsub(sort_base, "^[-־ـ*]+(.)", "%1")
    -- Remove anything in parentheses, as long as they are either preceded or followed by something
    sort_base = mw.ustring.gsub(sort_base, "(.)%([^()]+%)", "%1")
    sort_base = mw.ustring.gsub(sort_base, "%([^()]+%)(.)", "%1")

    -- If there are language-specific rules to generate the key, use those
    if lang then
        local langinfo = m_languages[lang] or error("The language code \"" .. lang .. "\" is not valid.")
        local rules = langinfo.sort_key

        if rules then
            for i, from in ipairs(rules.from) do
                -- A missing replacement means "delete the match".
                sort_base = mw.ustring.gsub(sort_base, from, rules.to[i] or "")
            end
        end
    end

    -- Work on a private copy so the caller's table is left untouched.
    local names = {}
    for i, cat in ipairs(categories) do
        names[i] = cat
    end

    if sort_key then
        -- Gather some statistics regarding sort keys
        if mw.ustring.lower(sort_key) == sort_base then
            names[#names + 1] = "Sort key tracking/redundant"
        elseif lang and not sort_key_tracking_exempt[lang] then
            if sort_key_tracking_per_lang[lang] then
                names[#names + 1] = "Sort key tracking/needed/" .. lang
            else
                names[#names + 1] = "Sort key tracking/needed"
            end
        end
    else
        sort_key = sort_base
    end

    -- If the resulting key is the same as the wiki software's default, remove it
    if sort_key == pagename then
        sort_key = nil
    end

    -- The tail of every link is identical, so build it once.
    local tail = sort_key and ("|" .. sort_key .. "]]") or "]]"
    for i, cat in ipairs(names) do
        names[i] = "[[Category:" .. cat .. tail
    end

    return table.concat(names)
end

-- Used by {{categorize}}.
--
-- Reads "format" from the invoking frame's own arguments, and the language
-- code (parameter 1), the category names (parameters 2, 3, ... up to the
-- first missing one) and the optional "sort" key from the parent frame's
-- arguments.
--
-- With format "pos" each category is prefixed with the language's first
-- listed name and a space; with format "topic" it is prefixed with the
-- language code and a colon; otherwise no prefix is added. Empty category
-- parameters are skipped.
--
-- Returns the result of export.format_categories for the collected names.
--
-- Raises an error if the language code is unknown, or if it is missing on a
-- page outside the "Template" namespace (inside that namespace "und" is used).
function export.template_categorize(frame)
    local format = frame.args["format"]
    local args = frame:getParent().args

    local lang = args[1]
    local sort_key = args["sort"]
    if sort_key == "" then
        sort_key = nil
    end

    if lang == nil or lang == "" then
        -- The namespace only matters here, so look it up locally rather than
        -- caching it in a global.
        if mw.title.getCurrentTitle().nsText ~= "Template" then
            error("Language code has not been specified. Please pass parameter 1 to the template.")
        end
        lang = "und"
    end

    local langinfo = m_languages[lang] or error("The language code \"" .. lang .. "\" is not valid.")

    local prefix = ""
    if format == "pos" then
        prefix = langinfo.names[1] .. " "
    elseif format == "topic" then
        prefix = lang .. ":"
    end

    -- Walk the positional parameters by index, starting after the language
    -- code and stopping at the first one that is absent.
    local categories = {}
    local i = 2
    local cat = args[i]

    while cat do
        if cat ~= "" then
            categories[#categories + 1] = prefix .. cat
        end

        i = i + 1
        cat = args[i]
    end

    return export.format_categories(categories, lang, sort_key)
end

return export