Module:Urltowiki: Difference between revisions
Appearance
Content deleted Content added
use Module:Arguments |
don't pipe links if the display is the same as the page name, or the same but with a lower-case first letter; use tabs for indenting |
||
Line 3: | Line 3: | ||
-- are converted to normal text. |
-- are converted to normal text. |
||
p = {} |
local p = {} |
||
local current_lang = mw.language.getContentLanguage() |
|||
local interwiki_table = mw.loadData("Module:InterwikiTable") |
local interwiki_table = mw.loadData("Module:InterwikiTable") |
||
local function getHostId(host) |
local function getHostId(host) |
||
if type(host) ~= "string" then return end |
|||
for id, t in pairs(interwiki_table) do |
|||
if mw.ustring.match(host, t.domain) and t.domain_primary then -- Match partial domains (e.g. "www.foo.org" and "foo.org") but don't match non-primary domains. |
|||
return id |
|||
end |
|||
end |
|||
end |
end |
||
local function getInterwiki(host) |
local function getInterwiki(host) |
||
-- This function returns a table with information about the interwiki prefix of the specified host. |
|||
local ret = {} |
|||
-- Return a blank table for invalid input. |
|||
if type(host) ~= "string" then |
|||
return ret |
|||
end |
|||
-- Get the host ID. |
|||
host = mw.ustring.lower(host) |
|||
local host_id = getHostId(host) |
|||
if not host_id then |
|||
return ret |
|||
end |
|||
ret.host_id = host_id |
|||
-- Get the host ID. |
|||
-- Find the language in the interwiki prefix, if applicable. |
|||
host = mw.ustring.lower(host) |
|||
local host_id = getHostId(host) |
|||
if not lang or not mw.language.isSupportedLanguage(lang) then -- Check if lang is a valid language code. |
|||
if not host_id then |
|||
lang = false |
|||
return ret |
|||
end |
|||
end |
|||
-- A language prefix is not necessary if there is already a language prefix for the host in the interwiki table. |
|||
ret.host_id = host_id |
|||
local domain_lang = mw.ustring.match(interwiki_table[host_id].domain, "^(.-)%.") -- Find the text before the first period. |
|||
if mw.language.isSupportedLanguage(domain_lang) then |
|||
lang = false |
|||
end |
|||
ret.lang = lang |
|||
-- No need for an interwiki link if we are on the same site as the URL. |
|||
local current_host = mw.uri.new(mw.title.getCurrentTitle():fullUrl()).host -- Get the host portion of the current page URL. |
|||
if host == current_host then |
|||
return ret |
|||
end |
|||
-- Check if the URL language is the same as the current language. |
|||
local same_lang |
|||
if lang and lang == mw.ustring.match(current_host, "^(.-)%.") then |
|||
same_lang = true |
|||
end |
|||
-- Check if the project is the same as the current project (but a different language). |
|||
local current_host_id = getHostId(current_host) |
|||
local same_project |
|||
if current_host_id == host_id then |
|||
same_project = true |
|||
end |
|||
-- Find the interwiki prefix. |
|||
local interwiki |
|||
local project = interwiki_table[host_id].iw_prefix[1] |
|||
if same_lang or ( not lang and interwiki_table[host_id].takes_lang_prefix == false ) then |
|||
interwiki = project |
|||
elseif same_project then |
|||
interwiki = lang |
|||
elseif not lang then -- If the language code is bad but the rest of the host name is ok. |
|||
interwiki = nil |
|||
else |
|||
interwiki = project .. ":" .. lang |
|||
end |
|||
ret.interwiki = interwiki |
|||
-- Find the language in the interwiki prefix, if applicable. |
|||
return ret |
|||
local lang = mw.ustring.match(host, "^(.-)%.") -- Find the text before the first period. |
|||
if not lang or not mw.language.isSupportedLanguage(lang) then -- Check if lang is a valid language code. |
|||
lang = false |
|||
end |
|||
-- A language prefix is not necessary if there is already a language prefix for the host in the interwiki table. |
|||
local domain_lang = mw.ustring.match(interwiki_table[host_id].domain, "^(.-)%.") -- Find the text before the first period. |
|||
if mw.language.isSupportedLanguage(domain_lang) then |
|||
lang = false |
|||
end |
|||
ret.lang = lang |
|||
-- No need for an interwiki link if we are on the same site as the URL. |
|||
local current_host = mw.uri.new(mw.title.getCurrentTitle():fullUrl()).host -- Get the host portion of the current page URL. |
|||
if host == current_host then |
|||
return ret |
|||
end |
|||
-- Check if the URL language is the same as the current language. |
|||
local same_lang |
|||
if lang and lang == mw.ustring.match(current_host, "^(.-)%.") then |
|||
same_lang = true |
|||
end |
|||
-- Check if the project is the same as the current project (but a different language). |
|||
local current_host_id = getHostId(current_host) |
|||
local same_project |
|||
if current_host_id == host_id then |
|||
same_project = true |
|||
end |
|||
-- Find the interwiki prefix. |
|||
local interwiki |
|||
local project = interwiki_table[host_id].iw_prefix[1] |
|||
if same_lang or ( not lang and interwiki_table[host_id].takes_lang_prefix == false ) then |
|||
interwiki = project |
|||
elseif same_project then |
|||
interwiki = lang |
|||
elseif not lang then -- If the language code is bad but the rest of the host name is ok. |
|||
interwiki = nil |
|||
else |
|||
interwiki = project .. ":" .. lang |
|||
end |
|||
ret.interwiki = interwiki |
|||
return ret |
|||
end |
end |
||
function p._urlToWiki(args) |
function p._urlToWiki(args) |
||
-- Check the input is valid. |
|||
local input = args[1] or args.url or error("No URL specified", 2) |
|||
if type(input) ~= "string" then |
|||
error("The URL must be a string value", 2) |
|||
end |
|||
input = mw.text.trim(input) |
|||
-- Get the URI object. |
|||
url = mw.uri.new(input) |
|||
local host = url.host |
|||
-- Get the interwiki prefix. |
|||
local interwiki, lang, host_id |
|||
if host then |
|||
local iw_data = getInterwiki(host) |
|||
interwiki, lang, host_id = iw_data.interwiki, iw_data.lang, iw_data.host_id |
|||
end |
|||
local link = true -- This decides whether the resulting wikitext will be linked or not. Default is yes. |
|||
if args.link == "no" then |
|||
link = false |
|||
end |
|||
-- Get the page title. |
|||
local pagetitle, title_prefix |
|||
if host_id and not ( interwiki_table[host_id].takes_lang_prefix == true and not lang ) then |
|||
title_prefix = interwiki_table[host_id].title_prefix |
|||
end |
|||
-- If the URL path starts with the title prefix in the interwiki table, use that to get the title. |
|||
if title_prefix and mw.ustring.sub(url.path, 1, mw.ustring.len(title_prefix)) == title_prefix then |
|||
pagetitle = mw.ustring.sub(url.path, mw.ustring.len(title_prefix) + 1, -1) |
|||
-- Else, if the URL is a history "index.php", use url.query.title. Check for host_id |
|||
-- in case the URL isn't of a Wikimedia site. |
|||
elseif host_id and mw.ustring.match(url.path, "index%.php") and url.query.title then |
|||
pagetitle = url.query.title |
|||
-- Special case for Bugzilla. |
|||
elseif host_id == "bugzilla" and url.query.id then |
|||
pagetitle = url.query.id |
|||
elseif host_id == "bugzilla" and not url.query.id then |
|||
interwiki = false -- disable the interwiki prefix as we are returning a full URL. |
|||
link = false -- don't use double square brackets for URLs. |
|||
pagetitle = tostring(url) |
|||
-- If the URL is valid but not a recognised interwiki, use the URL and don't link it. |
|||
elseif host and not host_id then |
|||
link = false -- Don't use double square brackets for URLs. |
|||
pagetitle = tostring(url) |
|||
-- Otherwise, use our original input minus any fragment |
|||
else |
|||
pagetitle = mw.ustring.match(input, "^(.-)#") or input |
|||
end |
|||
-- Get the fragment and pre-process percent-encoded characters. |
|||
local fragment = url.fragment -- This also works for non-urls like "Foo#Bar". |
|||
if fragment then |
|||
fragment = mw.ustring.gsub(fragment, "%.([0-9A-F][0-9A-F])", "%%%1") |
|||
end |
|||
-- Assemble the wikilink. |
|||
local wikitext = pagetitle |
|||
if interwiki then |
|||
wikitext = interwiki .. ":" .. wikitext |
|||
end |
|||
if fragment and not (args.section == "no") then |
|||
wikitext = wikitext .. "#" .. fragment |
|||
end |
|||
-- Decode percent-encoded characters and convert underscores to spaces. |
|||
wikitext = mw.uri.decode(wikitext, "WIKI") |
|||
-- If the wikitext is to be linked, re-encode illegal characters. Don't re-encode |
|||
-- characters from invalid URLs to make the default [[{{{1}}}]] display correctly. |
|||
if link and host then |
|||
wikitext = mw.ustring.gsub(wikitext, "[<>%[%]|{}%c\n]", mw.uri.encode) |
|||
end |
|||
-- Find the display value |
|||
local display |
|||
if link then |
|||
display = args[2] or args.display -- The display text in piped links. |
|||
if display then |
|||
if type(display) ~= "string" then |
|||
error("Non-string display value detected") |
|||
end |
|||
display = mw.text.trim(display) -- Trim whitespace. |
|||
-- If the page name is the same as the display value, don't pipe |
|||
-- the link. |
|||
if current_lang:lcfirst(wikitext) == display then |
|||
wikitext = display |
|||
display = nil |
|||
elseif wikitext == display then |
|||
display = nil |
|||
end |
|||
end |
|||
end |
|||
-- Use the [[Help:Colon trick]] with categories, interwikis, and files. |
|||
local colon_prefix = mw.ustring.match(wikitext, "^(.-):.*$") or "" -- Get the text before the first colon. |
|||
local ns = mw.site.namespaces |
|||
local need_colon_trick |
|||
if mw.language.isSupportedLanguage(colon_prefix) -- Check for interwiki links. |
|||
or current_lang:lc(ns[6].name) == current_lang:lc(colon_prefix) -- Check for files. |
|||
or current_lang:lc(ns[14].name) == current_lang:lc(colon_prefix) then -- Check for categories. |
|||
need_colon_trick = true |
|||
end |
|||
for i,v in ipairs(ns[6].aliases) do -- Check for file namespace aliases. |
|||
if current_lang:lc(v) == current_lang:lc(colon_prefix) then |
|||
need_colon_trick = true |
|||
break |
|||
end |
|||
end |
|||
for i,v in ipairs(ns[14].aliases) do -- Check for category namespace aliases. |
|||
if current_lang:lc(v) == current_lang:lc(colon_prefix) then |
|||
need_colon_trick = true |
|||
break |
|||
end |
|||
end |
|||
-- Don't use the colon trick if the user says so or if we are not linking |
|||
-- (due to [[bugzilla:12974]]). |
|||
if need_colon_trick and link and args.colontrick ~= "no" then |
|||
wikitext = ":" .. wikitext |
|||
end |
|||
-- Make the link |
|||
if link then |
|||
url = mw.uri.new(input) |
|||
if display then |
|||
local host = url.host |
|||
wikitext = wikitext .. '|' .. display |
|||
end |
|||
wikitext = "[[" .. wikitext .. "]]" |
|||
end |
|||
return wikitext |
|||
-- Get the interwiki prefix. |
|||
local interwiki, lang, host_id |
|||
if host then |
|||
local iw_data = getInterwiki(host) |
|||
interwiki, lang, host_id = iw_data.interwiki, iw_data.lang, iw_data.host_id |
|||
end |
|||
local link = true -- This decides whether the resulting wikitext will be linked or not. Default is yes. |
|||
if args.link == "no" then |
|||
link = false |
|||
end |
|||
-- Get the page title. |
|||
local pagetitle, title_prefix |
|||
if host_id and not ( interwiki_table[host_id].takes_lang_prefix == true and not lang ) then |
|||
title_prefix = interwiki_table[host_id].title_prefix |
|||
end |
|||
-- If the URL path starts with the title prefix in the interwiki table, use that to get the title. |
|||
if title_prefix and mw.ustring.sub(url.path, 1, mw.ustring.len(title_prefix)) == title_prefix then |
|||
pagetitle = mw.ustring.sub(url.path, mw.ustring.len(title_prefix) + 1, -1) |
|||
-- Else, if the URL is a history "index.php", use url.query.title. Check for host_id |
|||
-- in case the URL isn't of a Wikimedia site. |
|||
elseif host_id and mw.ustring.match(url.path, "index%.php") and url.query.title then |
|||
pagetitle = url.query.title |
|||
-- Special case for Bugzilla. |
|||
elseif host_id == "bugzilla" and url.query.id then |
|||
pagetitle = url.query.id |
|||
elseif host_id == "bugzilla" and not url.query.id then |
|||
interwiki = false -- disable the interwiki prefix as we are returning a full URL. |
|||
link = false -- don't use double square brackets for URLs. |
|||
pagetitle = tostring(url) |
|||
-- If the URL is valid but not a recognised interwiki, use the URL and don't link it. |
|||
elseif host and not host_id then |
|||
link = false -- Don't use double square brackets for URLs. |
|||
pagetitle = tostring(url) |
|||
-- Otherwise, use our original input minus any fragment |
|||
else |
|||
pagetitle = mw.ustring.match(input, "^(.-)#") or input |
|||
end |
|||
-- Get the fragment and pre-process percent-encoded characters. |
|||
local fragment = url.fragment -- This also works for non-urls like "Foo#Bar". |
|||
if fragment then |
|||
fragment = mw.ustring.gsub(fragment, "%.([0-9A-F][0-9A-F])", "%%%1") |
|||
end |
|||
-- Assemble the wikilink. |
|||
local wikitext = pagetitle |
|||
if interwiki then |
|||
wikitext = interwiki .. ":" .. wikitext |
|||
end |
|||
if fragment and not (args.section == "no") then |
|||
wikitext = wikitext .. "#" .. fragment |
|||
end |
|||
-- Decode percent-encoded characters and convert underscores to spaces. |
|||
wikitext = mw.uri.decode(wikitext, "WIKI") |
|||
-- If the wikitext is to be linked, re-encode illegal characters. Don't re-encode |
|||
-- characters from invalid URLs to make the default [[{{{1}}}]] display correctly. |
|||
if link and host then |
|||
wikitext = mw.ustring.gsub(wikitext, "[<>%[%]|{}%c\n]", mw.uri.encode) |
|||
end |
|||
-- Use the [[Help:Colon trick]] with categories, interwikis, and files. |
|||
local colon_prefix = mw.ustring.match(wikitext, "^(.-):.*$") or "" -- Get the text before the first colon. |
|||
local current_lang = mw.language.getContentLanguage() |
|||
local ns = mw.site.namespaces |
|||
local need_colon_trick |
|||
if mw.language.isSupportedLanguage(colon_prefix) -- Check for interwiki links. |
|||
or current_lang:lc(ns[6].name) == current_lang:lc(colon_prefix) -- Check for files. |
|||
or current_lang:lc(ns[14].name) == current_lang:lc(colon_prefix) then -- Check for categories. |
|||
need_colon_trick = true |
|||
end |
|||
for i,v in ipairs(ns[6].aliases) do -- Check for file namespace aliases. |
|||
if current_lang:lc(v) == current_lang:lc(colon_prefix) then |
|||
need_colon_trick = true |
|||
break |
|||
end |
|||
end |
|||
for i,v in ipairs(ns[14].aliases) do -- Check for category namespace aliases. |
|||
if current_lang:lc(v) == current_lang:lc(colon_prefix) then |
|||
need_colon_trick = true |
|||
break |
|||
end |
|||
end |
|||
-- Don't use the colon trick if the user says so or if we are not linking |
|||
-- (due to [[bugzilla:12974]]). |
|||
if need_colon_trick and link and args.colontrick ~= "no" then |
|||
wikitext = ":" .. wikitext |
|||
end |
|||
-- Add the link |
|||
if link then |
|||
local display = args[2] or args.display -- The display text in piped links. |
|||
if display then |
|||
if type(display) ~= "string" then |
|||
error("Non-string display value detected") |
|||
end |
|||
display = mw.text.trim(display) -- Trim whitespace. |
|||
wikitext = wikitext .. "|" .. display |
|||
end |
|||
wikitext = "[[" .. wikitext .. "]]" |
|||
end |
|||
return wikitext |
|||
end |
end |
||
Line 203: | Line 218: | ||
wrappers = 'Template:Urltowiki' |
wrappers = 'Template:Urltowiki' |
||
}) |
}) |
||
return p._urlToWiki(args) |
|||
end |
end |
||
Revision as of 08:29, 28 July 2015
-- This module takes a URL from a Wikimedia project and returns the equivalent wikitext.
-- Any actions such as edit, history, etc., are stripped, and percent-encoded characters
-- are converted to normal text.
-- Table of exported functions; it is returned at the end of the module.
local p = {}
-- Language object for the wiki's content language. Used below for
-- lower-casing (lc / lcfirst) when comparing titles and namespace names.
local current_lang = mw.language.getContentLanguage()
-- Interwiki data loaded from Module:InterwikiTable. The code below reads the
-- fields domain, domain_primary, iw_prefix, takes_lang_prefix and title_prefix
-- from its entries.
local interwiki_table = mw.loadData("Module:InterwikiTable")
local function getHostId(host)
	-- Looks up the key of the interwiki table entry whose domain matches the
	-- given host name. Returns nothing when host is not a string or when no
	-- primary-domain entry matches.
	if type(host) == "string" then
		for site_id, site in pairs(interwiki_table) do
			-- The entry's domain is used as a pattern, so partial domains match
			-- (e.g. "www.foo.org" and "foo.org"). Non-primary domains are skipped.
			local domain_matches = mw.ustring.match(host, site.domain)
			if domain_matches and site.domain_primary then
				return site_id
			end
		end
	end
end
local function getInterwiki(host)
	-- Returns a table with information about the interwiki prefix of the
	-- specified host. Possible fields:
	--   host_id   - key of the matching entry in the interwiki table
	--   lang      - language code taken from the host name, or false
	--   interwiki - interwiki prefix to prepend to the page title (may be unset)
	-- A blank table is returned for non-string input and for hosts that are
	-- not found in the interwiki table.
	local ret = {}
	if type(host) ~= "string" then
		return ret
	end

	-- Get the host ID.
	host = mw.ustring.lower(host)
	local host_id = getHostId(host)
	if not host_id then
		return ret
	end
	ret.host_id = host_id
	local site = interwiki_table[host_id]

	-- Find the language in the host name, if applicable: the text before the
	-- first period must be a supported language code.
	local lang = mw.ustring.match(host, "^(.-)%.")
	if not lang or not mw.language.isSupportedLanguage(lang) then
		lang = false
	end

	-- A language prefix is not necessary if there is already a language prefix
	-- for the host in the interwiki table. The match yields nil when the
	-- table's domain contains no period, so guard before the language check.
	local domain_lang = mw.ustring.match(site.domain, "^(.-)%.")
	if domain_lang and mw.language.isSupportedLanguage(domain_lang) then
		lang = false
	end
	ret.lang = lang

	-- No need for an interwiki prefix if we are on the same site as the URL.
	local current_host = mw.uri.new(mw.title.getCurrentTitle():fullUrl()).host
	if host == current_host then
		return ret
	end

	-- Is the URL language the same as the language part of the current host?
	local same_lang = lang and lang == mw.ustring.match(current_host, "^(.-)%.")

	-- Is the project the same as the current project (but a different language)?
	local same_project = getHostId(current_host) == host_id

	-- Find the interwiki prefix.
	local project = site.iw_prefix[1]
	if same_lang or (not lang and site.takes_lang_prefix == false) then
		ret.interwiki = project
	elseif same_project then
		ret.interwiki = lang
	elseif lang then
		ret.interwiki = project .. ":" .. lang
	end
	-- Otherwise the language code is bad but the rest of the host name is ok;
	-- ret.interwiki is left unset.

	return ret
end
local function needsColonTrick(prefix)
	-- Reports whether a link whose text before the first colon is `prefix`
	-- needs a leading colon: the prefix is a supported language code, or it
	-- equals (case-insensitively) the name or an alias of namespace 6 (files)
	-- or namespace 14 (categories).
	if mw.language.isSupportedLanguage(prefix) then
		return true
	end
	local lc_prefix = current_lang:lc(prefix)
	for _, ns_id in ipairs({6, 14}) do
		local ns = mw.site.namespaces[ns_id]
		if current_lang:lc(ns.name) == lc_prefix then
			return true
		end
		for _, alias in ipairs(ns.aliases) do
			if current_lang:lc(alias) == lc_prefix then
				return true
			end
		end
	end
	return false
end

function p._urlToWiki(args)
	-- Converts a URL (or plain page name) into wikitext.
	--
	-- Arguments read from the args table:
	--   args[1] / args.url     - the URL or page name (required, string)
	--   args[2] / args.display - display text for a piped link (optional)
	--   args.link              - "no" to return unlinked text
	--   args.section           - "no" to drop the #fragment
	--   args.colontrick        - "no" to suppress the leading colon
	-- Returns the wikitext string. Raises an error if no URL is given, if the
	-- URL is not a string, or if the display value is not a string.

	-- Check the input is valid.
	local input = args[1] or args.url or error("No URL specified", 2)
	if type(input) ~= "string" then
		error("The URL must be a string value", 2)
	end
	input = mw.text.trim(input)

	-- Get the URI object. This must be a local: a bare assignment here would
	-- create a global variable named "url".
	local url = mw.uri.new(input)
	local host = url.host
	-- Fall back to empty values so that the checks below cannot index or
	-- pattern-match nil if the URI object has no path or query.
	local path = url.path or ""
	local query = url.query or {}

	-- Get the interwiki prefix.
	local interwiki, lang, host_id
	if host then
		local iw_data = getInterwiki(host)
		interwiki, lang, host_id = iw_data.interwiki, iw_data.lang, iw_data.host_id
	end

	-- This decides whether the resulting wikitext will be linked or not.
	-- Default is yes.
	local link = args.link ~= "no"

	-- Get the page title.
	local pagetitle, title_prefix
	if host_id and not (interwiki_table[host_id].takes_lang_prefix == true and not lang) then
		title_prefix = interwiki_table[host_id].title_prefix
	end

	if title_prefix and mw.ustring.sub(path, 1, mw.ustring.len(title_prefix)) == title_prefix then
		-- The URL path starts with the title prefix in the interwiki table,
		-- so the title is whatever follows the prefix.
		pagetitle = mw.ustring.sub(path, mw.ustring.len(title_prefix) + 1, -1)
	elseif host_id and mw.ustring.match(path, "index%.php") and query.title then
		-- An "index.php" URL (e.g. a history page): use the title parameter.
		-- host_id is checked in case the URL isn't of a recognised site.
		pagetitle = query.title
	elseif host_id == "bugzilla" and query.id then
		-- Special case for Bugzilla.
		pagetitle = query.id
	elseif host_id == "bugzilla" and not query.id then
		interwiki = false -- Disable the interwiki prefix as we are returning a full URL.
		link = false -- Don't use double square brackets for URLs.
		pagetitle = tostring(url)
	elseif host and not host_id then
		-- The URL is valid but not a recognised interwiki: use the URL and
		-- don't link it.
		link = false
		pagetitle = tostring(url)
	else
		-- Otherwise, use our original input minus any fragment.
		pagetitle = mw.ustring.match(input, "^(.-)#") or input
	end

	-- Get the fragment and turn ".XX" hex escapes into percent-encoding so
	-- that they are decoded together with the rest of the link below.
	local fragment = url.fragment -- This also works for non-URLs like "Foo#Bar".
	if fragment then
		fragment = mw.ustring.gsub(fragment, "%.([0-9A-F][0-9A-F])", "%%%1")
	end

	-- Assemble the wikilink.
	local wikitext = pagetitle
	if interwiki then
		wikitext = interwiki .. ":" .. wikitext
	end
	if fragment and args.section ~= "no" then
		wikitext = wikitext .. "#" .. fragment
	end

	-- Decode percent-encoded characters and convert underscores to spaces.
	wikitext = mw.uri.decode(wikitext, "WIKI")

	-- If the wikitext is to be linked, re-encode illegal characters. Don't
	-- re-encode characters from invalid URLs to make the default [[{{{1}}}]]
	-- display correctly.
	if link and host then
		wikitext = mw.ustring.gsub(wikitext, "[<>%[%]|{}%c\n]", mw.uri.encode)
	end

	-- Find the display value (the display text in piped links).
	local display
	if link then
		display = args[2] or args.display
		if display then
			if type(display) ~= "string" then
				error("Non-string display value detected")
			end
			display = mw.text.trim(display)
			-- If the page name is the same as the display value, or the same
			-- but with a lower-case first letter, don't pipe the link.
			if current_lang:lcfirst(wikitext) == display then
				wikitext = display
				display = nil
			elseif wikitext == display then
				display = nil
			end
		end
	end

	-- Use the [[Help:Colon trick]] with categories, interwikis, and files.
	-- Don't use the colon trick if the user says so or if we are not linking
	-- (due to [[bugzilla:12974]]); in those cases the check is skipped.
	if link and args.colontrick ~= "no" then
		-- Get the text before the first colon.
		local colon_prefix = mw.ustring.match(wikitext, "^(.-):.*$") or ""
		if needsColonTrick(colon_prefix) then
			wikitext = ":" .. wikitext
		end
	end

	-- Make the link.
	if link then
		if display then
			wikitext = wikitext .. '|' .. display
		end
		wikitext = "[[" .. wikitext .. "]]"
	end

	return wikitext
end
function p.urlToWiki(frame)
	-- Frame-based wrapper around p._urlToWiki: collects the arguments with
	-- Module:Arguments (treating Template:Urltowiki as a wrapper template)
	-- and passes them on.
	local getArgs = require('Module:Arguments').getArgs
	local options = { wrappers = 'Template:Urltowiki' }
	local args = getArgs(frame, options)
	return p._urlToWiki(args)
end
return p