Module:Delink/sandbox
Appearance
This is the module sandbox page for Module:Delink (diff). See also the companion subpage for test cases (run).
This module is rated as ready for general use. It has reached a mature form and is thought to be relatively bug-free and ready for use wherever appropriate. It is ready to mention on help pages and other Wikipedia resources as an option for new users to learn. To reduce server load and bad output, it should be improved by sandbox testing rather than repeated trial-and-error editing.
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing.
This module can only be edited by administrators because it is transcluded onto one or more cascade-protected pages.
This Lua module is used in system messages, and on approximately 3,460,000 pages, or roughly 5% of all pages. Changes to it can cause immediate changes to the Wikipedia user interface. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Please discuss changes on the talk page before implementing them.
This module implements the {{delink}} template. Please see the template page for documentation.
-- This module de-links most wikitext.
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')
-- Often-used functions and variables, cached in locals so the functions
-- below do not repeat the nested table lookups on every call.
local htmlDecode = mw.text.decode
local uriDecode = mw.uri.decode
local isKnownLanguageTag = mw.language.isKnownLanguageTag
local namespaces = mw.site.namespaces

-- Export table for this module. It is declared local so that loading the
-- module does not create a global variable named "p"; callers receive the
-- table through the module's final return statement.
local p = {}
--- Like yesno, but string input is trimmed first and blank strings count
-- as "not supplied".
-- @param val Any value; only strings receive special treatment.
-- @return nil for a string that is empty after trimming, otherwise whatever
--   yesno returns for the (trimmed) value.
local function whitespaceYesno(val)
    -- Non-string values go to yesno untouched.
    if type(val) ~= 'string' then
        return yesno(val)
    end

    -- Strip leading and trailing whitespace.
    local trimmed = val:match('^%s*(.-)%s*$')
    if trimmed == '' then
        return nil
    end
    return yesno(trimmed)
end
--- De-link a single wikilink and return the text it would display.
-- @param s A string starting with '[[' and ending with ']]'. It does not
--   contain any other ']]' strings.
-- @return The display text of the link; the blank string for unescaped
--   category, file and language-tag links; or s itself when the link is
--   judged to be invalid.
function p._delinkWikilink(s)
    local linkText = s:sub(3, -3)

    -- Deal with nested links: keep the leading '[[' literally and re-run this
    -- function on the part that starts at the next '[[' and ends the string.
    if linkText:find('%[%[') then
        -- The parentheses keep only gsub's first result (the new string).
        return '[[' .. (s:sub(3):gsub('%[%[.-%]%]$', p._delinkWikilink))
    end

    local titleArea, display = linkText:match('^(.-)|(.*)$')

    -- Process links with display areas. Pipe tricks aren't processed here, as
    -- we need to know more about the link title first.
    if display then
        display = htmlDecode(display, true) -- decode HTML entities.
        if titleArea == '' then
            -- We are dealing with a reverse pipe trick.
            if display:find('[|\n]') or s == '[[|]]' then
                -- The link is invalid.
                return s
            else
                return display
            end
        elseif display ~= '' then
            -- We are dealing with a normal piped link.
            return display
        end
    end
    titleArea = titleArea or linkText

    -- Decode percent-encoded and HTML-encoded characters.
    titleArea = uriDecode(titleArea, 'PATH')
    titleArea = htmlDecode(titleArea, true)

    -- Find the fragment, if any.
    local titleAreaNoFragment, fragment = titleArea:match('^(.-)#(.*)$')
    titleAreaNoFragment = titleAreaNoFragment or titleArea

    -- Check for bad characters.
    if titleAreaNoFragment:find('[%[%]<>{}%%%c\n]') then
        return s
    end

    -- Find the interwiki and the title. Actually, only the prefix before the
    -- first colon is counted as the interwiki, so the "title" may contain
    -- another interwiki prefix and/or a namespace name, but it's close enough
    -- for our purposes.
    -- Both captures must be assigned before any default is applied: writing
    -- "match(...) or ''" would cut the call down to its first result and leave
    -- the title permanently nil.
    local interwiki, title = titleAreaNoFragment:match('^(.-):(.*)$')
    interwiki = interwiki or ''
    title = title or titleAreaNoFragment

    -- Check for unescaped categories, interwikis, and files. If any are found,
    -- return the blank string, as nothing would be displayed.
    local ns = namespaces[interwiki]
    if isKnownLanguageTag(interwiki)
        or ns and (ns.id == 6 or ns.id == 14)
    then
        return ''
    end

    -- Remove the colon if the link is using the [[Help:Colon trick]].
    titleArea = titleArea:match('^:(.*)$') or titleArea

    -- Deal with links using the [[Help:Pipe trick]].
    if display == '' then
        if fragment then
            -- Fragments in a pipe trick are invalid, so return the input string.
            return s
        end
        -- Pipe tricks don't display interwikis, so we only need the title text
        -- here. We need to remove parentheses and commas. Parentheses have
        -- priority.
        local pipeTrickText = title:match('^(.-) ?%(.-%)$')
        if pipeTrickText then
            return pipeTrickText
        end
        -- If there are no parentheses, display only the text before the first
        -- comma.
        return title:match('(.-),.*$') or title
    end

    -- If we haven't returned any text yet, display the title area.
    return titleArea
end
--- De-link one bracketed external link.
-- Internal wikilinks are assumed to be gone already; s is a piece of text
-- enclosed in single square brackets, e.g. "[foo]".
-- @param s The bracketed text.
-- @return The link's display text when s is an external link (the blank
--   string if it has none); otherwise s, with any bracketed text nested
--   inside it processed in turn.
function p._delinkURL(s)
    -- Text containing a line break is never formatted as a URL.
    if s:find('\n', 1, true) then
        return s
    end

    -- Look for a recognised URL prefix followed by at least one character
    -- that is neither a double quote nor whitespace.
    local schemes = {'//', 'http://', 'https://', 'ftp://', 'gopher://', 'mailto:', 'news:', 'irc://'}
    local scheme
    for idx = 1, #schemes do
        local candidate = schemes[idx]
        if s:find('^%[' .. candidate .. '[^"%s].*%]') then
            scheme = candidate
            break
        end
    end

    if scheme == nil then
        -- Not a URL: keep the opening bracket and try any bracketed text
        -- nested further in. Only gsub's first result is used.
        local inner = s:sub(2):gsub('%[.-%]', p._delinkURL)
        return '[' .. inner
    end

    -- Everything after the URL prefix and before the final square bracket.
    local afterScheme = s:match('^%[' .. scheme .. '(.*)%]')
    -- Everything from the first separator character (one of " < > space [) on.
    local label = afterScheme:match('^.-(["<> [].*)') or ''
    -- Drop leading whitespace left over from a space separator.
    label = label:match('^%s*(%S.*)$') or ''

    -- Prefer the entity-decoded label, unless decoding leaves control
    -- characters in it.
    local decoded = mw.text.decode(label, true)
    if decoded:find('%c') then
        return label
    end
    return decoded
end
--- Run the de-linking pipeline over args[1].
-- @param args Table of options. args[1] is the text to process (default '').
--   args.refs enables removal of ref strip markers; args.comments,
--   args.wikilinks, args.urls and args.whitespace each switch off one stage
--   when set to a value that whitespaceYesno reads as false.
-- @return The processed text.
function p._delink(args)
    local text = args[1] or ''

    -- A stage runs unless its option is explicitly false-like.
    local function notDisabled(name)
        return whitespaceYesno(args[name]) ~= false
    end

    -- Remove any [[Help:Strip markers]] representing ref tags. In most
    -- situations this is not a good idea - only use it if you know what you
    -- are doing! This stage is opt-in, unlike the others.
    if whitespaceYesno(args.refs) then
        text = (mw.ustring.gsub(text, 'UNIQ%w*%-ref%-%d*%-QINU', ''))
    end

    -- Remove html comments.
    if notDisabled('comments') then
        text = (text:gsub('<!%-%-.-%-%->', ''))
    end

    -- De-link wikilinks.
    if notDisabled('wikilinks') then
        text = (text:gsub('%[%[.-%]%]', p._delinkWikilink))
    end

    -- De-link URLs.
    if notDisabled('urls') then
        text = (text:gsub('%[.-%]', p._delinkURL))
    end

    if notDisabled('whitespace') then
        -- Replace single new lines with a single space, but leave double new
        -- lines and new lines only containing spaces or tabs before a second
        -- new line.
        text = (text:gsub('([^\n \t][ \t]*)\n([ \t]*[^\n \t])', '%1 %2'))
        -- Remove extra tabs and spaces.
        text = (text:gsub('[ \t]+', ' '))
    end

    return text
end
--- Entry point that takes a frame object.
-- Reads the arguments of the parent frame and hands them to p._delink.
-- @param frame The frame object; its getParent() result supplies the args.
-- @return The result of p._delink.
function p.delink(frame)
    return p._delink(frame:getParent().args)
end
-- Hand back the table p, which holds the functions defined above.
return p