Jump to content

Module:WikitextParser: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
Refine paragraph matching
Make string calls explicit for easier debugging
Line 7: Line 7:
-- Private helper method to escape a string for use in regexes
-- Private helper method to escape a string for use in regexes
local function escapeString( str )
local function escapeString( str )
return str:gsub( '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' )
return string.gsub( str, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' )
end
end


Line 16: Line 16:
function WikitextParser.getLead( wikitext )
function WikitextParser.getLead( wikitext )
wikitext = '\n' .. wikitext
wikitext = '\n' .. wikitext
wikitext = wikitext:gsub( '\n==.*', '' )
wikitext = string.gsub( wikitext, '\n==.*', '' )
wikitext = mw.text.trim( wikitext )
wikitext = mw.text.trim( wikitext )
return wikitext
return wikitext
Line 28: Line 28:
local sections = {}
local sections = {}
wikitext = '\n' .. wikitext .. '\n=='
wikitext = '\n' .. wikitext .. '\n=='
for title in wikitext:gmatch( '\n==+ *([^=]-) *==+' ) do
for title in string.gmatch( wikitext, '\n==+ *([^=]-) *==+' ) do
local section = wikitext:match( '\n==+ *' .. escapeString( title ) .. ' *==+(.-)\n==' )
local section = string.match( wikitext, '\n==+ *' .. escapeString( title ) .. ' *==+(.-)\n==' )
section = mw.text.trim( section )
section = mw.text.trim( section )
sections[ title ] = section
sections[ title ] = section
Line 45: Line 45:
title = escapeString( title )
title = escapeString( title )
wikitext = '\n' .. wikitext .. '\n'
wikitext = '\n' .. wikitext .. '\n'
local level, wikitext = wikitext:match( '\n(==+) *' .. title .. ' *==.-\n(.*)' )
local level, wikitext = string.match( wikitext, '\n(==+) *' .. title .. ' *==.-\n(.*)' )
if wikitext then
if wikitext then
local nextSection = '\n==' .. string.rep( '=?', #level - 2 ) .. '[^=].*'
local nextSection = '\n==' .. string.rep( '=?', #level - 2 ) .. '[^=].*'
wikitext = wikitext:gsub( nextSection, '' ) -- remove later sections at this level or higher
wikitext = string.gsub( wikitext, nextSection, '' ) -- remove later sections at this level or higher
wikitext = mw.text.trim( wikitext )
wikitext = mw.text.trim( wikitext )
return wikitext
return wikitext
Line 62: Line 62:
name = mw.text.trim( name )
name = mw.text.trim( name )
name = escapeString( name )
name = escapeString( name )
wikitext = wikitext:match( '< *section +begin *= *["\']? *' .. name .. ' *["\']? */>(.-)< *section +end= *["\']? *'.. name ..' *["\']? */>' )
wikitext = string.match( wikitext, '< *section +begin *= *["\']? *' .. name .. ' *["\']? */>(.-)< *section +end= *["\']? *'.. name ..' *["\']? */>' )
if wikitext then
if wikitext then
return mw.text.trim( wikitext )
return mw.text.trim( wikitext )
Line 74: Line 74:
local lists = {}
local lists = {}
wikitext = '\n' .. wikitext .. '\n\n'
wikitext = '\n' .. wikitext .. '\n\n'
for list in wikitext:gmatch( '\n([*#].-)\n[^*#]' ) do
for list in string.gmatch( wikitext, '\n([*#].-)\n[^*#]' ) do
table.insert( lists, list )
table.insert( lists, list )
end
end
Line 88: Line 88:
-- Remove non-paragraphs
-- Remove non-paragraphs
wikitext = '\n' .. wikitext .. '\n'
wikitext = '\n' .. wikitext .. '\n'
wikitext = wikitext:gsub( '\n[*#][^\n]*', '\n' ) -- remove lists
wikitext = string.gsub( wikitext, '\n[*#][^\n]*', '\n' ) -- remove lists
wikitext = wikitext:gsub( '\n%[%b[]%]\n', '\n' ) -- remove files and categories
wikitext = string.gsub( wikitext, '\n%[%b[]%]\n', '\n' ) -- remove files and categories
wikitext = wikitext:gsub( '\n%b{} *\n', '\n%0\n' ) -- add spacing between tables and block templates
wikitext = string.gsub( wikitext, '\n%b{} *\n', '\n%0\n' ) -- add spacing between tables and block templates
wikitext = wikitext:gsub( '\n%b{} *\n', '\n' ) -- remove tables and block templates
wikitext = string.gsub( wikitext, '\n%b{} *\n', '\n' ) -- remove tables and block templates
wikitext = wikitext:gsub( '\n==+[^=]+==+ *\n', '\n' ) -- remove section titles
wikitext = string.gsub( wikitext, '\n==+[^=]+==+ *\n', '\n' ) -- remove section titles
wikitext = mw.text.trim( wikitext )
wikitext = mw.text.trim( wikitext )


Line 108: Line 108:
function WikitextParser.getTemplates( wikitext )
function WikitextParser.getTemplates( wikitext )
local templates = {}
local templates = {}
for template in wikitext:gmatch( '{%b{}}' ) do
for template in string.gmatch( wikitext, '{%b{}}' ) do
if wikitext:sub( 1, 3 ) ~= '{{#' then -- skip parser functions like #if
if string.sub( wikitext, 1, 3 ) ~= '{{#' then -- skip parser functions like #if
table.insert( templates, template )
table.insert( templates, template )
end
end
Line 137: Line 137:
-- @todo Strip "Template:" namespace?
-- @todo Strip "Template:" namespace?
function WikitextParser.getTemplateName( templateWikitext )
function WikitextParser.getTemplateName( templateWikitext )
return templateWikitext:match( '^{{ *([^}|\n]+)' )
return string.match( templateWikitext, '^{{ *([^}|\n]+)' )
end
end


Line 147: Line 147:
local parameters = {}
local parameters = {}
local paramOrder = {}
local paramOrder = {}
local params = templateWikitext:match( '{{[^|}]-|(.*)}}' )
local params = string.match( templateWikitext, '{{[^|}]-|(.*)}}' )
if params then
if params then
-- Temporarily replace pipes in subtemplates and links to avoid chaos
-- Temporarily replace pipes in subtemplates and links to avoid chaos
for subtemplate in params:gmatch( '{%b{}}' ) do
for subtemplate in string.gmatch( params, '{%b{}}' ) do
params = params:gsub( escapeString( subtemplate ), subtemplate:gsub( '.', { ['%']='%%', ['|']="@@:@@", ['=']='@@_@@' } ) )
params = string.gsub( params, escapeString( subtemplate ), string.gsub( subtemplate, '.', { ['%']='%%', ['|']="@@:@@", ['=']='@@_@@' } ) )
end
end
for link in params:gmatch( '%[%b[]%]' ) do
for link in string.gmatch( params, '%[%b[]%]' ) do
params = params:gsub( escapeString( link ), link:gsub( '.', { ['%']='%%', ['|']='@@:@@', ['=']='@@_@@' } ) )
params = string.gsub( params, escapeString( link ), string.gsub( link, '.', { ['%']='%%', ['|']='@@:@@', ['=']='@@_@@' } ) )
end
end
local count = 0
local count = 0
Line 169: Line 169:
value = mw.text.trim( value )
value = mw.text.trim( value )
end
end
value = value:gsub( '@@_@@', '=' )
value = string.gsub( value, '@@_@@', '=' )
value = value:gsub( '@@:@@', '|' )
value = string.gsub( value, '@@:@@', '|' )
parameters[ name ] = value
parameters[ name ] = value
table.insert( paramOrder, name )
table.insert( paramOrder, name )
Line 185: Line 185:
local tag, tagName, tagEnd
local tag, tagName, tagEnd
-- Don't match closing tags like </div>, comments like <!--foo-->, comparisons like 1<2 or things like <3
-- Don't match closing tags like </div>, comments like <!--foo-->, comparisons like 1<2 or things like <3
for tagStart, tagOpen in wikitext:gmatch( '()(<[^/!%d].->)' ) do
for tagStart, tagOpen in string.gmatch( wikitext, '()(<[^/!%d].->)' ) do
tagName = WikitextParser.getTagName( tagOpen )
tagName = WikitextParser.getTagName( tagOpen )


-- If we're in a self-closing tag, like <ref name="foo" />, <references/>, <br/>, <br>, <hr>, etc.
-- If we're in a self-closing tag, like <ref name="foo" />, <references/>, <br/>, <br>, <hr>, etc.
if tagOpen:match( '<.-/>' ) or tagName == 'br' or tagName == 'hr' then
if string.match( tagOpen, '<.-/>' ) or tagName == 'br' or tagName == 'hr' then
tag = tagOpen
tag = tagOpen


Line 197: Line 197:
local depth = 1
local depth = 1
while depth > 0 do
while depth > 0 do
tagEnd = wikitext:match( '</ ?' .. tagName .. ' ?>()', position )
tagEnd = string.match( wikitext, '</ ?' .. tagName .. ' ?>()', position )
if tagEnd then
if tagEnd then
tagEnd = tagEnd - 1
tagEnd = tagEnd - 1
Line 203: Line 203:
break -- unclosed tag
break -- unclosed tag
end
end
position = wikitext:match( '()< ?' .. tagName .. '[ >]', position + 1 )
position = string.match( wikitext, '()< ?' .. tagName .. '[ >]', position + 1 )
if not position then
if not position then
position = tagEnd + 1
position = tagEnd + 1
Line 213: Line 213:
end
end
end
end
tag = wikitext:sub( tagStart, tagEnd )
tag = string.sub( wikitext, tagStart, tagEnd )


-- Else we're probably in tag that shouldn't contain others like it, like <math> or <strong>
-- Else we're probably in tag that shouldn't contain others like it, like <math> or <strong>
else
else
tagEnd = wikitext:match( '</ ?' .. tagName .. ' ?>()', tagStart )
tagEnd = string.match( wikitext, '</ ?' .. tagName .. ' ?>()', tagStart )
if tagEnd then
if tagEnd then
tag = wikitext:sub( tagStart, tagEnd - 1 )
tag = string.sub( wikitext, tagStart, tagEnd - 1 )


-- If no end tag is found, assume we matched something that wasn't a tag, like <no. 1>
-- If no end tag is found, assume we matched something that wasn't a tag, like <no. 1>
Line 235: Line 235:
-- @return Name of the tag or nil if not found
-- @return Name of the tag or nil if not found
function WikitextParser.getTagName( tagWikitext )
function WikitextParser.getTagName( tagWikitext )
local tagName = tagWikitext:match( '^< *(.-)[ />]' )
local tagName = string.match( tagWikitext, '^< *(.-)[ />]' )
if tagName then tagName = tagName:lower() end
if tagName then tagName = string.lower( tagName ) end
return tagName
return tagName
end
end
Line 245: Line 245:
-- @return Value of the attribute or nil if not found
-- @return Value of the attribute or nil if not found
function WikitextParser.getTagAttribute( tagWikitext, attribute )
function WikitextParser.getTagAttribute( tagWikitext, attribute )
return tagWikitext:match( '^< *.- *[^/>]*' .. attribute .. ' *= *["\']?([^"\'/>]+)["\']?[ />]' )
return string.match( tagWikitext, '^< *.- *[^/>]*' .. attribute .. ' *= *["\']?([^"\'/>]+)["\']?[ />]' )
end
end


Line 253: Line 253:
-- @todo May fail with nested tags
-- @todo May fail with nested tags
function WikitextParser.getTagContent( tagWikitext, attribute )
function WikitextParser.getTagContent( tagWikitext, attribute )
return tagWikitext:match( '^<.->.-</.->' )
return string.match( tagWikitext, '^<.->.-</.->' )
end
end


Line 307: Line 307:
local tables = {}
local tables = {}
wikitext = '\n' .. wikitext
wikitext = '\n' .. wikitext
for t in wikitext:gmatch( '\n%b{}' ) do
for t in string.gmatch( gmatch, '\n%b{}' ) do
if t:sub( 1, 3 ) == '\n{|' then
if string.sub( t, 1, 3 ) == '\n{|' then
t = mw.text.trim( t ) -- exclude the leading newline
t = mw.text.trim( t ) -- exclude the leading newline
table.insert( tables, t )
table.insert( tables, t )
Line 321: Line 321:
-- @return Value of the attribute or nil is not found
-- @return Value of the attribute or nil is not found
function WikitextParser.getTableAttribute( tableWikitext, attribute )
function WikitextParser.getTableAttribute( tableWikitext, attribute )
return tableWikitext:match( '^{|[^\n]*' .. attribute .. ' *= *["\']?([^"\'\n]+)["\']?[^\n]*\n' )
return string.match( tableWikitext, '^{|[^\n]*' .. attribute .. ' *= *["\']?([^"\'\n]+)["\']?[^\n]*\n' )
end
end


Line 344: Line 344:
local tableData = {}
local tableData = {}
tableWikitext = mw.text.trim( tableWikitext );
tableWikitext = mw.text.trim( tableWikitext );
tableWikitext = tableWikitext:gsub( '^{|.-\n', '' ) -- remove the header
tableWikitext = string.gsub( tableWikitext, '^{|.-\n', '' ) -- remove the header
tableWikitext = tableWikitext:gsub( '\n|}$', '' ) -- remove the footer
tableWikitext = string.gsub( tableWikitext, '\n|}$', '' ) -- remove the footer
tableWikitext = tableWikitext:gsub( '^|%+.-\n', '' ) -- remove any caption
tableWikitext = string.gsub( tableWikitext, '^|%+.-\n', '' ) -- remove any caption
tableWikitext = tableWikitext:gsub( '|%-.-\n', '|-\n' ) -- remove any row attributes
tableWikitext = string.gsub( tableWikitext, '|%-.-\n', '|-\n' ) -- remove any row attributes
tableWikitext = tableWikitext:gsub( '^|%-\n', '' ) -- remove any leading empty row
tableWikitext = string.gsub( tableWikitext, '^|%-\n', '' ) -- remove any leading empty row
tableWikitext = tableWikitext:gsub( '\n|%-$', '' ) -- remove any trailing empty row
tableWikitext = string.gsub( tableWikitext, '\n|%-$', '' ) -- remove any trailing empty row
for rowWikitext in mw.text.gsplit( '|-', true ) do
for rowWikitext in mw.text.gsplit( '|-', true ) do
local rowData = {}
local rowData = {}
rowWikitext = rowWikitext:gsub( '||', '\n|' )
rowWikitext = string.gsub( rowWikitext, '||', '\n|' )
rowWikitext = rowWikitext:gsub( '!!', '\n|' )
rowWikitext = string.gsub( rowWikitext, '!!', '\n|' )
rowWikitext = rowWikitext:gsub( '\n!', '\n|' )
rowWikitext = string.gsub( rowWikitext, '\n!', '\n|' )
rowWikitext = rowWikitext:gsub( '^!', '\n|' )
rowWikitext = string.gsub( rowWikitext, '^!', '\n|' )
rowWikitext = rowWikitext:gsub( '^\n|', '' )
rowWikitext = string.gsub( rowWikitext, '^\n|', '' )
for cellWikitext in mw.text.gsplit( rowWikitext, '\n|' ) do
for cellWikitext in mw.text.gsplit( rowWikitext, '\n|' ) do
cellWikitext = mw.text.trim( cellWikitext )
cellWikitext = mw.text.trim( cellWikitext )
Line 371: Line 371:
function WikitextParser.getLinks( wikitext )
function WikitextParser.getLinks( wikitext )
local links = {}
local links = {}
for link in wikitext:gmatch( '%[%b[]%]' ) do
for link in string.gmatch( wikitext, '%[%b[]%]' ) do
table.insert( links, link )
table.insert( links, link )
end
end
Line 384: Line 384:
local links = WikitextParser.getLinks( wikitext )
local links = WikitextParser.getLinks( wikitext )
for _, link in pairs( links ) do
for _, link in pairs( links ) do
local namespace = link:match( '^%[%[ *(.-) *:' )
local namespace = string.match( link, '^%[%[ *(.-) *:' )
if namespace and mw.site.namespaces[ namespace ] and mw.site.namespaces[ namespace ].canonicalName == 'File' then
if namespace and mw.site.namespaces[ namespace ] and mw.site.namespaces[ namespace ].canonicalName == 'File' then
table.insert( files, link )
table.insert( files, link )
Line 396: Line 396:
-- @return Name of the file
-- @return Name of the file
function WikitextParser.getFileName( fileWikitext )
function WikitextParser.getFileName( fileWikitext )
return fileWikitext:match( '^%[%[ *.- *: *(.-) *[]|]' )
return string.match( fileWikitext, '^%[%[ *.- *: *(.-) *[]|]' )
end
end


Line 406: Line 406:
local links = WikitextParser.getLinks( wikitext )
local links = WikitextParser.getLinks( wikitext )
for _, link in pairs( links ) do
for _, link in pairs( links ) do
local namespace = link:match( '^%[%[ -(.-) -:' )
local namespace = string.match( link, '^%[%[ -(.-) -:' )
if namespace and mw.site.namespaces[ namespace ] and mw.site.namespaces[ namespace ].canonicalName == 'Category' then
if namespace and mw.site.namespaces[ namespace ] and mw.site.namespaces[ namespace ].canonicalName == 'Category' then
table.insert( categories, link )
table.insert( categories, link )
Line 419: Line 419:
function WikitextParser.getExternalLinks( wikitext )
function WikitextParser.getExternalLinks( wikitext )
local links = {}
local links = {}
for link in wikitext:gmatch( '%b[]' ) do
for link in string.gmatch( wikitext, '%b[]' ) do
if link:match( '^%[//' ) or link:match( '^%[https?://' ) then
if string.match( link, '^%[//' ) or string.match( link, '^%[https?://' ) then
table.insert( links, link )
table.insert( links, link )
end
end

Revision as of 14:15, 9 May 2025

-- Module:WikitextParser is a general-purpose wikitext parser
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:WikitextParser
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382, et al.
-- License: CC-BY-SA-4.0
-- Module table: every public function below is attached to it, and it is returned at the end of the file
local WikitextParser = {}

-- Private helper method to escape a string for use in Lua patterns
-- Every magic character ( ^ $ ( ) . [ ] * + - ? % ) gets a '%' prefix so that it matches literally.
-- @param str Required. String to escape.
-- @return The escaped string, and only the string
local function escapeString( str )
	-- string.gsub returns two values (the new string and the number of substitutions).
	-- Assigning to a single local drops the count, so it can never leak into a caller's argument list.
	local escaped = string.gsub( str, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' )
	return escaped
end

-- Extract the lead section from the given wikitext
-- The lead is everything that comes before the first line starting with "==".
-- @param wikitext Required. Wikitext to parse.
-- @return Trimmed wikitext of the lead section. Empty if nothing precedes the first section title.
function WikitextParser.getLead( wikitext )
	-- The extra leading newline lets a section title on the very first line match '\n==' too
	local lead = string.gsub( '\n' .. wikitext, '\n==.*', '' )
	lead = mw.text.trim( lead )
	return lead
end

-- Split the given wikitext into its sections
-- The lead section is not included, use getLead for that.
-- The content of each section stops at the next line starting with "==", whatever its level.
-- @param wikitext Required. Wikitext to parse.
-- @return Map from section title to trimmed section content
function WikitextParser.getSections( wikitext )
	local contentByTitle = {}
	-- Pad the text so that the first title is preceded by a newline and the last section is closed by "\n=="
	local padded = '\n' .. wikitext .. '\n=='
	for title in string.gmatch( padded, '\n==+ *([^=]-) *==+' ) do
		-- The lookup restarts from the beginning of the text, so a repeated title always maps to its first occurrence
		local sectionPattern = '\n==+ *' .. escapeString( title ) .. ' *==+(.-)\n=='
		local content = string.match( padded, sectionPattern )
		contentByTitle[ title ] = mw.text.trim( content )
	end
	return contentByTitle
end

-- Extract one section (together with its subsections) from the given wikitext
-- When several sections share the requested title, only the first one is returned.
-- @param wikitext Required. Wikitext to parse.
-- @param title Required. Title of the section
-- @return Trimmed wikitext of the section, or nothing if the title isn't found. May be empty if the section is empty or contains only subsections.
function WikitextParser.getSection( wikitext, title )
	local titlePattern = escapeString( mw.text.trim( title ) )
	-- level captures the "=" run that opens the title, content captures everything after the title line
	local level, content = string.match( '\n' .. wikitext .. '\n', '\n(==+) *' .. titlePattern .. ' *==.-\n(.*)' )
	if not content then
		return
	end
	-- A title opened by at most #level "=" signs, i.e. a later section at this level or higher
	local nextSection = '\n==' .. string.rep( '=?', #level - 2 ) .. '[^=].*'
	content = string.gsub( content, nextSection, '' )
	content = mw.text.trim( content )
	return content
end

-- Extract what lies between <section begin=NAME /> and <section end=NAME /> in the given wikitext.
-- getTags can't be used here because, unlike all other tags, both the opening and the closing <section> tags are self-closing.
-- @param wikitext Required. Wikitext to parse.
-- @param name Required. Name of the <section> tag
-- @return Trimmed content of the <section> tag, or nothing if it isn't found. May be empty if the section tag is empty.
function WikitextParser.getSectionTag( wikitext, name )
	local namePattern = escapeString( mw.text.trim( name ) )
	-- The name may be wrapped in single or double quotes and padded with spaces
	local beginTag = '< *section +begin *= *["\']? *' .. namePattern .. ' *["\']? */>'
	local endTag = '< *section +end= *["\']? *' .. namePattern .. ' *["\']? */>'
	local content = string.match( wikitext, beginTag .. '(.-)' .. endTag )
	if content then
		return mw.text.trim( content )
	end
end

-- Get the lists from the given wikitext.
-- A list is a run of consecutive lines starting with "*" or "#".
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of lists.
function WikitextParser.getLists( wikitext )
	local lists = {}
	-- Pad the text so that a list on the first line has a newline before it
	-- and a list on the last line is followed by a newline plus a non-list character
	wikitext = '\n' .. wikitext .. '\n\n'
	local listPattern = '\n([*#].-)\n[^*#]'
	local position = 1
	while true do
		local _, matchEnd, list = string.find( wikitext, listPattern, position )
		if not list then
			break
		end
		table.insert( lists, list )
		-- The match swallows the newline that ends the list plus one more character.
		-- Resume from that newline, otherwise a list that starts right after a single blank line
		-- has already lost its leading newline and is skipped.
		position = matchEnd - 1
	end
	return lists
end

-- Extract the paragraphs from the given wikitext.
-- Lists, files, categories, tables, block templates and section titles are stripped first,
-- then the remaining text is split on blank lines.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of paragraphs.
function WikitextParser.getParagraphs( wikitext )
	-- Ordered { pattern, replacement } pairs that remove non-paragraphs; the order matters
	local cleanups = {
		{ '\n[*#][^\n]*', '\n' }, -- remove lists
		{ '\n%[%b[]%]\n', '\n' }, -- remove files and categories
		{ '\n%b{} *\n', '\n%0\n' }, -- add spacing between tables and block templates
		{ '\n%b{} *\n', '\n' }, -- remove tables and block templates
		{ '\n==+[^=]+==+ *\n', '\n' }, -- remove section titles
	}
	local text = '\n' .. wikitext .. '\n'
	for _, cleanup in ipairs( cleanups ) do
		text = string.gsub( text, cleanup[1], cleanup[2] )
	end
	text = mw.text.trim( text )

	-- Whatever is left is split on blank lines; chunks holding only whitespace are dropped
	local paragraphs = {}
	for chunk in mw.text.gsplit( text, '\n\n+' ) do
		if mw.text.trim( chunk ) ~= '' then
			paragraphs[ #paragraphs + 1 ] = chunk
		end
	end
	return paragraphs
end

-- Get the templates from the given wikitext.
-- Only top-level templates are returned: a template nested inside another one is part of the outer match.
-- Parser functions like {{#if:...}} are skipped.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of templates.
function WikitextParser.getTemplates( wikitext )
	local templates = {}
	for template in string.gmatch( wikitext, '{%b{}}' ) do
		-- Check the start of the matched template itself (not of the whole wikitext)
		-- to skip parser functions like #if
		if string.sub( template, 1, 3 ) ~= '{{#' then
			table.insert( templates, template )
		end
	end
	return templates
end

-- Get the requested template from the given wikitext.
-- If the template appears more than once, only the first instance will be returned.
-- Names are compared after trimming surrounding whitespace and upper-casing the first letter.
-- @param wikitext Required. Wikitext to parse.
-- @param name Name of the template to get
-- @return Wikitext of the template, or nil if it wasn't found
function WikitextParser.getTemplate( wikitext, name )
	local lang = mw.language.getContentLanguage()
	-- Normalize the requested name once instead of on every iteration
	local wanted = lang:ucfirst( mw.text.trim( name ) )
	-- ipairs guarantees the templates are visited in the order they appear in the wikitext
	for _, template in ipairs( WikitextParser.getTemplates( wikitext ) ) do
		local templateName = WikitextParser.getTemplateName( template )
		-- getTemplateName returns nil when the name doesn't start on the same line as "{{"
		-- (for example "{{\nFoo}}"); skip such templates rather than passing nil on.
		-- It also keeps any spaces between the name and the first pipe, hence the trim.
		if templateName and lang:ucfirst( mw.text.trim( templateName ) ) == wanted then
			return template
		end
	end
end

-- Read the name of a template from its wikitext.
-- The name is whatever follows "{{" (and optional spaces) up to the first "}", "|" or newline.
-- @param templateWikitext Required. Wikitext of the template to parse.
-- @return Name of the template, or nil if no name starts on the first line
-- @todo Strip "Template:" namespace?
function WikitextParser.getTemplateName( templateWikitext )
	local namePattern = '^{{ *([^}|\n]+)'
	local name = string.match( templateWikitext, namePattern )
	return name
end

-- Get the parameters from the given template wikitext.
-- Unnamed parameters are keyed by their position (1, 2, 3...), named parameters by their trimmed name.
-- @param templateWikitext Required. Wikitext of the template to parse.
-- @return Map from parameter names to parameter values, NOT IN THE ORIGINAL ORDER.
-- @return Order in which the parameters were parsed.
function WikitextParser.getTemplateParameters( templateWikitext )
	local parameters = {}
	local paramOrder = {}
	local params = string.match( templateWikitext, '{{[^|}]-|(.*)}}' )
	if not params then
		return parameters, paramOrder
	end

	-- Build the replacement for a subtemplate or link: pipes and equal signs become placeholders,
	-- and "%" is doubled because the result is used as a string.gsub replacement string.
	-- Only the string is returned. If string.gsub's match count were passed along as well,
	-- it would become the "n" (maximum substitutions) argument of the enclosing string.gsub call.
	local function mask( text )
		local masked = string.gsub( text, '.', { ['%']='%%', ['|']='@@:@@', ['=']='@@_@@' } )
		return masked
	end

	-- Temporarily replace pipes in subtemplates and links to avoid chaos
	for subtemplate in string.gmatch( params, '{%b{}}' ) do
		params = string.gsub( params, escapeString( subtemplate ), mask( subtemplate ) )
	end
	for link in string.gmatch( params, '%[%b[]%]' ) do
		params = string.gsub( params, escapeString( link ), mask( link ) )
	end

	local count = 0 -- number of unnamed parameters seen so far
	for param in mw.text.gsplit( params, '|' ) do
		local parts = mw.text.split( param, '=' )
		local name = mw.text.trim( parts[1] )
		local value
		if #parts == 1 then
			-- No "=" at all: unnamed parameter, keyed by its position
			value = name
			count = count + 1
			name = count
		else
			-- Everything after the first "=" belongs to the value
			value = mw.text.trim( table.concat( parts, '=', 2 ) )
		end
		-- Restore the characters that were masked above
		value = string.gsub( value, '@@_@@', '=' )
		value = string.gsub( value, '@@:@@', '|' )
		parameters[ name ] = value
		table.insert( paramOrder, name )
	end
	return parameters, paramOrder
end

-- Get the tags from the given wikitext.
-- Every opening tag found yields one entry, so a tag nested inside another one is returned
-- both as part of the outer tag and as an entry of its own.
-- NOTE(review): the closing tag is searched using the lower-cased tag name, so upper-case tags
-- like <DIV>...</DIV> are not paired with their closing tag — confirm whether that is intended.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of tags.
function WikitextParser.getTags( wikitext )
	local tags = {}
	-- Don't match closing tags like </div>, comments like <!--foo-->, comparisons like 1<2 or things like <3
	for tagStart, tagOpen in string.gmatch( wikitext, '()(<[^/!%d].->)' ) do
		local tag
		local tagName = WikitextParser.getTagName( tagOpen )
		-- Non-tags like <no. 1> also reach this point, so the name may contain magic characters
		local namePattern = escapeString( tagName )
		local closePattern = '</ ?' .. namePattern .. ' ?>'

		-- If we're in a self-closing tag, like <ref name="foo" />, <references/>, <br/>, <br>, <hr>, etc.
		if string.match( tagOpen, '<.-/>' ) or tagName == 'br' or tagName == 'hr' then
			tag = tagOpen

		-- If we're in a tag that may contain others like it, like <div> or <span>
		elseif tagName == 'div' or tagName == 'span' then
			local openPattern = '< ?' .. namePattern .. '[ >]'
			local position = tagStart + #tagOpen -- first character after the opening tag
			local depth = 1
			local tagEnd
			while depth > 0 do
				local closeStart, closeEnd = string.find( wikitext, closePattern, position )
				if not closeStart then
					tagEnd = nil
					break -- unclosed tag
				end
				local openStart = string.find( wikitext, openPattern, position )
				if openStart and openStart < closeStart then
					-- Another tag of the same kind opens before the next closing tag: go one level deeper
					depth = depth + 1
					position = openStart + 1
				else
					-- The next closing tag closes the innermost open tag
					depth = depth - 1
					position = closeEnd + 1
					tagEnd = closeEnd
				end
			end
			-- An unclosed tag extends to the end of the wikitext
			tag = string.sub( wikitext, tagStart, tagEnd or -1 )

		-- Else we're probably in tag that shouldn't contain others like it, like <math> or <strong>
		else
			local tagEnd = string.match( wikitext, closePattern .. '()', tagStart )
			if tagEnd then
				tag = string.sub( wikitext, tagStart, tagEnd - 1 )
			end
			-- If no end tag is found, assume we matched something that wasn't a tag, like <no. 1>
		end

		if tag then
			table.insert( tags, tag )
		end
	end
	return tags
end

-- Read the name of a tag from its wikitext
-- The name is whatever follows "<" (and optional spaces) up to the first space, "/" or ">", in lower case.
-- @param tagWikitext Required. Wikitext of the tag to parse.
-- @return Lower-cased name of the tag or nil if not found
function WikitextParser.getTagName( tagWikitext )
	local rawName = string.match( tagWikitext, '^< *(.-)[ />]' )
	if not rawName then
		return nil
	end
	local tagName = string.lower( rawName )
	return tagName
end

-- Get the value of an attribute in the given tag.
-- The value may be unquoted or wrapped in single or double quotes.
-- @param tagWikitext Required. Wikitext of the tag to parse.
-- @param attribute Required. Name of the attribute.
-- @return Value of the attribute or nil if not found
function WikitextParser.getTagAttribute( tagWikitext, attribute )
	-- Escape the name so that attributes like "data-foo" are matched literally
	-- instead of being interpreted as a Lua pattern
	local attributePattern = escapeString( attribute )
	return string.match( tagWikitext, '^< *.- *[^/>]*' .. attributePattern .. ' *= *["\']?([^"\'/>]+)["\']?[ />]' )
end

-- Get the content of the given tag.
-- The content is whatever lies between the opening tag and the first closing tag that follows it.
-- @param tagWikitext Required. Wikitext of the tag to parse.
-- @param attribute Unused. Kept so that existing calls passing a second argument keep working.
-- @return Content of the tag. May be empty if the tag is empty. Will be nil if the tag is self-closing.
-- @todo May fail with nested tags
function WikitextParser.getTagContent( tagWikitext, attribute )
	-- The capture makes string.match return only the content rather than the whole tag
	return string.match( tagWikitext, '^<.->(.-)</.->' )
end

-- Collect the <gallery> tags found in the given wikitext.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of gallery tags.
function WikitextParser.getGalleries( wikitext )
	local galleries = {}
	-- Keep only the tags whose name is "gallery"
	for _, candidate in ipairs( WikitextParser.getTags( wikitext ) ) do
		if WikitextParser.getTagName( candidate ) == 'gallery' then
			galleries[ #galleries + 1 ] = candidate
		end
	end
	return galleries
end

-- Collect the <ref> tags found in the given wikitext.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of ref tags.
function WikitextParser.getReferences( wikitext )
	local references = {}
	-- Keep only the tags whose name is "ref"
	for _, candidate in ipairs( WikitextParser.getTags( wikitext ) ) do
		if WikitextParser.getTagName( candidate ) == 'ref' then
			references[ #references + 1 ] = candidate
		end
	end
	return references
end

-- Find the reference with the given name in the given wikitext.
-- Only a <ref> tag for which getTagContent returns something is accepted.
-- @param wikitext Required. Wikitext to parse.
-- @param referenceName Required. Name of the reference.
-- @return Wikitext of the reference, or nothing if no such reference is found
function WikitextParser.getReference( wikitext, referenceName )
	for _, reference in ipairs( WikitextParser.getReferences( wikitext ) ) do
		local hasContent = WikitextParser.getTagContent( reference )
		if hasContent and WikitextParser.getTagAttribute( reference, 'name' ) == referenceName then
			return reference
		end
	end
end

-- Get the tables from the given wikitext.
-- A table is a balanced {...} block that starts with "{|" at the beginning of a line.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of tables.
function WikitextParser.getTables( wikitext )
	local tables = {}
	-- The leading newline lets a table on the very first line match too
	wikitext = '\n' .. wikitext
	for t in string.gmatch( wikitext, '\n%b{}' ) do
		-- '\n%b{}' also matches templates that start a line, so keep only the blocks opening with "{|"
		if string.sub( t, 1, 3 ) == '\n{|' then
			t = mw.text.trim( t ) -- exclude the leading newline
			table.insert( tables, t )
		end
	end
	return tables
end

-- Get the value of an attribute from the given table wikitext
-- Only the first line of the table (the one starting with "{|") is inspected, and it must end with a newline.
-- @param tableWikitext Required. Wikitext of the table to parse.
-- @param attribute Required. Name of the attribute.
-- @return Value of the attribute or nil if not found
function WikitextParser.getTableAttribute( tableWikitext, attribute )
	-- Escape the name so that attributes like "data-foo" are matched literally
	-- instead of being interpreted as a Lua pattern
	local attributePattern = escapeString( attribute )
	return string.match( tableWikitext, '^{|[^\n]*' .. attributePattern .. ' *= *["\']?([^"\'\n]+)["\']?[^\n]*\n' )
end

-- Find the table with the given id in the given wikitext
-- @param wikitext Required. Wikitext to parse.
-- @param id Required. Id of the table
-- @return Wikitext of the first table whose id attribute equals the given id, or nothing if there is none
function WikitextParser.getTable( wikitext, id )
	for _, candidate in ipairs( WikitextParser.getTables( wikitext ) ) do
		local candidateId = WikitextParser.getTableAttribute( candidate, 'id' )
		if candidateId == id then
			return candidate
		end
	end
end

-- Get the data from the given table wikitext
-- Header cells ("!") and data cells ("|") are treated alike; captions and row attributes are discarded.
-- @param tableWikitext Required. Wikitext of the table to parse.
-- @return Table data: a sequence of rows, each row being a sequence of trimmed cell wikitext
-- @todo Test and make more robust
function WikitextParser.getTableData( tableWikitext )
	local tableData = {}
	tableWikitext = mw.text.trim( tableWikitext )
	tableWikitext = string.gsub( tableWikitext, '^{|.-\n', '' ) -- remove the header
	tableWikitext = string.gsub( tableWikitext, '\n|}$', '' ) -- remove the footer
	tableWikitext = string.gsub( tableWikitext, '^|%+.-\n', '' ) -- remove any caption
	tableWikitext = string.gsub( tableWikitext, '|%-.-\n', '|-\n' ) -- remove any row attributes
	tableWikitext = string.gsub( tableWikitext, '^|%-\n', '' ) -- remove any leading empty row
	tableWikitext = string.gsub( tableWikitext, '\n|%-$', '' ) -- remove any trailing empty row
	-- Split the cleaned table into rows on the literal row separator "|-" (plain match, not a pattern).
	-- The string to split must be the first argument of mw.text.gsplit.
	for rowWikitext in mw.text.gsplit( tableWikitext, '|-', true ) do
		local rowData = {}
		-- Normalize every cell separator to "\n|" so that the row can be split in a single pass
		rowWikitext = string.gsub( rowWikitext, '||', '\n|' )
		rowWikitext = string.gsub( rowWikitext, '!!', '\n|' )
		rowWikitext = string.gsub( rowWikitext, '\n!', '\n|' )
		rowWikitext = string.gsub( rowWikitext, '^!', '\n|' )
		rowWikitext = string.gsub( rowWikitext, '^\n|', '' ) -- drop the separator in front of the first cell
		for cellWikitext in mw.text.gsplit( rowWikitext, '\n|' ) do
			cellWikitext = mw.text.trim( cellWikitext )
			table.insert( rowData, cellWikitext )
		end
		table.insert( tableData, rowData )
	end
	return tableData
end

-- Collect the internal links ([[...]]) found in the given wikitext, category and file links included.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of internal links.
function WikitextParser.getLinks( wikitext )
	local internalLinks = {}
	-- "[" followed by a balanced [...] block followed by "]"
	local linkPattern = '%[%b[]%]'
	for match in string.gmatch( wikitext, linkPattern ) do
		internalLinks[ #internalLinks + 1 ] = match
	end
	return internalLinks
end

-- Collect the file links found in the given wikitext.
-- A link is a file link when the text before its first colon names a namespace whose canonical name is "File".
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of file links.
function WikitextParser.getFiles( wikitext )
	local files = {}
	for _, link in ipairs( WikitextParser.getLinks( wikitext ) ) do
		local prefix = string.match( link, '^%[%[ *(.-) *:' )
		-- Links without a colon have no prefix and are never files
		local namespace = prefix and mw.site.namespaces[ prefix ]
		if namespace and namespace.canonicalName == 'File' then
			files[ #files + 1 ] = link
		end
	end
	return files
end

-- Read the name of a file from its link wikitext.
-- The name is whatever lies between the first colon and the first "]" or "|", without surrounding spaces.
-- @param fileWikitext Required. Wikitext of the file to parse.
-- @return Name of the file, or nil if the wikitext doesn't match
function WikitextParser.getFileName( fileWikitext )
	local fileNamePattern = '^%[%[ *.- *: *(.-) *[]|]'
	local fileName = string.match( fileWikitext, fileNamePattern )
	return fileName
end

-- Get the category links from the given wikitext.
-- A link is a category link when the text before its first colon names a namespace whose canonical name is "Category".
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of category links.
function WikitextParser.getCategories( wikitext )
	local categories = {}
	local links = WikitextParser.getLinks( wikitext )
	for _, link in ipairs( links ) do
		-- Same pattern as in getFiles: the greedy " *" consumes any spaces after "[[",
		-- so they don't end up inside the captured namespace and break the lookup below
		local namespace = string.match( link, '^%[%[ *(.-) *:' )
		if namespace and mw.site.namespaces[ namespace ] and mw.site.namespaces[ namespace ].canonicalName == 'Category' then
			table.insert( categories, link )
		end
	end
	return categories
end

-- Collect the external links found in the given wikitext.
-- An external link is a balanced [...] block that starts with "[//", "[http://" or "[https://".
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of external links.
function WikitextParser.getExternalLinks( wikitext )
	local externalLinks = {}
	for candidate in string.gmatch( wikitext, '%b[]' ) do
		local isProtocolRelative = string.match( candidate, '^%[//' )
		if isProtocolRelative or string.match( candidate, '^%[https?://' ) then
			externalLinks[ #externalLinks + 1 ] = candidate
		end
	end
	return externalLinks
end

-- Export the module table holding all the public functions defined above
return WikitextParser