-- Jump to content

-- Module:Ref info/data

-- From Wikipedia, the free encyclopedia
-- This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 23:19, 9 May 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
--[[--------------------------< C S 1 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs1 templates and their redirects.  Vcite-like redirects are deliberately excluded from
these patterns; those redirects should be deleted because vcite is not cs1.

]]

local cs1_template_patterns = {													-- Lua patterns of the canonical names and redirects
	-- Layout: each canonical template name is outdented and followed by its (indented) redirects.
	-- Pattern elements used here: '[Xx]' either letter, 'x?' optional x, '%-' literal hyphen,
	-- '[ %-]' space or hyphen.
	--
	-- Two entries end in a bare '.' ('[Cc]ite press release.' and '[Cc]ite web.').  They were
	-- previously spelled with '\.', which is not a Lua escape sequence: Lua 5.1 reads it as '.',
	-- later Lua versions refuse to compile it.  The value is unchanged.  As a pattern '.' matches
	-- any one character; it is not written '%.' because the cs12_stripped_list builder stores
	-- text it does not recognise verbatim, and would then store the '%' as part of the name.
	'[Cc]ite ar[Xx]iv',															-- arXiv is the canonical name
		'[Cc]ite ArXiv',

	'[Cc]ite AV media',															-- canonical
		'[Cc]ite audio', '[Cc]ite AV', '[Cc]ite AV ?Media', '[Cc]ite av media',
		'[Cc]ite cd', '[Cc]ite DVD', '[Cc]ite dvd', '[Cc]ite film',
		'[Cc]ite image', '[Cc]ite media', '[Cc]ite movie',
		'[Cc]ite music video', '[Cc]ite radio', '[Cc]ite song',
		'[Cc]ite ?video', '[Cc]ite visual', '[Cc]ite You[Tt]ube',
		'[Cc]ita vídeo',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite AV media notes',													-- canonical
		'[Cc]ite album[ %-]notes', '[Cc]ite av media notes',
		'[Cc]ite DVD[ %-]notes', '[Cc]ite dvd%-notes', '[Cc]ite liner notes',
		'[Cc]ite music release notes', '[Ll]iner notes',

	'[Cc]ite bio[Rr]xiv',														-- bioRxiv is the canonical form
	'[Cc]ite [Bb]ook',															-- book is the canonical form
		'[Bb]ook cite', '[Bb]ook reference', '[Bb]ook reference url',
		'[Cc] book', '[Cc]it book', '[Cc]ite books', '[Cc]ite chapter',
		'[Cc]ite ebook', '[Cc]ite manual', '[Cc]ite page',
		'[Cc]ite publication', '[Cc]ite score',
		'[Cc]ite work', '[Cc]ite%-?book',
		'[Bb]okref', '[Cc]itace monografie', '[Cc]itar livro',					-- non-English redirects; TODO: tally separately?
		'[Cc]iteer boek', '[Oo]uvrage', '[Rr]ef%-llibre', '서적 인용',

	'[Cc]ite citeseerx',														-- canonical
	'[Cc]ite conference',														-- canonical
		'[Cc]ita conferenza', '[Cc]ite proceedings', '[Cc]onference reference',	-- cita conferenza is non-English; TODO: tally separately?

	'[Cc]ite ?encyclopedia',													-- cite encyclopedia is the canonical name
		'[Cc]ite contribution', '[Cc]ite dic', '[Cc]ite dictionary',
		'[Cc]ite encyclopaedia', '[Cc]ite encyclopædia', '[Ee]ncyclopedia',

	'[Cc]ite [Ee]pisode',														-- episode is the canonical form
		'[Cc]ite show',

	'[Cc]ite interview',														-- canonical
	'[Cc]ite ?journal',															-- cite journal is the canonical form
		'[Cc] journal', '[Cc]itation journal', '[Cc]ite abstract',
		'[Cc]ite document', '[Cc]ite Journal', '[Cc]ite journal zh',
		'[Cc]ite ?paper', '[Vv]cite2 journal',
		'[Cc]ita pubblicazione', '[Cc]itace periodika', '[Cc]itar jornal',		-- non-English redirects; TODO: tally separately?
		'[Cc]itar publicació', '[Cc]ytuj pismo', '[Tt]idskriftsref',
		'Навод из стручног часописа', '저널 인용',

	'[Cc]ite [Mm]agazine',														-- magazine is the canonical form
		'[Cc]ite mag', '[Cc]ite magazine article', '[Cc]ite newsletter',
		'[Cc]ite periodical',

	'[Cc]ite mailing ?list',													-- mailing list is the canonical form
		'[Cc]ite list',

	'[Cc]ite maps?',															-- map is the canonical form
	'[Cc]ite[ %-]?news',														-- cite news is the canonical form
		'[Cc] news', '[Cc]it news', '[Cc]itation news', '[Cc]ite article',
		'[Cc]ite n', '[Cc]ite new', '[Cc]ite newspaper', '[Cc]ite News',
		'[Cc]ite news%-q', '[Cc]ite news2', '[Cc]itenewsauthor', '[Cc]ute news',
		'[Cc]itar notícia', '[Hh]aber kaynağı', '[Tt]idningsref', 'استشهاد بخبر',	-- non-English redirects; TODO: tally separately?
		'뉴스 인용',

	'[Cc]ite newsgroup',														-- canonical
		'[Cc]ite usenet',

	'[Cc]ite podcast',															-- canonical
	'[Cc]ite [Pp]ress release',													-- press release is the canonical form
		'[Cc]ite media release', '[Cc]ite news release', '[Cc]ite pr',
		'[Cc]ite press', '[Cc]ite press release.', '[Cc]ite press[%-]?release',	-- 'release.' has a trailing dot; see note at top of table

	'[Cc]ite report',															-- canonical
	'[Cc]ite serial',															-- canonical
	'[Cc]ite sign',																-- canonical
		'[Cc]ite plaque',

	'[Cc]ite speech',															-- canonical
	'[Cc]ite ssrn',																-- canonical
		'[Cc]ite SSRN',

	'[Cc]ite tech ?report',														-- techreport is the canonical form
		'[Cc]ite standard', '[Cc]ite technical report', '[Tt]echrep reference',

	'[Cc]ite thesis',															-- canonical
		'[Cc]ite dissertation',
		'[Cc]itar tese',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite [Ww]eb',															-- web is the canonical form
		'[Cc] web', '[Cc]it web', '[Cc]ite blog', '[Cc]ite URL', '[Cc]ite url',
		'[Cc]ite w', '[Cc]ite wb', '[Cc]ite we', '[Cc]ite web.',				-- 'web.' has a trailing dot; see note at top of table
		'[Cc]ite webpage', '[Cc]ite website', '[Cc]ite website article',
		'[Cc]ite%-?web', '[Cc]itweb', '[Cc]w', '[Rr]ef web', '[Ww]eb citation',
		'[Ww]eb cite', '[Ww]eb link', '[Ww]eb[ %-]reference', '[Ww]eblink',
		'[Cc]hú thích web', '[Cc]ita web', '[Cc]itace elektronické monografie',	-- non-English redirects; TODO: tally separately?
		'[Cc]itat web', 'مرجع ويب', 'یادکرد وب', '웹 인용',
	}


--[[--------------------------< C S 2 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for the cs2 template and its redirects.

]]

local cs2_template_patterns = {
	-- Lua patterns; the canonical name comes first, its redirects follow
	'[Cc]itation',
	'[Cc]ite',
	'[Cc]ite citation',
	'[Cc]ite study',
	'[Cc]ite [Tt]echnical standard',
	}


--[[--------------------------< C S 1 2 _ S T R I P P E D _ L I S T >------------------------------------------

This table is created from cs1_template_patterns and cs2_template_patterns.  To make this table, entries in
the source tables are evaluated to replace lua patterns with the appropriate characters and create names for
this list.  For example:
	[Cc]ite ar[Xx]iv
becomes
	Cite arXiv
	Cite arxiv
	cite arXiv
	cite arxiv

]]

local cs12_stripped_list = {};													-- set of literal template names: [name] = true
	-- Record <name> in cs12_stripped_list.  Adding a name that is already present changes nothing.
	local function add_stripped (name)
		cs12_stripped_list[name] = cs12_stripped_list[name] or true;
	end


	--[[
	Expand one pattern element in <name> into the literal names it stands for and record each of them with
	add_stripped().  The element kinds are tried in the order written below and only the first kind that
	matches is expanded; a name containing none of them is recorded unchanged.  Returns nothing.

	All captures are held in locals so that nothing is written to the global environment.
	]]
	local function pattern_convert (name)
		local lead, c, l, tail;

		lead, c, l, tail = name:match ('(.-)%[(%a)(%a)%](.*)');					-- [Xx]: two-letter set, e.g. ar[Xx]iv
		if lead then
			add_stripped (lead .. c .. tail);									-- first letter of the set (upper case by convention)
			add_stripped (lead .. l .. tail);									-- second letter of the set (lower case by convention)
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[ %%%-%]%?)(.+)$');				-- [ %-]?
		if lead then
			add_stripped (lead .. tail);										-- neither char
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[%%%-%]%?)(.+)$');				-- [%-]?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[ %%%-%])(.+)$');				-- [ %-]
		if lead then
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%?]+)(%%%-%?)(.+)$');					-- %-?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^(.-)(%%%-)(.+)$');						-- %-
		if lead then
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^(.-)(.)%?(.*)$');							-- .? (any single optional character)
		if lead then
			add_stripped (lead .. tail);										-- no character
			add_stripped (lead .. c .. tail);									-- character
			return;
		end

		add_stripped (name);													-- no patterns so save as is
	end


	-- Fill cs12_stripped_list from the cs1 and cs2 pattern tables.  Every pattern is converted twice: once with
	-- its leading two-letter set replaced by the set's first letter and once by its second letter.  Patterns
	-- that do not start with such a set are passed through unchanged both times.
	for _, source_table in ipairs ({cs1_template_patterns, cs2_template_patterns}) do
		for i = 1, #source_table do
			local template_pattern = source_table[i];
			pattern_convert ((template_pattern:gsub ('^%[(%a)%a%]', '%1')));	-- first letter of the set (usually uppercase); parentheses drop gsub's count
			pattern_convert ((template_pattern:gsub ('^%[%a(%a)%]', '%1')));	-- second letter of the set (usually lowercase)
		end
	end


--[[--------------------------< H A R V _ T E M P L A T E _ P A T T E R N S >----------------------------------

These are patterns for the harv family of templates and their redirects.

]]

local harv_template_patterns = {
	-- Lua patterns; each group is a canonical name followed by its redirects

	-- harvard citation no brackets
	'[Hh]arvard citation no brackets',
	'[Hh]arnvb',
	'[Hh]arvardnb',
	'[Hh]arvnb',

	-- harvard citation
	'[Hh]arvard citation',
	'[Hh]arv',
	'[Hh]arvsp',

	-- harvard citation text
	'[Hh]arvard citation text',
	'[Hh]arvtxt',

	-- harvcoltxt (no redirects listed)
	'[Hh]arvcoltxt',

	-- harvcol
	'[Hh]arvcol',
	'[Hh]rvcoln',

	-- harvcolnb (no redirects listed)
	'[Hh]arvcolnb',

	-- harvard citations
	'[Hh]arvard citations',
	'[Hh]arvs',

	-- harvp (no redirects listed)
	'[Hh]arvp',
	};


--[[--------------------------< S F N _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for the sfn family of templates and their redirects.

]]

local sfn_template_patterns = {
	-- Lua patterns; each group is a canonical name followed by its redirects

	-- sfn
	'[Ss]fn',
	'[Hh]f',
	'[Ss]f',
	'[Ss]hortened footnote',
	'[Ss]hortened footnote template',
	'[Ss]nf',

	-- sfnp
	'[Ss]fnp',
	'[Ss]fb',
	'[Ss]fnb',

	-- sfnm (no redirects listed)
	'[Ss]fnm',

	-- sfnmp (no redirects listed)
	'[Ss]fnmp',
	};


--[[--------------------------< R E F L I S T _ T E M P L A T E _ P A T T E R N S >----------------------------

These are patterns for the reflist template and its redirects.

]]

local reflist_template_patterns = {
	-- canonical form
	'[Rr]ef[Ll]ist',

	-- redirects
	'[Ff]ootnotes?',
	'[Ff]ootnotesSmall',
	'[Rr]ealist',
	'[Rr]ef [Ll]ist',
	'[Rr]ef%-list',
	'[Rr]eference',
	'[Rr]eference list',
	'[Rr]efIist',
	'[Rr]EFLIST',
	'[Rr]efs',
	'[Rr]FS',
	'[Rr]fs',

	-- non-English redirects
	-- NOTE(review): the set [eê] contains a multi-byte character; presumably the consumer matches with a
	-- UTF-8 aware function -- confirm against the caller
	'[Jj]egyzetek',
	'[Ll]istaref',
	'[Rr]efer[eê]ncias',
	'[Rr]éférences',
	'[Tt]ham khảo',
	'Примечания',
	'مراجع',
	'پانویس',
	};


--[[--------------------------< C L E A N U P _ T E M P L A T E _ P A T T E R N S >----------------------------

These are patterns for some of the cleanup templates and their redirects.

]]

local cleanup_template_patterns = {
	-- Lua patterns; each group is a canonical name followed by its redirects

	-- Citation needed
	'[Cc]itation [Nn]eeded',
	'[Aa]re you sure%?',
	'[Cc][Bb]',
	'[Cc]cn',
	'[Cc]iation needed',
	'[Cc]it',
	'[Cc]itaiton needed',
	'[Cc]itation missing',
	'[Cc]itation need',
	'[Cc]itation [Rr]equested',
	'[Cc]itation ?required',
	'[Cc]itation[-Nn]?eeded',
	'[Cc]itationeeded',
	'[Cc]ite missing',
	'[Cc]ite[ %-]?needed',
	'[Cc]ite source',
	'[Cc]itesource',
	'[Cc]itn',
	'[Cc]N',
	'[Cc]n',
	'[Cc]tn',
	'[Ff]ACT',
	'[Ff]act[s%?]?',
	'[Ff]citation needed',
	'[Mm]e%-fact',
	'[Nn]eed [Cc]itation',
	'[Nn]eed sources',
	'[Nn]eed%-ref',
	'[Nn]eedcitation',
	'[Nn]eedcite',
	'[Nn]eeds citations?',
	'[Nn]eeds reference',
	'[Nn]eedsref',
	'[Nn]o source given',
	'[Pp]ROV%-statement',
	'[Pp]rove ?it',
	'[Rr]ef%-?needed',
	'[Rr]ef%?',
	'[Rr]eference needed',
	'[Rr]efplease',
	'[Rr]equest [Cc]itation',
	'[Rr]éférence nécessaire',
	'[Ss]ource needed',
	'[Ss]ource%?',
	'[Ss]ourceme',
	'[Uu]ncited',
	'[Uu]nreferenced inline',
	'[Uu]nsourced%-inline',
	-- Citation needed: non-English redirects
	'[Cc]ita requerida',
	'[Cc]itazione necessaria',
	'[Kk]älla behövs',

	-- Disputed inline
	'[Dd]isputed[ %-]inline',
	'[Dd]ispute[ %-]inline',

	-- Dubious
	'[Dd]ubious',
	'[Dd]ebatable',
	'[Dd]isputable',
	'[Dd]isputed?Assertion',
	'[Dd]isputed Point',
	'[Dd]oubtful',
	'[Dd]UB',
	'[Dd]ub',
	'[Dd]ubious %- [Dd]iscuss',
	'[Dd]ubious [Ii]nline',
	'[Dd]ubious%-inline',
	'[Oo]dd',
	-- Dubious: non-English redirect
	'[Ii]frågasatt uppgift',

	-- Failed verification
	'[Ff]ailed ?verification',
	'[Bb]adref',
	'[Ff]ailed ref',
	'[Ff]ailed reference',
	'[Ff]ails verification',
	'[Ff][Vv]',
	'[Nn]cg',
	'[Nn]ICG',
	'[Nn]icg',
	'[Nn]IGC',
	'[Nn]igc',
	'[Nn]ot in citation',
	'[Nn]ot in citation given',
	'[Nn]ot in cited source',
	'[Nn]ot in ref',
	'[Nn]ot in ref given',
	'[Nn]ot in reference',
	'[Nn]ot in reference given',
	'[Nn]ot in source',
	'[Nn]ot in source given',
	'[Nn]ot specifically in source',
	'[Nn]otincitation',
	'[Nn]otincitationgiven',
	'[Nn]otinref',
	'[Nn]otinsource',
	'[Nn]otinsourcegiven',
	'[Vv]erification[ %-]failed',
	};


--[[--------------------------< D E A D _ L I N K _ T E M P L A T E _ P A T T E R N S >------------------------

These are patterns for the dead link template and its redirects.

]]

local dead_link_template_patterns = {
	-- canonical form
	'[Dd]ead [Ll]ink',

	-- redirects
	'404',
	'[Bb]ad ?link',
	'[Bb]roken',
	'[Bb]roken ?link',
	'[Dd]ead',
	'[Dd]ead cite',
	'[Dd]ead link%-now',
	'[Dd]ead links',
	'[Dd]ead page',
	'[Dd]ead URL',
	'[Dd]ead[ %-]?url',
	'[Dd]ead%-inline',
	'[Dd]ead%-link',
	'[Dd]eadcite',
	'[Dd]eadlinks?',
	'[Dd][Ll]',
	'[Dd]l%-now',
	'[Ll]ink ?broken',

	-- non-English redirects
	'[Cc]ollegamento interrotto',
	'[Tt]oter Link',
	'Недоступная ссылка',
	};


--[[--------------------------< W E B A R C H I V E _ T E M P L A T E _ P A T T E R N S >----------------------

These are patterns for the webarchive template and its redirects.

]]

local webarchive_template_patterns = {
	-- canonical form (Webarchive)
	'[Ww]eb ?archive',

	-- redirects
	'[Aa]rchive url',
	'[Ii]AWM',
	'[Ii]awm',
	'[Uu]rl archive',
	'[Ww]ayBack',
	'[Ww]aybackdate',
	'[Ww]ebarchiv',
	'[Ww]ebcitation',
	};


--[[--------------------------< R E F B E G I N _ T E M P L A T E _ P A T T E R N S >--------------------------

These are patterns for the refbegin template and its redirects.

]]

local refbegin_template_patterns = {
	-- canonical form (Refbegin)
	'[Rr]ef ?begin',

	-- redirects
	'[Bb]eginref',
	'[Ss]ourcesstart',
	'[Ss]ourcestart',
	}


--[[--------------------------< R P _ T E M P L A T E _ P A T T E R N S >--------------------------------------

These are patterns for the rp template and its redirects.

]]

local rp_template_patterns = {
	-- canonical form (Rp)
	'[Rr][Pp]',

	-- redirects
	'[Pp]age reference',
	'[Rr]efpages?',
	}


--[[--------------------------< C S 1 | 2   A U T H O R - N A M E _ P A T T E R N S >--------------------------

These are patterns for the cs1|2 author-name parameters.

]]

-- plural / free-form name-list parameters
local authors_param_patterns = {
	'authors',
	'people',
	'credits',
	};

-- single-name parameters; the trailing '1?' makes the enumerator 1 optional
local author_param_patterns = {
	'author1?',
	'host1?',
	'subject1?',
	};

-- last-name parameters; '%-' is a literal hyphen, each '1?' an optional enumerator 1
local last_param_patterns = {
	'last1?',
	'author1?%-last1?',
	'surname1?',
	};


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

return {
	-- set of literal cs1|2 template names derived from the cs1 and cs2 pattern tables
	cs12_stripped_list = cs12_stripped_list,

	-- template-name pattern tables
	cleanup_template_patterns = cleanup_template_patterns,
	cs1_template_patterns = cs1_template_patterns,
	cs2_template_patterns = cs2_template_patterns,
	dead_link_template_patterns = dead_link_template_patterns,
	harv_template_patterns = harv_template_patterns,
	refbegin_template_patterns = refbegin_template_patterns,
	reflist_template_patterns = reflist_template_patterns,
	rp_template_patterns = rp_template_patterns,
	sfn_template_patterns = sfn_template_patterns,
	webarchive_template_patterns = webarchive_template_patterns,

	-- cs1|2 author-name parameter pattern tables
	author_param_patterns = author_param_patterns,
	authors_param_patterns = authors_param_patterns,
	last_param_patterns = last_param_patterns,
	}