-- Jump to content

-- Module:Ref info/data

-- From Wikipedia, the free encyclopedia
-- This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 13:32, 9 May 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
--[[--------------------------< C S 1 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs1 templates and their redirects.  Vcite-like redirects are excluded from these patterns;
those redirects should be deleted because vcite is not cs1.

]]

local cs1_template_patterns = {													-- lua patterns of the canonical names and redirects
	-- Layout: each canonical name is followed by its (further indented) redirects.  The order of the
	-- entries is the order in which ipairs() visits them.
	-- Magic characters that must match literally are escaped with '%': '%-' is a literal hyphen
	-- (a bare '-' is a lazy repetition of the preceding character) and '%.' is a literal full stop
	-- (a bare '.' matches any character).
	'[Cc]ite ar[Xx]iv',															-- arXiv is the canonical name
		'[Cc]ite ArXiv',

	'[Cc]ite AV media',															-- canonical
		'[Cc]ite audio', '[Cc]ite AV', '[Cc]ite AV ?Media', '[Cc]ite av media',
		'[Cc]ite cd', '[Cc]ite DVD', '[Cc]ite dvd', '[Cc]ite film',
		'[Cc]ite image', '[Cc]ite media', '[Cc]ite movie',
		'[Cc]ite music video', '[Cc]ite radio', '[Cc]ite song',
		'[Cc]ite ?video', '[Cc]ite visual', '[Cc]ite You[Tt]ube',
		'[Cc]ita vídeo',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite AV media notes',													-- canonical
		'[Cc]ite album[ %-]notes', '[Cc]ite av media notes',
		'[Cc]ite DVD[ %-]notes', '[Cc]ite dvd%-notes', '[Cc]ite liner notes',
		'[Cc]ite music release notes', '[Ll]iner notes',

	'[Cc]ite bio[Rr]xiv',														-- bioRxiv is the canonical form
	'[Cc]ite [Bb]ook',															-- book is the canonical form
		'[Bb]ook cite', '[Bb]ook reference', '[Bb]ook reference url',
		'[Cc] book', '[Cc]it book', '[Cc]ite books', '[Cc]ite chapter',
		'[Cc]ite ebook', '[Cc]ite manual', '[Cc]ite page',
		'[Cc]ite publication', '[Cc]ite score',
		'[Cc]ite work', '[Cc]ite%-?book',
		'[Bb]okref', '[Cc]itace monografie', '[Cc]itar livro',					-- non-English redirects; TODO: tally separately?
		'[Cc]iteer boek', '[Oo]uvrage', '[Rr]ef%-llibre', '서적 인용',			-- hyphen escaped so that it matches literally

	'[Cc]ite citeseerx',														-- canonical
	'[Cc]ite conference',														-- canonical
		'[Cc]ita conferenza', '[Cc]ite proceedings', '[Cc]onference reference',	-- cita conferenza is non-English; TODO: tally separately?

	'[Cc]ite ?encyclopedia',													-- cite encyclopedia is the canonical name
		'[Cc]ite contribution', '[Cc]ite dic', '[Cc]ite dictionary',
		'[Cc]ite encyclopaedia', '[Cc]ite encyclopædia', '[Ee]ncyclopedia',

	'[Cc]ite [Ee]pisode',														-- episode is the canonical form
		'[Cc]ite show',

	'[Cc]ite interview',														-- canonical
	'[Cc]ite ?journal',															-- cite journal is the canonical form
		'[Cc] journal', '[Cc]itation journal', '[Cc]ite abstract',
		'[Cc]ite document', '[Cc]ite Journal', '[Cc]ite journal zh',
		'[Cc]ite ?paper', '[Vv]cite2 journal',
		'[Cc]ita pubblicazione', '[Cc]itace periodika', '[Cc]itar jornal',		-- non-English redirects; TODO: tally separately?
		'[Cc]itar publicació', '[Cc]ytuj pismo', '[Tt]idskriftsref',
		'Навод из стручног часописа', '저널 인용',

	'[Cc]ite [Mm]agazine',														-- magazine is the canonical form
		'[Cc]ite mag', '[Cc]ite magazine article', '[Cc]ite newsletter',
		'[Cc]ite periodical',

	'[Cc]ite mailing ?list',													-- mailing list is the canonical form
		'[Cc]ite list',

	'[Cc]ite maps?',															-- map is the canonical form
	'[Cc]ite[ %-]?news',														-- cite news is the canonical form
		'[Cc] news', '[Cc]it news', '[Cc]itation news', '[Cc]ite article',
		'[Cc]ite n', '[Cc]ite new', '[Cc]ite newspaper', '[Cc]ite News',
		'[Cc]ite news%-q', '[Cc]ite news2', '[Cc]itenewsauthor', '[Cc]ute news',
		'[Cc]itar notícia', '[Hh]aber kaynağı', '[Tt]idningsref', 'استشهاد بخبر',	-- non-English redirects; TODO: tally separately?
		'뉴스 인용',

	'[Cc]ite newsgroup',														-- canonical
		'[Cc]ite usenet',

	'[Cc]ite podcast',															-- canonical
	'[Cc]ite [Pp]ress release',													-- press release is the canonical form
		'[Cc]ite media release', '[Cc]ite news release', '[Cc]ite pr',
		'[Cc]ite press', '[Cc]ite press release%.', '[Cc]ite press[%-]?release',	-- '%.' is a literal trailing full stop

	'[Cc]ite report',															-- canonical
	'[Cc]ite serial',															-- canonical
	'[Cc]ite sign',																-- canonical
		'[Cc]ite plaque',

	'[Cc]ite speech',															-- canonical
	'[Cc]ite ssrn',																-- canonical
		'[Cc]ite SSRN',

	'[Cc]ite tech ?report',														-- techreport is the canonical form
		'[Cc]ite standard', '[Cc]ite technical report', '[Tt]echrep reference',

	'[Cc]ite thesis',															-- canonical
		'[Cc]ite dissertation',
		'[Cc]itar tese',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite [Ww]eb',															-- web is the canonical form
		'[Cc] web', '[Cc]it web', '[Cc]ite blog', '[Cc]ite URL', '[Cc]ite url',
		'[Cc]ite w', '[Cc]ite wb', '[Cc]ite we', '[Cc]ite web%.',				-- '%.' is a literal trailing full stop
		'[Cc]ite webpage', '[Cc]ite website', '[Cc]ite website article',
		'[Cc]ite%-?web', '[Cc]itweb', '[Cc]w', '[Rr]ef web', '[Ww]eb citation',
		'[Ww]eb cite', '[Ww]eb link', '[Ww]eb[ %-]reference', '[Ww]eblink',
		'[Cc]hú thích web', '[Cc]ita web', '[Cc]itace elektronické monografie',	-- non-English redirects; TODO: tally separately?
		'[Cc]itat web', 'مرجع ويب', 'یادکرد وب', '웹 인용',
	}

--[[--------------------------< C S 2 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for the cs2 template and its redirects.

]]

local cs2_template_patterns = {													-- lua patterns of the canonical names and redirects
	'[Cc]itation',
		'[Cc]ite', '[Cc]ite citation', '[Cc]ite study',
		'[Cc]ite [Tt]echnical standard',
	}


--[[--------------------------< C S 1 2 _ S T R I P P E D _ L I S T >------------------------------------------

This table is created from cs1_template_patterns and cs2_template_patterns.  To make this table, entries in
the source tables are evaluated to replace lua patterns with the appropriate characters and create names for
this list.  For example:
	[Cc]ite ar[Xx]iv
becomes
	Cite arXiv
	Cite arxiv
	cite arXiv
	cite arxiv

The table is a set: each generated name is a key whose value is true.

Apart from a leading [Xx] set, only one pattern construct per entry is expanded (the first of the constructs
tested in pattern_convert() that is found); any remaining text is copied as is.

]]

local cs12_stripped_list = {};
	-- record <name> as a member of the cs12_stripped_list set
	local function add_stripped (name)
		cs12_stripped_list[name] = true;
	end


	-- Expand the one pattern construct in <name> (a template-name pattern whose leading [Xx] set has
	-- already been resolved by the caller) into the plain names it stands for, and add each of those
	-- names to cs12_stripped_list.  The constructs are tested in the order written; the first that is
	-- found wins.  A <name> that holds none of them is added unchanged.
	local function pattern_convert (name)
		local lead, upper, lower, char, tail;									-- all locals so that nothing leaks into the global table

		lead, upper, lower, tail = name:match ('(.-)%[(%a)(%a)%](.*)');		-- [Xx]: mixed case optional letters
		if lead then
			add_stripped (lead .. upper .. tail);								-- first letter of the set (usually uppercase)
			add_stripped (lead .. lower .. tail);								-- second letter of the set (usually lowercase)
			return;
		end

		lead, tail = name:match ('^([^%[]+)%[ %%%-%]%?(.+)$');					-- [ %-]?
		if lead then
			add_stripped (lead .. tail);										-- neither char
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, tail = name:match ('^([^%[]+)%[%%%-%]%?(.+)$');					-- [%-]?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, tail = name:match ('^([^%[]+)%[ %%%-%](.+)$');					-- [ %-]
		if lead then
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, tail = name:match ('^([^%?]+)%%%-%?(.+)$');						-- %-?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, tail = name:match ('^(.-)%%%-(.+)$');								-- %-
		if lead then
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, tail = name:match ('^(.-)%%%.(.*)$');								-- %.
		if lead then
			add_stripped (lead .. '.' .. tail);									-- full stop
			return;
		end

		lead, char, tail = name:match ('^(.-)(.)%?(.*)$');						-- .?
		if lead then
			add_stripped (lead .. tail);										-- no character
			add_stripped (lead .. char .. tail);								-- character
			return;
		end

		add_stripped (name);													-- no patterns so save as is
	end


	-- build the set: every pattern is converted twice, once for each letter of its leading [Xx] set;
	-- gsub() also returns a match count so its result is stored in a local before it is passed on
	for _, list in ipairs ({cs1_template_patterns, cs2_template_patterns}) do
		for _, pattern in ipairs (list) do
			local first = pattern:gsub ('^%[(%a)%a%]', '%1');					-- leading character (usually uppercase)
			local second = pattern:gsub ('^%[%a(%a)%]', '%1');					-- leading character (usually lowercase)
			pattern_convert (first);
			pattern_convert (second);
		end
	end


--[[--------------------------< H A R V _ T E M P L A T E _ P A T T E R N S >----------------------------------

These are patterns for the harv family of templates and their redirects.

]]

local harv_template_patterns = {
	-- lua patterns of the canonical names and redirects; one entry per line, each canonical name
	-- followed by its redirects

	'[Hh]arvard citation no brackets',		-- canonical
	'[Hh]arnvb',
	'[Hh]arvardnb',
	'[Hh]arvnb',

	'[Hh]arvard citation',					-- canonical
	'[Hh]arv',
	'[Hh]arvsp',

	'[Hh]arvard citation text',				-- canonical
	'[Hh]arvtxt',

	'[Hh]arvcoltxt',						-- canonical

	'[Hh]arvcol',							-- canonical
	'[Hh]rvcoln',							-- NOTE(review): possibly a typo for '[Hh]arvcoln' -- confirm against the actual redirects

	'[Hh]arvcolnb',							-- canonical

	'[Hh]arvard citations',					-- canonical
	'[Hh]arvs',

	'[Hh]arvp',								-- canonical
	}


--[[--------------------------< S F N _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for the sfn family of templates and their redirects.

]]

local sfn_template_patterns = {
	-- lua patterns of the canonical names and redirects; one entry per line, each canonical name
	-- followed by its redirects

	'[Ss]fn',								-- canonical
	'[Hh]f',
	'[Ss]f',
	'[Ss]hortened footnote',
	'[Ss]hortened footnote template',
	'[Ss]nf',

	'[Ss]fnp',								-- canonical
	'[Ss]fb',
	'[Ss]fnb',

	'[Ss]fnm',								-- canonical

	'[Ss]fnmp',								-- canonical
	}


--[[--------------------------< C L E A N U P _ T E M P L A T E _ P A T T E R N S >----------------------------

These are patterns for some of the cleanup templates and their redirects.
TODO: add redirects
]]

local cleanup_template_patterns = {		-- lua patterns; the leading [Xx] set accepts either case of the first letter
	'[Cc]itation needed', '[Dd]isputed inline',
	'[Dd]ubious', '[Ff]ailed verification',
	};


--[[--------------------------< D E A D _ L I N K _ T E M P L A T E _ P A T T E R N S >------------------------

These are patterns for the dead link template and its redirects.

]]

local dead_link_template_patterns = {
	-- lua patterns, one per line; 'x?' makes the character x optional and '[%- ]?' is an optional
	-- hyphen or space
	'[Dd]ea?d[%- ]?[Ll]inks?',
	'[Dd]ead ?cite',
	'[Dd]ead page',
	'[Dd]ead ?url',
	'[Dd]ead%-inline',
	'404',
	'[Bb]ad ?link',
	'[Bb]roken ?link',
	'[Dd][Ll]',
	'[Ll]ink ?broken',
	'[Dd]ead',
	};


--[[--------------------------< W E B A R C H I V E _ T E M P L A T E _ P A T T E R N S >----------------------

These are patterns for the webarchive template and its redirects.

]]

local webarchive_template_patterns = {											-- lua patterns of the template name and its redirects
	'[Ww]eb ?archive?',															-- optional space; the trailing 'e?' makes the final 'e' optional
		'[Aa]rchive url', '[Aa]rchive%.org', 'IAWM', '[Ii]awm', '[Uu]rl archive',	-- '%.' is a literal full stop; a bare '.' would match any character
		'[Ww]ay[Bb]ack', '[Ww]aybackdate', '[Ww]ebcitation'
	};


--[[--------------------------< C S 1 | 2   A U T H O R - N A M E _ P A T T E R N S >--------------------------

These are patterns for the cs1|2 author-name parameters.

]]

-- lua patterns for parameter names; in the tables below '1?' makes the digit 1 optional and '%-' is a
-- literal hyphen
local authors_param_patterns = {
	'authors',
	'people',
	'credits',
	};
local author_param_patterns = {
	'author1?',
	'host1?',
	'subject1?',
	};
local last_param_patterns = {
	'last1?',
	'author1?%-last1?',
	'surname1?',
	};


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

-- the module's value: one table that exposes every pattern list, and the stripped-name set, under its
-- own name
local exports = {
	-- template-name patterns
	cs1_template_patterns = cs1_template_patterns,
	cs2_template_patterns = cs2_template_patterns,
	harv_template_patterns = harv_template_patterns,
	sfn_template_patterns = sfn_template_patterns,
	cleanup_template_patterns = cleanup_template_patterns,
	dead_link_template_patterns = dead_link_template_patterns,
	webarchive_template_patterns = webarchive_template_patterns,

	-- set of plain names derived from the cs1 and cs2 patterns
	cs12_stripped_list = cs12_stripped_list,

	-- cs1|2 author-name parameter patterns
	authors_param_patterns = authors_param_patterns,
	author_param_patterns = author_param_patterns,
	last_param_patterns = last_param_patterns,
	};

return exports;