-- Module:Doi
-- Permanently protected module; from Wikipedia, the free encyclopedia
require ('strict');
local cfg = mw.loadData ('Module:Citation/CS1/Configuration');

local utilities = require ('Module:Citation/CS1/Utilities');					-- forward declarations for functions in Module:Citation/CS1/Utilities
	utilities.set_selected_modules (cfg);										-- so that functions in Utilities can see the selected cfg tables
	local has_accept_as_written = utilities.has_accept_as_written;				-- import functions from Module:Citation/CS1/Utilities
	local is_set = utilities.is_set;
	local make_wikilink = utilities.make_wikilink;
	local set_message = utilities.set_message;
	local substitute = utilities.substitute;

local this_page = mw.title.getCurrentTitle();									-- title object for the current page; used to limit categorization to certain namespaces

-- Decide whether this page must be kept out of the error / maintenance categories.
-- <no_cat> is set (to the string "true") when either
--	1. the page's namespace id is a key in cfg.uncategorized_namespaces, or
--	2. the page's title text matches any pattern listed in cfg.uncategorized_subpages
-- otherwise <no_cat> remains nil.
local no_cat = cfg.uncategorized_namespaces[this_page.namespace] and "true" or nil;

for _, subpage_pattern in ipairs (cfg.uncategorized_subpages) do				-- ipairs() because cfg is a mw.loadData() table
	if this_page.text:match (subpage_pattern) then								-- page name matches this pattern
		no_cat = "true";
		break;																	-- one match is enough
	end
end


--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------

adapted from Module:Citation/CS1/Identifiers

Chooses the wikilink target used for an identifier's label.

<handler> is a table with (at least) the fields <redirect> and <link>.

returns the first available of
	1. handler.redirect – only when cfg.use_identifier_redirects is enabled and handler.redirect is set
	2. handler.link

]]

local function link_label_make (handler)
	if cfg.use_identifier_redirects and is_set (handler.redirect) then			-- redirects enabled and this handler has one
		return handler.redirect or handler.link;								-- prefer the redirect; fall back to the link for a falsy redirect
	end

	return handler.link;														-- redirects disabled or none specified
end


--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a wiki-style external link for an identifier: a wikilinked label, a separator, and an external link whose
displayed text is the identifier itself.

<options> is a table; the fields read here are:
	id – the identifier; used in the url and, passed through mw.text.nowiki(), as the link's display text
	encode – when true or nil, <id> is encoded with mw.uri.encode (id, 'PATH') before it is placed in the url;
		any other value leaves <id> as written
	prefix – url text that precedes the identifier
	suffix – optional url text that follows the identifier; empty string when omitted
	access – when set, used as a key into cfg.presentation to fetch the class and title of the access signal
		(free-to-read / paywall lock) that is wrapped around the external link
	link, redirect – candidate targets for the label wikilink; see link_label_make()
	label – display text for the label wikilink
	separator – text between label and external link; '&nbsp;' when omitted

returns the concatenation: <label wikilink><separator><external link>

Wikidata-derived label links and auto-linking (options.auto_link) are not supported in this module; any other
fields in <options> are ignored.

]]

local function external_link_id (options)
	local url_string = options.id;
	local ext_link;

	if options.encode == true or options.encode == nil then					-- encode unless explicitly told not to
		url_string = mw.uri.encode (url_string, 'PATH');
	end

--	if options.auto_link and is_set (options.access) then						-- not supported in this module
--		auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix});
--	end

	ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki (options.id));	-- display text is nowiki'd so that it renders as written
	if is_set (options.access) then
		ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link});	-- add the free-to-read / paywall lock
	end

	return table.concat	({
		make_wikilink (link_label_make (options), options.label),				-- redirect or locally specified link (in that order)
		options.separator or '&nbsp;',
		ext_link
		});
end


--[[--------------------------< D O I >------------------------------------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a DOI and checks for DOI errors.

DOI names contain two parts: prefix and suffix separated by a forward slash.
	Prefix: directory indicator '10.' followed by a registrant code
	Suffix: character string of any length chosen by the registrant

This function checks a DOI name for: prefix/suffix.  If the DOI name contains spaces or endashes, or, if it ends
with a period or a comma, this function will emit a bad_doi error message.

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
if ever used in DOI names.

https://www.doi.org/doi_handbook/2_Numbering.html				-- 2.2 Syntax of a DOI name
https://www.doi.org/doi_handbook/2_Numbering.html#2.2.2			-- 2.2.2 DOI prefix

<options> is a table; the fields read here are:
	id – the DOI as written (accept-as-written markup already removed by the caller)
	access – value of |doi-access=; passed on to external_link_id()
	accept – when truthy, validation is skipped and a 'maint_doi_ignore' message is set instead
	handler – identifier handler table; its link, label, q, redirect, prefix, separator, and encode fields
		are passed on to external_link_id()

Messages are recorded through set_message():
	'err_bad_doi' – DOI is malformed or its registrant code matches one of the unsupported-range patterns
	'maint_doi_ignore' – validation was skipped at the editor's request
	'maint_doi_unflagged_free' – no error, <access> not set, and the registrant is listed as free-to-read

returns the rendered identifier: linked label, separator, and external link

]]

local function doi (options)
	local id = options.id;
	local inactive = nil;														-- |doi-broken-date= not supported in this module
	local access = options.access;
	local ignore_invalid = options.accept;
	local handler = options.handler;
	local err_flag;

	local function is_extended_free (registrant, suffix)						-- local function to check those few registrants that are mixed; identifiable by the doi suffix <incipit>
		local incipits_t = cfg.extended_registrants_t[registrant];
		if incipits_t then														-- if this registrant has known free-to-read extensions
			for _, incipit in ipairs (incipits_t) do							-- loop through the registrant's incipits
				if mw.ustring.find (suffix, '^' .. incipit) then				-- if <suffix> begins with this incipit (incipit is used as a pattern)
					return true;
				end
			end
		end
	end																			-- falls off the end (returns nothing) when not found

	local text;
--	if is_set (inactive) then													-- |doi-broken-date= not supported in this module
--		local inactive_year = inactive:match("%d%d%d%d");						-- try to get the year portion from the inactive date
--		local inactive_month, good;
--
--		if is_set (inactive_year) then
--			if 4 < inactive:len() then											-- inactive date has more than just a year (could be anything)
--				local lang_obj = mw.getContentLanguage();						-- get a language object for this wiki
--				good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive);	-- try to get the month name from the inactive date
--				if not good then
--					inactive_month = nil;										-- something went wrong so make sure this is unset
--				end
--			end
--		end																		-- otherwise, |doi-broken-date= has something but it isn't a date
--		
--		if is_set (inactive_year) and is_set (inactive_month) then
--			set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '});
--		elseif is_set (inactive_year) then
--			set_message ('maint_doi_inactive_dated', {inactive_year, '', ''});
--		else
--			set_message ('maint_doi_inactive');
--		end
--		inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
--	end

	local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$');	-- registrant and suffix set when DOI has the proper basic form; both nil otherwise

	local registrant_err_patterns = {											-- these patterns are for code ranges that are not supported 
		'^[^1-3]%d%d%d%d%.%d+$',												-- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
		'^[^1-7]%d%d%d%d$',														-- 5 digits without subcode (0xxxx, 80000+); accepts: 10000–79999
		'^[^1-9]%d%d%d%.%d+$',												-- 4 digits with subcode (0xxx); accepts: 1000–9999
		'^[^1-9]%d%d%d$',														-- 4 digits without subcode (0xxx); accepts: 1000–9999
		'^%d%d%d%d%d%d+',														-- 6 or more digits
		'^%d%d?%d?$',															-- less than 4 digits without subcode (3 digits with subcode is legitimate)
		'^%d%d?%.[%d%.]+',														-- 1 or 2 digits with subcode
		'^5555$',																-- test registrant will never resolve
		'[^%d%.]',																-- any character that isn't a digit or a dot
		}

	if not ignore_invalid then
		if registrant then														-- when DOI has proper form
			for _, pattern in ipairs (registrant_err_patterns) do				-- spin through error patterns
				if registrant:match (pattern) then								-- to validate registrant codes
					err_flag = set_message ('err_bad_doi');						-- when found, mark this DOI as bad
					break;														-- and done
				end
			end
		else
			err_flag = set_message ('err_bad_doi');								-- invalid directory or malformed
		end
	else
		set_message ('maint_doi_ignore');										-- editor asked that this DOI be accepted as written
	end

	if err_flag then
--		options.coins_list_t['DOI'] = nil;										-- when error, unset so not included in COinS; COinS not supported in this module
	else
		-- <registrant> is nil here when validation was skipped for a malformed DOI; nothing to look up in that case
		if not access and registrant and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then		-- |doi-access=free not set and <registrant> is known to be free
			set_message ('maint_doi_unflagged_free');							-- set a maint cat
		end
	end
	
	text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,
		auto_link = not (err_flag or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored
		}) .. (inactive or '');													-- <inactive> is always nil in this module so nothing is appended

	return text;
end


--[[--------------------------< _ M A I N >--------------------------------------------------------------------

entry point from another module

<args_t> is a table of arguments; the keys read here are:
	[1] – digital object identifier; takes precedence over 'id'; may use accept-as-written ((..)) markup
	'id' – digital object identifier; used when [1] is not present
	'doi-access' – only the value 'free' is honored; anything else is treated as not set

returns the rendered DOI.  When utilities.z.error_msgs_t holds a message, the first one is appended (with the
error category unless this page is excluded from categorization); otherwise, when utilities.z.maint_cats_t holds
a category name, the first one is appended as a maintenance message.

NOTE(review): utilities.z is read here but never reset in this module; presumably messages recorded by an earlier
call in the same invocation would still be present – confirm against Module:Citation/CS1/Utilities.

]]

local function _main (args_t)
	local raw_id, accept = utilities.has_accept_as_written (args_t[1] or args_t.id);	-- strip accept-as-written markup if present
	local id_missing = not raw_id;												-- true when neither args_t[1] nor args_t.id supplied an identifier

--	local inactive = args_t['doi-broken-date'];									-- |doi-broken-date= not currently supported in this module
	local access = ('free' == args_t['doi-access']) and 'free' or nil;			-- 'free' is the only supported value of |doi-access=

	local rendering = doi ({													-- go render the doi
		id = raw_id or '',														-- empty string when no identifier was supplied
		access = access,
		handler = cfg.id_handlers.DOI,											-- handler specific to |doi=
		accept = accept,
		});

	local messages = utilities.z;												-- where set_message() leaves its results

	if messages.error_msgs_t[1] then											-- only one error message considered
		local msg = messages.error_msgs_t[1]:gsub ('Help:CS1 errors#bad_doi', 'Template:doi');	-- point the help link at the template's own documentation
		if id_missing then
			rendering = rendering:match ('^[^:]+:');							-- no identifier so keep only the linked label from the rendering
		end
		local error_cat = no_cat and '' or '[[Category:Pages with DOI errors]]';	-- limited to certain namespaces
		return rendering .. ' ' .. msg .. error_cat;
	end

	if messages.maint_cats_t[1] then											-- only one maint message considered per rendering
		local msg = messages.maint_cats_t[1]:gsub ('CS1 maint: ', '');			-- strip cs1-specific prefix from cat name
		local cat_link = no_cat and '' or substitute (cfg.messages['cat wikilink'], msg);	-- the category link; limited to certain namespaces
		local maint_text = substitute (cfg.presentation['hidden-maint'], msg);	-- the maint message text
		local help_link = substitute (cfg.messages[':cat wikilink'], msg);		-- links to the maint cat, just as cs1|2 links to its maint cats

		return table.concat ({rendering, ' ', cat_link, maint_text, ' (', help_link, ')'});	-- assemble maint message with category
	end

	return rendering;															-- nothing to report; the rendering stands alone
end


--[[--------------------------< M A I N >----------------------------------------------------------------------

entry point from an #invoke; implements {{doi}}
	{{#invoke:doi|main}}

accepted parameters are:
	{{{1}}} – digital object identifier; takes precedence over |id=; may use accept-as-written ((..)) markup
	|id= – digital object identifier; yields to {{{1}}}; may use accept-as-written ((..)) markup
	|doi-access= – accepts one value: 'free'

Arguments are fetched from <frame> with Module:Arguments.  The returned wikitext is a templatestyles tag for
Module:Citation/CS1/styles.css followed by the rendering from _main().

]]

local function main (frame)
	local get_args = require ('Module:Arguments').getArgs;
	local args_t = get_args (frame);											-- collect the template / invoke arguments

	local styles_tag = frame:extensionTag ('templatestyles', '', {src='Module:Citation/CS1/styles.css'});	-- so that cs1 css classes apply to the rendering
	local rendering = _main (args_t);

	return styles_tag .. rendering;
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------

The module's public interface: the two entry-point functions defined above.

]]

return {
	main = main,																-- entry point for an #invoke (template call); takes a frame object
	_main = _main,																-- entry point from another module; takes a table of arguments
	}