-- Module:Doi
require ('strict');
local cfg = mw.loadData ('Module:Citation/CS1/Configuration');
local utilities = require ('Module:Citation/CS1/Utilities'); -- forward declarations for functions in Module:Citation/CS1/Utilities
utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables
local has_accept_as_written = utilities.has_accept_as_written; -- import functions from Module:Citation/CS1/Utilities
local is_set = utilities.is_set;
local make_wikilink = utilities.make_wikilink;
local set_message = utilities.set_message;
local substitute = utilities.substitute;
local this_page = mw.title.getCurrentTitle();	-- title object of the current page; used to limit categorization to certain namespaces

-- Work out whether this page is one that cs1 is not supposed to add to the error categories: either its
-- namespace id is listed in cfg.uncategorized_namespaces or its name matches one of the patterns in
-- cfg.uncategorized_subpages.  <no_cat> is left nil when categorization is allowed.
local no_cat;
do
	if cfg.uncategorized_namespaces[this_page.namespace] then	-- namespace id is one of the uncategorized namespace ids
		no_cat = "true";
	end

	for _, name_pattern in ipairs (cfg.uncategorized_subpages) do	-- try each page name pattern in turn
		if string.match (this_page.text, name_pattern) then	-- first matching pattern is enough
			no_cat = "true";
			break;
		end
	end
end
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------

adapted from Module:Citation/CS1/Identifiers

Chooses the wikilink target used for an identifier's label.  <handler> is a table that has a <link> field and,
optionally, a <redirect> field.

returns the first available of
	1. handler.redirect – only when cfg.use_identifier_redirects is enabled and the redirect is set
	2. handler.link

]]

local function link_label_make (handler)
	if cfg.use_identifier_redirects and is_set (handler.redirect) then	-- redirects enabled and this handler has one
		return handler.redirect or handler.link;
	end

	return handler.link;	-- otherwise the locally specified link
end
--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a wiki-style external link for an identifier: a wikilinked label, a separator, and an external link
whose display text is the nowiki'd identifier.

<options> is a table; the fields read here are:
	id – the identifier value; used for the url and (nowiki'd) as the link's display text
	prefix – text placed in front of the id to make the url
	suffix – text placed after the id in the url; optional, defaults to empty string
	encode – when true or nil, id is encoded with mw.uri.encode (id, 'PATH') before it goes into the url
	link, redirect – read by link_label_make() to choose the label's wikilink target
	label – display text of the label wikilink
	separator – text between label and external link; optional
	access – when set, a key into cfg.presentation whose class and title decorate the link with the
		access-signal markup

returns the assembled wikitext string

]]

local function external_link_id (options)
	local url_string = options.id;

	if options.encode == true or options.encode == nil then	-- encode unless the caller explicitly said not to
		url_string = mw.uri.encode (url_string, 'PATH');
	end

--	if options.auto_link and is_set (options.access) then						-- not supported in this module
--		auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix});
--	end

	local ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki (options.id));
	if is_set (options.access) then
		ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link});	-- add the free-to-read / paywall lock
	end

	return table.concat ({
		make_wikilink (link_label_make (options), options.label),	-- redirect or locally specified link (in that order)
		options.separator or ' ',
		ext_link
		});
end
--[[--------------------------< D O I >------------------------------------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a DOI and checks for DOI errors.

DOI names contain two parts: prefix and suffix separated by a forward slash.
	Prefix: directory indicator '10.' followed by a registrant code
	Suffix: character string of any length chosen by the registrant

This function checks a DOI name for: prefix/suffix.  If the DOI name contains spaces or endashes, or, if it ends
with a period or a comma, this function will emit a bad_doi error message.

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
if ever used in DOI names.

https://www.doi.org/doi_handbook/2_Numbering.html			-- 2.2 Syntax of a DOI name
https://www.doi.org/doi_handbook/2_Numbering.html#2.2.2	-- 2.2.2 DOI prefix

<options> is a table; the fields read here are:
	id – the DOI name to render and validate
	access – value of |doi-access=; when not set, DOIs from known free registrants get a maintenance message
	accept – when truthy, validation is skipped and a 'maint_doi_ignore' message is set instead
	handler – identifier handler table; supplies link, label, q, redirect, prefix, separator, encode

Messages are recorded through set_message(); the return value is the rendered identifier wikitext.

]]

local function doi (options)
	local id = options.id;
	local inactive = nil;	-- |doi-broken-date= not supported in this module
	local access = options.access;
	local ignore_invalid = options.accept;
	local handler = options.handler;
	local err_flag;

	local function is_extended_free (registrant, suffix)	-- local function to check those few registrants that are mixed; identifiable by the doi suffix <incipit>
		if cfg.extended_registrants_t[registrant] then	-- if this registrant has known free-to-read extensions
			for _, incipit in ipairs (cfg.extended_registrants_t[registrant]) do	-- loop through the registrant's incipits
				if mw.ustring.find (suffix, '^' .. incipit) then	-- if found
					return true;
				end
			end
		end
	end

	local text;
--	if is_set (inactive) then													-- |doi-broken-date= not supported in this module
--		local inactive_year = inactive:match("%d%d%d%d");						-- try to get the year portion from the inactive date
--		local inactive_month, good;
--
--		if is_set (inactive_year) then
--			if 4 < inactive:len() then											-- inactive date has more than just a year (could be anything)
--				local lang_obj = mw.getContentLanguage();						-- get a language object for this wiki
--				good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive);	-- try to get the month name from the inactive date
--				if not good then
--					inactive_month = nil;										-- something went wrong so make sure this is unset
--				end
--			end
--		end																		-- otherwise, |doi-broken-date= has something but it isn't a date
--
--		if is_set (inactive_year) and is_set (inactive_month) then
--			set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '});
--		elseif is_set (inactive_year) then
--			set_message ('maint_doi_inactive_dated', {inactive_year, '', ''});
--		else
--			set_message ('maint_doi_inactive');
--		end
--		inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
--	end

	local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$');	-- registrant and suffix set when DOI has the proper basic form; both nil otherwise

	local registrant_err_patterns = {	-- these patterns are for code ranges that are not supported
		'^[^1-3]%d%d%d%d%.%d+$',	-- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
		'^[^1-7]%d%d%d%d$',	-- 5 digits without subcode (0xxxx, 80000+); accepts: 10000–79999
		'^[^1-9]%d%d%d%.%d+$',	-- 4 digits with subcode (0xxx); accepts: 1000–9999
		'^[^1-9]%d%d%d$',	-- 4 digits without subcode (0xxx); accepts: 1000–9999
		'^%d%d%d%d%d%d+',	-- 6 or more digits
		'^%d%d?%d?$',	-- less than 4 digits without subcode (3 digits with subcode is legitimate)
		'^%d%d?%.[%d%.]+',	-- 1 or 2 digits with subcode
		'^5555$',	-- test registrant will never resolve
		'[^%d%.]',	-- any character that isn't a digit or a dot
		}

	if ignore_invalid then	-- accept-as-written: no validation, just note it
		set_message ('maint_doi_ignore');
	elseif registrant then	-- when DOI has proper form
		for _, pattern in ipairs (registrant_err_patterns) do	-- spin through error patterns
			if registrant:match (pattern) then	-- to validate registrant codes
				err_flag = set_message ('err_bad_doi');	-- when found, mark this DOI as bad
				break;	-- and done
			end
		end
	else
		err_flag = set_message ('err_bad_doi');	-- invalid directory or malformed
	end

	if err_flag then
--		options.coins_list_t['DOI'] = nil;										-- when error, unset so not included in COinS; COinS not supported in this module
	elseif not access and registrant and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then	-- |doi-access=free not set and <registrant> is known to be free; <registrant> is nil when a malformed DOI was accepted as written
		set_message ('maint_doi_unflagged_free');	-- set a maint cat
	end

	text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,
		auto_link = not (err_flag or is_set (inactive) or ignore_invalid) and 'doi' or nil	-- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored
		}) .. (inactive or '');

	return text;
end
--[[--------------------------< _ M A I N >--------------------------------------------------------------------

entry point from another module

<args_t> is a table of arguments; the keys read here are:
	[1] – digital object identifier; takes precedence over id; may use accept-as-written ((..)) markup
	id – digital object identifier; used when [1] is not present
	['doi-access'] – only the value 'free' is honored; anything else is treated as not set

returns the rendered doi wikitext.  When an error message has been recorded, the first one is appended (with
[[Category:Pages with DOI errors]] unless categorization is suppressed for this page); otherwise, when a
maintenance message has been recorded, the first one is appended with its category links.

]]

local function _main (args_t)
	local id, accept = has_accept_as_written (args_t[1] or args_t.id);	-- strip accept-as-written markup if present
	local empty_flag;
	if not id then	-- in case neither args_t[1] nor args_t.id was supplied
		id = '';	-- set <id> to empty string
		empty_flag = true;	-- and set a flag
	end

--	local inactive = args_t['doi-broken-date'];									-- |doi-broken-date= not currently supported in this module
	local access = args_t['doi-access'];	-- |doi-access=
	if 'free' ~= access then	-- 'free' is the only supported value
		access = nil;	-- something other than 'free' so unset
	end

	local handler = cfg.id_handlers.DOI;	-- handler specific to |doi=

	local rendered_doi = doi ({id=id, access=access, handler=handler, accept=accept});	-- go render the doi

	if utilities.z.error_msgs_t[1] then	-- only one error message considered
		local msg = utilities.z.error_msgs_t[1]:gsub ('Help:CS1 errors#bad_doi', 'Template:doi');	-- point the help link at this template's documentation
		if empty_flag then	-- if no doi was supplied
			rendered_doi = rendered_doi:match ('^[^:]+:') or rendered_doi;	-- keep only the linked label from the rendering; when the rendering has no colon keep it whole rather than concatenating nil below
		end
		rendered_doi = rendered_doi .. ' ' .. msg .. (no_cat and '' or '[[Category:Pages with DOI errors]]');	-- limited to certain namespaces

	elseif utilities.z.maint_cats_t[1] then	-- only one maint message considered per rendering
		local msg = utilities.z.maint_cats_t[1]:gsub ('CS1 maint: ', '');	-- strip cs1-specific prefix from cat name
		rendered_doi = table.concat ({	-- assemble maint message with category
			rendered_doi,
			' ',
			no_cat and '' or substitute (cfg.messages['cat wikilink'], msg),	-- the category link; limited to certain namespaces
			substitute (cfg.presentation['hidden-maint'], msg),	-- the maint message text
			' (',	-- and the help link
			substitute (cfg.messages[':cat wikilink'], msg),	-- links to the maint cat, just as cs1|2 links to its maint cats
			')'
			});
	end

	return rendered_doi;
end
--[[--------------------------< M A I N >----------------------------------------------------------------------

entry point from an #invoke; implements {{doi}}

	{{#invoke:doi|main}}

accepted parameters are:
	{{{1}}} – digital object identifier; takes precedence over |id=; may use accept-as-written ((..)) markup
	|id= – digital object identifier; yields to {{{1}}}; may use accept-as-written ((..)) markup
	|doi-access= – accepts one value: 'free'

returns the templatestyles tag for Module:Citation/CS1/styles.css followed by the rendering from _main()

]]

local function main (frame)
	local get_args = require ('Module:Arguments').getArgs;
	local args_t = get_args (frame);	-- collect the arguments for this #invoke

	local styles_tag = frame:extensionTag ('templatestyles', '', {src='Module:Citation/CS1/styles.css'});	-- cs1 styling for the rendered identifier
	local rendering = _main (args_t);

	return styles_tag .. rendering;
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------

the module's public interface

]]

local exports_t = {};

exports_t.main = main;	-- entry point for an #invoke (template call)
exports_t._main = _main;	-- entry point from another module

return exports_t;