Jump to content

Module:TaxonItalics/sandbox: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
Simplifications
Candidatus and ""
Tag: Reverted
Line 1: Line 1:
--[[
--[[=========================================================================
This module provides the core functionality to a set of templates used to
Italicize a taxon name appropriately by invoking italicizeTaxonName.
display a list of taxon name/authority pairs, with the taxon names optionally
The algorithm used is:
italicized, wikilinked and/or emboldened. Such lists are usually part of
* If the name has italic markup at the start or the end, do nothing.
taxoboxes.
* Else
]]
* Remove (internal) italic markup.

* If the name is made up of four words and the third word is a
-- use a function from Module:TaxonItalics to italicize a taxon name
botanical connecting term, de-italicize the connecting term and add italic
local TaxonItalics = require("Module:TaxonItalics")
markup to the outside of the name.
local IfPreview = require([[Module:If preview]])
* Else if the name is made up of three words and the second word is a
botanical connecting term or a variant of "cf.", de-italicize the
connecting term and add italic markup to the outside of the name.
* Else just add italic markup to the outside of the name.
The module also:
* Ensures that the hybrid symbol, ×, and parentheses are not italicized, as
well as any string inside parentheses if dab is true.
* Has an option to abbreviate all parts of taxon names other than the last
to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
"P. s. var. sylvestris").
* Has an option to wikilink the italicized name to the input name.
=============================================================================]]


local p = {}
local p = {}
local l = {} -- used to store purely local functions

--connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris)
local cTerms3 = {
--subsp.
subspecies = "subsp.",
["subsp."] = "subsp.",
subsp = "subsp.",
["ssp."] = "subsp.",
ssp = "subsp.",
--var.
varietas = "var.",
["var."] = "var.",
var = "var.",
--subvar.
subvarietas = "subvar.",
["subvar."] = "subvar.",
subvar = "subvar.",
--f.
forma = "f.",
["f."] = "f.",
f = "f.",
--subf.
subforma = "subf.",
["subf."] = "subf.",
subf = "subf."
}
--connecting terms in two part names (e.g. Pinus sect. Pinus)
local cTerms2 = {
--subg.
subgenus = "subg.",
["subgen."] = "subg.",
["subg."] = "subg.",
subg = "subg.",
--supersect.
supersection = "supersect.",
["supersect."] = "supersect.",
supersect = "supersect.",
--sect.
section = "sect.",
["sect."] = "sect.",
sect = "sect.",
--subsect.
subsection = "subsect.",
["subsect."] = "subsect.",
subsect = "subsect.",
--ser.
series = "ser.",
["ser."] = "ser.",
ser = "ser.",
--subser.
subseries = "subser.",
["subser."] = "subser.",
subser = "subser.",
--cf.
cf = "cf.",
["cf."] = "cf.",
["c.f."] = "cf."
}


--[[=========================================================================
--[[=========================================================================
Main function to italicize a taxon name appropriately. For the purpose of the
Utility function to strip off any initial present to mark the taxon as
extinct. The † must not be italicized, emboldened, or included in the
parameters, see p.italicizeTaxonName().
wikilinked text, so needs to be added back afterwards.
† is assumed to be present as one of:
* the unicode character †
* the HTML entity &dagger;
* the output of {{extinct}} – this will have been expanded before reaching this
module and is assumed to have the form '<span ... </span>'
The function returns two values: the taxon name with any † before it removed
and either '†' if it was present or the empty string if not.
=============================================================================]]
=============================================================================]]
function p.main(frame)
function p.stripDagger(taxonName)
local name = frame.args[1] or ''
local dagger = ''
if mw.ustring.sub(taxonName,1,1) == '†' then
local linked = frame.args['linked'] == 'yes'
taxonName = mw.ustring.sub(taxonName,2,#taxonName)
local abbreviated = frame.args['abbreviated'] == 'yes'
dagger = '†'
local dab = frame.args['dab'] == 'yes'
else
return p.italicizeTaxonName(name, linked, abbreviated, dab)
if string.sub(taxonName,1,8) == '&dagger;' then
end
taxonName = string.sub(taxonName,9,#taxonName)

dagger = '†'
--[[=========================================================================
else
Utility local function to abbreviate an input string to its first character
-- did the taxon name originally have {{extinct}} before it?
followed by ".".
if (string.sub(taxonName,1,5) == '<abbr') and mw.ustring.find(taxonName, '†') then
Both "×" and an HTML entity at the start of the string are skipped over in
taxonName = string.gsub(taxonName, '^.*</abbr>', '', 1)
determining first character, as is an opening parenthesis and an opening ",
dagger = '†'
which cause a matching closing character to be included.
=============================================================================]]
function l.abbreviate(str)
local result = ""
local hasParentheses = false
local isQuoted = false
if mw.ustring.len(str) < 2 then
--single character strings are left unchanged
result = str
else
--skip over an opening parenthesis that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "(" then
hasParentheses = true
result = "("
str = mw.ustring.sub(str,2,mw.ustring.len(str))
elseif mw.ustring.sub(str,1,1) == '"' then
isQuoted = true
result = '"'
str = mw.ustring.sub(str,2,mw.ustring.len(str))
end
--skip over a hybrid symbol that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "×" then
result = "×"
str = mw.ustring.sub(str,2,mw.ustring.len(str))
end
--skip over an HTML entity that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "&" then
local i,dummy = mw.ustring.find(str,";",2,plain)
result = result .. mw.ustring.sub(str,1,i)
str = mw.ustring.sub(str,i+1,mw.ustring.len(str))
end
--if there's anything left, reduce it to its first character plus ".",
--adding the closing parenthesis or quote if required
if str ~= "" then
result = result .. mw.ustring.sub(str,1,1) .. "."
if hasParentheses then result = result .. ")"
elseif isQuoted then result = result .. '"'
end
end
end
end
end
end
return result
return taxonName, dagger
end
end


--[[=========================================================================
--[[=========================================================================
The function which does the italicization. Parameters:
Utility function to do the following:

name (string) – the taxon name to be processed
1. Strip off any initial † present to mark the taxon as extinct. We outsource
linked (boolean) – should a wikilink be generated?
to p.stripDagger() for this.
abbreviated (boolean) – should the first parts of the taxon name be

reduced to capital letters?
2. Strip off any double quotation marks present to mark the taxon as invalid.
dab (boolean) – should any parenthesized part be treated as a disambiguation
The double-quotation marks, too, should not be formatted.
term and left unitalicized?

3. Strip off any Candidatus or Ca. to mark the taxon as Candidatus.

The function returns four values:
* the taxon name with all of the three modifiers removed
* either '†' if it was present or the empty string if not
* either a single dquote if it was present in a pair or the empty string if not
* either italicized "Candidatus " or "Ca. " if it was present or the empty string if not

The function can error in case of an unpaired quotation mark. In that case, a
IfPreview._warning() is mixed into the first return.
=============================================================================]]
=============================================================================]]
function p.italicizeTaxonName(name, linked, abbreviated, dab)
function p.parseName(taxonName)
local name, dagger = p.stripDagger(taxonName)
name = mw.text.trim(name)
local dquote = ''
-- if the name begins with '[', then assume formatting is present
if mw.ustring.sub(name,1,1) == '[' then return name end
if string.sub(name,1,1) == '"' then
name = string.sub(name,2,#name)
-- otherwise begin by replacing any use of the HTML italic tags
dquote = '"'
-- by Wikimedia markup; replace any entity alternatives to the hybrid symbol
if string.sub(name,1,1) == '"' then
-- by the symbol itself; prevent the hybrid symbol being treated as
name = string.sub(name,2,#name)
-- a 'word' by converting a following space to the HTML entity
else
local italMarker = "''"
name = IfPreview._warning('Unmatched double quotation mark in taxon name: ' .. name)
name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
end
name = string.gsub(string.gsub(name, "&#215;", "×"), "&times;", "×")
end
name = string.gsub(name, "</?span.->", "") -- remove any span markup

name = string.gsub(name, "× ", "×&#32;")
local candidatus = ''
-- now italicize and abbreviate if required
local result = name
if name ~= '' then
if string.sub(name,1,11) == 'Candidatus ' then
if string.sub(name,1,2) == italMarker or string.sub(name,-2) == italMarker then
name = string.sub(name,12,#name)
candidatus = "''Candidatus'' "
-- do nothing if the name already has italic markers at the start or end
elseif string.sub(name,1,4) == 'Ca. ' then
else
name = string.gsub(name, italMarker, "") -- first remove any internal italics
name = string.sub(name,5,#name)
candidatus = "''Ca.'' "
local words = mw.text.split(name, " ", true)
end
if #words == 4 and cTerms3[words[3]] then

-- the third word of a four word name is a connecting term
return name, dagger, dquote, candidatus
-- ensure the connecting term isn't italicized
words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>'
if abbreviated then
words[1] = l.abbreviate(words[1])
words[2] = l.abbreviate(words[2])
end
result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
elseif #words == 3 and cTerms2[words[2]] then
-- the second word of a three word name is a connecting term
-- ensure the connecting term isn't italicized
words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>'
if abbreviated then
words[1] = l.abbreviate(words[1])
end
result = words[1] .. " " .. words[2] .. " " .. words[3]
elseif abbreviated then -- not a name as above; only deal with abbreviation
if #words > 1 then
result = l.abbreviate(words[1])
for i = 2, #words-1, 1 do
result = result .. " " .. l.abbreviate(words[i])
end
result = result .. " " .. words[#words]
end
else
result = name
end
-- deal with any hybrid symbol as it should not be italicized
result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
-- deal with any parentheses as they should not be italicized
if dab then
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">('),"%)",')</span>')
else
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">(</span>'),"%)",'<span style="font-style:normal;">)</span>')
end
-- any question marks surrounded by spans can have the spans joined
result = string.gsub(result,'</span>%?<span style="font%-style:normal;">','?')
-- add outside markup
if linked then
if result ~= name then
result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
else
result = italMarker .. "[[" .. name .. "]]" .. italMarker
end
else
result = italMarker .. result .. italMarker
end
end
end
return result
end
end


--[[=========================================================================
--[[=========================================================================
The function returns a list of taxon names and authorities, appropriately
Utility function used by other modules to check if a connecting term is
formatted.
present in a name. The value of name is assumed to be plain text.
Usage:
{{#invoke:TaxonList|main
|italic = yes - to italicize the taxon name
|linked = yes - to wikilink the taxon name
|bold = yes - to embolden the taxon name
|incomplete = yes - to output "(incomplete)" at the end of the list
}}
The template that transcludes the invoking template must supply an indefinite
even number of arguments in the format
|Name1|Author1 |Name2|Author2| ... |NameN|AuthorN
=============================================================================]]
=============================================================================]]
function p.hasCT(frame)
function p.main(frame)
return p.hasConnectingTerm(frame.args[1] or '')
local italic = frame.args['italic'] == 'yes'
local bold = frame.args['bold'] == 'yes'
local linked = frame.args['linked'] == 'yes'
if bold then linked = false end -- must not have bold and wikilinked
local abbreviated = frame.args['abbreviated'] == 'yes'
local incomplete = frame.args['incomplete'] == 'yes'
local taxonArgs = frame:getParent().args
local result = ''
-- iterate over unnamed variables
local taxonName
local dagger
local dquote
local candidatus
local first = true -- is this the first of a taxon name/author pair?
for param, value in pairs(taxonArgs) do
if tonumber(param) then
if first then
taxonName = mw.text.trim(value)
-- if necessary separate any initial modifier
taxonName, dagger, dquote, candidatus = p.parseName(taxonName)
if linked and not italic then
taxonName = '[[' .. taxonName .. ']]'
end
if italic and candidatus == '' then
taxonName = TaxonItalics.italicizeTaxonName(taxonName, linked, abbreviated)
end
if bold then
taxonName = '<b>' .. taxonName .. '</b>'
end
result = result .. '<li>' .. dagger .. dquote .. candidatus .. taxonName .. dquote
else
result = result .. ' <small>' .. value .. '</small></li>'
end
first = not first
end
end
if incomplete then
result = result .. '<small>(incomplete list)</small>'
end
return '<ul class="taxonlist">' .. result .. '</ul>'
end
end

function p.hasConnectingTerm(name)
local words = mw.text.split(name, " ", true)
return (#words == 4 and cTerms3[words[3]])
or (#words == 3 and cTerms2[words[2]])
end
return p
return p

Revision as of 10:28, 21 May 2025

--[[
This module provides the core functionality to a set of templates used to
display a list of taxon name/authority pairs, with the taxon names optionally
italicized, wikilinked and/or emboldened. Such lists are usually part of
taxoboxes.
]]

-- use a function from Module:TaxonItalics to italicize a taxon name
local TaxonItalics = require("Module:TaxonItalics")
local IfPreview = require([[Module:If preview]])

local p = {}

--[[=========================================================================
Utility function to strip off any initial † present to mark the taxon as
extinct. The † must not be italicized, emboldened, or included in the
wikilinked text, so needs to be added back afterwards.
† is assumed to be present as one of:
* the unicode character †
* the HTML entity &dagger;
* the output of {{extinct}} – this will have been expanded before reaching this
  module and is assumed to have the form '<abbr ... </abbr>'
The function returns two values: the taxon name with any † before it removed
and either '†' if it was present or the empty string if not.
=============================================================================]]
function p.stripDagger(taxonName)
	-- Removes a leading extinct-taxon marker from taxonName.
	-- Parameter:
	--   taxonName (string) – the raw taxon name, possibly starting with a marker
	-- Returns two values:
	--   * taxonName with the leading marker (if any) removed
	--   * '†' if a marker was removed, otherwise the empty string
	local DAGGER = '†'
	local ENTITY = '&dagger;'

	-- Literal † character. Comparing the leading bytes against the (UTF-8)
	-- literal is equivalent to comparing the first code point, and avoids
	-- mixing code-point indices with byte lengths.
	if string.sub(taxonName, 1, #DAGGER) == DAGGER then
		return string.sub(taxonName, #DAGGER + 1), DAGGER
	end

	-- HTML entity form.
	if string.sub(taxonName, 1, #ENTITY) == ENTITY then
		return string.sub(taxonName, #ENTITY + 1), DAGGER
	end

	-- Did the taxon name originally have {{extinct}} before it? By the time
	-- the value reaches this module the template has been expanded to an
	-- <abbr> element. Match lazily (".-") so that only the FIRST element is
	-- consumed; a greedy match would also swallow the name itself whenever a
	-- later </abbr> occurs in the value. The dagger is only reported when the
	-- element was really found and contains †, so it can never be added
	-- without something having been stripped.
	local marker = string.match(taxonName, '^<abbr.-</abbr>')
	if marker and string.find(marker, DAGGER, 1, true) then
		return string.sub(taxonName, #marker + 1), DAGGER
	end

	return taxonName, ''
end

--[[=========================================================================
Utility function to do the following:

1. Strip off any initial † present to mark the taxon as extinct. We outsource
to p.stripDagger() for this.

2. Strip off any double quotation marks present to mark the taxon as invalid.
The double-quotation marks, too, should not be formatted.

3. Strip off any Candidatus or Ca. to mark the taxon as Candidatus.

The function returns four values:
* the taxon name with all of the three modifiers removed
* either '†' if it was present or the empty string if not
* either a single dquote if it was present in a pair or the empty string if not
* either italicized "Candidatus " or "Ca. " if it was present or the empty string if not

The function does not raise an error for an unpaired quotation mark. Instead,
the output of IfPreview._warning() is included in the first return value.
=============================================================================]]
function p.parseName(taxonName)
	-- Splits a raw taxon name into the bare name and its modifiers.
	-- Parameter:
	--   taxonName (string) – the raw taxon name
	-- Returns four values:
	--   * the name with the dagger, the surrounding double quotation marks
	--     and the Candidatus prefix removed
	--   * '†' if a dagger was stripped by p.stripDagger(), else ''
	--   * '"' if the name was enclosed in a PAIR of double quotation marks,
	--     else ''
	--   * "''Candidatus'' " or "''Ca.'' " if that prefix was present, else ''
	local name, dagger = p.stripDagger(taxonName)

	local dquote = ''
	if string.sub(name, 1, 1) == '"' then
		-- The closing quotation mark is the LAST character of the name, so
		-- that is the end that has to be inspected. The length check stops a
		-- lone '"' from being treated as both the opening and closing mark.
		if #name >= 2 and string.sub(name, -1) == '"' then
			name = string.sub(name, 2, -2)
			dquote = '"'
		else
			-- Unpaired quotation mark: drop the stray mark but keep the name,
			-- so something is still displayed when _warning() returns an
			-- empty string, and append the warning to it. dquote stays empty
			-- because the marks were not present as a pair.
			-- _warning() expects a table of arguments with the message as
			-- its first element, not a bare string.
			name = string.sub(name, 2)
			name = name .. IfPreview._warning({
				'Unmatched double quotation mark in taxon name: ' .. name
			})
		end
	end

	local candidatus = ''
	if string.sub(name, 1, 11) == 'Candidatus ' then
		name = string.sub(name, 12)
		candidatus = "''Candidatus'' "
	elseif string.sub(name, 1, 4) == 'Ca. ' then
		name = string.sub(name, 5)
		candidatus = "''Ca.'' "
	end

	return name, dagger, dquote, candidatus
end

--[[=========================================================================
The function returns a list of taxon names and authorities, appropriately
formatted.
Usage:
{{#invoke:TaxonList|main
|italic = yes - to italicize the taxon name
|linked = yes - to wikilink the taxon name
|bold = yes - to embolden the taxon name (this disables linked)
|abbreviated = yes - to abbreviate the italicized taxon name
|incomplete = yes - to output "(incomplete list)" at the end of the list
}}
The template that transcludes the invoking template must supply an indefinite
even number of arguments in the format
|Name1|Author1 |Name2|Author2| ... |NameN|AuthorN
=============================================================================]]
function p.main(frame)
	-- Builds the HTML list of taxon name/authority pairs.
	-- Formatting options are read from the #invoke arguments (frame.args);
	-- the names and authorities are the numbered arguments of the parent
	-- frame, alternating name, authority, name, authority, ...
	-- Returns a string holding a <ul class="taxonlist"> element.
	local italic = frame.args['italic'] == 'yes'
	local bold = frame.args['bold'] == 'yes'
	local linked = frame.args['linked'] == 'yes'
	if bold then linked = false end -- must not have bold and wikilinked
	local abbreviated = frame.args['abbreviated'] == 'yes'
	local incomplete = frame.args['incomplete'] == 'yes'
	local taxonArgs = frame:getParent().args

	-- pairs() returns arguments in an unspecified order, but the name/authority
	-- alternation below depends on the order. Collect the numbered arguments
	-- and sort them by number first. Gaps in the numbering are tolerated,
	-- as they were when iterating with pairs() directly.
	local positional = {}
	for param, value in pairs(taxonArgs) do
		local index = tonumber(param)
		if index then
			positional[#positional + 1] = { index = index, value = value }
		end
	end
	table.sort(positional, function(a, b) return a.index < b.index end)

	-- collect the output pieces and join them once at the end
	local parts = {}
	local first = true -- is this the first of a taxon name/author pair?
	for _, arg in ipairs(positional) do
		if first then
			-- if necessary separate any initial modifier
			local taxonName, dagger, dquote, candidatus =
				p.parseName(mw.text.trim(arg.value))
			if italic and candidatus == '' then
				-- italicizeTaxonName also creates the wikilink when linked
				taxonName = TaxonItalics.italicizeTaxonName(taxonName, linked, abbreviated)
			elseif linked then
				-- Not italicized: either italics were not requested, or the
				-- name has a Candidatus prefix (only the prefix is italic).
				-- The link must still be generated in both cases.
				taxonName = '[[' .. taxonName .. ']]'
			end
			if bold then
				taxonName = '<b>' .. taxonName .. '</b>'
			end
			parts[#parts + 1] = '<li>' .. dagger .. dquote .. candidatus .. taxonName .. dquote
		else
			parts[#parts + 1] = ' <small>' .. arg.value .. '</small></li>'
		end
		first = not first
	end
	if not first then
		-- an odd number of arguments was supplied: the last name has no
		-- authority, so its list item has not been closed yet
		parts[#parts + 1] = '</li>'
	end
	if incomplete then
		parts[#parts + 1] = '<small>(incomplete list)</small>'
	end
	return '<ul class="taxonlist">' .. table.concat(parts) .. '</ul>'
end

return p