Module:TaxonItalics/sandbox: Difference between revisions
Simplifications |
Artoria2e5 (talk | contribs) Candidatus and "" Tag: Reverted |
||
Line 1: | Line 1: | ||
--[[ |
|||
--[[========================================================================= |
|||
This module provides the core functionality to a set of templates used to |
|||
Italicize a taxon name appropriately by invoking italicizeTaxonName. |
|||
display a list of taxon name/authority pairs, with the taxon names optionally |
|||
The algorithm used is: |
|||
italicized, wikilinked and/or emboldened. Such lists are usually part of |
|||
* If the name has italic markup at the start or the end, do nothing. |
|||
taxoboxes. |
|||
* Else |
|||
]] |
|||
* Remove (internal) italic markup. |
|||
* If the name is made up of four words and the third word is a |
|||
-- use a function from Module:TaxonItalics to italicize a taxon name |
|||
botanical connecting term, de-italicize the connecting term and add italic |
|||
local TaxonItalics = require("Module:TaxonItalics") |
|||
markup to the outside of the name. |
|||
local IfPreview = require([[Module:If preview]]) |
|||
* Else if the name is made up of three words and the second word is a |
|||
botanical connecting term or a variant of "cf.", de-italicize the |
|||
connecting term and add italic markup to the outside of the name. |
|||
* Else just add italic markup to the outside of the name. |
|||
The module also: |
|||
* Ensures that the hybrid symbol, ×, and parentheses are not italicized, as |
|||
well as any string inside parentheses if dab is true. |
|||
* Has an option to abbreviate all parts of taxon names other than the last |
|||
to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes |
|||
"P. s. var. sylvestris"). |
|||
* Has an option to wikilink the italicized name to the input name. |
|||
=============================================================================]] |
|||
local p = {} |
local p = {} |
||
local l = {} -- used to store purely local functions |
|||
--connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris) |
|||
local cTerms3 = { |
|||
--subsp. |
|||
subspecies = "subsp.", |
|||
["subsp."] = "subsp.", |
|||
subsp = "subsp.", |
|||
["ssp."] = "subsp.", |
|||
ssp = "subsp.", |
|||
--var. |
|||
varietas = "var.", |
|||
["var."] = "var.", |
|||
var = "var.", |
|||
--subvar. |
|||
subvarietas = "subvar.", |
|||
["subvar."] = "subvar.", |
|||
subvar = "subvar.", |
|||
--f. |
|||
forma = "f.", |
|||
["f."] = "f.", |
|||
f = "f.", |
|||
--subf. |
|||
subforma = "subf.", |
|||
["subf."] = "subf.", |
|||
subf = "subf." |
|||
} |
|||
--connecting terms in two part names (e.g. Pinus sect. Pinus) |
|||
local cTerms2 = { |
|||
--subg. |
|||
subgenus = "subg.", |
|||
["subgen."] = "subg.", |
|||
["subg."] = "subg.", |
|||
subg = "subg.", |
|||
--supersect. |
|||
supersection = "supersect.", |
|||
["supersect."] = "supersect.", |
|||
supersect = "supersect.", |
|||
--sect. |
|||
section = "sect.", |
|||
["sect."] = "sect.", |
|||
sect = "sect.", |
|||
--subsect. |
|||
subsection = "subsect.", |
|||
["subsect."] = "subsect.", |
|||
subsect = "subsect.", |
|||
--ser. |
|||
series = "ser.", |
|||
["ser."] = "ser.", |
|||
ser = "ser.", |
|||
--subser. |
|||
subseries = "subser.", |
|||
["subser."] = "subser.", |
|||
subser = "subser.", |
|||
--cf. |
|||
cf = "cf.", |
|||
["cf."] = "cf.", |
|||
["c.f."] = "cf." |
|||
} |
|||
--[[========================================================================= |
--[[========================================================================= |
||
Utility function to strip off any initial † present to mark the taxon as |
|||
extinct. The † must not be italicized, emboldened, or included in the |
|||
parameters, see p.italicizeTaxonName(). |
|||
wikilinked text, so needs to be added back afterwards. |
|||
† is assumed to be present as one of: |
|||
* the unicode character † |
|||
* the HTML entity † |
|||
* the output of {{extinct}} – this will have been expanded before reaching this |
|||
module and is assumed to have the form '<span ... </span>' |
|||
The function returns two values: the taxon name with any † before it removed |
|||
and either '†' if it was present or the empty string if not. |
|||
=============================================================================]] |
=============================================================================]] |
||
function p. |
function p.stripDagger(taxonName) |
||
local dagger = '' |
|||
if mw.ustring.sub(taxonName,1,1) == '†' then |
|||
local linked = frame.args['linked'] == 'yes' |
|||
taxonName = mw.ustring.sub(taxonName,2,#taxonName) |
|||
local abbreviated = frame.args['abbreviated'] == 'yes' |
|||
dagger = '†' |
|||
local dab = frame.args['dab'] == 'yes' |
|||
else |
|||
return p.italicizeTaxonName(name, linked, abbreviated, dab) |
|||
if string.sub(taxonName,1,8) == '†' then |
|||
end |
|||
taxonName = string.sub(taxonName,9,#taxonName) |
|||
dagger = '†' |
|||
--[[========================================================================= |
|||
else |
|||
Utility local function to abbreviate an input string to its first character |
|||
-- did the taxon name originally have {{extinct}} before it? |
|||
followed by ".". |
|||
if (string.sub(taxonName,1,5) == '<abbr') and mw.ustring.find(taxonName, '†') then |
|||
Both "×" and an HTML entity at the start of the string are skipped over in |
|||
taxonName = string.gsub(taxonName, '^.*</abbr>', '', 1) |
|||
determining first character, as is an opening parenthesis and an opening ", |
|||
dagger = '†' |
|||
which cause a matching closing character to be included. |
|||
=============================================================================]] |
|||
function l.abbreviate(str) |
|||
local result = "" |
|||
local hasParentheses = false |
|||
local isQuoted = false |
|||
if mw.ustring.len(str) < 2 then |
|||
--single character strings are left unchanged |
|||
result = str |
|||
else |
|||
--skip over an opening parenthesis that could be present at the start of the string |
|||
if mw.ustring.sub(str,1,1) == "(" then |
|||
hasParentheses = true |
|||
result = "(" |
|||
str = mw.ustring.sub(str,2,mw.ustring.len(str)) |
|||
elseif mw.ustring.sub(str,1,1) == '"' then |
|||
isQuoted = true |
|||
result = '"' |
|||
str = mw.ustring.sub(str,2,mw.ustring.len(str)) |
|||
end |
|||
--skip over a hybrid symbol that could be present at the start of the string |
|||
if mw.ustring.sub(str,1,1) == "×" then |
|||
result = "×" |
|||
str = mw.ustring.sub(str,2,mw.ustring.len(str)) |
|||
end |
|||
--skip over an HTML entity that could be present at the start of the string |
|||
if mw.ustring.sub(str,1,1) == "&" then |
|||
local i,dummy = mw.ustring.find(str,";",2,plain) |
|||
result = result .. mw.ustring.sub(str,1,i) |
|||
str = mw.ustring.sub(str,i+1,mw.ustring.len(str)) |
|||
end |
|||
--if there's anything left, reduce it to its first character plus ".", |
|||
--adding the closing parenthesis or quote if required |
|||
if str ~= "" then |
|||
result = result .. mw.ustring.sub(str,1,1) .. "." |
|||
if hasParentheses then result = result .. ")" |
|||
elseif isQuoted then result = result .. '"' |
|||
end |
end |
||
end |
end |
||
end |
end |
||
return |
return taxonName, dagger |
||
end |
end |
||
--[[========================================================================= |
--[[========================================================================= |
||
Utility function to do the following: |
|||
name (string) – the taxon name to be processed |
|||
1. Strip off any initial † present to mark the taxon as extinct. We outsource |
|||
linked (boolean) – should a wikilink be generated? |
|||
to p.stripDagger() for this. |
|||
abbreviated (boolean) – should the first parts of the taxon name be |
|||
reduced to capital letters? |
|||
2. Strip off any double quotation marks present to mark the taxon as invalid. |
|||
dab (boolean) – should any parenthesized part be treated as a disambiguation |
|||
The double-quotation marks, too, should not be formatted. |
|||
term and left unitalicized? |
|||
3. Strip off any Candidatus or Ca. to mark the taxon as Candidatus. |
|||
The function returns four values: |
|||
* the taxon name with all of the three modifiers removed |
|||
* either '†' if it was present or the empty string if not |
|||
* either a single dquote if it was present in a pair or the empty string if not |
|||
* either italicized "Candidatus " or "Ca. " if it was present or the empty string if not |
|||
The function can error in case of an unpaired quotation mark. In that case, a |
|||
IfPreview._warning() is mixed into the first return. |
|||
=============================================================================]] |
=============================================================================]] |
||
function p. |
function p.parseName(taxonName) |
||
local name, dagger = p.stripDagger(taxonName) |
|||
name = mw.text.trim(name) |
|||
local dquote = '' |
|||
-- if the name begins with '[', then assume formatting is present |
|||
if string.sub(name,1,1) == '"' then |
|||
name = string.sub(name,2,#name) |
|||
-- otherwise begin by replacing any use of the HTML italic tags |
|||
dquote = '"' |
|||
-- by Wikimedia markup; replace any entity alternatives to the hybrid symbol |
|||
if string.sub(name,1,1) == '"' then |
|||
-- by the symbol itself; prevent the hybrid symbol being treated as |
|||
name = string.sub(name,2,#name) |
|||
-- a 'word' by converting a following space to the HTML entity |
|||
else |
|||
local italMarker = "''" |
|||
name = IfPreview._warning('Unmatched double quotation mark in taxon name: ' .. name) |
|||
name = string.gsub(mw.text.trim(name), "</?i>", italMarker) |
|||
end |
|||
name = string.gsub(string.gsub(name, "×", "×"), "×", "×") |
|||
end |
|||
name = string.gsub(name, "</?span.->", "") -- remove any span markup |
|||
name = string.gsub(name, "× ", "× ") |
|||
local candidatus = '' |
|||
-- now italicize and abbreviate if required |
|||
local result = name |
|||
if string.sub(name,1,11) == 'Candidatus ' then |
|||
name = string.sub(name,12,#name) |
|||
candidatus = "''Candidatus'' " |
|||
-- do nothing if the name already has italic markers at the start or end |
|||
elseif string.sub(name,1,4) == 'Ca. ' then |
|||
else |
|||
name = string.sub(name,5,#name) |
|||
candidatus = "''Ca.'' " |
|||
local words = mw.text.split(name, " ", true) |
|||
end |
|||
if #words == 4 and cTerms3[words[3]] then |
|||
-- the third word of a four word name is a connecting term |
|||
return name, dagger, dquote, candidatus |
|||
-- ensure the connecting term isn't italicized |
|||
words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>' |
|||
if abbreviated then |
|||
words[1] = l.abbreviate(words[1]) |
|||
words[2] = l.abbreviate(words[2]) |
|||
end |
|||
result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4] |
|||
elseif #words == 3 and cTerms2[words[2]] then |
|||
-- the second word of a three word name is a connecting term |
|||
-- ensure the connecting term isn't italicized |
|||
words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>' |
|||
if abbreviated then |
|||
words[1] = l.abbreviate(words[1]) |
|||
end |
|||
result = words[1] .. " " .. words[2] .. " " .. words[3] |
|||
elseif abbreviated then -- not a name as above; only deal with abbreviation |
|||
if #words > 1 then |
|||
result = l.abbreviate(words[1]) |
|||
for i = 2, #words-1, 1 do |
|||
result = result .. " " .. l.abbreviate(words[i]) |
|||
end |
|||
result = result .. " " .. words[#words] |
|||
end |
|||
else |
|||
result = name |
|||
end |
|||
-- deal with any hybrid symbol as it should not be italicized |
|||
result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>') |
|||
-- deal with any parentheses as they should not be italicized |
|||
if dab then |
|||
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">('),"%)",')</span>') |
|||
else |
|||
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">(</span>'),"%)",'<span style="font-style:normal;">)</span>') |
|||
end |
|||
-- any question marks surrounded by spans can have the spans joined |
|||
result = string.gsub(result,'</span>%?<span style="font%-style:normal;">','?') |
|||
-- add outside markup |
|||
if linked then |
|||
if result ~= name then |
|||
result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]" |
|||
else |
|||
result = italMarker .. "[[" .. name .. "]]" .. italMarker |
|||
end |
|||
else |
|||
result = italMarker .. result .. italMarker |
|||
end |
|||
end |
|||
end |
|||
return result |
|||
end |
end |
||
--[[========================================================================= |
--[[========================================================================= |
||
The function returns a list of taxon names and authorities, appropriately |
|||
Utility function used by other modules to check if a connecting term is |
|||
formatted. |
|||
present in a name. The value of name is assumed to be plain text. |
|||
Usage: |
|||
{{#invoke:TaxonList|main |
|||
|italic = yes - to italicize the taxon name |
|||
|linked = yes - to wikilink the taxon name |
|||
|bold = yes - to emboldent the taxon name |
|||
|incomplete = yes - to output "(incomplete)" at the end of the list |
|||
}} |
|||
The template that transcludes the invoking template must supply an indefinite |
|||
even number of arguments in the format |
|||
|Name1|Author1 |Name2|Author2| ... |NameN|AuthorN |
|||
=============================================================================]] |
=============================================================================]] |
||
function p. |
function p.main(frame) |
||
local italic = frame.args['italic'] == 'yes' |
|||
local bold = frame.args['bold'] == 'yes' |
|||
local linked = frame.args['linked'] == 'yes' |
|||
if bold then linked = false end -- must not have bold and wikilinked |
|||
local abbreviated = frame.args['abbreviated'] == 'yes' |
|||
local incomplete = frame.args['incomplete'] == 'yes' |
|||
local taxonArgs = frame:getParent().args |
|||
local result = '' |
|||
-- iterate over unnamed variables |
|||
local taxonName |
|||
local dagger |
|||
local dquote |
|||
local candidatus |
|||
local first = true -- is this the first of a taxon name/author pair? |
|||
for param, value in pairs(taxonArgs) do |
|||
if tonumber(param) then |
|||
if first then |
|||
taxonName = mw.text.trim(value) |
|||
-- if necessary separate any initial modifier |
|||
taxonName, dagger, dquote, candidatus = p.parseName(taxonName) |
|||
if linked and not italic then |
|||
taxonName = '[[' .. taxonName .. ']]' |
|||
end |
|||
if italic and candidatus == '' then |
|||
taxonName = TaxonItalics.italicizeTaxonName(taxonName, linked, abbreviated) |
|||
end |
|||
if bold then |
|||
taxonName = '<b>' .. taxonName .. '</b>' |
|||
end |
|||
result = result .. '<li>' .. dagger .. dquote .. candidatus .. taxonName .. dquote |
|||
else |
|||
result = result .. ' <small>' .. value .. '</small></li>' |
|||
end |
|||
first = not first |
|||
end |
|||
end |
|||
if incomplete then |
|||
result = result .. '<small>(incomplete list)</small>' |
|||
end |
|||
return '<ul class="taxonlist">' .. result .. '</ul>' |
|||
end |
end |
||
function p.hasConnectingTerm(name) |
|||
local words = mw.text.split(name, " ", true) |
|||
return (#words == 4 and cTerms3[words[3]]) |
|||
or (#words == 3 and cTerms2[words[2]]) |
|||
end |
|||
return p |
return p |
Revision as of 10:28, 21 May 2025
This is the module sandbox page for Module:TaxonItalics (diff). See also the companion subpage for test cases (run).
Module:TaxonItalics (talk · · hist · links · doc · subpages · sandbox · testcases)
This Lua module is used on approximately 603,000 pages, or roughly 1% of all pages. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Consider discussing changes on the talk page before implementing them.
Purpose
The module is primarily intended for use by the automated taxobox system. It supports the correct italicization of scientific names. Botanical (ICNafp) names may contain "connecting terms"; these must not be italicized. The hybrid symbol, ×, should also not be italicized. The module optionally wikilinks and abbreviates italicized names.
For non-virus taxa, italics are used at the rank of genus or below. The module does not decide whether a scientific name should be italicized; use {{Is italic taxon}} for this purpose.
Usage
- {{#invoke:TaxonItalics|main|TAXON_NAME}} – italicizes a taxon name
- {{#invoke:TaxonItalics|main|TAXON_NAME|linked=yes}} – italicizes a taxon name, wikilinking the italicized output to the unchanged input
- {{#invoke:TaxonItalics|main|TAXON_NAME|abbreviated=yes}} – italicizes a taxon name, abbreviating all but the last part to the first letter
- {{#invoke:TaxonItalics|main|TAXON_NAME|dab=yes}} – italicizes a taxon name, treating any parenthesized part as a disambiguation term, and not italicizing it
The parameters can be combined. It can also be used via {{Taxon italics}}.
Examples
Just italicized
- Connecting terms
- Pinus subg. Pinus → Pinus subg. Pinus
- P. subgenus Pinus → P. subg. Pinus
- P. subsect. Pinaster → P. subsect. Pinaster
- Acer tataricum subsp. ginnala → Acer tataricum subsp. ginnala
- Aster ericoides var. ericoides → Aster ericoides var. ericoides
- A. ericoides varietas ericoides → A. ericoides var. ericoides
- A. e. subvar. ericoides → A. e. subvar. ericoides
Botanical names may contain only one infraspecific epithet; a string like "Fragaria vesca subsp. vesca f. semperflorens" is a classification, not a name, and is not handled by the module.
- Hybrid symbols
- Elaeagnus × submacrophylla → Elaeagnus × submacrophylla
- ×Beallara → ×Beallara
- × Beallara → × Beallara
- {{hybrid}}Beallara → ×Beallara
Linked
Using |linked=yes
- Populus sect. Aigeiros → Populus sect. Aigeiros
- Elaeagnus × submacrophylla → Elaeagnus × submacrophylla
Abbreviated
Using |abbreviated=yes
- Populus sect. Aigeiros → P. sect. Aigeiros
- Acer tataricum subsp. ginnala → A. t. subsp. ginnala
- [also linked] × Sorbaronia fallax → × S. fallax
- [also linked] Elaeagnus × submacrophylla → E. × submacrophylla
- Elaeagnus ×submacrophylla → E. ×submacrophylla
- Elaeagnus {{hybrid}} submacrophylla → E. × submacrophylla
Disambiguation terms
By default, a parenthesized part of a taxon name is assumed to be a subgenus name, and is italicized:
- Varanus (Hapturosaurus) → Varanus (Hapturosaurus)
- Caia (plant) → Caia (plant) – wrong
To treat a parenthesized part as a disambiguation term, use |dab=yes
- Caia (plant) → Caia (plant)
- (also linked) Caia (plant) → Caia (plant)
For even more examples, see the testcases.
--[[
This module provides the core functionality to a set of templates used to
display a list of taxon name/authority pairs, with the taxon names optionally
italicized, wikilinked and/or emboldened. Such lists are usually part of
taxoboxes.
]]
-- use a function from Module:TaxonItalics to italicize a taxon name
local TaxonItalics = require("Module:TaxonItalics")
local IfPreview = require([[Module:If preview]])
local p = {}
--[[=========================================================================
Utility function to strip off any initial † present to mark the taxon as
extinct. The † must not be italicized, emboldened, or included in the
wikilinked text, so needs to be added back afterwards.
† is assumed to be present as one of:
* the unicode character †
* the HTML entity &dagger;
* the output of {{extinct}} – this will have been expanded before reaching this
  module; it is recognized by the name starting with '<abbr' and containing a
  † somewhere, and everything up to and including the last '</abbr>' is removed
The function returns two values: the taxon name with any † before it removed
and either '†' if it was present or the empty string if not.
Parameters:
  taxonName (string) – the taxon name, possibly preceded by a † marker
=============================================================================]]
function p.stripDagger(taxonName)
	local DAGGER = '†'
	local DAGGER_ENTITY = '&dagger;'

	-- Case 1: the literal character. † is multi-byte in UTF-8, so the
	-- Unicode-aware library is needed to test and remove one character.
	if mw.ustring.sub(taxonName, 1, 1) == DAGGER then
		return mw.ustring.sub(taxonName, 2, -1), DAGGER
	end

	-- Case 2: the HTML entity. The prefix length is taken from the entity
	-- itself so that the comparison and the slice cannot drift apart.
	if string.sub(taxonName, 1, #DAGGER_ENTITY) == DAGGER_ENTITY then
		return string.sub(taxonName, #DAGGER_ENTITY + 1), DAGGER
	end

	-- Case 3: did the taxon name originally have {{extinct}} before it?
	if string.sub(taxonName, 1, 5) == '<abbr' and mw.ustring.find(taxonName, DAGGER) then
		-- gsub returns a second value (the match count); keep only the text
		local stripped = string.gsub(taxonName, '^.*</abbr>', '', 1)
		return stripped, DAGGER
	end

	-- no extinct marker found
	return taxonName, ''
end
--[[=========================================================================
Utility function to do the following:
1. Strip off any initial † present to mark the taxon as extinct. We outsource
   to p.stripDagger() for this.
2. Strip off a surrounding pair of double quotation marks present to mark the
   taxon as invalid. The double-quotation marks, too, should not be formatted.
3. Strip off any leading "Candidatus " or "Ca. " marking the taxon as
   Candidatus.
The function returns four values:
* the taxon name with all of the three modifiers removed
* either '†' if it was present or the empty string if not
* either a single dquote if it was present in a pair or the empty string if not
* either italicized "Candidatus " or "Ca. " if it was present or the empty
  string if not
If the name opens with a quotation mark but does not close with one, the
opening mark is dropped, no dquote is reported, and the result of
IfPreview._warning() is appended to the first return value.
Parameters:
  taxonName (string) – the taxon name, possibly carrying the modifiers above
=============================================================================]]
function p.parseName(taxonName)
	local name, dagger = p.stripDagger(taxonName)

	local dquote = ''
	if string.sub(name, 1, 1) == '"' then
		name = string.sub(name, 2)
		-- the matching quotation mark closes the name, so look at the END
		if string.sub(name, -1) == '"' then
			name = string.sub(name, 1, -2)
			dquote = '"'
		else
			-- NOTE(review): _warning is called with a plain string, as before;
			-- confirm against Module:If preview whether it expects a table.
			name = name .. IfPreview._warning('Unmatched double quotation mark in taxon name: ' .. name)
		end
	end

	local candidatus = ''
	if string.sub(name, 1, 11) == 'Candidatus ' then
		name = string.sub(name, 12)
		candidatus = "''Candidatus'' "
	elseif string.sub(name, 1, 4) == 'Ca. ' then
		name = string.sub(name, 5)
		candidatus = "''Ca.'' "
	end

	return name, dagger, dquote, candidatus
end
--[[=========================================================================
The function returns a list of taxon names and authorities, appropriately
formatted, as an HTML unordered list with class "taxonlist".
Usage:
{{#invoke:TaxonList|main
|italic = yes - to italicize the taxon name
|linked = yes - to wikilink the taxon name (ignored when bold = yes)
|bold = yes - to embolden the taxon name
|abbreviated = yes - passed on to TaxonItalics.italicizeTaxonName
|incomplete = yes - to output "(incomplete list)" at the end of the list
}}
The template that transcludes the invoking template must supply an indefinite
even number of arguments in the format
|Name1|Author1 |Name2|Author2| ... |NameN|AuthorN
Arguments are paired up in ascending order of their numeric key. If an odd
number is supplied, the final name is output without an authority.
=============================================================================]]
function p.main(frame)
	local options = frame.args
	local italic = options['italic'] == 'yes'
	local bold = options['bold'] == 'yes'
	local linked = options['linked'] == 'yes'
	if bold then linked = false end -- must not have bold and wikilinked
	local abbreviated = options['abbreviated'] == 'yes'
	local incomplete = options['incomplete'] == 'yes'

	-- Gather the numbered (unnamed) arguments. pairs() does not promise any
	-- particular order, and the name/author pairing depends on order, so the
	-- entries are sorted by numeric key before use.
	local entries = {}
	for param, value in pairs(frame:getParent().args) do
		local index = tonumber(param)
		if index then
			entries[#entries + 1] = { index = index, value = value }
		end
	end
	table.sort(entries, function(a, b) return a.index < b.index end)

	local pieces = {}
	local first = true -- is this the first of a taxon name/author pair?
	for _, entry in ipairs(entries) do
		if first then
			-- if necessary separate any initial modifier
			local taxonName, dagger, dquote, candidatus = p.parseName(mw.text.trim(entry.value))
			if linked and not italic then
				taxonName = '[[' .. taxonName .. ']]'
			end
			-- NOTE(review): a Candidatus name is never passed to
			-- italicizeTaxonName, so with italic and linked both set it is
			-- not wikilinked either; confirm that this is intended.
			if italic and candidatus == '' then
				taxonName = TaxonItalics.italicizeTaxonName(taxonName, linked, abbreviated)
			end
			if bold then
				taxonName = '<b>' .. taxonName .. '</b>'
			end
			-- the modifiers stay outside the italic/link/bold markup
			pieces[#pieces + 1] = '<li>' .. dagger .. dquote .. candidatus .. taxonName .. dquote
		else
			pieces[#pieces + 1] = ' <small>' .. entry.value .. '</small></li>'
		end
		first = not first
	end

	-- a trailing name without an authority would otherwise leave <li> open
	if not first then
		pieces[#pieces + 1] = '</li>'
	end
	if incomplete then
		pieces[#pieces + 1] = '<small>(incomplete list)</small>'
	end
	return '<ul class="taxonlist">' .. table.concat(pieces) .. '</ul>'
end
return p