Module:Infobox date field metadata: Difference between revisions
Appearance
Content deleted Content added
paste from Module:Sandbox/Hellknowz/Test for initial deployment on a few infoboxes |
(No difference)
|
Revision as of 10:40, 3 September 2013
![]() | This module is rated as ready for general use. It has reached a mature form and is thought to be relatively bug-free and ready for use wherever appropriate. It is ready to mention on help pages and other Wikipedia resources as an option for new users to learn. To reduce server load and bad output, it should be improved by sandbox testing rather than repeated trial-and-error editing. |
![]() | This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
This module allows infoboxes using {{Infobox date field metadata}} to automatically produce machine-readable metadata in the form of microformats (see also WP Microformats) from infobox fields that contain recognized dates, times, and date ranges. Unrecognized, ambiguous and incomplete values are skipped, and some of them will produce an error tracking category.
Usage
This module is not intended to be used directly other than from its parent template {{Infobox date field metadata}}. See that template for description.
Parameters
- The first parameter is always the input string – this should be the infobox's date field value
- |dtstart=yes specifies that the hCalendar (WP:UF#hCalendar) dtstart (event start date) class is to be output if possible
- |dtend=yes specifies that the hCalendar dtend (event end date) class is to be output if possible
- |trackingCat=yes specifies that Category:Articles with automatically detected infobox date metadata should be added to every article where metadata was successfully emitted
- |noErrorCats=yes specifies that parsing error categories (currently Category:Articles with incomplete date ranges for automatic metadata) should not be added by default
Examples
-- Table that collects the functions this module exposes
local main = {}

-- Lower-case full English month names mapped to their month number (1 = January ... 12 = December)
local monthIndices = {}
for monthNumber, monthName in ipairs({
	'january', 'february', 'march', 'april', 'may', 'june',
	'july', 'august', 'september', 'october', 'november', 'december'
}) do
	monthIndices[monthName] = monthNumber
end
-- Lower-case three-letter month abbreviations mapped to their month number
local monthShortIndices = {
	jan = 1, feb = 2, mar = 3, apr = 4,
	may = 5, -- same spelling as the full name, so the long-name table already covers it
	jun = 6, jul = 7, aug = 8, sep = 9,
	oct = 10, nov = 11, dec = 12
}
-- Largest day number each month can ever have.
-- February is listed with 29; checkIfDayValid narrows that down when the year is known.
local monthDays = {
	[1] = 31,
	[2] = 29, -- will check below
	[3] = 31,
	[4] = 30,
	[5] = 31,
	[6] = 30,
	[7] = 31,
	[8] = 31,
	[9] = 30,
	[10] = 31,
	[11] = 30,
	[12] = 31
}

-- Checks that `day` is a day number that exists in `month`.
--   day   - day of month as a number
--   month - month number; anything that is not a key of monthDays makes the day invalid
--   year  - optional year; when given it is used for the February 29 leap-year check
-- Returns true if the day can exist, false otherwise.
function checkIfDayValid(day, month, year)
	local maxDays = monthDays[month]
	-- Without a known month there is nothing to validate the day against
	if (not maxDays) then return false end
	-- The digit pattern also matches '0' and '00', which is never a real day
	if (day < 1 or day > maxDays) then return false end
	-- February leap year check; without a year ('29 February') the 29th may exist, so it is accepted
	if (month == 2 and day == 29 and year) then
		local isLeapYear = ((year % 4 == 0) and (year % 100 ~= 0)) or (year % 400 == 0)
		if (not isLeapYear) then return false end
	end
	return true
end
-- Tells whether `month` is a usable month number.
-- Negative values are not considered because the digit pattern can never produce them.
function checkIfMonthValid(month)
	if (month == 0) then return false end
	return month <= 12
end
-- Tells whether `year` is accepted: anything from 1583 onwards
-- (the four-digit element pattern caps it at 9999).
function checkIfYearValid(year)
	local earliestYear = 1583
	return earliestYear <= year
end
-- Tells whether `hour` fits a 24-hour clock (0..23).
-- Negative values are not considered because the digit pattern can never produce them.
function checkIfHourValid(hour)
	if (hour < 24) then
		return true
	end
	return false
end
-- Tells whether `minute` is within 0..59.
-- Negative values are not considered because the digit pattern can never produce them.
function checkIfMinuteValid(minute)
	local minutesPerHour = 60
	return minute < minutesPerHour
end
-- Tells whether `second` is within 0..59.
-- Negative values are not considered because the digit pattern can never produce them.
function checkIfSecondValid(second)
	local secondsPerMinute = 60
	return second < secondsPerMinute
end
-- Result codes; checkAndOutput returns one of these as its first value.
-- NOTE(review): the third constant is spelled PARSERESULT_UNCRECOGNIZED here, while the code further down
-- refers to PARSERESULT_UNRECOGNIZED. That name is never declared, so it evaluates to nil wherever it is
-- used -- confirm the intended spelling and reconcile declaration and uses together.
local PARSERESULT_OKAY = 1
local PARSERESULT_FAIL = 2 -- whatever we encountered isn't expected for any pattern
local PARSERESULT_UNCRECOGNIZED = 3 -- 14 May 12, 2014 (all elements okay, no pattern)
local PARSERESULT_INCOMPLETE = 4 -- May 3
local PARSERESULT_INCOMPLETERANGE = 5 -- 3 May 2012 - June 2013
local PARSERESULT_INVALID = 6 -- May 32
local PARSERESULT_INVALIDRANGE = 7 -- May 3 - May 2
-- Returns true when every supplied (non-nil) component of one date/time passes its range check
local function dateTimePartsAreValid(year, month, day, hour, minute, second)
	if (year and not checkIfYearValid(year)) then return false end
	if (month and not checkIfMonthValid(month)) then return false end
	if (day and not checkIfDayValid(day, month, year)) then return false end
	if (hour and not checkIfHourValid(hour)) then return false end
	if (minute and not checkIfMinuteValid(minute)) then return false end
	if (second and not checkIfSecondValid(second)) then return false end
	return true
end

-- Formats one date/time as ISO-style text using the most precise components supplied.
-- Returns dateText, timeText; either may be nil (time without a year yields only the time text).
local function formatDateTimeParts(year, month, day, hour, minute, second)
	local dateText, timeText
	if (second or minute or hour) then
		-- A time was given: the date part is only written when there is a year to go with it
		if (year) then dateText = string.format('%d-%02d-%02d', year, month, day) end
		if (second) then
			timeText = string.format('%02d:%02d:%02d', hour, minute, second)
		elseif (minute) then
			timeText = string.format('%02d:%02d', hour, minute)
		else
			timeText = string.format('%02d', hour)
		end
	elseif (day) then
		dateText = string.format('%d-%02d-%02d', year, month, day)
	elseif (month) then
		dateText = string.format('%d-%02d', year, month)
	elseif (year) then
		dateText = string.format('%d', year)
	end
	return dateText, timeText
end

-- Verifies a start date/time and an optional end date/time, then formats them as ISO strings.
-- Every argument may be nil; the `2` variants describe the end of a range.
-- Returns a PARSERESULT_* code. For PARSERESULT_OKAY the code is followed by
-- start date, start time, end date, end time (each nil when not applicable).
function checkAndOutput(year, month, day, hour, minute, second, year2, month2, day2, hour2, minute2, second2)
	-- Start components are checked before end components
	if (not dateTimePartsAreValid(year, month, day, hour, minute, second)) then return PARSERESULT_INVALID end
	if (not dateTimePartsAreValid(year2, month2, day2, hour2, minute2, second2)) then return PARSERESULT_INVALID end

	-- Check that end date is actually after start date
	if (year and year2) then
		if (year2 < year) then return PARSERESULT_INVALIDRANGE end
		if (year2 == year and month and month2) then
			if (month2 < month) then return PARSERESULT_INVALIDRANGE end
			if (month2 == month and day and day2 and day2 < day) then return PARSERESULT_INVALIDRANGE end
			-- TODO: compare time
		end
	end

	-- Check that the date is actually complete even if valid
	if (not year) then
		if (month and month2) then return PARSERESULT_INCOMPLETERANGE end -- any of 'd-dM', 'dM-dM', 'Md-d', 'Md-Md'
		if (month) then return PARSERESULT_INCOMPLETE end -- 'May', 'May 15', '15 May'
	end
	if (month2 and not year2) then return PARSERESULT_INCOMPLETE end -- same but other end
	-- While technically there are more cases, none should have been matched and been given to us

	local date1, time1 = formatDateTimeParts(year, month, day, hour, minute, second)
	local date2, time2 = formatDateTimeParts(year2, month2, day2, hour2, minute2, second2)

	-- this function wouldn't be called without a matching pattern, so at least 1 value should have been filled
	return PARSERESULT_OKAY, date1, time1, date2, time2
end
-- Spellings of the afternoon marker; the stray '.' variants are artifacts of the time period pattern
local afternoonPeriodSpellings = {
	['pm'] = true,
	['p.m'] = true,
	['pm.'] = true,
	['p.m.'] = true
}

-- Returns the number of hours to add to a 12-hour clock value for the given period marker:
-- 12 for any of the 'pm' spellings, 0 for everything else.
function periodHourAdd(period)
	if (afternoonPeriodSpellings[period]) then
		return 12
	end
	return 0
end
-- Tokenizer state shared between parseDateString (which resets both values) and seekNextElement (which advances the position)
local seekString -- this is our local seek string, so we don't have to pass it as parameter every time
local currentPosition -- this keeps track of where we are in seeking our current string
-- These are the element type "constants" for readability mostly
-- seekNextElement returns one of them as its first value
local ELEMENT_INVALID = 1 -- nothing recognizable at the current position
local ELEMENT_ONETWODIGITS = 2 -- one or two digits (most likely a day number or time number)
local ELEMENT_FOURDIGITS = 3 -- exactly four digits (most likely a year)
local ELEMENT_WHITESPACE = 4
local ELEMENT_MONTHWORD = 5 -- full or abbreviated month name
local ELEMENT_COMMA = 6 -- comma and any following whitespace
local ELEMENT_DASH = 7 -- range dash, possibly surrounded by whitespace
local ELEMENT_DATESEPARATOR = 8 -- bare '-' without whitespace
local ELEMENT_TIMESEPARATOR = 9 -- ':'
local ELEMENT_TIMEPERIOD = 10 -- a.m./p.m. marker
-- Patterns that always denote a range dash (ELEMENT_DASH), tried in this order.
-- NOTE(review): the accompanying comments speak of non-breaking spaces, yet the literals appear to
-- contain plain spaces; with plain spaces every case here is also covered by the generic dash pattern
-- used afterwards. Confirm which space character (or entity text) these were meant to contain.
local rangeDashPatterns = {
	'^( [%-–—] )', -- Both non-breaking spaces - ' - '
	'^( [%-–—]%s*)', -- Non-breaking space - ' - '
	'^( &[nm]dash; )', -- Dash entity code and both non-breaking spaces
	'^( &[nm]dash;%s*)', -- Dash entity code and non-breaking space
	'^(%s*&[nm]dash;%s*)' -- Dash entity code
}

-- Tries `pattern` at currentPosition. On a match, moves currentPosition just past it.
-- Returns the match start (nil when there was no match) and the first capture, if the pattern has one.
local function takeElementMatch(pattern)
	local matchStart, matchEnd, capture = mw.ustring.find(seekString, pattern, currentPosition)
	if (matchStart) then
		currentPosition = matchEnd + 1 -- this is our new start location
	end
	return matchStart, capture
end

-- Reads the next element of seekString at currentPosition and advances currentPosition past it.
-- Returns three values: the ELEMENT_* type, the element's value (a number for digit runs and month
-- words, the matched text otherwise) and a boolean that is true once the end of seekString is reached.
-- ELEMENT_INVALID is returned on its own, without value or end flag.
function seekNextElement()
	-- Profiler says mw.ustring.find is the bottleneck, probably because it's unicode; not sure how to improve though besides writing my own pattern matcher
	local seekLength = mw.ustring.len(seekString)
	local found, text

	-- Digits
	found, text = takeElementMatch('^([0-9]+)')
	if (found) then
		-- Additionally check how many digits we actually have, as arbitrary number isn't valid
		local digitCount = #text
		if (digitCount <= 2) then -- most likely a day number or time number
			return ELEMENT_ONETWODIGITS, tonumber(text), (currentPosition > seekLength)
		elseif (digitCount == 4) then -- most likely a year
			return ELEMENT_FOURDIGITS, tonumber(text), (currentPosition > seekLength)
		end
		return ELEMENT_INVALID -- just the invalid, the number of digits (3 or 5+) won't match any patterns
	end

	-- Time period - a.m./p.m. (before letters)
	found, text = takeElementMatch('^%s*([ap]%.?m%.?)')
	if (found) then
		return ELEMENT_TIMEPERIOD, text, (currentPosition > seekLength)
	end

	-- Word
	found, text = takeElementMatch('^([A-Za-z]+)')
	if (found) then
		if (#text < 3) then
			-- TODO LETTERS
			return ELEMENT_INVALID -- just the invalid, the word was too short to be valid month name
		end
		-- Find the possible month name index (kept local so it does not leak into the global table)
		local lowered = mw.ustring.lower(text)
		local monthIndex = monthIndices[lowered] or monthShortIndices[lowered]
		if (monthIndex) then
			return ELEMENT_MONTHWORD, monthIndex, (currentPosition > seekLength)
		end
		return ELEMENT_INVALID -- just the invalid, the word didn't match a valid month name
	end

	-- Time separator (colon without whitespace)
	found, text = takeElementMatch('^(:)')
	if (found) then
		return ELEMENT_TIMESEPARATOR, text, (currentPosition > seekLength)
	end

	-- Comma and any following whitespace
	found, text = takeElementMatch('^(,%s*)')
	if (found) then
		return ELEMENT_COMMA, text, (currentPosition > seekLength)
	end

	-- Dash with possible whitespace or Date separator (dash without whitespace)
	for _, dashPattern in ipairs(rangeDashPatterns) do
		found, text = takeElementMatch(dashPattern)
		if (found) then
			return ELEMENT_DASH, text, (currentPosition > seekLength)
		end
	end

	-- Dash with regular (optional) whitespace around it
	found, text = takeElementMatch('^(%s*[%-–—]%s*)')
	if (found) then
		if (text == '-') then -- nothing else is date separator, no hyphens no stuff like that
			return ELEMENT_DATESEPARATOR, text, (currentPosition > seekLength)
		end
		-- we will actually need to check for DATESEPARATOR as well, as that one stole the '-' case
		return ELEMENT_DASH, text, (currentPosition > seekLength)
	end

	-- Whitespace (after all others that capture whitespace)
	found, text = takeElementMatch('^(%s+)')
	if (found) then
		return ELEMENT_WHITESPACE, text, (currentPosition > seekLength)
	end

	-- Whitespace -- same as above, single-character variant (other than for dashes); this pattern has no capture, so the value is nil
	found, text = takeElementMatch('^ ')
	if (found) then
		return ELEMENT_WHITESPACE, text, (currentPosition > seekLength)
	end

	return ELEMENT_INVALID -- just the invalid, we won't be parsing this further
end
-- Converts an hour written on a 12-hour clock plus its a.m./p.m. marker to a 24-hour clock hour.
-- 12 is the wrap-around hour: '12 am' is hour 0 and '12 pm' is hour 12, so it must not simply get 12 added.
local function applyTimePeriod(hour, period)
	local offset = periodHourAdd(period) -- 12 for the 'pm' spellings, 0 otherwise
	if (hour == 12) then return offset end
	return hour + offset
end

-- Splits `input` into elements with seekNextElement and matches the element sequence against the
-- supported date, time and date range layouts.
-- On a recognized layout this returns whatever checkAndOutput returns (a PARSERESULT_* code, followed
-- by start date, start time, end date and end time when the code is PARSERESULT_OKAY).
-- Returns nil when the input contains an element that cannot be tokenized.
-- For every other unmatched input it returns PARSERESULT_UNRECOGNIZED.
function parseDateString(input)
	-- A missing argument cannot be tokenized at all; report it like any other unmatched input
	if (input == nil) then return PARSERESULT_UNRECOGNIZED end
	-- Reset our seek string and position
	seekString = input
	currentPosition = 1
	local elements = {}
	local values = {}
	-- Seek the entire string now
	local numberOfElements = 0
	repeat
		-- Declared local here; the 'until' condition below can still see them
		local foundElement, foundValue, eos = seekNextElement()
		-- If we found something we can't process, return as unparsable
		if (foundElement == ELEMENT_INVALID) then return nil end
		numberOfElements = numberOfElements + 1
		elements[numberOfElements] = foundElement
		values[numberOfElements] = foundValue
	until eos
	--[[
	local s = input .. ' -> ' .. numberOfElements .. ' elements: '
	for currentElementIndex = 1, numberOfElements do
	s = s .. ' #' .. elements[currentElementIndex] .. '=' .. values[currentElementIndex]
	end
	do return s end
	]]
	-- Now comes an uber-deep if-then-else tree
	-- This is roughly the most efficient step-by-step parsing, something like log(N)
	-- Doing each combination via pattern/"Regex" is way slower
	-- Having each combination a clean function/preset means checking every element, so way slower
	-- Only immediate big improvement is to only seekNextElement() when actually checking that deep, though this will make a (even bigger) mess
	if (elements[1] == ELEMENT_ONETWODIGITS) then -- '3' or '10'
		if (elements[2] == ELEMENT_WHITESPACE) then -- '3 '
			if (elements[3] == ELEMENT_MONTHWORD) then -- '3 May'
				if (numberOfElements == 3) then return checkAndOutput(nil, values[3], values[1], nil, nil, nil) end
				if (elements[4] == ELEMENT_WHITESPACE) then -- '3 May '
					if (elements[5] == ELEMENT_FOURDIGITS) then -- '3 May 2013'
						if (numberOfElements == 5) then return checkAndOutput(values[5], values[3], values[1], nil, nil, nil) end
						if (elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) then -- '3 May 2013, '
							if (elements[7] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10'
								if (elements[8] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10 am'
									if (numberOfElements == 8) then return checkAndOutput(values[5], values[3], values[1], applyTimePeriod(values[7], values[8]), nil, nil) end
								elseif (elements[8] == ELEMENT_TIMESEPARATOR) then -- '3 May 2013, 10:'
									if (elements[9] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10:38'
										if (numberOfElements == 9) then return checkAndOutput(values[5], values[3], values[1], values[7], values[9], nil) end
										if (elements[10] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10:38 am'
											if (numberOfElements == 10) then return checkAndOutput(values[5], values[3], values[1], applyTimePeriod(values[7], values[10]), values[9], nil) end
										elseif (elements[10] == ELEMENT_TIMESEPARATOR) then -- '3 May 2013, 10:38:'
											if (elements[11] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10:38:27'
												if (numberOfElements == 11) then return checkAndOutput(values[5], values[3], values[1], values[7], values[9], values[11]) end
												if (elements[12] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10:38:27 am'
													if (numberOfElements == 12) then return checkAndOutput(values[5], values[3], values[1], applyTimePeriod(values[7], values[12]), values[9], values[11]) end
												end
											end
										end
									end
								end
							end
						elseif (elements[6] == ELEMENT_DASH or elements[6] == ELEMENT_DATESEPARATOR) then -- '3 May 2013 - '
							if (elements[7] == ELEMENT_ONETWODIGITS) then -- '3 May 2013 - 12'
								if (elements[8] == ELEMENT_WHITESPACE) then -- '3 May 2013 - 12 '
									if (elements[9] == ELEMENT_MONTHWORD) then -- '3 May 2013 - 12 February'
										if (elements[10] == ELEMENT_WHITESPACE) then -- '3 May 2013 - 12 February '
											if (elements[11] == ELEMENT_FOURDIGITS) then -- '3 May 2013 - 12 February 2014'
												if (numberOfElements == 11) then return checkAndOutput(values[5], values[3], values[1], nil, nil, nil, values[11], values[9], values[7], nil, nil, nil) end
											end
										end
									end
								end
							end
						end
					end
				elseif (elements[4] == ELEMENT_DASH or elements[4] == ELEMENT_DATESEPARATOR) then -- '3 May - '
					if (elements[5] == ELEMENT_ONETWODIGITS) then -- '3 May - 12'
						if (elements[6] == ELEMENT_WHITESPACE) then -- '3 May - 12 '
							if (elements[7] == ELEMENT_MONTHWORD) then -- '3 May - 12 October'
								if (numberOfElements == 7) then return checkAndOutput(nil, values[3], values[1], nil, nil, nil, nil, values[7], values[5], nil, nil, nil) end
								if (elements[8] == ELEMENT_COMMA or elements[8] == ELEMENT_WHITESPACE) then -- '3 May - 12 October '
									if (elements[9] == ELEMENT_FOURDIGITS) then -- '3 May - 12 October 2013'
										if (numberOfElements == 9) then return checkAndOutput(values[9], values[3], values[1], nil, nil, nil, values[9], values[7], values[5], nil, nil, nil) end
									end
								end
							end
						end
					end
				end
			end
		elseif (elements[2] == ELEMENT_DASH or elements[2] == ELEMENT_DATESEPARATOR) then -- '3 - '
			if (elements[3] == ELEMENT_ONETWODIGITS) then -- '3 - 12'
				if (elements[4] == ELEMENT_WHITESPACE) then -- '3 - 12 '
					if (elements[5] == ELEMENT_MONTHWORD) then -- '3 - 12 May'
						if (numberOfElements == 5) then return checkAndOutput(nil, values[5], values[1], nil, nil, nil, nil, values[5], values[3], nil, nil, nil) end
						if (elements[6] == ELEMENT_COMMA or elements[6] == ELEMENT_WHITESPACE) then -- '3 - 12 May '
							if (elements[7] == ELEMENT_FOURDIGITS) then -- '3 - 12 May 2013'
								if (numberOfElements == 7) then return checkAndOutput(values[7], values[5], values[1], nil, nil, nil, values[7], values[5], values[3], nil, nil, nil) end
							end
						end
					end
				end
			end
		end
		-- Here's a case where we want to optimize or rather add readability and trim redundancy
		-- Basically, any time '10am', '10:28', '10:28am', '10:28:27', '10:28:27am' can be followed by a date, which means 5 copies of 30+ lines (urgh)
		-- Instead we will only check once, but using a different element index offset if needed, so date might start at element 4 or 5 or 6 etc.
		-- Currently we only have '10', but if it turns out to be a time we will be checking if it's followed by a date
		local wasTime = false -- by default we didn't find a valid time syntax, we have '10' and that's not a time by itself without 'am/pm' or further precision
		local possibleHour, possibleMinute, possibleSecond -- temporary values that we will fill as far as we can when parsing time and use for time+date combo if one is found
		local i = 0 -- this is our offset from the closest possible location for date seeking
		if (elements[2] == ELEMENT_TIMESEPARATOR) then -- '10:'
			possibleHour = values[1] -- only once we see ':' (or 'am' below) it is likely a time
			if (elements[3] == ELEMENT_ONETWODIGITS) then -- '10:28'
				if (numberOfElements == 3) then return checkAndOutput(nil, nil, nil, values[1], values[3], nil) end
				possibleMinute = values[3]
				wasTime = true -- this is a valid final time, so we can check date appended to this
				i = 1 -- '10', ':' and '28' are three elements, so we start 1 further from 3
				if (elements[4] == ELEMENT_TIMESEPARATOR) then -- '10:28:'
					wasTime = false -- a time can't end with separator, so if this is last match, we aren't appending any dates
					if (elements[5] == ELEMENT_ONETWODIGITS) then -- '10:28:27'
						if (numberOfElements == 5) then return checkAndOutput(nil, nil, nil, values[1], values[3], values[5]) end
						possibleSecond = values[5]
						wasTime = true -- this is a valid final time, so we can check date appended to this
						i = 3 -- '10', ':', '28', ':' and '27' are five elements, so we start 3 further from 3
						if (elements[6] == ELEMENT_TIMEPERIOD) then -- '10:28:27 am'
							if (numberOfElements == 6) then return checkAndOutput(nil, nil, nil, applyTimePeriod(values[1], values[6]), values[3], values[5]) end
							possibleHour = applyTimePeriod(values[1], values[6]) -- hour now needs possible adjusting since we saw a time period
							-- wasTime = true -- already set
							i = 4 -- '10', ':', '28', ':', '27' and 'am' are six elements, so we start 4 further from 3
						end
					end
				elseif (elements[4] == ELEMENT_TIMEPERIOD) then -- '10:28 am'
					if (numberOfElements == 4) then return checkAndOutput(nil, nil, nil, applyTimePeriod(values[1], values[4]), values[3], nil) end
					wasTime = true -- this is a valid final time, so we can check date appended to this
					possibleHour = applyTimePeriod(values[1], values[4]) -- hour now needs possible adjusting since we saw a time period
					i = 2 -- '10', ':', '28' and 'am' are four elements, so we start 2 further from 3
				end
			end
		elseif (elements[2] == ELEMENT_TIMEPERIOD) then -- '10 am'
			if (numberOfElements == 2) then return checkAndOutput(nil, nil, nil, applyTimePeriod(values[1], values[2]), nil, nil) end
			possibleHour = applyTimePeriod(values[1], values[2]) -- only once we see 'am' (or ':' above) it is likely a time
			wasTime = true -- this is a valid final time, so we can check date appended to this
			i = 0 -- '10' and 'am' are two elements, so we start at 3 - default
		end
		if (wasTime) then -- '10am', '10:28', '10:28am', '10:28:27', '10:28:27am' (using just '10:28:27...' below)
			-- Now we will try to append a date to the time
			if (elements[3+i] == ELEMENT_WHITESPACE or elements[3+i] == ELEMENT_COMMA) then -- '10:28:27, '
				if (elements[4+i] == ELEMENT_ONETWODIGITS) then -- '10:28:27, 3'
					if (elements[5+i] == ELEMENT_WHITESPACE) then -- '10:28:27, 3 '
						if (elements[6+i] == ELEMENT_MONTHWORD) then -- '10:28:27, 3 May'
							if (elements[7+i] == ELEMENT_WHITESPACE) then -- '10:28:27, 3 May '
								if (elements[8+i] == ELEMENT_FOURDIGITS) then -- '10:28:27, 3 May 2013'
									if (numberOfElements == 8+i) then return checkAndOutput(values[8+i], values[6+i], values[4+i], possibleHour, possibleMinute, possibleSecond) end
								end
							end
						end
					end
				elseif (elements[4+i] == ELEMENT_MONTHWORD) then -- '10:28:27, May'
					if (elements[5+i] == ELEMENT_WHITESPACE) then -- '10:28:27, May '
						if (elements[6+i] == ELEMENT_ONETWODIGITS) then -- '10:28:27, May 3'
							if (elements[7+i] == ELEMENT_COMMA or elements[7+i] == ELEMENT_WHITESPACE) then -- '10:28:27, May 3, '
								if (elements[8+i] == ELEMENT_FOURDIGITS) then -- '10:28:27, May 3, 2013'
									if (numberOfElements == 8+i) then return checkAndOutput(values[8+i], values[4+i], values[6+i], possibleHour, possibleMinute, possibleSecond) end
								end
							end
						end
					end
				elseif (elements[4+i] == ELEMENT_FOURDIGITS) then -- '10:28:27, 2013'
					if (elements[5+i] == ELEMENT_DATESEPARATOR) then -- '10:28:27, 2013-'
						if (elements[6+i] == ELEMENT_ONETWODIGITS) then -- '10:28:27, 2013-05'
							if (elements[7+i] == ELEMENT_DATESEPARATOR) then -- '10:28:27, 2013-05-'
								if (elements[8+i] == ELEMENT_ONETWODIGITS) then -- '10:28:27, 2013-05-03'
									if (numberOfElements == 8+i) then return checkAndOutput(values[4+i], values[6+i], values[8+i], possibleHour, possibleMinute, possibleSecond) end
								end
							end
						end
					end
				end
			end
		end
	elseif (elements[1] == ELEMENT_FOURDIGITS) then -- '2013'
		if (numberOfElements == 1) then return checkAndOutput(values[1], nil, nil, nil, nil, nil) end
		if (elements[2] == ELEMENT_DATESEPARATOR) then -- '2013-'
			if (elements[3] == ELEMENT_ONETWODIGITS) then -- '2013-05'
				if (numberOfElements == 3) then return checkAndOutput(values[1], values[3], nil, nil, nil, nil) end
				if (elements[4] == ELEMENT_DATESEPARATOR) then -- '2013-05-'
					if (elements[5] == ELEMENT_ONETWODIGITS) then -- '2013-05-03'
						if (numberOfElements == 5) then return checkAndOutput(values[1], values[3], values[5], nil, nil, nil) end
						if (elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) then -- '2013-05-03, '
							if (elements[7] == ELEMENT_ONETWODIGITS) then -- '2013-05-03, 10'
								if (elements[8] == ELEMENT_TIMEPERIOD) then -- '2013-05-03, 10 am'
									if (numberOfElements == 8) then return checkAndOutput(values[1], values[3], values[5], applyTimePeriod(values[7], values[8]), nil, nil) end
								elseif (elements[8] == ELEMENT_TIMESEPARATOR) then -- '2013-05-03, 10:'
									if (elements[9] == ELEMENT_ONETWODIGITS) then -- '2013-05-03, 10:38'
										if (numberOfElements == 9) then return checkAndOutput(values[1], values[3], values[5], values[7], values[9], nil) end
										if (elements[10] == ELEMENT_TIMEPERIOD) then -- '2013-05-03, 10:38 am'
											if (numberOfElements == 10) then return checkAndOutput(values[1], values[3], values[5], applyTimePeriod(values[7], values[10]), values[9], nil) end
										elseif (elements[10] == ELEMENT_TIMESEPARATOR) then -- '2013-05-03, 10:38:'
											if (elements[11] == ELEMENT_ONETWODIGITS) then -- '2013-05-03, 10:38:27'
												if (numberOfElements == 11) then return checkAndOutput(values[1], values[3], values[5], values[7], values[9], values[11]) end
												if (elements[12] == ELEMENT_TIMEPERIOD) then -- '2013-05-03, 10:38:27 am'
													if (numberOfElements == 12) then return checkAndOutput(values[1], values[3], values[5], applyTimePeriod(values[7], values[12]), values[9], values[11]) end
												end
											end
										end
									end
								end
							end
						end
					end
				end
			end
		end -- can't elseif, because we have ELEMENT_DATESEPARATOR, which repeats above
		if (elements[2] == ELEMENT_DASH or elements[2] == ELEMENT_DATESEPARATOR) then -- '2013 - '
			if (elements[3] == ELEMENT_FOURDIGITS) then -- '2013 - 2014'
				if (numberOfElements == 3) then return checkAndOutput(values[1], nil, nil, nil, nil, nil, values[3], nil, nil, nil, nil, nil) end
			end
		elseif (elements[2] == ELEMENT_WHITESPACE) then -- '2013 '
			if (elements[3] == ELEMENT_MONTHWORD) then -- '2013 May'
				if (numberOfElements == 3) then return checkAndOutput(values[1], values[3], nil, nil, nil, nil) end
				-- 2013 May - 2013 April (let's see first if this is ever used real-world)
			end
		end
	elseif (elements[1] == ELEMENT_MONTHWORD) then -- 'May'
		if (numberOfElements == 1) then return checkAndOutput(nil, values[1], nil, nil, nil, nil) end
		if (elements[2] == ELEMENT_WHITESPACE) then -- 'May '
			if (elements[3] == ELEMENT_ONETWODIGITS) then -- 'May 3'
				if (numberOfElements == 3) then return checkAndOutput(nil, values[1], values[3], nil, nil, nil) end
				if (elements[4] == ELEMENT_COMMA or elements[4] == ELEMENT_WHITESPACE) then -- 'May 3, '
					if (elements[5] == ELEMENT_FOURDIGITS) then -- 'May 3, 2013'
						if (numberOfElements == 5) then return checkAndOutput(values[5], values[1], values[3], nil, nil, nil) end
						if (elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) then -- 'May 3, 2013, '
							if (elements[7] == ELEMENT_ONETWODIGITS) then -- 'May 3, 2013, 10'
								if (elements[8] == ELEMENT_TIMEPERIOD) then -- 'May 3, 2013, 10 am'
									if (numberOfElements == 8) then return checkAndOutput(values[5], values[1], values[3], applyTimePeriod(values[7], values[8]), nil, nil) end
								elseif (elements[8] == ELEMENT_TIMESEPARATOR) then -- 'May 3, 2013, 10:'
									if (elements[9] == ELEMENT_ONETWODIGITS) then -- 'May 3, 2013, 10:38'
										if (numberOfElements == 9) then return checkAndOutput(values[5], values[1], values[3], values[7], values[9], nil) end
										if (elements[10] == ELEMENT_TIMEPERIOD) then -- 'May 3, 2013, 10:38 am'
											if (numberOfElements == 10) then return checkAndOutput(values[5], values[1], values[3], applyTimePeriod(values[7], values[10]), values[9], nil) end
										elseif (elements[10] == ELEMENT_TIMESEPARATOR) then -- 'May 3, 2013, 10:38:'
											if (elements[11] == ELEMENT_ONETWODIGITS) then -- 'May 3, 2013, 10:38:27'
												if (numberOfElements == 11) then return checkAndOutput(values[5], values[1], values[3], values[7], values[9], values[11]) end
												if (elements[12] == ELEMENT_TIMEPERIOD) then -- 'May 3, 2013, 10:38:27 am'
													if (numberOfElements == 12) then return checkAndOutput(values[5], values[1], values[3], applyTimePeriod(values[7], values[12]), values[9], values[11]) end
												end
											end
										end
									end
								end
							end
						elseif (elements[6] == ELEMENT_DASH or elements[6] == ELEMENT_DATESEPARATOR) then -- 'May 3, 2013 - '
							if (elements[7] == ELEMENT_MONTHWORD) then -- 'May 3, 2013 - February'
								if (elements[8] == ELEMENT_WHITESPACE) then -- 'May 3, 2013 - February '
									if (elements[9] == ELEMENT_ONETWODIGITS) then -- 'May 3, 2013 - February 12'
										if (elements[10] == ELEMENT_COMMA or elements[10] == ELEMENT_WHITESPACE) then -- 'May 3, 2013 - February 12, '
											if (elements[11] == ELEMENT_FOURDIGITS) then -- 'May 3, 2013 - February 12, 2014'
												if (numberOfElements == 11) then return checkAndOutput(values[5], values[1], values[3], nil, nil, nil, values[11], values[7], values[9], nil, nil, nil) end
											end
										end
									end
								end
							end
						end
					end
				elseif (elements[4] == ELEMENT_DASH or elements[4] == ELEMENT_DATESEPARATOR) then -- 'May 3 - '
					if (elements[5] == ELEMENT_MONTHWORD) then -- 'May 3 - June'
						if (elements[6] == ELEMENT_WHITESPACE) then -- 'May 3 - June '
							if (elements[7] == ELEMENT_ONETWODIGITS) then -- 'May 3 - June 12'
								if (numberOfElements == 7) then return checkAndOutput(nil, values[1], values[3], nil, nil, nil, nil, values[5], values[7], nil, nil, nil) end
								if (elements[8] == ELEMENT_COMMA or elements[8] == ELEMENT_WHITESPACE) then -- 'May 3 - June 12, '
									if (elements[9] == ELEMENT_FOURDIGITS) then -- 'May 3 - June 12, 2014'
										if (numberOfElements == 9) then return checkAndOutput(values[9], values[1], values[3], nil, nil, nil, values[9], values[5], values[7], nil, nil, nil) end
									end
								end
							end
						end
					elseif (elements[5] == ELEMENT_ONETWODIGITS) then -- 'May 3 - 12'
						if (numberOfElements == 5) then return checkAndOutput(nil, values[1], values[3], nil, nil, nil, nil, values[1], values[5], nil, nil, nil) end
						if (elements[6] == ELEMENT_COMMA or elements[6] == ELEMENT_WHITESPACE) then -- 'May 3 - 12, '
							if (elements[7] == ELEMENT_FOURDIGITS) then -- 'May 3 - 12, 2013'
								if (numberOfElements == 7) then return checkAndOutput(values[7], values[1], values[3], nil, nil, nil, values[7], values[1], values[5], nil, nil, nil) end
							end
						end
					end
				end
			elseif (elements[3] == ELEMENT_FOURDIGITS) then -- 'May 2013'
				if (numberOfElements == 3) then return checkAndOutput(values[3], values[1], nil, nil, nil, nil) end
				if (elements[4] == ELEMENT_DASH or elements[4] == ELEMENT_DATESEPARATOR) then -- 'May 2013 -'
					if (elements[5] == ELEMENT_MONTHWORD) then -- 'May 2013 - June'
						if (elements[6] == ELEMENT_WHITESPACE) then -- 'May 2013 - June '
							if (elements[7] == ELEMENT_FOURDIGITS) then -- 'May 2013 - June 2013'
								if (numberOfElements == 7) then return checkAndOutput(values[3], values[1], nil, nil, nil, nil, values[7], values[5], nil, nil, nil, nil) end
							end
						end
					end
				end
			end
		elseif (elements[2] == ELEMENT_DASH or elements[2] == ELEMENT_DATESEPARATOR) then -- 'May - '
			if (elements[3] == ELEMENT_MONTHWORD) then -- 'May - June'
				if (elements[4] == ELEMENT_WHITESPACE) then -- 'May - June '
					if (elements[5] == ELEMENT_FOURDIGITS) then -- 'May - June 2013'
						if (numberOfElements == 5) then return checkAndOutput(values[5], values[1], nil, nil, nil, nil, values[5], values[3], nil, nil, nil, nil) end
					end
				end
			end
		end
	end
	-- Every path that did not return above ends here: the combination of elements was not a recognized one.
	-- NOTE(review): the result constants near the top of the file declare PARSERESULT_UNCRECOGNIZED (sic),
	-- so this identifier currently evaluates to nil; main.parseDateOutputRaw compares against the same
	-- identifier. Reconcile the spelling of the declaration and all of its uses together.
	return PARSERESULT_UNRECOGNIZED
end
-- This function will return a raw string for generic checks and unit tests, including defined parse errors.
--   frame.args[1] - the text to parse
--   frame.args[2] - when 'pretty', parse errors are wrapped in a colour template and italics
-- On success the result is 'startDate startTime; endDate', using only the parts that were found.
function main.parseDateOutputRaw(frame)
	local result, startDate, startTime, endDate = parseDateString(frame.args[1]) -- currently no end time
	local pretty = (frame.args[2] == 'pretty')

	-- Renders one parse error either as plain text or, in pretty mode, as coloured italic wikitext
	local function report(colour, message)
		if (pretty) then
			return frame:preprocess('\'\'{{' .. colour .. '|' .. message .. '}}\'\'')
		end
		return message
	end

	if (result == PARSERESULT_FAIL) then return report('gray', 'Failed parse') end
	-- parseDateString returns nil when it cannot tokenize or match the input.
	-- That case is spelled out here instead of relying on the constant itself being nil.
	if (result == nil or result == PARSERESULT_UNRECOGNIZED) then return report('gray', 'Unrecognized pattern') end
	if (result == PARSERESULT_INVALID) then return report('maroon', 'Invalid date/time') end
	if (result == PARSERESULT_INVALIDRANGE) then return report('maroon', 'Invalid date range') end
	if (result == PARSERESULT_INCOMPLETE) then return report('cyan', 'Incomplete date') end
	if (result == PARSERESULT_INCOMPLETERANGE) then return report('cyan', 'Incomplete date range') end

	local s
	if (startDate) then s = startDate end
	if (startTime) then
		if (startDate) then s = s .. ' ' .. startTime else s = startTime end
	end
	if (endDate) then
		-- currently end date implies start date; guard anyway so a lone end date cannot raise an error
		if (s) then s = s .. '; ' .. endDate else s = endDate end
	end
	return s
end
-- Parses frame.args[1] as a date and returns hidden microformat markup for it.
--   dtstart=yes     emit a <span class="dtstart"> when a start date/time was parsed
--   dtend=yes       emit a <span class="dtend"> when an end date/time was parsed
--   trackingCat=yes append the tracking category to any emitted markup
--   noErrorCats=yes suppress the parse-error category
-- Returns nil when the parse result is not PARSERESULT_OKAY (apart from the
-- incomplete-range error category) or when no span was requested/available.
function main.emitMetadata(frame)
	local args = frame.args

	-- Wraps a date and/or time in the given opening span tag (ISO-style 'T' joins both)
	local function wrap(openTag, date, time)
		local value
		if date and time then
			value = date .. 'T' .. time
		elseif date then
			value = date
		else
			value = time
		end
		return openTag .. value .. '</span>'
	end

	-- First parse the date and see what kind of result we get
	local result, startDate, startTime, endDate, endTime = parseDateString(args[1])

	-- Error categories; only incomplete ranges are tracked at the moment
	if args.noErrorCats ~= 'yes' then
		if result == PARSERESULT_INCOMPLETERANGE then
			return frame:preprocess('<includeonly>[[Category:Articles with incomplete date ranges for automatic metadata]]</includeonly>')
		end
	end

	-- Everything below applies to a valid date only
	if result ~= PARSERESULT_OKAY then
		return nil
	end

	-- Start value, when present and requested
	local dtstartSpan
	if (startDate or startTime) and args.dtstart == 'yes' then
		dtstartSpan = wrap('<span class="dtstart">', startDate, startTime)
	end

	-- End value, when present and requested (end values only occur alongside start values)
	local dtendSpan
	if (endDate or endTime) and args.dtend == 'yes' then
		dtendSpan = wrap('<span class="dtend">', endDate, endTime)
	end

	local trackingCat = ''
	if args.trackingCat == 'yes' then
		trackingCat = '[[Category:Articles with automatically detected infobox date metadata]]'
	end

	-- Nothing to emit means no markup and no tracking category either
	if not (dtstartSpan or dtendSpan) then
		return nil
	end
	return '<span style="display:none"> (' .. (dtstartSpan or '') .. (dtendSpan or '') .. ')</span>' .. trackingCat
end
return main