Jump to content

Module:ISBN RangeMessage xlate: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
No edit summary
No edit summary
Line 15: Line 15:
local range_pattern = '<Range>(%d+)%-(%d+)</Range>'; -- registrant; captures are min and max ranges right-padded to 7 digits; min not currently used
local range_pattern = '<Range>(%d+)%-(%d+)</Range>'; -- registrant; captures are min and max ranges right-padded to 7 digits; min not currently used
local length_pattern = '<Length>(%d)</Length>'; -- registrant; capture holds number of (left) digits to use from <Range>; for len=2 in max=1299999 -> 12
local length_pattern = '<Length>(%d)</Length>'; -- registrant; capture holds number of (left) digits to use from <Range>; for len=2 in max=1299999 -> 12


--[[--------------------------< R E N D E R _ O U T P U T >----------------------------------------------------

[9798229999999] = 9798200000000, -- United States
[9798399999999] = 9798350000000,
[9798849999999] = 9798400000000,
[9798884999999] = 9798850000000,
[9798899999999] = 9798885000000,
[9798989999999] = 9798985000000,

local hyphen_pos_t = { -- RangeMessage timestamp: 2023-04-19T10:41:25 BST
[9780199999999] = {1, 2, 6}, -- English language
[9780227999999] = {1, 3, 5},
local inverse_range_t = { -- RangeMessage timestamp: 2023-04-19T10:41:25 BST
[9780199999999] = 9780000000000, -- English language
[9780227999999] = 9780200000000,
local inverse_range_t = { -- RangeMessage timestamp: 2023-04-19T10:41:25 BST
[9780199999999] = 9780000000000, -- English language

]]

local function render_output (frame, table_name, out_t, timestamp, timezone, rep)
table.insert (out_t, '\t}</syntaxhighlight>'); -- to close the table
table.insert (out_t, 1, table.concat ({ -- insert this at the start of the output sequence
'<syntaxhighlight lang="lua">local ' .. table_name .. ' = {', -- opening stuff
string.rep ('\t', rep), -- tabs to place the timestamp (15 for hyphen_pos_t, 14 for inverse_range_t)
frame:callParserFunction ('#time', {'-- "RangeMessage timestamp": Y-m-d"T"h:i:s', timestamp}), -- the timestamp
timezone -- and timezone
}));
return frame:preprocess (table.concat (out_t, '\n')); -- make a big string and done
end




Line 36: Line 70:
local function range_message_xlate (frame)
local function range_message_xlate (frame)
local out_t = {}; -- the base output goes here (before prettifying)
local out_t = {}; -- the base output goes here (before prettifying)
local out_range_t = {}
local timestamp, timezone = range_message_raw:match (timestamp_pattern); -- get the RangeMessage timestamp
local timestamp, timezone = range_message_raw:match (timestamp_pattern); -- get the RangeMessage timestamp
Line 45: Line 80:
for rule in group:gmatch (rule_pattern) do -- get a <Rule>...</Rule> block
for rule in group:gmatch (rule_pattern) do -- get a <Rule>...</Rule> block
local isbn_parts_t = {}; -- the prefix, registration_group, and registrant, then filled right with '9's to thirteen digits
local isbn_parts_min_t = {}; -- the prefix, registration_group, and registrant, then filled right with '0's to thirteen digits
local isbn_parts_max_t = {}; -- the prefix, registration_group, and registrant, then filled right with '9's to thirteen digits
local element_lengths_t = {}; -- sequence of lengths for registration group, registrant, and publication elements in an isbn
local element_lengths_t = {}; -- sequence of lengths for registration group, registrant, and publication elements in an isbn


table.insert (isbn_parts_t, prefix); -- add prefix (1) This applies to each <Rule> in the <Group>
table.insert (isbn_parts_max_t, prefix); -- add prefix (1) This applies to each <Rule> in the <Group>
table.insert (isbn_parts_t, registration_group); -- add registration group (2) This applies to each <Rule> in the <Group>
table.insert (isbn_parts_min_t, prefix); -- add prefix (1) This applies to each <Rule> in the <Group>
table.insert (isbn_parts_max_t, registration_group); -- add registration group (2) This applies to each <Rule> in the <Group>
table.insert (isbn_parts_min_t, registration_group); -- add registration group (2) This applies to each <Rule> in the <Group>


table.insert (element_lengths_t, #registration_group); -- add length of isbn registration group (1) This applies to each <Rule> in the <Group>
table.insert (element_lengths_t, #registration_group); -- add length of isbn registration group (1) This applies to each <Rule> in the <Group>
Line 59: Line 98:
table.insert (element_lengths_t, 9-#registration_group-registrant_len); -- calculate and add length available for publication element (3)
table.insert (element_lengths_t, 9-#registration_group-registrant_len); -- calculate and add length available for publication element (3)
table.insert (isbn_parts_t, range_max:sub (1, registrant_len)); -- add <registrant_len> digits from left side of <range_max> (3)
table.insert (isbn_parts_max_t, range_max:sub (1, registrant_len)); -- add <registrant_len> digits from left side of <range_max> (3)
table.insert (isbn_parts_min_t, range_min:sub (1, registrant_len)); -- add <registrant_len> digits from left side of <range_min> (3)


local fill_length = 0; -- calculate how many '9's are needed to right-fill an isbn
local fill_length = 0; -- calculate how many '9's are needed to right-fill an isbn
for _, v in ipairs (isbn_parts_t) do -- for each part get its length
for _, v in ipairs (isbn_parts_max_t) do -- for each part get its length
fill_length = fill_length + #(tostring (v)); -- and accumulate
fill_length = fill_length + #(tostring (v)); -- and accumulate
end
end
fill_length = 13 - fill_length; -- and figure how many are needed
fill_length = 13 - fill_length; -- and figure how many are needed
table.insert (isbn_parts_t, tonumber (string.rep ('9', fill_length))); -- and right fill to 13 digits
table.insert (isbn_parts_max_t, string.rep ('9', fill_length)); -- and right fill to 13 digits
table.insert (isbn_parts_min_t, string.rep ('0', fill_length)); -- and right fill to 13 digits
--error (string.rep ('0', fill_length))
local pretty_string = '\t[' .. table.concat (isbn_parts_t) .. '] = {' .. table.concat (element_lengths_t, ', ') .. '},'; -- prettify
--error (mw.dumpObject (isbn_parts_min_t))
local pretty_string = '\t[' .. table.concat (isbn_parts_max_t) .. '] = {' .. table.concat (element_lengths_t, ', ') .. '},'; -- prettify
local pretty_string_range = '\t[' .. table.concat (isbn_parts_max_t) .. '] = ' .. table.concat (isbn_parts_min_t) .. ','; -- prettify
if not agency_used then
if not agency_used then
pretty_string = pretty_string .. string.rep ('\t', 12) .. '-- ' .. agency; -- add agency to output
pretty_string = pretty_string .. string.rep ('\t', 12) .. '-- ' .. agency; -- add agency to output
pretty_string_range = pretty_string_range .. string.rep ('\t', 11) .. '-- ' .. agency; -- add agency to output
agency_used = true; -- this 'agency' added to output; don't repeat it
agency_used = true; -- this 'agency' added to output; don't repeat it
end
end
table.insert (out_t, pretty_string); -- add the prettified string to the output sequence
table.insert (out_t, pretty_string); -- add the prettified string to the output sequence
table.insert (out_range_t, pretty_string_range);
end
end
end
end
end
end


if 'yes' == frame.args.range then -- select the output
table.insert (out_t, '\t}</syntaxhighlight>'); -- to close the table
return render_output (frame, 'inverse_range_t', out_range_t, timestamp, timezone, 14); -- inverse_range_t table
table.insert (out_t, 1, table.concat ({ -- insert this at the start of the output sequence
else
'<syntaxhighlight lang="lua">local hyphen_pos_t = {', -- opening stuff
return render_output (frame, 'hyphen_pos_t', out_t, timestamp, timezone, 15); -- hyphen_pos_t table
string.rep ('\t', 15), -- tabs to place the timestamp
end
frame:callParserFunction ('#time', {'-- "RangeMessage timestamp": Y-m-d"T"h:i:s', timestamp}), -- the timestamp
timezone -- and timezone
}));
return frame:preprocess (table.concat (out_t, '\n')); -- make a big string and done
end
end



Revision as of 22:47, 21 April 2023

require ('strict');

local title_object = mw.title.getCurrentTitle ();								-- title object of the page currently being rendered
if not title_object.fullText:find ('/doc$') then								-- title does not end in '/doc'; assumed to be the module page itself
	title_object = mw.title.new (title_object.fullText .. '/doc');				-- so switch to the title object of this module's doc page
end
local range_message_raw = title_object:getContent();							-- raw content of the doc page; expected to hold the RangeMessage xml

local timestamp_pattern = '<MessageDate>%D+(%d%d %w%w%w %d%d%d%d %d%d:%d%d:%d%d)( %w%w%w)</MessageDate>' -- two captures: the timestamp, then the time zone with its leading space (BST or GMT)
local group_pattern = '<Group>(.-)</Group>';									-- one capture: body of a <Group>, holding its <Prefix>, <Agency>, and <Rules>
local prefix_pattern = '<Prefix>(%d+)%-(%d+)</Prefix>';							-- two captures: GS1 prefix (978 or 979), then registration group
local agency_pattern = '<Agency>(.-)</Agency>';									-- one capture: agency name (language, country, territory)
local rule_pattern = '<Rule>(.-)</Rule>';										-- one capture: body of a <Rule>, holding the registrant <Range> and <Length>
local range_pattern = '<Range>(%d+)%-(%d+)</Range>';							-- two captures: registrant range min, then max; both right-padded to 7 digits
local length_pattern = '<Length>(%d)</Length>';									-- one capture: how many left-most digits of <Range> to use; e.g. len=2 with max=1299999 -> 12


--[[--------------------------< R E N D E R _ O U T P U T >----------------------------------------------------

Wraps a sequence of prettified table rows in the opening and closing lines of a lua table definition, places
the whole thing inside <syntaxhighlight> tags, and returns the preprocessed result.

Parameters:
	frame:		frame object; supplies callParserFunction() for {{#time:}} and preprocess() for the final text
	table_name:	name of the rendered lua table ('hyphen_pos_t' or 'inverse_range_t')
	out_t:		sequence of row strings; MODIFIED in place: the opening line is inserted at index 1 and the
				closing line is appended
	timestamp:	RangeMessage timestamp without its time zone; handed to {{#time:}} for reformatting
	timezone:	time zone text; concatenated as-is directly after the formatted timestamp
	rep:		number of tabs between the opening brace and the timestamp comment (15 for hyphen_pos_t, 14 for
				inverse_range_t)

Returns the complete rendering as a single string.

The time of day is formatted with 'H' (24-hour clock).  A 12-hour 'h' without an am/pm marker would render an
afternoon RangeMessage timestamp such as 14:41:25 as 02:41:25.

Sample renderings (first lines only):

local hyphen_pos_t = {															-- RangeMessage timestamp: 2023-04-19T10:41:25 BST
	[9780199999999] = {1, 2, 6},												-- English language
	[9780227999999] = {1, 3, 5},

local inverse_range_t = {														-- RangeMessage timestamp: 2023-04-19T10:41:25 BST
	[9780199999999] = 9780000000000,											-- English language
	[9780227999999] = 9780200000000,

and, from further down inverse_range_t:

	[9798229999999] = 9798200000000,											-- United States
	[9798399999999] = 9798350000000,
	[9798849999999] = 9798400000000,
	[9798884999999] = 9798850000000,
	[9798899999999] = 9798885000000,
	[9798989999999] = 9798985000000,

]]

local function render_output (frame, table_name, out_t, timestamp, timezone, rep)
	table.insert (out_t, '\t}</syntaxhighlight>');								-- to close the table
	table.insert (out_t, 1, table.concat ({										-- insert this at the start of the output sequence
		'<syntaxhighlight lang="lua">local ' .. table_name .. ' = {',			-- opening stuff
		string.rep ('\t', rep),													-- tabs to place the timestamp (15 for hyphen_pos_t, 14 for inverse_range_t)
		frame:callParserFunction ('#time', {'-- "RangeMessage timestamp": Y-m-d"T"H:i:s', timestamp}),	-- the timestamp; 'H' is the 24-hour clock
		timezone																-- and timezone
		}));
	return frame:preprocess (table.concat (out_t, '\n'));						-- make a big string and done
end


--[[--------------------------< R A N G E _ M E S S A G E _ X L A T E >----------------------------------------

{{#invoke:ISBN RangeMessage xlate|range_message_xlate}}				renders <hyphen_pos_t>
{{#invoke:ISBN RangeMessage xlate|range_message_xlate|range=yes}}	renders <inverse_range_t>

This function translates a local copy of https://www.isbn-international.org/export_rangemessage.xml for use by
Module:Format ISBN by way of Module:Format ISBN/data.

For every <Rule> whose <Length> is not 0, two rows are built.  Both are keyed by the largest 13-digit number in
the rule's range (prefix, registration group, and registrant, right-filled with '9's):
	hyphen_pos_t row:		key = {registration group length, registrant length, publication element length}
	inverse_range_t row:	key = smallest 13-digit number in the same range (right-filled with '0's)
The agency name is added as a comment to the first row emitted for each <Group>.  Rules with a <Length> of 0
produce no rows.

|range=yes selects the <inverse_range_t> rendering; anything else selects <hyphen_pos_t>.

Raises an error when no xml content could be read from the doc page.

To update <hyphen_pos_t> in Module:Format ISBN/data:
	1. copy the new RangeMessage.xml text from the ISBN international export_rangemessage.xml page (url above) to
		your clipboard and paste it over the existing xml data inside the html comments in this module's doc page.
	2. save.  The new translation is presented on this module's doc page.
	3. copy the new translation from the doc page to your clipboard
	4. edit Module:Format ISBN/data and paste the new <hyphen_pos_t> table over the old
	5. save

]]

local function range_message_xlate (frame)
	if not range_message_raw then												-- doc page missing or unreadable; fail with a message that says so
		error ('ISBN RangeMessage xlate: no RangeMessage xml content found on the doc page');
	end

	local out_t = {};															-- prettified <hyphen_pos_t> rows
	local out_range_t = {};														-- prettified <inverse_range_t> rows
	local timestamp, timezone = range_message_raw:match (timestamp_pattern);	-- get the RangeMessage timestamp

	for group in range_message_raw:gmatch (group_pattern) do					-- get a <Group>...</Group> block
		local prefix, registration_group = group:match (prefix_pattern);		-- these apply to every <Rule> in the <Group>
		local agency = group:match (agency_pattern);							-- get the 'agency'
		local agency_used = false;												-- flag for the avoidance of repeats

		for rule in group:gmatch (rule_pattern) do								-- get a <Rule>...</Rule> block
			local range_min, range_max = rule:match (range_pattern);			-- range limits; both are used below
			local registrant_len = tonumber (rule:match (length_pattern));		-- number of registrant digits; converted once so later arithmetic is explicit

			if 0 ~= registrant_len then											-- a length of 0 means no rows for this rule
				local stem_max = prefix .. registration_group .. range_max:sub (1, registrant_len);	-- prefix, group, and left-most <registrant_len> digits of the max
				local stem_min = prefix .. registration_group .. range_min:sub (1, registrant_len);	-- same but from the min
				local fill_length = 13 - #stem_max;								-- digits needed to right-fill to thirteen; the max stem sets the width for both

				local isbn_max = stem_max .. string.rep ('9', fill_length);		-- upper limit of the range
				local isbn_min = stem_min .. string.rep ('0', fill_length);		-- lower limit of the range

				local element_lengths_t = {										-- lengths of the isbn elements that follow the prefix
					#registration_group,										-- registration group (1)
					registrant_len,												-- registrant (2)
					9 - #registration_group - registrant_len,					-- whatever is left of nine digits is the publication element (3)
					};

				local pretty_string = '\t[' .. isbn_max .. '] = {' .. table.concat (element_lengths_t, ', ') .. '},';	-- prettify
				local pretty_string_range = '\t[' .. isbn_max .. '] = ' .. isbn_min .. ',';							-- prettify

				if agency and not agency_used then								-- a missing <Agency> just means no comment
					pretty_string = pretty_string .. string.rep ('\t', 12) .. '-- ' .. agency;				-- add agency to output
					pretty_string_range = pretty_string_range .. string.rep ('\t', 11) .. '-- ' .. agency;	-- add agency to output
					agency_used = true;											-- this 'agency' added to output; don't repeat it
				end

				table.insert (out_t, pretty_string);							-- add the prettified strings to the output sequences
				table.insert (out_range_t, pretty_string_range);
			end
		end
	end

	if 'yes' == frame.args.range then											-- select the output
		return render_output (frame, 'inverse_range_t', out_range_t, timestamp, timezone, 14);	-- inverse_range_t table
	else
		return render_output (frame, 'hyphen_pos_t', out_t, timestamp, timezone, 15);			-- hyphen_pos_t table
	end
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------

The only function exposed to {{#invoke:}} is range_message_xlate().

]]

return {range_message_xlate = range_message_xlate};