Jump to content

Module:Unicode data/scripts/make: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
m consistency
m array objects
Line 1: Line 1:
local p = {}
local p = {}


local scripts_txt = "some Wikipedia page"
local scripts_txt = 'some Wikipedia page'
local property_value_aliases_txt = "some Wikipedia page"
local property_value_aliases_txt = 'some Wikipedia page'

local Array = require 'Module:Array'


local function pattern_escape(str)
local function pattern_escape(str)
Line 31: Line 33:
end })
end })
local script_ranges = {}
local script_ranges = Array()
local prev_codepoint, prev_script_name, prev_script_range
local prev_codepoint, prev_script_name, prev_script_range
Line 41: Line 43:
else
else
script_range = { codepoint1, codepoint2 or codepoint1, script_name_to_code[script_name] }
script_range = { codepoint1, codepoint2 or codepoint1, script_name_to_code[script_name] }
table.insert(script_ranges, script_range)
script_ranges:insert(script_range)
end
end
prev_codepoint, prev_script_name, prev_script_range =
prev_codepoint, prev_script_name, prev_script_range =
Line 54: Line 56:
if low == high then
if low == high then
singles[low] = script_code
singles[low] = script_code
table.remove(script_ranges, i)
script_ranges:remove(i)
else
else
i = i + 1
i = i + 1
Line 60: Line 62:
end
end
script_ranges:sort(
table.sort(script_ranges,
function (range1, range2)
function (range1, range2)
return range1[1] < range2[1]
return range1[1] < range2[1]
Line 81: Line 83:
]]
]]
local printed_ranges = {}
local printed_ranges = Array()
for _, range in ipairs(script_ranges) do
for _, range in ipairs(script_ranges) do
local low, high, script_code = table.unpack(range)
local low, high, script_code = table.unpack(range)
table.insert(printed_ranges, ('\t{ 0x%05X, 0x%05X, "%s" },'):format(low, high, script_code))
printed_ranges:insert(('\t{ 0x%05X, 0x%05X, "%s" },'):format(low, high, script_code))
end
end
local printed_singles = {}
local printed_singles = Array()
for codepoint, script_code in require 'Module:table'.sortedPairs(singles) do
for codepoint, script_code in require 'Module:TableTools'.sortedPairs(singles) do
table.insert(printed_singles, ('\t[0x%05X] = "%s",'):format(codepoint, script_code))
printed_singles:insert(('\t[0x%05X] = "%s",'):format(codepoint, script_code))
end
end
local printed_script_name_to_code = {}
local printed_script_name_to_code = Array()
for name, code in require 'Module:table'.sortedPairs(script_name_to_code) do
for name, code in require 'Module:TableTools'.sortedPairs(script_name_to_code) do
table.insert(printed_script_name_to_code, ('%s = "%s",'):format(code, name:gsub('_', ' ')))
printed_script_name_to_code:insert(('%s = "%s",'):format(code, name:gsub('_', ' ')))
end
end
local data = template
local data = template
:gsub('%.%.%.', table.concat(printed_ranges, '\n'), 1)
:gsub('%.%.%.', printed_ranges:concat('\n'), 1)
:gsub('%.%.%.', table.concat(printed_singles, '\n'), 1)
:gsub('%.%.%.', printed_singles:concat('\n'), 1)
:gsub('%.%.%.', table.concat(printed_script_name_to_code, '\n'), 1)
:gsub('%.%.%.', printed_script_name_to_code:concat('\n'), 1)
return data
return data

Revision as of 22:05, 11 March 2019

-- Module table: the functions below are attached to it and it is returned
-- at the end of the file.
local p = {}

-- NOTE(review): both titles are placeholders. Presumably they should name the
-- wiki pages holding the Unicode Scripts.txt and PropertyValueAliases.txt
-- data files -- confirm before running.
local scripts_txt = 'some Wikipedia page'
local property_value_aliases_txt = 'some Wikipedia page'

-- Constructor called as Array() below; the resulting objects are used with
-- the :insert, :remove, :sort and :concat methods and with ipairs.
local Array = require 'Module:Array'

--- Escape a literal string for use inside a Lua pattern.
-- Every punctuation character (class %p) is prefixed with '%'.
-- @param str string to escape
-- @return the escaped string only (the substitution count is dropped)
local function pattern_escape(str)
	-- Assigning to a single local discards gsub's second result (the count).
	local escaped = str:gsub('%p', '%%%1')
	return escaped
end

--- Build a map from script names to script codes.
-- Reads the wikitext of a page, takes the text between the
-- "# Script (sc)" and "# Script_Extensions (scx)" headings, and collects every
-- "sc ; <code> ; <name>" entry found there.
-- @param page_name title of the page to read; defaults to
--   property_value_aliases_txt when omitted
-- @return table mapping each captured name ([%a_]+) to its code (%a+)
-- Raises an error if the page cannot be read or the section is not found.
function p.make_script_name_to_code(page_name)
	page_name = page_name or property_value_aliases_txt

	-- mw.title.new returns nil for an invalid title and getContent returns
	-- nil for a missing page; report both cases with a readable message.
	local title = mw.title.new(page_name)
	local content = title and title:getContent()
	if not content then
		error(('Could not read content of page "%s"'):format(tostring(page_name)))
	end

	-- Match against the page content that was just loaded.
	local script_aliases = content:match(
		pattern_escape '# Script (sc)'
		.. '%s+(.-)%s+'
		.. pattern_escape '# Script_Extensions (scx)')
	if not script_aliases then
		error(('No "# Script (sc)" section found in page "%s"'):format(tostring(page_name)))
	end

	local script_name_to_code = {}

	for code, name in script_aliases:gmatch 'sc%s+;%s+(%a+)%s+;%s+([%a_]+)' do
		script_name_to_code[name] = code
	end

	return script_name_to_code
end

--- Generate the Lua source text of a script-data table.
-- Parses "<hex>[..<hex>] ; <Script_Name>" lines from the page named by
-- scripts_txt, merges directly adjacent entries of the same script, separates
-- one-code-point entries ("singles") from true ranges, and fills the three
-- "..." placeholders of the template with the printed singles, ranges and
-- code-to-name aliases.
-- @param frame unused
-- @return the generated source text as a string
-- Raises an error if a page cannot be read or a script name has no code.
function p.main(frame)
	-- Pass the title explicitly so this does not rely on a default argument.
	local script_name_to_code = p.make_script_name_to_code(property_value_aliases_txt)
	-- Fail loudly on a script name that has no known code instead of
	-- silently storing nil in a range.
	setmetatable(script_name_to_code, { __index = function (self, k)
		error(('No code for "%s"'):format(k))
	end })

	-- Load the text that the range parser below iterates over.
	local scripts_title = mw.title.new(scripts_txt)
	local script_data = scripts_title and scripts_title:getContent()
	if not script_data then
		error(('Could not read content of page "%s"'):format(tostring(scripts_txt)))
	end

	local script_ranges = Array()

	local prev_codepoint, prev_script_name, prev_script_range
	-- The frontier pattern anchors each match at the start of the text or
	-- right after a newline.
	for codepoint1, codepoint2, script_name in script_data:gmatch '%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s*([%w_]+)' do
		-- tonumber('', 16) is nil, so a single code point leaves codepoint2 nil.
		codepoint1, codepoint2 = tonumber(codepoint1, 16), tonumber(codepoint2, 16)
		local script_range
		if prev_script_range and script_name == prev_script_name and codepoint1 - prev_codepoint == 1 then
			-- Directly follows the previous entry of the same script: extend it.
			prev_script_range[2] = codepoint2 or codepoint1
		else
			script_range = { codepoint1, codepoint2 or codepoint1, script_name_to_code[script_name] }
			script_ranges:insert(script_range)
		end
		prev_codepoint, prev_script_name, prev_script_range =
			codepoint2 or codepoint1, script_name, script_range or prev_script_range
	end

	-- Move ranges covering exactly one code point into a lookup table.
	local singles = {}
	local i = 1

	while script_ranges[i] do
		-- Plain indexing instead of table.unpack, which Lua 5.1 does not provide.
		local range = script_ranges[i]
		local low, high, script_code = range[1], range[2], range[3]
		if low == high then
			singles[low] = script_code
			-- Do not advance i: the next element has shifted into this slot.
			script_ranges:remove(i)
		else
			i = i + 1
		end
	end

	script_ranges:sort(
		function (range1, range2)
			return range1[1] < range2[1]
		end)

	local template = [[
local data = {
	singles = {
...
	},
	
	ranges = {
...
	},
	-- Scripts.txt gives full names; here we consider them aliases to save space.
	aliases = {
...
	},
}
]]

	local printed_ranges = Array()
	for _, range in ipairs(script_ranges) do
		local low, high, script_code = range[1], range[2], range[3]
		printed_ranges:insert(('\t{ 0x%05X, 0x%05X, "%s" },'):format(low, high, script_code))
	end

	local printed_singles = Array()
	for codepoint, script_code in require 'Module:TableTools'.sortedPairs(singles) do
		printed_singles:insert(('\t[0x%05X] = "%s",'):format(codepoint, script_code))
	end

	local printed_script_name_to_code = Array()
	for name, code in require 'Module:TableTools'.sortedPairs(script_name_to_code) do
		-- Parentheses drop gsub's count so only the string reaches format.
		printed_script_name_to_code:insert(('%s = "%s",'):format(code, (name:gsub('_', ' '))))
	end

	-- Each gsub fills the first remaining "..." placeholder, so the calls
	-- must follow the template order: singles, ranges, aliases.
	local data = template
		:gsub('%.%.%.', printed_singles:concat('\n'), 1)
		:gsub('%.%.%.', printed_ranges:concat('\n'), 1)
		:gsub('%.%.%.', printed_script_name_to_code:concat('\n'), 1)

	return data
end

return p