Module:Unicode data/scripts/make: Difference between revisions
Appearance
Content deleted Content added
m consistency |
m array objects |
||
Line 1: | Line 1: | ||
local p = {} |
local p = {} |
||
local scripts_txt = |
local scripts_txt = 'some Wikipedia page' |
||
local property_value_aliases_txt = |
local property_value_aliases_txt = 'some Wikipedia page' |
||
local Array = require 'Module:Array' |
|||
local function pattern_escape(str) |
local function pattern_escape(str) |
||
Line 31: | Line 33: | ||
end }) |
end }) |
||
local script_ranges = |
local script_ranges = Array() |
||
local prev_codepoint, prev_script_name, prev_script_range |
local prev_codepoint, prev_script_name, prev_script_range |
||
Line 41: | Line 43: | ||
else |
else |
||
script_range = { codepoint1, codepoint2 or codepoint1, script_name_to_code[script_name] } |
script_range = { codepoint1, codepoint2 or codepoint1, script_name_to_code[script_name] } |
||
script_ranges:insert(script_range) |
|||
end |
end |
||
prev_codepoint, prev_script_name, prev_script_range = |
prev_codepoint, prev_script_name, prev_script_range = |
||
Line 54: | Line 56: | ||
if low == high then |
if low == high then |
||
singles[low] = script_code |
singles[low] = script_code |
||
script_ranges:remove(i) |
|||
else |
else |
||
i = i + 1 |
i = i + 1 |
||
Line 60: | Line 62: | ||
end |
end |
||
script_ranges:sort( |
|||
table.sort(script_ranges, |
|||
function (range1, range2) |
function (range1, range2) |
||
return range1[1] < range2[1] |
return range1[1] < range2[1] |
||
Line 81: | Line 83: | ||
]] |
]] |
||
local printed_ranges = |
local printed_ranges = Array() |
||
for _, range in ipairs(script_ranges) do |
for _, range in ipairs(script_ranges) do |
||
local low, high, script_code = table.unpack(range) |
local low, high, script_code = table.unpack(range) |
||
printed_ranges:insert(('\t{ 0x%05X, 0x%05X, "%s" },'):format(low, high, script_code)) |
|||
end |
end |
||
local printed_singles = |
local printed_singles = Array() |
||
for codepoint, script_code in require 'Module: |
for codepoint, script_code in require 'Module:TableTools'.sortedPairs(singles) do |
||
printed_singles:insert(('\t[0x%05X] = "%s",'):format(codepoint, script_code)) |
|||
end |
end |
||
local printed_script_name_to_code = |
local printed_script_name_to_code = Array() |
||
for name, code in require 'Module: |
for name, code in require 'Module:TableTools'.sortedPairs(script_name_to_code) do |
||
printed_script_name_to_code:insert(('%s = "%s",'):format(code, name:gsub('_', ' '))) |
|||
end |
end |
||
local data = template |
local data = template |
||
:gsub('%.%.%.', |
:gsub('%.%.%.', printed_ranges:concat('\n'), 1) |
||
:gsub('%.%.%.', |
:gsub('%.%.%.', printed_singles:concat('\n'), 1) |
||
:gsub('%.%.%.', |
:gsub('%.%.%.', printed_script_name_to_code:concat('\n'), 1) |
||
return data |
return data |
Revision as of 22:05, 11 March 2019
For generating Module:Unicode data/scripts.
Lua error: bad argument #1 to 'title.new' (number or string expected, got nil).
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
-- Placeholder page title. NOTE(review): presumably the page holding the
-- Scripts.txt data (code point -> script name) that p.main should read -- confirm.
local scripts_txt = 'some Wikipedia page'
-- Placeholder page title. NOTE(review): presumably the page holding the
-- PropertyValueAliases.txt data (script code <-> script name) meant for
-- p.make_script_name_to_code -- confirm.
local property_value_aliases_txt = 'some Wikipedia page'
-- Array constructor from Module:Array; the lists built with it below are used
-- through their :insert, :remove, :sort and :concat methods.
local Array = require 'Module:Array'
--- Make a string safe to embed literally inside a Lua pattern.
-- Every punctuation character (class %p) is prefixed with '%'.
-- @param str the text to escape
-- @return the escaped text only; the substitution count from gsub is dropped
local function pattern_escape(str)
	local escaped = str:gsub('%p', '%%%1')
	return escaped
end
--- Build a lookup table from script names to script codes.
-- Reads the wikitext of a page, takes the text between the "# Script (sc)"
-- and "# Script_Extensions (scx)" headings, and collects every line of the
-- form "sc ; <code> ; <name>" found there.
-- @param page_name title of the page to read; defaults to the module-level
--   property_value_aliases_txt when omitted
-- @return table mapping each script name (underscores kept, as written in the
--   data) to its code
-- Raises an error if the page cannot be read or the script section is missing.
function p.make_script_name_to_code(page_name)
	-- Callers such as p.main pass no argument; without a default,
	-- mw.title.new(nil) raises "bad argument #1 to 'title.new'".
	page_name = page_name or property_value_aliases_txt

	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page, so check both before matching.
	local title = mw.title.new(page_name)
	local content = title and title:getContent()
	if not content then
		error(('Could not read content of page "%s"'):format(tostring(page_name)))
	end

	-- Match against the text that was actually loaded (the original referenced
	-- an undefined variable here).
	local script_aliases = content:match(
		pattern_escape('# Script (sc)')
		.. '%s+(.-)%s+'
		.. pattern_escape('# Script_Extensions (scx)'))
	if not script_aliases then
		error(('No "# Script (sc)" section found in page "%s"'):format(tostring(page_name)))
	end

	local script_name_to_code = {}
	for code, name in script_aliases:gmatch 'sc%s+;%s+(%a+)%s+;%s+([%a_]+)' do
		script_name_to_code[name] = code
	end

	return script_name_to_code
end
--- Generate the Lua source text for the script data module.
-- Steps:
--   1. Load the script-name -> script-code map.
--   2. Scan the page named by scripts_txt for lines of the form
--      "XXXX ; Script_Name" or "XXXX..YYYY ; Script_Name" and merge entries
--      that are adjacent and share a script into one range.
--   3. Split the result into single code points and multi-code-point ranges.
--   4. Print everything into a template with three "..." placeholders
--      (singles, ranges, aliases -- in that order).
-- @param frame Scribunto frame object; not used
-- @return string containing the generated "local data = { ... }" source
-- Raises an error if a page cannot be read or a script name has no code.
function p.main(frame)
	-- Pass the page title explicitly rather than relying on the callee's default.
	local script_name_to_code = p.make_script_name_to_code(property_value_aliases_txt)
	-- Fail loudly if the scripts data mentions a name that has no known code.
	setmetatable(script_name_to_code, { __index = function (self, k)
		error(('No code for "%s"'):format(k))
	end })

	-- Load the text to scan (the original read an undefined variable here).
	local scripts_title = mw.title.new(scripts_txt)
	local script_data = scripts_title and scripts_title:getContent()
	if not script_data then
		error(('Could not read content of page "%s"'):format(tostring(scripts_txt)))
	end

	local script_ranges = Array()

	local prev_codepoint, prev_script_name, prev_script_range
	-- The frontier %f[^\n%z] anchors each match at the start of a line (or of
	-- the text), so commented-out lines beginning with '#' are skipped.
	for codepoint1, codepoint2, script_name in script_data:gmatch '%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s*([%w_]+)' do
		-- codepoint2 is '' for single-code-point lines; tonumber('', 16) gives nil.
		codepoint1, codepoint2 = tonumber(codepoint1, 16), tonumber(codepoint2, 16)
		local last_codepoint = codepoint2 or codepoint1

		local script_range
		if prev_script_range and script_name == prev_script_name and codepoint1 - prev_codepoint == 1 then
			-- Directly follows the previous entry with the same script: extend it.
			prev_script_range[2] = last_codepoint
		else
			script_range = { codepoint1, last_codepoint, script_name_to_code[script_name] }
			script_ranges:insert(script_range)
		end

		prev_codepoint, prev_script_name, prev_script_range =
			last_codepoint, script_name, script_range or prev_script_range
	end

	-- Separate one-code-point entries from real ranges in a single pass
	-- (removing from the middle of the list inside a loop is quadratic).
	-- Fields are indexed directly instead of using table.unpack, which stock
	-- Lua 5.1 does not provide.
	local singles = {}
	local multi_ranges = Array()
	for _, range in ipairs(script_ranges) do
		if range[1] == range[2] then
			singles[range[1]] = range[3]
		else
			multi_ranges:insert(range)
		end
	end

	multi_ranges:sort(
		function (range1, range2)
			return range1[1] < range2[1]
		end)

	local template = [[
local data = {
singles = {
...
},
ranges = {
...
},
-- Scripts.txt gives full names; here we consider them aliases to save space.
aliases = {
...
},
}
]]

	local printed_ranges = Array()
	for _, range in ipairs(multi_ranges) do
		printed_ranges:insert(('\t{ 0x%05X, 0x%05X, "%s" },'):format(range[1], range[2], range[3]))
	end

	local printed_singles = Array()
	for codepoint, script_code in require 'Module:TableTools'.sortedPairs(singles) do
		printed_singles:insert(('\t[0x%05X] = "%s",'):format(codepoint, script_code))
	end

	local printed_script_name_to_code = Array()
	for name, code in require 'Module:TableTools'.sortedPairs(script_name_to_code) do
		-- Parentheses keep only gsub's first result (the string, not the count).
		printed_script_name_to_code:insert(('%s = "%s",'):format(code, (name:gsub('_', ' '))))
	end

	-- Fill the placeholders in the order they appear in the template:
	-- singles, then ranges, then aliases. (The original filled ranges first,
	-- which put the ranges inside "singles" and the singles inside "ranges".)
	local sections = {
		printed_singles:concat('\n'),
		printed_ranges:concat('\n'),
		printed_script_name_to_code:concat('\n'),
	}
	local section_index = 0
	-- A function replacement inserts each section verbatim, so '%' in the
	-- generated text can never be misread as a gsub escape.
	local data = template:gsub('%.%.%.', function ()
		section_index = section_index + 1
		return sections[section_index]
	end)

	return data
end
-- Export the module table so its functions can be called from outside this file.
return p