Jump to content

Module:Unicode data/derived core properties/make: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted / Content added
Edit summary (previous revision): fix range merging
Edit summary (this revision, minor edit): un-camel-case
Line 4: Line 4:
local page = frame.args[1] or "User:Erutuon/Unicode/DerivedCoreProperties.txt"
local page = frame.args[1] or "User:Erutuon/Unicode/DerivedCoreProperties.txt"
local text = assert(mw.title.new(page):getContent())
local text = assert(mw.title.new(page):getContent())
local defaultIgnorable = text
local default_ignorable = text
:match("Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points")
:match("Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points")
local singles, ranges = {}, {}
local singles, ranges = {}, {}
for codePoint1, codePoint2 in defaultIgnorable:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)") do
for code_point1, code_point2 in default_ignorable:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)") do
codePoint1, codePoint2 = tonumber(codePoint1, 16), tonumber(codePoint2, 16)
code_point1, code_point2 = tonumber(code_point1, 16), tonumber(code_point2, 16)
local lastRange = ranges[#ranges]
local last_range = ranges[#ranges]
if lastRange and lastRange[2] == codePoint1 - 1 then
if last_range and last_range[2] == code_point1 - 1 then
lastRange[2] = codePoint2 or codePoint1
last_range[2] = code_point2 or code_point1
elseif singles[codePoint1 - 1] then
elseif singles[code_point1 - 1] then
singles[codePoint1 - 1] = nil
singles[code_point1 - 1] = nil
table.insert(ranges, { codePoint1 - 1, codePoint2 or codePoint1 })
table.insert(ranges, { code_point1 - 1, code_point2 or code_point1 })
else
else
if not codePoint2 then
if not code_point2 then
singles[codePoint1] = true
singles[code_point1] = true
else
else
table.insert(ranges, { codePoint1, codePoint2 })
table.insert(ranges, { code_point1, code_point2 })
end
end
end
end
Line 27: Line 27:
local data = {}
local data = {}


data.defaultIgnorable = {
data.default_ignorable = {
singles = {
singles = {
...
...
Line 41: Line 41:


local Array = require "Module:array"
local Array = require "Module:array"
local printedRanges = Array()
local printed_ranges = Array()
for _, range in ipairs(ranges) do
for _, range in ipairs(ranges) do
local low, high, script_code = unpack(range)
local low, high, script_code = unpack(range)
printedRanges:insert(('\t\t{ 0x%05X, 0x%05X },'):format(low, high))
printed_ranges:insert(('\t\t{ 0x%05X, 0x%05X },'):format(low, high))
end
end
local printedSingles = Array()
local printed_singles = Array()
for codepoint in require 'Module:TableTools'.sortedPairs(singles) do
for codepoint in require 'Module:TableTools'.sortedPairs(singles) do
printedSingles:insert(('\t\t[0x%05X] = true,'):format(codepoint))
printed_singles:insert(('\t\t[0x%05X] = true,'):format(codepoint))
end
end
local data = template
local data = template
:gsub('%.%.%.', printedSingles:concat('\n'), 1)
:gsub('%.%.%.', printed_singles:concat('\n'), 1)
:gsub('%.%.%.', printedRanges:concat('\n'), 1)
:gsub('%.%.%.', printed_ranges:concat('\n'), 1)
return mw.getCurrentFrame():extensionTag{
return mw.getCurrentFrame():extensionTag{

Revision as of 01:51, 18 September 2019

local p = {}

--- Build a syntax-highlighted Lua data module for Default_Ignorable_Code_Point.
--
-- Reads the text of a wiki page (expected to hold the contents of Unicode's
-- DerivedCoreProperties.txt), extracts the entries listed under
-- "Derived Property: Default_Ignorable_Code_Point", and prints them as Lua
-- source: isolated code points go under `singles`, runs of consecutive code
-- points go under `ranges`.
--
-- @param frame Scribunto frame object. `frame.args[1]` optionally names the
--   page to read; when it is missing or empty a default page is used.
-- @return The result of the `syntaxhighlight` extension tag wrapping the
--   generated Lua source.
-- Raises an error when the page title is invalid, the page has no content,
-- or the property section cannot be found in the page text.
function p.main(frame)
	local page = frame.args[1]
	if page == nil or page == "" then
		page = "User:Erutuon/Unicode/DerivedCoreProperties.txt"
	end

	-- mw.title.new returns nil for an invalid title; report that explicitly
	-- instead of failing with "attempt to index a nil value".
	local title = mw.title.new(page)
	if not title then
		error(("invalid page title: %s"):format(page))
	end
	local text = title:getContent()
	if not text then
		error(("could not read content of page: %s"):format(page))
	end

	-- Everything from the first data line of the section up to (but not
	-- including) its "# Total code points" trailer.
	local default_ignorable = text
		:match("Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points")
	if not default_ignorable then
		error(("Default_Ignorable_Code_Point section not found in page: %s"):format(page))
	end

	local singles, ranges = {}, {}
	-- Each data line starts with either "XXXX" or "XXXX..YYYY". The frontier
	-- pattern anchors the match to the start of a line.
	for first_hex, last_hex in default_ignorable:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)") do
		local first = tonumber(first_hex, 16)
		-- nil when the line holds a single code point (empty second capture).
		local last = tonumber(last_hex, 16)
		local last_range = ranges[#ranges]
		-- Merging only looks at the most recent range and at the code point
		-- immediately before `first`, so it assumes the entries are listed in
		-- ascending order.
		if last_range and last_range[2] == first - 1 then
			-- Entry directly follows the most recent range: extend that range.
			last_range[2] = last or first
		elseif singles[first - 1] then
			-- Entry directly follows a lone code point: promote both to a range.
			singles[first - 1] = nil
			table.insert(ranges, { first - 1, last or first })
		elseif last then
			table.insert(ranges, { first, last })
		else
			singles[first] = true
		end
	end

	-- Skeleton of the generated module; each "..." line is a placeholder that
	-- is filled in below (first the singles, then the ranges).
	local template = [[
local data = {}

data.default_ignorable = {
	singles = {
...
	},
	
	ranges = {
...
	},
}

return data
]]

	local Array = require "Module:array"
	local printed_ranges = Array()
	for _, range in ipairs(ranges) do
		printed_ranges:insert(('\t\t{ 0x%05X, 0x%05X },'):format(range[1], range[2]))
	end

	local printed_singles = Array()
	for codepoint in require 'Module:TableTools'.sortedPairs(singles) do
		printed_singles:insert(('\t\t[0x%05X] = true,'):format(codepoint))
	end

	-- Each gsub replaces only the first remaining "..." placeholder, so the
	-- order of the two calls must match the order of the placeholders. The
	-- replacement text contains no "%" and is therefore safe as a plain string.
	local data = template
		:gsub('%.%.%.', printed_singles:concat('\n'), 1)
		:gsub('%.%.%.', printed_ranges:concat('\n'), 1)

	return mw.getCurrentFrame():extensionTag{
		name = "syntaxhighlight",
		content = data,
		args = { lang = "lua" }
	}
end

return p