Jump to content

Module:Unicode data/derived core properties

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Erutuon (talk | contribs) at 09:41, 16 September 2019 (generate "default ignorable" data table). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)

-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}

-- Skeleton of the generated data module. The first "..." placeholder is
-- replaced with the single code points, the second with the ranges.
local outputTemplate = [[
local data = {}

data.defaultIgnorable = {
	singles = {
...
	},
	
	ranges = {
...
	},
}

return data
]]

--- Collect the code points listed at the start of each line of a section.
-- Lines are expected to begin with either "XXXX" or "XXXX..YYYY" (hex).
-- An entry that starts right after the end of the most recently stored range
-- extends that range; otherwise a single code point goes into `singles` and
-- a range is appended to `ranges`.
-- @param section string: text of one property section
-- @return table: set of single code points (`[codePoint] = true`)
-- @return table: sequence of `{ low, high }` pairs
local function parseCodePoints(section)
	local singles, ranges = {}, {}
	-- The frontier anchors the match to the start of a line (or of the text).
	for first, last in section:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)") do
		-- For a single code point the second capture is empty and converts
		-- to nil, which is what distinguishes singles from ranges below.
		first, last = tonumber(first, 16), tonumber(last, 16)
		local lastRange = ranges[#ranges]
		if lastRange and lastRange[2] == first - 1 then
			lastRange[2] = last or first
		elseif last then
			ranges[#ranges + 1] = { first, last }
		else
			singles[first] = true
		end
	end
	return singles, ranges
end

--- Render a Lua data table of Default_Ignorable_Code_Point code points.
-- Reads the wikitext of a page containing DerivedCoreProperties.txt, extracts
-- the Default_Ignorable_Code_Point section, and returns the generated module
-- source wrapped in a <syntaxhighlight lang="lua"> tag.
-- @param frame frame object; `frame.args[1]` optionally names the page to read
-- @return string: result of the syntaxhighlight extension tag
-- Raises an error if the title is invalid, the page has no content, or the
-- expected section cannot be found.
function p.show(frame)
	local page = frame.args[1] or "User:Erutuon/Unicode/DerivedCoreProperties.txt"

	-- mw.title.new returns nil for an invalid title; fail with a clear message
	-- instead of an "attempt to index a nil value" error.
	local title = mw.title.new(page)
	if not title then
		error(("invalid page title: %s"):format(tostring(page)))
	end
	local text = title:getContent()
	if not text then
		error(("could not read the content of page: %s"):format(tostring(page)))
	end

	local defaultIgnorable = text
		:match("Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points")
	if not defaultIgnorable then
		error(("Default_Ignorable_Code_Point section not found in page: %s"):format(tostring(page)))
	end

	local singles, ranges = parseCodePoints(defaultIgnorable)

	local Array = require "Module:array"
	local printedRanges = Array()
	for _, range in ipairs(ranges) do
		printedRanges:insert(('\t\t{ 0x%05X, 0x%05X },'):format(range[1], range[2]))
	end

	local printedSingles = Array()
	for codepoint in require 'Module:TableTools'.sortedPairs(singles) do
		printedSingles:insert(('\t\t[0x%05X] = true,'):format(codepoint))
	end

	-- Each gsub replaces only the first remaining "..." placeholder, so the
	-- singles must be substituted before the ranges.
	local data = outputTemplate
		:gsub('%.%.%.', printedSingles:concat('\n'), 1)
		:gsub('%.%.%.', printedRanges:concat('\n'), 1)

	return mw.getCurrentFrame():extensionTag{
		name = "syntaxhighlight",
		content = data,
		args = { lang = "lua" }
	}
end

-- Export the module table.
return p