Jump to content

Module:Unicode data/category/make: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
fixed
add long names
Line 20: Line 20:
end
end
end
end
return singles, ranges
local long_names = {}
for long_name, code in text:gmatch "# General_Category=([%a_]+).-%f[^\n]#.-; (%u%l)" do
long_names[code] = long_name
end
return singles, ranges, long_names
end
end


Line 51: Line 57:
for _, range in ipairs(data.ranges) do
for _, range in ipairs(data.ranges) do
writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', unpack(range))
writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', unpack(range))
end
writef [[
},
long_names = {
]]
for code, long_name in require "Module:TableTools".sortedPairs(data.long_names) do
writef('\t\t%s = "%s",\n', code, long_name)
end
end
Line 62: Line 76:
function p.main(frame)
function p.main(frame)
local data = {}
local data = {}
data.singles, data.ranges = p.make_singles_and_ranges()
data.singles, data.ranges, data.long_names = p.make_singles_and_ranges()
return frame:extensionTag{
return frame:extensionTag{
name = "syntaxhighlight",
name = "syntaxhighlight",

Revision as of 19:08, 16 September 2019

local p = {}

--- Parse the General_Category listing stored on a wiki page into tables.
-- Reads the wikitext of "User:Erutuon/Unicode/DerivedGeneralCategory.txt"
-- and scans it with Lua patterns.
-- @return singles     table mapping a code point (number) to its two-letter
--                     General_Category code, for "XXXX ; gc" lines
-- @return ranges      array of { first, last, code } triples, for
--                     "XXXX..XXXX ; gc" lines (in order of appearance)
-- @return long_names  table mapping a two-letter code to the long name
--                     captured from a "# General_Category=..." comment
-- Raises an error if the source page content cannot be read.
function p.make_singles_and_ranges()
	local source_page = "User:Erutuon/Unicode/DerivedGeneralCategory.txt"
	local title = mw.title.new(source_page)
	local text = title and title:getContent()
	if not text then
		-- Fail with a readable message instead of "attempt to index nil".
		error("Could not read the content of [[" .. source_page .. "]]")
	end
	
	local singles, ranges = {}, {}
	-- %f[^\n%z] anchors the match at the start of a line (or of the text).
	for code_point1, code_point2, general_category in text:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s+(%u%l)") do
		-- Skip entries whose category is "Cn" (Unassigned in Unicode's
		-- General_Category scheme). The comparison must use the loop
		-- variable; an undefined name would be nil and never equal "Cn".
		if general_category ~= "Cn" then
			code_point1 = tonumber(code_point1, 16)
			
			-- XXXX..XXXX ; gc
			if code_point2 ~= "" then
				code_point2 = tonumber(code_point2, 16)
				table.insert(ranges, { code_point1, code_point2, general_category })
			
			-- XXXX ; gc
			else
				singles[code_point1] = general_category
			end
		end
	end
	
	-- For each "# General_Category=Long_Name" comment, lazily skip ahead to
	-- a "#" at the start of a later line and then to the first "; Xx" code,
	-- and record that code under the long name.
	-- NOTE(review): which code gets paired with which name depends on the
	-- exact layout of the source page -- confirm against the page content.
	local long_names = {}
	for long_name, code in text:gmatch "# General_Category=([%a_]+).-%f[^\n]#.-; (%u%l)" do
		long_names[code] = long_name
	end
	
	return singles, ranges, long_names
end

--- Render the parsed category data as the source text of a Lua data module.
-- @param data  table with fields `singles` (code point -> category code),
--              `ranges` (array of { first, last, code } triples) and
--              `long_names` (category code -> long name)
-- @return the generated Lua source as one string
-- Note: `data.ranges` is sorted in place by the first element of each range.
function p.print_data(data)
	local output = require "Module:array"()
	local sorted_pairs = require "Module:TableTools".sortedPairs
	
	-- Format one chunk and append it to the output buffer.
	local function emit(...)
		output:insert(string.format(...))
	end
	
	-- Opening of the returned table and of the "singles" section.
	emit [[
return {
	singles = {
]]
	
	-- Check that maximum "singles" codepoint is less than 0x100000?
	for single_code_point, category_code in sorted_pairs(data.singles) do
		emit('\t\t[0x%05X] = "%s",\n', single_code_point, category_code)
	end
	
	-- Close "singles", open "ranges".
	emit [[
	},
	ranges = {
]]
	
	-- Order the ranges by their starting code point before printing them.
	table.sort(data.ranges, function (left, right)
		return left[1] < right[1]
	end)
	for _, triple in ipairs(data.ranges) do
		emit('\t\t{ 0x%06X, 0x%06X, "%s" },\n', unpack(triple))
	end
	
	-- Close "ranges", open "long_names".
	emit [[
	},
	long_names = {
]]
	for category_code, name in sorted_pairs(data.long_names) do
		emit('\t\t%s = "%s",\n', category_code, name)
	end
	
	-- Close "long_names" and the returned table.
	emit [[
	},
}]]

	return output:concat()
end

--- Entry point: build the data tables, render them as Lua source, and wrap
-- the result in a <syntaxhighlight lang="lua"> extension tag.
-- @param frame  frame object; only its `extensionTag` method is used here
-- @return the value returned by `frame:extensionTag`
function p.main(frame)
	local singles, ranges, long_names = p.make_singles_and_ranges()
	local data = {
		singles = singles,
		ranges = ranges,
		long_names = long_names,
	}
	local generated_source = p.print_data(data)
	
	return frame:extensionTag{
		name = "syntaxhighlight",
		content = generated_source,
		args = { lang = "lua" }
	}
end

return p