Module:Unicode data/derived core properties: Difference between revisions
Appearance
Content deleted Content added
generate "default ignorable" data table |
replace with output |
||
Line 1: | Line 1: | ||
local p = {}

-- Page read when the invocation supplies no (or a blank) first argument.
local DEFAULT_SOURCE_PAGE = "User:Erutuon/Unicode/DerivedCoreProperties.txt"

-- Skeleton of the generated data module. Each "..." line is a placeholder:
-- the first is replaced with the printed singles, the second with the
-- printed ranges.
local OUTPUT_TEMPLATE = [[
local data = {}
data.defaultIgnorable = {
	singles = {
...
	},
	ranges = {
...
	},
}
return data
]]

--- Split the lines of a property section into single code points and ranges.
-- Every line that begins with hexadecimal digits is read as either
-- "XXXX" (one code point) or "XXXX..YYYY" (an inclusive range).
-- An entry that starts exactly one past the end of the most recently stored
-- range extends that range instead of being stored separately. Singles are
-- never merged with each other or with a range that follows them.
-- @param section string: the data lines of one property
-- @return table: set of single code points (code point -> true)
-- @return table: array of { low, high } pairs in input order
local function collectCodePoints(section)
	local singles, ranges = {}, {}
	-- Prefixing a newline lets "\n" anchor every line, including the first,
	-- without relying on the "%z" class, which only Lua 5.1 provides.
	for first, last in ("\n" .. section):gmatch("\n(%x+)%.?%.?(%x*)") do
		local low = tonumber(first, 16)
		-- high stays nil when the line names a single code point.
		local high = last ~= "" and tonumber(last, 16) or nil
		local previous = ranges[#ranges]
		if previous and previous[2] == low - 1 then
			previous[2] = high or low
		elseif high then
			ranges[#ranges + 1] = { low, high }
		else
			singles[low] = true
		end
	end
	return singles, ranges
end

--- Print the collected code points as Lua table entries.
-- @param singles table: set of single code points (code point -> true)
-- @param ranges table: array of { low, high } pairs
-- @return string: one "[0xXXXXX] = true," line per single, ascending
-- @return string: one "{ 0xXXXXX, 0xXXXXX }," line per range, in array order
local function printEntries(singles, ranges)
	local codePoints = {}
	for codePoint in pairs(singles) do
		codePoints[#codePoints + 1] = codePoint
	end
	-- pairs() has no defined order, so sort before printing.
	table.sort(codePoints)

	local singleLines = {}
	for i, codePoint in ipairs(codePoints) do
		singleLines[i] = ("\t\t[0x%05X] = true,"):format(codePoint)
	end

	local rangeLines = {}
	for i, range in ipairs(ranges) do
		rangeLines[i] = ("\t\t{ 0x%05X, 0x%05X },"):format(range[1], range[2])
	end

	return table.concat(singleLines, "\n"), table.concat(rangeLines, "\n")
end

--- Generate the source of the "default ignorable" data module.
-- Reads a wiki page holding the text of DerivedCoreProperties.txt, extracts
-- the Default_Ignorable_Code_Point section, and returns the generated Lua
-- source wrapped in a <syntaxhighlight lang="lua"> tag.
-- @param frame table: frame object; frame.args[1] optionally names the page
--   to read (DEFAULT_SOURCE_PAGE is used when it is absent or blank)
-- @return string: the result of the syntaxhighlight extension tag
-- Raises an error when the page title is invalid, the page has no content,
-- or the property section cannot be found.
function p.show(frame)
	local page = frame.args[1]
	if page == nil or page == "" then
		page = DEFAULT_SOURCE_PAGE
	end

	-- mw.title.new returns nil for an invalid title; report that directly
	-- instead of failing on the method call below.
	local title = mw.title.new(page)
	if not title then
		error("invalid page title: " .. tostring(page))
	end
	local text = title:getContent()
	if not text then
		error("could not read the content of page: " .. page)
	end

	-- Capture from the first line that starts with four hex digits up to,
	-- but not including, the "# Total code points" line closing the section.
	local section = text:match(
		"Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points")
	if not section then
		error("Default_Ignorable_Code_Point section not found in page: " .. page)
	end

	local printedSingles, printedRanges = printEntries(collectCodePoints(section))

	-- A function replacement inserts the text verbatim, so a "%" in the
	-- generated text could never be read as a gsub escape.
	local replacements = { printedSingles, printedRanges }
	local used = 0
	local data = OUTPUT_TEMPLATE:gsub("%.%.%.", function()
		used = used + 1
		return replacements[used]
	end, 2)

	return mw.getCurrentFrame():extensionTag{
		name = "syntaxhighlight",
		content = data,
		args = { lang = "lua" }
	}
end
|||
return p |
Revision as of 09:44, 16 September 2019
Contains data for the Unicode property Default_Ignorable_Code_Point. The data is used by the function is_default_ignorable in Module:Unicode data/sandbox.
Generated by Module:Unicode data/derived core properties/make from DerivedCoreProperties.txt in the Unicode Character Database.
-- Generated lookup data for the Default_Ignorable_Code_Point property.
--   singles: set of individual code points (code point -> true)
--   ranges:  array of { first, last } code point pairs, in ascending order
local data = {
	defaultIgnorable = {
		singles = {
			[0x000AD] = true,
			[0x0034F] = true,
			[0x0061C] = true,
			[0x03164] = true,
			[0x0FEFF] = true,
			[0x0FFA0] = true,
			[0xE0000] = true,
			[0xE0001] = true,
		},
		ranges = {
			{ 0x0115F, 0x01160 },
			{ 0x017B4, 0x017B5 },
			{ 0x0180B, 0x0180E },
			{ 0x0200B, 0x0200F },
			{ 0x0202A, 0x0202E },
			{ 0x02060, 0x0206F },
			{ 0x0FE00, 0x0FE0F },
			{ 0x0FFF0, 0x0FFF8 },
			{ 0x1BCA0, 0x1BCA3 },
			{ 0x1D173, 0x1D17A },
			{ 0xE0002, 0xE0FFF },
		},
	},
}
return data