Module:Unicode data/category/doc: Difference between revisions

Content deleted Content added

Inline

Revision as of 19:19, 16 September 2019

Unicode General Category data generated by Module:Unicode data/category/make from DerivedGeneralCategory.txt in the Unicode Character Database. The category Cn (Unassigned) is omitted because it is the default for characters not assigned to another category.

@@ Line 1: / Line 1: @@
 <!-- Please place categories where indicated at the bottom of this page and interwikis at Wikidata (see [[Wikipedia:Wikidata]]) -->
-Unicode [[General Category]] data derived from [https://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt DerivedGeneralCategory.txt] in the Unicode Character Database. The category Cn (Unassigned) is omitted because it is the default for characters not assigned to another category.
+Unicode [[General Category]] data generated by [[Module:Unicode data/category/make]] from [https://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt DerivedGeneralCategory.txt] in the Unicode Character Database. The category Cn (Unassigned) is omitted because it is the default for characters not assigned to another category.
-The data was generated by the two Lua 5.3 scripts below, which require [http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html LPeg]. If the two scripts are in the same folder as <code>DerivedGeneralCategory.txt</code>, open a command line in that folder and run <code>lua print_data.lua</code> to write the data to <code>data.lua</code>.
-{{collapse top|title=Lua 5.3 scripts}}
-; parse_data.lua
-<source lang="lua">
--- Read the whole of DerivedGeneralCategory.txt into one string.
--- io.open's arguments are parenthesised on purpose: written as
--- `assert(io.open 'file', 'r')`, the paren-less call takes only the filename
--- and 'r' becomes assert's message, so a failed open would report just "r"
--- instead of the real error returned by io.open.
-local f = assert(io.open('DerivedGeneralCategory.txt', 'r'))
-local Derived_General_Category = f:read 'a'
-f:close()
-local lpeg = require 'lpeg'
--- Copy every lpeg entry whose name starts with a character that is its own
--- uppercase form into _ENV, so the grammar below can call them unqualified.
-for name, value in pairs(lpeg) do
-	if type(name) == 'string' then
-		local initial = name:sub(1, 1)
-		if initial:upper() == initial then
-			_ENV[name] = value
-		end
-	end
-end
-local General_Category_data = { singles = {}, ranges = {} }
--- Match-time callback for one data line; the extra arguments are the captures.
--- Three captures mean "low..high ; gc", two mean "codepoint ; gc".
--- Entries whose category is 'Cn' are skipped; everything else is stored in
--- General_Category_data. Returns pos unchanged.
-local function process_match(str, pos, ...)
-	local first, second, third = ...
-	if third then -- range line: first = low, second = high, third = category
-		if third ~= 'Cn' then
-			local low, high = tonumber(first, 16), tonumber(second, 16)
-			table.insert(General_Category_data.ranges, { low, high, third })
-		end
-	elseif second ~= 'Cn' then -- single line: first = codepoint, second = category
-		General_Category_data.singles[tonumber(first, 16)] = second
-	end
-	return pos
-end
--- LPeg grammar that scans the file contents and calls process_match for
--- every data line it recognises.
-local patt = P {
--- Start rule: at each position try to match a data line, otherwise skip one character.
-	(V 'line' + 1)^1,
--- A data line: a range or a single codepoint, optional blanks, ';', optional
--- blanks, the category, then the rest of the line. Cmt passes the captures
--- (after the subject and position) to process_match.
-	line = Cmt((V 'range' + C(V 'codepoint')) * V 'white' * P ';' * V 'white' * C(V 'gc') * (1 - V 'nl')^0,
-		process_match),
--- Two captured codepoints separated by '..'.
-	range = C(V 'codepoint') * P '..' * C(V 'codepoint'),
--- Four to six hex digits.
-	codepoint = V 'hex' * V 'hex' * V 'hex' * V 'hex' * V 'hex'^-2,
--- Category: one uppercase ASCII letter followed by any one character.
-	gc = R 'AZ' * P(1),
--- Hex digit; only digits and uppercase A-F are accepted.
-	hex = R("09", "AF"),
--- Zero or more spaces or tabs.
-	white = S ' \t'^0,
--- Line ending: optional carriage return followed by a line feed.
-	nl = P '\r'^-1 * P '\n',
-}
--- Run the grammar for its side effects on General_Category_data, then hand
--- the filled table back to whoever loads this script.
-patt:match(Derived_General_Category)
-return General_Category_data
-</source>
-; print_data.lua
-<source lang="lua">
--- Run the parser script to obtain the parsed tables, then open the output
--- file for writing. The parser is listed on this page as parse_data.lua, so
--- that is the file name loaded here.
-local data_filename = [[parse_data.lua]]
-local data = dofile(data_filename)
-local output_filename = [[data.lua]]
-local output = assert(io.open(output_filename, 'w'))
--- Format the arguments with string.format and write the result to the output file.
-local function writef(...)
-	local text = string.format(...)
-	output:write(text)
-end
--- Open the returned table and its "singles" sub-table.
-writef [[
-return {
-	singles = {
-]]
--- Open question: check that the largest "singles" codepoint is less than 0x100000?
--- One line per single codepoint, iterated with spairs from module 't'.
-local spairs = require 't'.spairs
-for code, gc in spairs(data.singles) do
-	writef('\t\t[0x%05X] = "%s",\n', code, gc)
-end
--- Close "singles" and open the "ranges" list.
-writef [[
-	},
-	ranges = {
-]]
--- Comparator for table.sort: orders ranges by their first codepoint.
-local function compare_ranges(range1, range2)
-	return range1[1] < range2[1]
-end
-table.sort(data.ranges, compare_ranges)
--- One line per range: low, high, category.
-for _, range in ipairs(data.ranges) do
-	writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', table.unpack(range))
-end
--- Close the "ranges" list and the returned table.
-writef [[
-	},
-}]]
--- Close the handle explicitly so the buffered output is written out here
--- rather than whenever the handle happens to be collected.
-output:close()
-</source>
-{{collapse bottom}}
 <includeonly>{{#ifeq:{{SUBPAGENAME}}|sandbox | |