Jump to content

Module:Unicode chart

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Cobaltcigs (talk | contribs) at 23:23, 12 September 2019 (add whitespace footnote, mv userRefs to last). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mAliases = require('Module:Unicode data/aliases')
local mBlocks = require('Module:Unicode data/blocks')
local mScripts = require('Module:Unicode data/scripts')
local mArguments = require('Module:Arguments')
local p = {} 

--- Look up the script code for a code point.
-- Scans every entry of mScripts.ranges; each entry is read as
-- {first, last, code}. When several entries contain n, the last one wins.
-- @param n  code point (number)
-- @return the matching entry's code, or nil when no entry contains n
function getScriptCode(n)
	-- The result is local to the call. Keeping it in a module-level variable
	-- made a code point with no matching range inherit whatever the previous
	-- call had found.
	local code = nil
	for _, range in ipairs(mScripts.ranges) do
		if n >= range[1] and n <= range[2] then code = range[3] end
	end
	return code
end

--- Find the code point range of a named block.
-- Compares blockName case-insensitively (via string.lower) against the third
-- field of each mBlocks entry; entries are read as {first, last, name}.
-- @param blockName  block name (string); nil or non-string yields nil
-- @return {first=..., last=...} for the first matching entry, or nil
function getDefaultRange(blockName)
	-- string.lower raises on nil, so treat a missing name like an unknown one.
	if type(blockName) ~= "string" then return nil end
	local wanted = string.lower(blockName)
	for _, block in ipairs(mBlocks) do
		if wanted == string.lower(block[3]) then
			return { first = block[1], last = block[2] }
		end
	end
	return nil
end

--- Extract every "HEX-HEX" pair from a string.
-- Each endpoint may carry a prefix made of the characters U, u, X, x, 0, +
-- and -; the separator is a hyphen or an en dash.
-- @param str  text to scan
-- @return array of {first=number, last=number}, in order of appearance
function parseRanges(str)
	local pairPattern = "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)"
	local parsed = {}
	for loHex, hiHex in mw.ustring.gmatch(str, pairPattern) do
		local entry = {}
		entry.first = parseHex(loHex)
		entry.last = parseHex(hiHex)
		parsed[#parsed + 1] = entry
	end
	return parsed
end

--- Return the "abbreviation" alias of code point n, as reported by
-- getAliasData (nil when there is none).
function getAbbreviation(n)
	local abbreviation = getAliasData(n, "abbreviation")
	return abbreviation
end
--- Return the "control" alias of code point n, falling back to its
-- "figment" alias when there is no control alias (nil when neither exists).
function getControlName(n)
	local controlName = getAliasData(n, "control")
	if controlName then
		return controlName
	end
	return (getAliasData(n, "figment"))
end

--- Fetch an alias of a given kind for a code point.
-- mAliases[n] is read as an array of {kind, value} pairs. When several pairs
-- share the requested kind, the value of the last one is returned.
-- @param n    code point used as the key into mAliases
-- @param key  alias kind to look for (compared against the pair's first field)
-- @return the alias value, or nil when n has no entry or no pair matches
function getAliasData(n, key)
	local value
	local entries = mAliases[n]
	if entries ~= nil then
		for _, entry in ipairs(entries) do
			if entry[1] == key then
				value = entry[2]
			end
		end
	end
	return value
end

--- Convert a hexadecimal string to a number.
-- @param s  hex digits; nil or false yields nil
-- @return the number, or nil when s is falsy or not valid base-16
function parseHex(s)
	if not s then
		return nil
	end
	return tonumber(s, 16)
end
--- Decide whether a string is treated as unusable as a link target.
-- @param str  a string, or a code point number (converted with mw.ustring.char)
-- @return true when mUnicode.is_valid_pagename rejects it, when it contains
--         "<" or ">", or when it is exactly one byte long and that byte is
--         "/", ".", ":" or "_"; false otherwise
function isBadTitle(str)
	if type(str) == "number" then str = mw.ustring.char(str) end
	if not mUnicode.is_valid_pagename(str) then return true end
	-- Inside a [...] set these characters need no escaping. The former
	-- backslash escapes (e.g. "\<") are rejected as invalid escape sequences
	-- by Lua 5.2 and later, while producing these same strings on Lua 5.1.
	if mw.ustring.match(str, "[<>]") then return true end
	if #str == 1 and mw.ustring.match(str, "[/.:_]") then return true end
	return false
end

-- Render the <td> for code point n of a chart row.
-- Cells outside [first, last] are emitted as empty "excluded" cells.
-- counts is a table with numeric fields white, control, reserved and nonchar;
-- the field matching the kind of cell produced is incremented in place.
-- linkmode is the raw "link" argument ("yes", "ifexist", or anything else).
local function renderCell(n, first, last, linkmode, counts)
	if n < first or n > last then
		return '<td class="excluded"></td>'
	end

	-- Only looked up for cells that are actually part of the range.
	local charname = mUnicode.lookup_name(n)
	if string.match(charname, '<control') then
		counts.control = counts.control + 1
		local abbr = getAbbreviation(n) or ''
		local controlName = getControlName(n) or "&lt;control&gt;"
		return string.format('<td title="U+%04X %s" class="char control"><span>%s</span></td>', n, controlName, abbr)
	elseif string.match(charname, '<reserved') then
		counts.reserved = counts.reserved + 1
		return string.format('<td title="U+%04X RESERVED" class="reserved"></td>', n)
	elseif string.match(charname, '<noncharacter') then
		counts.nonchar = counts.nonchar + 1
		return string.format('<td title="U+%04X NONCHARACTER" class="nonchar"></td>', n)
	end

	local sClass = ''
	local sCode = getScriptCode(n)
	if sCode then sClass = string.format(' script-%s', sCode) end

	local str = mw.ustring.char(n)
	local isWhite = mUnicode.is_whitespace(n)
	if isWhite then
		-- Without this increment the whitespace footnote is never emitted.
		counts.white = counts.white + 1
		sClass = sClass..' whitespace'
	end

	local badTitle = isBadTitle(str) or isWhite
	local doLink = false
	if not badTitle then
		if linkmode == "yes" then
			doLink = true
		elseif linkmode == "ifexist" then
			-- mw.title.new returns nil for a title it cannot construct.
			local title = mw.title.new(str, 0)
			doLink = title ~= nil and title.exists
		end
	end

	if doLink then str = "[["..str.."]]" end
	if badTitle then str = '&#'..n..';' end
	if isWhite then str = string.format("<span>%s</span>", str) end

	-- NOTE(review): is_rtl receives the decorated string (possibly wrapped in
	-- [[ ]] or <span>, or replaced by a numeric entity), as before; confirm
	-- that mUnicode.is_rtl tolerates that input.
	local sDir = ''
	if mUnicode.is_rtl(str) then sDir = ' dir="rtl"' end
	return string.format('<td title="U+%04X %s" class="char%s"%s>%s</td>', n, charname, sClass, sDir, str)
end

--- Exported entry point: build the chart table for a Unicode block.
-- Arguments are read through mArguments.getArgs(frame):
--   name            block name (required); used for the title link and to
--                   look up the default range
--   ranges / range  optional list of HEX-HEX ranges overriding the default
--   link            "yes" links every linkable character, "ifexist" links
--                   only those whose title exists
--   version         optional; adds an "As of Unicode version ..." footnote
--   refs / notes / ref / note   optional extra footnote wikitext
--   pdf             optional chart URL; otherwise derived from the first
--                   code point of the block's default range, when known
-- @param frame  frame object (must provide extensionTag and preprocess)
-- @return the preprocessed wikitext/HTML of the chart
function p.main( frame )
	local args = {}
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end

	local blockName, linkmode, version = args["name"], args["link"], args["version"]
	if not blockName then
		-- Previously surfaced as an opaque "bad argument to 'lower'" error.
		error('Unicode chart: the "name" parameter is required', 0)
	end
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	local defaultRange = getDefaultRange(blockName)
	-- With an unknown block and no explicit ranges this leaves ranges empty,
	-- which yields a chart without data rows.
	if #ranges == 0 then ranges = { defaultRange } end

	-- The default PDF link needs the block's first code point; skip the link
	-- instead of failing when the block name is not recognised.
	local pdf = args["pdf"]
	if not pdf and defaultRange then
		pdf = string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)
	end

	local tableBody = {}
	local counts = { white = 0, control = 0, reserved = 0, nonchar = 0 }
	for _, range in ipairs(ranges) do
		-- Accept ranges given in either order.
		local first, last = math.min(range.first, range.last), math.max(range.first, range.last)
		-- Round both ends down to the start of their 16-cell row.
		local firstR, lastR = (first-first%16), (last-last%16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)
			for c = 0, 15 do
				table.insert(dataRow, renderCell(r + c, first, last, linkmode, counts))
			end
			table.insert(tableBody, table.concat({ '<tr>', rowHeader, table.concat(dataRow), '</tr>' }))
		end
	end

	local tableOpen, tableClose = '<table class="wikitable nounderlines unicode-chart">', '</table>'

	-- Footnote templates; the %s slots carry singular/plural inflections.
	local whiteRefFmt = '<ref name="white">White area%s within light green cell%s show%s size of otherwise invisible whitespace character%s.</ref>'
	local controlRefFmt = '<ref name="control">Light blue cell%s indicate%s non-printable control character%s.</ref>'
	local reservedRefFmt = '<ref name="reserved">Gray cell%s indicate%s unassigned (reserved) code point%s.</ref>'
	local noncharRefFmt = '<ref name="nonchar">Black cell%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard).</ref>'
	local whiteRef, controlRef, reservedRef, noncharRef = '', '', '', ''
	if counts.white == 1 then whiteRef = string.format(whiteRefFmt,  '', '','s', '' ) end
	if counts.white >= 2 then whiteRef = string.format(whiteRefFmt, 's','s', '',  's') end
	if counts.control == 1 then controlRef = string.format(controlRefFmt,  '', 's', '' ) end
	if counts.control >= 2 then controlRef = string.format(controlRefFmt, 's', '',  's') end
	if counts.reserved == 1 then reservedRef = string.format(reservedRefFmt,  '', 's', '' ) end
	if counts.reserved >= 2 then reservedRef = string.format(reservedRefFmt, 's', '',  's') end
	if counts.nonchar == 1 then noncharRef = string.format(noncharRefFmt, '','s', '', '', 'is','an ', '' ) end
	if counts.nonchar >= 2 then noncharRef = string.format(noncharRefFmt,'s','', 's','s','are',   '','s' ) end

	local versionRef = ''
	if version then versionRef = string.format('<ref name="version">As of Unicode version %s.</ref>', version) end
	local refs = table.concat({ whiteRef, controlRef, reservedRef, noncharRef, versionRef, userRefs})

	local titleBar = string.format('<div class="title">[[%s (Unicode block)|%s]]%s</div>', blockName, blockName, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty"></th>' }
	for c = 0, 15 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')

	-- The notes row is only emitted when at least one footnote exists.
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }
	local cStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/script styles.css'} }
	local html = table.concat({
		tStyles, cStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		notesFooter, tableClose
		})
	return frame:preprocess(html)
end
		
return p