Jump to content

Module:Unicode chart

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Cobaltcigs (talk | contribs) at 15:40, 11 September 2019 (count reserved/nonchars to determine note content, parse multiple ranges from a single parameter). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local libUtil = require('libraryUtil')
local checkType = libUtil.checkType
local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mBlocks = require('Module:Unicode data/blocks')
local mScripts = require('Module:Unicode data/scripts')
local mArguments = require('Module:Arguments')
local p = {} 

--- Looks up the script code for a code point.
-- Scans mScripts.ranges in order; each entry is read as
-- {firstCodePoint, lastCodePoint, scriptCode}.
--
-- The result is deliberately NOT cached in a module-level variable: a
-- shared variable would make a code point that matches no range silently
-- return whatever the previous call found.
--
-- NOTE(review): returns on the first matching range; this assumes the
-- ranges do not overlap (with overlapping ranges the earlier code would
-- have picked the last match) -- confirm against Module:Unicode data/scripts.
--
-- @param n code point (number)
-- @return the script code of the range containing n, or nil if none does
function getScriptCode(n)
	for _, range in ipairs(mScripts.ranges) do
		if n >= range[1] and n <= range[2] then
			return range[3]
		end
	end
	return nil
end

--- Finds the code point range of a named Unicode block.
-- Compares blockName case-insensitively against the third field of each
-- entry in mBlocks; entries are read as {first, last, name}.
--
-- @param blockName block name to search for; nil is tolerated
-- @return a table {first=..., last=...} for the matching block, or nil
--         when blockName is nil or no block has that name
function getDefaultRange(blockName)
	-- A missing name cannot match anything; report it the same way as an
	-- unknown name instead of failing inside string.lower.
	if blockName == nil then return nil end

	local wanted = string.lower(blockName)
	for _, block in ipairs(mBlocks) do
		if wanted == string.lower(block[3]) then
			return { first = block[1], last = block[2] }
		end
	end
	return nil
end

--- Extracts every "first-last" hexadecimal range found in a string.
-- Each bound may carry a prefix made of the characters U, u, X, x, 0, +
-- or -; the two bounds are separated by a hyphen or an en dash.
--
-- @param str text to scan
-- @return a sequence of {first=number, last=number} tables in the order
--         the ranges appear; empty when nothing matches
function parseRanges(str)
	local rangePattern = "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)"
	local found = {}
	for lowHex, highHex in mw.ustring.gmatch(str, rangePattern) do
		found[#found + 1] = { first = parseHex(lowHex), last = parseHex(highHex) }
	end
	return found
end

--- Converts a hexadecimal string to a number.
-- @param s hexadecimal digits, or nil/false
-- @return the parsed number; nil when s is nil/false or is not valid base-16
function parseHex(s)
	if not s then
		return nil
	end
	return tonumber(s, 16)
end
--- Builds the {{Script|code|char}} wikitext for one code point.
-- The character is optionally wrapped in a wikilink to the page whose
-- title is that character.
--
-- NOTE(review): the character is inserted into the template call as-is;
-- characters such as "|" or "=" may need escaping there -- verify.
--
-- @param n        code point (number)
-- @param linkmode "yes" to always link, "ifexist" to link only when the
--                 page exists; any other value (including nil) never links
-- @return the template call as a string, or '' when no script code is
--         known for n
function scriptTemplate(n, linkmode)
	local code, s = getScriptCode(n), mw.ustring.char(n)
	if code == nil or s == nil then return '' end

	-- Kept local: an undeclared assignment here would create a global.
	local doLink = false
	if mUnicode.is_valid_pagename(s) then
		if linkmode == "yes" then
			doLink = true
		elseif linkmode == "ifexist" then
			-- mw.title.new returns nil for a title it cannot construct,
			-- so guard before reading .exists.
			local title = mw.title.new(s, 0)
			doLink = title ~= nil and title.exists
		end
	end

	if doLink then s = string.format("[[%s]]", s) end
	return string.format("{{Script|%s|%s}}", code, s)
end

--- Template entry point: renders a Unicode block as a 16-column chart.
--
-- Arguments read (via Module:Arguments):
--   name            block name; used for the title link and to look up
--                   the block's default range (required)
--   ranges / range  optional hex ranges, parsed by parseRanges; when none
--                   parse, the block's default range is charted
--   link            link mode handed to scriptTemplate
--   version         optional; adds an "As of Unicode version ..." footnote
--   refs / notes / ref / note   optional extra footnote markup
--   pdf             optional URL replacing the generated code-chart link
--
-- Raises an error when "name" is missing, or when the block name is
-- unknown and no usable range was supplied.
--
-- @param frame the Scribunto frame object
-- @return the chart markup after frame:preprocess
function p.main( frame )
	-- Copy the arguments into a plain table.
	local args = {}
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end

	local blockName, linkmode, version = args["name"], args["link"], args["version"]
	if not blockName then
		error('Unicode chart: the "name" parameter (Unicode block name) is required', 0)
	end
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	local defaultRange = getDefaultRange(blockName)
	if #ranges == 0 then
		if not defaultRange then
			error(string.format('Unicode chart: unknown block name "%s" and no valid "ranges" given', blockName), 0)
		end
		ranges = { defaultRange }
	end

	-- The generated PDF URL needs the block's first code point; with an
	-- unknown block and no explicit "pdf" the link is simply omitted.
	local pdf = args["pdf"]
	if not pdf and defaultRange then
		pdf = string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)
	end

	local tableBody = {}
	local reservedCount, noncharCount = 0, 0
	for _, range in ipairs(ranges) do
		-- Accept ranges given in either order.
		local first, last = math.min(range.first, range.last), math.max(range.first, range.last)
		-- Round both ends down to the start of their 16-code-point row.
		local firstR, lastR = (first - first % 16), (last - last % 16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)

			for c = 0, 15 do
				local n = r + c
				local cell
				if n < first or n > last then
					-- Padding cell: in the row but outside the requested range.
					cell = '<td class="excluded" />'
				else
					-- Only look the name up for cells that actually use it.
					local charname = mUnicode.lookup_name(n)
					if string.match(charname, '<reserved') then
						reservedCount = reservedCount + 1
						cell = string.format('<td title="U+%X RESERVED" class="reserved" />', n)
					elseif string.match(charname, '<noncharacter') then
						noncharCount = noncharCount + 1
						cell = string.format('<td title="U+%X NONCHARACTER" class="nonchar" />', n)
					else
						local str = scriptTemplate(n, linkmode)
						cell = string.format('<td title="U+%X %s" class="char">%s</td>', n, charname, str)
					end
				end
				table.insert(dataRow, cell)
			end
			table.insert(tableBody, table.concat({ '<tr>', rowHeader, table.concat(dataRow), '</tr>' }))
		end
	end

	local tableOpen, tableClose = '<table class="wikitable nounderlines unicode-chart">', '</table>'

	-- Footnotes are only emitted for cell kinds that actually occurred, and
	-- are worded in the singular or plural according to the count.
	local grayRef, blackRef = '', ''
	local grayFmt = '<ref name="gray">Gray area%s indicate%s non-assigned code point%s</ref>'
	local blackFmt = '<ref name="black">Black area%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard)</ref>'
	if reservedCount == 1 then grayRef = string.format(grayFmt,  '', 's', '' ) end
	if reservedCount >= 2 then grayRef = string.format(grayFmt, 's', '',  's') end
	if noncharCount == 1 then blackRef = string.format(blackFmt, '','s', '', '', 'is','an ', '' ) end
	if noncharCount >= 2 then blackRef = string.format(blackFmt,'s','', 's','s','are',   '','s' ) end

	local versionRef = ''
	if version then versionRef = string.format('<ref name="version">As of Unicode version %s</ref>', version) end
	local refs = table.concat({ userRefs, grayRef, blackRef, versionRef})

	local titleBar = string.format('<div class="title">[[%s (Unicode block)|%s]]%s</div>', blockName, blockName, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty" />' }
	for c = 0, 15 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')

	-- The notes row is only needed when at least one footnote exists.
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }

	local wt = { tStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		notesFooter, tableClose
		}
	local html = table.concat(wt)
	return frame:preprocess(html)
end
		
return p