Jump to content

Module:Unicode chart

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Cobaltcigs (talk | contribs) at 03:55, 13 September 2019 (added ability to override link, display text, box status). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mAliases = require('Module:Unicode data/aliases')
local mBlocks = require('Module:Unicode data/blocks')
local mScripts = require('Module:Unicode data/scripts')
local mArguments = require('Module:Arguments')
local p = {} 

-- Returns the script code (third field of a range entry) for code point n,
-- or nil when n falls inside none of the entries of mScripts.ranges.
-- The result is held in a function-local variable so that a code point with
-- no matching range can never inherit the code found by an earlier call.
-- Every entry is scanned and the last matching one wins, as before.
function getScriptCode(n)
	local code = nil
	for _, range in ipairs(mScripts.ranges) do
		if n >= range[1] and n <= range[2] then code = range[3] end
	end
	return code
end

-- Raises an error whose message is built printf-style: the first argument is
-- the format string, the remaining arguments are its values.
-- Level 0 is passed to error() so no position information is prepended.
function errorFormat(...)
	local formatArgs = {...}
	local message = string.format(unpack(formatArgs))
	return error(message, 0)
end
-- Message template used when a block name is not found; %s is the name given.
local errBadBlockName = 'Unrecognized block name "%s" does not match those defined in [[Module:Unicode data/blocks]].'

-- Finds the entry of mBlocks whose name (third field) equals blockName,
-- compared case-insensitively via string.lower, and returns its bounds as
-- {first=..., last=...}. Returns nothing when no entry matches.
function getDefaultRange(blockName)
	local wanted = string.lower(blockName)
	for _, block in ipairs(mBlocks) do
		local candidate = string.lower(block[3])
		if candidate == wanted then
			return { first = block[1], last = block[2] }
		end
	end
end

-- Extracts every "<hex>-<hex>" pair from str (hyphen or en dash separator).
-- Any run of the characters U, u, X, x, 0, + and - in front of either number
-- is skipped, so forms such as "U+0041-U+005A" or "0x41–5A" are accepted.
-- Returns a list of {first=..., last=...} tables in the order found; the list
-- is empty when str contains no such pair.
function parseRanges(str)
	local pairPattern = "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)"
	local found = {}
	for lowHex, highHex in mw.ustring.gmatch(str, pairPattern) do
		local entry = { first = parseHex(lowHex), last = parseHex(highHex) }
		found[#found + 1] = entry
	end
	return found
end

-- Looks up the "abbreviation" alias of code point n via getAliasData.
function getAbbreviation(n)
	return getAliasData(n, "abbreviation")
end
-- Returns the "control" alias of code point n, falling back to its "figment"
-- alias when there is no "control" one.
function getControlName(n)
	local name = getAliasData(n, "control")
	if not name then
		name = getAliasData(n, "figment")
	end
	return name
end

-- Returns the value of the alias of type `key` recorded for code point n in
-- mAliases, or nil when n has no entry or no alias of that type.
-- Each alias is a pair {type, value}; when several aliases share the type,
-- the last one in the list is returned.
function getAliasData(n, key)
	local aliasList = mAliases[n]
	if aliasList == nil then
		return nil
	end
	local value = nil
	for _, alias in ipairs(aliasList) do
		local aliasType = alias[1]
		if aliasType == key then
			value = alias[2]
		end
	end
	return value
end

-- Converts a hexadecimal string to a number; returns nil when s is nil/false
-- or is not valid base-16 text.
function parseHex(s)
	if not s then
		return nil
	end
	return tonumber(s, 16)
end
-- Reports whether str should not be used as a link target.
-- str may be a string, a code point number (converted with mw.ustring.char),
-- or nil. Returns true when:
--   * str is nil;
--   * mUnicode.is_valid_pagename rejects it;
--   * it contains "<" or ">";
--   * it is exactly one of the single characters "/", ".", ":" or "_".
-- Returns false otherwise.
function isBadTitle(str)
	if str == nil then return true end
	if type(str) == "number" then str = mw.ustring.char(str) end
	if not mUnicode.is_valid_pagename(str) then return true end
	-- None of these characters is magic inside a [...] set, so they are
	-- written without backslashes. Escapes such as "\<" are accepted only by
	-- Lua 5.1 and are a syntax error from Lua 5.2 onwards.
	if mw.ustring.match(str, "[<>]") then return true end
	-- #str is a byte length; every character tested here is a single byte.
	if #str == 1 and mw.ustring.match(str, "[/.:_]") then return true end
	return false
end

-- Fetches a per-code-point parameter from args.
-- Lookup order:
--   1. "<key>_<NNNN>"  - n as at least four upper-case hex digits;
--   2. "<key>_<NNN>x"  - the 16-code-point row containing n (only when c is truthy);
--   3. "<key>"         - the chart-wide value (only when c is truthy).
-- Returns nil when nothing applicable is set.
function getParamNx(args, key, n, c)
	local exact = args[string.format("%s_%04X", key, n)]
	if exact then
		return exact
	end
	if not c then
		return nil
	end
	local rowKey = string.format("%s_%03Xx", key, math.floor(n/16))
	return args[rowKey] or args[key]
end

-- Entry point: renders the code chart of a Unicode block as an HTML table.
--
-- Arguments read from the frame (through Module:Arguments):
--   name / 1                  block name, matched against Module:Unicode data/blocks (required)
--   version                   Unicode version quoted in a footnote
--   refs / notes / ref / note extra footnote markup appended to the title
--   ranges / range            explicit hex ranges; defaults to the whole block
--   pdf                       URL of the official chart; defaults to the unicode.org PDF
--   box, box_NNNx, box_NNNN           "no" removes the box from control cells,
--                                     "yes" adds one to ordinary character cells
--   display_NNNN                      text shown in a control-character cell
--   link, link_NNNx, link_NNNN        "yes", "no", "wikt", "ifexist", or any other
--                                     value, which is used as the link target
--
-- Returns the table markup after frame:preprocess. Raises an error (without
-- position information) when the block name is missing or unrecognized.
function p.main( frame )
	-- Copy the arguments into a plain table.
	local args = {}
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end

	local blockName, version = args["name"] or args[1], args["version"]
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	-- Without this check a missing name fails inside string.lower with a
	-- message that says nothing about the template parameter.
	if blockName == nil then
		error('Missing block name: specify it with the "name" parameter or as the first unnamed parameter.', 0)
	end
	local defaultRange = getDefaultRange(blockName)
	if defaultRange == nil then errorFormat(errBadBlockName, blockName) end

	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	if #ranges == 0 then ranges = { defaultRange } end

	local pdf = args["pdf"] or string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)
	local tableBody = {}
	local whiteCount, controlCount, reservedCount, noncharCount = 0,0,0,0
	for _, range in ipairs(ranges) do
		-- Accept ranges written in either order.
		local first, last = math.min(range.first, range.last), math.max(range.first, range.last)
		-- Round both ends down to the start of their 16-code-point row.
		local firstR, lastR = (first-first%16), (last-last%16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowOpen, rowClose = '<tr>', '</tr>'
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)

			for c = 0,15,1 do
				local n = (r+c)
				local cell = ''
				if n < first or n > last then
					-- Padding cell: part of the row but outside the requested range.
					cell = '<td class="excluded"></td>'
				else
					local charname = mUnicode.lookup_name(n)
					local box = getParamNx(args, "box", n, true)
					if string.match(charname, '<control') then
						controlCount = controlCount + 1
						local str = getParamNx(args, "display", n, false) or getAbbreviation(n) or ''
						charname = getControlName(n) or "&lt;control&gt;"
						-- Control cells are boxed unless explicitly disabled.
						local boxClass = ' box'
						if box == "no" then boxClass = '' end
						local fmt = '<td title="U+%04X %s" class="char control%s"><span>%s</span></td>'
						cell = string.format(fmt, n, charname, boxClass, str)
					elseif string.match(charname, '<reserved') then
						reservedCount = reservedCount + 1
						cell = string.format('<td title="U+%04X RESERVED" class="reserved"></td>', n)
					elseif string.match(charname, '<noncharacter') then
						noncharCount = noncharCount + 1
						cell = string.format('<td title="U+%04X NONCHARACTER" class="nonchar"></td>', n)
					else
						local sCode = getScriptCode(n)
						local sClass = ""
						if sCode then sClass = string.format(' script-%s', sCode) end
						if box == "yes" then sClass = sClass..' box' end
						local isWhite = mUnicode.is_whitespace(n)
						if isWhite then
							whiteCount = whiteCount + 1
							sClass = sClass..' whitespace'
						end
						local char = mw.ustring.char(n)

						-- Combining marks are shown on a dotted circle. The display
						-- text is built before the link so the link wraps it instead
						-- of being overwritten by it.
						local display = char
						if mUnicode.is_combining(n) then
							display = "◌"..char
						end

						local badTitle = isBadTitle(char) or isWhite
						local linkThis = getParamNx(args, "link", n, true)
						if linkThis == "no" then
							linkThis = nil
						elseif linkThis == "yes" or linkThis == "wikt" or linkThis == "ifexist" then
							-- These three modes derive the target from the character
							-- itself, so they are skipped when it cannot form a title.
							if badTitle then
								linkThis = nil
							elseif linkThis == "yes" then
								linkThis = char
							elseif linkThis == "wikt" then
								linkThis = ":wikt:"..char
							else
								-- "ifexist": test the character's own page. The title
								-- object is checked for nil before it is indexed.
								local title = mw.title.new(char, 0)
								if title and title.exists then linkThis = char
								else linkThis = nil end
							end
						end
						-- Any other non-nil value is a target supplied by the caller.

						local str = display
						if linkThis then
							str = string.format("[[%s|%s]]", linkThis, display)
						end
						-- Direction is decided from the character alone, not from
						-- link markup whose target may contain other letters.
						local sDir = ''
						if mUnicode.is_rtl(char) then sDir = ' dir="rtl"' end
						local cellFmt = '<td title="U+%04X %s" class="char%s"%s><span>%s</span></td>'
						cell = string.format(cellFmt, n, charname, sClass, sDir, str)
					end
				end
				table.insert(dataRow, cell)
			end
			local rowHtml = {rowOpen, rowHeader, table.concat(dataRow), rowClose}
			table.insert(tableBody, table.concat(rowHtml))
		end
	end

	local tableOpen, tableClose = '<table class="wikitable nounderlines unicode-chart">', '</table>'

	-- Footnote templates; the %s slots carry singular/plural word endings.
	local whiteRefFmt = '<ref name="white">White area%s within light green cell%s show%s size of otherwise invisible whitespace character%s.</ref>'
	local controlRefFmt = '<ref name="control">Light blue cell%s indicate%s non-printable control character%s.</ref>'
	local reservedRefFmt = '<ref name="reserved">Gray cell%s indicate%s unassigned (reserved) code point%s.</ref>'
	local noncharRefFmt = '<ref name="nonchar">Black cell%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard).</ref>'
	local whiteRef, controlRef, reservedRef, noncharRef = '', '', '', ''
	-- A footnote is emitted only for cell kinds that occur in the chart.
	if whiteCount == 1 then whiteRef = string.format(whiteRefFmt,  '', '','s', '' )
	elseif whiteCount >= 2 then whiteRef = string.format(whiteRefFmt, 's','s', '',  's') end
	if controlCount == 1 then controlRef = string.format(controlRefFmt,  '', 's', '' )
	elseif controlCount >= 2 then controlRef = string.format(controlRefFmt, 's', '',  's') end
	if reservedCount == 1 then reservedRef = string.format(reservedRefFmt,  '', 's', '' )
	elseif reservedCount >= 2 then reservedRef = string.format(reservedRefFmt, 's', '',  's') end
	if noncharCount == 1 then noncharRef = string.format(noncharRefFmt, '','s', '', '', 'is','an ', '' )
	elseif noncharCount >= 2 then noncharRef = string.format(noncharRefFmt,'s','', 's','s','are',   '','s' ) end

	local versionRef = ''
	if version then versionRef = string.format('<ref name="version">As of Unicode version %s.</ref>', version) end
	local refs = table.concat({ whiteRef, controlRef, reservedRef, noncharRef, versionRef, userRefs})

	local titleBar = string.format('<div class="title">[[%s (Unicode block)|%s]]%s</div>', blockName, blockName, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	-- 17 columns: one row-header column plus sixteen character columns.
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty"></th>' }
	for c = 0,15,1 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }
	local cStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/script styles.css'} }
	local html = table.concat({
		tStyles, cStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		notesFooter, tableClose
		})
	mw.log(html)
	-- preprocess expands the <ref> tags and {{reflist}} contained in the markup.
	return frame:preprocess(html)
end
		
return p