-- Module:Unicode chart
--
-- From Wikipedia, the free encyclopedia.
-- This is an old revision of this page, as edited by Cobaltcigs (talk | contribs) at 20:09, 13 September 2019 (edit summary: "mongolian FVS have abbreviations in the aliases file"). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mAliases = require('Module:Unicode data/aliases')
local mBlocks = require('Module:Unicode data/blocks')
local mControl = require('Module:Unicode data/control')
local mScripts = require('Module:Unicode data/scripts')
local mArguments = require('Module:Arguments')
local p = {} 

-- Looks up the script code for code point n in mScripts.ranges.
-- Each range entry is read as { first, last, code }. Every entry is scanned,
-- so if several ranges contain n the last one in the list wins.
-- @param n  code point (number)
-- @return   the third field of the matching range entry, or nil when no
--           range contains n
function getScriptCode(n)
	-- The result must be local to each call. It previously lived in a
	-- file-level variable, so a code point outside every range silently
	-- inherited the script found by the previous call.
	local scriptCode = nil
	for _, r in ipairs(mScripts.ranges) do
		if n >= r[1] and n <= r[2] then scriptCode = r[3] end
	end
	return scriptCode
end

-- Raises an error whose message is built by string.format.
-- The arguments are forwarded as-is (format string first, then its values).
-- Error level 0 is used, so no position information is prepended to the
-- message.
-- @param ...  format string followed by the values it consumes
function errorFormat(...) return error(string.format(...), 0) end
-- Message template used when a requested block name is not found; %s receives the name.
local errBadBlockName = 'Unrecognized block name "%s" does not match those defined in [[Module:Unicode data/blocks]].'

-- Finds the code point range of a named block in mBlocks.
-- Each mBlocks entry is read as { first, last, name }; names are compared
-- case-insensitively via string.lower.
-- @param blockName  block name to look for (string)
-- @return           { first = ..., last = ... } for the first entry whose
--                   name matches; nothing when no entry matches
function getDefaultRange(blockName)
	local wanted = string.lower(blockName)
	for _, block in ipairs(mBlocks) do
		local candidate = string.lower(block[3])
		if candidate == wanted then
			return { first = block[1], last = block[2] }
		end
	end
	-- no match: fall off the end so the caller sees nil
end

-- Extracts every "<hex>-<hex>" pair from a string.
-- Each number may be preceded by any run of the characters U, u, X, x, 0,
-- + and -, and the two numbers are separated by a hyphen or an en dash.
-- @param str  text to scan
-- @return     list of { first = number, last = number } tables in the order
--             found; empty when nothing matches
function parseRanges(str)
	local pattern = "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)"
	local ranges = {}
	for lo, hi in mw.ustring.gmatch(str, pattern) do
		ranges[#ranges + 1] = { first = parseHex(lo), last = parseHex(hi) }
	end
	return ranges
end

-- Returns the "abbreviation" alias recorded for code point n, or nil.
function getAbbreviation(n)
	local abbreviation = getAliasData(n, "abbreviation")
	return abbreviation
end
-- Returns the "control" alias for code point n, falling back to its
-- "figment" alias when there is no "control" alias.
function getControlName(n)
	local name = getAliasData(n, "control")
	if name then return name end
	local fallback = getAliasData(n, "figment")
	return fallback
end

-- Looks up an alias of a given kind for code point n in mAliases.
-- mAliases[n] is read as a list of { kind, value } pairs. The whole list is
-- scanned, so when several pairs share the kind the last one wins.
-- @param n    code point used as the key into mAliases
-- @param key  alias kind to look for (e.g. "abbreviation", "control")
-- @return     the alias value, or nil when n has no entry or no pair matches
function getAliasData(n, key)
	local entries = mAliases[n]
	local found = nil
	if entries ~= nil then
		for _, entry in ipairs(entries) do
			if entry[1] == key then found = entry[2] end
		end
	end
	return found
end

-- True when mUnicode.lookup_control classifies code point n as "control".
function isControl(n)
	local category = mUnicode.lookup_control(n)
	return category == "control"
end
-- True when mUnicode.lookup_control classifies code point n as "format".
function isFormat(n)
	local category = mUnicode.lookup_control(n)
	return category == "format"
end

-- Converts a hexadecimal string to a number.
-- @param s  hex digits, or nil/false
-- @return   the number, or nil when s is nil/false or is not valid base-16
function parseHex(s)
	return s and tonumber(s, 16) or nil
end
-- Decides whether a string (or code point) must not be used as a page title.
-- @param str  candidate title; a number is treated as a code point and
--             converted to its character first
-- @return     true when str is nil, fails mUnicode.is_valid_pagename,
--             contains "<" or ">", or is exactly one character out of
--             "/", ".", ":", "_" and U+0338; false otherwise
function isBadTitle(str)
	if str == nil then return true end
	if type(str) == "number" then str = mw.ustring.char(str) end
	if not mUnicode.is_valid_pagename(str) then return true end
	if mw.ustring.match(str, "[<>]") then return true end
	-- Count code points, not bytes: U+0338 in the set below is two bytes in
	-- UTF-8, so a byte-length test of 1 could never match it.
	if mw.ustring.len(str) == 1 and mw.ustring.match(str, "[/.:_̸]") then return true end
	return false
end

-- Fetches a per-code-point template parameter.
-- Lookup order: "<key>_XXXX" (four or more hex digits of n); then, only when
-- c is truthy, "<key>_XXXx" (the 16-code-point row containing n) and finally
-- the bare "<key>".
-- @param args  table of template arguments
-- @param key   parameter base name
-- @param n     code point
-- @param c     whether to fall back to the row-level and bare parameters
-- @return      the first value found, or nil
function getParamNx(args, key, n, c)
	local exact = args[string.format("%s_%04X", key, n)]
	if exact then return exact end
	if not c then return nil end
	local rowKey = string.format("%s_%03Xx", key, math.floor(n/16))
	return args[rowKey] or args[key]
end
-- Builds the automatic <ref> footnotes that explain the cell colours.
-- For each of white, control, format, reserved and nonchar (in that order) a
-- note is emitted when the corresponding count is 1 (singular wording) or at
-- least 2 (plural wording). count.combining is not consulted.
-- @param count  table with numeric fields white, control, format, reserved,
--               nonchar
-- @return       the concatenated <ref> tags, or '' when every count is 0
function getAutoRefs(count)
	local expand = unpack or table.unpack
	-- key: field of count; text: note template; one/many: the words filling
	-- the %s slots for the singular and plural forms
	local notes = {
		{
			key = 'white',
			text = '<ref name="white">White area%s within light green cell%s show%s size of otherwise invisible whitespace character%s.</ref>',
			one = { '', '', 's', '' },
			many = { 's', 's', '', 's' },
		},
		{
			key = 'control',
			text = '<ref name="control">Light blue cell%s indicate%s non-printable control character%s.</ref>',
			one = { '', 's', '' },
			many = { 's', '', 's' },
		},
		{
			key = 'format',
			text = '<ref name="format">Pink cell%s indicate%s non-printable format character%s.</ref>',
			one = { '', 's', '' },
			many = { 's', '', 's' },
		},
		{
			key = 'reserved',
			text = '<ref name="reserved">Gray cell%s indicate%s unassigned (reserved) code point%s.</ref>',
			one = { '', 's', '' },
			many = { 's', '', 's' },
		},
		{
			key = 'nonchar',
			text = '<ref name="nonchar">Black cell%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard).</ref>',
			one = { '', 's', '', '', 'is', 'an ', '' },
			many = { 's', '', 's', 's', 'are', '', 's' },
		},
	}
	local pieces = {}
	for _, note in ipairs(notes) do
		local n = count[note.key]
		local words = nil
		if n == 1 then
			words = note.one
		elseif n >= 2 then
			words = note.many
		end
		if words then
			pieces[#pieces + 1] = string.format(note.text, expand(words))
		end
	end
	return table.concat(pieces)
end

-- Wraps text in a span whose class records how many whitespace runs it had.
-- Every run of whitespace in str is replaced by a single newline, and the
-- number of replacements N becomes the class name "small-N".
-- @param str  text to wrap
-- @return     '<span class="small-N">…</span>' containing the rewritten text
function scaleMe(str)
	local text, breaks = mw.ustring.gsub(str, '%s+', '\n')
	local wrapped = string.format('<span class="small-%s">%s</span>', breaks, text)
	return wrapped
end

-- Entry point: renders a Unicode block as a 16-column HTML code chart.
--
-- Template arguments read from the frame (via Module:Arguments):
--   block / name / 1              block name, matched against Module:Unicode data/blocks
--   link_block / link_name        link target for the title (default "<block> (Unicode block)")
--   display_block / display_name  title text (default: the block name)
--   refs / notes / ref / note     extra footnote markup appended to the title
--   version                       Unicode version mentioned in a footnote
--   pdf                           URL of the code chart PDF (default built from the block start)
--   ranges / range                "XXXX-YYYY" ranges to show instead of the whole block
--   display_XXXX                  replacement text for one code point
--   box_XXXX, box_XXXx, box       "yes"/"no" box override per code point, per row or globally
--   link_XXXX, link_XXXx, link    "yes", "no", "wikt", "ifexist", or an explicit link target
--
-- @param frame  Scribunto frame object
-- @return       the preprocessed wikitext/HTML of the chart
-- Raises an error (through errorFormat) when the block name is missing or
-- unknown.
function p.main( frame )
	local args = {}
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end

	local blockName = args["block"] or args["name"] or args[1]
	-- Report a missing block name with the module's own message rather than
	-- crashing inside string.lower in getDefaultRange.
	if blockName == nil then errorFormat(errBadBlockName, '') end
	local defaultRange = getDefaultRange(blockName)
	if defaultRange == nil then errorFormat(errBadBlockName, blockName) end

	local blockNameLink = args["link_block"] or args["link_name"] or blockName.." (Unicode block)"
	local blockNameDisplay = args["display_block"] or args["display_name"] or blockName
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	local version = args["version"]
	local pdf = args["pdf"] or string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)

	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	if #ranges == 0 then ranges = { defaultRange } end

	local tableBody = {}
	-- Tallies of special cell kinds; getAutoRefs turns them into footnotes.
	local count = { white=0, control=0, format=0, reserved=0, nonchar=0, combining=0 }
	for i,range in ipairs(ranges) do
		-- Accept reversed ranges, then widen to whole rows of 16 code points.
		local first, last = math.min(range.first, range.last), math.max(range.first, range.last)
		local firstR, lastR = (first-first%16), (last-last%16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowOpen, rowClose = '<tr>', '</tr>'
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)

			for c = 0,15,1 do
				local n = (r+c)
				local charName = mUnicode.lookup_name(n)
				local cell = ''
				local display = getParamNx(args, "display", n, false)
				local box = getParamNx(args, "box", n, true)
				local sCode = getScriptCode(n)
				local sDir = ''
				local char = mw.ustring.char(n)
				if mUnicode.is_rtl(char) then sDir = ' dir="rtl"' end
				local sClass = ""
				if n < first or n > last then
					-- Padding cell: inside the row but outside the requested range.
					cell = '<td class="excluded"></td>'
				elseif isControl(n) then
					count.control = count.control + 1
					local str = display or getAbbreviation(n) or ''
					charName = getControlName(n) or "&lt;control&gt;"
					local boxClass = ' box'
					if box == "no" then boxClass = '' end
					local fmt = '<td title="U+%04X %s" class="char control%s"><div>\n%s\n</div></td>'
					cell = string.format(fmt, n, charName, boxClass, scaleMe(str))
				elseif isFormat(n) then
					count.format = count.format + 1
					local str = display or getAbbreviation(n) or mw.ustring.char(n)
					local boxClass = ' box'
					if box == "no" then boxClass = '' end
					local fmt = '<td title="U+%04X %s" class="char format%s"><div>\n%s\n</div></td>'
					cell = string.format(fmt, n, charName, boxClass, scaleMe(str))
				elseif string.match(charName, '<reserved') then
					count.reserved = count.reserved + 1
					cell = string.format('<td title="U+%04X RESERVED" class="reserved"></td>', n)
				elseif string.match(charName, '<noncharacter') then
					count.nonchar = count.nonchar + 1
					cell = string.format('<td title="U+%04X NONCHARACTER" class="nonchar"></td>', n)
				elseif string.match(charName, "VARIATION SELECTOR") then
					if string.match(charName, "MONGOLIAN") then
						-- Fall back to the full name so scaleMe never receives nil
						-- when the aliases data has no abbreviation.
						display = getAbbreviation(n) or charName
					else
						display = mw.ustring.gsub(charName, "VARIATION SELECTOR%-", "VS ")
					end
					display = scaleMe(display)
					local cellFmt = '<td title="U+%04X %s" class="char box vs"><div>%s</div></td>'
					cell = string.format(cellFmt, n, charName, display)
				elseif mUnicode.is_whitespace(n) then
					count.white = count.white + 1
					sClass = sClass..' whitespace'
					display = string.format("<span>%s</span>", char)
					local cellFmt = '<td title="U+%04X %s" class="char%s"%s><div>%s</div></td>'
					cell = string.format(cellFmt, n, charName, sClass, sDir, display)
				else
					if sCode then sClass = sClass..string.format(' script-%s', sCode) end
					if box == "yes" then sClass = sClass..' box' end
					display = display or char
					-- Resolve the link keyword to a target; any other value is
					-- used as the link target verbatim.
					local linkThis = getParamNx(args, "link", n, true)
					if linkThis=="yes" then linkThis = char
					elseif linkThis=="no" then linkThis = nil
					elseif linkThis=="wikt" then linkThis = ":wikt:"..char
					elseif linkThis=="ifexist" then
						-- Test the page named after the character itself (not the
						-- keyword "ifexist"); mw.title.new returns nil for
						-- titles it cannot construct.
						local title = mw.title.new(char, 0)
						if title and title.exists then linkThis = char
						else linkThis = nil end
					end
					local isCombining = mUnicode.is_combining(n)
					if isCombining then
						count.combining = count.combining + 1
						sClass = sClass.." combining"
						-- Show combining marks on a dotted-circle base.
						display = "◌"..char
					end
					-- Scale first, then wrap in the link: scaleMe rewrites
					-- whitespace, which must not touch the link target.
					display = scaleMe(display)
					if linkThis then
						display = string.format("[[%s|%s]]", linkThis, display)
					end
					local cellFmt = '<td title="U+%04X %s" class="char%s"%s><div>\n%s\n</div></td>'
					cell = string.format(cellFmt, n, charName, sClass, sDir, display)
				end
				table.insert(dataRow, cell)
			end
			local rowHtml = {rowOpen, rowHeader, table.concat(dataRow), rowClose}
			table.insert(tableBody, table.concat(rowHtml))
		end
	end

	local tableOpen, tableClose = '<table class="wikitable nounderlines unicode-chart">', '</table>'

	local autoRefs = getAutoRefs(count)
	local versionRef = ''
	if version then versionRef = string.format('<ref name="version">As of Unicode version %s.</ref>', version) end
	local refs = table.concat({ autoRefs, versionRef, userRefs})

	local titleBar = string.format('<div class="title">[[%s|%s]]%s</div>', blockNameLink, blockNameDisplay, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty"></th>' }
	for c = 0,15,1 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')
	-- The notes row is only emitted when there is at least one footnote.
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }
	local cStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/script styles.css'} }
	local html = table.concat({
		tStyles, cStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		notesFooter, tableClose
		})
	-- Preprocess so the <ref> tags and {{reflist}} are expanded.
	return frame:preprocess(html)
end
		
return p