Jump to content

Module:Unicode chart

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Erutuon (talk | contribs) at 18:17, 16 September 2019 (shorter). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mAliases = require('Module:Unicode data/aliases')
local mBlocks = require('Module:Unicode data/blocks')
local mControl = require('Module:Unicode data/control')
local mScripts = require('Module:Unicode data/scripts')
local mArguments = require('Module:Arguments')
local mEntities = require('Module:Unicode chart/entities')
-- Export table; returned at the end of the module.
local p = {} 

-- Module-level state shared by the helper functions below.
-- args: p.main copies the invocation arguments into this table.
local args = {}
-- infoMode: p.main sets this to true when the "info" argument is present and
-- is not "no".
local infoMode = false
-- infoTable: p.main appends one makeInfoRow() result per charted code point
-- while infoMode is on.
local infoTable = {}
--- Log a message through mw.log, with string.format-style substitution.
-- If the first argument is not a string it is logged as-is. Otherwise it is
-- used as a format string; nil and boolean arguments are converted with
-- tostring so they can be substituted by "%s", and missing arguments are
-- padded with "nil" so that every directive has a value.
-- NOTE(review): this global shadows Lua's standard `debug` library; the name
-- is kept so existing callers keep working.
-- @param ... format string followed by its values, or a single non-string value
function debug(...)
	-- select('#', ...) counts trailing and embedded nils; #a is unspecified
	-- for a table with holes.
	local argCount = select('#', ...)
	local a = {...}
	if type(a[1]) ~= "string" then mw.log(a[1]) return end
	-- Count directives: drop literal "%%" pairs, then count remaining "%".
	local _, c = string.gsub((string.gsub(a[1], "%%%%", "")), "%%", "")
	local last = math.max(argCount, c + 1)
	for i = 1, last do
		if a[i] == nil or type(a[i]) == "boolean" then a[i] = tostring(a[i]) end
	end
	-- Explicit bounds so unpack does not depend on the length operator.
	return mw.log(string.format(unpack(a, 1, last)))
end
--- Raise an error whose message is produced by string.format.
-- The error is raised with level 0, so no position information is prepended.
-- @param ... format string followed by its values
function errorFormat(...)
	local formatArgs = {...}
	local message = string.format(unpack(formatArgs))
	return error(message, 0)
end
-- Message template passed to errorFormat by p.main when getDefaultRange finds
-- no match; %s receives the block name that was looked up.
local errBadBlockName = 'Unrecognized block name "%s" does not match those defined in [[Module:Unicode data/blocks]].'

--- Look up the code point range of a Unicode block by name.
-- The comparison is case-insensitive. Each entry of mBlocks is read as
-- { firstCodePoint, lastCodePoint, name }.
-- @param blockName block name to search for; nil is tolerated
-- @return a range table built by makeRange, or nil when blockName is nil or
--         no block has that name
function getDefaultRange(blockName)
	-- string.lower(nil) would raise; treat a missing name as "not found" so
	-- the caller can report it.
	if blockName == nil then return nil end
	local wanted = string.lower(blockName)
	for _, block in ipairs(mBlocks) do
		if wanted == string.lower(block[3]) then
			return makeRange(block[1], block[2])
		end
	end
	return nil
end

--- Build a range table from two code points given in either order.
-- @param a one end of the range
-- @param b the other end of the range
-- @return table with fields first (the smaller value) and last (the larger)
function makeRange(a, b)
	local lo, hi = math.min(a, b), math.max(a, b)
	return { first = lo, last = hi }
end
--- Test whether a number lies inside a range, bounds included.
-- @param r range table with fields first and last
-- @param n number to test
-- @return true when r.first <= n <= r.last, false otherwise
function rangeContains(r, n)
	local low, high = r.first, r.last
	return low <= n and n <= high
end
--- Return the smallest range that covers both given ranges.
-- @param r1 range table with fields first and last
-- @param r2 range table with fields first and last
-- @return a new range table; neither argument is modified
function rangeCombine(r1, r2)
	local merged = {}
	merged.first = math.min(r1.first, r2.first)
	merged.last = math.max(r1.last, r2.last)
	return merged
end
--- Decide whether two ranges can be merged into one contiguous range.
-- Two ranges are mergeable when they overlap or are directly adjacent
-- (one ends exactly one code point before the other starts).
-- @param r1 range table with fields first and last, or nil
-- @param r2 range table with fields first and last, or nil
-- @return true when the ranges overlap or touch; false otherwise, including
--         when either argument is nil
function rangesMergeable(r1, r2)
	if not r1 or not r2 then return false end
	-- Interval test widened by one on each side to accept adjacency. Unlike a
	-- probe of the neighbours first-1 / last+1, this also holds for two
	-- identical ranges.
	return r1.first <= r2.last + 1 and r2.first <= r1.last + 1
end
--- Comparator for table.sort that orders ranges by start, then by end.
-- A missing first argument never sorts before anything; a present range
-- sorts before a missing one.
-- @param r1 range table with fields first and last, or nil
-- @param r2 range table with fields first and last, or nil
-- @return true when r1 must come before r2
function rangeSort(r1, r2)
	if not r1 then return false end
	if not r2 then return true end
	if r1.first ~= r2.first then return r1.first < r2.first end
	return r1.last < r2.last
end
--- Parse a list of hexadecimal code point ranges into sorted, merged ranges.
-- Every "<hex>-<hex>" pair in str (hyphen or en dash, each side optionally
-- prefixed with characters such as "U+" or "0x") becomes a range. Ranges that
-- overlap, touch, or repeat each other are merged.
-- @param str text containing zero or more ranges
-- @return array of range tables (fields first and last) in ascending order;
--         empty when str contains no range
function parseRanges(str)
	local parsed = {}
	for a, b in mw.ustring.gmatch(str, "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)") do
		table.insert(parsed, makeRange(parseHex(a), parseHex(b)))
	end
	-- Sort first so that one linear pass can merge neighbours; this keeps the
	-- array free of nil holes and needs no temporary global.
	table.sort(parsed, rangeSort)
	local merged = {}
	for _, range in ipairs(parsed) do
		local prev = merged[#merged]
		if prev and range.first <= prev.last + 1 then
			-- Overlapping or adjacent: extend the previous range if needed.
			if range.last > prev.last then prev.last = range.last end
		else
			table.insert(merged, range)
		end
	end
	return merged
end

--- Return the "abbreviation" alias recorded for a code point, if any.
-- @param n code point
-- @return whatever getAliasData returns for the key "abbreviation"
function getAbbreviation(n)
	return getAliasData(n, "abbreviation")
end
--- Return the "control" alias of a code point, falling back to "figment".
-- @param n code point
-- @return the "control" alias when getAliasData yields a truthy value,
--         otherwise the "figment" alias (which may be nil)
function getControlName(n)
	local controlName = getAliasData(n, "control")
	if controlName then return controlName end
	return (getAliasData(n, "figment"))
end

--- Fetch an alias of the given kind for a code point from mAliases.
-- mAliases[n] is read as an array of { kind, value } pairs. When several
-- pairs have the requested kind, the value of the last one is returned.
-- @param n code point
-- @param key alias kind to look for (compared against the pair's first item)
-- @return the matching value, or nil when n has no entry or no pair matches
function getAliasData(n, key)
	local aliases = mAliases[n]
	if aliases == nil then return nil end
	local found = nil
	for _, pair in ipairs(aliases) do
		local kind, value = pair[1], pair[2]
		if kind == key then found = value end
	end
	return found
end

--- Report whether mUnicode.lookup_control classifies n as "control".
-- @param n code point
-- @return true or false
function isControl(n)
	local kind = mUnicode.lookup_control(n)
	return kind == "control"
end
--- Report whether mUnicode.lookup_control classifies n as "format".
-- @param n code point
-- @return true or false
function isFormat(n)
	local kind = mUnicode.lookup_control(n)
	return kind == "format"
end

--- Convert a hexadecimal string to a number.
-- @param s string of hexadecimal digits, or nil/false
-- @return the numeric value; nil when s is nil/false or is not valid base 16
function parseHex(s)
	if not s then return nil end
	return tonumber(s, 16)
end
--- Decide whether a value is unsuitable as a wiki link target.
-- @param str page title, or a code point number that is first converted to
--            its character; nil is accepted
-- @return true when str is nil, is rejected by mUnicode.is_valid_pagename,
--         contains "<" or ">", or consists of exactly one of the characters
--         "/", ".", ":", "_" or U+0338; false otherwise
function isBadTitle(str)
	if str == nil then return true end
	if type(str) == "number" then str = mw.ustring.char(str) end
	if not mUnicode.is_valid_pagename(str) then return true end
	if mw.ustring.match(str, "[<>]") then return true end
	-- Count characters, not bytes: U+0338 (written below as its UTF-8 bytes
	-- \204\184) is two bytes long, so a byte-length test could never match it.
	if mw.ustring.len(str) == 1 and mw.ustring.match(str, "[/.:_\204\184]") then
		return true
	end
	return false
end

--- Read a per-code-point parameter from args, with optional fallbacks.
-- The most specific name, "<key>_<4 hex digits>", is tried first. When c is
-- truthy the lookup falls back to the 16-code-point row name
-- "<key>_<3 hex digits>x" and then to the bare "<key>".
-- @param key parameter base name
-- @param n code point
-- @param c whether the row-level and bare fallbacks may be used
-- @return the parameter value, or nil when nothing applicable is set
function getParamNx(key, n, c)
	local exact = args[string.format("%s_%04X", key, n)]
	if exact then return exact end
	if not c then return nil end
	local rowKey = string.format("%s_%03Xx", key, math.floor(n/16))
	return args[rowKey] or args[key]
end
--- Build the automatic footnotes that explain the chart's cell colours.
-- One <ref> is emitted per category whose count is at least 1, worded in the
-- singular for a count of exactly 1 and in the plural for 2 or more. The
-- categories are emitted in the order white, control, format, reserved,
-- nonchar.
-- @param count table with numeric fields white, control, format, reserved
--              and nonchar
-- @return the concatenated <ref> markup; '' when nothing applies
function getAutoRefs(count)
	local templates = {
		white = '<ref name="white">White area%s within light green cell%s show%s size of otherwise invisible whitespace character%s.</ref>',
		control = '<ref name="control">Light blue cell%s indicate%s non-printable control character%s.</ref>',
		format = '<ref name="format">Pink cell%s indicate%s non-printable format character%s.</ref>',
		reserved = '<ref name="reserved">Gray cell%s indicate%s unassigned (reserved) code point%s.</ref>',
		nonchar = '<ref name="nonchar">Black cell%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard).</ref>',
		combining = '<ref name="combining"></ref>',
	}
	-- Each rule: category key, fill-ins for a count of 1, fill-ins for 2+.
	local rules = {
		{ 'white',    { '', '', 's', '' }, { 's', 's', '', 's' } },
		{ 'control',  { '', 's', '' },     { 's', '', 's' } },
		{ 'format',   { '', 's', '' },     { 's', '', 's' } },
		{ 'reserved', { '', 's', '' },     { 's', '', 's' } },
		{ 'nonchar',  { '', 's', '', '', 'is', 'an ', '' }, { 's', '', 's', 's', 'are', '', 's' } },
	}
	local pieces = {}
	for _, rule in ipairs(rules) do
		local key, singular, plural = rule[1], rule[2], rule[3]
		local n = count[key]
		if n == 1 then
			pieces[#pieces + 1] = string.format(templates[key], unpack(singular))
		elseif n >= 2 then
			pieces[#pieces + 1] = string.format(templates[key], unpack(plural))
		end
	end
	return table.concat(pieces)
end

--- Wrap text in a <span>, optionally with a title and a size class.
-- When repl is truthy, every run of whitespace in str is replaced by a
-- newline; if at least one run was replaced, the span gets the class
-- "small-<number of runs replaced>".
-- @param str text to wrap
-- @param title value for the title attribute, or nil for no title
-- @param repl whether to perform the whitespace replacement
-- @return the <span> markup
function makeSpan(str, title, repl)
	local titleAttr = title and string.format(' title="%s"', title) or ''
	local classAttr = ''
	if repl then
		local collapsed, hits = mw.ustring.gsub(str, '%s+', '\n')
		if hits > 0 then
			classAttr = string.format(' class="small-%s"', hits)
			str = collapsed
		end
	end
	return string.format('<span %s%s>%s</span>', classAttr, titleAttr, str)
end
--- Produce a piped wikilink, or plain text when no usable target exists.
-- @param a link target; may be nil
-- @param b link label; defaults to a when absent
-- @return "[[a|b]]"; or just b (or '' when b is absent) when a is missing,
--         or when isBadTitle rejects a while infoMode is off
function makeLink(a, b)
	if not a then return (b or '') end
	if isBadTitle(a) and not infoMode then return (b or '') end
	local label = b or a
	return string.format("[[%s|%s]]", a, label)
end

--- Build the HTML id used for a code point's info row.
-- @param n code point
-- @return "info-" followed by n as at least four uppercase hex digits
function getAnchorId(n)
	local hex = string.format("%04X", n)
	return "info-" .. hex
end
--- Work out the link target for a code point's chart cell.
-- In info mode every cell links to its own info-row anchor. Otherwise the
-- "link" parameter (see getParamNx) decides:
--   "yes"            -> the character itself
--   "no", "ifexist"  -> no link
--   "wikt"           -> ":wikt:" followed by the character
--   anything else    -> the parameter value, unchanged (nil when unset)
-- @param n code point
-- @return link target string, or nil for no link
function getTarget(n)
	if infoMode then return "#"..getAnchorId(n) end
	local t = getParamNx("link", n, true)
	if t == "yes" then
		-- No variable named `char` is in scope in this function, so the
		-- character must be derived from n here.
		t = mw.ustring.char(n)
	elseif t == "no" or t == "ifexist" then
		t = nil
	elseif t == "wikt" then
		t = ":wikt:"..mw.ustring.char(n)
	end
	return t
end
--- Find the named HTML entity for a code point, escaped for display.
-- Each entry of mEntities is read as { entityText, codePoint }. The first
-- entry whose code point equals n is used, and every "&" in its text is
-- replaced by "&amp;".
-- @param n code point
-- @return the escaped entity text, or nil when no entry matches
function getEntity(n)
	for _, entry in ipairs(mEntities) do
		if entry[2] == n then
			-- Parentheses discard string.gsub's second result (the
			-- replacement count) so exactly one value is returned.
			return (string.gsub(entry[1], "&", "&amp;"))
		end
	end
	return nil
end


--- Render the info-mode table row for one character.
-- The row shows the display form, "U+XXXX name", the category, the bytes of
-- info.char in hexadecimal, and the character's HTML references (named
-- entity when getEntity finds one, then decimal and hexadecimal numeric
-- references), all with "&" pre-escaped for display.
-- @param info table with fields n, char, sCode, display, uPlus, name and
--             category
-- @return the <tr> markup
function makeInfoRow(info)
	local codePoint = info.n
	local decimalRef = string.format('&amp;#%d;', codePoint)
	local hexRef = string.format('&amp;#x%X;', codePoint)

	local named = getEntity(codePoint)
	local namedItem = named and string.format('<li>%s</li>', named) or ''
	local htmlList = string.format('<ul>%s<li>%s</li><li>%s</li></ul>', namedItem, decimalRef, hexRef)

	-- One list item per byte of the character's encoded form.
	local text = info.char
	local byteItems = {}
	for i = 1, #text do
		byteItems[i] = string.format("0x%02X", text:byte(i))
	end
	local byteList = '<ul><li>'..table.concat(byteItems, '</li><li>')..'</li></ul>'

	local rowFmt = '<tr class="info-row" id="%s"><th class="thumb script-%s">%s</th><td colspan="16" class="info"><div class="title">%s %s</div><div class="category">%s</div><div class="utf8">%s</div><div class="html">%s</div></td></tr>'
	return string.format(rowFmt, getAnchorId(codePoint), info.sCode, info.display,
		info.uPlus, info.name, info.category, byteList, htmlList)
end

--- Entry point: render a Unicode block as a 16-column chart.
-- Arguments read (via Module:Arguments):
--   block / name / 1              block name, matched against the block data
--   info                          anything but "no" turns info mode on
--   link_block / link_name        page linked from the title bar
--   display_block / display_name  title bar text
--   refs / notes / ref / note     extra footnote markup
--   version                       Unicode version named in a footnote
--   pdf                           URL of the official code chart
--   ranges / range                code point ranges to chart instead of the
--                                 block's default range
--   link*, box*, display_XXXX     per-cell options, read through getParamNx
-- @param frame the Scribunto frame object of the invocation
-- @return the preprocessed chart markup
-- Raises an error when no block name is given or the name is unknown.
function p.main( frame )
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end
	if(args["info"] and args["info"] ~= "no") then infoMode = true end

	local blockName = args["block"] or args["name"] or args[1]
	-- Fail with a readable message instead of crashing inside string.lower
	-- when the block name was not supplied at all.
	if blockName == nil then
		error('No block name given; use the "block" or "name" parameter or the first unnamed parameter.', 0)
	end
	local defaultRange = getDefaultRange(blockName)
	if defaultRange == nil then errorFormat(errBadBlockName, blockName) end

	local blockNameLink = args["link_block"] or args["link_name"] or blockName.." (Unicode block)"
	local blockNameDisplay = args["display_block"] or args["display_name"] or blockName
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	local version = args["version"]
	local pdf = args["pdf"] or string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)
	-- Cell template shared by control and format characters.
	local cfFmt = '<td title="%s" class="char%s%s"><div>\n%s\n</div></td>'
	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	if #ranges == 0 then ranges = { defaultRange } end

	local tableBody = {}
	local count = { white=0, control=0, format=0, reserved=0, nonchar=0, combining=0 }
	for i,range in ipairs(ranges) do
		local first, last = range.first, range.last
		-- Round both ends down to the start of their 16-code-point row.
		local firstR, lastR = (first-first%16), (last-last%16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowOpen, rowClose = '<tr>', '</tr>'
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)

			for c = 0,15,1 do
				local n = (r+c)
				local uPlus =  string.format("U+%04X", n)
				local char = mw.ustring.char(n)
				local isControlN, isFormatN = isControl(n), isFormat(n)
				local charName = getControlName(n) or mUnicode.lookup_name(n)
				if isControlN then charName = charName or "&lt;control&gt;" end

				local box = getParamNx("box", n, true)
				local cBox = ' box'
				if box == "no" then cBox = '' end
				local title = uPlus..' '..charName
				local display = getParamNx("display", n, false) or getAbbreviation(n) or char

				if isControlN or isFormatN then
					display = makeSpan(display, title, true)
				end
				local sCode = mUnicode.lookup_script(n)
				local sDir = ''
				if mUnicode.is_rtl(char) then sDir = ' dir="rtl"' end
				local sClass = ""
				local linkThis = getTarget(n)
				local cell = ''
				local doInfo = true
				if(n < first or n > last) then
					-- Padding cell: in the row but outside the requested range.
					cell = '<td class="excluded"></td>'
					doInfo = false
				elseif string.match(charName, '<reserved') then
					count.reserved = count.reserved + 1
					cell = string.format('<td title="%s RESERVED" class="reserved"></td>', uPlus)
					doInfo = false
				elseif string.match(charName, '<noncharacter') then
					count.nonchar = count.nonchar + 1
					cell = string.format('<td title="%s NONCHARACTER" class="nonchar"></td>', uPlus)
					doInfo = false
				elseif isControlN then
					count.control = count.control + 1
					cell = string.format(cfFmt, title, " control", cBox, makeLink(linkThis, display))
				elseif isFormatN then
					count.format = count.format + 1
					cell = string.format(cfFmt, title, " format", cBox, makeLink(linkThis, display))
				elseif string.match(charName, "VARIATION SELECTOR") then
					if string.match(charName, "MONGOLIAN") then
						display = getAbbreviation(n)
					else
						display = mw.ustring.gsub(charName, "VARIATION SELECTOR%-", "VS ")
					end
					local cellFmt = '<td title="%s" class="char box vs"><div>\n%s\n</div></td>'
					cell = string.format(cellFmt, title, makeLink(linkThis, makeSpan(display, title, true)))
				elseif mUnicode.is_whitespace(n) then
					count.white = count.white + 1
					local cellFmt = '<td title="%s" class="char whitespace"%s><div>\n%s\n</div></td>'
					cell = string.format(cellFmt, title, sDir, makeLink(linkThis, makeSpan(char, title, false)))
				else
					if sCode then sClass = sClass..string.format(' script-%s', sCode) end
					if box == "yes" then sClass = sClass..' box' end
					-- Declared local so it does not leak into the global table.
					local isCombining = mUnicode.is_combining(n)
					if isCombining then
						count.combining = count.combining + 1
						sClass = sClass.." combining"
						-- Show combining marks on a dotted-circle base.
						display = "◌"..char
					end
					display = makeSpan(display, title, true)
					local cellFmt = '<td title="%s" class="char%s"%s><div>\n%s\n</div></td>'
					cell = string.format(cellFmt, title, sClass, sDir, makeLink(linkThis,display))
				end
				if(infoMode and doInfo) then
					local printable, category = mUnicode.is_printable(n)
					local info = {
						n = n,
						char = char,
						name = charName,
						sCode = sCode,
						display = display,
						uPlus = uPlus,
						printable = printable,
						category = category,
						}
					table.insert(infoTable, makeInfoRow(info))
				end
				table.insert(dataRow, cell)
			end
			local rowHtml = {rowOpen, rowHeader, table.concat(dataRow), rowClose}
			table.insert(tableBody, table.concat(rowHtml))
		end
	end

	local tableOpen, tableClose = '<table class="wikitable nounderlines unicode-chart">', '</table>'

	local autoRefs = getAutoRefs(count)
	local versionRef = ''
	if version then versionRef = string.format('<ref name="version">As of Unicode version %s.</ref>', version) end
	local refs = table.concat({ versionRef, autoRefs, userRefs})

	local titleBar = string.format('<div class="title">[[%s|%s]]%s</div>', blockNameLink, blockNameDisplay, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty"></th>' }
	for c = 0,15,1 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')

	local infoFooter = ''
	if(infoMode) then infoFooter = table.concat(infoTable) end

	-- The notes row is only emitted when at least one footnote exists.
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }
	local cStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/script styles.css'} }
	local html = table.concat({
		tStyles, cStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		infoFooter, notesFooter, tableClose
		})
	-- Preprocess so the wikitext inside the HTML (links, <ref>, {{reflist}})
	-- is expanded.
	return frame:preprocess(html)
end
		
-- Export the module table; p.main is the only function attached to it here.
return p