Jump to content

Module:Unicode chart

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Cobaltcigs (talk | contribs) at 14:37, 18 September 2019 (source module is now converted to that layout). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local mArguments = require('Module:Arguments')
local mTableTools = require('Module:TableTools')
local mUnicode = require('Module:Unicode data')
local mAliases = require('Module:Unicode data/aliases')
local mBlocks = require('Module:Unicode data/blocks')
local mCategory = require('Module:Unicode data/category')
local mControl = require('Module:Unicode data/control')
local mScripts = require('Module:Unicode data/scripts')
local mVersion = require('Module:Unicode data/version')
local mEntities = require('Module:Unicode chart/entities')

-- Module export table plus module-level state shared by the helpers below:
--   args        copy of the invocation arguments (filled in by p.main)
--   infoTable   accumulated HTML info rows (only used in info mode)
--   useFontCss  whether per-script font classes are emitted
--   infoMode    whether per-character info rows are generated
local p, args, infoTable = {}, {}, {}
local useFontCss, infoMode = true, false

-- Error message templates and the helper that raises them.
local err = {
	blockName = 'Unrecognized block name "%s" does not match those defined in [[Module:Unicode data/blocks]].',
	refGarbage = 'Refs contain non-ref content: "%s"',
}

-- Formats its arguments with string.format and raises the result as an
-- error at level 0 (no position information is prepended).
function err.format(...)
	return error(string.format(...), 0)
end

-- Logs a value, or a printf-style message, through mw.log.
--
-- If the first argument is not a string it is logged as-is and the rest is
-- ignored. Otherwise the first argument is treated as a string.format
-- template; nil and boolean arguments are converted with tostring first so
-- that "%s" accepts them, and missing trailing arguments are padded with
-- "nil" up to the number of '%' directives found in the template.
--
-- NOTE(review): this global shares its name with Lua's standard `debug`
-- library and replaces it; the name is kept so existing callers keep working.
function debug(...)
	-- `unpack` is a global in Lua 5.1; later versions moved it to table.unpack.
	local unpack = unpack or table.unpack
	-- select("#") gives the true argument count even when some arguments are
	-- nil; the length operator is unspecified on a table with nil holes.
	local argc = select("#", ...)
	local a = {...}
	if type(a[1]) ~= "string" then mw.log(a[1]) return end
	-- Drop escaped "%%" pairs, then count the remaining '%' directives.
	local stripped = (string.gsub(a[1], "%%%%", ""))
	local _, c = string.gsub(stripped, "%%", "")
	local last = math.max(argc, c + 1)
	for i = 1, last do
		if type(a[i]) == "nil" or type(a[i]) == "boolean" then a[i] = tostring(a[i]) end
	end
	-- Explicit bounds so every slot up to `last` is passed on.
	return mw.log(string.format(unpack(a, 1, last)))
end

-- Appends the elements t2[1..#t2] to the end of t1, in place, and returns t1.
table.concat2 = function(t1, t2)
	local count = #t2
	local idx = 1
	while idx <= count do
		t1[#t1 + 1] = t2[idx]
		idx = idx + 1
	end
	return t1
end
-- Returns the element at index #t, or nil when t is nil/false.
table.last = function(t)
	if not t then
		return nil
	end
	return t[#t]
end

-- Builds a range table {first, last} from two numbers given in either order.
function makeRange(a, b)
	local lo = math.min(a, b)
	local hi = math.max(a, b)
	return { first = lo, last = hi }
end
-- Returns true when n lies within the inclusive range r.
function rangeContains(r, n)
	if n < r.first then
		return false
	end
	return n <= r.last
end
-- Returns a new range spanning from the smaller `first` to the larger `last`
-- of the two given ranges.
function rangeCombine(r1, r2)
	local lo = math.min(r1.first, r2.first)
	local hi = math.max(r1.last, r2.last)
	return { first = lo, last = hi }
end
-- Returns true when two inclusive ranges overlap or are directly adjacent,
-- i.e. when their union is a single contiguous range. Returns false if
-- either argument is nil/false.
--
-- The check is written as an interval comparison rather than by probing the
-- neighbours of each endpoint: the probing form reported two identical
-- ranges as not mergeable, because none of first-1 / last+1 falls inside
-- the other range in that case.
function rangesMergeable(r1, r2)
	if not r1 or not r2 then return false end
	return r1.first <= r2.last + 1 and r2.first <= r1.last + 1
end
-- Comparator for table.sort: orders ranges by `first`, then by `last`.
-- A present range sorts before a missing one.
function rangeSort(r1, r2)
	if not r1 then
		return false
	end
	if not r2 then
		return true
	end
	if r1.first ~= r2.first then
		return r1.first < r2.first
	end
	return r1.last < r2.last
end

-- Converts a hexadecimal string to a number; returns nil for a nil/false
-- input (and whatever tonumber yields for an unparsable one).
function parseHex(s)
	if not s then
		return nil
	end
	return tonumber(s, 16)
end
-- Parses a string of hexadecimal code point ranges (e.g. "U+0041-U+005A",
-- "0061–007A") into a sorted list of {first, last} range tables.
--
-- Each "a-b" pair (hyphen or en dash) found in `str` becomes one range;
-- optional "U+", "0x" style prefixes and leading zeros are skipped by the
-- pattern. Ranges that rangesMergeable accepts are merged with rangeCombine,
-- and the result is sorted with rangeSort.
-- Returns an empty table when no range is found.
function parseRanges(str)
	local parsed = {}
	for a, b in mw.ustring.gmatch(str, "[UuXx0+-]*([a-fA-F0-9]+)[-–][UuXx0+-]*([a-fA-F0-9]+)") do
		table.insert(parsed, makeRange(parseHex(a), parseHex(b)))
	end
	-- Remember the original count: merging below blanks out slots, after
	-- which the length operator is no longer reliable.
	local total = #parsed
	-- Walk from the end and fold each range into the nearest earlier range
	-- it can merge with.
	for i = total, 2, -1 do
		for j = i - 1, 1, -1 do
			if rangesMergeable(parsed[i], parsed[j]) then
				parsed[j] = rangeCombine(parsed[i], parsed[j])
				parsed[i] = nil
				-- parsed[i] is gone; nothing further can merge with it.
				break
			end
		end
	end
	-- Compact the surviving ranges into a proper sequence. This is a local:
	-- the result table must not leak into the global environment.
	local merged = {}
	for i = 1, total do
		if parsed[i] then merged[#merged + 1] = parsed[i] end
	end
	table.sort(merged, rangeSort)
	return merged
end

-- Normalizes a property value string (such as a block name) for loose
-- comparison: lowercases it, removes a leading "is", then removes every run
-- of hyphens, underscores and whitespace.
-- See http://www.unicode.org/reports/tr44/#UAX44-LM3
local function propertyValueKey(val)
	local lowered = val:lower()
	-- Assigning to a single local keeps only gsub's first result (the string).
	local withoutPrefix = lowered:gsub('^is', '')
	local key = withoutPrefix:gsub('[-_%s]+', '')
	return key
end

-- Looks up a block by name in mBlocks (entries are read as
-- {first, last, name}) using loose name matching, and returns its code
-- point range. Returns nothing when no block matches.
function getDefaultRange(blockName)
	local wanted = propertyValueKey(blockName)
	for _, block in ipairs(mBlocks) do
		local candidate = propertyValueKey(block[3])
		if candidate == wanted then
			return makeRange(block[1], block[2])
		end
	end
end

-- Returns the long general-category name for code point n, lowercased and
-- with underscores replaced by spaces (e.g. "space separator"), or nil when
-- mCategory.long_names has no entry for the looked-up category code.
function getCategory(n)
	local cc = mUnicode.lookup_category(n)
	local cat = mCategory.long_names[cc]
	if not cat then return nil end
	-- Parentheses drop gsub's second result (the replacement count) so that
	-- exactly one value is returned.
	return (string.gsub(string.lower(cat), "_", " "))
end

-- Returns the list of "abbreviation" aliases recorded for code point n.
function getControlAbbrs(n)
	local abbreviations = getAliasData(n, "abbreviation")
	return abbreviations
end
-- Returns the "control" aliases of code point n followed by its "figment"
-- aliases, as a single list.
function getControlAliases(n)
	local controlNames = getAliasData(n, "control")
	local figmentNames = getAliasData(n, "figment")
	return table.concat2(controlNames, figmentNames)
end

-- Collects, in order, the second field of every mAliases[n] entry whose
-- first field equals `key`. Returns an empty table when n has no aliases.
function getAliasData(n, key)
	local matches = {}
	local entries = mAliases[n]
	if not entries then
		return matches
	end
	for _, entry in ipairs(entries) do
		if entry[1] == key then
			table.insert(matches, entry[2])
		end
	end
	return matches
end

-- Returns the first "correction" alias recorded for code point n in
-- mAliases, or nil when there is none.
--
-- The entry is fetched by direct index (as getAliasData does) instead of
-- scanning every key of the alias table for each lookup.
function getCorrection(n)
	local entries = mAliases[n]
	if entries then
		for _, entry in ipairs(entries) do
			if entry[1] == "correction" then return entry[2] end
		end
	end
	return nil
end

-- Returns the HTML anchor id of the info row for code point n:
-- "info-" followed by n in uppercase hex, zero-padded to at least 4 digits.
function getAnchorId(n)
	local template = "info-%04X"
	return template:format(n)
end
-- Returns the link target for the chart cell of code point n, or nil when
-- the cell should not be linked.
--
-- In info mode the target is always the in-page anchor of the character's
-- info row. Otherwise the "link" parameter (per code point, per row, or
-- global, as resolved by getParamNx) decides:
--   "yes"            link to the page named by the character itself
--   "wikt"           link to the character's Wiktionary entry
--   "no", "ifexist"  no link ("ifexist" gets no special handling here)
--   anything else    used verbatim as the target
function getTarget(n)
	if infoMode then return "#" .. getAnchorId(n) end
	local t = getParamNx("link", n, true)
	if t == "yes" then
		-- Build the character from n here; no `char` variable is in scope
		-- in this function, so reading one would yield nil (no link).
		t = mw.ustring.char(n)
	elseif t == "no" or t == "ifexist" then
		t = nil
	elseif t == "wikt" then
		t = ":wikt:" .. mw.ustring.char(n)
	end
	return t
end

-- Returns the entity string stored in mEntities for code point n with every
-- "&" replaced by "&amp;" (so it displays literally), or nil when n has no
-- entry.
function getEntity(n)
	local e = mEntities[n]
	if not e then return nil end
	-- Parentheses drop gsub's second result (the replacement count) so that
	-- exactly one value is returned.
	return (string.gsub(e, "&", "&amp;"))
end

-- True when mUnicode.lookup_control classifies code point n as "control".
function isControl(n)
	local kind = mUnicode.lookup_control(n)
	return kind == "control"
end
-- True when mUnicode.lookup_control classifies code point n as "format".
function isFormat(n)
	local kind = mUnicode.lookup_control(n)
	return kind == "format"
end

-- Returns true when `str` should not be used as a wiki link target.
--
-- `str` may be a string or a code point number (converted to its
-- character). It is rejected when it is nil, when
-- mUnicode.is_valid_pagename refuses it, when it contains "<" or ">", or
-- when it is a single character out of "/", ".", ":", "_" and U+0338
-- (combining long solidus overlay).
function isBadTitle(str)
	if str == nil then return true end
	if type(str) == "number" then str = mw.ustring.char(str) end
	if not mUnicode.is_valid_pagename(str) then return true end
	-- Inside a character class none of these characters is magic, so no
	-- backslash escapes are used (escapes such as "\<" are only tolerated by
	-- Lua 5.1 and rejected by later versions).
	if mw.ustring.match(str, "[<>]") then return true end
	-- Count characters, not bytes: U+0338 occupies two bytes in UTF-8, so a
	-- byte-length test of 1 could never match it.
	if mw.ustring.len(str) == 1 and mw.ustring.match(str, "[/.:_̸]") then return true end
	return false
end

-- Builds the <ref name="version"> footnote naming the Unicode version held
-- in mVersion (escaped with mw.text.nowiki). Returns '' when mVersion is
-- nil or empty.
function makeVersionRef()
	if mVersion ~= nil and mVersion ~= '' then
		local shown = mw.text.nowiki(mVersion)
		return string.format('<ref name="version">As of Unicode version %s.</ref>', shown)
	end
	return ''
end
-- Builds the automatic footnotes explaining the special cell colours used
-- in the chart.
--
-- `count` holds how many cells of each kind were emitted (fields white,
-- control, format, reserved, nonchar). A kind with a count of exactly 1 gets
-- a singular-wording footnote, 2 or more a plural one, and anything else
-- none. The footnotes are concatenated in the order white, control, format,
-- reserved, nonchar.
function makeAutoRefs(count)
	local template = {
		white = '<ref name="white">White area%s within light green cell%s show%s size of otherwise invisible whitespace character%s.</ref>',
		control = '<ref name="control">Light blue cell%s indicate%s non-printable control character%s.</ref>',
		format = '<ref name="format">Pink cell%s indicate%s non-printable format character%s.</ref>',
		reserved = '<ref name="reserved">Gray cell%s indicate%s unassigned (reserved) code point%s.</ref>',
		nonchar = '<ref name="nonchar">Black cell%s indicate%s noncharacter%s (code point%s that %s guaranteed never to be assigned as %sencoded character%s in the Unicode Standard).</ref>',
		-- Not referenced below.
		combining = '<ref name="combining"></ref>',
	}
	local pieces = {}
	-- Appends the wording that matches n (singular for 1, plural for >= 2).
	local function add(n, singular, plural)
		if n == 1 then
			pieces[#pieces + 1] = singular
		elseif n >= 2 then
			pieces[#pieces + 1] = plural
		end
	end
	add(count.white,
		template.white:format('', '', 's', ''),
		template.white:format('s', 's', '', 's'))
	add(count.control,
		template.control:format('', 's', ''),
		template.control:format('s', '', 's'))
	add(count.format,
		template.format:format('', 's', ''),
		template.format:format('s', '', 's'))
	add(count.reserved,
		template.reserved:format('', 's', ''),
		template.reserved:format('s', '', 's'))
	add(count.nonchar,
		template.nonchar:format('', 's', '', '', 'is', 'an ', ''),
		template.nonchar:format('s', '', 's', 's', 'are', '', 's'))
	return table.concat(pieces)
end
-- Validates user-supplied reference text: after strip markers are removed
-- (mw.text.killMarkers) only whitespace may remain. Returns refTxt unchanged
-- when it passes; otherwise raises the err.refGarbage error showing the
-- offending text. The incoming text is also written to the debug log.
function sanitizeUserRefs(refTxt)
	mw.log(refTxt)
	local withoutMarkers = mw.text.killMarkers(refTxt)
	local leftover = mw.ustring.gsub(withoutMarkers, '%s', '')
	if string.len(leftover) == 0 then
		return refTxt
	end
	err.format(err.refGarbage, mw.text.nowiki(withoutMarkers))
end
-- Wraps `str` in a <span>, optionally with a title attribute.
--
-- When `repl` is truthy, every run of whitespace in `str` is replaced by a
-- newline, and if at least one replacement happened the span gets the class
-- "small-N", N being the number of replacements.
function makeSpan(str, title, repl)
	local classAttr, titleAttr = '', ''
	if title then
		titleAttr = (' title="%s"'):format(title)
	end
	if repl then
		local wrapped, hits = mw.ustring.gsub(str, '%s+', '\n')
		if hits > 0 then
			classAttr = (' class="small-%s"'):format(hits)
			str = wrapped
		end
	end
	return ('<span %s%s>%s</span>'):format(classAttr, titleAttr, str)
end
-- Builds the wikilink [[a|b]] (b defaults to a).
-- When there is no target, or the target is a bad title outside info mode,
-- only the label is returned (or '' when there is no label either).
function makeLink(a, b)
	if not a then
		return b or ''
	end
	if isBadTitle(a) and not infoMode then
		return b or ''
	end
	local label = b or a
	return string.format("[[%s|%s]]", a, label)
end

-- Renders the HTML table row describing a single character in info mode.
--
-- `info` is read for the fields n, char, name, sCode, display, uPlus,
-- category and aliases. When the category is 'control', info.name is
-- overwritten with an escaped "<control>" before rendering.
function makeInfoRow(info)
	local codePoint = info.n
	local cat = info.category

	-- HTML references: named entity (when one exists), decimal, hexadecimal.
	local refItems = {}
	local named = getEntity(codePoint)
	if named then
		refItems[#refItems + 1] = string.format('<li>%s</li>', named)
	end
	refItems[#refItems + 1] = string.format('<li>%s</li>', string.format('&amp;#%d;', codePoint))
	refItems[#refItems + 1] = string.format('<li>%s</li>', string.format('&amp;#x%X;', codePoint))
	local htmlList = '<ul>' .. table.concat(refItems) .. '</ul>'

	-- One list item per byte of the character string, shown in hex.
	local byteItems = {}
	for i = 1, #info.char do
		byteItems[i] = string.format("0x%02X", info.char:byte(i))
	end
	local utf8List = '<ul><li>' .. table.concat(byteItems, '</li><li>') .. '</li></ul>'

	if cat == 'control' then
		info.name = mw.text.nowiki('<control>')
	end

	local aliasBlock = ''
	if #info.aliases > 0 then
		aliasBlock = '<div class="alias"><ul><li>' .. table.concat(info.aliases, '</li><li>') .. '</li></ul></div>'
	end

	local correctionBlock = ''
	local corrected = getCorrection(codePoint)
	if corrected then
		correctionBlock = string.format('<div class="correction">%s</div>', corrected)
	end

	local thumbClass = ''
	if useFontCss then
		thumbClass = thumbClass .. 'script-' .. info.sCode
	end
	-- Boxed thumbnails: control/format/space characters and anything whose
	-- display text was shrunk by makeSpan.
	local boxed = cat == 'control' or cat == 'format' or cat == 'space separator' or
		mw.ustring.match(info.display, '<span%s+class="small')
	if boxed then
		thumbClass = thumbClass .. " box"
	end

	local rowTemplate = '<tr class="info-row" id="%s"><th class="thumb %s">%s</th><td colspan="16" class="info"><div class="title">%s %s</div><div class="category">%s</div>%s%s<div class="utf8">%s</div><div class="html">%s</div></td></tr>'
	return string.format(rowTemplate, getAnchorId(codePoint), thumbClass, info.display, info.uPlus, info.name, info.category, correctionBlock, aliasBlock, utf8List, htmlList)
end

-- Resolves a per-code-point parameter from the invocation arguments.
--
-- Looks for "<key>_<XXXX>" (n in hex, at least 4 digits) first. When `c` is
-- truthy it then falls back to the row-wide "<key>_<XXX>x" (n/16 in hex
-- followed by a literal "x") and finally to the plain "<key>". Returns nil
-- when nothing applies.
function getParamNx(key, n, c)
	local exact = args[string.format("%s_%04X", key, n)]
	if exact then
		return exact
	end
	if not c then
		return nil
	end
	local rowKey = string.format("%s_%03Xx", key, math.floor(n/16))
	return args[rowKey] or args[key]
end

-- Entry point: renders the Unicode chart table for one block.
--
-- `frame` is handed to mArguments.getArgs to collect the arguments, and is
-- used for frame:extensionTag (TemplateStyles) and frame:preprocess.
--
-- Arguments read here (first non-empty alternative wins):
--   block / name / 1                block name; must match an entry of
--                                   [[Module:Unicode data/blocks]]
--   info                            any value other than "no" enables info mode
--   fonts / font                    "no" disables per-script font classes
--   state                           extra class on the table (default "expanded")
--   link_block / link_name          target page of the title link
--   display_block / display_name    label of the title link
--   refs / notes / ref / note       extra footnotes appended to the title
--   pdf                             URL of the code chart PDF
--   ranges / range                  hex ranges to show instead of the whole block
-- Per-code-point parameters (box, display, link) are resolved by getParamNx
-- and getTarget.
--
-- Raises the err.blockName error when the block name is missing or unknown.
-- Returns the preprocessed wikitext/HTML of the chart.
function p.main( frame )
	for k, v in pairs(mArguments.getArgs(frame)) do args[k] = v end
	if(args["info"] and args["info"] ~= "no") then infoMode = true end
	useFontCss = (args["fonts"] or args["font"] or ''):lower() ~= "no"
	local blockName = args["block"] or args["name"] or args[1]
	-- Only look the block up when a name was given, so that a missing
	-- argument produces the block-name error below instead of failing inside
	-- the name normalization.
	local defaultRange = blockName and getDefaultRange(blockName)
	if not defaultRange then err.format(err.blockName, tostring(blockName or '')) end
	local state = args["state"] or "expanded"
	local blockNameLink = args["link_block"] or args["link_name"] or blockName.." (Unicode block)"
	local blockNameDisplay = args["display_block"] or args["display_name"] or blockName
	local userRefs = args["refs"] or args["notes"] or args["ref"] or args["note"] or ""
	local pdf = args["pdf"] or string.format('https://www.unicode.org/charts/PDF/U%04X.pdf', defaultRange.first)
	-- Cell template shared by control and format characters.
	local cfFmt = '<td title="%s" class="char%s%s"><div>\n%s\n</div></td>'
	local ranges = parseRanges(args["ranges"] or args["range"] or '')
	if #ranges == 0 then ranges = { defaultRange } end

	local tableBody = {}
	-- Number of cells of each special kind, used to pick the footnotes.
	local count = { white=0, control=0, format=0, reserved=0, nonchar=0, combining=0 }
	for i,range in ipairs(ranges) do
		local first, last = range.first, range.last
		-- Round both ends down to the start of their 16-cell row.
		local firstR, lastR = (first-first%16), (last-last%16)
		for r = firstR, lastR, 16 do
			local dataRow = {}
			local rowOpen, rowClose = '<tr>', '</tr>'
			local rowHeader = string.format('<th class="row">U+%03Xx</th>', r/16)

			for c = 0,15,1 do
				local n = (r+c)
				local uPlus =  string.format("U+%04X", n)
				local char = mw.ustring.char(n)
				local isControlN, isFormatN = isControl(n), isFormat(n)
				local aliases = getControlAliases(n)
				local charName = table.last(aliases) or mUnicode.lookup_name(n)
				if isControlN then charName = charName or "&lt;control&gt;" end

				local box = getParamNx("box", n, true)
				local cBox = ' box'
				if box == "no" then cBox = '' end
				local title = uPlus..' '..charName
				local display = getParamNx("display", n, false) or table.last(getControlAbbrs(n)) or char

				if isControlN or isFormatN then
					display = makeSpan(display, title, true)
				end
				local sCode = nil
				if useFontCss then sCode = mUnicode.lookup_script(n) end
				local sDir = ''
				if mUnicode.is_rtl(char) then sDir = ' dir="rtl"' end
				local sClass = ""
				local linkThis = getTarget(n)
				local cell = ''
				local doInfo = true
				if(n < first or n > last) then
					-- Padding cell: in the row but outside the requested range.
					cell = '<td class="excluded"></td>'
					doInfo = false
				elseif string.match(charName, '<reserved') then
					count.reserved = count.reserved + 1
					cell = string.format('<td title="%s RESERVED" class="reserved"></td>', uPlus)
					doInfo = false
				elseif string.match(charName, '<noncharacter') then
					count.nonchar = count.nonchar + 1
					cell = string.format('<td title="%s NONCHARACTER" class="nonchar"></td>', uPlus)
					doInfo = false
				elseif isControlN then
					count.control = count.control + 1
					cell = string.format(cfFmt, title, " control", cBox, makeLink(linkThis, display))
				elseif isFormatN then
					count.format = count.format + 1
					cell = string.format(cfFmt, title, " format", cBox, makeLink(linkThis, display))
				elseif string.match(charName, "VARIATION SELECTOR") then
					if string.match(charName, "MONGOLIAN") then
						display = table.last(getControlAbbrs(n))
					else
						display = mw.ustring.gsub(charName, "VARIATION SELECTOR%-", "VS ")
					end
					local cellFmt = '<td title="%s" class="char box vs"><div>\n%s\n</div></td>'
					display = makeSpan(display, title, true)
					cell = string.format(cellFmt, title, makeLink(linkThis, display))
				elseif mUnicode.is_whitespace(n) then
					count.white = count.white + 1
					local cellFmt = '<td title="%s" class="char whitespace"%s><div>\n%s\n</div></td>'
					display = makeSpan(display, title, false)
					cell = string.format(cellFmt, title, sDir, makeLink(linkThis, makeSpan(char, title, false)))
				else
					if sCode then sClass = sClass..string.format(' script-%s', sCode) end
					if box == "yes" then sClass = sClass..' box' end
					-- Declared local so the flag does not leak into the
					-- global environment.
					local isCombining = mUnicode.is_combining(n)
					if isCombining then
						count.combining = count.combining + 1
						sClass = sClass.." combining"
						-- Show combining marks on a dotted-circle base.
						display = "◌"..char
					end
					display = makeSpan(display, title, true)
					local cellFmt = '<td title="%s" class="char%s"%s><div>\n%s\n</div></td>'
					cell = string.format(cellFmt, title, sClass, sDir, makeLink(linkThis,display))
				end
				if(infoMode and doInfo) then
					local printable = mUnicode.is_printable(n)
					local category = getCategory(n)
					local info = {
						n = n,
						char = char,
						name = charName,
						sCode = sCode,
						display = display,
						uPlus = uPlus,
						printable = printable,
						category = category,
						aliases = aliases
						}
					table.insert(infoTable, makeInfoRow(info))
				end
				table.insert(dataRow, cell)
			end
			local rowHtml = {rowOpen, rowHeader, table.concat(dataRow), rowClose}
			table.insert(tableBody, table.concat(rowHtml))
		end
	end
	local tableOpenFmt = '<table class="wikitable nounderlines unicode-chart collapsible %s">'
	local tableOpen, tableClose = string.format(tableOpenFmt, state), '</table>'

	local versionRef = makeVersionRef()
	local autoRefs = makeAutoRefs(count)
	userRefs = sanitizeUserRefs(userRefs)
	local refs = table.concat({ versionRef, autoRefs, userRefs })

	local titleBar = string.format('<div class="title">[[%s|%s]]%s</div>', blockNameLink, blockNameDisplay, refs)
	local fmtpdf = '<div class="pdf-link">[%s Official Unicode Consortium code chart] (PDF)</div>'
	if pdf then titleBar = titleBar..string.format(fmtpdf, pdf) end
	local titleBarRow = '<tr><th class="title-bar" colspan="17">'..titleBar..'</th></tr>'

	local columnHeaders = { '<tr>', '<th class="empty"></th>' }
	for c = 0,15,1 do table.insert(columnHeaders, string.format('<th class="column">%X</th>', c)) end
	table.insert(columnHeaders, '</tr>')

	local infoFooter = ''
	if(infoMode) then infoFooter = table.concat(infoTable) end

	-- The notes row is only emitted when at least one footnote exists.
	local notesFooter = ''
	if string.len(refs) > 0 then
		notesFooter = '<tr><td class="notes" colspan="17">'.."'''Notes:'''{{reflist}}"..'</td></tr>'
	end

	local tStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/styles.css'} }
	local cStyles = ''
	if useFontCss then
		cStyles = frame:extensionTag{ name = 'templatestyles', args = { src = 'Unicode chart/script styles.css'} }
	end
	local html = table.concat({
		tStyles, cStyles, tableOpen, titleBarRow,
		table.concat(columnHeaders), table.concat(tableBody),
		infoFooter, notesFooter, tableClose
		})
	-- Preprocess so the <ref> tags and {{reflist}} in the output are expanded.
	return frame:preprocess(html)
end
		
return p