Jump to content

Module:Unicode convert: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
Create a module to replace Template:UTF-8 and Template:UTF-16
 
Better match originals in case of no input
Line 2: Line 2:


p.getUTF8 = function (frame)
p.getUTF8 = function (frame)
local ch = mw.ustring.char(tonumber(frame.args[1], 16))
local ch = mw.ustring.char(tonumber(frame.args[1] or '0', 16) or 0)
local bytes = {mw.ustring.byte(ch, 1, -1)}
local bytes = {mw.ustring.byte(ch, 1, -1)}
local format = ({ -- TODO reduce the number of options.
local format = ({ -- TODO reduce the number of options.
Line 19: Line 19:


p.getUTF16 = function (frame)
p.getUTF16 = function (frame)
local codepoint = tonumber(frame.args[1], 16)
local codepoint = tonumber(frame.args[1] or '0', 16) or 0
local format = ({ -- TODO reduce the number of options.
local format = ({ -- TODO reduce the number of options.
['10'] = '%d',
['10'] = '%d',

Revision as of 09:15, 10 April 2021

local p = {}

-- Returns the UTF-8 encoding of a code point as a string of byte values
-- separated by single spaces.
--   frame.args[1]      code point written in hexadecimal; a missing or
--                      unparsable value is treated as 0.
--   frame.args['base'] optional output format selector (see the table below);
--                      any other value, or none, gives two-digit uppercase hex.
-- NOTE(review): unlike getUTF16 there is no explicit range check here, so the
-- result for values above 0x10FFFF depends on mw.ustring.char -- confirm.
p.getUTF8 = function (frame)
	local codepoint = tonumber(frame.args[1] or '0', 16) or 0
	local encoded = mw.ustring.char(codepoint)
	local values = {mw.ustring.byte(encoded, 1, -1)}

	-- TODO reduce the number of options.
	local decimal, lowerHex = '%d', '%02x'
	local knownFormats = {
		['10'] = decimal,
		dec = decimal,
		LChex = lowerHex,
		LC16 = lowerHex,
		['Lower Case Hex'] = lowerHex,
		['Lower Case 16'] = lowerHex
	}
	local byteFormat = knownFormats[frame.args['base']] or '%02X'

	-- Format each byte on its own, then join with spaces.
	local pieces = {}
	for index, value in ipairs(values) do
		pieces[index] = string.format(byteFormat, value)
	end
	return table.concat(pieces, ' ')
end

-- Returns the UTF-16 encoding of a code point as a string of 16-bit code unit
-- values separated by single spaces.
--   frame.args[1]      code point written in hexadecimal; a missing or
--                      unparsable value is treated as 0.
--   frame.args['base'] optional output format selector (see the table below);
--                      any other value, or none, gives four-digit uppercase hex.
-- Result:
--   * code points up to 0xFFFF: the single code unit, formatted as requested;
--   * code points from 0x10000 to 0x10FFFF: high surrogate, space, low surrogate;
--   * anything above 0x10FFFF: the empty string.
p.getUTF16 = function (frame)
	local codepoint = tonumber(frame.args[1] or '0', 16) or 0
	local format = ({ -- TODO reduce the number of options.
		['10'] = '%d',
		dec = '%d',
		LChex = '%04x',
		LC16 = '%04x',
		['Lower Case Hex'] = '%04x',
		['Lower Case 16'] = '%04x'
	})[frame.args['base']] or '%04X'
	if codepoint <= 0xFFFF then -- NB this also returns lone surrogate characters
		return format:format(codepoint)
	elseif codepoint > 0x10FFFF then -- There are no codepoints above this
		return ''
	end
	-- Supplementary plane: split the 20-bit offset from 0x10000 into a
	-- high surrogate (top 10 bits) and a low surrogate (bottom 10 bits).
	codepoint = codepoint - 0x10000
	-- Keep the library handle local so calling this function does not create
	-- (or overwrite) a global named bit32.
	local bit32 = require('bit32')
	return (format .. ' ' .. format):format(
		bit32.rshift(codepoint, 10) + 0xD800,
		bit32.band(codepoint, 0x3FF) + 0xDC00)
end

return p