Jump to content

Module:Unicode data/testcases

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Erutuon (talk | contribs) at 02:14, 30 June 2018. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- Unit tests for [[Module:Unicode data]]. Click talk page to run tests.
local p = require 'Module:UnitTests'
local Unicode_data = require 'Module:Unicode data'

-- Run one of this object's methods once per example.
--
-- examples: array of tables; each inner table is unpacked into the
--           arguments of the method.
-- func:     name (string) of the method on self to call.
--
-- Raises an error if examples is not a table, if func is not a string, or if
-- any example is not itself a table.
function p:iterate(examples, func)
	if type(examples) ~= 'table' then
		error('First argument of iterate should be a table, not ' .. type(examples) .. '.')
	end
	if type(func) ~= 'string' then
		error('Second argument of iterate should be a string, not ' .. type(func) .. '.')
	end
	
	-- Resolve the method name once, before looping over the examples.
	func = self[func]
	for index, args in ipairs(examples) do
		if type(args) ~= 'table' then
			error('iterate does not know what to do with example number ' .. index .. ', whose type is ' .. type(args) .. '.')
		end
		func(self, unpack(args))
	end
end

local U = mw.ustring.char

-- Build the label used for a code point in test output.
--
-- Non-printable code points are shown as "U+XXXX" only. Printable ones are
-- shown as "U+XXXX: " followed by the character itself, with two adjustments:
-- a character that NFC normalization would change is written as a hexadecimal
-- character reference instead, and a combining character is placed after a
-- dotted circle.
local function show(codepoint)
	if not Unicode_data.is_printable(codepoint) then
		return ("U+%04X"):format(codepoint)
	end
	
	local glyph = U(codepoint)
	if mw.ustring.toNFC(glyph) ~= glyph then
		glyph = ("&#x%X;"):format(codepoint)
	end
	
	local base = Unicode_data.is_combining(codepoint) and "◌" or ""
	return ("U+%04X: %s"):format(codepoint, base .. glyph)
end

-- Compare Unicode_data.lookup_name(codepoint) with name, labelling the
-- comparison with the displayed form of the code point.
function p:check_lookup_name(codepoint, name)
	local label = show(codepoint)
	local actual = Unicode_data.lookup_name(codepoint)
	self:equals(label, actual, name)
end

-- Exercise check_lookup_name on a list of { code point, name } pairs.
function p:test_lookup_name()
	local cases = {
		{ 0x0000, "<control-0000>" },
		{ 0x007F, "<control-007F>" },
		{ 0x00C1, "LATIN CAPITAL LETTER A WITH ACUTE" },
		{ 0x0300, "COMBINING GRAVE ACCENT" },
		{ 0x0378, "<reserved-0378>" },
		{ 0x1B44, "BALINESE ADEG ADEG" },
		{ 0x1F71, "GREEK SMALL LETTER ALPHA WITH OXIA" },
		{ 0x3555, "CJK UNIFIED IDEOGRAPH-3555" },
		{ 0xAC01, "HANGUL SYLLABLE GAG" },
		{ 0xD5FF, "HANGUL SYLLABLE HEH" },
		{ 0xDC00, "<surrogate-DC00>" },
		{ 0xEEEE, "<private-use-EEEE>" },
		{ 0xFDD1, "<noncharacter-FDD1>" },
		{ 0xFFFD, "REPLACEMENT CHARACTER" },
		{ 0xFFFF, "<noncharacter-FFFF>" },
		{ 0x1F4A9, "PILE OF POO" },
		{ 0xE0000, "<reserved-E0000>" },
		{ 0xF0F0F, "<private-use-F0F0F>" },
		{ 0x10FFFF, "<noncharacter-10FFFF>" },
	}
	
	self:iterate(cases, "check_lookup_name")
end

-- Compare Unicode_data.is_combining(codepoint) with expected. The comparison
-- is labelled with the displayed code point followed by its name in
-- parentheses.
function p:check_is_combining(codepoint, expected)
	local label = ("%s (%s)"):format(show(codepoint), Unicode_data.lookup_name(codepoint))
	local actual = Unicode_data.is_combining(codepoint)
	self:equals(label, actual, expected)
end

-- Exercise check_is_combining on { code point, expected boolean } pairs.
function p:test_is_combining()
	self:iterate(
		{
			{ 0x0300, true },
			{ 0x0060, false },
		},
		"check_is_combining")
end

-- Compare Unicode_data.lookup_script(codepoint) with expected. The comparison
-- is labelled with the displayed code point followed by its name in
-- parentheses.
function p:check_lookup_script(codepoint, expected)
	local label = ("%s (%s)"):format(show(codepoint), Unicode_data.lookup_name(codepoint))
	local actual = Unicode_data.lookup_script(codepoint)
	self:equals(label, actual, expected)
end

-- Exercise check_lookup_script on { code point, expected script code } pairs.
function p:test_lookup_script()
	self:iterate(
		{
			{ 0x0061, "Latn" },
			{ 0x002F, "Zyyy" },
			{ 0x0300, "Zinh" },
			{ 0x0378, "Zzzz" },
			{ 0x0398, "Grek" },
			{ 0x03E2, "Copt" },
			{ 0x2014, "Zyyy" },
		},
		"check_lookup_script")
end

local fun = require "Module:Fun"
local m_table = require "Module:Table"
-- Metatable for counter tables: reading a key that is not yet present stores
-- 0 under that key and returns 0, so a count can be incremented without
-- initializing it first.
local script_to_count_mt = {}

function script_to_count_mt.__index(counts, script)
	counts[script] = 0
	return 0
end
-- Summarize which scripts occur in str.
--
-- Counts the code points of str by the script that
-- Unicode_data.lookup_script reports for each, then returns the counts as a
-- comma-separated list of "script (count)" items, ordered by a comparator
-- that puts higher counts first.
local function show_scripts(str)
	local tally = setmetatable({}, script_to_count_mt)
	for codepoint in mw.ustring.gcodepoint(str) do
		local script = Unicode_data.lookup_script(codepoint)
		tally[script] = tally[script] + 1
	end
	
	-- Key comparator: a script with a larger count sorts earlier.
	local function by_descending_count(script1, script2)
		return tally[script1] > tally[script2]
	end
	
	-- The mapped function is called with the count first and the script second.
	local function describe(count, script)
		return ("%s (%d)"):format(script, count)
	end
	
	local descriptions = fun.mapIter(
		describe,
		m_table.sortedPairs(tally, by_descending_count))
	return table.concat(descriptions, ", ")
end

-- Compare Unicode_data.get_best_script(str) with expected. The comparison is
-- labelled with str (newlines replaced by <br>) followed by the per-script
-- code point counts from show_scripts.
function p:check_get_best_script(str, expected)
	-- Assigning to a single local keeps only gsub's first result (the string).
	local displayed = str:gsub('\n', '<br>')
	local label = displayed .. ": " .. show_scripts(str)
	local actual = Unicode_data.get_best_script(str)
	self:equals(label, actual, expected)
end

-- Exercise check_get_best_script on strings written in several scripts.
-- Each example is { input string, expected result }; an expected result of
-- nil is written out explicitly for readability. The long-bracket and
-- concatenated string literals below are test inputs, so their exact
-- content (including line breaks) is significant.
function p:test_get_best_script()
	local examples = {
		-- Two examples from [[Template talk:Lang#Italicisation of Halkomelem]]
		{ "lá:yelhp", "Latn" },
		{ "xʷməθkʷəy̓əm", nil }, -- one Greek (Grek) character
		{ "hən̓q̓əmin̓əm̓", "Latn" },
		
		{ "L'Armadio della vergogna. Nutrimenti, Roma", "Latn" }, -- [[Armadio della vergogna]]
		
		{	-- The opening of the Iliad ([[s:el:Ιλιάς/Α]]), with macrons and
			-- breves added to mark the length of the monophthongs α, ι, υ: 
[[Μῆνῐν ᾰ̓́ειδε, θεᾱ́, Πηληῐ̈ᾰ́δεω Ᾰ̓χῐλῆος
οὐλομένην, ἣ μῡρῐ́᾽ Ᾰ̓χαιοῖς ᾰ̓́λγε᾽ ἔθηκε,
πολλᾱ̀ς δ᾽ ῐ̓φθῑ́μους ψῡχᾱ̀ς Ἄῐ̈δῐ προῐ̈́ᾰψεν
ἡρώων, αὐτοὺς δὲ ἑλώρῐᾰ τεῦχε κῠ́νεσσιν
οἰωνοῖσῐ́ τε πᾶσῐ· Δῐὸς δ᾽ ἐτελείετο βουλή·]],
			"Grek"
		},
		{ -- The Brothers Karamazov: [[w:ru:Братья Карамазовы (Достоевский)/Книга первая]]
"Вот если вы не согласитесь с этим последним тезисом и ответите: «Не так» или "
.. "«не всегда так», то я, пожалуй, и ободрюсь духом насчет значения героя моего "
.. "Алексея Федоровича. Ибо не только чудак «не всегда» частность и обособление, "
.. "а напротив, бывает так, что он-то, пожалуй, и носит в себе иной раз "
.. "сердцевину целого, а остальные люди его эпохи — все, каким-нибудь наплывным "
.. "ветром, на время почему-то от него оторвались…",
			"Cyrl"
		},
		{ -- Rig Veda: [[https://sa.wikisource.org/wiki/ऋग्वेदः_सूक्तं_१.१]]
[[ॐ अग्निमीळे पुरोहितं यज्ञस्य देवमृत्विजम् ।
होतारं रत्नधातमम् ॥१॥
अग्निः पूर्वेभिरृषिभिरीड्यो नूतनैरुत ।
स देवाँ एह वक्षति ॥२॥
अग्निना रयिमश्नवत् पोषमेव दिवेदिवे ।
यशसं वीरवत्तमम् ॥३॥
अग्ने यं यज्ञमध्वरं विश्वतः परिभूरसि ।
स इद्देवेषु गच्छति ॥४॥
अग्निर्होता कविक्रतुः सत्यश्चित्रश्रवस्तमः ।
देवो देवेभिरा गमत् ॥५॥
यदङ्ग दाशुषे त्वमग्ने भद्रं करिष्यसि ।
तवेत् तत् सत्यमङ्गिरः ॥६॥
उप त्वाग्ने दिवेदिवे दोषावस्तर्धिया वयम् ।
नमो भरन्त एमसि ॥७॥
राजन्तमध्वराणां गोपामृतस्य दीदिविम् ।
वर्धमानं स्वे दमे ॥८॥
स नः पितेव सूनवेऽग्ने सूपायनो भव ।
सचस्वा नः स्वस्तये ॥९॥]],
			"Deva"
		},
		{   -- A blessing in Navajo:
			-- [[User talk:Stephen G. Brown/text8]]
"Díí Késhmish biyiʼ yáʼąąshdę́ę́ʼ ląʼígóó bee nikʼihojidlíi dooleeł. "
.. "Niheechʼínáánáháhígíí biyiʼ iłhodeezyéél, iłhózhǫ́, ayóóʼóʼóʼní bee "
.. "nikʼihojidlíi dooleeł. Tʼáá sahdiigiʼ átʼéego baa hózhǫ́ǫgo nihił "
.. "hanááhoolzhiizhígí biyiʼ tʼáá ałtsojįʼ iłhózhǫ́ nííʼ dooleeł.",
			"Latn"
		},
	}
	
	-- Each example table is unpacked into (str, expected) for the check method.
	self:iterate(examples, "check_get_best_script")
end

-- Rename every test method on p from "test_name" to "test <code>name</code>",
-- so that the HTML <code> markup becomes part of the method's key
-- (presumably so the test runner displays the name in code formatting).
--
-- All matching keys are renamed. Stopping at the first key that compares
-- greater than "test" would end the loop after a single test method, because
-- every "test_..." key compares greater than "test".
--
-- The renames are collected first and applied afterwards: assigning to keys
-- that do not exist yet while traversing a table with pairs is not allowed.
do
	local renames = {}
	for key in pairs(p) do
		-- Only string keys can be test names; anything else is left alone.
		if type(key) == "string" then
			local display_name, matches = key:gsub("^test_(.+)$", "test <code>%1</code>")
			if matches > 0 then
				renames[key] = display_name
			end
		end
	end
	
	for key, display_name in pairs(renames) do
		p[display_name] = p[key]
		p[key] = nil
	end
end

return p