-- Jump to content
--
-- Module:Dump
--
-- From Wikipedia, the free encyclopedia
-- This is an old revision of this page, as edited by Johnuniq (talk | contribs) at 11:26, 23 May 2016 (need larger default limits for Wikidata). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Dump a table for assistance in the development of other modules.
-- A similar result is available using mw.dumpObject() but the result from
-- this module is close to valid Lua source.
-- The purpose of this module is to allow easy inspection of Wikidata items.
-- For example, [[Southern African Large Telescope]] is Q833639.
-- Preview the following in a sandbox to see that entity as a Lua table:
--   {{#invoke:dump|wikidata|Q833639}}
-- Preview the following to dump a built-in table:
--   {{#invoke:dump|testcase}}

local function collection()
	-- Create an empty list-like holder.
	-- Items live at integer keys 1..n and the count is kept in field n.
	-- Methods (call with a colon):
	--   add(item)  append item and increment n
	--   join(sep)  concatenate the items using table.concat
	--   sort(comp) sort the items in place using table.sort
	local holder = { n = 0 }

	function holder.add(self, item)
		local count = self.n + 1
		self.n = count
		self[count] = item
	end

	function holder.join(self, sep)
		return table.concat(self, sep)
	end

	function holder.sort(self, comp)
		table.sort(self, comp)
	end

	return holder
end

local function quoted(str)
	-- Return str as a double-quoted Lua string literal on a single line.
	-- string.format('%q') writes a newline as a backslash followed by a
	-- real newline; that pair is replaced with the two characters \n.
	local literal = string.format('%q', str)
	-- Assigning to one local keeps the text and drops gsub's match count.
	local single_line = literal:gsub('\\\n', '\\n')
	return single_line
end

local function iterkeys(var, control)
	-- Build an iterator over the keys of var in a stable order.
	-- Keys visited by ipairs come first, followed by the remaining keys:
	-- numbers, then the other types grouped by type name.
	-- Each step yields key, repr where key is the real key and repr is how
	-- it should be displayed: the number itself for an ipairs key, otherwise
	-- a string (this includes number keys that ipairs does not reach).
	-- If var is not a table, the iterator yields nothing.
	if type(var) ~= 'table' then
		return function () return nil end
	end
	local autoname = control.autoname

	-- Sequence part: remember which keys were seen so that pairs can skip them.
	local in_sequence = {}
	local ordered = collection()
	for index in ipairs(var) do
		in_sequence[index] = true
		ordered:add({ index, index })
	end

	-- Everything else.
	local extras = collection()
	for key in pairs(var) do
		if not in_sequence[key] then
			extras:add(key)
		end
	end

	local function precedes(a, b)
		-- Strict ordering used to sort the non-sequence keys.
		local ta, tb = type(a), type(b)
		if ta ~= tb then
			-- Numbers go before all other types; the rest are ordered
			-- by the name of their type.
			if ta == 'number' then
				return true
			end
			if tb == 'number' then
				return false
			end
			return ta < tb
		end
		if ta == 'number' or ta == 'string' then
			return a < b
		end
		if ta == 'boolean' then
			-- false sorts before true.
			return b and not a
		end
		-- Tables, functions and so on are ordered by their generated names.
		return autoname(a) < autoname(b)
	end
	extras:sort(precedes)

	local function represent(key)
		-- Return the display text for a non-sequence key.
		local kind = type(key)
		if kind == 'number' then
			return '[' .. key .. ']'
		end
		if kind == 'string' then
			if key:match('^[%a_][%w_]*$') then
				-- Looks like an identifier, so no brackets or quotes.
				return key
			end
			return '[' .. quoted(key) .. ']'
		end
		if kind == 'boolean' then
			return '[' .. tostring(key) .. ']'
		end
		-- Any other type is shown by its generated name, and that name is
		-- marked as needed so the matching note is displayed.
		local name = autoname(key)
		control.needed[name] = true
		return name
	end

	for position = 1, extras.n do
		local key = extras[position]
		ordered:add({ key, represent(key) })
	end

	local cursor = 0
	return function ()
		if cursor >= ordered.n then
			return
		end
		cursor = cursor + 1
		local entry = ordered[cursor]
		return entry[1], entry[2]
	end
end

local function vardump(var, vname, depth, control, self, parents)
	-- Append to control.items the entries that represent var, recursing
	-- into tables. Each entry describes one output line using the fields
	-- key, value, depth and note.
	--   var     : the value to dump.
	--   vname   : representation of the key holding var; a string is shown
	--             as "vname = ", anything else (such as the number given
	--             for sequence entries) is not shown.
	--   depth   : nesting level; used for the indent and checked against
	--             control.limitdepth.
	--   control : state shared by the whole dump (items, needed, autoname,
	--             limititems, limitdepth).
	--   self    : generated name of the table currently being expanded,
	--             or nil for the outermost call.
	--   parents : set of generated names of tables whose expansion has
	--             started; created at the first table and shared by all
	--             nested calls. Names are never removed, so a table that is
	--             reached again is shown by name only.
	local function put(value, options)
		-- Add one entry.
		--   options.kind   : 'open' or 'close' for the braces of a table.
		--   options.indent : depth to use instead of the current depth.
		--   options.nokey  : true to omit "vname = " from the line.
		--   options.note   : text shown as a comment if it is marked in
		--                    control.needed.
		options = options or {}
		local indent = options.indent or depth
		-- No comma after an opening brace or after the outermost value.
		local comma = (options.kind == 'open' or indent == 0) and '' or ','
		local showkey = type(vname) == 'string'
			and options.kind ~= 'close'
			and not options.nokey
		control.items:add({
			key = showkey and vname or nil,
			value = value .. comma,
			depth = indent,
			note = options.note
		})
	end
	if var == nil then
		put('nil')
	elseif type(var) == 'string' then
		put(quoted(var))
	elseif type(var) == 'table' then
		local this = control.autoname(var)
		if depth >= control.limitdepth then
			-- Too deep: show only the generated name of the table.
			put(this)
		elseif parents and parents[this] then
			-- Already expanded (or being expanded): show the name only, and
			-- mark the name so the opening brace of that table is labelled.
			control.needed[this] = true
			if self == this then
				put(this, {note = 'self'})
				control.needed['self'] = true
			else
				put(this, {note = 'recurse'})
				control.needed['recurse'] = true
			end
		else
			parents = parents or {}
			parents[this] = true
			self = this
			put('{', {kind = 'open', note = this})
			local mt = getmetatable(var)
			if mt then
				vardump(mt, '__metatable', depth + 1, control, self, parents)
			end
			-- Stop once this table has added more than limititems entries.
			local maxsize = control.items.n + control.limititems
			for key, keyrep in iterkeys(var, control) do
				if control.items.n > maxsize then
					-- The marker stands in for the omitted entries, so it
					-- is indented like them and shown without the key of
					-- the enclosing table.
					put('...more...', { indent = depth + 1, nokey = true })
					break
				end
				vardump(var[key], keyrep, depth + 1, control, self, parents)
			end
			put('}', { kind = 'close' })
		end
	elseif type(var) == 'function' then
		put(control.autoname(var))
	else  -- boolean or number (or userdata or thread)
		put(tostring(var))
	end
end

local function dumper(var, vname, limititems, limitdepth)
	-- Produce text that shows var in a form close to Lua source.
	--   var        : the value to show.
	--   vname      : name displayed for var (default 'variable').
	--   limititems : number of entries a table may add before its dump is
	--                cut short (default 2000).
	--   limitdepth : nesting level at which tables are no longer expanded
	--                (default 20).
	-- The returned string has no newline at the end.
	local names = {}   -- object -> generated name
	local counts = {}  -- type name -> how many names were generated for it
	local function autoname(object)
		-- Give object (which should not be a number or string) a unique
		-- name such as 'table_1'; the same object always gets the same name.
		local known = names[object]
		if not known then
			local kind = type(object)
			local serial = (counts[kind] or 0) + 1
			counts[kind] = serial
			known = kind .. '_' .. serial
			names[object] = known
		end
		return known
	end

	local control = {
		autoname = autoname,
		limititems = limititems or 2000,
		limitdepth = limitdepth or 20,
		items = collection(),
		needed = {},
	}
	vardump(var, tostring(vname or 'variable'), 0, control)

	local lines = collection()
	for _, item in ipairs(control.items) do
		-- Four spaces per level; tabs do not work well in a browser
		-- edit window.
		local text = string.rep('    ', item.depth)
		if item.key then
			text = text .. item.key .. ' = '
		end
		text = text .. item.value
		-- A note is displayed only if something marked it as needed.
		if item.note and control.needed[item.note] then
			text = text .. '  -- ' .. item.note
		end
		lines:add(text)
	end
	return lines:join('\n')
end

local function dump_testcase()
	-- Build a table that exercises the dumper (mixed key types, nesting,
	-- cyclic references and a metatable) and return its dump as a string.
	-- fruit has a sequence part, a number key outside the sequence, and a
	-- key that is itself a table.
	local fruit = { 'apple', 'banana', [0] = 'zero', [{'anon'}] = 'anon' }
	local testcase = {
		-- Number keys that ipairs does not reach.
		[100] = 'one hundred',
		[99] = 'ninety nine',
		[0.5] = 'one half',
		[-1] = 'negative one',
		-- Sequence part.
		'one',
		'two',
		-- String keys that are not identifiers.
		[' '] = 'space',
		['1 –◆— z'] = 'unicode',
		-- String keys that are identifiers.
		alpha = 'aaa',
		beta = 'bbb',
		c = 123,
		data = {
			-- A function used as a value and as a key.
			dumper = dumper,
			[dumper] = 'dumper',
			'three',
			'four',
			-- Booleans used as values and as keys.
			T = true,
			[true] = 'T',
			alpha2 = 'aaa2',
			beta2 = 'bbb2',
			F = false,
			[false] = 'F',
			c2 = 1234,
			data2 = {
				'five',
				'six',
				alpha3 = 'aaa3',
				beta3 = 'bbb3',
				c3 = 12345,
				-- A table used as a value and as a key.
				fruit = fruit,
				[fruit] = 'fruit',
			},
		},
		z = 'zoo',
	}
	-- References that make the structure cyclic.
	testcase.testcase = testcase            -- a table containing itself
	testcase.data.me = testcase.data        -- a nested table containing itself
	testcase.data.data2.me = testcase       -- a reference back to the outermost table
	testcase.data.data2.fruit.back = testcase.data  -- a reference back to an enclosing table
	-- Attach a metatable holding functions so that the dump of data
	-- includes a __metatable entry.
	setmetatable(testcase.data, {
		__index = function (self, key) return type(key) == 'string' and #key or nil end,
		__tostring = function (self) return tostring(#self) end,
		__eq = function (lhs, rhs) return #lhs == #rhs end,
	})
	return dumper(testcase, 'testcase')
end

local function wikidata(frame)
	-- Entry point for {{#invoke:dump|wikidata|Q833639}}.
	-- The first unnamed argument should be an identifier consisting of Q
	-- followed by digits (surrounding whitespace is ignored). The entity
	-- is fetched and its dump is returned inside <pre>...</pre>; otherwise
	-- a message describing the expected parameter is returned.
	local usage = 'Parameter should be a Wikidata entity identifier such as Q833639'
	local argument = frame.args[1]
	if not argument then
		return usage
	end
	local qid = argument:match('^%s*(Q%d+)%s*$')
	if not qid then
		return usage
	end
	local entity = mw.wikibase.getEntity(qid)
	local text = dumper(entity, qid)
	-- nowiki stops the dumped text from being interpreted as wikitext.
	return '<pre>\n' .. mw.text.nowiki(text) .. '\n</pre>\n'
end

-- Functions made available by this module.
local exports = {}
exports.wikidata = wikidata      -- {{#invoke:dump|wikidata|Q833639}}
exports._dump = dumper           -- dumper(var, vname, limititems, limitdepth)
exports.testcase = dump_testcase -- {{#invoke:dump|testcase}}
return exports