Jump to content

Module:Excerpt: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
If the random page is unavailable, bring on a substitute
Also remove Template:Rp
Line 43: Line 43:
text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
for _, t in pairs {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]", "[Ss]fn[bp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef"} do
for _, t in pairs {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef"} do
text = mw.ustring.gsub(text, "{{%s*" .. t .. "%s*|.-}}", "") -- remove ref and footnote templates
text = mw.ustring.gsub(text, "{{%s*" .. t .. "%s*|.-}}", "") -- remove ref and footnote templates
end
end

Revision as of 18:48, 6 May 2018

-- Module table: every function attached to it below is exported by the final `return p`
local p = {}
local mRedirect = require('Module:Redirect') -- its getTarget() is used by p._lead to follow redirects
-- Error switch shared between p._lead (which sets it from options.errors) and p.err (which reads it)
local errors

-- Entry point for Lua callers
-- Returns a string value: text of the lead of a page
--
-- pagenames: sequence of page names (plain titles or "[[Foo|Bar]]" style links).
--            If more than one is given, one is picked at random; names that cannot
--            be read are removed from this table in place and another is tried.
-- options:   optional table. Fields read here:
--              errors    - if truthy, p.err raises an error instead of returning ""
--              paraflags - table indexed by paragraph number; truthy entries are kept
--                          (if no entry is truthy, all paragraphs are kept)
--              fileflags - table indexed by file number; truthy entries are kept
--              more      - link text for a trailing bold link to the chosen page
-- Returns the trimmed lead wikitext, or the result of p.err on failure.
function p._lead(pagenames, options)
	options = options or {} -- tolerate Lua callers that pass no options table
	errors = options.errors

	if not pagenames or #pagenames < 1 then return p.err("No page names given") end

	local pagename
	local text
	local pagecount = #pagenames
	local firstpage = pagenames[1] or "(nil)" -- save for error message, as the name may be removed below

	-- read the page, or a random one if multiple pages were provided
	if pagecount > 1 then math.randomseed(os.time()) end
	while not text and pagecount > 0 do
		local pagenum = 1
		if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
		pagename = pagenames[pagenum]
		if pagename and pagename ~= "" then
			pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"
			-- strip leading and trailing white space; unlike "%S.*%S" this also accepts one-character titles
			pagename = mw.ustring.match(pagename, "^%s*(.-)%s*$")

			if pagename and pagename ~= "" then
				local title = mw.title.new(pagename) -- Find the lead section of the named page
				if not title then return p.err("No title for page name " .. pagename) end
				local redir = mRedirect.getTarget(title)
				if redir then title = mw.title.new(redir) end

				-- title is nil if the redirect target could not be turned into a title; treat as unreadable
				if title then text = title:getContent() end
			end
		end
		if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
		pagecount = pagecount - 1 -- ensure that we exit the loop eventually
	end
	if not text then return p.err("Cannot read a valid page: first name is " .. firstpage) end

	text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first heading and everything after it
	text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
	text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
	text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
	for _, t in pairs {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef"} do
		text = mw.ustring.gsub(text, "{{%s*" .. t .. "%s*|.-}}", "") -- remove ref and footnote templates
	end
	text = mw.ustring.gsub(text, "\n%s*{{%s*[Tt][Oo][Cc].-}}", "\n") -- remove most common tables of contents

	local allparas = true -- keep all paragraphs?
	if options.paraflags then
		for _, v in pairs(options.paraflags) do
			if v then allparas = false end -- if any para specifically requested, don't keep all
		end
	end

	-- a basic parser to trim down the lead
	local inlead = false -- have we found some text yet?
	local out = {} -- pieces of the stripped down output text, joined once at the end
	local files = 0 -- how many [[Image: or [[File: so far
	local paras = 0 -- how many paragraphs so far

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
	repeat -- loop around parsing a comment, template, image or paragraph
		local token = mw.ustring.match(text, "^%s*<!%-%-.-%-%->%s*") -- <!--HTML comment-->
		 or mw.ustring.match(text, "^%b{}%s*") or false -- or {{Template}}
		if token then
			if inlead then out[#out + 1] = token end -- keep comments and templates only within text body
		else
			token = mw.ustring.match(text, "^%[%[%s*[Ff]ile%s*:") -- [[File: ...
			 or mw.ustring.match(text, "^%[%[%s*[Ii]mage%s*:") -- or [[Image: ...
			if token then
				token = mw.ustring.match(text, "^%b[]%s*") -- match [[...]] to handle nesting
				files = files + 1
				-- token is nil when the brackets are unbalanced; nothing is kept and the loop ends below
				if token and options.fileflags and options.fileflags[files] then out[#out + 1] = token end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterend = mw.ustring.len(text) + 1
				local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend
				local endpos = math.min(
				 mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
				 mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterend,
				 blankpos)
				token = mw.ustring.sub(text, 1, endpos-1)
				if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
				end
				inlead = true
				paras = paras + 1
				if allparas or (options.paraflags and options.paraflags[paras]) then out[#out + 1] = token end
			end
		end

		-- consume the token just parsed; an empty or missing token means no progress, so stop
		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end
	until not text or text == "" or not token or token == ""

	text = mw.ustring.gsub(table.concat(out), "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line

	if options.more then text = text .. " '''[[" .. pagename .. "|" .. options.more .. "]]'''" end
	return text
end

-- Failure helper: when the module-level `errors` switch is set, raise `text` as a
-- Lua error attributed to the caller (level 2); otherwise yield an empty string.
-- The `options` parameter is accepted but not used.
function p.err(text, options)
	if not errors then return "" end
	error(text, 2)
end

-- Convert a comma-separated list of numbers or min-max ranges into a table of flags
-- keyed by number, e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true}.
-- Numbers that are not listed are simply absent (nil), and items that are neither a
-- number nor a min-max range are ignored. A nil or empty str gives an empty table.
function p.numberflags(str)
	local ranges = mw.text.split(str or "", ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
	local flags = {}
	for _, r in ipairs(ranges) do
		local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min=3 max=5
		if not max then -- not a range; try a single number
			min = mw.ustring.match(r, "^%s*(%d+)%s*$") -- "1" → min=1 max=1
			max = min
		end
		if max then
			-- captures are strings: convert explicitly rather than rely on coercion in the numeric for
			for n = tonumber(min), tonumber(max) do flags[n] = true end
		end
	end
	return flags
end

-- Shared template invocation code for lead and random functions
-- frame:    the frame object of the #invoke call
-- israndom: if truthy, every unnamed argument is a candidate page name (p._lead picks one);
--           otherwise only the first unnamed argument is used
-- Returns the excerpt produced by p._lead, passed through frame:preprocess.
function p.leadrandom(frame, israndom)
	-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	local args = frame.args -- from calling module
	local parent = frame:getParent()
	local pargs = parent and parent.args or {} -- from template; empty when there is no parent frame

	local pagenames = { args[1] or pargs[1] } -- For lead, ignore all but the first unnamed argument
	if israndom then
		-- For random, accept any number of page names.  If more than one, we'll pick one randomly
		for i, name in pairs(args) do
			if name and type(i) == 'number' and i > 1 then table.insert(pagenames, name) end
		end
		for i, name in pairs(pargs) do
			-- skip positions already supplied by the invoking module's own arguments
			if name and type(i) == 'number' and i > 1 and not args[i] then table.insert(pagenames, name) end
		end
	end

	local options = {}
	options.paraflags = p.numberflags(args["paragraphs"] or pargs["paragraphs"] or "") -- parse paragraph numbers, e.g. "1,3-5" → flags for 1, 3, 4 and 5
	options.fileflags = p.numberflags(args["files"] or pargs["files"] or "") -- parse file numbers
	options.more = args["more"] or pargs["more"]
	if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
	options.errors = args["errors"] or pargs["errors"]

	local text = p._lead(pagenames, options)
	return frame:preprocess(text)
end

-- Entry points for template callers using #invoke:
-- p.lead delegates to p.leadrandom with the random flag switched off
function p.lead(frame)
	local israndom = false
	return p.leadrandom(frame, israndom)
end
-- p.random delegates to p.leadrandom with the random flag switched on
function p.random(frame)
	local israndom = true
	return p.leadrandom(frame, israndom)
end

return p