Module:Internet Archive
Appearance
![]() | This module is rated as ready for general use. It has reached a mature form and is thought to be relatively bug-free and ready for use wherever appropriate. It is ready to mention on help pages and other Wikipedia resources as an option for new users to learn. To reduce server load and bad output, it should be improved by sandbox testing rather than repeated trial-and-error editing. |
![]() | This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
![]() | This Lua module is used on approximately 19,000 pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them. |
Usage
There is currently one template that invokes this module: {{Internet Archive author}}.
If future Lua scripts for Internet Archive are created (books, film, audio, etc.), this module would be a natural place to build them.
--[[
This module is for functions related to Internet Archive.
Function author, invoked from Template:Internet Archive author
{{Internet Archive author|name=|birth=|death=}}
Return an optimized Internet Archive search URL for use in External links section eg.
==External links==
* {{Internet Archive author|name=Albert Einstein|birth=1879|death=1955}}
. All arguments are optional.
name = Defaults to article title.
birth = Defaults to #### in %[%[Category:#### births%]%]
death = Defaults to #### in %[%[Category:#### deaths%]%]
. Uses Protocol Relative URLs. See WP:PRURL.
. The script produces an optimized Internet Archive search string. By way of background, Internet Archive metadata is inconsistent. Books are cataloged by thousands of independent entities (libraries, persons, etc). Each chooses which metadata fields to use (IA has dozens) and which to leave empty. They also fill out the fields differently; for example, there are many ways to enter a 3-word name (John H. Smith, J.H. Smith, etc). Just entering a name into the IA search box will usually miss many books; I have seen the optimized search string find up to 3X as many books. The search strategy is based on the number of words in a name so that various combination possibilities can be built. The IA fields "subject" and "creator" are the most common and will usually find the majority of books. However "title" and "description" are also useful. Complicating matters, IA limits how long a search string can be, so the strategy below breaks down for names longer than 3 words: searches for 4+ word names are by necessity relatively basic compared to a 3-word search.
. Project Gutenberg is mirrored on Internet Archive. Since Wikipedia already uses {{Gutenberg author}}, those books are removed from the Internet Archive search results (-contributor%3Agutenberg).
. LibriVox is mirrored there also. For the same reason audio results are not included. Open questions: there are other audio/video works on IA besides LibriVox; should those be included? Consider the quality of the works, cluttered results, readers' expectations for books, and other templates.
]]
-- Table of exported functions; it is returned at the end of the module.
local p = {}
--- Load the current page's wikitext with HTML comments and <nowiki> spans removed.
-- Stripping those spans keeps commented-out or quoted category links from being
-- mistaken for real ones.
-- @return the cleaned wikitext, or nil when getContent() returns nil
local function load_page_text()
	local content = mw.title.getCurrentTitle():getContent()
	if content == nil then
		return nil
	end
	-- "-" is a lazy quantifier in Lua patterns, so the literal dashes of the
	-- comment delimiters must be escaped as "%-".
	content = mw.ustring.gsub(content, "<!%-%-.-%-%->", "")
	content = mw.ustring.gsub(content, "<nowiki>.-</nowiki>", "")
	return content
end

--- Find the number in a "[[Category:#### births]]" / "[[Category:#### deaths]]" link.
-- @param pagetext cleaned wikitext to search
-- @param kind     category suffix, "births" or "deaths"
-- @return the matched number as a string, or the sentinel "none" when no such category exists
local function year_from_category(pagetext, kind)
	local category = mw.ustring.match(pagetext, "%[%[%s-[Cc]ategory:%s-%d+%.?%d*%s-" .. kind .. "%s-%]%]")
	if category == nil then
		return "none"
	end
	return mw.ustring.match(category, "%d+%.?%d*")
end

--- Build the wikitext external link for {{Internet Archive author}}.
-- Reads name, birth and death from the parent (template) frame. A missing or
-- empty name defaults to the current page title; a missing or empty birth/death
-- is scraped from the page's "#### births" / "#### deaths" category.
-- @param frame the #invoke frame; arguments are taken from frame:getParent()
-- @return a wikitext link to an Internet Archive search, "" when the page text
--         is needed but unavailable, or an error message for an unusable name
function p.author(frame)
	local args = frame:getParent().args
	local pagetext = nil

	--- Determine name
	local name = args.name
	if name == nil or name == "" then
		name = mw.title.getCurrentTitle().text
	end
	name = string.gsub(name, "%s%(.*%)", "") -- remove disambiguation () from title

	--- Determine dob
	local birth
	if args.birth == nil or args.birth == "" then
		pagetext = load_page_text()
		if pagetext == nil then
			return ""
		end
		birth = year_from_category(pagetext, "births")
	else
		birth = string.gsub(args.birth, " ", "")
	end

	--- Determine dod
	local death
	if args.death == nil or args.death == "" then
		if pagetext == nil then -- don't load again if already done above
			pagetext = load_page_text()
			if pagetext == nil then
				return ""
			end
		end
		death = year_from_category(pagetext, "deaths")
	else
		death = string.gsub(args.death, " ", "")
	end

	--- Static status strings
	local byabout = "Works by or about"
	local tagline = "at [[Internet Archive]] (scanned books original editions color illustrated)"

	--- Split name into words and count words.
	-- Both come from the same scan so the count always matches the word list,
	-- even when the name contains repeated spaces or other whitespace.
	local exploded = {}
	for word in string.gmatch(name, "%S+") do
		exploded[#exploded + 1] = word
	end
	local count = #exploded

	--[[
	Begin formatting URL
	]]

	-- Simple search: used when either date is unknown (the optimized searches
	-- need the full birth-death range), and for one-word or five-plus-word names.
	if birth == "none" or death == "none" or count == 1 or count > 4 then
		local nameurl = url_encode(name)
		return "[//archive.org/search.php?query=mediatype%3A(texts)%20-contributor%3Agutenberg%20AND%20(subject%3A%22"..nameurl.."%22%20OR%20creator%3A%22"..nameurl.."%22%20) "..byabout.." "..name.."] "..tagline
	end

	-- Two words search string
	if count == 2 then
		return "[//archive.org/search.php?query=mediatype%3A(texts)%20-contributor%3Agutenberg%20AND%20(subject%3A%22"..exploded[2].."%2C%20"..exploded[1].."%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[2].."%2C%20"..exploded[1].."%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[1].."%20"..exploded[2].."%22%20OR%20title%3A%22"..exploded[1].."%20"..exploded[2].."%22%20OR%20description%3A%22"..exploded[1].."%20"..exploded[2].."%22) "..byabout.." "..name.."] "..tagline
	end

	-- Three words search string. This is at its near-maximum length for Internet Archive to handle.
	if count == 3 then
		-- mw.ustring.sub takes the first character rather than the first byte,
		-- so a multibyte UTF-8 initial is not cut in half.
		local firstinitial = mw.ustring.sub(exploded[1], 1, 1)
		local middleinitial = mw.ustring.sub(exploded[2], 1, 1)
		local part1 = "[//archive.org/search.php?query=mediatype%3A(texts)%20-contributor%3Agutenberg%20AND%20(subject%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%2C%20"..birth.."-"..death.."%22%20OR%20subject%3A%22"..exploded[3].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%2C%20"..birth.."-"..death.."%22%20OR%20subject%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%2C%20"..birth.."-"..death.."%22%20OR%20subject%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%22%20OR%20subject%3A%22"..exploded[3].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22%20OR%20subject%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22%20OR%20subject%3A%22"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%22%20OR%20subject%3A%22"..exploded[1].."%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20subject%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%2C%20Sir%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%2C%20"..birth.."-"..death.."%22"
		local part2 = "%20OR%20creator%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22%20OR%20creator%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22%20OR%20creator%3A%22"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%22%20OR%20creator%3A%22"..exploded[1].."%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20creator%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20title%3A%22"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%22%20OR%20title%3A%22"..exploded[1].."%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20title%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20description%3A%22"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%22%20OR%20description%3A%22"..exploded[1].."%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20description%3A%22"..firstinitial.."%2E%20"..middleinitial.."%2E%20"..exploded[3].."%22%20OR%20description%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..exploded[2].."%22%20OR%20description%3A%22"..exploded[3].."%2C%20"..firstinitial.."%2E%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22%20OR%20description%3A%22"..exploded[3].."%2C%20"..exploded[1].."%20"..middleinitial.."%2E%20%28"..exploded[1].."%20"..exploded[2].."%29%22) "..byabout.." "..name.."] "..tagline
		return part1 .. part2
	end

	-- Four words search string
	if count == 4 then
		return "[//archive.org/search.php?query=mediatype%3A(texts)%20-contributor%3Agutenberg%20AND%20(subject%3A%22"..exploded[4].."%2C%20"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A%22"..exploded[4].."%2C%20"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%2C%20"..birth.."-"..death.."%22%20OR%20creator%3A"..exploded[1].."%20"..exploded[2].."%20"..exploded[3].."%20"..exploded[4]..") "..byabout.." "..name.."] "..tagline
	end

	-- Only reached when the name contains no words at all.
	return "Unknown error (1). Please check documentation for [[Template:Internet Archive author]]"
end
--- Percent-encode a string for use in a URL query.
--- Adapted from http://lua-users.org/wiki/StringRecipes
--- Line feeds become CR LF before encoding; every character other than
--- alphanumerics, space, "-", "_", "." and "~" becomes %XX; spaces become "+".
--- A nil or false argument is returned unchanged.
function url_encode(str)
	if not str then
		return str
	end

	-- Turn one character into its two-digit uppercase hex escape.
	local function to_hex(ch)
		return string.format("%%%02X", string.byte(ch))
	end

	local encoded = string.gsub(str, "\n", "\r\n")
	encoded = string.gsub(encoded, "([^%w %-%_%.%~])", to_hex)
	-- Spaces were deliberately left alone above so they can become "+" here.
	encoded = string.gsub(encoded, " ", "+")
	return encoded
end
--- Split a string into parts around a literal delimiter.
--- Based on http://stackoverflow.com/questions/1426954/split-string-in-lua
--- The delimiter is matched as plain text, so characters that are magic in Lua
--- patterns (".", "-", "%", ...) are safe to use. Adjacent delimiters produce
--- empty strings in the result. An empty delimiter returns the whole string as
--- a single element.
--- @param s         the string to split
--- @param delimiter the literal separator
--- @return a new array of the parts, in order
function split(s, delimiter)
	local result = {}
	if delimiter == "" then
		-- A plain find on "" matches at every position without advancing.
		result[1] = s
		return result
	end
	local start = 1
	while true do
		local first, last = string.find(s, delimiter, start, true)
		if not first then
			break
		end
		result[#result + 1] = string.sub(s, start, first - 1)
		start = last + 1
	end
	-- Whatever follows the last delimiter (possibly "") is the final part.
	result[#result + 1] = string.sub(s, start)
	return result
end
-- Return the table of exported functions as the module's value.
return p