Jump to content

Module:Internet Archive

Permanently protected module
From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by GreenC (talk | contribs) at 15:57, 16 January 2015 (now using "*" instead of "?" for extended character wildcard in 3-word search). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

--[[ 

Functions related to Internet Archive.

The entry point used by Template:Internet Archive author is p.author; the
other functions in this module are helpers that p.author calls to build the
archive.org search URL.

]]

-- Module table: every function below is attached to it, and it is returned
-- at the end of the file.
local p = {}

--[[ 

For Template:Internet Archive author

]]
-- Builds the wikitext for an external link to an archive.org search for works
-- by or about a person, followed by a tagline.
--
-- Template arguments (read from the parent frame):
--   name         article name; defaults to the title of the current page
--   sname        name used to build the search; defaults to name
--   dname        name displayed in the link text; defaults to name
--   coda         text appended after the tagline
--   search       complete custom query; replaces the generated one
--   media        space-separated media types, see p.mediaTypes
--   birth/death  passed to p.bdDate
--
-- Returns the link wikitext, or an error message string when the dates cannot
-- be determined or the search name has no words.
function p.author(frame)

  local pframe = frame:getParent()
  local args = pframe.args

  local tname = "Internet Archive author" -- name of calling template. Change if template rename.

  local byabout = "Works by or about"
  local tagline = "at [[Internet Archive]]"
  local urlhead = "//archive.org/search.php?query="

  --- Determine name
  local name -- article name (default: current page)
  if args.name == "" or args.name == nil then
    name = mw.title.getCurrentTitle().text
  else
    name = mw.text.trim(args.name)
  end
  local dname = name -- display name
  local sname = name -- search name
  if args.sname ~= nil and args.sname ~= "" then
    sname = mw.text.trim(args.sname)
  end
  if args.dname ~= nil and args.dname ~= "" then
    dname = mw.text.trim(args.dname)
  end
  dname = mw.ustring.gsub(dname, "%s%(.*%)", "") -- remove disambiguation ()
  sname = mw.ustring.gsub(sname, "%s%(.*%)", "")

  --- Determine tagline
  if args.coda ~= "" and args.coda ~= nil then
    tagline = tagline .. " " .. mw.text.trim(args.coda)
  end

  -- Wrap an already-encoded query string in the final link wikitext.
  local function link(query)
    return "[" .. urlhead .. query .. " " .. byabout .. " " .. dname .. "] " .. tagline
  end

  --- Custom search. Do early to avoid unnecessary processing.
  if args.search ~= "" and args.search ~= nil then
    return link(p.ia_url_encode(mw.text.trim(args.search)))
  end

  -- Determine media string
  local media = p.mediaTypes(args.media)

  -- Determine date of birth and death string
  local temp = mw.text.split(p.bdDate(args.birth, args.death, name), " ")
  local birth = temp[1]
  local death = temp[2]
  if birth == "Error" or death == "Error" then
    return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist."
  end

  -- Dates are only used when both are known.
  local hasDates = birth ~= nil and death ~= nil and birth ~= "none" and death ~= "none"
  local lifespan = nil -- "birthyear-deathyear", encoded
  if hasDates then
    lifespan = p.ia_url_encode(birth .. "-" .. death)
  end

  --- Split sname into words. The list and the count come from the same scan,
  --- so repeated spaces cannot put them out of step.
  local words = {} -- raw words, used for initials
  local N = {}     -- the same words, URL-encoded for insertion into the query
  for word in mw.ustring.gmatch(sname, "%S+") do
    words[#words + 1] = word
    N[#N + 1] = p.ia_url_encode(word)
  end
  local count = #words

  -- Encoded first character of word number i.
  local function initial(i)
    return p.ia_url_encode(mw.ustring.sub(words[i], 1, 1))
  end

  -- Build (field:"value" OR field:"value" ...) from a list of {field, value}
  -- pairs whose values are already encoded.
  local function fielded(terms)
    local parts = {}
    for i, term in ipairs(terms) do
      parts[i] = term[1] .. "%3A%22" .. term[2] .. "%22"
    end
    return "%28" .. table.concat(parts, "%20OR%20") .. "%29"
  end

  --[[ 

      Format URL

  ]]
  if count == 1 then
    local nameurl = p.ia_url_encode(sname)
    local search = "(subject%3A%22"..nameurl.."%22%20OR%20creator%3A%22"..nameurl.."%22%20OR%20description%3A%22"..nameurl.."%22%20OR%20title%3A%22"..nameurl.."%22)"
    return link(media .. search)
  end

  if count == 2 then
    local first, last = N[1], N[2]
    local lastFirst = last .. "%2C%20" .. first -- Last, First
    local firstLast = first .. "%20" .. last    -- First Last

    -- Name forms searched in subject, creator and description. With dates,
    -- each form is preceded by its "..., birthyear-deathyear" variant.
    local forms
    if hasDates then
      forms = {
        lastFirst .. "%2C%20" .. lifespan,
        lastFirst,
        firstLast .. "%2C%20" .. lifespan,
        firstLast,
      }
    else
      forms = { lastFirst, firstLast }
    end

    local terms = {}
    for _, form in ipairs(forms) do terms[#terms + 1] = { "subject", form } end
    for _, form in ipairs(forms) do terms[#terms + 1] = { "creator", form } end
    terms[#terms + 1] = { "title", firstLast }
    for _, form in ipairs(forms) do terms[#terms + 1] = { "description", form } end

    return link(media .. fielded(terms))
  end

  if count == 3 then
    -- Names with characters outside printable ASCII get a wildcard search instead.
    if p.ia_extendedascii(sname) == 1 then
      return link(media .. p.ia_url_encode(p.ia_extendedasciireplace(sname)))
    end

    local first, middle, last = N[1], N[2], N[3]
    local firstinitial = initial(1)
    local middleinitial = initial(2)

    local forms = {
      first .. "%20" .. middle .. "%20" .. last,                       -- First Middle Last
      firstinitial .. "%2E%20" .. middleinitial .. "%2E%20" .. last,   -- F. M. Last
      first .. "%20" .. middleinitial .. "%2E%20" .. last,             -- First M. Last
      first .. "%20" .. last,                                          -- First Last
    }

    -- Unfielded search: (form) OR (form birthyear-deathyear) OR ...
    local groups = {}
    for _, form in ipairs(forms) do
      groups[#groups + 1] = "%28" .. form .. "%29"
      if hasDates then
        groups[#groups + 1] = "%28" .. form .. "%20" .. lifespan .. "%29"
      end
    end

    return link(media .. table.concat(groups, "%20OR%20"))
  end

  if count == 4 then
    local last = N[4]
    local given = N[1] .. "%20" .. N[2] .. "%20" .. N[3] -- First Second Third
    local natural = given .. "%20" .. last                -- First Second Third Last
    local inverted = last .. "%2C%20" .. given            -- Last, First Second Third
    if hasDates then
      inverted = inverted .. "%2C%20" .. lifespan         -- ..., birthyear-deathyear
    end
    -- Last, F. S. T.
    local initials = last .. "%2C%20" .. initial(1) .. "%2E%20" .. initial(2) .. "%2E%20" .. initial(3) .. "%2E"

    return link(media .. fielded({
      { "subject", inverted },
      { "subject", natural },
      { "creator", inverted },
      { "creator", natural },
      { "creator", initials },
      { "title", natural },
      { "description", natural },
    }))
  end

  if count > 4 then
    local nameurl = p.ia_url_encode(sname)
    local search = "("..nameurl..")"
    return link(media .. search)
  end

  -- Reached only when the search name contains no words.
  return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]"

end

-- Build the media-type prefix of the search query.
--
-- argsmedia: space-separated list of media types (case-insensitive). The
--            recognised words are "text"/"texts", "audio" and "video"; any
--            other word is ignored.
--
-- Returns "" when argsmedia is nil, empty or contains no recognised type.
-- Otherwise returns the encoded form of
-- "(mediatype:a OR mediatype:b ...)" followed by "%20AND%20", ready to be
-- placed in front of the name query.
function p.mediaTypes(argsmedia)

  if argsmedia == nil or argsmedia == "" then
    return ""
  end

  -- Accepted word -> mediatype value used in the query
  local known = { text = "texts", texts = "texts", audio = "audio", video = "video" }

  local media = ""
  -- Iterating over %S+ matches tolerates leading, trailing and repeated spaces.
  for word in mw.ustring.gmatch(argsmedia, "%S+") do
    local mediatype = known[mw.ustring.lower(word)]
    if mediatype then
      if media == "" then
        -- The first recognised type opens the group, wherever it sits in the list
        media = p.ia_url_encode("(mediatype:" .. mediatype)
      else
        media = media .. p.ia_url_encode(" OR mediatype:" .. mediatype)
      end
    end
  end

  -- Nothing recognised: no filter at all rather than a dangling ")"
  if media == "" then
    return ""
  end
  return media .. ")%20AND%20"
end

-- Alt way to get b/d dates via getContent()
--
-- Reads the wikitext of the page called `name` and takes the years from its
-- "Category:<year> births" / "Category:<year> deaths" links.
--
-- argsbirth, argsdeath: values supplied to the template; a non-empty value is
--                       trimmed and used instead of the category lookup.
-- name:                 title of the page to read.
--
-- Returns "Error" when the page cannot be loaded, otherwise the string
-- "<birth> <death>" where either part is "none" if it was not found.
function p.bdDateAlt(argsbirth, argsdeath, name)

    -- Load the page. mw.title.new() gives nil for an invalid title, so test
    -- the object itself before reading .exists
    local pagetext = nil
    local t = mw.title.new(name)
    if t and t.exists then
      pagetext = t:getContent()
    end
    if pagetext == nil then 
      return "Error"     
    end
 
    -- Remove false positives: HTML comments and <nowiki> sections.
    -- "-" is a pattern quantifier, so the literal hyphens are escaped.
    pagetext = mw.ustring.gsub(pagetext, "<!%-%-.-%-%->", "")
    pagetext = mw.ustring.gsub(pagetext, "<nowiki>.-</nowiki>", "")

    -- Year from the first "[[Category:<year> <kind>]]" link, or "none".
    local function categoryYear(kind)
      local category = mw.ustring.match(pagetext, "%[%[%s-[Cc]ategory:%s-%d+%.?%d*%s-" .. kind .. "%s-%]%]" )
      if category ~= nil then
        return mw.ustring.match(category, "%d+%.?%d*")
      end
      return "none"
    end

    -- "Category:1900 births" 
    local birth
    if argsbirth == "" or argsbirth == nil then
      birth = categoryYear("births")
    else
      birth = mw.text.trim(argsbirth)
    end

    -- "Category:2000 deaths" 
    local death
    if argsdeath == "" or argsdeath == nil then
      death = categoryYear("deaths")
    else
      death = mw.text.trim(argsdeath)
    end

    return birth .. " " .. death

end

-- Year of a Wikidata date property, taken from its formatted value.
-- Uses the last word when it starts with a digit; when the last word is an
-- era marker (BC, BCE, AD) uses the word before it. Returns "none" when the
-- property is empty or no year can be picked out.
local function wikidataYear(entity, property)
  local formatted = entity:formatPropertyValues( property )
  local words = {}
  for word in mw.ustring.gmatch(formatted["value"] or "", "%S+") do
    words[#words + 1] = word
  end

  local last = words[#words]
  if last == nil then
    return "none"
  end
  if string.find(last, "^%d") then
    return last
  end
  -- "BC" also matches "BCE"
  if string.find(last, "BC") or string.find(last, "AD") then
    -- An era marker with nothing in front of it carries no year
    return words[#words - 1] or "none"
  end
  return "none"
end

-- Get b/d dates via Wikidata.
--
-- argsbirth, argsdeath: values supplied to the template; a non-empty value is
--                       trimmed and used instead of the Wikidata lookup.
-- name:                 page title, only used by the p.bdDateAlt fallback.
--
-- Returns the string "<birth> <death>", where either part is "none" if it is
-- unknown, or whatever p.bdDateAlt returns when no Wikidata entity is
-- available.
function p.bdDate(argsbirth, argsdeath, name)

  local entity = mw.wikibase.getEntityObject()
  if not entity or not entity.claims then 
    -- Alternative if template not on a page in mainspace. This is needed since Wikidata can only be retrieved
    -- for the article where the template is located.
    return p.bdDateAlt(argsbirth, argsdeath, name)
  end

  -- Note: The below uses formatPropertyValues() to get and format the date from Wikidata.
  --       For an alternative method, see sandbox revision dated 5:58 am, 15 October 2014
  local birth
  if argsbirth == "" or argsbirth == nil then
    birth = wikidataYear(entity, 'P569')
  else
    birth = mw.text.trim(argsbirth)
  end

  local death
  if argsdeath == "" or argsdeath == nil then
    death = wikidataYear(entity, 'P570')
  else
    death = mw.text.trim(argsdeath)
  end

  -- No fallback to p.bdDateAlt here: when Wikidata has neither date the
  -- result is simply "none none".
  return birth .. " " .. death

end

--- URL-encode a string
--- http://lua-users.org/wiki/StringRecipes
---
--- Works byte by byte: ASCII letters, digits and "-", "_", ".", "~" are kept,
--- a space becomes "+", and every other byte becomes "%XX". Multi-byte UTF-8
--- characters are therefore encoded in full, one "%XX" per byte.
--- Line feeds are expanded to CR LF before encoding.
---
--- Returns the encoded string; a nil (or false) argument is returned unchanged.
function p.ia_url_encode(str)
  if (str) then
    str = string.gsub (str, "\n", "\r\n")
    -- Explicit ASCII ranges instead of %w, so the result depends neither on
    -- locale nor on Unicode letter classes.
    str = string.gsub (str, "[^A-Za-z0-9 %-_%.~]",
        function (c) return string.format ("%%%02X", string.byte(c)) end)
    str = string.gsub (str, " ", "+")
  end
  return str	
end

-- Reports whether str holds any byte outside the printable ASCII range
-- (decimal 32 to 126). Returns 1 if such a byte is present, 0 otherwise.
function p.ia_extendedascii(str)
    if str:find("[^\32-\126]") then
      return 1
    end
    return 0
end

-- Replace all extended ascii characters with wildcard '*'
--
-- Each multi-byte UTF-8 character (a lead byte plus its continuation bytes)
-- becomes a single "*", whatever its length in bytes. Any other byte outside
-- printable ASCII (decimal 32 to 126), such as a control character or a stray
-- continuation byte, also becomes "*". Printable ASCII is copied unchanged.
--
-- Returns the resulting string.
function p.ia_extendedasciireplace(str)
    -- Whole UTF-8 sequences first, so each character yields exactly one wildcard
    local s = string.gsub(str, "[\192-\255][\128-\191]*", "*")
    -- Then whatever non-printable bytes are left
    s = string.gsub(s, "[^\32-\126]", "*")
    return s
end

-- Export the module table so its functions can be invoked from templates.
return p