Module:Import table
Appearance
This module is rated as alpha. It is ready for third-party input, and may be used on a few pages to see if problems arise, but should be watched. Suggestions for new features or changes in their input and output mechanisms are welcome.
Usage
{{#invoke:Import table|import |page= |config= |common= }}
This module is designed to import data from tables in Wikipedia articles into Wikidata. The first column of the table must be the name of the item.
- If the first column is a link (or a redirect) to an existing article which has a corresponding Wikidata item, then the data will be imported into that item.
- If the first column contains a valid identifier for a Wikidata item (e.g. Q123456) then the data will be imported into this item.
- If this is plain text or a redlink, then the data will be imported into a new Wikidata item.
Parameters
- page - the name of the article/page to parse, e.g.
|page=List of dams in South Africa
- config - a comma-separated list with one entry per table column, in column order, describing what type of data that column holds. Each entry is formatted as type-property-option. Currently recognised types are:
- label - the name of the article
- wikilink - a link to an article which should be the value of the property, e.g.
wikilink-P131
- quantity - specify the unit in the third parameter, e.g.
quantity-P2048-Q11573
- year - a 4-digit year, e.g.
year-P571
- text - any text to import into a string datatype, e.g.
text-P3562
- coord - coordinate position, inside the {{coord}} template, e.g.
coord-P625
- donotuse (or any other unrecognised type) - indicates a column that will not be imported
- common - a set of values that every item in the list should have, formatted in pairs as property-value, e.g.
|common=P31-Q12323,P17-Q258
Notes
- The module will not import any claim if there is already a statement for that property in Wikidata, regardless of whether the existing value is the same as or different from the one being imported, and even if the existing value is marked as deprecated.
-- Scribunto's "strict" library: makes use of undeclared global variables an error.
require("strict")
-- Export table for this module; it is returned at the end of the file and
-- public entry points (p.import) are attached to it.
local p = {}
--- Resolve the content of a table cell to a Wikidata entity ID.
-- The cell is resolved when it contains a wikilink (the target of the FIRST
-- link is used, whether piped or not) or when it consists solely of an
-- entity ID such as Q123456. Anything else yields nil.
-- @param title wikitext of the cell (string), or nil
-- @return whatever Module:ResolveEntityId's _id returns for the extracted
--         title/ID, or nil when nothing resolvable was found in the cell
local function resolveqid(title)
	if not title then
		return nil
	end
	local resolveEntity = require("Module:ResolveEntityId")._id
	-- Capture up to the first "|", "]" or "[" after "[[". Excluding "]" keeps
	-- a cell like "[[A]] and [[B|b]]" from producing the bogus target
	-- "A]] and [[B" that a pipe-only exclusion would give.
	local target = string.match(title, "%[%[([^%[%]|]+)")
	if not target then
		-- No link: accept a bare entity ID, as the module's usage notes promise.
		target = string.match(title, "^%s*([Qq]%d+)%s*$")
	end
	if target then
		target = string.match(target, "^%s*(.-)%s*$") -- trim surrounding whitespace
		if target ~= "" then
			return resolveEntity(target)
		end
	end
	return nil
end
-- Remove <ref> tags from a table cell: self-closing ones first, then paired
-- ones with a lazy match so text between two separate references survives.
local function striprefs(text)
	text = mw.ustring.gsub(text, "<ref[^>]*/>", "")
	text = mw.ustring.gsub(text, "<ref.-</ref>", "")
	return text
end

-- Turn a first-column cell into plain text usable as an item label:
-- drops references and replaces [[target|text]] / [[target]] by text / target.
local function plainlabel(cell)
	local text = striprefs(cell)
	text = string.gsub(text, "%[%[[^%[%]|]*|([^%[%]]*)%]%]", "%1")
	text = string.gsub(text, "%[%[([^%[%]|]*)%]%]", "%1")
	return mw.text.trim(text)
end

--- Parse the first wikitable on a page and emit one command line per value.
-- The emitted lines use "|" as field separator and "<br>" as line terminator
-- and follow what appears to be QuickStatements command syntax (CREATE, LAST,
-- Len/Den, S143 source).
--
-- Arguments (read from frame.args, or from the parent frame's args when
-- frame.args has no `page`):
--   page   - title of the page holding the table
--   config - comma-separated column descriptions, each "type-property-option"
--   common - optional comma-separated "property-value" pairs added to every
--            newly created item
--
-- NOTE(review): the module documentation says claims are skipped when the
-- item already has a statement for the property; nothing in this function
-- checks existing statements - confirm whether that is handled elsewhere.
--
-- @param frame Scribunto frame object
-- @return string with the generated commands, or a short message when the
--         page/config arguments are missing or no table could be read
function p.import(frame)
	local tab = "|"
	local wikiqid = "Q328" -- QID for English Wikipedia
	local eol = "<br>" -- end of line string
	local trim, split = mw.text.trim, mw.text.split
	-- NOTE(review): this loads the /sandbox copy of Module:Coordinates - confirm that is intended.
	local coord2text = require("Module:Coordinates/sandbox")._coord2text

	-- frame.args is always a table, so "frame.args or parent.args" could never
	-- reach the parent; fall back explicitly when `page` is not given directly.
	local args = frame.args or {}
	if not args.page then
		local parent = frame.getParent and frame:getParent()
		if parent and parent.args and parent.args.page then
			args = parent.args
		end
	end
	if not args.page then
		return "No page specified."
	end
	if not args.config then
		return "No configuation."
	end

	-- config[n] = { type, property, option } for table column n
	local config = {}
	for c1, col in ipairs(split(args.config, ",", true)) do
		local parts = split(col, "-", true)
		for i, part in ipairs(parts) do
			parts[i] = trim(part)
		end
		config[c1] = parts
	end

	-- commonlist keeps the pairs in order; commonindex maps property -> value,
	-- e.g. commonindex["P31"] = "Q39715". Malformed pairs are ignored.
	local commonlist, commonindex = {}, {}
	if args.common then
		for _, pair in ipairs(split(args.common, ",", true)) do
			local parts = split(pair, "-", true)
			local prop = parts[1] and trim(parts[1])
			local val = parts[2] and trim(parts[2])
			if prop and prop ~= "" and val and val ~= "" then
				commonlist[#commonlist + 1] = { prop, val }
				commonindex[prop] = val
			end
		end
	end

	local title = mw.title.new(args.page)
	local content = title and title:getContent() -- read page specified
	if not content then
		return "Page not found."
	end
	content = string.match(content, "%{%|(.+)%|%}") -- keep table only
	if not content then
		return "No table found."
	end
	content = string.gsub(content, "||", "\n|") -- use \n| for column breaks
	local rows = split(content, "|-", true) -- split table into rows
	table.remove(rows, 1) -- remove table definition
	table.remove(rows, 1) -- remove heading row

	-- Collect output lines in a buffer and join once at the end.
	local out = {}
	local function emit(...)
		out[#out + 1] = table.concat({ ... }, tab) .. eol
	end

	-- Auto-description for new items ("<P31 label> in <P17 label>"), built on
	-- first use and cached because it is the same for every row.
	local description, descriptionbuilt
	local function getdescription()
		if not descriptionbuilt then
			descriptionbuilt = true
			local typelabel = commonindex["P31"] and mw.wikibase.getLabel(commonindex["P31"])
			if typelabel then
				description = typelabel
				local country = commonindex["P17"] and mw.wikibase.getLabel(commonindex["P17"])
				if country then
					description = description .. " in " .. country
				end
			end
		end
		return description
	end

	for _, row in ipairs(rows) do
		local columns = split(row, "\n|", true) -- split table row into columns
		table.remove(columns, 1) -- remove content before the first \n| character
		local label = columns[1] and trim(columns[1])
		if label and label ~= "" then
			local qid = resolveqid(label) -- resolve qid if first column is link
			if not qid then
				emit("CREATE")
				qid = "LAST" -- creating new item, so use LAST
				-- use the display text, not raw [[...]] markup, as the label
				emit(qid, "Len", '"' .. plainlabel(label) .. '"')
				local desc = getdescription()
				if desc then
					emit(qid, "Den", '"' .. desc .. '"')
				end
				for _, pair in ipairs(commonlist) do
					emit(qid, pair[1], pair[2])
				end
			end

			-- every imported value is sourced (S143) to the wiki it came from
			local function addclaim(prop, val)
				emit(qid, prop, val, "S143", wikiqid)
			end

			for cn, col in ipairs(columns) do
				local spec = config[cn]
				local kind = spec and spec[1]
				local prop = spec and spec[2]
				if prop and prop ~= "" then
					col = trim(striprefs(col)) -- remove references
					if kind == "wikilink" then
						local val = resolveqid(col)
						if val then
							addclaim(prop, val)
						end
					elseif kind == "year" then
						local val = string.match(col, "%d%d%d%d")
						if val then
							addclaim(prop, "+" .. val .. "-00-00T00:00:00Z/9")
						end
					elseif kind == "quantity" then
						local val = string.gsub(col, ",", "") -- remove any commas
						val = string.match(val, "%d+%.?%d*") -- extract value, possibly from inside convert template
						if val then
							val = (string.gsub(val, "%.$", "")) -- "12." -> "12"
							local unit = spec[3]
							if unit and unit ~= "" then
								-- unit is written as U<digits>; accept it with or without the Q
								val = val .. "U" .. (string.gsub(unit, "^[Qq]", ""))
							end
							addclaim(prop, val)
						end
					elseif kind == "text" then
						if col ~= "" then
							addclaim(prop, '"' .. col .. '"')
						end
					elseif kind == "coord" then
						if col ~= "" then
							-- expand the {{coord}} template so its rendered text can be parsed
							local val = mw.getCurrentFrame():preprocess(col)
							local lat = coord2text(val, "lat")
							local long = coord2text(val, "long")
							if lat and long then
								addclaim(prop, "@" .. lat .. "/" .. long)
							end
						end
					end
				end
			end
		end
	end
	return table.concat(out)
end
-- Return the export table so callers of this module can reach p.import.
return p