-- Aller au contenu
--
-- Module:Categorizer
--
-- Si Wikipedia, tasanayt tilellit.
-- Aceggir-agi yettwag di 7 Wamber 2020 à 14:41 sɣuṛ Sami At Ferḥat (mmeslay | attekki)
--
-- La documentation pour ce module peut être créée à Module:Categorizer/doc

-- version 202011051200 from master @kabwiki

-- Wikidata item ids that place an item in the "number" category
-- (looked up through the `categories` table below).
local number_ids = { 
	'Q21199', -- natural number
	'Q12503', -- integer
}

-- Wikidata item ids that place an item in the "year" category.
local year_ids = { 
	'Q577', -- year
	'Q3186692', -- calendar year
	'Q3311614', -- century leap year
	'Q19828', -- leap year
	'Q2378962', -- century year
}


-- Wikidata item ids that place an item in the "human" category.
-- Besides real humans the list also holds fictional characters, deities and angels.
local human_ids = {	
	'Q5', -- human 
	'Q15632617', -- fictional human
	'Q15773317', -- television character
	'Q95074', -- fictional character
	'Q21070598', -- narrative entity
	'Q21070568', -- human who may be fictional
	'Q178885', -- deity
	'Q13405593', -- nature deity
	'Q235113', -- angel 
}
-- Wikidata item ids that place an item in the "language" category.
local language_ids = {
	'Q34770', -- language 
	'Q33742', -- natural language 
	'Q38058796', -- extinct language 
	'Q2315359', -- historical language
	'Q45762', -- dead language
	'Q33384', -- dialect
	'Q17376908', -- languoid
	'Q4536530', -- language (second id carrying the same label as 'Q34770' above)
	'Q152559', -- macrolanguage
}
-- Wikidata item ids that place an item in the "language family" category.
local lanfamily_ids = { 
	'Q25295', -- language family
	'Q11820611', -- language subfamily
}
-- Wikidata item ids that place an item in the "taxon" category.
local taxon_ids = { 
	'Q16521', -- taxon 
	'Q713623', -- clade 
	'Q23038290', -- fossil taxon
	'Q310890', -- monotypic taxon
	'Q855769', -- strain 
	'Q75913269', -- group or class of strains
}
-- Wikidata item ids that place an item in the "country" category.
-- The list also holds first-level subdivisions (states, provinces, regions,
-- departments). NOTE(review): the meaning of the trailing '*' on some labels
-- is not explained anywhere in this file.
local country_ids = { 
	'Q6256', -- country 
	'Q3624078', -- sovereign state
	'Q3024240', -- historical country
	'Q417175', -- kingdom 
	'Q1048835', -- political territorial entity
	'Q15634554', -- state with limited recognition
	'Q107390', -- federated state
	'Q35657', -- state of the United States* 
	'Q34876', -- province*
	'Q10864048', -- first-level administrative country subdivision*
	'Q6465', -- department of France*
	'Q36784', -- region of France*
	'Q859869', -- region of Niger
	
}

-- Wikidata item ids that place an item in the "city" category.
-- Covers cities, villages and other settlements, country-specific municipality
-- types and a few lower-level subdivisions. "city" is listed before "country"
-- in `queryindex`, so an item matching both lists is reported as "city".
-- NOTE(review): the meaning of the trailing '*' on some labels is not explained
-- anywhere in this file.
local city_ids = { 
	-- generic city types
	'Q5119', -- capital 
	'Q515', -- city  
	'Q2264924', -- port city
	'Q1549591', -- big city 
	'Q15253706', -- like a city
	'Q7930989', -- city/town
	'Q174844', -- megacity 
	'Q200250', -- metropolis 
	'Q149621', -- district 
	
	'Q1637706', -- city with millions of inhabitants
	'Q123705', -- neighborhood
	
	-- villages and other settlements
	'Q532', -- village
	'Q5084', -- hamlet
	'Q21507948', -- former village
	'Q350895', -- abandoned village
	'Q486972', -- human settlement
	'Q22674925', -- former populated place
	'Q10354598', -- rural settlement
	
	-- country-specific types
	'Q1093829', -- city of the United States
	'Q3327870', -- municipal corporation in the United States 
	'Q21518270', -- state or insular area capital in the United States 
	'Q54935504', -- city of Switzerland 
	'Q137773', -- ward of Japan
	'Q2989398', -- commune of Algeria
	'Q484170', -- commune of France
	'Q2590631', -- municipality of Hungary 
	'Q2074737', -- municipality of Spain 
	'Q605291', -- municipality of Niger
	'Q41067667', -- municipality of Tunisia
	'Q3327862', -- urban commune of Morocco
	'Q2989470', -- commune of Mauritania  
	'Q17318027', -- rural commune of Morocco
	
	-- generic municipality types
	'Q15284', -- municipality
	'Q3266850', -- commune
	'Q515483', -- baladiyah
	
	'Q30059', -- arrondissement
	
	'Q1639634', -- local government area of Nigeria* 
	'Q2914501', -- department of Niger*
}

-- Wikidata item ids that place an item in the "musical band" category.
local musical_band_ids = { 
	'Q215380', -- band 
	'Q56816954', -- heavy metal band 
	'Q2088357' -- musical ensemble
}
-- Wikidata item ids that place an item in the "album" category.
local album_ids = { 
	'Q482994', -- album 
	'Q208569' -- studio album
}
-- Wikidata item ids that place an item in the "tv" category
-- (television series and, per the last entry, films).
local tv_ids = { 
	'Q5398426', -- television series
	'Q63952888', -- anime television series
	'Q581714', -- animated series
	'Q11424', -- film
}

-- Wikidata item ids that place an item in the "book" category.
-- NOTE(review): the meaning of the trailing '*' on the last label is not
-- explained anywhere in this file.
local book_ids = { 
	'Q47461344', -- written work 
	'Q179461', -- religious text 
	'Q7725634', -- literary work
	'Q14406742', -- comic book series
	'Q21198342', -- manga series 
	'Q10901350', -- anime and manga *
}

-- Wikidata item ids that place an item in the "intellectual work" category.
local int_work_ids = { 
	'Q1344', -- opera
	'Q58483083', -- dramatico-musical work
	'Q2188189', -- musical work
	'Q838948', -- work of art
}

-- Wikidata item ids that place an item in the "religion" category.
local religion_ids = { 
	'Q9174', -- religion 
	'Q6957341', -- major religious group
	'Q1189816', -- ethnic religion
	'Q13414953', -- religious denomination
	'Q1192063', -- Islamic schools and branches
	'Q19097', -- sect 
	'Q879146', -- Christian denomination
	'Q1530022', -- religious organization
	'Q3001185', -- Jewish denomination
	'Q5839321', -- religious school of thought
	'Q222516', -- school of Buddhism
	'Q1826286', -- religious movement
	'Q209928', -- madhhab
}

-- Wikidata item ids that place an item in the "script" (writing system) category.
local script_ids = { 
	'Q8192', -- writing system
	'Q29517555', -- natural script 
	'Q1191702', -- constructed script 
	'Q4004706', -- unicase alphabet
	'Q65045986', -- bicameral alphabet 
	'Q9779', -- alphabet 
	'Q185087', -- abjad 
	'Q1049394', -- phonetic writing system
	'Q1191127', -- featural writing system
}

-- Wikidata item ids that place an item in the "school" category
-- (schools, universities and research/teaching institutions).
local school_ids = { 
	'Q3918', -- university 
	'Q1663017', -- engineering school 
	'Q38723', -- higher education institution 
	'Q847027', -- grande école 
	'Q3354859', -- collegiate university 
	'Q875538', -- public university 
	'Q15936437', -- research university 
	'Q265662', -- national university 
	'Q3914', -- school 
	'Q57775519', -- upper secondary school 
	'Q2511322', -- lycée
	'Q4671277', -- academic institution
	'Q31855', -- research institute
	
	'Q845392', -- polytechnic
	'Q1371037', -- institute of technology
}


-- Wikidata item ids that place an item in the "hydrography" category.
local hydrography_ids = { 
    'Q4022', -- river
    'Q23397', -- lake
	'Q165', -- sea
	'Q949819', -- ship canal
	'Q12284', -- canal
	'Q1267889', -- waterway
	'Q1233637', -- river mouth
	'Q124714', -- spring (water) 
	'Q1322134', -- gulf
	'Q9430', -- ocean
	'Q15324', -- body of water
	-- NOTE(review): the ids below carry no labels in this file; presumably more
	-- water-related classes -- TODO look them up on Wikidata and annotate them.
	'Q9019918',	'Q131681',	'Q4862338',	'Q3267675',	'Q204324',	'Q4366834',	'Q8261440',	'Q211302','Q13586859','Q3215290','Q47053','Q43197',
	'Q39594','Q187223','Q940023','Q5926864','Q37901','Q1210950','Q3705882','Q2507626','Q31615','Q33837','Q13137873','Q2936105','Q204894',
	'Q17018380','Q6341928','Q1140477','Q491713',
}

-- Wikidata item ids that place an item in the "orography" category
-- (landforms, plus continents and landmasses).
local orography_ids = { 
	'Q46831', -- mountain range 
	'Q8502', -- mountain
	'Q1437459', -- non-geologically related mountain range  
	'Q39816', -- valley
	'Q271669', -- landform
	'Q75520', -- plateau
	'Q34763', -- peninsula
	'Q55818', -- impact crater
	'Q3240715', -- crater
	
	'Q5107', -- continent
	'Q205895', -- landmass
}

-- Wikidata item ids that place an item in the "software" category.
local software_ids = { 
	'Q9135', -- operating system 
	'Q218616', -- proprietary software 
	'Q20983788', -- free operating system 
	'Q7397', -- software 
	'Q341', -- free software 
	'Q6368', -- web browser 
	'Q20825628', -- GNU package
}

-- Wikidata item ids that place an item in the "disease" category.
local disease_ids = { 
	'Q12136', -- disease 
	'Q18123741', -- infectious disease 
	'Q179630', -- syndrome  
	-- NOTE(review): the next entry repeats 'Q12136' (already listed above as
	-- "disease") although it is labelled "notifiable disease"; the intended id is
	-- probably a different one -- verify on Wikidata and correct it.
	'Q12136', -- notifiable disease  
	'Q506680', -- endemic disease 
	'Q169872', -- symptom
}

-- Wikidata item ids that place an item in the "website" category
-- (websites and online services).
local website_ids = { 
	'Q35127', -- website 
	'Q171', -- wiki 
	'Q15633582', -- MediaWiki website 
	'Q327349', -- web directory 
	'Q4182287', -- web search engine 
	'Q10876391', -- Wikipedia language edition 
	'Q19967801', -- online service 
	'Q1273203', -- email service provider 
	'Q1668024', -- service on internet 
	'Q1343205', -- file hosting service 
	'Q1210425', -- internet hosting service 
	'Q17232649', -- news website 
	'Q62694393', -- URL shortener 
	'Q193424', -- web service 
}

-- Wikidata item ids that place an item in the "organization" category.
-- Items that match none of the id lists can still be reported as
-- "organization" by the property-based fallback in isOrganization().
local organization_ids = { 
	-- companies, media and generic organizations
	'Q4830453', -- business 
	'Q6881511', -- enterprise 
	'Q17990971', -- public enterprise
	'Q18388277', -- technology company 
	'Q1589009', -- privately held company 
	'Q1110794', -- daily newspaper 
	'Q1153191', -- online newspaper 
	'Q11032', -- newspaper 
	'Q43229', -- organization 
	'Q484652', -- international organization
	'Q245065', -- intergovernmental organization
	'Q2001305', -- television channel
	'Q7188', -- government
	'Q7210356', -- political organisation
	'Q7278', -- political party
	'Q46970', -- airline
	'Q157031', -- foundation 
	'Q708676', -- charitable organization 
	'Q163740', -- nonprofit organization
	'Q17127659', -- terrorist organization
	'Q1788992', -- criminal organization
	
	'Q20857065', -- United States federal agency
	
	'Q327333', -- government agency
	
	'Q22687', -- bank 
	'Q66344', -- central bank
	'Q895526', -- organ
	
	-- sports clubs and teams
	'Q476028', -- association football club
	'Q17270000', -- football club
	'Q847017', -- sports club
	'Q4438121', -- sports organization
	'Q6979593', -- national association football team
	'Q1194951', -- national sports team
	'Q15944511', -- association football team

	'Q988108', -- club
	
	-- United Nations bodies
	'Q15899789', -- principal organ of the United Nations
	'Q15285626', -- organization established by the United Nations
}
 
-- Wikidata item ids that place an item in the "building" category
-- (buildings and other built structures).
-- Every entry must be a Wikidata id string ("Q" followed by digits): the
-- entries are compared with the ids found in an item's statements, so a
-- human-readable label in this list can never match anything.
local building_ids = {
	-- generic buildings and structures
	'Q41176', -- building
	'Q294422', -- public building
	'Q1021645', -- office building
	'Q79146420', -- multistorey building
	'Q11755959', -- multi-storey urban building
	'Q18142', -- high-rise building

	'Q4989906', -- monument
	'Q811979', -- architectural structure
	'Q11303', -- skyscraper (the id and its label were swapped here before)

	-- religious buildings
	'Q44539', -- temple
	'Q867143', -- Roman temple
	'Q5393308', -- Buddhist temple
	'Q842402', -- Hindu temple
	'Q96352513', -- religious building ruin
	'Q24398318', -- religious building
	'Q1370598', -- place of worship
	'Q120560', -- minor basilica
	'Q163687', -- basilica
	'Q16970', -- church building
	'Q1088552', -- Catholic church building
	'Q108325', -- chapel
	'Q56750657', -- hermitage
	'Q56242063', -- Protestant church building
	'Q56242250', -- anglican or episcopal cathedral
	'Q56242045', -- Anglican church
	'Q58079064', -- protestant cathedral
	'Q32815', -- mosque

	'Q1154710', -- association football stadium

	-- transport
	'Q12819564', -- station
	'Q1248784', -- airport
	'Q644371', -- international airport
	'Q94993988', -- commercial traffic aerodrome
	'Q62447', -- aerodrome

	'Q9259', -- UNESCO World Heritage Site

	-- fortifications
	'Q88291', -- citadel
	'Q57821', -- fortification
	'Q1784293', -- cordon

	-- towers and museums
	'Q1440300', -- observation tower
	'Q12518', -- tower
	'Q33506', -- museum
	'Q200334', -- bell tower
	'Q72926449', -- church tower
	'Q797765', -- inclined tower

	'Q16560', -- palace

	-- power stations and dams
	'Q15911738', -- hydroelectric power station
	'Q159719', -- power station

	'Q3497167', -- gravity dam
	'Q12323', -- dam

	-- sports venues
	'Q483110', -- stadium
	'Q641226', -- arena
}

-- Wikidata item ids that place an item in the "chemical" category.
local chemical_ids = { 
	'Q11173', -- chemical compound
}

-- Wikidata item ids that place an item in the "event" category.
local event_ids = {
	'Q12184', -- pandemic
	'Q44512', -- epidemic
}

-- Maps each category name to the list of Wikidata ids that select it.
-- findCategoryName() looks the lists up by the names stored in `queryindex`,
-- so a name added here is only consulted if it is also added there.
-- p.findCategory can additionally return "taxon common name", which has no
-- id list and therefore no entry in this table.
local categories = {
	["number"] = number_ids,
	["year"] = year_ids,
	["human"] = human_ids,
	["language"] = language_ids,
	["language family"] = lanfamily_ids,
	["taxon"] = taxon_ids,
	["city"] = city_ids,
	["country"] = country_ids,
	["musical band"] = musical_band_ids,
	["album"] = album_ids,
	["tv"] = tv_ids,
	["book"] = book_ids,
	["intellectual work"] = int_work_ids,
	["religion"] = religion_ids,
	["script"] = script_ids,
	["school"] = school_ids,
	["hydrography"] = hydrography_ids,
	["orography"] = orography_ids,
	["software"] = software_ids,
	["disease"] = disease_ids,
	["website"] = website_ids,
	["organization"] = organization_ids,
	["building"] = building_ids,
	["chemical"] = chemical_ids,
	["event"] = event_ids,
}

-- Category names in the order findCategoryName() tests them. The first list
-- that matches wins, so a name placed earlier takes priority over later ones
-- (for example "city" is tested before "country").
-- Every name here must be a key of `categories`.
local queryindex = {
	"number",
	"year",
	"human",
	"language",
	"language family",
	"taxon",
	"city",
	"country",
	"musical band",
	"album",
	"tv",
	"book",
	"intellectual work",
	"religion",
	"script",
	"school",
	"hydrography",
	"orography",
	"software",
	"disease",
	"website",
	"organization",
	"building",
	"chemical",
	"event"
}

local p = {}
local lualinq = require "Module:LuaLinq"

--- Tell whether two id lists have at least one value in common.
-- Declared local so that loading the module does not create a global.
-- @param item_ids array of ids to look for; nil is treated as an empty list
-- @param category_ids array of ids to search in; nil is treated as an empty list
-- @return true when some value occurs in both arrays, false otherwise
local function belongsTo(item_ids, category_ids)
	if item_ids == nil or category_ids == nil then
		return false
	end

	-- Index the item's ids once so each category id costs a single lookup
	-- instead of a full scan of item_ids.
	local wanted = {}
	for _, id in ipairs(item_ids) do
		wanted[id] = true
	end

	for _, id in ipairs(category_ids) do
		if wanted[id] then
			return true
		end
	end
	return false
end

--- Find the category an item belongs to from a list of Wikidata ids.
-- Categories are tested in `queryindex` order and the first one whose id list
-- shares a value with item_ids is returned.
-- Declared local so that loading the module does not create a global.
-- @param item_ids array of Wikidata ids describing the item
-- @return the category name, or nil when no list matches
local function findCategoryName(item_ids)
	for _, name in ipairs(queryindex) do
		if belongsTo(item_ids, categories[name]) then
			return name
		end
	end
	return nil
end

--- Tell whether an item is a common name of a taxon.
-- True when one of the item's P31 statements has the value Q55983715 and that
-- statement carries a P642 qualifier whose first snak refers to an item.
-- Statements or qualifier snaks without a value (no `datavalue`) are skipped
-- instead of raising an "attempt to index a nil value" error.
-- Declared local so that loading the module does not create a global.
-- @param item entity table as returned by mw.wikibase.getEntity
-- @return true or false
local function isTaxonCommonName(item)
	local statements = item and item.claims and item.claims["P31"]
	if statements == nil then
		return false
	end

	for _, statement in ipairs(statements) do
		local datavalue = statement.mainsnak and statement.mainsnak.datavalue
		local value = datavalue and datavalue.value
		if type(value) == "table" and value.id == "Q55983715" then
			-- Only the first P642 qualifier snak is inspected
			local of_snaks = statement.qualifiers and statement.qualifiers["P642"]
			local first = of_snaks and of_snaks[1]
			local target = first and first.datavalue and first.datavalue.value
			if type(target) == "table" and target.id ~= nil then
				return true
			end
		end
	end
	return false
end

--- Heuristic fallback: tell whether an item looks like an organization.
-- An item counts as an organization when it has at least one statement for
-- parent organization (P749), subsidiary (P355), headquarters location (P159)
-- or legal form (P1454).
-- Declared local so that loading the module does not create a global.
-- @param item entity table as returned by mw.wikibase.getEntity
-- @return true or false (false when the item has no claims table)
local function isOrganization(item)
	local claims = item and item.claims
	if claims == nil then
		return false
	end
	return claims["P749"] ~= nil
		or claims["P355"] ~= nil
		or claims["P159"] ~= nil
		or claims["P1454"] ~= nil
end

-- Collect the item ids referenced by a list of statements. Statements whose
-- main snak has no datavalue, or whose value has no id, are skipped.
-- A nil list yields an empty array.
local function claimItemIds(statements)
	local ids = {}
	for _, statement in ipairs(statements or {}) do
		local datavalue = statement.mainsnak and statement.mainsnak.datavalue
		local value = datavalue and datavalue.value
		if type(value) == "table" and value.id ~= nil then
			ids[#ids + 1] = value.id
		end
	end
	return ids
end

--- Find the category name of a Wikidata item.
-- Resolution order:
--   1. the item's P31 values are matched against the category id lists;
--   2. if nothing matches, the P279 values of the first P31 value are tried;
--   3. if still nothing, the item is tested as a taxon common name;
--   4. finally the property-based organization heuristic is applied.
-- @param frame frame object (its `args` are used) or a plain table of
--        arguments; the entity id is read from `item` or the first positional
--        argument
-- @return the category name, or nil when the entity cannot be loaded, has no
--         usable P31 value, or matches nothing
function p.findCategory(frame)
	local args = frame.args or frame
	local item = mw.wikibase.getEntity(args.item or args[1])

	if item == nil or item.claims == nil then
		return nil
	end

	local instance_ids = claimItemIds(item.claims["P31"])
	if #instance_ids == 0 then
		return nil
	end

	local name = findCategoryName(instance_ids)

	if name == nil then
		-- Try once more with the parents of the first P31 value; only one
		-- extra entity is loaded. The parent may be missing or have no claims.
		local parent = mw.wikibase.getEntity(instance_ids[1])
		if parent ~= nil and parent.claims ~= nil then
			name = findCategoryName(claimItemIds(parent.claims["P279"]))
		end
	end

	if name == nil and isTaxonCommonName(item) then
		name = "taxon common name"
	end

	if name == nil and isOrganization(item) then
		name = "organization"
	end

	return name
end

-- Alias of p.findCategory: forwards the frame unchanged and returns its result.
p.find = function(frame)
	return p.findCategory(frame)
end


return p