Module:Plain text/sandbox: Difference between revisions
Appearance
Content deleted Content added
No edit summary Tag: Reverted |
Guarapiranga (talk | contribs) some performance and results improvements Tag: Reverted |
||
Line 13: | Line 13: | ||
:gsub(' ', ' ') --replace nbsp spaces with regular spaces |
:gsub(' ', ' ') --replace nbsp spaces with regular spaces |
||
:gsub('<br ?/?>', ', ') --replace br with commas |
:gsub('<br ?/?>', ', ') --replace br with commas |
||
:gsub('<(%a+)[^>]+>(.-)</%1>', function(tag, contents) |
|||
:gsub('^%s*<span.->(.-)</span>%s*$', '%1') --remove outer spans while keeping text inside |
|||
if tag:lower() == 'span' then |
|||
:gsub('<span.->(.-)</span>', '%1') --repeat for nested span tags. |
|||
return contents |
|||
:gsub('<i.->(.-)</i>', '%1') --remove italics while keeping text inside |
|||
else |
|||
⚫ | |||
return '' |
|||
⚫ | |||
end |
|||
:gsub('%[%[%s*[Ff]ile%s*:.-%]%]', '') --strip out files |
|||
end) |
|||
:gsub('%[%[%s*[Ii]mage%s*:.-%]%]', '') --strip out use of image: |
|||
:gsub(' |
:gsub('<i[^>]+>([^<]+)</i>', '%1') --remove italics while keeping text inside |
||
:gsub(' |
:gsub('<[^>]+>[^<]+<[^>]+>', '') --strip out remaining tags and the text inside |
||
⚫ | |||
:gsub('__[^_]+__', '') --remove __ markups |
|||
:gsub('^=+[^=]+=+', ''):gsub('\n=+[^=]+=+', '') --remove section titles |
|||
:gsub('%b[]', |
|||
function(bracketed) |
|||
return bracketed:gsub('^%[%[%s*(%a+):.-%]%]$', |
|||
function(link_prefix) |
|||
link_prefix = link_prefix:lower() |
|||
if link_prefix == 'image' or link_prefix == 'file' |
|||
or link_prefix == 'media' or link_prefix == 'category' then |
|||
return "" |
|||
end -- otherwise leave it alone |
|||
end) |
|||
end) |
|||
⚫ | |||
:gsub('[%[%]]', '') --then strip out remaining [ and ] |
:gsub('[%[%]]', '') --then strip out remaining [ and ] |
||
:gsub("'''''", "") --strip out bold italic markup |
:gsub("'''''", "") --strip out bold italic markup |
||
:gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes |
:gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes |
||
:gsub('----', '') --remove ---- lines |
:gsub('----', '') --remove ---- lines |
||
:gsub( |
:gsub('^%s+', ''):gsub('\n%s+', '\n') --strip leading |
||
:gsub( |
:gsub('%s+$', ''):gsub('%s+\n', '\n') --and trailing spaces |
||
:gsub( |
:gsub('(%s)%s+', '%1') --strip redundant spaces |
||
return text |
return text |
||
end |
end |
Revision as of 13:37, 21 June 2021
This is the module sandbox page for Module:Plain text (diff).
This Lua module is used on approximately 1,810,000 pages, or roughly 3% of all pages. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can then be added to this page in a single edit. Consider discussing changes on the talk page before implementing them.
This module depends on the following other modules:
Implements {{Plain text}}. Developed for producing short descriptions from text that may contain markup or other content that must be removed from short descriptions.
Usage
- Code
'''[[Foo|hah]]'''<sup>e</sup> <span style="color:red">is</span> '''''[[gah]]'''''<nowiki>?</nowiki>
→- hahe is gah?
- Using module
{{#invoke:Plain text|main|1='''[[Foo|hah]]'''<sup>e</sup> <span style="color:red">is</span> '''''[[gah]]'''''<nowiki>?</nowiki>}}
→- hahe is gah
See also
--converts text with wikilinks to plain text, e.g. "[[foo|gah]] is [[bar]]" to "gah is bar"
--removes anything enclosed in tags that isn't nested, mediawiki strip markers (references etc), files, italic and bold markup
--module table: the functions defined below are attached to it and it is returned at the end of the file
local p = {}
--template entry point ({{#invoke:Plain text|main|...}}): hands the first
--unnamed argument of the frame to p._main and returns whatever it returns
function p.main(frame)
	return p._main(frame.args[1])
end
--paired tags that are unwrapped (markup dropped, inner text kept);
--any other paired tag is removed together with the text it encloses
local unwrapped_tags = { span = true, i = true }

--reduces wikitext to plain text: drops strip markers, html tags, file/image/
--media/category links, section titles, __MAGIC__ words, link brackets,
--bold/italic quotes and horizontal rules, then tidies the whitespace
--text: the wikitext string to clean (may be nil)
--returns the cleaned string, or nothing at all when text is nil/false
function p._main(text)
	if not text then return end
	--assigning the chain to a single variable discards the match count
	--that the final gsub returns as a second value
	text = mw.text.killMarkers(text)
		:gsub('&nbsp;', ' ') --replace nbsp entities
		:gsub('\194\160', ' ') --and raw U+00A0 characters (UTF-8 bytes C2 A0) with regular spaces
		:gsub('<br ?/?>', ', ') --replace br with commas
		--[^>]* (not [^>]+) so that tags without attributes, such as <span> or <i>, also match
		:gsub('<(%a+)[^>]*>(.-)</%1>', function(tag, contents)
			if unwrapped_tags[tag:lower()] then
				return contents --remove spans and italics while keeping text inside
			end
			return '' --any other paired tag goes together with its text
		end)
		:gsub('<[^>]+>[^<]+<[^>]+>', '') --strip out remaining tags and the text inside
		:gsub('%b<>', '') --remove any other tag markup
		:gsub('__[^_]+__', '') --remove __ markups
		:gsub('^=+[^=]+=+', ''):gsub('\n=+[^=]+=+', '') --remove section titles
		:gsub('%b[]',
			function(bracketed)
				--parentheses keep only the string result of the inner gsub
				return (bracketed:gsub('^%[%[%s*(%a+):.-%]%]$',
					function(link_prefix)
						link_prefix = link_prefix:lower()
						if link_prefix == 'image' or link_prefix == 'file'
							or link_prefix == 'media' or link_prefix == 'category' then
							return ""
						end -- otherwise leave it alone
					end))
			end)
		:gsub('%[%[[^%]|]+|', '') --strip out piped link text
		:gsub('[%[%]]', '') --then strip out remaining [ and ]
		:gsub("'''''", "") --strip out bold italic markup
		:gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes
		--"-" is a pattern quantifier, so an unescaped '----' only ever matches
		--the empty string; escape it and take the whole run of four or more
		:gsub('%-%-%-%-+', '') --remove ---- lines
		:gsub('^%s+', ''):gsub('\n%s+', '\n') --strip leading
		:gsub('%s+$', ''):gsub('%s+\n', '\n') --and trailing spaces
		:gsub('(%s)%s+', '%1') --strip redundant spaces
	return text
end
--hand the module table (p.main, p._main) back to whoever loads this module
return p