https://en.wikipedia.org/w/index.php?action=history&feed=atom&title=Module%3ALua_lexer
Module:Lua lexer - Revision history
2025-06-03T05:43:01Z
Revision history for this page on the wiki
MediaWiki 1.45.0-wmf.3
https://en.wikipedia.org/w/index.php?title=Module:Lua_lexer&diff=1247642568&oldid=prev
Awesome Aasim: copy from testwiki, based on work from wikia:dev:Module:Lexer (MIT licensed)
2024-09-25T06:40:22Z
<p>copy from testwiki, based on work from <a href="https://community.fandom.com/wiki/w:c:dev:Module:Lexer" class="extiw" title="wikia:dev:Module:Lexer">wikia:dev:Module:Lexer</a> (MIT licensed)</p>
<p><b>New page</b></p><div>-- <nowiki><br />
--- Lexer for Lua source code written in pure Lua.<br />
-- @script lexer<br />
-- @license MIT<br />
-- @author https://github.com/LoganDark<br />
-- @param {string} text Lua source code to lex.<br />
-- @return {table} Table of line arrays containing lexemes.<br />
<br />
--- Builds a boolean lookup map from a string or an array.
-- A string contributes each of its characters as a key; an array
-- contributes each of its elements as a key. Any other `src` adds nothing.
-- @param {string|table} src Characters, or list of keywords, to map.
-- @param[opt] {table} list Existing table to extend by reference.
-- @return {table} The map (`list` itself when one was given), with every
--                 collected key set to `true`.
-- @local
local function lookupify(src, list)
    local map = list or {}
    local kind = type(src)

    if kind == 'string' then
        -- '.' matches every single character, so this visits them in order.
        for ch in src:gmatch('.') do
            map[ch] = true
        end
    elseif kind == 'table' then
        for index = 1, #src do
            map[src[index]] = true
        end
    end

    return map
end
<br />
--- Base identifier character set: ASCII letters and the underscore.
-- @variable {string} base_ident
local base_ident = 'abcdefghijklmnopqrstuvwxyz'
    .. 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    .. '_'

--- Base digit character set: the decimal digits.
-- @variable {string} base_digits
local base_digits = '0123456789'

--- Base operator character set: single-character operators.
-- @variable {string} base_operators
local base_operators = '+-*/^%#'
<br />
--- Map of Lua character classes, each a boolean lookup table keyed by
--  single characters. Some classes carry nested sub-maps under a named key.
-- @table chars
-- @field {table} whitespace    Whitespace characters.
-- @field {table} validEscapes  Characters accepted directly after a
--                              backslash in a quoted string.
-- @field {table} ident         Identifier characters; `ident.start` holds
--                              the subset allowed as the first character.
-- @field {table} digits        Decimal digits; `digits.hex` holds the
--                              hexadecimal digits.
-- @field {table} symbols       Symbol and operator characters;
--                              `symbols.equality` holds the comparison and
--                              assignment characters and
--                              `symbols.operators` the base operators.
-- @local
local chars = {}

chars.whitespace = lookupify(' \n\t\r')
chars.validEscapes = lookupify('abfnrtv"\'\\')

chars.ident = lookupify(base_ident .. base_digits)
chars.ident.start = lookupify(base_ident)

chars.digits = lookupify(base_digits)
chars.digits.hex = lookupify(base_digits .. 'abcdefABCDEF')

chars.symbols = lookupify(base_operators .. ',{}[]();.:')
chars.symbols.equality = lookupify('~=><')
chars.symbols.operators = lookupify(base_operators)
<br />
--- Lua keywords, grouped by the token type the lexer assigns to them.
-- @table keywords
-- @field structure Boolean map of structural keywords.
-- @field values    Boolean map of literal-value keywords.
local keywords = {}

keywords.structure = lookupify({
    'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
    'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
    'until', 'while'
})

keywords.values = lookupify({ 'true', 'false', 'nil' })
<br />
-- Lexer function export.
--- Splits Lua source code into lines of tokens.
-- Each token is a table with the fields `type`, `data`, `posFirst` and
-- `posLast`; the two positions are offsets relative to the start of the
-- token's own line. Adjacent tokens of the same type on one line are merged
-- into a single token, newline characters are consumed but not kept as
-- tokens, and a token never contains a '\n'.
-- @param {string} text Lua source code to lex.
-- @return {table} Array of lines, each an array of token tables.
return function(text)
    local pos = 1     -- index of the next character to read
    local start = 1   -- index of the first character of the pending token
    local buffer = {} -- tokens collected for the current line
    local lines = {}  -- finished lines, in order

    --- Peeks at the character `delta` positions after `pos` without
    -- consuming it. Yields '' once the index is past the end of the text.
    local function look(delta)
        local index = pos + (delta or 0)

        return text:sub(index, index)
    end

    --- Consumes one character and returns it ('' at end of input).
    -- `pos` is advanced even at end of input, so callers that receive ''
    -- step back by one where the position matters.
    local function get()
        pos = pos + 1

        return look(-1)
    end

    --- Measures a long-bracket opener whose first '[' is already consumed.
    -- Counts the '=' characters at `pos`; when they are followed by the
    -- second '[' the opener is consumed and its level returned. Otherwise
    -- `pos` is left untouched and nothing is returned.
    local function getDataLevel()
        local num = 0

        while look(num) == '=' do
            num = num + 1
        end

        if look(num) == '[' then
            pos = pos + num + 1

            return num
        end
    end

    --- Returns the text consumed since the last pushed token.
    local function getCurrentTokenText()
        return text:sub(start, pos - 1)
    end

    local currentLineLength = 0 -- characters pushed so far on this line
    local lineoffset = 0        -- characters pushed on all previous lines

    --- Emits the pending text as a token of the given type.
    -- When the last token on the line has the same type the text is appended
    -- to it instead. Empty text never creates a buffer entry.
    -- @param {string} tokenType Token type name.
    -- @param[opt] {string} data Token text; defaults to the pending text.
    -- @return {table} The token that was created or extended.
    local function pushToken(tokenType, data)
        data = data or getCurrentTokenText()

        local tk = buffer[#buffer]

        if not tk or tk.type ~= tokenType then
            tk = {
                type = tokenType,
                data = data,
                posFirst = start - lineoffset,
                posLast = pos - 1 - lineoffset
            }

            if tk.data ~= '' then
                buffer[#buffer + 1] = tk
            end
        else
            tk.data = tk.data .. data
            tk.posLast = tk.posLast + data:len()
        end

        currentLineLength = currentLineLength + data:len()
        start = pos

        return tk
    end

    --- Finishes the current line and consumes the newline at `pos`.
    local function newline()
        lines[#lines + 1] = buffer
        buffer = {}

        get()
        -- Pushed only so the line-length bookkeeping counts the newline;
        -- the token itself is discarded again.
        pushToken('newline')
        buffer[1] = nil

        lineoffset = lineoffset + currentLineLength
        currentLineLength = 0
    end

    --- Consumes the body of a long string or long comment.
    -- Stops with `pos` on the first ']' of the matching closer, or at the end
    -- of the input. Each completed line is pushed as a token of `tokenType`.
    -- @param {number} level Number of '=' characters in the opener.
    -- @param {string} tokenType Token type used for the body.
    -- @return {boolean} true when the closer was found, false when the
    --                   input ended first.
    local function getData(level, tokenType)
        while true do
            local char = get()

            if char == '' then
                -- Unterminated: undo the overshoot of get() so that token
                -- positions stay inside the text.
                pos = pos - 1

                return false
            elseif char == '\n' then
                pos = pos - 1
                pushToken(tokenType)
                newline()
            elseif char == ']' then
                local valid = true

                for _ = 1, level do
                    if look() == '=' then
                        pos = pos + 1
                    else
                        valid = false
                        break
                    end
                end

                if valid and look() == ']' then
                    -- Rewind to the first ']' so the caller decides which
                    -- token the closer belongs to.
                    pos = pos - level - 1

                    return true
                end
            end
        end
    end

    --- Consumes the rest of a line comment and pushes it, including the
    -- line break handling when the comment ends at a newline.
    local function getLineComment()
        while true do
            local char = get()

            if char == '' or char == '\n' then
                pos = pos - 1
                pushToken('comment')

                if char == '\n' then
                    newline()
                end

                return
            end
        end
    end

    --- Consumes a run of whitespace, splitting it at every newline.
    local function chompWhitespace()
        while true do
            local char = look()

            if char == '\n' then
                pushToken('whitespace')
                newline()
            elseif chars.whitespace[char] then
                pos = pos + 1
            else
                break
            end
        end

        pushToken('whitespace')
    end

    while true do
        chompWhitespace()

        local char = get()

        if char == '' then
            break
        elseif char == '-' and look() == '-' then
            pos = pos + 1

            local level

            if look() == '[' then
                pos = pos + 1
                level = getDataLevel()
            end

            if level then
                -- Long comment: include the closer only when it exists.
                if getData(level, 'comment') then
                    pos = pos + level + 2
                end

                pushToken('comment')
            else
                getLineComment()
            end
        elseif char == '\'' or char == '"' then
            pushToken('string_start')

            while true do
                local char2 = get()

                if char2 == '\\' then
                    pos = pos - 1
                    pushToken('string')
                    get()

                    local char3 = get()

                    if char3 == '' then
                        -- Backslash is the last character of the input:
                        -- undo the overshoot of get() before pushing.
                        pos = pos - 1
                        pushToken('unidentified')
                    elseif chars.digits[char3] then
                        -- Decimal escape: up to three digits in total.
                        for _ = 1, 2 do
                            if chars.digits[look()] then
                                pos = pos + 1
                            end
                        end
                    elseif char3 == 'x' then
                        if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
                            pos = pos + 2
                        else
                            pushToken('unidentified')
                        end
                    elseif char3 == '\n' then
                        -- Escaped line break: the backslash is the escape,
                        -- the newline ends the line.
                        pos = pos - 1
                        pushToken('escape')
                        newline()
                    elseif not chars.validEscapes[char3] then
                        pushToken('unidentified')
                    end

                    pushToken('escape')
                elseif char2 == '\n' then
                    -- Unescaped newline: the string is unterminated.
                    pos = pos - 1
                    pushToken('string')
                    newline()

                    break
                elseif char2 == char or char2 == '' then
                    pos = pos - 1
                    pushToken('string')
                    get()

                    break
                end
            end

            pushToken('string_end')
        elseif chars.ident.start[char] then
            while chars.ident[look()] do
                pos = pos + 1
            end

            local word = getCurrentTokenText()

            if keywords.structure[word] then
                pushToken('keyword')
            elseif keywords.values[word] then
                pushToken('value')
            else
                pushToken('ident')
            end
        elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
            -- The hex prefix may be written "0x" or "0X".
            if char == '0' and look():lower() == 'x' then
                pos = pos + 1

                while chars.digits.hex[look()] do
                    pos = pos + 1
                end
            else
                while chars.digits[look()] do
                    pos = pos + 1
                end

                if look() == '.' then
                    pos = pos + 1

                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end

                if look():lower() == 'e' then
                    pos = pos + 1

                    -- The exponent may carry either sign ("1e-5", "1e+5").
                    local sign = look()

                    if sign == '-' or sign == '+' then
                        pos = pos + 1
                    end

                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
            end

            pushToken('number')
        elseif char == '[' then
            local level = getDataLevel()

            if level then
                pushToken('string_start')

                local closed = getData(level, 'string')

                pushToken('string')

                -- Consume the closer only when the string was terminated.
                if closed then
                    pos = pos + level + 2
                end

                pushToken('string_end')
            else
                pushToken('symbol')
            end
        elseif char == '.' then
            if look() == '.' then
                pos = pos + 1

                if look() == '.' then
                    pos = pos + 1
                end
            end

            if getCurrentTokenText():len() == 3 then
                pushToken('vararg')
            else
                pushToken('symbol')
            end
        elseif char == ':' and look() == ':' then
            get()

            pushToken('label_start')

            chompWhitespace()

            if chars.ident.start[look()] then
                get()

                while chars.ident[look()] do
                    get()
                end

                pushToken('label')

                chompWhitespace()

                if look() == ':' and look(1) == ':' then
                    get()
                    get()

                    pushToken('label_end')
                end
            end
        elseif chars.symbols.equality[char] then
            if look() == '=' then
                pos = pos + 1
            end

            pushToken('operator')
        elseif chars.symbols[char] then
            if chars.symbols.operators[char] then
                pushToken('operator')
            else
                pushToken('symbol')
            end
        else
            pushToken('unidentified')
        end
    end

    lines[#lines + 1] = buffer

    return lines
end
Awesome Aasim