-- Provenance (from the revision-history feed this source was extracted from):
--   Feed:     https://en.wikipedia.org/w/index.php?action=history&feed=atom&title=Module%3ALua_lexer
--   Page:     Module:Lua lexer - Revision history (feed updated 2025-06-03T05:43:01Z,
--             MediaWiki 1.45.0-wmf.3)
--   Revision: https://en.wikipedia.org/w/index.php?title=Module:Lua_lexer&diff=1247642568&oldid=prev
--             2024-09-25T06:40:22Z by Awesome Aasim (new page)
--   Summary:  copy from testwiki, based on work from wikia:dev:Module:Lexer
--             (https://community.fandom.com/wiki/w:c:dev:Module:Lexer) (MIT licensed)
--
-- <nowiki>
--- Lexer for Lua source code written in pure Lua.
-- The module returns a single function. Given source text it returns an array
-- of lines; each line is an array of tokens of the form
-- `{ type = string, data = string, posFirst = number, posLast = number }`,
-- where the positions are 1-based columns relative to the start of the line.
-- Adjacent tokens of the same type on a line are merged into one token.
-- @script lexer
-- @license MIT
-- @author https://github.com/LoganDark
-- @param {string} text Lua source code to lex.
-- @return {table} Table of line arrays containing lexemes.

--- Mapper for individual token list string.
-- @param {string|table} src String of characters, or array of keywords, to map.
-- @param[opt] {table} list Table to extend by reference.
-- @return {{char=true,...}} The (possibly extended) boolean lookup map.
-- @local
local function lookupify(src, list)
    list = list or {}

    if type(src) == 'string' then
        -- One entry per single character of the string.
        for i = 1, src:len() do
            list[src:sub(i, i)] = true
        end
    elseif type(src) == 'table' then
        -- One entry per array element (used for whole keywords).
        for i = 1, #src do
            list[src[i]] = true
        end
    end

    return list
end

--- Base identifier character set (letters and underscore).
-- @variable {string} base_ident
local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'

--- Base decimal digit character set.
-- @variable {string} base_digits
local base_digits = '0123456789'

--- Base arithmetic/length operator character set.
-- @variable {string} base_operators
local base_operators = '+-*/^%#'

--- Map of Lua character patterns.
-- Sub-maps are stored under multi-character keys (`start`, `hex`, `equality`,
-- `operators`) inside the parent map; since the lexer only ever indexes the
-- parent maps with single characters, these keys never collide with input.
-- @table chars
-- @field {table} whitespace Boolean map of whitespace
--                tokens.
-- @field {table} validEscapes Boolean map of valid escape
--                characters.
-- @field {table} ident Boolean map of valid identifier
--                characters; `ident.start` holds the characters
--                allowed in first position.
-- @field {table} digits Boolean map of decimal digits;
--                `digits.hex` holds hexadecimal digits.
-- @field {table} symbols Boolean map of valid symbol and
--                operator characters.
-- @local
local chars = {
    whitespace = lookupify(' \n\t\r'),
    validEscapes = lookupify('abfnrtv"\'\\'),
    ident = lookupify(
        base_ident .. base_digits,
        {
            start = lookupify(base_ident),
        }
    ),

    digits = lookupify(
        base_digits,
        {
            hex = lookupify(base_digits .. 'abcdefABCDEF')
        }
    ),

    symbols = lookupify(
        base_operators .. ',{}[]();.:', {
            equality = lookupify('~=><'),
            operators = lookupify(base_operators)
        }
    )
}

--- List of Lua keywords.
-- @table keywords
-- @field structure Boolean map of structure keywords.
-- @field values Boolean map of primitive keywords.
local keywords = {
    structure = lookupify({
        'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
        'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
        'until', 'while'
    }),

    values = lookupify({
        'true', 'false', 'nil'
    })
}

--- Lexer function export.
-- @param {string} text Lua source code to lex.
-- @return {table} Array of lines, each an array of token tables.
local function lex(text)
    local pos = 1      -- index of the next unread character in `text`
    local start = 1    -- index where the token currently being read begins
    local buffer = {}  -- tokens of the line currently being built
    local lines = {}   -- completed lines

    -- Peek at the character `delta` positions from `pos` without consuming.
    -- Returns '' when the index is outside the text.
    local function look(delta)
        delta = pos + (delta or 0)

        return text:sub(delta, delta)
    end

    -- Consume and return the next character ('' at end of input; `pos` is
    -- still advanced in that case).
    local function get()
        pos = pos + 1

        return look(-1)
    end

    -- With `pos` just past an opening '[', check for a long-bracket opener
    -- ('=' repeated, then '['). On success consume it and return the number
    -- of '=' signs; otherwise consume nothing and return nil.
    local function getDataLevel()
        local num = 0

        while look(num) == '=' do
            num = num + 1
        end

        if look(num) == '[' then
            pos = pos + num + 1

            return num
        end
    end

    local function getCurrentTokenText()
        return text:sub(start, pos - 1)
    end

    local currentLineLength = 0  -- characters emitted so far on this line
    local lineoffset = 0         -- characters emitted on all previous lines

    -- Emit the text between `start` and `pos` (or `chunk`, if given) as a
    -- token of type `kind`. If the previous token on the line has the same
    -- type the text is appended to it instead. Empty tokens are not stored.
    local function pushToken(kind, chunk)
        chunk = chunk or getCurrentTokenText()

        local tk = buffer[#buffer]

        if not tk or tk.type ~= kind then
            tk = {
                type = kind,
                data = chunk,
                posFirst = start - lineoffset,
                posLast = pos - 1 - lineoffset
            }

            if tk.data ~= '' then
                buffer[#buffer + 1] = tk
            end
        else
            tk.data = tk.data .. chunk
            tk.posLast = tk.posLast + chunk:len()
        end

        currentLineLength = currentLineLength + chunk:len()
        start = pos

        return tk
    end

    -- Finish the current line: store its tokens, consume the newline
    -- character at `pos`, and reset the per-line column bookkeeping.
    local function newline()
        lines[#lines + 1] = buffer
        buffer = {}

        get()
        -- Pushed only so that the newline character is counted in the
        -- offsets; the token itself is discarded right away.
        pushToken('newline')
        buffer[1] = nil

        lineoffset = lineoffset + currentLineLength
        currentLineLength = 0
    end

    -- Read the body of a long string/comment of the given bracket level,
    -- emitting one `kind` token per physical line. Returns with `pos` on the
    -- first ']' of the closing bracket, or past the end of input if the
    -- bracket is never closed.
    local function getData(level, kind)
        while true do
            local char = get()

            if char == '' then
                return
            elseif char == '\n' then
                pos = pos - 1
                pushToken(kind)
                newline()
            elseif char == ']' then
                local valid = true

                for i = 1, level do
                    if look() == '=' then
                        pos = pos + 1
                    else
                        valid = false
                        break
                    end
                end

                if valid and look() == ']' then
                    -- Rewind to the first ']' so the caller can emit the
                    -- body and the closing bracket separately.
                    pos = pos - level - 1

                    return
                end
            end
        end
    end

    -- Consume whitespace (including line breaks) and emit it.
    local function chompWhitespace()
        while true do
            local char = look()

            if char == '\n' then
                pushToken('whitespace')
                newline()
            elseif chars.whitespace[char] then
                pos = pos + 1
            else
                break
            end
        end

        pushToken('whitespace')
    end

    -- Consume the remainder of a single-line comment, up to and including
    -- the terminating newline (if any).
    local function chompLineComment()
        while true do
            local c = get()

            if c == '' or c == '\n' then
                pos = pos - 1
                pushToken('comment')

                if c == '\n' then
                    newline()
                end

                return
            end
        end
    end

    while true do
        chompWhitespace()

        local char = get()

        if char == '' then
            break
        elseif char == '-' and look() == '-' then
            pos = pos + 1

            -- '--[' may open a long comment; anything else (including a
            -- malformed opener such as '--[=x') is a line comment.
            local level

            if look() == '[' then
                pos = pos + 1
                level = getDataLevel()
            end

            if level then
                getData(level, 'comment')

                pos = pos + level + 2
                pushToken('comment')
            else
                chompLineComment()
            end

            pushToken('comment')
        elseif char == '\'' or char == '"' then
            pushToken('string_start')

            while true do
                local char2 = get()

                if char2 == '\\' then
                    pos = pos - 1
                    pushToken('string')
                    get()

                    local char3 = get()

                    if chars.digits[char3] then
                        -- Decimal escape: up to three digits in total.
                        for i = 1, 2 do
                            if chars.digits[look()] then
                                pos = pos + 1
                            end
                        end
                    elseif char3 == 'x' then
                        if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
                            pos = pos + 2
                        else
                            pushToken('unidentified')
                        end
                    elseif char3 == '\n' then
                        -- Backslash-newline continues the string on the
                        -- next line.
                        pos = pos - 1
                        pushToken('escape')
                        newline()
                    elseif not chars.validEscapes[char3] then
                        pushToken('unidentified')
                    end

                    pushToken('escape')
                elseif char2 == '\n' then
                    -- Unescaped newline: the string is unterminated.
                    pos = pos - 1
                    pushToken('string')
                    newline()

                    break
                elseif char2 == char or char2 == '' then
                    pos = pos - 1
                    pushToken('string')
                    get()

                    break
                end
            end

            pushToken('string_end')
        elseif chars.ident.start[char] then
            while chars.ident[look()] do
                pos = pos + 1
            end

            local word = getCurrentTokenText()

            if keywords.structure[word] then
                pushToken('keyword')
            elseif keywords.values[word] then
                pushToken('value')
            else
                pushToken('ident')
            end
        elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
            -- Lua accepts both '0x' and '0X' as the hexadecimal prefix.
            if char == '0' and look():lower() == 'x' then
                pos = pos + 1

                while chars.digits.hex[look()] do
                    pos = pos + 1
                end
            else
                while chars.digits[look()] do
                    pos = pos + 1
                end

                if look() == '.' then
                    pos = pos + 1

                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end

                if look():lower() == 'e' then
                    pos = pos + 1

                    -- The exponent may carry an explicit sign of either
                    -- polarity ('1e-5' and '1e+5' are both valid).
                    local sign = look()

                    if sign == '-' or sign == '+' then
                        pos = pos + 1
                    end

                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
            end

            pushToken('number')
        elseif char == '[' then
            local level = getDataLevel()

            if level then
                pushToken('string_start')

                getData(level, 'string')
                pushToken('string')

                pos = pos + level + 2
                pushToken('string_end')
            else
                pushToken('symbol')
            end
        elseif char == '.' then
            if look() == '.' then
                pos = pos + 1

                if look() == '.' then
                    pos = pos + 1
                end
            end

            if getCurrentTokenText():len() == 3 then
                pushToken('vararg')
            else
                pushToken('symbol')
            end
        elseif char == ':' and look() == ':' then
            get()

            pushToken('label_start')

            chompWhitespace()

            if chars.ident.start[look()] then
                get()

                while chars.ident[look()] do
                    get()
                end

                pushToken('label')

                chompWhitespace()

                if look() == ':' and look(1) == ':' then
                    get()
                    get()

                    pushToken('label_end')
                end
            end
        elseif chars.symbols.equality[char] then
            if look() == '=' then
                pos = pos + 1
            end

            pushToken('operator')
        elseif chars.symbols[char] then
            if chars.symbols.operators[char] then
                pushToken('operator')
            else
                pushToken('symbol')
            end
        else
            pushToken('unidentified')
        end
    end

    lines[#lines + 1] = buffer

    return lines
end

return lex