Jump to content

Module:Delink/sandbox

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Darklama (talk | contribs) at 21:03, 26 April 2013 (try another way, most cases covered). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- This module de-links most wikitext.

-- Table of functions exported by this module. Declared local so that loading
-- the module does not create (or overwrite) a global named "p".
local p = {};

-- Turns the inside of one wikilink (the text between "[[" and "]]", i.e.
-- "target" or "target|label") into the plain text that should replace it.
--
-- s: link body without the surrounding brackets.
-- Returns a string: "" for category/file/image links, the label when one is
-- given, otherwise text derived from the target. Bodies whose target is empty
-- or contains <, >, {, }, control characters, or whose label contains a line
-- break, are returned exactly as passed in.
local function internalLink(s)
    local ustr = mw.ustring;

    -- Works out the replacement for the three pieces of "target|label".
    local function resolve(target, pipe, label)
        -- Malformed target or multi-line label: hand the body back untouched.
        if ustr.match( target, "[<>{}%c\n]" ) or ustr.match( label, "\n" ) then
            return s;
        end

        -- Surrounding whitespace is not significant in either part.
        target = mw.text.trim(target);
        label = mw.text.trim(label);

        if target == "" then
            return s;
        end

        if ustr.match( target, "^:" ) then
            -- A leading colon forces an ordinary link (even to categories and
            -- files), so only the colon(s) are dropped here.
            target = ustr.gsub( target, "^:+", "" );
        elseif ustr.match( target, "^[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy]:" ) then
            return "";
        elseif ustr.match( target, "^[Ff][Ii][Ll][Ee]:" )
            or ustr.match( target, "^[Ii][Mm][Aa][Gg][Ee]:" ) then
            return "";
        end

        -- An explicit, non-empty label always wins.
        if pipe == "|" and label ~= "" then
            return label;
        end

        -- Section anchors encode characters as ".XX"; rewrite those as "%XX"
        -- inside the fragment so that mw.uri.decode can undo them below.
        target = ustr.gsub( target, "(#.+)$", function(fragment)
            return (ustr.gsub( fragment, "%.([0-9A-Fa-f][0-9A-Fa-f])", "%%%1" ));
        end);

        -- Plain [[target]]: the target itself is the visible text.
        if pipe == "" and label == "" then
            return mw.uri.decode(target, "PATH");
        end

        -- What remains is "[[target|]]"; derive the text from the target in
        -- the manner of [[Help:Pipe trick]].
        local fragment = ustr.match( target, "#(.+)$" );
        if fragment ~= nil then
            return mw.uri.decode(fragment, "PATH");
        end

        local text = ustr.gsub( target, "^([^#]*)#$", "%1", 1 ); -- bare trailing "#"
        text = ustr.gsub( text, "(.-) ?%b()$", "%1", 1 );        -- trailing "(...)"
        text = ustr.gsub( text, "^(.-),.*$", "%1", 1 );          -- first comma onwards
        return (mw.uri.decode(text, "PATH"));
    end

    -- Every part of the pattern is optional, so it matches the whole body once
    -- and the value returned by resolve() becomes the result.
    return (ustr.gsub( s, "^([^|]*)(|?)(.*)$", resolve, 1 ));
end

-- Turns the inside of one bracketed external link (the text between "[" and
-- "]", i.e. "url" or "url label") into the plain text that should replace it.
--
-- s: text between the single brackets, without the brackets themselves.
-- Returns:
--   * the label, when s is "url label" and the url has a protocol and a
--     dotted host;
--   * s itself (the bare URL), when such a url has no label;
--   * nil when s is not an external link at all. This function is used as a
--     gsub replacement callback, where nil means "keep the original match",
--     so ordinary bracketed text such as "[sic]" keeps its brackets instead
--     of being reduced to its contents.
local function externalLink(s)
    -- If the text contains a line break it is not formatted as a URL,
    -- regardless of other content.
    if mw.ustring.match(s, "\n") then
        return nil;
    end

    -- First run of non-blank characters is the candidate URL, the rest is the label.
    local l, t = mw.ustring.match(mw.text.trim(s), "^([^ \t]*)[ \t]*(.*)$");
    if l == nil or l == "" then
        return nil;
    end
    t = mw.text.trim(t);

    -- Protocol-relative URLs need a scheme before they can be parsed.
    local url = l;
    if mw.ustring.sub( url, 1, 2 ) == "//" then
        url = "http:" .. url;
    end

    -- A failure while parsing is treated as "not a URL" rather than being
    -- allowed to abort the whole de-linking run.
    local ok, uri = pcall(mw.uri.new, url);
    if not ok or type(uri) ~= "table"
        or uri.protocol == nil or uri.host == nil
        or mw.ustring.match( uri.host, "%." ) == nil then
        return nil;
    end

    if t == "" then
        -- Unlabelled link: the URL is the only meaningful text there is.
        return s;
    end
    return t;
end

-- Replaces every occurrence of one kind of link in a string.
--
-- s:        text to process (must be a string).
-- pattern:  ustring pattern that matches one whole link (must be a string).
-- delinker: replacement passed to mw.ustring.gsub for each match; it receives
--           the pattern's captures and its result replaces the match.
-- Returns the processed text. Raises an error (reported at the caller) when
-- s or pattern is not a string.
local function delinkLinkClass(s, pattern, delinker)
    if type(s) ~= "string" then
        error("Attempt to de-link non-string input.", 2);
    elseif type(pattern) ~= "string" then
        error("Pattern must be a string.", 2);
    end

    -- Walk the text one match at a time: emit the text before the match, then
    -- the replaced match, and continue with whatever follows it. Replaced
    -- text is never rescanned.
    local result = {};

    repeat
        local f, e = mw.ustring.find(s, pattern);

        if f == nil then
            -- No further links: the remainder is copied through as is.
            result[#result + 1] = s;
            break;
        end

        -- Text in front of the match. This must start out empty on every
        -- pass; otherwise a match at position 1 would re-emit the prefix of
        -- the previous match. Brackets that directly touch the match are
        -- escaped with nowiki so they cannot pair up with anything later.
        local before = "";
        if f > 1 then
            before = mw.ustring.gsub( mw.ustring.sub(s, 1, f-1), "(%[+)$", mw.text.nowiki, 1 );
        end

        -- Text behind the match, with directly adjacent closing brackets escaped.
        local after = mw.ustring.gsub( mw.ustring.sub(s, e+1), "^(%]+)", mw.text.nowiki, 1 );

        -- Replace the matched link using exactly one substitution.
        local replaced = mw.ustring.gsub( mw.ustring.sub(s, f, e), pattern, delinker, 1 );

        result[#result + 1] = before;
        result[#result + 1] = replaced;
        s = after;
    until s == "";

    return table.concat( result );
end

-- Core of the module: strips link markup from args[1].
--
-- args: table of options.
--   args[1]         text to process (defaults to "").
--   args.refs       "yes" to also remove ref strip markers.
--   args.comments   "no" to keep HTML comments.
--   args.wikilinks  "no" to keep [[wikilinks]].
--   args.urls       "no" to keep [external links].
--   args.whitespace "no" to keep line breaks and runs of blanks.
-- Returns the processed text.
local function _delink(args)
    local text = args[1] or ""
    if args.refs == "yes" then
        -- Remove any [[Help:Strip markers]] representing ref tags. In most situations
        -- this is not a good idea - only use it if you know what you are doing!
        -- The trailing index is matched as hexadecimal (%x), which also covers
        -- purely decimal indexes; MediaWiki writes it in hex, so %d alone
        -- misses markers whose index contains A-F.
        text = mw.ustring.gsub(text, "UNIQ%w*%-ref%-%x*%-QINU", "")
    end
    if args.comments ~= "no" then
        text = mw.ustring.gsub(text, "<!%-%-.-%-%->", "") -- Remove html comments.
    end
    if args.wikilinks ~= "no" then
        text = delinkLinkClass(text, "%[%[([^%[%]]*)%]%]", internalLink) -- De-link wikilinks.
    end
    if args.urls ~= "no" then
        text = delinkLinkClass(text, "%[([^%[%]]*)%]", externalLink) -- De-link URLs.
    end
    if args.whitespace ~= "no" then
        -- Replace single new lines with a single space, but leave double new lines
        -- and new lines only containing spaces or tabs before a second new line.
        -- Each match consumes the first visible character of the following
        -- line, so a one-character line cannot also start the next match in
        -- the same pass ("a\nb\nc"). Repeat until a pass changes nothing; every
        -- replacement removes a line break, so this terminates.
        local joined
        repeat
            text, joined = mw.ustring.gsub(text, "([^\n \t][ \t]*)\n([ \t]*[^\n \t])", "%1 %2")
        until joined == 0
        text = mw.ustring.gsub(text, "[ \t]+", " ") -- Remove extra tabs and spaces.
    end
    return text
end

-- Public entry point.
--
-- frame: either the frame object of the current #invoke call, or a plain
--        table of arguments when called from another module or the debug
--        console.
-- Returns the de-linked text produced by _delink.
function p.delink(frame)
    -- Not the live frame: treat the value itself as the argument table.
    if frame ~= mw.getCurrentFrame() then
        return _delink(frame);
    end

    -- Called via #invoke. Arguments written on the #invoke call take
    -- precedence; iterating with pairs() is how "has at least one argument"
    -- is detected here.
    local invokeArgs = frame.args;
    for _ in pairs(invokeArgs) do
        return _delink(invokeArgs);
    end

    -- Nothing was passed on the #invoke itself, so fall back to the arguments
    -- of the parent frame (the template that contains the #invoke).
    return _delink(frame:getParent().args);
end

-- Hand the table of exported functions (p.delink) back to whoever loaded the module.
return p