-- Module:Diff
-- Appearance
-- Documentation for this module may be created at Module:Diff/doc
-----------------------------------------------------------------------------
-- Diff
-- By IP 2023, 2024
-- 2023
-- [[User:Navajcmer]] Old user
-- bye
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
----------------------------------------------------------------------------
-- Boolean flag constant; presumably meant to be passed as split()'s
-- skip_separator argument (its use is not visible here -- TODO confirm).
local SKIP_SEPARATOR = true
-- Status labels attached to diff tokens. "in", "out" and "same" are the
-- statuses named in the diff documentation; the purpose of "hi" is not
-- visible in this part of the file.
local IN, OUT, SAME, HI = "in", "out", "same", "hi"
-----------------------------------------------------------------------------
-- Split a string into tokens. (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.)
--
-- @param text A string to be split.
-- @param separator [optional] the separator pattern (defaults to any
-- white space - %s+).
-- @param skip_separator [optional] don't include the separator in the results.
-- @return A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
    -- Splits `text` at every match of the pattern `separator` (default
    -- "%s+"). Unless `skip_separator` is truthy, each matched separator is
    -- kept as its own entry between the tokens. A leading separator yields
    -- an empty first token; an empty trailing remainder is dropped.
    -- Raises an error if `separator` matches the empty string.
    separator = separator or "%s+"
    local ufind, usub = mw.ustring.find, mw.ustring.sub
    local parts = {}
    local start = 1
    local split_start, split_end = ufind(text, separator, start)
    while split_start do
        -- An empty match (end before start) consumes no text, so `start`
        -- would never advance and the loop would never terminate.
        if split_end < split_start then
            error("split: separator pattern must not match the empty string", 2)
        end
        -- Text between the previous separator and this one.
        parts[#parts + 1] = usub(text, start, split_start - 1)
        if not skip_separator then
            parts[#parts + 1] = usub(text, split_start, split_end)
        end
        start = split_end + 1
        split_start, split_end = ufind(text, separator, start)
    end
    -- Whatever follows the last separator, if anything.
    local rest = usub(text, start)
    if rest ~= "" then
        parts[#parts + 1] = rest
    end
    return parts
end
-----------------------------------------------------------------------------
-- Derives the longest common subsequence of two strings. This is a faster
-- implementation than one provided by stdlib. Submitted by Hisham Muhammad.
-- The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- @param t1 the first sequence (a list of tokens).
-- @param t2 the second sequence (a list of tokens).
-- @return the longest common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
    local m = #t1
    local n = #t2

    -- Build matrix on demand: reading a missing row of C creates it, and
    -- reading a missing cell of a row stores and yields 0, so no separate
    -- initialisation pass is needed.
    local setmetatable = setmetatable
    local mt_tbl = {
        __index = function(t, k)
            t[k] = 0
            return 0
        end
    }
    local mt_C = {
        __index = function(t, k)
            local tbl = setmetatable({}, mt_tbl)
            t[k] = tbl
            return tbl
        end
    }
    local C = setmetatable({}, mt_C)

    local max = math.max
    -- Indices are shifted: the comparison of t1[i-1] with t2[j-1] is
    -- recorded in C[i+1][j+1]. At i == 1, j == 1 both operands are nil and
    -- compare equal, so C[2][2] is 1 and later counts carry that offset.
    for i = 1, m+1 do
        local ci1 = C[i+1]
        local ci = C[i]
        for j = 1, n+1 do
            if t1[i-1] == t2[j-1] then
                ci1[j+1] = ci[j] + 1
            else
                ci1[j+1] = max(ci1[j], ci[j+1])
            end
        end
    end
    -- The return must sit inside the function so the caller receives the
    -- filled matrix.
    return C
end
-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- @param tokens a table of {token, status} pairs.
-- @return an HTML string.
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Returns a diff of two strings as a list of pairs, where the first value
-- represents a token and the second the token's status ("same", "in", "out").
--
-- @param old The "old" text string
-- @param new The "new" text string
-- @param separator [optional] the separator pattern (defaults to any
-- white space).
-- @return A list of annotated tokens.
-----------------------------------------------------------------------------
-- First, compare the beginnings and ends of strings to remove the common
-- prefix and suffix. Chances are, there is only a small number of tokens
-- in the middle that differ, in which case we can save ourselves a lot
-- in terms of LCS computation.
-- Put the suffix as the first token (we are storing the diff in the
-- reverse order)
-- Define a function that will scan the LCS matrix backwards and build the
-- diff output recursively.
-- Then call it.
-- Put the prefix in at the end
-- Reverse the diff.
-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------