Module:Diff

Documentation for this module may be created at Module:Diff/doc
-----------------------------------------------------------------------------
-- Diff 
-- By IP 2023, 2024
-- 2023
-- [[User:Navajcmer]] Old user
-- bye
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
----------------------------------------------------------------------------

local SKIP_SEPARATOR = true  -- a constant

-- token statuses
local IN   = "in"
local OUT  = "out"
local SAME = "same"
local HI   = "hi"

-----------------------------------------------------------------------------
-- Split a string into tokens.  (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.
--
-- @param text           A string to be split.
-- @param separator      [optional] the separator pattern (defaults to any
--                       white space - %s+).
-- @param skip_separator [optional] don't include the sepator in the results.     
-- @return               A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
   separator = separator or "%s+"
   local parts = {}  
   local start = 1
   local split_start, split_end = mw.ustring.find(text, separator, start)
   while split_start do
      table.insert(parts, mw.ustring.sub(text, start, split_start-1))
      if not skip_separator then
         table.insert(parts, mw.ustring.sub(text, split_start, split_end))
      end
      start = split_end + 1
      split_start, split_end = mw.ustring.find(text, separator, start)
   end
   if mw.ustring.sub(text, start) ~= "" then
      table.insert(parts, mw.ustring.sub(text, start))
   end
   return parts
end


-----------------------------------------------------------------------------
-- Derives the longest common subsequence of two strings.  This is a faster
-- implementation than one provided by stdlib.  Submitted by Hisham Muhammad. 
-- The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- @param t1             the first string.
-- @param t2             the second string.
-- @return               the least common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
   local m = #t1
   local n = #t2

   -- Build matrix on demand
   local C = {}
   local setmetatable = setmetatable
   local mt_tbl = {
      __index = function(t, k)
         t[k] = 0
         return 0
      end
   }
   local mt_C = {
      __index = function(t, k)
         local tbl = {}
         setmetatable(tbl, mt_tbl)
         t[k] = tbl
         return tbl
      end
   }
   setmetatable(C, mt_C)
   local max = math.max
   for i = 1, m+1 do
      local ci1 = C[i+1]
      local ci = C[i]
      for j = 1, n+1 do
         if t1[i-1] == t2[j-1] then
            ci1[j+1] = ci[j] + 1
         else
            ci1[j+1] = max(ci1[j], ci[j+1])
            end
         end
      end
   end
   return C



-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
-- 
-- @param tokens         a table of {token, status} pairs.
-- @return               an HTML string.
-----------------------------------------------------------------------------

   
 
   


-----------------------------------------------------------------------------
-- Returns a diff of two strings as a list of pairs, where the first value
-- represents a token and the second the token's status ("same", "in", "out").
--
-- @param old             The "old" text string
-- @param new             The "new" text string
-- @param separator      [optional] the separator pattern (defaults ot any
--                       white space).
-- @return               A list of annotated tokens.
-----------------------------------------------------------------------------



   -- First, compare the beginnings and ends of strings to remove the common
   -- prefix and suffix.  Chances are, there is only a small number of tokens
   -- in the middle that differ, in which case  we can save ourselves a lot
   -- in terms of LCS computation.
  
   -- Put the suffix as the first token (we are storing the diff in the
   -- reverse order)

 
   -- Define a function that will scan the LCS matrix backwards and build the
   -- diff output recursively.
 
   -- Then call it.


   -- Put the prefix in at the end

   -- Reverse the diff.



-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------