Module:IPAc2-mh: Difference between revisions
Appearance
Content deleted Content added
Updated some comments near the top. This update mirrors that at Wiktionary. |
Uncaching the library functions, per User:Erutuon's performance analysis. |
||
Line 11: | Line 11: | ||
local export = {} |
local export = {} |
||
-- Cached functions, to help improve performance at Wiktionary. |
|||
local math_max = math.max |
|||
local math_min = math.min |
|||
local mw_text_gsplit = mw.text.gsplit |
|||
local mw_text_split = mw.text.split |
|||
local mw_text_trim = mw.text.trim |
|||
local mw_ustring_gsub = mw.ustring.gsub |
|||
local mw_ustring_lower = mw.ustring.lower |
|||
local string_byte = string.byte |
|||
local string_find = string.find |
|||
local string_gmatch = string.gmatch |
|||
local string_gsub = string.gsub |
|||
local string_lower = string.lower |
|||
local string_sub = string.sub |
|||
local table_concat = table.concat |
|||
local table_remove = table.remove |
|||
local ASYLL = "̯" |
local ASYLL = "̯" |
||
Line 74: | Line 57: | ||
return "" |
return "" |
||
else |
else |
||
local atLeft = |
local atLeft = string.byte(text) == 32 |
||
local atRight = |
local atRight = string.byte(text, -1) == 32 |
||
if atLeft then |
if atLeft then |
||
if atRight then |
if atRight then |
||
return |
return string.sub(text, 2, -2) |
||
else |
else |
||
return |
return string.sub(text, 2) |
||
end |
end |
||
elseif atRight then |
elseif atRight then |
||
return |
return string.sub(text, 1, -2) |
||
else |
else |
||
return text |
return text |
||
Line 92: | Line 75: | ||
local function parseBoolean(text) |
local function parseBoolean(text) |
||
if type(text) == "string" then |
if type(text) == "string" then |
||
text = |
text = string.gsub(text, "[^0-9A-Za-z]", "") |
||
if text ~= "" and text ~= "0" and |
if text ~= "" and text ~= "0" and string.lower(text) ~= "false" then |
||
return true |
return true |
||
end |
end |
||
Line 103: | Line 86: | ||
chars = chars or {} |
chars = chars or {} |
||
local index = 1 |
local index = 1 |
||
for ch in |
for ch in string.gmatch(text, pattern or UTF8_CHAR) do |
||
chars[index] = ch |
chars[index] = ch |
||
index = index + 1 |
index = index + 1 |
||
Line 109: | Line 92: | ||
if index <= #chars then |
if index <= #chars then |
||
if shorten then |
if shorten then |
||
table.remove(chars, index) |
|||
else |
else |
||
repeat |
repeat |
||
Line 121: | Line 104: | ||
local function string_gsub2(text, pattern, subst) |
local function string_gsub2(text, pattern, subst) |
||
return |
return string.gsub(string.gsub(text, pattern, subst), pattern, subst) |
||
end |
end |
||
Line 147: | Line 130: | ||
local function ZTBL(text, sep) |
local function ZTBL(text, sep) |
||
local tbl = {} |
local tbl = {} |
||
for key in |
for key in mw.text.gsplit(text, sep or " ") do |
||
tbl[key] = true |
tbl[key] = true |
||
end |
end |
||
Line 162: | Line 145: | ||
local outSeq = {} |
local outSeq = {} |
||
code = |
code = mw.ustring.gsub(code, "%s+", " ") |
||
code = |
code = string.lower(code) |
||
for text in |
for text in mw.text.gsplit(code, " *,[ ,]*") do |
||
text = fastTrim(text) |
text = fastTrim(text) |
||
if text ~= "" then |
if text ~= "" then |
||
local temp = |
local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "") |
||
if temp ~= "" then |
if temp ~= "" then |
||
error("'"..code.."' contains unsupported characters: "..temp) |
error("'"..code.."' contains unsupported characters: "..temp) |
||
Line 178: | Line 161: | ||
["y"] = "0", ["h"] = "0h", ["w"] = "0w" |
["y"] = "0", ["h"] = "0h", ["w"] = "0w" |
||
} |
} |
||
text = |
text = string.gsub(text, "_*([hwy])_+", PARSE_PSEUDO_GLIDE) |
||
text = |
text = string.gsub(text, "_+([hwy])", PARSE_PSEUDO_GLIDE) |
||
if |
if string.find(text, "_") then |
||
error("contains misplaced underscores: "..code) |
error("contains misplaced underscores: "..code) |
||
end |
end |
||
-- a plain {i} protected from dialect-specific reflexes |
-- a plain {i} protected from dialect-specific reflexes |
||
text = |
text = string.gsub(text, "'i", "I") |
||
-- "yi'y" and "'yiy" sequences |
-- "yi'y" and "'yiy" sequences |
||
text = |
text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB) |
||
if aposA ~= "" then |
if aposA ~= "" then |
||
-- "dwelling upon" i |
-- "dwelling upon" i |
||
Line 222: | Line 205: | ||
["0w"] = "_W" |
["0w"] = "_W" |
||
} |
} |
||
text = |
text = string.gsub(text, "[klmnr0]g?[hw]?", PARSE_C_CH_CW) |
||
if |
if string.find(text, "g") then |
||
error("contains g that is not part of ng: "..code) |
error("contains g that is not part of ng: "..code) |
||
end |
end |
||
Line 245: | Line 228: | ||
["'"] = "" |
["'"] = "" |
||
} |
} |
||
text = |
text = string.gsub(text, ".", PARSE_REMAINING) |
||
-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics, |
-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics, |
||
-- but allow VC, CV at affix boundaries |
-- but allow VC, CV at affix boundaries |
||
-- where a vowel may link to another morpheme's consonant. |
-- where a vowel may link to another morpheme's consonant. |
||
temp = |
temp = string.gsub(text, "[%s%-]+", "") |
||
if |
if string.find(temp, "_..[jGw]") or |
||
string.find(temp, ".[jGw]_.") |
|||
then |
then |
||
error("pseudo-glides may not neighbor a consonant") |
error("pseudo-glides may not neighbor a consonant") |
||
end |
end |
||
if |
if string.find(temp, VI.."_."..VI) then |
||
error("pseudo-glides may only be at the beginning or end"..code) |
error("pseudo-glides may only be at the beginning or end"..code) |
||
end |
end |
||
if |
if string.find(temp, VI..VI) then |
||
error("vowels must be separated by a consonant: "..code) |
error("vowels must be separated by a consonant: "..code) |
||
end |
end |
||
if |
if string.find(temp, ".[jGw].[jGw].[jGw]") then |
||
error("each consonant cluster is limited to two: "..code) |
error("each consonant cluster is limited to two: "..code) |
||
end |
end |
||
if |
if string.find(temp, ".[jGw].[jGw]$") then |
||
error("may not end with a consonant cluster: "..code) |
error("may not end with a consonant cluster: "..code) |
||
end |
end |
||
string.gsub(temp, "^(.[jGw])(.[jGw])", function(consonX, consonY) |
|||
if consonX ~= consonY then |
if consonX ~= consonY then |
||
error("may only begin with single or geminated consonant: " |
error("may only begin with single or geminated consonant: " |
||
Line 350: | Line 333: | ||
local version = args and args.version |
local version = args and args.version |
||
local map = BENDER_MAPS[ |
local map = BENDER_MAPS[ |
||
type(version) == "string" and |
type(version) == "string" and string.lower(version) or "" |
||
] or BENDER_DEFAULT |
] or BENDER_DEFAULT |
||
local outSeq = {} |
local outSeq = {} |
||
for _, text in pairs(inSeq) do |
for _, text in pairs(inSeq) do |
||
text = |
text = string.gsub(text, ".[jGw]?", map) |
||
addUnique(outSeq, text) |
addUnique(outSeq, text) |
||
end |
end |
||
Line 372: | Line 355: | ||
local function toMOD(text) |
local function toMOD(text) |
||
text = |
text = mw.ustring.gsub(text, ".["..CEDILLA..MACRON.."]?", TO_MOD) |
||
return text |
return text |
||
end |
end |
||
Line 412: | Line 395: | ||
local outSeq = {} |
local outSeq = {} |
||
for _, text in pairs(inSeq) do |
for _, text in pairs(inSeq) do |
||
text = |
text = string.gsub(text, ".[jGw]?", PHONEMIC_MAP) |
||
addUnique(outSeq, text) |
addUnique(outSeq, text) |
||
end |
end |
||
Line 452: | Line 435: | ||
local function maxF1(a, b) |
local function maxF1(a, b) |
||
if b then |
if b then |
||
return VOWEL[ |
return VOWEL[math.max(2, F1[a], F1[b])][F2_FRONT] |
||
else |
else |
||
return VOWEL[ |
return VOWEL[math.max(2, F1[a])][F2_FRONT] |
||
end |
end |
||
end |
end |
||
Line 461: | Line 444: | ||
-- Morphemes can begin with geminated consonants, but spoken words cannot. |
-- Morphemes can begin with geminated consonants, but spoken words cannot. |
||
text = |
text = string.gsub(text, "^(.[jGw])( *)%1( *)("..VI..")", |
||
function(conson, _, __, vowel) |
function(conson, _, __, vowel) |
||
if conson == "hG" then |
if conson == "hG" then |
||
Line 482: | Line 465: | ||
-- To block this in the template argument, use "'i" instead of "i". |
-- To block this in the template argument, use "'i" instead of "i". |
||
text = " "..text |
text = " "..text |
||
text = |
text = string.gsub(text, |
||
"([ jGw])( *)(h[jw])( *)i( *)(h[jw])( *)("..VI..")", |
"([ jGw])( *)(h[jw])( *)i( *)(h[jw])( *)("..VI..")", |
||
function(nonVowel, _, consonX, __, ___, consonY, ____, vowel) |
function(nonVowel, _, consonX, __, ___, consonY, ____, vowel) |
||
Line 506: | Line 489: | ||
end |
end |
||
) |
) |
||
text = |
text = string.sub(text, 2) |
||
-- Restore protected {i}, we won't be checking for it anymore. |
-- Restore protected {i}, we won't be checking for it anymore. |
||
text = |
text = string.gsub(text, "I", "i") |
||
return text |
return text |
||
Line 584: | Line 567: | ||
["l"] = { ["j"] = "L" } |
["l"] = { ["j"] = "L" } |
||
} |
} |
||
for primary in |
for primary in mw.text.gsplit("ptkmnNrl", "") do |
||
local map2 = map[primary] |
local map2 = map[primary] |
||
if not map2 then |
if not map2 then |
||
Line 687: | Line 670: | ||
map["Hj"] = map["Hj"] or map["i"]..map["^"] |
map["Hj"] = map["Hj"] or map["i"]..map["^"] |
||
local key |
local key |
||
for primary in |
for primary in mw.text.gsplit("pbBtdTDSZszkgmnJNrRlL_", "") do |
||
for secondary in |
for secondary in mw.text.gsplit("jGw", "") do |
||
key = primary..secondary |
key = primary..secondary |
||
map[key] = map[key] or (map[primary]..map[secondary]) |
map[key] = map[key] or (map[primary]..map[secondary]) |
||
end |
end |
||
end |
end |
||
for vowel in |
for vowel in mw.text.gsplit(V_, "") do |
||
key = vowel.."@" |
key = vowel.."@" |
||
map[key] = map[key] or (map[vowel]..map["@"]) |
map[key] = map[key] or (map[vowel]..map["@"]) |
||
Line 712: | Line 695: | ||
-- and no pseudo-glide, display phrase up to five times |
-- and no pseudo-glide, display phrase up to five times |
||
-- with each of the different pseudo-glides and possible vowel reflexes. |
-- with each of the different pseudo-glides and possible vowel reflexes. |
||
if IS_VOWEL[ |
if IS_VOWEL[string.sub(text, 1, 1)] then |
||
text = "_j"..code |
text = "_j"..code |
||
toPhoneticRemainder(text, config, false, rightFlag) |
toPhoneticRemainder(text, config, false, rightFlag) |
||
Line 730: | Line 713: | ||
return |
return |
||
end |
end |
||
if IS_VOWEL[ |
if IS_VOWEL[string.sub(text, -1)] then |
||
text = code.."_j" |
text = code.."_j" |
||
toPhoneticRemainder(text, config, leftFlag, false) |
toPhoneticRemainder(text, config, leftFlag, false) |
||
Line 782: | Line 765: | ||
end |
end |
||
toPhoneticRemainder(code, config) |
toPhoneticRemainder(code, config) |
||
addUnique(outSeq, |
addUnique(outSeq, table.concat(subSeq, " ~ ")) |
||
config.outSeq = outSeq |
config.outSeq = outSeq |
||
config.initialJ = initialJ |
config.initialJ = initialJ |
||
Line 793: | Line 776: | ||
if not diphthongs then |
if not diphthongs then |
||
-- {ww} always causes the second glide to surface. |
-- {ww} always causes the second glide to surface. |
||
text = |
text = string.gsub(text, "([aEei])( *)hw( *)hw", "%1%2hw%1@%3Hw") |
||
end |
end |
||
text = |
text = string.gsub(text, "([aEei])( *)hG( *.[jGw])", "%1%2hG%1@%3") |
||
text = |
text = string.gsub(text, "(.[jGw])( *)hG( *)([aEei])", "%1%4@%2hG%3%4") |
||
text = |
text = string.gsub(text, "([aEei])( *)h(.)( *.[jGw])", "%1%2h%3%1@%4") |
||
text = |
text = string.gsub(text, "(.[jGw])( *)h(. *)([aEei])", "%1%4@%2h%3%4") |
||
text = |
text = string.gsub(text, "(.[jGw])( *[yY].)", "%1i@%2") |
||
-- Preserve these exceptionally stable clusters. |
-- Preserve these exceptionally stable clusters. |
||
text = |
text = string.gsub(text, "l([jG] *)tG", "l%1|tG") |
||
-- Unstable consonant clusters trigger epenthesis. |
-- Unstable consonant clusters trigger epenthesis. |
||
-- Liquids before coronal obstruents. |
-- Liquids before coronal obstruents. |
||
text = |
text = string.gsub(text, "([rl].)( *)t", "%1v%2t") |
||
-- Nasals and liquids after coronal obstruents. |
-- Nasals and liquids after coronal obstruents. |
||
text = |
text = string.gsub(text, "t(.)( *[nrl])", "t%1v%2") |
||
-- Heterorganic clusters. |
-- Heterorganic clusters. |
||
-- Labial consonants neighboring coronal or dorsal consonants. |
-- Labial consonants neighboring coronal or dorsal consonants. |
||
text = |
text = string.gsub(text, "([pm].)( *[tnrlkN])", "%1v%2") |
||
-- Coronal consonants neighboring labial or dorsal consonants. |
-- Coronal consonants neighboring labial or dorsal consonants. |
||
text = |
text = string.gsub(text, "([tnrl].)( *[pmkN])", "%1v%2") |
||
-- Dorsal consonants neighboring labial or coronal consonants. |
-- Dorsal consonants neighboring labial or coronal consonants. |
||
text = |
text = string.gsub(text, "([kN].)( *[pmtnrl])", "%1v%2") |
||
-- Organic speech involves certain consonant cluster assimilations. |
-- Organic speech involves certain consonant cluster assimilations. |
||
Line 827: | Line 810: | ||
-- Forward assimilation of rounded consonants. |
-- Forward assimilation of rounded consonants. |
||
-- There is no rounded coronal obstruent. |
-- There is no rounded coronal obstruent. |
||
text = |
text = string.gsub(text, "(w *[^t])[jG]", "%1w") |
||
-- Backward assimilation of remaining secondary articulations. |
-- Backward assimilation of remaining secondary articulations. |
||
text = |
text = string.gsub(text, "[jGw]( *.)([jGw])", "%2%1%2") |
||
-- Backward nasal assimilation of primary articulations. |
-- Backward nasal assimilation of primary articulations. |
||
text = |
text = string.gsub(text, "[pkrl](. *)([mnN])", "%2%1%2") |
||
-- No longer need to protect exceptionally stable consonant clusters. |
-- No longer need to protect exceptionally stable consonant clusters. |
||
text = |
text = string.gsub(text, "|", "") |
||
-- Give a vowel height to all epenthetic vowels that still lack one. |
-- Give a vowel height to all epenthetic vowels that still lack one. |
||
Line 846: | Line 829: | ||
-- Tag all vowels for next set of operations. |
-- Tag all vowels for next set of operations. |
||
text = |
text = string.gsub(text, "([aEei])", "/%1") |
||
-- There is no variation in the surface realizations of vowels |
-- There is no variation in the surface realizations of vowels |
||
Line 885: | Line 868: | ||
local f2 |
local f2 |
||
if flag then |
if flag then |
||
f2 = |
f2 = math.max(f2L, f2R) |
||
else |
else |
||
f2 = |
f2 = math.min(f2L, f2R) |
||
end |
end |
||
return ( |
return ( |
||
Line 895: | Line 878: | ||
) |
) |
||
end |
end |
||
text = |
text = string.gsub(text, "(_)([jGw])( *)/("..V..")(@?)( *)(.)([jGw])", |
||
function(a, b, c, d, e, f, g, h) |
function(a, b, c, d, e, f, g, h) |
||
return subst(a, b, c, d, e, f, g, h, leftFlag) |
return subst(a, b, c, d, e, f, g, h, leftFlag) |
||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "(.)([jGw])( *)/("..V..")(@?)( *)(_)([jGw])", |
||
function(a, b, c, d, e, f, g, h) |
function(a, b, c, d, e, f, g, h) |
||
return subst(a, b, c, d, e, f, g, h, rightFlag) |
return subst(a, b, c, d, e, f, g, h, rightFlag) |
||
Line 920: | Line 903: | ||
-- Exceptionally for the single word "rej". |
-- Exceptionally for the single word "rej". |
||
text = |
text = string.gsub(text, "^(rG *)([V7])( *tj)$", |
||
function(prefix, vowel, suffix) |
function(prefix, vowel, suffix) |
||
return prefix..FRONT_VOWEL[vowel]..suffix |
return prefix..FRONT_VOWEL[vowel]..suffix |
||
Line 928: | Line 911: | ||
-- Vowels always claim the secondary articulation |
-- Vowels always claim the secondary articulation |
||
-- of a neighboring back unrounded glide. |
-- of a neighboring back unrounded glide. |
||
text = |
text = string.gsub(text, "(hG *)/([aEei])", function(prefix, vowel) |
||
return prefix..BACK_VOWEL[vowel] |
return prefix..BACK_VOWEL[vowel] |
||
end) |
end) |
||
text = |
text = string.gsub(text, "/([aEei])(@? *hG)", function(vowel, suffix) |
||
return BACK_VOWEL[vowel]..suffix |
return BACK_VOWEL[vowel]..suffix |
||
end) |
end) |
||
Line 937: | Line 920: | ||
-- Unless already claimed, epenthetic vowels after a glide |
-- Unless already claimed, epenthetic vowels after a glide |
||
-- always claim the secondary articulation to the left. |
-- always claim the secondary articulation to the left. |
||
text = |
text = string.gsub(text, "([hH])(.)( *)/([aEei])@", |
||
function(primaryL, secondaryL, _, vowel) |
function(primaryL, secondaryL, _, vowel) |
||
return ( |
return ( |
||
Line 948: | Line 931: | ||
-- Unless already claimed, vowels before a glide |
-- Unless already claimed, vowels before a glide |
||
-- always claim the secondary articulation to the right. |
-- always claim the secondary articulation to the right. |
||
text = |
text = string.gsub(text, "/([aEei])(@?)( *[hHyY])(.)", |
||
function(vowel, epenth, primaryR, secondaryR) |
function(vowel, epenth, primaryR, secondaryR) |
||
return ( |
return ( |
||
Line 959: | Line 942: | ||
-- For now, unless already claimed, vowels before a rounded consonant |
-- For now, unless already claimed, vowels before a rounded consonant |
||
-- claim the secondary articulation to the right. |
-- claim the secondary articulation to the right. |
||
text = |
text = string.gsub(text, "/([aEei])(@? *.w)", function(vowel, suffix) |
||
return ROUND_VOWEL[vowel]..suffix |
return ROUND_VOWEL[vowel]..suffix |
||
end) |
end) |
||
Line 965: | Line 948: | ||
-- For now, unless already claimed, remaining vowels |
-- For now, unless already claimed, remaining vowels |
||
-- claim the secondary articulation to the left. |
-- claim the secondary articulation to the left. |
||
text = |
text = string.gsub(text, "([jGw])( *)/([aEei])", |
||
function(secondaryL, _, vowel) |
function(secondaryL, _, vowel) |
||
return secondaryL.._..VOWEL[F1[vowel]][F2[secondaryL]] |
return secondaryL.._..VOWEL[F1[vowel]][F2[secondaryL]] |
||
Line 976: | Line 959: | ||
return prefix..VOWEL[F1[vowel]][F2[secondaryR]]..infix..secondaryR |
return prefix..VOWEL[F1[vowel]][F2[secondaryR]]..infix..secondaryR |
||
end |
end |
||
text = |
text = string.gsub(text, "([yY]. *)([aEei])(@? *.)([jGw])", subst) |
||
-- Change certain vowels in a special environment from round to front. |
-- Change certain vowels in a special environment from round to front. |
||
Line 984: | Line 967: | ||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "(hj *)([Oou])( *)(.w)( *)("..V..")", |
||
function(prefix, vowelL, _, conson, __, vowelR) |
function(prefix, vowelL, _, conson, __, vowelR) |
||
if conson ~= "hw" or F1[vowelL] ~= F1[vowelR] then |
if conson ~= "hw" or F1[vowelL] ~= F1[vowelR] then |
||
Line 991: | Line 974: | ||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "(hj *)([Oou])( *.w *.w)", |
||
function(prefix, vowel, suffix) |
function(prefix, vowel, suffix) |
||
return prefix..FRONT_VOWEL[vowel]..suffix |
return prefix..FRONT_VOWEL[vowel]..suffix |
||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "(a@? *hj *)Q( *.w *"..V..")", "%1a%2") |
||
text = |
text = string.gsub(text, "(a@? *hj *)Q( *.w *.w)", "%1a%2") |
||
-- Tag certain glide-vowel-non-glide sequences for special reflexes. |
-- Tag certain glide-vowel-non-glide sequences for special reflexes. |
||
text = |
text = string.gsub(text, "(H[jw] *)("..V.." *[ptkmnNrl])", "%1/%2") |
||
text = |
text = string.gsub(text, "^ *(h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2") |
||
text = |
text = string.gsub(text, "(@ *h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2") |
||
text = |
text = string.gsub(text, |
||
"([EeiAV7MOou]@? *h[jw] *)([aAQ] *[ptkmnNrl])", "%1/%2") |
"([EeiAV7MOou]@? *h[jw] *)([aAQ] *[ptkmnNrl])", "%1/%2") |
||
text = |
text = string.gsub(text, |
||
"(hj *[aEei]@? *hw *)("..V.." *[ptkmnNrl])", "%1/%2") |
"(hj *[aEei]@? *hw *)("..V.." *[ptkmnNrl])", "%1/%2") |
||
-- Untag certain sequences, exempting them from special reflexes. |
-- Untag certain sequences, exempting them from special reflexes. |
||
text = |
text = string.gsub(text, "(hj *)/([aEei] *[knNrl]w)", "%1%2") |
||
-- Special reflexes. |
-- Special reflexes. |
||
text = |
text = string.gsub(text, "([jw])( *)/("..V..")( *)(.)([jGw])", |
||
function(secondaryL, _, vowel, __, primaryR, secondaryR) |
function(secondaryL, _, vowel, __, primaryR, secondaryR) |
||
return secondaryL.._.. |
return secondaryL.._.. |
||
Line 1,024: | Line 1,007: | ||
-- Temporarily cancel epenthetic {i} neighboring {yi'y}. |
-- Temporarily cancel epenthetic {i} neighboring {yi'y}. |
||
text = |
text = string.gsub(text, "i@( *yj)", "%1") |
||
-- {yi'y} neighboring {i} may now be demoted to {y}. |
-- {yi'y} neighboring {i} may now be demoted to {y}. |
||
text = |
text = string.gsub(text, "([iMu]@? *)yj", "%1hj") |
||
text = |
text = string.gsub(text, "yj( *[iMu])", "hj%1") |
||
-- {'yiy} may now be demoted everywhere. |
-- {'yiy} may now be demoted everywhere. |
||
text = |
text = string.gsub(text, "(i@ *)Yj", "%1hjihj") |
||
text = |
text = string.gsub(text, "Yj", "hjihji@hj") |
||
-- For the purposes of this template, |
-- For the purposes of this template, |
||
-- surface all glides pronounced in isolation. |
-- surface all glides pronounced in isolation. |
||
text = |
text = string.gsub(text, "^ *h(.) *$", "H%1") |
||
if not diphthongs then |
if not diphthongs then |
||
-- Opportunistically front this vowel. |
-- Opportunistically front this vowel. |
||
text = |
text = string.gsub(text, "(hj *)A( *[kN]G *[kN]?G? *"..V..")", "%1a%2") |
||
-- Surface certain glides. |
-- Surface certain glides. |
||
text = |
text = string.gsub(text, "^ *h(w *[Oou])", "H%1") |
||
text = |
text = string.gsub(text, "h(w *[aEeiAV7M])", "H%1") |
||
text = |
text = string.gsub(text, "^ *h(j *[AV7MQOou])", "H%1") |
||
text = |
text = string.gsub(text, "([ptkmnNrl]..@ *)h(w *[Oou])", "%1H%2") |
||
text = |
text = string.gsub(text, |
||
"([ptkmnNrl].).@( *)h(j *[AV7MQOou])", "%1%2H%3") |
"([ptkmnNrl].).@( *)h(j *[AV7MQOou])", "%1%2H%3") |
||
text = |
text = string.gsub(text, "([AV7MQOou]@? *)h(j *[AV7MQOou])", "%1H%2") |
||
text = |
text = string.gsub(text, "([aEeiAV7M])(@? *)hw( *)([QOou])", |
||
function(vowelL, infix, _, vowelR) |
function(vowelL, infix, _, vowelR) |
||
if F1[vowelL] > F1[vowelR] then |
if F1[vowelL] > F1[vowelR] then |
||
Line 1,056: | Line 1,039: | ||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "([AV7MQOou])(@? *)hj( *)([aEei])", |
||
function(vowelL, infix, _, vowelR) |
function(vowelL, infix, _, vowelR) |
||
if F1[vowelL] > F1[vowelR] then |
if F1[vowelL] > F1[vowelR] then |
||
Line 1,063: | Line 1,046: | ||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "([aEei])(@? *)hj( *)([AV7MQOou])", |
||
function(vowelL, infix, _, vowelR) |
function(vowelL, infix, _, vowelR) |
||
if F1[vowelL] < F1[vowelR] then |
if F1[vowelL] < F1[vowelR] then |
||
Line 1,070: | Line 1,053: | ||
end |
end |
||
) |
) |
||
text = |
text = string.gsub(text, "("..V..")( *)h([jw]) *$", |
||
function(vowel, _, secondary) |
function(vowel, _, secondary) |
||
if F2[vowel] ~= F2[secondary] then |
if F2[vowel] ~= F2[secondary] then |
||
Line 1,080: | Line 1,063: | ||
-- Protect word-final epenthetic vowels after non-glides |
-- Protect word-final epenthetic vowels after non-glides |
||
-- from the next operation. |
-- from the next operation. |
||
text = |
text = string.gsub(text, "([ptkmnNrl][jGw]"..V..")(@ )", "%1/%2") |
||
-- De-epenthesize vowels if they still neighbor unsurfaced glides. |
-- De-epenthesize vowels if they still neighbor unsurfaced glides. |
||
text = |
text = string.gsub(text, "("..V..")@( *h.)", "%1%2") |
||
text = |
text = string.gsub(text, "(h. *"..V..")@", "%1") |
||
-- Adjust F1 of currently remaining epenthetic vowels. |
-- Adjust F1 of currently remaining epenthetic vowels. |
||
Line 1,096: | Line 1,079: | ||
) |
) |
||
text = |
text = string.gsub(text, "/", "") |
||
end |
end |
||
-- Delete all remaining unsurfaced glides. |
-- Delete all remaining unsurfaced glides. |
||
text = |
text = string.gsub(text, "h.", "") |
||
-- Surface realization for {yi'y}. |
-- Surface realization for {yi'y}. |
||
text = |
text = string.gsub(text, "yj", "i^") |
||
if not diphthongs then |
if not diphthongs then |
||
Line 1,131: | Line 1,114: | ||
-- Turn this surfaced glide into an epenthetic vowel. |
-- Turn this surfaced glide into an epenthetic vowel. |
||
text = |
text = string.gsub(text, "([ptkmnNrl]. *[aEei])%^( *"..V..")", "%1@%2") |
||
end |
end |
||
if MERGED_VOWELS then |
if MERGED_VOWELS then |
||
text = |
text = string.gsub(text, "[EO]", function(vowel) |
||
return VOWEL[F1[vowel] + 1][F2[vowel]] |
return VOWEL[F1[vowel] + 1][F2[vowel]] |
||
end) |
end) |
||
Line 1,160: | Line 1,143: | ||
index = index2 |
index = index2 |
||
until index == 1 |
until index == 1 |
||
text = |
text = table.concat(chars, "") |
||
end |
end |
||
-- Tweak remaining consonants, using offsets as a guide. |
-- Tweak remaining consonants, using offsets as a guide. |
||
text = |
text = string.gsub(text, "()(.)([jGw])( *)([ptkmnNrl]?)([jGw]?)()", |
||
function( |
function( |
||
offsetL, primaryL, secondaryL, _, primaryR, secondaryR, offsetR |
offsetL, primaryL, secondaryL, _, primaryR, secondaryR, offsetR |
||
Line 1,263: | Line 1,246: | ||
-- Elegantly connect long and epenthetic vowels across word gaps. |
-- Elegantly connect long and epenthetic vowels across word gaps. |
||
text = |
text = string.gsub(text, "(["..V_..":]): +", "%1 : ") |
||
text = |
text = string.gsub(text, "("..V..") +%1([^%^])", "%1 :%2") |
||
text = |
text = string.gsub(text, "("..V..") +%1$", "%1 :") |
||
text = |
text = string.gsub(text, "("..V..")@ +%1", " %1 :") |
||
text = |
text = string.gsub(text, "("..V.."@) +", " %1 ") |
||
if W_OFF_GLIDES then |
if W_OFF_GLIDES then |
||
Line 1,277: | Line 1,260: | ||
end |
end |
||
if false and PHONETIC_DETAILS then |
if false and PHONETIC_DETAILS then |
||
text = |
text = string.gsub(text, "([pbm])(G *[aEei])(@?)", |
||
function(primary, _, epenth) |
function(primary, _, epenth) |
||
if epenth == "" then |
if epenth == "" then |
||
Line 1,285: | Line 1,268: | ||
) |
) |
||
else |
else |
||
text = |
text = string.gsub(text, "([pbm])G( *[aEei])(@?)", subst) |
||
end |
end |
||
text = |
text = string.gsub(text, "([kgnNrl])w( *[aEeiAV7M])(@?)", subst) |
||
-- Remove [w] off-glides after certain consonants |
-- Remove [w] off-glides after certain consonants |
||
-- when they occur after rounded vowels. |
-- when they occur after rounded vowels. |
||
text = |
text = string.gsub(text, "([QOou] *[nrl]? *[nrl])Hw", "%1w") |
||
text = |
text = string.gsub(text, "([QOou] *[kgN]? *N)Hw( *M)", "%1w%2") |
||
end |
end |
||
Line 1,298: | Line 1,281: | ||
if IMPLIED_SECONDARIES then |
if IMPLIED_SECONDARIES then |
||
if PARENTHETICAL_EPENTHESIS then |
if PARENTHETICAL_EPENTHESIS then |
||
text = |
text = string.gsub(text, "(.@)", "/%1") |
||
if diphthongs then |
if diphthongs then |
||
text = |
text = string.gsub(text, "(.=.@)", "/%1") |
||
end |
end |
||
end |
end |
||
if PARENTHETICAL_EPENTHESIS then |
if PARENTHETICAL_EPENTHESIS then |
||
text = |
text = string.gsub(text, "/", "") |
||
end |
end |
||
end |
end |
||
Line 1,311: | Line 1,294: | ||
if PARENTHETICAL_EPENTHESIS then |
if PARENTHETICAL_EPENTHESIS then |
||
if not diphthongs then |
if not diphthongs then |
||
text = |
text = string.gsub(text, "(.)@("..V..")", "%1^%2") |
||
end |
end |
||
text = |
text = string.gsub(text, "(.)@", "(%1)") |
||
text = |
text = string.gsub(text, "%)(=?)%(", "%1") |
||
end |
end |
||
-- Convert remaining word gaps to liaison. |
-- Convert remaining word gaps to liaison. |
||
text = |
text = string.gsub(text, "^ *", "") |
||
text = |
text = string.gsub(text, " *$", "") |
||
text = |
text = string.gsub(text, " +", false and "_" or "") |
||
text = |
text = string.gsub(text, ".[jGw@%^]?", PHONETIC_IPA) |
||
addUnique(outSeq, text) |
addUnique(outSeq, text) |
||
Line 1,338: | Line 1,321: | ||
-- For other values, list both possible dialect reflexes where applicable. |
-- For other values, list both possible dialect reflexes where applicable. |
||
local dialect = args and args.dialect and |
local dialect = args and args.dialect and |
||
mw.ustring.lower(mw.text.trim(args.dialect)) or "" |
|||
if dialect == "rālik" then |
if dialect == "rālik" then |
||
dialect = "ralik" |
dialect = "ralik" |
||
Line 1,351: | Line 1,334: | ||
-- Real-world pronunciation said to vary by sociological factors, |
-- Real-world pronunciation said to vary by sociological factors, |
||
-- but all realizations may occur in free variation. |
-- but all realizations may occur in free variation. |
||
local modeJ = splitChars(args and args.J and |
local modeJ = splitChars(args and args.J and string.lower(args.J) or "tst") |
||
local initialJ = PHONETIC_ARG_J[modeJ[1] or ""] or "t" |
local initialJ = PHONETIC_ARG_J[modeJ[1] or ""] or "t" |
||
local medialJ = PHONETIC_ARG_J[modeJ[2] or ""] or "s" |
local medialJ = PHONETIC_ARG_J[modeJ[2] or ""] or "s" |
||
Line 1,380: | Line 1,363: | ||
for _, str in pairs(inSeq) do |
for _, str in pairs(inSeq) do |
||
str = |
str = string.gsub(str, S, " ") |
||
str = |
str = string.gsub(str, "^ *", "") |
||
str = |
str = string.gsub(str, " *$", "") |
||
local isRalik = dialect == "ralik" |
local isRalik = dialect == "ralik" |
||
if isRalik or dialect == "ratak" then |
if isRalik or dialect == "ratak" then |
||
Line 1,411: | Line 1,394: | ||
function export.bender(frame) |
function export.bender(frame) |
||
return |
return table.concat(toBender(parse(frame.args[1], frame.args)), ", ") |
||
end |
end |
||
Line 1,419: | Line 1,402: | ||
function export.parse(frame) |
function export.parse(frame) |
||
return |
return table.concat(parse(frame.args[1]), ", ") |
||
end |
end |
||
function export.phonemic(frame) |
function export.phonemic(frame) |
||
return |
return table.concat(toPhonemic(parse(frame.args[1])), ", ") |
||
end |
end |
||
function export.phonetic(frame) |
function export.phonetic(frame) |
||
return |
return table.concat(toPhonetic(parse(frame.args[1]), frame.args), ", ") |
||
end |
end |
||
Revision as of 10:11, 9 January 2020
-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.
local IMPLIED_SECONDARIES = true
local MERGED_VOWELS = false
local PARENTHETICAL_EPENTHESIS = true
local PHONETIC_DETAILS = false
local W_OFF_GLIDES = true
local export = {}
local ASYLL = "̯"
local BREVE = "̆"
local CEDILLA = "̧"
local MACRON = "̄"
local TIE = "͡"
local TIE2 = "͜"
local C1_ = "pbtdSZszkgmnNrlyYhH_"
local C1 = "["..C1_.."]"
local C = ".[jGw]"
local V_ = "aEeiAV7MQOou"
local V = "["..V_.."]"
local VI_ = V_.."I"
local VI = "["..VI_.."]"
local S = "[%s%-]+"
local UTF8_CHAR = "[%z\1-\127\194-\244][\128-\191]*"
local EMPTY = {}
local function addUnique(seq, value)
for _, value2 in pairs(seq) do
if value == value2 then
return
end
end
seq[#seq + 1] = value
end
local function assign(target, ...)
local args = { ... }
for _, source in pairs(args) do
if type(source) == "table" then
for key, value in pairs(source) do
target[key] = value
end
end
end
return target
end
local function fastTrim(text)
if text == "" or text == " " then
return ""
else
local atLeft = string.byte(text) == 32
local atRight = string.byte(text, -1) == 32
if atLeft then
if atRight then
return string.sub(text, 2, -2)
else
return string.sub(text, 2)
end
elseif atRight then
return string.sub(text, 1, -2)
else
return text
end
end
end
local function parseBoolean(text)
if type(text) == "string" then
text = string.gsub(text, "[^0-9A-Za-z]", "")
if text ~= "" and text ~= "0" and string.lower(text) ~= "false" then
return true
end
end
return false
end
local function splitChars(text, pattern, chars, shorten)
chars = chars or {}
local index = 1
for ch in string.gmatch(text, pattern or UTF8_CHAR) do
chars[index] = ch
index = index + 1
end
if index <= #chars then
if shorten then
table.remove(chars, index)
else
repeat
chars[index] = nil
index = index + 1
until index > #chars
end
end
return chars
end
local function string_gsub2(text, pattern, subst)
return string.gsub(string.gsub(text, pattern, subst), pattern, subst)
end
local function tableGet(value, key1, key2, key3)
if type(value) ~= "table" or key1 == nil then
return value
end
value = value[key1]
if key2 == nil then
return value
end
if type(value) ~= "table" then
return nil
end
value = value[key2]
if key3 == nil then
return value
end
if type(value) ~= "table" then
return nil
end
return value[key3]
end
local function ZTBL(text, sep)
local tbl = {}
for key in mw.text.gsplit(text, sep or " ") do
tbl[key] = true
end
return tbl
end
local PARSE_PSEUDO_GLIDE
local PARSE_C_CH_CW
local PARSE_REMAINING
local function parse(code)
local outSeq = {}
code = mw.ustring.gsub(code, "%s+", " ")
code = string.lower(code)
for text in mw.text.gsplit(code, " *,[ ,]*") do
text = fastTrim(text)
if text ~= "" then
local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
if temp ~= "" then
error("'"..code.."' contains unsupported characters: "..temp)
end
-- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudo-glides.
PARSE_PSEUDO_GLIDE = PARSE_PSEUDO_GLIDE or {
["y"] = "0", ["h"] = "0h", ["w"] = "0w"
}
text = string.gsub(text, "_*([hwy])_+", PARSE_PSEUDO_GLIDE)
text = string.gsub(text, "_+([hwy])", PARSE_PSEUDO_GLIDE)
if string.find(text, "_") then
error("contains misplaced underscores: "..code)
end
-- a plain {i} protected from dialect-specific reflexes
text = string.gsub(text, "'i", "I")
-- "yi'y" and "'yiy" sequences
text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
if aposA ~= "" then
-- "dwelling upon" i
return "Z"
elseif aposB ~= "" then
-- "passing over lightly" i
return "z"
end
end)
-- Convert multigraphs to pseudo-X-SAMPA format.
PARSE_C_CH_CW = PARSE_C_CH_CW or {
["k"] = "kG",
["kh"] = "kGh", -- N\A
["kw"] = "kW",
["l"] = "lJ",
["lh"] = "lG",
["lw"] = "lW",
["m"] = "mJ",
["mh"] = "mG",
["mw"] = "mJw", -- N\A
["n"] = "nJ",
["nh"] = "nG",
["nw"] = "nW",
["ng"] = "NG",
["ngh"] = "NGh", -- N\A
["ngw"] = "NW",
["r"] = "rG",
["rh"] = "rGh", -- N\A
["rw"] = "rW",
["0"] = "_J",
["0h"] = "_G",
["0w"] = "_W"
}
text = string.gsub(text, "[klmnr0]g?[hw]?", PARSE_C_CH_CW)
if string.find(text, "g") then
error("contains g that is not part of ng: "..code)
end
-- Convert remaining sequences to pseudo-X-SAMPA format.
PARSE_REMAINING = PARSE_REMAINING or {
["b"] = "pG",
["d"] = "rj",
["e"] = "E",
["&"] = "e",
["h"] = "hG",
["j"] = "tj",
["J"] = "j",
["p"] = "pj",
["t"] = "tG",
["w"] = "hw",
["W"] = "w",
["y"] = "hj",
["z"] = "yj",
["Z"] = "Yj",
["'"] = ""
}
text = string.gsub(text, ".", PARSE_REMAINING)
-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
-- but allow VC, CV at affix boundaries
-- where a vowel may link to another morpheme's consonant.
temp = string.gsub(text, "[%s%-]+", "")
if string.find(temp, "_..[jGw]") or
string.find(temp, ".[jGw]_.")
then
error("pseudo-glides may not neighbor a consonant")
end
if string.find(temp, VI.."_."..VI) then
error("pseudo-glides may only be at the beginning or end"..code)
end
if string.find(temp, VI..VI) then
error("vowels must be separated by a consonant: "..code)
end
if string.find(temp, ".[jGw].[jGw].[jGw]") then
error("each consonant cluster is limited to two: "..code)
end
if string.find(temp, ".[jGw].[jGw]$") then
error("may not end with a consonant cluster: "..code)
end
string.gsub(temp, "^(.[jGw])(.[jGw])", function(consonX, consonY)
if consonX ~= consonY then
error("may only begin with single or geminated consonant: "
..code)
end
end)
if text ~= "" then
addUnique(outSeq, text)
end
end
end
return outSeq
end
local BENDER_1968 = {
["pj"] = "p", ["pG"] = "b",
["tj"] = "j", ["tG"] = "t",
["kG"] = "k", ["kw"] = "q",
["mj"] = "m", ["mG"] = "ṁ",
["nj"] = "n", ["nG"] = "ṅ", ["nw"] = "n̈",
["NG"] = "g", ["Nw"] = "g̈",
["rj"] = "d", ["rG"] = "r", ["rw"] = "r̈",
["lj"] = "l", ["lG"] = "ł", ["lw"] = "l̈",
["yj"] = "yi'y",
["Yj"] = "'yiy",
["hj"] = "y", ["hG"] = "h", ["hw"] = "w",
["_j"] = "", ["_G"] = "", ["_w"] = "",
["a"] = "a",
["E"] = "e",
["e"] = "&",
["i"] = "i",
["I"] = "i"
}
local BENDER_MED = assign({}, BENDER_1968, {
["mG"] = "m̧",
["nG"] = "ņ",
["nw"] = "ņ°",
["Nw"] = "g°",
["rw"] = "r°",
["lG"] = "ļ",
["lw"] = "ļ°",
["e"] = "ȩ"
})
local BENDER_MOD = assign({}, BENDER_MED, {
["kw"] = "kʷ",
["mG"] = "ṃ",
["nG"] = "ṇ",
["nw"] = "ṇʷ",
["Nw"] = "gʷ",
["rw"] = "rʷ",
["lG"] = "ḷ",
["lw"] = "ḷʷ",
["e"] = "ẹ"
})
local BENDER_DEFAULT = assign({}, BENDER_MOD, {
["mG"] = "m̧",
["nG"] = "ņ",
["nw"] = "ņʷ",
["lG"] = "ļ",
["lw"] = "ļʷ",
["e"] = "ȩ"
})
local BENDER_MAPS = {
["1968"] = BENDER_1968,
["med"] = BENDER_MED,
["mod"] = BENDER_MOD
}
local function toBender(inSeq, args)
-- "1968" is from "Marshallese Phonology" (1968 by Byron W. Bender).
-- "med" is from the Marshallese-English Dictionary (1976).
-- "mod" is from the Marshallese-English Online Dictionary.
-- "default" is the same as "mod" but with cedillas.
local version = args and args.version
local map = BENDER_MAPS[
type(version) == "string" and string.lower(version) or ""
] or BENDER_DEFAULT
local outSeq = {}
for _, text in pairs(inSeq) do
text = string.gsub(text, ".[jGw]?", map)
addUnique(outSeq, text)
end
return outSeq
end
local TO_MOD = {
["Ȩ"] = "Ẹ", ["ȩ"] = "ẹ",
["Ļ"] = "Ḷ", ["ļ"] = "ḷ",
["M̧"] = "Ṃ", ["m̧"] = "ṃ",
["Ņ"] = "Ṇ", ["ņ"] = "ṇ",
["N̄"] = "Ñ", ["n̄"] = "ñ",
["O̧"] = "Ọ", ["o̧"] = "ọ"
}
local function toMOD(text)
text = mw.ustring.gsub(text, ".["..CEDILLA..MACRON.."]?", TO_MOD)
return text
end
local PHONEMIC_MAP = {
["pj"] = "pʲ", ["pG"] = "pˠ",
["tj"] = "tʲ", ["tG"] = "tˠ",
["kG"] = "k", ["kw"] = "kʷ",
["mj"] = "mʲ", ["mG"] = "mˠ",
["nj"] = "nʲ", ["nG"] = "nˠ", ["nw"] = "nʷ",
["NG"] = "ŋ", ["Nw"] = "ŋʷ",
["rj"] = "rʲ", ["rG"] = "rˠ", ["rw"] = "rʷ",
["lj"] = "lʲ", ["lG"] = "lˠ", ["lw"] = "lʷ",
["hj"] = "j", ["hG"] = "ɰ", ["hw"] = "w",
["_j"] = "", ["_G"] = "", ["_w"] = "",
["a"] = "æ",
["E"] = "ɛ",
["e"] = "e",
["i"] = "i",
["I"] = "i"
}
if false then
assign(PHONEMIC_MAP, {
["a"] = "ɐ",
["E"] = "ə",
["e"] = "ɘ",
["i"] = "ɨ",
["I"] = "ɨ"
})
end
assign(PHONEMIC_MAP, {
["yj"] = PHONEMIC_MAP.hj..PHONEMIC_MAP.i..ASYLL..PHONEMIC_MAP.hj,
["Yj"] = PHONEMIC_MAP.hj..PHONEMIC_MAP.i..PHONEMIC_MAP.hj..PHONEMIC_MAP.hj
})
local function toPhonemic(inSeq)
local outSeq = {}
for _, text in pairs(inSeq) do
text = string.gsub(text, ".[jGw]?", PHONEMIC_MAP)
addUnique(outSeq, text)
end
return outSeq
end
local VOWEL = { -- VOWELS[f1][f2]
{ "a", "A", "Q" },
{ "E", "V", "O" },
{ "e", "7", "o" },
{ "i", "M", "u" }
}
local F1 = {}
local F2_FRONT = 1
local F2_BACK = 2
local F2_ROUND = 3
local F2 = { ["j"] = F2_FRONT, ["G"] = F2_BACK, ["w"] = F2_ROUND }
local FRONT_VOWEL = {}
local BACK_VOWEL = {}
local ROUND_VOWEL = {}
for f1, row in pairs(VOWEL) do
local front = row[F2_FRONT]
local back = row[F2_BACK]
local round = row[F2_ROUND]
for f2, vowel in pairs(row) do
F1[vowel] = f1
F2[vowel] = f2
FRONT_VOWEL[vowel] = front
BACK_VOWEL[vowel] = back
ROUND_VOWEL[vowel] = round
end
end
local function maxF1(a, b)
if b then
return VOWEL[math.max(2, F1[a], F1[b])][F2_FRONT]
else
return VOWEL[math.max(2, F1[a])][F2_FRONT]
end
end
local function toPhoneticDialect(text, config, isRalik)
-- Morphemes can begin with geminated consonants, but spoken words cannot.
text = string.gsub(text, "^(.[jGw])( *)%1( *)("..VI..")",
function(conson, _, __, vowel)
if conson == "hG" then
if isRalik then
return "hG"..vowel.._.."hG"..__..vowel
else
return "hG".._..__..vowel
end
else
if isRalik then
return "hj"..maxF1(vowel)..conson.._..conson..__..vowel
else
return conson..maxF1(vowel).._..conson..__..vowel
end
end
end
)
-- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
-- To block this in the template argument, use "'i" instead of "i".
text = " "..text
text = string.gsub(text,
"([ jGw])( *)(h[jw])( *)i( *)(h[jw])( *)("..VI..")",
function(nonVowel, _, consonX, __, ___, consonY, ____, vowel)
if consonY == "hw" then
-- {yiwV-, wiwV-} sequences
if isRalik then
-- Rālik {wiwV-} becomes {yiwV-}.
consonX = "hj"
end
-- {[yw]iwV-} becomes {[yw]iwwV-} in both dialects.
return nonVowel.._..consonX..__..
"I"..___..consonY..____..consonY..vowel
elseif consonX == "hj" then
-- {yiyV-} sequences
if isRalik then
-- "dwelling upon" i
return nonVowel.._..__.."Yj"..___..____..vowel
else
-- "passing over lightly" i
return nonVowel.._..__.."yj"..___..____..vowel
end
end
end
)
text = string.sub(text, 2)
-- Restore protected {i}, we won't be checking for it anymore.
text = string.gsub(text, "I", "i")
return text
end
local IS_VOWEL = FRONT_VOWEL
local VOWEL_REFLEX
if true then
-- [f1]
local aEei = { "a", "E", "e", "i" }
local AEei = { "A", "E", "e", "i" }
local AV7i = { "A", "V", "7", "i" }
local AV7M = { "A", "V", "7", "M" }
local AV7u = { "A", "V", "7", "u" }
local AOou = { "A", "O", "o", "u" }
local QOou = { "Q", "O", "o", "u" }
-- [F2[secondaryR]][f1]
local _jv_X = { aEei, AEei, QOou }
local njv_X = { aEei, AV7i, QOou }
local hjvtX = { aEei, aEei, QOou }
local _Gv_X = { AV7i, AV7M, QOou }
local rGv_X = { AEei, AV7M, QOou } -- not currently used
local hGv_X = { AV7M, AV7M, AV7M }
local _wv_X = { AV7u, AOou, QOou }
local rwv_X = { AOou, AOou, QOou }
local hwv_X = { AV7M, AOou, QOou }
local hwvtX = { AV7M, AV7u, QOou }
-- [F2[secondaryL]][F2[secondaryR]][f1]
local _Xv__ = { _jv_X, _Gv_X, _wv_X }
local nXv__ = { njv_X, _Gv_X, hwv_X }
local rXv__ = { _jv_X, _Gv_X, rwv_X }
local hXv__ = { _jv_X, hGv_X, hwv_X }
local hXvt_ = { hjvtX, hGv_X, hwvtX }
local hXvr_ = { hjvtX, hGv_X, hwv_X }
-- [primaryR][F2[secondaryL]][F2[secondaryR]][f1]
local __vX_ = {
["p"] = _Xv__, ["t"] = _Xv__, ["k"] = _Xv__,
["m"] = _Xv__, ["n"] = _Xv__, ["N"] = _Xv__,
["r"] = _Xv__, ["l"] = _Xv__
}
local n_vX_ = {
["p"] = nXv__, ["t"] = nXv__, ["k"] = nXv__,
["m"] = nXv__, ["n"] = nXv__, ["N"] = nXv__,
["r"] = nXv__, ["l"] = nXv__
}
local r_vX_ = {
["p"] = rXv__, ["t"] = rXv__, ["k"] = rXv__,
["m"] = rXv__, ["n"] = rXv__, ["N"] = rXv__,
["r"] = rXv__, ["l"] = _Xv__
}
local h_vX_ = {
["p"] = hXv__, ["t"] = hXvt_, ["k"] = hXv__,
["m"] = hXv__, ["n"] = hXv__, ["N"] = hXv__,
["r"] = hXvr_, ["l"] = hXv__
}
-- [primaryL][primaryR][F2[secondaryL]][F2[secondaryR]][f1]
VOWEL_REFLEX = {
["p"] = __vX_, ["t"] = __vX_, ["k"] = __vX_,
["m"] = __vX_, ["n"] = n_vX_, ["N"] = n_vX_,
["r"] = r_vX_, ["l"] = n_vX_, ["h"] = h_vX_
}
end
local CONSON_REFLEX
if true then
local map = {
["t"] = { ["j"] = "T" },
["n"] = { ["j"] = "J" },
["r"] = { ["j"] = "R" },
["l"] = { ["j"] = "L" }
}
for primary in mw.text.gsplit("ptkmnNrl", "") do
local map2 = map[primary]
if not map2 then
map2 = {}
map[primary] = map2
end
map2["j"] = map2["j"] or primary
map2["G"] = map2["G"] or primary
map2["w"] = map2["w"] or primary
end
map["T"] = map["t"]
map["J"] = map["n"]
map["R"] = map["r"]
map["L"] = map["l"]
CONSON_REFLEX = map
end
local VOICED_PRIMARY =
{ ["p"]="b", ["t"]="d", ["T"]="D", ["S"]="Z", ["s"]="z", ["k"]="g" }
local VOICELESS_PRIMARY =
{ ["b"]="p", ["d"]="t", ["D"]="T", ["Z"]="S", ["z"]="s", ["g"]="k" }
local PHONETIC_IPA
if true then
local map = {
["p"] = "p",
["b"] = "b",
["B"] = "β̞",
["t"] = "t",
["d"] = "d",
["s"] = "s",
["z"] = "z",
["k"] = "k",
["g"] = "ɡ",
["m"] = "m",
["n"] = "n",
["N"] = "ŋ",
["r"] = "r",
["l"] = "l",
["HG"] = "ʔ",
["Hw"] = "w",
["_"] = "‿",
["j"] = "ʲ",
["G"] = "ˠ",
["w"] = "ʷ",
["a"] = "æ",
["E"] = "ɛ",
["e"] = "e",
["i"] = "i",
["A"] = "ɑ",
["V"] = "ʌ",
["7"] = "ɤ",
["M"] = "ɯ",
["Q"] = "ɒ",
["O"] = "ɔ",
["o"] = "o",
["u"] = "u",
["^"] = ASYLL,
["@"] = ASYLL,
["("] = "(",
[")"] = ")",
[":"] = "ː",
["="] = TIE2
}
if PHONETIC_DETAILS then
assign(map, {
["t"] = "t̪",
["T"] = "t̠",
["d"] = "d̪",
["D"] = "d̠",
["s"] = "s̠",
["z"] = "z̠",
["k"] = "k̠",
["g"] = "ɡ̠",
["n"] = "n̠",
["J"] = "n̪",
["N"] = "ŋ̠",
["r"] = "r̠",
["R"] = "r̪",
["l"] = "l̠",
["L"] = "l̪",
["a"] = "æ̝",
["E"] = "ɛ̝",
["E@"] = "e"..map["@"],
["E^"] = "e"..map["^"],
["Q"] = "ɒ̝",
["O"] = "ɔ̝",
["O@"] = "o"..map["@"],
["O^"] = "o"..map["^"]
})
end
map["T"] = map["T"] or map["t"]
map["D"] = map["D"] or map["d"]
map["S"] = map["S"] or (map["T"]..map["s"])
map["Z"] = map["Z"] or (map["D"]..map["z"])
map["kG"] = map["kG"] or map["k"]
map["gG"] = map["gG"] or map["g"]
map["J"] = map["J"] or map["n"]
map["NG"] = map["NG"] or map["N"]
map["R"] = map["R"] or map["r"]
map["L"] = map["L"] or map["l"]
map["Hj"] = map["Hj"] or map["i"]..map["^"]
local key
for primary in mw.text.gsplit("pbBtdTDSZszkgmnJNrRlL_", "") do
for secondary in mw.text.gsplit("jGw", "") do
key = primary..secondary
map[key] = map[key] or (map[primary]..map[secondary])
end
end
for vowel in mw.text.gsplit(V_, "") do
key = vowel.."@"
map[key] = map[key] or (map[vowel]..map["@"])
key = vowel.."^"
map[key] = map[key] or (map[vowel]..map["^"])
end
PHONETIC_IPA = map
end
local function toPhoneticRemainder(code, config, leftFlag, rightFlag)
local text = code
local chars, subst
local diphthongs = config.diphthongs
-- If the phrase begins or ends with a bare vowel
-- and no pseudo-glide, display phrase up to five times
-- with each of the different pseudo-glides and possible vowel reflexes.
if IS_VOWEL[string.sub(text, 1, 1)] then
text = "_j"..code
toPhoneticRemainder(text, config, false, rightFlag)
if not diphthongs then
toPhoneticRemainder(text, config, true, rightFlag)
end
text = "_G"..code
toPhoneticRemainder(text, config, false, rightFlag)
if not diphthongs then
toPhoneticRemainder(text, config, true, rightFlag)
end
text = "_w"..code
toPhoneticRemainder(text, config, false, rightFlag)
if not diphthongs then
toPhoneticRemainder(text, config, true, rightFlag)
end
return
end
if IS_VOWEL[string.sub(text, -1)] then
text = code.."_j"
toPhoneticRemainder(text, config, leftFlag, false)
if not diphthongs then
toPhoneticRemainder(text, config, leftFlag, true)
end
text = code.."_G"
toPhoneticRemainder(text, config, leftFlag, false)
if not diphthongs then
toPhoneticRemainder(text, config, leftFlag, true)
end
text = code.."_w"
toPhoneticRemainder(text, config, leftFlag, false)
if not diphthongs then
toPhoneticRemainder(text, config, leftFlag, true)
end
return
end
local initialJ = config.initialJ
local medialJ = config.medialJ
local finalJ = config.finalJ
local noHints = config.noHints
local outSeq = config.outSeq
local voice = config.voice
if initialJ == "x" or
medialJ == "x" or
finalJ == "x"
then
local subSeq = {}
config.outSeq = subSeq
if initialJ == "x" then
config.initialJ = "t"
end
if medialJ == "x" then
config.medialJ = "t"
end
if finalJ == "x" then
config.finalJ = "t"
end
toPhoneticRemainder(code, config)
if initialJ == "x" then
config.initialJ = "s"
end
if medialJ == "x" then
config.medialJ = "s"
end
if finalJ == "x" then
config.finalJ = "s"
end
toPhoneticRemainder(code, config)
addUnique(outSeq, table.concat(subSeq, " ~ "))
config.outSeq = outSeq
config.initialJ = initialJ
config.medialJ = medialJ
config.finalJ = finalJ
return
end
-- Glides always trigger epenthesis, even neighboring other glides.
if not diphthongs then
-- {ww} always causes the second glide to surface.
text = string.gsub(text, "([aEei])( *)hw( *)hw", "%1%2hw%1@%3Hw")
end
text = string.gsub(text, "([aEei])( *)hG( *.[jGw])", "%1%2hG%1@%3")
text = string.gsub(text, "(.[jGw])( *)hG( *)([aEei])", "%1%4@%2hG%3%4")
text = string.gsub(text, "([aEei])( *)h(.)( *.[jGw])", "%1%2h%3%1@%4")
text = string.gsub(text, "(.[jGw])( *)h(. *)([aEei])", "%1%4@%2h%3%4")
text = string.gsub(text, "(.[jGw])( *[yY].)", "%1i@%2")
-- Preserve these exceptionally stable clusters.
text = string.gsub(text, "l([jG] *)tG", "l%1|tG")
-- Unstable consonant clusters trigger epenthesis.
-- Liquids before coronal obstruents.
text = string.gsub(text, "([rl].)( *)t", "%1v%2t")
-- Nasals and liquids after coronal obstruents.
text = string.gsub(text, "t(.)( *[nrl])", "t%1v%2")
-- Heterorganic clusters.
-- Labial consonants neighboring coronal or dorsal consonants.
text = string.gsub(text, "([pm].)( *[tnrlkN])", "%1v%2")
-- Coronal consonants neighboring labial or dorsal consonants.
text = string.gsub(text, "([tnrl].)( *[pmkN])", "%1v%2")
-- Dorsal consonants neighboring labial or coronal consonants.
text = string.gsub(text, "([kN].)( *[pmtnrl])", "%1v%2")
-- Organic speech involves certain consonant cluster assimilations.
-- Forward assimilation of rounded consonants.
-- There is no rounded coronal obstruent.
text = string.gsub(text, "(w *[^t])[jG]", "%1w")
-- Backward assimilation of remaining secondary articulations.
text = string.gsub(text, "[jGw]( *.)([jGw])", "%2%1%2")
-- Backward nasal assimilation of primary articulations.
text = string.gsub(text, "[pkrl](. *)([mnN])", "%2%1%2")
-- No longer need to protect exceptionally stable consonant clusters.
text = string.gsub(text, "|", "")
-- Give a vowel height to all epenthetic vowels that still lack one.
text = string_gsub2(text, "(.)( *..)v( *.. *)(.)",
function(vowelL, consonL, consonR, vowelR)
return vowelL..consonL..maxF1(vowelL, vowelR).."@"..consonR..vowelR
end
)
-- Tag all vowels for next set of operations.
text = string.gsub(text, "([aEei])", "/%1")
-- There is no variation in the surface realizations of vowels
-- between two identical secondary articulations.
text = string_gsub2(text, "([jGw])( *)/([aEei])(@? *.)%1",
function(secondary, _, vowel, infix)
return secondary.._..VOWEL[F1[vowel]][F2[secondary]]..
infix..secondary
end
)
if diphthongs then
text = string_gsub2(text,
"(.)([jGw])( *)/([aEei])(@?)( *)(.)([jGw])",
function(
primaryL, secondaryL, _, vowel, epenth, __, primaryR, secondaryR
)
local f1 = F1[vowel]
return (
primaryL..secondaryL.._..
VOWEL[f1][F2[secondaryL]]..epenth.."="..
VOWEL[f1][F2[secondaryR]]..epenth..__..
primaryR..secondaryR
)
end
)
else
-- Vowels neighboring pseudo-glides.
subst = function(
primaryL, secondaryL, _, vowel, epenth,
__, primaryR, secondaryR, flag
)
local f2L = F2[secondaryL]
local f2R = F2[secondaryR]
local f2
if flag then
f2 = math.max(f2L, f2R)
else
f2 = math.min(f2L, f2R)
end
return (
primaryL..secondaryL.._..
VOWEL[F1[vowel]][f2]..epenth..__..
primaryR..secondaryR
)
end
text = string.gsub(text, "(_)([jGw])( *)/("..V..")(@?)( *)(.)([jGw])",
function(a, b, c, d, e, f, g, h)
return subst(a, b, c, d, e, f, g, h, leftFlag)
end
)
text = string.gsub(text, "(.)([jGw])( *)/("..V..")(@?)( *)(_)([jGw])",
function(a, b, c, d, e, f, g, h)
return subst(a, b, c, d, e, f, g, h, rightFlag)
end
)
-- Vowels between two non-glides have the most predictable reflexes.
text = string_gsub2(text,
"([ptkmnNrl])(.)( *)/([aEei])(@? *)([ptkmnNrl])(.)",
function(
primaryL, secondaryL, _, vowel, infix, primaryR, secondaryR
)
return primaryL..secondaryL.._..
VOWEL_REFLEX[primaryL][primaryR]
[F2[secondaryL]][F2[secondaryR]][F1[vowel]]..
infix..primaryR..secondaryR
end
)
-- Exceptionally for the single word "rej".
text = string.gsub(text, "^(rG *)([V7])( *tj)$",
function(prefix, vowel, suffix)
return prefix..FRONT_VOWEL[vowel]..suffix
end
)
-- Vowels always claim the secondary articulation
-- of a neighboring back unrounded glide.
text = string.gsub(text, "(hG *)/([aEei])", function(prefix, vowel)
return prefix..BACK_VOWEL[vowel]
end)
text = string.gsub(text, "/([aEei])(@? *hG)", function(vowel, suffix)
return BACK_VOWEL[vowel]..suffix
end)
-- Unless already claimed, epenthetic vowels after a glide
-- always claim the secondary articulation to the left.
text = string.gsub(text, "([hH])(.)( *)/([aEei])@",
function(primaryL, secondaryL, _, vowel)
return (
primaryL..secondaryL.._..
VOWEL[F1[vowel]][F2[secondaryL]].."@"
)
end
)
-- Unless already claimed, vowels before a glide
-- always claim the secondary articulation to the right.
text = string.gsub(text, "/([aEei])(@?)( *[hHyY])(.)",
function(vowel, epenth, primaryR, secondaryR)
return (
VOWEL[F1[vowel]][F2[secondaryR]]..epenth..
primaryR..secondaryR
)
end
)
-- For now, unless already claimed, vowels before a rounded consonant
-- claim the secondary articulation to the right.
text = string.gsub(text, "/([aEei])(@? *.w)", function(vowel, suffix)
return ROUND_VOWEL[vowel]..suffix
end)
-- For now, unless already claimed, remaining vowels
-- claim the secondary articulation to the left.
text = string.gsub(text, "([jGw])( *)/([aEei])",
function(secondaryL, _, vowel)
return secondaryL.._..VOWEL[F1[vowel]][F2[secondaryL]]
end
)
-- Vowels after {yi'y} and {'yiy}
-- claim the secondary articulation to the right.
subst = function(prefix, vowel, infix, secondaryR)
return prefix..VOWEL[F1[vowel]][F2[secondaryR]]..infix..secondaryR
end
text = string.gsub(text, "([yY]. *)([aEei])(@? *.)([jGw])", subst)
-- Change certain vowels in a special environment from round to front.
text = string_gsub2(text, "(hj *)([Oou])( *.w *"..V.." *h[jh])",
function(prefix, vowel, suffix)
return prefix..FRONT_VOWEL[vowel]..suffix
end
)
text = string.gsub(text, "(hj *)([Oou])( *)(.w)( *)("..V..")",
function(prefix, vowelL, _, conson, __, vowelR)
if conson ~= "hw" or F1[vowelL] ~= F1[vowelR] then
return prefix..FRONT_VOWEL[vowelL].._..conson..__..vowelR
end
end
)
text = string.gsub(text, "(hj *)([Oou])( *.w *.w)",
function(prefix, vowel, suffix)
return prefix..FRONT_VOWEL[vowel]..suffix
end
)
text = string.gsub(text, "(a@? *hj *)Q( *.w *"..V..")", "%1a%2")
text = string.gsub(text, "(a@? *hj *)Q( *.w *.w)", "%1a%2")
-- Tag certain glide-vowel-non-glide sequences for special reflexes.
text = string.gsub(text, "(H[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string.gsub(text, "^ *(h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string.gsub(text, "(@ *h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
text = string.gsub(text,
"([EeiAV7MOou]@? *h[jw] *)([aAQ] *[ptkmnNrl])", "%1/%2")
text = string.gsub(text,
"(hj *[aEei]@? *hw *)("..V.." *[ptkmnNrl])", "%1/%2")
-- Untag certain sequences, exempting them from special reflexes.
text = string.gsub(text, "(hj *)/([aEei] *[knNrl]w)", "%1%2")
-- Special reflexes.
text = string.gsub(text, "([jw])( *)/("..V..")( *)(.)([jGw])",
function(secondaryL, _, vowel, __, primaryR, secondaryR)
return secondaryL.._..
VOWEL_REFLEX["h"][primaryR]
[F2[secondaryL]][F2[secondaryR]][F1[vowel]]..
__..primaryR..secondaryR
end
)
end
-- Temporarily cancel epenthetic {i} neighboring {yi'y}.
text = string.gsub(text, "i@( *yj)", "%1")
-- {yi'y} neighboring {i} may now be demoted to {y}.
text = string.gsub(text, "([iMu]@? *)yj", "%1hj")
text = string.gsub(text, "yj( *[iMu])", "hj%1")
-- {'yiy} may now be demoted everywhere.
text = string.gsub(text, "(i@ *)Yj", "%1hjihj")
text = string.gsub(text, "Yj", "hjihji@hj")
-- For the purposes of this template,
-- surface all glides pronounced in isolation.
text = string.gsub(text, "^ *h(.) *$", "H%1")
if not diphthongs then
-- Opportunistically front this vowel.
text = string.gsub(text, "(hj *)A( *[kN]G *[kN]?G? *"..V..")", "%1a%2")
-- Surface certain glides.
text = string.gsub(text, "^ *h(w *[Oou])", "H%1")
text = string.gsub(text, "h(w *[aEeiAV7M])", "H%1")
text = string.gsub(text, "^ *h(j *[AV7MQOou])", "H%1")
text = string.gsub(text, "([ptkmnNrl]..@ *)h(w *[Oou])", "%1H%2")
text = string.gsub(text,
"([ptkmnNrl].).@( *)h(j *[AV7MQOou])", "%1%2H%3")
text = string.gsub(text, "([AV7MQOou]@? *)h(j *[AV7MQOou])", "%1H%2")
text = string.gsub(text, "([aEeiAV7M])(@? *)hw( *)([QOou])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] > F1[vowelR] then
return vowelL..infix.."Hw".._..vowelR
end
end
)
text = string.gsub(text, "([AV7MQOou])(@? *)hj( *)([aEei])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] > F1[vowelR] then
return vowelL..infix.."Hj".._..vowelR
end
end
)
text = string.gsub(text, "([aEei])(@? *)hj( *)([AV7MQOou])",
function(vowelL, infix, _, vowelR)
if F1[vowelL] < F1[vowelR] then
return vowelL..infix.."Hj".._..vowelR
end
end
)
text = string.gsub(text, "("..V..")( *)h([jw]) *$",
function(vowel, _, secondary)
if F2[vowel] ~= F2[secondary] then
return vowel.._.."H"..secondary
end
end
)
-- Protect word-final epenthetic vowels after non-glides
-- from the next operation.
text = string.gsub(text, "([ptkmnNrl][jGw]"..V..")(@ )", "%1/%2")
-- De-epenthesize vowels if they still neighbor unsurfaced glides.
text = string.gsub(text, "("..V..")@( *h.)", "%1%2")
text = string.gsub(text, "(h. *"..V..")@", "%1")
-- Adjust F1 of currently remaining epenthetic vowels.
text = string_gsub2(text,
"("..V..")( *.[jGw])(.)@( *.[jGw] *)("..V..")",
function(vowelL, infixL, vowel, infixR, vowelR)
return vowelL..infixL..
VOWEL[F1[maxF1(vowelL, vowelR)]][F2[vowel]].."/@"..
infixR..vowelR
end
)
text = string.gsub(text, "/", "")
end
-- Delete all remaining unsurfaced glides.
text = string.gsub(text, "h.", "")
-- Surface realization for {yi'y}.
text = string.gsub(text, "yj", "i^")
if not diphthongs then
-- Realization for surfaced {y}.
text = string_gsub2(text, "("..V.."?)(@?)( *)Hj( *)("..V.."?)",
function(vowelL, epenthL, _, __, vowelR)
if vowelL ~= "" then
if vowelR ~= "" then
return (
vowelL..epenthL.._..
maxF1(vowelL, vowelR).."^"..__..vowelR
)
else
return vowelL.._..epenthL..maxF1(vowelL).."^"..__
end
else
if vowelR ~= "" then
return _..maxF1(vowelR).."^"..__..vowelR
else
return _.."i^"..__
end
end
end
)
-- Turn this surfaced glide into an epenthetic vowel.
text = string.gsub(text, "([ptkmnNrl]. *[aEei])%^( *"..V..")", "%1@%2")
end
if MERGED_VOWELS then
text = string.gsub(text, "[EO]", function(vowel)
return VOWEL[F1[vowel] + 1][F2[vowel]]
end)
end
chars = splitChars(text, ".")
if not diphthongs then
-- Geminate long vowels.
local index = #chars
repeat
local ch = chars[index]
local index2 = index - 1
if IS_VOWEL[ch] then
local ch2 = chars[index + 1]
if ch2 ~= "@" and
ch2 ~= "^" and
chars[index2] == ch
then
chars[index] = ":"
end
end
index = index2
until index == 1
text = table.concat(chars, "")
end
-- Tweak remaining consonants, using offsets as a guide.
text = string.gsub(text, "()(.)([jGw])( *)([ptkmnNrl]?)([jGw]?)()",
function(
offsetL, primaryL, secondaryL, _, primaryR, secondaryR, offsetR
)
local isInitial = offsetL == 1
local isFinal = offsetR == #chars + 1
if primaryL == "H" or
primaryL == "y"
then
return primaryL..secondaryL.._
end
if primaryL == "_" then
if noHints then
-- Delete pseudo-glide.
return _
end
if isInitial then
-- Show secondary articulation to the left, not the right.
return secondaryL..primaryL.._
end
return primaryL..secondaryL.._
end
local geminated = primaryL == primaryR
if primaryL ~= "t" and primaryR == "t" then
-- /tʲ/ is palatalized postalveolar.
-- /tˠ/ is velarized dental.
-- /nʲ, rʲ, lʲ/ are palatalized dental.
-- /nˠ, rˠ, lˠ/ are velarized postalveolar.
-- Regressively assimilate primary dental or postalveolar.
-- None of this will be visible unless PHONETIC_DETAILS == true.
primaryL = CONSON_REFLEX[primaryL]
[secondaryL == "j" and "G" or "j"]
primaryR = CONSON_REFLEX[primaryR][secondaryR]
else
primaryL = CONSON_REFLEX[primaryL][secondaryL]
if primaryR ~= "" then
primaryR = CONSON_REFLEX[primaryR][secondaryR]
end
end
if primaryR == "T" then
if primaryL == "T" then
primaryL = finalJ
primaryR = initialJ
if primaryL == "S" and
primaryR ~= "s"
then
primaryL = "T"
elseif
primaryL == "T" and
primaryR == "s" and
medialJ == "S"
then
primaryL = "S"
end
else
primaryR = medialJ
end
elseif primaryL == "T" then
if isInitial then
primaryL = initialJ
elseif isFinal then
primaryL = finalJ
else
primaryL = medialJ
end
end
if primaryR ~= "" then
-- Consonant cluster.
-- For some reason, the {t} in {lt} and {ļt} is voiceless.
if not geminated and
primaryL ~= "l" and
primaryL ~= "L"
then
primaryL = VOICED_PRIMARY[primaryL] or primaryL
primaryR = VOICED_PRIMARY[primaryR] or primaryR
end
-- Display secondary articulation only once for the cluster.
secondaryL = ""
elseif
not isInitial and
not isFinal
then
-- Medial single consonant.
primaryL = VOICED_PRIMARY[primaryL] or primaryL
end
if voice == false then
primaryL = VOICELESS_PRIMARY[primaryL] or primaryL
primaryR = VOICELESS_PRIMARY[primaryR] or primaryR
elseif voice == true then
primaryL = VOICED_PRIMARY[primaryL] or primaryL
primaryR = VOICED_PRIMARY[primaryR] or primaryR
end
return primaryL..secondaryL.._..primaryR..secondaryR
end
)
if not diphthongs then
-- Elegantly connect long and epenthetic vowels across word gaps.
text = string.gsub(text, "(["..V_..":]): +", "%1 : ")
text = string.gsub(text, "("..V..") +%1([^%^])", "%1 :%2")
text = string.gsub(text, "("..V..") +%1$", "%1 :")
text = string.gsub(text, "("..V..")@ +%1", " %1 :")
text = string.gsub(text, "("..V.."@) +", " %1 ")
if W_OFF_GLIDES then
-- Add [w] off-glides after certain consonants.
subst = function(primary, _, epenth)
if epenth == "" then
return primary.."Hw".._
end
end
if false and PHONETIC_DETAILS then
text = string.gsub(text, "([pbm])(G *[aEei])(@?)",
function(primary, _, epenth)
if epenth == "" then
return primary.."B".._
end
end
)
else
text = string.gsub(text, "([pbm])G( *[aEei])(@?)", subst)
end
text = string.gsub(text, "([kgnNrl])w( *[aEeiAV7M])(@?)", subst)
-- Remove [w] off-glides after certain consonants
-- when they occur after rounded vowels.
text = string.gsub(text, "([QOou] *[nrl]? *[nrl])Hw", "%1w")
text = string.gsub(text, "([QOou] *[kgN]? *N)Hw( *M)", "%1w%2")
end
end
if IMPLIED_SECONDARIES then
if PARENTHETICAL_EPENTHESIS then
text = string.gsub(text, "(.@)", "/%1")
if diphthongs then
text = string.gsub(text, "(.=.@)", "/%1")
end
end
if PARENTHETICAL_EPENTHESIS then
text = string.gsub(text, "/", "")
end
end
if PARENTHETICAL_EPENTHESIS then
if not diphthongs then
text = string.gsub(text, "(.)@("..V..")", "%1^%2")
end
text = string.gsub(text, "(.)@", "(%1)")
text = string.gsub(text, "%)(=?)%(", "%1")
end
-- Convert remaining word gaps to liaison.
text = string.gsub(text, "^ *", "")
text = string.gsub(text, " *$", "")
text = string.gsub(text, " +", false and "_" or "")
text = string.gsub(text, ".[jGw@%^]?", PHONETIC_IPA)
addUnique(outSeq, text)
end
local PHONETIC_ARG_J = { ["t"] = "T", ["c"] = "S", ["s"] = "s", ["x"] = "x" }
local function toPhonetic(inSeq, args)
-- Recognize "ralik" for Rālik Chain (western dialect).
-- Recognize "ratak" for Ratak Chain (eastern dialect).
-- For other values, list both possible dialect reflexes where applicable.
local dialect = args and args.dialect and
mw.ustring.lower(mw.text.trim(args.dialect)) or ""
if dialect == "rālik" then
dialect = "ralik"
end
-- If enabled, display full diphthong allophones for short vowels.
local diphthongs = not not (args and parseBoolean(args.diphthongs))
-- Argument "J" has format like "tst".
-- Recognized letters are "t" = plosive, "c" = affricate, "s" = fricative.
-- Letters for initial, medial and final respectively.
-- Real-world pronunciation said to vary by sociological factors,
-- but all realizations may occur in free variation.
local modeJ = splitChars(args and args.J and string.lower(args.J) or "tst")
local initialJ = PHONETIC_ARG_J[modeJ[1] or ""] or "t"
local medialJ = PHONETIC_ARG_J[modeJ[2] or ""] or "s"
local finalJ = PHONETIC_ARG_J[modeJ[3] or ""] or initialJ
-- If enabled, do not display pseudo-glide hints at all.
local noHints = not not (args and parseBoolean(args.nohints))
-- "false" will display all obstruent allophones as voiceless.
-- "true" will display all obstruent allophones as voiced.
-- Empty string or absent by default will display
-- only medial obstruent allophones as semi-voiced.
local voice = args and args.voice or ""
if voice ~= "" then
voice = parseBoolean(voice)
end
local outSeq = {}
local config = {
["outSeq"] = outSeq,
["diphthongs"] = diphthongs,
["initialJ"] = initialJ,
["medialJ"] = medialJ,
["finalJ"] = finalJ,
["noHints"] = noHints,
["voice"] = voice
}
for _, str in pairs(inSeq) do
str = string.gsub(str, S, " ")
str = string.gsub(str, "^ *", "")
str = string.gsub(str, " *$", "")
local isRalik = dialect == "ralik"
if isRalik or dialect == "ratak" then
str = toPhoneticDialect(str, config, isRalik)
toPhoneticRemainder(str, config)
else
local ralik = toPhoneticDialect(str, config, true)
local ratak = toPhoneticDialect(str, config, false)
-- If both dialect reflexes are the same, display only one of them.
toPhoneticRemainder(ralik, config)
if ralik ~= ratak then
toPhoneticRemainder(ratak, config)
end
end
end
return outSeq
end
export._parse = parse
export._toBender = toBender
export._toMOD = toMOD
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic
function export.bender(frame)
return table.concat(toBender(parse(frame.args[1], frame.args)), ", ")
end
function export.MOD(frame)
return toMOD(frame.args[1])
end
function export.parse(frame)
return table.concat(parse(frame.args[1]), ", ")
end
function export.phonemic(frame)
return table.concat(toPhonemic(parse(frame.args[1])), ", ")
end
function export.phonetic(frame)
return table.concat(toPhonetic(parse(frame.args[1]), frame.args), ", ")
end
function export.phoneticMED(frame)
return "DEPRECATED"
end
function export.phoneticChoi(frame)
return "DEPRECATED"
end
function export.phoneticWillson(frame)
return "DEPRECATED"
end
return export