Module:Chak-utilities: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
(40 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
export = {} | export = {} | ||
require("Module:Chak-utilities/data") | |||
local m_links = require("Module:links") | local m_links = require("Module:links") | ||
local m_utilities = require("Module:utilities") | local m_utilities = require("Module:utilities") | ||
local m_headword = require("Module:headword") | --local m_headword = require("Module:headword") | ||
local m_stru = require("Module:String utilities") | |||
local m_table = require("Module:Table") | |||
local lang = require("Module:Languages").getByCode("chak") | local lang = require("Module:Languages").getByCode("chak") | ||
function export.l(term, face, alt) | |||
return m_links.full_link( { term = term, lang = lang, alt = alt }, face ) | return m_links.full_link( { term = term, lang = lang, alt = alt }, face ) | ||
end | end | ||
function export.is_geminate(char) | function export.is_geminate(char) | ||
return char ~= nil and #char > 1 and #char < 4 and | return char ~= nil and #char > 1 and #char < 4 and mw.ustring.sub(char,1,1) == mw.ustring.sub(char,2,2) | ||
end | |||
function export.compress_consonants(str) | |||
str = str or '' | |||
replace = {{"ssh","ʃʃ"}, | |||
{"ddh","ðð"}, | |||
{"tth","θθ"}, | |||
{"cch","ʧʧ"}, | |||
{"sh","ʃ"}, | |||
{"dh","ð"}, | |||
{"th","θ"}, | |||
{"j","ʤ"}, | |||
{"ch","ʧ"}, | |||
{"'",""}, | |||
} | |||
for _,v in ipairs(replace) do | |||
str = mw.ustring.gsub(str,v[1],v[2]) | |||
end | |||
return str | |||
end | |||
function export.expand_consonants(str) | |||
replace = { | |||
{"s'h","sh"}, | |||
{"s'sh","sʃ"}, | |||
{"d'h","dh"}, | |||
{"d'dh","dð"}, | |||
{"t'h","th"}, | |||
{"t'th","tθ"}, | |||
{"ssh","ʃʃ"}, | |||
{"ddh","ðð"}, | |||
{"tth","θθ"}, | |||
{"cch","ʧʧ"}, | |||
{"sh","ʃ"}, | |||
{"dh","ð"}, | |||
{"th","θ"}, | |||
{"j","ʤ"}, | |||
{"ch","ʧ"}, | |||
} | |||
for _,v in ipairs(replace) do | |||
str = mw.ustring.gsub(str,v[2],v[1]) | |||
end | |||
return str | |||
end | |||
function export.parse_consonants(str) | |||
str = export.compress_consonants(str) | |||
str = mw.ustring.gsub(str,".",'\0%0%0') | |||
str = mw.ustring.gsub(str,"(.)%z%1","%1") | |||
str = mw.ustring.gsub(str,"%z.","-") | |||
str = mw.ustring.gsub(str,"^.","") | |||
str = mw.ustring.gsub(str,"%-'%-","-") | |||
str = export.expand_consonants(str) | |||
return str | |||
end | |||
function export.parse_root(root_str,expandFinal) | |||
if expandFinal == nil then | |||
expandFinal = false | |||
end | |||
stems = {} | |||
local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)") | |||
if root[1] == '' then | |||
table.remove(root,1) | |||
end | |||
if expandFinal then | |||
local tail = root[#root] | |||
local new_tail = export.parse_consonants(tail) | |||
root[#root] = new_tail | |||
end | |||
mw.logObject(root) | |||
return root | |||
end | |||
function export.condense_vowels(vowel_str) | |||
local output = vowel_str or '' | |||
if #output > 3 or #output < 2 then | |||
return output | |||
elseif #output == 3 then | |||
output = string.sub(output,1,2) | |||
end | |||
output = string.gsub(output,"%S%S",{["aw"]="o", | |||
["au"]="o", | |||
["ao"]="o", | |||
["ai"]="e", | |||
["ay"]="e", | |||
["ae"]="e", | |||
["wu"]="uu", | |||
["uw"]="uu", | |||
["yi"]="ii", | |||
["iy"]="ii", | |||
}) | |||
return output | |||
end | end | ||
function export.extract_root(root_str) | function export.extract_root(root_str) | ||
roots = { | roots = { | ||
Line 30: | Line 144: | ||
if root_str then | if root_str then | ||
root_str = string.gsub(root_str,'%s','-') | root_str = string.gsub(root_str,'%s','-') | ||
parts = mw.text.split(root_str,'-') | parts = mw.text.split(root_str,'-') | ||
Line 39: | Line 152: | ||
roots['coda'] = parts[3] | roots['coda'] = parts[3] | ||
elseif #parts == 2 then | elseif #parts == 2 then | ||
if string.find(parts[1],"^[ | if string.find(parts[1],"^["..VOWELS.."]") then | ||
roots['onset'] = '' | roots['onset'] = '' | ||
roots['vowel'] = parts[1] | roots['vowel'] = parts[1] | ||
roots['coda'] = parts[2] | roots['coda'] = parts[2] | ||
elseif string.find(parts[2],"^[ | elseif string.find(parts[2],"^["..VOWELS.."]") then | ||
roots['onset'] = parts[1] | roots['onset'] = parts[1] | ||
roots['vowel'] = parts[2] | roots['vowel'] = parts[2] | ||
Line 54: | Line 167: | ||
elseif #parts == 1 and parts[1] ~= '' then | elseif #parts == 1 and parts[1] ~= '' then | ||
local temp = root_str | local temp = root_str | ||
s,e = string.find(temp,"[".. | s,e = string.find(temp,"["..VOWELS.."]+") | ||
if s then | if s then | ||
roots['vowel'] = string.sub(temp,s,e) | roots['vowel'] = string.sub(temp,s,e) | ||
end | end | ||
s,e = string.find(temp,"^[^".. | s,e = string.find(temp,"^[^"..VOWELS.."]+") | ||
if s then | if s then | ||
roots['onset'] = string.sub(temp,s,e) | roots['onset'] = string.sub(temp,s,e) | ||
temp = string.gsub(temp,roots['onset'],'',1) | temp = string.gsub(temp,roots['onset'],'',1) | ||
end | end | ||
s,e = string.find(temp,"[^".. | s,e = string.find(temp,"[^"..VOWELS.."]+") | ||
if s then | if s then | ||
roots['coda'] = string.sub(temp,s,e) | roots['coda'] = string.sub(temp,s,e) | ||
Line 75: | Line 188: | ||
return roots | return roots | ||
end | end | ||
Line 104: | Line 211: | ||
function export.assimilate(str) | function export.assimilate(str) | ||
output = export.compress_consonants(str) | |||
output = str | |||
if str and str ~= '' then | if str and str ~= '' then | ||
--VvC assimilition | --Find and record location of hyphen, if any | ||
output = string.gsub(output,"([ | hyphen = mw.ustring.find(output,"-",1,true) or 0 | ||
output = mw.ustring.gsub(output,"-","") | |||
--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now. | |||
-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC) | |||
--Sibilant Assimilation | |||
output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2") | |||
--Nasal Assimilation | |||
output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1") | |||
output = mw.ustring.gsub(output,"n([bfv])","m%1") | |||
--General Devoicing | --General Devoicing | ||
output = | output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2") | ||
output = | output = mw.ustring.gsub(output,"~(.)",DEVOICING) | ||
output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2") | |||
output = mw.ustring.gsub(output,"~(.)",DEVOICING) | |||
--H devoicing | --H devoicing | ||
output = | output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1") | ||
output = | output = mw.ustring.gsub(output,"~(.)",H_DEVOICING) | ||
output = | output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h") | ||
output = mw.ustring.gsub(output,"~(.)",H_DEVOICING) | |||
output = mw.ustring.gsub(output,"hj","hʧ") | |||
--Misc | |||
output = mw.ustring.gsub(output,"qk","kk") | |||
output = mw.ustring.gsub(output,"kq","kk") | |||
output = mw.ustring.gsub(output,"ao","au") | |||
output = mw.ustring.gsub(output,"ae","ai") | |||
output = mw.ustring.gsub(output,"aw","au") | |||
output = mw.ustring.gsub(output,"ua","wa") | |||
--Add hyphen, if any | |||
if hyphen > 0 then | |||
output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen) | |||
end | |||
end | end | ||
return output | return export.expand_consonants(output) | ||
end | |||
function export.prothetic(str) | |||
str = str or '' | |||
str2 = export.compress_consonants(str) | |||
char1 = mw.ustring.sub(str2,1,1) | |||
char2 = mw.ustring.sub(str2,2,2) | |||
local result = "" | |||
if mw.ustring.find(VOWELS,char1) or mw.ustring.find(VOWELS,char2) then | |||
result = str | |||
else | |||
local v = mw.ustring.match(str,"["..VOWELS.."]") or 'a' | |||
result = v..str | |||
end | |||
return result | |||
end | end | ||
Line 163: | Line 296: | ||
args["mod"] = "t" | args["mod"] = "t" | ||
end | end | ||
local link_text = export.l(args["root"], args["face"], args["root"] ) | |||
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } ) | |||
if args["mod"] then | if args["mod"] then | ||
link_text = link_text.." + | mod_str = export.parse_consonants(args["mod"]) | ||
mods = mw.text.split(mod_str,"-") | |||
for k in ipairs(mods) do | |||
link_text = link_text.." + "..export.l("-"..mods[k], args["face"], "-"..mods[k]) | |||
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mods[k] }, lang) ) | |||
end | |||
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } ) | mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } ) | ||
end | end | ||
table.insert(output, link_text) | table.insert(output, link_text) | ||
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. args["root"] }, lang) ) | table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. args["root"]:gsub('-','') }, lang) ) | ||
if args["plain"] then | if args["plain"] then | ||
return args["root"].." + -"..args["mod"] | return args["root"]:gsub('-','').." + -"..args["mod"] | ||
elseif args["nocat"] then | elseif args["nocat"] then | ||
return table.concat(output) | return table.concat(output) |
Latest revision as of 16:18, 21 July 2024
- The following documentation is located at Module:Chak-utilities/documentation. [edit]
- Useful links: subpage list • transclusions • testcases • sandbox
Utilities for Chakobsa scripts. Implements {{chak-from-root}}.
Exposed functions
l(term, face, alt)
- Creates a link to a Chakobsa lexeme - similar to
{{l|chak|term}}
is_geminate(char)
- returns true if the supplied 'character' is geminate
compress_consonants(str)
- Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
- Returns an 'expanded' version of the string where reductions from
compress_consonants
are replaced with the original romanizations.
parse_consonants(str)
- Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
- Returns a table of the parts of the root string, grouped into runs of consonants and runs of vowels. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, the final part is additionally parsed out through
parse_consonants
The above example would be "kkaalatg" > "kk","aa","l","a","t-g". Used for inflection functions.
export = {}
require("Module:Chak-utilities/data")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
local m_table = require("Module:Table")
local lang = require("Module:Languages").getByCode("chak")
-- Builds a link to a Chakobsa term.
-- Thin wrapper around m_links.full_link with the language fixed to `lang`.
--   term: the term to link to
--   face: forwarded unchanged as the second argument of full_link
--   alt:  forwarded as the `alt` field of the link data
-- Returns whatever m_links.full_link returns.
function export.l(term, face, alt)
	local link_data = {
		term = term,
		lang = lang,
		alt = alt,
	}
	return m_links.full_link(link_data, face)
end
-- Reports whether the supplied 'character' is a geminate (doubled) consonant.
-- A value qualifies when it is non-nil, its byte length (`#`) is 2 or 3, and
-- its first two characters (per mw.ustring.sub) are the same, e.g. "tt", "ssh".
-- Returns a boolean.
function export.is_geminate(char)
	if char == nil then
		return false
	end
	-- NOTE: `#` counts bytes, not characters.
	local byte_len = #char
	if byte_len <= 1 or byte_len >= 4 then
		return false
	end
	local first = mw.ustring.sub(char, 1, 1)
	local second = mw.ustring.sub(char, 2, 2)
	return first == second
end
-- Returns a 'compressed' copy of `str` in which each romanized consonant
-- digraph is replaced by a single character, geminate trigraphs become a
-- doubled single character, and apostrophes are removed.
--   str: string to compress; nil is treated as ''
-- Returns the compressed string.
function export.compress_consonants(str)
	str = str or ''
	-- Applied in order: the geminate trigraphs must be rewritten before the
	-- digraphs they contain ("ssh" before "sh", and so on).
	-- Kept local so the rule list does not leak into the global environment.
	local replacements = {
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"j","ʤ"},
		{"ch","ʧ"},
		{"'",""},
	}
	for _, rule in ipairs(replacements) do
		str = mw.ustring.gsub(str, rule[1], rule[2])
	end
	return str
end
-- Returns an 'expanded' copy of `str` in which the single-character forms
-- produced by export.compress_consonants are turned back into their
-- romanized spellings.
--   str: string to expand; nil is treated as '' (matching compress_consonants)
-- Returns the expanded string.
function export.expand_consonants(str)
	str = str or ''
	-- Each entry is {romanized, compressed}; the loop replaces the compressed
	-- form (second element) with the romanized form (first element), in order.
	-- The first six entries insert an apostrophe where a plain s/d/t is
	-- directly followed by "h" or by the matching compressed character, so
	-- the expanded text cannot be misread as a digraph.
	-- Kept local so the rule list does not leak into the global environment.
	local replacements = {
		{"s'h","sh"},
		{"s'sh","sʃ"},
		{"d'h","dh"},
		{"d'dh","dð"},
		{"t'h","th"},
		{"t'th","tθ"},
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"j","ʤ"},
		{"ch","ʧ"},
	}
	for _, rule in ipairs(replacements) do
		str = mw.ustring.gsub(str, rule[2], rule[1])
	end
	return str
end
-- Splits a consonant cluster into individual consonants separated by hyphens.
-- The input is compressed with export.compress_consonants first and the result
-- is expanded again with export.expand_consonants, so digraphs count as one
-- consonant. A doubled (geminate) consonant is kept together as one unit:
-- "tg" becomes "t-g", while "tt" stays "tt".
--   str: the string to split (nil is tolerated by compress_consonants)
-- Returns the hyphen-delimited string.
function export.parse_consonants(str)
str = export.compress_consonants(str)
-- Prefix every character with a NUL marker and double it: "t" -> "\0tt".
str = mw.ustring.gsub(str,".",'\0%0%0')
-- Where a character is followed by a NUL marker and the same character again
-- (two identical neighbours), drop the marker so the pair fuses together.
str = mw.ustring.gsub(str,"(.)%z%1","%1")
-- Every remaining NUL marker, together with the character right after it
-- (one of the two copies), collapses into a single hyphen.
str = mw.ustring.gsub(str,"%z.","-")
-- Drop the first character, i.e. the leading hyphen produced above.
str = mw.ustring.gsub(str,"^.","")
-- Collapse a lone apostrophe segment between two hyphens.
-- NOTE(review): compress_consonants already deletes apostrophes, so this
-- substitution looks like it can never match - confirm before relying on it.
str = mw.ustring.gsub(str,"%-'%-","-")
str = export.expand_consonants(str)
return str
end
-- Splits a root string into alternating parts using runs of VOWELS characters
-- as the (captured) delimiter. Per the module documentation, "kkaalatg" gives
-- "kk","aa","l","a","tg".
--   root_str:    the root to split
--   expandFinal: when truthy, the last part is additionally passed through
--                export.parse_consonants (per the documentation, "tg" -> "t-g").
--                nil and false both leave the last part untouched.
-- Returns the table of parts. The table is also written to the debug log.
function export.parse_root(root_str, expandFinal)
	local root = m_stru.capturing_split(root_str, "(["..VOWELS.."]+)")
	-- A leading empty string is discarded.
	if root[1] == '' then
		table.remove(root, 1)
	end
	-- Only touch the tail when there is one; with an empty table `root[#root]`
	-- would address index 0 and store a stray entry there.
	if expandFinal and #root > 0 then
		root[#root] = export.parse_consonants(root[#root])
	end
	mw.logObject(root)
	return root
end
-- Condenses a short vowel sequence into its merged form.
--   vowel_str: the vowel sequence; nil is treated as ''
-- Sequences whose byte length is not 2 or 3 are returned unchanged. A 3-byte
-- sequence is first cut down to its first two bytes. The resulting pair is
-- then looked up in the table below; pairs without an entry are returned as is.
function export.condense_vowels(vowel_str)
	local condensed = {
		aw = "o",
		au = "o",
		ao = "o",
		ai = "e",
		ay = "e",
		ae = "e",
		wu = "uu",
		uw = "uu",
		yi = "ii",
		iy = "ii",
	}
	local cluster = vowel_str or ''
	local byte_len = #cluster
	if byte_len < 2 or byte_len > 3 then
		return cluster
	end
	local pair = cluster
	if byte_len == 3 then
		pair = string.sub(cluster, 1, 2)
	end
	return condensed[pair] or pair
end
-- Breaks a root string into its onset, vowel and coda.
--   root_str: the root. Whitespace is treated like a hyphen, and hyphens
--             separate the parts explicitly.
-- Returns a table { onset = ..., vowel = ..., coda = ... } (missing parts are
-- ''), or nil when root_str is nil/false, is empty, or has more than three
-- hyphen-separated parts.
--
-- * three parts: taken as onset-vowel-coda in that order;
-- * two parts:   whichever part starts with a VOWELS character is the vowel;
--                if neither does, they are onset and coda;
-- * one part:    the first run of VOWELS characters is the vowel, a leading
--                run of other characters is the onset, and the first run of
--                other characters after the onset is the coda.
function export.extract_root(root_str)
	if not root_str then
		return nil
	end

	-- All working variables are local so nothing leaks into the global table.
	local roots = {
		onset = '',
		vowel = '',
		coda = '',
	}
	local vowel_class = "["..VOWELS.."]"
	local non_vowel_class = "[^"..VOWELS.."]"

	root_str = string.gsub(root_str, '%s', '-')
	local parts = mw.text.split(root_str, '-')

	if #parts == 3 then
		roots['onset'] = parts[1]
		roots['vowel'] = parts[2]
		roots['coda'] = parts[3]
	elseif #parts == 2 then
		if string.find(parts[1], "^"..vowel_class) then
			roots['onset'] = ''
			roots['vowel'] = parts[1]
			roots['coda'] = parts[2]
		elseif string.find(parts[2], "^"..vowel_class) then
			roots['onset'] = parts[1]
			roots['vowel'] = parts[2]
			roots['coda'] = ''
		else
			roots['onset'] = parts[1]
			roots['vowel'] = ''
			roots['coda'] = parts[2]
		end
	elseif #parts == 1 and parts[1] ~= '' then
		local rest = root_str
		local s, e = string.find(rest, vowel_class.."+")
		if s then
			roots['vowel'] = string.sub(rest, s, e)
		end
		s, e = string.find(rest, "^"..non_vowel_class.."+")
		if s then
			roots['onset'] = string.sub(rest, s, e)
			-- The onset is an anchored prefix, so cut it off by position.
			-- Feeding it to gsub as a pattern would misbehave on magic
			-- characters such as "." or "%".
			rest = string.sub(rest, e + 1)
		end
		s, e = string.find(rest, non_vowel_class.."+")
		if s then
			roots['coda'] = string.sub(rest, s, e)
		end
	else
		return nil
	end

	return roots
end
-- Doubles a consonant.
--   char: the consonant; nil is treated as ''
-- A 1-byte value, or a 2-byte value ending in 'h', gets its first byte
-- prepended ("t" -> "tt", "sh" -> "ssh"). Anything else is returned unchanged.
function export.geminate_char(char)
	local segment = char or ''
	local is_single = #segment == 1
	local is_h_digraph = #segment == 2 and string.sub(segment, 2) == 'h'
	if not (is_single or is_h_digraph) then
		return segment
	end
	return string.sub(segment, 1, 1) .. segment
end
-- Undoes a doubled consonant.
--   char: the consonant; nil is treated as ''
-- A 2- or 3-byte value whose first two bytes are equal loses its first byte
-- ("tt" -> "t", "ssh" -> "sh"). Anything else is returned unchanged.
function export.degeminate_char(char)
	local segment = char or ''
	local byte_len = #segment
	if byte_len ~= 2 and byte_len ~= 3 then
		return segment
	end
	if string.sub(segment, 1, 1) ~= string.sub(segment, 2, 2) then
		return segment
	end
	return string.sub(segment, 2)
end
-- Applies the assimilation rules to a (possibly hyphenated) string.
--   str: the romanized string; nil or '' yields ''
-- The string is compressed with export.compress_consonants, the rules are
-- applied with any hyphen removed, the hyphen is put back, and the result is
-- expanded again with export.expand_consonants.
-- Relies on SIBILANTS, VOICED, DEVOICED, DEVOICING and H_DEVOICING, which are
-- not defined in this file (presumably supplied by the /data module).
-- NOTE: only the position of the first hyphen is remembered; all hyphens are
-- removed before the rules run, so any further hyphens are lost.
function export.assimilate(str)
	-- Local so the working values do not leak into the global environment.
	local output = export.compress_consonants(str)
	if str and str ~= '' then
		--Find and record location of hyphen, if any
		local hyphen = mw.ustring.find(output,"-",1,true) or 0
		output = mw.ustring.gsub(output,"-","")
		--VvC assimilation FIXME - should not happen if v is geminate. Disabling for now.
		--	output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
		--Sibilant Assimilation: the first of two sibilants becomes the second
		output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2")
		--Nasal Assimilation
		output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1")
		output = mw.ustring.gsub(output,"n([bfv])","m%1")
		--General Devoicing: mark the voiced neighbour of a devoiced consonant
		--with "~", then replace the marked character via the DEVOICING lookup
		output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2")
		output = mw.ustring.gsub(output,"~(.)",DEVOICING)
		output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2")
		output = mw.ustring.gsub(output,"~(.)",DEVOICING)
		--H devoicing: same marking technique, using the H_DEVOICING lookup
		output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1")
		output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
		output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h")
		output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
		output = mw.ustring.gsub(output,"hj","hʧ")
		--Misc
		output = mw.ustring.gsub(output,"qk","kk")
		output = mw.ustring.gsub(output,"kq","kk")
		output = mw.ustring.gsub(output,"ao","au")
		output = mw.ustring.gsub(output,"ae","ai")
		output = mw.ustring.gsub(output,"aw","au")
		output = mw.ustring.gsub(output,"ua","wa")
		--Add hyphen, if any, back at the position recorded above
		if hyphen > 0 then
			output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen)
		end
	end
	return export.expand_consonants(output)
end
-- Adds a prothetic vowel to a string that begins with two non-vowels.
--   str: the romanized string; nil is treated as ''
-- The first two characters of the compressed form (see
-- export.compress_consonants) are inspected. If either one is a VOWELS
-- character, or the compressed form has fewer than two characters, `str` is
-- returned unchanged. Otherwise the first VOWELS character found in `str`
-- (or 'a' if there is none) is prepended.
function export.prothetic(str)
	str = str or ''
	-- Local so the working values do not leak into the global environment.
	local compressed = export.compress_consonants(str)
	local char1 = mw.ustring.sub(compressed, 1, 1)
	local char2 = mw.ustring.sub(compressed, 2, 2)

	-- Plain (non-pattern) search: the character is data, not a pattern, so
	-- magic characters such as "." or "%" must not be interpreted.
	-- An empty character counts as "found", which keeps short strings unchanged.
	local function is_vowel_or_missing(ch)
		return ch == '' or mw.ustring.find(VOWELS, ch, 1, true) ~= nil
	end

	if is_vowel_or_missing(char1) or is_vowel_or_missing(char2) then
		return str
	end
	local v = mw.ustring.match(str, "["..VOWELS.."]") or 'a'
	return v..str
end
-- Template entry point (the module documentation names {{chak-from-root}}).
-- Reads the parent frame's arguments and produces "root + -mod + ..." link
-- text plus the matching categories.
--   frame: the frame object; arguments are read from frame:getParent().args
-- Parameters (1 and 2 are aliases of root and mod):
--   root   - the root (required)
--   mod    - modifier consonants; split with export.parse_consonants and
--            linked/categorised one by one
--   face   - forwarded to export.l (default "term")
--   plain  - return unlinked text "root + -mod" (just "root" without a mod)
--   nocat  - return the link text only
--   notext - return the categories only
--   alt, nolink - accepted but not used by this function
-- Side effect: defines the parser variables 'chak-root' and, when a modifier
-- is given, 'chak-mod' via #vardefine.
-- Returns a wikitext string.
function export.chak_from_root(frame)
	local output = {}
	local categories = {}
	local namespace = mw.title.getCurrentTitle().nsText
	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- Demo values when no root is supplied on a Template-namespace page.
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end

	local link_text = export.l(args["root"], args["face"], args["root"] )
	mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )

	if args["mod"] then
		-- Locals, so the split modifiers do not leak into the global environment.
		local mod_str = export.parse_consonants(args["mod"])
		local mods = mw.text.split(mod_str,"-")
		for _, mod in ipairs(mods) do
			link_text = link_text.." + "..export.l("-"..mod, args["face"], "-"..mod)
			table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mod }, lang) )
		end
		mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
	end

	table.insert(output, link_text)
	-- gsub returns (string, count); the assignment keeps only the string.
	local bare_root = args["root"]:gsub('-','')
	table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. bare_root }, lang) )

	if args["plain"] then
		-- Without a modifier there is nothing to append; concatenating nil
		-- here would raise an error.
		if args["mod"] then
			return bare_root.." + -"..args["mod"]
		end
		return bare_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
end
return export