Module:Chak-utilities: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
Line 214: | Line 214: | ||
if str and str ~= '' then | if str and str ~= '' then | ||
--Find and record location of hyphen, if any | |||
hyphen = mw.ustring.find(output,"-",1,true) or 0 | |||
output = mw.ustring.gsub(output,"-","") | |||
--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now. | --VvC assimilition FIXME - should not happen if v is geminate. Disabling for now. | ||
-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC) | -- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC) | ||
--Sibilant Assimilation | |||
output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2") | |||
--Nasal Assimilation | --Nasal Assimilation | ||
output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1") | output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1") | ||
Line 238: | Line 244: | ||
output = mw.ustring.gsub(output,"ua","wa") | output = mw.ustring.gsub(output,"ua","wa") | ||
--Add hyphen, if any | |||
if hyphen > 0 then | |||
output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen) | |||
end | |||
end | end | ||
Revision as of 20:17, 5 March 2024
- The following documentation is located at Module:Chak-utilities/documentation. [edit]
- Useful links: subpage list • transclusions • testcases • sandbox
Utilities for Chakobsa scripts. Implements {{chak-from-root}}
.
Exposed functions
l(term, face, alt)
- Creates a link to a Chakobsa lexeme - similar to
{{l|chak|term}}
is_geminate(char)
- Returns true if the supplied 'character' is geminate.
compress_consonants(str)
- Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
- Returns an 'expanded' version of the string where reductions from
compress_consonants
are replaced with the original romanizations.
parse_consonants(str)
- Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
- Returns a table of the parts of the root string, grouped into runs of consonants and runs of vowels. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, returns the final part parsed out through
parse_consonants
The above example would be "kkaalatg" > "kk","aa","l","a","t-g". Used for inflection functions.
-- Table of public functions; it is returned at the end of the module.
-- NOTE(review): assigned without `local`, so `export` is a global variable.
export = {}
-- Loaded for its side effects only (the return value is discarded).
-- Presumably this defines the globals used unqualified further down
-- (VOWELS, SIBILANTS, VOICED, DEVOICED, DEVOICING, H_DEVOICING) — TODO confirm.
require("Module:Chak-utilities/data")
-- Link formatting (full_link) and category formatting (format_categories).
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
--local m_headword = require("Module:headword")
-- Provides capturing_split, used by parse_root.
local m_stru = require("Module:String utilities")
-- NOTE(review): m_table is not referenced by any function visible in this module.
local m_table = require("Module:Table")
-- Language object for code "chak"; passed to the link and category helpers.
local lang = require("Module:Languages").getByCode("chak")
--- Creates a link to a Chakobsa lexeme.
-- @param term  the term to link to
-- @param face  forwarded unchanged as the second argument of full_link
-- @param alt   alternative text stored in the link data
-- @return whatever Module:links' full_link returns for this term
function export.l(term, face, alt)
	local link_data = {
		lang = lang,
		term = term,
		alt = alt,
	}
	return m_links.full_link(link_data, face)
end
--- Tells whether a romanized 'character' is geminate.
-- A value counts as geminate when it is two or three bytes long and its
-- first two bytes are equal (e.g. "kk", "ssh").
-- @param char  string to test; nil is accepted and yields false
-- @return boolean
function export.is_geminate(char)
	if char == nil then
		return false
	end
	local len = #char
	if len <= 1 or len >= 4 then
		return false
	end
	local first = string.sub(char, 1, 1)
	local second = string.sub(char, 2, 2)
	return first == second
end
--- Reduces multi-letter consonant romanizations to single characters.
-- Digraphs become one character ("sh" -> "ʃ"), trigraph geminates become a
-- doubled single character ("ssh" -> "ʃʃ"), and apostrophes are dropped.
-- @param str  romanized string; nil is treated as the empty string
-- @return the compressed string
function export.compress_consonants(str)
	str = str or ''
	-- Applied in order: the geminate trigraphs must be handled before the
	-- plain digraphs, otherwise "ssh" would become "sʃ" instead of "ʃʃ".
	-- Kept local so that calling this function does not create a global.
	local replacements = {
		{"ssh", "ʃʃ"},
		{"ddh", "ðð"},
		{"tth", "θθ"},
		{"cch", "ʧʧ"},
		{"sh", "ʃ"},
		{"dh", "ð"},
		{"th", "θ"},
		{"j", "ʤ"},
		{"ch", "ʧ"},
		{"'", ""},
	}
	for _, pair in ipairs(replacements) do
		-- gsub also returns a match count; the assignment keeps only the string.
		str = mw.ustring.gsub(str, pair[1], pair[2])
	end
	return str
end
--- Restores the romanization of a string produced by compress_consonants.
-- Each entry is {romanization, compressed form}; the compressed form is
-- searched for and replaced by the romanization.
-- @param str  compressed string; nil is treated as the empty string
-- @return the expanded (romanized) string
function export.expand_consonants(str)
	str = str or ''
	-- Applied in order. The first six entries insert an apostrophe into
	-- sequences such as a separate "s" followed by "h", so that they are not
	-- spelled the same as the digraph produced by the later entries.
	-- Kept local so that calling this function does not create a global.
	local replacements = {
		{"s'h", "sh"},
		{"s'sh", "sʃ"},
		{"d'h", "dh"},
		{"d'dh", "dð"},
		{"t'h", "th"},
		{"t'th", "tθ"},
		{"ssh", "ʃʃ"},
		{"ddh", "ðð"},
		{"tth", "θθ"},
		{"cch", "ʧʧ"},
		{"sh", "ʃ"},
		{"dh", "ð"},
		{"th", "θ"},
		{"j", "ʤ"},
		{"ch", "ʧ"},
	}
	for _, pair in ipairs(replacements) do
		-- gsub also returns a match count; the assignment keeps only the string.
		str = mw.ustring.gsub(str, pair[2], pair[1])
	end
	return str
end
--- Splits a consonant cluster into hyphen-delimited consonants.
-- Doubled (geminate) consonants stay together as one unit, e.g. the doc
-- example "tg" -> "t-g".
-- @param str  romanized consonant string
-- @return the romanized string with "-" between consonants
function export.parse_consonants(str)
	local gsub = mw.ustring.gsub
	-- Work on the one-character-per-consonant form.
	local compact = export.compress_consonants(str)
	-- Turn every character c into NUL..c..c.
	local doubled = gsub(compact, ".", '\0%0%0')
	-- Where the same character sits on both sides of a NUL (a geminate),
	-- collapse "c NUL c" into a single "c", fusing the two groups.
	local fused = gsub(doubled, "(.)%z%1", "%1")
	-- Each remaining NUL plus the extra copy after it becomes a delimiter.
	local delimited = gsub(fused, "%z.", "-")
	-- Drop the first character (the delimiter that precedes the first group).
	local trimmed = gsub(delimited, "^.", "")
	local cleaned = gsub(trimmed, "%-'%-", "-")
	local expanded = export.expand_consonants(cleaned)
	return expanded
end
--- Splits a root string into alternating consonant and vowel parts.
-- Documented example: "kkaalatg" -> "kk","aa","l","a","tg"; with expandFinal
-- the last part is additionally run through parse_consonants ("t-g").
-- @param root_str     root string to split
-- @param expandFinal  when truthy, hyphen-split the final part
-- @return sequence table of parts
function export.parse_root(root_str, expandFinal)
	-- Split on runs of vowels, keeping the vowel runs as parts of the result.
	local root = m_stru.capturing_split(root_str, "([" .. VOWELS .. "]+)")
	-- A leading empty string is discarded.
	if root[1] == '' then
		table.remove(root, 1)
	end
	-- Guard against an empty table: without it, root[#root] would write to
	-- index 0 when nothing is left after the removal above.
	if expandFinal and #root > 0 then
		root[#root] = export.parse_consonants(root[#root])
	end
	-- Debug output to the Scribunto log, retained from the original.
	mw.logObject(root)
	return root
end
--- Condenses a two- or three-byte vowel sequence.
-- Strings shorter than 2 or longer than 3 bytes are returned untouched.
-- A 3-byte string is first cut to its first two bytes. The resulting pair
-- is replaced when it appears in the table below, otherwise kept as is.
-- @param vowel_str  vowel sequence; nil is treated as the empty string
-- @return the condensed string
function export.condense_vowels(vowel_str)
	local merged = {
		aw = "o", au = "o", ao = "o",
		ai = "e", ay = "e", ae = "e",
		wu = "uu", uw = "uu",
		yi = "ii", iy = "ii",
	}
	local seq = vowel_str or ''
	local len = #seq
	if len < 2 or len > 3 then
		return seq
	end
	if len == 3 then
		seq = string.sub(seq, 1, 2)
	end
	-- Pairs missing from the table are left unchanged by gsub.
	local condensed = string.gsub(seq, "%S%S", merged)
	return condensed
end
--- Breaks a root into its onset, vowel and coda.
-- The root may be delimited by hyphens or whitespace ("k-a-t", "k a t"),
-- or given undelimited ("kat"), in which case the parts are located by
-- searching for vowel and non-vowel runs.
-- @param root_str  root string
-- @return table with string fields `onset`, `vowel`, `coda`; or nil when
--         root_str is nil/false, is empty, or has more than three parts
function export.extract_root(root_str)
	if not root_str then
		return nil
	end
	-- All working variables are local so the call does not leak globals.
	local roots = { onset = '', vowel = '', coda = '' }
	-- Whitespace is treated as a delimiter too.
	root_str = string.gsub(root_str, '%s', '-')
	local parts = mw.text.split(root_str, '-')
	local vowel_initial = "^[" .. VOWELS .. "]"

	if #parts == 3 then
		roots.onset = parts[1]
		roots.vowel = parts[2]
		roots.coda = parts[3]
	elseif #parts == 2 then
		-- Decide which slot is missing by looking for a part that starts
		-- with a vowel.
		if string.find(parts[1], vowel_initial) then
			roots.vowel = parts[1]
			roots.coda = parts[2]
		elseif string.find(parts[2], vowel_initial) then
			roots.onset = parts[1]
			roots.vowel = parts[2]
		else
			roots.onset = parts[1]
			roots.coda = parts[2]
		end
	elseif #parts == 1 and parts[1] ~= '' then
		-- Undelimited root: the first vowel run is the vowel.
		local s, e = string.find(root_str, "[" .. VOWELS .. "]+")
		if s then
			roots.vowel = string.sub(root_str, s, e)
		end
		-- A leading non-vowel run is the onset.
		local rest = root_str
		s, e = string.find(root_str, "^[^" .. VOWELS .. "]+")
		if s then
			roots.onset = string.sub(root_str, s, e)
			-- Strip the onset by position. Using the onset as a gsub pattern
			-- would misbehave if it contained pattern magic characters.
			rest = string.sub(root_str, e + 1)
		end
		-- The first non-vowel run in what remains is the coda.
		s, e = string.find(rest, "[^" .. VOWELS .. "]+")
		if s then
			roots.coda = string.sub(rest, s, e)
		end
	else
		return nil
	end
	return roots
end
--- Doubles a romanized consonant.
-- A single byte ("k") or a two-byte value ending in "h" ("sh") gets its
-- first byte prepended ("kk", "ssh"); anything else is returned unchanged.
-- @param char  consonant; nil is treated as the empty string
-- @return the geminated string
function export.geminate_char(char)
	local c = char or ''
	local len = #c
	local is_single = (len == 1)
	local is_h_digraph = (len == 2 and string.sub(c, 2) == 'h')
	if not (is_single or is_h_digraph) then
		return c
	end
	return string.sub(c, 1, 1) .. c
end
--- Undoes gemination of a romanized consonant.
-- A two- or three-byte value whose first two bytes are equal loses its
-- first byte ("kk" -> "k", "ssh" -> "sh"); anything else is returned as is.
-- @param char  consonant; nil is treated as the empty string
-- @return the degeminated string
function export.degeminate_char(char)
	local c = char or ''
	local len = #c
	if len ~= 2 and len ~= 3 then
		return c
	end
	if string.sub(c, 1, 1) ~= string.sub(c, 2, 2) then
		return c
	end
	return string.sub(c, 2)
end
--- Applies the assimilation rules to a romanized string.
-- The string is compressed to one character per consonant, the rules are
-- applied in the order below, and the result is expanded back to the
-- romanization. A hyphen in the input is taken out while the rules run and
-- put back afterwards.
-- @param str  romanized string; nil or '' yields ''
-- @return the assimilated, romanized string
function export.assimilate(str)
	local gsub = mw.ustring.gsub
	-- Locals here: the original assigned `output` and `hyphen` as globals.
	local output = export.compress_consonants(str)
	if str and str ~= '' then
		--Find and record location of hyphen, if any (plain-text search)
		local hyphen = mw.ustring.find(output, "-", 1, true) or 0
		-- Remove every hyphen so the rules can apply across the boundary.
		output = gsub(output, "%-", "")
		--VvC assimilation FIXME - should not happen if v is geminate. Disabling for now.
		-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
		--Sibilant Assimilation: two adjacent sibilants both become the second one
		output = gsub(output, "([" .. SIBILANTS .. "])([" .. SIBILANTS .. "])", "%2%2")
		--Nasal Assimilation
		output = gsub(output, "m([tdszkgθðl])", "n%1")
		output = gsub(output, "n([bfv])", "m%1")
		--General Devoicing: mark the VOICED member of a mixed pair with "~",
		--then replace the marked character via the DEVOICING lookup.
		output = gsub(output, "([" .. DEVOICED .. "])([" .. VOICED .. "])", "%1~%2")
		output = gsub(output, "~(.)", DEVOICING)
		output = gsub(output, "([" .. VOICED .. "])([" .. DEVOICED .. "])", "~%1%2")
		output = gsub(output, "~(.)", DEVOICING)
		--H devoicing: same marking scheme, using the H_DEVOICING lookup
		output = gsub(output, "h([" .. VOICED .. "])", "h~%1")
		output = gsub(output, "~(.)", H_DEVOICING)
		output = gsub(output, "([" .. VOICED .. "])h", "~%1h")
		output = gsub(output, "~(.)", H_DEVOICING)
		-- NOTE(review): compress_consonants rewrites "j" as "ʤ", so "hj"
		-- probably never occurs at this point — confirm whether "hʤ" was meant.
		output = gsub(output, "hj", "hʧ")
		--Misc
		output = gsub(output, "qk", "kk")
		output = gsub(output, "kq", "kk")
		output = gsub(output, "ao", "au")
		output = gsub(output, "ae", "ai")
		output = gsub(output, "aw", "au")
		output = gsub(output, "ua", "wa")
		--Add hyphen, if any. Every substitution above is one-for-one in the
		--literal rules, so the recorded character position is reused as is.
		-- NOTE(review): only the first hyphen is restored; any further hyphens
		-- in the input are dropped.
		if hyphen > 0 then
			output = mw.ustring.sub(output, 1, hyphen - 1) .. '-' .. mw.ustring.sub(output, hyphen)
		end
	end
	return export.expand_consonants(output)
end
--- Template entry point: renders "root + -modifier ..." links and categories.
-- Reads the arguments of the parent frame (root, mod, nocat, plain, alt,
-- face, notext, nolink; positional 1 and 2 alias root and mod), stores the
-- root and modifier in the parser variables 'chak-root' and 'chak-mod', and
-- returns one of:
--   plain  -> the root without hyphens, plus " + -<mod>" when mod is given
--   nocat  -> the link text only
--   notext -> the categories only
--   else   -> the link text followed by the categories
-- @param frame  Scribunto frame object of the #invoke call
-- @return string
function export.chak_from_root(frame)
	local output = {}
	local categories = {}
	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText
	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- Placeholder values when no root is supplied in the Template namespace.
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end
	local root = args["root"]
	local mod = args["mod"]
	local face = args["face"]

	local link_text = export.l(root, face, root)
	mw.getCurrentFrame():callParserFunction('#vardefine', { 'chak-root', root })
	if mod then
		-- One link and one category per consonant of the modifier.
		-- Locals here: the original assigned mod_str and mods as globals.
		local mod_str = export.parse_consonants(mod)
		local mods = mw.text.split(mod_str, "-")
		for _, part in ipairs(mods) do
			link_text = link_text .. " + " .. export.l("-" .. part, face, "-" .. part)
			table.insert(categories, m_utilities.format_categories({ "Chakobsa terms with the modifier -" .. part }, lang))
		end
		mw.getCurrentFrame():callParserFunction('#vardefine', { 'chak-mod', mod })
	end
	table.insert(output, link_text)

	-- Assigning to a single local keeps only gsub's first result (the string).
	local bare_root = string.gsub(root, '%-', '')
	table.insert(categories, m_utilities.format_categories({ "Chakobsa terms belonging to the root " .. bare_root }, lang))

	if args["plain"] then
		-- The original concatenated args["mod"] unconditionally, which raised
		-- an error whenever plain was requested without a modifier.
		if mod then
			return bare_root .. " + -" .. mod
		end
		return bare_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	end
	return table.concat(output) .. table.concat(categories)
end
return export