Module:Chak-utilities: Difference between revisions

From The Languages of David J. Peterson
Jump to navigation Jump to search
No edit summary
No edit summary
 
(46 intermediate revisions by the same user not shown)
Line 1: Line 1:
export = {}
export = {}


require("Module:Chak-utilities/data")
local m_links = require("Module:links")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local m_utilities = require("Module:utilities")
local m_headword = require("Module:headword")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
local m_table = require("Module:Table")
 


local lang = require("Module:Languages").getByCode("chak")
local lang = require("Module:Languages").getByCode("chak")
local zwnj = "\u200C"


local function link(term, face, alt)
function export.l(term, face, alt)
return m_links.full_link( { term = term, lang = lang, alt = alt }, face )
return m_links.full_link( { term = term, lang = lang, alt = alt }, face )
end
end


function export.is_geminate(char)
function export.is_geminate(char)
return char ~= nil and #char > 1 and #char < 4 and string.sub(char,1,1) == string.sub(char,2,2)
return char ~= nil and #char > 1 and #char < 4 and mw.ustring.sub(char,1,1) == mw.ustring.sub(char,2,2)
end
 
function export.compress_consonants(str)
str = str or ''
replace = {{"ssh","ʃʃ"},
{"ddh","ðð"},
{"tth","θθ"},
{"cch","ʧʧ"},
{"sh","ʃ"},
{"dh","ð"},
{"th","θ"},
{"j","ʤ"},
{"ch","ʧ"},
{"'",""},
}
for _,v in ipairs(replace) do
str = mw.ustring.gsub(str,v[1],v[2])
end
return str
end
 
function export.expand_consonants(str)
replace = {
{"s'h","sh"},
{"s'sh","sʃ"},
{"d'h","dh"},
{"d'dh","dð"},
{"t'h","th"},
{"t'th","tθ"},
{"ssh","ʃʃ"},
{"ddh","ðð"},
{"tth","θθ"},
{"cch","ʧʧ"},
{"sh","ʃ"},
{"dh","ð"},
{"th","θ"},
{"j","ʤ"},
{"ch","ʧ"},
}
for _,v in ipairs(replace) do
str = mw.ustring.gsub(str,v[2],v[1])
end
return str
end
 
function export.parse_consonants(str)
str = export.compress_consonants(str)
str = mw.ustring.gsub(str,".",'\0%0%0')
str = mw.ustring.gsub(str,"(.)%z%1","%1")
str = mw.ustring.gsub(str,"%z.","-")
str = mw.ustring.gsub(str,"^.","")
str = mw.ustring.gsub(str,"%-'%-","-")
 
str = export.expand_consonants(str)
 
return str
end
 
 
function export.parse_root(root_str,expandFinal)
if expandFinal == nil then
expandFinal = false
end
 
stems = {}
 
local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")
if root[1] == '' then
table.remove(root,1)
end
if expandFinal then
local tail = root[#root]
local new_tail = export.parse_consonants(tail)
root[#root] = new_tail
end
 
mw.logObject(root)
return root
 
end
 
function export.condense_vowels(vowel_str)
local output = vowel_str or ''
if #output > 3 or #output < 2 then
return output
elseif #output == 3 then
output = string.sub(output,1,2)
end
output = string.gsub(output,"%S%S",{["aw"]="o",
["au"]="o",
["ao"]="o",
["ai"]="e",
["ay"]="e",
["ae"]="e",
["wu"]="uu",
["uw"]="uu",
["yi"]="ii",
["iy"]="ii",
})
return output
end
end


function export.extract_root(root_str)
function export.extract_root(root_str)
vowels = "aeiou"
roots = {
roots = {
Line 30: Line 144:
if root_str then
if root_str then


root_str = mw.ustring.gsub(root_str,zwnj,'-')
root_str = string.gsub(root_str,'%s','-')
root_str = string.gsub(root_str,'%s','-')
parts = mw.text.split(root_str,'-')
parts = mw.text.split(root_str,'-')
Line 39: Line 152:
roots['coda'] = parts[3]
roots['coda'] = parts[3]
elseif #parts == 2 then
elseif #parts == 2 then
if string.find(parts[1],"^[aeiou]") then
if string.find(parts[1],"^["..VOWELS.."]") then
roots['onset'] = ''
roots['onset'] = ''
roots['vowel'] = parts[1]
roots['vowel'] = parts[1]
roots['coda'] = parts[2]
roots['coda'] = parts[2]
elseif string.find(parts[2],"^[aeiou]") then
elseif string.find(parts[2],"^["..VOWELS.."]") then
roots['onset'] = parts[1]
roots['onset'] = parts[1]
roots['vowel'] = parts[2]
roots['vowel'] = parts[2]
Line 54: Line 167:
elseif #parts == 1 and parts[1] ~= '' then
elseif #parts == 1 and parts[1] ~= '' then
local temp = root_str
local temp = root_str
s,e = string.find(temp,"["..vowels.."]+")
s,e = string.find(temp,"["..VOWELS.."]+")
if s then  
if s then  
roots['vowel'] = string.sub(temp,s,e)
roots['vowel'] = string.sub(temp,s,e)
end
end
s,e = string.find(temp,"^[^"..vowels.."]+")
s,e = string.find(temp,"^[^"..VOWELS.."]+")
if s then
if s then
roots['onset'] = string.sub(temp,s,e)
roots['onset'] = string.sub(temp,s,e)
temp = string.gsub(temp,roots['onset'],'',1)
temp = string.gsub(temp,roots['onset'],'',1)
end
end
s,e = string.find(temp,"[^"..vowels.."]+")
s,e = string.find(temp,"[^"..VOWELS.."]+")
if s then
if s then
roots['coda'] = string.sub(temp,s,e)
roots['coda'] = string.sub(temp,s,e)
Line 98: Line 211:
function export.assimilate(str)
function export.assimilate(str)


devoiced = "[ktfs]"
output = export.compress_consonants(str)
voiced = "[gdvz]"
devoicing = {
["g"] = "k",
["d"] = "t",
["v"] = "f",
["z"] = "s",
}
h_devoicing = {
["g"] = "k",
["d"] = "t",
["v"] = "f",
["z"] = "z",
}
 
output = str


if str and str ~= '' then
if str and str ~= '' then
--VvC assimilition
--Find and record location of hyphen, if any
output = string.gsub(output,"([au])v([^iua])","%1~v%2"):gsub("(.)~v",{["a"] = "au", ["u"] = "uu"})
hyphen = mw.ustring.find(output,"-",1,true) or 0
output = mw.ustring.gsub(output,"-","")
--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now.
-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
--Sibilant Assimilation
output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2")
--Nasal Assimilation
output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1")
output = mw.ustring.gsub(output,"n([bfv])","m%1")
--General Devoicing
--General Devoicing
output = string.gsub(output,"("..devoiced..")("..voiced..")","%1~%2"):gsub("~(.)",devoicing)
output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2")
output = string.gsub(output,"("..voiced..")("..devoiced..")","~%1%2"):gsub("~(.)",devoicing)
output = mw.ustring.gsub(output,"~(.)",DEVOICING)
output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2")
output = mw.ustring.gsub(output,"~(.)",DEVOICING)
--H devoicing
--H devoicing
output = string.gsub(output,"h("..voiced..")","%1~%2"):gsub("~(.)",h_devoicing)
output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1")
output = string.gsub(output,"("..voiced..")(h)","~%1%2"):gsub("~(.)",h_devoicing)
output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
output = string.gsub(output,"("..voiced..")(h)","~%1%2"):gsub("~(.)",h_devoicing)
output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h")
output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
output = mw.ustring.gsub(output,"hj","hʧ")
--Misc
output = mw.ustring.gsub(output,"qk","kk")
output = mw.ustring.gsub(output,"kq","kk")
output = mw.ustring.gsub(output,"ao","au")
output = mw.ustring.gsub(output,"ae","ai")
output = mw.ustring.gsub(output,"aw","au")
output = mw.ustring.gsub(output,"ua","wa")
--Add hyphen, if any
if hyphen > 0 then
output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen)
end
end
end
return output
return export.expand_consonants(output)
end
 
function export.prothetic(str)
str = str or ''
str2 = export.compress_consonants(str)
char1 = mw.ustring.sub(str2,1,1)
char2 = mw.ustring.sub(str2,2,2)
local result = ""
if mw.ustring.find(VOWELS,char1) or mw.ustring.find(VOWELS,char2) then
result = str
else
local v = mw.ustring.match(str,"["..VOWELS.."]") or 'a'
result = v..str
end
return result
end
end


Line 157: Line 296:
args["mod"] = "t"
args["mod"] = "t"
end
end
local link_text = export.l(args["root"], args["face"], args["root"] )
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )
local verbose_root = export.extract_root(args["root"])
local link_text = link(args["root"], args["face"], verbose_root)
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', verbose_root } )
if args["mod"] then
if args["mod"] then
link_text = link_text.." + -"..link(args["mod"], args["face"], args["mod"])
mod_str = export.parse_consonants(args["mod"])
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. args["mod"] }, lang) )
mods = mw.text.split(mod_str,"-")
for k in ipairs(mods) do
link_text = link_text.." + "..export.l("-"..mods[k], args["face"], "-"..mods[k])
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mods[k] }, lang) )
end
 
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
end
end
table.insert(output, link_text)
table.insert(output, link_text)
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. args["root"] }, lang) )
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. args["root"]:gsub('-','') }, lang) )
if args["plain"] then
if args["plain"] then
return args["root"].." + -"..args["mod"]
return args["root"]:gsub('-','').." + -"..args["mod"]
elseif args["nocat"] then
elseif args["nocat"] then
return table.concat(output)
return table.concat(output)

Latest revision as of 16:18, 21 July 2024

Utilities for Chakobsa scripts. Implements {{chak-from-root}}.

Exposed functions

l(term, face, alt)
Creates a link to a Chakobsa lexeme - similar to {{l|chak|term}}
is_geminate(char)
returns true if the supplied 'character' is geminate
compress_consonants(str)
Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
Returns an 'expanded' version of the string where reductions from compress_consonants are replaced with the original romanizations.
parse_consonants(str)
Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
Returns a table of the parts of the root string, grouped into consonant and vowel clusters. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, the final part is additionally split through parse_consonants, so the above example would be "kkaalatg" > "kk","aa","l","a","t-g". Used for inflection functions.

export = {}

require("Module:Chak-utilities/data")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
local m_table = require("Module:Table")


local lang = require("Module:Languages").getByCode("chak")

	
--- Create a link to a Chakobsa lexeme.
-- Thin wrapper around `full_link` from Module:links with the language
-- object fixed to the module-level `lang` (code "chak").
-- @param term  term to link to
-- @param face  forwarded unchanged as the second argument of `full_link`
-- @param alt   alternative display text, forwarded in the link data
-- @return whatever `m_links.full_link` returns
function export.l(term, face, alt)
	local link_data = { term = term, lang = lang, alt = alt }
	return m_links.full_link(link_data, face)
end

--- Tell whether the supplied 'character' is a geminate (doubled) consonant.
-- A geminate is two or three codepoints long with identical first and second
-- codepoints, e.g. "kk", "ssh" or the compressed form "ʃʃ".
-- Length is measured in codepoints (not bytes) so that it agrees with the
-- codepoint-based comparison below; a byte count would reject non-ASCII
-- geminates such as "ʃʃ", which occupy four bytes.
-- @param char  string to test; may be nil
-- @return true if `char` is a geminate, false otherwise
function export.is_geminate(char)
	if char == nil then
		return false
	end

	-- mw.ustring.len returns nil for invalid UTF-8; treat that as "not geminate".
	local length = mw.ustring.len(char)
	if not length or length < 2 or length > 3 then
		return false
	end

	return mw.ustring.sub(char, 1, 1) == mw.ustring.sub(char, 2, 2)
end

--- Return a 'compressed' copy of a string in which every consonant is a
-- single character: digraphs become one symbol, trigraph geminates become a
-- doubled symbol, and apostrophes are removed.
-- @param str  romanized string; nil is treated as the empty string
-- @return the compressed string
function export.compress_consonants(str)
	str = str or ''

	-- Order matters: trigraph geminates must be rewritten before the plain
	-- digraphs they contain, and the apostrophe is dropped last.
	-- Declared local so the table does not leak into the global environment.
	local replacements = {
		{"ssh", "ʃʃ"},
		{"ddh", "ðð"},
		{"tth", "θθ"},
		{"cch", "ʧʧ"},
		{"sh", "ʃ"},
		{"dh", "ð"},
		{"th", "θ"},
		{"j", "ʤ"},
		{"ch", "ʧ"},
		{"'", ""},
	}

	for _, pair in ipairs(replacements) do
		str = mw.ustring.gsub(str, pair[1], pair[2])
	end

	return str
end

--- Return an 'expanded' copy of a string in which the single-character
-- symbols are turned back into their romanized spellings.
-- Each rule is { romanized, compressed } and is applied as
-- compressed -> romanized, in the listed order.
-- @param str  compressed string; nil is treated as the empty string, matching
--             the nil handling of compress_consonants
-- @return the expanded string
function export.expand_consonants(str)
	str = str or ''

	-- The first six rules insert an apostrophe into sequences that are still
	-- spelled with separate letters (e.g. "sh" -> "s'h"), presumably so that
	-- they are not read as a digraph once the symbols below are expanded.
	-- Declared local so the table does not leak into the global environment.
	local replacements = {
		{"s'h", "sh"},
		{"s'sh", "sʃ"},
		{"d'h", "dh"},
		{"d'dh", "dð"},
		{"t'h", "th"},
		{"t'th", "tθ"},
		{"ssh", "ʃʃ"},
		{"ddh", "ðð"},
		{"tth", "θθ"},
		{"cch", "ʧʧ"},
		{"sh", "ʃ"},
		{"dh", "ð"},
		{"th", "θ"},
		{"j", "ʤ"},
		{"ch", "ʧ"},
	}

	for _, pair in ipairs(replacements) do
		str = mw.ustring.gsub(str, pair[2], pair[1])
	end

	return str
end

--- Split a consonant cluster into its consonants, delimited by hyphens.
-- A doubled consonant is kept together as one unit, e.g. "tg" -> "t-g" while
-- "kk" stays "kk".
-- @param str  romanized consonant cluster (nil is handled by compress_consonants)
-- @return the hyphen-delimited cluster in romanized spelling
function export.parse_consonants(str)
	local compressed = export.compress_consonants(str)

	-- Tag every character: c -> NUL c c
	local tagged = mw.ustring.gsub(compressed, ".", '\0%0%0')
	-- Where the same character sits on both sides of a NUL, fuse the two
	-- units by dropping that NUL and one copy of the character.
	local fused = mw.ustring.gsub(tagged, "(.)%z%1", "%1")
	-- Every remaining NUL plus the duplicate character after it becomes a hyphen.
	local hyphenated = mw.ustring.gsub(fused, "%z.", "-")
	-- Discard the first character (the leading hyphen produced above).
	local trimmed = mw.ustring.gsub(hyphenated, "^.", "")
	-- Collapse an apostrophe standing alone between two hyphens.
	local cleaned = mw.ustring.gsub(trimmed, "%-'%-", "-")

	local expanded = export.expand_consonants(cleaned)
	return expanded
end


--- Split a root string into alternating consonant and vowel groups.
-- Example: "kkaalatg" -> "kk","aa","l","a","tg". When `expandFinal` is truthy
-- the final group is additionally passed through parse_consonants, giving
-- "kk","aa","l","a","t-g".
-- @param root_str     root string to split
-- @param expandFinal  optional; when truthy, hyphen-split the last group
-- @return a table (sequence) of the groups
function export.parse_root(root_str, expandFinal)
	-- Split on runs of vowels, keeping the vowel runs as their own entries.
	local root = m_stru.capturing_split(root_str, "([" .. VOWELS .. "]+)")

	-- Drop the leading entry when it is the empty string.
	if root[1] == '' then
		table.remove(root, 1)
	end

	-- Only touch the final group when there is one; indexing an empty table
	-- with #root would otherwise write to root[0].
	if expandFinal and #root > 0 then
		root[#root] = export.parse_consonants(root[#root])
	end

	mw.logObject(root)
	return root
end

--- Condense a two-letter vowel/glide sequence into its merged form.
-- Inputs shorter than 2 or longer than 3 bytes are returned untouched; a
-- 3-byte input is first cut down to its first two bytes. The resulting pair
-- is replaced when it appears in the table below and returned as is otherwise.
-- @param vowel_str  vowel string; nil is treated as the empty string
-- @return the condensed string
function export.condense_vowels(vowel_str)
	local condensed_forms = {
		["aw"] = "o",
		["au"] = "o",
		["ao"] = "o",
		["ai"] = "e",
		["ay"] = "e",
		["ae"] = "e",
		["wu"] = "uu",
		["uw"] = "uu",
		["yi"] = "ii",
		["iy"] = "ii",
	}

	local vowels = vowel_str or ''
	local byte_count = #vowels
	if byte_count < 2 or byte_count > 3 then
		return vowels
	end

	-- Only the first two bytes take part in the lookup.
	local pair = string.sub(vowels, 1, 2)
	return condensed_forms[pair] or pair
end

--- Break a root string into its onset, vowel and coda.
-- Whitespace is treated like a hyphen. A root written with hyphens is split
-- on them (three parts: onset-vowel-coda; two parts: decided by which part
-- starts with a vowel); an unhyphenated root is split around its first run
-- of vowels.
-- @param root_str  root string, e.g. "t-e-s" or "tes"
-- @return a table { onset = ..., vowel = ..., coda = ... } (missing pieces are
--         empty strings), or nil when `root_str` is nil/false, empty, or has
--         more than three hyphen-separated parts
function export.extract_root(root_str)
	if not root_str then
		return nil
	end

	-- All working values are local so nothing leaks into the global environment.
	local roots = {
		onset = '',
		vowel = '',
		coda = '',
	}

	local normalized = string.gsub(root_str, '%s', '-')
	local parts = mw.text.split(normalized, '-')
	local starts_with_vowel = "^[" .. VOWELS .. "]"

	if #parts == 3 then
		roots.onset = parts[1]
		roots.vowel = parts[2]
		roots.coda = parts[3]
	elseif #parts == 2 then
		if string.find(parts[1], starts_with_vowel) then
			-- vowel-coda
			roots.vowel = parts[1]
			roots.coda = parts[2]
		elseif string.find(parts[2], starts_with_vowel) then
			-- onset-vowel
			roots.onset = parts[1]
			roots.vowel = parts[2]
		else
			-- no vowel part at all: onset-coda
			roots.onset = parts[1]
			roots.coda = parts[2]
		end
	elseif #parts == 1 and parts[1] ~= '' then
		local rest = normalized

		-- First run of vowels anywhere in the string.
		local vowel_run = string.match(rest, "[" .. VOWELS .. "]+")
		if vowel_run then
			roots.vowel = vowel_run
		end

		-- Leading run of non-vowels is the onset. Cut it off by position
		-- rather than by re-using it as a pattern, so an onset containing
		-- pattern-magic characters cannot break the removal.
		local _, onset_end = string.find(rest, "^[^" .. VOWELS .. "]+")
		if onset_end then
			roots.onset = string.sub(rest, 1, onset_end)
			rest = string.sub(rest, onset_end + 1)
		end

		-- First run of non-vowels in what remains is the coda.
		local coda_run = string.match(rest, "[^" .. VOWELS .. "]+")
		if coda_run then
			roots.coda = coda_run
		end
	else
		return nil
	end

	return roots
end

--- Double a single consonant.
-- A one-byte consonant ("k") or a two-byte digraph ending in "h" ("sh") gets
-- its first letter repeated ("kk", "ssh"). Anything else, including input
-- that is already doubled, is returned unchanged.
-- @param char  romanized consonant; nil is treated as the empty string
-- @return the geminated consonant, or `char` itself when it does not qualify
function export.geminate_char(char)
	char = char or ''

	local first = string.sub(char, 1, 1)
	local is_single = #char == 1
	-- "hh" also ends in "h" but is an already-geminate h, not a digraph;
	-- without this exclusion it would be turned into "hhh".
	local is_digraph = #char == 2 and string.sub(char, 2) == 'h' and first ~= 'h'

	if is_single or is_digraph then
		return first .. char
	end
	return char
end

--- Reduce a doubled consonant to its single form.
-- When the input is two or three bytes long and its first two bytes are
-- equal, the first byte is dropped ("kk" -> "k", "ssh" -> "sh"); otherwise
-- the input is returned unchanged.
-- @param char  romanized consonant; nil is treated as the empty string
-- @return the degeminated consonant
function export.degeminate_char(char)
	local text = char or ''
	local byte_count = #text

	if byte_count ~= 2 and byte_count ~= 3 then
		return text
	end
	if string.sub(text, 1, 1) ~= string.sub(text, 2, 2) then
		return text
	end

	return string.sub(text, 2)
end

--- Apply the assimilation rules to a string.
-- The string is compressed to one character per consonant, hyphens are
-- removed while the rules run, the first hyphen is put back at its recorded
-- position, and the result is expanded to romanized spelling again.
-- @param str  romanized string, optionally containing a hyphen; may be nil
-- @return the assimilated string in romanized spelling
function export.assimilate(str)
	-- Local working values: as globals they would leak into (and could be
	-- clobbered through) the shared global environment.
	local output = export.compress_consonants(str)

	if str and str ~= '' then
		-- Find and record location of hyphen, if any.
		-- NOTE: only the position of the first hyphen is recorded, while every
		-- hyphen is removed on the next line.
		local hyphen = mw.ustring.find(output, "-", 1, true) or 0
		output = mw.ustring.gsub(output, "-", "")

		-- VvC assimilation FIXME - should not happen if v is geminate. Disabled for now.

		-- Sibilant assimilation: the first sibilant becomes a copy of the second
		output = mw.ustring.gsub(output, "([" .. SIBILANTS .. "])([" .. SIBILANTS .. "])", "%2%2")
		-- Nasal assimilation
		output = mw.ustring.gsub(output, "m([tdszkgθðl])", "n%1")
		output = mw.ustring.gsub(output, "n([bfv])", "m%1")
		-- General devoicing: mark the voiced member of a mixed pair with "~",
		-- then replace "~" plus that character through the DEVOICING table
		output = mw.ustring.gsub(output, "([" .. DEVOICED .. "])([" .. VOICED .. "])", "%1~%2")
		output = mw.ustring.gsub(output, "~(.)", DEVOICING)
		output = mw.ustring.gsub(output, "([" .. VOICED .. "])([" .. DEVOICED .. "])", "~%1%2")
		output = mw.ustring.gsub(output, "~(.)", DEVOICING)
		-- H devoicing: same marking scheme, using the H_DEVOICING table
		output = mw.ustring.gsub(output, "h([" .. VOICED .. "])", "h~%1")
		output = mw.ustring.gsub(output, "~(.)", H_DEVOICING)
		output = mw.ustring.gsub(output, "([" .. VOICED .. "])h", "~%1h")
		output = mw.ustring.gsub(output, "~(.)", H_DEVOICING)
		output = mw.ustring.gsub(output, "hj", "hʧ")
		-- Misc
		output = mw.ustring.gsub(output, "qk", "kk")
		output = mw.ustring.gsub(output, "kq", "kk")
		output = mw.ustring.gsub(output, "ao", "au")
		output = mw.ustring.gsub(output, "ae", "ai")
		output = mw.ustring.gsub(output, "aw", "au")
		output = mw.ustring.gsub(output, "ua", "wa")

		-- Add hyphen back, if any
		if hyphen > 0 then
			output = mw.ustring.sub(output, 1, hyphen - 1) .. '-' .. mw.ustring.sub(output, hyphen)
		end
	end

	return export.expand_consonants(output)
end

--- Add a prothetic (leading) vowel to a string that needs one.
-- The string is left alone when either of its first two characters (after
-- consonant compression) is found in VOWELS. Otherwise the first vowel
-- occurring in the string, or "a" when it has none, is prefixed.
-- @param str  romanized string; nil is treated as the empty string
-- @return `str`, possibly with a vowel prepended
function export.prothetic(str)
	str = str or ''

	-- Local working values so nothing leaks into the global environment.
	local compressed = export.compress_consonants(str)
	local first = mw.ustring.sub(compressed, 1, 1)
	local second = mw.ustring.sub(compressed, 2, 2)

	-- Plain (non-pattern) search: the characters come from the input and must
	-- not be interpreted as pattern syntax. An empty character (string shorter
	-- than two characters) still counts as found, as before.
	-- assumes VOWELS is a plain list of vowel characters - TODO confirm
	if mw.ustring.find(VOWELS, first, 1, true) or mw.ustring.find(VOWELS, second, 1, true) then
		return str
	end

	local vowel = mw.ustring.match(str, "[" .. VOWELS .. "]") or 'a'
	return vowel .. str
end

--- Template entry point: show the root (and modifiers) a term derives from.
-- Reads the parent frame's arguments:
--   1 / root  the root (required)
--   2 / mod   modifier consonant(s), split into single consonants
--   face      link face passed to export.l (default "term")
--   plain     return unlinked text "root + -mod" only
--   nocat     return the links without categories
--   notext    return the categories without links
--   alt, nolink  accepted but not used by this function
-- Side effects: defines the parser variables 'chak-root' and, when a
-- modifier is given, 'chak-mod' through the #vardefine parser function.
-- @param frame  the frame object; its parent's args are processed
-- @return wikitext string
function export.chak_from_root(frame)
	local output = {}
	local categories = {}

	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText

	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Example values when there is no root and the page is in the Template namespace.
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end

	-- Root with its hyphens removed; the parentheses keep only gsub's first result.
	local plain_root = (args["root"]:gsub('-', ''))

	local link_text = export.l(args["root"], args["face"], args["root"] )
	mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )

	if args["mod"] then
		-- One link and one category per individual modifier consonant.
		-- Locals here: as globals these would leak out of the function.
		local mod_str = export.parse_consonants(args["mod"])
		local mods = mw.text.split(mod_str, "-")
		for _, mod in ipairs(mods) do
			link_text = link_text .. " + " .. export.l("-" .. mod, args["face"], "-" .. mod)
			table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mod }, lang) )
		end

		mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
	end

	table.insert(output, link_text)
	table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. plain_root }, lang) )

	if args["plain"] then
		-- Without a modifier there is nothing to append; concatenating nil
		-- here would raise an error.
		if args["mod"] then
			return plain_root .. " + -" .. args["mod"]
		end
		return plain_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
end

return export