Module:Chak-utilities

From The Languages of David J. Peterson
Revision as of 11:11, 29 December 2023 by Vaxjedi (talk | contribs)
Jump to navigation Jump to search

Utilities for Chakobsa scripts. Implements {{chak-from-root}}.

Exposed functions

l(term, face, alt)
Creates a link to a Chakobsa lexeme - similar to {{l|chak|term}}
is_geminate(char)
Returns true if the supplied 'character' is geminate.
compress_consonants(str)
Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
Returns an 'expanded' version of the string where reductions from compress_consonants are replaced with the original romanizations.
parse_consonants(str)
Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
Returns a table of the parts of the root string, grouped into runs of consonants and runs of vowels. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, the final part is additionally passed through parse_consonants; the above example would then be "kkaalatg" > "kk","aa","l","a","t-g". Used for inflection functions.

-- Table of public functions; it is returned at the end of this module.
-- NOTE(review): this is assigned as a global rather than a local.
export = {}

-- Loaded for its side effects only (the return value is discarded).
-- NOTE(review): the functions below read the bare names VOWELS, VOICED,
-- DEVOICED, DEVOICING and H_DEVOICING, none of which are defined in this
-- file -- presumably this data module defines them as globals; confirm.
require("Module:Chak-utilities/data")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
-- NOTE(review): m_table is not referenced anywhere in the code visible here.
local m_table = require("Module:Table")


-- Language object for Chakobsa, passed to the link and category helpers below.
local lang = require("Module:Languages").getByCode("chak")

	
-- Creates a link to a Chakobsa lexeme by delegating to Module:links.
--   term: the entry to link to
--   face: display face, forwarded unchanged to full_link
--   alt:  alternative display text for the link
-- Returns whatever m_links.full_link returns for this link data.
function export.l(term, face, alt)
	local link_data = {
		lang = lang,
		term = term,
		alt = alt,
	}
	return m_links.full_link(link_data, face)
end

-- Reports whether a romanized 'character' is geminate.
-- A value counts as geminate when it is two or three bytes long and its
-- first two bytes are the same (e.g. "kk" or "ssh"). nil yields false.
function export.is_geminate(char)
	if char == nil then
		return false
	end

	local length = #char
	if length <= 1 or length >= 4 then
		return false
	end

	local first = string.sub(char, 1, 1)
	local second = string.sub(char, 2, 2)
	return first == second
end

-- Returns a 'compressed' copy of str in which each consonant digraph is
-- replaced by a single character (sh > ʃ, dh > ð, th > θ, ch > ʧ) and each
-- trigraph geminate by a doubled single character (ssh > ʃʃ, ...).
-- A nil str is treated as the empty string.
function export.compress_consonants(str)

	str = str or ''

	-- Kept local so the table does not leak into (or get clobbered through)
	-- the global environment. Order matters: the trigraph geminates must be
	-- rewritten before the plain digraphs, otherwise "ssh" would be consumed
	-- as "s" + "sh".
	local replace = {
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"ch","ʧ"},
--		{"'",""},
	}

	for _, pair in ipairs(replace) do
		str = mw.ustring.gsub(str, pair[1], pair[2])
	end

	return str
end

-- Returns an 'expanded' copy of str in which the single-character stand-ins
-- produced by compress_consonants are turned back into their romanized
-- digraphs/trigraphs. A nil str is treated as the empty string, matching
-- compress_consonants.
function export.expand_consonants(str)

	str = str or ''

	-- Each entry is {romanization, compressed form}; the loop below replaces
	-- the second element with the first. Kept local so the table does not
	-- leak into the global environment.
	-- The first six entries run before the real expansions: a literal
	-- "sh"/"dh"/"th" (or "s"/"d"/"t" directly before its own stand-in) still
	-- present in compressed text gets an apostrophe inserted -- presumably so
	-- the two separate consonants are not misread as a digraph or geminate
	-- once the stand-ins are expanded.
	local replace = {
		{"s'h","sh"},
		{"s'sh","sʃ"},
		{"d'h","dh"},
		{"d'dh","dð"},
		{"t'h","th"},
		{"t'th","tθ"},
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"ch","ʧ"},
	}

	for _, pair in ipairs(replace) do
		str = mw.ustring.gsub(str, pair[2], pair[1])
	end

	return str
end

-- Splits a run of consonants into individual consonants separated by hyphens.
-- Adjacent identical consonants (geminates) stay together as one group,
-- e.g. "kkt" becomes "kk-t". The work is done on the compressed form so that
-- digraphs count as one character, and the result is expanded again at the end.
-- The statements below depend on each other's output; do not reorder them.
function export.parse_consonants(str)
	
	-- One character per consonant (also turns a nil str into '').
	str = export.compress_consonants(str)
	
	-- Rewrite every character c as NUL, c, c.
	str = mw.ustring.gsub(str,".",'\0%0%0')
	-- Where the same character stands on both sides of a NUL (two identical
	-- neighbours in the input), collapse "c NUL c" to "c", removing the boundary.
	str = mw.ustring.gsub(str,"(.)%z%1","%1")
	-- Each remaining NUL plus the duplicate character after it becomes a hyphen.
	str = mw.ustring.gsub(str,"%z.","-")
	-- Drop the first character (the hyphen generated for the first group).
	str = mw.ustring.gsub(str,"^.","")
	-- An apostrophe standing as its own group between two hyphens is removed,
	-- leaving a single hyphen.
	str = mw.ustring.gsub(str,"%-'%-","-")

	-- Restore the romanized digraphs/trigraphs.
	str = export.expand_consonants(str)

	return str
end


-- Splits a root string into alternating consonant and vowel runs.
--   root_str:    the root to split
--   expandFinal: when truthy, the last part is additionally passed through
--                parse_consonants (so "tg" becomes "t-g"); nil/false leaves
--                it untouched
-- Returns the table of parts, e.g. "kkaalatg" > "kk","aa","l","a","tg".
-- The table is also written to the debug log via mw.logObject.
-- NOTE(review): VOWELS is not defined in this file; presumably a string of
-- vowel characters supplied by the data module -- confirm.
function export.parse_root(root_str,expandFinal)

	local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")

	-- A root that begins with a vowel yields an empty leading part; drop it.
	if root[1] == '' then
		table.remove(root,1)
	end

	-- Guard against an empty table: without it, root[#root] would address
	-- index 0 and store a stray entry outside the sequence.
	if expandFinal and #root > 0 then
		root[#root] = export.parse_consonants(root[#root])
	end

	mw.logObject(root)
	return root

end

-- Condenses a two- or three-byte vowel sequence into its contracted form.
-- Inputs shorter than 2 or longer than 3 bytes are returned unchanged (nil
-- becomes ''). Only the first two bytes are considered: if that pair has a
-- contraction it is returned, otherwise the pair itself is returned.
function export.condense_vowels(vowel_str)

	local contractions = {
		aw = "o",
		au = "o",
		ao = "o",
		ai = "e",
		ay = "e",
		ae = "e",
		wu = "uu",
		uw = "uu",
		yi = "ii",
		iy = "ii",
	}

	local text = vowel_str or ''
	local length = #text

	if length < 2 or length > 3 then
		return text
	end

	-- For a two-byte input this is the whole string; for three bytes the
	-- last byte is discarded.
	local pair = string.sub(text, 1, 2)

	return contractions[pair] or pair

end

-- Breaks a root into its onset, vowel and coda.
--   root_str: the root, either already divided by hyphens/whitespace
--             ("k-a-t", "k a t") or as one undivided string ("kat")
-- Returns a table { onset = ..., vowel = ..., coda = ... } (missing pieces
-- are ''), or nil when root_str is nil, empty, or has more than three
-- divided parts.
-- NOTE(review): VOWELS is not defined in this file; presumably a string of
-- vowel characters supplied by the data module -- confirm.
function export.extract_root(root_str)

	if not root_str then
		return nil
	end

	-- All working variables are local so nothing leaks into the global
	-- environment between calls.
	local roots = {
		onset = '',
		vowel = '',
		coda = '',
	}

	-- Whitespace is treated as another separator.
	root_str = string.gsub(root_str,'%s','-')
	local parts = mw.text.split(root_str,'-')

	if #parts == 3 then
		roots['onset'] = parts[1]
		roots['vowel'] = parts[2]
		roots['coda'] = parts[3]
	elseif #parts == 2 then
		-- Two parts: whichever part starts with a vowel is the vowel slot;
		-- if neither does, the root is taken to have no vowel.
		if string.find(parts[1],"^["..VOWELS.."]") then
			roots['vowel'] = parts[1]
			roots['coda'] = parts[2]
		elseif string.find(parts[2],"^["..VOWELS.."]") then
			roots['onset'] = parts[1]
			roots['vowel'] = parts[2]
		else
			roots['onset'] = parts[1]
			roots['coda'] = parts[2]
		end
	elseif #parts == 1 and parts[1] ~= '' then
		-- Undivided root: the vowel is the first vowel run anywhere in it.
		local s, e = string.find(root_str,"["..VOWELS.."]+")
		if s then
			roots['vowel'] = string.sub(root_str,s,e)
		end

		-- The onset is the leading consonant run, if any.
		local rest = root_str
		s, e = string.find(root_str,"^[^"..VOWELS.."]+")
		if s then
			roots['onset'] = string.sub(root_str,s,e)
			-- Cut the onset off by position instead of using it as a gsub
			-- pattern, so onsets containing pattern magic characters are
			-- handled correctly.
			rest = string.sub(root_str,e + 1)
		end

		-- The coda is the first consonant run in what remains.
		s, e = string.find(rest,"[^"..VOWELS.."]+")
		if s then
			roots['coda'] = string.sub(rest,s,e)
		end
	else
		return nil
	end

	return roots
end

-- Doubles a romanized consonant by repeating its first byte.
-- Applies only to a single-byte value ("k" > "kk") or a two-byte value
-- ending in "h" ("sh" > "ssh"); anything else is returned unchanged, and
-- nil becomes ''.
function export.geminate_char(char)
	local text = char or ''
	local length = #text

	local is_single = (length == 1)
	local is_h_digraph = (length == 2 and string.sub(text, 2) == 'h')

	if not (is_single or is_h_digraph) then
		return text
	end

	return string.sub(text, 1, 1) .. text
end

-- Removes gemination from a romanized consonant by dropping its first byte.
-- Applies only when the value is two or three bytes long and its first two
-- bytes are equal ("kk" > "k", "ssh" > "sh"); anything else is returned
-- unchanged, and nil becomes ''.
function export.degeminate_char(char)
	local text = char or ''
	local length = #text

	if length ~= 2 and length ~= 3 then
		return text
	end

	if string.sub(text, 1, 1) ~= string.sub(text, 2, 2) then
		return text
	end

	return string.sub(text, 2)
end

-- Applies the sound-assimilation rewrites to a romanized string.
-- The string is compressed first so every consonant is one character, the
-- rewrites are applied in the order below, and the result is expanded back
-- to the romanized form. A nil or empty str yields ''.
-- NOTE(review): VOICED, DEVOICED, DEVOICING and H_DEVOICING are not defined
-- in this file; presumably character-class strings and replacement lookups
-- supplied by the data module -- confirm.
function export.assimilate(str)

	-- Local, so the working string is not shared through the global
	-- environment.
	local output = export.compress_consonants(str)

	if str and str ~= '' then
		--VvC assimilation FIXME - should not happen if v is geminate. Disabling for now.
		-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)

		--Nasal Assimilation
		output = mw.ustring.gsub(output,"m([tdszkgθð])","n%1")
		output = mw.ustring.gsub(output,"n([bfv])","m%1")

		--General Devoicing
		-- "~" is a temporary marker put in front of the consonant to be
		-- rewritten; the following gsub consumes the marker and passes the
		-- marked character to DEVOICING.
		output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2")
		output = mw.ustring.gsub(output,"~(.)",DEVOICING)
		output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2")
		output = mw.ustring.gsub(output,"~(.)",DEVOICING)

		--H devoicing
		-- Same marker technique, for a voiced consonant next to "h".
		output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1")
		output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
		output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h")
		output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
		output = mw.ustring.gsub(output,"hj","hʧ")

		--Misc
		output = mw.ustring.gsub(output,"qk","kk")
		output = mw.ustring.gsub(output,"kq","kk")
		output = mw.ustring.gsub(output,"ao","au")
		output = mw.ustring.gsub(output,"ae","ai")
		output = mw.ustring.gsub(output,"aw","au")
	end

	return export.expand_consonants(output)
end

-- Template entry point implementing {{chak-from-root}}.
-- Reads the arguments of the calling template (frame:getParent().args):
--   root / 1 : the root (required)
--   mod  / 2 : optional modifier consonants
--   face     : link display face, default "term"
--   alt      : accepted but not used by the code below
--   plain    : return plain text "root + -mod" with no links or categories
--   nocat    : return the linked text without categories
--   notext   : return the categories without the linked text
--   nolink   : accepted but not used by the code below
-- Precedence when several flags are set: plain, then nocat, then notext.
-- Side effects: defines the parser variables 'chak-root' and (when a
-- modifier is given) 'chak-mod' through #vardefine.
-- Returns the wikitext string selected by the flags above.
function export.chak_from_root(frame)
	local output = {}
	local categories = {}

	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText

	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- On the template page itself there are no arguments; fall back to a
	-- sample root and modifier so the page renders something.
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end

	local link_text = export.l(args["root"], args["face"], args["root"] )
	mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )

	if args["mod"] then
		-- Locals: these previously leaked into the global environment.
		local mod_str = export.parse_consonants(args["mod"])
		local mods = mw.text.split(mod_str,"-")
		-- One link and one category per individual modifier consonant.
		for _, mod in ipairs(mods) do
			link_text = link_text.." + -"..export.l(mod, args["face"], mod)
			table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mod }, lang) )
		end

		mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
	end

	-- Root with its hyphens stripped; the parentheses discard gsub's second
	-- return value (the replacement count).
	local plain_root = (args["root"]:gsub('%-',''))

	table.insert(output, link_text)
	table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. plain_root }, lang) )

	if args["plain"] then
		-- Without a modifier there is nothing to append; concatenating nil
		-- here used to raise an error.
		if args["mod"] then
			return plain_root.." + -"..args["mod"]
		end
		return plain_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
end

return export