Module:Chak-utilities: Difference between revisions

From The Languages of David J. Peterson
Jump to navigation Jump to search
No edit summary
No edit summary
Line 78: Line 78:
stems = {}
stems = {}


root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")
local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")
if root[1] == '' then
if root[1] == '' then
table.remove(root,1)
table.remove(root,1)
Line 84: Line 84:
if expandFinal then
if expandFinal then
root[#root] = export.parse_consonants(root[#root])
local tail = root[#root]
local new_tail = export.parse_consonants(tail)
root[#root] = new_tail
end
end


return root
return table.concat(root,"|")


end
end

Revision as of 16:49, 24 October 2023

Utilities for Chakobsa scripts. Implements {{chak-from-root}}.

Exposed functions

l(term, face, alt)
Creates a link to a Chakobsa lexeme - similar to {{l|chak|term}}
is_geminate(char)
Returns true if the supplied 'character' is geminate.
compress_consonants(str)
Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
Returns an 'expanded' version of the string where reductions from compress_consonants are replaced with the original romanizations.
parse_consonants(str)
Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
Returns the parts of the root string, grouped into consonant and vowel clusters. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, the final part is additionally parsed through parse_consonants, so the above example would be "kkaalatg" > "kk","aa","l","a","t-g". Note that in this revision the parts are returned as a single string joined with "|" rather than as a table. Used for inflection functions.

-- Table of public functions; it is what this module returns (see the final
-- `return export`).
-- NOTE(review): assigned without `local`, so this is a global. Confirm that
-- nothing (e.g. the data module loaded below) relies on that before changing it.
export = {}

-- Loaded for its side effects only: the return value is discarded.
-- Presumably this is where the globals VOWELS, VOICED, DEVOICED, DEVOICING and
-- H_DEVOICING used by the functions below come from -- TODO confirm.
require("Module:Chak-utilities/data")
-- Provides full_link, used by export.l.
local m_links = require("Module:links")
-- Provides format_categories, used by export.chak_from_root.
local m_utilities = require("Module:utilities")
-- Not referenced by any function visible in this revision.
local m_headword = require("Module:headword")
-- Provides capturing_split, used by export.parse_root.
local m_stru = require("Module:String utilities")
-- Not referenced by any function visible in this revision.
local m_table = require("Module:Table")


-- Language object for code "chak"; passed to link and category helpers.
local lang = require("Module:Languages").getByCode("chak")

	
-- Creates a link to a Chakobsa lexeme (similar to {{l|chak|term}}).
--   term: the term to link to
--   face: forwarded unchanged as the second argument of full_link
--   alt:  alternative text for the link
-- Returns whatever Module:links' full_link returns.
function export.l(term, face, alt)
	local link_data = {
		lang = lang,
		term = term,
		alt = alt,
	}
	return m_links.full_link(link_data, face)
end

-- Returns true if the supplied 'character' is geminate: it is two or three
-- bytes long and its first two bytes are identical (e.g. "kk", "ssh").
-- Returns false for nil and for anything shorter or longer.
function export.is_geminate(char)
	if char == nil then
		return false
	end

	local len = #char
	if len < 2 or len > 3 then
		return false
	end

	local first = string.sub(char, 1, 1)
	local second = string.sub(char, 2, 2)
	return first == second
end

-- Returns a 'compressed' version of the string in which consonant digraphs are
-- reduced to a single character and trigraph geminates to a doubled single
-- character (e.g. "sh" -> "ʃ", "ssh" -> "ʃʃ"). Mostly useful as a
-- pre-processing step for the other parsing functions.
--   str: the romanized string to compress
function export.compress_consonants(str)
	-- Kept local so the table does not leak into (or clobber) the global
	-- environment. Order matters: the geminate trigraphs must be replaced
	-- before the plain digraphs, otherwise "ssh" would become "s" + "ʃ".
	local replacements = {
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"ch","ʧ"},
	}

	for _, pair in ipairs(replacements) do
		-- Plain assignment keeps only gsub's first result (the new string).
		str = mw.ustring.gsub(str, pair[1], pair[2])
	end

	return str
end

-- Returns an 'expanded' version of the string in which the single-character
-- stand-ins produced by compress_consonants are turned back into the original
-- romanizations (e.g. "ʃ" -> "sh", "ʃʃ" -> "ssh").
--   str: the compressed string to expand
function export.expand_consonants(str)
	-- Kept local so the table does not leak into (or clobber) the global
	-- environment. Order matters: doubled characters must be expanded before
	-- single ones, otherwise "ʃʃ" would become "shsh" instead of "ssh".
	local replacements = {
		{"ssh","ʃʃ"},
		{"ddh","ðð"},
		{"tth","θθ"},
		{"cch","ʧʧ"},
		{"sh","ʃ"},
		{"dh","ð"},
		{"th","θ"},
		{"ch","ʧ"},
	}

	for _, pair in ipairs(replacements) do
		-- Reverse direction of compress_consonants: second column -> first.
		str = mw.ustring.gsub(str, pair[2], pair[1])
	end

	return str
end

-- Returns a string in which the consonants have been split out, delimited by
-- hyphens. A doubled (geminate) consonant is kept together as one unit, so
-- "tg" becomes "t-g" while "kk" stays "kk".
--   str: the romanized consonant cluster to split
function export.parse_consonants(str)
	-- Work on the compressed form so each digraph counts as one character.
	local compressed = export.compress_consonants(str)

	-- Prefix every character with a NUL marker and double it: c -> \0cc.
	local marked = mw.ustring.gsub(compressed,".",'\0%0%0')
	-- Where the same character stands on both sides of a marker, drop the
	-- marker and one copy; this glues runs of identical characters together.
	local glued = mw.ustring.gsub(marked,"(.)%z%1","%1")
	-- Each remaining marker, plus the extra copy after it, becomes a hyphen.
	local hyphenated = mw.ustring.gsub(glued,"%z.","-")
	-- Strip the first character (the leading hyphen on non-empty input).
	local trimmed = mw.ustring.gsub(hyphenated,"^.","")

	-- Restore the original romanization of the digraphs.
	local expanded = export.expand_consonants(trimmed)
	return expanded
end


-- Splits a root string into alternating consonant and vowel groups and returns
-- them joined with "|" as a single string.
-- Example: "kkaalatg" yields the parts "kk","aa","l","a","tg".
--   root_str:    the root to split
--   expandFinal: optional; when truthy, the final part is additionally run
--                through parse_consonants (so "tg" becomes "t-g")
-- Used for inflection functions.
function export.parse_root(root_str,expandFinal)

	-- Split on runs of vowels, keeping the vowel runs themselves as parts.
	local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")
	-- A leading empty part is dropped.
	-- Presumably this arises when the root begins with a vowel -- TODO confirm.
	if root[1] == '' then
		table.remove(root,1)
	end

	-- Guard against an empty parts list: root[#root] would then be root[0]
	-- (nil) and parse_consonants would fail on it.
	if expandFinal and #root > 0 then
		root[#root] = export.parse_consonants(root[#root])
	end

	return table.concat(root,"|")

end


-- Breaks a root into its onset, vowel and coda.
--   root_str: the root, either pre-segmented with hyphens or whitespace
--             ("k-a-t", "k a t") or given as one unsegmented string ("kat")
-- Returns a table { onset = ..., vowel = ..., coda = ... } in which missing
-- pieces are empty strings, or nil when root_str is nil/false, empty, or
-- splits into more than three segments.
function export.extract_root(root_str)

	if not root_str then
		return nil
	end

	local roots = {
		onset = '',
		vowel = '',
		coda = '',
	}

	-- Treat whitespace as a segment separator too, then split on hyphens.
	local normalized = string.gsub(root_str,'%s','-')
	local parts = mw.text.split(normalized,'-')

	if #parts == 3 then
		roots['onset'] = parts[1]
		roots['vowel'] = parts[2]
		roots['coda'] = parts[3]
	elseif #parts == 2 then
		-- Two segments: work out which one is the vowel from its first character.
		if string.find(parts[1],"^["..VOWELS.."]") then
			roots['vowel'] = parts[1]
			roots['coda'] = parts[2]
		elseif string.find(parts[2],"^["..VOWELS.."]") then
			roots['onset'] = parts[1]
			roots['vowel'] = parts[2]
		else
			-- No vowel segment at all: onset + coda.
			roots['onset'] = parts[1]
			roots['coda'] = parts[2]
		end
	elseif #parts == 1 and parts[1] ~= '' then
		-- Unsegmented root: pick the pieces out by pattern.
		local rest = normalized

		-- Vowel: the first run of vowels anywhere in the string.
		local s, e = string.find(rest,"["..VOWELS.."]+")
		if s then
			roots['vowel'] = string.sub(rest,s,e)
		end

		-- Onset: a run of non-vowels at the very start.
		s, e = string.find(rest,"^[^"..VOWELS.."]+")
		if s then
			roots['onset'] = string.sub(rest,s,e)
			-- Drop the onset by position rather than re-using it as a gsub
			-- pattern, so pattern-magic characters in it cannot misfire.
			rest = string.sub(rest,e + 1)
		end

		-- Coda: the first run of non-vowels in what is left after the onset.
		s, e = string.find(rest,"[^"..VOWELS.."]+")
		if s then
			roots['coda'] = string.sub(rest,s,e)
		end
	else
		return nil
	end

	return roots
end

-- Doubles the first byte of a consonant: a single character ("k" -> "kk") or
-- a two-character string ending in "h" ("sh" -> "ssh"). Any other input is
-- returned unchanged; nil/false is treated as the empty string.
function export.geminate_char(char)
	local c = char or ''
	local len = #c

	local is_single = (len == 1)
	local is_h_digraph = (len == 2 and string.sub(c,2) == 'h')

	if not (is_single or is_h_digraph) then
		return c
	end

	return string.sub(c,1,1)..c
end

-- Undoes gemination: for a two- or three-byte string whose first two bytes
-- are identical, drops the first byte ("kk" -> "k", "ssh" -> "sh"). Any other
-- input is returned unchanged; nil/false is treated as the empty string.
function export.degeminate_char(char)
	local c = char or ''
	local len = #c

	if len ~= 2 and len ~= 3 then
		return c
	end

	if string.sub(c,1,1) ~= string.sub(c,2,2) then
		return c
	end

	return string.sub(c,2)
end

-- Applies voicing assimilation to a string.
--   str: the string to process; nil and '' are returned unchanged
-- Each rule works in two steps: the first gsub puts a "~" marker in front of
-- the character that has to change, the second replaces "~" plus that
-- character using DEVOICING / H_DEVOICING.
-- NOTE(review): VOICED, DEVOICED, DEVOICING and H_DEVOICING are not defined in
-- this file; presumably they come from Module:Chak-utilities/data, with the
-- latter two being lookup tables -- confirm.
function export.assimilate(str)

	-- Local so the working value does not leak into the global environment.
	local output = str

	if str and str ~= '' then
			--VvC assimilation FIXME - should not happen if v is geminate. Disabling for now.
			-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
			--General Devoicing
			-- devoiced + voiced: mark the voiced (second) character
			output = string.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2"):gsub("~(.)",DEVOICING)
			-- voiced + devoiced: mark the voiced (first) character
			output = string.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2"):gsub("~(.)",DEVOICING)
			--H devoicing
			-- h + voiced: mark the voiced character after the h
			output = string.gsub(output,"h(["..VOICED.."])","h~%1"):gsub("~(.)",H_DEVOICING)
			-- voiced + h: mark the voiced character before the h
			output = string.gsub(output,"(["..VOICED.."])h","~%1h"):gsub("~(.)",H_DEVOICING)
			-- NOTE(review): the voiced + h pass is applied a second time here,
			-- exactly as in the original. It looks redundant but is kept to
			-- preserve behaviour -- confirm against H_DEVOICING before removing.
			output = string.gsub(output,"(["..VOICED.."])h","~%1h"):gsub("~(.)",H_DEVOICING)

	end

	return output
end

-- Template entry point; implements {{chak-from-root}}.
--   frame: the frame object; parameters are read from the parent (template) frame
-- Template parameters:
--   1 / root  the root (required); hyphens are allowed and are stripped for
--             display and categorisation, but kept in the 'chak-root' variable
--   2 / mod   optional modifier
--   plain     return plain text "root + -mod" (just "root" when there is no mod)
--   nocat     return the links without categories
--   notext    return the categories without links
--   face      passed to export.l (default "term")
--   alt, nolink  accepted but not used by this function
-- Side effects: defines the parser variables 'chak-root' and, when a modifier
-- is given, 'chak-mod' via #vardefine.
-- Returns a wikitext string.
function export.chak_from_root(frame)
	local output = {}
	local categories = {}

	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText

	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Demo values so the template's own page renders something.
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end

	local root = args["root"]
	local mod = args["mod"]

	-- Dashes may be specified in the template (and are kept in the variable),
	-- but must not show elsewhere. The outer parentheses drop gsub's second
	-- return value (the substitution count).
	local bare_root = (root:gsub('%-',''))

	local link_text = export.l(bare_root, args["face"], bare_root)
	mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', root } )
	if mod then
		link_text = link_text.." + -"..export.l(mod, args["face"], mod)
		table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mod }, lang) )
		mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', mod } )
	end

	table.insert(output, link_text)
	table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. bare_root }, lang) )

	if args["plain"] then
		-- Without a modifier there is nothing to append; concatenating nil
		-- here would raise an error.
		if mod then
			return bare_root.." + -"..mod
		end
		return bare_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
end

return export