Module:Chak-utilities: Difference between revisions

From The Languages of David J. Peterson
Jump to navigation Jump to search
No edit summary
No edit summary
 
(25 intermediate revisions by the same user not shown)
Line 4: Line 4:
local m_links = require("Module:links")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local m_utilities = require("Module:utilities")
local m_headword = require("Module:headword")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
local m_stru = require("Module:String utilities")
local m_table = require("Module:Table")
local m_table = require("Module:Table")
Line 17: Line 17:


function export.is_geminate(char)
function export.is_geminate(char)
return char ~= nil and #char > 1 and #char < 4 and string.sub(char,1,1) == string.sub(char,2,2)
return char ~= nil and #char > 1 and #char < 4 and mw.ustring.sub(char,1,1) == mw.ustring.sub(char,2,2)
end
end


function compress_consonants(str)
function export.compress_consonants(str)
str = str or ''
replace = {{"ssh","ʃʃ"},
replace = {{"ssh","ʃʃ"},
Line 28: Line 30:
{"sh","ʃ"},
{"sh","ʃ"},
{"dh","ð"},
{"dh","ð"},
{"th","θ"},  
{"th","θ"},
{"ch","ʧ"},}
{"j","ʤ"},
{"ch","ʧ"},
{"'",""},
}
for _,v in ipairs(replace) do
for _,v in ipairs(replace) do
Line 38: Line 43:
end
end


function expand_consonants(str)
function export.expand_consonants(str)
replace = {{"ssh","ʃʃ"},
replace = {
{"s'h","sh"},
{"s'sh","sʃ"},
{"d'h","dh"},
{"d'dh","dð"},
{"t'h","th"},
{"t'th","tθ"},
{"ssh","ʃʃ"},
{"ddh","ðð"},
{"ddh","ðð"},
{"tth","θθ"},  
{"tth","θθ"},  
Line 45: Line 57:
{"sh","ʃ"},
{"sh","ʃ"},
{"dh","ð"},
{"dh","ð"},
{"th","θ"},  
{"th","θ"},
{"ch","ʧ"},}
{"j","ʤ"},
{"ch","ʧ"},
}
for _,v in ipairs(replace) do
for _,v in ipairs(replace) do
Line 55: Line 69:
end
end


function parse_consonants(str)
function export.parse_consonants(str)
str = export.compress_consonants(str)
str = export.compress_consonants(str)
Line 63: Line 77:
str = mw.ustring.gsub(str,"%z.","-")
str = mw.ustring.gsub(str,"%z.","-")
str = mw.ustring.gsub(str,"^.","")
str = mw.ustring.gsub(str,"^.","")
str = mw.ustring.gsub(str,"%-'%-","-")


str = export.expand_consonants(str)
str = export.expand_consonants(str)
Line 85: Line 100:
if expandFinal then
if expandFinal then
local tail = root[#root]
local tail = root[#root]
local new_tail = parse_consonants(tail)
local new_tail = export.parse_consonants(tail)
root[#root] = new_tail
root[#root] = new_tail
end
end


mw.logObject(root)
return root
return root


end
end


function export.condense_vowels(vowel_str)
local output = vowel_str or ''
if #output > 3 or #output < 2 then
return output
elseif #output == 3 then
output = string.sub(output,1,2)
end
output = string.gsub(output,"%S%S",{["aw"]="o",
["au"]="o",
["ao"]="o",
["ai"]="e",
["ay"]="e",
["ae"]="e",
["wu"]="uu",
["uw"]="uu",
["yi"]="ii",
["iy"]="ii",
})
return output
end


function export.extract_root(root_str)
function export.extract_root(root_str)
Line 171: Line 211:
function export.assimilate(str)
function export.assimilate(str)


output = str
output = export.compress_consonants(str)


if str and str ~= '' then
if str and str ~= '' then
--Find and record location of hyphen, if any
hyphen = mw.ustring.find(output,"-",1,true) or 0
output = mw.ustring.gsub(output,"-","")
--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now.
--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now.
-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
--Sibilant Assimilation
output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2")
--Nasal Assimilation
output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1")
output = mw.ustring.gsub(output,"n([bfv])","m%1")
--General Devoicing
--General Devoicing
output = string.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2"):gsub("~(.)",DEVOICING)
output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2")
output = string.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2"):gsub("~(.)",DEVOICING)
output = mw.ustring.gsub(output,"~(.)",DEVOICING)
output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2")
output = mw.ustring.gsub(output,"~(.)",DEVOICING)
--H devoicing
--H devoicing
output = string.gsub(output,"h(["..VOICED.."])","h~%1"):gsub("~(.)",H_DEVOICING)
output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1")
output = string.gsub(output,"(["..VOICED.."])h","~%1h"):gsub("~(.)",H_DEVOICING)
output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
output = string.gsub(output,"(["..VOICED.."])h","~%1h"):gsub("~(.)",H_DEVOICING)
output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h")
output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
output = mw.ustring.gsub(output,"hj","hʧ")
--Misc
output = mw.ustring.gsub(output,"qk","kk")
output = mw.ustring.gsub(output,"kq","kk")
output = mw.ustring.gsub(output,"ao","au")
output = mw.ustring.gsub(output,"ae","ai")
output = mw.ustring.gsub(output,"aw","au")
output = mw.ustring.gsub(output,"ua","wa")
--Add hyphen, if any
if hyphen > 0 then
output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen)
end
end
end
return output
return export.expand_consonants(output)
end
 
function export.prothetic(str)
str = str or ''
str2 = export.compress_consonants(str)
char1 = mw.ustring.sub(str2,1,1)
char2 = mw.ustring.sub(str2,2,2)
local result = ""
if mw.ustring.find(VOWELS,char1) or mw.ustring.find(VOWELS,char2) then
result = str
else
local v = mw.ustring.match(str,"["..VOWELS.."]") or 'a'
result = v..str
end
return result
end
end


Line 215: Line 296:
args["mod"] = "t"
args["mod"] = "t"
end
end
local link_text = export.l(args["root"], args["face"], args["root"] )
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )
--Adding gsub for pulling dashes out of a root so they can be specified in the template (and thus the variable),
-- but not show elsewhere.
local link_text = export.l(args["root"]:gsub('-',''), args["face"], args["root"]:gsub('-','') )
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )
if args["mod"] then
if args["mod"] then
link_text = link_text.." + -"..export.l(args["mod"], args["face"], args["mod"])
mod_str = export.parse_consonants(args["mod"])
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. args["mod"] }, lang) )
mods = mw.text.split(mod_str,"-")
for k in ipairs(mods) do
link_text = link_text.." + "..export.l("-"..mods[k], args["face"], "-"..mods[k])
table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mods[k] }, lang) )
end
 
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
end
end
table.insert(output, link_text)
table.insert(output, link_text)

Latest revision as of 16:18, 21 July 2024

Utilities for Chakobsa scripts. Implements {{chak-from-root}}.

Exposed functions

l(term, face, alt)
Creates a link to a Chakobsa lexeme - similar to {{l|chak|term}}
is_geminate(char)
returns true if the supplied 'character' is geminate
compress_consonants(str)
Returns a 'compressed' version of the string where consonant digraphs are reduced to a single character and trigraph geminates are reduced to a geminate of a single character. Mostly for use with other functions for parsing.
expand_consonants(str)
Returns an 'expanded' version of the string where reductions from compress_consonants are replaced with the original romanizations.
parse_consonants(str)
Returns a string where the consonants have been split out, delimited by hyphens. Understands geminate consonants and considers them a 'single' consonant.
parse_root(root_str,expandFinal)
Returns a table of parts of the root string is grouped by consonant and vowels. Example: "kkaalatg" > "kk","aa","l","a","tg". If expandFinal is true, returns the final part parsed out through parse_consonants The above example would be "kkaalatg" > "kk","aa","l","a","t-g". Used for inflection functions.

export = {}

require("Module:Chak-utilities/data")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
--local m_headword = require("Module:headword")
local m_stru = require("Module:String utilities")
local m_table = require("Module:Table")


local lang = require("Module:Languages").getByCode("chak")

	
function export.l(term, face, alt)
	return m_links.full_link( { term = term, lang = lang, alt = alt }, face )
end

function export.is_geminate(char)
	return char ~= nil and #char > 1 and #char < 4 and mw.ustring.sub(char,1,1) == mw.ustring.sub(char,2,2)
end

function export.compress_consonants(str)
	
	str = str or ''
	
	replace = {{"ssh","ʃʃ"},
				{"ddh","ðð"},
				{"tth","θθ"}, 
				{"cch","ʧʧ"},
				{"sh","ʃ"},
				{"dh","ð"},
				{"th","θ"},
				{"j","ʤ"},
				{"ch","ʧ"},
				{"'",""},
	}
	
	for _,v in ipairs(replace) do
		str = mw.ustring.gsub(str,v[1],v[2])
	end
	
	return str
end

function export.expand_consonants(str)
	replace = {
				{"s'h","sh"},
				{"s'sh","sʃ"},
				{"d'h","dh"},
				{"d'dh","dð"},
				{"t'h","th"},
				{"t'th","tθ"},
				{"ssh","ʃʃ"},
				{"ddh","ðð"},
				{"tth","θθ"}, 
				{"cch","ʧʧ"},
				{"sh","ʃ"},
				{"dh","ð"},
				{"th","θ"},
				{"j","ʤ"},
				{"ch","ʧ"},
	}
	
	for _,v in ipairs(replace) do
		str = mw.ustring.gsub(str,v[2],v[1])
	end
	
	return str
end

function export.parse_consonants(str)
	
	str = export.compress_consonants(str)
	
	str = mw.ustring.gsub(str,".",'\0%0%0')
	str = mw.ustring.gsub(str,"(.)%z%1","%1")
	str = mw.ustring.gsub(str,"%z.","-")
	str = mw.ustring.gsub(str,"^.","")
	str = mw.ustring.gsub(str,"%-'%-","-")

	str = export.expand_consonants(str)

	return str
end


function export.parse_root(root_str,expandFinal)
	
	if expandFinal == nil then
		expandFinal = false	
	end

	stems = {}

	local root = m_stru.capturing_split(root_str,"(["..VOWELS.."]+)")
	if root[1] == '' then
		table.remove(root,1)
	end
	
	if expandFinal then
		local tail = root[#root]
		local new_tail = export.parse_consonants(tail)
		root[#root] = new_tail
	end

	mw.logObject(root)
	return root

end

function export.condense_vowels(vowel_str)
	
	local output = vowel_str or ''
	
	if #output > 3 or #output < 2 then
		return output
	elseif #output == 3 then
		output = string.sub(output,1,2)
	end
	
	output = string.gsub(output,"%S%S",{["aw"]="o",
										["au"]="o",
										["ao"]="o",
										["ai"]="e",
										["ay"]="e",
										["ae"]="e",
										["wu"]="uu",
										["uw"]="uu",
										["yi"]="ii",
										["iy"]="ii",
	})
	return output 
	
end

function export.extract_root(root_str)
	
	roots = {
		onset = '',
		vowel = '',
		coda = '',
	}
	
	if root_str then

		root_str = string.gsub(root_str,'%s','-')
		parts = mw.text.split(root_str,'-')
	
		if #parts == 3 then
			roots['onset'] = parts[1]
			roots['vowel'] = parts[2]
			roots['coda'] = parts[3]
		elseif #parts == 2 then
			if string.find(parts[1],"^["..VOWELS.."]") then
				roots['onset'] = ''
				roots['vowel'] = parts[1]
				roots['coda'] = parts[2]
			elseif string.find(parts[2],"^["..VOWELS.."]") then
				roots['onset'] = parts[1]
				roots['vowel'] = parts[2]
				roots['coda'] = ''
			else
				roots['onset'] = parts[1]
				roots['vowel'] = ''
				roots['coda'] = parts[2]
			end
		elseif #parts == 1 and parts[1] ~= '' then
			local temp = root_str
			s,e = string.find(temp,"["..VOWELS.."]+")
			if s then 
				roots['vowel'] = string.sub(temp,s,e)
			end
			s,e = string.find(temp,"^[^"..VOWELS.."]+")
			if s then
				roots['onset'] = string.sub(temp,s,e)
				temp = string.gsub(temp,roots['onset'],'',1)
			end
			s,e = string.find(temp,"[^"..VOWELS.."]+")
			if s then
				roots['coda'] = string.sub(temp,s,e)	
			end
		else 
			return nil
		end
	else
		return nil
	end	
	
	return roots
end

function export.geminate_char(char)
	char = char or '' 
	if #char == 1 or (#char == 2 and string.sub(char,2) == 'h') then
		return string.sub(char,1,1)..char
	else
		return char
	end
end

function export.degeminate_char(char)
	char = char or ''
	
	if (#char == 2 or #char == 3) and string.sub(char,1,1) == string.sub(char,2,2)  then
		return string.sub(char,2)
	else
		return char
	end
end

function export.assimilate(str)

	output = export.compress_consonants(str)

	if str and str ~= '' then
			--Find and record location of hyphen, if any
			hyphen = mw.ustring.find(output,"-",1,true) or 0
			output = mw.ustring.gsub(output,"-","")
			
			--VvC assimilition FIXME - should not happen if v is geminate. Disabling for now.
			-- output = string.gsub(output,"(["..VOWELS.."])v([^"..VOWELS.."])","%1~v%2"):gsub("(.)~v",{VvC)
			--Sibilant Assimilation
			output = mw.ustring.gsub(output,"(["..SIBILANTS.."])(["..SIBILANTS.."])","%2%2")
			--Nasal Assimilation
			output = mw.ustring.gsub(output,"m([tdszkgθðl])","n%1")
			output = mw.ustring.gsub(output,"n([bfv])","m%1")
			--General Devoicing
			output = mw.ustring.gsub(output,"(["..DEVOICED.."])(["..VOICED.."])","%1~%2")
			output = mw.ustring.gsub(output,"~(.)",DEVOICING)
			output = mw.ustring.gsub(output,"(["..VOICED.."])(["..DEVOICED.."])","~%1%2")
			output = mw.ustring.gsub(output,"~(.)",DEVOICING)
			--H devoicing
			output = mw.ustring.gsub(output,"h(["..VOICED.."])","h~%1")
			output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)
			output = mw.ustring.gsub(output,"(["..VOICED.."])h","~%1h")
			output = mw.ustring.gsub(output,"~(.)",H_DEVOICING)	
			output = mw.ustring.gsub(output,"hj","hʧ")	
			--Misc
			output = mw.ustring.gsub(output,"qk","kk")
			output = mw.ustring.gsub(output,"kq","kk")
			output = mw.ustring.gsub(output,"ao","au")
			output = mw.ustring.gsub(output,"ae","ai")
			output = mw.ustring.gsub(output,"aw","au")
			output = mw.ustring.gsub(output,"ua","wa")
		
			--Add hyphen, if any
			if hyphen > 0 then
				output = mw.ustring.sub(output,1,hyphen-1)..'-'..mw.ustring.sub(output,hyphen)	
			end
	end
	
	return export.expand_consonants(output)
end

function export.prothetic(str)
	str = str or ''
	str2 = export.compress_consonants(str)
	char1 = mw.ustring.sub(str2,1,1)
	char2 = mw.ustring.sub(str2,2,2)
	
	local result = ""
	
	if mw.ustring.find(VOWELS,char1) or mw.ustring.find(VOWELS,char2) then
		result = str
	else
		local v = mw.ustring.match(str,"["..VOWELS.."]") or 'a'
		result = v..str
	end
	return result
end

function export.chak_from_root(frame)
	local output = {}
	local categories = {}
	
	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText
	
	local params = {
		[1] = { alias_of = "root"},
		[2] = { alias_of = "mod"},
		["root"] = {required = true},
		["mod"] = {},		
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["alt"] = {},
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	if not args["root"] and namespace == "Template" then
		args["root"] = "tes"
		args["mod"] = "t"
	end

	local link_text = export.l(args["root"], args["face"], args["root"] )
	mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-root', args["root"] } )
	
	if args["mod"] then
		mod_str = export.parse_consonants(args["mod"])
		mods = mw.text.split(mod_str,"-")
		for k in ipairs(mods) do
			link_text = link_text.." + "..export.l("-"..mods[k], args["face"], "-"..mods[k])
			table.insert(categories, m_utilities.format_categories( { "Chakobsa terms with the modifier -" .. mods[k] }, lang) )
		end

		mw.getCurrentFrame():callParserFunction( '#vardefine', { 'chak-mod', args["mod"] } )
	end
		
		
	table.insert(output, link_text)
	table.insert(categories, m_utilities.format_categories( { "Chakobsa terms belonging to the root " .. args["root"]:gsub('-','') }, lang) )
	
	
	if args["plain"] then
		return args["root"]:gsub('-','').." + -"..args["mod"]
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
	
	return output
end

return export