-- Module:Grc-headword
-- Documentation for this module may be created at Module:Grc-headword/documentation
local export = {}
local m_params = require("Module:parameters")
local m_grc_utils = require("Module:grc-utilities")
local m_str_utils = require("Module:string utilities")
local tokenize = m_grc_utils.tokenize
local find_ambig = m_grc_utils.findAmbig
local diacritic = mw.loadData("Module:grc-utilities/data").diacritic
local full_headword = require("Module:headword").full_headword
local get_accent_term = require("Module:grc-accent").get_accent_term
local serial_comma_join = require("Module:table").serialCommaJoin
local lang = require("Module:languages").getByCode("grc")
local canonical_name = lang:getCanonicalName()
local NAMESPACE = mw.title.getCurrentTitle().nsText
local PAGENAME = mw.loadData("Module:headword/data").pagename
local MAINSPACE = NAMESPACE == ""
local reconstructed_prefix = NAMESPACE == "Reconstruction" and "reconstructed " or ""
local toNFD = mw.ustring.toNFD
local ufind = m_str_utils.find
local umatch = m_str_utils.match
local pos_functions = {}
-- Set of declension-class names accepted for the decl parameter of nouns.
local legal_declension = {
	first = true,
	second = true,
	Attic = true,
	third = true,
	irregular = true,
}
-- Maps each accepted gender code (a bare code, or the code followed by "-s",
-- "-d" or "-p") to the gender name used in category names.
-- Also used to validate genders.
local gender_names = {}
for _, entry in ipairs {
	{ "m", "masculine" },
	{ "f", "feminine" },
	{ "n", "neuter" },
	{ "?", "unknown gender" },
} do
	local code, name = entry[1], entry[2]
	for _, code_suffix in ipairs { "", "-s", "-d", "-p" } do
		gender_names[code .. code_suffix] = name
	end
end
-- Wraps text in curly double quotation marks, for use in error messages.
local function quote(text)
	local opening, closing = "“", "”"
	return opening .. text .. closing
end
-- Joins the items of array with concatenater and returns them italicised
-- (wiki '' markup) after a leading "; ". An array with no first element
-- yields the empty string.
local function format(array, concatenater)
	if array[1] then
		return "; ''" .. table.concat(array, concatenater) .. "''"
	end
	return ""
end
-- Process arg the way [[Module:parameters]] would: trim surrounding
-- whitespace, then treat an empty result as an absent value.
--
-- Trimming happens before the emptiness test so that a value consisting only
-- of whitespace is also treated as absent, not returned as "".
--
-- val: a string, or nil/false for an absent argument.
-- Returns the trimmed string, nil if it was empty or whitespace-only, or val
-- unchanged if it was already nil/false.
local function process_arg(val)
	if val then
		val = mw.text.trim(val)
		if val == "" then
			val = nil
		end
	end
	return val
end
-- Reports whether text contains at least one character from the Greek and
-- Coptic block (excluding its first row, U+0370-U+037F) or from the Greek
-- Extended block. Works directly on the UTF-8 bytes of the string.
local function contains_Greek(text)
	-- Lead bytes 0xCE-0xCF plus one continuation byte: U+0380-U+03FF.
	local Greek_and_Coptic = "[\206-\207][\128-\191]"
	-- 0xE1, then 0xBC-0xBF, then one continuation byte: U+1F00-U+1FFF.
	local Greek_Extended = "\225[\188-\191][\128-\191]"
	if string.find(text, Greek_and_Coptic) then
		return true
	end
	return string.find(text, Greek_Extended) ~= nil
end
-- A cheaper version of makeEntryName. Doesn't remove underties, which should
-- not appear in headwords, or convert curly apostrophes, spacing smooth
-- breathings, and spacing coronides to straight apostrophes.
--
-- Decomposes text with toNFD and deletes every combining macron (U+0304) and
-- combining breve (U+0306). Returns only the resulting string: the call is
-- parenthesised so that gsub's substitution count is not passed on to callers
-- as a second return value.
local function remove_macron_breve(text)
	return (toNFD(text):gsub("\204[\132\134]", ""))
end
-- Replaces wikilinks in text with their display text: [[target|label]]
-- becomes "label" and [[target]] becomes "target". Text without "[[" is
-- returned unchanged.
local function remove_links(text)
	if not text:find("%[%[") then
		return text
	end
	-- The link target must not contain "]" as well as "|"; otherwise, in
	-- "[[a]] [[b|c]]", the target would greedily run from "a" across the first
	-- link's closing brackets up to the pipe, and the first link would be lost.
	-- The outer parentheses discard gsub's substitution count.
	return (text
		:gsub("%[%[[^|%]]+|([^%]]+)%]%]", "%1")
		:gsub("%[%[([^%]]+)%]%]", "%1"))
end
-- Combining diacritics, each built from its code point, followed by two
-- character-set patterns assembled from them.
local U = m_str_utils.char
local macron, breve = U(0x304), U(0x306)
local rough, smooth = U(0x314), U(0x313)
local diaeresis = U(0x308)
local acute, grave, circumflex = U(0x301), U(0x300), U(0x342)
local subscript = U(0x345)
-- Matches any one of the diacritics defined above.
local diacritic_patt = "["
	.. macron .. breve
	.. rough .. smooth .. diaeresis
	.. acute .. grave .. circumflex
	.. subscript
	.. "]"
-- Matches any one of the three accent marks.
local accent_patt = table.concat { "[", acute, grave, circumflex, "]" }
-- Controls whether or not the headword can be provided in the first numbered parameter.
--
-- Returns true when text (with wikilinks removed) contains a straight
-- apostrophe, or when one of its tokens is a single Α/α, Ι/ι or Υ/υ whose
-- attached diacritics include none of macron, breve, circumflex or subscript.
-- Returns false otherwise.
local function needs_headword(text)
	local length_marks = "[" .. macron .. breve .. circumflex .. subscript .. "]"
	local lone_aiu = "^([ΑαΙιΥυ])(" .. diacritic_patt .. "*)$"
	local unlinked = remove_links(text)

	-- If page name has straight apostrophe, a headword with curly apostrophe should be provided.
	if unlinked:find("'") then
		return true
	end

	-- Examine the word one token at a time.
	for _, token in ipairs(tokenize(unlinked)) do
		local vowel, marks = umatch(token, lone_aiu)
		if vowel then
			if marks == "" or not ufind(marks, length_marks) then
				return true
			end
		end
	end
	return false
end
-- Process numbered parameters before using [[Module:parameters]], as
-- [[Module:parameters]] converts several named parameters into arrays, which
-- makes them more difficult to manipulate.
--
-- Arguments:
--   args             argument table; it is shallow-copied, and the copy is
--                    modified and returned.
--   Greek_params     list of parameter names for the leading numbered
--                    parameters that hold Greek text or "-". All callers in
--                    this module put "head" first. NOTE: the first entry may
--                    be removed from this table in place (see below).
--   nonGreek_params  optional list of parameter names for the numbered
--                    parameters that follow the Greek ones.
-- Returns the copy of args with numbered parameters moved to their named
-- equivalents. Raises an error for surplus, conflicting or purposeless
-- numbered parameters, and when the head is "-".
local function process_numbered_params(args, Greek_params, nonGreek_params)
-- With no names supplied, use a one-entry list holding no usable name, so that
-- a numbered parameter found in the range scanned after the Greek ones is
-- reported as an error by process_params below.
if not nonGreek_params then
nonGreek_params = { false }
end
-- Recorded now because Greek_params may be shortened further down.
local max_Greek_param_index = #Greek_params
-- Clone args table so that its values can be modified.
args = require("Module:table").shallowcopy(args)
if args.head then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/grc-headword/head param]]
require("Module:debug").track("grc-headword/head param")
end
-- Count the unbroken run of leading numbered parameters that are "-" or
-- contain Greek characters.
local last_Greek_param_index = 0
for i, arg in ipairs(args) do
if arg == "-" or contains_Greek(arg) then
last_Greek_param_index = i
else
break
end
end
-- NOTE(review): head_in_arg1 is assigned below but not read again within
-- this function.
local head_in_arg1 = false
if last_Greek_param_index == max_Greek_param_index then
-- Every Greek slot is filled, so parameter 1 is the head. In mainspace that
-- is accepted only if needs_headword says the page name calls for one.
if not MAINSPACE or needs_headword(PAGENAME) then
head_in_arg1 = true
else
error(("The pagename does not have ambiguous vowels, so there cannot be "
.. max_Greek_param_index
.. " numbered parameter%s. See template documentation for more details.")
:format(max_Greek_param_index == 1 and "" or "s"))
end
elseif last_Greek_param_index > max_Greek_param_index then
error("Too many numbered parameters containing Greek text or hyphens. There can be at most "
.. max_Greek_param_index .. ".")
-- For indeclinable nouns: {{grc-noun|Ἰσρᾱήλ|m}}
-- First parameter is headword if equal to pagename when macrons and breves are removed.
elseif args[1] and remove_macron_breve(args[1]):gsub("’", "'") == toNFD(PAGENAME) then
if args.head then
error("Parameter 1 appears to be the headword, so the head parameter " .. quote(args.head) .. " is not needed.")
end
-- Move parameter 1 into head; index 1 of the copy is left empty.
args.head, args[1] = args[1], nil
else
-- Parameter 1 is not the head, so the numbered parameters line up with the
-- names after "head". This modifies the table passed in by the caller.
table.remove(Greek_params, 1) -- Remove "head" parameter.
end
-- Walks numbered parameters start_i..end_i, pairing each with the name at the
-- same offset in param_names, and moves a numbered value to its name.
local function process_params(start_i, end_i, param_names)
local i = 1 -- Index in the table of parameter names.
for numbered = start_i, end_i do
local named = param_names[i]
i = i + 1
if named then
-- Process parameters, as they have not been processed by [[Module:parameters]].
args[numbered], args[named] =
process_arg(args[numbered]), process_arg(args[named])
-- This should not happen, because the number of Greek parameters
-- has already been checked.
elseif args[numbered] then
error("No purpose for parameter " .. numbered .. ".")
end
if args[numbered] then
if named then
-- This fixes an error caused by the kludgy way in which the
-- numbered parameters of {{grc-preposition}} are handled.
-- (There the names are themselves the numbers 1, 2, 3, so a slot can
-- already be its own name and must not be moved or cleared.)
if numbered ~= named then
if args[named] then
error("Parameter " .. numbered .. " is not needed when parameter " .. named .. " is present.")
end
args[named], args[numbered] = args[numbered], nil
end
else
error("Parameter " .. numbered .. ", " .. args[numbered] .. ", has no purpose.")
end
end
end
end
-- First the Greek-text slots, then everything after them.
process_params(1, last_Greek_param_index, Greek_params)
process_params(last_Greek_param_index + 1, #Greek_params + #nonGreek_params, nonGreek_params)
-- "-" is accepted in Greek slots above, but not as the headword itself.
if args.head == "-" then
error("The headword cannot be absent.")
end
return args
end
-- Validates the headwords in data.heads and adds accent-related categories.
--
-- data:   headword data table; data.heads and data.categories are read and
--         modified, and data.no_redundant_head_cat and (for suffixes)
--         data.pos_category are set.
-- poscat: part-of-speech name used to build the "-forming suffixes" category.
--
-- If no head was supplied, PAGENAME is used. Raises an error when the first
-- head marks a suffix but a later head does not, and, in mainspace, when a
-- head without a space carries a grave accent.
local function process_heads(data, poscat)
	data.no_redundant_head_cat = #data.heads == 0
	if #data.heads == 0 then
		table.insert(data.heads, PAGENAME)
	end

	-- A suffix starts with a hyphen, optionally preceded by an asterisk.
	local suffix_patt = "^%*?%-"
	local suffix = data.heads[1]:find(suffix_patt) and true or false

	-- The loop index is needed for the error messages below.
	for i, head in ipairs(data.heads) do
		-- Test every head with the same pattern as the first one, so that a
		-- head beginning with "*-" is not rejected for lacking a hyphen.
		if suffix and not head:find(suffix_patt) then
			error("The first headword has a hyphen, so headword #" .. i ..
				", " .. quote(head) .. ", should as well.")
		end

		local accent = get_accent_term(head)
		if accent then
			table.insert(data.categories,
				("%s %s terms"):format(canonical_name, accent))
		elseif not ufind(toNFD(head), accent_patt) then
			table.insert(data.categories,
				("%s unaccented terms"):format(canonical_name))
		else
			-- An accent mark is present, but get_accent_term gave no term for it.
			table.insert(data.categories,
				("%s terms with irregular accent"):format(canonical_name))
		end

		if MAINSPACE then
			local _, vowel_set = find_ambig(head, false)
			for vowel in pairs(vowel_set) do
				require("Module:debug").track {
					"grc-headword/ambig",
					"grc-headword/ambig/" .. vowel
				}
			end

			if not head:find(" ") and toNFD(head):find(grave) then
				error("Head #" .. i .. ", " .. quote(head) ..
					", contained a grave accent, but no space. Grave accent can only be used in multi-word terms.")
			end
		end
	end

	if suffix then
		data.pos_category = "suffixes"
		if not poscat:find "forms$" then
			table.insert(data.categories, canonical_name .. " " .. poscat .. "-forming suffixes")
		end
	end
end
-- Builds an inflection entry with the given label whose only form is an
-- unlinked em dash.
local function unlinked_form(label)
	local dash_form = { nolink = true, term = "—" }
	return { label = label, dash_form }
end
-- Appends the forms in gender_arg to inflections under the label gender_name.
-- Does nothing when gender_arg has no first element. When allow_blank_forms
-- is set and the only form is "-", an unlinked placeholder is appended
-- instead of the list itself.
local function add_gender_form(inflections, gender_arg, gender_name, allow_blank_forms)
	if not gender_arg[1] then
		return
	end

	local is_blank = allow_blank_forms and not gender_arg[2] and gender_arg[1] == "-"
	if is_blank then
		table.insert(inflections, unlinked_form(gender_name))
		return
	end

	-- The argument list itself becomes the inflection entry.
	gender_arg.label = gender_name
	table.insert(inflections, gender_arg)
end
-- Adds the neuter forms from args.n to inflections, preceded by the feminine
-- forms from args.f when total_forms is 2.
local function adj_and_part_forms(total_forms, args, inflections, allow_blank_forms)
	local slots = { { "n", "neuter" } }
	if total_forms == 2 then
		table.insert(slots, 1, { "f", "feminine" })
	end
	for _, slot in ipairs(slots) do
		add_gender_form(inflections, args[slot[1]], slot[2], allow_blank_forms)
	end
end
-- Sets data.pos_category according to args.deg ('comp' or 'super'). Does
-- nothing when args.deg is nil; raises an error for any other value. When
-- is_declined_form is set, "adjectives" in the category becomes
-- "adjective forms".
local function handle_degree_of_comparison(args, data, is_declined_form)
	local degree = args.deg
	if degree == nil then
		return
	end

	local category
	if degree == 'comp' then
		category = reconstructed_prefix .. "comparative adjectives"
	elseif degree == 'super' then
		category = reconstructed_prefix .. "superlative adjectives"
	else
		error('Adjective degree ' .. quote(degree) .. ' not recognized.')
	end

	if is_declined_form then
		-- Assigning to a single variable keeps only gsub's first result.
		category = category:gsub("adjectives", "adjective forms")
	end
	data.pos_category = category
end
-- Template entry point. The invocation's first argument is the part of
-- speech and its second an optional subclass; the template's own arguments
-- come from the parent frame. Returns the text produced by full_headword
-- followed by the formatted appendix notes.
function export.show(frame)
	local template_args = frame:getParent().args
	local invoke_args = frame.args
	local poscat = invoke_args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local subclass = invoke_args[2]

	local data = {
		lang = lang,
		pos_category = reconstructed_prefix .. poscat,
		categories = {},
		heads = {},
		genders = {},
		inflections = {},
	}
	local appendix = {}

	-- Parts of speech without a handler are passed through untouched.
	local handler = pos_functions[poscat]
	if handler then
		handler(template_args, data, appendix, poscat, subclass)
	end

	return full_headword(data) .. format(appendix, ", ")
end
-- Variant of the entry point that takes the invocation arguments, template
-- arguments and page name directly. It overrides the module-level PAGENAME
-- and returns the data table itself instead of formatted text.
function export.test(frame_args, parent_args, pagename)
	PAGENAME = pagename
	local poscat = frame_args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local subclass = frame_args[2]

	local data = {
		pos_category = reconstructed_prefix .. poscat,
		categories = {},
		heads = {},
		genders = {},
		inflections = {},
	}
	local appendix = {}

	local handler = pos_functions[poscat]
	if handler then
		handler(parent_args, data, appendix, poscat, subclass)
	end
	return data
end
-- Handler for nouns (and, through the alias after it, proper nouns).
--
-- args:     template arguments (numbered: head, genitive, gender, declension).
-- data:     headword data table; heads, genders, inflections and categories
--           are filled in.
-- appendix: list of notes shown after the headword line; declension notes
--           are appended to it.
-- poscat:   part-of-speech name used in category names.
--
-- Raises an error for an unrecognised gender or declension class, and when a
-- declension class is given without a genitive.
pos_functions["nouns"] = function(args, data, appendix, poscat)
	args = process_numbered_params(args, { "head", "gen" }, { "g", "decl" })

	local params = {
		-- Numbered parameters 1, 2, 3, 4 handled above.
		head = { list = true },
		gen = { list = true },
		g = { list = true, default = '?' },
		dim = { list = true },
		decl = { list = true },
		sort = {}, -- for [[Unsupported titles/Ancient Greek dish]]; please do not use otherwise
	}
	args = m_params.process(args, params, nil, "grc-headword", "nouns")

	data.heads = args.head
	process_heads(data, "noun")

	for _, g in ipairs(args.g) do
		local gender_name = gender_names[g]
		if gender_name then
			table.insert(data.genders, g)
			table.insert(data.categories,
				("%s %s %s"):format(canonical_name, gender_name, poscat))
		else
			error("Gender " .. quote(g) .. " is not an valid " .. canonical_name .. " gender.")
		end
	end

	if not args.gen[1] then
		-- No genitive: the noun is treated as indeclinable.
		table.insert(data.inflections, { label = "[[Appendix:Glossary#indeclinable|indeclinable]]" })
		table.insert(data.categories,
			("%s indeclinable %s")
				:format(canonical_name, poscat))
		for _, g in ipairs(args.g) do
			table.insert(data.categories,
				("%s %s indeclinable %s")
					:format(canonical_name, gender_names[g], poscat))
		end
		if args.decl[1] then
			error("Declension class " .. quote(args.decl[1])
				.. " has been given, but no genitive form has been given, so the word cannot belong to a declension class.")
		end
	else
		if not args.gen[2] and args.gen[1] == "-" then
			table.insert(data.inflections, unlinked_form("genitive"))
		else
			args.gen.label = "genitive"
			table.insert(data.inflections, args.gen)
		end

		if args.decl[2] then
			table.insert(data.inflections, { label = 'variously declined' })
			table.insert(data.categories,
				("%s %s with multiple declensions")
					:format(canonical_name, poscat))
		elseif not args.decl[1] then
			table.insert(appendix, "? declension")
		end

		for _, decl_class in ipairs(args.decl) do
			if not legal_declension[decl_class] then
				-- List every class that legal_declension accepts, including “Attic”.
				error("Declension " .. quote(decl_class) .. " is not a legal " ..
					canonical_name .. " declension. Choose “first”, “second”, “Attic”, “third”, or “irregular”.")
			end

			if decl_class ~= "irregular" then
				table.insert(appendix,
					("[[Appendix:%s %s declension|%s declension]]")
						:format(canonical_name, decl_class, decl_class))
				table.insert(data.categories,
					("%s %s-declension %s")
						:format(canonical_name, decl_class, poscat))
				for _, g in ipairs(args.g) do
					table.insert(data.categories,
						("%s %s %s in the %s declension")
							:format(canonical_name, gender_names[g], poscat, decl_class))
				end
			else
				table.insert(appendix,
					("%s declension"):format(decl_class))
				table.insert(data.categories,
					("%s irregular %s"):format(canonical_name, poscat))
			end
		end
	end

	-- Check first-declension endings and gender.
	if args.decl[1] == "first" then
		local alpha = "α[" .. macron .. breve .. "]?[" .. acute .. circumflex .. "]?"
		local eta = "η[" .. acute .. circumflex .. "]?"
		local gender = args.g[1]
		local alpha_ending, eta_ending
		if gender == "f" then
			alpha_ending = alpha .. "$"
			eta_ending = eta .. "$"
		elseif gender == "m" then
			alpha_ending = alpha .. "ς$"
			eta_ending = eta .. "ς$"
		else
			-- Any other first gender is only tracked; the endings are not checked.
			gender = nil
			require("Module:debug").track("grc-noun/1st/incorrect or no gender")
		end

		if gender then
			for _, head in ipairs(data.heads) do
				head = toNFD(remove_links(head))
				if not (ufind(head, eta_ending) or ufind(head, alpha_ending)) then
					require("Module:debug").track("grc-noun/1st/" .. gender .. " with incorrect ending")
				end
			end
		end
	end

	if args.dim[1] then
		args.dim.label = "diminutive"
		table.insert(data.inflections, args.dim)
	end
end
-- Proper nouns are handled exactly like nouns.
pos_functions["proper nouns"] = pos_functions["nouns"]
-- Handler for verbs: the only parameter is the headword.
pos_functions["verbs"] = function(args, data)
	local raw_args = process_numbered_params(args, { "head" })
	local parsed = m_params.process(
		raw_args,
		{ head = { list = true } },
		nil, "grc-headword", "verbs"
	)
	data.heads = parsed.head
	process_heads(data, "verb")
end
-- Handler for adverbs: headword, comparative, superlative, and an adverb
-- type that is turned into a category.
pos_functions["adverbs"] = function(args, data)
	local raw_args = process_numbered_params(args, { "head", "comp", "super" }, { "type" })
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		comp = { list = true },
		super = { list = true },
		type = { list = true },
	}, nil, "grc-headword", "adverbs")

	data.heads = parsed.head
	process_heads(data, "adverb")

	-- Show comparative and superlative. If comparative or superlative is absent
	-- while the other form is present, show "no comparative" or "no superlative".
	local function add_degree(forms, other_forms, label)
		if forms[1] then
			forms.label = label
			table.insert(data.inflections, forms)
		elseif other_forms[1] then
			table.insert(data.inflections, { label = "no " .. label })
		end
	end
	add_degree(parsed.comp, parsed.super, "comparative")
	add_degree(parsed.super, parsed.comp, "superlative")

	if not parsed.type[1] then
		return
	end
	local adverb_types = require("Module:table").listToSet({
		"demonstrative", "indefinite", "interrogative", "relative",
	})
	for _, adverb_type in ipairs(parsed.type) do
		if not adverb_types[adverb_type] then
			error(quote(adverb_type) .. " is not a valid subcategory of adverb.")
		end
		table.insert(data.categories, canonical_name .. " " .. adverb_type .. " adverbs")
	end
end
-- Handler for numerals: headword, feminine and neuter forms, plus related
-- cardinal, ordinal, adverbial and collective forms given by name.
pos_functions["numerals"] = function(args, data)
	local raw_args = process_numbered_params(args, { "head", "f", "n" })
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		f = { list = true },
		n = { list = true },
		car = { list = true },
		ord = { list = true },
		adv = { list = true },
		coll = { list = true },
	}, nil, "grc-headword", "numerals")

	data.heads = parsed.head
	process_heads(data, "numeral")
	adj_and_part_forms(2, parsed, data.inflections, false)

	-- Parameter name and the label shown for it, in display order.
	local numeral_types = {
		{ "car", "cardinal" },
		{ "ord", "ordinal" },
		{ "adv", "adverbial" },
		{ "coll", "collective" },
	}
	for _, numeral_type in ipairs(numeral_types) do
		local forms = parsed[numeral_type[1]]
		if forms[1] then
			forms.label = numeral_type[2]
			table.insert(data.inflections, forms)
		end
	end
end
-- Dispatcher for participles: forwards to the handler for the given subclass
-- ("1&2" or "1&3") and raises an error for anything else.
pos_functions["participles"] = function(args, data, appendix, _, subclass)
	if subclass == "1&2" or subclass == "1&3" then
		pos_functions["part-" .. subclass](args, data, appendix)
	else
		-- tostring keeps a missing subclass (nil) from failing inside quote
		-- with a concatenation error that would hide this message.
		error('Participle subclass ' .. quote(tostring(subclass)) .. ' not recognized.')
	end
end
-- Handler for participles of the first and second declensions: masculine
-- headword with required feminine and neuter forms.
pos_functions["part-1&2"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "f", "n" })
	-- Parameters 1, 2, and 3 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		f = { list = true, required = true },
		n = { list = true, required = true },
	}, nil, "grc-headword", "part-1&2")

	data.heads = parsed.head
	process_heads(data, "participle")
	table.insert(data.genders, "m")

	local declension_note =
		("[[Appendix:%s first declension|first]]/[[Appendix:%s second declension|second declension]]")
			:format(canonical_name, canonical_name)
	table.insert(appendix, declension_note)

	adj_and_part_forms(2, parsed, data.inflections, false)
end
-- Handler for participles of the first and third declensions: masculine
-- headword with required feminine and neuter forms.
pos_functions["part-1&3"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "f", "n" })
	-- Parameters 1, 2, and 3 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		f = { list = true, required = true },
		n = { list = true, required = true },
	}, nil, "grc-headword", "part-1&3")

	data.heads = parsed.head
	process_heads(data, "participle")
	table.insert(data.genders, "m")

	local declension_note =
		("[[Appendix:%s first declension|first]]/[[Appendix:%s third declension|third declension]]")
			:format(canonical_name, canonical_name)
	table.insert(appendix, declension_note)

	adj_and_part_forms(2, parsed, data.inflections, false)
end
-- Dispatcher for adjectives: forwards to the handler for the given subclass
-- ("1&2", "1&3", "2nd" or "3rd") and raises an error for anything else.
pos_functions["adjectives"] = function(args, data, appendix, _, subclass)
	local subclasses = {
		["1&2"] = true, ["1&3"] = true, ["2nd"] = true, ["3rd"] = true
	}
	-- The nil test avoids indexing the table with a missing subclass.
	if subclass ~= nil and subclasses[subclass] then
		pos_functions["adj-" .. subclass](args, data, appendix)
	else
		-- tostring keeps a missing subclass (nil) from failing inside quote
		-- with a concatenation error that would hide this message.
		error('Adjective subclass ' .. quote(tostring(subclass)) .. ' not recognized.')
	end
end
-- Handler for adjectives of the first and second declensions: masculine
-- headword, required feminine and neuter forms, optional degree.
pos_functions["adj-1&2"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "f", "n" })
	-- Parameters 1, 2, and 3 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		f = { list = true, required = true },
		n = { list = true, required = true },
		deg = {},
	}, nil, "grc-headword", "adj-1&2")

	data.heads = parsed.head
	process_heads(data, "adjective")
	table.insert(data.genders, "m")

	local declension_note =
		("[[Appendix:%s first declension|first]]/[[Appendix:%s second declension|second declension]]")
			:format(canonical_name, canonical_name)
	table.insert(appendix, declension_note)

	handle_degree_of_comparison(parsed, data, false)
	adj_and_part_forms(2, parsed, data.inflections, true)
end
-- Handler for adjectives of the first and third declensions: masculine
-- headword with required feminine and neuter forms.
pos_functions["adj-1&3"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "f", "n" })
	-- Parameters 1, 2, and 3 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		f = { list = true, required = true },
		n = { list = true, required = true },
	}, nil, "grc-headword", "adj-1&3")

	data.heads = parsed.head
	process_heads(data, "adjective")
	table.insert(data.genders, "m")

	local declension_note =
		("[[Appendix:%s first declension|first]]/[[Appendix:%s third declension|third declension]]")
			:format(canonical_name, canonical_name)
	table.insert(appendix, declension_note)

	adj_and_part_forms(2, parsed, data.inflections, true)
end
-- Handler for second-declension adjectives: the headword is tagged both
-- masculine and feminine, and only a neuter form is required.
pos_functions["adj-2nd"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "n" })
	-- Parameters 1 and 2 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		n = { list = true, required = true },
	}, nil, "grc-headword", "adj-2nd")

	data.heads = parsed.head
	process_heads(data, "adjective")
	for _, gender in ipairs { "m", "f" } do
		table.insert(data.genders, gender)
	end

	table.insert(appendix,
		("[[Appendix:%s second declension|second declension]]"):format(canonical_name))

	adj_and_part_forms(1, parsed, data.inflections, true)
end
-- Handler for third-declension adjectives: the headword is tagged both
-- masculine and feminine, a neuter form is required, and a degree is optional.
pos_functions["adj-3rd"] = function(args, data, appendix)
	local raw_args = process_numbered_params(args, { "head", "n" })
	-- Parameters 1 and 2 handled above.
	local parsed = m_params.process(raw_args, {
		head = { list = true },
		n = { list = true, required = true },
		deg = {},
	}, nil, "grc-headword", "adj-3rd")

	data.heads = parsed.head
	process_heads(data, "adjective")
	for _, gender in ipairs { "m", "f" } do
		table.insert(data.genders, gender)
	end

	table.insert(appendix,
		("[[Appendix:%s third declension|third declension]]"):format(canonical_name))

	handle_degree_of_comparison(parsed, data, false)
	adj_and_part_forms(1, parsed, data.inflections, true)
end
-- Maps the case abbreviations accepted by the preposition handler to the
-- full case names used in categories and glossary links.
local case_abbreviations = {
	["nom"] = "nominative",
	["gen"] = "genitive",
	["dat"] = "dative",
	["acc"] = "accusative",
	["voc"] = "vocative",
}
-- Handler for prepositions: a headword followed by abbreviations of the cases
-- the preposition governs. Each recognised case adds a category and a
-- glossary link; an unrecognised abbreviation raises an error.
pos_functions["prepositions"] = function(args, data, appendix)
	-- One Greek slot (the head) followed by three numbered slots whose names
	-- are the numbers 1, 2 and 3 themselves, i.e. up to 4 numbered parameters.
	local raw_args = process_numbered_params(args, { "head" }, { 1, 2, 3 })
	local parsed = m_params.process(raw_args, {
		[1] = { list = true },
		head = { list = true },
	}, nil, "grc-headword", "prepositions")

	data.heads = parsed.head
	process_heads(data, "preposition")

	local governed = parsed[1]
	if not governed[1] then
		return
	end

	local case_links = {}
	for _, abbreviation in ipairs(governed) do
		local case_name = case_abbreviations[abbreviation]
		if not case_name then
			local suggestions = serial_comma_join(
				require("Module:fun").map(
					quote,
					{ "gen", "dat", "acc" }),
				{ dontTag = true })
			error('Case abbreviation ' .. quote(abbreviation) ..
				' not recognized. Please choose from ' ..
				suggestions
				.. '.')
		end
		table.insert(data.categories, canonical_name .. " " .. case_name .. " prepositions")
		table.insert(case_links, "[[Appendix:Glossary#" .. case_name .. "|" .. case_name .. "]]")
	end

	table.insert(data.inflections, { label = 'governs the ' .. serial_comma_join(case_links) })
end
-- Handler for particles: a headword plus boolean flags (disc, mod, inter,
-- neg), each of which adds a category and a label to the headword line.
pos_functions["particles"] = function(args, data)
	args = process_numbered_params(args, { "head" })

	local params = {
		head = { list = true },
		disc = { type = 'boolean' },
		mod = { type = 'boolean' },
		inter = { type = 'boolean' },
		neg = { type = 'boolean' },
	}
	args = m_params.process(args, params, nil, "grc-headword", "particles")

	data.heads = args.head
	-- Pass the singular, as every other handler does: process_heads builds the
	-- category "<language> <poscat>-forming suffixes" from it.
	process_heads(data, "particle")

	-- Flag name and the descriptor it contributes, in display order.
	local flags = {
		{ "disc", "discourse" },
		{ "mod", "modal" },
		{ "inter", "interrogative" },
		{ "neg", "negative" },
	}
	for _, item in ipairs(flags) do
		if args[item[1]] then
			local descriptor = item[2]
			table.insert(data.categories, canonical_name .. " " .. descriptor .. " particles")
			table.insert(data.inflections, { label = descriptor .. ' particle' })
		end
	end
end
-- Set of parts of speech that may have a "... forms" handler; built on first use.
local valid_pos
-- Fallback lookup for pos_functions: a missing key ending in " forms" gets a
-- handler generated on the fly; any other missing key yields nil.
setmetatable(pos_functions, {
	__index = function (self, key)
		if key:find(" forms$") == nil then
			return nil
		end

		if not valid_pos then
			valid_pos = require("Module:table").listToSet({
				"adjective", "determiner", "noun", "numeral", "participle",
				"proper noun", "verb", "pronoun",
			})
		end

		local pos = key:match("^(.+) forms$")
		if not valid_pos[pos] then
			error ("No function for the POS " .. quote(key) .. ".")
		end

		local takes_gender = pos == "noun" or pos == "proper noun"

		-- POS function for "noun forms", "verb forms", etc.
		return function(args, data)
			local raw_args = process_numbered_params(args, { "head" },
				takes_gender and { "g" })

			local params = {
				head = { list = true },
			}
			if takes_gender then
				params.g = { list = true }
			elseif pos == "adjective" then
				params.deg = {}
			end

			local parsed = m_params.process(raw_args, params, nil, "grc-headword", "forms")

			data.heads = parsed.head
			process_heads(data, key)

			if parsed.g then
				for _, gender in ipairs(parsed.g) do
					if not gender_names[gender] then
						error("Gender " .. quote(gender) .. " is not an valid " .. canonical_name .. " gender.")
					end
					table.insert(data.genders, gender)
				end
			end

			handle_degree_of_comparison(parsed, data, true)
			mw.logObject(data)
		end
	end
})
return export