Module:Headword: Difference between revisions

From The Languages of David J. Peterson
Jump to navigation Jump to search
(Undo revision 222971 by Juelos (talk))
Tag: Undo
No edit summary
Line 10: Line 10:
local toBeTagged = m_data.toBeTagged
local toBeTagged = m_data.toBeTagged


local parameters = {
-- If set to true, categories always appear, even in non-mainspace pages
lang = { type = "object" },
local test_force_categories = false
script = { type = "object" },
heads = { type = "table" },
translits = { type = "table" },
transcriptions = { type = "table" },
inflections = { type = "table" },
genders = { type = "table" },
categories = { type = "table" },
pos_category = { type = "string" },
sort_key = { type = "string" },
id = { type = "string" },
}


local function test_script(text, script_code)
local function test_script(text, script_code)
Line 50: Line 39:




local function preprocess(data)
-- Pattern matching a run of one or more whitespace (%s) or punctuation (%p)
-- characters; used below to find candidate word breaks in a head.
local spacingPunctuation = "[%s%p]+"
--[[ List of punctuation or spacing characters that are found inside of words.
Used to exclude characters from the pattern above. ]]
local wordPunc = "-־׳״'.·*’་•"
-- Negated character class built from wordPunc: matches a run of one or more
-- characters that are NOT in the list above.
local notWordPunc = "[^" .. wordPunc .. "]+"
 
 
-- Return true if the given head is multiword according to the algorithm used
-- in full_headword().
function export.head_is_multiword(head)
	-- Inspect every run of whitespace/punctuation in the head. A run counts
	-- as a word break only if part of it matches notWordPunc, i.e. it holds
	-- at least one character outside the word-internal list in wordPunc.
	local multiword = false
	for separator_run in mw.ustring.gmatch(head, spacingPunctuation) do
		if mw.ustring.find(separator_run, notWordPunc) ~= nil then
			multiword = true
			break
		end
	end
	return multiword
end
 
 
-- Add links to a multiword head.
function export.add_multiword_links(head)
	-- Workaround helper: within one run of spacing/punctuation, only the
	-- stretches matching notWordPunc close the current link and open the
	-- next one; the rest of the run stays inside the link text.
	local function split_at_breaks(run)
		local replaced = mw.ustring.gsub(run, notWordPunc, "]]%1[[")
		return replaced
	end

	-- Wrap the whole head in one link, breaking it at each real word break.
	local broken_up = mw.ustring.gsub(head, spacingPunctuation, split_at_breaks)
	local linked = "[[" .. broken_up .. "]]"

	-- Use this when the workaround is no longer needed:
	--   head = "[[" .. mw.ustring.gsub(head, WORDBREAKCHARS, "]]%1[[") .. "]]"

	-- Remove any empty links, which could have been created above
	-- at the beginning or end of the string.
	local cleaned = mw.ustring.gsub(linked, "%[%[%]%]", "")
	return cleaned
end
 
 
-- Returns a truthy value when `title` (defined elsewhere in this module;
-- presumably the current page's title object) is either a main-namespace page
-- whose text starts with "Unsupported titles/" or an Appendix page whose text
-- starts with "Gestures/". Otherwise returns a falsy value.
local function non_categorizable()
	if title:inNamespace("") then
		local unsupported_at = title.text:find("^Unsupported titles/")
		if unsupported_at then
			return unsupported_at
		end
	end
	return title:inNamespace("Appendix") and title.text:find("^Gestures/")
end
 
 
local function preprocess(data, postype)
--[=[
--[=[
[[Special:WhatLinksHere/Template:tracking/headword/heads-not-table]]
[[Special:WhatLinksHere/Template:tracking/headword/heads-not-table]]
Line 57: Line 99:
if type(data.heads) ~= "table" then
if type(data.heads) ~= "table" then
if data.heads then
if data.heads then
require("Module:debug").track("headword/heads-not-table")
require("Module:debug/track")("headword/heads-not-table")
end
end
Line 65: Line 107:
if type(data.translits) ~= "table" then
if type(data.translits) ~= "table" then
if data.translits then
if data.translits then
require("Module:debug").track("headword/translits-not-table")
require("Module:debug/track")("headword/translits-not-table")
end
end
Line 73: Line 115:
if type(data.transcriptions) ~= "table" then
if type(data.transcriptions) ~= "table" then
if data.transcriptions then
if data.transcriptions then
require("Module:debug").track("headword/transcriptions-not-table")
require("Module:debug/track")("headword/transcriptions-not-table")
end
end
Line 92: Line 134:
local default_head
local default_head
if is_reconstructed then
if is_reconstructed then
-- default_head = require("Module:utilities").plain_gsub(pagename, data.lang:getCanonicalName() .. "/", "")
default_head = require("Module:utilities").plain_gsub(pagename, data.lang:getCanonicalName() .. "/", "")
default_head = mw.ustring.lower(subpagename)
else
else
default_head = mw.ustring.lower(subpagename)
default_head = subpagename
end
end
 
local unmodified_default_head = default_head
 
-- Add links to multi-word page names when appropriate
-- Add links to multi-word page names when appropriate
if data.lang:getCode() ~= "zh" then
if data.lang:getCode() ~= "zh" and (not is_reconstructed) and
local spacingPunctuation = "[%s%p]+"
export.head_is_multiword(default_head) then
--[[ List of punctuation or spacing characters that are found inside of words.
default_head = export.add_multiword_links(default_head)
Used to exclude characters from the regex above. ]]
local wordPunc = "-־׳״'.·*’ ་"
local notWordPunc = "[^" .. wordPunc .. "]+"
local contains_words = false
for possibleWordBreak in mw.ustring.gmatch(default_head, spacingPunctuation) do
if mw.ustring.find(possibleWordBreak, notWordPunc) then
contains_words = true
break
end
end
if (not is_reconstructed) and contains_words then
local function workaround_to_exclude_chars(s)
return mw.ustring.gsub(s, notWordPunc, "]]%1[[")
end
default_head = "[["
.. mw.ustring.gsub(
default_head,
spacingPunctuation,
workaround_to_exclude_chars
)
.. "]]"
--
--[=[use this when workaround is no longer needed:
default_head = "[["
.. mw.ustring.gsub(default_head, WORDBREAKCHARS, "]]%1[[")
.. "]]"
--[=[
Remove any empty links, which could have been created above
at the beginning or end of the string.
]=]
default_head = mw.ustring.gsub(default_head, "%[%[%]%]", "")
end
end
end
Line 154: Line 162:
data.heads[i] = head
data.heads[i] = head
end
end
 
-- If the first head is multiword (after removing links), maybe insert into "LANG multiword terms"
if not data.nomultiwordcat and postype == "lemma" and not m_data.no_multiword_cat[data.lang:getCode()] then
-- Check for spaces or hyphens, but exclude prefixes and suffixes.
-- Use the pagename, not the head= value, because the latter may have extra
-- junk in it, e.g. superscripted text that throws off the algorithm.
local checkpattern = ".[%s%-፡]."
if m_data.hyphen_not_multiword_sep[data.lang:getCode()] then
-- Exclude hyphens if the data module states that they should for this language
checkpattern = ".[%s፡]."
end
if mw.ustring.find(unmodified_default_head, checkpattern) and not non_categorizable() then
table.insert(data.categories, data.lang:getCanonicalName() .. " multiword terms")
end
end
 
--[[ Try to detect the script if it was not provided
--[[ Try to detect the script if it was not provided
We use the first headword for this, and assume
We use the first headword for this, and assume
Line 211: Line 234:
-- Format a headword with transliterations
-- Format a headword with transliterations
local function format_headword(data)
local function format_headword(data)
local m_links = require("Module:links")
local m_scriptutils = require("Module:script utilities")
local m_scriptutils = require("Module:script utilities")
Line 217: Line 239:
-- Need to do it this way because translit[1] might be nil while translit[2] is not
-- Need to do it this way because translit[1] might be nil while translit[2] is not
local has_translits = false
local has_translits = false
local has_manual_translits = false
-- Format the headwords
-- Format the headwords
Line 222: Line 245:
if data.translits[i] or data.transcriptions[i] then
if data.translits[i] or data.transcriptions[i] then
has_translits = true
has_translits = true
end
if data.translits[i] and data.translits[i].is_manual or data.transcriptions[i] then
has_manual_translits = true
end
end
-- Apply processing to the headword, for formatting links and such
-- Apply processing to the headword, for formatting links and such
if head:find("[[", nil, true) and (not data.sc or data.sc:getCode() ~= "Imag") then
if head:find("[[", nil, true) and (not data.sc or data.sc:getCode() ~= "Imag") then
head = m_links.language_link({term = head, lang = data.lang}, false)
head = require("Module:links").language_link({term = head, lang = data.lang}, false)
end
end
Line 240: Line 266:
local translits_formatted = ""
local translits_formatted = ""
 
if has_manual_translits then
-- [[Special:WhatLinksHere/Template:tracking/headword/has-manual-translit/LANG]]
require("Module:debug/track")("headword/has-manual-translit/" .. data.lang:getCode())
end
if has_translits then
if has_translits then
-- Format the transliterations
-- Format the transliterations
local translits = data.translits
local translits = data.translits
Line 289: Line 321:
local function format_genders(data)
local function format_genders(data)
if data.genders and #data.genders > 0 then
if data.genders and #data.genders > 0 then
local gen = require("Module:gender and number")
local pos_for_cat
return " " .. gen.format_list(data.genders, data.lang)
if not data.nogendercat and not m_data.no_gender_cat[data.lang:getCode()] then
local pos_category = data.pos_category:gsub("^reconstructed ", "")
pos_for_cat = m_data.pos_for_gender_number_cat[pos_category]
end
local text, cats = require("Module:gender and number").format_genders(data.genders, data.lang, pos_for_cat)
for _, cat in ipairs(cats) do
table.insert(data.categories, cat)
end
return " " .. text
else
else
return ""
return ""
Line 298: Line 338:


local function format_inflection_parts(data, parts)
local function format_inflection_parts(data, parts)
local m_links = require("Module:links")
for key, part in ipairs(parts) do
for key, part in ipairs(parts) do
if type(part) ~= "table" then
if type(part) ~= "table" then
Line 305: Line 343:
end
end
local qualifiers = ""
local qualifiers
local reftext
if part.qualifiers and #part.qualifiers > 0 then
if part.qualifiers and #part.qualifiers > 0 then
Line 311: Line 350:
-- [[Special:WhatLinksHere/Template:tracking/headword/qualifier]]
-- [[Special:WhatLinksHere/Template:tracking/headword/qualifier]]
require("Module:debug").track("headword/qualifier")
require("Module:debug/track")("headword/qualifier")
end
if part.refs and #part.refs > 0 then
local refs = {}
for _, ref in ipairs(part.refs) do
if type(ref) ~= "table" then
ref = {text = ref}
end
local refargs
if ref.name or ref.group then
refargs = {name = ref.name, group = ref.group}
end
table.insert(refs, mw.getCurrentFrame():extensionTag("ref", ref.text, refargs))
end
reftext = table.concat(refs)
end
end
Line 317: Line 370:
local face = part.hypothetical and "hypothetical" or "bold"
local face = part.hypothetical and "hypothetical" or "bold"
local nolink = part.hypothetical or part.nolink
local nolink = part.hypothetical or part.nolink
if part.label then
-- There should be a better way of italicizing a label. As is, this isn't customizable.
part = "<i>" .. part.label .. "</i>"
else
-- Convert the term into a full link
-- Don't show a transliteration here, the consensus seems to be not to
-- show them in headword lines to avoid clutter.
part = require("Module:links").full_link(
{
term = not nolink and part.term or nil,
alt = part.alt or (nolink and part.term or nil),
lang = part.lang or data.lang,
sc = part.sc or parts.sc or (not part.lang and data.sc),
id = part.id,
genders = part.genders,
tr = part.translit or (not (parts.enable_auto_translit or data.inflections.enable_auto_translit) and "-" or nil),
ts = part.transcription,
accel = parts.accel or partaccel,
},
face,
false
)
end
-- Convert the term into a full link
if qualifiers then
-- Don't show a transliteration here, the consensus seems to be not to
part = qualifiers .. part
-- show them in headword lines to avoid clutter.
end
part = m_links.full_link(
if reftext then
{
part = part .. reftext
term = not nolink and part.term or nil,
end
alt = part.alt or (nolink and part.term or nil),
lang = part.lang or data.lang,
sc = part.sc or parts.sc or (not part.lang and data.sc),
id = part.id,
genders = part.genders,
tr = part.translit or (not (parts.enable_auto_translit or data.inflections.enable_auto_translit) and "-" or nil),
ts = part.transcription,
accel = parts.accel or partaccel,
},
face,
false
)
part = qualifiers .. part
parts[key] = part
parts[key] = part
Line 348: Line 411:
elseif parts.request then
elseif parts.request then
parts_output = " <small>[please provide]</small>"
parts_output = " <small>[please provide]</small>"
.. require("Module:utilities").format_categories(
.. require("Module:utilities/format_categories")(
{"Requests for inflections in " .. data.lang:getCanonicalName() .. " entries"},
{"Requests for inflections in " .. data.lang:getCanonicalName() .. " entries"},
lang,
lang,
nil,
nil,
nil,
nil,
data.force_cat_output,
data.force_cat_output or test_force_categories,
data.sc
data.sc
)
)
Line 374: Line 437:
end
end
end
end
-- Return "lemma" if the given POS is a lemma, "non-lemma form" if a non-lemma form, or nil
-- if unknown. The POS passed in must be in its plural form ("nouns", "prefixes", etc.).
-- If you have a POS in its singular form, call pluralize() in [[Module:string utilities]] to
-- pluralize it in a smart fashion that knows when to add '-s' and when to add '-es'.
--
-- If `best_guess` is given and the POS is in neither the lemma nor non-lemma list, guess
-- based on whether it ends in " forms"; otherwise, return nil.
function export.pos_lemma_or_nonlemma(plpos, best_guess)
	-- Variants of the POS with a leading "reconstructed " / "mutated " removed.
	-- The outer parentheses drop gsub's second return value (the match count).
	local unreconstructed = (plpos:gsub("^reconstructed ", ""))
	local unmutated = (plpos:gsub("^mutated ", ""))

	-- Lemma: listed as such either directly or once "reconstructed " is stripped.
	if isLemma[plpos] or isLemma[unreconstructed] then
		return "lemma"
	end

	-- Non-lemma: listed as such directly or once "reconstructed " is stripped,
	-- or any recognised POS (lemma or not) prefixed with "mutated ".
	if isNonLemma[plpos] or isNonLemma[unreconstructed]
		or isLemma[unmutated] or isNonLemma[unmutated] then
		return "non-lemma form"
	end

	-- Unknown POS: only guess when the caller asked for it.
	if not best_guess then
		return nil
	end
	if plpos:find(" forms$") then
		return "non-lemma form"
	end
	return "lemma"
end


local function show_headword_line(data)
local function show_headword_line(data)
Line 379: Line 468:


-- Check the namespace against the language type
-- Check the namespace against the language type
--if namespace == "" then
if namespace == "" then
-- if data.lang:getType() == "reconstructed" then
if data.lang:getType() == "reconstructed" then
-- error("Entries for this language must be placed in the Reconstruction: namespace.")
error("Entries for this language must be placed in the Reconstruction: namespace.")
-- elseif data.lang:getType() == "appendix-constructed" then
elseif data.lang:getType() == "appendix-constructed" then
-- error("Entries for this language must be placed in the Appendix: namespace.")
error("Entries for this language must be placed in the Appendix: namespace.")
-- end
end
--end
end
local tracking_categories = {}
local tracking_categories = {}
Line 394: Line 483:
table.insert(data.categories, 1, pos_category)
table.insert(data.categories, 1, pos_category)
end
end
end
if data.sccat and data.sc then
table.insert(data.categories, data.lang:getCanonicalName() .. " " .. data.pos_category
.. " in " .. data.sc:getDisplayForm())
end
end
-- Is it a lemma category?
-- Is it a lemma category?
if isLemma[data.pos_category] or isLemma[data.pos_category:gsub("^reconstructed ", "")] then
local postype = export.pos_lemma_or_nonlemma(data.pos_category)
if not data.noposcat then
if not postype then
table.insert(data.categories, 1, data.lang:getCanonicalName() .. " lemmas")
-- We don't know what this category is, so tag it with a tracking category.
end
-- Is it a nonlemma category?
elseif isNonLemma[data.pos_category]
or isNonLemma[data.pos_category:gsub("^reconstructed ", "")]
or isLemma[data.pos_category:gsub("^mutated ", "")]
or isNonLemma[data.pos_category:gsub("^mutated ", "")] then
if not data.noposcat then
table.insert(data.categories, 1, data.lang:getCanonicalName() .. " non-lemma forms")
end
-- It's neither; we don't know what this category is, so tag it with a tracking category.
else
--[=[
--[=[
[[Special:WhatLinksHere/Template:tracking/headword/unrecognized pos]]
[[Special:WhatLinksHere/Template:tracking/headword/unrecognized pos]]
]=]
]=]
table.insert(tracking_categories, "head tracking/unrecognized pos")
table.insert(tracking_categories, "head tracking/unrecognized pos")
require("Module:debug").track{
require("Module:debug/track"){
"headword/unrecognized pos",
"headword/unrecognized pos",
"headword/unrecognized pos/lang/" .. data.lang:getCode(),
"headword/unrecognized pos/lang/" .. data.lang:getCode(),
"headword/unrecognized pos/pos/" .. data.pos_category
"headword/unrecognized pos/pos/" .. data.pos_category
}
}
elseif not data.noposcat then
table.insert(data.categories, 1, data.lang:getCanonicalName() .. " " .. postype .. "s")
end
end
 
-- Preprocess
-- Preprocess
preprocess(data)
preprocess(data, postype)
local m_links = require("Module:links")
if namespace == "" and data.lang:getType() ~= "reconstructed" then
if namespace == "" and data.lang:getType() ~= "reconstructed" then
local m_links = require("Module:links")
for _, head in ipairs(data.heads) do
for _, head in ipairs(data.heads) do
if title.prefixedText ~= m_links.getLinkPage(m_links.remove_links(head), data.lang) then
if title.prefixedText ~= m_links.getLinkPage(m_links.remove_links(head), data.lang) then
Line 434: Line 517:
[[Special:WhatLinksHere/Template:tracking/headword/pagename spelling mismatch]]
[[Special:WhatLinksHere/Template:tracking/headword/pagename spelling mismatch]]
]=]
]=]
require("Module:debug").track{
require("Module:debug/track"){
"headword/pagename spelling mismatch",
"headword/pagename spelling mismatch",
"headword/pagename spelling mismatch/" .. data.lang:getCode()
"headword/pagename spelling mismatch/" .. data.lang:getCode()
Line 448: Line 531:
format_genders(data) ..
format_genders(data) ..
format_inflections(data) ..
format_inflections(data) ..
require("Module:utilities").format_categories(
require("Module:utilities/format_categories")(
tracking_categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
tracking_categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
)
)
end
end
Line 470: Line 554:
-- Track uses of sc parameter
-- Track uses of sc parameter
local best = require("Module:scripts").findBestScript(pagename, data.lang)
local best = require("Module:scripts").findBestScript(pagename, data.lang)
require("Module:debug").track("headword/sc")
require("Module:debug/track")("headword/sc")
if data.sc:getCode() == best:getCode() then
if data.sc:getCode() == best:getCode() then
require("Module:debug").track("headword/sc/redundant")
require("Module:debug/track")("headword/sc/redundant")
require("Module:debug").track("headword/sc/redundant/" .. data.sc:getCode())
require("Module:debug/track")("headword/sc/redundant/" .. data.sc:getCode())
else
else
require("Module:debug").track("headword/sc/needed")
require("Module:debug/track")("headword/sc/needed")
require("Module:debug").track("headword/sc/needed/" .. data.sc:getCode())
require("Module:debug/track")("headword/sc/needed/" .. data.sc:getCode())
end
end
end
end
Line 483: Line 567:
local displayTitle
local displayTitle
-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
if namespace == "" and data.sc and toBeTagged[data.sc:getCode()] or
-- Avoid tagging ASCII as Hani even when it is tagged as Hani in the
data.sc:getCode() == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana")) then
-- headword, as in [[check]]. The check for ASCII might need to be expanded
-- to a check for any Latin characters and whitespace or punctuation.
if (namespace == "" and data.sc and toBeTagged[data.sc:getCode()]
and not pagename:find "^[%z\1-\127]+$")
or (data.sc:getCode() == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana"))) then
displayTitle = '<span class="' .. data.sc:getCode() .. '">' .. pagename .. '</span>'
displayTitle = '<span class="' .. data.sc:getCode() .. '">' .. pagename .. '</span>'
elseif namespace == "Reconstruction" then
elseif namespace == "Reconstruction" then
Line 517: Line 605:
[[Special:WhatLinksHere/Template:tracking/headword/force cat output]]
[[Special:WhatLinksHere/Template:tracking/headword/force cat output]]
]=]
]=]
require("Module:debug").track("headword/force cat output")
require("Module:debug/track")("headword/force cat output")
end
end
Line 526: Line 614:
-- Were any categories specified?
-- Were any categories specified?
if data.categories and #data.categories > 0 then
if data.categories and #data.categories > 0 then
local lang_name = require("Module:string").pattern_escape(data.lang:getCanonicalName())
local lang_name = require("Module:string/pattern_escape")(data.lang:getCanonicalName())
for _, cat in ipairs(data.categories) do
for _, cat in ipairs(data.categories) do
-- Does the category begin with the language name? If not, tag it with a tracking category.
-- Does the category begin with the language name? If not, tag it with a tracking category.
Line 536: Line 624:
[[Special:WhatLinksHere/Template:tracking/head tracking/no lang category]]
[[Special:WhatLinksHere/Template:tracking/head tracking/no lang category]]
]=]
]=]
require("Module:debug").track{
require("Module:debug/track"){
"headword/no lang category",
"headword/no lang category",
"headword/no lang category/lang/" .. data.lang:getCode()
"headword/no lang category/lang/" .. data.lang:getCode()
Line 564: Line 652:
if standard then
if standard then
if mw.ustring.len(title.subpageText) ~= 1 and not mw.ustring.match(title.text, "^Unsupported titles/") then
if mw.ustring.len(title.subpageText) ~= 1 and not non_categorizable() then
for character in mw.ustring.gmatch(title.subpageText, "([^" .. standard .. "])") do
for character in mw.ustring.gmatch(title.subpageText, "([^" .. standard .. "])") do
local upper = mw.ustring.upper(character)
local upper = mw.ustring.upper(character)
Line 579: Line 667:
-- Categorise for palindromes
-- Categorise for palindromes
if title.nsText ~= "Reconstruction"
if title.nsText ~= "Reconstruction" and mw.ustring.len(title.subpageText)>2
and require('Module:palindromes').is_palindrome(
and require('Module:palindromes').is_palindrome(
title.subpageText, data.lang, data.sc
title.subpageText, data.lang, data.sc
Line 585: Line 673:
table.insert(data.categories, data.lang:getCanonicalName() .. " palindromes")
table.insert(data.categories, data.lang:getCanonicalName() .. " palindromes")
end
end
 
-- This may add more categories (e.g. gender categories), so make sure it gets
-- evaluated first.
local text = show_headword_line(data)
return
return
show_headword_line(data) ..
text ..
require("Module:utilities").format_categories(
require("Module:utilities/format_categories")(
data.categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
data.categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
) ..
) ..
require("Module:utilities").format_categories(
require("Module:utilities/format_categories")(
tracking_categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
tracking_categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
)
)
end
end


return export
return export

Revision as of 00:33, 10 April 2022

This module is used to show headword lines, along with any annotations like genders, transliterations and inflections. It's used by the template {{head}}, via the submodule Module:headword/templates. It's also used by many other headword modules; for a full list, see Category:Headword-line modules. Some of the data used by this module is found in Module:headword/data.

export.head_is_multiword

function export.head_is_multiword(head)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.add_multiword_links

function export.add_multiword_links(head)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.pos_lemma_or_nonlemma

function export.pos_lemma_or_nonlemma(plpos, best_guess)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

export.full_headword

function export.full_headword(data)

This function lacks documentation. Please add a description of its usages, inputs and outputs, or its difference from similar functions, or make it local to remove it from the function list.

Further explanations for full_headword()

The sole argument, data, is a table containing the following items (WARNING: they will be destructively modified):

{
	lang = language_object,
	pagename = nil or "pagename",
	heads = { "head1", "head2", "head3", ... } or {
		{
			term = nil or "head1",
			tr = nil or "translit1",
			ts = nil or "transcription1",
			sc = nil or script_object,
			q = nil or {"left_qualifier1", "left_qualifier2", ...},
			qq = nil or {"right_qualifier1", "right_qualifier2", ...},
			refs = nil or {{text = "ref_text1" or "", name = nil or "ref_name1", group = nil or "ref_group1"}, ...},
			separator = nil or "separator",
		},
		...
	},
	translits = { [1] = "translit1", [3] = "translit3", ... },
	transcriptions = { [2] = "transcription2", [3] = "transcription3", ... },
	sc = script_object,
	inflections = {
		enable_auto_translit = boolean,
		{ label = "grammatical_category", "inflected_form1", "inflected_form2", ... },
		{ label = "grammatical_category", accel = {form = "tag|tag", lemma = "lémma"}, "inflected_form1", "inflected_form2", ... },
		{
			label = "grammatical_category",
			accel = {
				form = "tag|tag",
				target = "form_target",
				tr = nil or "form_manual_translit",
				gender = "gender_spec" or {"gender_spec1", "gender_spec2", ...},
				pos = "form_part_of_speech",
				lemma = nil or "lémma",
				lemma_translit = nil or "lemma_manual_translit",
				no_store = boolean,
			},
			sc = nil or inflection_specific_script_object,
			enable_auto_translit = boolean,
			"inflected_form1",
			{
				term = "inflected_form2",
				alt = nil or "display_text",
				translit = nil or "manual_transliteration",
				transcription = nil or "manual_transcription",
				gender = {"gender1", "gender2", {spec = "gender3", qualifiers = nil or {"qualifier1", "qualifier2", ... }}},
				accel = {form = "tag|tag|tag", lemma = "lemma_of_inflected_form", lemma_translit = "manual_translit" },
				lang = nil or term_specific_lang_object,
				sc = nil or term_specific_script_object,
				id = "sense_id",
				q = nil or {"left_qualifier1", "left_qualifier2", ... },
				qq = nil or {"right_qualifier1", "right_qualifier2", ... },
				refs = nil or {{text = "ref_text1" or "", name = nil or "ref_name1", group = nil or "ref_group1"}, ...},
				separator = nil or "separator",
				nolink = boolean,
				hypothetical = boolean,
			},
			{
				label = "raw_textual_label",
				q = nil or {"left_qualifier1", "left_qualifier2", ... },
				qq = nil or {"right_qualifier1", "right_qualifier2", ... },
				refs = nil or {{text = "ref_text1" or "", name = nil or "ref_name1", group = nil or "ref_group1"}, ...},
				separator = nil or "separator",
			},
			...
		},
		{ label = "grammatical_category", request = true },
		...
	},
	genders = {
		"gender1",
		{spec = "gender2", qualifiers = {"qualifier1", "qualifier2", ...}},
		...
	},
	pos_category = "plural_part_of_speech",
	categories = { "category1", "category2", ... },
	whole_page_categories = { "category1", "category2", ... },
	force_cat_output = boolean,
	sccat = boolean,
	noposcat = boolean,
	nogendercat = boolean,
	nomultiwordcat = boolean,
	nopalindromecat = boolean,
	nolink = boolean,
	sort_key = "sort_key",
	id = "sense_id",
}

Further explanation:

  • lang is required and is a language object from Module:languages corresponding to a given language. For example, use require("Module:languages").getByCode("ru") to retrieve the object corresponding to Russian.
    • pagename is optional and allows you to override the pagename used variously in the module (e.g. as the default value when a head is omitted, for setting categories such as palindromes and terms spelled with CHAR, etc.).
      • is a table listing the heads of the headword. Each element is either a string specifying only the headword itself (old-style), or an object specifying all the properties of the headword (new-style). You cannot mix and match these two styles; all elements should be of one type or the other. If no heads are specified at all ( is omitted or is an empty array), a default head is set based on the assumed pagename (either the actual pagename or the value of , if set). When using old-style head strings, a given head in the array can be , in which case a default head is set as above. When head objects are used, a given object can have the following properties:
          • : A string specifying the headword. This can be omitted, in which case a default head is set as above. Explicit headwords are generally used to specify extra diacritics (in languages with such diacritics, e.g. Russian, Arabic, Latin, Ancient Greek, Old English, etc.), or to link individual words of a multiword term, particularly when the words are inflected forms. Note that by default, each word is linked individually to itself, so there is no need to specify links for a term like a golden key can open any door. Some additional notes:
                • If a headword string contains wikilinks, they are converted into language-section links for the given language (using Module:links#language_link, which is also used by {{l}}). For example, giving , if the language provided is English, will produce: . If string is prefixed with * or if any of the links are, then they are interpreted as reconstructed terms and it will create links to the Reconstruction namespace as appropriate.
                      • If the page name contains spaces or punctuation marks (except for punctuation marks that are used inside of words), it is split and each individual word is automatically wikilinked as above.
                      • If the current page is in the Reconstruction: namespace, then an asterisk will be prepended to the headword to indicate that it is a reconstructed term.
                          • : A string specifying the transliteration of the headword. This is only needed when the headword is in a non-Latin script, and even then only when the automatic transliteration specified using the language's transliteration module is incorrect (or the language has no transliteration module, such as with Persian and Hebrew). For languages with a transliteration module, pass in to suppress the transliteration entirely.
                              • ts: A string specifying the transcription of the headword. This is only used in a few languages with non-Latin scripts where the spelling is significantly different from the pronunciation, such as Akkadian, Old Persian or Hittite. In cases like this, the transliteration usually reflects the spelling and the transcription reflects the pronunciation. For this reason, transcriptions are displayed between slashes. Transcriptions should NOT be used simply to display IPA pronunciation of a language like Russian or Arabic. Unlike for transliterations, there are no automatic transcription modules.
                                  • sc: An optional script object from Module:scripts corresponding to a given script, specifying the script that the headword is in. If omitted, defaults to the top-level sc value. Most of the time, neither the per-headword script nor top-level script need to be specified: If both are omitted, Module:scripts will determine the script(s) using the list of scripts in the language's data file and the characters that are in the headword. Specifically, if there are multiple possible scripts for a language, the script with the largest number of characters in the headword is chosen.
                                      • q: An optional array specifying one or more qualifiers displayed to the left of the headword. Qualifiers are displayed in italics and with parentheses around them, and are intended to specify relevant properties of the headword, especially when there is more than one headword.
                                          • qq: An optional array specifying one or more qualifiers displayed to the right of the headword, as above.
                                              • refs: An optional array specifying one or more references (i.e. footnotes) for the headword. This is similar to using <ref>...</ref> to specify a reference/footnote after a given word in the text. Each element of the array is either a string (the text of the reference) or an object of the form {text = "ref_text", name = "ref_name", group = "ref_group"}. In this latter format:
                                                    • text specifies the reference text (which cannot be nil; use a blank string when cross-referencing to another reference);
                                                          • name gives an optional name to the reference for cross-reference purposes, if the reference text is non-empty, similarly to <ref name="ref_name">ref_text</ref>; however, if the reference text is empty, it specifies a cross-reference to a previously-named reference, similarly to <ref name="ref_name"/>;
                                                                • group gives an optional group to the reference for grouping purposes, similarly to <ref name="ref_name" group="ref_group">ref_text</ref>; however, if the reference text is empty, it specifies the group of a cross-reference to a previously-named reference, similarly to <ref name="ref_name" group="ref_group"/>.
                                                                    • separator: The separator preceding the headword. If omitted, the default value is  <i>or</i>  (i.e. the italicized word "or" surrounded by spaces) for the second and higher headword, and a blank string for the first headword. Use a blank string to request no separator at all.
                                                                      • is an optional table listing the transliterations corresponding to each headword in , when old-style head strings are used; omitting this field is equivalent to setting it to an empty list. If new-style head objects are used, this field must be omitted. The Nth numbered entry should be either a string specifying the transliteration of headword N, or may be omitted, as with the property described above. Note that, if there are multiple headwords, the table in might have entries in the middle of the list that are . A list of this sort cannot be created with , as attempting to insert this way does nothing. Instead, each transliteration must be explicitly assigned using a number as index, e.g. ; here, item is , because no value was assigned to it.
                                                                        • transcriptions is an optional table listing the transcriptions corresponding to each headword in heads, when old-style head strings are used; omitting this field is equivalent to setting it to an empty list. If new-style head objects are used, this field must be omitted. It is of the same format as translits, and can have holes in it as needed. The meaning of the transcription field is as described above for ts.
                                                                          • sc is an optional script object from Module:scripts corresponding to a given script. If specified, this applies equally to all heads specified using heads; if you need to specify per-head scripts, use the head object format documented above. Most of the time you can omit this item, and Module:scripts will determine the script(s) as specified above for the per-head sc property.
                                                                            • is a table listing the gender/number specifications for the headwords. This can be omitted for no genders or numbers. Each element is either a string specifying a gender/number spec, or a table of the form . In either case, the accepted values for genders or numbers are given in Module:gender and number; examples are for masculine, for feminine animate plural and for noun class 2 in languages such as Swahili that have noun classes. If the format with qualifiers is given, the qualifiers are displayed to the left of the gender/number specification. Categories are automatically added according to the specific genders, e.g. LANG masculine nouns for the language specified in if the gender is masculine and the part of speech (see below) is nouns or reconstructed nouns. To suppress the addition of these categories, specify .
                                                                              • inflections is a table listing the inflections to be displayed in the headword entry. The format of this table is somewhat complex and is described below under format_inflections.
                                                                                • is the part-of-speech category for the entry. This is one of the lemma and nonlemma parts of speech listed in Module:headword/data. It should be in the plural: for example, . If this item is omitted, the part of speech category must be included in as the first item in .
                                                                                  • is a table listing the categories to which the entry containing the headword will be added. The first category should be a part-of-speech category, with the canonical name of the language at the beginning – – unless the part of speech is given in the field .
                                                                                    • whole_page_categories is a table listing language-agnostic categories to which the page will be added, which it is nevertheless useful for the headword module to handle (e.g. Category:Unsupported titles). Because they are not tied to a language, pages in them should be sorted according to their {{DEFAULTSORT:}} values for the sake of consistency. Note that some of these - including "Category:Unsupported titles" - are already handled automatically.
                                                                                      • sort_key is a string specifying a sort key for the categories listed in categories. Sort keys should usually be omitted, because the format_categories function in Module:utilities will generate a suitable sortkey in most cases. The sortkey is used to ensure that the page is listed in the correct order in the categories to which it belongs.
                                                                                        • nolink is a boolean value determining whether or not to link the forms of the entire headword. Not to be confused with the per-form nolink property, which disables linking only for one of the forms. It is used, for example, by Module:la-headword for reconstructed terms.

                                                                                          Examples

                                                                                          A simple example

                                                                                          full_headword{
                                                                                          	lang = require("Module:languages").getByCode("en"),	-- language code
                                                                                          	heads = {"book"},									-- headwords
                                                                                          	inflections = {
                                                                                          		{label = "plural", "books"}						-- inflections
                                                                                          	},
                                                                                          	categories = {"English nouns"},						-- part-of-speech category
                                                                                          }
                                                                                          

                                                                                          might give (depending on the page it's run on):

                                                                                          <strong class="Latn headword" lang="en">book</strong> (''plural'' <b class="Latn" lang="en">[[books#English|books]]</b>)[[Category:English lemmas|HEADWORD]][[Category:English nouns|HEADWORD]]
                                                                                          

                                                                                          which displays as:

                                                                                          book (plural books)

                                                                                          A fuller example

                                                                                          full_headword{
                                                                                          	lang = require("Module:languages").getByCode("de"),
                                                                                          	heads = {"Hund"},
                                                                                          	genders = {"m"},
                                                                                          	inflections = {
                                                                                          		{label = "genitive", "Hundes", "Hunds"},
                                                                                                  {label = "plural", "Hunde", {term="Hünde", q="nonstandard"}},
                                                                                          		{label = "diminutive",
                                                                                          			{term = "Hündchen", genders = {"n"}},
                                                                                          			{nolink=true, term = "Hündlein", genders = {"n"}}
                                                                                          		}
                                                                                          	},
                                                                                          	categories = {"German nouns"},
                                                                                          }
                                                                                          

                                                                                          might give (depending on the page it's run on):

                                                                                          <strong class="Latn headword" lang="de">Hund</strong>&nbsp;<span class="gender"><abbr title="masculine gender">m</abbr></span> (''genitive'' <b class="Latn" lang="de">[[Hundes#German|Hundes]]</b> ''or'' <b class="Latn" lang="de">[[Hunds#German|Hunds]]</b>, ''plural'' <b class="Latn" lang="de">[[Hunde#German|Hunde]] </b>''or (nonstandard)''<b> [[Hünde#German|Hünde]]</b>, ''diminutive'' <b class="Latn" lang="de">[[Hündchen#German|Hündchen]]</b>&nbsp;<span class="gender"><abbr title="neuter gender">n</abbr></span> ''or'' <b class="Latn" lang="de">Hündlein</b>&nbsp;<span class="gender"><abbr title="neuter gender">n</abbr></span>)[[Category:German lemmas|HEADWORD]][[Category:German nouns|HEADWORD]]
                                                                                          

                                                                                          which displays as:

                                                                                          Hund m (genitive Hundes or Hunds, plural Hunde or (nonstandard) Hünde, diminutive Hündchen n or Hündlein n)

                                                                                          An example in a non-Latin script

                                                                                          This example is in Russian, which has automatic transliteration:

                                                                                          full_headword{
                                                                                          	lang = require("Module:languages").getByCode("ru"),
                                                                                          	heads = {"кни́га"},
                                                                                          	genders = {"f-in"},
                                                                                          	inflections = {
                                                                                          		{label = "genitive", "кни́ги"},
                                                                                          		{label = "nominative plural", "кни́ги"},
                                                                                          		{label = "genitive plural", "книг"}
                                                                                          	},
                                                                                          	categories = {"Russian nouns"},
                                                                                          }
                                                                                          

                                                                                          might give (depending on the page it's run on):

                                                                                          <strong class="Cyrl headword" lang="ru">кни́га</strong> [[Wiktionary:Russian transliteration|•]] (<span class="tr" lang=""><span class="tr" lang="">kníga</span></span>)&nbsp;<span class="gender"><abbr title="feminine gender">f</abbr>&nbsp;<abbr title="inanimate">inan</abbr></span> (''genitive'' <b class="Cyrl" lang="ru">[[книги#Russian|кни́ги]]</b>, ''nominative plural'' <b class="Cyrl" lang="ru">[[книги#Russian|кни́ги]]</b>, ''genitive plural'' <b class="Cyrl" lang="ru">[[книг#Russian|книг]]</b>)[[Category:Russian lemmas|HEADWORD]][[Category:Russian nouns|HEADWORD]]
                                                                                          

                                                                                          which displays as

                                                                                          кни́га • (kníga) f inan (genitive кни́ги, nominative plural кни́ги, genitive plural книг)

                                                                                          Note a few things about the transliteration:

                                                                                          • If the transliteration is specified and non-empty, Module:headword adds some stuff before and after it. For example, if the transliteration is foo and the language is Hebrew, it produces
                                                                                             [[Wiktionary:Hebrew transliteration|•]] (<span lang="">foo</span>)
                                                                                            
                                                                                            which looks like “• (foo)”.

                                                                                            A fuller example in a non-Latin script

                                                                                            This example is in Russian, with two headwords, each of which requires manual transliteration:

                                                                                            full_headword{
                                                                                            	lang = require("Module:languages").getByCode("ru"),
                                                                                            	heads = {
                                                                                            		{term = "интервьюе́р", tr = "intɛrvʹjuér"},
                                                                                            		{term = "интервью́ер", tr = "intɛrvʹjújer"},
                                                                                            	},
                                                                                            	genders = {"m-an"},
                                                                                            	inflections = {
                                                                                            		{label = "genitive", "интервьюе́ра", "интервью́ера"},
                                                                                            		{label = "nominative plural", "интервьюе́ры", "интервью́еры"},
                                                                                            		{label = "genitive plural", "интервьюе́ров", "интервью́еров"},
                                                                                            	},
                                                                                            	categories = {"Russian nouns"},
                                                                                            }
                                                                                            

                                                                                            might give (depending on the page it's run on):

                                                                                            <strong class="Cyrl headword" lang="ru">интервьюе́р</strong> ''or'' <strong class="Cyrl headword" lang="ru">интервью́ер</strong> [[Wiktionary:Russian transliteration|•]] (<span class="tr" lang=""><span class="tr" lang="">intɛrvʹjuér</span> ''or'' <span class="tr" lang="">intɛrvʹjújer</span></span>)&nbsp;<span class="gender"><abbr title="masculine gender">m</abbr>&nbsp;<abbr title="animate">anim</abbr></span> (''genitive'' <b class="Cyrl" lang="ru">[[интервьюера#Russian|интервьюе́ра]]</b> ''or'' <b class="Cyrl" lang="ru">[[интервьюера#Russian|интервью́ера]]</b>, ''nominative plural'' <b class="Cyrl" lang="ru">[[интервьюеры#Russian|интервьюе́ры]]</b> ''or'' <b class="Cyrl" lang="ru">[[интервьюеры#Russian|интервью́еры]]</b>, ''genitive plural'' <b class="Cyrl" lang="ru">[[интервьюеров#Russian|интервьюе́ров]]</b> ''or'' <b class="Cyrl" lang="ru">[[интервьюеров#Russian|интервью́еров]]</b>)[[Category:Russian lemmas|HEADWORD]][[Category:Russian nouns|HEADWORD]]
                                                                                            

                                                                                            which displays as

                                                                                            интервьюе́р or интервью́ер • (intɛrvʹjuér or intɛrvʹjújer) m anim (genitive интервьюе́ра or интервью́ера, nominative plural интервьюе́ры or интервью́еры, genitive plural интервьюе́ров or интервью́еров)

                                                                                            Another fuller example in a non-Latin script

                                                                                            This example is in Arabic, with embedded links in the headword, manual transliteration in an inflection and use of enable_auto_translit:
                                                                                            full_headword{
                                                                                            	lang = require("Module:languages").getByCode("ar"),
                                                                                            	heads = {
                                                                                            		{term = "[[غُدّة]] [[بَصَلِيّ|بَصَلِيّة]] [[إحْلِيلِيّ|إحْلِيلِيّة]]", tr = "ḡudda baṣaliyya ʾiḥlīliyya"},
                                                                                            	},
                                                                                            	genders = {"f"},
                                                                                            	inflections = {
                                                                                            		enable_auto_translit = true,
                                                                                            		{label = "plural", {term="غُدَد بَصَلِيَّة إِحْلِيلِيَة", translit="ḡudad baṣaliyya ʾiḥlīliyya"}},
                                                                                            	},
                                                                                            	categories = {"Arabic nouns"},
                                                                                            }
                                                                                            

                                                                                            might give (depending on the page it's run on):

                                                                                            <strong class="Arab headword" lang="ar">[[غدة#Arabic|غُدّة]] [[بصلي#Arabic|بَصَلِيّة]] [[إحليلي#Arabic|إحْلِيلِيّة]]</strong> [[Wiktionary:Arabic transliteration|•]] (<span class="tr" lang=""><span class="tr" lang="">ḡudda baṣaliyya ʾiḥlīliyya</span></span>)&nbsp;<span class="gender"><abbr title="feminine gender">f</abbr></span> (''plural'' <b class="Arab" lang="ar">[[غدد بصلية إحليلية#Arabic|غُدَد بَصَلِيَّة إِحْلِيلِيَة]]</b> (<span lang="" class="tr">ḡudad baṣaliyya ʾiḥlīliyya</span>))[[Category:Arabic lemmas|HEADWORD]][[Category:Arabic nouns|HEADWORD]]
                                                                                            

                                                                                            which displays as

                                                                                            غُدّة بَصَلِيّة إحْلِيلِيّة (ḡudda baṣaliyya ʾiḥlīliyyaf (plural غُدَد بَصَلِيَّة إِحْلِيلِيَة (ḡudad baṣaliyya ʾiḥlīliyya))

                                                                                            Proposed/planned changes

                                                                                            • Checking for invalid genders, given a list of genders that are valid for a particular language.

-- Table of public functions; returned at the end of the module.
local export = {}

-- Shared data table for this module, loaded via mw.loadData.
local m_data = mw.loadData("Module:headword/data")

-- Title object of the page currently being rendered.
local title = mw.title.getCurrentTitle()

-- Local shortcuts to sub-tables of the loaded data.
local isLemma = m_data.lemmas
local isNonLemma = m_data.nonlemmas
local notranslit = m_data.notranslit
local toBeTagged = m_data.toBeTagged

-- If set to true, categories always appear, even in non-mainspace pages
-- (debugging switch; combined with data.force_cat_output when formatting
-- categories).
local test_force_categories = false

-- Check whether `text` contains at least one character belonging to the
-- script identified by `script_code`.
--
-- Returns true or false when both arguments are strings (false also when the
-- script is unknown or provides no character set), and nil, after logging a
-- message, when either argument is not a string.
local function test_script(text, script_code)
	-- Guard: both arguments must be strings.
	if type(text) ~= "string" or type(script_code) ~= "string" then
		mw.log("Parameters to test_script were incorrect.")
		return nil
	end

	local script = require("Module:scripts").getByCode(script_code)
	local charset = script and script:getCharacters()
	if not charset then
		return false
	end

	-- Strip non-alphanumeric characters before looking for a character of
	-- the script's set.
	local stripped = mw.ustring.gsub(text, "%W", "")
	if mw.ustring.find(stripped, "[" .. charset .. "]") then
		return true
	end
	return false
end


-- Lua pattern matching one run of whitespace and/or punctuation characters;
-- each such run is a candidate word break.
local spacingPunctuation = "[%s%p]+"
--[[ List of punctuation or spacing characters that are found inside of words.
	 Used to exclude characters from the pattern above. ]]
local wordPunc = "-־׳״'.·*’་•"
-- Lua pattern matching a run of characters that are NOT in wordPunc; applied
-- to a candidate word break to see whether it contains a real separator.
local notWordPunc = "[^" .. wordPunc .. "]+"


-- Report whether `head` is multiword according to the algorithm used in
-- full_headword(): each run of whitespace/punctuation in `head` is inspected,
-- and the head is multiword as soon as one run contains a character that is
-- not in the word-internal punctuation list (wordPunc). Returns a boolean.
function export.head_is_multiword(head)
	local ustring = mw.ustring

	for separator_run in ustring.gmatch(head, spacingPunctuation) do
		local break_pos = ustring.find(separator_run, notWordPunc)
		if break_pos then
			return true
		end
	end

	return false
end


-- Wrap every word of a multiword head in its own [[...]] link and return the
-- resulting wikitext.
function export.add_multiword_links(head)
	-- Workaround: within one run of spacing/punctuation, only the stretches
	-- that are not word-internal punctuation close the current link and open
	-- the next one. Once the workaround is no longer needed, the whole
	-- replacement can become a single
	--   mw.ustring.gsub(head, WORDBREAKCHARS, "]]%1[[")
	local function split_at_breaks(separator_run)
		local replaced = mw.ustring.gsub(separator_run, notWordPunc, "]]%1[[")
		return replaced
	end

	local linked = mw.ustring.gsub(head, spacingPunctuation, split_at_breaks)
	linked = "[[" .. linked .. "]]"

	-- Remove any empty links, which could have been created above at the
	-- beginning or end of the string.
	local cleaned = mw.ustring.gsub(linked, "%[%[%]%]", "")
	return cleaned
end


-- Return a truthy value when the current page is a mainspace
-- "Unsupported titles/..." page or an Appendix "Gestures/..." page; callers use
-- this to skip categories derived from the page name.
local function non_categorizable()
	local unsupported_title = title:inNamespace("")
		and title.text:find("^Unsupported titles/")
	if unsupported_title then
		return unsupported_title
	end

	return title:inNamespace("Appendix") and title.text:find("^Gestures/")
end


-- Normalise the headword data table in place before it is formatted.
--
-- data:    the table given to full_headword(). This function rewrites
--          data.heads, data.translits and data.transcriptions, may set data.sc,
--          and may append to data.categories.
-- postype: the lemma/non-lemma classification of data.pos_category; the
--          "multiword terms" category is only considered when it is "lemma".
--
-- Raises an error if data.id is present but is not a string.
local function preprocess(data, postype)
	--[=[
	[[Special:WhatLinksHere/Template:tracking/headword/heads-not-table]]
	[[Special:WhatLinksHere/Template:tracking/headword/translits-not-table]]
	]=]
	-- Accept a single value where a list is expected: wrap it in a table, and
	-- record a tracking hit when a non-table value was actually supplied.
	if type(data.heads) ~= "table" then
		if data.heads then
			require("Module:debug/track")("headword/heads-not-table")
		end
		
		data.heads = { data.heads }
	end
	
	if type(data.translits) ~= "table" then
		if data.translits then
			require("Module:debug/track")("headword/translits-not-table")
		end
		
		data.translits = { data.translits }
	end
	
	if type(data.transcriptions) ~= "table" then
		if data.transcriptions then
			require("Module:debug/track")("headword/transcriptions-not-table")
		end
		
		data.transcriptions = { data.transcriptions }
	end
	
	-- Always have at least one head; "" is replaced by the default head below.
	if not data.heads or #data.heads == 0 then
		data.heads = {""}
	end
	
	-- Determine if term is reconstructed
	local is_reconstructed = data.lang:getType() == "reconstructed"
		or title.nsText == "Reconstruction"
	
	-- Create a default headword.
	local subpagename = title.subpageText
	local pagename = title.text
	local default_head
	if is_reconstructed then
		-- Strip the "<language name>/" part from the page name.
		default_head = require("Module:utilities").plain_gsub(pagename, data.lang:getCanonicalName() .. "/", "")
	else
		default_head = subpagename
	end

	-- Keep the unlinked form; the multiword category check below uses it.
	local unmodified_default_head = default_head

	-- Add links to multi-word page names when appropriate
	if data.lang:getCode() ~= "zh" and (not is_reconstructed) and
			export.head_is_multiword(default_head) then
		default_head = export.add_multiword_links(default_head)
	end
	
	if is_reconstructed then
		default_head = "*" .. default_head
	end
	
	-- If a head is the empty string "", then replace it with the default
	for i, head in ipairs(data.heads) do
		if head == "" then
			head = default_head
		else
			-- For English only, flag an explicit head that equals the default.
			if head == default_head and data.lang:getCanonicalName() == "English" then
				table.insert(data.categories, data.lang:getCanonicalName() .. " terms with redundant head parameter")
			end			
		end
		data.heads[i] = head
	end

	-- If the first head is multiword (after removing links), maybe insert into "LANG multiword terms"
	if not data.nomultiwordcat and postype == "lemma" and not m_data.no_multiword_cat[data.lang:getCode()] then
		-- Check for spaces or hyphens, but exclude prefixes and suffixes.
		-- Use the pagename, not the head= value, because the latter may have extra
		-- junk in it, e.g. superscripted text that throws off the algorithm.
		-- The "." on each side of the separator class requires a character
		-- before and after the separator.
		local checkpattern = ".[%s%-፡]."
		if m_data.hyphen_not_multiword_sep[data.lang:getCode()] then
			-- Exclude hyphens if the data module states that they should for this language
			checkpattern = ".[%s፡]."
		end
		if mw.ustring.find(unmodified_default_head, checkpattern) and not non_categorizable() then
			table.insert(data.categories, data.lang:getCanonicalName() .. " multiword terms")
		end
	end

	--[[	Try to detect the script if it was not provided
			We use the first headword for this, and assume
			that all of them have the same script
			This *should* always be true, right?		]]
	if not data.sc then
		data.sc = require("Module:scripts").findBestScript(data.heads[1], data.lang)
	end
	
	-- Wrap every caller-supplied transliteration and flag it as manual.
	for i, val in pairs(data.translits) do
		data.translits[i] = {display = val, is_manual = true}
	end
	
	-- Make transliterations
	for i, head in ipairs(data.heads) do
		local translit = data.translits[i]
		
		-- Try to generate a transliteration if necessary
		-- Generate it if the script is not Latn or similar, and if no transliteration was provided
		if translit and translit.display == "-" then
			-- A manual value of "-" suppresses the transliteration for this head.
			translit = nil
		elseif not translit and not (data.sc:getCode():find("Latn", nil, true) or data.sc:getCode() == "Latinx" or data.sc:getCode() == "None") and (not data.sc or data.sc:getCode() ~= "Imag") then
			translit = data.lang:transliterate(require("Module:links").remove_links(head), data.sc)
			
			-- There is still no transliteration?
			-- Add the entry to a cleanup category.
			if not translit and not notranslit[data.lang:getCode()] then
				translit = "<small>transliteration needed</small>"
				table.insert(data.categories, "Requests for transliteration of " .. data.lang:getCanonicalName() .. " terms")
			end
			
			-- Automatically generated (or placeholder) values are flagged as non-manual.
			if translit then
				translit = {display = translit, is_manual = false}
			end
		end
		
		-- Link to the transliteration entry for languages that require this
		if translit and data.lang:link_tr() then
			translit.display = require("Module:links").full_link{
				term = translit.display,
				lang = data.lang,
				sc = require("Module:scripts").getByCode("Latn"),
				tr = "-"
				}
		end
		
		-- May store nil, leaving a gap in data.translits.
		data.translits[i] = translit
	end
	
	if data.id and type(data.id) ~= "string" then
		error("The id in the data table should be a string.")
	end
end


-- Format the headwords together with their transliterations and transcriptions.
--
-- data: the preprocessed headword table. data.heads is rewritten with the
--       script-tagged heads and data.translits with the formatted strings.
--
-- Returns the heads joined by " <i>or</i> ", followed (when any head has a
-- transliteration or transcription) by the parenthesised list of them.
local function format_headword(data)
	local m_scriptutils = require("Module:script utilities")
	
	-- Are there non-empty transliterations?
	-- Need to do it this way because translit[1] might be nil while translit[2] is not
	local has_translits = false
	local has_manual_translits = false
	
	-- Format the headwords
	for i, head in ipairs(data.heads) do
		local translit = data.translits[i]
		local transcription = data.transcriptions[i]
		
		if translit or transcription then
			has_translits = true
		end
		if (translit and translit.is_manual) or transcription then
			has_manual_translits = true
		end
		
		-- Apply processing to the headword, for formatting links and such
		if head:find("[[", nil, true) and (not data.sc or data.sc:getCode() ~= "Imag") then
			head = require("Module:links").language_link({term = head, lang = data.lang}, false)
		end
		
		-- Add language and script wrapper; only the first head carries the id.
		if i == 1 then
			head = m_scriptutils.tag_text(head, data.lang, data.sc, "head", nil, data.id)
		else
			head = m_scriptutils.tag_text(head, data.lang, data.sc, "head", nil)
		end
		
		data.heads[i] = head
	end
	
	local translits_formatted = ""

	if has_manual_translits then
		-- [[Special:WhatLinksHere/Template:tracking/headword/has-manual-translit/LANG]]
		require("Module:debug/track")("headword/has-manual-translit/" .. data.lang:getCode())
	end
	
	if has_translits then
		-- Both tables were indexed in the loop above, so neither can be nil here.
		local translits = data.translits
		local transcriptions = data.transcriptions
		
		-- Highest numeric index present in either table. The length operator
		-- is not used because it is unspecified for tables with gaps, and
		-- translits may well have one (see above).
		local last_index = 0
		
		-- using pairs() instead of ipairs() in case there is a gap
		for i in pairs(translits) do
			if type(i) == "number" then
				translits[i] = m_scriptutils.tag_translit(translits[i].display, data.lang:getCode(), "head", nil, translits[i].is_manual)
				last_index = math.max(last_index, i)
			end
		end
		
		for i in pairs(transcriptions) do
			if type(i) == "number" then
				transcriptions[i] = m_scriptutils.tag_transcription(transcriptions[i], data.lang:getCode(), "head")
				last_index = math.max(last_index, i)
			end
		end
		
		-- Merge each transliteration with its transcription; a head that has
		-- neither gets "", which also fills any gap so the list can be joined.
		for i = 1, last_index do
			local combined = translits[i] or ""
			if transcriptions[i] then
				if translits[i] then
					combined = combined .. " "
				end
				combined = combined .. "/" .. transcriptions[i] .. "/"
			end
			translits[i] = combined
		end
		
		translits_formatted = " (" .. table.concat(translits, " <i>or</i> ", 1, last_index) .. ")"
		
		local transliteration_page = mw.title.new(data.lang:getCanonicalName() .. " transliteration", "Wiktionary")
		
		if transliteration_page then
			-- Reading .exists is wrapped in pcall; any failure is treated as
			-- "page does not exist".
			local success, exists = pcall(function () return transliteration_page.exists end)
			if success and exists then
				translits_formatted = " [[Wiktionary:" .. data.lang:getCanonicalName() .. " transliteration|•]]" .. translits_formatted
			end
		end
	end
	
	return table.concat(data.heads, " <i>or</i> ") .. translits_formatted
end


-- Format the gender/number specification that follows the headword.
--
-- Returns "" when data.genders is missing or empty; otherwise a non-breaking
-- space followed by the formatted genders. Any categories returned by the
-- gender module are appended to data.categories.
local function format_genders(data)
	local genders = data.genders
	if not genders or #genders == 0 then
		return ""
	end

	-- Part of speech used for the gender categories; stays nil when gender
	-- categorisation is switched off for this call or for this language.
	local pos_for_cat
	local suppress_cat = data.nogendercat or m_data.no_gender_cat[data.lang:getCode()]
	if not suppress_cat then
		local base_pos = data.pos_category:gsub("^reconstructed ", "")
		pos_for_cat = m_data.pos_for_gender_number_cat[base_pos]
	end

	local formatted, gender_cats =
		require("Module:gender and number").format_genders(genders, data.lang, pos_for_cat)
	for _, gender_cat in ipairs(gender_cats) do
		table.insert(data.categories, gender_cat)
	end

	return "&nbsp;" .. formatted
end


-- Format one inflection (e.g. a "plural" with its alternative forms).
--
-- data:  the headword data table (language, script, inflection settings).
-- parts: one inflection: parts.label is its label, the array entries are the
--        forms (a plain term string or a table with term/alt/qualifiers/refs/
--        ... fields), plus optional parts.sc, parts.accel, parts.request and
--        parts.enable_auto_translit. The array entries are replaced in place
--        by their formatted strings.
--
-- Returns the italicised label followed by the forms joined with
-- " <i>or</i> ", or by a "[please provide]" request when there are no forms
-- and parts.request is set.
local function format_inflection_parts(data, parts)
	for key, part in ipairs(parts) do
		if type(part) ~= "table" then
			part = {term = part}
		end
		
		local qualifiers
		local reftext
		
		if part.qualifiers and #part.qualifiers > 0 then
			qualifiers = require("Module:qualifier").format_qualifier(part.qualifiers) .. " "
			
			-- [[Special:WhatLinksHere/Template:tracking/headword/qualifier]]
			require("Module:debug/track")("headword/qualifier")
		end
		if part.refs and #part.refs > 0 then
			local refs = {}
			for _, ref in ipairs(part.refs) do
				-- A reference may be given as plain text or as a table with
				-- text/name/group fields.
				if type(ref) ~= "table" then
					ref = {text = ref}
				end
				local refargs
				if ref.name or ref.group then
					refargs = {name = ref.name, group = ref.group}
				end
				table.insert(refs, mw.getCurrentFrame():extensionTag("ref", ref.text, refargs))
			end
			reftext = table.concat(refs)
		end
		
		-- Read these before `part` is overwritten with its formatted string.
		local partaccel = part.accel
		local face = part.hypothetical and "hypothetical" or "bold"
		local nolink = part.hypothetical or part.nolink

		if part.label then
			-- There should be a better way of italicizing a label. As is, this isn't customizable.
			part = "<i>" .. part.label .. "</i>"
		else
			-- Convert the term into a full link
			-- Don't show a transliteration here, the consensus seems to be not to
			-- show them in headword lines to avoid clutter.
			part = require("Module:links").full_link(
				{
					term = not nolink and part.term or nil,
					alt = part.alt or (nolink and part.term or nil),
					lang = part.lang or data.lang,
					sc = part.sc or parts.sc or (not part.lang and data.sc),
					id = part.id,
					genders = part.genders,
					tr = part.translit or (not (parts.enable_auto_translit or data.inflections.enable_auto_translit) and "-" or nil),
					ts = part.transcription,
					accel = parts.accel or partaccel,
				},
				face,
				false
				)
		end
		
		if qualifiers then
			part = qualifiers .. part
		end
		if reftext then
			part = part .. reftext
		end
		
		parts[key] = part
	end
	
	local parts_output = ""
	
	if #parts > 0 then
		parts_output = " " .. table.concat(parts, " <i>or</i> ")
	elseif parts.request then
		-- The language comes from data.lang; a bare `lang` here would be an
		-- undefined global, i.e. nil.
		parts_output = " <small>[please provide]</small>"
			.. require("Module:utilities/format_categories")(
				{"Requests for inflections in " .. data.lang:getCanonicalName() .. " entries"},
				data.lang,
				nil,
				nil,
				data.force_cat_output or test_force_categories,
				data.sc
				)
	end
	
	return "<i>" .. parts.label .. "</i>" .. parts_output
end

-- Build the parenthesised, comma-separated list of inflections shown after the
-- headword. Each entry of data.inflections is replaced in place by its
-- formatted text; "" is returned when there are none.
local function format_inflections(data)
	local inflections = data.inflections
	if not inflections or #inflections == 0 then
		return ""
	end

	for index, inflection in ipairs(inflections) do
		inflections[index] = format_inflection_parts(data, inflection)
	end

	return " (" .. table.concat(inflections, ", ") .. ")"
end


-- Classify a part of speech as "lemma" or "non-lemma form".
--
-- `plpos` must be the plural form of the part of speech ("nouns", "prefixes",
-- etc.); a singular name can be pluralized with pluralize() in
-- [[Module:string utilities]], which knows when to add '-s' and when '-es'.
-- A leading "reconstructed " is ignored for both lists, and a "mutated "
-- variant of any known part of speech is a non-lemma form.
--
-- When the part of speech is in neither list, the result is nil unless
-- `best_guess` is given; in that case it is guessed from whether the name ends
-- in " forms".
function export.pos_lemma_or_nonlemma(plpos, best_guess)
	-- plpos with the given leading pattern removed (unchanged if absent).
	local function without(prefix_pattern)
		return (plpos:gsub(prefix_pattern, ""))
	end

	if isLemma[plpos] or isLemma[without("^reconstructed ")] then
		return "lemma"
	end

	if isNonLemma[plpos]
		or isNonLemma[without("^reconstructed ")]
		or isLemma[without("^mutated ")]
		or isNonLemma[without("^mutated ")] then
		return "non-lemma form"
	end

	if not best_guess then
		return nil
	end

	if plpos:find(" forms$") then
		return "non-lemma form"
	end
	return "lemma"
end


-- Produce the visible headword line: heads, genders and inflections, followed
-- by the tracking categories raised here. Along the way the part-of-speech
-- and lemma/non-lemma categories are added to the front of data.categories,
-- and `data` is preprocessed in place.
--
-- Raises an error when a mainspace page uses a language whose type is
-- "reconstructed" or "appendix-constructed".
local function show_headword_line(data)
	local namespace = title.nsText
	local lang = data.lang

	-- Check the namespace against the language type
	if namespace == "" then
		local lang_type = lang:getType()
		if lang_type == "reconstructed" then
			error("Entries for this language must be placed in the Reconstruction: namespace.")
		elseif lang_type == "appendix-constructed" then
			error("Entries for this language must be placed in the Appendix: namespace.")
		end
	end

	local tracking_categories = {}

	-- "LANG POS" goes first, except for the one excluded combination.
	if not data.noposcat then
		local pos_category = lang:getCanonicalName() .. " " .. data.pos_category
		if pos_category ~= "Translingual Han characters" then
			table.insert(data.categories, 1, pos_category)
		end
	end

	-- Optional per-script category, "LANG POS in SCRIPT".
	if data.sccat and data.sc then
		table.insert(
			data.categories,
			lang:getCanonicalName() .. " " .. data.pos_category
				.. " in " .. data.sc:getDisplayForm()
		)
	end

	-- Lemma or non-lemma? The matching category is inserted in front of the
	-- part-of-speech category added above.
	local postype = export.pos_lemma_or_nonlemma(data.pos_category)
	if postype then
		if not data.noposcat then
			table.insert(data.categories, 1, lang:getCanonicalName() .. " " .. postype .. "s")
		end
	else
		-- We don't know what this category is, so tag it with a tracking category.
		--[=[
		[[Special:WhatLinksHere/Template:tracking/headword/unrecognized pos]]
		]=]
		table.insert(tracking_categories, "head tracking/unrecognized pos")
		require("Module:debug/track"){
			"headword/unrecognized pos",
			"headword/unrecognized pos/lang/" .. lang:getCode(),
			"headword/unrecognized pos/pos/" .. data.pos_category
		}
	end

	preprocess(data, postype)

	-- In mainspace, track the first head whose link target is not this page.
	if namespace == "" and lang:getType() ~= "reconstructed" then
		local m_links = require("Module:links")
		for _, head in ipairs(data.heads) do
			local target = m_links.getLinkPage(m_links.remove_links(head), lang)
			if title.prefixedText ~= target then
				--[=[
				[[Special:WhatLinksHere/Template:tracking/headword/pagename spelling mismatch]]
				]=]
				require("Module:debug/track"){
					"headword/pagename spelling mismatch",
					"headword/pagename spelling mismatch/" .. lang:getCode()
				}
				break
			end
		end
	end

	-- Format everything in display order; the formatters may add categories
	-- to data.categories as they run.
	local headword_text = format_headword(data)
	local gender_text = format_genders(data)
	local inflection_text = format_inflections(data)
	local tracking_text = require("Module:utilities/format_categories")(
		tracking_categories, lang, data.sort_key, nil,
		data.force_cat_output or test_force_categories, data.sc
	)

	return headword_text .. gender_text .. inflection_text .. tracking_text
end

-- Build the complete headword line for an entry, including all categories.
--
-- data: table describing the headword. data.lang (a language object) is
--       required. data.pos_category is required unless the first entry of
--       data.categories starts with the language's canonical name, in which
--       case it is derived from (and removed from) that entry. data.sc is
--       detected when absent. Other fields (heads, translits, transcriptions,
--       genders, inflections, categories, sort_key, id, ...) are consumed by
--       the formatting helpers. The table is modified in place.
--
-- Side effect: may set the page's DISPLAYTITLE to a script-tagged title.
--
-- Returns the wikitext of the headword line followed by its categories.
-- Raises an error when `data` is itself a language object, when data.lang is
-- not a language object, or when no part-of-speech category can be determined.
function export.full_headword(data)
	-- Checked first: a language object passed as `data` has no usable
	-- data.lang, so the data.lang check below would otherwise report the
	-- less helpful message.
	if data.getCanonicalName then
		error('The "data" variable supplied to "full_headword" should not be a language object.')
	end
	
	if not data.lang or type(data.lang) ~= "table" or not data.lang.getCode then
		error("In data, the first argument to full_headword, data.lang should be a language object.")
	end
	
	local tracking_categories = {}
	
	-- Script-tags the topmost header.
	local pagename = title.text
	local fullPagename = title.fullText
	local namespace = title.nsText
	local subpagename = title.subpageText
	local canonical_name = data.lang:getCanonicalName()
	
	if not data.sc then
		data.sc = require("Module:scripts").findBestScript(data.heads and data.heads[1] ~= "" and data.heads[1] or pagename, data.lang)
	else
		-- Track uses of sc parameter
		local best = require("Module:scripts").findBestScript(pagename, data.lang)
		require("Module:debug/track")("headword/sc")
		
		if data.sc:getCode() == best:getCode() then
			require("Module:debug/track")("headword/sc/redundant")
			require("Module:debug/track")("headword/sc/redundant/" .. data.sc:getCode())
		else
			require("Module:debug/track")("headword/sc/needed")
			require("Module:debug/track")("headword/sc/needed/" .. data.sc:getCode())
		end
	end
	
	local displayTitle
	-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
	-- Avoid tagging ASCII as Hani even when it is tagged as Hani in the
	-- headword, as in [[check]]. The check for ASCII might need to be expanded
	-- to a check for any Latin characters and whitespace or punctuation.
	if (namespace == "" and data.sc and toBeTagged[data.sc:getCode()]
			and not pagename:find "^[%z\1-\127]+$")
			or (data.sc:getCode() == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana"))) then
		displayTitle = '<span class="' .. data.sc:getCode() .. '">' .. pagename .. '</span>'
	elseif namespace == "Reconstruction" then
		-- `matched` is declared local so it does not leak into the global table.
		local matched
		displayTitle, matched = mw.ustring.gsub(
			fullPagename,
			"^(Reconstruction:[^/]+/)(.+)$",
			function(before, term)
				return before ..
					require("Module:script utilities").tag_text(
						term,
						data.lang,
						data.sc
					)
			end
		)
		
		-- The title did not have the expected "Reconstruction:LANG/TERM" shape.
		if matched == 0 then
			displayTitle = nil
		end
	end
	
	if displayTitle then
		local frame = mw.getCurrentFrame()
		frame:callParserFunction(
			"DISPLAYTITLE",
			displayTitle
		)
	end
	
	if data.force_cat_output then
		--[=[
		[[Special:WhatLinksHere/Template:tracking/headword/force cat output]]
		]=]
		require("Module:debug/track")("headword/force cat output")
	end
	
	-- Were any categories specified?
	if data.categories and #data.categories > 0 then
		-- Escaped so that magic characters in the language name (e.g. the
		-- hyphens of "Proto-Indo-European") are matched literally.
		local lang_name = require("Module:string/pattern_escape")(canonical_name)
		for _, cat in ipairs(data.categories) do
			-- Does the category begin with the language name? If not, tag it with a tracking category.
			if not mw.ustring.find(cat, "^" .. lang_name) then
				mw.log(cat, canonical_name)
				table.insert(tracking_categories, "head tracking/no lang category")
				
				--[=[
				[[Special:WhatLinksHere/Template:tracking/head tracking/no lang category]]
				]=]
				require("Module:debug/track"){
					"headword/no lang category",
					"headword/no lang category/lang/" .. data.lang:getCode()
				}
			end
		end
		
		-- Without an explicit part of speech, take it from the first category
		-- ("LANG POS" minus the language name) and drop that category; it is
		-- added again when the headword line is built.
		if not data.pos_category
			and mw.ustring.find(data.categories[1], "^" .. lang_name)
				then
			data.pos_category = mw.ustring.gsub(data.categories[1], "^" .. lang_name .. " ", "")
			table.remove(data.categories, 1)
		end
	end
	
	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
			)
	end
	
	-- Categorise for unusual characters
	local standard = data.lang:getStandardCharacters()
	
	if standard then
		if mw.ustring.len(subpagename) ~= 1 and not non_categorizable() then
			for character in mw.ustring.gmatch(subpagename, "([^" .. standard .. "])") do
				-- Use the uppercase form in the category name unless the
				-- uppercase form is itself a standard character.
				local upper = mw.ustring.upper(character)
				if not mw.ustring.find(upper, "[" .. standard .. "]") then
					character = upper
				end
				table.insert(
					data.categories,
					canonical_name .. " terms spelled with " .. character
				)
			end
		end
	end
	
	-- Categorise for palindromes
	if title.nsText ~= "Reconstruction" and mw.ustring.len(subpagename)>2
		and require('Module:palindromes').is_palindrome(
			subpagename, data.lang, data.sc
			) then
		table.insert(data.categories, canonical_name .. " palindromes")
	end

	-- This may add more categories (e.g. gender categories), so make sure it gets
	-- evaluated first.
	local text = show_headword_line(data)
	return
		text ..
		require("Module:utilities/format_categories")(
			data.categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
			) ..
		require("Module:utilities/format_categories")(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
			)
end

-- Hand the table of public functions to whoever require()s this module.
return export