Module:Category tree/poscatboiler/data/characters: Difference between revisions
Tag: Undo |
m (Reverted edits by Juelos (talk) to last revision by Djpwikiadmin) Tag: Rollback |
||
(12 intermediate revisions by 3 users not shown) | |||
Line 51: | Line 51: | ||
local titlechar = data.label:match("^terms spelled with (.+)$") | local titlechar = data.label:match("^terms spelled with (.+)$") | ||
if not titlechar then | if not titlechar then | ||
return | return nil | ||
end | end | ||
local ja_ryu = data.lang and (data.lang: | local ja_ryu = data.lang and (data.lang:getFamilyCode() == "jpx" or data.lang:getFamilyCode() == "jpx-ryu") | ||
-- If Japanese or | -- If Japanese or Ryukyuan, only fire on a single kanji character. | ||
if ja_ryu and (titlechar:find("[A-Za-z]") or ulen(titlechar) > 1) then | if ja_ryu and (titlechar:find("[A-Za-z]") or ulen(titlechar) > 1) then | ||
return | return nil | ||
end | end | ||
local params = { | local params = { | ||
Line 104: | Line 104: | ||
-- which has standard I as its uppercase equivalent. | -- which has standard I as its uppercase equivalent. | ||
local standard_chars = lang:getStandardCharacters() | local standard_chars = lang:getStandardCharacters() | ||
if data.lang and standard_chars | if data.lang and standard_chars then | ||
( | local function err() | ||
rfind(upper, "[" .. standard_chars .. "]") | error("Category titles should use uppercase characters: '" .. data.label .. "'", 2) | ||
end | |||
if data.lang:getCode() ~= "lo" then | |||
if not rfind(standard_chars, upper) then | |||
err() | |||
end | |||
elseif not rfind(upper, "[" .. standard_chars .. "]") then | |||
err() | |||
end | |||
end | end | ||
end | end | ||
Line 159: | Line 165: | ||
return { | return { | ||
description = description, | description = description, | ||
additional = "Note that categories of the form '''''LANG terms spelled with CHAR''''' are intended for characters not " | |||
.. "part of the standard repertoire of a language (e.g. Cyrillic characters in English or Latin characters in Russian).", | |||
displaytitle = not titlechar_is_desc and "{{{langname}}} terms spelled with " .. tagged_titlechar or nil, | displaytitle = not titlechar_is_desc and "{{{langname}}} terms spelled with " .. tagged_titlechar or nil, | ||
parents = {{name = "terms by their individual characters", sort = sortkey }}, | parents = {{name = "terms by their individual characters", sort = sortkey }}, |
Latest revision as of 00:03, 3 September 2023
Documentation for this module may be created at Module:Category tree/poscatboiler/data/characters/documentation
-- Category specs keyed by full category name; exported as RAW_CATEGORIES at the end of the module.
local raw_categories = {}
-- List of handler functions (each receives a `data` table); exported as HANDLERS at the end of the module.
local handlers = {}
-- Local aliases for the Unicode-aware string functions in mw.ustring.
local ulen = mw.ustring.len
-- NOTE(review): uupper is not referenced anywhere else in this file (the handler calls
-- mw.ustring.upper directly) -- confirm whether it can be dropped or should be used there.
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local rfind = mw.ustring.find
--- Record a tracking hit for this module.
-- Loads Module:debug/track and calls it with the key
-- "poscatboiler-characters/" followed by `page`.
-- @param page string: suffix identifying what is being tracked.
-- @return always true.
local function track(page)
	local record = require("Module:debug/track")
	record("poscatboiler-characters/" .. page)
	return true
end
-----------------------------------------------------------------------------
-- --
-- RAW CATEGORIES --
-- --
-----------------------------------------------------------------------------
-- Umbrella category that collects the per-character "by language" categories.
-- Its parents are the general "Umbrella metacategories" category and the
-- "terms by their individual characters" label (sorted under a single space).
do
	local umbrella_name = "Terms by their individual characters subcategories by language"
	local label_parent = {
		name = "terms by their individual characters",
		is_label = true,
		sort = " ",
	}
	raw_categories[umbrella_name] = {
		description = "Umbrella categories covering terms categorized by unusual characters contained in them.",
		additional = "{{{umbrella_meta_msg}}}",
		parents = { "Umbrella metacategories", label_parent },
	}
end
-----------------------------------------------------------------------------
-- --
-- HANDLERS --
-- --
-----------------------------------------------------------------------------
--- Prepare a character for display, giving combining characters a visible base.
-- Declared `local` so the helper does not leak into the global environment
-- (the original definition created a global, which also fails under
-- strict/no-globals checking).
-- @param char string: the character (or text) to display.
-- @param combining truthy if `char` is a combining character.
-- @return `char` preceded by a dotted circle ("◌") when `combining` is truthy;
--         otherwise `char` unchanged.
local function add_dotted_circle(char, combining)
	if combining then
		return "◌" .. char
	end
	return char
end
table.insert(handlers, function(data)
	-- Handler for labels of the form "terms spelled with X".
	-- X is normally the character itself, but it may instead be a description
	-- such as "gershayim"; in that case the char= parameter carries the actual
	-- character.
	-- Returns nil when the handler does not apply; otherwise returns the category
	-- spec table followed by `true`.
	-- NOTE(review): the meaning of the second return value is defined by the
	-- caller of these handlers, which is not visible in this file.
	local label_char = data.label:match("^terms spelled with (.+)$")
	if label_char == nil then
		return nil
	end

	-- For languages whose family code is "jpx" or "jpx-ryu" (Japanese or
	-- Ryukyuan), only fire on a single kanji character: bail out if the label
	-- contains ASCII letters or is longer than one character.
	local is_japonic = data.lang
	if is_japonic then
		local family_code = data.lang:getFamilyCode()
		is_japonic = family_code == "jpx" or family_code == "jpx-ryu"
	end
	if is_japonic and (label_char:find("[A-Za-z]") or ulen(label_char) > 1) then
		return nil
	end

	local args = require("Module:parameters").process(data.args, {
		["char"] = {},
		["sort"] = {},
		-- Not sure what used to be done with the following parameters.
		["context"] = {},
		["context2"] = {},
	})

	if args.context or args.context2 then
		track("terms-spelled-with-context")
	end

	-- Special label "numbers": a fixed description, sorted under "#" by default.
	if label_char == "numbers" then
		local digit_sortkey = args.sort or "#"
		local digit_spec = {
			description = "{{{langname}}} terms spelled with one or more numeric digits.",
			parents = {{name = "terms by their individual characters", sort = digit_sortkey }},
			breadcrumb = "numbers",
			umbrella = {
				breadcrumb = "numbers",
				parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. digit_sortkey }}
			},
		}
		return digit_spec, true
	end

	local char = args.char or label_char
	-- Truthy only when char= was given and differs from the label text.
	local label_is_description = args.char and args.char ~= label_char
	if label_is_description then
		track("titlechar_is_desc")
	end

	-- Fall back to the "mul" language object when no language was supplied.
	local lang = data.lang or require("Module:languages").getByCode("mul")
	local combining = ulen(char) == 1 and require("Module:Unicode_data").is_combining(mw.ustring.codepoint(char))

	-- Uppercase the NFD form one UTF-8 character at a time so that the entries in
	-- `specials` can override mw.ustring.upper, then recompose to NFC.
	local specials = {["ß"] = "ẞ", ["ͅ"] = "ͅ"}
	local function upcase_one(utf8_char)
		return specials[utf8_char] or mw.ustring.upper(utf8_char)
	end
	local decomposed = mw.ustring.toNFD(char)
	local upper = mw.ustring.toNFC((decomposed:gsub("[%z\1-\127\194-\244][\128-\191]*", upcase_one)))

	if char ~= upper and ulen(char) == 1 then
		-- We want uppercase characters; but unless we're careful, we run into an issue with
		-- [[Category:English terms spelled with ı]] due to the weird behavior of this character,
		-- which has standard I as its uppercase equivalent.
		local standard_chars = lang:getStandardCharacters()
		if data.lang and standard_chars then
			-- Kept as a separate function so that error level 2 points at the
			-- call site inside this handler.
			local function reject_lowercase()
				error("Category titles should use uppercase characters: '" .. data.label .. "'", 2)
			end
			-- For language code "lo" the standard characters are used as a
			-- character class matched against `upper`; for every other language
			-- `upper` is searched for within the standard-character string.
			local upper_is_standard
			if data.lang:getCode() == "lo" then
				upper_is_standard = rfind(upper, "[" .. standard_chars .. "]")
			else
				upper_is_standard = rfind(standard_chars, upper)
			end
			if not upper_is_standard then
				reject_lowercase()
			end
		end
	end

	-- Compute description.
	local full_link = require("Module:links").full_link
	local link_alt, link_tr
	if combining then
		-- Show combining characters on a dotted circle and suppress transliteration.
		link_alt = add_dotted_circle(char, true)
		link_tr = "-"
	end
	local character = full_link(
		{
			term = char,
			alt = link_alt,
			lang = lang,
			tr = link_tr,
		},
		"term"
	)
	-- If the letter has a lowercase form, show it.
	local lower = ulower(char)
	if lower ~= char then
		local lower_link = full_link({ term = lower, lang = lang }, "term")
		character = "upper case " .. character .. " or lower case " .. lower_link
	end
	if label_is_description then
		character = character .. " (" .. label_char .. ")"
	end
	local description = "{{{langname}}} terms spelled with " .. character .. "."

	-- Set tagged character for displaytitle and breadcrumb. When the label is a
	-- description it is used verbatim as the breadcrumb and no display titles
	-- are produced.
	local tagged_label, breadcrumb
	if label_is_description then
		breadcrumb = label_char
	else
		local tag_text = require("Module:script utilities").tag_text
		tagged_label = tag_text(label_char, lang, nil, "term")
		breadcrumb = tag_text(add_dotted_circle(char, combining), lang, nil, "term")
	end

	-- Compute sort key: explicit sort= wins, then the language's own sort key,
	-- then the raw character if that sort key comes back empty.
	local sortkey = args.sort
	if not sortkey then
		sortkey = (lang:makeSortKey(char))
	end
	if sortkey == "" then
		sortkey = char
	end

	local displaytitle
	if not label_is_description then
		displaytitle = "{{{langname}}} terms spelled with " .. tagged_label
	end

	-- Japanese/Ryukyuan categories get no umbrella category (umbrella = false).
	local umbrella = false
	if not is_japonic then
		local umbrella_title
		if not label_is_description then
			umbrella_title = "Terms spelled with " .. tagged_label .. " by language"
		end
		umbrella = {
			displaytitle = umbrella_title,
			breadcrumb = breadcrumb,
			parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. sortkey }}
		}
	end

	local spec = {
		description = description,
		additional = "Note that categories of the form '''''LANG terms spelled with CHAR''''' are intended for characters not "
			.. "part of the standard repertoire of a language (e.g. Cyrillic characters in English or Latin characters in Russian).",
		displaytitle = displaytitle,
		parents = {{name = "terms by their individual characters", sort = sortkey }},
		breadcrumb = breadcrumb,
		umbrella = umbrella,
	}
	return spec, true
end)
-- Module export: the raw category specs and the handler list defined above.
return {
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
}