Module:Utilities: Difference between revisions
Djpwikiadmin (talk | contribs) (Created page with "local export = {} local data = mw.loadData("Module:utilities/data") local notneeded = data.notneeded local neededhassubpage = data.neededhassubpage -- A helper function to e...") |
Djpwikiadmin (talk | contribs) No edit summary |
||
Line 1: | Line 1: | ||
local | local decode = mw.text.decode | ||
local u = mw.ustring.char | |||
local data = mw.loadData("Module:utilities/data") | local data = mw.loadData("Module:utilities/data") | ||
Line 5: | Line 6: | ||
local neededhassubpage = data.neededhassubpage | local neededhassubpage = data.neededhassubpage | ||
-- A helper function to escape magic characters in a string | local export = {} | ||
function export.require_when_needed(text) | |||
return setmetatable({}, { | |||
__index = function(t, k) | |||
t = require(text) | |||
return t[k] | |||
end, | |||
__call = function(t, ...) | |||
t = require(text) | |||
return t(...) | |||
end | |||
}) | |||
end | |||
-- A helper function to escape magic characters in a string. | |||
-- Magic characters: ^$()%.[]*+-? | -- Magic characters: ^$()%.[]*+-? | ||
function export.pattern_escape(text) | function export.pattern_escape(text) | ||
Line 11: | Line 27: | ||
text = text.args[1] | text = text.args[1] | ||
end | end | ||
text = mw. | return (text:gsub("[%^$()%%.[%]*+%-?]", "%%%0")) | ||
return text | end | ||
-- A helper function to resolve HTML entities into plaintext. | |||
-- Iterates over entities in a string, and decodes them into plaintext. We use iteration (instead of decoding the whole string in one go) because it means we can avoid loading the lookup string unnecessarily, as it uses more memory. | |||
local entities | |||
function export.get_entities(text) | |||
return (text:gsub("&(#?[%w]-);", function(entity) | |||
-- Check if mw.text.decode is able to decode the entity. | |||
if ( | |||
entity == "lt" or | |||
entity == "gt" or | |||
entity == "amp" or | |||
entity == "quot" or | |||
entity == "nbsp" | |||
) then | |||
return decode("&" .. entity .. ";") | |||
-- Catch hex entities beginning with &#X, which are valid but unsupported by mw.text.decode. | |||
elseif entity:sub(1, 1) == "#" then | |||
entity = entity:lower() | |||
return decode("&" .. entity .. ";") | |||
-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode. | |||
-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway. | |||
else | |||
entities = entities or require("Module:utilities/data/entities") | |||
return entities:match("%f[%Z]" .. entity .. "(%Z+)") | |||
end | |||
end)) | |||
end | |||
-- A helper function to convert plaintext into HTML entities where these match the characters given in set. | |||
-- By default, this resolves any pre-existing entities into plaintext first, to allow mixed input and to avoid accidental double-conversion. This can be turned off with the raw parameter. | |||
function export.make_entities(text, set, raw) | |||
text = not raw and export.get_entities(text) or text | |||
return mw.text.encode(text, set) | |||
end | |||
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page. | |||
function export.get_plaintext(text) | |||
text = text | |||
:gsub("%[%[", "\1") | |||
:gsub("%]%]", "\2") | |||
-- Remove strip markers and HTML tags. | |||
text = mw.text.unstrip(text) | |||
:gsub("<[^<>\1\2]+>", "") | |||
-- Parse internal links for the display text, and remove categories. | |||
text = require("Module:links").remove_links(text) | |||
-- Remove files. | |||
for _, falsePositive in ipairs({"File", "Image"}) do | |||
text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "") | |||
end | |||
-- Parse external links for the display text. | |||
text = text:gsub("%[(https?://[^%[%]]+)%]", | |||
function(capture) | |||
return capture:match("https?://[^%s%]]+%s([^%]]+)") or "" | |||
end) | |||
text = text | |||
:gsub("\1", "[[") | |||
:gsub("\2", "]]") | |||
-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links. | |||
text = text:gsub("[%[%]]", mw.text.nowiki) | |||
-- Strip bold, italics and soft hyphens. | |||
text = text | |||
:gsub("('*)'''(.-'*)'''", "%1%2") | |||
:gsub("('*)''(.-'*)''", "%1%2") | |||
:gsub("", "") | |||
-- Get any HTML entities. | |||
-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used. | |||
text = export.get_entities(text) | |||
return mw.text.trim(text) | |||
end | end | ||
Line 30: | Line 123: | ||
} | } | ||
local args = require("Module:parameters").process(frame.args, params) | local args = require("Module:parameters").process(frame.args, params, nil, "utilities", "plain_gsub") | ||
text = args[1] | text = args[1] | ||
Line 50: | Line 143: | ||
pattern = export.pattern_escape(pattern) | pattern = export.pattern_escape(pattern) | ||
local gsub = require("Module:string utilities").gsub | |||
if invoked then | if invoked then | ||
return (gsub(text, pattern, replacement)) | |||
else | else | ||
return | return gsub(text, pattern, replacement) | ||
end | end | ||
end | end | ||
Line 77: | Line 170: | ||
is output if the page isn't in the main, Appendix:, Reconstruction: or | is output if the page isn't in the main, Appendix:, Reconstruction: or | ||
Citations: namespaces. | Citations: namespaces. | ||
-- SC is a script object; if nil, the default will be used from the sort | |||
base. | |||
]] | ]] | ||
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) | function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) | ||
if type(lang) == "table" and not lang.getCode then | if type(lang) == "table" and not lang.getCode then | ||
error("The second argument to format_categories should be a language object.") | error("The second argument to format_categories should be a language object.") | ||
end | end | ||
if force_output or | local title_obj = mw.title.getCurrentTitle() | ||
local | local allowedNamespaces = { | ||
local | [0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction | ||
} | |||
if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then | |||
local headword_data = mw.loadData("Module:headword/data") | |||
local pagename = headword_data.pagename | |||
local pagename_defaultsort = headword_data.pagename_defaultsort | |||
if not lang then | -- Generate a default sort key. | ||
if sort_key ~= "-" then | |||
if not lang then | |||
lang = require("Module:languages").getByCode("und") | |||
end | |||
sort_base = (lang:makeSortKey(sort_base or pagename, sc)) | |||
if sort_key and sort_key ~= "" then | |||
-- Gather some statistics regarding sort keys | |||
if not no_track and sort_key:uupper() == sort_base then | |||
table.insert(categories, "Sort key tracking/redundant") | |||
end | |||
else | |||
sort_key = sort_base | |||
end | |||
-- If the sort key is empty, remove it. | |||
if sort_key == "" then | |||
sort_key = nil | |||
end | end | ||
-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual. | |||
else | else | ||
sort_key = sort_base | sort_key = sort_base and sort_base:uupper() or pagename_defaultsort | ||
end | end | ||
Line 120: | Line 220: | ||
return "" | return "" | ||
end | end | ||
end | end | ||
Line 201: | Line 256: | ||
} | } | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template") | ||
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) | local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) | ||
Line 247: | Line 302: | ||
} | } | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "make_id") | ||
local langCode = args[1] | local langCode = args[1] | ||
Line 264: | Line 319: | ||
end | end | ||
local | local id = require("Module:senseid").anchor(lang, str) | ||
if invoked then | if invoked then | ||
Line 275: | Line 326: | ||
return id | return id | ||
end | end | ||
end | |||
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object. | |||
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function. | |||
function export.check_object(typ, noErr, ...) | |||
local function fail(message) | |||
if noErr then | |||
return false | |||
else | |||
error(message, 3) | |||
end | |||
end | |||
local objs = {...} | |||
if #objs == 0 then | |||
return fail("Must provide at least one object to check.") | |||
end | |||
for _, obj in ipairs{...} do | |||
if type(obj) ~= "table" or type(obj.hasType) ~= "function" then | |||
return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.") | |||
elseif not (typ == "object" or obj:hasType(typ)) then | |||
for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do | |||
if obj:hasType(wrong_type) then | |||
return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.") | |||
end | |||
end | |||
return fail("Function expected a " .. typ .. " object, but received another type of object instead.") | |||
end | |||
end | |||
return true | |||
end | end | ||
return export | return export |
Latest revision as of 17:34, 6 September 2023
- The following documentation is located at Module:Utilities/documentation. [edit]
- Useful links: subpage list • transclusions • testcases • sandbox
This module exports various general utility functions, which can be used by other modules.
pattern_escape
pattern_escape(text)
Escapes the magic characters used in patterns (Lua's version of regular expressions). For example, "^$()%.[]*+-?"
becomes "%^%$%(%)%%%.%[%]%*%+%-%?"
.
format_categories
format_categories(categories, lang, sort_key, sort_base, force_output)
Formats a list (table) of category names. The output is a string consisting of all categories with [[Category:...]]
applied to each one, and the given sort key added. If the namespace is not the main, Appendix or Reconstruction namespaces, the output will be an empty string unless force_output
is given. If no sort key is given:
- A default one is generated by using sort_base (if given) or the current subpage name, and by removing hyphens from the beginning (so that suffixes can be sorted without a key).
- If a sort key is available for the given language, it is then used to create a sort key that follows the rules for that language.
- If the final sort key ends up being identical to the page name (which is the default sort key used by the software), then it is omitted entirely, so that it can be used in combination with DEFAULTSORT.
template_categorize
{{#invoke:utilities|template_categorize}}
This function is used by the {{categorize}}
, {{catlangname}}
and {{catlangcode}}
templates.
catfix
This function adds a "catfix", which is used on language-specific category pages to add language attributes and often script classes to all entry names. The addition of language attributes and script classes makes the entry names display better (using the language- or script-specific styles specified in MediaWiki:Common.css), which is particularly important for non-English languages that do not have consistent font support in browsers.
Language attributes are added for all languages, but script classes are only added for languages with one script listed in their data file, or for languages that have a default script listed in the catfix_script
list in Module:utilities/data. Some languages clearly have a default script, but still have other scripts listed in their data file and therefore need their default script to be specified. Others do not have a default script.
- Serbo-Croatian is regularly written in both the Latin and Cyrillic scripts. Because it uses two scripts, Serbo-Croatian cannot have a script class applied to entries in its category pages, as only one script class can be specified at a time.
- Russian is usually written in the Cyrillic script (
Cyrl
), but Braille (Brai
) is also listed in its data file. So Russian needs an entry in the catfix_script
list, so that the Cyrl
(Cyrillic) script class will be applied to entries in its category pages.
To find the scripts listed for a language, go to Module:languages and use the search box to find the data file for the language. To find out what a script code means, search the script code in Module:scripts/data.
-- Local alias for mw.text.decode; get_entities calls it to decode the entities it hands off.
local decode = mw.text.decode
-- Local alias for mw.ustring.char.
-- NOTE(review): `u` is not referenced anywhere in the visible part of this module.
local u = mw.ustring.char
-- Data table for this module, loaded with mw.loadData; catfix reads data.catfix_scripts from it.
local data = mw.loadData("Module:utilities/data")
-- NOTE(review): neither of the next two locals is referenced in the visible part of this module.
local notneeded = data.notneeded
local neededhassubpage = data.neededhassubpage
-- Table that collects this module's public functions; it is returned at the end of the file.
local export = {}
-- Returns a proxy table standing in for the module named by `text`.
-- The module is not passed to require() until the proxy is indexed or called.
-- require() is invoked again on every access, so any caching is whatever
-- require() itself provides.
function export.require_when_needed(text)
	local proxy_mt = {}

	-- Field access on the proxy: load the real module and read the field from it.
	function proxy_mt.__index(_, key)
		local module = require(text)
		return module[key]
	end

	-- Calling the proxy: load the real module and call it with the same arguments.
	function proxy_mt.__call(_, ...)
		local module = require(text)
		return module(...)
	end

	return setmetatable({}, proxy_mt)
end
-- A helper function to escape magic characters in a string, so that the result
-- can be used as a pattern which matches the original text literally.
-- Magic characters: ^$()%.[]*+-?
-- `text` may be a string, a number (escaped via its string form), or a table
-- with an `args` field (e.g. a frame), in which case args[1] is escaped.
-- Returns only the escaped string; gsub's replacement count is discarded.
function export.pattern_escape(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- Call string.gsub directly instead of as a method: numbers have no string
	-- methods, so text:gsub(...) would raise an error for the numeric patterns
	-- that plain_gsub explicitly accepts, whereas string.gsub coerces them.
	return (string.gsub(text, "[%^$()%%.[%]*+%-?]", "%%%0"))
end
-- A helper function to resolve HTML entities into plaintext.
-- Iterates over entities in a string, and decodes them into plaintext. We use iteration (instead of decoding the whole string in one go) because it means we can avoid loading the lookup string unnecessarily, as it uses more memory.
-- Anything that cannot be resolved is left in the text unchanged (the callback returns nil, which makes gsub keep the original match).
-- Returns only the resulting string; gsub's replacement count is discarded.
-- The lookup string is loaded on first use and kept in this upvalue.
local entities
function export.get_entities(text)
	return (text:gsub("&(#?[%w]-);", function(entity)
		-- The lazy [%w]- may match nothing, so "&;" arrives here with an empty name. Without this guard it would fall through to the lookup below and match the start of an arbitrary entry.
		if entity == "" then
			return nil
		-- Check if mw.text.decode is able to decode the entity.
		elseif (
			entity == "lt" or
			entity == "gt" or
			entity == "amp" or
			entity == "quot" or
			entity == "nbsp"
		) then
			return decode("&" .. entity .. ";")
		-- Catch hex entities beginning with &#X, which are valid but unsupported by mw.text.decode.
		elseif entity:sub(1, 1) == "#" then
			entity = entity:lower()
			return decode("&" .. entity .. ";")
		-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode.
		-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway.
		-- NOTE(review): the lookup matches `entity` as a prefix at an entry boundary, so whether an incomplete name can match a longer entry depends on the layout of the lookup string - confirm against the data module.
		else
			entities = entities or require("Module:utilities/data/entities")
			return entities:match("%f[%Z]" .. entity .. "(%Z+)")
		end
	end))
end
-- Converts plaintext into HTML entities for the characters listed in `set`,
-- by handing the text and `set` to mw.text.encode.
-- Unless `raw` is truthy, entities already present in `text` are first resolved
-- with export.get_entities, so that mixed input is accepted and nothing gets
-- converted twice.
function export.make_entities(text, set, raw)
	if not raw then
		text = export.get_entities(text) or text
	end
	return mw.text.encode(text, set)
end
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
-- Takes a wikitext string and returns a trimmed string.
function export.get_plaintext(text)
	-- Swap double square brackets for the control characters \1 and \2, so that the steps below can tell them apart from single brackets. They are swapped back further down.
	text = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")

	-- Remove strip markers and HTML tags.
	text = mw.text.unstrip(text)
		:gsub("<[^<>\1\2]+>", "")

	-- Parse internal links for the display text, and remove categories.
	text = require("Module:links").remove_links(text)

	-- Remove files.
	for _, falsePositive in ipairs({"File", "Image"}) do
		text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "")
	end

	-- Parse external links for the display text.
	text = text:gsub("%[(https?://[^%[%]]+)%]",
		function(capture)
			-- Keep whatever follows the first whitespace after the URL; a link with no display text is removed.
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)

	text = text
		:gsub("\1", "[[")
		:gsub("\2", "]]")

	-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
	text = text:gsub("[%[%]]", mw.text.nowiki)

	-- Strip bold, italics and soft hyphens.
	-- The soft hyphen (U+00AD) is written as its UTF-8 byte escape: typed literally it is invisible, and if the character gets lost the pattern becomes "", which matches at every position and removes nothing.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		:gsub("\194\173", "")

	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = export.get_entities(text)

	return mw.text.trim(text)
end
-- Replaces occurrences of the literal text `pattern` in `text` with `replacement`.
-- Only the pattern is escaped (with export.pattern_escape); `replacement` is
-- handed to the gsub of Module:string utilities unchanged.
-- The first argument may instead be a frame-like table with an `args` field, in
-- which case the three values are read from args 1, 2 and 3 via
-- Module:parameters, and only the resulting string is returned. When called
-- with plain values, everything that gsub returns is passed back.
function export.plain_gsub(text, pattern, replacement)
	local invoked = type(text) == "table"

	if invoked then
		if not text.args then
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end

		local args = require("Module:parameters").process(text.args, {
			[1] = {},
			[2] = {},
			[3] = { allow_empty = true },
		}, nil, "utilities", "plain_gsub")

		text, pattern, replacement = args[1], args[2], args[3]
	else
		local pattern_type = type(pattern)
		if pattern_type ~= "string" and pattern_type ~= "number" then
			error("The second argument to plain_gsub should be a string or a number.")
		end

		local replacement_type = type(replacement)
		if replacement_type ~= "string" and replacement_type ~= "number" then
			error("The third argument to plain_gsub should be a string or a number.")
		end
	end

	local escaped = export.pattern_escape(pattern)
	local gsub = require("Module:string utilities").gsub

	if invoked then
		-- Parenthesised so that only the first return value is passed back.
		return (gsub(text, escaped, replacement))
	end
	return gsub(text, escaped, replacement)
end
--[[
Format the categories with the appropriate sort key. CATEGORIES is a list of
categories. It is only read, never modified. Returns the category links
concatenated into one string, or an empty string if nothing is to be output.
	-- LANG is an object encapsulating a language; if nil, the object for
		language code 'und' (undetermined) will be used.
	-- SORT_KEY is placed in the category invocation, and indicates how the
		page will sort in the respective category. Normally this should be nil,
		and a default sort key based on the subpage name (the part after the
		colon) will be used. If it is "-", no language-specific sort key is
		generated at all (see the comment in the code).
	-- SORT_BASE lets you override the default sort key used when SORT_KEY is
		nil. Normally, this should be nil, and a language-specific default sort
		key is computed from the subpage name (e.g. for Russian this converts
		Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF,
		so that effectively ё sorts after е instead of the default Wikimedia
		sort, which (I think) is based on Unicode sort order and puts ё after я,
		the last letter of the Cyrillic alphabet).
	-- FORCE_OUTPUT forces normal output in all namespaces. Normally, nothing
		is output if the page isn't in the main, Appendix:, Reconstruction: or
		Citations: namespaces (the page Wiktionary:Sandbox is also allowed).
	-- SC is a script object; if nil, the default will be used from the sort
		base.
]]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories should be a language object.")
	end

	local title_obj = mw.title.getCurrentTitle()
	local allowedNamespaces = {
		[0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction
	}

	if not (force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox") then
		return ""
	end

	local headword_data = mw.loadData("Module:headword/data")
	local pagename = headword_data.pagename
	local pagename_defaultsort = headword_data.pagename_defaultsort

	-- Set when the supplied sort key turns out to be redundant; the tracking category is then added to the output only, so that the caller's list is left untouched.
	local track_redundant = false

	-- Generate a default sort key.
	if sort_key ~= "-" then
		if not lang then
			lang = require("Module:languages").getByCode("und")
		end
		-- Parenthesised so that only the first return value of makeSortKey is kept.
		sort_base = (lang:makeSortKey(sort_base or pagename, sc))
		if sort_key and sort_key ~= "" then
			-- Gather some statistics regarding sort keys
			if sort_key:uupper() == sort_base then
				track_redundant = true
			end
		else
			sort_key = sort_base
		end
		-- If the sort key is empty, remove it.
		if sort_key == "" then
			sort_key = nil
		end
	-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
	else
		sort_key = sort_base and sort_base:uupper() or pagename_defaultsort
	end

	-- The part after the category name is the same for every link.
	local suffix = (sort_key and "|" .. sort_key or "") .. "]]"

	local out_categories = {}
	for key, cat in ipairs(categories) do
		out_categories[key] = "[[Category:" .. cat .. suffix
	end
	if track_redundant then
		out_categories[#out_categories + 1] = "[[Category:Sort key tracking/redundant" .. suffix
	end

	return table.concat(out_categories, "")
end
-- Builds the hidden "catfix" span for a category page.
-- `lang` is expected to be a language object and `sc` an optional script object.
-- If `lang` is missing or is not a table, the call is recorded through
-- Module:debug's track function and nil is returned. Otherwise the result is a
-- string: a hidden <span id="catfix"> whose class is "CATFIX-" followed by the
-- anchor-encoded canonical language name, and whose content is produced by
-- tag_text from Module:script utilities.
function export.catfix(lang, sc)
	if not lang then
		require("Module:debug").track("catfix/no lang")
		return nil
	elseif type(lang) ~= "table" then
		require("Module:debug").track("catfix/lang not table")
		return nil
	end

	-- Look the method up before calling it, so that a table which is not a language object produces the explanatory error below rather than an "attempt to call method" failure.
	local canonicalName = lang.getCanonicalName and lang:getCanonicalName()
	if not canonicalName then
		error('The first argument to the function "catfix" should be a language object from Module:languages.')
	end

	if sc and not sc.getCode then
		error('The second argument to the function "catfix" should be a script object from Module:scripts.')
	end

	-- To add script classes to links on pages created by category boilerplate templates.
	if not sc then
		-- Fall back on the script code listed for this language in the module's data table, if there is one.
		sc = data.catfix_scripts[lang:getCode()]
		if sc then
			sc = require("Module:scripts").getByCode(sc)
		end
	end

	return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" ..
		require("Module:script utilities").tag_text(" ", lang, sc, nil) ..
		"</span>"
end
-- Template entry point for catfix.
-- Reads the parent frame's arguments through Module:parameters: parameter 1 is
-- a language code, and "sc" (parameter 2 is an alias of it) is an optional
-- script code. Both codes are resolved to objects and handed to export.catfix,
-- whose result is returned.
function export.catfix_template(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		[2] = { alias_of = "sc" },
		["sc"] = {},
	}, nil, "utilities", "catfix_template")

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	local sc = args.sc
	if sc then
		local sc_obj = require("Module:scripts").getByCode(sc)
		if not sc_obj then
			error('The script code "' .. sc .. '", provided in the second parameter, is not valid.')
		end
		sc = sc_obj
	end

	return export.catfix(lang, sc)
end
-- Not exporting because it is not used yet.
-- Classifies the date given in frame.args (1 = year, 2 = month, 3 = day) relative
-- to the current time, returning "past", "future" or "present". The date counts
-- as "present" when it lies within half a day of now in either direction.
-- The month may be an English month name or a number from 1 to 12; the number
-- may also be given as a string, which is how values typically arrive in
-- frame.args. An unrecognised month is passed to os.time as nil, as before.
local function getDateTense(frame)
	local name_num_mapping = {["January"] = 1, ["February"] = 2, ["March"] = 3, ["April"] = 4, ["May"] = 5, ["June"] = 6,
		["July"] = 7, ["August"] = 8, ["September"] = 9, ["October"] = 10, ["November"] = 11, ["December"] = 12}

	local month_arg = frame.args[2]
	local month = name_num_mapping[month_arg]
	if not month then
		-- Not a month name: accept a whole number from 1 to 12, whether it was passed as a number or as a numeric string.
		local month_num = tonumber(month_arg)
		if month_num and month_num >= 1 and month_num <= 12 and month_num == math.floor(month_num) then
			month = month_num
		end
	end

	-- No hour is given; per the Lua manual, os.time then assumes 12:00 on that day.
	local date = os.time({year = frame.args[1], day = frame.args[3], month = month})
	local today = os.time() -- the current time
	local diff = os.difftime(date, today)
	local daylength = 24 * 3600

	if diff < -daylength / 2 then
		return "past"
	elseif diff > daylength / 2 then
		return "future"
	else
		return "present"
	end
end
-- Produces a sense id from a language and a string, using the anchor function
-- of Module:senseid.
-- If called with invoke, the first argument is a frame object: the language
-- code and the string are then read from the parent frame's arguments 1 and 2,
-- and the result is an opening <li class="senseid"> tag carrying the id.
-- If called by a module, the first argument is a language object and the id
-- itself is returned.
function export.make_id(lang, str)
	local invoked = false

	if type(lang) == "table" then
		if lang.args then
			invoked = true

			local args = require("Module:parameters").process(lang:getParent().args, {
				[1] = {},
				[2] = {},
			}, nil, "utilities", "make_id")

			local langCode = args[1]
			str = args[2]

			local m_languages = require("Module:languages")
			lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1)
		elseif not lang.getCanonicalName then
			error("The first argument to make_id should be a language object.")
		end
	end

	local str_type = type(str)
	if str_type ~= "string" and str_type ~= "number" then
		error("The second argument to make_id should be a string or a number.")
	end

	local id = require("Module:senseid").anchor(lang, str)

	if not invoked then
		return id
	end
	return '<li class="senseid" id="' .. id .. '">'
end
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.
-- If typ is "object", any table with a hasType method is accepted; otherwise obj:hasType(typ) must be true for every entity.
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.
-- Returns true if every entity passes. A nil entity counts as a failure like any other wrong value.
function export.check_object(typ, noErr, ...)
	local function fail(message)
		if noErr then
			return false
		else
			error(message, 3)
		end
	end

	-- Count the arguments with select("#", ...) and walk them by index: the length operator and ipairs both stop at a nil, which would let a nil entity (and everything after it) slip through unchecked.
	local count = select("#", ...)
	if count == 0 then
		return fail("Must provide at least one object to check.")
	end

	local objs = {...}
	for i = 1, count do
		local obj = objs[i]
		if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
			return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
		elseif not (typ == "object" or obj:hasType(typ)) then
			-- Work out which kind of object was passed, to give a more helpful message.
			for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
				if obj:hasType(wrong_type) then
					return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
				end
			end
			return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
		end
	end
	return true
end
return export