https://wiki.languageinvention.com/index.php?title=Module:String&feed=atom&action=historyModule:String - Revision history2024-03-29T08:44:29ZRevision history for this page on the wikiMediaWiki 1.41.0https://wiki.languageinvention.com/index.php?title=Module:String&diff=2&oldid=prevDjpwikiadmin: Created page with "local str = {} -- Cannot include null byte. local UTF8_char = "[\1-\127\194-\244][\128-\191]*" --[[ ulen Counts UTF-8 characters. Faster than mw.ustring.len. Assumes that..."2019-04-01T04:22:00Z<p>Created page with "local str = {} -- Cannot include null byte. local UTF8_char = "[\1-\127\194-\244][\128-\191]*" --[[ ulen Counts UTF-8 characters. Faster than mw.ustring.len. Assumes that..."</p>
<p><b>New page</b></p><div>local str = {}<br />
<br />
-- Lua pattern matching one UTF-8 encoded character: a lead byte (\1-\127 for<br />
-- ASCII, or \194-\244 for a multi-byte sequence) followed by any number of<br />
-- continuation bytes (\128-\191).<br />
-- Cannot include null byte.<br />
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"<br />
<br />
--[[<br />
ulen<br />
<br />
Counts UTF-8 characters. Faster than mw.ustring.len.<br />
<br />
Assumes that the encoding is correct. Unlike mw.ustring.len, does not return nil<br />
if encoding is invalid.<br />
<br />
Does not count the bytes 192, 193, and 245-255. They are not used in UTF-8 and<br />
will not occur if the string is valid. They are replaced with the replacement<br />
character (U+FFFD) on MediaWiki pages.<br />
--]]<br />
function str.ulen(text)
	-- string.gsub returns the rewritten string followed by the number of
	-- substitutions made. Each substitution corresponds to one UTF8_char
	-- match, so the second result is the character count; the outer
	-- parentheses make sure exactly one value is returned.
	return (select(2, string.gsub(text, UTF8_char, "")))
end
<br />
--[[<br />
len<br />
<br />
This function returns the length of the target string.<br />
<br />
Usage:<br />
{{#invoke:string|len|target_string|}}<br />
OR<br />
{{#invoke:string|len|s=target_string}}<br />
<br />
Parameters<br />
s: The string whose length to report<br />
<br />
If invoked using named parameters, Mediawiki will automatically remove any leading or<br />
trailing whitespace from the target string.<br />
]]<br />
function str.len(frame)
	-- The target may arrive as s= or as the first positional argument;
	-- a missing target is measured as the empty string.
	local args = str._getParameters(frame.args, { 's' })
	return mw.ustring.len(args.s or '')
end
<br />
--[[<br />
sub<br />
<br />
This function returns a substring of the target string at specified indices.<br />
<br />
Usage:<br />
{{#invoke:string|sub|target_string|start_index|end_index}}<br />
OR<br />
{{#invoke:string|sub|s=target_string|i=start_index|j=end_index}}<br />
<br />
Parameters<br />
s: The string to return a subset of<br />
i: The first index of the substring to return, defaults to 1.<br />
j: The last index of the string to return, defaults to the last character.<br />
<br />
The first character of the string is assigned an index of 1. If either i or j<br />
is a negative value, it is interpreted the same as selecting a character by<br />
counting from the end of the string. Hence, a value of -1 is the same as<br />
selecting the last character of the string.<br />
<br />
If the requested indices are out of range for the given string, an error is<br />
reported.<br />
]]<br />
function str.sub(frame)
	local args = str._getParameters(frame.args, { 's', 'i', 'j' })
	local text = args.s or ''
	local first = tonumber(args.i) or 1
	local last = tonumber(args.j) or -1
	local length = mw.ustring.len(text)

	-- Negative indices count back from the end of the string; rewrite them
	-- as ordinary 1-based positions so a single range check covers both.
	if first < 0 then
		first = length + first + 1
	end
	if last < 0 then
		last = length + last + 1
	end

	if first > length or last > length or first < 1 or last < 1 then
		return str._error('String subset index out of range')
	elseif last < first then
		return str._error('String subset indices out of order')
	end

	return mw.ustring.sub(text, first, last)
end
<br />
--[[<br />
This function implements the features of {{str sub old}} and is kept in order<br />
to maintain these older templates.<br />
]]<br />
function str.sublength(frame)
	local args = frame.args
	-- i is a 0-based offset here, unlike the 1-based indices used by sub.
	local offset = tonumber(args.i) or 0
	local count = tonumber(args.len)

	-- Without a usable len the end index stays nil, which lets
	-- mw.ustring.sub run to the end of the string.
	local last
	if count then
		last = offset + count
	end

	return mw.ustring.sub(args.s, offset + 1, last)
end
<br />
--[[<br />
match<br />
<br />
This function returns a substring from the source string that matches a<br />
specified pattern.<br />
<br />
Usage:<br />
{{#invoke:string|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}<br />
OR<br />
{{#invoke:string|match|s=source_string|pattern=pattern_string|start=start_index<br />
|match=match_number|plain=plain_flag|nomatch=nomatch_output}}<br />
<br />
Parameters<br />
s: The string to search<br />
pattern: The pattern or string to find within the string<br />
start: The index within the source string to start the search. The first<br />
character of the string has index 1. Defaults to 1.<br />
match: In some cases it may be possible to make multiple matches on a single<br />
string. This specifies which match to return, where the first match is<br />
match= 1. If a negative number is specified then a match is returned<br />
counting from the last match. Hence match = -1 is the same as requesting<br />
the last match. Defaults to 1.<br />
plain: A flag indicating that the pattern should be understood as plain<br />
text. Defaults to false.<br />
nomatch: If no match is found, output the "nomatch" value rather than an error.<br />
<br />
If invoked using named parameters, Mediawiki will automatically remove any leading or<br />
trailing whitespace from each string. In some circumstances this is desirable, in<br />
other cases one may want to preserve the whitespace.<br />
<br />
If the match_number or start_index are out of range for the string being queried, then<br />
this function generates an error. An error is also generated if no match is found.<br />
If one adds the parameter ignore_errors=true, then the error will be suppressed and<br />
an empty string will be returned on any failure.<br />
<br />
For information on constructing Lua patterns, a form of [regular expression], see:<br />
<br />
* http://www.lua.org/manual/5.1/manual.html#5.4.1<br />
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns<br />
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns<br />
<br />
]]<br />
function str.match(frame)
	local new_args = str._getParameters(frame.args, { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' })
	local s = new_args['s'] or ''
	local start = tonumber(new_args['start']) or 1
	local plain_flag = str._getBoolean(new_args['plain'] or false)
	local pattern = new_args['pattern'] or ''
	local match_index = math.floor(tonumber(new_args['match']) or 1)
	local nomatch = new_args['nomatch']

	if s == '' then
		return str._error('Target string is empty')
	end
	if pattern == '' then
		return str._error('Pattern string is empty')
	end
	-- start may be negative (counted from the end of the string), but it may
	-- be neither 0 nor further from either end than the string is long.
	if math.abs(start) < 1 or math.abs(start) > mw.ustring.len(s) then
		return str._error('Requested start is out of range')
	end
	if match_index == 0 then
		return str._error('Match index is out of range')
	end
	if plain_flag then
		pattern = str.pattern_escape(pattern)
	end

	local result
	if match_index == 1 then
		-- Finding the first match is the simple case: mw.ustring.match takes
		-- the start position (positive or negative) directly.
		result = mw.ustring.match(s, pattern, start)
	else
		-- gmatch has no start argument, so cut the string down to the part
		-- that should be searched. This must also happen for a negative
		-- start: the previous "start > 1" test silently searched the whole
		-- string in that case, disagreeing with the match == 1 branch.
		if start ~= 1 then
			s = mw.ustring.sub(s, start)
		end

		local iterator = mw.ustring.gmatch(s, pattern)
		if match_index > 0 then
			-- Forward search: count matches down until the requested one.
			for w in iterator do
				match_index = match_index - 1
				if match_index == 0 then
					result = w
					break
				end
			end
		else
			-- Reverse search: collect every match, then index from the end.
			local result_table = {}
			local count = 1
			for w in iterator do
				result_table[count] = w
				count = count + 1
			end

			-- count is one past the last stored match, so match_index = -1
			-- selects the final entry.
			result = result_table[count + match_index]
		end
	end

	if result == nil then
		if nomatch == nil then
			return str._error('Match not found')
		else
			return nomatch
		end
	else
		return result
	end
end
<br />
--[[<br />
pos<br />
<br />
This function returns a single character from the target string at position pos.<br />
<br />
Usage:<br />
{{#invoke:string|pos|target_string|index_value}}<br />
OR<br />
{{#invoke:string|pos|target=target_string|pos=index_value}}<br />
<br />
Parameters<br />
target: The string to search<br />
pos: The index for the character to return<br />
<br />
If invoked using named parameters, Mediawiki will automatically remove any leading or<br />
trailing whitespace from the target string. In some circumstances this is desirable, in<br />
other cases one may want to preserve the whitespace.<br />
<br />
The first character has an index value of 1.<br />
<br />
If one requests a negative value, this function will select a character by counting backwards<br />
from the end of the string. In other words pos = -1 is the same as asking for the last character.<br />
<br />
A requested value of zero, or a value greater than the length of the string returns an error.<br />
]]<br />
function str.pos(frame)
	local args = str._getParameters(frame.args, { 'target', 'pos' })
	local text = args.target or ''
	-- A missing or non-numeric pos becomes 0, which is rejected below.
	local index = tonumber(args.pos) or 0

	local out_of_range = index == 0 or math.abs(index) > mw.ustring.len(text)
	if out_of_range then
		return str._error('String index out of range')
	end

	-- Using the same index for both ends selects exactly one character;
	-- negative values count from the end of the string.
	return mw.ustring.sub(text, index, index)
end
<br />
--[[<br />
str_find<br />
<br />
This function duplicates the behavior of {{str_find}}, including all of its quirks.<br />
This is provided in order to support existing templates, but is NOT RECOMMENDED for<br />
new code and templates. New code is recommended to use the "find" function instead.<br />
<br />
Returns the first index in "source" that is a match to "target". Indexing is 1-based,<br />
and the function returns -1 if the "target" string is not present in "source".<br />
<br />
Important Note: If the "target" string is empty / missing, this function returns a<br />
value of "1", which is generally unexpected behavior, and must be accounted for<br />
separately.<br />
]]<br />
function str.str_find(frame)
	local args = str._getParameters(frame.args, { 'source', 'target' })
	local haystack = args.source or ''
	local needle = args.target or ''

	-- Legacy quirk kept on purpose: an empty or missing target reports 1.
	if needle == '' then
		return 1
	end

	-- Plain-text search from the first character; only the start index of
	-- the match is kept, and -1 signals "not present".
	local found_at = mw.ustring.find(haystack, needle, 1, true)
	return found_at or -1
end
<br />
--[[<br />
find<br />
<br />
This function allows one to search for a target string or pattern within another<br />
string.<br />
<br />
Usage:<br />
{{#invoke:string|find|source_str|target_string|start_index|plain_flag}}<br />
OR<br />
{{#invoke:string|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}<br />
<br />
Parameters<br />
source: The string to search<br />
target: The string or pattern to find within source<br />
start: The index within the source string to start the search, defaults to 1<br />
plain: Boolean flag indicating that target should be understood as plain<br />
text and not as a Lua style regular expression, defaults to true<br />
<br />
If invoked using named parameters, Mediawiki will automatically remove any leading or<br />
trailing whitespace from the parameter. In some circumstances this is desirable, in<br />
other cases one may want to preserve the whitespace.<br />
<br />
This function returns the first index >= "start" where "target" can be found<br />
within "source". Indices are 1-based. If "target" is not found, then this<br />
function returns an empty string. If either "source" or "target" are missing / empty, this<br />
function also returns an empty string.<br />
<br />
This function should be safe for UTF-8 strings.<br />
]]<br />
function str.find(frame)
	local params = {
		[1] = { required = true },
		[2] = { required = true },
		[3] = { type = "number" },
		[4] = { type = "boolean" },
	}

	local args = require("Module:parameters").process(frame.args, params)

	-- mw.ustring.find returns the start index, the end index and then any
	-- captures. Scribunto concatenates every value a module function returns,
	-- so passing them all through rendered e.g. "35" for a match spanning
	-- positions 3-5. Only the start index is the documented result.
	-- NOTE(review): args[4] is passed through as-is, so an omitted plain flag
	-- is presumably falsy (pattern search) rather than the documented default
	-- of true -- confirm against Module:parameters before changing it.
	local first_index = mw.ustring.find(args[1], args[2], args[3], args[4])

	-- Documented contract: an empty string when the target is not found.
	return first_index or ''
end
<br />
--[[<br />
replace<br />
<br />
This function allows one to replace a target string or pattern within another<br />
string.<br />
<br />
Usage:<br />
{{#invoke:string|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}<br />
OR<br />
{{#invoke:string|replace|source=source_string|pattern=pattern_string|replace=replace_string|<br />
count=replacement_count|plain=plain_flag}}<br />
<br />
Parameters<br />
source: The string to search<br />
pattern: The string or pattern to find within source<br />
replace: The replacement text<br />
count: The number of occurrences to replace, defaults to all.<br />
plain: Boolean flag indicating that pattern should be understood as plain<br />
text and not as a Lua style regular expression, defaults to true<br />
]]<br />
function str.replace(frame)
	local new_args = str._getParameters(frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' })
	local source_str = new_args['source'] or ''
	local pattern = new_args['pattern'] or ''
	local replace = new_args['replace'] or ''
	-- A nil count makes gsub replace every occurrence.
	local count = tonumber(new_args['count'])

	-- Default to plain-text matching only when the flag is absent. The old
	-- "value or true" form also turned an explicit boolean false into true.
	local plain = new_args['plain']
	if plain == nil then
		plain = true
	end

	-- Nothing to search in, or nothing to search for: return the input as-is.
	if source_str == '' or pattern == '' then
		return source_str
	end
	plain = str._getBoolean(plain)

	if plain then
		pattern = str.pattern_escape(pattern)
		-- In the replacement text only "%" is special, so only it needs
		-- escaping.
		replace = mw.ustring.gsub(replace, "%%", "%%%%")
	end

	-- Keep only the rewritten string; gsub's replacement count is dropped.
	local result = mw.ustring.gsub(source_str, pattern, replace, count)

	return result
end
<br />
<br />
--[[
gsub

Template-facing wrapper around mw.ustring.gsub. Positional arguments are the
source text, the pattern, the replacement and an optional numeric limit on the
number of replacements. The first three are required but may be empty.
]]
function str.gsub(frame)
	local process = require("Module:parameters").process
	local args = process(frame.args, {
		[1] = { required = true, allow_empty = true },
		[2] = { required = true, allow_empty = true },
		[3] = { required = true, allow_empty = true },
		[4] = { type = "number" },
	})

	-- Assigning to a single local drops gsub's second result (the count),
	-- so only the rewritten text is returned.
	local replaced = mw.ustring.gsub(args[1], args[2], args[3], args[4])
	return replaced
end
<br />
<br />
--[[<br />
simple function to pipe string.rep to templates.<br />
]]<br />
<br />
function str.rep(frame)
	-- First positional argument: the text to repeat (empty when missing).
	-- Second positional argument: how many times to repeat it.
	local raw_count = frame.args[2]
	local repetitions = tonumber(raw_count)

	if repetitions then
		return string.rep(frame.args[1] or '', repetitions)
	end

	return str._error('function rep expects a number as second parameter, received "' .. (raw_count or '') .. '"')
end
<br />
--[[
lower

Returns the first positional argument converted to lower case with
mw.ustring.lower. A missing argument is treated as the empty string.
]]
function str.lower(frame)
	return mw.ustring.lower(frame.args[1] or '')
end

-- Short alias for lower.
str.lc = str.lower
<br />
--[[<br />
Helper function that populates the argument list given that user may need to use a mix of<br />
named and unnamed parameters. This is relevant because named parameters are not<br />
identical to unnamed parameters due to string trimming, and when dealing with strings<br />
we sometimes want to either preserve or remove that whitespace depending on the application.<br />
]]<br />
function str._getParameters(frame_args, arg_list)
	local resolved = {}
	-- Index of the next unnamed argument that has not been consumed yet.
	local next_positional = 1

	for _, name in ipairs(arg_list) do
		local value = frame_args[name]
		if value == nil then
			-- No named form was supplied: take the next positional argument.
			-- The slot is consumed even if it turns out to be nil as well.
			value = frame_args[next_positional]
			next_positional = next_positional + 1
		end
		resolved[name] = value
	end

	return resolved
end
<br />
--[[<br />
Helper function to handle error messages.<br />
]]<br />
function str._error(error_str)
	-- Error options are read from the arguments of the current frame, not
	-- passed in by the caller.
	local args = mw.getCurrentFrame().args
	local error_category = args.error_category or 'Errors reported by Module String'
	local ignore_errors = args.ignore_errors or false
	local no_category = args.no_category or false

	-- With ignore_errors set, failures collapse to an empty string.
	if str._getBoolean(ignore_errors) then
		return ''
	end

	local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'

	-- Prefix the tracking category unless it is blank or explicitly disabled.
	if error_category ~= '' and not str._getBoolean(no_category) then
		message = '[[Category:' .. error_category .. ']]' .. message
	end

	return message
end
<br />
--[[<br />
Helper Function to interpret boolean strings<br />
]]<br />
function str._getBoolean(boolean_str)
	local kind = type(boolean_str)

	-- Real booleans pass straight through.
	if kind == 'boolean' then
		return boolean_str
	end

	-- Anything that is neither a boolean nor a string is a caller error.
	if kind ~= 'string' then
		error('No boolean value found')
	end

	-- Case-insensitively, "false", "no", "0" and the empty string are false;
	-- every other string counts as true.
	local lowered = boolean_str:lower()
	return not (lowered == 'false' or lowered == 'no' or lowered == '0' or lowered == '')
end
<br />
--[[<br />
Helper function that escapes all pattern characters – ().%+-*?[^$] – so that they will be treated<br />
as plain text.<br />
]]<br />
function str.pattern_escape(pattern_str)
	-- Character class of every pattern-magic character; each occurrence is
	-- captured and re-emitted with a leading "%".
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	local kind = type(pattern_str)

	if kind == "table" then
		-- Invoked from wikitext: the argument is a frame object.
		local frame = pattern_str
		if not frame.args then
			error("First argument to pattern_escape should be a string, a number, or a frame object.")
		end

		-- Use the frame's own first argument, falling back to the parent
		-- frame's first argument when it is absent.
		local subject = frame.args[1] or frame:getParent().args[1]

		-- The parentheses drop gsub's count so that exactly one value is
		-- returned to the invoking page.
		return (mw.ustring.gsub(subject, magic_chars, "%%%1"))
	end

	if kind ~= "string" and kind ~= "number" then
		error("First argument to pattern_escape should be a string or a number.")
	end

	-- Called from Lua: both gsub results (escaped string, number of escapes)
	-- are returned. Callers that forward the result as the last item of an
	-- argument list should wrap the call in parentheses to drop the count.
	return mw.ustring.gsub(pattern_str, magic_chars, "%%%1")
end
<br />
--[[
count

Counts the occurrences of a pattern in a string. Intended for use from Lua.

Parameters
text: The string (or number) to search.
pattern: The ustring pattern (or number) to count.
plain: If truthy, pattern is escaped first so that it is matched as plain text.

Returns the number of matches. Raises an error if text or pattern is neither a
string nor a number.
]]
function str.count(text, pattern, plain)
	if not (type(text) == "string" or type(text) == "number") then
		error('The first argument to the function "count" must be a string or a number, not a ' .. type(text) .. '.')
	end

	-- This message used to be a copy of the one above: it named the first
	-- argument and reported the type of text rather than of pattern.
	if not (type(pattern) == "string" or type(pattern) == "number") then
		error('The second argument to the function "count" must be a string or a number, not a ' .. type(pattern) .. '.')
	end

	if plain then
		pattern = str.pattern_escape(pattern)
	end

	-- gsub's second result is the number of substitutions, i.e. of matches.
	local _, count = mw.ustring.gsub(text, pattern, "")

	return count
end
<br />
--[[
plain_gsub

gsub in which the pattern is treated as plain text. The replacement is passed
to mw.ustring.gsub unchanged.

Callable from Lua as plain_gsub(text, pattern, replacement), or through
#invoke with a frame object whose positional arguments 1-3 hold the same
values. From Lua it returns both gsub results (new string and count); when
invoked it returns only the new string.
]]
function str.plain_gsub(text, pattern, replacement)
	local from_invoke = type(text) == "table"

	if from_invoke then
		local frame = text
		if not frame.args then
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end

		local args = require("Module:parameters").process(frame.args, {
			[1] = {},
			[2] = {},
			[3] = { allow_empty = true },
		})
		text, pattern, replacement = args[1], args[2], args[3]
	else
		local pattern_kind = type(pattern)
		if pattern_kind ~= "string" and pattern_kind ~= "number" then
			error("The second argument to plain_gsub should be a string or a number.")
		end

		local replacement_kind = type(replacement)
		if replacement_kind ~= "string" and replacement_kind ~= "number" then
			error("The third argument to plain_gsub should be a string or a number.")
		end
	end

	-- Keep only the escaped pattern; pattern_escape may also return a count.
	local escaped = str.pattern_escape(pattern)

	if from_invoke then
		-- Parentheses truncate gsub's results to the new string alone.
		return (mw.ustring.gsub(text, escaped, replacement))
	end
	return mw.ustring.gsub(text, escaped, replacement)
end
<br />
--[[
matchToArray

Collects every match of a pattern in a string.

Called from Lua as matchToArray(text, pattern) it returns an array of the
matches, or nil if there are none. Called through #invoke (positional
arguments 1 and 2, both required) it returns the matches joined with ", ",
or an empty string if there are none. If the pattern has captures, only the
first capture of each match is kept.
]]
function str.matchToArray(text, pattern)
	local from_invoke = type(text) == "table"

	if from_invoke then
		local frame = text
		if not frame.args then
			error("If the first argument to matchToArray is a table, it should be a frame object.")
		end

		local args = require("Module:parameters").process(frame.args, {
			[1] = { required = true },
			[2] = { required = true },
		})
		text, pattern = args[1], args[2]
	else
		local pattern_kind = type(pattern)
		if pattern_kind ~= "string" and pattern_kind ~= "number" then
			error("The second argument to matchToArray should be a string or a number.")
		end
	end

	local matches = {}
	for match in mw.ustring.gmatch(text, pattern) do
		matches[#matches + 1] = match
	end

	if from_invoke then
		-- Concatenating an empty array yields "", which is the no-match
		-- result for templates.
		return table.concat(matches, ", ")
	end

	if #matches > 0 then
		return matches
	end
	return nil
end
<br />
--[=[<br />
Similar to gmatch, but it returns the count of the match in addition to the<br />
list of captures, something like ipairs().<br />
<br />
If the pattern doesn't contain any captures, the whole match is returned.<br />
<br />
Invoke thus:<br />
<br />
for i, whole_match in require("Module:string").imatch(text, pattern) do<br />
[ do something with i and whole_match ]<br />
end<br />
<br />
or<br />
<br />
for i, capture1[, capture2[, capture3[, ...]]] in require("Module:string").imatch(text, pattern) do<br />
[ do something with i and capture1 ]<br />
end<br />
<br />
For example, this code<br />
for i, whole_match in require("Module:string").imatch("a b c", "[a-z]") do<br />
mw.log(i, whole_match)<br />
end<br />
will log<br />
1 a<br />
2 b<br />
3 c<br />
]=]<br />
-- Parameters:
--   text, pattern: the string to search and the pattern to search for.
--   pos: position at which to start searching (defaults to the beginning).
--   plain: if truthy, pattern is matched as plain text.
--   use_basic_Lua_function: if truthy, search with string.find (byte-based)
--     instead of mw.ustring.find.
function str.imatch(text, pattern, pos, plain, use_basic_Lua_function)
	local i = 0
	pos = pos or 0

	if plain then
		-- A plain find never returns captures, and wrapping the text in "()"
		-- would make the parentheses part of the literal search, so a plain
		-- search used to yield nothing at all. Escape the text and run it as
		-- an ordinary pattern with the whole match captured instead.
		pattern = "(" .. (str.pattern_escape(pattern)) .. ")"
		plain = false
	elseif not string.find(pattern, "%b()") then
		-- No capture group present: capture the whole match so there is
		-- always at least one value to hand back after the counter.
		pattern = "(" .. pattern .. ")"
	end

	local find = use_basic_Lua_function and string.find or mw.ustring.find

	return function()
		local return_values = { find(text, pattern, pos, plain) }
		local first, last = return_values[1], return_values[2]

		-- Stop when nothing (more) matches. A match that starts before the
		-- requested position can only happen when the search position ran
		-- past the end of the string and was clamped back, which also means
		-- the string is exhausted.
		if return_values[3] == nil or first < pos then
			return
		end

		i = i + 1
		if last < first then
			-- Zero-length match: step over one character, otherwise the same
			-- empty match would be found again forever.
			pos = first + 1
		else
			pos = last + 1
		end

		-- Skip the first two returned values, which are the indices of the
		-- whole match.
		return i, unpack(return_values, 3)
	end
end
<br />
--[[
escapebytes

Rewrites every byte matched by "." in s as a backslash followed by the byte's
value as three decimal digits, zero-padded (for example "A" becomes \065).
]]
function str.escapebytes(s)
	local function to_decimal_escape(char)
		return string.format('\\%03d', string.byte(char))
	end

	-- Assigning to a single local keeps the rewritten string and drops
	-- gsub's replacement count.
	local escaped = string.gsub(s, '.', to_decimal_escape)
	return escaped
end
<br />
--[[
URIdecode

Template-facing wrapper around mw.uri.decode. The first positional argument is
the text to decode; the second is the encoding type, "PATH" when omitted.
]]
function str.URIdecode(frame)
	local args = frame.args
	local enctype = args[2] or "PATH"
	return mw.uri.decode(args[1], enctype)
end
<br />
return str</div>Djpwikiadmin