Module:Links/data: Difference between revisions
Jump to navigation
Jump to search
Djpwikiadmin (talk | contribs) (Created page with "local data = {} data.high_memory_entries = { "a", "animal", "book", "coffee", "do", "e", "language", "night", "smoke", "son", "sun", "water", "wind", } local U...") |
Djpwikiadmin (talk | contribs) No edit summary |
||
Line 1: | Line 1: | ||
local encode = mw.text.encode | |||
local u = mw.ustring.char | |||
local data = {} | local data = {} | ||
data. | data.ignore_cap = { | ||
" | ["ko"] = true, | ||
} | } | ||
data.phonetic_extraction = { | |||
["th"] = "Module:th", | |||
["km"] = "Module:km", | |||
} | |||
data.pos_tags = { | |||
["a"] = "adjective", | |||
["adv"] = "adverb", | |||
["int"] = "interjection", | |||
["n"] = "noun", | |||
["pron"] = "pronoun", | |||
["v"] = "verb", | |||
["vi"] = "intransitive verb", | |||
["vt"] = "transitive verb", | |||
["vti"] = "transitive and intransitive verb", | |||
} | |||
-- Scheme for using unsupported characters in titles. | |||
data.unsupported_characters = { | |||
["#"] = "`num`", | |||
["%"] = "`percnt`", -- only escaped in percent encoding | |||
["&"] = "`amp`", -- only escaped in HTML entities | |||
["."] = "`period`", -- only escaped in dot-slash notation | |||
["<"] = "`lt`", | |||
[">"] = "`gt`", | |||
["["] = "`lsqb`", | |||
["]"] = "`rsqb`", | |||
["_"] = "`lowbar`", | |||
["`"] = "`grave`", -- used to enclose unsupported characters in the scheme, so a raw use in an unsupported title must be escaped to prevent interference | |||
["{"] = "`lcub`", | |||
["|"] = "`vert`", | |||
["}"] = "`rcub`", | |||
["~"] = "`tilde`", -- only escaped when 3 or more are consecutive | |||
["\239\191\189"] = "`repl`" -- replacement character U+FFFD, which can't be typed directly here due to an abuse filter | |||
} | |||
-- | -- Manually specified unsupported titles. Only put titles here if there is a different reason why they are unsupported, and not just because they contain one of the unsupported characters above. | ||
data.unsupported_titles = { | data.unsupported_titles = { | ||
[" "] = "Space", | [" "] = "Space", | ||
[" | ["&"] = "`amp`amp;", | ||
[" | ["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλεπικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish", | ||
["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok", | ["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok", | ||
[ | [u(0x1680)] = "Ogham space", | ||
[ | [u(0x3000)] = "Ideographic space" | ||
} | } | ||
data.display_change = { | |||
[" "] = "] [", -- Space | |||
[u(0x00A0)] = "]" .. u(0x00A0) .. "[", -- No-break space | |||
[u(0x180E)] = "]" .. u(0x180E) .. "[", -- Mongolian vowel separator | |||
[u(0x2000)] = "]" .. u(0x2000) .. "[", -- En quad | |||
[u(0x2001)] = "]" .. u(0x2001) .. "[", -- Em quad | |||
[u(0x2002)] = "]" .. u(0x2002) .. "[", -- En space | |||
[u(0x2003)] = "]" .. u(0x2003) .. "[", -- Em space | |||
[u(0x2004)] = "]" .. u(0x2004) .. "[", -- Three-per-em space | |||
[u(0x2005)] = "]" .. u(0x2005) .. "[", -- Four-per-em space | |||
[u(0x2006)] = "]" .. u(0x2006) .. "[", -- Six-per-em space | |||
[u(0x2007)] = "]" .. u(0x2007) .. "[", -- Figure space | |||
[u(0x2008)] = "]" .. u(0x2008) .. "[", -- Punctuation space | |||
[u(0x2009)] = "]" .. u(0x2009) .. "[", -- Thin space | |||
[u(0x200A)] = "]" .. u(0x200A) .. "[", -- Hair space | |||
[u(0x202F)] = "]" .. u(0x202F) .. "[", -- Narrow no-break space | |||
[u(0x205F)] = "]" .. u(0x205F) .. "[", -- Medium mathematical space | |||
[u(0x3000)] = "]" .. u(0x3000) .. "[", -- Ideographic space | |||
} | |||
-- Valid URI schemes in external links, which therefore have to be escaped if used in entry names (e.g. [[sms:a]]). | |||
local uri_schemes = { | |||
"bitcoin:", | |||
"ftp://", | |||
"ftps://", | |||
"geo:", | |||
"git://", | |||
"gopher://", | |||
"http://", | |||
"https://", | |||
"irc:", | |||
"ircs:", | |||
"magnet:", | |||
"mailto:", | |||
"matrix:", | |||
"mms://", | |||
"news:", | |||
"nntp://", | |||
"redis://", | |||
"sftp://", | |||
"sip:", | |||
"sips:", | |||
"sms:", | |||
"ssh://", | |||
"svn://", | |||
"tel:", | |||
"telnet://", | |||
"urn:", | |||
"worldwind://", | |||
"xmpp:", | |||
} | |||
-- Convert into lookup table. | |||
local uri_lookup = {} | |||
for _, scheme in ipairs(uri_schemes) do | |||
uri_lookup[scheme] = encode(scheme, ":") | |||
end | end | ||
data.uri_schemes = uri_lookup | |||
return data | return data |
Latest revision as of 17:05, 6 September 2023
Documentation for this module may be created at Module:Links/data/documentation
-- Local aliases for the two mw library functions used further down.
local encode = mw.text.encode
local u = mw.ustring.char
-- Table that this module returns; the sections below fill in its fields.
local data = {}
-- Set of codes (code -> true). Currently only "ko" is listed.
-- NOTE(review): presumably these are language codes for which some
-- capitalisation handling is skipped -- confirm against the consuming module.
data.ignore_cap = { ko = true }
-- Maps a code to the name of a module.
-- NOTE(review): presumably the named module performs phonetic extraction for
-- that language -- confirm against the consuming module.
data.phonetic_extraction = {
	th = "Module:th",
	km = "Module:km",
}
-- Abbreviated part-of-speech tag -> full part-of-speech name.
data.pos_tags = {
	a    = "adjective",
	adv  = "adverb",
	int  = "interjection",
	n    = "noun",
	pron = "pronoun",
	v    = "verb",
	vi   = "intransitive verb",
	vt   = "transitive verb",
	vti  = "transitive and intransitive verb",
}
-- Scheme for using unsupported characters in titles.
-- Each key is a single character; each value is that character's escape code,
-- which is always a short name enclosed in a pair of backticks.
data.unsupported_characters = {
["#"] = "`num`",
["%"] = "`percnt`", -- only escaped in percent encoding
["&"] = "`amp`", -- only escaped in HTML entities
["."] = "`period`", -- only escaped in dot-slash notation
["<"] = "`lt`",
[">"] = "`gt`",
["["] = "`lsqb`",
["]"] = "`rsqb`",
["_"] = "`lowbar`",
["`"] = "`grave`", -- used to enclose unsupported characters in the scheme, so a raw use in an unsupported title must be escaped to prevent interference
["{"] = "`lcub`",
["|"] = "`vert`",
["}"] = "`rcub`",
["~"] = "`tilde`", -- only escaped when 3 or more are consecutive
-- The key below is written as decimal byte escapes: the three bytes are the UTF-8 encoding of U+FFFD.
["\239\191\189"] = "`repl`" -- replacement character U+FFFD, which can't be typed directly here due to an abuse filter
}
-- Manually specified unsupported titles. Only put titles here if there is a different reason why they are unsupported, and not just because they contain one of the unsupported characters above.
-- Each key is the exact title text; each value is the replacement string stored for it.
data.unsupported_titles = {
[" "] = "Space",
-- NOTE(review): under the escape scheme in data.unsupported_characters the value
-- "`amp`amp;" corresponds to the text "&amp;", yet the key here is a bare "&".
-- Confirm the key was not meant to be the five-character string "&amp;"
-- (it may have been entity-decoded when this page was copied).
["&"] = "`amp`amp;",
["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλεπικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish",
["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok",
-- The two keys below are built with u() from code points rather than typed literally.
[u(0x1680)] = "Ogham space",
[u(0x3000)] = "Ideographic space"
}
-- Maps each listed whitespace character to the same character wrapped as
-- "]" .. char .. "[".
-- NOTE(review): presumably used so that brackets are closed before and
-- reopened after the whitespace in displayed text -- confirm against the
-- consuming module.
do
	-- Code points of the non-ASCII characters to wrap; the plain space is
	-- seeded as a literal entry below.
	local wrapped_code_points = {
		0x00A0, -- No-break space
		0x180E, -- Mongolian vowel separator
		0x2000, -- En quad
		0x2001, -- Em quad
		0x2002, -- En space
		0x2003, -- Em space
		0x2004, -- Three-per-em space
		0x2005, -- Four-per-em space
		0x2006, -- Six-per-em space
		0x2007, -- Figure space
		0x2008, -- Punctuation space
		0x2009, -- Thin space
		0x200A, -- Hair space
		0x202F, -- Narrow no-break space
		0x205F, -- Medium mathematical space
		0x3000, -- Ideographic space
	}

	local display_change = {
		[" "] = "] [", -- Space
	}
	for index = 1, #wrapped_code_points do
		local char = u(wrapped_code_points[index])
		display_change[char] = "]" .. char .. "["
	end

	data.display_change = display_change
end
-- URI scheme prefixes that are valid in external links. An entry name that
-- starts with one of these (e.g. [[sms:a]]) therefore has to be escaped.
-- Kept as a plain sequence in alphabetical order.
local uri_schemes = {
	"bitcoin:", "ftp://", "ftps://", "geo:",
	"git://", "gopher://", "http://", "https://",
	"irc:", "ircs:", "magnet:", "mailto:",
	"matrix:", "mms://", "news:", "nntp://",
	"redis://", "sftp://", "sip:", "sips:",
	"sms:", "ssh://", "svn://", "tel:",
	"telnet://", "urn:", "worldwind://", "xmpp:",
}
-- Turn the scheme list into a table keyed by the scheme string itself. The
-- value stored for each scheme is the result of passing it to encode() with
-- ":" as the set of characters to encode.
local uri_lookup = {}
for index = 1, #uri_schemes do
	local scheme = uri_schemes[index]
	uri_lookup[scheme] = encode(scheme, ":")
end
data.uri_schemes = uri_lookup

-- Hand the completed data table back to whoever loads this module.
return data