Module:Syllables: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
m (Text replacement - "(\[\[)w:([^\|\]\[]+?)(\]\])" to "\1Wikipedia:\2|\2\3") |
||
Line 16: | Line 16: | ||
}, | }, | ||
["ca"] = { -- Catalan has diphthongs, but they are generally transcribed using | ["ca"] = { -- Catalan has diphthongs, but they are generally transcribed using | ||
-- /w/ and /j/, so do not need to be listed (see [[ | -- /w/ and /j/, so do not need to be listed (see [[Wikipedia:Catalan language#Diphthongs and triphthongs|Catalan language#Diphthongs and triphthongs]]. | ||
}, | }, | ||
["cs"] = { -- [[ | ["cs"] = { -- [[Wikipedia:Czech phonology#Diphthongs|Czech phonology#Diphthongs]] | ||
"[aeo]u", | "[aeo]u", | ||
}, | }, | ||
Line 31: | Line 31: | ||
"[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects? | "[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects? | ||
}, | }, | ||
["fi"] = { -- [[ | ["fi"] = { -- [[Wikipedia:Finnish phonology#Diphthongs|Finnish phonology#Diphthongs]] | ||
"[ɑeouyæø]i", | "[ɑeouyæø]i", | ||
"[ɑoei]u", | "[ɑoei]u", | ||
Line 40: | Line 40: | ||
}, | }, | ||
["fr"] = { -- French has diphthongs, but they are transcribed | ["fr"] = { -- French has diphthongs, but they are transcribed | ||
}, -- with semivowel symbols: [[ | }, -- with semivowel symbols: [[Wikipedia:French phonology#Glides and diphthongs|French phonology#Glides and diphthongs]]. | ||
["grc"] = { | ["grc"] = { | ||
"[aeyo]i", | "[aeyo]i", | ||
Line 50: | Line 50: | ||
"aː*[eo]", | "aː*[eo]", | ||
}, | }, | ||
["is"] = { -- [[ | ["is"] = { -- [[Wikipedia:Icelandic phonology#Vowels|Icelandic phonology#Vowels]] | ||
"[aeø][iɪy]", -- Wikipedia is oddly specific about the second element: ei and ai, but øɪ. | "[aeø][iɪy]", -- Wikipedia is oddly specific about the second element: ei and ai, but øɪ. | ||
"[ao]u", | "[ao]u", | ||
Line 74: | Line 74: | ||
["ru"] = { -- No diphthongs, properly speaking; sequences of a vowel and /j/ though. | ["ru"] = { -- No diphthongs, properly speaking; sequences of a vowel and /j/ though. | ||
}, | }, | ||
["sq"] = { -- [[ | ["sq"] = { -- [[Wikipedia:Albanian language#Vowels|Albanian language#Vowels]] doesn't mention anything about diphthongs. | ||
}, | }, | ||
["ug"] = { -- No diphthongs. | ["ug"] = { -- No diphthongs. |
Latest revision as of 04:59, 30 August 2024
- The following documentation is located at Module:Syllables/documentation. [edit]
- Useful links: subpage list • transclusions • testcases • sandbox
Testcases
{{#invoke:syllables|countVowels|/ɛː.e.lí.o͜i.o/}} {{#invoke:syllables|countVowels|/ˈhaɪweɪ/}} {{#invoke:syllables|countVowels|/ˈloʊɚ/}} {{#invoke:syllables|countVowels|/ænˈdreɪə/}} {{#invoke:syllables|countVowels|/ˈbaɪəʊ/}} {{#invoke:syllables|countVowels|/ˈɑːmiɑi̯sˌmɑjoi̯tus/}} {{#invoke:syllables|countVowels|/vr̩x/}} {{#invoke:syllables|countVowels|/vl̩k/}} {{#invoke:syllables|countVowels|/ˈvr̩ːba/}} {{#invoke:syllables|countVowels|/ˈɛi̯ərə(n)/}} {{#invoke:syllables|countVowels|/ˈleːu̯ə(n)/}}
- The text "/ɛː.e.lí.o͜i.o/" contains 6 vowels, 0 vowel sequences and 5 vowels or vowels and diphthongs.
- The text "/ˈhaɪweɪ/" contains 4 vowels, 0 vowel sequences and 4 vowels or vowels and diphthongs.
- The text "/ˈloʊɚ/" contains 3 vowels, 0 vowel sequences and 3 vowels or vowels and diphthongs.
- The text "/ænˈdreɪə/" contains 4 vowels, 0 vowel sequences and 4 vowels or vowels and diphthongs.
- The text "/ˈbaɪəʊ/" contains 4 vowels, 0 vowel sequences and 4 vowels or vowels and diphthongs.
- The text "/ˈɑːmiɑi̯sˌmɑjoi̯tus/" contains 8 vowels, 0 vowel sequences and 6 vowels or vowels and diphthongs.
- The text "/vr̩x/" contains 0 vowels, 0 vowel sequences and 0 vowels or vowels and diphthongs.
- The text "/vl̩k/" contains 0 vowels, 0 vowel sequences and 0 vowels or vowels and diphthongs.
- The text "/ˈvr̩ːba/" contains 1 vowels, 0 vowel sequences and 1 vowels or vowels and diphthongs.
- The text "/ˈɛi̯ərə(n)/" contains 4 vowels, 0 vowel sequences and 3 vowels or vowels and diphthongs.
- The text "/ˈleːu̯ə(n)/" contains 3 vowels, 0 vowel sequences and 2 vowels or vowels and diphthongs.
{{#invoke:syllables|countVowelsDiphthongs|en|/ˈhaɪweɪ/}} {{#invoke:syllables|countVowelsDiphthongs|en|/ˈloʊɚ/}} {{#invoke:syllables|countVowelsDiphthongs|en|/aɪˈdiə/}} {{#invoke:syllables|countVowelsDiphthongs|en|/ænˈdreɪə/}} {{#invoke:syllables|countVowelsDiphthongs|en|/ˈbaɪəʊ/}} {{#invoke:syllables|countVowelsDiphthongs|fi|/ˈɑːmiɑi̯sˌmɑjoi̯tus/}} {{#invoke:syllables|countVowelsDiphthongs|sk|/vr̩x/}} {{#invoke:syllables|countVowelsDiphthongs|sk|/vl̩k/}} {{#invoke:syllables|countVowelsDiphthongs|sk|/ˈvr̩ːba/}} {{#invoke:syllables|countVowelsDiphthongs|nl|/ˈɛi̯ərə(n)/}} {{#invoke:syllables|countVowelsDiphthongs|nl|/ˈleːu̯ə(n)/}}
- The text "/ˈhaɪweɪ/" contains 2 vowels or diphthongs.
- The text "/ˈloʊɚ/" contains 2 vowels or diphthongs.
- The text "/aɪˈdiə/" contains 2 vowels or diphthongs. – /iə/ is a disyllabic sequence in GA but a diphthong in NZ; unfortunately, there is no language code for the dialect.
- The text "/ænˈdreɪə/" contains 2 vowels or diphthongs.
- The text "/ˈbaɪəʊ/" contains 0 vowels or diphthongs.
- The text "/ˈɑːmiɑi̯sˌmɑjoi̯tus/" contains 6 vowels or diphthongs.
- The text "/vr̩x/" contains 0 vowels or diphthongs.
- The text "/vl̩k/" contains 0 vowels or diphthongs.
- The text "/ˈvr̩ːba/" contains 1 vowel or diphthong.
- The text "/ˈɛi̯ərə(n)/" contains 4 vowels or diphthongs.
- The text "/ˈleːu̯ə(n)/" contains 3 vowels or diphthongs.
{{#invoke:syllables|countVowels2Test|en|/ˈhaɪweɪ/}} {{#invoke:syllables|countVowels2Test|en|/ˈloʊɚ/}} {{#invoke:syllables|countVowels2Test|en|/aɪˈdiə/}} {{#invoke:syllables|countVowels2Test|en|/ænˈdreɪə/}} {{#invoke:syllables|countVowels2Test|en|/ˈbaɪəʊ/}} {{#invoke:syllables|countVowels2Test|en|/avə(ʊ)ˈkeɪʃən/}} {{#invoke:syllables|countVowels2Test|en|/ˈflaʊə/}} {{#invoke:syllables|countVowels2Test|en|/ˈfaɪ̯ə/}} {{#invoke:syllables|countVowels2Test|fi|/ˈɑːmiɑi̯sˌmɑjoi̯tus/}} {{#invoke:syllables|countVowels2Test|sk|/vr̩x/}} {{#invoke:syllables|countVowels2Test|sk|/vl̩k/}} {{#invoke:syllables|countVowels2Test|sk|/ˈvr̩ːba/}} {{#invoke:syllables|countVowels2Test|nl|/ˈɛi̯ərə(n)/}} {{#invoke:syllables|countVowels2Test|nl|/ˈleːu̯ə(n)/}}
- The text "/ˈhaɪweɪ/" contains 2 vowels.
- The text "/ˈloʊɚ/" contains 2 vowels.
- The text "/aɪˈdiə/" contains 2 vowels. – /iə/ is a disyllabic sequence in GA but a diphthong in NZ; unfortunately, there is no language code for the dialect.
- The text "/ænˈdreɪə/" contains 3 vowels.
- The text "/ˈbaɪəʊ/" contains 2 vowels.
- The text "/avə(ʊ)ˈkeɪʃən/" contains 4 vowels.
- The text "/ˈflaʊə/" contains 2 vowels.
- The text "/ˈfaɪ̯ə/" contains 2 vowels.
- The text "/ˈɑːmiɑi̯sˌmɑjoi̯tus/" contains 6 vowels.
- The text "/vr̩x/" contains 1 vowels.
- The text "/vl̩k/" contains 1 vowels.
- The text "/ˈvr̩ːba/" contains 2 vowels.
- The text "/ˈɛi̯ərə(n)/" contains 3 vowels.
- The text "/ˈleːu̯ə(n)/" contains 2 vowels.
-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}
-- Vowel symbols taken from the shared IPA symbol data, plus ᵻ and ᵿ.
-- The string is later wrapped in "[...]" to form a character class.
local vowels = mw.loadData("Module:IPA/data/symbols").vowels .. "ᵻ" .. "ᵿ"
-- Shorthand for building strings from Unicode code points.
local U = mw.ustring.char
--[[
Diphthong inventory, keyed by language code. Each value is a list of patterns;
getVowels anchors each pattern at the start of the remaining text, while
countDiphthongs counts every occurrence of each pattern anywhere in the text.
A language with an empty list still has an entry, so hasDiphthongs returns
true for it.

Add diphthongs to the list if they do not contain semivowel symbols: /j w ɰ ɥ/.
Do not include non-syllabic diacritics: /au/, not /au̯/.
The module automatically does not count vowels with non-syllabic diacritics.
]]--
local diphthongs = {
["de"] = {
"a[ɪʊ]",
"ɔ[ʏɪ]",
},
["ca"] = { -- Catalan has diphthongs, but they are generally transcribed using
-- /w/ and /j/, so do not need to be listed (see [[Wikipedia:Catalan language#Diphthongs and triphthongs|Catalan language#Diphthongs and triphthongs]]).
},
["cs"] = { -- [[Wikipedia:Czech phonology#Diphthongs|Czech phonology#Diphthongs]]
"[aeo]u",
},
["en"] = { -- from [[Appendix:English pronunciation]] mostly, but /ʌɪ/ is from the OED
"[aɑeɛoɔʌ][ɪi]",
"[ɑɒæo]e",
"[əɐ]ʉ",
"[aɒəoɔʌ]ʊ",
"æo",
"[ɛeɪiɔʊʉ]ə", -- /iə/ is a diphthong in NZE, but a disyllabic sequence in GA.
-- /ɪə/ is both a disyllabic sequence and a diphthong in old-fashioned RP.
"[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects?
},
["fi"] = { -- [[Wikipedia:Finnish phonology#Diphthongs|Finnish phonology#Diphthongs]]
"[ɑeouyæø]i",
"[ɑoei]u",
"[eiæø]y",
"uo",
"ie",
"yø",
},
["fr"] = { -- French has diphthongs, but they are transcribed
}, -- with semivowel symbols: [[Wikipedia:French phonology#Glides and diphthongs|French phonology#Glides and diphthongs]].
["grc"] = {
"[aeyo]i",
"[ae]u",
"[ɛɔa]ː[iu]",
},
["hval"] = {
"[iu][aeo]ː*",
"aː*[eo]",
},
["is"] = { -- [[Wikipedia:Icelandic phonology#Vowels|Icelandic phonology#Vowels]]
"[aeø][iɪy]", -- Wikipedia is oddly specific about the second element: ei and ai, but øɪ.
"[ao]u",
},
["it"] = {
"[aeɛoɔu]i",
"[aeɛioɔ]u",
},
["la"] = {
"[eaou]i",
"[eao]u",
"[ao]e",
},
["lb"] = {
"[iu]ə",
"[ɜoæɑ]ɪ",
"[əæɑ]ʊ",
},
["sl"] = { -- No diphthongs, properly speaking; only sequences of a vowel plus /j/ or /w/.
},
["sk"] = { -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.
},
["ru"] = { -- No diphthongs, properly speaking; only sequences of a vowel and /j/.
},
["sq"] = { -- [[Wikipedia:Albanian language#Vowels|Albanian language#Vowels]] doesn't mention anything about diphthongs.
},
["ug"] = { -- No diphthongs.
},
}
--[[ No use for this at the moment, though it is an interesting catalogue.
It might be usable for phonetic transcriptions.
Diacritics added to vowels:
	inverted breve above, inverted breve below,
	up tack, down tack,
	left tack, right tack,
	diaeresis (above), diaeresis below,
	right half ring, left half ring,
	plus sign below, minus sign below,
	combining x above, rhotic hook,
	tilde (above), tilde below
	ligature tie (combining double breve), ligature tie below
]]
local diacritics = U(0x311) .. U(0x32F)
	.. U(0x31D) .. U(0x31E)
	.. U(0x318) .. U(0x319)
	.. U(0x308) .. U(0x324)
	.. U(0x339) .. U(0x31C)
	.. U(0x31F) .. U(0x320)
	.. U(0x33D) .. U(0x2DE)
	.. U(0x303) .. U(0x330)
	.. U(0x361) .. U(0x35C)

--[[
Tone marks that may directly follow a vowel: acute, grave and circumflex.

Every function in this module converts its text to NFD before matching, and
NFD replaces the dedicated tone-mark code points U+0341 and U+0340 with the
plain combining acute U+0301 and grave U+0300. The plain accents therefore
have to be listed, otherwise acute and grave never match; the tone-mark code
points are kept only for backward compatibility.
]]--
local tone = "[" .. U(0x301) .. U(0x300) .. U(0x341) .. U(0x340) .. U(0x302) .. "]"

-- Inverted breve above and below: marks a vowel as non-syllabic.
local nonsyllabicDiacritics = U(0x311) .. U(0x32F)
-- Vertical line below and above: marks a consonant as syllabic.
local syllabicDiacritics = U(0x0329) .. U(0x030D)
-- Ligature tie above and below, joining two vowels into one unit.
local ties = U(0x361) .. U(0x35C)
-- long, half-long, extra short
local lengthDiacritics = U(0x2D0) .. U(0x2D1) .. U(0x306)

-- Pattern fragments built from the character lists above.
local vowel = "[" .. vowels .. "]" .. tone .. "?" -- one vowel with an optional tone mark
local tie = "[" .. ties .. "]"
local nonsyllabicDiacritic = "[" .. nonsyllabicDiacritics .. "]"
local syllabicDiacritic = "[" .. syllabicDiacritics .. "]"
-- Byte-level pattern for one UTF-8 encoded character (lead byte plus continuation bytes).
local UTF8Char = "[\1-\127\194-\244][\128-\191]*"
--[[
Counts the syllable nuclei in an IPA transcription.

Walking the text from left to right, each of the following counts as one nucleus:
any character followed by a syllabic diacritic, two vowels joined by a tie bar,
a diphthong listed for the language, or a single vowel. Vowels carrying a
non-syllabic diacritic are skipped, as is every other character.

Parameters:
	remainder	the transcription, as a string
	lang		a language object; only its getCode() method is used
Returns the number of nuclei, or nil when none were found or when the
transcription starts or ends with a hyphen (i.e. it is an affix or fragment).
Raises an error if the text is not valid UTF-8.
]]
function export.getVowels(remainder, lang)
	-- If a hyphen is at the beginning or end of the transcription (optionally
	-- inside the surrounding "/" or "[ ]"), do not count syllables.
	if string.find(remainder, "^[%[/]?%-") or string.find(remainder, "%-[%]/]?$") then
		return nil
	end

	local diphs = diphthongs[lang:getCode()] or {}

	remainder = mw.ustring.toNFD(remainder) or error("Invalid UTF-8")
	-- Remove every parenthesis, keeping the optional sounds they enclose.
	remainder = string.gsub(remainder, "[()]", "")

	-- The anchored patterns do not change between iterations, so build them once.
	local nonsyllabicVowel = "^" .. vowel .. nonsyllabicDiacritic
	local syllabicConsonant = "^." .. syllabicDiacritic
	local tiedVowels = "^" .. vowel .. tie .. vowel
	local singleVowel = "^" .. vowel
	local leadingChar = "^" .. UTF8Char

	local count = 0
	while remainder ~= "" do
		local skipped = mw.ustring.match(remainder, nonsyllabicVowel)
		if skipped then
			-- Ignore nonsyllabic vowels; looping again also catches several in a row.
			remainder = string.sub(remainder, #skipped + 1)
		else
			local m = mw.ustring.match(remainder, syllabicConsonant) -- Syllabic consonant
				or mw.ustring.match(remainder, tiedVowels) -- Tie bar

			-- Starts with a recognised diphthong?
			if not m then
				for _, diph in ipairs(diphs) do
					m = mw.ustring.match(remainder, "^" .. diph)
					if m then
						break
					end
				end
			end

			-- If we haven't found anything yet, just match on a single vowel
			m = m or mw.ustring.match(remainder, singleVowel)

			if m then
				-- Found a nucleus: count it and consume its bytes.
				count = count + 1
				remainder = string.sub(remainder, #m + 1)
			else
				-- Found a non-vowel, skip it
				local rest, removed = string.gsub(remainder, leadingChar, "")
				if removed == 0 then
					-- The leading byte is not covered by UTF8Char; drop it so
					-- the loop is guaranteed to make progress.
					rest = string.sub(remainder, 2)
				end
				remainder = rest
			end
		end
	end

	if count ~= 0 then
		return count
	end
	return nil
end
--[[
Test entry point for getVowels, meant to be called through #invoke.

Frame arguments:
	1	language code (required)
	2	the transcription to analyse (defaults to the empty string)
Returns a sentence reporting the count. getVowels returns nil when it finds no
nucleus or when the transcription begins or ends with a hyphen; in that case
the sentence says "an unknown number of" instead of raising a concatenation error.
]]
function export.countVowels2Test(frame)
	local params = {
		[1] = {required = true},
		[2] = {default = ""},
	}
	local args = require("Module:parameters").process(frame.args, params)

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	local count = export.getVowels(args[2], lang)
	return 'The text "' .. args[2] .. '" contains ' .. (count or 'an unknown number of') .. ' vowels.'
end
--[[
Language-independent vowel statistics for a transcription.

Returns three numbers:
	1. the number of vowel symbols;
	2. the number of vowel sequences, i.e. maximal runs of adjacent vowel symbols;
	3. the number of vowels left after subtracting those that carry a
	   non-syllabic diacritic and one for every pair joined by a tie bar.
Raises an error if the text is not valid UTF-8.
]]
local function countVowels(text)
	text = mw.ustring.toNFD(text) or error("Invalid UTF-8")

	local _, count = mw.ustring.gsub(text, vowel, "")

	-- Lua patterns cannot repeat a multi-item pattern: in vowel .. "+" the "+"
	-- follows the optional tone mark and is read as a literal plus sign, so it
	-- never matched a run. Strip the tone marks first, then count runs of the
	-- bare vowel class.
	local toneless = mw.ustring.gsub(text, tone, "")
	local _, sequenceCount = mw.ustring.gsub(toneless, "[" .. vowels .. "]+", "")

	local _, nonsyllabicCount = mw.ustring.gsub(text, vowel .. nonsyllabicDiacritic, "")
	local _, tieCount = mw.ustring.gsub(text, vowel .. tie .. vowel, "")
	local diphthongCount = count - (nonsyllabicCount + tieCount)

	return count, sequenceCount, diphthongCount
end
--[[
Sums, over every diphthong pattern listed for the language, the number of
times that pattern occurs in the text. Each pattern is counted on its own, so
a stretch of text covered by several patterns contributes once per pattern.
Returns 0 for a language without an entry. Raises an error if the text is not
valid UTF-8.
]]
local function countDiphthongs(text, lang)
	text = mw.ustring.toNFD(text) or error("Invalid UTF-8")

	local total = 0
	for _, pattern in ipairs(diphthongs[lang:getCode()] or {}) do
		-- The second value returned by gsub is the number of matches.
		total = total + select(2, mw.ustring.gsub(text, pattern, ""))
	end
	return total
end
-- Used by [[Module:IPA]]
-- Reports whether the diphthongs table has an entry for the language's code.
-- An entry with an empty list still yields true.
function export.hasDiphthongs(lang)
	return diphthongs[lang:getCode()] and true or false
end
--[[
#invoke entry point reporting the language-independent vowel statistics.

Frame arguments:
	1	the transcription to analyse (defaults to the empty string)
Returns a sentence listing the three values computed by the local countVowels
helper: vowels, vowel sequences, and vowels remaining once non-syllabic and
tied vowels have been subtracted.
]]
function export.countVowels(frame)
	local args = require("Module:parameters").process(frame.args, {
		[1] = {default = ""},
	})
	local text = args[1]

	local total, sequences, nuclei = countVowels(text)

	local unknown = 'an unknown number of'
	local parts = {
		(total or unknown) .. ' vowels',
		(sequences or unknown) .. ' vowel sequences',
		(nuclei or unknown) .. ' vowels or vowels and diphthongs',
	}

	return 'The text "' .. text .. '" contains ' .. mw.text.listToText(parts) .. "."
end
--[[
#invoke entry point estimating the number of vowels or diphthongs.

Frame arguments:
	1	language code (required)
	2	the transcription to analyse (defaults to the empty string)
The estimate is the number of vowel symbols minus the number of diphthong
pattern matches for the language. Returns a sentence reporting that number,
using the singular wording when it is exactly 1.
]]
function export.countVowelsDiphthongs(frame)
	local params = {
		[1] = {required = true},
		[2] = {default = ""},
	}
	local args = require("Module:parameters").process(frame.args, params)

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	local text = args[2]
	-- Only the first of countVowels' three results (the raw vowel count) is needed.
	local vowelCount = countVowels(text)
	-- Both operands are always numbers, so no fallback value is required.
	local count = vowelCount - countDiphthongs(text, lang)

	local ending
	if count == 1 then
		ending = ' vowel or diphthong.'
	else
		ending = ' vowels or diphthongs.'
	end
	return 'The text "' .. text .. '" contains ' .. count .. ending
end
return export