Module:Languages/data

From The Languages of David J. Peterson
Jump to navigation Jump to search

Documentation for this module may be created at Module:Languages/data/documentation

-- Shorthand for turning Unicode code points into strings.
local u = mw.ustring.char

-- UTF-8 encoded strings for some commonly-used combining diacritics.
-- Not all of them are referenced by the entries in this file.
local GRAVE     = u(0x0300)  -- combining grave accent
local ACUTE     = u(0x0301)  -- combining acute accent
local CIRC      = u(0x0302)  -- combining circumflex
local TILDE     = u(0x0303)  -- combining tilde
local MACRON    = u(0x0304)  -- combining macron
local BREVE     = u(0x0306)  -- combining breve
local DOTABOVE  = u(0x0307)  -- combining dot above
local DIAER     = u(0x0308)  -- combining diaeresis
local CARON     = u(0x030C)  -- combining caron
local DGRAVE    = u(0x030F)  -- combining double grave accent
local INVBREVE  = u(0x0311)  -- combining inverted breve
local DOTBELOW  = u(0x0323)  -- combining dot below
local RINGBELOW = u(0x0325)  -- combining ring below
local CEDILLA   = u(0x0327)  -- combining cedilla
local OGONEK    = u(0x0328)  -- combining ogonek

-- Punctuation appended to every standardChars field below.
local PUNCTUATION = ' !#$%&*+,-./:;<=>?@^_`|~\'()'

-- Shared script-code lists, so entries can reuse one table per combination.
local Cyrl     = {"Cyrl"}
local Latn     = {"Latn"}
local LatnArab = {"Latn", "Arab"}

-- Language data keyed by language code; this table is what the module returns.
-- NOTE(review): the positional fields of each entry look like
-- (1) display name, (2) item id, (3) family code -- confirm against the
-- module that consumes this data.
local m = {}

-- DJP Languages
-- Afata: Latin-script entry with ancestor "veda". The sort_key pairs each
-- accented-vowel class in `from` with its plain vowel in `to`.
m["afat"] = {
	"Afata",
	"Q999999017",
	"atha",
	otherNames = {"Modern Afata", "Afatan"},
	ancestors = {"veda"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÖöÜüÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Asgardian: Latin-script entry with no third positional field and no
-- ancestors. The sort_key pairs accented-vowel classes with plain vowels.
m["asgr"] = {
	"Asgardian",
	"Q99999900A",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÖöÜüÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Astapori Valyrian: Latin-script entry with ancestor "hval". The sort_key
-- pairs accented-vowel classes with plain vowels.
m["asta"] = {
	"Astapori Valyrian",
	"Q999999005",
	"valy-high",
	otherNames = {
		"Slaver's Bay Valyrian",
		"Bastard Valyrian",
		"Valyrian",
		"Low Valyrian",
	},
	ancestors = {"hval"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Azrán: Latin-script entry with ancestor "es". The sort_key pairs
-- accented-vowel classes with plain vowels.
m["azra"] = {
	"Azrán",
	"Q999999030",
	"roa-ibe",
	otherNames = {"Azran", "Pilgrim"},
	ancestors = {"es"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÂâÉéÊêÍíÎîÓóÔôÚúÛû" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Bodzvokhan: Latin-script entry with ancestor "sung". The "e" class of the
-- sort_key also contains the turned e (ǝ).
m["bodz"] = {
	"Bodzvokhan",
	"Q999999002",
	"orci",
	otherNames = {"Orcish", "Orkish", "Orc"},
	ancestors = {"sung"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÖöÜüƎǝ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéêǝ]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",       "i",      "o",      "u"},
	},
}

-- Castithan: Latin-script entry with ancestor "ocst". The "e" class of the
-- sort_key also contains the turned e (ǝ).
m["cast"] = {
	"Castithan",
	"Q999999010",
	"kast",
	otherNames = {"Casti", "Kastithanu"},
	ancestors = {"ocst"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéêǝ]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",       "i",      "o",      "u"},
	},
}

-- Dothraki: Latin-script entry with ancestor "ppla". The sort_key covers the
-- a/i/o/u vowel classes only (there is no "e" class here).
m["doth"] = {
	"Dothraki",
	"Q999999007",
	"plai",
	otherNames = {"Lekh Dothraki", "Modern Dothraki"},
	ancestors = {"ppla"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "i",      "o",      "u"},
	},
}

-- Fjerdan: minimal Latin-script entry.
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["fjer"] = {
	"Fjerdan",
	"x",
	"x",
	case_insensitive = true,
	scripts = Latn,
}

-- Gandal: Latin-script entry with ancestor "veda"; no sort_key or
-- standardChars are defined for it.
m["gand"] = {
	"Gandal",
	"Q999999016",
	"atha",
	otherNames = {"Modern Gandal", "Human"},
	ancestors = {"veda"},
	scripts = Latn,
	case_insensitive = true,
}

-- Gerna Mohr: minimal Latin-script entry.
-- The positional fields are grouped first, ahead of any named field, so that
-- positions 1-3 can be read off directly (a named field between positional
-- ones does not shift their indices, but it is easy to misread).
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["gern"] = {
	"Gerna Mohr",
	"x",
	"x",
	otherNames = {"The Song of the Earth", "Gerna Mossha"},
	scripts = Latn,
	case_insensitive = true,
}

-- G'Vunna: Latin-script entry with ancestor "veda". The sort_key pairs
-- capital Ǝ with "E" separately, ahead of the lowercase "e" class that
-- contains ǝ.
m["gvun"] = {
	"G'Vunna",
	"Q999999018",
	"atha",
	otherNames = {"G'Vunnǝ", "Lokheim", "Gvunna", "Gvunnǝ"},
	ancestors = {"veda"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÖöÜüƎǝ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "Ǝ", "[ëèéêǝ]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "E", "e",       "i",      "o",      "u"},
	},
}

-- Hen Linge: Latin-script entry without ancestors. The sort_key covers the
-- a/i/o/u vowel classes only.
m["henl"] = {
	"Hen Linge",
	"Q999999021",
	"elve",
	otherNames = {
		"Hen Llinge",
		"Elder",
		"Elder Speech",
		"Elder Tongue",
		"Elven Tongue",
		"Elvish",
	},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÍíÓóÖöÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "i",      "o",      "u"},
	},
}

-- High Valyrian: Latin-script entry with ancestor "oval". The sort_key
-- classes include the macron vowels, and ñ and ÿ/ȳ are paired with "n"
-- and "y".
m["hval"] = {
	"High Valyrian",
	"Q999999004",
	"valy-high",
	otherNames = {"Valyrian", "Classical Valyrian", "Classic Valyrian"},
	ancestors = {"oval"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ĀāĒēĪīÑñŌōŪūȲȳ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "ñ", "[öòóôō]", "[üùúûū]", "[ÿȳ]"},
		to   = {"a",        "e",       "i",       "n", "o",       "u",       "y"},
	},
}

-- Indojisnen: Latin-script entry with two positional fields only; no
-- sort_key or standardChars are defined for it.
m["indo"] = {
	"Indojisnen",
	"Q999999026",
	otherNames = {"Indogene"},
	case_insensitive = true,
	scripts = Latn,
}

-- Inha: Latin-script entry with two positional fields only. The sort_key
-- lists its classes in a, i, e, o, u order, and the "e" class contains ǝ.
m["inha"] = {
	"Inha",
	"Q999999027",
	otherNames = {"Witch"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ïìíî]", "[ëèéêǝ]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "i",      "e",       "o",      "u"},
	},
}

-- Irathient: Latin-script entry. The sort_key pairs capital Ǝ with "E"
-- ahead of the lowercase "e" class that contains ǝ.
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["irat"] = {
	"Irathient",
	"x",
	"x",
	otherNames = {"L'Irathi"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9Ǝǝ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "Ǝ", "[ëèéêǝ]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "E", "e",       "i",      "o",      "u"},
	},
}

-- Kamakawi: minimal Latin-script entry.
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["kama"] = {
	"Kamakawi",
	"x",
	"x",
	case_insensitive = true,
	scripts = Latn,
}

-- Kinuk'aaz: Latin-script entry with ancestor "pkin". The sort_key pairs
-- accented-vowel classes with plain vowels.
m["kinu"] = {
	"Kinuk'aaz",
	"Q999999023",
	"omec",
	otherNames = {"Omec", "Enchanter"},
	ancestors = {"pkin"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÖöÜü" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Lishepus: Latin-script entry with two positional fields and two ancestors
-- ("ine-pro" and "afa-pro"). The sort_key pairs accented-vowel classes with
-- plain vowels.
-- otherNames is given exactly once; a second, identical assignment of the
-- same key in this constructor was redundant and has been removed.
m["lish"] = {
	"Lishepus",
	"Q999999022",
	otherNames = {"Angelic"},
	scripts = Latn,
	ancestors = {"ine-pro", "afa-pro"},
	case_insensitive = true,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
	standardChars = "A-Za-z0-9ÁáÂâÉéÊêÍíÎîÓóÔôÚúÛû" .. PUNCTUATION,
}

-- Meereenese Valyrian: Latin-script entry with ancestor "hval"; no sort_key
-- or standardChars are defined for it.
m["meer"] = {
	"Meereenese Valyrian",
	"Q999999006",
	"valy-high",
	otherNames = {
		"Slaver's Bay Valyrian",
		"Low Valyrian",
		"Bastard Valyrian",
		"Valyrian",
	},
	ancestors = {"hval"},
	scripts = Latn,
	case_insensitive = true,
}

-- Munja'kin: Latin-script entry with ancestor "pmun". The sort_key covers
-- the a/i/o/u vowel classes only.
m["munj"] = {
	"Munja'kin",
	"Q999999013",
	"munc",
	otherNames = {"Munchkin"},
	ancestors = {"pmun"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "i",      "o",      "u"},
	},
}

-- Noalath: Latin-script entry with two positional fields only. The "o" and
-- "u" sort_key classes also contain the double-acute letters ő and ű.
m["noal"] = {
	"Noalath",
	"Q999999031",
	otherNames = {"Elvish", "Druid", "Druidic"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÔôÖöŐőÚúÛûÜüŰű" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóôő]", "[üùúûű]"},
		to   = {"a",       "e",      "i",      "o",       "u"},
	},
}

-- Old Castithan: Latin-script entry with ancestor "pcst". The "e" class of
-- the sort_key also contains the turned e (ǝ).
m["ocst"] = {
	"Old Castithan",
	"Q999999009",
	"kast",
	otherNames = {"Flood Form", "Pure Castithan", "Traditional Castithan"},
	ancestors = {"pcst"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéêǝ]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",       "i",      "o",      "u"},
	},
}

-- Old Valyrian: Latin-script entry without ancestors; no sort_key or
-- standardChars are defined for it.
m["oval"] = {
	"Old Valyrian",
	"Q999999003",
	"valy",
	otherNames = {"Valyrian", "Ancient Valyrian"},
	case_insensitive = true,
	scripts = Latn,
}

-- Övüsi: Latin-script entry whose key itself contains non-ASCII letters.
-- The sort_key pairs accented-vowel classes with plain vowels.
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["övüs"] = {
	"Övüsi",
	"x",
	"x",
	otherNames = {"Elvish", "Elf"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ËëÏïÖöÜü" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Proto-Castithan: reconstructed Latin-script entry. The sort_key pairs
-- classes of IPA-style consonant letters with plain ASCII letters; the "a"
-- class also contains ʔ.
m["pcst"] = {
	"Proto-Castithan",
	"Q999999011",
	"kast",
	otherNames = {"Ancient Castithan", "Classical Castithan"},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[ɓ]", "[ðɗɖ]", "[gɡɠɣɢ]", "[ŋɴ]", "[ʂ]", "[θƭ]", "[ʐʒ]"},
		to   = {"a",         "b",   "d",     "g",       "n",    "s",   "t",    "z"},
	},
}

-- Proto-Kinuk'aaz: reconstructed Latin-script entry. The sort_key has an "a"
-- class (which also contains ʔ) and a "g" class.
m["pkin"] = {
	"Proto-Kinuk'aaz",
	"Q999999024",
	"omec",
	otherNames = {
		"Omec",
		"Ancient Omec",
		"Old Omec",
		"Old Kinuk'aaz",
		"Ancient Kinuk'aaz",
	},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[gɡɣ]"},
		to   = {"a",         "g"},
	},
}

-- Proto-Munja'kin: reconstructed Latin-script entry. The sort_key has "a",
-- "g", "s" and "z" classes.
m["pmun"] = {
	"Proto-Munja'kin",
	"Q999999012",
	"munc",
	otherNames = {"Munchkin", "Old Munja'kin", "Proto-Munchkin"},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡɣʃʒʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåā]", "[gɡɣ]", "[šʃ]", "[žʒ]"},
		to   = {"a",        "g",     "s",    "z"},
	},
}

-- Proto-Plains: reconstructed Latin-script entry. The sort_key has an "a"
-- class (which also contains ʔ) and a "g" class.
m["ppla"] = {
	"Proto-Plains",
	"Q999999008",
	"plai",
	otherNames = {"Old Dothraki", "Old Lhazareen", "Plains"},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[gɡɣ]"},
		to   = {"a",         "g"},
	},
}

-- Proto-Sondiv: reconstructed Latin-script entry. The sort_key has an "a"
-- class (which also contains ʔ) and a "g" class.
m["psnd"] = {
	"Proto-Sondiv",
	"Q999999020",
	"atri",
	otherNames = {"Ancient Atrian", "Old Atrian", "Proto-Atrian"},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[gɡɣ]"},
		to   = {"a",         "g"},
	},
}

-- Proto-Veda: reconstructed Latin-script entry. The sort_key has an "a"
-- class (which also contains ʔ) and a "g" class.
m["pved"] = {
	"Proto-Veda",
	"Q999999014",
	"atha",
	otherNames = {"Athanoran", "Old Veda", "Ancient Veda"},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[gɡɣ]"},
		to   = {"a",         "g"},
	},
}

-- Ravkan: minimal Latin-script entry.
-- NOTE(review): the "x" values in positions 2 and 3 look like placeholders
-- -- confirm.
m["ravk"] = {
	"Ravkan",
	"x",
	"x",
	otherNames = {"Modern Ravkan"},
	case_insensitive = true,
	scripts = Latn,
}

-- Shiväisith: Latin-script entry with two positional fields only. The
-- sort_key pairs accented-vowel classes with plain vowels.
m["shiv"] = {
	"Shiväisith",
	"Q999999029",
	otherNames = {
		"Shiväisith language",
		"Dark Elf",
		"Elf",
		"Elvish",
		"Dark Elvish",
	},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÄäÖö" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Sondiv: Latin-script entry with ancestor "psnd"; no sort_key or
-- standardChars are defined for it.
m["sond"] = {
	"Sondiv",
	"Q999999019",
	"atri",
	otherNames = {"Sondiv language", "Atrian"},
	ancestors = {"psnd"},
	scripts = Latn,
	case_insensitive = true,
}

-- Sungdin: reconstructed Latin-script entry. The sort_key has an "a" class
-- (which also contains ʔ) and a "g" class that includes ɢ.
m["sung"] = {
	"Sungdin",
	"Q999999001",
	"orci",
	otherNames = {
		"Orcish",
		"Proto-Orcish",
		"Proto-Orc",
		"Proto-Bodzvokhan",
		"Old Bodzvokhan",
	},
	type = "reconstructed",
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ɡʔ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåāʔ]", "[gɡɣɢ]"},
		to   = {"a",         "g"},
	},
}

-- Trigedasleng: Latin-script entry with ancestor "en"; no sort_key or
-- standardChars are defined for it.
m["trig"] = {
	"Trigedasleng",
	"Q999999028",
	"gmw",
	otherNames = {"Grounder", "Trisleng"},
	ancestors = {"en"},
	scripts = Latn,
	case_insensitive = true,
}

-- Væyne Zaanics: Latin-script entry with two positional fields only. In the
-- sort_key the "a" class also contains æ, and þ is paired with "t".
m["væyn"] = {
	"Væyne Zaanics",
	"Q999999025",
	otherNames = {"God's Gift"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÆæÞþ" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâåæ]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[þ]", "[üùúû]"},
		to   = {"a",        "e",      "i",      "o",      "t",   "u"},
	},
}

-- Veda: Latin-script entry with ancestor "pved". The sort_key only handles
-- the schwa-like letters: Ǝ is paired with "E", and ə/ǝ with "e".
m["veda"] = {
	"Veda",
	"Q999999015",
	"atha",
	otherNames = {"Pure Veda", "Pure Vedan", "Vedan"},
	ancestors = {"pved"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9Ǝǝ" .. PUNCTUATION,
	sort_key = {
		from = {"Ǝ", "[əǝ]"},
		to   = {"E", "e"},
	},
}

-- Yulish: Latin-script entry with two positional fields only. The sort_key
-- pairs accented-vowel classes with plain vowels.
m["yuli"] = {
	"Yulish",
	"Q999999032",
	otherNames = {"Elvish", "The Christmas Tongue"},
	scripts = Latn,
	case_insensitive = true,
	standardChars = "A-Za-z0-9ÖöÜü" .. PUNCTUATION,
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]"},
		to   = {"a",       "e",      "i",      "o",      "u"},
	},
}

-- Added Proto-Afro-Asiatic and Proto-Indo-European from somewhere else cause I need 'em. -DJP
-- Proto-Afro-Asiatic: reconstructed entry. Unlike the entries that share the
-- Latn list, it carries its own script list, {"Latinx"}.
m["afa-pro"] = {
	"Proto-Afro-Asiatic",
	"Q269125",
	"afa",
	type = "reconstructed",
	otherNames = {"Hamito-Semitic"},
	scripts = {"Latinx"},
}

-- Proto-Indo-European: reconstructed entry with its own script list,
-- {"Latinx"}.
-- The sort_key `from` list has three more items (RINGBELOW, ACUTE, MACRON)
-- than `to`, so those three have no replacement at the same index.
-- NOTE(review): presumably the consumer treats a missing replacement as
-- "delete the match" -- confirm against the code that applies sort_key.
m["ine-pro"] = {
	"Proto-Indo-European",
	"Q37178",
	"ine",
	type = "reconstructed",
	scripts = {"Latinx"},
	sort_key = {
		from = {
			"[áā]", "[éēḗ]", "[íī]", "[óōṓ]", "[úū]",
			"ĺ", "ḿ", "ń", "ŕ",
			"ǵ", "ḱ",
			"ʰ", "ʷ",
			"₁", "₂", "₃",
			RINGBELOW, ACUTE, MACRON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"l", "m", "n", "r",
			"g'", "k'",
			"¯h", "¯w",
			"1", "2", "3",
		},
	},
}


-- Hand the populated language table back to whatever loads this module.
return m