Module:Category tree/topic cat/data/Places

From The Languages of David J. Peterson
Jump to navigation Jump to search

Documentation for this module may be created at Module:Category tree/topic cat/data/Places/documentation

-- labels:   label name -> category specification (description + parents).
-- handlers: list of functions that build a specification from a label on the fly.
local labels, handlers = {}, {}

local m_shared = require("Module:place/shared-data")
local m_strutils = require("Module:string utilities")

--[=[

This module contains specifications that are used to create labels that allow {{auto cat}} and
{{topic cat}} to create the appropriate definitions for topic categories for places (e.g.
'en:Waterfalls', 'de:Hokkaido', 'es:Cities in France', 'pt:Municipalities of Tocantins, Brazil',
etc.). Note that this module doesn't actually create the categories; that must be done manually,
with the text "{{auto cat}}" as the definition of the category. (This process should automatically
happen periodically for non-empty categories, because they will appear in [[Special:WantedCategories]]
and a bot will periodically examine that list and create any needed category.)

There are two ways that such labels are created: (1) by manually adding an entry to the 'labels'
table, keyed by the label (minus the language code) with a value consisting of a Lua table
specifying the description text and the category's parents; (2) through handlers (pieces of
Lua code) added to the 'handlers' list, which recognize labels of a specific type (e.g.
'Cities in France') and generate the appropriate specification for that label on-the-fly.
]=]

-- Lowercase the first character of `label` using the wiki's content language,
-- delegating to the language object's own `lcfirst` method.
local function lcfirst(label)
	local content_lang = mw.getContentLanguage()
	return content_lang:lcfirst(label)
end

-- Top-level "places" label; many of the labels defined below list it as a parent.
labels["places"] = {
	parents = {"names", "list of sets"},
	description = "{{{langname}}} names for geographical [[Wiktionary:place|place]]s; [[Wiktionary:toponym|toponym]]s.",
}


-- general labels

-- Each entry is a positional triple {LABEL, DESCRIPTION, PARENTS}:
--   LABEL       the label name (used as the key in 'labels');
--   DESCRIPTION a wikitext fragment; the loop following this table wraps it as
--               "{{{langname}}} names of " .. DESCRIPTION .. ".";
--   PARENTS     a list of parent labels; the same loop appends "list of sets"
--               to it, so it must not be listed here.
local general_labels = {
	{"airports", "[[Wiktionary:airport|airport]]s", {"places"}},
	{"atolls", "[[Wiktionary:atoll|atoll]]s", {"islands"}},
	{"bays", "[[Wiktionary:bay|bay]]s", {"places", "water"}},
	{"beaches", "[[Wiktionary:beach|beach]]es", {"places", "water"}},
	{"boroughs", "[[Wiktionary:borough|borough]]s", {"polities"}},
	{"capital cities", "[[Wiktionary:capital|capital]] [[Wiktionary:city|cities]]: the [[Wiktionary:seat of government|seats of government]] for a country", {"cities"}},
	{"census-designated places", "[[Wiktionary:census-designated place|census-designated place]]s", {"places"}},
	{"cities", "[[Wiktionary:city|cities]], [[Wiktionary:town|town]]s and [[Wiktionary:village|village]]s of all sizes", {"polities"}},
	{"communities", "[[Wiktionary:community|communities]] of all sizes", {"polities"}},
	{"continents", "the [[Wiktionary:continents|continents]] of the world", {"places"}},
	{"countries", "[[Wiktionary:country|countries]]", {"polities"}},
	{"dependencies", "[[Wiktionary:dependency|dependencies]]", {"polities"}},
	{"deserts", "[[Wiktionary:desert|desert]]s", {"places"}},
	{"forests", "[[Wiktionary:forest|forest]]s", {"places"}},
	{"gulfs", "[[Wiktionary:gulf|gulf]]s", {"places", "water"}},
	{"headlands", "[[Wiktionary:headland|headland]]s", {"places"}},
	{"historical and traditional regions", "regions that have no administrative significance", {"places"}},
	{"historical political subdivisions", "[[Wiktionary:political|political]] [[Wiktionary:subdivision|subdivision]]s (states, provinces, counties, etc.) that no longer exist", {"polities"}},
	{"historical polities", "[[Wiktionary:polity|polities]] (countries, kingdoms, empires, etc.) that no longer exist", {"polities"}},
	{"hills", "[[Wiktionary:hill|hill]]s", {"places"}},
	{"islands", "[[Wiktionary:island|island]]s", {"places"}},
	{"kibbutzim", "[[Wiktionary:kibbutz|kibbutz]]im", {"places"}},
	{"lakes", "[[Wiktionary:lake|lake]]s", {"places", "water"}},
	{"landforms", "[[Wiktionary:landform|landform]]s", {"Earth"}},
	{"mountains", "[[Wiktionary:mountain|mountain]]s", {"places"}},
	{"moors", "[[Wiktionary:moor|moor]]s", {"places"}},
	{"neighborhoods", "[[Wiktionary:neighborhood|neighborhood]]s, [[Wiktionary:district|district]]s and other subportions of a [[Wiktionary:city|city]]", {"places"}},
	-- FIXME: is the following parent correct? Note that "Seas" is capitalized,
	-- unlike the lowercase "seas" label defined in this table.
	-- NOTE(review): presumably it refers to a label defined in another data
	-- module -- confirm before changing.
	{"oceans", "[[Wiktionary:ocean|ocean]]s", {"Seas"}},
	{"parks", "[[Wiktionary:park|park]]s", {"places"}},
	{"peninsulas", "[[Wiktionary:peninsula|peninsula]]s", {"places"}},
	{"plateaus", "[[Wiktionary:plateau|plateau]]s", {"places"}},
	{"political subdivisions", "[[Wiktionary:political|political]] [[Wiktionary:subdivision|subdivision]]s, such as [[Wiktionary:province|province]]s, [[Wiktionary:state|state]]s or [[Wiktionary:region|region]]s", {"polities"}},
	{"polities", "[[Wiktionary:polity|polities]] or [[Wiktionary:political|political]] [[Wiktionary:division|division]]s", {"places"}},
	{"provinces", "[[Wiktionary:province|province]]s", {"political subdivisions"}},
	{"rivers", "[[Wiktionary:river|river]]s", {"places", "water"}},
	{"rural municipalities", "[[Wikipedia:rural municipality|rural municipalities]]", {"political subdivisions"}},
	{"seas", "[[Wiktionary:sea|sea]]s", {"places", "water"}},
	{"straits", "[[Wiktionary:strait|strait]]s", {"places", "water"}},
	{"subdistricts", "[[Wiktionary:subdistrict|subdistrict]]s", {"polities"}},
	{"suburbs", "[[Wiktionary:suburb|suburb]]s of a [[Wiktionary:city|city]]", {"places"}},
	{"towns", "[[Wiktionary:town|town]]s", {"polities"}},
	{"townships", "[[Wiktionary:township|township]]s", {"polities"}},
	{"unincorporated communities", "[[Wiktionary:unincorporated|unincorporated]] [[Wiktionary:community|communities]]", {"places"}},
	{"valleys", "[[Wiktionary:valley|valley]]s", {"places", "water"}},
	{"villages", "[[Wiktionary:village|village]]s", {"polities"}},
	{"waterfalls", "[[Wiktionary:waterfall|waterfall]]s", {"landforms", "water"}},
}

-- Register every general label. Each spec is {LABEL, DESCRIPTION, PARENTS};
-- the parents list is extended in place with "list of sets" and reused as-is.
for _, label_spec in ipairs(general_labels) do
	local parents = label_spec[3]
	parents[#parents + 1] = "list of sets"
	labels[label_spec[1]] = {
		description = "{{{langname}}} names of " .. label_spec[2] .. ".",
		parents = parents,
	}
end

-- The next two labels do not fit the "names of X" template used for the
-- general labels, so their descriptions are written out in full.

labels["city nicknames"] = {
	parents = {"cities", "list of sets"},
	description = "{{{langname}}} informal alternative names for [[Wiktionary:city|cities]] (e.g., [[Wiktionary:Big Apple|Big Apple]] for [[Wiktionary:New York City|New York City]]).",
}

labels["exonyms"] = {
	parents = {"places", "list of sets"},
	description = "{{{langname}}} [[Wiktionary:exonym|exonym]]s.",
}

-- Generate bare labels in 'labels' for all polities: every entry of every
-- polity group is handed to that group's own bare_label_setter.
for _, polity_group in ipairs(m_shared.polities) do
	local set_bare_label = polity_group.bare_label_setter
	for place, place_spec in pairs(polity_group.data) do
		set_bare_label(labels, polity_group, place, place_spec)
	end
end

-- Handler for "cities in the Bahamas", "rivers in Western Australia", etc.
-- Places that begin with "the" are recognized and handled specially
-- (the bare and linked forms come from m_shared.construct_bare_and_linked_version).
--
-- The label (with its first letter lowercased) is split as "<place type> in <place>".
-- The place type must be a key of m_shared.generic_place_types and the place
-- must be a key in the data of one of the m_shared.polities groups.
--
-- Returns a table {description = ..., parents = ...} for a recognized label;
-- returns nothing when the label is not handled here.
table.insert(handlers, function(label)
	label = lcfirst(label)
	local place_type, place = label:match("^([a-z%- ]-) in (.*)$")
	if place_type and m_shared.generic_place_types[place_type] then
		for _, group in ipairs(m_shared.polities) do
			local placedata = group.data[place]
			if placedata then
				placedata = group.value_transformer(group, place, placedata)
				-- Only accept the spelling that matches the place's convention:
				-- "neighbourhoods" for places flagged british_spelling,
				-- "neighborhoods" for all others.
				local spelling_matches = true
				if place_type == "neighborhoods" and placedata.british_spelling or
					place_type == "neighbourhoods" and not placedata.british_spelling then
					spelling_matches = false
				end
				if spelling_matches and not placedata.nocities then
					local parent
					if placedata.containing_polity then
						parent = place_type .. " in " .. placedata.containing_polity
					elseif place_type == "neighbourhoods" then
						-- The top-level label only exists under the US spelling.
						parent = "neighborhoods"
					else
						parent = place_type
					end
					local bare_place, linked_place = m_shared.construct_bare_and_linked_version(place)
					local keydesc = placedata.keydesc or linked_place
					-- Must be local: the original assigned to an undeclared name,
					-- which leaked a global out of the handler.
					local parents = {{name = parent, sort = bare_place}, bare_place, "list of sets"}
					if place_type ~= "places" then
						-- Every specific place type is also filed under "places in <place>".
						table.insert(parents, "places in " .. place)
					end
					return {
						description = "{{{langname}}} names of " .. m_shared.generic_place_types[place_type] .. " in " .. keydesc .. ".",
						parents = parents,
					}
				end
			end
		end
	end
end)

-- Handler for "provinces of the Philippines", "counties of Wales", etc.
-- Places that begin with "the" are recognized and handled specially.
--
-- A label "<division type> of <place>" is accepted when <place> is a key in one
-- of the m_shared.polities groups and <division type> appears in that place's
-- 'poldiv' list (political division) or, failing that, its 'miscdiv' list.
-- Only 'poldiv' matches get "political subdivisions" as a parent.
table.insert(handlers, function(label)
	label = lcfirst(label)
	local div_type, place = label:match("^([a-z%- ]-) of (.*)$")
	if not place then
		return
	end

	-- True if `divs` is a list that contains the requested division type.
	local function lists_div_type(divs)
		if divs then
			for _, div in ipairs(divs) do
				if div == div_type then
					return true
				end
			end
		end
		return false
	end

	for _, group in ipairs(m_shared.polities) do
		local raw_data = group.data[place]
		if raw_data then
			local placedata = group.value_transformer(group, place, raw_data)
			local is_poldiv = lists_div_type(placedata.poldiv)
			if is_poldiv or lists_div_type(placedata.miscdiv) then
				local linkdiv = m_shared.political_subdivisions[div_type]
				if not linkdiv then
					error("Saw unknown place type '" .. div_type .. "' in label '" .. label .. "'")
				end
				local bare_place, linked_place = m_shared.construct_bare_and_linked_version(place)
				local keydesc = placedata.keydesc or linked_place
				local spec = {
					description = "{{{langname}}} names of " .. linkdiv .. " of " .. keydesc .. ".",
					parents = {bare_place, "list of sets"},
				}
				if is_poldiv then
					table.insert(spec.parents, 1, {name = "political subdivisions", sort = bare_place})
				end
				return spec
			end
		end
	end
end)

-- Handler for "counties of Alabama", "parishes of Louisiana", etc.
-- The state is looked up in m_shared.us_states under "<state>, USA"; the label
-- is accepted only if its division type equals the state's 'county_type'
-- (defaulting to "counties").
table.insert(handlers, function(label)
	label = lcfirst(label)
	local county_type, state = label:match("^([a-z ]-) of (.*)$")
	if not state then
		return
	end
	local state_key = state .. ", USA"
	local state_desc = m_shared.us_states[state_key]
	if not state_desc then
		return
	end
	local expected_county_type = state_desc.county_type or "counties"
	local linked_county_type = m_shared.political_subdivisions[expected_county_type]
	if county_type ~= expected_county_type then
		return
	end
	local linked_state = "[[Wiktionary:" .. state .. "|" .. state .. "]]"
	return {
		description = "{{{langname}}} names of " .. linked_county_type .. " of " .. linked_state .. ", a state of the [[Wiktionary:United States|United States]].",
		parents = {
			{name = "counties of the United States", sort = state},
			state_key,
			"list of sets",
		},
	}
end)

-- Handler for "county seats of Alabama", "parish seats of Louisiana", etc.
-- The expected seat type is the singular of the state's 'county_type'
-- (default "counties") followed by " seats".
table.insert(handlers, function(label)
	label = lcfirst(label)
	local seat_type, state = label:match("^([a-z ]-) of (.*)$")
	if not state then
		return
	end
	local state_key = state .. ", USA"
	local state_desc = m_shared.us_states[state_key]
	if not state_desc then
		return
	end
	local expected_county_type = state_desc.county_type or "counties"
	local expected_seat_type = m_strutils.singularize(expected_county_type) .. " seats"
	local linked_seat_type = m_shared.political_subdivisions[expected_seat_type]
	if seat_type ~= expected_seat_type then
		return
	end
	local linked_state = "[[Wiktionary:" .. state .. "|" .. state .. "]]"
	return {
		description = "{{{langname}}} names of " .. linked_seat_type .. " of " .. linked_state .. ", a state of the [[Wiktionary:United States|United States]].",
		parents = {
			{name = "county seats of the United States", sort = state},
			state_key,
			"list of sets",
		},
	}
end)

-- Handler for "municipalities of Tocantins, Brazil", etc.
-- Accepts only states present in m_shared.brazilian_states under "<state>, Brazil".
table.insert(handlers, function(label)
	local state = lcfirst(label):match("^municipalities of (.*), Brazil$")
	if not state then
		return
	end
	local state_key = state .. ", Brazil"
	if not m_shared.brazilian_states[state_key] then
		return
	end
	local linked_state = "[[Wiktionary:" .. state .. "|" .. state .. "]]"
	return {
		description = "{{{langname}}} names of [[Wiktionary:municipality|municipalities]] of " .. linked_state .. ", a state of [[Wiktionary:Brazil|Brazil]].",
		parents = {{name = "municipalities of Brazil", sort = state}, state_key, "list of sets"},
	}
end)

-- Handler for "municipalities of Cebu, Philippines", etc.
-- Accepts only provinces present in m_shared.philippine_provinces under
-- "<province>, Philippines".
table.insert(handlers, function(label)
	local province = lcfirst(label):match("^municipalities of (.*), Philippines$")
	if not province then
		return
	end
	local province_key = province .. ", Philippines"
	if not m_shared.philippine_provinces[province_key] then
		return
	end
	local linked_province = "[[Wiktionary:" .. province .. "|" .. province .. "]]"
	return {
		description = "{{{langname}}} names of [[Wiktionary:municipality|municipalities]] of " .. linked_province .. ", a province of the [[Wiktionary:Philippines|Philippines]].",
		parents = {{name = "municipalities of the Philippines", sort = province}, province_key, "list of sets"},
	}
end)

-- Handler for "municipalities of Upper Austria" and other Austrian states.
-- Unlike the other municipality handlers, the state is looked up in
-- m_shared.austrian_states by its bare name (no ", Austria" suffix).
table.insert(handlers, function(label)
	local state = lcfirst(label):match("^municipalities of (.*)$")
	if not state or not m_shared.austrian_states[state] then
		return
	end
	local linked_state = "[[Wiktionary:" .. state .. "|" .. state .. "]]"
	return {
		description = "{{{langname}}} names of [[Wiktionary:municipality|municipalities]] of " .. linked_state .. ", a state of [[Wiktionary:Austria|Austria]].",
		parents = {{name = "municipalities of Austria", sort = state}, state, "list of sets"},
	}
end)

-- Handler for "municipalities of Ostrobothnia, Finland", etc.
-- Accepts only regions present in m_shared.finnish_regions under "<region>, Finland".
table.insert(handlers, function(label)
	local region = lcfirst(label):match("^municipalities of (.*), Finland$")
	if not region or not m_shared.finnish_regions[region .. ", Finland"] then
		return
	end
	-- Need to construct bare and linked versions due to "the Åland Islands".
	local bare_region, linked_region = m_shared.construct_bare_and_linked_version(region)
	local finland_parents = {
		{name = "municipalities of Finland", sort = bare_region},
		bare_region .. ", Finland",
		"list of sets",
	}
	return {
		description = "{{{langname}}} names of [[Wiktionary:municipality|municipalities]] of " .. linked_region .. ", a region of [[Wiktionary:Finland|Finland]].",
		parents = finland_parents,
	}
end)

-- Topic label for Hokkaido itself (terms related to it, not only place names).
labels.Hokkaido = {
	parents = {"Prefectures of Japan"},
	description = "{{{langname}}} terms related to [[Wiktionary:Hokkaido|Hokkaido]], a [[Wiktionary:prefecture|prefecture]] of [[Wiktionary:Japan|Japan]].",
}

-- "regions in (continent)", esp. for regions that span multiple countries
do
	-- Shared opening of every description in this group.
	local names_of_regions_in = "{{{langname}}} names of [[Wiktionary:region|region]]s in "

	-- for multinational regions which do not fit neatly within one continent
	labels["regions in the world"] = {
		description = names_of_regions_in .. "the world (which do not fit neatly within one country or continent).",
		parents = {"places", "list of sets"},
	}

	labels["regions in Africa"] = {
		description = names_of_regions_in .. "Africa.",
		parents = {"Africa", "list of sets"},
	}

	-- Note the parent label is "America", not "the Americas".
	labels["regions in the Americas"] = {
		description = names_of_regions_in .. "the Americas.",
		parents = {"America", "list of sets"},
	}

	labels["regions in Asia"] = {
		description = names_of_regions_in .. "Asia.",
		parents = {"Asia", "list of sets"},
	}

	labels["regions in Europe"] = {
		description = names_of_regions_in .. "Europe.",
		parents = {"Europe", "list of sets"},
	}
end

-- "countries in (continent)", "rivers in (continent)"
-- For each continent, two labels are registered (countries first, then rivers),
-- each sorted under the continent name within its top-level parent.
do
	local per_continent_types = {
		{"countries", "[[Wiktionary:country|countries]]"},
		{"rivers", "[[Wiktionary:river|river]]s"},
	}
	local continents = {"Africa", "Asia", "Central America", "Europe", "North America", "Oceania", "South America"}
	for _, continent in ipairs(continents) do
		local linked_continent = "[[Wiktionary:" .. continent .. "|" .. continent .. "]]"
		for _, type_spec in ipairs(per_continent_types) do
			local place_type, linked_type = type_spec[1], type_spec[2]
			labels[place_type .. " in " .. continent] = {
				description = "{{{langname}}} names of " .. linked_type .. " in " .. linked_continent .. ".",
				parents = {{name = place_type, sort = continent}, continent, "list of sets"},
			}
		end
	end
end

-- autonomous communities, oblasts, etc

-- Description is special-cased: it links to the Wikipedia article as a whole.
labels["autonomous communities of Spain"] = {
	parents = {{name = "political subdivisions", sort = "Spain"}, "Spain", "list of sets"},
	description = "{{{langname}}} names of the [[Wikipedia:Autonomous communities of Spain|autonomous communities of Spain]].",
}

-- boroughs
do
	-- Build the spec for "boroughs in <place>" where <place> lies in the US.
	-- `place` is used for the link text and sort key; `place_label` is the
	-- parent label for that place.
	local function us_borough_spec(place, place_label)
		return {
			description = "{{{langname}}} names of boroughs in [[Wiktionary:" .. place .. "|" .. place .. "]].",
			parents = {{name = "boroughs in the United States", sort = place}, place_label, "list of sets"},
		}
	end

	labels["boroughs in England"] = {
		parents = {{name = "boroughs", sort = "England"}, "England", "list of sets"},
		description = "{{{langname}}} names of boroughs, local government districts and unitary authorities in [[Wiktionary:England|England]].",
	}

	labels["boroughs in Pennsylvania"] = us_borough_spec("Pennsylvania", "Pennsylvania, USA")
	labels["boroughs in New Jersey"] = us_borough_spec("New Jersey", "New Jersey, USA")
	labels["boroughs in New York City"] = us_borough_spec("New York City", "New York City")

	labels["boroughs in the United States"] = {
		-- parent is "boroughs" not "political subdivisions" and category says "in"
		-- not "of", because boroughs aren't really political subdivisions in the US
		-- (more like cities)
		parents = {{name = "boroughs", sort = "United States"}, "United States", "list of sets"},
		description = "{{{langname}}} names of [[Wiktionary:borough|borough]]s in the [[Wiktionary:United States|United States]].",
	}
end

-- census-designated places

labels["census-designated places in the United States"] = {
	-- parent is just United States; census-designated places have no political
	-- status and exist only in the US, so no need for a top-level
	-- "census-designated places" category
	parents = {"United States", "list of sets"},
	description = "{{{langname}}} names of [[Wiktionary:census-designated place|census-designated place]]s in the [[Wiktionary:United States|United States]].",
}

-- cities
do
	-- Special-cased descriptions: the linked name is followed by the kind of
	-- entity ("Prefecture" / "Metropolis"), which the generic handler would not add.
	local function japan_city_spec(name, entity_kind)
		return {
			description = "{{{langname}}} names of cities in [[Wiktionary:" .. name .. "|" .. name .. "]] " .. entity_kind .. ".",
			parents = {{name = "cities in Japan", sort = name}, name, "list of sets"},
		}
	end

	labels["cities in Hokkaido"] = japan_city_spec("Hokkaido", "Prefecture")
	labels["cities in Tokyo"] = japan_city_spec("Tokyo", "Metropolis")
end

-- counties

labels["counties of Northern Ireland"] = {
	-- Description now ends with a period, like every other description in this module.
	description = "{{{langname}}} names of the counties of [[Wiktionary:Northern Ireland|Northern Ireland]].",
	-- has two parents: "political subdivisions" and "counties of Ireland"
	parents = {{name = "political subdivisions", sort = "Northern Ireland"}, {name = "counties of Ireland", sort = "Northern Ireland"}, "Northern Ireland", "list of sets"},
}

-- Canadian counties
-- only these five provinces have counties
do
	local provinces_with_counties = {"New Brunswick", "Nova Scotia", "Ontario", "Prince Edward Island", "Quebec"}
	for _, province in ipairs(provinces_with_counties) do
		local county_parents = {{name = "counties of Canada", sort = province}, province, "list of sets"}
		labels["counties of " .. province] = {
			-- NOTE(review): "default-set" is presumably a keyword that the
			-- category-tree framework expands into a standard description -- confirm.
			description = "default-set",
			parents = county_parents,
		}
	end
end

-- municipalities

-- "rural municipalities of <province>" for the Canadian provinces listed below.
do
	local rural_link = "[[Wikipedia:rural municipality|rural municipalities]]"
	for _, province in ipairs({"Saskatchewan", "Manitoba", "Prince Edward Island"}) do
		local linked_province = "[[Wiktionary:" .. province .. "|" .. province .. "]]"
		labels["rural municipalities of " .. province] = {
			description = "{{{langname}}} names of " .. rural_link .. " of " .. linked_province .. ", a [[Wiktionary:province|province]] of [[Wiktionary:Canada|Canada]].",
			parents = {{name = "rural municipalities", sort = province}, province, "list of sets"},
		}
	end
end

-- regions and "regional units"

labels["regions of Greece"] = {
	-- special-cased description (mentions the alternative name "peripheries");
	-- it now ends with a period, like every other description in this module.
	description = "{{{langname}}} names of the regions (peripheries) of [[Wiktionary:Greece|Greece]].",
	parents = {{name = "political subdivisions", sort = "Greece"}, "Greece", "list of sets"},
}

-- subdistricts and subprefectures
do
	-- Spec for "subprefectures of <name>", a child of "subprefectures of Japan".
	-- `entity_kind` is the word printed after the linked name.
	local function subprefecture_spec(name, entity_kind)
		return {
			description = "{{{langname}}} names of subprefectures of [[Wiktionary:" .. name .. "|" .. name .. "]] " .. entity_kind .. ".",
			parents = {{name = "subprefectures of Japan", sort = name}, name, "list of sets"},
		}
	end

	labels["subdistricts of Jakarta"] = {
		-- not listed in the normal place because no categories like "cities in Jakarta"
		parents = {{name = "political subdivisions", sort = "Jakarta"}, "Indonesia", "list of sets"},
		description = "default-set",
	}

	labels["subprefectures of Hokkaido"] = subprefecture_spec("Hokkaido", "Prefecture")

	labels["subprefectures of Japan"] = {
		-- special-cased description
		parents = {{name = "political subdivisions", sort = "Japan"}, "Japan", "list of sets"},
		description = "{{{langname}}} names of subprefectures of Japanese prefectures.",
	}

	labels["subprefectures of Tokyo"] = subprefecture_spec("Tokyo", "Metropolis")
end

-- towns and townships

-- Canadian townships: a country-level label plus one for Ontario nested under it.
labels["townships in Canada"] = {
	parents = {{name = "townships", sort = "Canada"}, "Canada", "list of sets"},
	description = "{{{langname}}} names of townships in [[Wiktionary:Canada|Canada]].",
}

labels["townships in Ontario"] = {
	parents = {{name = "townships in Canada", sort = "Ontario"}, "Ontario", "list of sets"},
	description = "{{{langname}}} names of townships in [[Wiktionary:Ontario|Ontario]]. Municipalities in Ontario can be called as a city, a town, a township, or a village.",
}

-- misc to be sorted; putting here so old module can be deleted

labels["special wards of Tokyo, Japan"] = {
	parents = {{name = "political subdivisions", sort = "Tokyo"}, "Tokyo", "list of sets"},
	description = "{{{langname}}} names of special wards of [[Wiktionary:Tokyo|Tokyo]] Metropolis, [[Wiktionary:Japan|Japan]].",
}

-- Temporary while users adjust to recent changes; also kept in case there is a
-- desire to use it for its topical purpose (see the description). Can be
-- removed later if unused.

labels["place names"] = {
	parents = {"names", "list of sets"},
	description = "{{{langname}}} terms like ''hydronym'', for names for geographical [[Wiktionary:place|place]]s.",
}

-- Module export: the static label table and the list of on-the-fly handlers.
local export = {
	LABELS = labels,
	HANDLERS = handlers,
}
return export