Modul:languages
Dokumentasi untuk modul ini dapat dibuat di Modul:languages/doc
local export = {}
local function do_replacements(text, self, sc, replacement_data, function_name, recursed)
-- If there are language-specific substitutes given in the data module, use those.
if type(replacement_data) == "table" then
-- If a script is specified, run this function with the script-specific data before continuing.
local sc_code = sc:getCode()
if replacement_data[sc_code] then
text = do_replacements(text, self, sc, replacement_data[sc_code], function_name, true)
-- Hant, Hans and Hani don't sort differently, so add a special case to avoid having to specify each one separately.
elseif replacement_data.Han and (sc_code == "Hant" or sc_code == "Hans" or sc_code == "Hani") then
text = do_replacements(text, self, sc, replacement_data.Han, function_name, true)
end
-- Iterate over all strings in the "from" subtable, and gsub with the corresponding string in "to". We work with the NFD decomposed forms, as this simplifies many replacements.
if replacement_data.from then
text = sc:toFixedNFD(text)
local gsub
for i, from in ipairs(replacement_data.from) do
-- Check whether specific magic characters are present, as they rely on UTF-8 compatibility. If not, just use string.gsub. In most cases, doing this is faster than using mw.ustring.gsub every time.
if from:match("[%%.[%]*+%-?]") then gsub = mw.ustring.gsub else gsub = string.gsub end
text = gsub(text, sc:toFixedNFD(from), replacement_data.to[i] or "")
end
end
if replacement_data.remove_diacritics then
text = sc:toFixedNFD(text)
-- Convert any specified exceptions into PUA characters, to avoid having diacritics stripped. Uses the supplemetary PUA planes (U+FXXXX & U+10XXXX), to ensure that any characters in the BMP (U+0XXXX) or SMP (U+1XXXX) can be round-trip converted to PUA.
-- This will need to be reviewed if any characters in the SIP (U+2XXXX) or TIP (U+3XXXX) need to be processed by it, but as these planes are exclusively CJK characters as of 2022, this is unlikely to happen for the time being. However, it is unwise to start using non-PUA codepoints in the U+4XXXX-U+EXXXX range, as support for these is completely untested, so they may result in unpredictable behaviour.
if replacement_data.remove_exceptions then
local char, codepoint, len, substitute = mw.ustring.char, mw.ustring.codepoint, mw.ustring.len
for _,exception in ipairs(replacement_data.remove_exceptions) do
exception = sc:toFixedNFD(exception)
substitute = {codepoint(exception, 1, len(exception))}
for i, codepoint in ipairs(substitute) do substitute[i] = char(codepoint+0xF0000) end
text = text:gsub(exception, table.concat(substitute))
end
end
-- Strip diacritics. This must use mw.ustring.gsub, to ensure the character class is UTF-8 compatible.
text = mw.ustring.gsub(text, "[" .. replacement_data.remove_diacritics .. "]", "")
-- Convert any exceptions back.
if replacement_data.remove_exceptions then
for _,exception in ipairs(replacement_data.remove_exceptions) do
local char, codepoint, len, substitute = mw.ustring.char, mw.ustring.codepoint, mw.ustring.len
exception = sc:toFixedNFD(exception)
substitute = {codepoint(exception, 1, len(exception))}
for i, codepoint in ipairs(substitute) do substitute[i] = char(codepoint+0xF0000) end
text = text:gsub(table.concat(substitute), exception)
end
end
end
elseif type(replacement_data) == "string" then
-- If there is a dedicated function module, use that.
local is_module, module = pcall(require, "Module:" .. replacement_data)
if is_module then
text = module[function_name](sc:toFixedNFD(text), self:getCode(), sc:getCode())
-- If there is matching shared data, run this function again using that as the replacement data.
else
local m_shared = mw.loadData("Module:languages/shareddata")
if m_shared[replacement_data] then
return do_replacements(text, self, sc, m_shared[replacement_data], function_name, recursed)
else
error("Replacement data does not match any shared data or an existing module.")
end
end
end
if not recursed then
return mw.ustring.toNFC(text)
else
return text
end
end
local Language = {}
function Language:getCode()
return self._code
end
function Language:getCanonicalName()
return self._rawData[1] or self._rawData.canonicalName
end
function Language:getDisplayForm()
return self:getCanonicalName()
end
function Language:getOtherNames(onlyOtherNames)
self:loadInExtraData()
return require("Module:language-like").getOtherNames(self, onlyOtherNames)
end
function Language:getAliases()
self:loadInExtraData()
return self._extraData.aliases or {}
end
function Language:getVarieties(flatten)
self:loadInExtraData()
return require("Module:language-like").getVarieties(self, flatten)
end
function Language:getType()
return self._rawData.type or "regular"
end
function Language:getWikimediaLanguages()
if not self._wikimediaLanguageObjects then
local m_wikimedia_languages = require("Module:wikimedia languages")
self._wikimediaLanguageObjects = {}
local wikimedia_codes = self._rawData.wikimedia_codes or {self:getCode()}
for _, wlangcode in ipairs(wikimedia_codes) do
table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode))
end
end
return self._wikimediaLanguageObjects
end
function Language:getWikipediaArticle()
if self._rawData.wikipedia_article then
return self._rawData.wikipedia_article
elseif self._wikipedia_article then
return self._wikipedia_article
elseif self:getWikidataItem() and mw.wikibase then
self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')
end
if not self._wikipedia_article then
self._wikipedia_article = self:getCategoryName():gsub("Creole language", "Creole")
end
return self._wikipedia_article
end
function Language:makeWikipediaLink()
return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]"
end
function Language:getWikidataItem()
local item = self._rawData[2]
if type(item) == "number" then
return "Q" .. item
else
return item
end
end
function Language:getScripts()
if not self._scriptObjects then
local m_scripts = require("Module:scripts")
self._scriptObjects = {}
for _, sc in ipairs(self:getScriptCodes()) do
table.insert(self._scriptObjects, m_scripts.getByCode(sc))
end
end
return self._scriptObjects
end
function Language:getScriptCodes()
return self._rawData.scripts or self._rawData[4] or {"None"}
end
function Language:getFamily()
if self._familyObject then
return self._familyObject
end
local family = self._rawData[3] or self._rawData.family
if family then
self._familyObject = require("Module:families").getByCode(family)
end
return self._familyObject
end
function Language:getAncestors()
if not self._ancestorObjects then
self._ancestorObjects = {}
if self._rawData.ancestors then
for _, ancestor in ipairs(self._rawData.ancestors) do
table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("Module:etymology languages").getByCode(ancestor))
end
else
local fam = self:getFamily()
local protoLang = fam and fam:getProtoLanguage() or nil
-- For the case where the current language is the proto-language
-- of its family, we need to step up a level higher right from the start.
if protoLang and protoLang:getCode() == self:getCode() then
fam = fam:getFamily()
protoLang = fam and fam:getProtoLanguage() or nil
end
while not protoLang and not (not fam or fam:getCode() == "qfa-not") do
fam = fam:getFamily()
protoLang = fam and fam:getProtoLanguage() or nil
end
table.insert(self._ancestorObjects, protoLang)
end
end
return self._ancestorObjects
end
local function iterateOverAncestorTree(node, func)
for _, ancestor in ipairs(node:getAncestors()) do
if ancestor then
local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func)
if ret then
return ret
end
end
end
end
function Language:getAncestorChain()
if not self._ancestorChain then
self._ancestorChain = {}
local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil
while step do
table.insert(self._ancestorChain, 1, step)
step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil
end
end
return self._ancestorChain
end
function Language:hasAncestor(otherlang)
local function compare(ancestor)
return ancestor:getCode() == otherlang:getCode()
end
return iterateOverAncestorTree(self, compare) or false
end
function Language:getCategoryName(nocap)
local name = self:getCanonicalName()
-- If the name already has "language" in it, don't add it.
if not name:find("[Ll]anguage$") then
name = name .. " language"
end
if not nocap then
name = mw.getContentLanguage():ucfirst(name)
end
return name
end
function Language:makeCategoryLink()
return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
end
function Language:getStandardCharacters()
return self._rawData.standardChars
end
function Language:makeEntryName(text, sc)
if not sc or sc._type ~= "script object" then sc = require("Module:scripts").findBestScript(text, self) end
text = sc:toFixedNFD(text)
text = mw.text.unstrip(text)
text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text
text = text:gsub("", "") -- strip soft hyphens
return do_replacements(text, self, sc, self._rawData.entry_name, "makeEntryName")
end
-- Return true if the language has display processing enabled, i.e. lang:makeDisplayText()
-- does non-trivial processing.
function Language:hasDisplayProcessing()
return not not self._rawData.display_text
end
-- Apply display-text replacements to `text`, if any.
function Language:makeDisplayText(text, sc)
if not sc or sc._type ~= "script object" then sc = require("Module:scripts").findBestScript(text, self) end
text = mw.text.unstrip(text)
text = sc:toFixedNFD(text)
return do_replacements(text, self, sc, self._rawData.display_text, "makeDisplayText")
end
function Language:makeSortKey(text, sc)
if not sc or sc._type ~= "script object" then sc = require("Module:scripts").findBestScript(text, self) end
text = sc:toFixedNFD(text)
local gsub = mw.ustring.gsub
-- Remove initial hyphens and *.
text = mw.text.unstrip(text)
text = gsub(text, "^[-־ـ᠊*]+(.)", "%1")
-- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
if self._rawData.dotted_dotless_i then
text = text:gsub(mw.ustring.toNFD("İ"), "i")
text = text:gsub("I", "ı")
end
-- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I".
text = mw.ustring.lower(text)
text = do_replacements(text, self, sc, self._rawData.sort_key, "makeSortKey")
if self._rawData.dotted_dotless_i and not self._rawData.sort_key then
text = text:gsub("ı", "I")
text = text:gsub("i", "İ")
end
text = mw.ustring.upper(text)
-- Remove parentheses, as long as they are either preceded or followed by something.
text = gsub(text, "(.)[()]+", "%1")
text = gsub(text, "[()]+(.)", "%1")
return text
end
function Language:overrideManualTranslit()
return not not self._rawData.override_translit
end
function Language:transliterate(text, sc, module_override)
if not ((module_override or self._rawData.translit_module) and text) then
return nil
end
if module_override then
require("Module:debug").track("module_override")
end
-- Get the display form.
if not sc or sc._type ~= "script object" then sc = require("Module:scripts").findBestScript(text, self) end
text = self:makeDisplayText(text, sc)
return require("Module:" .. (module_override or self._rawData.translit_module)).tr(text, self:getCode(), sc:getCode())
end
function Language:hasTranslit()
return self._rawData.translit_module and true or false
end
function Language:link_tr()
return self._rawData.link_tr and true or false
end
function Language:toJSON()
local entryNamePatterns = nil
local entryNameRemoveDiacritics = nil
if self._rawData.entry_name then
entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics
if self._rawData.entry_name.from then
entryNamePatterns = {}
for i, from in ipairs(self._rawData.entry_name.from) do
table.insert(entryNamePatterns, {from = from, to = self._rawData.entry_name.to[i] or ""})
end
end
end
local ret = {
ancestors = self._rawData.ancestors,
canonicalName = self:getCanonicalName(),
categoryName = self:getCategoryName("nocap"),
code = self:getCode(),
entryNamePatterns = entryNamePatterns,
entryNameRemoveDiacritics = entryNameRemoveDiacritics,
family = self._rawData[3] or self._rawData.family,
otherNames = self:getOtherNames(true),
aliases = self:getAliases(),
varieties = self:getVarieties(),
scripts = self._rawData.scripts or self._rawData[4],
type = self:getType(),
wikimediaLanguages = self._rawData.wikimedia_codes,
wikidataItem = self:getWikidataItem(),
}
return require("Module:JSON").toJSON(ret)
end
-- Do NOT use these methods!
-- All uses should be pre-approved on the talk page!
function Language:getRawData()
return self._rawData
end
function Language:getRawExtraData()
self:loadInExtraData()
return self._extraData
end
Language.__index = Language
function export.getDataModuleName(code)
if code:find("^%l%l$") then
return "languages/data2"
elseif code:find("^%l%l%l$") then
local prefix = code:sub(1, 1)
return "languages/data3/" .. prefix
elseif code:find("^[%l-]+$") then
return "languages/datax"
else
return nil
end
end
function export.getExtraDataModuleName(code)
if code:find("^%l%l$") then
return "languages/extradata2"
elseif code:find("^%l%l%l$") then
local prefix = code:sub(1, 1)
return "languages/extradata3/" .. prefix
elseif code:find("^[%l-]+$") then
return "languages/extradatax"
else
return nil
end
end
local function getRawLanguageData(code)
local modulename = export.getDataModuleName(code)
return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end
local function getRawExtraLanguageData(code)
local modulename = export.getExtraDataModuleName(code)
return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end
function Language:loadInExtraData()
if not self._extraData then
-- load extra data from module and assign to meta table
-- use empty table as a fallback if extra data is nil
local meta = getmetatable(self)
meta._extraData = getRawExtraLanguageData(self:getCode()) or {}
setmetatable(self, meta)
end
end
function export.makeObject(code, data)
if data and data.deprecated then
require("Module:debug").track {
"languages/deprecated",
"languages/deprecated/" .. code
}
end
return data and setmetatable({_rawData = data, _code = code, _type = "language object"}, Language) or nil
end
function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
if type(code) ~= "string" then
error("The function getByCode expects a string as its first argument, but received " .. (code == nil and "nil" or "a " .. type(code)) .. ".")
end
local retval = export.makeObject(code, getRawLanguageData(code))
if not retval and allowEtymLang then
retval = require("Module:etymology languages").getByCode(code)
end
if not retval and allowFamily then
retval = require("Module:families").getByCode(code)
end
if not retval and paramForError then
require("Module:languages/errorGetBy").code(code, paramForError, allowEtymLang, allowFamily)
end
return retval
end
function export.getByName(name, errorIfInvalid)
local byName = mw.loadData("Module:languages/by name")
local code = byName.all and byName.all[name] or byName[name]
if not code then
if errorIfInvalid then
error("The language name \"" .. name .. "\" is not valid. See [[Wiktionary:List of languages]].")
else
return nil
end
end
return export.makeObject(code, getRawLanguageData(code))
end
function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily)
local byName = mw.loadData("Module:languages/canonical names")
local code = byName and byName[name]
local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil
if not retval and allowEtymLang then
retval = require("Module:etymology languages").getByCanonicalName(name)
end
if not retval and allowFamily then
local famname = name:match("^(.*) languages$")
famname = famname or name
retval = require("Module:families").getByCanonicalName(famname)
end
if not retval and errorIfInvalid then
require("Module:languages/errorGetBy").canonicalName(name, allowEtymLang, allowFamily)
end
return retval
end
--[[ If language is an etymology language, iterates through parent languages
until it finds a non-etymology language. ]]
function export.getNonEtymological(lang)
while lang:getType() == "etymology language" do
local parentCode = lang:getParentCode()
lang = export.getByCode(parentCode)
or require("Module:etymology languages").getByCode(parentCode)
or require("Module:families").getByCode(parentCode)
end
return lang
end
-- for backwards compatibility only; modules should require the /error themselves
function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
return require("Module:languages/error")(lang_code, param, code_desc, template_tag, not_real_lang)
end
return export