Module:vi-sortkey
Appearance
- The following documentation is located at Module:vi-sortkey/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will sort Vietnamese language text. It is also used to sort Muong and Nguôn.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{sortkey}}.
Within a module, use Module:languages#Language:makeSortKey.
For testcases, see Module:vi-sortkey/testcases.
Functions
makeSortKey(text, lang, sc)
- Generates a sortkey for a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the sort fails, returns `nil`.
Alphabetic order: a ă â b c d đ e ê g h i k l m n o ô ơ p q r s t u ư v x y.
Order of tonal diacritics: currently a, à, ả, ã, á, ạ; alternative, a, á, à, ả, ã, ạ.
See the discussion in the Beer Parlour.
Examples
TUYE₂N¹ NGO₂N¹ TOAN² THE₂⁵ GIO₃I⁵ VE₂² NHA₃N¹ QUYE₂N² CUA³ LIE₂N¹ HO₃P⁶ QUO₂C⁵
- Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp Quốc
CO₂NG¹ BA₂NG²
- công bằng
D₂AI⁶
- Đại
AC-SI-ME⁵T⁵
- Ác-si-mét
TA₃M¹ LY⁵
- tâm lý
- nghề (NGHE₂²)
- nghệ (NGHE₂⁶)
- ngon (NGON¹)
- ngón (NGON⁵)
- ngọn (NGON⁶)
- ngoy (NGOY¹)
- ngôi (NGO₂I¹)
- ngôn (NGO₂N¹)
- ngôn (NGO₂N¹)
- a (
A¹
) - à (
A²
) - ả (
A³
) - ã (
A⁴
) - á (
A⁵
) - ạ (
A⁶
) - ac (
AC¹
) - àc (
AC²
) - ảc (
AC³
) - ãc (
AC⁴
) - ác (
AC⁵
) - ạc (
AC⁶
) - an (
AN¹
) - àn (
AN²
) - ản (
AN³
) - ãn (
AN⁴
) - án (
AN⁵
) - ạn (
AN⁶
) - ă (
A₂¹
) - ằ (
A₂²
) - ẳ (
A₂³
) - ẵ (
A₂⁴
) - ắ (
A₂⁵
) - ặ (
A₂⁶
) - ăc (
A₂C¹
) - ằc (
A₂C²
) - ẳc (
A₂C³
) - ẵc (
A₂C⁴
) - ắc (
A₂C⁵
) - ặc (
A₂C⁶
) - ăn (
A₂N¹
) - ằn (
A₂N²
) - ẳn (
A₂N³
) - ẵn (
A₂N⁴
) - ắn (
A₂N⁵
) - ặn (
A₂N⁶
) - â (
A₃¹
) - ầ (
A₃²
) - ẩ (
A₃³
) - ẫ (
A₃⁴
) - ấ (
A₃⁵
) - ậ (
A₃⁶
) - âc (
A₃C¹
) - ầc (
A₃C²
) - ẩc (
A₃C³
) - ẫc (
A₃C⁴
) - ấc (
A₃C⁵
) - ậc (
A₃C⁶
) - ân (
A₃N¹
) - ần (
A₃N²
) - ẩn (
A₃N³
) - ẫn (
A₃N⁴
) - ấn (
A₃N⁵
) - ận (
A₃N⁶
) - b (
B¹
) - bc (
BC¹
) - bn (
BN¹
) - c (
C¹
) - cc (
CC¹
) - cn (
CN¹
) - d (
D¹
) - dc (
DC¹
) - dn (
DN¹
) - đ (
D₂¹
) - đc (
D₂C¹
) - đn (
D₂N¹
) - e (
E¹
) - è (
E²
) - ẻ (
E³
) - ẽ (
E⁴
) - é (
E⁵
) - ẹ (
E⁶
) - ec (
EC¹
) - èc (
EC²
) - ẻc (
EC³
) - ẽc (
EC⁴
) - éc (
EC⁵
) - ẹc (
EC⁶
) - en (
EN¹
) - èn (
EN²
) - ẻn (
EN³
) - ẽn (
EN⁴
) - én (
EN⁵
) - ẹn (
EN⁶
) - ê (
E₂¹
) - ề (
E₂²
) - ể (
E₂³
) - ễ (
E₂⁴
) - ế (
E₂⁵
) - ệ (
E₂⁶
) - êc (
E₂C¹
) - ềc (
E₂C²
) - ểc (
E₂C³
) - ễc (
E₂C⁴
) - ếc (
E₂C⁵
) - ệc (
E₂C⁶
) - ên (
E₂N¹
) - ền (
E₂N²
) - ển (
E₂N³
) - ễn (
E₂N⁴
) - ến (
E₂N⁵
) - ện (
E₂N⁶
) - g (
G¹
) - gc (
GC¹
) - gn (
GN¹
) - h (
H¹
) - hc (
HC¹
) - hn (
HN¹
) - i (
I¹
) - ì (
I²
) - ỉ (
I³
) - ĩ (
I⁴
) - í (
I⁵
) - ị (
I⁶
) - ic (
IC¹
) - ìc (
IC²
) - ỉc (
IC³
) - ĩc (
IC⁴
) - íc (
IC⁵
) - ịc (
IC⁶
) - in (
IN¹
) - ìn (
IN²
) - ỉn (
IN³
) - ĩn (
IN⁴
) - ín (
IN⁵
) - ịn (
IN⁶
) - k (
K¹
) - kc (
KC¹
) - kn (
KN¹
) - l (
L¹
) - lc (
LC¹
) - ln (
LN¹
) - m (
M¹
) - mc (
MC¹
) - mn (
MN¹
) - n (
N¹
) - nc (
NC¹
) - nn (
NN¹
) - o (
O¹
) - ò (
O²
) - ỏ (
O³
) - õ (
O⁴
) - ó (
O⁵
) - ọ (
O⁶
) - oc (
OC¹
) - òc (
OC²
) - ỏc (
OC³
) - õc (
OC⁴
) - óc (
OC⁵
) - ọc (
OC⁶
) - on (
ON¹
) - òn (
ON²
) - ỏn (
ON³
) - õn (
ON⁴
) - ón (
ON⁵
) - ọn (
ON⁶
) - ô (
O₂¹
) - ồ (
O₂²
) - ổ (
O₂³
) - ỗ (
O₂⁴
) - ố (
O₂⁵
) - ộ (
O₂⁶
) - ôc (
O₂C¹
) - ồc (
O₂C²
) - ổc (
O₂C³
) - ỗc (
O₂C⁴
) - ốc (
O₂C⁵
) - ộc (
O₂C⁶
) - ôn (
O₂N¹
) - ồn (
O₂N²
) - ổn (
O₂N³
) - ỗn (
O₂N⁴
) - ốn (
O₂N⁵
) - ộn (
O₂N⁶
) - ơ (
O₃¹
) - ờ (
O₃²
) - ở (
O₃³
) - ỡ (
O₃⁴
) - ớ (
O₃⁵
) - ợ (
O₃⁶
) - ơc (
O₃C¹
) - ờc (
O₃C²
) - ởc (
O₃C³
) - ỡc (
O₃C⁴
) - ớc (
O₃C⁵
) - ợc (
O₃C⁶
) - ơn (
O₃N¹
) - ờn (
O₃N²
) - ởn (
O₃N³
) - ỡn (
O₃N⁴
) - ớn (
O₃N⁵
) - ợn (
O₃N⁶
) - p (
P¹
) - pc (
PC¹
) - pn (
PN¹
) - q (
Q¹
) - qc (
QC¹
) - qn (
QN¹
) - r (
R¹
) - rc (
RC¹
) - rn (
RN¹
) - s (
S¹
) - sc (
SC¹
) - sn (
SN¹
) - t (
T¹
) - tc (
TC¹
) - tn (
TN¹
) - u (
U¹
) - ù (
U²
) - ủ (
U³
) - ũ (
U⁴
) - ú (
U⁵
) - ụ (
U⁶
) - uc (
UC¹
) - ùc (
UC²
) - ủc (
UC³
) - ũc (
UC⁴
) - úc (
UC⁵
) - ục (
UC⁶
) - un (
UN¹
) - ùn (
UN²
) - ủn (
UN³
) - ũn (
UN⁴
) - ún (
UN⁵
) - ụn (
UN⁶
) - ư (
U₂¹
) - ừ (
U₂²
) - ử (
U₂³
) - ữ (
U₂⁴
) - ứ (
U₂⁵
) - ự (
U₂⁶
) - ưc (
U₂C¹
) - ừc (
U₂C²
) - ửc (
U₂C³
) - ữc (
U₂C⁴
) - ức (
U₂C⁵
) - ực (
U₂C⁶
) - ưn (
U₂N¹
) - ừn (
U₂N²
) - ửn (
U₂N³
) - ữn (
U₂N⁴
) - ứn (
U₂N⁵
) - ựn (
U₂N⁶
) - v (
V¹
) - vc (
VC¹
) - vn (
VN¹
) - x (
X¹
) - xc (
XC¹
) - xn (
XN¹
) - y (
Y¹
) - ỳ (
Y²
) - ỷ (
Y³
) - ỹ (
Y⁴
) - ý (
Y⁵
) - ỵ (
Y⁶
) - yc (
YC¹
) - ỳc (
YC²
) - ỷc (
YC³
) - ỹc (
YC⁴
) - ýc (
YC⁵
) - ỵc (
YC⁶
) - yn (
YN¹
) - ỳn (
YN²
) - ỷn (
YN³
) - ỹn (
YN⁴
) - ýn (
YN⁵
) - ỵn (
YN⁶
)
-- Table of functions this module exports (returned at the end of the file).
local export = {}
-- String helpers (gsub, lower, char, upper) are taken from
-- Module:string utilities rather than from the plain Lua string library.
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
-- Unicode normalization functions from Scribunto's mw.ustring library.
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- Presumably builds a string from code points -- confirm in Module:string utilities.
local u = m_str_utils.char
local upper = m_str_utils.upper
-- Two marker characters built from the private-use code points U+F000 and
-- U+F001. oneChar appends them to a base letter to mark the letters
-- ă/â/đ/ê/ô/ơ/ư, and showsubst renders them as subscripts for display.
local a, b = u(0xF000), u(0xF001)
-- Maps each combining tone mark to an ASCII placeholder character.
-- makeSortKey applies this table to NFD-decomposed text and then matches the
-- placeholders with the character range [!-&], so every value must stay
-- inside that range. The placeholders are in ascending code point order:
-- grave < hook above < tilde < acute < dot below.
-- NOTE(review): the disabled alternative ordering below refers to c..g, which
-- are not defined in the visible code; they would need defining to enable it.
local diacritics = {
--[===[--Order given by Stephen G. Brown
[u(0x0301)] = c, -- acute
[u(0x0300)] = d, -- grave
[u(0x0309)] = e, -- hook
[u(0x0303)] = f, -- tilde
[u(0x0323)] = g -- dot below]===]
-- Order given by Fumiko Take
[u(0x0300)] = "!", -- grave
[u(0x0309)] = "#", -- hook above
[u(0x0303)] = "$", -- tilde
[u(0x0301)] = "%", -- acute
[u(0x0323)] = "&" -- dot below (U+0323 COMBINING DOT BELOW)
}
-- Letters that count as separate letters of the alphabet. Each one is rewritten
-- as its base letter followed by a marker character (a = U+F000, b = U+F001),
-- which is intended to place it after the plain base letter in the sort order.
local oneChar = {
	-- first variant of the base letter
	["ă"] = "a" .. a,
	["đ"] = "d" .. a,
	["ê"] = "e" .. a,
	["ô"] = "o" .. a,
	["ư"] = "u" .. a,
	-- second variant of the base letter
	["â"] = "a" .. b,
	["ơ"] = "o" .. b
}
--- Builds the sort key for a piece of text.
-- @param text the text to generate a sort key for
-- @param lang accepted for interface compatibility; not read by this function
-- @param sc accepted for interface compatibility; not read by this function
-- @return the upper-cased sort key
function export.makeSortKey(text, lang, sc)
	-- Decompose and lower-case, then swap every tone mark listed in
	-- `diacritics` for its ASCII placeholder.
	local decomposed = lower(toNFD(text))
	local marked = gsub(decomposed, ".", diacritics)

	-- A placeholder followed by a run of non-whitespace is moved behind that
	-- run, i.e. to the end of the whitespace-delimited word.
	-- NOTE(review): the range !-& also spans the double quote character (0x22),
	-- which is not one of the placeholders -- confirm whether input can contain it.
	local reordered = gsub(marked, "([!-&])([^%s]+)", "%2%1")

	-- Recompose so the letters keyed in `oneChar` are single characters again,
	-- replace them with base letter + marker, and upper-case the result.
	local recomposed = toNFC(reordered)
	return upper(gsub(recomposed, ".", oneChar))
end
-- Language object for the code "vi", obtained from Module:languages; used by
-- the demonstration functions in this module.
local vi = require("Module:languages").getByCode("vi")

-- Passes `text` and the `vi` language object to tag_text from
-- Module:script utilities and returns whatever that call returns.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, vi)
end
-- Display substitutions used when showing a sort key to readers: the internal
-- marker and placeholder characters become subscript and superscript digits.
local showsubst = {
	-- letter markers appended by oneChar
	[a] = "₂",
	[b] = "₃",
	-- tone placeholders produced from the diacritics table
	["!"] = "²",
	["#"] = "³",
	["$"] = "⁴",
	["%"] = "⁵",
	["&"] = "⁶"
}
--- Renders each positional argument of `frame` together with its sort key.
-- For every word the output holds a bullet line with the readable sort key in
-- <code> tags, followed by a ":" line with the tagged word.
-- @param frame frame object whose positional args are the words to show
-- @return the concatenated wikitext for all words
function export.showSortkey(frame)
	local lines = {}
	for _, word in ipairs(frame.args) do
		local script_code = vi:findBestScript(word):getCode()
		local key = export.makeSortKey(word, "vi", script_code)
		-- Make the internal marker characters readable.
		key = gsub(key, ".", showsubst)
		-- A word that does not end in a digit or a tone superscript gets "¹"
		-- appended: once at the end of the key, once before each whitespace.
		key = gsub(key, "([^0-9²³⁴⁵⁶])$", "%1¹")
		key = gsub(key, "([^0-9²³⁴⁵⁶])(%s)", "%1¹%2")
		lines[#lines + 1] = "\n* <code>" .. key .. "</code>\n: " .. tag(word)
	end
	return table.concat(lines)
end
--- Sorts the positional arguments of `frame` by sort key and renders them.
-- Each output line is a bullet with the tagged term followed by its readable
-- sort key in parentheses.
-- @param frame frame object whose positional args are the terms to sort
-- @return the concatenated wikitext for the sorted terms
function export.showSorting(frame)
	-- Copy the arguments into a plain sequence that table.sort can reorder.
	local terms = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
	end

	-- Comparisons go through a memoized wrapper of makeSortKey.
	local makeSortKey = require("Module:fun").memoize(export.makeSortKey)
	table.sort(terms, function(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end)

	-- Replace every term in place with its rendered list item.
	for i = 1, #terms do
		local term = terms[i]
		local script_code = vi:findBestScript(term):getCode()
		local key = export.makeSortKey(term, "vi", script_code)
		key = gsub(key, ".", showsubst)
		-- Append "¹" to words that carry no digit or tone superscript at the end.
		key = gsub(key, "([^0-9²³⁴⁵⁶])$", "%1¹")
		key = gsub(key, "([^0-9²³⁴⁵⁶])(%s)", "%1¹%2")
		terms[i] = "\n* " .. tag(term) .. " (<code>" .. key .. "</code>)"
	end
	return table.concat(terms)
end
-- Hand the table of exported functions (makeSortKey, showSortkey, showSorting)
-- back to whoever loads this module.
return export