-- Module:ja/data/range
--
-- From Wiktionary, the free dictionary.


local u = require("Module:string utilities").char

local range = {}

-- range.kanji: character-class ranges covering kanji.
-- Each entry below is a {first, last} pair of code points; every pair is
-- rendered as "<first>-<last>" and the pieces are joined in the order listed.
do
	local kanji_spans = {
		{0x2E80, 0x2FDF}, -- CJK Radicals Supplement + Kangxi Radicals
		{0x4E00, 0x9FFF}, -- CJK Unified Ideographs
		{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A
		{0xF900, 0xFAFF}, -- CJK Compatibility Ideographs
		{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B
		{0x2A700, 0x2EE5F}, -- CJK Unified Ideographs Extension C-F & I
		{0x2F800, 0x2FA1F}, -- CJK Compatibility Ideographs Supplement
		{0x30000, 0x323AF}, -- CJK Unified Ideographs Extension G-H
	}
	local pieces = {}
	for i, span in ipairs(kanji_spans) do
		pieces[i] = u(span[1]) .. "-" .. u(span[2])
	end
	range.kanji = table.concat(pieces)
end

-- range.kana_combining_characters: sound marks and diacritics, built from
-- code points and joined in the order listed.
do
	local marks = {
		-- Hiragana block: U+3099 through U+309C (voiced / semi-voiced sound marks)
		u(0x3099) .. "-" .. u(0x309C),
		-- Halfwidth and Fullwidth Forms: the two halfwidth sound marks
		u(0xFF9E) .. u(0xFF9F),
		-- Combining Diacritical Marks: U+0305 (overline) and U+0323 (dot below)
		u(0x0305) .. u(0x0323),
	}
	range.kana_combining_characters = table.concat(marks)
end

-- range.kana_overlap: the part common to range.hiragana, range.katakana and
-- range.kana, each of which starts with this string.
range.kana_overlap = table.concat({
	range.kana_combining_characters,
	"〰-〵", -- CJK Symbols and Punctuation
	"ー", -- Katakana
})

-- Characters counted as hiragana only (the shared part lives in
-- range.kana_overlap). Also reused when building range.kana.
local hiragana_exclusive = table.concat({
	"ぁ-ゖゝゞ", -- Hiragana
	"𛀁𛀆𛄟", -- Kana Supplement + Kana Extended-A
	"𛄲𛅐-𛅒", -- Small Kana Extension
})

-- Full hiragana class: shared kana characters first, then the hiragana-only ones.
range.hiragana = table.concat({ range.kana_overlap, hiragana_exclusive })

-- Characters counted as katakana only (the shared part lives in
-- range.kana_overlap). Also reused when building range.kana.
local katakana_exclusive = table.concat({
	"ァ-ヺヽヾ", -- Katakana
	"ㇰ-ㇿ", -- Katakana Phonetic Extensions
	u(0xFF66) .. "-" .. u(0xFF9D), -- Halfwidth and Fullwidth Forms
	"𚿰-𚿾", -- Kana Extended-B
	"𛀀𛄠-𛄢", -- Kana Supplement + Kana Extended-A
	"𛅕𛅤-𛅧", -- Small Kana Extension
})

-- Full katakana class: shared kana characters first, then the katakana-only ones.
range.katakana = table.concat({ range.kana_overlap, katakana_exclusive })

-- Hentaigana: two literal ranges from Kana Supplement + Kana Extended-A.
range.hentaigana = "𛀂-𛀅𛀇-𛄞"

-- All kana: the shared characters, then hiragana-only, katakana-only and
-- hentaigana, concatenated in that order.
range.kana = table.concat({
	range.kana_overlap,
	hiragana_exclusive,
	katakana_exclusive,
	range.hentaigana,
})

-- range.submoraic_kana: character class of the small kana treated as submoraic.
-- Note: not other sutegana like っ, as they aren't submoraic.
range.submoraic_kana =
	"ぁぃぅぇぉゃゅょゎ" .. -- Hiragana
	"ァィゥェォャュョヮ" .. -- Katakana
	-- Halfwidth and Fullwidth Forms: the halfwidth small kana U+FF67..U+FF6E
	-- (small a through small yo). Spelled as code points, like the halfwidth
	-- range in katakana_exclusive, because a literal here is easily folded to
	-- its full-width counterpart; the resulting full-width range from small a
	-- to small yo would match ordinary full-size katakana as well.
	u(0xFF67) .. "-" .. u(0xFF6E) ..
	"𛅐𛅑𛅒𛅤𛅥𛅦" -- Small Kana Extension
	
-- range.vowels: kana grouped under the keys a, i, u, e, o according to the
-- vowel they carry (hiragana first, then katakana); key n holds ん and ン.
do
	local by_vowel = {}
	by_vowel.a = "ぁあかがさざただなはばぱまゃやらゎわァアカガサザタダナハバパマャヤラヮワヷ"
	by_vowel.i = "ぃいきぎしじちぢにひびぴみ𛀆り𛅐ゐィイキギシジチヂニヒビピミ𛄠リ𛅤ヰヸ"
	by_vowel.u = "ぅうゔくぐすずつづぬふぶぷむゅゆる𛄟ゥウヴクグスズツヅヌフブプムュユル𛄢"
	by_vowel.e = "ぇえけげせぜてでねへべぺめ𛀁れ𛅑ゑェエ𛀀ケゲセゼテデネヘベペメ𛄡レ𛅥ヱヹ"
	by_vowel.o = "ぉおこごそぞとどのほぼぽもょよろ𛅒をォオコゴソゾトドノホボポモョヨロ𛅦ヲヺ"
	by_vowel.n = "んン"
	range.vowels = by_vowel
end

-- range.ideograph: literal characters and ranges drawn from the blocks named
-- on each line, joined in the order listed.
range.ideograph = table.concat({
	"〃々-〇〱-〵〻〼", -- CJK Symbols and Punctuation
	"㈠-㉟㊀-㋿", -- Enclosed CJK Letters and Months
	"㍘-㏿", -- CJK Compatibility
	"🈂-🋿", -- Enclosed Ideographic Supplement
})
	
-- range.kana_graph: literal characters and ranges drawn from the blocks named
-- on each line, joined in the order listed.
range.kana_graph = table.concat({
	"ゟヿ", -- Hiragana + Katakana
	"㌀-㍗", -- CJK Compatibility
	"🈀🈁", -- Enclosed Ideographic Supplement
})

-- range.punctuation: character class of CJK and fullwidth/halfwidth punctuation.
--
-- Every character from the Halfwidth and Fullwidth Forms block, and the
-- ideographic space, is spelled as a code point. Literal fullwidth characters
-- are easily folded to their ASCII look-alikes by width normalization, which
-- silently turns these ranges into huge ones (e.g. "{" through the katakana
-- middle dot) that swallow kana and Latin text.
range.punctuation =
	-- CJK Symbols and Punctuation: U+3000 (ideographic space) through "。",
	-- then the bracket and quotation-mark ranges and the part alternation mark.
	u(0x3000) .. "-。〈-】〔-〟〽" ..
	"゠・" .. -- Katakana
	-- Halfwidth and Fullwidth Forms
	u(0xFF01) .. "-" .. u(0xFF0F) .. -- fullwidth "!" through "/"
	u(0xFF1A) .. "-" .. u(0xFF20) .. -- fullwidth ":" through "@"
	u(0xFF3B) .. "-" .. u(0xFF40) .. -- fullwidth "[" through "`"
	u(0xFF5B) .. "-" .. u(0xFF65) .. -- fullwidth "{" through halfwidth middle dot
	u(0xFFE0) .. "-" .. u(0xFFEE) -- fullwidth cent sign through halfwidth white circle

-- range.latin: whatever the "Latn" script object from Module:scripts returns
-- from getCharacters().
do
	local latin_script = require("Module:scripts").getByCode("Latn")
	range.latin = latin_script:getCharacters()
end

-- range.numbers: character class of ASCII digits and fullwidth digits.
range.numbers =
	"0-9" .. -- Basic Latin
	-- Halfwidth and Fullwidth Forms: fullwidth digits U+FF10..U+FF19. Spelled
	-- as code points so the range cannot be folded into a second copy of the
	-- ASCII "0-9" above by width normalization.
	u(0xFF10) .. "-" .. u(0xFF19)

-- Export the table of range strings as the module value.
return range