auto merge of #15619 : kwantam/rust/master, r=huonw

- `width()` computes the displayed width of a string, ignoring the width of control characters.
    - arguably we might do *something* else for control characters, but the question is, what?
    - users who want to do something else can iterate over chars()

- `graphemes()` returns a `Graphemes` struct, which implements an iterator over the grapheme clusters of a &str.
    - fully compliant with [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
    - passes all [Unicode-supplied tests](http://www.unicode.org/reports/tr41/tr41-15.html#Tests29)

- added code to generate additional categories in `unicode.py`
    - `Cn` aka `Not_Assigned`
    - categories necessary for grapheme cluster breaking

- tidied up the exports from libunicode
  - all exports are exposed through a module rather than directly at crate root.
  - std::prelude imports UnicodeChar and UnicodeStrSlice from std::char and std::str rather than directly from libunicode

closes #7043
This commit is contained in:
bors 2014-07-15 22:51:17 +00:00
commit 2692ae1ddd
9 changed files with 1615 additions and 39 deletions

View File

@ -51,6 +51,30 @@ expanded_categories = {
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
# Grapheme cluster data
# taken from UAX29, http://www.unicode.org/reports/tr29/
# The constants below are hand-maintained inputs that are combined with the
# general categories and derived properties when the grapheme table is built.
# these code points are excluded from the Control category
# (0x200c is ZERO WIDTH NON-JOINER, 0x200d is ZERO WIDTH JOINER)
# NOTE: CR and LF are also technically excluded, but for
# the sake of convenience we leave them in the Control group
# and manually check them in the appropriate place. This is
# still compliant with the implementation requirements.
grapheme_control_exceptions = set([0x200c, 0x200d])
# the Regional_Indicator category, as a list of inclusive (lo, hi) ranges
grapheme_regional_indicator = [(0x1f1e6, 0x1f1ff)]
# "The following ... are specifically excluded" from the SpacingMark category
# http://www.unicode.org/reports/tr29/#SpacingMark
# Stored as inclusive (lo, hi) ranges; a single code point is written (x, x).
grapheme_spacingmark_exceptions = [(0x102b, 0x102c), (0x1038, 0x1038),
(0x1062, 0x1064), (0x1067, 0x106d), (0x1083, 0x1083), (0x1087, 0x108c),
(0x108f, 0x108f), (0x109a, 0x109c), (0x19b0, 0x19b4), (0x19b8, 0x19b9),
(0x19bb, 0x19c0), (0x19c8, 0x19c9), (0x1a61, 0x1a61), (0x1a63, 0x1a64),
(0xaa7b, 0xaa7b), (0xaa7d, 0xaa7d)]
# these are included in the SpacingMark category
# (individual code points, not ranges, in addition to the Mc-derived set)
grapheme_spacingmark_extra = set([0xe33, 0xeb3])
def fetch(f):
if not os.path.exists(f):
os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
@ -109,7 +133,7 @@ def load_unicode_data(f):
canon_decomp[code] = seq
# place letter in categories as appropriate
for cat in [gencat] + expanded_categories.get(gencat, []):
for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
if cat not in gencats:
gencats[cat] = []
gencats[cat].append(code)
@ -120,6 +144,12 @@ def load_unicode_data(f):
combines[combine] = []
combines[combine].append(code)
# generate Not_Assigned from Assigned
gencats["Cn"] = gen_unassigned(gencats["Assigned"])
# Assigned is not a real category
del(gencats["Assigned"])
# Other contains Not_Assigned
gencats["C"].extend(gencats["Cn"])
gencats = group_cats(gencats)
combines = to_combines(group_cats(combines))
@ -155,6 +185,11 @@ def ungroup_cat(cat):
lo += 1
return cat_out
def gen_unassigned(assigned):
    """Return the code points that do not appear in `assigned`.

    `assigned` is any iterable of integer code points. The result is an
    ascending list drawn from 0x0000..0xd7ff and 0xe000..0x10ffff; the
    surrogate range 0xd800..0xdfff is skipped entirely, so surrogates are
    never reported as unassigned.
    """
    # a set makes each of the ~1.1 million membership tests O(1)
    assigned = set(assigned)
    return ([i for i in range(0, 0xd800) if i not in assigned] +
            [i for i in range(0xe000, 0x110000) if i not in assigned])
def to_combines(combs):
combs_out = []
for comb in combs:
@ -350,6 +385,45 @@ def emit_conversions_module(f, lowerupper, upperlower):
sorted(lowerupper.iteritems(), key=operator.itemgetter(0)), is_pub=False)
f.write("}\n\n")
def emit_grapheme_module(f, grapheme_table, grapheme_cats):
    """Write the Rust `grapheme` module to the open file `f`.

    The generated module contains:
      * the `GraphemeCat` enum, with one `GC_<name>` variant for each entry
        of `grapheme_cats` followed by a final `GC_Any` variant;
      * a private binary search over (lo, hi, category) ranges that yields
        `GC_Any` when no range contains the character;
      * the public `grapheme_category(c)` lookup function;
      * the private `grapheme_cat_table` static, emitted through
        `emit_table` from `grapheme_table`.

    `grapheme_table` is a sequence of (lo, hi, category_name) triples.
    `grapheme_cats` is an iterable of category names.
    """
    f.write("""pub mod grapheme {
use core::option::{Some, None};
use core::slice::ImmutableVector;
#[allow(non_camel_case_types)]
#[deriving(Clone)]
pub enum GraphemeCat {
""")
    # list() lets callers pass any iterable (e.g. a dict key view), not only
    # a list, before the catch-all "Any" variant is appended
    for cat in list(grapheme_cats) + ["Any"]:
        f.write(" GC_" + cat + ",\n")
    f.write(""" }
fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat {
use core::cmp::{Equal, Less, Greater};
match r.bsearch(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Some(idx) => {
let (_, _, cat) = r[idx];
cat
}
None => GC_Any
}
}
pub fn grapheme_category(c: char) -> GraphemeCat {
bsearch_range_value_table(c, grapheme_cat_table)
}
""")
    # each table entry is rendered as (lo_char, hi_char, GC_<category>)
    emit_table(f, "grapheme_cat_table", grapheme_table, "&'static [(char, char, GraphemeCat)]",
        pfun=lambda x: "(%s,%s,GC_%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]),
        is_pub=False)
    f.write("}\n")
def emit_charwidth_module(f, width_table):
f.write("pub mod charwidth {\n")
f.write(" use core::option::{Option, Some, None};\n")
@ -388,7 +462,7 @@ def emit_charwidth_module(f, width_table):
f.write(" // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n")
emit_table(f, "charwidth_table", width_table, "&'static [(char, char, u8, u8)]", is_pub=False,
pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3]))
f.write("}\n")
f.write("}\n\n")
def emit_norm_module(f, canon, compat, combine):
canon_keys = canon.keys()
@ -473,6 +547,8 @@ def remove_from_wtable(wtable, val):
wtable_out.extend(wtable)
return wtable_out
def optimize_width_table(wtable):
wtable_out = []
w_this = wtable.pop(0)
@ -487,7 +563,7 @@ def optimize_width_table(wtable):
return wtable_out
if __name__ == "__main__":
r = "unicode.rs"
r = "tables.rs"
if os.path.exists(r):
os.remove(r)
with open(r, "w") as rf:
@ -498,12 +574,18 @@ if __name__ == "__main__":
(canon_decomp, compat_decomp, gencats, combines,
lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]
other_derived = ["Default_Ignorable_Code_Point"]
other_derived = ["Default_Ignorable_Code_Point", "Grapheme_Extend"]
derived = load_properties("DerivedCoreProperties.txt", want_derived + other_derived)
scripts = load_properties("Scripts.txt", [])
props = load_properties("PropList.txt",
["White_Space", "Join_Control", "Noncharacter_Code_Point"])
# grapheme cluster category from DerivedCoreProperties
# the rest are defined below
grapheme_cats = {}
grapheme_cats["Extend"] = derived["Grapheme_Extend"]
del(derived["Grapheme_Extend"])
# bsearch_range_table is used in all the property modules below
emit_bsearch_range_table(rf)
@ -533,7 +615,7 @@ if __name__ == "__main__":
emit_norm_module(rf, canon_decomp, compat_decomp, combines)
emit_conversions_module(rf, lowerupper, upperlower)
# character width module
### character width module
width_table = []
for zwcat in ["Me", "Mn", "Cf"]:
width_table.extend(map(lambda (lo, hi): (lo, hi, 0, 0), gencats[zwcat]))
@ -555,3 +637,40 @@ if __name__ == "__main__":
# optimize the width table by collapsing adjacent entities when possible
width_table = optimize_width_table(width_table)
emit_charwidth_module(rf, width_table)
### grapheme cluster module
# from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
# Hangul syllable categories
want_hangul = ["L", "V", "T", "LV", "LVT"]
grapheme_cats.update(load_properties("HangulSyllableType.txt", want_hangul))
# Control
# This category also includes Cs (surrogate codepoints), but Rust's `char`s are
# Unicode Scalar Values only, and surrogates are thus invalid `char`s.
grapheme_cats["Control"] = set()
for cat in ["Zl", "Zp", "Cc", "Cf"]:
grapheme_cats["Control"] |= set(ungroup_cat(gencats[cat]))
grapheme_cats["Control"] = group_cat(list(
grapheme_cats["Control"]
- grapheme_control_exceptions
| (set(ungroup_cat(gencats["Cn"]))
& set(ungroup_cat(derived["Default_Ignorable_Code_Point"])))))
# Regional Indicator
grapheme_cats["RegionalIndicator"] = grapheme_regional_indicator
# Prepend - "Currently there are no characters with this value"
# (from UAX#29, Unicode 7.0)
# SpacingMark
grapheme_cats["SpacingMark"] = group_cat(list(
set(ungroup_cat(gencats["Mc"]))
- set(ungroup_cat(grapheme_cats["Extend"]))
| grapheme_spacingmark_extra
- set(ungroup_cat(grapheme_spacingmark_exceptions))))
grapheme_table = []
for cat in grapheme_cats:
grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]])
grapheme_table.sort(key=lambda w: w[0])
emit_grapheme_module(rf, grapheme_table, grapheme_cats.keys())

View File

@ -88,7 +88,7 @@ pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
pub use core::str::{Str, StrSlice};
pub use unicode::{Words, UnicodeStrSlice};
pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
/*
Section: Creating a string
@ -245,8 +245,6 @@ pub struct Decompositions<'a> {
impl<'a> Iterator<char> for Decompositions<'a> {
#[inline]
fn next(&mut self) -> Option<char> {
use unicode::canonical_combining_class;
match self.buffer.as_slice().head() {
Some(&(c, 0)) => {
self.sorted = false;
@ -270,7 +268,7 @@ impl<'a> Iterator<char> for Decompositions<'a> {
let buffer = &mut self.buffer;
let sorted = &mut self.sorted;
decomposer(ch, |d| {
let class = canonical_combining_class(d);
let class = unicode::char::canonical_combining_class(d);
if class == 0 && !*sorted {
canonical_sort(buffer.as_mut_slice());
*sorted = true;
@ -824,7 +822,7 @@ mod tests {
use string::String;
use vec::Vec;
use unicode::UnicodeChar;
use unicode::char::UnicodeChar;
#[test]
fn test_eq_slice() {
@ -859,6 +857,15 @@ mod tests {
assert_eq!("\u2620".char_len(), 1u);
assert_eq!("\U0001d11e".char_len(), 1u);
assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
assert_eq!("".width(false), 10u);
assert_eq!("".width(true), 10u);
assert_eq!("\0\0\0\0\0".width(false), 0u);
assert_eq!("\0\0\0\0\0".width(true), 0u);
assert_eq!("".width(false), 0u);
assert_eq!("".width(true), 0u);
assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
}
#[test]
@ -1815,7 +1822,7 @@ mod tests {
assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
String::from_str("\u0301a"));
assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
String::from_str("\u1111\u1171\u11b6"));
String::from_str("\u1111\u1171\u11b6"));
assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
}
@ -1830,6 +1837,286 @@ String::from_str("\u1111\u1171\u11b6"));
assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
}
#[test]
fn test_graphemes() {
use std::iter::order;
// official Unicode test data
// from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
let test_same = [
("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
"\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
&["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
"\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
&["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
&["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
"\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
&["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
"\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
&["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
"\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
&["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
"\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
&["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
"\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
&["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
"\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
&["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
"\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
&["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
&["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
"\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
&["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
"\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
&["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
"\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
&["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
"\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
&["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
"\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
&["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
"\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
&["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
"\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
&["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
"\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
&["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
"\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
&["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
&["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
"\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
&["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
"\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
&["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
&["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
"\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
&["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
"\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
&["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
"\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
&["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
"\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
&["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
"\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
&["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
"\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
&["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
"\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
&["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
&["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
"\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
&["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
"\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
&["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
&["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
"\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
&["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
&["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
&["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
"\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
&["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
"\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
&["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
"\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
&["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
"\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
&["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
"\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
"\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
&["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
"\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
&["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
"\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
&["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
"\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
&["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
&["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
"\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
&["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
"\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
&["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
"\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
&["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
"\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
&["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
"\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
"\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
&["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
"\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
&["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
"\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
&["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
"\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
&["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
&["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
&["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
"\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
&["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
"\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
&["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
&["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
&["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
"\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
&["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
"\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
&["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
"\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
&["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
"\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
&["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
"\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
&["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
"\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
&["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
&["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
&["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
"\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
&["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
&["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
"\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
];
let test_diff = [
("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
&["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
&["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
&["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
&["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
&["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
&["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
&["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
&["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
&["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
&["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
&["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
&["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
&["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
&["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
&["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
&["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
&["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
&["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
&["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
&["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
&["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
&["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
];
for &(s, g) in test_same.iter() {
// test forward iterator
assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
// test reverse iterator
assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
}
for &(s, gt, gf) in test_diff.iter() {
// test forward iterator
assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
// test reverse iterator
assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
}
// test the indices iterators
let s = "a̐éö̲\r\n";
let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
assert_eq!(gr_inds.as_slice(), &[(0u, ""), (3, ""), (6, "ö̲"), (11, "\r\n")]);
let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
assert_eq!(gr_inds.as_slice(), &[(11, "\r\n"), (6, "ö̲"), (3, ""), (0u, "")]);
let mut gr_inds = s.grapheme_indices(true);
let e1 = gr_inds.size_hint();
assert_eq!(e1, (1, Some(13)));
let c = gr_inds.count();
assert_eq!(c, 4);
let e2 = gr_inds.size_hint();
assert_eq!(e2, (0, Some(0)));
// make sure the reverse iterator does the right thing with "\n" at beginning of string
let s = "\n\r\n\r";
let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
assert_eq!(gr.as_slice(), &["\r", "\r\n", "\n"]);
}
#[test]
fn test_split_strator() {
fn t(s: &str, sep: &str, u: &[&str]) {

View File

@ -237,7 +237,7 @@ use str::{Str, StrSlice};
use str;
use string::String;
use uint;
use unicode::UnicodeChar;
use unicode::char::UnicodeChar;
use vec::Vec;
// Reexports

View File

@ -24,7 +24,7 @@ use option::{Option, Some, None};
use slice::{Vector, ImmutableVector};
use str::{CharSplits, Str, StrAllocating, StrVector, StrSlice};
use string::String;
use unicode::UnicodeChar;
use unicode::char::UnicodeChar;
use vec::Vec;
use super::{contains_nul, BytesContainer, GenericPath, GenericPathUnsafe};

View File

@ -59,7 +59,7 @@
#[doc(no_inline)] pub use ascii::{Ascii, AsciiCast, OwnedAsciiCast, AsciiStr};
#[doc(no_inline)] pub use ascii::IntoBytes;
#[doc(no_inline)] pub use c_str::ToCStr;
#[doc(no_inline)] pub use char::Char;
#[doc(no_inline)] pub use char::{Char, UnicodeChar};
#[doc(no_inline)] pub use clone::Clone;
#[doc(no_inline)] pub use cmp::{PartialEq, PartialOrd, Eq, Ord};
#[doc(no_inline)] pub use cmp::{Ordering, Less, Equal, Greater, Equiv};
@ -77,7 +77,7 @@
#[doc(no_inline)] pub use ptr::RawPtr;
#[doc(no_inline)] pub use io::{Buffer, Writer, Reader, Seek};
#[doc(no_inline)] pub use str::{Str, StrVector, StrSlice, OwnedStr};
#[doc(no_inline)] pub use str::{IntoMaybeOwned, StrAllocating};
#[doc(no_inline)] pub use str::{IntoMaybeOwned, StrAllocating, UnicodeStrSlice};
#[doc(no_inline)] pub use to_str::{ToString, IntoStr};
#[doc(no_inline)] pub use tuple::{Tuple1, Tuple2, Tuple3, Tuple4};
#[doc(no_inline)] pub use tuple::{Tuple5, Tuple6, Tuple7, Tuple8};
@ -89,7 +89,6 @@
#[doc(no_inline)] pub use slice::{Vector, VectorVector};
#[doc(no_inline)] pub use slice::MutableVectorAllocating;
#[doc(no_inline)] pub use string::String;
#[doc(no_inline)] pub use unicode::{UnicodeChar, UnicodeStrSlice};
#[doc(no_inline)] pub use vec::Vec;
// Reexported runtime types

View File

@ -21,7 +21,7 @@ use os;
use result::{Ok, Err};
use str::StrSlice;
use sync::atomics;
use unicode::UnicodeChar;
use unicode::char::UnicodeChar;
pub use self::imp::write;

View File

@ -33,13 +33,9 @@
extern crate core;
pub use tables::normalization::canonical_combining_class;
// regex module
pub use tables::regex;
pub use u_char::UnicodeChar;
pub use u_str::UnicodeStrSlice;
pub use u_str::Words;
mod decompose;
mod tables;
mod u_char;
@ -66,11 +62,22 @@ pub mod char {
pub use core::char::{from_digit, escape_unicode, escape_default};
pub use core::char::{len_utf8_bytes, Char};
pub use decompose::decompose_canonical;
pub use decompose::decompose_compatible;
pub use decompose::{decompose_canonical, decompose_compatible};
pub use tables::normalization::canonical_combining_class;
pub use u_char::{is_alphabetic, is_XID_start, is_XID_continue};
pub use u_char::{is_lowercase, is_uppercase, is_whitespace};
pub use u_char::{is_alphanumeric, is_control, is_digit};
pub use u_char::{to_uppercase, to_lowercase, width, UnicodeChar};
}
pub mod str {
pub use u_str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
}
// Shim so that `#[deriving(Clone)]` works in this crate: re-export the `core`
// modules `clone` and `cmp` under a crate-local `std` (presumably the paths
// the derived impls refer to -- confirm against the deriving implementation).
mod std {
    pub use core::{clone, cmp};
}

View File

@ -25,13 +25,178 @@ fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
pub mod general_category {
pub static C_table: &'static [(char, char)] = &[
('\x00', '\x1f'), ('\x7f', '\x9f'), ('\xad', '\xad'), ('\u0600', '\u0605'), ('\u061c',
'\u061c'), ('\u06dd', '\u06dd'), ('\u070f', '\u070f'), ('\u180e', '\u180e'), ('\u200b',
'\u200f'), ('\u202a', '\u202e'), ('\u2060', '\u2064'), ('\u2066', '\u206f'), ('\ue000',
'\ue000'), ('\uf8ff', '\uf8ff'), ('\ufeff', '\ufeff'), ('\ufff9', '\ufffb'), ('\U000110bd',
'\U000110bd'), ('\U0001bca0', '\U0001bca3'), ('\U0001d173', '\U0001d17a'), ('\U000e0001',
'\U000e0001'), ('\U000e0020', '\U000e007f'), ('\U000f0000', '\U000f0000'), ('\U000ffffd',
'\U000ffffd'), ('\U00100000', '\U00100000'), ('\U0010fffd', '\U0010fffd')
('\x00', '\x1f'), ('\x7f', '\x9f'), ('\xad', '\xad'), ('\u0378', '\u0379'), ('\u0380',
'\u0383'), ('\u038b', '\u038b'), ('\u038d', '\u038d'), ('\u03a2', '\u03a2'), ('\u0530',
'\u0530'), ('\u0557', '\u0558'), ('\u0560', '\u0560'), ('\u0588', '\u0588'), ('\u058b',
'\u058c'), ('\u0590', '\u0590'), ('\u05c8', '\u05cf'), ('\u05eb', '\u05ef'), ('\u05f5',
'\u0605'), ('\u061c', '\u061d'), ('\u06dd', '\u06dd'), ('\u070e', '\u070f'), ('\u074b',
'\u074c'), ('\u07b2', '\u07bf'), ('\u07fb', '\u07ff'), ('\u082e', '\u082f'), ('\u083f',
'\u083f'), ('\u085c', '\u085d'), ('\u085f', '\u089f'), ('\u08b3', '\u08e3'), ('\u0984',
'\u0984'), ('\u098d', '\u098e'), ('\u0991', '\u0992'), ('\u09a9', '\u09a9'), ('\u09b1',
'\u09b1'), ('\u09b3', '\u09b5'), ('\u09ba', '\u09bb'), ('\u09c5', '\u09c6'), ('\u09c9',
'\u09ca'), ('\u09cf', '\u09d6'), ('\u09d8', '\u09db'), ('\u09de', '\u09de'), ('\u09e4',
'\u09e5'), ('\u09fc', '\u0a00'), ('\u0a04', '\u0a04'), ('\u0a0b', '\u0a0e'), ('\u0a11',
'\u0a12'), ('\u0a29', '\u0a29'), ('\u0a31', '\u0a31'), ('\u0a34', '\u0a34'), ('\u0a37',
'\u0a37'), ('\u0a3a', '\u0a3b'), ('\u0a3d', '\u0a3d'), ('\u0a43', '\u0a46'), ('\u0a49',
'\u0a4a'), ('\u0a4e', '\u0a50'), ('\u0a52', '\u0a58'), ('\u0a5d', '\u0a5d'), ('\u0a5f',
'\u0a65'), ('\u0a76', '\u0a80'), ('\u0a84', '\u0a84'), ('\u0a8e', '\u0a8e'), ('\u0a92',
'\u0a92'), ('\u0aa9', '\u0aa9'), ('\u0ab1', '\u0ab1'), ('\u0ab4', '\u0ab4'), ('\u0aba',
'\u0abb'), ('\u0ac6', '\u0ac6'), ('\u0aca', '\u0aca'), ('\u0ace', '\u0acf'), ('\u0ad1',
'\u0adf'), ('\u0ae4', '\u0ae5'), ('\u0af2', '\u0b00'), ('\u0b04', '\u0b04'), ('\u0b0d',
'\u0b0e'), ('\u0b11', '\u0b12'), ('\u0b29', '\u0b29'), ('\u0b31', '\u0b31'), ('\u0b34',
'\u0b34'), ('\u0b3a', '\u0b3b'), ('\u0b45', '\u0b46'), ('\u0b49', '\u0b4a'), ('\u0b4e',
'\u0b55'), ('\u0b58', '\u0b5b'), ('\u0b5e', '\u0b5e'), ('\u0b64', '\u0b65'), ('\u0b78',
'\u0b81'), ('\u0b84', '\u0b84'), ('\u0b8b', '\u0b8d'), ('\u0b91', '\u0b91'), ('\u0b96',
'\u0b98'), ('\u0b9b', '\u0b9b'), ('\u0b9d', '\u0b9d'), ('\u0ba0', '\u0ba2'), ('\u0ba5',
'\u0ba7'), ('\u0bab', '\u0bad'), ('\u0bba', '\u0bbd'), ('\u0bc3', '\u0bc5'), ('\u0bc9',
'\u0bc9'), ('\u0bce', '\u0bcf'), ('\u0bd1', '\u0bd6'), ('\u0bd8', '\u0be5'), ('\u0bfb',
'\u0bff'), ('\u0c04', '\u0c04'), ('\u0c0d', '\u0c0d'), ('\u0c11', '\u0c11'), ('\u0c29',
'\u0c29'), ('\u0c3a', '\u0c3c'), ('\u0c45', '\u0c45'), ('\u0c49', '\u0c49'), ('\u0c4e',
'\u0c54'), ('\u0c57', '\u0c57'), ('\u0c5a', '\u0c5f'), ('\u0c64', '\u0c65'), ('\u0c70',
'\u0c77'), ('\u0c80', '\u0c80'), ('\u0c84', '\u0c84'), ('\u0c8d', '\u0c8d'), ('\u0c91',
'\u0c91'), ('\u0ca9', '\u0ca9'), ('\u0cb4', '\u0cb4'), ('\u0cba', '\u0cbb'), ('\u0cc5',
'\u0cc5'), ('\u0cc9', '\u0cc9'), ('\u0cce', '\u0cd4'), ('\u0cd7', '\u0cdd'), ('\u0cdf',
'\u0cdf'), ('\u0ce4', '\u0ce5'), ('\u0cf0', '\u0cf0'), ('\u0cf3', '\u0d00'), ('\u0d04',
'\u0d04'), ('\u0d0d', '\u0d0d'), ('\u0d11', '\u0d11'), ('\u0d3b', '\u0d3c'), ('\u0d45',
'\u0d45'), ('\u0d49', '\u0d49'), ('\u0d4f', '\u0d56'), ('\u0d58', '\u0d5f'), ('\u0d64',
'\u0d65'), ('\u0d76', '\u0d78'), ('\u0d80', '\u0d81'), ('\u0d84', '\u0d84'), ('\u0d97',
'\u0d99'), ('\u0db2', '\u0db2'), ('\u0dbc', '\u0dbc'), ('\u0dbe', '\u0dbf'), ('\u0dc7',
'\u0dc9'), ('\u0dcb', '\u0dce'), ('\u0dd5', '\u0dd5'), ('\u0dd7', '\u0dd7'), ('\u0de0',
'\u0de5'), ('\u0df0', '\u0df1'), ('\u0df5', '\u0e00'), ('\u0e3b', '\u0e3e'), ('\u0e5c',
'\u0e80'), ('\u0e83', '\u0e83'), ('\u0e85', '\u0e86'), ('\u0e89', '\u0e89'), ('\u0e8b',
'\u0e8c'), ('\u0e8e', '\u0e93'), ('\u0e98', '\u0e98'), ('\u0ea0', '\u0ea0'), ('\u0ea4',
'\u0ea4'), ('\u0ea6', '\u0ea6'), ('\u0ea8', '\u0ea9'), ('\u0eac', '\u0eac'), ('\u0eba',
'\u0eba'), ('\u0ebe', '\u0ebf'), ('\u0ec5', '\u0ec5'), ('\u0ec7', '\u0ec7'), ('\u0ece',
'\u0ecf'), ('\u0eda', '\u0edb'), ('\u0ee0', '\u0eff'), ('\u0f48', '\u0f48'), ('\u0f6d',
'\u0f70'), ('\u0f98', '\u0f98'), ('\u0fbd', '\u0fbd'), ('\u0fcd', '\u0fcd'), ('\u0fdb',
'\u0fff'), ('\u10c6', '\u10c6'), ('\u10c8', '\u10cc'), ('\u10ce', '\u10cf'), ('\u1249',
'\u1249'), ('\u124e', '\u124f'), ('\u1257', '\u1257'), ('\u1259', '\u1259'), ('\u125e',
'\u125f'), ('\u1289', '\u1289'), ('\u128e', '\u128f'), ('\u12b1', '\u12b1'), ('\u12b6',
'\u12b7'), ('\u12bf', '\u12bf'), ('\u12c1', '\u12c1'), ('\u12c6', '\u12c7'), ('\u12d7',
'\u12d7'), ('\u1311', '\u1311'), ('\u1316', '\u1317'), ('\u135b', '\u135c'), ('\u137d',
'\u137f'), ('\u139a', '\u139f'), ('\u13f5', '\u13ff'), ('\u169d', '\u169f'), ('\u16f9',
'\u16ff'), ('\u170d', '\u170d'), ('\u1715', '\u171f'), ('\u1737', '\u173f'), ('\u1754',
'\u175f'), ('\u176d', '\u176d'), ('\u1771', '\u1771'), ('\u1774', '\u177f'), ('\u17de',
'\u17df'), ('\u17ea', '\u17ef'), ('\u17fa', '\u17ff'), ('\u180e', '\u180f'), ('\u181a',
'\u181f'), ('\u1878', '\u187f'), ('\u18ab', '\u18af'), ('\u18f6', '\u18ff'), ('\u191f',
'\u191f'), ('\u192c', '\u192f'), ('\u193c', '\u193f'), ('\u1941', '\u1943'), ('\u196e',
'\u196f'), ('\u1975', '\u197f'), ('\u19ac', '\u19af'), ('\u19ca', '\u19cf'), ('\u19db',
'\u19dd'), ('\u1a1c', '\u1a1d'), ('\u1a5f', '\u1a5f'), ('\u1a7d', '\u1a7e'), ('\u1a8a',
'\u1a8f'), ('\u1a9a', '\u1a9f'), ('\u1aae', '\u1aaf'), ('\u1abf', '\u1aff'), ('\u1b4c',
'\u1b4f'), ('\u1b7d', '\u1b7f'), ('\u1bf4', '\u1bfb'), ('\u1c38', '\u1c3a'), ('\u1c4a',
'\u1c4c'), ('\u1c80', '\u1cbf'), ('\u1cc8', '\u1ccf'), ('\u1cf7', '\u1cf7'), ('\u1cfa',
'\u1cff'), ('\u1df6', '\u1dfb'), ('\u1f16', '\u1f17'), ('\u1f1e', '\u1f1f'), ('\u1f46',
'\u1f47'), ('\u1f4e', '\u1f4f'), ('\u1f58', '\u1f58'), ('\u1f5a', '\u1f5a'), ('\u1f5c',
'\u1f5c'), ('\u1f5e', '\u1f5e'), ('\u1f7e', '\u1f7f'), ('\u1fb5', '\u1fb5'), ('\u1fc5',
'\u1fc5'), ('\u1fd4', '\u1fd5'), ('\u1fdc', '\u1fdc'), ('\u1ff0', '\u1ff1'), ('\u1ff5',
'\u1ff5'), ('\u1fff', '\u1fff'), ('\u200b', '\u200f'), ('\u202a', '\u202e'), ('\u2060',
'\u206f'), ('\u2072', '\u2073'), ('\u208f', '\u208f'), ('\u209d', '\u209f'), ('\u20be',
'\u20cf'), ('\u20f1', '\u20ff'), ('\u218a', '\u218f'), ('\u23fb', '\u23ff'), ('\u2427',
'\u243f'), ('\u244b', '\u245f'), ('\u2b74', '\u2b75'), ('\u2b96', '\u2b97'), ('\u2bba',
'\u2bbc'), ('\u2bc9', '\u2bc9'), ('\u2bd2', '\u2bff'), ('\u2c2f', '\u2c2f'), ('\u2c5f',
'\u2c5f'), ('\u2cf4', '\u2cf8'), ('\u2d26', '\u2d26'), ('\u2d28', '\u2d2c'), ('\u2d2e',
'\u2d2f'), ('\u2d68', '\u2d6e'), ('\u2d71', '\u2d7e'), ('\u2d97', '\u2d9f'), ('\u2da7',
'\u2da7'), ('\u2daf', '\u2daf'), ('\u2db7', '\u2db7'), ('\u2dbf', '\u2dbf'), ('\u2dc7',
'\u2dc7'), ('\u2dcf', '\u2dcf'), ('\u2dd7', '\u2dd7'), ('\u2ddf', '\u2ddf'), ('\u2e43',
'\u2e7f'), ('\u2e9a', '\u2e9a'), ('\u2ef4', '\u2eff'), ('\u2fd6', '\u2fef'), ('\u2ffc',
'\u2fff'), ('\u3040', '\u3040'), ('\u3097', '\u3098'), ('\u3100', '\u3104'), ('\u312e',
'\u3130'), ('\u318f', '\u318f'), ('\u31bb', '\u31bf'), ('\u31e4', '\u31ef'), ('\u321f',
'\u321f'), ('\u32ff', '\u32ff'), ('\u3401', '\u4db4'), ('\u4db6', '\u4dbf'), ('\u4e01',
'\u9fcb'), ('\u9fcd', '\u9fff'), ('\ua48d', '\ua48f'), ('\ua4c7', '\ua4cf'), ('\ua62c',
'\ua63f'), ('\ua69e', '\ua69e'), ('\ua6f8', '\ua6ff'), ('\ua78f', '\ua78f'), ('\ua7ae',
'\ua7af'), ('\ua7b2', '\ua7f6'), ('\ua82c', '\ua82f'), ('\ua83a', '\ua83f'), ('\ua878',
'\ua87f'), ('\ua8c5', '\ua8cd'), ('\ua8da', '\ua8df'), ('\ua8fc', '\ua8ff'), ('\ua954',
'\ua95e'), ('\ua97d', '\ua97f'), ('\ua9ce', '\ua9ce'), ('\ua9da', '\ua9dd'), ('\ua9ff',
'\ua9ff'), ('\uaa37', '\uaa3f'), ('\uaa4e', '\uaa4f'), ('\uaa5a', '\uaa5b'), ('\uaac3',
'\uaada'), ('\uaaf7', '\uab00'), ('\uab07', '\uab08'), ('\uab0f', '\uab10'), ('\uab17',
'\uab1f'), ('\uab27', '\uab27'), ('\uab2f', '\uab2f'), ('\uab60', '\uab63'), ('\uab66',
'\uabbf'), ('\uabee', '\uabef'), ('\uabfa', '\uabff'), ('\uac01', '\ud7a2'), ('\ud7a4',
'\ud7af'), ('\ud7c7', '\ud7ca'), ('\ud7fc', '\ud7ff'), ('\ue000', '\uf8ff'), ('\ufa6e',
'\ufa6f'), ('\ufada', '\ufaff'), ('\ufb07', '\ufb12'), ('\ufb18', '\ufb1c'), ('\ufb37',
'\ufb37'), ('\ufb3d', '\ufb3d'), ('\ufb3f', '\ufb3f'), ('\ufb42', '\ufb42'), ('\ufb45',
'\ufb45'), ('\ufbc2', '\ufbd2'), ('\ufd40', '\ufd4f'), ('\ufd90', '\ufd91'), ('\ufdc8',
'\ufdef'), ('\ufdfe', '\ufdff'), ('\ufe1a', '\ufe1f'), ('\ufe2e', '\ufe2f'), ('\ufe53',
'\ufe53'), ('\ufe67', '\ufe67'), ('\ufe6c', '\ufe6f'), ('\ufe75', '\ufe75'), ('\ufefd',
'\uff00'), ('\uffbf', '\uffc1'), ('\uffc8', '\uffc9'), ('\uffd0', '\uffd1'), ('\uffd8',
'\uffd9'), ('\uffdd', '\uffdf'), ('\uffe7', '\uffe7'), ('\uffef', '\ufffb'), ('\ufffe',
'\uffff'), ('\U0001000c', '\U0001000c'), ('\U00010027', '\U00010027'), ('\U0001003b',
'\U0001003b'), ('\U0001003e', '\U0001003e'), ('\U0001004e', '\U0001004f'), ('\U0001005e',
'\U0001007f'), ('\U000100fb', '\U000100ff'), ('\U00010103', '\U00010106'), ('\U00010134',
'\U00010136'), ('\U0001018d', '\U0001018f'), ('\U0001019c', '\U0001019f'), ('\U000101a1',
'\U000101cf'), ('\U000101fe', '\U0001027f'), ('\U0001029d', '\U0001029f'), ('\U000102d1',
'\U000102df'), ('\U000102fc', '\U000102ff'), ('\U00010324', '\U0001032f'), ('\U0001034b',
'\U0001034f'), ('\U0001037b', '\U0001037f'), ('\U0001039e', '\U0001039e'), ('\U000103c4',
'\U000103c7'), ('\U000103d6', '\U000103ff'), ('\U0001049e', '\U0001049f'), ('\U000104aa',
'\U000104ff'), ('\U00010528', '\U0001052f'), ('\U00010564', '\U0001056e'), ('\U00010570',
'\U000105ff'), ('\U00010737', '\U0001073f'), ('\U00010756', '\U0001075f'), ('\U00010768',
'\U000107ff'), ('\U00010806', '\U00010807'), ('\U00010809', '\U00010809'), ('\U00010836',
'\U00010836'), ('\U00010839', '\U0001083b'), ('\U0001083d', '\U0001083e'), ('\U00010856',
'\U00010856'), ('\U0001089f', '\U000108a6'), ('\U000108b0', '\U000108ff'), ('\U0001091c',
'\U0001091e'), ('\U0001093a', '\U0001093e'), ('\U00010940', '\U0001097f'), ('\U000109b8',
'\U000109bd'), ('\U000109c0', '\U000109ff'), ('\U00010a04', '\U00010a04'), ('\U00010a07',
'\U00010a0b'), ('\U00010a14', '\U00010a14'), ('\U00010a18', '\U00010a18'), ('\U00010a34',
'\U00010a37'), ('\U00010a3b', '\U00010a3e'), ('\U00010a48', '\U00010a4f'), ('\U00010a59',
'\U00010a5f'), ('\U00010aa0', '\U00010abf'), ('\U00010ae7', '\U00010aea'), ('\U00010af7',
'\U00010aff'), ('\U00010b36', '\U00010b38'), ('\U00010b56', '\U00010b57'), ('\U00010b73',
'\U00010b77'), ('\U00010b92', '\U00010b98'), ('\U00010b9d', '\U00010ba8'), ('\U00010bb0',
'\U00010bff'), ('\U00010c49', '\U00010e5f'), ('\U00010e7f', '\U00010fff'), ('\U0001104e',
'\U00011051'), ('\U00011070', '\U0001107e'), ('\U000110bd', '\U000110bd'), ('\U000110c2',
'\U000110cf'), ('\U000110e9', '\U000110ef'), ('\U000110fa', '\U000110ff'), ('\U00011135',
'\U00011135'), ('\U00011144', '\U0001114f'), ('\U00011177', '\U0001117f'), ('\U000111c9',
'\U000111cc'), ('\U000111ce', '\U000111cf'), ('\U000111db', '\U000111e0'), ('\U000111f5',
'\U000111ff'), ('\U00011212', '\U00011212'), ('\U0001123e', '\U000112af'), ('\U000112eb',
'\U000112ef'), ('\U000112fa', '\U00011300'), ('\U00011304', '\U00011304'), ('\U0001130d',
'\U0001130e'), ('\U00011311', '\U00011312'), ('\U00011329', '\U00011329'), ('\U00011331',
'\U00011331'), ('\U00011334', '\U00011334'), ('\U0001133a', '\U0001133b'), ('\U00011345',
'\U00011346'), ('\U00011349', '\U0001134a'), ('\U0001134e', '\U00011356'), ('\U00011358',
'\U0001135c'), ('\U00011364', '\U00011365'), ('\U0001136d', '\U0001136f'), ('\U00011375',
'\U0001147f'), ('\U000114c8', '\U000114cf'), ('\U000114da', '\U0001157f'), ('\U000115b6',
'\U000115b7'), ('\U000115ca', '\U000115ff'), ('\U00011645', '\U0001164f'), ('\U0001165a',
'\U0001167f'), ('\U000116b8', '\U000116bf'), ('\U000116ca', '\U0001189f'), ('\U000118f3',
'\U000118fe'), ('\U00011900', '\U00011abf'), ('\U00011af9', '\U00011fff'), ('\U00012399',
'\U000123ff'), ('\U0001246f', '\U0001246f'), ('\U00012475', '\U00012fff'), ('\U0001342f',
'\U000167ff'), ('\U00016a39', '\U00016a3f'), ('\U00016a5f', '\U00016a5f'), ('\U00016a6a',
'\U00016a6d'), ('\U00016a70', '\U00016acf'), ('\U00016aee', '\U00016aef'), ('\U00016af6',
'\U00016aff'), ('\U00016b46', '\U00016b4f'), ('\U00016b5a', '\U00016b5a'), ('\U00016b62',
'\U00016b62'), ('\U00016b78', '\U00016b7c'), ('\U00016b90', '\U00016eff'), ('\U00016f45',
'\U00016f4f'), ('\U00016f7f', '\U00016f8e'), ('\U00016fa0', '\U0001afff'), ('\U0001b002',
'\U0001bbff'), ('\U0001bc6b', '\U0001bc6f'), ('\U0001bc7d', '\U0001bc7f'), ('\U0001bc89',
'\U0001bc8f'), ('\U0001bc9a', '\U0001bc9b'), ('\U0001bca0', '\U0001cfff'), ('\U0001d0f6',
'\U0001d0ff'), ('\U0001d127', '\U0001d128'), ('\U0001d173', '\U0001d17a'), ('\U0001d1de',
'\U0001d1ff'), ('\U0001d246', '\U0001d2ff'), ('\U0001d357', '\U0001d35f'), ('\U0001d372',
'\U0001d3ff'), ('\U0001d455', '\U0001d455'), ('\U0001d49d', '\U0001d49d'), ('\U0001d4a0',
'\U0001d4a1'), ('\U0001d4a3', '\U0001d4a4'), ('\U0001d4a7', '\U0001d4a8'), ('\U0001d4ad',
'\U0001d4ad'), ('\U0001d4ba', '\U0001d4ba'), ('\U0001d4bc', '\U0001d4bc'), ('\U0001d4c4',
'\U0001d4c4'), ('\U0001d506', '\U0001d506'), ('\U0001d50b', '\U0001d50c'), ('\U0001d515',
'\U0001d515'), ('\U0001d51d', '\U0001d51d'), ('\U0001d53a', '\U0001d53a'), ('\U0001d53f',
'\U0001d53f'), ('\U0001d545', '\U0001d545'), ('\U0001d547', '\U0001d549'), ('\U0001d551',
'\U0001d551'), ('\U0001d6a6', '\U0001d6a7'), ('\U0001d7cc', '\U0001d7cd'), ('\U0001d800',
'\U0001e7ff'), ('\U0001e8c5', '\U0001e8c6'), ('\U0001e8d7', '\U0001edff'), ('\U0001ee04',
'\U0001ee04'), ('\U0001ee20', '\U0001ee20'), ('\U0001ee23', '\U0001ee23'), ('\U0001ee25',
'\U0001ee26'), ('\U0001ee28', '\U0001ee28'), ('\U0001ee33', '\U0001ee33'), ('\U0001ee38',
'\U0001ee38'), ('\U0001ee3a', '\U0001ee3a'), ('\U0001ee3c', '\U0001ee41'), ('\U0001ee43',
'\U0001ee46'), ('\U0001ee48', '\U0001ee48'), ('\U0001ee4a', '\U0001ee4a'), ('\U0001ee4c',
'\U0001ee4c'), ('\U0001ee50', '\U0001ee50'), ('\U0001ee53', '\U0001ee53'), ('\U0001ee55',
'\U0001ee56'), ('\U0001ee58', '\U0001ee58'), ('\U0001ee5a', '\U0001ee5a'), ('\U0001ee5c',
'\U0001ee5c'), ('\U0001ee5e', '\U0001ee5e'), ('\U0001ee60', '\U0001ee60'), ('\U0001ee63',
'\U0001ee63'), ('\U0001ee65', '\U0001ee66'), ('\U0001ee6b', '\U0001ee6b'), ('\U0001ee73',
'\U0001ee73'), ('\U0001ee78', '\U0001ee78'), ('\U0001ee7d', '\U0001ee7d'), ('\U0001ee7f',
'\U0001ee7f'), ('\U0001ee8a', '\U0001ee8a'), ('\U0001ee9c', '\U0001eea0'), ('\U0001eea4',
'\U0001eea4'), ('\U0001eeaa', '\U0001eeaa'), ('\U0001eebc', '\U0001eeef'), ('\U0001eef2',
'\U0001efff'), ('\U0001f02c', '\U0001f02f'), ('\U0001f094', '\U0001f09f'), ('\U0001f0af',
'\U0001f0b0'), ('\U0001f0c0', '\U0001f0c0'), ('\U0001f0d0', '\U0001f0d0'), ('\U0001f0f6',
'\U0001f0ff'), ('\U0001f10d', '\U0001f10f'), ('\U0001f12f', '\U0001f12f'), ('\U0001f16c',
'\U0001f16f'), ('\U0001f19b', '\U0001f1e5'), ('\U0001f203', '\U0001f20f'), ('\U0001f23b',
'\U0001f23f'), ('\U0001f249', '\U0001f24f'), ('\U0001f252', '\U0001f2ff'), ('\U0001f32d',
'\U0001f32f'), ('\U0001f37e', '\U0001f37f'), ('\U0001f3cf', '\U0001f3d3'), ('\U0001f3f8',
'\U0001f3ff'), ('\U0001f4ff', '\U0001f4ff'), ('\U0001f54b', '\U0001f54f'), ('\U0001f57a',
'\U0001f57a'), ('\U0001f5a4', '\U0001f5a4'), ('\U0001f643', '\U0001f644'), ('\U0001f6d0',
'\U0001f6df'), ('\U0001f6ed', '\U0001f6ef'), ('\U0001f6f4', '\U0001f6ff'), ('\U0001f774',
'\U0001f77f'), ('\U0001f7d5', '\U0001f7ff'), ('\U0001f80c', '\U0001f80f'), ('\U0001f848',
'\U0001f84f'), ('\U0001f85a', '\U0001f85f'), ('\U0001f888', '\U0001f88f'), ('\U0001f8ae',
'\U0001ffff'), ('\U00020001', '\U0002a6d5'), ('\U0002a6d7', '\U0002a6ff'), ('\U0002a701',
'\U0002b733'), ('\U0002b735', '\U0002b73f'), ('\U0002b741', '\U0002b81c'), ('\U0002b81e',
'\U0002f7ff'), ('\U0002fa1e', '\U000e00ff'), ('\U000e01f0', '\U0010ffff')
];
pub static Cc_table: &'static [(char, char)] = &[
@ -50,6 +215,181 @@ pub mod general_category {
('\U000e0001', '\U000e0001'), ('\U000e0020', '\U000e007f')
];
/// Code point ranges whose general category is `Cn` (unassigned).
///
/// Each entry is an inclusive `(first, last)` pair; entries are sorted in
/// ascending order and do not overlap.
///
/// NOTE: UnicodeData.txt describes several large blocks only by their
/// `<..., First>` / `<..., Last>` endpoints. Everything between such a pair
/// has the same category as the endpoints (CJK ideographs and extensions A-D,
/// Hangul syllables: `Lo`; private use areas: `Co`), so those interiors must
/// not be listed as unassigned. The generator has to expand the endpoint pairs
/// too, otherwise regenerating this file brings the bogus rows back.
/// `C_table` is built from the same data and carries the same spurious
/// CJK / Hangul rows.
pub static Cn_table: &'static [(char, char)] = &[
('\u0378', '\u0379'), ('\u0380', '\u0383'), ('\u038b', '\u038b'), ('\u038d', '\u038d'),
('\u03a2', '\u03a2'), ('\u0530', '\u0530'), ('\u0557', '\u0558'), ('\u0560', '\u0560'),
('\u0588', '\u0588'), ('\u058b', '\u058c'), ('\u0590', '\u0590'), ('\u05c8', '\u05cf'),
('\u05eb', '\u05ef'), ('\u05f5', '\u05ff'), ('\u061d', '\u061d'), ('\u070e', '\u070e'),
('\u074b', '\u074c'), ('\u07b2', '\u07bf'), ('\u07fb', '\u07ff'), ('\u082e', '\u082f'),
('\u083f', '\u083f'), ('\u085c', '\u085d'), ('\u085f', '\u089f'), ('\u08b3', '\u08e3'),
('\u0984', '\u0984'), ('\u098d', '\u098e'), ('\u0991', '\u0992'), ('\u09a9', '\u09a9'),
('\u09b1', '\u09b1'), ('\u09b3', '\u09b5'), ('\u09ba', '\u09bb'), ('\u09c5', '\u09c6'),
('\u09c9', '\u09ca'), ('\u09cf', '\u09d6'), ('\u09d8', '\u09db'), ('\u09de', '\u09de'),
('\u09e4', '\u09e5'), ('\u09fc', '\u0a00'), ('\u0a04', '\u0a04'), ('\u0a0b', '\u0a0e'),
('\u0a11', '\u0a12'), ('\u0a29', '\u0a29'), ('\u0a31', '\u0a31'), ('\u0a34', '\u0a34'),
('\u0a37', '\u0a37'), ('\u0a3a', '\u0a3b'), ('\u0a3d', '\u0a3d'), ('\u0a43', '\u0a46'),
('\u0a49', '\u0a4a'), ('\u0a4e', '\u0a50'), ('\u0a52', '\u0a58'), ('\u0a5d', '\u0a5d'),
('\u0a5f', '\u0a65'), ('\u0a76', '\u0a80'), ('\u0a84', '\u0a84'), ('\u0a8e', '\u0a8e'),
('\u0a92', '\u0a92'), ('\u0aa9', '\u0aa9'), ('\u0ab1', '\u0ab1'), ('\u0ab4', '\u0ab4'),
('\u0aba', '\u0abb'), ('\u0ac6', '\u0ac6'), ('\u0aca', '\u0aca'), ('\u0ace', '\u0acf'),
('\u0ad1', '\u0adf'), ('\u0ae4', '\u0ae5'), ('\u0af2', '\u0b00'), ('\u0b04', '\u0b04'),
('\u0b0d', '\u0b0e'), ('\u0b11', '\u0b12'), ('\u0b29', '\u0b29'), ('\u0b31', '\u0b31'),
('\u0b34', '\u0b34'), ('\u0b3a', '\u0b3b'), ('\u0b45', '\u0b46'), ('\u0b49', '\u0b4a'),
('\u0b4e', '\u0b55'), ('\u0b58', '\u0b5b'), ('\u0b5e', '\u0b5e'), ('\u0b64', '\u0b65'),
('\u0b78', '\u0b81'), ('\u0b84', '\u0b84'), ('\u0b8b', '\u0b8d'), ('\u0b91', '\u0b91'),
('\u0b96', '\u0b98'), ('\u0b9b', '\u0b9b'), ('\u0b9d', '\u0b9d'), ('\u0ba0', '\u0ba2'),
('\u0ba5', '\u0ba7'), ('\u0bab', '\u0bad'), ('\u0bba', '\u0bbd'), ('\u0bc3', '\u0bc5'),
('\u0bc9', '\u0bc9'), ('\u0bce', '\u0bcf'), ('\u0bd1', '\u0bd6'), ('\u0bd8', '\u0be5'),
('\u0bfb', '\u0bff'), ('\u0c04', '\u0c04'), ('\u0c0d', '\u0c0d'), ('\u0c11', '\u0c11'),
('\u0c29', '\u0c29'), ('\u0c3a', '\u0c3c'), ('\u0c45', '\u0c45'), ('\u0c49', '\u0c49'),
('\u0c4e', '\u0c54'), ('\u0c57', '\u0c57'), ('\u0c5a', '\u0c5f'), ('\u0c64', '\u0c65'),
('\u0c70', '\u0c77'), ('\u0c80', '\u0c80'), ('\u0c84', '\u0c84'), ('\u0c8d', '\u0c8d'),
('\u0c91', '\u0c91'), ('\u0ca9', '\u0ca9'), ('\u0cb4', '\u0cb4'), ('\u0cba', '\u0cbb'),
('\u0cc5', '\u0cc5'), ('\u0cc9', '\u0cc9'), ('\u0cce', '\u0cd4'), ('\u0cd7', '\u0cdd'),
('\u0cdf', '\u0cdf'), ('\u0ce4', '\u0ce5'), ('\u0cf0', '\u0cf0'), ('\u0cf3', '\u0d00'),
('\u0d04', '\u0d04'), ('\u0d0d', '\u0d0d'), ('\u0d11', '\u0d11'), ('\u0d3b', '\u0d3c'),
('\u0d45', '\u0d45'), ('\u0d49', '\u0d49'), ('\u0d4f', '\u0d56'), ('\u0d58', '\u0d5f'),
('\u0d64', '\u0d65'), ('\u0d76', '\u0d78'), ('\u0d80', '\u0d81'), ('\u0d84', '\u0d84'),
('\u0d97', '\u0d99'), ('\u0db2', '\u0db2'), ('\u0dbc', '\u0dbc'), ('\u0dbe', '\u0dbf'),
('\u0dc7', '\u0dc9'), ('\u0dcb', '\u0dce'), ('\u0dd5', '\u0dd5'), ('\u0dd7', '\u0dd7'),
('\u0de0', '\u0de5'), ('\u0df0', '\u0df1'), ('\u0df5', '\u0e00'), ('\u0e3b', '\u0e3e'),
('\u0e5c', '\u0e80'), ('\u0e83', '\u0e83'), ('\u0e85', '\u0e86'), ('\u0e89', '\u0e89'),
('\u0e8b', '\u0e8c'), ('\u0e8e', '\u0e93'), ('\u0e98', '\u0e98'), ('\u0ea0', '\u0ea0'),
('\u0ea4', '\u0ea4'), ('\u0ea6', '\u0ea6'), ('\u0ea8', '\u0ea9'), ('\u0eac', '\u0eac'),
('\u0eba', '\u0eba'), ('\u0ebe', '\u0ebf'), ('\u0ec5', '\u0ec5'), ('\u0ec7', '\u0ec7'),
('\u0ece', '\u0ecf'), ('\u0eda', '\u0edb'), ('\u0ee0', '\u0eff'), ('\u0f48', '\u0f48'),
('\u0f6d', '\u0f70'), ('\u0f98', '\u0f98'), ('\u0fbd', '\u0fbd'), ('\u0fcd', '\u0fcd'),
('\u0fdb', '\u0fff'), ('\u10c6', '\u10c6'), ('\u10c8', '\u10cc'), ('\u10ce', '\u10cf'),
('\u1249', '\u1249'), ('\u124e', '\u124f'), ('\u1257', '\u1257'), ('\u1259', '\u1259'),
('\u125e', '\u125f'), ('\u1289', '\u1289'), ('\u128e', '\u128f'), ('\u12b1', '\u12b1'),
('\u12b6', '\u12b7'), ('\u12bf', '\u12bf'), ('\u12c1', '\u12c1'), ('\u12c6', '\u12c7'),
('\u12d7', '\u12d7'), ('\u1311', '\u1311'), ('\u1316', '\u1317'), ('\u135b', '\u135c'),
('\u137d', '\u137f'), ('\u139a', '\u139f'), ('\u13f5', '\u13ff'), ('\u169d', '\u169f'),
('\u16f9', '\u16ff'), ('\u170d', '\u170d'), ('\u1715', '\u171f'), ('\u1737', '\u173f'),
('\u1754', '\u175f'), ('\u176d', '\u176d'), ('\u1771', '\u1771'), ('\u1774', '\u177f'),
('\u17de', '\u17df'), ('\u17ea', '\u17ef'), ('\u17fa', '\u17ff'), ('\u180f', '\u180f'),
('\u181a', '\u181f'), ('\u1878', '\u187f'), ('\u18ab', '\u18af'), ('\u18f6', '\u18ff'),
('\u191f', '\u191f'), ('\u192c', '\u192f'), ('\u193c', '\u193f'), ('\u1941', '\u1943'),
('\u196e', '\u196f'), ('\u1975', '\u197f'), ('\u19ac', '\u19af'), ('\u19ca', '\u19cf'),
('\u19db', '\u19dd'), ('\u1a1c', '\u1a1d'), ('\u1a5f', '\u1a5f'), ('\u1a7d', '\u1a7e'),
('\u1a8a', '\u1a8f'), ('\u1a9a', '\u1a9f'), ('\u1aae', '\u1aaf'), ('\u1abf', '\u1aff'),
('\u1b4c', '\u1b4f'), ('\u1b7d', '\u1b7f'), ('\u1bf4', '\u1bfb'), ('\u1c38', '\u1c3a'),
('\u1c4a', '\u1c4c'), ('\u1c80', '\u1cbf'), ('\u1cc8', '\u1ccf'), ('\u1cf7', '\u1cf7'),
('\u1cfa', '\u1cff'), ('\u1df6', '\u1dfb'), ('\u1f16', '\u1f17'), ('\u1f1e', '\u1f1f'),
('\u1f46', '\u1f47'), ('\u1f4e', '\u1f4f'), ('\u1f58', '\u1f58'), ('\u1f5a', '\u1f5a'),
('\u1f5c', '\u1f5c'), ('\u1f5e', '\u1f5e'), ('\u1f7e', '\u1f7f'), ('\u1fb5', '\u1fb5'),
('\u1fc5', '\u1fc5'), ('\u1fd4', '\u1fd5'), ('\u1fdc', '\u1fdc'), ('\u1ff0', '\u1ff1'),
('\u1ff5', '\u1ff5'), ('\u1fff', '\u1fff'), ('\u2065', '\u2065'), ('\u2072', '\u2073'),
('\u208f', '\u208f'), ('\u209d', '\u209f'), ('\u20be', '\u20cf'), ('\u20f1', '\u20ff'),
('\u218a', '\u218f'), ('\u23fb', '\u23ff'), ('\u2427', '\u243f'), ('\u244b', '\u245f'),
('\u2b74', '\u2b75'), ('\u2b96', '\u2b97'), ('\u2bba', '\u2bbc'), ('\u2bc9', '\u2bc9'),
('\u2bd2', '\u2bff'), ('\u2c2f', '\u2c2f'), ('\u2c5f', '\u2c5f'), ('\u2cf4', '\u2cf8'),
('\u2d26', '\u2d26'), ('\u2d28', '\u2d2c'), ('\u2d2e', '\u2d2f'), ('\u2d68', '\u2d6e'),
('\u2d71', '\u2d7e'), ('\u2d97', '\u2d9f'), ('\u2da7', '\u2da7'), ('\u2daf', '\u2daf'),
('\u2db7', '\u2db7'), ('\u2dbf', '\u2dbf'), ('\u2dc7', '\u2dc7'), ('\u2dcf', '\u2dcf'),
('\u2dd7', '\u2dd7'), ('\u2ddf', '\u2ddf'), ('\u2e43', '\u2e7f'), ('\u2e9a', '\u2e9a'),
('\u2ef4', '\u2eff'), ('\u2fd6', '\u2fef'), ('\u2ffc', '\u2fff'), ('\u3040', '\u3040'),
('\u3097', '\u3098'), ('\u3100', '\u3104'), ('\u312e', '\u3130'), ('\u318f', '\u318f'),
('\u31bb', '\u31bf'), ('\u31e4', '\u31ef'), ('\u321f', '\u321f'), ('\u32ff', '\u32ff'),
('\u4db6', '\u4dbf'), ('\u9fcd', '\u9fff'),
('\ua48d', '\ua48f'), ('\ua4c7', '\ua4cf'), ('\ua62c', '\ua63f'), ('\ua69e', '\ua69e'),
('\ua6f8', '\ua6ff'), ('\ua78f', '\ua78f'), ('\ua7ae', '\ua7af'), ('\ua7b2', '\ua7f6'),
('\ua82c', '\ua82f'), ('\ua83a', '\ua83f'), ('\ua878', '\ua87f'), ('\ua8c5', '\ua8cd'),
('\ua8da', '\ua8df'), ('\ua8fc', '\ua8ff'), ('\ua954', '\ua95e'), ('\ua97d', '\ua97f'),
('\ua9ce', '\ua9ce'), ('\ua9da', '\ua9dd'), ('\ua9ff', '\ua9ff'), ('\uaa37', '\uaa3f'),
('\uaa4e', '\uaa4f'), ('\uaa5a', '\uaa5b'), ('\uaac3', '\uaada'), ('\uaaf7', '\uab00'),
('\uab07', '\uab08'), ('\uab0f', '\uab10'), ('\uab17', '\uab1f'), ('\uab27', '\uab27'),
('\uab2f', '\uab2f'), ('\uab60', '\uab63'), ('\uab66', '\uabbf'), ('\uabee', '\uabef'),
('\uabfa', '\uabff'), ('\ud7a4', '\ud7af'), ('\ud7c7', '\ud7ca'),
('\ud7fc', '\ud7ff'), ('\ufa6e', '\ufa6f'), ('\ufada', '\ufaff'),
('\ufb07', '\ufb12'), ('\ufb18', '\ufb1c'), ('\ufb37', '\ufb37'), ('\ufb3d', '\ufb3d'),
('\ufb3f', '\ufb3f'), ('\ufb42', '\ufb42'), ('\ufb45', '\ufb45'), ('\ufbc2', '\ufbd2'),
('\ufd40', '\ufd4f'), ('\ufd90', '\ufd91'), ('\ufdc8', '\ufdef'), ('\ufdfe', '\ufdff'),
('\ufe1a', '\ufe1f'), ('\ufe2e', '\ufe2f'), ('\ufe53', '\ufe53'), ('\ufe67', '\ufe67'),
('\ufe6c', '\ufe6f'), ('\ufe75', '\ufe75'), ('\ufefd', '\ufefe'), ('\uff00', '\uff00'),
('\uffbf', '\uffc1'), ('\uffc8', '\uffc9'), ('\uffd0', '\uffd1'), ('\uffd8', '\uffd9'),
('\uffdd', '\uffdf'), ('\uffe7', '\uffe7'), ('\uffef', '\ufff8'), ('\ufffe', '\uffff'),
('\U0001000c', '\U0001000c'), ('\U00010027', '\U00010027'), ('\U0001003b', '\U0001003b'),
('\U0001003e', '\U0001003e'), ('\U0001004e', '\U0001004f'), ('\U0001005e', '\U0001007f'),
('\U000100fb', '\U000100ff'), ('\U00010103', '\U00010106'), ('\U00010134', '\U00010136'),
('\U0001018d', '\U0001018f'), ('\U0001019c', '\U0001019f'), ('\U000101a1', '\U000101cf'),
('\U000101fe', '\U0001027f'), ('\U0001029d', '\U0001029f'), ('\U000102d1', '\U000102df'),
('\U000102fc', '\U000102ff'), ('\U00010324', '\U0001032f'), ('\U0001034b', '\U0001034f'),
('\U0001037b', '\U0001037f'), ('\U0001039e', '\U0001039e'), ('\U000103c4', '\U000103c7'),
('\U000103d6', '\U000103ff'), ('\U0001049e', '\U0001049f'), ('\U000104aa', '\U000104ff'),
('\U00010528', '\U0001052f'), ('\U00010564', '\U0001056e'), ('\U00010570', '\U000105ff'),
('\U00010737', '\U0001073f'), ('\U00010756', '\U0001075f'), ('\U00010768', '\U000107ff'),
('\U00010806', '\U00010807'), ('\U00010809', '\U00010809'), ('\U00010836', '\U00010836'),
('\U00010839', '\U0001083b'), ('\U0001083d', '\U0001083e'), ('\U00010856', '\U00010856'),
('\U0001089f', '\U000108a6'), ('\U000108b0', '\U000108ff'), ('\U0001091c', '\U0001091e'),
('\U0001093a', '\U0001093e'), ('\U00010940', '\U0001097f'), ('\U000109b8', '\U000109bd'),
('\U000109c0', '\U000109ff'), ('\U00010a04', '\U00010a04'), ('\U00010a07', '\U00010a0b'),
('\U00010a14', '\U00010a14'), ('\U00010a18', '\U00010a18'), ('\U00010a34', '\U00010a37'),
('\U00010a3b', '\U00010a3e'), ('\U00010a48', '\U00010a4f'), ('\U00010a59', '\U00010a5f'),
('\U00010aa0', '\U00010abf'), ('\U00010ae7', '\U00010aea'), ('\U00010af7', '\U00010aff'),
('\U00010b36', '\U00010b38'), ('\U00010b56', '\U00010b57'), ('\U00010b73', '\U00010b77'),
('\U00010b92', '\U00010b98'), ('\U00010b9d', '\U00010ba8'), ('\U00010bb0', '\U00010bff'),
('\U00010c49', '\U00010e5f'), ('\U00010e7f', '\U00010fff'), ('\U0001104e', '\U00011051'),
('\U00011070', '\U0001107e'), ('\U000110c2', '\U000110cf'), ('\U000110e9', '\U000110ef'),
('\U000110fa', '\U000110ff'), ('\U00011135', '\U00011135'), ('\U00011144', '\U0001114f'),
('\U00011177', '\U0001117f'), ('\U000111c9', '\U000111cc'), ('\U000111ce', '\U000111cf'),
('\U000111db', '\U000111e0'), ('\U000111f5', '\U000111ff'), ('\U00011212', '\U00011212'),
('\U0001123e', '\U000112af'), ('\U000112eb', '\U000112ef'), ('\U000112fa', '\U00011300'),
('\U00011304', '\U00011304'), ('\U0001130d', '\U0001130e'), ('\U00011311', '\U00011312'),
('\U00011329', '\U00011329'), ('\U00011331', '\U00011331'), ('\U00011334', '\U00011334'),
('\U0001133a', '\U0001133b'), ('\U00011345', '\U00011346'), ('\U00011349', '\U0001134a'),
('\U0001134e', '\U00011356'), ('\U00011358', '\U0001135c'), ('\U00011364', '\U00011365'),
('\U0001136d', '\U0001136f'), ('\U00011375', '\U0001147f'), ('\U000114c8', '\U000114cf'),
('\U000114da', '\U0001157f'), ('\U000115b6', '\U000115b7'), ('\U000115ca', '\U000115ff'),
('\U00011645', '\U0001164f'), ('\U0001165a', '\U0001167f'), ('\U000116b8', '\U000116bf'),
('\U000116ca', '\U0001189f'), ('\U000118f3', '\U000118fe'), ('\U00011900', '\U00011abf'),
('\U00011af9', '\U00011fff'), ('\U00012399', '\U000123ff'), ('\U0001246f', '\U0001246f'),
('\U00012475', '\U00012fff'), ('\U0001342f', '\U000167ff'), ('\U00016a39', '\U00016a3f'),
('\U00016a5f', '\U00016a5f'), ('\U00016a6a', '\U00016a6d'), ('\U00016a70', '\U00016acf'),
('\U00016aee', '\U00016aef'), ('\U00016af6', '\U00016aff'), ('\U00016b46', '\U00016b4f'),
('\U00016b5a', '\U00016b5a'), ('\U00016b62', '\U00016b62'), ('\U00016b78', '\U00016b7c'),
('\U00016b90', '\U00016eff'), ('\U00016f45', '\U00016f4f'), ('\U00016f7f', '\U00016f8e'),
('\U00016fa0', '\U0001afff'), ('\U0001b002', '\U0001bbff'), ('\U0001bc6b', '\U0001bc6f'),
('\U0001bc7d', '\U0001bc7f'), ('\U0001bc89', '\U0001bc8f'), ('\U0001bc9a', '\U0001bc9b'),
('\U0001bca4', '\U0001cfff'), ('\U0001d0f6', '\U0001d0ff'), ('\U0001d127', '\U0001d128'),
('\U0001d1de', '\U0001d1ff'), ('\U0001d246', '\U0001d2ff'), ('\U0001d357', '\U0001d35f'),
('\U0001d372', '\U0001d3ff'), ('\U0001d455', '\U0001d455'), ('\U0001d49d', '\U0001d49d'),
('\U0001d4a0', '\U0001d4a1'), ('\U0001d4a3', '\U0001d4a4'), ('\U0001d4a7', '\U0001d4a8'),
('\U0001d4ad', '\U0001d4ad'), ('\U0001d4ba', '\U0001d4ba'), ('\U0001d4bc', '\U0001d4bc'),
('\U0001d4c4', '\U0001d4c4'), ('\U0001d506', '\U0001d506'), ('\U0001d50b', '\U0001d50c'),
('\U0001d515', '\U0001d515'), ('\U0001d51d', '\U0001d51d'), ('\U0001d53a', '\U0001d53a'),
('\U0001d53f', '\U0001d53f'), ('\U0001d545', '\U0001d545'), ('\U0001d547', '\U0001d549'),
('\U0001d551', '\U0001d551'), ('\U0001d6a6', '\U0001d6a7'), ('\U0001d7cc', '\U0001d7cd'),
('\U0001d800', '\U0001e7ff'), ('\U0001e8c5', '\U0001e8c6'), ('\U0001e8d7', '\U0001edff'),
('\U0001ee04', '\U0001ee04'), ('\U0001ee20', '\U0001ee20'), ('\U0001ee23', '\U0001ee23'),
('\U0001ee25', '\U0001ee26'), ('\U0001ee28', '\U0001ee28'), ('\U0001ee33', '\U0001ee33'),
('\U0001ee38', '\U0001ee38'), ('\U0001ee3a', '\U0001ee3a'), ('\U0001ee3c', '\U0001ee41'),
('\U0001ee43', '\U0001ee46'), ('\U0001ee48', '\U0001ee48'), ('\U0001ee4a', '\U0001ee4a'),
('\U0001ee4c', '\U0001ee4c'), ('\U0001ee50', '\U0001ee50'), ('\U0001ee53', '\U0001ee53'),
('\U0001ee55', '\U0001ee56'), ('\U0001ee58', '\U0001ee58'), ('\U0001ee5a', '\U0001ee5a'),
('\U0001ee5c', '\U0001ee5c'), ('\U0001ee5e', '\U0001ee5e'), ('\U0001ee60', '\U0001ee60'),
('\U0001ee63', '\U0001ee63'), ('\U0001ee65', '\U0001ee66'), ('\U0001ee6b', '\U0001ee6b'),
('\U0001ee73', '\U0001ee73'), ('\U0001ee78', '\U0001ee78'), ('\U0001ee7d', '\U0001ee7d'),
('\U0001ee7f', '\U0001ee7f'), ('\U0001ee8a', '\U0001ee8a'), ('\U0001ee9c', '\U0001eea0'),
('\U0001eea4', '\U0001eea4'), ('\U0001eeaa', '\U0001eeaa'), ('\U0001eebc', '\U0001eeef'),
('\U0001eef2', '\U0001efff'), ('\U0001f02c', '\U0001f02f'), ('\U0001f094', '\U0001f09f'),
('\U0001f0af', '\U0001f0b0'), ('\U0001f0c0', '\U0001f0c0'), ('\U0001f0d0', '\U0001f0d0'),
('\U0001f0f6', '\U0001f0ff'), ('\U0001f10d', '\U0001f10f'), ('\U0001f12f', '\U0001f12f'),
('\U0001f16c', '\U0001f16f'), ('\U0001f19b', '\U0001f1e5'), ('\U0001f203', '\U0001f20f'),
('\U0001f23b', '\U0001f23f'), ('\U0001f249', '\U0001f24f'), ('\U0001f252', '\U0001f2ff'),
('\U0001f32d', '\U0001f32f'), ('\U0001f37e', '\U0001f37f'), ('\U0001f3cf', '\U0001f3d3'),
('\U0001f3f8', '\U0001f3ff'), ('\U0001f4ff', '\U0001f4ff'), ('\U0001f54b', '\U0001f54f'),
('\U0001f57a', '\U0001f57a'), ('\U0001f5a4', '\U0001f5a4'), ('\U0001f643', '\U0001f644'),
('\U0001f6d0', '\U0001f6df'), ('\U0001f6ed', '\U0001f6ef'), ('\U0001f6f4', '\U0001f6ff'),
('\U0001f774', '\U0001f77f'), ('\U0001f7d5', '\U0001f7ff'), ('\U0001f80c', '\U0001f80f'),
('\U0001f848', '\U0001f84f'), ('\U0001f85a', '\U0001f85f'), ('\U0001f888', '\U0001f88f'),
('\U0001f8ae', '\U0001ffff'), ('\U0002a6d7', '\U0002a6ff'),
('\U0002b735', '\U0002b73f'),
('\U0002b81e', '\U0002f7ff'), ('\U0002fa1e', '\U000e0000'), ('\U000e0002', '\U000e001f'),
('\U000e0080', '\U000e00ff'), ('\U000e01f0', '\U000effff'),
('\U000ffffe', '\U000fffff'), ('\U0010fffe', '\U0010ffff')
];
pub static Co_table: &'static [(char, char)] = &[
('\ue000', '\ue000'), ('\uf8ff', '\uf8ff'), ('\U000f0000', '\U000f0000'), ('\U000ffffd',
'\U000ffffd'), ('\U00100000', '\U00100000'), ('\U0010fffd', '\U0010fffd')
@ -1540,6 +1880,17 @@ pub mod derived_property {
super::bsearch_range_table(c, Alphabetic_table)
}
/// Inclusive `(lo, hi)` code-point ranges, listed in ascending order, for the
/// `Default_Ignorable_Code_Point` property.
///
/// NOTE(review): presumably searched with `bsearch_range_table` like the other
/// range tables in this module, so entries must stay sorted and non-overlapping.
/// This table appears to be machine-generated; prefer regenerating over hand edits.
pub static Default_Ignorable_Code_Point_table: &'static [(char, char)] = &[
('\xad', '\xad'), ('\u034f', '\u034f'), ('\u061c', '\u061c'), ('\u115f', '\u1160'),
('\u17b4', '\u17b5'), ('\u180b', '\u180d'), ('\u180e', '\u180e'), ('\u200b', '\u200f'),
('\u202a', '\u202e'), ('\u2060', '\u2064'), ('\u2065', '\u2065'), ('\u2066', '\u206f'),
('\u3164', '\u3164'), ('\ufe00', '\ufe0f'), ('\ufeff', '\ufeff'), ('\uffa0', '\uffa0'),
('\ufff0', '\ufff8'), ('\U0001bca0', '\U0001bca3'), ('\U0001d173', '\U0001d17a'),
('\U000e0000', '\U000e0000'), ('\U000e0001', '\U000e0001'), ('\U000e0002', '\U000e001f'),
('\U000e0020', '\U000e007f'), ('\U000e0080', '\U000e00ff'), ('\U000e0100', '\U000e01ef'),
('\U000e01f0', '\U000e0fff')
];
pub static Lowercase_table: &'static [(char, char)] = &[
('\x61', '\x7a'), ('\xaa', '\xaa'), ('\xb5', '\xb5'), ('\xba', '\xba'), ('\xdf', '\xf6'),
('\xf8', '\xff'), ('\u0101', '\u0101'), ('\u0103', '\u0103'), ('\u0105', '\u0105'),
@ -3264,6 +3615,15 @@ pub mod property {
('\u200c', '\u200d')
];
/// Inclusive `(lo, hi)` code-point ranges, in ascending order, for the
/// `Noncharacter_Code_Point` property: U+FDD0..U+FDEF plus the last two code
/// points of every plane (U+FFFE..U+FFFF through U+10FFFE..U+10FFFF).
///
/// NOTE(review): the final range (U+10FFFE..U+10FFFF, plane 16) was absent from
/// this table and has been added here. The table appears to be machine-generated,
/// so confirm the generator also emits that range before regenerating.
pub static Noncharacter_Code_Point_table: &'static [(char, char)] = &[
('\ufdd0', '\ufdef'), ('\ufffe', '\uffff'), ('\U0001fffe', '\U0001ffff'), ('\U0002fffe',
'\U0002ffff'), ('\U0003fffe', '\U0003ffff'), ('\U0004fffe', '\U0004ffff'), ('\U0005fffe',
'\U0005ffff'), ('\U0006fffe', '\U0006ffff'), ('\U0007fffe', '\U0007ffff'), ('\U0008fffe',
'\U0008ffff'), ('\U0009fffe', '\U0009ffff'), ('\U000afffe', '\U000affff'), ('\U000bfffe',
'\U000bffff'), ('\U000cfffe', '\U000cffff'), ('\U000dfffe', '\U000dffff'), ('\U000efffe',
'\U000effff'), ('\U000ffffe', '\U000fffff'), ('\U0010fffe', '\U0010ffff')
];
pub static White_Space_table: &'static [(char, char)] = &[
('\x09', '\x0d'), ('\x20', '\x20'), ('\x85', '\x85'), ('\xa0', '\xa0'), ('\u1680',
'\u1680'), ('\u2000', '\u200a'), ('\u2028', '\u2028'), ('\u2029', '\u2029'), ('\u202f',
@ -3290,10 +3650,12 @@ pub mod regex {
super::script::Carian_table), ("Caucasian_Albanian",
super::script::Caucasian_Albanian_table), ("Cc", super::general_category::Cc_table), ("Cf",
super::general_category::Cf_table), ("Chakma", super::script::Chakma_table), ("Cham",
super::script::Cham_table), ("Cherokee", super::script::Cherokee_table), ("Co",
super::general_category::Co_table), ("Common", super::script::Common_table), ("Coptic",
super::script::Coptic_table), ("Cuneiform", super::script::Cuneiform_table), ("Cypriot",
super::script::Cypriot_table), ("Cyrillic", super::script::Cyrillic_table), ("Deseret",
super::script::Cham_table), ("Cherokee", super::script::Cherokee_table), ("Cn",
super::general_category::Cn_table), ("Co", super::general_category::Co_table), ("Common",
super::script::Common_table), ("Coptic", super::script::Coptic_table), ("Cuneiform",
super::script::Cuneiform_table), ("Cypriot", super::script::Cypriot_table), ("Cyrillic",
super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret",
super::script::Deseret_table), ("Devanagari", super::script::Devanagari_table), ("Duployan",
super::script::Duployan_table), ("Egyptian_Hieroglyphs",
super::script::Egyptian_Hieroglyphs_table), ("Elbasan", super::script::Elbasan_table),
@ -3333,7 +3695,8 @@ pub mod regex {
super::script::Myanmar_table), ("N", super::general_category::N_table), ("Nabataean",
super::script::Nabataean_table), ("Nd", super::general_category::Nd_table), ("New_Tai_Lue",
super::script::New_Tai_Lue_table), ("Nko", super::script::Nko_table), ("Nl",
super::general_category::Nl_table), ("No", super::general_category::No_table), ("Ogham",
super::general_category::Nl_table), ("No", super::general_category::No_table),
("Noncharacter_Code_Point", super::property::Noncharacter_Code_Point_table), ("Ogham",
super::script::Ogham_table), ("Ol_Chiki", super::script::Ol_Chiki_table), ("Old_Italic",
super::script::Old_Italic_table), ("Old_North_Arabian",
super::script::Old_North_Arabian_table), ("Old_Permic", super::script::Old_Permic_table),
@ -6443,3 +6806,501 @@ pub mod charwidth {
];
}
pub mod grapheme {
use core::option::{Some, None};
use core::slice::ImmutableVector;
/// Category of a code point for the purpose of grapheme cluster breaking.
///
/// Every variant except `GC_Any` is attached to code-point ranges in
/// `grapheme_cat_table`; `GC_Any` is what the table lookup returns for a code
/// point that no range covers.
///
/// NOTE(review): variant names presumably mirror the Grapheme_Cluster_Break
/// property values of UAX #29 -- confirm against the table generator.
#[allow(non_camel_case_types)]
#[deriving(Clone)]
pub enum GraphemeCat {
// Used for single code points in the range starting at U+AC00 (e.g. U+AC00, U+AC1C).
GC_LV,
// Used for the runs between the GC_LV code points (e.g. U+AC01..U+AC1B).
GC_LVT,
// Used for U+11A8..U+11FF.
GC_T,
// Used for combining-mark style ranges (e.g. U+0300..U+036F) and U+200C..U+200D.
GC_Extend,
// Used for U+1160..U+11A7.
GC_V,
// Used for control/format ranges (e.g. U+0000..U+001F, U+007F..U+009F).
GC_Control,
// Used for ranges such as U+0903 and U+093E..U+0940.
GC_SpacingMark,
// Used for U+1100..U+115F and U+A960..U+A97C.
GC_L,
// No range with this category is visible in this part of the table --
// presumably the regional indicator symbols; TODO confirm.
GC_RegionalIndicator,
// Fallback: returned by the lookup when the code point is in no table range.
GC_Any,
}
/// Looks up `c` in `r`, a table of inclusive `(lo, hi, category)` ranges.
///
/// The table is probed with the slice `bsearch` method, so it is expected to be
/// ordered by range with no overlaps. Returns the category stored alongside the
/// range that contains `c`, or `GC_Any` when no range contains it.
fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat {
    use core::cmp::{Equal, Less, Greater};
    // Position each range relative to `c`: a range that ends before `c` sorts
    // as Less, one that starts after `c` sorts as Greater, otherwise it is a hit.
    let found = r.bsearch(|&(lo, hi, _)| {
        if hi < c { Less }
        else if c < lo { Greater }
        else { Equal }
    });
    match found {
        None => GC_Any,
        Some(pos) => {
            let (_, _, category) = r[pos];
            category
        }
    }
}
/// Returns the grapheme category of `c` by looking it up in `grapheme_cat_table`.
///
/// Code points not covered by any range in the table yield `GC_Any`.
pub fn grapheme_category(c: char) -> GraphemeCat {
bsearch_range_value_table(c, grapheme_cat_table)
}
static grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
('\x00', '\x1f', GC_Control), ('\x7f', '\x9f', GC_Control), ('\xad', '\xad', GC_Control),
('\u0300', '\u036f', GC_Extend), ('\u0483', '\u0487', GC_Extend), ('\u0488', '\u0489',
GC_Extend), ('\u0591', '\u05bd', GC_Extend), ('\u05bf', '\u05bf', GC_Extend), ('\u05c1',
'\u05c2', GC_Extend), ('\u05c4', '\u05c5', GC_Extend), ('\u05c7', '\u05c7', GC_Extend),
('\u0600', '\u0605', GC_Control), ('\u0610', '\u061a', GC_Extend), ('\u061c', '\u061c',
GC_Control), ('\u064b', '\u065f', GC_Extend), ('\u0670', '\u0670', GC_Extend), ('\u06d6',
'\u06dc', GC_Extend), ('\u06dd', '\u06dd', GC_Control), ('\u06df', '\u06e4', GC_Extend),
('\u06e7', '\u06e8', GC_Extend), ('\u06ea', '\u06ed', GC_Extend), ('\u070f', '\u070f',
GC_Control), ('\u0711', '\u0711', GC_Extend), ('\u0730', '\u074a', GC_Extend), ('\u07a6',
'\u07b0', GC_Extend), ('\u07eb', '\u07f3', GC_Extend), ('\u0816', '\u0819', GC_Extend),
('\u081b', '\u0823', GC_Extend), ('\u0825', '\u0827', GC_Extend), ('\u0829', '\u082d',
GC_Extend), ('\u0859', '\u085b', GC_Extend), ('\u08e4', '\u0902', GC_Extend), ('\u0903',
'\u0903', GC_SpacingMark), ('\u093a', '\u093a', GC_Extend), ('\u093b', '\u093b',
GC_SpacingMark), ('\u093c', '\u093c', GC_Extend), ('\u093e', '\u0940', GC_SpacingMark),
('\u0941', '\u0948', GC_Extend), ('\u0949', '\u094c', GC_SpacingMark), ('\u094d', '\u094d',
GC_Extend), ('\u094e', '\u094f', GC_SpacingMark), ('\u0951', '\u0957', GC_Extend),
('\u0962', '\u0963', GC_Extend), ('\u0981', '\u0981', GC_Extend), ('\u0982', '\u0983',
GC_SpacingMark), ('\u09bc', '\u09bc', GC_Extend), ('\u09be', '\u09be', GC_Extend),
('\u09bf', '\u09c0', GC_SpacingMark), ('\u09c1', '\u09c4', GC_Extend), ('\u09c7', '\u09c8',
GC_SpacingMark), ('\u09cb', '\u09cc', GC_SpacingMark), ('\u09cd', '\u09cd', GC_Extend),
('\u09d7', '\u09d7', GC_Extend), ('\u09e2', '\u09e3', GC_Extend), ('\u0a01', '\u0a02',
GC_Extend), ('\u0a03', '\u0a03', GC_SpacingMark), ('\u0a3c', '\u0a3c', GC_Extend),
('\u0a3e', '\u0a40', GC_SpacingMark), ('\u0a41', '\u0a42', GC_Extend), ('\u0a47', '\u0a48',
GC_Extend), ('\u0a4b', '\u0a4d', GC_Extend), ('\u0a51', '\u0a51', GC_Extend), ('\u0a70',
'\u0a71', GC_Extend), ('\u0a75', '\u0a75', GC_Extend), ('\u0a81', '\u0a82', GC_Extend),
('\u0a83', '\u0a83', GC_SpacingMark), ('\u0abc', '\u0abc', GC_Extend), ('\u0abe', '\u0ac0',
GC_SpacingMark), ('\u0ac1', '\u0ac5', GC_Extend), ('\u0ac7', '\u0ac8', GC_Extend),
('\u0ac9', '\u0ac9', GC_SpacingMark), ('\u0acb', '\u0acc', GC_SpacingMark), ('\u0acd',
'\u0acd', GC_Extend), ('\u0ae2', '\u0ae3', GC_Extend), ('\u0b01', '\u0b01', GC_Extend),
('\u0b02', '\u0b03', GC_SpacingMark), ('\u0b3c', '\u0b3c', GC_Extend), ('\u0b3e', '\u0b3e',
GC_Extend), ('\u0b3f', '\u0b3f', GC_Extend), ('\u0b40', '\u0b40', GC_SpacingMark),
('\u0b41', '\u0b44', GC_Extend), ('\u0b47', '\u0b48', GC_SpacingMark), ('\u0b4b', '\u0b4c',
GC_SpacingMark), ('\u0b4d', '\u0b4d', GC_Extend), ('\u0b56', '\u0b56', GC_Extend),
('\u0b57', '\u0b57', GC_Extend), ('\u0b62', '\u0b63', GC_Extend), ('\u0b82', '\u0b82',
GC_Extend), ('\u0bbe', '\u0bbe', GC_Extend), ('\u0bbf', '\u0bbf', GC_SpacingMark),
('\u0bc0', '\u0bc0', GC_Extend), ('\u0bc1', '\u0bc2', GC_SpacingMark), ('\u0bc6', '\u0bc8',
GC_SpacingMark), ('\u0bca', '\u0bcc', GC_SpacingMark), ('\u0bcd', '\u0bcd', GC_Extend),
('\u0bd7', '\u0bd7', GC_Extend), ('\u0c00', '\u0c00', GC_Extend), ('\u0c01', '\u0c03',
GC_SpacingMark), ('\u0c3e', '\u0c40', GC_Extend), ('\u0c41', '\u0c44', GC_SpacingMark),
('\u0c46', '\u0c48', GC_Extend), ('\u0c4a', '\u0c4d', GC_Extend), ('\u0c55', '\u0c56',
GC_Extend), ('\u0c62', '\u0c63', GC_Extend), ('\u0c81', '\u0c81', GC_Extend), ('\u0c82',
'\u0c83', GC_SpacingMark), ('\u0cbc', '\u0cbc', GC_Extend), ('\u0cbe', '\u0cbe',
GC_SpacingMark), ('\u0cbf', '\u0cbf', GC_Extend), ('\u0cc0', '\u0cc1', GC_SpacingMark),
('\u0cc2', '\u0cc2', GC_Extend), ('\u0cc3', '\u0cc4', GC_SpacingMark), ('\u0cc6', '\u0cc6',
GC_Extend), ('\u0cc7', '\u0cc8', GC_SpacingMark), ('\u0cca', '\u0ccb', GC_SpacingMark),
('\u0ccc', '\u0ccd', GC_Extend), ('\u0cd5', '\u0cd6', GC_Extend), ('\u0ce2', '\u0ce3',
GC_Extend), ('\u0d01', '\u0d01', GC_Extend), ('\u0d02', '\u0d03', GC_SpacingMark),
('\u0d3e', '\u0d3e', GC_Extend), ('\u0d3f', '\u0d40', GC_SpacingMark), ('\u0d41', '\u0d44',
GC_Extend), ('\u0d46', '\u0d48', GC_SpacingMark), ('\u0d4a', '\u0d4c', GC_SpacingMark),
('\u0d4d', '\u0d4d', GC_Extend), ('\u0d57', '\u0d57', GC_Extend), ('\u0d62', '\u0d63',
GC_Extend), ('\u0d82', '\u0d83', GC_SpacingMark), ('\u0dca', '\u0dca', GC_Extend),
('\u0dcf', '\u0dcf', GC_Extend), ('\u0dd0', '\u0dd1', GC_SpacingMark), ('\u0dd2', '\u0dd4',
GC_Extend), ('\u0dd6', '\u0dd6', GC_Extend), ('\u0dd8', '\u0dde', GC_SpacingMark),
('\u0ddf', '\u0ddf', GC_Extend), ('\u0df2', '\u0df3', GC_SpacingMark), ('\u0e31', '\u0e31',
GC_Extend), ('\u0e33', '\u0e33', GC_SpacingMark), ('\u0e34', '\u0e3a', GC_Extend),
('\u0e47', '\u0e4e', GC_Extend), ('\u0eb1', '\u0eb1', GC_Extend), ('\u0eb3', '\u0eb3',
GC_SpacingMark), ('\u0eb4', '\u0eb9', GC_Extend), ('\u0ebb', '\u0ebc', GC_Extend),
('\u0ec8', '\u0ecd', GC_Extend), ('\u0f18', '\u0f19', GC_Extend), ('\u0f35', '\u0f35',
GC_Extend), ('\u0f37', '\u0f37', GC_Extend), ('\u0f39', '\u0f39', GC_Extend), ('\u0f3e',
'\u0f3f', GC_SpacingMark), ('\u0f71', '\u0f7e', GC_Extend), ('\u0f7f', '\u0f7f',
GC_SpacingMark), ('\u0f80', '\u0f84', GC_Extend), ('\u0f86', '\u0f87', GC_Extend),
('\u0f8d', '\u0f97', GC_Extend), ('\u0f99', '\u0fbc', GC_Extend), ('\u0fc6', '\u0fc6',
GC_Extend), ('\u102b', '\u102c', GC_SpacingMark), ('\u102d', '\u1030', GC_Extend),
('\u1031', '\u1031', GC_SpacingMark), ('\u1032', '\u1037', GC_Extend), ('\u1038', '\u1038',
GC_SpacingMark), ('\u1039', '\u103a', GC_Extend), ('\u103b', '\u103c', GC_SpacingMark),
('\u103d', '\u103e', GC_Extend), ('\u1056', '\u1057', GC_SpacingMark), ('\u1058', '\u1059',
GC_Extend), ('\u105e', '\u1060', GC_Extend), ('\u1062', '\u1064', GC_SpacingMark),
('\u1067', '\u106d', GC_SpacingMark), ('\u1071', '\u1074', GC_Extend), ('\u1082', '\u1082',
GC_Extend), ('\u1083', '\u1084', GC_SpacingMark), ('\u1085', '\u1086', GC_Extend),
('\u1087', '\u108c', GC_SpacingMark), ('\u108d', '\u108d', GC_Extend), ('\u108f', '\u108f',
GC_SpacingMark), ('\u109a', '\u109c', GC_SpacingMark), ('\u109d', '\u109d', GC_Extend),
('\u1100', '\u115f', GC_L), ('\u1160', '\u11a7', GC_V), ('\u11a8', '\u11ff', GC_T),
('\u135d', '\u135f', GC_Extend), ('\u1712', '\u1714', GC_Extend), ('\u1732', '\u1734',
GC_Extend), ('\u1752', '\u1753', GC_Extend), ('\u1772', '\u1773', GC_Extend), ('\u17b4',
'\u17b5', GC_Extend), ('\u17b6', '\u17b6', GC_SpacingMark), ('\u17b7', '\u17bd', GC_Extend),
('\u17be', '\u17c5', GC_SpacingMark), ('\u17c6', '\u17c6', GC_Extend), ('\u17c7', '\u17c8',
GC_SpacingMark), ('\u17c9', '\u17d3', GC_Extend), ('\u17dd', '\u17dd', GC_Extend),
('\u180b', '\u180d', GC_Extend), ('\u180e', '\u180e', GC_Control), ('\u18a9', '\u18a9',
GC_Extend), ('\u1920', '\u1922', GC_Extend), ('\u1923', '\u1926', GC_SpacingMark),
('\u1927', '\u1928', GC_Extend), ('\u1929', '\u192b', GC_SpacingMark), ('\u1930', '\u1931',
GC_SpacingMark), ('\u1932', '\u1932', GC_Extend), ('\u1933', '\u1938', GC_SpacingMark),
('\u1939', '\u193b', GC_Extend), ('\u19b0', '\u19c0', GC_SpacingMark), ('\u19c8', '\u19c9',
GC_SpacingMark), ('\u1a17', '\u1a18', GC_Extend), ('\u1a19', '\u1a1a', GC_SpacingMark),
('\u1a1b', '\u1a1b', GC_Extend), ('\u1a55', '\u1a55', GC_SpacingMark), ('\u1a56', '\u1a56',
GC_Extend), ('\u1a57', '\u1a57', GC_SpacingMark), ('\u1a58', '\u1a5e', GC_Extend),
('\u1a60', '\u1a60', GC_Extend), ('\u1a61', '\u1a61', GC_SpacingMark), ('\u1a62', '\u1a62',
GC_Extend), ('\u1a63', '\u1a64', GC_SpacingMark), ('\u1a65', '\u1a6c', GC_Extend),
('\u1a6d', '\u1a72', GC_SpacingMark), ('\u1a73', '\u1a7c', GC_Extend), ('\u1a7f', '\u1a7f',
GC_Extend), ('\u1ab0', '\u1abd', GC_Extend), ('\u1abe', '\u1abe', GC_Extend), ('\u1b00',
'\u1b03', GC_Extend), ('\u1b04', '\u1b04', GC_SpacingMark), ('\u1b34', '\u1b34', GC_Extend),
('\u1b35', '\u1b35', GC_SpacingMark), ('\u1b36', '\u1b3a', GC_Extend), ('\u1b3b', '\u1b3b',
GC_SpacingMark), ('\u1b3c', '\u1b3c', GC_Extend), ('\u1b3d', '\u1b41', GC_SpacingMark),
('\u1b42', '\u1b42', GC_Extend), ('\u1b43', '\u1b44', GC_SpacingMark), ('\u1b6b', '\u1b73',
GC_Extend), ('\u1b80', '\u1b81', GC_Extend), ('\u1b82', '\u1b82', GC_SpacingMark),
('\u1ba1', '\u1ba1', GC_SpacingMark), ('\u1ba2', '\u1ba5', GC_Extend), ('\u1ba6', '\u1ba7',
GC_SpacingMark), ('\u1ba8', '\u1ba9', GC_Extend), ('\u1baa', '\u1baa', GC_SpacingMark),
('\u1bab', '\u1bad', GC_Extend), ('\u1be6', '\u1be6', GC_Extend), ('\u1be7', '\u1be7',
GC_SpacingMark), ('\u1be8', '\u1be9', GC_Extend), ('\u1bea', '\u1bec', GC_SpacingMark),
('\u1bed', '\u1bed', GC_Extend), ('\u1bee', '\u1bee', GC_SpacingMark), ('\u1bef', '\u1bf1',
GC_Extend), ('\u1bf2', '\u1bf3', GC_SpacingMark), ('\u1c24', '\u1c2b', GC_SpacingMark),
('\u1c2c', '\u1c33', GC_Extend), ('\u1c34', '\u1c35', GC_SpacingMark), ('\u1c36', '\u1c37',
GC_Extend), ('\u1cd0', '\u1cd2', GC_Extend), ('\u1cd4', '\u1ce0', GC_Extend), ('\u1ce1',
'\u1ce1', GC_SpacingMark), ('\u1ce2', '\u1ce8', GC_Extend), ('\u1ced', '\u1ced', GC_Extend),
('\u1cf2', '\u1cf3', GC_SpacingMark), ('\u1cf4', '\u1cf4', GC_Extend), ('\u1cf8', '\u1cf9',
GC_Extend), ('\u1dc0', '\u1df5', GC_Extend), ('\u1dfc', '\u1dff', GC_Extend), ('\u200b',
'\u200b', GC_Control), ('\u200c', '\u200d', GC_Extend), ('\u200e', '\u200f', GC_Control),
('\u2028', '\u202e', GC_Control), ('\u2060', '\u206f', GC_Control), ('\u20d0', '\u20dc',
GC_Extend), ('\u20dd', '\u20e0', GC_Extend), ('\u20e1', '\u20e1', GC_Extend), ('\u20e2',
'\u20e4', GC_Extend), ('\u20e5', '\u20f0', GC_Extend), ('\u2cef', '\u2cf1', GC_Extend),
('\u2d7f', '\u2d7f', GC_Extend), ('\u2de0', '\u2dff', GC_Extend), ('\u302a', '\u302d',
GC_Extend), ('\u302e', '\u302f', GC_Extend), ('\u3099', '\u309a', GC_Extend), ('\ua66f',
'\ua66f', GC_Extend), ('\ua670', '\ua672', GC_Extend), ('\ua674', '\ua67d', GC_Extend),
('\ua69f', '\ua69f', GC_Extend), ('\ua6f0', '\ua6f1', GC_Extend), ('\ua802', '\ua802',
GC_Extend), ('\ua806', '\ua806', GC_Extend), ('\ua80b', '\ua80b', GC_Extend), ('\ua823',
'\ua824', GC_SpacingMark), ('\ua825', '\ua826', GC_Extend), ('\ua827', '\ua827',
GC_SpacingMark), ('\ua880', '\ua881', GC_SpacingMark), ('\ua8b4', '\ua8c3', GC_SpacingMark),
('\ua8c4', '\ua8c4', GC_Extend), ('\ua8e0', '\ua8f1', GC_Extend), ('\ua926', '\ua92d',
GC_Extend), ('\ua947', '\ua951', GC_Extend), ('\ua952', '\ua953', GC_SpacingMark),
('\ua960', '\ua97c', GC_L), ('\ua980', '\ua982', GC_Extend), ('\ua983', '\ua983',
GC_SpacingMark), ('\ua9b3', '\ua9b3', GC_Extend), ('\ua9b4', '\ua9b5', GC_SpacingMark),
('\ua9b6', '\ua9b9', GC_Extend), ('\ua9ba', '\ua9bb', GC_SpacingMark), ('\ua9bc', '\ua9bc',
GC_Extend), ('\ua9bd', '\ua9c0', GC_SpacingMark), ('\ua9e5', '\ua9e5', GC_Extend),
('\uaa29', '\uaa2e', GC_Extend), ('\uaa2f', '\uaa30', GC_SpacingMark), ('\uaa31', '\uaa32',
GC_Extend), ('\uaa33', '\uaa34', GC_SpacingMark), ('\uaa35', '\uaa36', GC_Extend),
('\uaa43', '\uaa43', GC_Extend), ('\uaa4c', '\uaa4c', GC_Extend), ('\uaa4d', '\uaa4d',
GC_SpacingMark), ('\uaa7b', '\uaa7b', GC_SpacingMark), ('\uaa7c', '\uaa7c', GC_Extend),
('\uaa7d', '\uaa7d', GC_SpacingMark), ('\uaab0', '\uaab0', GC_Extend), ('\uaab2', '\uaab4',
GC_Extend), ('\uaab7', '\uaab8', GC_Extend), ('\uaabe', '\uaabf', GC_Extend), ('\uaac1',
'\uaac1', GC_Extend), ('\uaaeb', '\uaaeb', GC_SpacingMark), ('\uaaec', '\uaaed', GC_Extend),
('\uaaee', '\uaaef', GC_SpacingMark), ('\uaaf5', '\uaaf5', GC_SpacingMark), ('\uaaf6',
'\uaaf6', GC_Extend), ('\uabe3', '\uabe4', GC_SpacingMark), ('\uabe5', '\uabe5', GC_Extend),
('\uabe6', '\uabe7', GC_SpacingMark), ('\uabe8', '\uabe8', GC_Extend), ('\uabe9', '\uabea',
GC_SpacingMark), ('\uabec', '\uabec', GC_SpacingMark), ('\uabed', '\uabed', GC_Extend),
('\uac00', '\uac00', GC_LV), ('\uac01', '\uac1b', GC_LVT), ('\uac1c', '\uac1c', GC_LV),
('\uac1d', '\uac37', GC_LVT), ('\uac38', '\uac38', GC_LV), ('\uac39', '\uac53', GC_LVT),
('\uac54', '\uac54', GC_LV), ('\uac55', '\uac6f', GC_LVT), ('\uac70', '\uac70', GC_LV),
('\uac71', '\uac8b', GC_LVT), ('\uac8c', '\uac8c', GC_LV), ('\uac8d', '\uaca7', GC_LVT),
('\uaca8', '\uaca8', GC_LV), ('\uaca9', '\uacc3', GC_LVT), ('\uacc4', '\uacc4', GC_LV),
('\uacc5', '\uacdf', GC_LVT), ('\uace0', '\uace0', GC_LV), ('\uace1', '\uacfb', GC_LVT),
('\uacfc', '\uacfc', GC_LV), ('\uacfd', '\uad17', GC_LVT), ('\uad18', '\uad18', GC_LV),
('\uad19', '\uad33', GC_LVT), ('\uad34', '\uad34', GC_LV), ('\uad35', '\uad4f', GC_LVT),
('\uad50', '\uad50', GC_LV), ('\uad51', '\uad6b', GC_LVT), ('\uad6c', '\uad6c', GC_LV),
('\uad6d', '\uad87', GC_LVT), ('\uad88', '\uad88', GC_LV), ('\uad89', '\uada3', GC_LVT),
('\uada4', '\uada4', GC_LV), ('\uada5', '\uadbf', GC_LVT), ('\uadc0', '\uadc0', GC_LV),
('\uadc1', '\uaddb', GC_LVT), ('\uaddc', '\uaddc', GC_LV), ('\uaddd', '\uadf7', GC_LVT),
('\uadf8', '\uadf8', GC_LV), ('\uadf9', '\uae13', GC_LVT), ('\uae14', '\uae14', GC_LV),
('\uae15', '\uae2f', GC_LVT), ('\uae30', '\uae30', GC_LV), ('\uae31', '\uae4b', GC_LVT),
('\uae4c', '\uae4c', GC_LV), ('\uae4d', '\uae67', GC_LVT), ('\uae68', '\uae68', GC_LV),
('\uae69', '\uae83', GC_LVT), ('\uae84', '\uae84', GC_LV), ('\uae85', '\uae9f', GC_LVT),
('\uaea0', '\uaea0', GC_LV), ('\uaea1', '\uaebb', GC_LVT), ('\uaebc', '\uaebc', GC_LV),
('\uaebd', '\uaed7', GC_LVT), ('\uaed8', '\uaed8', GC_LV), ('\uaed9', '\uaef3', GC_LVT),
('\uaef4', '\uaef4', GC_LV), ('\uaef5', '\uaf0f', GC_LVT), ('\uaf10', '\uaf10', GC_LV),
('\uaf11', '\uaf2b', GC_LVT), ('\uaf2c', '\uaf2c', GC_LV), ('\uaf2d', '\uaf47', GC_LVT),
('\uaf48', '\uaf48', GC_LV), ('\uaf49', '\uaf63', GC_LVT), ('\uaf64', '\uaf64', GC_LV),
('\uaf65', '\uaf7f', GC_LVT), ('\uaf80', '\uaf80', GC_LV), ('\uaf81', '\uaf9b', GC_LVT),
('\uaf9c', '\uaf9c', GC_LV), ('\uaf9d', '\uafb7', GC_LVT), ('\uafb8', '\uafb8', GC_LV),
('\uafb9', '\uafd3', GC_LVT), ('\uafd4', '\uafd4', GC_LV), ('\uafd5', '\uafef', GC_LVT),
('\uaff0', '\uaff0', GC_LV), ('\uaff1', '\ub00b', GC_LVT), ('\ub00c', '\ub00c', GC_LV),
('\ub00d', '\ub027', GC_LVT), ('\ub028', '\ub028', GC_LV), ('\ub029', '\ub043', GC_LVT),
('\ub044', '\ub044', GC_LV), ('\ub045', '\ub05f', GC_LVT), ('\ub060', '\ub060', GC_LV),
('\ub061', '\ub07b', GC_LVT), ('\ub07c', '\ub07c', GC_LV), ('\ub07d', '\ub097', GC_LVT),
('\ub098', '\ub098', GC_LV), ('\ub099', '\ub0b3', GC_LVT), ('\ub0b4', '\ub0b4', GC_LV),
('\ub0b5', '\ub0cf', GC_LVT), ('\ub0d0', '\ub0d0', GC_LV), ('\ub0d1', '\ub0eb', GC_LVT),
('\ub0ec', '\ub0ec', GC_LV), ('\ub0ed', '\ub107', GC_LVT), ('\ub108', '\ub108', GC_LV),
('\ub109', '\ub123', GC_LVT), ('\ub124', '\ub124', GC_LV), ('\ub125', '\ub13f', GC_LVT),
('\ub140', '\ub140', GC_LV), ('\ub141', '\ub15b', GC_LVT), ('\ub15c', '\ub15c', GC_LV),
('\ub15d', '\ub177', GC_LVT), ('\ub178', '\ub178', GC_LV), ('\ub179', '\ub193', GC_LVT),
('\ub194', '\ub194', GC_LV), ('\ub195', '\ub1af', GC_LVT), ('\ub1b0', '\ub1b0', GC_LV),
('\ub1b1', '\ub1cb', GC_LVT), ('\ub1cc', '\ub1cc', GC_LV), ('\ub1cd', '\ub1e7', GC_LVT),
('\ub1e8', '\ub1e8', GC_LV), ('\ub1e9', '\ub203', GC_LVT), ('\ub204', '\ub204', GC_LV),
('\ub205', '\ub21f', GC_LVT), ('\ub220', '\ub220', GC_LV), ('\ub221', '\ub23b', GC_LVT),
('\ub23c', '\ub23c', GC_LV), ('\ub23d', '\ub257', GC_LVT), ('\ub258', '\ub258', GC_LV),
('\ub259', '\ub273', GC_LVT), ('\ub274', '\ub274', GC_LV), ('\ub275', '\ub28f', GC_LVT),
('\ub290', '\ub290', GC_LV), ('\ub291', '\ub2ab', GC_LVT), ('\ub2ac', '\ub2ac', GC_LV),
('\ub2ad', '\ub2c7', GC_LVT), ('\ub2c8', '\ub2c8', GC_LV), ('\ub2c9', '\ub2e3', GC_LVT),
('\ub2e4', '\ub2e4', GC_LV), ('\ub2e5', '\ub2ff', GC_LVT), ('\ub300', '\ub300', GC_LV),
('\ub301', '\ub31b', GC_LVT), ('\ub31c', '\ub31c', GC_LV), ('\ub31d', '\ub337', GC_LVT),
('\ub338', '\ub338', GC_LV), ('\ub339', '\ub353', GC_LVT), ('\ub354', '\ub354', GC_LV),
('\ub355', '\ub36f', GC_LVT), ('\ub370', '\ub370', GC_LV), ('\ub371', '\ub38b', GC_LVT),
('\ub38c', '\ub38c', GC_LV), ('\ub38d', '\ub3a7', GC_LVT), ('\ub3a8', '\ub3a8', GC_LV),
('\ub3a9', '\ub3c3', GC_LVT), ('\ub3c4', '\ub3c4', GC_LV), ('\ub3c5', '\ub3df', GC_LVT),
('\ub3e0', '\ub3e0', GC_LV), ('\ub3e1', '\ub3fb', GC_LVT), ('\ub3fc', '\ub3fc', GC_LV),
('\ub3fd', '\ub417', GC_LVT), ('\ub418', '\ub418', GC_LV), ('\ub419', '\ub433', GC_LVT),
('\ub434', '\ub434', GC_LV), ('\ub435', '\ub44f', GC_LVT), ('\ub450', '\ub450', GC_LV),
('\ub451', '\ub46b', GC_LVT), ('\ub46c', '\ub46c', GC_LV), ('\ub46d', '\ub487', GC_LVT),
('\ub488', '\ub488', GC_LV), ('\ub489', '\ub4a3', GC_LVT), ('\ub4a4', '\ub4a4', GC_LV),
('\ub4a5', '\ub4bf', GC_LVT), ('\ub4c0', '\ub4c0', GC_LV), ('\ub4c1', '\ub4db', GC_LVT),
('\ub4dc', '\ub4dc', GC_LV), ('\ub4dd', '\ub4f7', GC_LVT), ('\ub4f8', '\ub4f8', GC_LV),
('\ub4f9', '\ub513', GC_LVT), ('\ub514', '\ub514', GC_LV), ('\ub515', '\ub52f', GC_LVT),
('\ub530', '\ub530', GC_LV), ('\ub531', '\ub54b', GC_LVT), ('\ub54c', '\ub54c', GC_LV),
('\ub54d', '\ub567', GC_LVT), ('\ub568', '\ub568', GC_LV), ('\ub569', '\ub583', GC_LVT),
('\ub584', '\ub584', GC_LV), ('\ub585', '\ub59f', GC_LVT), ('\ub5a0', '\ub5a0', GC_LV),
('\ub5a1', '\ub5bb', GC_LVT), ('\ub5bc', '\ub5bc', GC_LV), ('\ub5bd', '\ub5d7', GC_LVT),
('\ub5d8', '\ub5d8', GC_LV), ('\ub5d9', '\ub5f3', GC_LVT), ('\ub5f4', '\ub5f4', GC_LV),
('\ub5f5', '\ub60f', GC_LVT), ('\ub610', '\ub610', GC_LV), ('\ub611', '\ub62b', GC_LVT),
('\ub62c', '\ub62c', GC_LV), ('\ub62d', '\ub647', GC_LVT), ('\ub648', '\ub648', GC_LV),
('\ub649', '\ub663', GC_LVT), ('\ub664', '\ub664', GC_LV), ('\ub665', '\ub67f', GC_LVT),
('\ub680', '\ub680', GC_LV), ('\ub681', '\ub69b', GC_LVT), ('\ub69c', '\ub69c', GC_LV),
('\ub69d', '\ub6b7', GC_LVT), ('\ub6b8', '\ub6b8', GC_LV), ('\ub6b9', '\ub6d3', GC_LVT),
('\ub6d4', '\ub6d4', GC_LV), ('\ub6d5', '\ub6ef', GC_LVT), ('\ub6f0', '\ub6f0', GC_LV),
('\ub6f1', '\ub70b', GC_LVT), ('\ub70c', '\ub70c', GC_LV), ('\ub70d', '\ub727', GC_LVT),
('\ub728', '\ub728', GC_LV), ('\ub729', '\ub743', GC_LVT), ('\ub744', '\ub744', GC_LV),
('\ub745', '\ub75f', GC_LVT), ('\ub760', '\ub760', GC_LV), ('\ub761', '\ub77b', GC_LVT),
('\ub77c', '\ub77c', GC_LV), ('\ub77d', '\ub797', GC_LVT), ('\ub798', '\ub798', GC_LV),
('\ub799', '\ub7b3', GC_LVT), ('\ub7b4', '\ub7b4', GC_LV), ('\ub7b5', '\ub7cf', GC_LVT),
('\ub7d0', '\ub7d0', GC_LV), ('\ub7d1', '\ub7eb', GC_LVT), ('\ub7ec', '\ub7ec', GC_LV),
('\ub7ed', '\ub807', GC_LVT), ('\ub808', '\ub808', GC_LV), ('\ub809', '\ub823', GC_LVT),
('\ub824', '\ub824', GC_LV), ('\ub825', '\ub83f', GC_LVT), ('\ub840', '\ub840', GC_LV),
('\ub841', '\ub85b', GC_LVT), ('\ub85c', '\ub85c', GC_LV), ('\ub85d', '\ub877', GC_LVT),
('\ub878', '\ub878', GC_LV), ('\ub879', '\ub893', GC_LVT), ('\ub894', '\ub894', GC_LV),
('\ub895', '\ub8af', GC_LVT), ('\ub8b0', '\ub8b0', GC_LV), ('\ub8b1', '\ub8cb', GC_LVT),
('\ub8cc', '\ub8cc', GC_LV), ('\ub8cd', '\ub8e7', GC_LVT), ('\ub8e8', '\ub8e8', GC_LV),
('\ub8e9', '\ub903', GC_LVT), ('\ub904', '\ub904', GC_LV), ('\ub905', '\ub91f', GC_LVT),
('\ub920', '\ub920', GC_LV), ('\ub921', '\ub93b', GC_LVT), ('\ub93c', '\ub93c', GC_LV),
('\ub93d', '\ub957', GC_LVT), ('\ub958', '\ub958', GC_LV), ('\ub959', '\ub973', GC_LVT),
('\ub974', '\ub974', GC_LV), ('\ub975', '\ub98f', GC_LVT), ('\ub990', '\ub990', GC_LV),
('\ub991', '\ub9ab', GC_LVT), ('\ub9ac', '\ub9ac', GC_LV), ('\ub9ad', '\ub9c7', GC_LVT),
('\ub9c8', '\ub9c8', GC_LV), ('\ub9c9', '\ub9e3', GC_LVT), ('\ub9e4', '\ub9e4', GC_LV),
('\ub9e5', '\ub9ff', GC_LVT), ('\uba00', '\uba00', GC_LV), ('\uba01', '\uba1b', GC_LVT),
('\uba1c', '\uba1c', GC_LV), ('\uba1d', '\uba37', GC_LVT), ('\uba38', '\uba38', GC_LV),
('\uba39', '\uba53', GC_LVT), ('\uba54', '\uba54', GC_LV), ('\uba55', '\uba6f', GC_LVT),
('\uba70', '\uba70', GC_LV), ('\uba71', '\uba8b', GC_LVT), ('\uba8c', '\uba8c', GC_LV),
('\uba8d', '\ubaa7', GC_LVT), ('\ubaa8', '\ubaa8', GC_LV), ('\ubaa9', '\ubac3', GC_LVT),
('\ubac4', '\ubac4', GC_LV), ('\ubac5', '\ubadf', GC_LVT), ('\ubae0', '\ubae0', GC_LV),
('\ubae1', '\ubafb', GC_LVT), ('\ubafc', '\ubafc', GC_LV), ('\ubafd', '\ubb17', GC_LVT),
('\ubb18', '\ubb18', GC_LV), ('\ubb19', '\ubb33', GC_LVT), ('\ubb34', '\ubb34', GC_LV),
('\ubb35', '\ubb4f', GC_LVT), ('\ubb50', '\ubb50', GC_LV), ('\ubb51', '\ubb6b', GC_LVT),
('\ubb6c', '\ubb6c', GC_LV), ('\ubb6d', '\ubb87', GC_LVT), ('\ubb88', '\ubb88', GC_LV),
('\ubb89', '\ubba3', GC_LVT), ('\ubba4', '\ubba4', GC_LV), ('\ubba5', '\ubbbf', GC_LVT),
('\ubbc0', '\ubbc0', GC_LV), ('\ubbc1', '\ubbdb', GC_LVT), ('\ubbdc', '\ubbdc', GC_LV),
('\ubbdd', '\ubbf7', GC_LVT), ('\ubbf8', '\ubbf8', GC_LV), ('\ubbf9', '\ubc13', GC_LVT),
('\ubc14', '\ubc14', GC_LV), ('\ubc15', '\ubc2f', GC_LVT), ('\ubc30', '\ubc30', GC_LV),
('\ubc31', '\ubc4b', GC_LVT), ('\ubc4c', '\ubc4c', GC_LV), ('\ubc4d', '\ubc67', GC_LVT),
('\ubc68', '\ubc68', GC_LV), ('\ubc69', '\ubc83', GC_LVT), ('\ubc84', '\ubc84', GC_LV),
('\ubc85', '\ubc9f', GC_LVT), ('\ubca0', '\ubca0', GC_LV), ('\ubca1', '\ubcbb', GC_LVT),
('\ubcbc', '\ubcbc', GC_LV), ('\ubcbd', '\ubcd7', GC_LVT), ('\ubcd8', '\ubcd8', GC_LV),
('\ubcd9', '\ubcf3', GC_LVT), ('\ubcf4', '\ubcf4', GC_LV), ('\ubcf5', '\ubd0f', GC_LVT),
('\ubd10', '\ubd10', GC_LV), ('\ubd11', '\ubd2b', GC_LVT), ('\ubd2c', '\ubd2c', GC_LV),
('\ubd2d', '\ubd47', GC_LVT), ('\ubd48', '\ubd48', GC_LV), ('\ubd49', '\ubd63', GC_LVT),
('\ubd64', '\ubd64', GC_LV), ('\ubd65', '\ubd7f', GC_LVT), ('\ubd80', '\ubd80', GC_LV),
('\ubd81', '\ubd9b', GC_LVT), ('\ubd9c', '\ubd9c', GC_LV), ('\ubd9d', '\ubdb7', GC_LVT),
('\ubdb8', '\ubdb8', GC_LV), ('\ubdb9', '\ubdd3', GC_LVT), ('\ubdd4', '\ubdd4', GC_LV),
('\ubdd5', '\ubdef', GC_LVT), ('\ubdf0', '\ubdf0', GC_LV), ('\ubdf1', '\ube0b', GC_LVT),
('\ube0c', '\ube0c', GC_LV), ('\ube0d', '\ube27', GC_LVT), ('\ube28', '\ube28', GC_LV),
('\ube29', '\ube43', GC_LVT), ('\ube44', '\ube44', GC_LV), ('\ube45', '\ube5f', GC_LVT),
('\ube60', '\ube60', GC_LV), ('\ube61', '\ube7b', GC_LVT), ('\ube7c', '\ube7c', GC_LV),
('\ube7d', '\ube97', GC_LVT), ('\ube98', '\ube98', GC_LV), ('\ube99', '\ubeb3', GC_LVT),
('\ubeb4', '\ubeb4', GC_LV), ('\ubeb5', '\ubecf', GC_LVT), ('\ubed0', '\ubed0', GC_LV),
('\ubed1', '\ubeeb', GC_LVT), ('\ubeec', '\ubeec', GC_LV), ('\ubeed', '\ubf07', GC_LVT),
('\ubf08', '\ubf08', GC_LV), ('\ubf09', '\ubf23', GC_LVT), ('\ubf24', '\ubf24', GC_LV),
('\ubf25', '\ubf3f', GC_LVT), ('\ubf40', '\ubf40', GC_LV), ('\ubf41', '\ubf5b', GC_LVT),
('\ubf5c', '\ubf5c', GC_LV), ('\ubf5d', '\ubf77', GC_LVT), ('\ubf78', '\ubf78', GC_LV),
('\ubf79', '\ubf93', GC_LVT), ('\ubf94', '\ubf94', GC_LV), ('\ubf95', '\ubfaf', GC_LVT),
('\ubfb0', '\ubfb0', GC_LV), ('\ubfb1', '\ubfcb', GC_LVT), ('\ubfcc', '\ubfcc', GC_LV),
('\ubfcd', '\ubfe7', GC_LVT), ('\ubfe8', '\ubfe8', GC_LV), ('\ubfe9', '\uc003', GC_LVT),
('\uc004', '\uc004', GC_LV), ('\uc005', '\uc01f', GC_LVT), ('\uc020', '\uc020', GC_LV),
('\uc021', '\uc03b', GC_LVT), ('\uc03c', '\uc03c', GC_LV), ('\uc03d', '\uc057', GC_LVT),
('\uc058', '\uc058', GC_LV), ('\uc059', '\uc073', GC_LVT), ('\uc074', '\uc074', GC_LV),
('\uc075', '\uc08f', GC_LVT), ('\uc090', '\uc090', GC_LV), ('\uc091', '\uc0ab', GC_LVT),
('\uc0ac', '\uc0ac', GC_LV), ('\uc0ad', '\uc0c7', GC_LVT), ('\uc0c8', '\uc0c8', GC_LV),
('\uc0c9', '\uc0e3', GC_LVT), ('\uc0e4', '\uc0e4', GC_LV), ('\uc0e5', '\uc0ff', GC_LVT),
('\uc100', '\uc100', GC_LV), ('\uc101', '\uc11b', GC_LVT), ('\uc11c', '\uc11c', GC_LV),
('\uc11d', '\uc137', GC_LVT), ('\uc138', '\uc138', GC_LV), ('\uc139', '\uc153', GC_LVT),
('\uc154', '\uc154', GC_LV), ('\uc155', '\uc16f', GC_LVT), ('\uc170', '\uc170', GC_LV),
('\uc171', '\uc18b', GC_LVT), ('\uc18c', '\uc18c', GC_LV), ('\uc18d', '\uc1a7', GC_LVT),
('\uc1a8', '\uc1a8', GC_LV), ('\uc1a9', '\uc1c3', GC_LVT), ('\uc1c4', '\uc1c4', GC_LV),
('\uc1c5', '\uc1df', GC_LVT), ('\uc1e0', '\uc1e0', GC_LV), ('\uc1e1', '\uc1fb', GC_LVT),
('\uc1fc', '\uc1fc', GC_LV), ('\uc1fd', '\uc217', GC_LVT), ('\uc218', '\uc218', GC_LV),
('\uc219', '\uc233', GC_LVT), ('\uc234', '\uc234', GC_LV), ('\uc235', '\uc24f', GC_LVT),
('\uc250', '\uc250', GC_LV), ('\uc251', '\uc26b', GC_LVT), ('\uc26c', '\uc26c', GC_LV),
('\uc26d', '\uc287', GC_LVT), ('\uc288', '\uc288', GC_LV), ('\uc289', '\uc2a3', GC_LVT),
('\uc2a4', '\uc2a4', GC_LV), ('\uc2a5', '\uc2bf', GC_LVT), ('\uc2c0', '\uc2c0', GC_LV),
('\uc2c1', '\uc2db', GC_LVT), ('\uc2dc', '\uc2dc', GC_LV), ('\uc2dd', '\uc2f7', GC_LVT),
('\uc2f8', '\uc2f8', GC_LV), ('\uc2f9', '\uc313', GC_LVT), ('\uc314', '\uc314', GC_LV),
('\uc315', '\uc32f', GC_LVT), ('\uc330', '\uc330', GC_LV), ('\uc331', '\uc34b', GC_LVT),
('\uc34c', '\uc34c', GC_LV), ('\uc34d', '\uc367', GC_LVT), ('\uc368', '\uc368', GC_LV),
('\uc369', '\uc383', GC_LVT), ('\uc384', '\uc384', GC_LV), ('\uc385', '\uc39f', GC_LVT),
('\uc3a0', '\uc3a0', GC_LV), ('\uc3a1', '\uc3bb', GC_LVT), ('\uc3bc', '\uc3bc', GC_LV),
('\uc3bd', '\uc3d7', GC_LVT), ('\uc3d8', '\uc3d8', GC_LV), ('\uc3d9', '\uc3f3', GC_LVT),
('\uc3f4', '\uc3f4', GC_LV), ('\uc3f5', '\uc40f', GC_LVT), ('\uc410', '\uc410', GC_LV),
('\uc411', '\uc42b', GC_LVT), ('\uc42c', '\uc42c', GC_LV), ('\uc42d', '\uc447', GC_LVT),
('\uc448', '\uc448', GC_LV), ('\uc449', '\uc463', GC_LVT), ('\uc464', '\uc464', GC_LV),
('\uc465', '\uc47f', GC_LVT), ('\uc480', '\uc480', GC_LV), ('\uc481', '\uc49b', GC_LVT),
('\uc49c', '\uc49c', GC_LV), ('\uc49d', '\uc4b7', GC_LVT), ('\uc4b8', '\uc4b8', GC_LV),
('\uc4b9', '\uc4d3', GC_LVT), ('\uc4d4', '\uc4d4', GC_LV), ('\uc4d5', '\uc4ef', GC_LVT),
('\uc4f0', '\uc4f0', GC_LV), ('\uc4f1', '\uc50b', GC_LVT), ('\uc50c', '\uc50c', GC_LV),
('\uc50d', '\uc527', GC_LVT), ('\uc528', '\uc528', GC_LV), ('\uc529', '\uc543', GC_LVT),
('\uc544', '\uc544', GC_LV), ('\uc545', '\uc55f', GC_LVT), ('\uc560', '\uc560', GC_LV),
('\uc561', '\uc57b', GC_LVT), ('\uc57c', '\uc57c', GC_LV), ('\uc57d', '\uc597', GC_LVT),
('\uc598', '\uc598', GC_LV), ('\uc599', '\uc5b3', GC_LVT), ('\uc5b4', '\uc5b4', GC_LV),
('\uc5b5', '\uc5cf', GC_LVT), ('\uc5d0', '\uc5d0', GC_LV), ('\uc5d1', '\uc5eb', GC_LVT),
('\uc5ec', '\uc5ec', GC_LV), ('\uc5ed', '\uc607', GC_LVT), ('\uc608', '\uc608', GC_LV),
('\uc609', '\uc623', GC_LVT), ('\uc624', '\uc624', GC_LV), ('\uc625', '\uc63f', GC_LVT),
('\uc640', '\uc640', GC_LV), ('\uc641', '\uc65b', GC_LVT), ('\uc65c', '\uc65c', GC_LV),
('\uc65d', '\uc677', GC_LVT), ('\uc678', '\uc678', GC_LV), ('\uc679', '\uc693', GC_LVT),
('\uc694', '\uc694', GC_LV), ('\uc695', '\uc6af', GC_LVT), ('\uc6b0', '\uc6b0', GC_LV),
('\uc6b1', '\uc6cb', GC_LVT), ('\uc6cc', '\uc6cc', GC_LV), ('\uc6cd', '\uc6e7', GC_LVT),
('\uc6e8', '\uc6e8', GC_LV), ('\uc6e9', '\uc703', GC_LVT), ('\uc704', '\uc704', GC_LV),
('\uc705', '\uc71f', GC_LVT), ('\uc720', '\uc720', GC_LV), ('\uc721', '\uc73b', GC_LVT),
('\uc73c', '\uc73c', GC_LV), ('\uc73d', '\uc757', GC_LVT), ('\uc758', '\uc758', GC_LV),
('\uc759', '\uc773', GC_LVT), ('\uc774', '\uc774', GC_LV), ('\uc775', '\uc78f', GC_LVT),
('\uc790', '\uc790', GC_LV), ('\uc791', '\uc7ab', GC_LVT), ('\uc7ac', '\uc7ac', GC_LV),
('\uc7ad', '\uc7c7', GC_LVT), ('\uc7c8', '\uc7c8', GC_LV), ('\uc7c9', '\uc7e3', GC_LVT),
('\uc7e4', '\uc7e4', GC_LV), ('\uc7e5', '\uc7ff', GC_LVT), ('\uc800', '\uc800', GC_LV),
('\uc801', '\uc81b', GC_LVT), ('\uc81c', '\uc81c', GC_LV), ('\uc81d', '\uc837', GC_LVT),
('\uc838', '\uc838', GC_LV), ('\uc839', '\uc853', GC_LVT), ('\uc854', '\uc854', GC_LV),
('\uc855', '\uc86f', GC_LVT), ('\uc870', '\uc870', GC_LV), ('\uc871', '\uc88b', GC_LVT),
('\uc88c', '\uc88c', GC_LV), ('\uc88d', '\uc8a7', GC_LVT), ('\uc8a8', '\uc8a8', GC_LV),
('\uc8a9', '\uc8c3', GC_LVT), ('\uc8c4', '\uc8c4', GC_LV), ('\uc8c5', '\uc8df', GC_LVT),
('\uc8e0', '\uc8e0', GC_LV), ('\uc8e1', '\uc8fb', GC_LVT), ('\uc8fc', '\uc8fc', GC_LV),
('\uc8fd', '\uc917', GC_LVT), ('\uc918', '\uc918', GC_LV), ('\uc919', '\uc933', GC_LVT),
('\uc934', '\uc934', GC_LV), ('\uc935', '\uc94f', GC_LVT), ('\uc950', '\uc950', GC_LV),
('\uc951', '\uc96b', GC_LVT), ('\uc96c', '\uc96c', GC_LV), ('\uc96d', '\uc987', GC_LVT),
('\uc988', '\uc988', GC_LV), ('\uc989', '\uc9a3', GC_LVT), ('\uc9a4', '\uc9a4', GC_LV),
('\uc9a5', '\uc9bf', GC_LVT), ('\uc9c0', '\uc9c0', GC_LV), ('\uc9c1', '\uc9db', GC_LVT),
('\uc9dc', '\uc9dc', GC_LV), ('\uc9dd', '\uc9f7', GC_LVT), ('\uc9f8', '\uc9f8', GC_LV),
('\uc9f9', '\uca13', GC_LVT), ('\uca14', '\uca14', GC_LV), ('\uca15', '\uca2f', GC_LVT),
('\uca30', '\uca30', GC_LV), ('\uca31', '\uca4b', GC_LVT), ('\uca4c', '\uca4c', GC_LV),
('\uca4d', '\uca67', GC_LVT), ('\uca68', '\uca68', GC_LV), ('\uca69', '\uca83', GC_LVT),
('\uca84', '\uca84', GC_LV), ('\uca85', '\uca9f', GC_LVT), ('\ucaa0', '\ucaa0', GC_LV),
('\ucaa1', '\ucabb', GC_LVT), ('\ucabc', '\ucabc', GC_LV), ('\ucabd', '\ucad7', GC_LVT),
('\ucad8', '\ucad8', GC_LV), ('\ucad9', '\ucaf3', GC_LVT), ('\ucaf4', '\ucaf4', GC_LV),
('\ucaf5', '\ucb0f', GC_LVT), ('\ucb10', '\ucb10', GC_LV), ('\ucb11', '\ucb2b', GC_LVT),
('\ucb2c', '\ucb2c', GC_LV), ('\ucb2d', '\ucb47', GC_LVT), ('\ucb48', '\ucb48', GC_LV),
('\ucb49', '\ucb63', GC_LVT), ('\ucb64', '\ucb64', GC_LV), ('\ucb65', '\ucb7f', GC_LVT),
('\ucb80', '\ucb80', GC_LV), ('\ucb81', '\ucb9b', GC_LVT), ('\ucb9c', '\ucb9c', GC_LV),
('\ucb9d', '\ucbb7', GC_LVT), ('\ucbb8', '\ucbb8', GC_LV), ('\ucbb9', '\ucbd3', GC_LVT),
('\ucbd4', '\ucbd4', GC_LV), ('\ucbd5', '\ucbef', GC_LVT), ('\ucbf0', '\ucbf0', GC_LV),
('\ucbf1', '\ucc0b', GC_LVT), ('\ucc0c', '\ucc0c', GC_LV), ('\ucc0d', '\ucc27', GC_LVT),
('\ucc28', '\ucc28', GC_LV), ('\ucc29', '\ucc43', GC_LVT), ('\ucc44', '\ucc44', GC_LV),
('\ucc45', '\ucc5f', GC_LVT), ('\ucc60', '\ucc60', GC_LV), ('\ucc61', '\ucc7b', GC_LVT),
('\ucc7c', '\ucc7c', GC_LV), ('\ucc7d', '\ucc97', GC_LVT), ('\ucc98', '\ucc98', GC_LV),
('\ucc99', '\uccb3', GC_LVT), ('\uccb4', '\uccb4', GC_LV), ('\uccb5', '\ucccf', GC_LVT),
('\uccd0', '\uccd0', GC_LV), ('\uccd1', '\ucceb', GC_LVT), ('\uccec', '\uccec', GC_LV),
('\ucced', '\ucd07', GC_LVT), ('\ucd08', '\ucd08', GC_LV), ('\ucd09', '\ucd23', GC_LVT),
('\ucd24', '\ucd24', GC_LV), ('\ucd25', '\ucd3f', GC_LVT), ('\ucd40', '\ucd40', GC_LV),
('\ucd41', '\ucd5b', GC_LVT), ('\ucd5c', '\ucd5c', GC_LV), ('\ucd5d', '\ucd77', GC_LVT),
('\ucd78', '\ucd78', GC_LV), ('\ucd79', '\ucd93', GC_LVT), ('\ucd94', '\ucd94', GC_LV),
('\ucd95', '\ucdaf', GC_LVT), ('\ucdb0', '\ucdb0', GC_LV), ('\ucdb1', '\ucdcb', GC_LVT),
('\ucdcc', '\ucdcc', GC_LV), ('\ucdcd', '\ucde7', GC_LVT), ('\ucde8', '\ucde8', GC_LV),
('\ucde9', '\uce03', GC_LVT), ('\uce04', '\uce04', GC_LV), ('\uce05', '\uce1f', GC_LVT),
('\uce20', '\uce20', GC_LV), ('\uce21', '\uce3b', GC_LVT), ('\uce3c', '\uce3c', GC_LV),
('\uce3d', '\uce57', GC_LVT), ('\uce58', '\uce58', GC_LV), ('\uce59', '\uce73', GC_LVT),
('\uce74', '\uce74', GC_LV), ('\uce75', '\uce8f', GC_LVT), ('\uce90', '\uce90', GC_LV),
('\uce91', '\uceab', GC_LVT), ('\uceac', '\uceac', GC_LV), ('\ucead', '\ucec7', GC_LVT),
('\ucec8', '\ucec8', GC_LV), ('\ucec9', '\ucee3', GC_LVT), ('\ucee4', '\ucee4', GC_LV),
('\ucee5', '\uceff', GC_LVT), ('\ucf00', '\ucf00', GC_LV), ('\ucf01', '\ucf1b', GC_LVT),
('\ucf1c', '\ucf1c', GC_LV), ('\ucf1d', '\ucf37', GC_LVT), ('\ucf38', '\ucf38', GC_LV),
('\ucf39', '\ucf53', GC_LVT), ('\ucf54', '\ucf54', GC_LV), ('\ucf55', '\ucf6f', GC_LVT),
('\ucf70', '\ucf70', GC_LV), ('\ucf71', '\ucf8b', GC_LVT), ('\ucf8c', '\ucf8c', GC_LV),
('\ucf8d', '\ucfa7', GC_LVT), ('\ucfa8', '\ucfa8', GC_LV), ('\ucfa9', '\ucfc3', GC_LVT),
('\ucfc4', '\ucfc4', GC_LV), ('\ucfc5', '\ucfdf', GC_LVT), ('\ucfe0', '\ucfe0', GC_LV),
('\ucfe1', '\ucffb', GC_LVT), ('\ucffc', '\ucffc', GC_LV), ('\ucffd', '\ud017', GC_LVT),
('\ud018', '\ud018', GC_LV), ('\ud019', '\ud033', GC_LVT), ('\ud034', '\ud034', GC_LV),
('\ud035', '\ud04f', GC_LVT), ('\ud050', '\ud050', GC_LV), ('\ud051', '\ud06b', GC_LVT),
('\ud06c', '\ud06c', GC_LV), ('\ud06d', '\ud087', GC_LVT), ('\ud088', '\ud088', GC_LV),
('\ud089', '\ud0a3', GC_LVT), ('\ud0a4', '\ud0a4', GC_LV), ('\ud0a5', '\ud0bf', GC_LVT),
('\ud0c0', '\ud0c0', GC_LV), ('\ud0c1', '\ud0db', GC_LVT), ('\ud0dc', '\ud0dc', GC_LV),
('\ud0dd', '\ud0f7', GC_LVT), ('\ud0f8', '\ud0f8', GC_LV), ('\ud0f9', '\ud113', GC_LVT),
('\ud114', '\ud114', GC_LV), ('\ud115', '\ud12f', GC_LVT), ('\ud130', '\ud130', GC_LV),
('\ud131', '\ud14b', GC_LVT), ('\ud14c', '\ud14c', GC_LV), ('\ud14d', '\ud167', GC_LVT),
('\ud168', '\ud168', GC_LV), ('\ud169', '\ud183', GC_LVT), ('\ud184', '\ud184', GC_LV),
('\ud185', '\ud19f', GC_LVT), ('\ud1a0', '\ud1a0', GC_LV), ('\ud1a1', '\ud1bb', GC_LVT),
('\ud1bc', '\ud1bc', GC_LV), ('\ud1bd', '\ud1d7', GC_LVT), ('\ud1d8', '\ud1d8', GC_LV),
('\ud1d9', '\ud1f3', GC_LVT), ('\ud1f4', '\ud1f4', GC_LV), ('\ud1f5', '\ud20f', GC_LVT),
('\ud210', '\ud210', GC_LV), ('\ud211', '\ud22b', GC_LVT), ('\ud22c', '\ud22c', GC_LV),
('\ud22d', '\ud247', GC_LVT), ('\ud248', '\ud248', GC_LV), ('\ud249', '\ud263', GC_LVT),
('\ud264', '\ud264', GC_LV), ('\ud265', '\ud27f', GC_LVT), ('\ud280', '\ud280', GC_LV),
('\ud281', '\ud29b', GC_LVT), ('\ud29c', '\ud29c', GC_LV), ('\ud29d', '\ud2b7', GC_LVT),
('\ud2b8', '\ud2b8', GC_LV), ('\ud2b9', '\ud2d3', GC_LVT), ('\ud2d4', '\ud2d4', GC_LV),
('\ud2d5', '\ud2ef', GC_LVT), ('\ud2f0', '\ud2f0', GC_LV), ('\ud2f1', '\ud30b', GC_LVT),
('\ud30c', '\ud30c', GC_LV), ('\ud30d', '\ud327', GC_LVT), ('\ud328', '\ud328', GC_LV),
('\ud329', '\ud343', GC_LVT), ('\ud344', '\ud344', GC_LV), ('\ud345', '\ud35f', GC_LVT),
('\ud360', '\ud360', GC_LV), ('\ud361', '\ud37b', GC_LVT), ('\ud37c', '\ud37c', GC_LV),
('\ud37d', '\ud397', GC_LVT), ('\ud398', '\ud398', GC_LV), ('\ud399', '\ud3b3', GC_LVT),
('\ud3b4', '\ud3b4', GC_LV), ('\ud3b5', '\ud3cf', GC_LVT), ('\ud3d0', '\ud3d0', GC_LV),
('\ud3d1', '\ud3eb', GC_LVT), ('\ud3ec', '\ud3ec', GC_LV), ('\ud3ed', '\ud407', GC_LVT),
('\ud408', '\ud408', GC_LV), ('\ud409', '\ud423', GC_LVT), ('\ud424', '\ud424', GC_LV),
('\ud425', '\ud43f', GC_LVT), ('\ud440', '\ud440', GC_LV), ('\ud441', '\ud45b', GC_LVT),
('\ud45c', '\ud45c', GC_LV), ('\ud45d', '\ud477', GC_LVT), ('\ud478', '\ud478', GC_LV),
('\ud479', '\ud493', GC_LVT), ('\ud494', '\ud494', GC_LV), ('\ud495', '\ud4af', GC_LVT),
('\ud4b0', '\ud4b0', GC_LV), ('\ud4b1', '\ud4cb', GC_LVT), ('\ud4cc', '\ud4cc', GC_LV),
('\ud4cd', '\ud4e7', GC_LVT), ('\ud4e8', '\ud4e8', GC_LV), ('\ud4e9', '\ud503', GC_LVT),
('\ud504', '\ud504', GC_LV), ('\ud505', '\ud51f', GC_LVT), ('\ud520', '\ud520', GC_LV),
('\ud521', '\ud53b', GC_LVT), ('\ud53c', '\ud53c', GC_LV), ('\ud53d', '\ud557', GC_LVT),
('\ud558', '\ud558', GC_LV), ('\ud559', '\ud573', GC_LVT), ('\ud574', '\ud574', GC_LV),
('\ud575', '\ud58f', GC_LVT), ('\ud590', '\ud590', GC_LV), ('\ud591', '\ud5ab', GC_LVT),
('\ud5ac', '\ud5ac', GC_LV), ('\ud5ad', '\ud5c7', GC_LVT), ('\ud5c8', '\ud5c8', GC_LV),
('\ud5c9', '\ud5e3', GC_LVT), ('\ud5e4', '\ud5e4', GC_LV), ('\ud5e5', '\ud5ff', GC_LVT),
('\ud600', '\ud600', GC_LV), ('\ud601', '\ud61b', GC_LVT), ('\ud61c', '\ud61c', GC_LV),
('\ud61d', '\ud637', GC_LVT), ('\ud638', '\ud638', GC_LV), ('\ud639', '\ud653', GC_LVT),
('\ud654', '\ud654', GC_LV), ('\ud655', '\ud66f', GC_LVT), ('\ud670', '\ud670', GC_LV),
('\ud671', '\ud68b', GC_LVT), ('\ud68c', '\ud68c', GC_LV), ('\ud68d', '\ud6a7', GC_LVT),
('\ud6a8', '\ud6a8', GC_LV), ('\ud6a9', '\ud6c3', GC_LVT), ('\ud6c4', '\ud6c4', GC_LV),
('\ud6c5', '\ud6df', GC_LVT), ('\ud6e0', '\ud6e0', GC_LV), ('\ud6e1', '\ud6fb', GC_LVT),
('\ud6fc', '\ud6fc', GC_LV), ('\ud6fd', '\ud717', GC_LVT), ('\ud718', '\ud718', GC_LV),
('\ud719', '\ud733', GC_LVT), ('\ud734', '\ud734', GC_LV), ('\ud735', '\ud74f', GC_LVT),
('\ud750', '\ud750', GC_LV), ('\ud751', '\ud76b', GC_LVT), ('\ud76c', '\ud76c', GC_LV),
('\ud76d', '\ud787', GC_LVT), ('\ud788', '\ud788', GC_LV), ('\ud789', '\ud7a3', GC_LVT),
('\ud7b0', '\ud7c6', GC_V), ('\ud7cb', '\ud7fb', GC_T), ('\ufb1e', '\ufb1e', GC_Extend),
('\ufe00', '\ufe0f', GC_Extend), ('\ufe20', '\ufe2d', GC_Extend), ('\ufeff', '\ufeff',
GC_Control), ('\uff9e', '\uff9f', GC_Extend), ('\ufff0', '\ufffb', GC_Control),
('\U000101fd', '\U000101fd', GC_Extend), ('\U000102e0', '\U000102e0', GC_Extend),
('\U00010376', '\U0001037a', GC_Extend), ('\U00010a01', '\U00010a03', GC_Extend),
('\U00010a05', '\U00010a06', GC_Extend), ('\U00010a0c', '\U00010a0f', GC_Extend),
('\U00010a38', '\U00010a3a', GC_Extend), ('\U00010a3f', '\U00010a3f', GC_Extend),
('\U00010ae5', '\U00010ae6', GC_Extend), ('\U00011000', '\U00011000', GC_SpacingMark),
('\U00011001', '\U00011001', GC_Extend), ('\U00011002', '\U00011002', GC_SpacingMark),
('\U00011038', '\U00011046', GC_Extend), ('\U0001107f', '\U00011081', GC_Extend),
('\U00011082', '\U00011082', GC_SpacingMark), ('\U000110b0', '\U000110b2', GC_SpacingMark),
('\U000110b3', '\U000110b6', GC_Extend), ('\U000110b7', '\U000110b8', GC_SpacingMark),
('\U000110b9', '\U000110ba', GC_Extend), ('\U000110bd', '\U000110bd', GC_Control),
('\U00011100', '\U00011102', GC_Extend), ('\U00011127', '\U0001112b', GC_Extend),
('\U0001112c', '\U0001112c', GC_SpacingMark), ('\U0001112d', '\U00011134', GC_Extend),
('\U00011173', '\U00011173', GC_Extend), ('\U00011180', '\U00011181', GC_Extend),
('\U00011182', '\U00011182', GC_SpacingMark), ('\U000111b3', '\U000111b5', GC_SpacingMark),
('\U000111b6', '\U000111be', GC_Extend), ('\U000111bf', '\U000111c0', GC_SpacingMark),
('\U0001122c', '\U0001122e', GC_SpacingMark), ('\U0001122f', '\U00011231', GC_Extend),
('\U00011232', '\U00011233', GC_SpacingMark), ('\U00011234', '\U00011234', GC_Extend),
('\U00011235', '\U00011235', GC_SpacingMark), ('\U00011236', '\U00011237', GC_Extend),
('\U000112df', '\U000112df', GC_Extend), ('\U000112e0', '\U000112e2', GC_SpacingMark),
('\U000112e3', '\U000112ea', GC_Extend), ('\U00011301', '\U00011301', GC_Extend),
('\U00011302', '\U00011303', GC_SpacingMark), ('\U0001133c', '\U0001133c', GC_Extend),
('\U0001133e', '\U0001133e', GC_Extend), ('\U0001133f', '\U0001133f', GC_SpacingMark),
('\U00011340', '\U00011340', GC_Extend), ('\U00011341', '\U00011344', GC_SpacingMark),
('\U00011347', '\U00011348', GC_SpacingMark), ('\U0001134b', '\U0001134d', GC_SpacingMark),
('\U00011357', '\U00011357', GC_Extend), ('\U00011362', '\U00011363', GC_SpacingMark),
('\U00011366', '\U0001136c', GC_Extend), ('\U00011370', '\U00011374', GC_Extend),
('\U000114b0', '\U000114b0', GC_Extend), ('\U000114b1', '\U000114b2', GC_SpacingMark),
('\U000114b3', '\U000114b8', GC_Extend), ('\U000114b9', '\U000114b9', GC_SpacingMark),
('\U000114ba', '\U000114ba', GC_Extend), ('\U000114bb', '\U000114bc', GC_SpacingMark),
('\U000114bd', '\U000114bd', GC_Extend), ('\U000114be', '\U000114be', GC_SpacingMark),
('\U000114bf', '\U000114c0', GC_Extend), ('\U000114c1', '\U000114c1', GC_SpacingMark),
('\U000114c2', '\U000114c3', GC_Extend), ('\U000115af', '\U000115af', GC_Extend),
('\U000115b0', '\U000115b1', GC_SpacingMark), ('\U000115b2', '\U000115b5', GC_Extend),
('\U000115b8', '\U000115bb', GC_SpacingMark), ('\U000115bc', '\U000115bd', GC_Extend),
('\U000115be', '\U000115be', GC_SpacingMark), ('\U000115bf', '\U000115c0', GC_Extend),
('\U00011630', '\U00011632', GC_SpacingMark), ('\U00011633', '\U0001163a', GC_Extend),
('\U0001163b', '\U0001163c', GC_SpacingMark), ('\U0001163d', '\U0001163d', GC_Extend),
('\U0001163e', '\U0001163e', GC_SpacingMark), ('\U0001163f', '\U00011640', GC_Extend),
('\U000116ab', '\U000116ab', GC_Extend), ('\U000116ac', '\U000116ac', GC_SpacingMark),
('\U000116ad', '\U000116ad', GC_Extend), ('\U000116ae', '\U000116af', GC_SpacingMark),
('\U000116b0', '\U000116b5', GC_Extend), ('\U000116b6', '\U000116b6', GC_SpacingMark),
('\U000116b7', '\U000116b7', GC_Extend), ('\U00016af0', '\U00016af4', GC_Extend),
('\U00016b30', '\U00016b36', GC_Extend), ('\U00016f51', '\U00016f7e', GC_SpacingMark),
('\U00016f8f', '\U00016f92', GC_Extend), ('\U0001bc9d', '\U0001bc9e', GC_Extend),
('\U0001bca0', '\U0001bca3', GC_Control), ('\U0001d165', '\U0001d165', GC_Extend),
('\U0001d166', '\U0001d166', GC_SpacingMark), ('\U0001d167', '\U0001d169', GC_Extend),
('\U0001d16d', '\U0001d16d', GC_SpacingMark), ('\U0001d16e', '\U0001d172', GC_Extend),
('\U0001d173', '\U0001d17a', GC_Control), ('\U0001d17b', '\U0001d182', GC_Extend),
('\U0001d185', '\U0001d18b', GC_Extend), ('\U0001d1aa', '\U0001d1ad', GC_Extend),
('\U0001d242', '\U0001d244', GC_Extend), ('\U0001e8d0', '\U0001e8d6', GC_Extend),
('\U0001f1e6', '\U0001f1ff', GC_RegionalIndicator), ('\U000e0000', '\U000e00ff',
GC_Control), ('\U000e0100', '\U000e01ef', GC_Extend), ('\U000e01f0', '\U000e0fff',
GC_Control)
];
}

View File

@ -15,11 +15,15 @@
* methods provided by the UnicodeChar trait.
*/
use core::clone::Clone;
use core::cmp;
use core::collections::Collection;
use core::iter::{Filter};
use core::iter::{Filter, AdditiveIterator, Iterator, DoubleEndedIterator};
use core::option::{Option, None, Some};
use core::str::{CharSplits, StrSlice};
use core::iter::Iterator;
use u_char;
use u_char::UnicodeChar;
use tables::grapheme::GraphemeCat;
/// An iterator over the words of a string, separated by a sequence of whitespace
pub type Words<'a> =
@ -27,6 +31,36 @@ pub type Words<'a> =
/// Methods for Unicode string slices
pub trait UnicodeStrSlice<'a> {
/// Returns an iterator over the
/// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
/// of the string.
///
/// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
/// otherwise, the iterator is over the *legacy grapheme clusters*.
/// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
/// recommends extended grapheme cluster boundaries for general processing.
///
/// # Example
///
/// ```rust
/// let gr1 = "a\u0310e\u0301o\u0308\u0332".graphemes(true).collect::<Vec<&str>>();
/// assert_eq!(gr1.as_slice(), &["a\u0310", "e\u0301", "o\u0308\u0332"]);
/// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
/// assert_eq!(gr2.as_slice(), &["a", "\r\n", "b", "🇷🇺🇸🇹"]);
/// ```
fn graphemes(&self, is_extended: bool) -> Graphemes<'a>;
/// Returns an iterator over the grapheme clusters of self and their byte offsets.
/// See `graphemes()` method for more information.
///
/// # Example
///
/// ```rust
/// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
/// assert_eq!(gr_inds.as_slice(), &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]);
/// ```
fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices<'a>;
/// An iterator over the words of a string (subsequences separated
/// by any sequence of whitespace). Sequences of whitespace are
/// collapsed, so empty "words" are not included.
@ -78,7 +112,7 @@ pub trait UnicodeStrSlice<'a> {
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
/// recommends that these characters be treated as 1 column (i.e.,
/// `is_cjk` = `false`) if the locale is unknown.
//fn width(&self, is_cjk: bool) -> uint;
fn width(&self, is_cjk: bool) -> uint;
/// Returns a string with leading and trailing whitespace removed.
fn trim(&self) -> &'a str;
@ -91,6 +125,16 @@ pub trait UnicodeStrSlice<'a> {
}
impl<'a> UnicodeStrSlice<'a> for &'a str {
#[inline]
fn graphemes(&self, is_extended: bool) -> Graphemes<'a> {
Graphemes { string: *self, extended: is_extended, cat: None, catb: None }
}
#[inline]
fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices<'a> {
GraphemeIndices { start_offset: self.as_ptr() as uint, iter: self.graphemes(is_extended) }
}
#[inline]
fn words(&self) -> Words<'a> {
self.split(u_char::is_whitespace).filter(|s| !s.is_empty())
@ -102,6 +146,11 @@ impl<'a> UnicodeStrSlice<'a> for &'a str {
#[inline]
fn is_alphanumeric(&self) -> bool { self.chars().all(u_char::is_alphanumeric) }
#[inline]
fn width(&self, is_cjk: bool) -> uint {
self.chars().map(|c| c.width(is_cjk).unwrap_or(0)).sum()
}
#[inline]
fn trim(&self) -> &'a str {
self.trim_left().trim_right()
@ -117,3 +166,257 @@ impl<'a> UnicodeStrSlice<'a> for &'a str {
self.trim_right_chars(u_char::is_whitespace)
}
}
/// External iterator for grapheme clusters and byte offsets.
///
/// Wraps a `Graphemes` iterator and pairs every cluster it yields with the
/// byte offset of that cluster inside the string the iterator was created from.
#[deriving(Clone)]
pub struct GraphemeIndices<'a> {
// address of the first byte of the original string (`as_ptr() as uint`);
// subtracted from each cluster's own address to obtain its byte offset
start_offset: uint,
// underlying cluster iterator that does the actual segmentation
iter: Graphemes<'a>,
}
impl<'a> Iterator<(uint, &'a str)> for GraphemeIndices<'a> {
    /// Yields the next grapheme cluster from the front of the string,
    /// paired with its byte offset relative to the start of the original string.
    #[inline]
    fn next(&mut self) -> Option<(uint, &'a str)> {
        match self.iter.next() {
            Some(cluster) => {
                // The cluster is a sub-slice of the original string, so the
                // distance between the two addresses is its byte offset.
                let offset = cluster.as_ptr() as uint - self.start_offset;
                Some((offset, cluster))
            }
            None => None
        }
    }

    /// Same bounds as the wrapped `Graphemes` iterator: exactly one item
    /// is produced here for every cluster it produces.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) {
        self.iter.size_hint()
    }
}
impl<'a> DoubleEndedIterator<(uint, &'a str)> for GraphemeIndices<'a> {
    /// Yields the next grapheme cluster from the back of the string,
    /// paired with its byte offset relative to the start of the original string.
    #[inline]
    fn next_back(&mut self) -> Option<(uint, &'a str)> {
        match self.iter.next_back() {
            Some(cluster) => {
                // Offsets are always measured from the front of the original
                // string, regardless of the direction of iteration.
                let offset = cluster.as_ptr() as uint - self.start_offset;
                Some((offset, cluster))
            }
            None => None
        }
    }
}
/// External iterator for a string's
/// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
///
/// The iterator can be driven from either end; each call slices one cluster
/// off the corresponding end of `string`.
#[deriving(Clone)]
pub struct Graphemes<'a> {
// the part of the input that has not been yielded yet
string: &'a str,
// true: extended grapheme clusters (SpacingMark also extends a cluster);
// false: legacy grapheme clusters
extended: bool,
// category of the first char of `string`, cached by `next()` when it had to
// look at that char to find a boundary but did not consume it
cat: Option<GraphemeCat>,
// category of the last char of `string`, cached by `next_back()` in the same way
catb: Option<GraphemeCat>,
}
// state machine for cluster boundary rules
//
// The same states are used by the forward scan (`next`) and the backward scan
// (`next_back`); for the Hangul states the meaning depends on the direction.
#[deriving(PartialEq,Eq)]
enum GraphemeState {
// initial state: no char of the cluster has been examined yet
Start,
// forward: the cluster has begun and only extending chars may still join it
// backward: only extending chars seen so far, still looking for the base char
FindExtend,
// forward: last char was a Hangul L
// backward: char to the right is L, LV or LVT, so only another L may precede it
HangulL,
// forward: last char was a Hangul LV or V
// backward: char to the right is a V
HangulLV,
// forward: last char was a Hangul LVT or T
// backward: char to the right is a T
HangulLVT,
// inside a run of Regional_Indicator chars
Regional,
}
impl<'a> Iterator<&'a str> for Graphemes<'a> {
// Lower bound: 0 for an empty remainder, otherwise 1.
// Upper bound: the remaining length in bytes.
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) {
let slen = self.string.len();
(cmp::min(slen, 1u), Some(slen))
}
// Returns the next grapheme cluster from the front of the remaining string
// and removes it from `self.string`; returns `None` once the string is empty.
#[inline]
fn next(&mut self) -> Option<&'a str> {
use gr = tables::grapheme;
if self.string.len() == 0 {
return None;
}
// take_curr: whether the char at `idx` belongs to the cluster being returned.
// It stays true when the loop runs off the end of the string, so the last
// char examined is then included.
let mut take_curr = true;
// idx: byte offset of the char currently being examined; converted into the
// end offset of the cluster after the loop
let mut idx = 0;
let mut state = Start;
let mut cat = gr::GC_Any;
for (curr, ch) in self.string.char_indices() {
idx = curr;
// retrieve cached category, if any
// We do this because most of the time we would end up
// looking up each character twice.
// (the cache can only be populated for the first char: it is emptied
// by take_unwrap() on the first iteration)
cat = match self.cat {
None => gr::grapheme_category(ch),
_ => self.cat.take_unwrap()
};
// Extending chars are absorbed regardless of the current state;
// SpacingMark only counts as extending for extended clusters.
if match cat {
gr::GC_Extend => true,
gr::GC_SpacingMark if self.extended => true,
_ => false
} {
state = FindExtend; // rule GB9/GB9a
continue;
}
// NOTE: `continue` inside the match below skips the assignment, leaving
// `state` unchanged; `break` leaves the loop with `idx` on the current char.
state = match state {
Start if '\r' == ch => {
let slen = self.string.len();
// '\r' occupies a single byte, so the following char starts at idx + 1
let nidx = idx + 1;
if nidx != slen && self.string.char_at(nidx) == '\n' {
idx = nidx; // rule GB3
}
// take_curr is still true, so the char at idx ('\r' or the '\n'
// that follows it) ends the cluster
break; // rule GB4
}
Start => match cat {
// a control char at the start forms a cluster on its own
gr::GC_Control => break,
gr::GC_L => HangulL,
gr::GC_LV | gr::GC_V => HangulLV,
gr::GC_LVT | gr::GC_T => HangulLVT,
gr::GC_RegionalIndicator => Regional,
_ => FindExtend
},
FindExtend => { // found non-extending when looking for extending
take_curr = false;
break;
},
HangulL => match cat { // rule GB6: L x (L|V|LV|LVT)
gr::GC_L => continue,
gr::GC_LV | gr::GC_V => HangulLV,
gr::GC_LVT => HangulLVT,
_ => {
take_curr = false;
break;
}
},
HangulLV => match cat { // rule GB7: (LV|V) x (V|T)
gr::GC_V => continue,
gr::GC_T => HangulLVT,
_ => {
take_curr = false;
break;
}
},
HangulLVT => match cat { // rule GB8: (LVT|T) x T
gr::GC_T => continue,
_ => {
take_curr = false;
break;
}
},
Regional => match cat { // rule GB8a
gr::GC_RegionalIndicator => continue,
_ => {
take_curr = false;
break;
}
}
}
}
// If the current char is part of the cluster, move idx past it; otherwise
// the cluster ends just before it and its category is remembered so the
// next call does not have to look it up again.
self.cat = if take_curr {
idx = self.string.char_range_at(idx).next;
None
} else {
Some(cat)
};
// split the remaining string at the cluster boundary
let retstr = self.string.slice_to(idx);
self.string = self.string.slice_from(idx);
Some(retstr)
}
}
impl<'a> DoubleEndedIterator<&'a str> for Graphemes<'a> {
// Returns the last grapheme cluster of the remaining string and removes it
// from `self.string`; returns `None` once the string is empty.
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
use gr = tables::grapheme;
if self.string.len() == 0 {
return None;
}
// take_curr: whether the char at `idx` belongs to the cluster being returned.
// It stays true when the loop reaches the start of the string, so the
// first char of the string is then included.
let mut take_curr = true;
// idx: start offset of the char currently being examined
let mut idx = self.string.len();
// previdx: start offset of the char examined just before it (the one to its
// right); this is where the cluster starts if the current char is rejected
let mut previdx = idx;
let mut state = Start;
let mut cat = gr::GC_Any;
for (curr, ch) in self.string.char_indices().rev() {
previdx = idx;
idx = curr;
// cached category, if any
// (only ever applies to the last char of the string: the cache is
// emptied by take_unwrap() on the first iteration)
cat = match self.catb {
None => gr::grapheme_category(ch),
_ => self.catb.take_unwrap()
};
// a matching state machine that runs *backwards* across an input string
// note that this has some implications for the Hangul matching, since
// we now need to know what the rightward letter is:
//
// Right to left, we have:
// L x L
// V x (L|V|LV)
// T x (V|T|LV|LVT)
// HangulL means the letter to the right is L
// HangulLV means the letter to the right is V
// HangulLVT means the letter to the right is T
//
// (each line above reads: right-hand char x allowed left-hand chars;
// HangulL is also entered when the right-hand char is LV or LVT, since
// in those cases, too, only an L may precede it)
//
// NOTE: `continue` inside the match below skips the assignment, leaving
// `state` unchanged.
state = match state {
Start if '\n' == ch => {
// '\r' occupies a single byte, so it starts at idx - 1
if idx > 0 && '\r' == self.string.char_at_reverse(idx) {
idx -= 1; // rule GB3
}
break; // rule GB4
},
Start | FindExtend => match cat {
gr::GC_Extend => FindExtend,
gr::GC_SpacingMark if self.extended => FindExtend,
gr::GC_L | gr::GC_LV | gr::GC_LVT => HangulL,
gr::GC_V => HangulLV,
gr::GC_T => HangulLVT,
gr::GC_RegionalIndicator => Regional,
gr::GC_Control => {
// a control char is a cluster on its own when it is the very
// first char examined; if extending chars were already
// collected it is left for the next call instead
take_curr = Start == state;
break;
},
// any other category: this char is the base of the cluster
_ => break
},
HangulL => match cat { // char to right is an L
gr::GC_L => continue, // L x L is the only legal match
_ => {
take_curr = false;
break;
}
},
HangulLV => match cat { // char to right is a V
gr::GC_V => continue, // V x V, right char is still V
gr::GC_L | gr::GC_LV => HangulL, // (L|LV) x V, right char is now L
_ => {
take_curr = false;
break;
}
},
HangulLVT => match cat { // char to right is a T
gr::GC_T => continue, // T x T, right char is still T
gr::GC_V => HangulLV, // V x T, right char is now V
gr::GC_LV | gr::GC_LVT => HangulL, // (LV|LVT) x T, right char is now L
_ => {
take_curr = false;
break;
}
},
Regional => match cat { // rule GB8a
gr::GC_RegionalIndicator => continue,
_ => {
take_curr = false;
break;
}
}
}
}
// If the current char was rejected, the cluster starts at the char to its
// right (previdx) and the rejected char's category is remembered so the
// next call does not have to look it up again.
self.catb = if take_curr {
None
} else {
idx = previdx;
Some(cat)
};
// split the remaining string at the cluster boundary
let retstr = self.string.slice_from(idx);
self.string = self.string.slice_to(idx);
Some(retstr)
}
}