to_lowercase/to_uppercase: also map chars not in Lu/Ll categories.
This adds 120 mappings: Dž dž Dž DŽ Lj lj Lj LJ Nj nj Nj NJ Dz dz Dz DZ Ι ᾈ ᾀ ᾉ ᾁ ᾊ ᾂ ᾋ ᾃ ᾌ ᾄ ᾍ ᾅ ᾎ ᾆ ᾏ ᾇ ᾘ ᾐ ᾙ ᾑ ᾚ ᾒ ᾛ ᾓ ᾜ ᾔ ᾝ ᾕ ᾞ ᾖ ᾟ ᾗ ᾨ ᾠ ᾩ ᾡ ᾪ ᾢ ᾫ ᾣ ᾬ ᾤ ᾭ ᾥ ᾮ ᾦ ᾯ ᾧ ᾼ ᾳ ῌ ῃ ῼ ῳ Ⅰ ⅰ Ⅱ ⅱ Ⅲ ⅲ Ⅳ ⅳ Ⅴ ⅴ Ⅵ ⅵ Ⅶ ⅶ Ⅷ ⅷ Ⅸ ⅸ Ⅹ ⅹ Ⅺ ⅺ Ⅻ ⅻ Ⅼ ⅼ Ⅽ ⅽ Ⅾ ⅾ Ⅿ ⅿ ⅰ Ⅰ ⅱ Ⅱ ⅲ Ⅲ ⅳ Ⅳ ⅴ Ⅴ ⅵ Ⅵ ⅶ Ⅶ ⅷ Ⅷ ⅸ Ⅸ ⅹ Ⅹ ⅺ Ⅺ ⅻ Ⅻ ⅼ Ⅼ ⅽ Ⅽ ⅾ Ⅾ ⅿ Ⅿ Ⓐ ⓐ Ⓑ ⓑ Ⓒ ⓒ Ⓓ ⓓ Ⓔ ⓔ Ⓕ ⓕ Ⓖ ⓖ Ⓗ ⓗ Ⓘ ⓘ Ⓙ ⓙ Ⓚ ⓚ Ⓛ ⓛ Ⓜ ⓜ Ⓝ ⓝ Ⓞ ⓞ Ⓟ ⓟ Ⓠ ⓠ Ⓡ ⓡ Ⓢ ⓢ Ⓣ ⓣ Ⓤ ⓤ Ⓥ ⓥ Ⓦ ⓦ Ⓧ ⓧ Ⓨ ⓨ Ⓩ ⓩ ⓐ Ⓐ ⓑ Ⓑ ⓒ Ⓒ ⓓ Ⓓ ⓔ Ⓔ ⓕ Ⓕ ⓖ Ⓖ ⓗ Ⓗ ⓘ Ⓘ ⓙ Ⓙ ⓚ Ⓚ ⓛ Ⓛ ⓜ Ⓜ ⓝ Ⓝ ⓞ Ⓞ ⓟ Ⓟ ⓠ Ⓠ ⓡ Ⓡ ⓢ Ⓢ ⓣ Ⓣ ⓤ Ⓤ ⓥ Ⓥ ⓦ Ⓦ ⓧ Ⓧ ⓨ Ⓨ ⓩ Ⓩ
This commit is contained in:
parent
0b0c89efb3
commit
66af12721a
@@ -72,8 +72,8 @@ def is_surrogate(n):
|
||||
def load_unicode_data(f):
|
||||
fetch(f)
|
||||
gencats = {}
|
||||
upperlower = {}
|
||||
lowerupper = {}
|
||||
to_lower = {}
|
||||
to_upper = {}
|
||||
combines = {}
|
||||
canon_decomp = {}
|
||||
compat_decomp = {}
|
||||
@@ -103,12 +103,12 @@ def load_unicode_data(f):
|
||||
|
||||
# generate char to char direct common and simple conversions
|
||||
# uppercase to lowercase
|
||||
if gencat == "Lu" and lowcase != "" and code_org != lowcase:
|
||||
upperlower[code] = int(lowcase, 16)
|
||||
if lowcase != "" and code_org != lowcase:
|
||||
to_lower[code] = int(lowcase, 16)
|
||||
|
||||
# lowercase to uppercase
|
||||
if gencat == "Ll" and upcase != "" and code_org != upcase:
|
||||
lowerupper[code] = int(upcase, 16)
|
||||
if upcase != "" and code_org != upcase:
|
||||
to_upper[code] = int(upcase, 16)
|
||||
|
||||
# store decomposition, if given
|
||||
if decomp != "":
|
||||
@@ -144,7 +144,7 @@ def load_unicode_data(f):
|
||||
gencats = group_cats(gencats)
|
||||
combines = to_combines(group_cats(combines))
|
||||
|
||||
return (canon_decomp, compat_decomp, gencats, combines, lowerupper, upperlower)
|
||||
return (canon_decomp, compat_decomp, gencats, combines, to_upper, to_lower)
|
||||
|
||||
def group_cats(cats):
|
||||
cats_out = {}
|
||||
@@ -319,7 +319,7 @@ def emit_property_module(f, mod, tbl, emit):
|
||||
f.write(" }\n\n")
|
||||
f.write("}\n\n")
|
||||
|
||||
def emit_conversions_module(f, lowerupper, upperlower):
|
||||
def emit_conversions_module(f, to_upper, to_lower):
|
||||
f.write("pub mod conversions {")
|
||||
f.write("""
|
||||
use core::cmp::Ordering::{Equal, Less, Greater};
|
||||
@@ -329,16 +329,16 @@ def emit_conversions_module(f, lowerupper, upperlower):
|
||||
use core::result::Result::{Ok, Err};
|
||||
|
||||
pub fn to_lower(c: char) -> char {
|
||||
match bsearch_case_table(c, LuLl_table) {
|
||||
match bsearch_case_table(c, to_lowercase_table) {
|
||||
None => c,
|
||||
Some(index) => LuLl_table[index].1
|
||||
Some(index) => to_lowercase_table[index].1
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_upper(c: char) -> char {
|
||||
match bsearch_case_table(c, LlLu_table) {
|
||||
match bsearch_case_table(c, to_uppercase_table) {
|
||||
None => c,
|
||||
Some(index) => LlLu_table[index].1
|
||||
Some(index) => to_uppercase_table[index].1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -354,10 +354,10 @@ def emit_conversions_module(f, lowerupper, upperlower):
|
||||
}
|
||||
|
||||
""")
|
||||
emit_table(f, "LuLl_table",
|
||||
sorted(upperlower.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
emit_table(f, "LlLu_table",
|
||||
sorted(lowerupper.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
emit_table(f, "to_lowercase_table",
|
||||
sorted(to_lower.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
emit_table(f, "to_uppercase_table",
|
||||
sorted(to_upper.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
f.write("}\n\n")
|
||||
|
||||
def emit_grapheme_module(f, grapheme_table, grapheme_cats):
|
||||
@@ -591,7 +591,7 @@ if __name__ == "__main__":
|
||||
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
""" % unicode_version)
|
||||
(canon_decomp, compat_decomp, gencats, combines,
|
||||
lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
|
||||
to_upper, to_lower) = load_unicode_data("UnicodeData.txt")
|
||||
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]
|
||||
derived = load_properties("DerivedCoreProperties.txt", want_derived)
|
||||
scripts = load_properties("Scripts.txt", [])
|
||||
@@ -611,7 +611,7 @@ pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
|
||||
# normalizations and conversions module
|
||||
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
|
||||
emit_conversions_module(rf, lowerupper, upperlower)
|
||||
emit_conversions_module(rf, to_upper, to_lower)
|
||||
|
||||
### character width module
|
||||
width_table = []
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user