Fix handling of upper/lowercase, and whitespace

This commit is contained in:
Florian Zeitz 2013-11-26 06:15:45 +01:00
parent c234614950
commit dfe38dbca4
5 changed files with 689 additions and 29 deletions

View File

@ -41,7 +41,7 @@ def load_unicode_data(f):
continue
[code, name, gencat, combine, bidi,
decomp, deci, digit, num, mirror,
old, iso, upcase, lowcsae, titlecase ] = fields
old, iso, upcase, lowcase, titlecase ] = fields
code = int(code, 16)
@ -89,11 +89,9 @@ def load_unicode_data(f):
return (canon_decomp, compat_decomp, gencats, combines)
def load_derived_core_properties(f):
def load_properties(f, interestingprops):
fetch(f)
derivedprops = {}
interestingprops = ["XID_Start", "XID_Continue", "Alphabetic"]
props = {}
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
@ -118,10 +116,10 @@ def load_derived_core_properties(f):
continue
d_lo = int(d_lo, 16)
d_hi = int(d_hi, 16)
if prop not in derivedprops:
derivedprops[prop] = []
derivedprops[prop].append((d_lo, d_hi))
return derivedprops
if prop not in props:
props[prop] = []
props[prop].append((d_lo, d_hi))
return props
def escape_char(c):
if c <= 0xff:
@ -376,5 +374,9 @@ emit_property_module(rf, "general_category", gencats)
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
derived = load_derived_core_properties("DerivedCoreProperties.txt")
derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
emit_property_module(rf, "derived_property", derived)
props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)

View File

@ -14,7 +14,7 @@ use cast::transmute;
use option::{None, Option, Some};
use iter::{Iterator, range_step};
use str::StrSlice;
use unicode::{derived_property, general_category, decompose};
use unicode::{derived_property, property, general_category, decompose};
use to_str::ToStr;
use str;
@ -89,30 +89,28 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
///
/// Indicates whether a character is in lower case, defined
/// in terms of the Unicode General Category 'Ll'
/// in terms of the Unicode Derived Core Property 'Lowercase'.
///
#[inline]
pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) }
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }
///
/// Indicates whether a character is in upper case, defined
/// in terms of the Unicode General Category 'Lu'.
/// in terms of the Unicode Derived Core Property 'Uppercase'.
///
#[inline]
pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) }
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }
///
/// Indicates whether a character is whitespace. Whitespace is defined in
/// terms of the Unicode General Categories 'Zs', 'Zl', 'Zp'
/// additional 'Cc'-category control codes in the range [0x09, 0x0d]
/// terms of the Unicode Property 'White_Space'.
///
#[inline]
pub fn is_whitespace(c: char) -> bool {
// As an optimization ASCII whitespace characters are checked separately
c == ' '
|| ('\x09' <= c && c <= '\x0d')
|| general_category::Zs(c)
|| general_category::Zl(c)
|| general_category::Zp(c)
|| property::White_Space(c)
}
///

View File

@ -4087,6 +4087,640 @@ pub mod derived_property {
bsearch_range_table(c, Alphabetic_table)
}
static Lowercase_table : &'static [(char,char)] = &[
('\x61', '\x7a'), ('\xaa', '\xaa'),
('\xb5', '\xb5'), ('\xba', '\xba'),
('\xdf', '\xf6'), ('\xf8', '\xff'),
('\u0101', '\u0101'), ('\u0103', '\u0103'),
('\u0105', '\u0105'), ('\u0107', '\u0107'),
('\u0109', '\u0109'), ('\u010b', '\u010b'),
('\u010d', '\u010d'), ('\u010f', '\u010f'),
('\u0111', '\u0111'), ('\u0113', '\u0113'),
('\u0115', '\u0115'), ('\u0117', '\u0117'),
('\u0119', '\u0119'), ('\u011b', '\u011b'),
('\u011d', '\u011d'), ('\u011f', '\u011f'),
('\u0121', '\u0121'), ('\u0123', '\u0123'),
('\u0125', '\u0125'), ('\u0127', '\u0127'),
('\u0129', '\u0129'), ('\u012b', '\u012b'),
('\u012d', '\u012d'), ('\u012f', '\u012f'),
('\u0131', '\u0131'), ('\u0133', '\u0133'),
('\u0135', '\u0135'), ('\u0137', '\u0138'),
('\u013a', '\u013a'), ('\u013c', '\u013c'),
('\u013e', '\u013e'), ('\u0140', '\u0140'),
('\u0142', '\u0142'), ('\u0144', '\u0144'),
('\u0146', '\u0146'), ('\u0148', '\u0149'),
('\u014b', '\u014b'), ('\u014d', '\u014d'),
('\u014f', '\u014f'), ('\u0151', '\u0151'),
('\u0153', '\u0153'), ('\u0155', '\u0155'),
('\u0157', '\u0157'), ('\u0159', '\u0159'),
('\u015b', '\u015b'), ('\u015d', '\u015d'),
('\u015f', '\u015f'), ('\u0161', '\u0161'),
('\u0163', '\u0163'), ('\u0165', '\u0165'),
('\u0167', '\u0167'), ('\u0169', '\u0169'),
('\u016b', '\u016b'), ('\u016d', '\u016d'),
('\u016f', '\u016f'), ('\u0171', '\u0171'),
('\u0173', '\u0173'), ('\u0175', '\u0175'),
('\u0177', '\u0177'), ('\u017a', '\u017a'),
('\u017c', '\u017c'), ('\u017e', '\u0180'),
('\u0183', '\u0183'), ('\u0185', '\u0185'),
('\u0188', '\u0188'), ('\u018c', '\u018d'),
('\u0192', '\u0192'), ('\u0195', '\u0195'),
('\u0199', '\u019b'), ('\u019e', '\u019e'),
('\u01a1', '\u01a1'), ('\u01a3', '\u01a3'),
('\u01a5', '\u01a5'), ('\u01a8', '\u01a8'),
('\u01aa', '\u01ab'), ('\u01ad', '\u01ad'),
('\u01b0', '\u01b0'), ('\u01b4', '\u01b4'),
('\u01b6', '\u01b6'), ('\u01b9', '\u01ba'),
('\u01bd', '\u01bf'), ('\u01c6', '\u01c6'),
('\u01c9', '\u01c9'), ('\u01cc', '\u01cc'),
('\u01ce', '\u01ce'), ('\u01d0', '\u01d0'),
('\u01d2', '\u01d2'), ('\u01d4', '\u01d4'),
('\u01d6', '\u01d6'), ('\u01d8', '\u01d8'),
('\u01da', '\u01da'), ('\u01dc', '\u01dd'),
('\u01df', '\u01df'), ('\u01e1', '\u01e1'),
('\u01e3', '\u01e3'), ('\u01e5', '\u01e5'),
('\u01e7', '\u01e7'), ('\u01e9', '\u01e9'),
('\u01eb', '\u01eb'), ('\u01ed', '\u01ed'),
('\u01ef', '\u01f0'), ('\u01f3', '\u01f3'),
('\u01f5', '\u01f5'), ('\u01f9', '\u01f9'),
('\u01fb', '\u01fb'), ('\u01fd', '\u01fd'),
('\u01ff', '\u01ff'), ('\u0201', '\u0201'),
('\u0203', '\u0203'), ('\u0205', '\u0205'),
('\u0207', '\u0207'), ('\u0209', '\u0209'),
('\u020b', '\u020b'), ('\u020d', '\u020d'),
('\u020f', '\u020f'), ('\u0211', '\u0211'),
('\u0213', '\u0213'), ('\u0215', '\u0215'),
('\u0217', '\u0217'), ('\u0219', '\u0219'),
('\u021b', '\u021b'), ('\u021d', '\u021d'),
('\u021f', '\u021f'), ('\u0221', '\u0221'),
('\u0223', '\u0223'), ('\u0225', '\u0225'),
('\u0227', '\u0227'), ('\u0229', '\u0229'),
('\u022b', '\u022b'), ('\u022d', '\u022d'),
('\u022f', '\u022f'), ('\u0231', '\u0231'),
('\u0233', '\u0239'), ('\u023c', '\u023c'),
('\u023f', '\u0240'), ('\u0242', '\u0242'),
('\u0247', '\u0247'), ('\u0249', '\u0249'),
('\u024b', '\u024b'), ('\u024d', '\u024d'),
('\u024f', '\u0293'), ('\u0295', '\u02af'),
('\u02b0', '\u02b8'), ('\u02c0', '\u02c1'),
('\u02e0', '\u02e4'), ('\u0345', '\u0345'),
('\u0371', '\u0371'), ('\u0373', '\u0373'),
('\u0377', '\u0377'), ('\u037a', '\u037a'),
('\u037b', '\u037d'), ('\u0390', '\u0390'),
('\u03ac', '\u03ce'), ('\u03d0', '\u03d1'),
('\u03d5', '\u03d7'), ('\u03d9', '\u03d9'),
('\u03db', '\u03db'), ('\u03dd', '\u03dd'),
('\u03df', '\u03df'), ('\u03e1', '\u03e1'),
('\u03e3', '\u03e3'), ('\u03e5', '\u03e5'),
('\u03e7', '\u03e7'), ('\u03e9', '\u03e9'),
('\u03eb', '\u03eb'), ('\u03ed', '\u03ed'),
('\u03ef', '\u03f3'), ('\u03f5', '\u03f5'),
('\u03f8', '\u03f8'), ('\u03fb', '\u03fc'),
('\u0430', '\u045f'), ('\u0461', '\u0461'),
('\u0463', '\u0463'), ('\u0465', '\u0465'),
('\u0467', '\u0467'), ('\u0469', '\u0469'),
('\u046b', '\u046b'), ('\u046d', '\u046d'),
('\u046f', '\u046f'), ('\u0471', '\u0471'),
('\u0473', '\u0473'), ('\u0475', '\u0475'),
('\u0477', '\u0477'), ('\u0479', '\u0479'),
('\u047b', '\u047b'), ('\u047d', '\u047d'),
('\u047f', '\u047f'), ('\u0481', '\u0481'),
('\u048b', '\u048b'), ('\u048d', '\u048d'),
('\u048f', '\u048f'), ('\u0491', '\u0491'),
('\u0493', '\u0493'), ('\u0495', '\u0495'),
('\u0497', '\u0497'), ('\u0499', '\u0499'),
('\u049b', '\u049b'), ('\u049d', '\u049d'),
('\u049f', '\u049f'), ('\u04a1', '\u04a1'),
('\u04a3', '\u04a3'), ('\u04a5', '\u04a5'),
('\u04a7', '\u04a7'), ('\u04a9', '\u04a9'),
('\u04ab', '\u04ab'), ('\u04ad', '\u04ad'),
('\u04af', '\u04af'), ('\u04b1', '\u04b1'),
('\u04b3', '\u04b3'), ('\u04b5', '\u04b5'),
('\u04b7', '\u04b7'), ('\u04b9', '\u04b9'),
('\u04bb', '\u04bb'), ('\u04bd', '\u04bd'),
('\u04bf', '\u04bf'), ('\u04c2', '\u04c2'),
('\u04c4', '\u04c4'), ('\u04c6', '\u04c6'),
('\u04c8', '\u04c8'), ('\u04ca', '\u04ca'),
('\u04cc', '\u04cc'), ('\u04ce', '\u04cf'),
('\u04d1', '\u04d1'), ('\u04d3', '\u04d3'),
('\u04d5', '\u04d5'), ('\u04d7', '\u04d7'),
('\u04d9', '\u04d9'), ('\u04db', '\u04db'),
('\u04dd', '\u04dd'), ('\u04df', '\u04df'),
('\u04e1', '\u04e1'), ('\u04e3', '\u04e3'),
('\u04e5', '\u04e5'), ('\u04e7', '\u04e7'),
('\u04e9', '\u04e9'), ('\u04eb', '\u04eb'),
('\u04ed', '\u04ed'), ('\u04ef', '\u04ef'),
('\u04f1', '\u04f1'), ('\u04f3', '\u04f3'),
('\u04f5', '\u04f5'), ('\u04f7', '\u04f7'),
('\u04f9', '\u04f9'), ('\u04fb', '\u04fb'),
('\u04fd', '\u04fd'), ('\u04ff', '\u04ff'),
('\u0501', '\u0501'), ('\u0503', '\u0503'),
('\u0505', '\u0505'), ('\u0507', '\u0507'),
('\u0509', '\u0509'), ('\u050b', '\u050b'),
('\u050d', '\u050d'), ('\u050f', '\u050f'),
('\u0511', '\u0511'), ('\u0513', '\u0513'),
('\u0515', '\u0515'), ('\u0517', '\u0517'),
('\u0519', '\u0519'), ('\u051b', '\u051b'),
('\u051d', '\u051d'), ('\u051f', '\u051f'),
('\u0521', '\u0521'), ('\u0523', '\u0523'),
('\u0525', '\u0525'), ('\u0527', '\u0527'),
('\u0561', '\u0587'), ('\u1d00', '\u1d2b'),
('\u1d2c', '\u1d6a'), ('\u1d6b', '\u1d77'),
('\u1d78', '\u1d78'), ('\u1d79', '\u1d9a'),
('\u1d9b', '\u1dbf'), ('\u1e01', '\u1e01'),
('\u1e03', '\u1e03'), ('\u1e05', '\u1e05'),
('\u1e07', '\u1e07'), ('\u1e09', '\u1e09'),
('\u1e0b', '\u1e0b'), ('\u1e0d', '\u1e0d'),
('\u1e0f', '\u1e0f'), ('\u1e11', '\u1e11'),
('\u1e13', '\u1e13'), ('\u1e15', '\u1e15'),
('\u1e17', '\u1e17'), ('\u1e19', '\u1e19'),
('\u1e1b', '\u1e1b'), ('\u1e1d', '\u1e1d'),
('\u1e1f', '\u1e1f'), ('\u1e21', '\u1e21'),
('\u1e23', '\u1e23'), ('\u1e25', '\u1e25'),
('\u1e27', '\u1e27'), ('\u1e29', '\u1e29'),
('\u1e2b', '\u1e2b'), ('\u1e2d', '\u1e2d'),
('\u1e2f', '\u1e2f'), ('\u1e31', '\u1e31'),
('\u1e33', '\u1e33'), ('\u1e35', '\u1e35'),
('\u1e37', '\u1e37'), ('\u1e39', '\u1e39'),
('\u1e3b', '\u1e3b'), ('\u1e3d', '\u1e3d'),
('\u1e3f', '\u1e3f'), ('\u1e41', '\u1e41'),
('\u1e43', '\u1e43'), ('\u1e45', '\u1e45'),
('\u1e47', '\u1e47'), ('\u1e49', '\u1e49'),
('\u1e4b', '\u1e4b'), ('\u1e4d', '\u1e4d'),
('\u1e4f', '\u1e4f'), ('\u1e51', '\u1e51'),
('\u1e53', '\u1e53'), ('\u1e55', '\u1e55'),
('\u1e57', '\u1e57'), ('\u1e59', '\u1e59'),
('\u1e5b', '\u1e5b'), ('\u1e5d', '\u1e5d'),
('\u1e5f', '\u1e5f'), ('\u1e61', '\u1e61'),
('\u1e63', '\u1e63'), ('\u1e65', '\u1e65'),
('\u1e67', '\u1e67'), ('\u1e69', '\u1e69'),
('\u1e6b', '\u1e6b'), ('\u1e6d', '\u1e6d'),
('\u1e6f', '\u1e6f'), ('\u1e71', '\u1e71'),
('\u1e73', '\u1e73'), ('\u1e75', '\u1e75'),
('\u1e77', '\u1e77'), ('\u1e79', '\u1e79'),
('\u1e7b', '\u1e7b'), ('\u1e7d', '\u1e7d'),
('\u1e7f', '\u1e7f'), ('\u1e81', '\u1e81'),
('\u1e83', '\u1e83'), ('\u1e85', '\u1e85'),
('\u1e87', '\u1e87'), ('\u1e89', '\u1e89'),
('\u1e8b', '\u1e8b'), ('\u1e8d', '\u1e8d'),
('\u1e8f', '\u1e8f'), ('\u1e91', '\u1e91'),
('\u1e93', '\u1e93'), ('\u1e95', '\u1e9d'),
('\u1e9f', '\u1e9f'), ('\u1ea1', '\u1ea1'),
('\u1ea3', '\u1ea3'), ('\u1ea5', '\u1ea5'),
('\u1ea7', '\u1ea7'), ('\u1ea9', '\u1ea9'),
('\u1eab', '\u1eab'), ('\u1ead', '\u1ead'),
('\u1eaf', '\u1eaf'), ('\u1eb1', '\u1eb1'),
('\u1eb3', '\u1eb3'), ('\u1eb5', '\u1eb5'),
('\u1eb7', '\u1eb7'), ('\u1eb9', '\u1eb9'),
('\u1ebb', '\u1ebb'), ('\u1ebd', '\u1ebd'),
('\u1ebf', '\u1ebf'), ('\u1ec1', '\u1ec1'),
('\u1ec3', '\u1ec3'), ('\u1ec5', '\u1ec5'),
('\u1ec7', '\u1ec7'), ('\u1ec9', '\u1ec9'),
('\u1ecb', '\u1ecb'), ('\u1ecd', '\u1ecd'),
('\u1ecf', '\u1ecf'), ('\u1ed1', '\u1ed1'),
('\u1ed3', '\u1ed3'), ('\u1ed5', '\u1ed5'),
('\u1ed7', '\u1ed7'), ('\u1ed9', '\u1ed9'),
('\u1edb', '\u1edb'), ('\u1edd', '\u1edd'),
('\u1edf', '\u1edf'), ('\u1ee1', '\u1ee1'),
('\u1ee3', '\u1ee3'), ('\u1ee5', '\u1ee5'),
('\u1ee7', '\u1ee7'), ('\u1ee9', '\u1ee9'),
('\u1eeb', '\u1eeb'), ('\u1eed', '\u1eed'),
('\u1eef', '\u1eef'), ('\u1ef1', '\u1ef1'),
('\u1ef3', '\u1ef3'), ('\u1ef5', '\u1ef5'),
('\u1ef7', '\u1ef7'), ('\u1ef9', '\u1ef9'),
('\u1efb', '\u1efb'), ('\u1efd', '\u1efd'),
('\u1eff', '\u1f07'), ('\u1f10', '\u1f15'),
('\u1f20', '\u1f27'), ('\u1f30', '\u1f37'),
('\u1f40', '\u1f45'), ('\u1f50', '\u1f57'),
('\u1f60', '\u1f67'), ('\u1f70', '\u1f7d'),
('\u1f80', '\u1f87'), ('\u1f90', '\u1f97'),
('\u1fa0', '\u1fa7'), ('\u1fb0', '\u1fb4'),
('\u1fb6', '\u1fb7'), ('\u1fbe', '\u1fbe'),
('\u1fc2', '\u1fc4'), ('\u1fc6', '\u1fc7'),
('\u1fd0', '\u1fd3'), ('\u1fd6', '\u1fd7'),
('\u1fe0', '\u1fe7'), ('\u1ff2', '\u1ff4'),
('\u1ff6', '\u1ff7'), ('\u2071', '\u2071'),
('\u207f', '\u207f'), ('\u2090', '\u209c'),
('\u210a', '\u210a'), ('\u210e', '\u210f'),
('\u2113', '\u2113'), ('\u212f', '\u212f'),
('\u2134', '\u2134'), ('\u2139', '\u2139'),
('\u213c', '\u213d'), ('\u2146', '\u2149'),
('\u214e', '\u214e'), ('\u2170', '\u217f'),
('\u2184', '\u2184'), ('\u24d0', '\u24e9'),
('\u2c30', '\u2c5e'), ('\u2c61', '\u2c61'),
('\u2c65', '\u2c66'), ('\u2c68', '\u2c68'),
('\u2c6a', '\u2c6a'), ('\u2c6c', '\u2c6c'),
('\u2c71', '\u2c71'), ('\u2c73', '\u2c74'),
('\u2c76', '\u2c7b'), ('\u2c7c', '\u2c7d'),
('\u2c81', '\u2c81'), ('\u2c83', '\u2c83'),
('\u2c85', '\u2c85'), ('\u2c87', '\u2c87'),
('\u2c89', '\u2c89'), ('\u2c8b', '\u2c8b'),
('\u2c8d', '\u2c8d'), ('\u2c8f', '\u2c8f'),
('\u2c91', '\u2c91'), ('\u2c93', '\u2c93'),
('\u2c95', '\u2c95'), ('\u2c97', '\u2c97'),
('\u2c99', '\u2c99'), ('\u2c9b', '\u2c9b'),
('\u2c9d', '\u2c9d'), ('\u2c9f', '\u2c9f'),
('\u2ca1', '\u2ca1'), ('\u2ca3', '\u2ca3'),
('\u2ca5', '\u2ca5'), ('\u2ca7', '\u2ca7'),
('\u2ca9', '\u2ca9'), ('\u2cab', '\u2cab'),
('\u2cad', '\u2cad'), ('\u2caf', '\u2caf'),
('\u2cb1', '\u2cb1'), ('\u2cb3', '\u2cb3'),
('\u2cb5', '\u2cb5'), ('\u2cb7', '\u2cb7'),
('\u2cb9', '\u2cb9'), ('\u2cbb', '\u2cbb'),
('\u2cbd', '\u2cbd'), ('\u2cbf', '\u2cbf'),
('\u2cc1', '\u2cc1'), ('\u2cc3', '\u2cc3'),
('\u2cc5', '\u2cc5'), ('\u2cc7', '\u2cc7'),
('\u2cc9', '\u2cc9'), ('\u2ccb', '\u2ccb'),
('\u2ccd', '\u2ccd'), ('\u2ccf', '\u2ccf'),
('\u2cd1', '\u2cd1'), ('\u2cd3', '\u2cd3'),
('\u2cd5', '\u2cd5'), ('\u2cd7', '\u2cd7'),
('\u2cd9', '\u2cd9'), ('\u2cdb', '\u2cdb'),
('\u2cdd', '\u2cdd'), ('\u2cdf', '\u2cdf'),
('\u2ce1', '\u2ce1'), ('\u2ce3', '\u2ce4'),
('\u2cec', '\u2cec'), ('\u2cee', '\u2cee'),
('\u2cf3', '\u2cf3'), ('\u2d00', '\u2d25'),
('\u2d27', '\u2d27'), ('\u2d2d', '\u2d2d'),
('\ua641', '\ua641'), ('\ua643', '\ua643'),
('\ua645', '\ua645'), ('\ua647', '\ua647'),
('\ua649', '\ua649'), ('\ua64b', '\ua64b'),
('\ua64d', '\ua64d'), ('\ua64f', '\ua64f'),
('\ua651', '\ua651'), ('\ua653', '\ua653'),
('\ua655', '\ua655'), ('\ua657', '\ua657'),
('\ua659', '\ua659'), ('\ua65b', '\ua65b'),
('\ua65d', '\ua65d'), ('\ua65f', '\ua65f'),
('\ua661', '\ua661'), ('\ua663', '\ua663'),
('\ua665', '\ua665'), ('\ua667', '\ua667'),
('\ua669', '\ua669'), ('\ua66b', '\ua66b'),
('\ua66d', '\ua66d'), ('\ua681', '\ua681'),
('\ua683', '\ua683'), ('\ua685', '\ua685'),
('\ua687', '\ua687'), ('\ua689', '\ua689'),
('\ua68b', '\ua68b'), ('\ua68d', '\ua68d'),
('\ua68f', '\ua68f'), ('\ua691', '\ua691'),
('\ua693', '\ua693'), ('\ua695', '\ua695'),
('\ua697', '\ua697'), ('\ua723', '\ua723'),
('\ua725', '\ua725'), ('\ua727', '\ua727'),
('\ua729', '\ua729'), ('\ua72b', '\ua72b'),
('\ua72d', '\ua72d'), ('\ua72f', '\ua731'),
('\ua733', '\ua733'), ('\ua735', '\ua735'),
('\ua737', '\ua737'), ('\ua739', '\ua739'),
('\ua73b', '\ua73b'), ('\ua73d', '\ua73d'),
('\ua73f', '\ua73f'), ('\ua741', '\ua741'),
('\ua743', '\ua743'), ('\ua745', '\ua745'),
('\ua747', '\ua747'), ('\ua749', '\ua749'),
('\ua74b', '\ua74b'), ('\ua74d', '\ua74d'),
('\ua74f', '\ua74f'), ('\ua751', '\ua751'),
('\ua753', '\ua753'), ('\ua755', '\ua755'),
('\ua757', '\ua757'), ('\ua759', '\ua759'),
('\ua75b', '\ua75b'), ('\ua75d', '\ua75d'),
('\ua75f', '\ua75f'), ('\ua761', '\ua761'),
('\ua763', '\ua763'), ('\ua765', '\ua765'),
('\ua767', '\ua767'), ('\ua769', '\ua769'),
('\ua76b', '\ua76b'), ('\ua76d', '\ua76d'),
('\ua76f', '\ua76f'), ('\ua770', '\ua770'),
('\ua771', '\ua778'), ('\ua77a', '\ua77a'),
('\ua77c', '\ua77c'), ('\ua77f', '\ua77f'),
('\ua781', '\ua781'), ('\ua783', '\ua783'),
('\ua785', '\ua785'), ('\ua787', '\ua787'),
('\ua78c', '\ua78c'), ('\ua78e', '\ua78e'),
('\ua791', '\ua791'), ('\ua793', '\ua793'),
('\ua7a1', '\ua7a1'), ('\ua7a3', '\ua7a3'),
('\ua7a5', '\ua7a5'), ('\ua7a7', '\ua7a7'),
('\ua7a9', '\ua7a9'), ('\ua7f8', '\ua7f9'),
('\ua7fa', '\ua7fa'), ('\ufb00', '\ufb06'),
('\ufb13', '\ufb17'), ('\uff41', '\uff5a'),
('\U00010428', '\U0001044f'), ('\U0001d41a', '\U0001d433'),
('\U0001d44e', '\U0001d454'), ('\U0001d456', '\U0001d467'),
('\U0001d482', '\U0001d49b'), ('\U0001d4b6', '\U0001d4b9'),
('\U0001d4bb', '\U0001d4bb'), ('\U0001d4bd', '\U0001d4c3'),
('\U0001d4c5', '\U0001d4cf'), ('\U0001d4ea', '\U0001d503'),
('\U0001d51e', '\U0001d537'), ('\U0001d552', '\U0001d56b'),
('\U0001d586', '\U0001d59f'), ('\U0001d5ba', '\U0001d5d3'),
('\U0001d5ee', '\U0001d607'), ('\U0001d622', '\U0001d63b'),
('\U0001d656', '\U0001d66f'), ('\U0001d68a', '\U0001d6a5'),
('\U0001d6c2', '\U0001d6da'), ('\U0001d6dc', '\U0001d6e1'),
('\U0001d6fc', '\U0001d714'), ('\U0001d716', '\U0001d71b'),
('\U0001d736', '\U0001d74e'), ('\U0001d750', '\U0001d755'),
('\U0001d770', '\U0001d788'), ('\U0001d78a', '\U0001d78f'),
('\U0001d7aa', '\U0001d7c2'), ('\U0001d7c4', '\U0001d7c9'),
('\U0001d7cb', '\U0001d7cb')
];
/// Reports whether `c` falls inside one of the inclusive `(lo, hi)` ranges
/// listed in `Lowercase_table`, i.e. the code points carrying the Unicode
/// derived core property 'Lowercase'.
// NOTE(review): `bsearch_range_table` is defined outside this hunk; presumably
// it binary-searches the table, which would require the ranges to stay sorted
// in ascending order -- confirm before editing the table by hand.
pub fn Lowercase(c: char) -> bool {
bsearch_range_table(c, Lowercase_table)
}
static Uppercase_table : &'static [(char,char)] = &[
('\x41', '\x5a'), ('\xc0', '\xd6'),
('\xd8', '\xde'), ('\u0100', '\u0100'),
('\u0102', '\u0102'), ('\u0104', '\u0104'),
('\u0106', '\u0106'), ('\u0108', '\u0108'),
('\u010a', '\u010a'), ('\u010c', '\u010c'),
('\u010e', '\u010e'), ('\u0110', '\u0110'),
('\u0112', '\u0112'), ('\u0114', '\u0114'),
('\u0116', '\u0116'), ('\u0118', '\u0118'),
('\u011a', '\u011a'), ('\u011c', '\u011c'),
('\u011e', '\u011e'), ('\u0120', '\u0120'),
('\u0122', '\u0122'), ('\u0124', '\u0124'),
('\u0126', '\u0126'), ('\u0128', '\u0128'),
('\u012a', '\u012a'), ('\u012c', '\u012c'),
('\u012e', '\u012e'), ('\u0130', '\u0130'),
('\u0132', '\u0132'), ('\u0134', '\u0134'),
('\u0136', '\u0136'), ('\u0139', '\u0139'),
('\u013b', '\u013b'), ('\u013d', '\u013d'),
('\u013f', '\u013f'), ('\u0141', '\u0141'),
('\u0143', '\u0143'), ('\u0145', '\u0145'),
('\u0147', '\u0147'), ('\u014a', '\u014a'),
('\u014c', '\u014c'), ('\u014e', '\u014e'),
('\u0150', '\u0150'), ('\u0152', '\u0152'),
('\u0154', '\u0154'), ('\u0156', '\u0156'),
('\u0158', '\u0158'), ('\u015a', '\u015a'),
('\u015c', '\u015c'), ('\u015e', '\u015e'),
('\u0160', '\u0160'), ('\u0162', '\u0162'),
('\u0164', '\u0164'), ('\u0166', '\u0166'),
('\u0168', '\u0168'), ('\u016a', '\u016a'),
('\u016c', '\u016c'), ('\u016e', '\u016e'),
('\u0170', '\u0170'), ('\u0172', '\u0172'),
('\u0174', '\u0174'), ('\u0176', '\u0176'),
('\u0178', '\u0179'), ('\u017b', '\u017b'),
('\u017d', '\u017d'), ('\u0181', '\u0182'),
('\u0184', '\u0184'), ('\u0186', '\u0187'),
('\u0189', '\u018b'), ('\u018e', '\u0191'),
('\u0193', '\u0194'), ('\u0196', '\u0198'),
('\u019c', '\u019d'), ('\u019f', '\u01a0'),
('\u01a2', '\u01a2'), ('\u01a4', '\u01a4'),
('\u01a6', '\u01a7'), ('\u01a9', '\u01a9'),
('\u01ac', '\u01ac'), ('\u01ae', '\u01af'),
('\u01b1', '\u01b3'), ('\u01b5', '\u01b5'),
('\u01b7', '\u01b8'), ('\u01bc', '\u01bc'),
('\u01c4', '\u01c4'), ('\u01c7', '\u01c7'),
('\u01ca', '\u01ca'), ('\u01cd', '\u01cd'),
('\u01cf', '\u01cf'), ('\u01d1', '\u01d1'),
('\u01d3', '\u01d3'), ('\u01d5', '\u01d5'),
('\u01d7', '\u01d7'), ('\u01d9', '\u01d9'),
('\u01db', '\u01db'), ('\u01de', '\u01de'),
('\u01e0', '\u01e0'), ('\u01e2', '\u01e2'),
('\u01e4', '\u01e4'), ('\u01e6', '\u01e6'),
('\u01e8', '\u01e8'), ('\u01ea', '\u01ea'),
('\u01ec', '\u01ec'), ('\u01ee', '\u01ee'),
('\u01f1', '\u01f1'), ('\u01f4', '\u01f4'),
('\u01f6', '\u01f8'), ('\u01fa', '\u01fa'),
('\u01fc', '\u01fc'), ('\u01fe', '\u01fe'),
('\u0200', '\u0200'), ('\u0202', '\u0202'),
('\u0204', '\u0204'), ('\u0206', '\u0206'),
('\u0208', '\u0208'), ('\u020a', '\u020a'),
('\u020c', '\u020c'), ('\u020e', '\u020e'),
('\u0210', '\u0210'), ('\u0212', '\u0212'),
('\u0214', '\u0214'), ('\u0216', '\u0216'),
('\u0218', '\u0218'), ('\u021a', '\u021a'),
('\u021c', '\u021c'), ('\u021e', '\u021e'),
('\u0220', '\u0220'), ('\u0222', '\u0222'),
('\u0224', '\u0224'), ('\u0226', '\u0226'),
('\u0228', '\u0228'), ('\u022a', '\u022a'),
('\u022c', '\u022c'), ('\u022e', '\u022e'),
('\u0230', '\u0230'), ('\u0232', '\u0232'),
('\u023a', '\u023b'), ('\u023d', '\u023e'),
('\u0241', '\u0241'), ('\u0243', '\u0246'),
('\u0248', '\u0248'), ('\u024a', '\u024a'),
('\u024c', '\u024c'), ('\u024e', '\u024e'),
('\u0370', '\u0370'), ('\u0372', '\u0372'),
('\u0376', '\u0376'), ('\u0386', '\u0386'),
('\u0388', '\u038a'), ('\u038c', '\u038c'),
('\u038e', '\u038f'), ('\u0391', '\u03a1'),
('\u03a3', '\u03ab'), ('\u03cf', '\u03cf'),
('\u03d2', '\u03d4'), ('\u03d8', '\u03d8'),
('\u03da', '\u03da'), ('\u03dc', '\u03dc'),
('\u03de', '\u03de'), ('\u03e0', '\u03e0'),
('\u03e2', '\u03e2'), ('\u03e4', '\u03e4'),
('\u03e6', '\u03e6'), ('\u03e8', '\u03e8'),
('\u03ea', '\u03ea'), ('\u03ec', '\u03ec'),
('\u03ee', '\u03ee'), ('\u03f4', '\u03f4'),
('\u03f7', '\u03f7'), ('\u03f9', '\u03fa'),
('\u03fd', '\u042f'), ('\u0460', '\u0460'),
('\u0462', '\u0462'), ('\u0464', '\u0464'),
('\u0466', '\u0466'), ('\u0468', '\u0468'),
('\u046a', '\u046a'), ('\u046c', '\u046c'),
('\u046e', '\u046e'), ('\u0470', '\u0470'),
('\u0472', '\u0472'), ('\u0474', '\u0474'),
('\u0476', '\u0476'), ('\u0478', '\u0478'),
('\u047a', '\u047a'), ('\u047c', '\u047c'),
('\u047e', '\u047e'), ('\u0480', '\u0480'),
('\u048a', '\u048a'), ('\u048c', '\u048c'),
('\u048e', '\u048e'), ('\u0490', '\u0490'),
('\u0492', '\u0492'), ('\u0494', '\u0494'),
('\u0496', '\u0496'), ('\u0498', '\u0498'),
('\u049a', '\u049a'), ('\u049c', '\u049c'),
('\u049e', '\u049e'), ('\u04a0', '\u04a0'),
('\u04a2', '\u04a2'), ('\u04a4', '\u04a4'),
('\u04a6', '\u04a6'), ('\u04a8', '\u04a8'),
('\u04aa', '\u04aa'), ('\u04ac', '\u04ac'),
('\u04ae', '\u04ae'), ('\u04b0', '\u04b0'),
('\u04b2', '\u04b2'), ('\u04b4', '\u04b4'),
('\u04b6', '\u04b6'), ('\u04b8', '\u04b8'),
('\u04ba', '\u04ba'), ('\u04bc', '\u04bc'),
('\u04be', '\u04be'), ('\u04c0', '\u04c1'),
('\u04c3', '\u04c3'), ('\u04c5', '\u04c5'),
('\u04c7', '\u04c7'), ('\u04c9', '\u04c9'),
('\u04cb', '\u04cb'), ('\u04cd', '\u04cd'),
('\u04d0', '\u04d0'), ('\u04d2', '\u04d2'),
('\u04d4', '\u04d4'), ('\u04d6', '\u04d6'),
('\u04d8', '\u04d8'), ('\u04da', '\u04da'),
('\u04dc', '\u04dc'), ('\u04de', '\u04de'),
('\u04e0', '\u04e0'), ('\u04e2', '\u04e2'),
('\u04e4', '\u04e4'), ('\u04e6', '\u04e6'),
('\u04e8', '\u04e8'), ('\u04ea', '\u04ea'),
('\u04ec', '\u04ec'), ('\u04ee', '\u04ee'),
('\u04f0', '\u04f0'), ('\u04f2', '\u04f2'),
('\u04f4', '\u04f4'), ('\u04f6', '\u04f6'),
('\u04f8', '\u04f8'), ('\u04fa', '\u04fa'),
('\u04fc', '\u04fc'), ('\u04fe', '\u04fe'),
('\u0500', '\u0500'), ('\u0502', '\u0502'),
('\u0504', '\u0504'), ('\u0506', '\u0506'),
('\u0508', '\u0508'), ('\u050a', '\u050a'),
('\u050c', '\u050c'), ('\u050e', '\u050e'),
('\u0510', '\u0510'), ('\u0512', '\u0512'),
('\u0514', '\u0514'), ('\u0516', '\u0516'),
('\u0518', '\u0518'), ('\u051a', '\u051a'),
('\u051c', '\u051c'), ('\u051e', '\u051e'),
('\u0520', '\u0520'), ('\u0522', '\u0522'),
('\u0524', '\u0524'), ('\u0526', '\u0526'),
('\u0531', '\u0556'), ('\u10a0', '\u10c5'),
('\u10c7', '\u10c7'), ('\u10cd', '\u10cd'),
('\u1e00', '\u1e00'), ('\u1e02', '\u1e02'),
('\u1e04', '\u1e04'), ('\u1e06', '\u1e06'),
('\u1e08', '\u1e08'), ('\u1e0a', '\u1e0a'),
('\u1e0c', '\u1e0c'), ('\u1e0e', '\u1e0e'),
('\u1e10', '\u1e10'), ('\u1e12', '\u1e12'),
('\u1e14', '\u1e14'), ('\u1e16', '\u1e16'),
('\u1e18', '\u1e18'), ('\u1e1a', '\u1e1a'),
('\u1e1c', '\u1e1c'), ('\u1e1e', '\u1e1e'),
('\u1e20', '\u1e20'), ('\u1e22', '\u1e22'),
('\u1e24', '\u1e24'), ('\u1e26', '\u1e26'),
('\u1e28', '\u1e28'), ('\u1e2a', '\u1e2a'),
('\u1e2c', '\u1e2c'), ('\u1e2e', '\u1e2e'),
('\u1e30', '\u1e30'), ('\u1e32', '\u1e32'),
('\u1e34', '\u1e34'), ('\u1e36', '\u1e36'),
('\u1e38', '\u1e38'), ('\u1e3a', '\u1e3a'),
('\u1e3c', '\u1e3c'), ('\u1e3e', '\u1e3e'),
('\u1e40', '\u1e40'), ('\u1e42', '\u1e42'),
('\u1e44', '\u1e44'), ('\u1e46', '\u1e46'),
('\u1e48', '\u1e48'), ('\u1e4a', '\u1e4a'),
('\u1e4c', '\u1e4c'), ('\u1e4e', '\u1e4e'),
('\u1e50', '\u1e50'), ('\u1e52', '\u1e52'),
('\u1e54', '\u1e54'), ('\u1e56', '\u1e56'),
('\u1e58', '\u1e58'), ('\u1e5a', '\u1e5a'),
('\u1e5c', '\u1e5c'), ('\u1e5e', '\u1e5e'),
('\u1e60', '\u1e60'), ('\u1e62', '\u1e62'),
('\u1e64', '\u1e64'), ('\u1e66', '\u1e66'),
('\u1e68', '\u1e68'), ('\u1e6a', '\u1e6a'),
('\u1e6c', '\u1e6c'), ('\u1e6e', '\u1e6e'),
('\u1e70', '\u1e70'), ('\u1e72', '\u1e72'),
('\u1e74', '\u1e74'), ('\u1e76', '\u1e76'),
('\u1e78', '\u1e78'), ('\u1e7a', '\u1e7a'),
('\u1e7c', '\u1e7c'), ('\u1e7e', '\u1e7e'),
('\u1e80', '\u1e80'), ('\u1e82', '\u1e82'),
('\u1e84', '\u1e84'), ('\u1e86', '\u1e86'),
('\u1e88', '\u1e88'), ('\u1e8a', '\u1e8a'),
('\u1e8c', '\u1e8c'), ('\u1e8e', '\u1e8e'),
('\u1e90', '\u1e90'), ('\u1e92', '\u1e92'),
('\u1e94', '\u1e94'), ('\u1e9e', '\u1e9e'),
('\u1ea0', '\u1ea0'), ('\u1ea2', '\u1ea2'),
('\u1ea4', '\u1ea4'), ('\u1ea6', '\u1ea6'),
('\u1ea8', '\u1ea8'), ('\u1eaa', '\u1eaa'),
('\u1eac', '\u1eac'), ('\u1eae', '\u1eae'),
('\u1eb0', '\u1eb0'), ('\u1eb2', '\u1eb2'),
('\u1eb4', '\u1eb4'), ('\u1eb6', '\u1eb6'),
('\u1eb8', '\u1eb8'), ('\u1eba', '\u1eba'),
('\u1ebc', '\u1ebc'), ('\u1ebe', '\u1ebe'),
('\u1ec0', '\u1ec0'), ('\u1ec2', '\u1ec2'),
('\u1ec4', '\u1ec4'), ('\u1ec6', '\u1ec6'),
('\u1ec8', '\u1ec8'), ('\u1eca', '\u1eca'),
('\u1ecc', '\u1ecc'), ('\u1ece', '\u1ece'),
('\u1ed0', '\u1ed0'), ('\u1ed2', '\u1ed2'),
('\u1ed4', '\u1ed4'), ('\u1ed6', '\u1ed6'),
('\u1ed8', '\u1ed8'), ('\u1eda', '\u1eda'),
('\u1edc', '\u1edc'), ('\u1ede', '\u1ede'),
('\u1ee0', '\u1ee0'), ('\u1ee2', '\u1ee2'),
('\u1ee4', '\u1ee4'), ('\u1ee6', '\u1ee6'),
('\u1ee8', '\u1ee8'), ('\u1eea', '\u1eea'),
('\u1eec', '\u1eec'), ('\u1eee', '\u1eee'),
('\u1ef0', '\u1ef0'), ('\u1ef2', '\u1ef2'),
('\u1ef4', '\u1ef4'), ('\u1ef6', '\u1ef6'),
('\u1ef8', '\u1ef8'), ('\u1efa', '\u1efa'),
('\u1efc', '\u1efc'), ('\u1efe', '\u1efe'),
('\u1f08', '\u1f0f'), ('\u1f18', '\u1f1d'),
('\u1f28', '\u1f2f'), ('\u1f38', '\u1f3f'),
('\u1f48', '\u1f4d'), ('\u1f59', '\u1f59'),
('\u1f5b', '\u1f5b'), ('\u1f5d', '\u1f5d'),
('\u1f5f', '\u1f5f'), ('\u1f68', '\u1f6f'),
('\u1fb8', '\u1fbb'), ('\u1fc8', '\u1fcb'),
('\u1fd8', '\u1fdb'), ('\u1fe8', '\u1fec'),
('\u1ff8', '\u1ffb'), ('\u2102', '\u2102'),
('\u2107', '\u2107'), ('\u210b', '\u210d'),
('\u2110', '\u2112'), ('\u2115', '\u2115'),
('\u2119', '\u211d'), ('\u2124', '\u2124'),
('\u2126', '\u2126'), ('\u2128', '\u2128'),
('\u212a', '\u212d'), ('\u2130', '\u2133'),
('\u213e', '\u213f'), ('\u2145', '\u2145'),
('\u2160', '\u216f'), ('\u2183', '\u2183'),
('\u24b6', '\u24cf'), ('\u2c00', '\u2c2e'),
('\u2c60', '\u2c60'), ('\u2c62', '\u2c64'),
('\u2c67', '\u2c67'), ('\u2c69', '\u2c69'),
('\u2c6b', '\u2c6b'), ('\u2c6d', '\u2c70'),
('\u2c72', '\u2c72'), ('\u2c75', '\u2c75'),
('\u2c7e', '\u2c80'), ('\u2c82', '\u2c82'),
('\u2c84', '\u2c84'), ('\u2c86', '\u2c86'),
('\u2c88', '\u2c88'), ('\u2c8a', '\u2c8a'),
('\u2c8c', '\u2c8c'), ('\u2c8e', '\u2c8e'),
('\u2c90', '\u2c90'), ('\u2c92', '\u2c92'),
('\u2c94', '\u2c94'), ('\u2c96', '\u2c96'),
('\u2c98', '\u2c98'), ('\u2c9a', '\u2c9a'),
('\u2c9c', '\u2c9c'), ('\u2c9e', '\u2c9e'),
('\u2ca0', '\u2ca0'), ('\u2ca2', '\u2ca2'),
('\u2ca4', '\u2ca4'), ('\u2ca6', '\u2ca6'),
('\u2ca8', '\u2ca8'), ('\u2caa', '\u2caa'),
('\u2cac', '\u2cac'), ('\u2cae', '\u2cae'),
('\u2cb0', '\u2cb0'), ('\u2cb2', '\u2cb2'),
('\u2cb4', '\u2cb4'), ('\u2cb6', '\u2cb6'),
('\u2cb8', '\u2cb8'), ('\u2cba', '\u2cba'),
('\u2cbc', '\u2cbc'), ('\u2cbe', '\u2cbe'),
('\u2cc0', '\u2cc0'), ('\u2cc2', '\u2cc2'),
('\u2cc4', '\u2cc4'), ('\u2cc6', '\u2cc6'),
('\u2cc8', '\u2cc8'), ('\u2cca', '\u2cca'),
('\u2ccc', '\u2ccc'), ('\u2cce', '\u2cce'),
('\u2cd0', '\u2cd0'), ('\u2cd2', '\u2cd2'),
('\u2cd4', '\u2cd4'), ('\u2cd6', '\u2cd6'),
('\u2cd8', '\u2cd8'), ('\u2cda', '\u2cda'),
('\u2cdc', '\u2cdc'), ('\u2cde', '\u2cde'),
('\u2ce0', '\u2ce0'), ('\u2ce2', '\u2ce2'),
('\u2ceb', '\u2ceb'), ('\u2ced', '\u2ced'),
('\u2cf2', '\u2cf2'), ('\ua640', '\ua640'),
('\ua642', '\ua642'), ('\ua644', '\ua644'),
('\ua646', '\ua646'), ('\ua648', '\ua648'),
('\ua64a', '\ua64a'), ('\ua64c', '\ua64c'),
('\ua64e', '\ua64e'), ('\ua650', '\ua650'),
('\ua652', '\ua652'), ('\ua654', '\ua654'),
('\ua656', '\ua656'), ('\ua658', '\ua658'),
('\ua65a', '\ua65a'), ('\ua65c', '\ua65c'),
('\ua65e', '\ua65e'), ('\ua660', '\ua660'),
('\ua662', '\ua662'), ('\ua664', '\ua664'),
('\ua666', '\ua666'), ('\ua668', '\ua668'),
('\ua66a', '\ua66a'), ('\ua66c', '\ua66c'),
('\ua680', '\ua680'), ('\ua682', '\ua682'),
('\ua684', '\ua684'), ('\ua686', '\ua686'),
('\ua688', '\ua688'), ('\ua68a', '\ua68a'),
('\ua68c', '\ua68c'), ('\ua68e', '\ua68e'),
('\ua690', '\ua690'), ('\ua692', '\ua692'),
('\ua694', '\ua694'), ('\ua696', '\ua696'),
('\ua722', '\ua722'), ('\ua724', '\ua724'),
('\ua726', '\ua726'), ('\ua728', '\ua728'),
('\ua72a', '\ua72a'), ('\ua72c', '\ua72c'),
('\ua72e', '\ua72e'), ('\ua732', '\ua732'),
('\ua734', '\ua734'), ('\ua736', '\ua736'),
('\ua738', '\ua738'), ('\ua73a', '\ua73a'),
('\ua73c', '\ua73c'), ('\ua73e', '\ua73e'),
('\ua740', '\ua740'), ('\ua742', '\ua742'),
('\ua744', '\ua744'), ('\ua746', '\ua746'),
('\ua748', '\ua748'), ('\ua74a', '\ua74a'),
('\ua74c', '\ua74c'), ('\ua74e', '\ua74e'),
('\ua750', '\ua750'), ('\ua752', '\ua752'),
('\ua754', '\ua754'), ('\ua756', '\ua756'),
('\ua758', '\ua758'), ('\ua75a', '\ua75a'),
('\ua75c', '\ua75c'), ('\ua75e', '\ua75e'),
('\ua760', '\ua760'), ('\ua762', '\ua762'),
('\ua764', '\ua764'), ('\ua766', '\ua766'),
('\ua768', '\ua768'), ('\ua76a', '\ua76a'),
('\ua76c', '\ua76c'), ('\ua76e', '\ua76e'),
('\ua779', '\ua779'), ('\ua77b', '\ua77b'),
('\ua77d', '\ua77e'), ('\ua780', '\ua780'),
('\ua782', '\ua782'), ('\ua784', '\ua784'),
('\ua786', '\ua786'), ('\ua78b', '\ua78b'),
('\ua78d', '\ua78d'), ('\ua790', '\ua790'),
('\ua792', '\ua792'), ('\ua7a0', '\ua7a0'),
('\ua7a2', '\ua7a2'), ('\ua7a4', '\ua7a4'),
('\ua7a6', '\ua7a6'), ('\ua7a8', '\ua7a8'),
('\ua7aa', '\ua7aa'), ('\uff21', '\uff3a'),
('\U00010400', '\U00010427'), ('\U0001d400', '\U0001d419'),
('\U0001d434', '\U0001d44d'), ('\U0001d468', '\U0001d481'),
('\U0001d49c', '\U0001d49c'), ('\U0001d49e', '\U0001d49f'),
('\U0001d4a2', '\U0001d4a2'), ('\U0001d4a5', '\U0001d4a6'),
('\U0001d4a9', '\U0001d4ac'), ('\U0001d4ae', '\U0001d4b5'),
('\U0001d4d0', '\U0001d4e9'), ('\U0001d504', '\U0001d505'),
('\U0001d507', '\U0001d50a'), ('\U0001d50d', '\U0001d514'),
('\U0001d516', '\U0001d51c'), ('\U0001d538', '\U0001d539'),
('\U0001d53b', '\U0001d53e'), ('\U0001d540', '\U0001d544'),
('\U0001d546', '\U0001d546'), ('\U0001d54a', '\U0001d550'),
('\U0001d56c', '\U0001d585'), ('\U0001d5a0', '\U0001d5b9'),
('\U0001d5d4', '\U0001d5ed'), ('\U0001d608', '\U0001d621'),
('\U0001d63c', '\U0001d655'), ('\U0001d670', '\U0001d689'),
('\U0001d6a8', '\U0001d6c0'), ('\U0001d6e2', '\U0001d6fa'),
('\U0001d71c', '\U0001d734'), ('\U0001d756', '\U0001d76e'),
('\U0001d790', '\U0001d7a8'), ('\U0001d7ca', '\U0001d7ca')
];
/// Reports whether `c` falls inside one of the inclusive `(lo, hi)` ranges
/// listed in `Uppercase_table`, i.e. the code points carrying the Unicode
/// derived core property 'Uppercase'.
// NOTE(review): `bsearch_range_table` is defined outside this hunk; presumably
// it binary-searches the table, which would require the ranges to stay sorted
// in ascending order -- confirm before editing the table by hand.
pub fn Uppercase(c: char) -> bool {
bsearch_range_table(c, Uppercase_table)
}
static XID_Continue_table : &'static [(char,char)] = &[
('\x30', '\x39'), ('\x41', '\x5a'),
('\x5f', '\x5f'), ('\x61', '\x7a'),
@ -4856,3 +5490,31 @@ pub mod derived_property {
}
}
/// Lookup tables for Unicode properties. At present this module exposes a
/// single property, `White_Space`.
// NOTE(review): this module looks machine-generated (the generator script
// calls `emit_property_module(rf, "property", props)` on data loaded from
// PropList.txt); prefer regenerating over hand-editing -- confirm.
pub mod property {
// Returns true when `c` lies within one of the inclusive `(lo, hi)` ranges
// in `r`, false otherwise.
// NOTE(review): `bsearch` is presumably a binary search, so `r` must be
// sorted ascending with non-overlapping ranges -- confirm.
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
use cmp::{Equal, Less, Greater};
use vec::ImmutableVector;
use option::None;
// The closure orders each range relative to `c` rather than comparing two
// elements with each other.
r.bsearch(|&(lo,hi)| {
// `c` is inside this range: report a match.
if lo <= c && c <= hi { Equal }
// The whole range lies below `c`.
else if hi < c { Less }
// Remaining case: `c < lo`, so the whole range lies above `c`.
else { Greater }
}) != None
}
// Inclusive code point ranges with the 'White_Space' property. Note that
// this includes '\x85' (NEL), which the previous category-based whitespace
// check did not treat as whitespace.
static White_Space_table : &'static [(char,char)] = &[
('\x09', '\x0d'), ('\x20', '\x20'),
('\x85', '\x85'), ('\xa0', '\xa0'),
('\u1680', '\u1680'), ('\u2000', '\u200a'),
('\u2028', '\u2028'), ('\u2029', '\u2029'),
('\u202f', '\u202f'), ('\u205f', '\u205f'),
('\u3000', '\u3000')
];
/// Reports whether `c` is covered by `White_Space_table`, i.e. whether it
/// has the Unicode property 'White_Space'.
pub fn White_Space(c: char) -> bool {
bsearch_range_table(c, White_Space_table)
}
}

View File

@ -51,7 +51,7 @@ fn f() {
CR4+2: (should align)
*/
/*
// (NEL deliberately omitted)
NEL4+2: (should align)
*/
/*
Ogham Space Mark 4+2: (should align)
@ -103,11 +103,10 @@ fn f() {
fn main() {
// Taken from http://www.unicode.org/Public/UNIDATA/PropList.txt
let chars =
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
// '\x85', // for some reason Rust thinks NEL isn't whitespace
'\xA0', '\u1680', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004',
'\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2028',
'\u2029', '\u202F', '\u205F', '\u3000'];
['\x0A', '\x0B', '\x0C', '\x0D', '\x20', '\x85', '\xA0', '\u1680',
'\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006',
'\u2007', '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u202F',
'\u205F', '\u3000'];
for c in chars.iter() {
let ws = c.is_whitespace();
println!("{:?} {:?}" , c , ws);

View File

@ -51,7 +51,7 @@ fn f() {
CR4+2: (should align)
*/
/*
// (NEL deliberately omitted)
………… NEL4+2: (should align)
*/
/*
Ogham Space Mark 4+2: (should align)
@ -97,8 +97,7 @@ fn f() {
fn main() {
// Taken from http://www.unicode.org/Public/UNIDATA/PropList.txt
let chars =
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
// '\x85', // for some reason Rust thinks NEL isn't whitespace
['\x0A', '\x0B', '\x0C', '\x0D', '\x20', '\x85',
'\xA0', '\u1680', '\u2000', '\u2001', '\u2002', '\u2003',
'\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
'\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];