auto merge of #5945 : graydon/rust/fix-unicode-tables, r=pcwalton
This switches the Unicode functions in core to use static character-range tables and a binary-search helper rather than open-coded switch statements. It adds about 50k of read-only data to the libcore binary but cuts out a similar amount of compiled IR. We would have done it this way in the first place, but we didn't have structured statics for a long time.
This commit is contained in:
commit
ce4f73a243
@ -112,7 +112,49 @@ def escape_char(c):
|
||||
return "'\\u%4.4x'" % c
|
||||
return "'\\U%8.8x'" % c
|
||||
|
||||
def ch_prefix(ix):
    """Return the text to write in front of the table entry at index `ix`.

    The entry at index 0 is preceded by a single space. Every later entry is
    preceded by a comma; entries at even indices additionally start on a new
    line, so the emitted table carries two entries per line.
    """
    if ix == 0:
        return " "
    # Odd index: stay on the current line. Even index: break the line first.
    return ", " if ix % 2 else ",\n "
|
||||
|
||||
# Write to `f` the Rust source text of a `bsearch_range_table(c, r)` helper.
# The emitted function searches the (lo, hi) pairs in `r` through
# `vec::bsearch` and evaluates to true when some pair satisfies
# lo <= c <= hi (the comparator yields EQ for that pair, LT when the pair
# ends before `c`, GT otherwise).
# NOTE(review): the `|` / `||||` lines inside this block look like
# diff-viewer residue from extraction rather than part of the template --
# confirm against the original script before relying on the literal text.
def emit_bsearch_range_table(f):
|
||||
f.write("""
|
||||
pure fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
|
||||
use cmp::{EQ, LT, GT};
|
||||
use vec::bsearch;
|
||||
use option::None;
|
||||
(do bsearch(r) |&(lo,hi)| {
|
||||
if lo <= c && c <= hi { EQ }
|
||||
else if hi < c { LT }
|
||||
else { GT }
|
||||
}) != None
|
||||
}\n\n
|
||||
""");
|
||||
|
||||
def emit_property_module(f, mod, tbl):
    """Write a public Rust module `mod` with one range-table predicate per key of `tbl`.

    f   -- writable file-like object receiving the generated Rust source.
    mod -- name of the Rust module to emit.
    tbl -- mapping from property/category name to a sequence of pairs; each
           pair's two elements are rendered with `escape_char` as the low
           and high end of a character range.

    For every key (in sorted order) this emits a `<key>_table` constant
    holding the ranges and a `<key>(c)` function that looks `c` up in that
    table via the `bsearch_range_table` helper emitted at the top of the
    module.
    """
    f.write("pub mod %s {\n" % mod)
    # sorted() rather than keys() followed by an in-place .sort(): the latter
    # only works when keys() returns a list and fails on Python 3 dict views.
    keys = sorted(tbl.keys())
    emit_bsearch_range_table(f)
    for cat in keys:
        f.write(" const %s_table : &[(char,char)] = &[\n" % cat)
        for ix, pair in enumerate(tbl[cat]):
            # ch_prefix supplies the separator/line break before each entry.
            f.write(ch_prefix(ix))
            f.write("(%s, %s)" % (escape_char(pair[0]), escape_char(pair[1])))
        f.write("\n ];\n\n")

        f.write(" pub pure fn %s(c: char) -> bool {\n" % cat)
        f.write(" bsearch_range_table(c, %s_table)\n" % cat)
        f.write(" }\n\n")
    f.write("}\n")
|
||||
|
||||
|
||||
def emit_property_module_old(f, mod, tbl):
|
||||
f.write("mod %s {\n" % mod)
|
||||
keys = tbl.keys()
|
||||
keys.sort()
|
||||
@ -193,8 +235,9 @@ for i in [r]:
|
||||
rf = open(r, "w")
|
||||
|
||||
(canon_decomp, compat_decomp, gencats) = load_unicode_data("UnicodeData.txt")
|
||||
emit_decomp_module(rf, canon_decomp, compat_decomp)
|
||||
emit_property_module(rf, "general_category", gencats)
|
||||
|
||||
#emit_decomp_module(rf, canon_decomp, compat_decomp)
|
||||
|
||||
derived = load_derived_core_properties("DerivedCoreProperties.txt")
|
||||
emit_property_module(rf, "derived_property", derived)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1227,6 +1227,46 @@ pub fn rposition_between<T>(v: &[T], start: uint, end: uint,
|
||||
None
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Binary search a sorted vector with a comparator function.
|
||||
*
|
||||
* The comparator should implement an order consistent with the sort
|
||||
* order of the underlying vector, returning an order code that indicates
|
||||
* whether its argument is `Less`, `Equal` or `Greater` the desired target.
|
||||
*
|
||||
* Returns the index where the comparator returned `Equal`, or `None` if
|
||||
* not found.
|
||||
*/
|
||||
pub fn bsearch<T>(v: &[T], f: &fn(&T) -> Ordering) -> Option<uint> {
|
||||
let mut base : uint = 0;
|
||||
let mut lim : uint = v.len();
|
||||
|
||||
while lim != 0 {
|
||||
let ix = base + (lim >> 1);
|
||||
match f(&v[ix]) {
|
||||
Equal => return Some(ix),
|
||||
Less => {
|
||||
base = ix + 1;
|
||||
lim -= 1;
|
||||
}
|
||||
Greater => ()
|
||||
}
|
||||
lim >>= 1;
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
/**
|
||||
* Binary search a sorted vector for a given element.
|
||||
*
|
||||
* Returns the index of the element or None if not found.
|
||||
*/
|
||||
pub fn bsearch_elem<T:TotalOrd>(v: &[T], x: &T) -> Option<uint> {
|
||||
bsearch(v, |p| p.cmp(x))
|
||||
}
|
||||
|
||||
// FIXME: if issue #586 gets implemented, could have a postcondition
|
||||
// saying the two result lists have the same length -- or, could
|
||||
// return a nominal record with a constraint saying that, instead of
|
||||
@ -3789,6 +3829,51 @@ mod tests {
|
||||
assert!(rfind_between(v, 4u, 4u, f).is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_bsearch_elem() {
    // Every element of a five-element vector is found at its own index.
    assert!(bsearch_elem([1,2,3,4,5], &1) == Some(0));
    assert!(bsearch_elem([1,2,3,4,5], &2) == Some(1));
    assert!(bsearch_elem([1,2,3,4,5], &3) == Some(2));
    assert!(bsearch_elem([1,2,3,4,5], &4) == Some(3));
    assert!(bsearch_elem([1,2,3,4,5], &5) == Some(4));

    // Needles outside the stored range are not found.
    assert!(bsearch_elem([1,2,3,4,5], &0) == None);
    assert!(bsearch_elem([1,2,3,4,5], &6) == None);

    // Length 5: below the range, in a gap, an interior hit, the last element.
    assert!(bsearch_elem([2,4,6,8,10], &1) == None);
    assert!(bsearch_elem([2,4,6,8,10], &5) == None);
    assert!(bsearch_elem([2,4,6,8,10], &4) == Some(1));
    assert!(bsearch_elem([2,4,6,8,10], &10) == Some(4));

    // Length 4.
    assert!(bsearch_elem([2,4,6,8], &1) == None);
    assert!(bsearch_elem([2,4,6,8], &5) == None);
    assert!(bsearch_elem([2,4,6,8], &4) == Some(1));
    assert!(bsearch_elem([2,4,6,8], &8) == Some(3));

    // Length 3.
    assert!(bsearch_elem([2,4,6], &1) == None);
    assert!(bsearch_elem([2,4,6], &5) == None);
    assert!(bsearch_elem([2,4,6], &4) == Some(1));
    assert!(bsearch_elem([2,4,6], &6) == Some(2));

    // Length 2.
    assert!(bsearch_elem([2,4], &1) == None);
    assert!(bsearch_elem([2,4], &5) == None);
    assert!(bsearch_elem([2,4], &2) == Some(0));
    assert!(bsearch_elem([2,4], &4) == Some(1));

    // Length 1.
    assert!(bsearch_elem([2], &1) == None);
    assert!(bsearch_elem([2], &5) == None);
    assert!(bsearch_elem([2], &2) == Some(0));

    // Empty vector: nothing is ever found.
    assert!(bsearch_elem([], &1) == None);
    assert!(bsearch_elem([], &5) == None);

    // Duplicates: some matching index is returned; which one is only
    // pinned down when the needle occurs exactly once.
    assert!(bsearch_elem([1,1,1,1,1], &1) != None);
    assert!(bsearch_elem([1,1,1,1,2], &1) != None);
    assert!(bsearch_elem([1,1,1,2,2], &1) != None);
    assert!(bsearch_elem([1,1,2,2,2], &1) != None);
    assert!(bsearch_elem([1,2,2,2,2], &1) == Some(0));
}
|
||||
|
||||
#[test]
|
||||
fn reverse_and_reversed() {
|
||||
let mut v: ~[int] = ~[10, 20];
|
||||
|
@ -535,7 +535,7 @@ fn ident_start(c: char) -> bool {
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > 'z' && char::is_XID_start(c))
|
||||
|| (c > '\x7f' && char::is_XID_start(c))
|
||||
}
|
||||
|
||||
fn ident_continue(c: char) -> bool {
|
||||
@ -543,7 +543,7 @@ fn ident_continue(c: char) -> bool {
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > 'z' && char::is_XID_continue(c))
|
||||
|| (c > '\x7f' && char::is_XID_continue(c))
|
||||
}
|
||||
|
||||
// return the next token from the string
|
||||
|
Loading…
Reference in New Issue
Block a user