From 62a343452978f570454be38556839049aef7792b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 3 Sep 2013 19:24:12 -0400 Subject: [PATCH] stop treating char as an integer type Closes #7609 --- src/etc/unicode.py | 1 + src/libextra/ebml.rs | 4 +- src/libextra/json.rs | 11 +++-- src/libextra/terminfo/parm.rs | 10 ++-- src/libextra/url.rs | 10 ++-- src/librustc/metadata/tyencode.rs | 2 +- src/librustc/middle/check_const.rs | 16 +++--- src/librustc/middle/const_eval.rs | 1 + src/librustc/middle/lint.rs | 1 - src/librustc/middle/resolve.rs | 2 +- src/librustc/middle/trans/base.rs | 1 + src/librustc/middle/trans/consts.rs | 1 + src/librustc/middle/trans/debuginfo.rs | 3 +- src/librustc/middle/trans/expr.rs | 1 + src/librustc/middle/trans/reflect.rs | 2 +- src/librustc/middle/trans/type_.rs | 1 - src/librustc/middle/trans/type_of.rs | 2 + src/librustc/middle/ty.rs | 52 ++++++++++---------- src/librustc/middle/typeck/astconv.rs | 4 ++ src/librustc/middle/typeck/check/_match.rs | 2 +- src/librustc/middle/typeck/check/method.rs | 2 +- src/librustc/middle/typeck/check/mod.rs | 27 ++++++++--- src/librustc/middle/typeck/coherence.rs | 4 +- src/librustc/middle/typeck/infer/combine.rs | 12 ++--- src/librustc/util/ppaux.rs | 4 +- src/libstd/char.rs | 54 ++++++++++++++------- src/libstd/io.rs | 22 ++++++--- src/libstd/rand.rs | 9 +--- src/libstd/rt/io/net/ip.rs | 13 ++--- src/libstd/str.rs | 46 +++--------------- src/libstd/str/ascii.rs | 25 ++++++---- src/libstd/unicode.rs | 8 --- src/libsyntax/ast.rs | 3 +- src/libsyntax/ast_util.rs | 3 +- src/libsyntax/ext/bytes.rs | 6 ++- src/libsyntax/ext/ifmt.rs | 3 +- src/libsyntax/ext/quote.rs | 10 +++- src/libsyntax/parse/lexer.rs | 22 +++++---- src/libsyntax/parse/parser.rs | 3 +- src/libsyntax/parse/token.rs | 8 ++- src/libsyntax/print/pprust.rs | 5 +- src/test/run-pass/binops.rs | 17 ------- src/test/run-pass/cast.rs | 5 +- src/test/run-pass/shift.rs | 2 +- src/test/run-pass/utf8_chars.rs | 2 +- 45 files changed, 227 insertions(+), 215 deletions(-) diff --git a/src/etc/unicode.py b/src/etc/unicode.py index 0f6e1c4c606..d7c2016912c 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -158,6 +158,7 @@ def emit_property_module(f, mod, tbl): keys.sort() emit_bsearch_range_table(f); for cat in keys: + if cat == "Cs": continue f.write(" static %s_table : &'static [(char,char)] = &[\n" % cat) ix = 0 for pair in tbl[cat]: diff --git a/src/libextra/ebml.rs b/src/libextra/ebml.rs index f66677c21f7..3527a7b5e55 100644 --- a/src/libextra/ebml.rs +++ b/src/libextra/ebml.rs @@ -10,7 +10,6 @@ #[allow(missing_doc)]; - use std::str; // Simple Extensible Binary Markup Language (ebml) reader and writer on a @@ -90,6 +89,7 @@ pub enum EbmlEncoderTag { // -------------------------------------- pub mod reader { + use std::char; use super::*; use serialize; @@ -426,7 +426,7 @@ pub mod reader { (unsafe { transmute::(bits) }) as float } fn read_char(&mut self) -> char { - doc_as_u32(self.next_doc(EsChar)) as char + char::from_u32(doc_as_u32(self.next_doc(EsChar))).unwrap() } fn read_str(&mut self) -> ~str { self.next_doc(EsStr).as_str() diff --git a/src/libextra/json.rs b/src/libextra/json.rs index 911d53da909..bc8c08d4643 100644 --- a/src/libextra/json.rs +++ b/src/libextra/json.rs @@ -16,6 +16,8 @@ //! json parsing and serialization +use std::char; +use std::cast::transmute; use std::iterator; use std::float; use std::hashmap::HashMap; @@ -490,7 +492,7 @@ pub struct Parser { pub fn Parser>(rdr: ~T) -> Parser { let mut p = Parser { rdr: rdr, - ch: 0 as char, + ch: '\x00', line: 1, col: 0, }; @@ -517,12 +519,13 @@ impl> Parser { } impl> Parser { - fn eof(&self) -> bool { self.ch == -1 as char } + // FIXME: #8971: unsound + fn eof(&self) -> bool { self.ch == unsafe { transmute(-1u32) } } fn bump(&mut self) { match self.rdr.next() { Some(ch) => self.ch = ch, - None() => self.ch = -1 as char, + None() => self.ch = unsafe { transmute(-1u32) }, // FIXME: #8971: unsound } if self.ch == '\n' { @@ -755,7 +758,7 @@ impl> Parser { ~"invalid \\u escape (not four digits)"); } - res.push_char(n as char); + res.push_char(char::from_u32(n as u32).unwrap()); } _ => return self.error(~"invalid escape") } diff --git a/src/libextra/terminfo/parm.rs b/src/libextra/terminfo/parm.rs index 10f1b330c5b..c607ea03bb3 100644 --- a/src/libextra/terminfo/parm.rs +++ b/src/libextra/terminfo/parm.rs @@ -258,7 +258,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) ' ' => flags.space = true, '.' => fstate = FormatStatePrecision, '0'..'9' => { - flags.width = (cur - '0') as uint; + flags.width = (cur as uint - '0' as uint); fstate = FormatStateWidth; } _ => util::unreachable() @@ -330,7 +330,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) state = Nothing; } '0'..'9' => { - state = IntConstant(i*10 + ((cur - '0') as int)); + state = IntConstant(i*10 + (cur as int - '0' as int)); old_state = Nothing; } _ => return Err(~"bad int constant") @@ -358,7 +358,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) flags.space = true; } (FormatStateFlags,'0'..'9') => { - flags.width = (cur - '0') as uint; + flags.width = (cur as uint - '0' as uint); *fstate = FormatStateWidth; } (FormatStateFlags,'.') => { @@ -366,7 +366,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) } (FormatStateWidth,'0'..'9') => { let old = flags.width; - flags.width = flags.width * 10 + ((cur - '0') as uint); + flags.width = flags.width * 10 + (cur as uint - '0' as uint); if flags.width < old { return Err(~"format width overflow") } } (FormatStateWidth,'.') => { @@ -374,7 +374,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) } (FormatStatePrecision,'0'..'9') => { let old = flags.precision; - flags.precision = flags.precision * 10 + ((cur - '0') as uint); + flags.precision = flags.precision * 10 + (cur as uint - '0' as uint); if flags.precision < old { return Err(~"format precision overflow") } } _ => return Err(~"invalid format specifier") diff --git a/src/libextra/url.rs b/src/libextra/url.rs index 579e17e9f8f..8a00be0e18a 100644 --- a/src/libextra/url.rs +++ b/src/libextra/url.rs @@ -72,7 +72,7 @@ fn encode_inner(s: &str, full_url: bool) -> ~str { let mut out = ~""; while !rdr.eof() { - let ch = rdr.read_byte() as char; + let ch = rdr.read_byte() as u8 as char; match ch { // unreserved: 'A' .. 'Z' | @@ -135,7 +135,7 @@ fn decode_inner(s: &str, full_url: bool) -> ~str { match rdr.read_char() { '%' => { let bytes = rdr.read_bytes(2u); - let ch = uint::parse_bytes(bytes, 16u).unwrap() as char; + let ch = uint::parse_bytes(bytes, 16u).unwrap() as u8 as char; if full_url { // Only decode some characters: @@ -186,7 +186,7 @@ fn encode_plus(s: &str) -> ~str { let mut out = ~""; while !rdr.eof() { - let ch = rdr.read_byte() as char; + let ch = rdr.read_byte() as u8 as char; match ch { 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => { out.push_char(ch); @@ -258,7 +258,7 @@ pub fn decode_form_urlencoded(s: &[u8]) -> HashMap<~str, ~[~str]> { let ch = match ch { '%' => { let bytes = rdr.read_bytes(2u); - uint::parse_bytes(bytes, 16u).unwrap() as char + uint::parse_bytes(bytes, 16u).unwrap() as u8 as char } '+' => ' ', ch => ch @@ -295,7 +295,7 @@ fn split_char_first(s: &str, c: char) -> (~str, ~str) { do io::with_str_reader(s) |rdr| { let mut ch; while !rdr.eof() { - ch = rdr.read_byte() as char; + ch = rdr.read_byte() as u8 as char; if ch == c { // found a match, adjust markers index = rdr.tell()-1; diff --git a/src/librustc/metadata/tyencode.rs b/src/librustc/metadata/tyencode.rs index 86821200796..e81da60ed17 100644 --- a/src/librustc/metadata/tyencode.rs +++ b/src/librustc/metadata/tyencode.rs @@ -239,10 +239,10 @@ fn enc_sty(w: @io::Writer, cx: @ctxt, st: &ty::sty) { ty::ty_nil => w.write_char('n'), ty::ty_bot => w.write_char('z'), ty::ty_bool => w.write_char('b'), + ty::ty_char => w.write_char('c'), ty::ty_int(t) => { match t { ty_i => w.write_char('i'), - ty_char => w.write_char('c'), ty_i8 => w.write_str(&"MB"), ty_i16 => w.write_str(&"MW"), ty_i32 => w.write_str(&"ML"), diff --git a/src/librustc/middle/check_const.rs b/src/librustc/middle/check_const.rs index 1ef3b6b7869..26427648674 100644 --- a/src/librustc/middle/check_const.rs +++ b/src/librustc/middle/check_const.rs @@ -200,21 +200,19 @@ pub fn check_expr(v: &mut CheckCrateVisitor, } } match e.node { - ExprLit(@codemap::Spanned {node: lit_int(v, t), _}) => { - if t != ty_char { + ExprLit(@codemap::Spanned {node: lit_int(v, t), _}) => { if (v as u64) > ast_util::int_ty_max( if t == ty_i { sess.targ_cfg.int_type } else { t }) { sess.span_err(e.span, "literal out of range for its type"); } } - } - ExprLit(@codemap::Spanned {node: lit_uint(v, t), _}) => { - if v > ast_util::uint_ty_max( - if t == ty_u { sess.targ_cfg.uint_type } else { t }) { - sess.span_err(e.span, "literal out of range for its type"); + ExprLit(@codemap::Spanned {node: lit_uint(v, t), _}) => { + if v > ast_util::uint_ty_max( + if t == ty_u { sess.targ_cfg.uint_type } else { t }) { + sess.span_err(e.span, "literal out of range for its type"); + } } - } - _ => () + _ => () } visit::walk_expr(v, e, is_const); } diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs index be09dde322b..ad9a3b2f4e7 100644 --- a/src/librustc/middle/const_eval.rs +++ b/src/librustc/middle/const_eval.rs @@ -472,6 +472,7 @@ pub fn eval_const_expr_partial(tcx: &T, e: &Expr) pub fn lit_to_const(lit: &lit) -> const_val { match lit.node { lit_str(s) => const_str(s), + lit_char(n) => const_uint(n as u64), lit_int(n, _) => const_int(n), lit_uint(n, _) => const_uint(n), lit_int_unsuffixed(n) => const_int(n), diff --git a/src/librustc/middle/lint.rs b/src/librustc/middle/lint.rs index 1e1be5f7a13..6f4d94e2a64 100644 --- a/src/librustc/middle/lint.rs +++ b/src/librustc/middle/lint.rs @@ -778,7 +778,6 @@ impl TypeLimitsLintVisitor { fn int_ty_range(&mut self, int_ty: ast::int_ty) -> (i64, i64) { match int_ty { ast::ty_i => (i64::min_value, i64::max_value), - ast::ty_char => (u32::min_value as i64, u32::max_value as i64), ast::ty_i8 => (i8::min_value as i64, i8::max_value as i64), ast::ty_i16 => (i16::min_value as i64, i16::max_value as i64), ast::ty_i32 => (i32::min_value as i64, i32::max_value as i64), diff --git a/src/librustc/middle/resolve.rs b/src/librustc/middle/resolve.rs index ddf358841f7..09491bd28be 100644 --- a/src/librustc/middle/resolve.rs +++ b/src/librustc/middle/resolve.rs @@ -777,7 +777,7 @@ pub fn PrimitiveTypeTable() -> PrimitiveTypeTable { }; table.intern("bool", ty_bool); - table.intern("char", ty_int(ty_char)); + table.intern("char", ty_char); table.intern("float", ty_float(ty_f)); table.intern("f32", ty_float(ty_f32)); table.intern("f64", ty_float(ty_f64)); diff --git a/src/librustc/middle/trans/base.rs b/src/librustc/middle/trans/base.rs index 251ce697d53..2ed9af42248 100644 --- a/src/librustc/middle/trans/base.rs +++ b/src/librustc/middle/trans/base.rs @@ -566,6 +566,7 @@ pub fn compare_scalar_types(cx: @mut Block, match ty::get(t).sty { ty::ty_nil => rslt(cx, f(nil_type)), ty::ty_bool | ty::ty_ptr(_) => rslt(cx, f(unsigned_int)), + ty::ty_char => rslt(cx, f(unsigned_int)), ty::ty_int(_) => rslt(cx, f(signed_int)), ty::ty_uint(_) => rslt(cx, f(unsigned_int)), ty::ty_float(_) => rslt(cx, f(floating_point)), diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs index 64cfe13553d..096e37136ac 100644 --- a/src/librustc/middle/trans/consts.rs +++ b/src/librustc/middle/trans/consts.rs @@ -39,6 +39,7 @@ pub fn const_lit(cx: &mut CrateContext, e: &ast::Expr, lit: ast::lit) -> ValueRef { let _icx = push_ctxt("trans_lit"); match lit.node { + ast::lit_char(i) => C_integral(Type::char(), i as u64, false), ast::lit_int(i, t) => C_integral(Type::int_from_ty(cx, t), i as u64, true), ast::lit_uint(u, t) => C_integral(Type::uint_from_ty(cx, t), u, false), ast::lit_int_unsuffixed(i) => { diff --git a/src/librustc/middle/trans/debuginfo.rs b/src/librustc/middle/trans/debuginfo.rs index 71a9f4de4a0..a9e3a869be0 100644 --- a/src/librustc/middle/trans/debuginfo.rs +++ b/src/librustc/middle/trans/debuginfo.rs @@ -746,9 +746,9 @@ fn basic_type_metadata(cx: &mut CrateContext, t: ty::t) -> DIType { let (name, encoding) = match ty::get(t).sty { ty::ty_nil | ty::ty_bot => (~"uint", DW_ATE_unsigned), ty::ty_bool => (~"bool", DW_ATE_boolean), + ty::ty_char => (~"char", DW_ATE_unsigned_char), ty::ty_int(int_ty) => match int_ty { ast::ty_i => (~"int", DW_ATE_signed), - ast::ty_char => (~"char", DW_ATE_signed_char), ast::ty_i8 => (~"i8", DW_ATE_signed), ast::ty_i16 => (~"i16", DW_ATE_signed), ast::ty_i32 => (~"i32", DW_ATE_signed), @@ -1344,6 +1344,7 @@ fn type_metadata(cx: &mut CrateContext, ty::ty_nil | ty::ty_bot | ty::ty_bool | + ty::ty_char | ty::ty_int(_) | ty::ty_uint(_) | ty::ty_float(_) => { diff --git a/src/librustc/middle/trans/expr.rs b/src/librustc/middle/trans/expr.rs index 91f26b25865..ce6fb6d3e77 100644 --- a/src/librustc/middle/trans/expr.rs +++ b/src/librustc/middle/trans/expr.rs @@ -1630,6 +1630,7 @@ pub enum cast_kind { pub fn cast_type_kind(t: ty::t) -> cast_kind { match ty::get(t).sty { + ty::ty_char => cast_integral, ty::ty_float(*) => cast_float, ty::ty_ptr(*) => cast_pointer, ty::ty_rptr(*) => cast_pointer, diff --git a/src/librustc/middle/trans/reflect.rs b/src/librustc/middle/trans/reflect.rs index b83ddf27dba..fb46aefbafe 100644 --- a/src/librustc/middle/trans/reflect.rs +++ b/src/librustc/middle/trans/reflect.rs @@ -157,8 +157,8 @@ impl Reflector { ty::ty_bot => self.leaf("bot"), ty::ty_nil => self.leaf("nil"), ty::ty_bool => self.leaf("bool"), + ty::ty_char => self.leaf("char"), ty::ty_int(ast::ty_i) => self.leaf("int"), - ty::ty_int(ast::ty_char) => self.leaf("char"), ty::ty_int(ast::ty_i8) => self.leaf("i8"), ty::ty_int(ast::ty_i16) => self.leaf("i16"), ty::ty_int(ast::ty_i32) => self.leaf("i32"), diff --git a/src/librustc/middle/trans/type_.rs b/src/librustc/middle/trans/type_.rs index 281c047641c..8b221a89c36 100644 --- a/src/librustc/middle/trans/type_.rs +++ b/src/librustc/middle/trans/type_.rs @@ -119,7 +119,6 @@ impl Type { pub fn int_from_ty(ctx: &CrateContext, t: ast::int_ty) -> Type { match t { ast::ty_i => ctx.int_type, - ast::ty_char => Type::char(), ast::ty_i8 => Type::i8(), ast::ty_i16 => Type::i16(), ast::ty_i32 => Type::i32(), diff --git a/src/librustc/middle/trans/type_of.rs b/src/librustc/middle/trans/type_of.rs index 3fc5bcc8f7b..945d5a048bb 100644 --- a/src/librustc/middle/trans/type_of.rs +++ b/src/librustc/middle/trans/type_of.rs @@ -108,6 +108,7 @@ pub fn sizing_type_of(cx: &mut CrateContext, t: ty::t) -> Type { let llsizingty = match ty::get(t).sty { ty::ty_nil | ty::ty_bot => Type::nil(), ty::ty_bool => Type::bool(), + ty::ty_char => Type::char(), ty::ty_int(t) => Type::int_from_ty(cx, t), ty::ty_uint(t) => Type::uint_from_ty(cx, t), ty::ty_float(t) => Type::float_from_ty(cx, t), @@ -195,6 +196,7 @@ pub fn type_of(cx: &mut CrateContext, t: ty::t) -> Type { let mut llty = match ty::get(t).sty { ty::ty_nil | ty::ty_bot => Type::nil(), ty::ty_bool => Type::bool(), + ty::ty_char => Type::char(), ty::ty_int(t) => Type::int_from_ty(cx, t), ty::ty_uint(t) => Type::uint_from_ty(cx, t), ty::ty_float(t) => Type::float_from_ty(cx, t), diff --git a/src/librustc/middle/ty.rs b/src/librustc/middle/ty.rs index dc26350d88d..0958eeb7097 100644 --- a/src/librustc/middle/ty.rs +++ b/src/librustc/middle/ty.rs @@ -572,8 +572,8 @@ mod primitives { def_prim_ty!(TY_NIL, super::ty_nil, 0) def_prim_ty!(TY_BOOL, super::ty_bool, 1) - def_prim_ty!(TY_INT, super::ty_int(ast::ty_i), 2) - def_prim_ty!(TY_CHAR, super::ty_int(ast::ty_char), 3) + def_prim_ty!(TY_CHAR, super::ty_char, 2) + def_prim_ty!(TY_INT, super::ty_int(ast::ty_i), 3) def_prim_ty!(TY_I8, super::ty_int(ast::ty_i8), 4) def_prim_ty!(TY_I16, super::ty_int(ast::ty_i16), 5) def_prim_ty!(TY_I32, super::ty_int(ast::ty_i32), 6) @@ -609,6 +609,7 @@ pub enum sty { ty_nil, ty_bot, ty_bool, + ty_char, ty_int(ast::int_ty), ty_uint(ast::uint_ty), ty_float(ast::float_ty), @@ -1016,7 +1017,7 @@ fn mk_t(cx: ctxt, st: sty) -> t { flags |= rflags(r); flags |= get(mt.ty).flags; } - &ty_nil | &ty_bool | &ty_int(_) | &ty_float(_) | &ty_uint(_) | + &ty_nil | &ty_bool | &ty_char | &ty_int(_) | &ty_float(_) | &ty_uint(_) | &ty_estr(_) | &ty_type | &ty_opaque_closure_ptr(_) | &ty_opaque_box => (), // You might think that we could just return ty_err for @@ -1147,7 +1148,6 @@ pub fn mk_u64() -> t { mk_prim_t(&primitives::TY_U64) } pub fn mk_mach_int(tm: ast::int_ty) -> t { match tm { ast::ty_i => mk_int(), - ast::ty_char => mk_char(), ast::ty_i8 => mk_i8(), ast::ty_i16 => mk_i16(), ast::ty_i32 => mk_i32(), @@ -1303,7 +1303,7 @@ pub fn maybe_walk_ty(ty: t, f: &fn(t) -> bool) { return; } match get(ty).sty { - ty_nil | ty_bot | ty_bool | ty_int(_) | ty_uint(_) | ty_float(_) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(_) | ty_uint(_) | ty_float(_) | ty_estr(_) | ty_type | ty_opaque_box | ty_self(_) | ty_opaque_closure_ptr(_) | ty_infer(_) | ty_param(_) | ty_err => { } @@ -1400,7 +1400,7 @@ fn fold_sty(sty: &sty, fldop: &fn(t) -> t) -> sty { ty_struct(did, ref substs) => { ty_struct(did, fold_substs(substs, fldop)) } - ty_nil | ty_bot | ty_bool | ty_int(_) | ty_uint(_) | ty_float(_) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(_) | ty_uint(_) | ty_float(_) | ty_estr(_) | ty_type | ty_opaque_closure_ptr(_) | ty_err | ty_opaque_box | ty_infer(_) | ty_param(*) | ty_self(_) => { (*sty).clone() @@ -1745,7 +1745,7 @@ pub fn type_is_unique(ty: t) -> bool { */ pub fn type_is_scalar(ty: t) -> bool { match get(ty).sty { - ty_nil | ty_bool | ty_int(_) | ty_float(_) | ty_uint(_) | + ty_nil | ty_bool | ty_char | ty_int(_) | ty_float(_) | ty_uint(_) | ty_infer(IntVar(_)) | ty_infer(FloatVar(_)) | ty_type | ty_bare_fn(*) | ty_ptr(_) => true, _ => false @@ -2079,7 +2079,7 @@ pub fn type_contents(cx: ctxt, ty: t) -> TypeContents { let result = match get(ty).sty { // Scalar and unique types are sendable, freezable, and durable - ty_nil | ty_bot | ty_bool | ty_int(_) | ty_uint(_) | ty_float(_) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(_) | ty_uint(_) | ty_float(_) | ty_bare_fn(_) | ty_ptr(_) => { TC_NONE } @@ -2414,6 +2414,7 @@ pub fn is_instantiable(cx: ctxt, r_ty: t) -> bool { ty_nil | ty_bot | ty_bool | + ty_char | ty_int(_) | ty_uint(_) | ty_float(_) | @@ -2551,7 +2552,7 @@ pub fn type_is_integral(ty: t) -> bool { pub fn type_is_char(ty: t) -> bool { match get(ty).sty { - ty_int(ty_char) => true, + ty_char => true, _ => false } } @@ -2588,7 +2589,7 @@ pub fn type_is_pod(cx: ctxt, ty: t) -> bool { let mut result = true; match get(ty).sty { // Scalar types - ty_nil | ty_bot | ty_bool | ty_int(_) | ty_float(_) | ty_uint(_) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(_) | ty_float(_) | ty_uint(_) | ty_type | ty_ptr(_) | ty_bare_fn(_) => result = true, // Boxed types ty_box(_) | ty_uniq(_) | ty_closure(_) | @@ -3428,7 +3429,7 @@ pub fn occurs_check(tcx: ctxt, sp: Span, vid: TyVid, rt: t) { pub fn ty_sort_str(cx: ctxt, t: t) -> ~str { match get(t).sty { - ty_nil | ty_bot | ty_bool | ty_int(_) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(_) | ty_uint(_) | ty_float(_) | ty_estr(_) | ty_type | ty_opaque_box | ty_opaque_closure_ptr(_) => { ::util::ppaux::ty_to_str(cx, t) @@ -4262,10 +4263,11 @@ pub fn struct_fields(cx: ctxt, did: ast::DefId, substs: &substs) pub fn is_binopable(cx: ctxt, ty: t, op: ast::BinOp) -> bool { static tycat_other: int = 0; static tycat_bool: int = 1; - static tycat_int: int = 2; - static tycat_float: int = 3; - static tycat_struct: int = 4; - static tycat_bot: int = 5; + static tycat_char: int = 2; + static tycat_int: int = 3; + static tycat_float: int = 4; + static tycat_struct: int = 5; + static tycat_bot: int = 6; static opcat_add: int = 0; static opcat_sub: int = 1; @@ -4304,6 +4306,7 @@ pub fn is_binopable(cx: ctxt, ty: t, op: ast::BinOp) -> bool { return tycat(cx, simd_type(cx, ty)) } match get(ty).sty { + ty_char => tycat_char, ty_bool => tycat_bool, ty_int(_) | ty_uint(_) | ty_infer(IntVar(_)) => tycat_int, ty_float(_) | ty_infer(FloatVar(_)) => tycat_float, @@ -4316,16 +4319,15 @@ pub fn is_binopable(cx: ctxt, ty: t, op: ast::BinOp) -> bool { static t: bool = true; static f: bool = false; - let tbl = ~[ - /*. add, shift, bit - . sub, rel, logic - . mult, eq, */ - /*other*/ ~[f, f, f, f, f, f, f, f], - /*bool*/ ~[f, f, f, f, t, t, t, t], - /*int*/ ~[t, t, t, t, t, t, t, f], - /*float*/ ~[t, t, t, f, t, t, f, f], - /*bot*/ ~[f, f, f, f, f, f, f, f], - /*struct*/ ~[t, t, t, t, f, f, t, t]]; + let tbl = [ + // +, -, *, shift, rel, ==, bit, logic + /*other*/ [f, f, f, f, f, f, f, f], + /*bool*/ [f, f, f, f, t, t, t, t], + /*char*/ [f, f, f, f, t, t, f, f], + /*int*/ [t, t, t, t, t, t, t, f], + /*float*/ [t, t, t, f, t, t, f, f], + /*bot*/ [f, f, f, f, f, f, f, f], + /*struct*/ [t, t, t, t, f, f, t, t]]; return tbl[tycat(cx, ty)][opcat(op)]; } diff --git a/src/librustc/middle/typeck/astconv.rs b/src/librustc/middle/typeck/astconv.rs index c0825a9c0b6..c5f85d26e44 100644 --- a/src/librustc/middle/typeck/astconv.rs +++ b/src/librustc/middle/typeck/astconv.rs @@ -455,6 +455,10 @@ pub fn ast_ty_to_ty( check_path_args(tcx, path, NO_TPS | NO_REGIONS); ty::mk_bool() } + ast::ty_char => { + check_path_args(tcx, path, NO_TPS | NO_REGIONS); + ty::mk_char() + } ast::ty_int(it) => { check_path_args(tcx, path, NO_TPS | NO_REGIONS); ty::mk_mach_int(it) diff --git a/src/librustc/middle/typeck/check/_match.rs b/src/librustc/middle/typeck/check/_match.rs index bb3ed2e15d0..d0beb5c609e 100644 --- a/src/librustc/middle/typeck/check/_match.rs +++ b/src/librustc/middle/typeck/check/_match.rs @@ -434,7 +434,7 @@ pub fn check_pat(pcx: &pat_ctxt, pat: @ast::Pat, expected: ty::t) { || ~"mismatched types in range") { // no-op - } else if !ty::type_is_numeric(b_ty) { + } else if !ty::type_is_numeric(b_ty) && !ty::type_is_char(b_ty) { tcx.sess.span_err(pat.span, "non-numeric type used in range"); } else { match valid_range_bounds(fcx.ccx, begin, end) { diff --git a/src/librustc/middle/typeck/check/method.rs b/src/librustc/middle/typeck/check/method.rs index 75ccfdbf9dd..3588fb3f51e 100644 --- a/src/librustc/middle/typeck/check/method.rs +++ b/src/librustc/middle/typeck/check/method.rs @@ -783,7 +783,7 @@ impl<'self> LookupContext<'self> { ty_infer(IntVar(_)) | ty_infer(FloatVar(_)) | ty_self(_) | ty_param(*) | ty_nil | ty_bot | ty_bool | - ty_int(*) | ty_uint(*) | + ty_char | ty_int(*) | ty_uint(*) | ty_float(*) | ty_enum(*) | ty_ptr(*) | ty_struct(*) | ty_tup(*) | ty_estr(*) | ty_evec(*) | ty_trait(*) | ty_closure(*) => { self.search_for_some_kind_of_autorefd_method( diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs index ddf5a91202b..9afcec7c340 100644 --- a/src/librustc/middle/typeck/check/mod.rs +++ b/src/librustc/middle/typeck/check/mod.rs @@ -1021,6 +1021,7 @@ pub fn check_lit(fcx: @mut FnCtxt, lit: @ast::lit) -> ty::t { match lit.node { ast::lit_str(*) => ty::mk_estr(tcx, ty::vstore_slice(ty::re_static)), + ast::lit_char(_) => ty::mk_char(), ast::lit_int(_, t) => ty::mk_mach_int(t), ast::lit_uint(_, t) => ty::mk_mach_uint(t), ast::lit_int_unsuffixed(_) => { @@ -2695,10 +2696,20 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, }, t_e, None); } - let t_1_is_scalar = type_is_scalar(fcx, expr.span, t_1); - if type_is_c_like_enum(fcx,expr.span,t_e) - && t_1_is_scalar { - /* this case is allowed */ + let te = structurally_resolved_type(fcx, e.span, t_e); + let t_1_is_char = type_is_char(fcx, expr.span, t_1); + + // casts to scalars other than `char` are allowed + let t_1_is_trivial = type_is_scalar(fcx, expr.span, t_1) && !t_1_is_char; + + if type_is_c_like_enum(fcx, expr.span, t_e) && t_1_is_trivial { + // casts from C-like enums are allowed + } else if t_1_is_char { + if ty::get(te).sty != ty::ty_uint(ast::ty_u8) { + fcx.type_error_message(expr.span, |actual| { + fmt!("only `u8` can be cast as `char`, not `%s`", actual) + }, t_e, None); + } } else if type_is_region_ptr(fcx, expr.span, t_e) && type_is_unsafe_ptr(fcx, expr.span, t_1) { @@ -2729,7 +2740,6 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, /* this cast is only allowed from &[T] to *T or &T to *T. */ - let te = structurally_resolved_type(fcx, e.span, t_e); match (&ty::get(te).sty, &ty::get(t_1).sty) { (&ty::ty_rptr(_, mt1), &ty::ty_ptr(mt2)) if types_compatible(fcx, e.span, @@ -2741,7 +2751,7 @@ pub fn check_expr_with_unifier(fcx: @mut FnCtxt, } } } else if !(type_is_scalar(fcx,expr.span,t_e) - && t_1_is_scalar) { + && t_1_is_trivial) { /* If more type combinations should be supported than are supported here, then file an enhancement issue and @@ -3439,6 +3449,11 @@ pub fn type_is_scalar(fcx: @mut FnCtxt, sp: Span, typ: ty::t) -> bool { return ty::type_is_scalar(typ_s); } +pub fn type_is_char(fcx: @mut FnCtxt, sp: Span, typ: ty::t) -> bool { + let typ_s = structurally_resolved_type(fcx, sp, typ); + return ty::type_is_char(typ_s); +} + pub fn type_is_unsafe_ptr(fcx: @mut FnCtxt, sp: Span, typ: ty::t) -> bool { let typ_s = structurally_resolved_type(fcx, sp, typ); return ty::type_is_unsafe_ptr(typ_s); diff --git a/src/librustc/middle/typeck/coherence.rs b/src/librustc/middle/typeck/coherence.rs index 3f18b21dcd3..738ed9656e3 100644 --- a/src/librustc/middle/typeck/coherence.rs +++ b/src/librustc/middle/typeck/coherence.rs @@ -20,7 +20,7 @@ use metadata::csearch; use metadata::cstore::iter_crate_data; use middle::ty::get; use middle::ty::{ImplContainer, lookup_item_type, subst}; -use middle::ty::{substs, t, ty_bool, ty_bot, ty_box, ty_enum, ty_err}; +use middle::ty::{substs, t, ty_bool, ty_char, ty_bot, ty_box, ty_enum, ty_err}; use middle::ty::{ty_estr, ty_evec, ty_float, ty_infer, ty_int, ty_nil}; use middle::ty::{ty_opaque_box, ty_param, ty_param_bounds_and_ty, ty_ptr}; use middle::ty::{ty_rptr, ty_self, ty_struct, ty_trait, ty_tup}; @@ -81,7 +81,7 @@ pub fn get_base_type(inference_context: @mut InferCtxt, Some(resolved_type) } - ty_nil | ty_bot | ty_bool | ty_int(*) | ty_uint(*) | ty_float(*) | + ty_nil | ty_bot | ty_bool | ty_char | ty_int(*) | ty_uint(*) | ty_float(*) | ty_estr(*) | ty_evec(*) | ty_bare_fn(*) | ty_closure(*) | ty_tup(*) | ty_infer(*) | ty_param(*) | ty_self(*) | ty_type | ty_opaque_box | ty_opaque_closure_ptr(*) | ty_unboxed_vec(*) | ty_err | ty_box(_) | diff --git a/src/librustc/middle/typeck/infer/combine.rs b/src/librustc/middle/typeck/infer/combine.rs index 2dd7a4e88b1..b04719ee3ce 100644 --- a/src/librustc/middle/typeck/infer/combine.rs +++ b/src/librustc/middle/typeck/infer/combine.rs @@ -610,14 +610,10 @@ pub fn super_tys( vid: ty::IntVid, val: ty::IntVarValue) -> cres { - if val == IntType(ast::ty_char) { - Err(ty::terr_integer_as_char) - } else { - if_ok!(this.infcx().simple_var_t(vid_is_expected, vid, val)); - match val { - IntType(v) => Ok(ty::mk_mach_int(v)), - UintType(v) => Ok(ty::mk_mach_uint(v)) - } + if_ok!(this.infcx().simple_var_t(vid_is_expected, vid, val)); + match val { + IntType(v) => Ok(ty::mk_mach_int(v)), + UintType(v) => Ok(ty::mk_mach_uint(v)) } } diff --git a/src/librustc/util/ppaux.rs b/src/librustc/util/ppaux.rs index 104c3753452..4ce29f39131 100644 --- a/src/librustc/util/ppaux.rs +++ b/src/librustc/util/ppaux.rs @@ -16,7 +16,7 @@ use middle::ty::{br_fresh, ctxt, field}; use middle::ty::{mt, t, param_ty}; use middle::ty::{re_bound, re_free, re_scope, re_infer, re_static, Region, re_empty}; -use middle::ty::{ty_bool, ty_bot, ty_box, ty_struct, ty_enum}; +use middle::ty::{ty_bool, ty_char, ty_bot, ty_box, ty_struct, ty_enum}; use middle::ty::{ty_err, ty_estr, ty_evec, ty_float, ty_bare_fn, ty_closure}; use middle::ty::{ty_nil, ty_opaque_box, ty_opaque_closure_ptr, ty_param}; use middle::ty::{ty_ptr, ty_rptr, ty_self, ty_tup, ty_type, ty_uniq}; @@ -412,8 +412,8 @@ pub fn ty_to_str(cx: ctxt, typ: t) -> ~str { ty_nil => ~"()", ty_bot => ~"!", ty_bool => ~"bool", + ty_char => ~"char", ty_int(ast::ty_i) => ~"int", - ty_int(ast::ty_char) => ~"char", ty_int(t) => ast_util::int_ty_to_str(t), ty_uint(ast::ty_u) => ~"uint", ty_uint(t) => ast_util::uint_ty_to_str(t), diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 87314b98c51..7f043b2ecaa 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -10,8 +10,9 @@ //! Utilities for manipulating the char type +use cast::transmute; use option::{None, Option, Some}; -use int; +use i32; use str::StrSlice; use unicode::{derived_property, general_category, decompose}; use to_str::ToStr; @@ -64,6 +65,19 @@ static TAG_FOUR_B: uint = 240u; Cn Unassigned a reserved unassigned code point or a noncharacter */ +/// The highest valid code point +pub static MAX: char = '\U0010ffff'; + +/// Convert from `u32` to a character. +pub fn from_u32(i: u32) -> Option { + // catch out-of-bounds and surrogates + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { + None + } else { + Some(unsafe { transmute(i) }) + } +} + /// Returns whether the specified character is considered a unicode alphabetic /// character pub fn is_alphabetic(c: char) -> bool { derived_property::Alphabetic(c) } @@ -194,10 +208,12 @@ pub fn from_digit(num: uint, radix: uint) -> Option { fail!("from_digit: radix %? is to high (maximum 36)", num); } if num < radix { - if num < 10 { - Some(('0' as uint + num) as char) - } else { - Some(('a' as uint + num - 10u) as char) + unsafe { + if num < 10 { + Some(transmute(('0' as uint + num) as u32)) + } else { + Some(transmute(('a' as uint + num - 10u) as u32)) + } } } else { None @@ -220,14 +236,16 @@ fn decompose_hangul(s: char, f: &fn(char)) { let si = s as uint - S_BASE; let li = si / N_COUNT; - f((L_BASE + li) as char); + unsafe { + f(transmute((L_BASE + li) as u32)); - let vi = (si % N_COUNT) / T_COUNT; - f((V_BASE + vi) as char); + let vi = (si % N_COUNT) / T_COUNT; + f(transmute((V_BASE + vi) as u32)); - let ti = si % T_COUNT; - if ti > 0 { - f((T_BASE + ti) as char); + let ti = si % T_COUNT; + if ti > 0 { + f(transmute((T_BASE + ti) as u32)); + } } } @@ -267,10 +285,12 @@ pub fn escape_unicode(c: char, f: &fn(char)) { (c <= '\uffff') { f('u'); 4 } _ { f('U'); 8 } ); - do int::range_step(4 * (pad - 1), -1, -4) |offset| { - match ((c as u32) >> offset) & 0xf { - i @ 0 .. 9 => { f('0' + i as char); } - i => { f('a' + (i - 10) as char); } + do i32::range_step(4 * (pad - 1), -1, -4) |offset| { + unsafe { + match ((c as i32) >> offset) & 0xf { + i @ 0 .. 9 => { f(transmute('0' as i32 + i)); } + i => { f(transmute('a' as i32 + (i - 10))); } + } } true }; @@ -416,8 +436,8 @@ impl Ord for char { #[cfg(not(test))] impl Zero for char { - fn zero() -> char { 0 as char } - fn is_zero(&self) -> bool { *self == 0 as char } + fn zero() -> char { '\x00' } + fn is_zero(&self) -> bool { *self == '\x00' } } #[test] diff --git a/src/libstd/io.rs b/src/libstd/io.rs index 25b94e1e45d..59329c5bdd2 100644 --- a/src/libstd/io.rs +++ b/src/libstd/io.rs @@ -47,6 +47,7 @@ implement `Reader` and `Writer`, where appropriate. #[allow(missing_doc)]; use cast; +use cast::transmute; use clone::Clone; use c_str::ToCStr; use container::Container; @@ -661,7 +662,9 @@ impl ReaderUtil for T { i += 1; assert!((w > 0)); if w == 1 { - chars.push(b0 as char); + unsafe { + chars.push(transmute(b0 as u32)); + } loop; } // can't satisfy this char with the existing data @@ -680,7 +683,9 @@ impl ReaderUtil for T { // See str::StrSlice::char_at val += ((b0 << ((w + 1) as u8)) as uint) << (w - 1) * 6 - w - 1u; - chars.push(val as char); + unsafe { + chars.push(transmute(val as u32)); + } } return (i, 0); } @@ -712,7 +717,7 @@ impl ReaderUtil for T { fn read_char(&self) -> char { let c = self.read_chars(1); if c.len() == 0 { - return -1 as char; // FIXME will this stay valid? // #2004 + return unsafe { transmute(-1u32) }; // FIXME: #8971: unsound } assert_eq!(c.len(), 1); return c[0]; @@ -739,9 +744,11 @@ impl ReaderUtil for T { } fn each_char(&self, it: &fn(char) -> bool) -> bool { + // FIXME: #8971: unsound + let eof: char = unsafe { transmute(-1u32) }; loop { match self.read_char() { - eof if eof == (-1 as char) => break, + c if c == eof => break, ch => if !it(ch) { return false; } } } @@ -1896,6 +1903,7 @@ mod tests { use result::{Ok, Err}; use u64; use vec; + use cast::transmute; #[test] fn test_simple() { @@ -2002,7 +2010,7 @@ mod tests { #[test] fn test_readchar() { do io::with_str_reader("生") |inp| { - let res : char = inp.read_char(); + let res = inp.read_char(); assert_eq!(res as int, 29983); } } @@ -2010,8 +2018,8 @@ mod tests { #[test] fn test_readchar_empty() { do io::with_str_reader("") |inp| { - let res : char = inp.read_char(); - assert_eq!(res as int, -1); + let res = inp.read_char(); + assert_eq!(res, unsafe { transmute(-1u32) }); // FIXME: #8971: unsound } } diff --git a/src/libstd/rand.rs b/src/libstd/rand.rs index c7f3fd7740b..58c75d3b408 100644 --- a/src/libstd/rand.rs +++ b/src/libstd/rand.rs @@ -174,13 +174,6 @@ impl Rand for f64 { } } -impl Rand for char { - #[inline] - fn rand(rng: &mut R) -> char { - rng.next() as char - } -} - impl Rand for bool { #[inline] fn rand(rng: &mut R) -> bool { @@ -1137,7 +1130,7 @@ mod test { let _f : f32 = random(); let _o : Option> = random(); let _many : ((), - (~uint, @int, ~Option<~(@char, ~(@bool,))>), + (~uint, @int, ~Option<~(@u32, ~(@bool,))>), (u8, i8, u16, i16, u32, i32, u64, i64), (f32, (f64, (float,)))) = random(); } diff --git a/src/libstd/rt/io/net/ip.rs b/src/libstd/rt/io/net/ip.rs index d1d6b16e2eb..956dd08ac91 100644 --- a/src/libstd/rt/io/net/ip.rs +++ b/src/libstd/rt/io/net/ip.rs @@ -163,13 +163,14 @@ impl<'self> Parser<'self> { // Read digit fn read_digit(&mut self, radix: u8) -> Option { fn parse_digit(c: char, radix: u8) -> Option { + let c = c as u8; // assuming radix is either 10 or 16 - if c >= '0' && c <= '9' { - Some((c - '0') as u8) - } else if radix > 10 && c >= 'a' && c < 'a' + (radix - 10) as char { - Some((c - 'a' + (10 as char)) as u8) - } else if radix > 10 && c >= 'A' && c < 'A' + (radix - 10) as char { - Some((c - 'A' + (10 as char)) as u8) + if c >= '0' as u8 && c <= '9' as u8 { + Some((c - '0' as u8) as u8) + } else if radix > 10 && c >= 'a' as u8 && c < 'a' as u8 + (radix - 10) { + Some((c - 'a' as u8 + 10) as u8) + } else if radix > 10 && c >= 'A' as u8 && c < 'A' as u8 + (radix - 10) { + Some((c - 'A' as u8 + 10) as u8) } else { None } diff --git a/src/libstd/str.rs b/src/libstd/str.rs index e4d1b324e73..6a5ec9aa6d6 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -17,6 +17,7 @@ use at_vec; use cast; +use cast::transmute; use char; use char::Char; use clone::{Clone, DeepClone}; @@ -875,18 +876,18 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) { let u = v[i]; if u <= 0xD7FF_u16 || u >= 0xE000_u16 { - f(u as char); + f(unsafe { cast::transmute(u as u32) }); i += 1u; } else { let u2 = v[i+1u]; assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16); assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16); - let mut c = (u - 0xD800_u16) as char; + let mut c: u32 = (u - 0xD800_u16) as u32; c = c << 10; - c |= (u2 - 0xDC00_u16) as char; - c |= 0x1_0000_u32 as char; - f(c); + c |= (u2 - 0xDC00_u16) as u32; + c |= 0x1_0000_u32 as u32; + f(unsafe { cast::transmute(c) }); i += 2u; } } @@ -953,7 +954,6 @@ macro_rules! utf8_acc_cont_byte( ) static TAG_CONT_U8: u8 = 128u8; -static MAX_UNICODE: uint = 1114112u; /// Unsafe operations pub mod raw { @@ -1942,7 +1942,7 @@ impl<'self> StrSlice<'self> for &'self str { if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); } if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); } - return CharRange {ch: val as char, next: i + w}; + return CharRange {ch: unsafe { transmute(val as u32) }, next: i + w}; } return multibyte_char_range_at(*self, i); @@ -1980,7 +1980,7 @@ impl<'self> StrSlice<'self> for &'self str { if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); } if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); } - return CharRange {ch: val as char, next: i}; + return CharRange {ch: unsafe { transmute(val as u32) }, next: i}; } return multibyte_char_range_at_rev(*self, prev); @@ -2236,7 +2236,6 @@ impl OwnedStr for ~str { /// Appends a character to the back of a string #[inline] fn push_char(&mut self, c: char) { - assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char` let cur_len = self.len(); self.reserve_at_least(cur_len + 4); // may use up to 4 bytes @@ -2433,8 +2432,6 @@ impl Default for @str { mod tests { use container::Container; use option::{None, Some}; - use libc::c_char; - use libc; use ptr; use str::*; use vec; @@ -3178,13 +3175,6 @@ mod tests { assert!(!"".contains_char('a')); } - #[test] - fn test_map() { - #[fixed_stack_segment]; #[inline(never)]; - assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char)); - assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char)); - } - #[test] fn test_utf16() { let pairs = @@ -3903,26 +3893,6 @@ mod bench { } } - #[bench] - fn map_chars_100_ascii(bh: &mut BenchHarness) { - let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\ - HelloHelloHelloHelloHelloHelloHelloHelloHelloHello"; - do bh.iter { - s.map_chars(|c| ((c as uint) + 1) as char); - } - } - - #[bench] - fn map_chars_100_multibytes(bh: &mut BenchHarness) { - let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\ - 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\ - 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\ - 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑"; - do bh.iter { - s.map_chars(|c| ((c as uint) + 1) as char); - } - } - #[bench] fn bench_with_capacity(bh: &mut BenchHarness) { do bh.iter { diff --git a/src/libstd/str/ascii.rs b/src/libstd/str/ascii.rs index 57730349e01..4ed969f628e 100644 --- a/src/libstd/str/ascii.rs +++ b/src/libstd/str/ascii.rs @@ -149,7 +149,7 @@ impl AsciiCast for char { #[inline] fn is_ascii(&self) -> bool { - *self - ('\x7F' & *self) == '\x00' + *self as u32 - ('\x7F' as u32 & *self as u32) == 0 } } @@ -380,6 +380,7 @@ static ASCII_UPPER_MAP: &'static [u8] = &[ mod tests { use super::*; use str::from_char; + use char::from_u32; macro_rules! v2ascii ( ( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]); @@ -469,9 +470,10 @@ mod tests { let mut i = 0; while i <= 500 { - let c = i as char; - let upper = if 'a' <= c && c <= 'z' { c + 'A' - 'a' } else { c }; - assert_eq!(from_char(i as char).to_ascii_upper(), from_char(upper)) + let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 } + else { i }; + assert_eq!(from_char(from_u32(i).unwrap()).to_ascii_upper(), + from_char(from_u32(upper).unwrap())) i += 1; } } @@ -484,9 +486,10 @@ mod tests { let mut i = 0; while i <= 500 { - let c = i as char; - let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c }; - assert_eq!(from_char(i as char).to_ascii_lower(), from_char(lower)) + let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } + else { i }; + assert_eq!(from_char(from_u32(i).unwrap()).to_ascii_lower(), + from_char(from_u32(lower).unwrap())) i += 1; } } @@ -503,9 +506,11 @@ mod tests { let mut i = 0; while i <= 500 { - let c = i as char; - let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c }; - assert!(from_char(i as char).eq_ignore_ascii_case(from_char(lower))); + let c = i; + let lower = if 'A' as u32 <= c && c <= 'Z' as u32 { c + 'a' as u32 - 'A' as u32 } + else { c }; + assert!(from_char(from_u32(i).unwrap()). + eq_ignore_ascii_case(from_char(from_u32(lower).unwrap()))); i += 1; } } diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs index 6d763b58cd1..1b1e4be4ee1 100644 --- a/src/libstd/unicode.rs +++ b/src/libstd/unicode.rs @@ -56,14 +56,6 @@ pub mod general_category { bsearch_range_table(c, Co_table) } - static Cs_table : &'static [(char,char)] = &[ - ('\ud800', '\udfff') - ]; - - pub fn Cs(c: char) -> bool { - bsearch_range_table(c, Cs_table) - } - static Ll_table : &'static [(char,char)] = &[ ('\x61', '\x7a'), ('\xb5', '\xb5'), ('\xdf', '\xf6'), ('\xf8', '\xff'), diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index b411316d626..c7ebc344a9c 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -632,6 +632,7 @@ pub type lit = Spanned; #[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] pub enum lit_ { lit_str(@str), + lit_char(u32), lit_int(i64, int_ty), lit_uint(u64, uint_ty), lit_int_unsuffixed(i64), @@ -680,7 +681,6 @@ pub enum trait_method { #[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] pub enum int_ty { ty_i, - ty_char, ty_i8, ty_i16, ty_i32, @@ -737,6 +737,7 @@ pub enum prim_ty { ty_float(float_ty), ty_str, ty_bool, + ty_char } #[deriving(Clone, Eq, Encodable, Decodable, IterBytes)] diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs index 62b8fc687a6..5ee8537750e 100644 --- a/src/libsyntax/ast_util.rs +++ b/src/libsyntax/ast_util.rs @@ -151,7 +151,6 @@ pub fn is_path(e: @Expr) -> bool { pub fn int_ty_to_str(t: int_ty) -> ~str { match t { - ty_char => ~"u8", // ??? ty_i => ~"", ty_i8 => ~"i8", ty_i16 => ~"i16", @@ -164,7 +163,7 @@ pub fn int_ty_max(t: int_ty) -> u64 { match t { ty_i8 => 0x80u64, ty_i16 => 0x8000u64, - ty_i | ty_char | ty_i32 => 0x80000000u64, // actually ni about ty_i + ty_i | ty_i32 => 0x80000000u64, // actually ni about ty_i ty_i64 => 0x8000000000000000u64 } } diff --git a/src/libsyntax/ext/bytes.rs b/src/libsyntax/ext/bytes.rs index faf3e2653b9..cac311b0088 100644 --- a/src/libsyntax/ext/bytes.rs +++ b/src/libsyntax/ext/bytes.rs @@ -16,6 +16,8 @@ use ext::base::*; use ext::base; use ext::build::AstBuilder; +use std::char; + pub fn expand_syntax_ext(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) -> base::MacResult { // Gather all argument expressions let exprs = get_exprs_from_tts(cx, sp, tts); @@ -53,8 +55,8 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: Span, tts: &[ast::token_tree]) -> bas } // char literal, push to vector expression - ast::lit_int(v, ast::ty_char) => { - if (v as char).is_ascii() { + ast::lit_char(v) => { + if char::from_u32(v).unwrap().is_ascii() { bytes.push(cx.expr_u8(sp, v as u8)); } else { cx.span_err(sp, "Non-ascii char literal in bytes!") diff --git a/src/libsyntax/ext/ifmt.rs b/src/libsyntax/ext/ifmt.rs index 213aeee66c7..b63b829a392 100644 --- a/src/libsyntax/ext/ifmt.rs +++ b/src/libsyntax/ext/ifmt.rs @@ -495,8 +495,7 @@ impl Context { // Translate the format let fill = match arg.format.fill { Some(c) => c, None => ' ' }; - let fill = self.ecx.expr_lit(sp, ast::lit_int(fill as i64, - ast::ty_char)); + let fill = self.ecx.expr_lit(sp, ast::lit_char(fill as u32)); let align = match arg.format.align { parse::AlignLeft => { self.ecx.path_global(sp, parsepath("AlignLeft")) diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index 8d43872e9c1..4e8b7467c5c 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -387,10 +387,18 @@ fn mk_token(cx: @ExtCtxt, sp: Span, tok: &token::Token) -> @ast::Expr { ~[mk_binop(cx, sp, binop)]); } + LIT_CHAR(i) => { + let s_ity = ~"ty_char"; + let e_ity = cx.expr_ident(sp, id_ext(s_ity)); + + let e_char = cx.expr_lit(sp, ast::lit_char(i)); + + return cx.expr_call_ident(sp, id_ext("LIT_CHAR"), ~[e_char, e_ity]); + } + LIT_INT(i, ity) => { let s_ity = match ity { ast::ty_i => ~"ty_i", - ast::ty_char => ~"ty_char", ast::ty_i8 => ~"ty_i8", ast::ty_i16 => ~"ty_i16", ast::ty_i32 => ~"ty_i32", diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 26d66cfcaab..9dfea678b87 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -17,6 +17,7 @@ use ext::tt::transcribe::{dup_tt_reader}; use parse::token; use parse::token::{str_to_ident}; +use std::cast::transmute; use std::char; use std::either; use std::u64; @@ -184,7 +185,7 @@ pub fn bump(rdr: &mut StringReader) { rdr.last_pos = rdr.pos; let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint(); if current_byte_offset < (rdr.src).len() { - assert!(rdr.curr != -1 as char); + assert!(rdr.curr != unsafe { transmute(-1u32) }); // FIXME: #8971: unsound let last_char = rdr.curr; let next = rdr.src.char_range_at(current_byte_offset); let byte_offset_diff = next.next - current_byte_offset; @@ -201,17 +202,17 @@ pub fn bump(rdr: &mut StringReader) { BytePos(current_byte_offset), byte_offset_diff); } } else { - rdr.curr = -1 as char; + rdr.curr = unsafe { transmute(-1u32) }; // FIXME: #8971: unsound } } pub fn is_eof(rdr: @mut StringReader) -> bool { - rdr.curr == -1 as char + rdr.curr == unsafe { transmute(-1u32) } // FIXME: #8971: unsound } pub fn nextch(rdr: @mut StringReader) -> char { let offset = byte_offset(rdr, rdr.pos).to_uint(); if offset < (rdr.src).len() { return rdr.src.char_at(offset); - } else { return -1 as char; } + } else { return unsafe { transmute(-1u32) }; } // FIXME: #8971: unsound } fn dec_digit_val(c: char) -> int { return (c as int) - ('0' as int); } @@ -532,7 +533,10 @@ fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char { accum_int += hex_digit_val(n); i -= 1u; } - return accum_int as char; + match char::from_u32(accum_int as u32) { + Some(x) => x, + None => rdr.fatal(fmt!("illegal numeric character escape")) + } } fn ident_start(c: char) -> bool { @@ -707,7 +711,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { rdr.fatal(~"unterminated character constant"); } bump(rdr); // advance curr past token - return token::LIT_INT(c2 as i64, ast::ty_char); + return token::LIT_CHAR(c2 as u32); } '"' => { let mut accum_str = ~""; @@ -891,21 +895,21 @@ mod test { let env = setup(@"'a'"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); - assert_eq!(tok,token::LIT_INT('a' as i64, ast::ty_char)); + assert_eq!(tok,token::LIT_CHAR('a' as u32)); } #[test] fn character_space() { let env = setup(@"' '"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); - assert_eq!(tok, token::LIT_INT(' ' as i64, ast::ty_char)); + assert_eq!(tok, token::LIT_CHAR(' ' as u32)); } #[test] fn character_escaped() { let env = setup(@"'\n'"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); - assert_eq!(tok, token::LIT_INT('\n' as i64, ast::ty_char)); + assert_eq!(tok, token::LIT_CHAR('\n' as u32)); } #[test] fn lifetime_name() { diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index d40a71a80e4..51c5522ae2f 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -37,7 +37,7 @@ use ast::{foreign_item, foreign_item_static, foreign_item_fn, foreign_mod}; use ast::{Ident, impure_fn, inherited, item, item_, item_static}; use ast::{item_enum, item_fn, item_foreign_mod, item_impl}; use ast::{item_mac, item_mod, item_struct, item_trait, item_ty, lit, lit_}; -use ast::{lit_bool, lit_float, lit_float_unsuffixed, lit_int}; +use ast::{lit_bool, lit_float, lit_float_unsuffixed, lit_int, lit_char}; use ast::{lit_int_unsuffixed, lit_nil, lit_str, lit_uint, Local}; use ast::{MutImmutable, MutMutable, mac_, mac_invoc_tt, matcher, match_nonterminal}; use ast::{match_seq, match_tok, method, mt, BiMul, Mutability}; @@ -1334,6 +1334,7 @@ impl Parser { // matches token_lit = LIT_INT | ... pub fn lit_from_token(&self, tok: &token::Token) -> lit_ { match *tok { + token::LIT_CHAR(i) => lit_char(i), token::LIT_INT(i, it) => lit_int(i, it), token::LIT_UINT(u, ut) => lit_uint(u, ut), token::LIT_INT_UNSUFFIXED(i) => lit_int_unsuffixed(i), diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 1092b2ddf57..591b4b10bd3 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -15,6 +15,7 @@ use parse::token; use util::interner::StrInterner; use util::interner; +use std::char; use std::cmp::Equiv; use std::local_data; use std::rand; @@ -73,6 +74,7 @@ pub enum Token { DOLLAR, /* Literals */ + LIT_CHAR(u32), LIT_INT(i64, ast::int_ty), LIT_UINT(u64, ast::uint_ty), LIT_INT_UNSUFFIXED(i64), @@ -164,9 +166,9 @@ pub fn to_str(input: @ident_interner, t: &Token) -> ~str { DOLLAR => ~"$", /* Literals */ - LIT_INT(c, ast::ty_char) => { + LIT_CHAR(c) => { let mut res = ~"'"; - do (c as char).escape_default |c| { + do char::from_u32(c).unwrap().escape_default |c| { res.push_char(c); } res.push_char('\''); @@ -236,6 +238,7 @@ pub fn can_begin_expr(t: &Token) -> bool { IDENT(_, _) => true, UNDERSCORE => true, TILDE => true, + LIT_CHAR(_) => true, LIT_INT(_, _) => true, LIT_UINT(_, _) => true, LIT_INT_UNSUFFIXED(_) => true, @@ -276,6 +279,7 @@ pub fn flip_delimiter(t: &token::Token) -> token::Token { pub fn is_lit(t: &Token) -> bool { match *t { + LIT_CHAR(_) => true, LIT_INT(_, _) => true, LIT_UINT(_, _) => true, LIT_INT_UNSUFFIXED(_) => true, diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index e1a0e1bf08d..32cf30fd3a0 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -27,6 +27,7 @@ use print::pp::{breaks, consistent, inconsistent, eof}; use print::pp; use print::pprust; +use std::char; use std::io; // The @ps is stored here to prevent recursive type. @@ -2044,9 +2045,9 @@ pub fn print_literal(s: @ps, lit: &ast::lit) { } match lit.node { ast::lit_str(st) => print_string(s, st), - ast::lit_int(ch, ast::ty_char) => { + ast::lit_char(ch) => { let mut res = ~"'"; - do (ch as char).escape_default |c| { + do char::from_u32(ch).unwrap().escape_default |c| { res.push_char(c); } res.push_char('\''); diff --git a/src/test/run-pass/binops.rs b/src/test/run-pass/binops.rs index eb0f04b8b7d..333794e98bf 100644 --- a/src/test/run-pass/binops.rs +++ b/src/test/run-pass/binops.rs @@ -42,22 +42,6 @@ fn test_bool() { assert_eq!(true ^ true, false); } -fn test_char() { - let ch10 = 10 as char; - let ch4 = 4 as char; - let ch2 = 2 as char; - assert_eq!(ch10 + ch4, 14 as char); - assert_eq!(ch10 - ch4, 6 as char); - assert_eq!(ch10 * ch4, 40 as char); - assert_eq!(ch10 / ch4, ch2); - assert_eq!(ch10 % ch4, ch2); - assert_eq!(ch10 >> ch2, ch2); - assert_eq!(ch10 << ch4, 160 as char); - assert_eq!(ch10 | ch4, 14 as char); - assert_eq!(ch10 & ch2, ch2); - assert_eq!(ch10 ^ ch2, 8 as char); -} - fn test_box() { assert_eq!(@10, @10); } @@ -111,7 +95,6 @@ fn test_class() { pub fn main() { test_nil(); test_bool(); - test_char(); test_box(); test_ptr(); test_class(); diff --git a/src/test/run-pass/cast.rs b/src/test/run-pass/cast.rs index 10407420a43..00d3155cd98 100644 --- a/src/test/run-pass/cast.rs +++ b/src/test/run-pass/cast.rs @@ -8,9 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. - - - // -*- rust -*- pub fn main() { let i: int = 'Q' as int; @@ -20,7 +17,7 @@ pub fn main() { assert_eq!(u, 'Q' as u32); assert_eq!(i as u8, 'Q' as u8); assert_eq!(i as u8 as i8, 'Q' as u8 as i8); - assert_eq!(0x51 as char, 'Q'); + assert_eq!(0x51u8 as char, 'Q'); assert_eq!(true, 1 as bool); assert_eq!(0 as u32, false as u32); } diff --git a/src/test/run-pass/shift.rs b/src/test/run-pass/shift.rs index 7b676f05c90..945bb885ad1 100644 --- a/src/test/run-pass/shift.rs +++ b/src/test/run-pass/shift.rs @@ -18,7 +18,7 @@ pub fn main() { } fn test_misc() { - assert_eq!(1 << 1i8 << 1u8 << 1i16 << 1 as char << 1u64, 32); + assert_eq!(1 << 1i8 << 1u8 << 1i16 << 1u8 << 1u64, 32); } fn test_expr() { diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 881606d673c..db258d48f9f 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -14,7 +14,7 @@ use std::str; pub fn main() { // Chars of 1, 2, 3, and 4 bytes - let chs: ~[char] = ~['e', 'é', '€', 0x10000 as char]; + let chs: ~[char] = ~['e', 'é', '€', '\U00010000']; let s: ~str = str::from_chars(chs); let schs: ~[char] = s.iter().collect();