auto merge of #14880 : SimonSapin/rust/byte-literals, r=alexcrichton
See #14646 (tracking issue) and rust-lang/rfcs#69. This does not close the tracking issue, as the `bytes!()` macro still needs to be removed. It will be later, after a snapshot is made with the changes in this PR, so that the new syntax can be used when bootstrapping the compiler.
This commit is contained in:
commit
d6736a1440
@ -234,7 +234,7 @@ rule. A literal is a form of constant expression, so is evaluated (primarily)
|
||||
at compile time.
|
||||
|
||||
~~~~ {.ebnf .gram}
|
||||
literal : string_lit | char_lit | num_lit ;
|
||||
literal : string_lit | char_lit | byte_string_lit | byte_lit | num_lit ;
|
||||
~~~~
|
||||
|
||||
#### Character and string literals
|
||||
@ -244,17 +244,17 @@ char_lit : '\x27' char_body '\x27' ;
|
||||
string_lit : '"' string_body * '"' | 'r' raw_string ;
|
||||
|
||||
char_body : non_single_quote
|
||||
| '\x5c' [ '\x27' | common_escape ] ;
|
||||
| '\x5c' [ '\x27' | common_escape | unicode_escape ] ;
|
||||
|
||||
string_body : non_double_quote
|
||||
| '\x5c' [ '\x22' | common_escape ] ;
|
||||
| '\x5c' [ '\x22' | common_escape | unicode_escape ] ;
|
||||
raw_string : '"' raw_string_body '"' | '#' raw_string '#' ;
|
||||
|
||||
common_escape : '\x5c'
|
||||
| 'n' | 'r' | 't' | '0'
|
||||
| 'x' hex_digit 2
|
||||
| 'u' hex_digit 4
|
||||
| 'U' hex_digit 8 ;
|
||||
unicode_escape : 'u' hex_digit 4
|
||||
| 'U' hex_digit 8 ;
|
||||
|
||||
hex_digit : 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
|
||||
| 'A' | 'B' | 'C' | 'D' | 'E' | 'F'
|
||||
@ -294,7 +294,7 @@ the following forms:
|
||||
escaped in order to denote *itself*.
|
||||
|
||||
Raw string literals do not process any escapes. They start with the character
|
||||
`U+0072` (`r`), followed zero or more of the character `U+0023` (`#`) and a
|
||||
`U+0072` (`r`), followed by zero or more of the character `U+0023` (`#`) and a
|
||||
`U+0022` (double-quote) character. The _raw string body_ is not defined in the
|
||||
EBNF grammar above: it can contain any sequence of Unicode characters and is
|
||||
terminated only by another `U+0022` (double-quote) character, followed by the
|
||||
@ -319,6 +319,65 @@ r##"foo #"# bar"##; // foo #"# bar
|
||||
"\\x52"; r"\x52"; // \x52
|
||||
~~~~
|
||||
|
||||
#### Byte and byte string literals
|
||||
|
||||
~~~~ {.ebnf .gram}
|
||||
byte_lit : 'b' '\x27' byte_body '\x27' ;
|
||||
byte_string_lit : 'b' '"' byte_string_body * '"' | 'b' 'r' raw_byte_string ;
|
||||
|
||||
byte_body : ascii_non_single_quote
|
||||
| '\x5c' [ '\x27' | common_escape ] ;
|
||||
|
||||
byte_string_body : ascii_non_double_quote
|
||||
| '\x5c' [ '\x22' | common_escape ] ;
|
||||
raw_byte_string : '"' raw_byte_string_body '"' | '#' raw_byte_string '#' ;
|
||||
|
||||
~~~~
|
||||
|
||||
A _byte literal_ is a single ASCII character (in the `U+0000` to `U+007F` range)
|
||||
enclosed within two `U+0027` (single-quote) characters,
|
||||
with the exception of `U+0027` itself,
|
||||
which must be _escaped_ by a preceding `U+005C` character (`\`),
|
||||
or a single _escape_.
|
||||
It is equivalent to a `u8` unsigned 8-bit integer _number literal_.
|
||||
|
||||
A _byte string literal_ is a sequence of ASCII characters and _escapes_
|
||||
enclosed within two `U+0022` (double-quote) characters,
|
||||
with the exception of `U+0022` itself,
|
||||
which must be _escaped_ by a preceding `U+005C` character (`\`),
|
||||
or a _raw byte string literal_.
|
||||
It is equivalent to a `&'static [u8]` borrowed vector of unsigned 8-bit integers.
|
||||
|
||||
Some additional _escapes_ are available in either byte or non-raw byte string
|
||||
literals. An escape starts with a `U+005C` (`\`) and continues with one of
|
||||
the following forms:
|
||||
|
||||
* A _byte escape_ starts with `U+0078` (`x`) and is
|
||||
followed by exactly two _hex digits_. It denotes the byte
|
||||
equal to the provided hex value.
|
||||
* A _whitespace escape_ is one of the characters `U+006E` (`n`), `U+0072`
|
||||
(`r`), or `U+0074` (`t`), denoting the byte values `0x0A` (ASCII LF),
|
||||
`0x0D` (ASCII CR) or `0x09` (ASCII HT) respectively.
|
||||
* The _backslash escape_ is the character `U+005C` (`\`) which must be
|
||||
escaped in order to denote its ASCII encoding `0x5C`.
|
||||
|
||||
Raw byte string literals do not process any escapes.
|
||||
They start with the character `U+0062` (`b`),
|
||||
followed by `U+0072` (`r`),
|
||||
followed by zero or more of the character `U+0023` (`#`),
|
||||
and a `U+0022` (double-quote) character.
|
||||
The _raw byte string body_ is not defined in the EBNF grammar above:
|
||||
it can contain any sequence of ASCII characters and is
|
||||
terminated only by another `U+0022` (double-quote) character, followed by the
|
||||
same number of `U+0023` (`#`) characters that preceded the opening `U+0022`
|
||||
(double-quote) character.
|
||||
A raw byte string literal cannot contain any non-ASCII byte.
|
||||
|
||||
All characters contained in the raw byte string body represent their ASCII encoding;
|
||||
the characters `U+0022` (double-quote) (except when followed by at least as
|
||||
many `U+0023` (`#`) characters as were used to start the raw string literal) or
|
||||
`U+005C` (`\`) do not have any special meaning.
|
||||
|
||||
#### Number literals
|
||||
|
||||
~~~~ {.ebnf .gram}
|
||||
|
@ -560,6 +560,8 @@ Section: Comparing strings
|
||||
|
||||
// share the implementation of the lang-item vs. non-lang-item
|
||||
// eq_slice.
|
||||
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
|
||||
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
|
||||
#[inline]
|
||||
fn eq_slice_(a: &str, b: &str) -> bool {
|
||||
#[allow(ctypes)]
|
||||
@ -572,6 +574,8 @@ fn eq_slice_(a: &str, b: &str) -> bool {
|
||||
}
|
||||
|
||||
/// Bytewise slice equality
|
||||
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
|
||||
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
|
||||
#[cfg(not(test))]
|
||||
#[lang="str_eq"]
|
||||
#[inline]
|
||||
|
@ -182,7 +182,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
|
||||
#[allow(unused_variable)]
|
||||
fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
|
||||
let mut matched = false;
|
||||
let prefix_bytes: &[u8] = &$prefix_bytes;
|
||||
let prefix_bytes: &[u8] = $prefix_bytes;
|
||||
let mut clist = &mut Threads::new(self.which);
|
||||
let mut nlist = &mut Threads::new(self.which);
|
||||
|
||||
|
@ -506,6 +506,7 @@ pub fn lit_to_const(lit: &Lit) -> const_val {
|
||||
LitBinary(ref data) => {
|
||||
const_binary(Rc::new(data.iter().map(|x| *x).collect()))
|
||||
}
|
||||
LitByte(n) => const_uint(n as u64),
|
||||
LitChar(n) => const_uint(n as u64),
|
||||
LitInt(n, _) => const_int(n),
|
||||
LitUint(n, _) => const_uint(n),
|
||||
@ -528,6 +529,7 @@ pub fn compare_const_vals(a: &const_val, b: &const_val) -> Option<int> {
|
||||
(&const_float(a), &const_float(b)) => compare_vals(a, b),
|
||||
(&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),
|
||||
(&const_bool(a), &const_bool(b)) => compare_vals(a, b),
|
||||
(&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
|
@ -805,6 +805,7 @@ fn check_type_limits(cx: &Context, e: &ast::Expr) {
|
||||
} else { t };
|
||||
let (min, max) = uint_ty_range(uint_type);
|
||||
let lit_val: u64 = match lit.node {
|
||||
ast::LitByte(_v) => return, // _v is u8, within range by definition
|
||||
ast::LitInt(v, _) => v as u64,
|
||||
ast::LitUint(v, _) => v,
|
||||
ast::LitIntUnsuffixed(v) => v as u64,
|
||||
|
@ -1273,13 +1273,24 @@ fn compare_values<'a>(
|
||||
val: bool_to_i1(result.bcx, result.val)
|
||||
}
|
||||
}
|
||||
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
|
||||
_ => cx.sess().bug("only strings supported in compare_values"),
|
||||
},
|
||||
ty::ty_rptr(_, mt) => match ty::get(mt.ty).sty {
|
||||
ty::ty_str => compare_str(cx, lhs, rhs, rhs_t),
|
||||
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
|
||||
ty::ty_vec(mt, _) => match ty::get(mt.ty).sty {
|
||||
ty::ty_uint(ast::TyU8) => {
|
||||
// NOTE: cast &[u8] to &str and abuse the str_eq lang item,
|
||||
// which calls memcmp().
|
||||
let t = ty::mk_str_slice(cx.tcx(), ty::ReStatic, ast::MutImmutable);
|
||||
let lhs = BitCast(cx, lhs, type_of::type_of(cx.ccx(), t).ptr_to());
|
||||
let rhs = BitCast(cx, rhs, type_of::type_of(cx.ccx(), t).ptr_to());
|
||||
compare_str(cx, lhs, rhs, rhs_t)
|
||||
},
|
||||
_ => cx.sess().bug("only byte strings supported in compare_values"),
|
||||
},
|
||||
_ => cx.sess().bug("on string and byte strings supported in compare_values"),
|
||||
},
|
||||
_ => cx.sess().bug("only scalars and strings supported in compare_values"),
|
||||
_ => cx.sess().bug("only scalars, byte strings, and strings supported in compare_values"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ pub fn const_lit(cx: &CrateContext, e: &ast::Expr, lit: ast::Lit)
|
||||
-> ValueRef {
|
||||
let _icx = push_ctxt("trans_lit");
|
||||
match lit.node {
|
||||
ast::LitByte(b) => C_integral(Type::uint_from_ty(cx, ast::TyU8), b as u64, false),
|
||||
ast::LitChar(i) => C_integral(Type::char(cx), i as u64, false),
|
||||
ast::LitInt(i, t) => C_integral(Type::int_from_ty(cx, t), i as u64, true),
|
||||
ast::LitUint(u, t) => C_integral(Type::uint_from_ty(cx, t), u, false),
|
||||
|
@ -1715,6 +1715,7 @@ pub fn check_lit(fcx: &FnCtxt, lit: &ast::Lit) -> ty::t {
|
||||
ast::LitBinary(..) => {
|
||||
ty::mk_slice(tcx, ty::ReStatic, ty::mt{ ty: ty::mk_u8(), mutbl: ast::MutImmutable })
|
||||
}
|
||||
ast::LitByte(_) => ty::mk_u8(),
|
||||
ast::LitChar(_) => ty::mk_char(),
|
||||
ast::LitInt(_, t) => ty::mk_mach_int(t),
|
||||
ast::LitUint(_, t) => ty::mk_mach_uint(t),
|
||||
|
@ -1924,6 +1924,14 @@ fn lit_to_str(lit: &ast::Lit) -> String {
|
||||
match lit.node {
|
||||
ast::LitStr(ref st, _) => st.get().to_string(),
|
||||
ast::LitBinary(ref data) => format!("{:?}", data.as_slice()),
|
||||
ast::LitByte(b) => {
|
||||
let mut res = String::from_str("b'");
|
||||
(b as char).escape_default(|c| {
|
||||
res.push_char(c);
|
||||
});
|
||||
res.push_char('\'');
|
||||
res
|
||||
},
|
||||
ast::LitChar(c) => format!("'{}'", c),
|
||||
ast::LitInt(i, _t) => i.to_str(),
|
||||
ast::LitUint(u, _t) => u.to_str(),
|
||||
|
@ -140,7 +140,8 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
|
||||
}
|
||||
|
||||
// text literals
|
||||
t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
|
||||
t::LIT_BYTE(..) | t::LIT_BINARY(..) | t::LIT_BINARY_RAW(..) |
|
||||
t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
|
||||
|
||||
// number literals
|
||||
t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
|
||||
|
@ -616,6 +616,7 @@ pub type Lit = Spanned<Lit_>;
|
||||
pub enum Lit_ {
|
||||
LitStr(InternedString, StrStyle),
|
||||
LitBinary(Rc<Vec<u8> >),
|
||||
LitByte(u8),
|
||||
LitChar(char),
|
||||
LitInt(i64, IntTy),
|
||||
LitUint(u64, UintTy),
|
||||
|
@ -47,6 +47,7 @@ pub fn expand_syntax_ext(cx: &mut base::ExtCtxt,
|
||||
ast::LitBool(b) => {
|
||||
accumulator.push_str(format!("{}", b).as_slice());
|
||||
}
|
||||
ast::LitByte(..) |
|
||||
ast::LitBinary(..) => {
|
||||
cx.span_err(e.span, "cannot concatenate a binary literal");
|
||||
}
|
||||
|
@ -436,6 +436,12 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
|
||||
vec!(mk_binop(cx, sp, binop)));
|
||||
}
|
||||
|
||||
LIT_BYTE(i) => {
|
||||
let e_byte = cx.expr_lit(sp, ast::LitByte(i));
|
||||
|
||||
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
|
||||
}
|
||||
|
||||
LIT_CHAR(i) => {
|
||||
let e_char = cx.expr_lit(sp, ast::LitChar(i));
|
||||
|
||||
|
@ -636,6 +636,67 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan for a single (possibly escaped) byte or char
|
||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
ascii_only: bool, delim: char) -> Option<char> {
|
||||
match first_source_char {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self.curr;
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {}, // EOF here is an error that will be checked later.
|
||||
Some(e) => {
|
||||
return Some(match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => self.scan_numeric_escape(2u, delim),
|
||||
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
|
||||
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
return None
|
||||
},
|
||||
c => {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
escaped_pos, last_pos,
|
||||
if ascii_only { "unknown byte escape" }
|
||||
else { "unknown character escape" },
|
||||
c);
|
||||
c
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
if ascii_only { "byte constant must be escaped" }
|
||||
else { "character constant must be escaped" },
|
||||
first_source_char);
|
||||
}
|
||||
_ => if ascii_only && first_source_char > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
"byte constant must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte", first_source_char);
|
||||
}
|
||||
}
|
||||
Some(first_source_char)
|
||||
}
|
||||
|
||||
fn binop(&mut self, op: token::BinOp) -> token::Token {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
@ -650,10 +711,15 @@ impl<'a> StringReader<'a> {
|
||||
/// token, and updates the interner
|
||||
fn next_token_inner(&mut self) -> token::Token {
|
||||
let c = self.curr;
|
||||
if ident_start(c) && !self.nextch_is('"') && !self.nextch_is('#') {
|
||||
if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
|
||||
// Note: r as in r" or r#" is part of a raw string literal,
|
||||
// not an identifier, and is handled further down.
|
||||
|
||||
// b as in b' is part of a byte literal.
|
||||
// They are not identifiers, and are handled further down.
|
||||
('r', Some('"'), _) | ('r', Some('#'), _) |
|
||||
('b', Some('"'), _) | ('b', Some('\''), _) |
|
||||
('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
|
||||
_ => true
|
||||
} {
|
||||
let start = self.last_pos;
|
||||
while ident_continue(self.curr) {
|
||||
self.bump();
|
||||
@ -805,43 +871,7 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
|
||||
// Otherwise it is a character constant:
|
||||
match c2 {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self.curr;
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {}
|
||||
Some(e) => {
|
||||
c2 = match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => self.scan_numeric_escape(2u, '\''),
|
||||
'u' => self.scan_numeric_escape(4u, '\''),
|
||||
'U' => self.scan_numeric_escape(8u, '\''),
|
||||
c2 => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char(escaped_pos, last_bpos,
|
||||
"unknown character escape", c2);
|
||||
c2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char( start, last_bpos,
|
||||
"character constant must be escaped", c2);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
|
||||
if !self.curr_is('\'') {
|
||||
let last_bpos = self.last_pos;
|
||||
self.fatal_span_verbose(
|
||||
@ -854,6 +884,112 @@ impl<'a> StringReader<'a> {
|
||||
self.bump(); // advance curr past token
|
||||
return token::LIT_CHAR(c2);
|
||||
}
|
||||
'b' => {
|
||||
self.bump();
|
||||
return match self.curr {
|
||||
Some('\'') => parse_byte(self),
|
||||
Some('"') => parse_byte_string(self),
|
||||
Some('r') => parse_raw_byte_string(self),
|
||||
_ => unreachable!() // Should have been a token::IDENT above.
|
||||
};
|
||||
|
||||
fn parse_byte(self_: &mut StringReader) -> token::Token {
|
||||
self_.bump();
|
||||
let start = self_.last_pos;
|
||||
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let mut c2 = self_.curr.unwrap_or('\x00');
|
||||
self_.bump();
|
||||
|
||||
c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
|
||||
if !self_.curr_is('\'') {
|
||||
// Byte offsetting here is okay because the
|
||||
// characters before position `start` are an
|
||||
// ascii single quote and ascii 'b'.
|
||||
let last_pos = self_.last_pos;
|
||||
self_.fatal_span_verbose(
|
||||
start - BytePos(2), last_pos,
|
||||
"unterminated byte constant".to_string());
|
||||
}
|
||||
self_.bump(); // advance curr past token
|
||||
return token::LIT_BYTE(c2 as u8);
|
||||
}
|
||||
|
||||
fn parse_byte_string(self_: &mut StringReader) -> token::Token {
|
||||
self_.bump();
|
||||
let start = self_.last_pos;
|
||||
let mut value = Vec::new();
|
||||
while !self_.curr_is('"') {
|
||||
if self_.is_eof() {
|
||||
let last_pos = self_.last_pos;
|
||||
self_.fatal_span(start, last_pos,
|
||||
"unterminated double quote byte string");
|
||||
}
|
||||
|
||||
let ch_start = self_.last_pos;
|
||||
let ch = self_.curr.unwrap();
|
||||
self_.bump();
|
||||
self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
|
||||
.map(|ch| value.push(ch as u8));
|
||||
}
|
||||
self_.bump();
|
||||
return token::LIT_BINARY(Rc::new(value));
|
||||
}
|
||||
|
||||
fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
|
||||
let start_bpos = self_.last_pos;
|
||||
self_.bump();
|
||||
let mut hash_count = 0u;
|
||||
while self_.curr_is('#') {
|
||||
self_.bump();
|
||||
hash_count += 1;
|
||||
}
|
||||
|
||||
if self_.is_eof() {
|
||||
let last_pos = self_.last_pos;
|
||||
self_.fatal_span(start_bpos, last_pos, "unterminated raw string");
|
||||
} else if !self_.curr_is('"') {
|
||||
let last_pos = self_.last_pos;
|
||||
let ch = self_.curr.unwrap();
|
||||
self_.fatal_span_char(start_bpos, last_pos,
|
||||
"only `#` is allowed in raw string delimitation; \
|
||||
found illegal character",
|
||||
ch);
|
||||
}
|
||||
self_.bump();
|
||||
let content_start_bpos = self_.last_pos;
|
||||
let mut content_end_bpos;
|
||||
'outer: loop {
|
||||
match self_.curr {
|
||||
None => {
|
||||
let last_pos = self_.last_pos;
|
||||
self_.fatal_span(start_bpos, last_pos, "unterminated raw string")
|
||||
},
|
||||
Some('"') => {
|
||||
content_end_bpos = self_.last_pos;
|
||||
for _ in range(0, hash_count) {
|
||||
self_.bump();
|
||||
if !self_.curr_is('#') {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
break;
|
||||
},
|
||||
Some(c) => if c > '\x7F' {
|
||||
let last_pos = self_.last_pos;
|
||||
self_.err_span_char(
|
||||
last_pos, last_pos, "raw byte string must be ASCII", c);
|
||||
}
|
||||
}
|
||||
self_.bump();
|
||||
}
|
||||
self_.bump();
|
||||
let bytes = self_.with_str_from_to(content_start_bpos,
|
||||
content_end_bpos,
|
||||
|s| s.as_bytes().to_owned());
|
||||
return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
|
||||
}
|
||||
}
|
||||
'"' => {
|
||||
let mut accum_str = String::new();
|
||||
let start_bpos = self.last_pos;
|
||||
@ -864,46 +1000,11 @@ impl<'a> StringReader<'a> {
|
||||
self.fatal_span(start_bpos, last_bpos, "unterminated double quote string");
|
||||
}
|
||||
|
||||
let ch_start = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
self.bump();
|
||||
match ch {
|
||||
'\\' => {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
self.fatal_span(start_bpos, last_bpos,
|
||||
"unterminated double quote string");
|
||||
}
|
||||
|
||||
let escaped = self.curr.unwrap();
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
'n' => accum_str.push_char('\n'),
|
||||
'r' => accum_str.push_char('\r'),
|
||||
't' => accum_str.push_char('\t'),
|
||||
'\\' => accum_str.push_char('\\'),
|
||||
'\'' => accum_str.push_char('\''),
|
||||
'"' => accum_str.push_char('"'),
|
||||
'\n' => self.consume_whitespace(),
|
||||
'0' => accum_str.push_char('\x00'),
|
||||
'x' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(2u, '"'));
|
||||
}
|
||||
'u' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(4u, '"'));
|
||||
}
|
||||
'U' => {
|
||||
accum_str.push_char(self.scan_numeric_escape(8u, '"'));
|
||||
}
|
||||
c2 => {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_char(escaped_pos, last_bpos,
|
||||
"unknown string escape", c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => accum_str.push_char(ch)
|
||||
}
|
||||
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
|
||||
.map(|ch| accum_str.push_char(ch));
|
||||
}
|
||||
self.bump();
|
||||
return token::LIT_STR(str_to_ident(accum_str.as_slice()));
|
||||
|
@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
|
||||
use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
|
||||
use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
|
||||
use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
|
||||
use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar};
|
||||
use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary};
|
||||
use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
|
||||
use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
|
||||
use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
|
||||
@ -1512,6 +1512,7 @@ impl<'a> Parser<'a> {
|
||||
// matches token_lit = LIT_INT | ...
|
||||
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
|
||||
match *tok {
|
||||
token::LIT_BYTE(i) => LitByte(i),
|
||||
token::LIT_CHAR(i) => LitChar(i),
|
||||
token::LIT_INT(i, it) => LitInt(i, it),
|
||||
token::LIT_UINT(u, ut) => LitUint(u, ut),
|
||||
@ -1528,6 +1529,8 @@ impl<'a> Parser<'a> {
|
||||
token::LIT_STR_RAW(s, n) => {
|
||||
LitStr(self.id_to_interned_str(s), ast::RawStr(n))
|
||||
}
|
||||
token::LIT_BINARY_RAW(ref v, _) |
|
||||
token::LIT_BINARY(ref v) => LitBinary(v.clone()),
|
||||
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
|
||||
_ => { self.unexpected_last(tok); }
|
||||
}
|
||||
|
@ -78,6 +78,7 @@ pub enum Token {
|
||||
DOLLAR,
|
||||
|
||||
/* Literals */
|
||||
LIT_BYTE(u8),
|
||||
LIT_CHAR(char),
|
||||
LIT_INT(i64, ast::IntTy),
|
||||
LIT_UINT(u64, ast::UintTy),
|
||||
@ -86,6 +87,8 @@ pub enum Token {
|
||||
LIT_FLOAT_UNSUFFIXED(ast::Ident),
|
||||
LIT_STR(ast::Ident),
|
||||
LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
|
||||
LIT_BINARY(Rc<Vec<u8>>),
|
||||
LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
|
||||
|
||||
/* Name components */
|
||||
// an identifier contains an "is_mod_name" boolean,
|
||||
@ -193,6 +196,14 @@ pub fn to_str(t: &Token) -> String {
|
||||
DOLLAR => "$".to_string(),
|
||||
|
||||
/* Literals */
|
||||
LIT_BYTE(b) => {
|
||||
let mut res = String::from_str("b'");
|
||||
(b as char).escape_default(|c| {
|
||||
res.push_char(c);
|
||||
});
|
||||
res.push_char('\'');
|
||||
res
|
||||
}
|
||||
LIT_CHAR(c) => {
|
||||
let mut res = String::from_str("'");
|
||||
c.escape_default(|c| {
|
||||
@ -222,17 +233,26 @@ pub fn to_str(t: &Token) -> String {
|
||||
body
|
||||
}
|
||||
LIT_STR(s) => {
|
||||
(format!("\"{}\"", get_ident(s).get().escape_default())).to_string()
|
||||
format!("\"{}\"", get_ident(s).get().escape_default())
|
||||
}
|
||||
LIT_STR_RAW(s, n) => {
|
||||
(format!("r{delim}\"{string}\"{delim}",
|
||||
delim="#".repeat(n), string=get_ident(s))).to_string()
|
||||
format!("r{delim}\"{string}\"{delim}",
|
||||
delim="#".repeat(n), string=get_ident(s))
|
||||
}
|
||||
LIT_BINARY(ref v) => {
|
||||
format!(
|
||||
"b\"{}\"",
|
||||
v.iter().map(|&b| b as char).collect::<String>().escape_default())
|
||||
}
|
||||
LIT_BINARY_RAW(ref s, n) => {
|
||||
format!("br{delim}\"{string}\"{delim}",
|
||||
delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
|
||||
}
|
||||
|
||||
/* Name components */
|
||||
IDENT(s, _) => get_ident(s).get().to_string(),
|
||||
LIFETIME(s) => {
|
||||
(format!("{}", get_ident(s))).to_string()
|
||||
format!("{}", get_ident(s))
|
||||
}
|
||||
UNDERSCORE => "_".to_string(),
|
||||
|
||||
@ -273,6 +293,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
|
||||
IDENT(_, _) => true,
|
||||
UNDERSCORE => true,
|
||||
TILDE => true,
|
||||
LIT_BYTE(_) => true,
|
||||
LIT_CHAR(_) => true,
|
||||
LIT_INT(_, _) => true,
|
||||
LIT_UINT(_, _) => true,
|
||||
@ -281,6 +302,8 @@ pub fn can_begin_expr(t: &Token) -> bool {
|
||||
LIT_FLOAT_UNSUFFIXED(_) => true,
|
||||
LIT_STR(_) => true,
|
||||
LIT_STR_RAW(_, _) => true,
|
||||
LIT_BINARY(_) => true,
|
||||
LIT_BINARY_RAW(_, _) => true,
|
||||
POUND => true,
|
||||
AT => true,
|
||||
NOT => true,
|
||||
@ -311,6 +334,7 @@ pub fn close_delimiter_for(t: &Token) -> Option<Token> {
|
||||
|
||||
pub fn is_lit(t: &Token) -> bool {
|
||||
match *t {
|
||||
LIT_BYTE(_) => true,
|
||||
LIT_CHAR(_) => true,
|
||||
LIT_INT(_, _) => true,
|
||||
LIT_UINT(_, _) => true,
|
||||
@ -319,6 +343,8 @@ pub fn is_lit(t: &Token) -> bool {
|
||||
LIT_FLOAT_UNSUFFIXED(_) => true,
|
||||
LIT_STR(_) => true,
|
||||
LIT_STR_RAW(_, _) => true,
|
||||
LIT_BINARY(_) => true,
|
||||
LIT_BINARY_RAW(_, _) => true,
|
||||
_ => false
|
||||
}
|
||||
}
|
||||
|
@ -2305,6 +2305,12 @@ impl<'a> State<'a> {
|
||||
}
|
||||
match lit.node {
|
||||
ast::LitStr(ref st, style) => self.print_string(st.get(), style),
|
||||
ast::LitByte(byte) => {
|
||||
let mut res = String::from_str("b'");
|
||||
(byte as char).escape_default(|c| res.push_char(c));
|
||||
res.push_char('\'');
|
||||
word(&mut self.s, res.as_slice())
|
||||
}
|
||||
ast::LitChar(ch) => {
|
||||
let mut res = String::from_str("'");
|
||||
ch.escape_default(|c| res.push_char(c));
|
||||
@ -2336,19 +2342,9 @@ impl<'a> State<'a> {
|
||||
ast::LitBool(val) => {
|
||||
if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }
|
||||
}
|
||||
ast::LitBinary(ref arr) => {
|
||||
try!(self.ibox(indent_unit));
|
||||
try!(word(&mut self.s, "["));
|
||||
try!(self.commasep_cmnt(Inconsistent,
|
||||
arr.as_slice(),
|
||||
|s, u| {
|
||||
word(&mut s.s,
|
||||
format!("{}",
|
||||
*u).as_slice())
|
||||
},
|
||||
|_| lit.span));
|
||||
try!(word(&mut self.s, "]"));
|
||||
self.end()
|
||||
ast::LitBinary(ref v) => {
|
||||
let escaped: String = v.iter().map(|&b| b as char).collect();
|
||||
word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
25
src/test/compile-fail/byte-literals.rs
Normal file
25
src/test/compile-fail/byte-literals.rs
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
// ignore-tidy-tab
|
||||
|
||||
static FOO: u8 = b'\f'; //~ ERROR unknown byte escape
|
||||
|
||||
pub fn main() {
|
||||
b'\f'; //~ ERROR unknown byte escape
|
||||
b'\x0Z'; //~ ERROR illegal character in numeric character escape: Z
|
||||
b' '; //~ ERROR byte constant must be escaped
|
||||
b'''; //~ ERROR byte constant must be escaped
|
||||
b'é'; //~ ERROR byte constant must be ASCII
|
||||
b'a //~ ERROR unterminated byte constant
|
||||
}
|
||||
|
||||
|
23
src/test/compile-fail/byte-string-literals.rs
Normal file
23
src/test/compile-fail/byte-string-literals.rs
Normal file
@ -0,0 +1,23 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
// ignore-tidy-tab
|
||||
|
||||
static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape
|
||||
|
||||
pub fn main() {
|
||||
b"\f"; //~ ERROR unknown byte escape
|
||||
b"\x0Z"; //~ ERROR illegal character in numeric character escape: Z
|
||||
b"é"; //~ ERROR byte constant must be ASCII
|
||||
b"a //~ ERROR unterminated double quote byte string
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,8 @@
|
||||
// except according to those terms.
|
||||
|
||||
fn main() {
|
||||
concat!(b'f'); //~ ERROR: cannot concatenate a binary literal
|
||||
concat!(b"foo"); //~ ERROR: cannot concatenate a binary literal
|
||||
concat!(foo); //~ ERROR: expected a literal
|
||||
concat!(foo()); //~ ERROR: expected a literal
|
||||
}
|
||||
|
@ -9,5 +9,5 @@
|
||||
// except according to those terms.
|
||||
|
||||
static s: &'static str =
|
||||
"\●" //~ ERROR: unknown string escape
|
||||
"\●" //~ ERROR: unknown character escape
|
||||
;
|
||||
|
16
src/test/compile-fail/raw-byte-string-eof.rs
Normal file
16
src/test/compile-fail/raw-byte-string-eof.rs
Normal file
@ -0,0 +1,16 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
pub fn main() {
|
||||
br##"a"#; //~ unterminated raw string
|
||||
}
|
||||
|
||||
|
17
src/test/compile-fail/raw-byte-string-literals.rs
Normal file
17
src/test/compile-fail/raw-byte-string-literals.rs
Normal file
@ -0,0 +1,17 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
pub fn main() {
|
||||
br"é"; //~ raw byte string must be ASCII
|
||||
br##~"a"~##; //~ only `#` is allowed in raw string delimitation
|
||||
}
|
||||
|
||||
|
56
src/test/run-pass/byte-literals.rs
Normal file
56
src/test/run-pass/byte-literals.rs
Normal file
@ -0,0 +1,56 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
static FOO: u8 = b'\xF0';
|
||||
static BAR: &'static [u8] = b"a\xF0\t";
|
||||
static BAZ: &'static [u8] = br"a\n";
|
||||
|
||||
pub fn main() {
|
||||
assert_eq!(b'a', 97u8);
|
||||
assert_eq!(b'\n', 10u8);
|
||||
assert_eq!(b'\r', 13u8);
|
||||
assert_eq!(b'\t', 9u8);
|
||||
assert_eq!(b'\\', 92u8);
|
||||
assert_eq!(b'\'', 39u8);
|
||||
assert_eq!(b'\"', 34u8);
|
||||
assert_eq!(b'\0', 0u8);
|
||||
assert_eq!(b'\xF0', 240u8);
|
||||
assert_eq!(FOO, 240u8);
|
||||
|
||||
assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]);
|
||||
|
||||
match 42 {
|
||||
b'*' => {},
|
||||
_ => fail!()
|
||||
}
|
||||
|
||||
match 100 {
|
||||
b'a' .. b'z' => {},
|
||||
_ => fail!()
|
||||
}
|
||||
|
||||
assert_eq!(b"a\n\r\t\\\'\"\0\xF0",
|
||||
&[97u8, 10u8, 13u8, 9u8, 92u8, 39u8, 34u8, 0u8, 240u8]);
|
||||
assert_eq!(b"a\
|
||||
b", &[97u8, 98u8]);
|
||||
assert_eq!(BAR, &[97u8, 240u8, 9u8]);
|
||||
|
||||
match &[97u8, 10u8] {
|
||||
b"a\n" => {},
|
||||
_ => fail!(),
|
||||
}
|
||||
|
||||
assert_eq!(BAZ, &[97u8, 92u8, 110u8]);
|
||||
assert_eq!(br"a\n", &[97u8, 92u8, 110u8]);
|
||||
assert_eq!(br"a\n", b"a\\n");
|
||||
assert_eq!(br###"a"##b"###, &[97u8, 34u8, 35u8, 35u8, 98u8]);
|
||||
assert_eq!(br###"a"##b"###, b"a\"##b");
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user