introduce unescape module
Currently, we deal with escape sequences twice: once when we lex a string, and a second time when we unescape literals. This PR aims to remove this duplication by introducing a new `unescape` module as the single source of truth for character escaping rules.
This commit is contained in:
parent
9b67bd42b7
commit
bfa5f27847
@ -184,7 +184,7 @@ impl<'a> DiagnosticBuilder<'a> {
|
||||
) -> &mut Self);
|
||||
forward!(pub fn warn(&mut self, msg: &str) -> &mut Self);
|
||||
forward!(pub fn span_warn<S: Into<MultiSpan>>(&mut self, sp: S, msg: &str) -> &mut Self);
|
||||
forward!(pub fn help(&mut self , msg: &str) -> &mut Self);
|
||||
forward!(pub fn help(&mut self, msg: &str) -> &mut Self);
|
||||
forward!(pub fn span_help<S: Into<MultiSpan>>(&mut self,
|
||||
sp: S,
|
||||
msg: &str,
|
||||
|
@ -1,8 +1,10 @@
|
||||
use crate::ast::{self, Ident};
|
||||
use crate::parse::{token, ParseSess};
|
||||
use crate::symbol::Symbol;
|
||||
use crate::parse::unescape;
|
||||
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
|
||||
|
||||
use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder};
|
||||
use errors::{FatalError, Diagnostic, DiagnosticBuilder};
|
||||
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
|
||||
use core::unicode::property::Pattern_White_Space;
|
||||
|
||||
@ -334,25 +336,12 @@ impl<'a> StringReader<'a> {
|
||||
self.err_span(self.mk_sp(from_pos, to_pos), m)
|
||||
}
|
||||
|
||||
/// Appends `c` to the error-message buffer `m`.
///
/// Characters from U+0020 through U+007E are appended verbatim, so `\`, `'`
/// and `"` stay unescaped in user-facing text; anything else is appended in
/// its `char::escape_default` form.
fn push_escaped_char_for_msg(m: &mut String, c: char) {
    let printable_ascii = c >= '\u{20}' && c <= '\u{7e}';
    if printable_ascii {
        m.push(c);
        return;
    }
    for escaped in c.escape_default() {
        m.push(escaped);
    }
}
|
||||
|
||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
||||
/// escaped character to the error message
|
||||
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
Self::push_escaped_char_for_msg(&mut m, c);
|
||||
push_escaped_char(&mut m, c);
|
||||
|
||||
self.fatal_span_(from_pos, to_pos, &m[..])
|
||||
}
|
||||
@ -368,7 +357,7 @@ impl<'a> StringReader<'a> {
|
||||
{
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
Self::push_escaped_char_for_msg(&mut m, c);
|
||||
push_escaped_char(&mut m, c);
|
||||
|
||||
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
|
||||
}
|
||||
@ -378,29 +367,10 @@ impl<'a> StringReader<'a> {
|
||||
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
Self::push_escaped_char_for_msg(&mut m, c);
|
||||
push_escaped_char(&mut m, c);
|
||||
self.err_span_(from_pos, to_pos, &m[..]);
|
||||
}
|
||||
|
||||
fn struct_err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
|
||||
-> DiagnosticBuilder<'a>
|
||||
{
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
Self::push_escaped_char_for_msg(&mut m, c);
|
||||
|
||||
self.sess.span_diagnostic.struct_span_err(self.mk_sp(from_pos, to_pos), &m[..])
|
||||
}
|
||||
|
||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
|
||||
/// offending string to the error message
|
||||
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
|
||||
m.push_str(": ");
|
||||
m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
|
||||
|
||||
self.fatal_span_(from_pos, to_pos, &m[..])
|
||||
}
|
||||
|
||||
/// Advance peek_tok and peek_span to refer to the next token, and
|
||||
/// possibly update the interner.
|
||||
fn advance_token(&mut self) -> Result<(), ()> {
|
||||
@ -863,271 +833,6 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
|
||||
/// error if too many or too few digits are encountered.
|
||||
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
|
||||
debug!("scanning {} digits until {:?}", n_digits, delim);
|
||||
let start_bpos = self.pos;
|
||||
let mut accum_int = 0;
|
||||
|
||||
let mut valid = true;
|
||||
for _ in 0..n_digits {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.pos;
|
||||
self.fatal_span_(start_bpos,
|
||||
last_bpos,
|
||||
"unterminated numeric character escape").raise();
|
||||
}
|
||||
if self.ch_is(delim) {
|
||||
let last_bpos = self.pos;
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"numeric character escape is too short");
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
let c = self.ch.unwrap_or('\x00');
|
||||
accum_int *= 16;
|
||||
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
||||
self.err_span_char(self.pos,
|
||||
self.next_pos,
|
||||
"invalid character in numeric character escape",
|
||||
c);
|
||||
|
||||
valid = false;
|
||||
0
|
||||
});
|
||||
self.bump();
|
||||
}
|
||||
|
||||
if below_0x7f_only && accum_int >= 0x80 {
|
||||
self.err_span_(start_bpos,
|
||||
self.pos,
|
||||
"this form of character escape may only be used with characters in \
|
||||
the range [\\x00-\\x7f]");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
match char::from_u32(accum_int) {
|
||||
Some(_) => valid,
|
||||
None => {
|
||||
let last_bpos = self.pos;
|
||||
self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan for a single (possibly escaped) byte or char
|
||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||
///
|
||||
/// Returns `true` if there was a valid char/byte.
|
||||
fn scan_char_or_byte(&mut self,
|
||||
start: BytePos,
|
||||
first_source_char: char,
|
||||
ascii_only: bool,
|
||||
delim: char)
|
||||
-> bool
|
||||
{
|
||||
match first_source_char {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
let escaped = self.ch;
|
||||
let escaped_pos = self.pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {} // EOF here is an error that will be checked later.
|
||||
Some(e) => {
|
||||
return match e {
|
||||
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
||||
'x' => self.scan_byte_escape(delim, !ascii_only),
|
||||
'u' => {
|
||||
let valid = if self.ch_is('{') {
|
||||
self.scan_unicode_escape(delim) && !ascii_only
|
||||
} else {
|
||||
let span = self.mk_sp(start, self.pos);
|
||||
let mut suggestion = "\\u{".to_owned();
|
||||
let msg = "incorrect unicode escape sequence";
|
||||
let mut err = self.sess.span_diagnostic.struct_span_err(
|
||||
span,
|
||||
msg,
|
||||
);
|
||||
let mut i = 0;
|
||||
while let (Some(ch), true) = (self.ch, i < 6) {
|
||||
if ch.is_digit(16) {
|
||||
suggestion.push(ch);
|
||||
self.bump();
|
||||
i += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if i != 0 {
|
||||
suggestion.push('}');
|
||||
err.span_suggestion(
|
||||
self.mk_sp(start, self.pos),
|
||||
"format of unicode escape sequences uses braces",
|
||||
suggestion,
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
} else {
|
||||
err.span_label(span, msg);
|
||||
err.help(
|
||||
"format of unicode escape sequences is `\\u{...}`",
|
||||
);
|
||||
}
|
||||
err.emit();
|
||||
false
|
||||
};
|
||||
if ascii_only {
|
||||
self.err_span_(start,
|
||||
self.pos,
|
||||
"unicode escape sequences cannot be used as a \
|
||||
byte or in a byte string");
|
||||
}
|
||||
valid
|
||||
|
||||
}
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
true
|
||||
}
|
||||
'\r' if delim == '"' && self.ch_is('\n') => {
|
||||
self.consume_whitespace();
|
||||
true
|
||||
}
|
||||
c => {
|
||||
let pos = self.pos;
|
||||
let msg = if ascii_only {
|
||||
"unknown byte escape"
|
||||
} else {
|
||||
"unknown character escape"
|
||||
};
|
||||
let mut err = self.struct_err_span_char(escaped_pos, pos, msg, c);
|
||||
err.span_label(self.mk_sp(escaped_pos, pos), msg);
|
||||
if e == '\r' {
|
||||
err.help(
|
||||
"this is an isolated carriage return; consider checking \
|
||||
your editor and version control settings",
|
||||
);
|
||||
}
|
||||
if (e == '{' || e == '}') && !ascii_only {
|
||||
err.help(
|
||||
"if used in a formatting string, curly braces are escaped \
|
||||
with `{{` and `}}`",
|
||||
);
|
||||
}
|
||||
err.emit();
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
||||
let pos = self.pos;
|
||||
self.err_span_char(start,
|
||||
pos,
|
||||
if ascii_only {
|
||||
"byte constant must be escaped"
|
||||
} else {
|
||||
"character constant must be escaped"
|
||||
},
|
||||
first_source_char);
|
||||
return false;
|
||||
}
|
||||
'\r' => {
|
||||
if self.ch_is('\n') {
|
||||
self.bump();
|
||||
return true;
|
||||
} else {
|
||||
self.err_span_(start,
|
||||
self.pos,
|
||||
"bare CR not allowed in string, use \\r instead");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if ascii_only && first_source_char > '\x7F' {
|
||||
let pos = self.pos;
|
||||
self.err_span_(start,
|
||||
pos,
|
||||
"byte constant must be ASCII. Use a \\xHH escape for a \
|
||||
non-ASCII byte");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Scan over a `\u{...}` escape
|
||||
///
|
||||
/// At this point, we have already seen the `\` and the `u`, the `{` is the current character.
|
||||
/// We will read a hex number (with `_` separators), with 1 to 6 actual digits,
|
||||
/// and pass over the `}`.
|
||||
fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
||||
self.bump(); // past the {
|
||||
let start_bpos = self.pos;
|
||||
let mut valid = true;
|
||||
|
||||
if let Some('_') = self.ch {
|
||||
// disallow leading `_`
|
||||
self.err_span_(self.pos,
|
||||
self.next_pos,
|
||||
"invalid start of unicode escape");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
let count = self.scan_digits(16, 16);
|
||||
|
||||
if count > 6 {
|
||||
self.err_span_(start_bpos,
|
||||
self.pos,
|
||||
"overlong unicode escape (must have at most 6 hex digits)");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
loop {
|
||||
match self.ch {
|
||||
Some('}') => {
|
||||
if valid && count == 0 {
|
||||
self.err_span_(start_bpos,
|
||||
self.pos,
|
||||
"empty unicode escape (must have at least 1 hex digit)");
|
||||
valid = false;
|
||||
}
|
||||
self.bump(); // past the ending `}`
|
||||
break;
|
||||
},
|
||||
Some(c) => {
|
||||
if c == delim {
|
||||
self.err_span_(self.pos,
|
||||
self.pos,
|
||||
"unterminated unicode escape (needed a `}`)");
|
||||
valid = false;
|
||||
break;
|
||||
} else if valid {
|
||||
self.err_span_char(start_bpos,
|
||||
self.pos,
|
||||
"invalid character in unicode escape",
|
||||
c);
|
||||
valid = false;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
self.fatal_span_(start_bpos,
|
||||
self.pos,
|
||||
"unterminated unicode escape (found EOF)").raise();
|
||||
}
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
|
||||
valid
|
||||
}
|
||||
|
||||
/// Scan over a float exponent.
|
||||
fn scan_float_exponent(&mut self) {
|
||||
if self.ch_is('e') || self.ch_is('E') {
|
||||
@ -1393,26 +1098,21 @@ impl<'a> StringReader<'a> {
|
||||
self.bump();
|
||||
let start = self.pos;
|
||||
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let c2 = self.ch.unwrap_or('\x00');
|
||||
self.bump();
|
||||
|
||||
// If the character is an ident start not followed by another single
|
||||
// quote, then this is a lifetime name:
|
||||
if (ident_start(Some(c2)) || c2.is_numeric()) && !self.ch_is('\'') {
|
||||
let starts_with_number = self.ch.unwrap_or('\x00').is_numeric();
|
||||
if (ident_start(self.ch) || starts_with_number) && !self.nextch_is('\'') {
|
||||
self.bump();
|
||||
while ident_continue(self.ch) {
|
||||
self.bump();
|
||||
}
|
||||
// lifetimes shouldn't end with a single quote
|
||||
// if we find one, then this is an invalid character literal
|
||||
if self.ch_is('\'') {
|
||||
self.err_span_(
|
||||
start_with_quote,
|
||||
self.next_pos,
|
||||
"character literal may only contain one codepoint");
|
||||
let id = self.name_from(start);
|
||||
self.bump();
|
||||
return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
|
||||
|
||||
self.validate_char_escape(start_with_quote);
|
||||
return Ok(token::Literal(token::Char(id), None))
|
||||
}
|
||||
|
||||
// Include the leading `'` in the real identifier, for macro
|
||||
@ -1422,7 +1122,7 @@ impl<'a> StringReader<'a> {
|
||||
self.mk_ident(lifetime_name)
|
||||
});
|
||||
|
||||
if c2.is_numeric() {
|
||||
if starts_with_number {
|
||||
// this is a recovered lifetime written `'1`, error but accept it
|
||||
self.err_span_(
|
||||
start_with_quote,
|
||||
@ -1433,58 +1133,30 @@ impl<'a> StringReader<'a> {
|
||||
|
||||
return Ok(token::Lifetime(ident));
|
||||
}
|
||||
|
||||
let valid = self.scan_char_or_byte(start, c2, /* ascii_only */ false, '\'');
|
||||
|
||||
if !self.ch_is('\'') {
|
||||
let pos = self.pos;
|
||||
|
||||
loop {
|
||||
self.bump();
|
||||
if self.ch_is('\'') {
|
||||
let start = self.src_index(start);
|
||||
let end = self.src_index(self.pos);
|
||||
self.bump();
|
||||
let span = self.mk_sp(start_with_quote, self.pos);
|
||||
self.sess.span_diagnostic
|
||||
.struct_span_err(span,
|
||||
"character literal may only contain one codepoint")
|
||||
.span_suggestion(
|
||||
span,
|
||||
"if you meant to write a `str` literal, use double quotes",
|
||||
format!("\"{}\"", &self.src[start..end]),
|
||||
Applicability::MachineApplicable
|
||||
).emit();
|
||||
return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
|
||||
}
|
||||
if self.ch_is('\n') || self.is_eof() || self.ch_is('/') {
|
||||
// Only attempt to infer single line string literals. If we encounter
|
||||
// a slash, bail out in order to avoid nonsensical suggestion when
|
||||
// involving comments.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
self.fatal_span_verbose(start_with_quote, pos,
|
||||
String::from("character literal may only contain one codepoint")).raise();
|
||||
}
|
||||
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
Symbol::intern("0")
|
||||
};
|
||||
|
||||
self.bump(); // advance ch past token
|
||||
let msg = "unterminated character literal";
|
||||
let id = self.scan_single_quoted_string(start_with_quote, msg);
|
||||
self.validate_char_escape(start_with_quote);
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
|
||||
Ok(token::Literal(token::Char(id), suffix))
|
||||
}
|
||||
'b' => {
|
||||
self.bump();
|
||||
let lit = match self.ch {
|
||||
Some('\'') => self.scan_byte(),
|
||||
Some('"') => self.scan_byte_string(),
|
||||
Some('\'') => {
|
||||
let start_with_quote = self.pos;
|
||||
self.bump();
|
||||
let msg = "unterminated byte constant";
|
||||
let id = self.scan_single_quoted_string(start_with_quote, msg);
|
||||
self.validate_byte_escape(start_with_quote);
|
||||
token::Byte(id)
|
||||
},
|
||||
Some('"') => {
|
||||
let start_with_quote = self.pos;
|
||||
let msg = "unterminated double quote byte string";
|
||||
let id = self.scan_double_quoted_string(msg);
|
||||
self.validate_byte_str_escape(start_with_quote);
|
||||
token::ByteStr(id)
|
||||
},
|
||||
Some('r') => self.scan_raw_byte_string(),
|
||||
_ => unreachable!(), // Should have been a token::Ident above.
|
||||
};
|
||||
@ -1493,32 +1165,11 @@ impl<'a> StringReader<'a> {
|
||||
Ok(token::Literal(lit, suffix))
|
||||
}
|
||||
'"' => {
|
||||
let start_bpos = self.pos;
|
||||
let mut valid = true;
|
||||
self.bump();
|
||||
|
||||
while !self.ch_is('"') {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.pos;
|
||||
self.fatal_span_(start_bpos,
|
||||
last_bpos,
|
||||
"unterminated double quote string").raise();
|
||||
}
|
||||
|
||||
let ch_start = self.pos;
|
||||
let ch = self.ch.unwrap();
|
||||
self.bump();
|
||||
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only */ false, '"');
|
||||
}
|
||||
// adjust for the ASCII " at the start of the literal
|
||||
let id = if valid {
|
||||
self.name_from(start_bpos + BytePos(1))
|
||||
} else {
|
||||
Symbol::intern("??")
|
||||
};
|
||||
self.bump();
|
||||
let start_with_quote = self.pos;
|
||||
let msg = "unterminated double quote string";
|
||||
let id = self.scan_double_quoted_string(msg);
|
||||
self.validate_str_escape(start_with_quote);
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
|
||||
Ok(token::Literal(token::Str_(id), suffix))
|
||||
}
|
||||
'r' => {
|
||||
@ -1659,12 +1310,6 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn consume_whitespace(&mut self) {
|
||||
while is_pattern_whitespace(self.ch) && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn read_to_eol(&mut self) -> String {
|
||||
let mut val = String::new();
|
||||
while !self.ch_is('\n') && !self.is_eof() {
|
||||
@ -1698,73 +1343,63 @@ impl<'a> StringReader<'a> {
|
||||
(self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
||||
}
|
||||
|
||||
fn scan_byte(&mut self) -> token::Lit {
|
||||
self.bump();
|
||||
fn scan_single_quoted_string(&mut self,
|
||||
start_with_quote: BytePos,
|
||||
unterminated_msg: &str) -> ast::Name {
|
||||
// assumes that first `'` is consumed
|
||||
let start = self.pos;
|
||||
// lex `'''` as a single char, for recovery
|
||||
if self.ch_is('\'') && self.nextch_is('\'') {
|
||||
self.bump();
|
||||
} else {
|
||||
let mut first = true;
|
||||
loop {
|
||||
if self.ch_is('\'') {
|
||||
break;
|
||||
}
|
||||
if self.ch_is('\\') && (self.nextch_is('\'') || self.nextch_is('\\')) {
|
||||
self.bump();
|
||||
self.bump();
|
||||
} else {
|
||||
// Only attempt to infer single line string literals. If we encounter
|
||||
// a slash, bail out in order to avoid nonsensical suggestion when
|
||||
// involving comments.
|
||||
if self.is_eof()
|
||||
|| (self.ch_is('/') && !first)
|
||||
|| (self.ch_is('\n') && !self.nextch_is('\'')) {
|
||||
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let c2 = self.ch.unwrap_or('\x00');
|
||||
self.bump();
|
||||
|
||||
let valid = self.scan_char_or_byte(start,
|
||||
c2,
|
||||
// ascii_only =
|
||||
true,
|
||||
'\'');
|
||||
if !self.ch_is('\'') {
|
||||
// Byte offsetting here is okay because the
|
||||
// character before position `start` are an
|
||||
// ascii single quote and ascii 'b'.
|
||||
let pos = self.pos;
|
||||
self.fatal_span_verbose(start - BytePos(2),
|
||||
pos,
|
||||
"unterminated byte constant".to_string()).raise();
|
||||
self.fatal_span_(start_with_quote, self.pos, unterminated_msg.into())
|
||||
.raise()
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
Symbol::intern("?")
|
||||
};
|
||||
self.bump(); // advance ch past token
|
||||
|
||||
token::Byte(id)
|
||||
let id = self.name_from(start);
|
||||
self.bump();
|
||||
id
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
|
||||
self.scan_hex_digits(2, delim, below_0x7f_only)
|
||||
}
|
||||
|
||||
fn scan_byte_string(&mut self) -> token::Lit {
|
||||
fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
|
||||
debug_assert!(self.ch_is('\"'));
|
||||
let start_with_quote = self.pos;
|
||||
self.bump();
|
||||
let start = self.pos;
|
||||
let mut valid = true;
|
||||
|
||||
while !self.ch_is('"') {
|
||||
if self.is_eof() {
|
||||
let pos = self.pos;
|
||||
self.fatal_span_(start, pos, "unterminated double quote byte string").raise();
|
||||
self.fatal_span_(start_with_quote, pos, unterminated_msg).raise();
|
||||
}
|
||||
if self.ch_is('\\') && (self.nextch_is('\\') || self.nextch_is('"')) {
|
||||
self.bump();
|
||||
}
|
||||
|
||||
let ch_start = self.pos;
|
||||
let ch = self.ch.unwrap();
|
||||
self.bump();
|
||||
valid &= self.scan_char_or_byte(ch_start,
|
||||
ch,
|
||||
// ascii_only =
|
||||
true,
|
||||
'"');
|
||||
}
|
||||
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
Symbol::intern("??")
|
||||
};
|
||||
let id = self.name_from(start);
|
||||
self.bump();
|
||||
|
||||
token::ByteStr(id)
|
||||
id
|
||||
}
|
||||
|
||||
fn scan_raw_byte_string(&mut self) -> token::Lit {
|
||||
@ -1826,6 +1461,70 @@ impl<'a> StringReader<'a> {
|
||||
|
||||
token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count)
|
||||
}
|
||||
|
||||
fn validate_char_escape(&self, start_with_quote: BytePos) {
|
||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||
if let Err((off, err)) = unescape::unescape_char(lit) {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit,
|
||||
self.mk_sp(start_with_quote, self.pos),
|
||||
unescape::Mode::Char,
|
||||
0..off,
|
||||
err,
|
||||
)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn validate_byte_escape(&self, start_with_quote: BytePos) {
|
||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||
if let Err((off, err)) = unescape::unescape_byte(lit) {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit,
|
||||
self.mk_sp(start_with_quote, self.pos),
|
||||
unescape::Mode::Byte,
|
||||
0..off,
|
||||
err,
|
||||
)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn validate_str_escape(&self, start_with_quote: BytePos) {
|
||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||
unescape::unescape_str(lit, &mut |range, c| {
|
||||
if let Err(err) = c {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit,
|
||||
self.mk_sp(start_with_quote, self.pos),
|
||||
unescape::Mode::Str,
|
||||
range,
|
||||
err,
|
||||
)
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
|
||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||
unescape::unescape_byte_str(lit, &mut |range, c| {
|
||||
if let Err(err) = c {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
lit,
|
||||
self.mk_sp(start_with_quote, self.pos),
|
||||
unescape::Mode::ByteStr,
|
||||
range,
|
||||
err,
|
||||
)
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
|
||||
|
@ -18,7 +18,6 @@ use log::debug;
|
||||
|
||||
use rustc_data_structures::fx::FxHashSet;
|
||||
use std::borrow::Cow;
|
||||
use std::iter;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
|
||||
@ -33,6 +32,11 @@ pub mod attr;
|
||||
|
||||
pub mod classify;
|
||||
|
||||
pub(crate) mod unescape;
|
||||
use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte, EscapeError};
|
||||
|
||||
pub(crate) mod unescape_error_reporting;
|
||||
|
||||
/// Info about a parsing session.
|
||||
pub struct ParseSess {
|
||||
pub span_diagnostic: Handler,
|
||||
@ -306,133 +310,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> {
|
||||
Parser::new(sess, stream, None, true, false)
|
||||
}
|
||||
|
||||
/// Parses a string representing a character literal into its final form.
|
||||
/// Rather than just accepting/rejecting a given literal, unescapes it as
|
||||
/// well. Can take any slice prefixed by a character escape. Returns the
|
||||
/// character and the number of characters consumed.
|
||||
fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) {
|
||||
use std::char;
|
||||
|
||||
// Handle non-escaped chars first.
|
||||
if lit.as_bytes()[0] != b'\\' {
|
||||
// If the first byte isn't '\\' it might part of a multi-byte char, so
|
||||
// get the char with chars().
|
||||
let c = lit.chars().next().unwrap();
|
||||
return (c, 1);
|
||||
}
|
||||
|
||||
// Handle escaped chars.
|
||||
match lit.as_bytes()[1] as char {
|
||||
'"' => ('"', 2),
|
||||
'n' => ('\n', 2),
|
||||
'r' => ('\r', 2),
|
||||
't' => ('\t', 2),
|
||||
'\\' => ('\\', 2),
|
||||
'\'' => ('\'', 2),
|
||||
'0' => ('\0', 2),
|
||||
'x' => {
|
||||
let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
|
||||
let c = char::from_u32(v).unwrap();
|
||||
(c, 4)
|
||||
}
|
||||
'u' => {
|
||||
assert_eq!(lit.as_bytes()[2], b'{');
|
||||
let idx = lit.find('}').unwrap();
|
||||
|
||||
// All digits and '_' are ascii, so treat each byte as a char.
|
||||
let mut v: u32 = 0;
|
||||
for c in lit[3..idx].bytes() {
|
||||
let c = char::from(c);
|
||||
if c != '_' {
|
||||
let x = c.to_digit(16).unwrap();
|
||||
v = v.checked_mul(16).unwrap().checked_add(x).unwrap();
|
||||
}
|
||||
}
|
||||
let c = char::from_u32(v).unwrap_or_else(|| {
|
||||
if let Some((span, diag)) = diag {
|
||||
let mut diag = diag.struct_span_err(span, "invalid unicode character escape");
|
||||
if v > 0x10FFFF {
|
||||
diag.help("unicode escape must be at most 10FFFF").emit();
|
||||
} else {
|
||||
diag.help("unicode escape must not be a surrogate").emit();
|
||||
}
|
||||
}
|
||||
'\u{FFFD}'
|
||||
});
|
||||
(c, (idx + 1) as isize)
|
||||
}
|
||||
_ => panic!("lexer should have rejected a bad character escape {}", lit)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a string representing a string literal into its final form. Does unescaping.
|
||||
fn str_lit(lit: &str, diag: Option<(Span, &Handler)>) -> String {
|
||||
debug!("str_lit: given {}", lit.escape_default());
|
||||
let mut res = String::with_capacity(lit.len());
|
||||
|
||||
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
||||
|
||||
/// Eat everything up to a non-whitespace.
|
||||
fn eat<'a>(it: &mut iter::Peekable<str::CharIndices<'a>>) {
|
||||
loop {
|
||||
match it.peek().map(|x| x.1) {
|
||||
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
|
||||
it.next();
|
||||
},
|
||||
_ => { break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut chars = lit.char_indices().peekable();
|
||||
while let Some((i, c)) = chars.next() {
|
||||
match c {
|
||||
'\\' => {
|
||||
let ch = chars.peek().unwrap_or_else(|| {
|
||||
panic!("{}", error(i))
|
||||
}).1;
|
||||
|
||||
if ch == '\n' {
|
||||
eat(&mut chars);
|
||||
} else if ch == '\r' {
|
||||
chars.next();
|
||||
let ch = chars.peek().unwrap_or_else(|| {
|
||||
panic!("{}", error(i))
|
||||
}).1;
|
||||
|
||||
if ch != '\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
eat(&mut chars);
|
||||
} else {
|
||||
// otherwise, a normal escape
|
||||
let (c, n) = char_lit(&lit[i..], diag);
|
||||
for _ in 0..n - 1 { // we don't need to move past the first \
|
||||
chars.next();
|
||||
}
|
||||
res.push(c);
|
||||
}
|
||||
},
|
||||
'\r' => {
|
||||
let ch = chars.peek().unwrap_or_else(|| {
|
||||
panic!("{}", error(i))
|
||||
}).1;
|
||||
|
||||
if ch != '\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
chars.next();
|
||||
res.push('\n');
|
||||
}
|
||||
c => res.push(c),
|
||||
}
|
||||
}
|
||||
|
||||
res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
|
||||
debug!("parse_str_lit: returning {}", res);
|
||||
res
|
||||
}
|
||||
|
||||
/// Parses a string representing a raw string literal into its final form. The
|
||||
/// only operation this does is convert embedded CRLF into a single LF.
|
||||
fn raw_str_lit(lit: &str) -> String {
|
||||
@ -475,9 +352,23 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||
use ast::LitKind;
|
||||
|
||||
match lit {
|
||||
token::Byte(i) => (true, Some(LitKind::Byte(byte_lit(&i.as_str()).0))),
|
||||
token::Char(i) => (true, Some(LitKind::Char(char_lit(&i.as_str(), diag).0))),
|
||||
token::Err(i) => (true, Some(LitKind::Err(i))),
|
||||
token::Byte(i) => {
|
||||
let lit_kind = match unescape_byte(&i.as_str()) {
|
||||
Ok(c) => LitKind::Byte(c),
|
||||
Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
|
||||
Err(_) => LitKind::Byte(0),
|
||||
};
|
||||
(true, Some(lit_kind))
|
||||
},
|
||||
token::Char(i) => {
|
||||
let lit_kind = match unescape_char(&i.as_str()) {
|
||||
Ok(c) => LitKind::Char(c),
|
||||
Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
|
||||
Err(_) => LitKind::Char('\u{FFFD}'),
|
||||
};
|
||||
(true, Some(lit_kind))
|
||||
},
|
||||
token::Err(i) => (true, Some(LitKind::Err(i))),
|
||||
|
||||
// There are some valid suffixes for integer and float literals,
|
||||
// so all the handling is done internally.
|
||||
@ -491,7 +382,14 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||
// string in the Token.
|
||||
let s = &sym.as_str();
|
||||
if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
|
||||
sym = Symbol::intern(&str_lit(s, diag));
|
||||
let mut buf = String::with_capacity(s.len());
|
||||
unescape_str(s, &mut |_, unescaped_char| {
|
||||
match unescaped_char {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => buf.push('\u{FFFD}'),
|
||||
}
|
||||
});
|
||||
sym = Symbol::intern(&buf)
|
||||
}
|
||||
(true, Some(LitKind::Str(sym, ast::StrStyle::Cooked)))
|
||||
}
|
||||
@ -504,7 +402,16 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||
(true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n))))
|
||||
}
|
||||
token::ByteStr(i) => {
|
||||
(true, Some(LitKind::ByteStr(byte_str_lit(&i.as_str()))))
|
||||
let s = &i.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
unescape_byte_str(s, &mut |_, unescaped_byte| {
|
||||
match unescaped_byte {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => buf.push(0),
|
||||
}
|
||||
});
|
||||
buf.shrink_to_fit();
|
||||
(true, Some(LitKind::ByteStr(Lrc::new(buf))))
|
||||
}
|
||||
token::ByteStrRaw(i, _) => {
|
||||
(true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))))
|
||||
@ -559,95 +466,6 @@ fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||
filtered_float_lit(Symbol::intern(s), suffix, diag)
|
||||
}
|
||||
|
||||
/// Parses a string representing a byte literal into its final form. Similar to `char_lit`.
///
/// `lit` is either exactly one plain byte, or text that starts with a
/// backslash escape. Anything after the escape is ignored, so the remainder
/// of a byte string may be passed. Returns the decoded byte and the number
/// of bytes of `lit` that were consumed (1, 2 or 4).
///
/// # Panics
///
/// Panics with a "lexer accepted invalid byte literal" message when `lit`
/// is empty, does not start with `\`, uses an unknown escape letter, or has
/// a `\x` escape that is not followed by exactly two hex digits.
fn byte_lit(lit: &str) -> (u8, usize) {
    let err = |i: usize| format!("lexer accepted invalid byte literal {} step {}", lit, i);
    let bytes = lit.as_bytes();

    if bytes.len() == 1 {
        return (bytes[0], 1);
    }

    // `first()` keeps the empty string on the descriptive panic path rather
    // than failing with an out-of-bounds index.
    assert_eq!(bytes.first(), Some(&b'\\'), "{}", err(0));

    // The length is neither 0 nor 1 here, so `bytes[1]` exists.
    let b = match bytes[1] {
        b'"' => b'"',
        b'n' => b'\n',
        b'r' => b'\r',
        b't' => b'\t',
        b'\\' => b'\\',
        b'\'' => b'\'',
        b'0' => b'\0',
        b'x' => {
            // Decode each digit separately: `from_str_radix` would accept a
            // leading `+`, and slicing the `str` could panic off a char
            // boundary or past the end.
            let hex_digit = |i: usize| -> u8 {
                bytes.get(i)
                    .and_then(|&b| char::from(b).to_digit(16))
                    .unwrap_or_else(|| panic!("{}", err(i))) as u8
            };
            // Two hex digits are at most 0xFF, so this cannot overflow.
            return (hex_digit(2) * 16 + hex_digit(3), 4);
        }
        _ => panic!("{}", err(1)),
    };
    (b, 2)
}
|
||||
|
||||
fn byte_str_lit(lit: &str) -> Lrc<Vec<u8>> {
|
||||
let mut res = Vec::with_capacity(lit.len());
|
||||
|
||||
let error = |i| panic!("lexer should have rejected {} at {}", lit, i);
|
||||
|
||||
/// Eat everything up to a non-whitespace.
|
||||
fn eat<I: Iterator<Item=(usize, u8)>>(it: &mut iter::Peekable<I>) {
|
||||
loop {
|
||||
match it.peek().map(|x| x.1) {
|
||||
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
|
||||
it.next();
|
||||
},
|
||||
_ => { break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// byte string literals *must* be ASCII, but the escapes don't have to be
|
||||
let mut chars = lit.bytes().enumerate().peekable();
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some((i, b'\\')) => {
|
||||
match chars.peek().unwrap_or_else(|| error(i)).1 {
|
||||
b'\n' => eat(&mut chars),
|
||||
b'\r' => {
|
||||
chars.next();
|
||||
if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
eat(&mut chars);
|
||||
}
|
||||
_ => {
|
||||
// otherwise, a normal escape
|
||||
let (c, n) = byte_lit(&lit[i..]);
|
||||
// we don't need to move past the first \
|
||||
for _ in 0..n - 1 {
|
||||
chars.next();
|
||||
}
|
||||
res.push(c);
|
||||
}
|
||||
}
|
||||
},
|
||||
Some((i, b'\r')) => {
|
||||
if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
chars.next();
|
||||
res.push(b'\n');
|
||||
}
|
||||
Some((_, c)) => res.push(c),
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
Lrc::new(res)
|
||||
}
|
||||
|
||||
fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||
-> Option<ast::LitKind> {
|
||||
// s can only be ascii, byte indexing is fine
|
||||
|
515
src/libsyntax/parse/unescape.rs
Normal file
515
src/libsyntax/parse/unescape.rs
Normal file
@ -0,0 +1,515 @@
|
||||
//! Utilities for validating string and char literals and turning them into
|
||||
//! values they represent.
|
||||
|
||||
use std::str::Chars;
|
||||
use std::ops::Range;
|
||||
|
||||
/// Everything that can be wrong with the contents of a char, byte, string or
/// byte string literal.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum EscapeError {
    /// The literal has no contents at all.
    ZeroChars,
    /// A char or byte literal holds more than one character.
    MoreThanOneChar,

    /// A backslash is the last character of the literal.
    LoneSlash,
    /// The character after a backslash does not start a known escape.
    InvalidEscape,
    /// A raw `\r` that is not followed by `\n`.
    BareCarriageReturn,
    /// A character that may only appear in escaped form in this kind of
    /// literal (tab, newline, or the literal's own quote character).
    EscapeOnlyChar,

    /// A `\x` escape with fewer than two digits.
    TooShortHexEscape,
    /// A `\x` escape containing something that is not a hex digit.
    InvalidCharInHexEscape,
    /// A `\x` escape above `0x7F` in a char or string literal.
    OutOfRangeHexEscape,

    /// `\u` that is not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A `\u{…}` escape containing something that is not a hex digit.
    InvalidCharInUnicodeEscape,
    /// `\u{}`.
    EmptyUnicodeEscape,
    /// A `\u{` escape with no closing `}`.
    UnclosedUnicodeEscape,
    /// A `\u{_…}` escape, i.e. the first character after `{` is `_`.
    LeadingUnderscoreUnicodeEscape,
    /// A `\u{…}` escape with more than six hex digits.
    OverlongUnicodeEscape,
    /// A `\u{…}` escape naming a surrogate code point.
    LoneSurrogateUnicodeEscape,
    /// A `\u{…}` escape above `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A well-formed `\u{…}` escape inside a byte or byte string literal.
    UnicodeEscapeInByte,
    /// A non-ASCII character inside a byte or byte string literal.
    NonAsciiCharInByte,
}
|
||||
|
||||
/// Takes a contents of a char literal (without quotes), and returns an
|
||||
/// unescaped char or an error
|
||||
pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
|
||||
let mut chars = literal_text.chars();
|
||||
unescape_char_or_byte(&mut chars, Mode::Char)
|
||||
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of escaped characters or errors.
|
||||
pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
unescape_str_or_byte_str(literal_text, Mode::Str, callback)
|
||||
}
|
||||
|
||||
pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
|
||||
let mut chars = literal_text.chars();
|
||||
unescape_char_or_byte(&mut chars, Mode::Byte)
|
||||
.map(byte_from_char)
|
||||
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of escaped characters or errors.
|
||||
pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<u8, EscapeError>),
|
||||
{
|
||||
unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
|
||||
callback(range, char.map(byte_from_char))
|
||||
})
|
||||
}
|
||||
|
||||
/// The kind of literal being unescaped.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Mode {
    Char,
    Str,
    Byte,
    ByteStr,
}

impl Mode {
    /// `true` for the single-character literal kinds (`Char`, `Byte`).
    fn in_single_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => false,
            Mode::Char | Mode::Byte => true,
        }
    }

    /// `true` for the string literal kinds (`Str`, `ByteStr`).
    pub(crate) fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    /// `true` for the byte-valued literal kinds (`Byte`, `ByteStr`).
    pub(crate) fn is_bytes(self) -> bool {
        match self {
            Mode::Char | Mode::Str => false,
            Mode::Byte | Mode::ByteStr => true,
        }
    }
}
|
||||
|
||||
|
||||
fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||
if first_char != '\\' {
|
||||
return match first_char {
|
||||
'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
|
||||
'\r' => Err(if chars.clone().next() == Some('\n') {
|
||||
EscapeError::EscapeOnlyChar
|
||||
} else {
|
||||
EscapeError::BareCarriageReturn
|
||||
}),
|
||||
'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
|
||||
'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
|
||||
_ => {
|
||||
if mode.is_bytes() && !first_char.is_ascii() {
|
||||
return Err(EscapeError::NonAsciiCharInByte);
|
||||
}
|
||||
Ok(first_char)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
|
||||
|
||||
let res = match second_char {
|
||||
'"' => '"',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'0' => '\0',
|
||||
|
||||
'x' => {
|
||||
let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
|
||||
let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
|
||||
|
||||
let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
|
||||
let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
|
||||
|
||||
let value = hi * 16 + lo;
|
||||
|
||||
if !mode.is_bytes() && !is_ascii(value) {
|
||||
return Err(EscapeError::OutOfRangeHexEscape);
|
||||
}
|
||||
let value = value as u8;
|
||||
|
||||
value as char
|
||||
}
|
||||
|
||||
'u' => {
|
||||
if chars.next() != Some('{') {
|
||||
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||
}
|
||||
|
||||
let mut n_digits = 1;
|
||||
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||
};
|
||||
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||
Some('_') => continue,
|
||||
Some('}') => {
|
||||
if n_digits > 6 {
|
||||
return Err(EscapeError::OverlongUnicodeEscape);
|
||||
}
|
||||
if mode.is_bytes() {
|
||||
return Err(EscapeError::UnicodeEscapeInByte);
|
||||
}
|
||||
|
||||
break std::char::from_u32(value).ok_or_else(|| {
|
||||
if value > 0x10FFFF {
|
||||
EscapeError::OutOfRangeUnicodeEscape
|
||||
} else {
|
||||
EscapeError::LoneSurrogateUnicodeEscape
|
||||
}
|
||||
})?;
|
||||
}
|
||||
Some(c) => {
|
||||
let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||
n_digits += 1;
|
||||
if n_digits > 6 {
|
||||
continue;
|
||||
}
|
||||
let digit = digit as u32;
|
||||
value = value * 16 + digit;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
_ => return Err(EscapeError::InvalidEscape),
|
||||
};
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
|
||||
let res = scan_escape(first_char, chars, mode)?;
|
||||
if chars.next().is_some() {
|
||||
return Err(EscapeError::MoreThanOneChar);
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of escaped characters or errors.
|
||||
fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
assert!(mode.in_double_quotes());
|
||||
let initial_len = src.len();
|
||||
let mut chars = src.chars();
|
||||
while let Some(first_char) = chars.next() {
|
||||
let start = initial_len - chars.as_str().len() - first_char.len_utf8();
|
||||
|
||||
let unescaped_char = match first_char {
|
||||
'\\' => {
|
||||
let (second_char, third_char) = {
|
||||
let mut chars = chars.clone();
|
||||
(chars.next(), chars.next())
|
||||
};
|
||||
match (second_char, third_char) {
|
||||
(Some('\n'), _) | (Some('\r'), Some('\n')) => {
|
||||
skip_ascii_whitespace(&mut chars);
|
||||
continue;
|
||||
}
|
||||
_ => scan_escape(first_char, &mut chars, mode),
|
||||
}
|
||||
}
|
||||
'\r' => {
|
||||
let second_char = chars.clone().next();
|
||||
if second_char == Some('\n') {
|
||||
chars.next();
|
||||
Ok('\n')
|
||||
} else {
|
||||
scan_escape(first_char, &mut chars, mode)
|
||||
}
|
||||
}
|
||||
'\n' => Ok('\n'),
|
||||
'\t' => Ok('\t'),
|
||||
_ => scan_escape(first_char, &mut chars, mode),
|
||||
};
|
||||
let end = initial_len - chars.as_str().len();
|
||||
callback(start..end, unescaped_char);
|
||||
}
|
||||
|
||||
fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
|
||||
let str = chars.as_str();
|
||||
let first_non_space = str
|
||||
.bytes()
|
||||
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||
.unwrap_or(str.len());
|
||||
*chars = str[first_non_space..].chars()
|
||||
}
|
||||
}
|
||||
|
||||
/// Narrows a char produced in a byte mode to the byte it stands for.
///
/// Panics if `c` is above `0xFF`.
fn byte_from_char(c: char) -> u8 {
    let code = u32::from(c);
    assert!(code <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte");
    code as u8
}
|
||||
|
||||
/// Whether the code point `x` lies in the ASCII range `0x00..=0x7F`.
fn is_ascii(x: u32) -> bool {
    x < 0x80
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Every helper reports the literal under test on failure, so a failing
    // case can be identified without bisecting the table it lives in.

    /// Asserts that each literal in `cases` fails `unescape_char` with `expected`.
    fn assert_char_err(cases: &[&str], expected: EscapeError) {
        for &text in cases {
            let actual = unescape_char(text).map_err(|(_offset, err)| err);
            assert_eq!(
                actual.as_ref().err(), Some(&expected),
                "char literal {:?} produced {:?}", text, actual,
            );
        }
    }

    /// Asserts that each literal in `cases` fails `unescape_byte` with `expected`.
    fn assert_byte_err(cases: &[&str], expected: EscapeError) {
        for &text in cases {
            let actual = unescape_byte(text).map_err(|(_offset, err)| err);
            assert_eq!(
                actual.as_ref().err(), Some(&expected),
                "byte literal {:?} produced {:?}", text, actual,
            );
        }
    }

    #[test]
    fn test_unescape_char_bad() {
        assert_char_err(&[""], EscapeError::ZeroChars);
        assert_char_err(&[r"\"], EscapeError::LoneSlash);

        assert_char_err(&["\n", "\r\n", "\t", "'"], EscapeError::EscapeOnlyChar);
        assert_char_err(&["\r"], EscapeError::BareCarriageReturn);

        assert_char_err(
            &[
                "spam", r"\x0ff", r#"\"a"#, r"\na", r"\ra", r"\ta", r"\\a", r"\'a", r"\0a",
                r"\u{0}x", r"\u{1F63b}}",
            ],
            EscapeError::MoreThanOneChar,
        );

        assert_char_err(&[r"\v", r"\💩", r"\●"], EscapeError::InvalidEscape);

        assert_char_err(&[r"\x", r"\x0", r"\xf", r"\xa"], EscapeError::TooShortHexEscape);
        assert_char_err(
            &[r"\xx", r"\xы", r"\x🦀", r"\xtt"],
            EscapeError::InvalidCharInHexEscape,
        );
        assert_char_err(&[r"\xff", r"\xFF", r"\x80"], EscapeError::OutOfRangeHexEscape);

        assert_char_err(&[r"\u", r"\u[0123]"], EscapeError::NoBraceInUnicodeEscape);
        assert_char_err(&[r"\u{0x}"], EscapeError::InvalidCharInUnicodeEscape);
        assert_char_err(&[r"\u{", r"\u{0000"], EscapeError::UnclosedUnicodeEscape);
        assert_char_err(&[r"\u{}"], EscapeError::EmptyUnicodeEscape);
        assert_char_err(&[r"\u{_0000}"], EscapeError::LeadingUnderscoreUnicodeEscape);
        assert_char_err(&[r"\u{0000000}"], EscapeError::OverlongUnicodeEscape);
        assert_char_err(&[r"\u{FFFFFF}", r"\u{ffffff}"], EscapeError::OutOfRangeUnicodeEscape);

        assert_char_err(
            &[r"\u{DC00}", r"\u{DDDD}", r"\u{DFFF}", r"\u{D800}", r"\u{DAAA}", r"\u{DBFF}"],
            EscapeError::LoneSurrogateUnicodeEscape,
        );
    }

    #[test]
    fn test_unescape_char_good() {
        let cases = [
            ("a", 'a'),
            ("ы", 'ы'),
            ("🦀", '🦀'),

            (r#"\""#, '"'),
            (r"\n", '\n'),
            (r"\r", '\r'),
            (r"\t", '\t'),
            (r"\\", '\\'),
            (r"\'", '\''),
            (r"\0", '\0'),

            (r"\x00", '\0'),
            (r"\x5a", 'Z'),
            (r"\x5A", 'Z'),
            (r"\x7f", 127 as char),

            (r"\u{0}", '\0'),
            (r"\u{000000}", '\0'),
            (r"\u{41}", 'A'),
            (r"\u{0041}", 'A'),
            (r"\u{00_41}", 'A'),
            (r"\u{4__1__}", 'A'),
            (r"\u{1F63b}", '😻'),
        ];
        for &(text, expected) in cases.iter() {
            assert_eq!(unescape_char(text), Ok(expected), "char literal {:?}", text);
        }
    }

    #[test]
    fn test_unescape_str_good() {
        fn check(literal_text: &str, expected: &str) {
            let mut unescaped = String::with_capacity(literal_text.len());
            let mut first_error = None;
            unescape_str(literal_text, &mut |range, c| {
                // Mirror "stop at the first error": ignore everything after it.
                if first_error.is_none() {
                    match c {
                        Ok(c) => unescaped.push(c),
                        Err(e) => first_error = Some((range, e)),
                    }
                }
            });
            assert_eq!(first_error, None, "string literal {:?}", literal_text);
            assert_eq!(unescaped, expected, "string literal {:?}", literal_text);
        }

        check("foo", "foo");
        check("", "");
        check(" \t\n\r\n", " \t\n\n");

        check("hello \\\n world", "hello world");
        check("hello \\\r\n world", "hello world");
        check("thread's", "thread's")
    }

    #[test]
    fn test_unescape_byte_bad() {
        assert_byte_err(&[""], EscapeError::ZeroChars);
        assert_byte_err(&[r"\"], EscapeError::LoneSlash);

        assert_byte_err(&["\n", "\r\n", "\t", "'"], EscapeError::EscapeOnlyChar);
        assert_byte_err(&["\r"], EscapeError::BareCarriageReturn);

        assert_byte_err(
            &["spam", r"\x0ff", r#"\"a"#, r"\na", r"\ra", r"\ta", r"\\a", r"\'a", r"\0a"],
            EscapeError::MoreThanOneChar,
        );

        assert_byte_err(&[r"\v", r"\💩", r"\●"], EscapeError::InvalidEscape);

        assert_byte_err(&[r"\x", r"\x0", r"\xa", r"\xf"], EscapeError::TooShortHexEscape);
        assert_byte_err(
            &[r"\xx", r"\xы", r"\x🦀", r"\xtt"],
            EscapeError::InvalidCharInHexEscape,
        );

        assert_byte_err(&[r"\u", r"\u[0123]"], EscapeError::NoBraceInUnicodeEscape);
        assert_byte_err(&[r"\u{0x}"], EscapeError::InvalidCharInUnicodeEscape);
        assert_byte_err(&[r"\u{", r"\u{0000"], EscapeError::UnclosedUnicodeEscape);
        assert_byte_err(&[r"\u{}"], EscapeError::EmptyUnicodeEscape);
        assert_byte_err(&[r"\u{_0000}"], EscapeError::LeadingUnderscoreUnicodeEscape);
        assert_byte_err(&[r"\u{0000000}"], EscapeError::OverlongUnicodeEscape);

        assert_byte_err(&["ы", "🦀"], EscapeError::NonAsciiCharInByte);

        // Any syntactically complete unicode escape is rejected in byte
        // mode, whatever its value and whatever follows it.
        assert_byte_err(
            &[
                r"\u{0}", r"\u{000000}", r"\u{41}", r"\u{0041}", r"\u{00_41}", r"\u{4__1__}",
                r"\u{1F63b}", r"\u{0}x", r"\u{1F63b}}", r"\u{FFFFFF}", r"\u{ffffff}",
                r"\u{DC00}", r"\u{DDDD}", r"\u{DFFF}", r"\u{D800}", r"\u{DAAA}", r"\u{DBFF}",
            ],
            EscapeError::UnicodeEscapeInByte,
        );
    }

    #[test]
    fn test_unescape_byte_good() {
        let cases = [
            ("a", b'a'),

            (r#"\""#, b'"'),
            (r"\n", b'\n'),
            (r"\r", b'\r'),
            (r"\t", b'\t'),
            (r"\\", b'\\'),
            (r"\'", b'\''),
            (r"\0", b'\0'),

            (r"\x00", b'\0'),
            (r"\x5a", b'Z'),
            (r"\x5A", b'Z'),
            (r"\x7f", 127),
            (r"\x80", 128),
            (r"\xff", 255),
            (r"\xFF", 255),
        ];
        for &(text, expected) in cases.iter() {
            assert_eq!(unescape_byte(text), Ok(expected), "byte literal {:?}", text);
        }
    }

    #[test]
    fn test_unescape_byte_str_good() {
        fn check(literal_text: &str, expected: &[u8]) {
            let mut unescaped = Vec::with_capacity(literal_text.len());
            let mut first_error = None;
            unescape_byte_str(literal_text, &mut |range, b| {
                // Mirror "stop at the first error": ignore everything after it.
                if first_error.is_none() {
                    match b {
                        Ok(b) => unescaped.push(b),
                        Err(e) => first_error = Some((range, e)),
                    }
                }
            });
            assert_eq!(first_error, None, "byte string literal {:?}", literal_text);
            assert_eq!(&unescaped[..], expected, "byte string literal {:?}", literal_text);
        }

        check("foo", b"foo");
        check("", b"");
        check(" \t\n\r\n", b" \t\n\n");

        check("hello \\\n world", b"hello world");
        check("hello \\\r\n world", b"hello world");
        check("thread's", b"thread's")
    }
}
|
200
src/libsyntax/parse/unescape_error_reporting.rs
Normal file
200
src/libsyntax/parse/unescape_error_reporting.rs
Normal file
@ -0,0 +1,200 @@
|
||||
//! Utilities for rendering escape sequence errors as diagnostics.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::iter::once;
|
||||
|
||||
use syntax_pos::{Span, BytePos};
|
||||
|
||||
use crate::errors::{Handler, Applicability};
|
||||
|
||||
use super::unescape::{EscapeError, Mode};
|
||||
|
||||
pub(crate) fn emit_unescape_error(
|
||||
handler: &Handler,
|
||||
// interior part of the literal, without quotes
|
||||
lit: &str,
|
||||
// full span of the literal, including quotes
|
||||
span_with_quotes: Span,
|
||||
mode: Mode,
|
||||
// range of the error inside `lit`
|
||||
range: Range<usize>,
|
||||
error: EscapeError,
|
||||
) {
|
||||
log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
|
||||
lit, span_with_quotes, mode, range, error);
|
||||
let span = {
|
||||
let Range { start, end } = range;
|
||||
let (start, end) = (start as u32, end as u32);
|
||||
let lo = span_with_quotes.lo() + BytePos(start + 1);
|
||||
let hi = lo + BytePos(end - start);
|
||||
span_with_quotes
|
||||
.with_lo(lo)
|
||||
.with_hi(hi)
|
||||
};
|
||||
let last_char = || {
|
||||
let c = lit[range.clone()].chars().rev().next().unwrap();
|
||||
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
|
||||
(c, span)
|
||||
};
|
||||
match error {
|
||||
EscapeError::LoneSurrogateUnicodeEscape => {
|
||||
handler.struct_span_err(span, "invalid unicode character escape")
|
||||
.help("unicode escape must not be a surrogate")
|
||||
.emit();
|
||||
}
|
||||
EscapeError::OutOfRangeUnicodeEscape => {
|
||||
handler.struct_span_err(span, "invalid unicode character escape")
|
||||
.help("unicode escape must be at most 10FFFF")
|
||||
.emit();
|
||||
}
|
||||
EscapeError::MoreThanOneChar => {
|
||||
handler
|
||||
.struct_span_err(
|
||||
span_with_quotes,
|
||||
"character literal may only contain one codepoint",
|
||||
)
|
||||
.span_suggestion(
|
||||
span_with_quotes,
|
||||
"if you meant to write a `str` literal, use double quotes",
|
||||
format!("\"{}\"", lit),
|
||||
Applicability::MachineApplicable,
|
||||
).emit()
|
||||
}
|
||||
EscapeError::EscapeOnlyChar => {
|
||||
let (c, _span) = last_char();
|
||||
|
||||
let mut msg = if mode.is_bytes() {
|
||||
"byte constant must be escaped: "
|
||||
} else {
|
||||
"character constant must be escaped: "
|
||||
}.to_string();
|
||||
push_escaped_char(&mut msg, c);
|
||||
|
||||
handler.span_err(span, msg.as_str())
|
||||
}
|
||||
EscapeError::BareCarriageReturn => {
|
||||
let msg = if mode.in_double_quotes() {
|
||||
"bare CR not allowed in string, use \\r instead"
|
||||
} else {
|
||||
"character constant must be escaped: \\r"
|
||||
};
|
||||
handler.span_err(span, msg);
|
||||
}
|
||||
EscapeError::InvalidEscape => {
|
||||
let (c, span) = last_char();
|
||||
|
||||
let label = if mode.is_bytes() {
|
||||
"unknown byte escape"
|
||||
} else {
|
||||
"unknown character escape"
|
||||
};
|
||||
let mut msg = label.to_string();
|
||||
msg.push_str(": ");
|
||||
push_escaped_char(&mut msg, c);
|
||||
|
||||
let mut diag = handler.struct_span_err(span, msg.as_str());
|
||||
diag.span_label(span, label);
|
||||
if c == '{' || c == '}' && !mode.is_bytes() {
|
||||
diag.help("if used in a formatting string, \
|
||||
curly braces are escaped with `{{` and `}}`");
|
||||
} else if c == '\r' {
|
||||
diag.help("this is an isolated carriage return; \
|
||||
consider checking your editor and version control settings");
|
||||
}
|
||||
diag.emit();
|
||||
}
|
||||
EscapeError::TooShortHexEscape => {
|
||||
handler.span_err(span, "numeric character escape is too short")
|
||||
}
|
||||
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
|
||||
let (c, span) = last_char();
|
||||
|
||||
let mut msg = if error == EscapeError::InvalidCharInHexEscape {
|
||||
"invalid character in numeric character escape: "
|
||||
} else {
|
||||
"invalid character in unicode escape: "
|
||||
}.to_string();
|
||||
push_escaped_char(&mut msg, c);
|
||||
|
||||
handler.span_err(span, msg.as_str())
|
||||
}
|
||||
EscapeError::NonAsciiCharInByte => {
|
||||
assert!(mode.is_bytes());
|
||||
let (_c, span) = last_char();
|
||||
handler.span_err(span, "byte constant must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte")
|
||||
}
|
||||
EscapeError::OutOfRangeHexEscape => {
|
||||
handler.span_err(span, "this form of character escape may only be used \
|
||||
with characters in the range [\\x00-\\x7f]")
|
||||
}
|
||||
EscapeError::LeadingUnderscoreUnicodeEscape => {
|
||||
let (_c, span) = last_char();
|
||||
handler.span_err(span, "invalid start of unicode escape")
|
||||
}
|
||||
EscapeError::OverlongUnicodeEscape => {
|
||||
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
|
||||
}
|
||||
EscapeError::UnclosedUnicodeEscape => {
|
||||
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
|
||||
}
|
||||
EscapeError::NoBraceInUnicodeEscape => {
|
||||
let msg = "incorrect unicode escape sequence";
|
||||
let mut diag = handler.struct_span_err(span, msg);
|
||||
|
||||
let mut suggestion = "\\u{".to_owned();
|
||||
let mut suggestion_len = 0;
|
||||
let (c, char_span) = last_char();
|
||||
let chars = once(c).chain(lit[range.end..].chars());
|
||||
for c in chars.take(6).take_while(|c| c.is_digit(16)) {
|
||||
suggestion.push(c);
|
||||
suggestion_len += c.len_utf8();
|
||||
}
|
||||
|
||||
if suggestion_len > 0 {
|
||||
suggestion.push('}');
|
||||
let lo = char_span.lo();
|
||||
let hi = lo + BytePos(suggestion_len as u32);
|
||||
diag.span_suggestion(
|
||||
span.with_lo(lo).with_hi(hi),
|
||||
"format of unicode escape sequences uses braces",
|
||||
suggestion,
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
} else {
|
||||
diag.span_label(span, msg);
|
||||
diag.help(
|
||||
"format of unicode escape sequences is `\\u{...}`",
|
||||
);
|
||||
}
|
||||
|
||||
diag.emit();
|
||||
}
|
||||
EscapeError::UnicodeEscapeInByte => {
|
||||
handler.span_err(span, "unicode escape sequences cannot be used \
|
||||
as a byte or in a byte string")
|
||||
}
|
||||
EscapeError::EmptyUnicodeEscape => {
|
||||
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
|
||||
}
|
||||
EscapeError::ZeroChars => {
|
||||
handler.span_err(span, "empty character literal")
|
||||
}
|
||||
EscapeError::LoneSlash => {
|
||||
panic!("lexer accepted unterminated literal with trailing slash")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `c` to `msg` in a form suitable for a user-facing error message.
///
/// Printable ASCII (`U+0020` through `U+007E`) is appended verbatim, so `\`,
/// `'` and `"` are deliberately left unescaped; every other character is
/// appended as its `char::escape_default` rendering.
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
    let printable_ascii = c >= '\u{20}' && c <= '\u{7e}';
    if printable_ascii {
        msg.push(c);
    } else {
        for escaped in c.escape_default() {
            msg.push(escaped);
        }
    }
}
|
@ -1,3 +1,4 @@
|
||||
// compile-flags: -Z continue-parse-after-error
|
||||
// ignore-tidy-tab
|
||||
|
||||
fn main() {
|
||||
@ -76,7 +77,7 @@ raw { \n
|
||||
|
||||
println!("\x7B}\u8 {", 1);
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
//~| ERROR argument never used
|
||||
//~| ERROR invalid format string: expected `'}'` but string was terminated
|
||||
|
||||
// note: raw strings don't escape `\xFF` and `\u{FF}` sequences
|
||||
println!(r#"\x7B}\u{8} {"#, 1);
|
||||
|
@ -1,13 +1,13 @@
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/format-string-error-2.rs:77:20
|
||||
--> $DIR/format-string-error-2.rs:78:20
|
||||
|
|
||||
LL | println!("\x7B}\u8 {", 1);
|
||||
| ^^-
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8}`
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8}`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'a'`
|
||||
--> $DIR/format-string-error-2.rs:5:5
|
||||
--> $DIR/format-string-error-2.rs:6:5
|
||||
|
|
||||
LL | format!("{
|
||||
| - because of this opening brace
|
||||
@ -17,7 +17,7 @@ LL | a");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'b'`
|
||||
--> $DIR/format-string-error-2.rs:9:5
|
||||
--> $DIR/format-string-error-2.rs:10:5
|
||||
|
|
||||
LL | format!("{ \
|
||||
| - because of this opening brace
|
||||
@ -28,7 +28,7 @@ LL | b");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'\'`
|
||||
--> $DIR/format-string-error-2.rs:11:18
|
||||
--> $DIR/format-string-error-2.rs:12:18
|
||||
|
|
||||
LL | format!(r#"{ \
|
||||
| - ^ expected `}` in format string
|
||||
@ -38,7 +38,7 @@ LL | format!(r#"{ \
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'\'`
|
||||
--> $DIR/format-string-error-2.rs:15:18
|
||||
--> $DIR/format-string-error-2.rs:16:18
|
||||
|
|
||||
LL | format!(r#"{ \n
|
||||
| - ^ expected `}` in format string
|
||||
@ -48,7 +48,7 @@ LL | format!(r#"{ \n
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'e'`
|
||||
--> $DIR/format-string-error-2.rs:21:5
|
||||
--> $DIR/format-string-error-2.rs:22:5
|
||||
|
|
||||
LL | format!("{ \n
|
||||
| - because of this opening brace
|
||||
@ -59,7 +59,7 @@ LL | e");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'a'`
|
||||
--> $DIR/format-string-error-2.rs:25:5
|
||||
--> $DIR/format-string-error-2.rs:26:5
|
||||
|
|
||||
LL | {
|
||||
| - because of this opening brace
|
||||
@ -69,7 +69,7 @@ LL | a");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'a'`
|
||||
--> $DIR/format-string-error-2.rs:29:5
|
||||
--> $DIR/format-string-error-2.rs:30:5
|
||||
|
|
||||
LL | {
|
||||
| - because of this opening brace
|
||||
@ -79,7 +79,7 @@ LL | a
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'b'`
|
||||
--> $DIR/format-string-error-2.rs:35:5
|
||||
--> $DIR/format-string-error-2.rs:36:5
|
||||
|
|
||||
LL | { \
|
||||
| - because of this opening brace
|
||||
@ -90,7 +90,7 @@ LL | b");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'b'`
|
||||
--> $DIR/format-string-error-2.rs:40:5
|
||||
--> $DIR/format-string-error-2.rs:41:5
|
||||
|
|
||||
LL | { \
|
||||
| - because of this opening brace
|
||||
@ -101,7 +101,7 @@ LL | b \
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'\'`
|
||||
--> $DIR/format-string-error-2.rs:45:8
|
||||
--> $DIR/format-string-error-2.rs:46:8
|
||||
|
|
||||
LL | raw { \
|
||||
| - ^ expected `}` in format string
|
||||
@ -111,7 +111,7 @@ LL | raw { \
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'\'`
|
||||
--> $DIR/format-string-error-2.rs:50:8
|
||||
--> $DIR/format-string-error-2.rs:51:8
|
||||
|
|
||||
LL | raw { \n
|
||||
| - ^ expected `}` in format string
|
||||
@ -121,7 +121,7 @@ LL | raw { \n
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'e'`
|
||||
--> $DIR/format-string-error-2.rs:57:5
|
||||
--> $DIR/format-string-error-2.rs:58:5
|
||||
|
|
||||
LL | { \n
|
||||
| - because of this opening brace
|
||||
@ -132,7 +132,7 @@ LL | e");
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'`, found `'a'`
|
||||
--> $DIR/format-string-error-2.rs:67:5
|
||||
--> $DIR/format-string-error-2.rs:68:5
|
||||
|
|
||||
LL | {
|
||||
| - because of this opening brace
|
||||
@ -142,13 +142,13 @@ LL | asdf}
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: 1 positional argument in format string, but no arguments were given
|
||||
--> $DIR/format-string-error-2.rs:70:17
|
||||
--> $DIR/format-string-error-2.rs:71:17
|
||||
|
|
||||
LL | println!("\t{}");
|
||||
| ^^
|
||||
|
||||
error: invalid format string: expected `'}'` but string was terminated
|
||||
--> $DIR/format-string-error-2.rs:74:27
|
||||
--> $DIR/format-string-error-2.rs:75:27
|
||||
|
|
||||
LL | println!("\x7B}\u{8} {", 1);
|
||||
| -^ expected `'}'` in format string
|
||||
@ -157,16 +157,18 @@ LL | println!("\x7B}\u{8} {", 1);
|
||||
|
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: argument never used
|
||||
--> $DIR/format-string-error-2.rs:77:28
|
||||
error: invalid format string: expected `'}'` but string was terminated
|
||||
--> $DIR/format-string-error-2.rs:78:27
|
||||
|
|
||||
LL | println!("\x7B}\u8 {", 1);
|
||||
| ------------ ^ argument never used
|
||||
| |
|
||||
| formatting specifier missing
|
||||
| -^ expected `'}'` in format string
|
||||
| |
|
||||
| because of this opening brace
|
||||
|
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: unmatched `}` found
|
||||
--> $DIR/format-string-error-2.rs:82:21
|
||||
--> $DIR/format-string-error-2.rs:83:21
|
||||
|
|
||||
LL | println!(r#"\x7B}\u{8} {"#, 1);
|
||||
| ^ unmatched `}` in format string
|
||||
@ -174,7 +176,7 @@ LL | println!(r#"\x7B}\u{8} {"#, 1);
|
||||
= note: if you intended to print `}`, you can escape it using `}}`
|
||||
|
||||
error: invalid format string: unmatched `}` found
|
||||
--> $DIR/format-string-error-2.rs:85:21
|
||||
--> $DIR/format-string-error-2.rs:86:21
|
||||
|
|
||||
LL | println!(r#"\x7B}\u8 {"#, 1);
|
||||
| ^ unmatched `}` in format string
|
||||
|
@ -1,20 +1,20 @@
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:4:16
|
||||
--> $DIR/ascii-only-character-escape.rs:4:14
|
||||
|
|
||||
LL | let x = "\x80";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:5:16
|
||||
--> $DIR/ascii-only-character-escape.rs:5:14
|
||||
|
|
||||
LL | let y = "\xff";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:6:16
|
||||
--> $DIR/ascii-only-character-escape.rs:6:14
|
||||
|
|
||||
LL | let z = "\xe2";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
||||
|
@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
|
||||
LL | b'é';
|
||||
| ^
|
||||
|
||||
error: unterminated byte constant: b'a
|
||||
--> $DIR/byte-literals.rs:14:5
|
||||
error: unterminated byte constant
|
||||
--> $DIR/byte-literals.rs:14:6
|
||||
|
|
||||
LL | b'a
|
||||
| ^^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 7 previous errors
|
||||
|
||||
|
@ -23,10 +23,10 @@ LL | b"é";
|
||||
| ^
|
||||
|
||||
error: unterminated double quote byte string
|
||||
--> $DIR/byte-string-literals.rs:9:7
|
||||
--> $DIR/byte-string-literals.rs:9:6
|
||||
|
|
||||
LL | b"a
|
||||
| _______^
|
||||
| ______^
|
||||
LL | | }
|
||||
| |__^
|
||||
|
||||
|
@ -9,32 +9,27 @@ fn main() {
|
||||
|
||||
let _ = b'\u';
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = b'\x5';
|
||||
//~^ ERROR numeric character escape is too short
|
||||
|
||||
let _ = b'\xxy';
|
||||
//~^ ERROR invalid character in numeric character escape: x
|
||||
//~^^ ERROR invalid character in numeric character escape: y
|
||||
|
||||
let _ = '\x5';
|
||||
//~^ ERROR numeric character escape is too short
|
||||
|
||||
let _ = '\xxy';
|
||||
//~^ ERROR invalid character in numeric character escape: x
|
||||
//~^^ ERROR invalid character in numeric character escape: y
|
||||
|
||||
let _ = b"\u{a4a4} \xf \u";
|
||||
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
//~^^ ERROR invalid character in numeric character escape:
|
||||
//~^^^ ERROR incorrect unicode escape sequence
|
||||
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = "\xf \u";
|
||||
//~^ ERROR invalid character in numeric character escape:
|
||||
//~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
//~^^^ ERROR incorrect unicode escape sequence
|
||||
//~^^ ERROR incorrect unicode escape sequence
|
||||
|
||||
let _ = "\u8f";
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
|
@ -18,88 +18,58 @@ LL | let _ = b'\u';
|
||||
|
|
||||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:10:15
|
||||
|
|
||||
LL | let _ = b'\u';
|
||||
| ^^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:14:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:13:15
|
||||
|
|
||||
LL | let _ = b'\x5';
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: invalid character in numeric character escape: x
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:17:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:16:17
|
||||
|
|
||||
LL | let _ = b'\xxy';
|
||||
| ^
|
||||
|
||||
error: invalid character in numeric character escape: y
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:17:18
|
||||
|
|
||||
LL | let _ = b'\xxy';
|
||||
| ^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:21:16
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:19:14
|
||||
|
|
||||
LL | let _ = '\x5';
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: invalid character in numeric character escape: x
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:24:16
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:22:16
|
||||
|
|
||||
LL | let _ = '\xxy';
|
||||
| ^
|
||||
|
||||
error: invalid character in numeric character escape: y
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:24:17
|
||||
|
|
||||
LL | let _ = '\xxy';
|
||||
| ^
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:15
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:15
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^^^^^^^
|
||||
|
||||
error: invalid character in numeric character escape:
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:27
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:27
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:28
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^ incorrect unicode escape sequence
|
||||
|
|
||||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^
|
||||
|
||||
error: invalid character in numeric character escape:
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:30:17
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:16
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^^
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:18
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:30:18
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^^ incorrect unicode escape sequence
|
||||
@ -107,12 +77,12 @@ LL | let _ = "\xf \u";
|
||||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:39:14
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:14
|
||||
|
|
||||
LL | let _ = "\u8f";
|
||||
| ^^--
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
||||
|
||||
error: aborting due to 18 previous errors
|
||||
error: aborting due to 13 previous errors
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/lex-bad-char-literals-1.rs:3:8
|
||||
--> $DIR/lex-bad-char-literals-1.rs:3:6
|
||||
|
|
||||
LL | '\x1'
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/lex-bad-char-literals-1.rs:7:8
|
||||
--> $DIR/lex-bad-char-literals-1.rs:7:6
|
||||
|
|
||||
LL | "\x1"
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: unknown character escape: \u{25cf}
|
||||
--> $DIR/lex-bad-char-literals-1.rs:11:7
|
||||
|
@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
|
||||
|
|
||||
LL | 'nope'
|
||||
| ^^^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | "nope"
|
||||
| ^^^^^^
|
||||
|
||||
error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
|
||||
|
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// This test needs to the last one appearing in this file as it kills the parser
|
||||
static c: char =
|
||||
'● //~ ERROR: character literal may only contain one codepoint
|
||||
'● //~ ERROR: unterminated character literal
|
||||
;
|
||||
|
@ -1,8 +1,8 @@
|
||||
error: character literal may only contain one codepoint: '●
|
||||
error: unterminated character literal
|
||||
--> $DIR/lex-bad-char-literals-4.rs:4:5
|
||||
|
|
||||
LL | '●
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
|
||||
|
|
||||
LL | let x: &str = 'ab';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let x: &str = "ab";
|
||||
| ^^^^
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/lex-bad-char-literals-6.rs:4:19
|
||||
|
|
||||
LL | let y: char = 'cd';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let y: char = "cd";
|
||||
| ^^^^
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/lex-bad-char-literals-6.rs:6:13
|
||||
|
|
||||
LL | let z = 'ef';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let z = "ef";
|
||||
| ^^^^
|
||||
|
||||
error[E0277]: can't compare `&str` with `char`
|
||||
--> $DIR/lex-bad-char-literals-6.rs:9:10
|
||||
|
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
@ -0,0 +1,14 @@
|
||||
// compile-flags: -Z continue-parse-after-error
|
||||
fn main() {
|
||||
let _: char = '';
|
||||
//~^ ERROR: empty character literal
|
||||
let _: char = '\u{}';
|
||||
//~^ ERROR: empty unicode escape (must have at least 1 hex digit)
|
||||
|
||||
// Next two are OK, but may befool error recovery
|
||||
let _ = '/';
|
||||
let _ = b'/';
|
||||
|
||||
let _ = ' hello // here's a comment
|
||||
//~^ ERROR: unterminated character literal
|
||||
}
|
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
@ -0,0 +1,20 @@
|
||||
error: empty character literal
|
||||
--> $DIR/lex-bad-char-literals-7.rs:3:20
|
||||
|
|
||||
LL | let _: char = '';
|
||||
| ^
|
||||
|
||||
error: empty unicode escape (must have at least 1 hex digit)
|
||||
--> $DIR/lex-bad-char-literals-7.rs:5:20
|
||||
|
|
||||
LL | let _: char = '\u{}';
|
||||
| ^^^^
|
||||
|
||||
error: unterminated character literal
|
||||
--> $DIR/lex-bad-char-literals-7.rs:12:13
|
||||
|
|
||||
LL | let _ = ' hello // here's a comment
|
||||
| ^^^^^^^^
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
@ -0,0 +1,10 @@
|
||||
macro_rules! black_hole {
|
||||
($($tt:tt)*) => {}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
black_hole! { '\u{FFFFFF}' }
|
||||
//~^ ERROR: invalid unicode character escape
|
||||
black_hole! { "this is surrogate: \u{DAAA}" }
|
||||
//~^ ERROR: invalid unicode character escape
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/literals-are-validated-before-expansion.rs:6:20
|
||||
|
|
||||
LL | black_hole! { '\u{FFFFFF}' }
|
||||
| ^^^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must be at most 10FFFF
|
||||
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/literals-are-validated-before-expansion.rs:8:39
|
||||
|
|
||||
LL | black_hole! { "this is surrogate: \u{DAAA}" }
|
||||
| ^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must not be a surrogate
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
@ -1,8 +1,8 @@
|
||||
error: unterminated unicode escape (needed a `}`)
|
||||
--> $DIR/new-unicode-escapes-1.rs:2:21
|
||||
--> $DIR/new-unicode-escapes-1.rs:2:14
|
||||
|
|
||||
LL | let s = "\u{2603";
|
||||
| ^
|
||||
| ^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
error: overlong unicode escape (must have at most 6 hex digits)
|
||||
--> $DIR/new-unicode-escapes-2.rs:2:17
|
||||
--> $DIR/new-unicode-escapes-2.rs:2:14
|
||||
|
|
||||
LL | let s = "\u{260311111111}";
|
||||
| ^^^^^^^^^^^^
|
||||
| ^^^^^^^^^^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
@ -1,16 +1,16 @@
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/new-unicode-escapes-3.rs:2:14
|
||||
--> $DIR/new-unicode-escapes-3.rs:2:15
|
||||
|
|
||||
LL | let s1 = "\u{d805}";
|
||||
| ^^^^^^^^^^
|
||||
| ^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must not be a surrogate
|
||||
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/new-unicode-escapes-3.rs:3:14
|
||||
--> $DIR/new-unicode-escapes-3.rs:3:15
|
||||
|
|
||||
LL | let s2 = "\u{ffffff}";
|
||||
| ^^^^^^^^^^^^
|
||||
| ^^^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must be at most 10FFFF
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user