introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a string, and a second time when we unescape literals. This PR aims to remove this duplication by introducing a new `unescape` module as a single source of truth for character escaping rules.
2019-04-25 11:48:25 +03:00 · 2019-04-25 11:48:25 +03:00 · bfa5f27847
commit bfa5f27847
parent 9b67bd42b7
24 changed files with 1046 additions and 768 deletions
--- a/src/librustc_errors/diagnostic_builder.rs
+++ b/src/librustc_errors/diagnostic_builder.rs
@ -184,7 +184,7 @@ impl<'a> DiagnosticBuilder<'a> {
                                                  ) -> &mut Self);
    forward!(pub fn warn(&mut self, msg: &str) -> &mut Self);
    forward!(pub fn span_warn<S: Into<MultiSpan>>(&mut self, sp: S, msg: &str) -> &mut Self);
-    forward!(pub fn help(&mut self , msg: &str) -> &mut Self);
+    forward!(pub fn help(&mut self, msg: &str) -> &mut Self);
    forward!(pub fn span_help<S: Into<MultiSpan>>(&mut self,
                                                  sp: S,
                                                  msg: &str,
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@ -1,8 +1,10 @@
 use crate::ast::{self, Ident};
 use crate::parse::{token, ParseSess};
 use crate::symbol::Symbol;
+use crate::parse::unescape;
+use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

-use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder};
+use errors::{FatalError, Diagnostic, DiagnosticBuilder};
 use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
 use core::unicode::property::Pattern_White_Space;

@ -334,25 +336,12 @@ impl<'a> StringReader<'a> {
        self.err_span(self.mk_sp(from_pos, to_pos), m)
    }

-    /// Pushes a character to a message string for error reporting
-    fn push_escaped_char_for_msg(m: &mut String, c: char) {
-        match c {
-            '\u{20}'..='\u{7e}' => {
-                // Don't escape \, ' or " for user-facing messages
-                m.push(c);
-            }
-            _ => {
-                m.extend(c.escape_default());
-            }
-        }
-    }
-
    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
    /// escaped character to the error message
    fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
        let mut m = m.to_string();
        m.push_str(": ");
-        Self::push_escaped_char_for_msg(&mut m, c);
+        push_escaped_char(&mut m, c);

        self.fatal_span_(from_pos, to_pos, &m[..])
    }
@ -368,7 +357,7 @@ impl<'a> StringReader<'a> {
    {
        let mut m = m.to_string();
        m.push_str(": ");
-        Self::push_escaped_char_for_msg(&mut m, c);
+        push_escaped_char(&mut m, c);

        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
    }
@ -378,29 +367,10 @@ impl<'a> StringReader<'a> {
    fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
        let mut m = m.to_string();
        m.push_str(": ");
-        Self::push_escaped_char_for_msg(&mut m, c);
+        push_escaped_char(&mut m, c);
        self.err_span_(from_pos, to_pos, &m[..]);
    }

-    fn struct_err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
-        -> DiagnosticBuilder<'a>
-    {
-        let mut m = m.to_string();
-        m.push_str(": ");
-        Self::push_escaped_char_for_msg(&mut m, c);
-
-        self.sess.span_diagnostic.struct_span_err(self.mk_sp(from_pos, to_pos), &m[..])
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
-    /// offending string to the error message
-    fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
-        m.push_str(": ");
-        m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
-
-        self.fatal_span_(from_pos, to_pos, &m[..])
-    }
-
    /// Advance peek_tok and peek_span to refer to the next token, and
    /// possibly update the interner.
    fn advance_token(&mut self) -> Result<(), ()> {
@ -863,271 +833,6 @@ impl<'a> StringReader<'a> {
        }
    }

-    /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
-    /// error if too many or too few digits are encountered.
-    fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
-        debug!("scanning {} digits until {:?}", n_digits, delim);
-        let start_bpos = self.pos;
-        let mut accum_int = 0;
-
-        let mut valid = true;
-        for _ in 0..n_digits {
-            if self.is_eof() {
-                let last_bpos = self.pos;
-                self.fatal_span_(start_bpos,
-                                 last_bpos,
-                                 "unterminated numeric character escape").raise();
-            }
-            if self.ch_is(delim) {
-                let last_bpos = self.pos;
-                self.err_span_(start_bpos,
-                               last_bpos,
-                               "numeric character escape is too short");
-                valid = false;
-                break;
-            }
-            let c = self.ch.unwrap_or('\x00');
-            accum_int *= 16;
-            accum_int += c.to_digit(16).unwrap_or_else(|| {
-                self.err_span_char(self.pos,
-                                   self.next_pos,
-                                   "invalid character in numeric character escape",
-                                   c);
-
-                valid = false;
-                0
-            });
-            self.bump();
-        }
-
-        if below_0x7f_only && accum_int >= 0x80 {
-            self.err_span_(start_bpos,
-                           self.pos,
-                           "this form of character escape may only be used with characters in \
-                            the range [\\x00-\\x7f]");
-            valid = false;
-        }
-
-        match char::from_u32(accum_int) {
-            Some(_) => valid,
-            None => {
-                let last_bpos = self.pos;
-                self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
-                false
-            }
-        }
-    }
-
-    /// Scan for a single (possibly escaped) byte or char
-    /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
-    /// `start` is the position of `first_source_char`, which is already consumed.
-    ///
-    /// Returns `true` if there was a valid char/byte.
-    fn scan_char_or_byte(&mut self,
-                         start: BytePos,
-                         first_source_char: char,
-                         ascii_only: bool,
-                         delim: char)
-                         -> bool
-    {
-        match first_source_char {
-            '\\' => {
-                // '\X' for some X must be a character constant:
-                let escaped = self.ch;
-                let escaped_pos = self.pos;
-                self.bump();
-                match escaped {
-                    None => {}  // EOF here is an error that will be checked later.
-                    Some(e) => {
-                        return match e {
-                            'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
-                            'x' => self.scan_byte_escape(delim, !ascii_only),
-                            'u' => {
-                                let valid = if self.ch_is('{') {
-                                    self.scan_unicode_escape(delim) && !ascii_only
-                                } else {
-                                    let span = self.mk_sp(start, self.pos);
-                                    let mut suggestion = "\\u{".to_owned();
-                                    let msg = "incorrect unicode escape sequence";
-                                    let mut err = self.sess.span_diagnostic.struct_span_err(
-                                        span,
-                                        msg,
-                                    );
-                                    let mut i = 0;
-                                    while let (Some(ch), true) = (self.ch, i < 6) {
-                                        if ch.is_digit(16) {
-                                            suggestion.push(ch);
-                                            self.bump();
-                                            i += 1;
-                                        } else {
-                                            break;
-                                        }
-                                    }
-                                    if i != 0 {
-                                        suggestion.push('}');
-                                        err.span_suggestion(
-                                            self.mk_sp(start, self.pos),
-                                            "format of unicode escape sequences uses braces",
-                                            suggestion,
-                                            Applicability::MaybeIncorrect,
-                                        );
-                                    } else {
-                                        err.span_label(span, msg);
-                                        err.help(
-                                            "format of unicode escape sequences is `\\u{...}`",
-                                        );
-                                    }
-                                    err.emit();
-                                    false
-                                };
-                                if ascii_only {
-                                    self.err_span_(start,
-                                                   self.pos,
-                                                   "unicode escape sequences cannot be used as a \
-                                                    byte or in a byte string");
-                                }
-                                valid
-
-                            }
-                            '\n' if delim == '"' => {
-                                self.consume_whitespace();
-                                true
-                            }
-                            '\r' if delim == '"' && self.ch_is('\n') => {
-                                self.consume_whitespace();
-                                true
-                            }
-                            c => {
-                                let pos = self.pos;
-                                let msg = if ascii_only {
-                                    "unknown byte escape"
-                                } else {
-                                    "unknown character escape"
-                                };
-                                let mut err = self.struct_err_span_char(escaped_pos, pos, msg, c);
-                                err.span_label(self.mk_sp(escaped_pos, pos), msg);
-                                if e == '\r' {
-                                    err.help(
-                                        "this is an isolated carriage return; consider checking \
-                                         your editor and version control settings",
-                                    );
-                                }
-                                if (e == '{' || e == '}') && !ascii_only {
-                                    err.help(
-                                        "if used in a formatting string, curly braces are escaped \
-                                         with `{{` and `}}`",
-                                    );
-                                }
-                                err.emit();
-                                false
-                            }
-                        }
-                    }
-                }
-            }
-            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
-                let pos = self.pos;
-                self.err_span_char(start,
-                                   pos,
-                                   if ascii_only {
-                                       "byte constant must be escaped"
-                                   } else {
-                                       "character constant must be escaped"
-                                   },
-                                   first_source_char);
-                return false;
-            }
-            '\r' => {
-                if self.ch_is('\n') {
-                    self.bump();
-                    return true;
-                } else {
-                    self.err_span_(start,
-                                   self.pos,
-                                   "bare CR not allowed in string, use \\r instead");
-                    return false;
-                }
-            }
-            _ => {
-                if ascii_only && first_source_char > '\x7F' {
-                    let pos = self.pos;
-                    self.err_span_(start,
-                                   pos,
-                                   "byte constant must be ASCII. Use a \\xHH escape for a \
-                                    non-ASCII byte");
-                    return false;
-                }
-            }
-        }
-        true
-    }
-
-    /// Scan over a `\u{...}` escape
-    ///
-    /// At this point, we have already seen the `\` and the `u`, the `{` is the current character.
-    /// We will read a hex number (with `_` separators), with 1 to 6 actual digits,
-    /// and pass over the `}`.
-    fn scan_unicode_escape(&mut self, delim: char) -> bool {
-        self.bump(); // past the {
-        let start_bpos = self.pos;
-        let mut valid = true;
-
-        if let Some('_') = self.ch {
-            // disallow leading `_`
-            self.err_span_(self.pos,
-                           self.next_pos,
-                           "invalid start of unicode escape");
-            valid = false;
-        }
-
-        let count = self.scan_digits(16, 16);
-
-        if count > 6 {
-            self.err_span_(start_bpos,
-                           self.pos,
-                           "overlong unicode escape (must have at most 6 hex digits)");
-            valid = false;
-        }
-
-        loop {
-            match self.ch {
-                Some('}') => {
-                    if valid && count == 0 {
-                        self.err_span_(start_bpos,
-                                       self.pos,
-                                       "empty unicode escape (must have at least 1 hex digit)");
-                        valid = false;
-                    }
-                    self.bump(); // past the ending `}`
-                    break;
-                },
-                Some(c) => {
-                    if c == delim {
-                        self.err_span_(self.pos,
-                                       self.pos,
-                                       "unterminated unicode escape (needed a `}`)");
-                        valid = false;
-                        break;
-                    } else if valid {
-                        self.err_span_char(start_bpos,
-                                           self.pos,
-                                           "invalid character in unicode escape",
-                                           c);
-                        valid = false;
-                    }
-                },
-                None => {
-                    self.fatal_span_(start_bpos,
-                                     self.pos,
-                                     "unterminated unicode escape (found EOF)").raise();
-                }
-            }
-            self.bump();
-        }
-
-        valid
-    }
-
    /// Scan over a float exponent.
    fn scan_float_exponent(&mut self) {
        if self.ch_is('e') || self.ch_is('E') {
@ -1393,26 +1098,21 @@ impl<'a> StringReader<'a> {
                self.bump();
                let start = self.pos;

-                // the eof will be picked up by the final `'` check below
-                let c2 = self.ch.unwrap_or('\x00');
-                self.bump();
-
                // If the character is an ident start not followed by another single
                // quote, then this is a lifetime name:
-                if (ident_start(Some(c2)) || c2.is_numeric()) && !self.ch_is('\'') {
+                let starts_with_number = self.ch.unwrap_or('\x00').is_numeric();
+                if (ident_start(self.ch) || starts_with_number) && !self.nextch_is('\'') {
+                    self.bump();
                    while ident_continue(self.ch) {
                        self.bump();
                    }
                    // lifetimes shouldn't end with a single quote
                    // if we find one, then this is an invalid character literal
                    if self.ch_is('\'') {
-                        self.err_span_(
-                            start_with_quote,
-                            self.next_pos,
-                            "character literal may only contain one codepoint");
+                        let id = self.name_from(start);
                        self.bump();
-                        return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
-
+                        self.validate_char_escape(start_with_quote);
+                        return Ok(token::Literal(token::Char(id), None))
                    }

                    // Include the leading `'` in the real identifier, for macro
@ -1422,7 +1122,7 @@ impl<'a> StringReader<'a> {
                        self.mk_ident(lifetime_name)
                    });

-                    if c2.is_numeric() {
+                    if starts_with_number {
                        // this is a recovered lifetime written `'1`, error but accept it
                        self.err_span_(
                            start_with_quote,
@ -1433,58 +1133,30 @@ impl<'a> StringReader<'a> {

                    return Ok(token::Lifetime(ident));
                }
-
-                let valid = self.scan_char_or_byte(start, c2, /* ascii_only */ false, '\'');
-
-                if !self.ch_is('\'') {
-                    let pos = self.pos;
-
-                    loop {
-                        self.bump();
-                        if self.ch_is('\'') {
-                            let start = self.src_index(start);
-                            let end = self.src_index(self.pos);
-                            self.bump();
-                            let span = self.mk_sp(start_with_quote, self.pos);
-                            self.sess.span_diagnostic
-                                .struct_span_err(span,
-                                                 "character literal may only contain one codepoint")
-                                .span_suggestion(
-                                    span,
-                                    "if you meant to write a `str` literal, use double quotes",
-                                    format!("\"{}\"", &self.src[start..end]),
-                                    Applicability::MachineApplicable
-                                ).emit();
-                            return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
-                        }
-                        if self.ch_is('\n') || self.is_eof() || self.ch_is('/') {
-                            // Only attempt to infer single line string literals. If we encounter
-                            // a slash, bail out in order to avoid nonsensical suggestion when
-                            // involving comments.
-                            break;
-                        }
-                    }
-
-                    self.fatal_span_verbose(start_with_quote, pos,
-                        String::from("character literal may only contain one codepoint")).raise();
-                }
-
-                let id = if valid {
-                    self.name_from(start)
-                } else {
-                    Symbol::intern("0")
-                };
-
-                self.bump(); // advance ch past token
+                let msg = "unterminated character literal";
+                let id = self.scan_single_quoted_string(start_with_quote, msg);
+                self.validate_char_escape(start_with_quote);
                let suffix = self.scan_optional_raw_name();
-
                Ok(token::Literal(token::Char(id), suffix))
            }
            'b' => {
                self.bump();
                let lit = match self.ch {
-                    Some('\'') => self.scan_byte(),
-                    Some('"') => self.scan_byte_string(),
+                    Some('\'') => {
+                        let start_with_quote = self.pos;
+                        self.bump();
+                        let msg = "unterminated byte constant";
+                        let id = self.scan_single_quoted_string(start_with_quote, msg);
+                        self.validate_byte_escape(start_with_quote);
+                        token::Byte(id)
+                    },
+                    Some('"') => {
+                        let start_with_quote = self.pos;
+                        let msg = "unterminated double quote byte string";
+                        let id = self.scan_double_quoted_string(msg);
+                        self.validate_byte_str_escape(start_with_quote);
+                        token::ByteStr(id)
+                    },
                    Some('r') => self.scan_raw_byte_string(),
                    _ => unreachable!(),  // Should have been a token::Ident above.
                };
@ -1493,32 +1165,11 @@ impl<'a> StringReader<'a> {
                Ok(token::Literal(lit, suffix))
            }
            '"' => {
-                let start_bpos = self.pos;
-                let mut valid = true;
-                self.bump();
-
-                while !self.ch_is('"') {
-                    if self.is_eof() {
-                        let last_bpos = self.pos;
-                        self.fatal_span_(start_bpos,
-                                         last_bpos,
-                                         "unterminated double quote string").raise();
-                    }
-
-                    let ch_start = self.pos;
-                    let ch = self.ch.unwrap();
-                    self.bump();
-                    valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only */ false, '"');
-                }
-                // adjust for the ASCII " at the start of the literal
-                let id = if valid {
-                    self.name_from(start_bpos + BytePos(1))
-                } else {
-                    Symbol::intern("??")
-                };
-                self.bump();
+                let start_with_quote = self.pos;
+                let msg = "unterminated double quote string";
+                let id = self.scan_double_quoted_string(msg);
+                self.validate_str_escape(start_with_quote);
                let suffix = self.scan_optional_raw_name();
-
                Ok(token::Literal(token::Str_(id), suffix))
            }
            'r' => {
@ -1659,12 +1310,6 @@ impl<'a> StringReader<'a> {
        }
    }

-    fn consume_whitespace(&mut self) {
-        while is_pattern_whitespace(self.ch) && !self.is_eof() {
-            self.bump();
-        }
-    }
-
    fn read_to_eol(&mut self) -> String {
        let mut val = String::new();
        while !self.ch_is('\n') && !self.is_eof() {
@ -1698,73 +1343,63 @@ impl<'a> StringReader<'a> {
        (self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
    }

-    fn scan_byte(&mut self) -> token::Lit {
-        self.bump();
+    fn scan_single_quoted_string(&mut self,
+                                 start_with_quote: BytePos,
+                                 unterminated_msg: &str) -> ast::Name {
+        // assumes that first `'` is consumed
        let start = self.pos;
+        // lex `'''` as a single char, for recovery
+        if self.ch_is('\'') && self.nextch_is('\'') {
+            self.bump();
+        } else {
+            let mut first = true;
+            loop {
+                if self.ch_is('\'') {
+                    break;
+                }
+                if self.ch_is('\\') && (self.nextch_is('\'') || self.nextch_is('\\')) {
+                    self.bump();
+                    self.bump();
+                } else {
+                    // Only attempt to infer single line string literals. If we encounter
+                    // a slash, bail out in order to avoid nonsensical suggestion when
+                    // involving comments.
+                    if self.is_eof()
+                        || (self.ch_is('/') && !first)
+                        || (self.ch_is('\n') && !self.nextch_is('\'')) {

-        // the eof will be picked up by the final `'` check below
-        let c2 = self.ch.unwrap_or('\x00');
-        self.bump();
-
-        let valid = self.scan_char_or_byte(start,
-                                           c2,
-                                           // ascii_only =
-                                           true,
-                                           '\'');
-        if !self.ch_is('\'') {
-            // Byte offsetting here is okay because the
-            // character before position `start` are an
-            // ascii single quote and ascii 'b'.
-            let pos = self.pos;
-            self.fatal_span_verbose(start - BytePos(2),
-                                    pos,
-                                    "unterminated byte constant".to_string()).raise();
+                        self.fatal_span_(start_with_quote, self.pos, unterminated_msg.into())
+                            .raise()
+                    }
+                    self.bump();
+                }
+                first = false;
+            }
        }

-        let id = if valid {
-            self.name_from(start)
-        } else {
-            Symbol::intern("?")
-        };
-        self.bump(); // advance ch past token
-
-        token::Byte(id)
+        let id = self.name_from(start);
+        self.bump();
+        id
    }

-    #[inline]
-    fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
-        self.scan_hex_digits(2, delim, below_0x7f_only)
-    }
-
-    fn scan_byte_string(&mut self) -> token::Lit {
+    fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
+        debug_assert!(self.ch_is('\"'));
+        let start_with_quote = self.pos;
        self.bump();
        let start = self.pos;
-        let mut valid = true;
-
        while !self.ch_is('"') {
            if self.is_eof() {
                let pos = self.pos;
-                self.fatal_span_(start, pos, "unterminated double quote byte string").raise();
+                self.fatal_span_(start_with_quote, pos, unterminated_msg).raise();
+            }
+            if self.ch_is('\\') && (self.nextch_is('\\') || self.nextch_is('"')) {
+                self.bump();
            }
-
-            let ch_start = self.pos;
-            let ch = self.ch.unwrap();
            self.bump();
-            valid &= self.scan_char_or_byte(ch_start,
-                                            ch,
-                                            // ascii_only =
-                                            true,
-                                            '"');
        }
-
-        let id = if valid {
-            self.name_from(start)
-        } else {
-            Symbol::intern("??")
-        };
+        let id = self.name_from(start);
        self.bump();
-
-        token::ByteStr(id)
+        id
    }

    fn scan_raw_byte_string(&mut self) -> token::Lit {
@ -1826,6 +1461,70 @@ impl<'a> StringReader<'a> {

        token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count)
    }
+
+    fn validate_char_escape(&self, start_with_quote: BytePos) {
+        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
+            if let Err((off, err)) = unescape::unescape_char(lit) {
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit,
+                    self.mk_sp(start_with_quote, self.pos),
+                    unescape::Mode::Char,
+                    0..off,
+                    err,
+                )
+            }
+        });
+    }
+
+    fn validate_byte_escape(&self, start_with_quote: BytePos) {
+        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
+            if let Err((off, err)) = unescape::unescape_byte(lit) {
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit,
+                    self.mk_sp(start_with_quote, self.pos),
+                    unescape::Mode::Byte,
+                    0..off,
+                    err,
+                )
+            }
+        });
+    }
+
+    fn validate_str_escape(&self, start_with_quote: BytePos) {
+        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
+            unescape::unescape_str(lit, &mut |range, c| {
+                if let Err(err) = c {
+                    emit_unescape_error(
+                        &self.sess.span_diagnostic,
+                        lit,
+                        self.mk_sp(start_with_quote, self.pos),
+                        unescape::Mode::Str,
+                        range,
+                        err,
+                    )
+                }
+            })
+        });
+    }
+
+    fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
+        self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
+            unescape::unescape_byte_str(lit, &mut |range, c| {
+                if let Err(err) = c {
+                    emit_unescape_error(
+                        &self.sess.span_diagnostic,
+                        lit,
+                        self.mk_sp(start_with_quote, self.pos),
+                        unescape::Mode::ByteStr,
+                        range,
+                        err,
+                    )
+                }
+            })
+        });
+    }
 }

 // This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@ -18,7 +18,6 @@ use log::debug;

 use rustc_data_structures::fx::FxHashSet;
 use std::borrow::Cow;
-use std::iter;
 use std::path::{Path, PathBuf};
 use std::str;

@ -33,6 +32,11 @@ pub mod attr;

 pub mod classify;

+pub(crate) mod unescape;
+use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte, EscapeError};
+
+pub(crate) mod unescape_error_reporting;
+
 /// Info about a parsing session.
 pub struct ParseSess {
    pub span_diagnostic: Handler,
@ -306,133 +310,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> {
    Parser::new(sess, stream, None, true, false)
 }

-/// Parses a string representing a character literal into its final form.
-/// Rather than just accepting/rejecting a given literal, unescapes it as
-/// well. Can take any slice prefixed by a character escape. Returns the
-/// character and the number of characters consumed.
-fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) {
-    use std::char;
-
-    // Handle non-escaped chars first.
-    if lit.as_bytes()[0] != b'\\' {
-        // If the first byte isn't '\\' it might part of a multi-byte char, so
-        // get the char with chars().
-        let c = lit.chars().next().unwrap();
-        return (c, 1);
-    }
-
-    // Handle escaped chars.
-    match lit.as_bytes()[1] as char {
-        '"' => ('"', 2),
-        'n' => ('\n', 2),
-        'r' => ('\r', 2),
-        't' => ('\t', 2),
-        '\\' => ('\\', 2),
-        '\'' => ('\'', 2),
-        '0' => ('\0', 2),
-        'x' => {
-            let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
-            let c = char::from_u32(v).unwrap();
-            (c, 4)
-        }
-        'u' => {
-            assert_eq!(lit.as_bytes()[2], b'{');
-            let idx = lit.find('}').unwrap();
-
-            // All digits and '_' are ascii, so treat each byte as a char.
-            let mut v: u32 = 0;
-            for c in lit[3..idx].bytes() {
-                let c = char::from(c);
-                if c != '_' {
-                    let x = c.to_digit(16).unwrap();
-                    v = v.checked_mul(16).unwrap().checked_add(x).unwrap();
-                }
-            }
-            let c = char::from_u32(v).unwrap_or_else(|| {
-                if let Some((span, diag)) = diag {
-                    let mut diag = diag.struct_span_err(span, "invalid unicode character escape");
-                    if v > 0x10FFFF {
-                        diag.help("unicode escape must be at most 10FFFF").emit();
-                    } else {
-                        diag.help("unicode escape must not be a surrogate").emit();
-                    }
-                }
-                '\u{FFFD}'
-            });
-            (c, (idx + 1) as isize)
-        }
-        _ => panic!("lexer should have rejected a bad character escape {}", lit)
-    }
-}
-
-/// Parses a string representing a string literal into its final form. Does unescaping.
-fn str_lit(lit: &str, diag: Option<(Span, &Handler)>) -> String {
-    debug!("str_lit: given {}", lit.escape_default());
-    let mut res = String::with_capacity(lit.len());
-
-    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
-
-    /// Eat everything up to a non-whitespace.
-    fn eat<'a>(it: &mut iter::Peekable<str::CharIndices<'a>>) {
-        loop {
-            match it.peek().map(|x| x.1) {
-                Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
-                    it.next();
-                },
-                _ => { break; }
-            }
-        }
-    }
-
-    let mut chars = lit.char_indices().peekable();
-    while let Some((i, c)) = chars.next() {
-        match c {
-            '\\' => {
-                let ch = chars.peek().unwrap_or_else(|| {
-                    panic!("{}", error(i))
-                }).1;
-
-                if ch == '\n' {
-                    eat(&mut chars);
-                } else if ch == '\r' {
-                    chars.next();
-                    let ch = chars.peek().unwrap_or_else(|| {
-                        panic!("{}", error(i))
-                    }).1;
-
-                    if ch != '\n' {
-                        panic!("lexer accepted bare CR");
-                    }
-                    eat(&mut chars);
-                } else {
-                    // otherwise, a normal escape
-                    let (c, n) = char_lit(&lit[i..], diag);
-                    for _ in 0..n - 1 { // we don't need to move past the first \
-                        chars.next();
-                    }
-                    res.push(c);
-                }
-            },
-            '\r' => {
-                let ch = chars.peek().unwrap_or_else(|| {
-                    panic!("{}", error(i))
-                }).1;
-
-                if ch != '\n' {
-                    panic!("lexer accepted bare CR");
-                }
-                chars.next();
-                res.push('\n');
-            }
-            c => res.push(c),
-        }
-    }
-
-    res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
-    debug!("parse_str_lit: returning {}", res);
-    res
-}
-
 /// Parses a string representing a raw string literal into its final form. The
 /// only operation this does is convert embedded CRLF into a single LF.
 fn raw_str_lit(lit: &str) -> String {
@ -475,9 +352,23 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
    use ast::LitKind;

    match lit {
-       token::Byte(i) => (true, Some(LitKind::Byte(byte_lit(&i.as_str()).0))),
-       token::Char(i) => (true, Some(LitKind::Char(char_lit(&i.as_str(), diag).0))),
-       token::Err(i) => (true, Some(LitKind::Err(i))),
+        token::Byte(i) => {
+            let lit_kind = match unescape_byte(&i.as_str()) {
+                Ok(c) => LitKind::Byte(c),
+                Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
+                Err(_) => LitKind::Byte(0),
+            };
+            (true, Some(lit_kind))
+        },
+        token::Char(i) => {
+            let lit_kind = match unescape_char(&i.as_str()) {
+                Ok(c) => LitKind::Char(c),
+                Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
+                Err(_) => LitKind::Char('\u{FFFD}'),
+            };
+            (true, Some(lit_kind))
+        },
+        token::Err(i) => (true, Some(LitKind::Err(i))),

        // There are some valid suffixes for integer and float literals,
        // so all the handling is done internally.
@ -491,7 +382,14 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
            // string in the Token.
            let s = &sym.as_str();
            if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
-                sym = Symbol::intern(&str_lit(s, diag));
+                let mut buf = String::with_capacity(s.len());
+                unescape_str(s, &mut |_, unescaped_char| {
+                    match unescaped_char {
+                        Ok(c) => buf.push(c),
+                        Err(_) => buf.push('\u{FFFD}'),
+                    }
+                });
+                sym = Symbol::intern(&buf)
            }
            (true, Some(LitKind::Str(sym, ast::StrStyle::Cooked)))
        }
@ -504,7 +402,16 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
            (true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n))))
        }
        token::ByteStr(i) => {
-            (true, Some(LitKind::ByteStr(byte_str_lit(&i.as_str()))))
+            let s = &i.as_str();
+            let mut buf = Vec::with_capacity(s.len());
+            unescape_byte_str(s, &mut |_, unescaped_byte| {
+                match unescaped_byte {
+                    Ok(c) => buf.push(c),
+                    Err(_) => buf.push(0),
+                }
+            });
+            buf.shrink_to_fit();
+            (true, Some(LitKind::ByteStr(Lrc::new(buf))))
        }
        token::ByteStrRaw(i, _) => {
            (true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))))
@ -559,95 +466,6 @@ fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
    filtered_float_lit(Symbol::intern(s), suffix, diag)
 }

-/// Parses a string representing a byte literal into its final form. Similar to `char_lit`.
-fn byte_lit(lit: &str) -> (u8, usize) {
-    let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i);
-
-    if lit.len() == 1 {
-        (lit.as_bytes()[0], 1)
-    } else {
-        assert_eq!(lit.as_bytes()[0], b'\\', "{}", err(0));
-        let b = match lit.as_bytes()[1] {
-            b'"' => b'"',
-            b'n' => b'\n',
-            b'r' => b'\r',
-            b't' => b'\t',
-            b'\\' => b'\\',
-            b'\'' => b'\'',
-            b'0' => b'\0',
-            _ => {
-                match u64::from_str_radix(&lit[2..4], 16).ok() {
-                    Some(c) =>
-                        if c > 0xFF {
-                            panic!(err(2))
-                        } else {
-                            return (c as u8, 4)
-                        },
-                    None => panic!(err(3))
-                }
-            }
-        };
-        (b, 2)
-    }
-}
-
-fn byte_str_lit(lit: &str) -> Lrc<Vec<u8>> {
-    let mut res = Vec::with_capacity(lit.len());
-
-    let error = |i| panic!("lexer should have rejected {} at {}", lit, i);
-
-    /// Eat everything up to a non-whitespace.
-    fn eat<I: Iterator<Item=(usize, u8)>>(it: &mut iter::Peekable<I>) {
-        loop {
-            match it.peek().map(|x| x.1) {
-                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
-                    it.next();
-                },
-                _ => { break; }
-            }
-        }
-    }
-
-    // byte string literals *must* be ASCII, but the escapes don't have to be
-    let mut chars = lit.bytes().enumerate().peekable();
-    loop {
-        match chars.next() {
-            Some((i, b'\\')) => {
-                match chars.peek().unwrap_or_else(|| error(i)).1 {
-                    b'\n' => eat(&mut chars),
-                    b'\r' => {
-                        chars.next();
-                        if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
-                            panic!("lexer accepted bare CR");
-                        }
-                        eat(&mut chars);
-                    }
-                    _ => {
-                        // otherwise, a normal escape
-                        let (c, n) = byte_lit(&lit[i..]);
-                        // we don't need to move past the first \
-                        for _ in 0..n - 1 {
-                            chars.next();
-                        }
-                        res.push(c);
-                    }
-                }
-            },
-            Some((i, b'\r')) => {
-                if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
-                    panic!("lexer accepted bare CR");
-                }
-                chars.next();
-                res.push(b'\n');
-            }
-            Some((_, c)) => res.push(c),
-            None => break,
-        }
-    }
-
-    Lrc::new(res)
-}
-
 fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
                   -> Option<ast::LitKind> {
    // s can only be ascii, byte indexing is fine
--- a/src/libsyntax/parse/unescape.rs
+++ b/src/libsyntax/parse/unescape.rs
@ -0,0 +1,515 @@
+//! Utilities for validating string and char literals and turning them into
+//! the values they represent.
+
+use std::str::Chars;
+use std::ops::Range;
+
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) enum EscapeError { // errors reported while unescaping a literal
+    ZeroChars, // char or byte literal with nothing in it
+    MoreThanOneChar, // char or byte literal with input left after the first char
+
+    LoneSlash, // `\` that is the last char of the input
+    InvalidEscape, // `\` followed by a char that starts no known escape
+    BareCarriageReturn, // raw `\r` that is not followed by `\n`
+    EscapeOnlyChar, // raw char rejected by `scan_escape`, e.g. the enclosing quote
+
+    TooShortHexEscape, // input ends before `\x` got its two digits
+    InvalidCharInHexEscape, // non-hex-digit char after `\x`
+    OutOfRangeHexEscape, // `\x` value above 0x7F outside the byte modes
+
+    NoBraceInUnicodeEscape, // `\u` not followed by `{`
+    InvalidCharInUnicodeEscape, // non-hex-digit char inside `\u{...}`
+    EmptyUnicodeEscape, // `\u{}`
+    UnclosedUnicodeEscape, // input ends before the closing `}`
+    LeadingUnderscoreUnicodeEscape, // `_` right after `\u{`
+    OverlongUnicodeEscape, // more than six hex digits
+    LoneSurrogateUnicodeEscape, // value is not a valid char but is at most 0x10FFFF
+    OutOfRangeUnicodeEscape, // value is above 0x10FFFF
+
+    UnicodeEscapeInByte, // `\u{...}` escape in a byte mode
+    NonAsciiCharInByte, // raw non-ASCII char in a byte mode
+}
+
+/// Takes the contents of a char literal (without quotes) and returns the unescaped
+/// char, or an error paired with the number of input bytes consumed when it was hit.
+pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
+    let mut chars = literal_text.chars();
+    unescape_char_or_byte(&mut chars, Mode::Char)
+        .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
+}
+
+/// Takes the contents of a string literal (without quotes) and calls `callback` with the
+/// source byte range and the unescaped char, or the error, of each char or escape in it.
+pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<char, EscapeError>),
+{
+    unescape_str_or_byte_str(literal_text, Mode::Str, callback)
+}
+
+pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
+    let mut chars = literal_text.chars(); // contents of a byte literal, without quotes
+    unescape_char_or_byte(&mut chars, Mode::Byte)
+        .map(byte_from_char) // an `Ok` char in `Mode::Byte` is asserted to fit in `u8`
+        .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) // bytes consumed
+}
+
+/// Takes the contents of a byte string literal (without quotes) and calls `callback` with
+/// the source byte range and the unescaped byte, or the error, of each char or escape.
+pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<u8, EscapeError>),
+{
+    unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
+        callback(range, char.map(byte_from_char)) // only `Ok` chars are narrowed to `u8`
+    })
+}
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum Mode { // kind of literal being unescaped
+    Char, // single-quoted, not bytes
+    Str, // double-quoted, not bytes
+    Byte, // single-quoted, bytes
+    ByteStr, // double-quoted, bytes
+}
+
+impl Mode {
+    fn in_single_quotes(self) -> bool { // true for `Char` and `Byte`
+        match self {
+            Mode::Char | Mode::Byte => true,
+            Mode::Str | Mode::ByteStr => false,
+        }
+    }
+
+    pub(crate) fn in_double_quotes(self) -> bool { // true for `Str` and `ByteStr`
+        !self.in_single_quotes()
+    }
+
+    pub(crate) fn is_bytes(self) -> bool { // true for `Byte` and `ByteStr`
+        match self {
+            Mode::Byte | Mode::ByteStr => true,
+            Mode::Char | Mode::Str => false,
+        }
+    }
+}
+
+
+fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
+    if first_char != '\\' { // not an escape: validate the raw char
+        return match first_char {
+            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar), // string callers match these first
+            '\r' => Err(if chars.clone().next() == Some('\n') { // peeks without consuming
+                EscapeError::EscapeOnlyChar
+            } else {
+                EscapeError::BareCarriageReturn
+            }),
+            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
+            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
+            _ => {
+                if mode.is_bytes() && !first_char.is_ascii() {
+                    return Err(EscapeError::NonAsciiCharInByte);
+                }
+                Ok(first_char)
+            }
+        };
+    }
+
+    let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; // char right after `\`
+
+    let res = match second_char {
+        '"' => '"',
+        'n' => '\n',
+        'r' => '\r',
+        't' => '\t',
+        '\\' => '\\',
+        '\'' => '\'',
+        '0' => '\0',
+
+        'x' => {
+            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
+            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
+
+            let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
+            let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
+
+            let value = hi * 16 + lo;
+
+            if !mode.is_bytes() && !is_ascii(value) { // char and str allow 0x00..=0x7F only
+                return Err(EscapeError::OutOfRangeHexEscape);
+            }
+            let value = value as u8; // lossless: two hex digits give at most 0xFF
+
+            value as char
+        }
+
+        'u' => {
+            if chars.next() != Some('{') {
+                return Err(EscapeError::NoBraceInUnicodeEscape);
+            }
+
+            let mut n_digits = 1; // counts the first digit, which is parsed just below
+            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
+                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
+                '}' => return Err(EscapeError::EmptyUnicodeEscape),
+                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
+            };
+
+            loop {
+                match chars.next() {
+                    None => return Err(EscapeError::UnclosedUnicodeEscape),
+                    Some('_') => continue, // separators do not count as digits
+                    Some('}') => {
+                        if n_digits > 6 {
+                            return Err(EscapeError::OverlongUnicodeEscape);
+                        }
+                        if mode.is_bytes() { // checked only once the escape is closed
+                            return Err(EscapeError::UnicodeEscapeInByte);
+                        }
+
+                        break std::char::from_u32(value).ok_or_else(|| { // `None`: not a char
+                            if value > 0x10FFFF {
+                                EscapeError::OutOfRangeUnicodeEscape
+                            } else {
+                                EscapeError::LoneSurrogateUnicodeEscape
+                            }
+                        })?;
+                    }
+                    Some(c) => {
+                        let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
+                        n_digits += 1;
+                        if n_digits > 6 { // keep scanning, but stop accumulating
+                            continue;
+                        }
+                        let digit = digit as u32;
+                        value = value * 16 + digit; // at most six digits, so this fits in u32
+                    }
+                };
+            }
+        }
+        _ => return Err(EscapeError::InvalidEscape),
+    };
+    Ok(res)
+}
+
+fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
+    let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; // empty literal
+    let res = scan_escape(first_char, chars, mode)?;
+    if chars.next().is_some() { // anything left over means more than one char
+        return Err(EscapeError::MoreThanOneChar);
+    }
+    Ok(res)
+}
+
+/// Takes the contents of a string or byte string literal (without quotes) and calls
+/// `callback` with the source byte range and result of each char or escape in it.
+fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<char, EscapeError>),
+{
+    assert!(mode.in_double_quotes()); // only `Mode::Str` and `Mode::ByteStr` pass
+    let initial_len = src.len();
+    let mut chars = src.chars();
+    while let Some(first_char) = chars.next() {
+        let start = initial_len - chars.as_str().len() - first_char.len_utf8(); // byte offset
+
+        let unescaped_char = match first_char {
+            '\\' => {
+                let (second_char, third_char) = { // look ahead on a clone
+                    let mut chars = chars.clone();
+                    (chars.next(), chars.next())
+                };
+                match (second_char, third_char) {
+                    (Some('\n'), _) | (Some('\r'), Some('\n')) => { // `\` before a line break
+                        skip_ascii_whitespace(&mut chars);
+                        continue; // no callback for the skipped text
+                    }
+                    _ => scan_escape(first_char, &mut chars, mode),
+                }
+            }
+            '\r' => {
+                let second_char = chars.clone().next();
+                if second_char == Some('\n') {
+                    chars.next();
+                    Ok('\n') // CRLF is reported as a single LF
+                } else {
+                    scan_escape(first_char, &mut chars, mode) // yields BareCarriageReturn
+                }
+            }
+            '\n' => Ok('\n'),
+            '\t' => Ok('\t'),
+            _ => scan_escape(first_char, &mut chars, mode),
+        };
+        let end = initial_len - chars.as_str().len();
+        callback(start..end, unescaped_char);
+    }
+
+    fn skip_ascii_whitespace(chars: &mut Chars<'_>) { // advances to the first non-space
+        let str = chars.as_str();
+        let first_non_space = str
+            .bytes()
+            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
+            .unwrap_or(str.len()); // all whitespace: consume everything
+        *chars = str[first_non_space..].chars()
+    }
+}
+
+fn byte_from_char(c: char) -> u8 { // narrows a char produced in a byte mode
+    let res = c as u32;
+    assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte");
+    res as u8 // cannot truncate after the assert above
+}
+
+fn is_ascii(x: u32) -> bool { // true when `x` is in the ASCII range 0..=0x7F
+    x <= 0x7F
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_unescape_char_bad() {
+        fn check(literal_text: &str, expected_error: EscapeError) { // offset is ignored
+            let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
+            assert_eq!(actual_result, Err(expected_error));
+        }
+
+        check("", EscapeError::ZeroChars);
+        check(r"\", EscapeError::LoneSlash);
+
+        check("\n", EscapeError::EscapeOnlyChar);
+        check("\r\n", EscapeError::EscapeOnlyChar);
+        check("\t", EscapeError::EscapeOnlyChar);
+        check("'", EscapeError::EscapeOnlyChar);
+        check("\r", EscapeError::BareCarriageReturn);
+
+        check("spam", EscapeError::MoreThanOneChar);
+        check(r"\x0ff", EscapeError::MoreThanOneChar);
+        check(r#"\"a"#, EscapeError::MoreThanOneChar);
+        check(r"\na", EscapeError::MoreThanOneChar);
+        check(r"\ra", EscapeError::MoreThanOneChar);
+        check(r"\ta", EscapeError::MoreThanOneChar);
+        check(r"\\a", EscapeError::MoreThanOneChar);
+        check(r"\'a", EscapeError::MoreThanOneChar);
+        check(r"\0a", EscapeError::MoreThanOneChar);
+        check(r"\u{0}x", EscapeError::MoreThanOneChar);
+        check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);
+
+        check(r"\v", EscapeError::InvalidEscape);
+        check(r"\💩", EscapeError::InvalidEscape);
+        check(r"\●", EscapeError::InvalidEscape);
+
+        check(r"\x", EscapeError::TooShortHexEscape);
+        check(r"\x0", EscapeError::TooShortHexEscape);
+        check(r"\xf", EscapeError::TooShortHexEscape);
+        check(r"\xa", EscapeError::TooShortHexEscape);
+        check(r"\xx", EscapeError::InvalidCharInHexEscape);
+        check(r"\xы", EscapeError::InvalidCharInHexEscape);
+        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
+        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
+        check(r"\xff", EscapeError::OutOfRangeHexEscape);
+        check(r"\xFF", EscapeError::OutOfRangeHexEscape);
+        check(r"\x80", EscapeError::OutOfRangeHexEscape);
+
+        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
+        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
+        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
+        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
+        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
+        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
+        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
+        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
+        check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
+        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
+        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); // repeats the previous case
+
+        check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); // low surrogate range
+        check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
+        check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);
+
+        check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); // high surrogate range
+        check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
+        check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
+    }
+
+    #[test]
+    fn test_unescape_char_good() {
+        fn check(literal_text: &str, expected_char: char) { // expects a successful unescape
+            let actual_result = unescape_char(literal_text);
+            assert_eq!(actual_result, Ok(expected_char));
+        }
+
+        check("a", 'a');
+        check("ы", 'ы');
+        check("🦀", '🦀');
+
+        check(r#"\""#, '"');
+        check(r"\n", '\n');
+        check(r"\r", '\r');
+        check(r"\t", '\t');
+        check(r"\\", '\\');
+        check(r"\'", '\'');
+        check(r"\0", '\0');
+
+        check(r"\x00", '\0');
+        check(r"\x5a", 'Z');
+        check(r"\x5A", 'Z');
+        check(r"\x7f", 127 as char); // largest `\x` value accepted in a char literal
+
+        check(r"\u{0}", '\0');
+        check(r"\u{000000}", '\0'); // six digits is the maximum length
+        check(r"\u{41}", 'A');
+        check(r"\u{0041}", 'A');
+        check(r"\u{00_41}", 'A'); // underscores after the first digit are skipped
+        check(r"\u{4__1__}", 'A');
+        check(r"\u{1F63b}", '😻');
+    }
+
+    #[test]
+    fn test_unescape_str_good() {
+        fn check(literal_text: &str, expected: &str) { // collects the callback output
+            let mut buf = Ok(String::with_capacity(literal_text.len()));
+            unescape_str(literal_text, &mut |range, c| {
+                if let Ok(b) = &mut buf {
+                    match c {
+                        Ok(c) => b.push(c),
+                        Err(e) => buf = Err((range, e)), // only the first error is kept
+                    }
+                }
+            });
+            let buf = buf.as_ref().map(|it| it.as_ref());
+            assert_eq!(buf, Ok(expected))
+        }
+
+        check("foo", "foo");
+        check("", "");
+        check(" \t\n\r\n", " \t\n\n"); // raw tab and LF are kept, CRLF becomes LF
+
+        check("hello \\\n     world", "hello world"); // `\` + line break eats the whitespace
+        check("hello \\\r\n     world", "hello world");
+        check("thread's", "thread's")
+    }
+
+    #[test]
+    fn test_unescape_byte_bad() {
+        fn check(literal_text: &str, expected_error: EscapeError) { // offset is ignored
+            let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
+            assert_eq!(actual_result, Err(expected_error));
+        }
+
+        check("", EscapeError::ZeroChars);
+        check(r"\", EscapeError::LoneSlash);
+
+        check("\n", EscapeError::EscapeOnlyChar);
+        check("\r\n", EscapeError::EscapeOnlyChar);
+        check("\t", EscapeError::EscapeOnlyChar);
+        check("'", EscapeError::EscapeOnlyChar);
+        check("\r", EscapeError::BareCarriageReturn);
+
+        check("spam", EscapeError::MoreThanOneChar);
+        check(r"\x0ff", EscapeError::MoreThanOneChar);
+        check(r#"\"a"#, EscapeError::MoreThanOneChar);
+        check(r"\na", EscapeError::MoreThanOneChar);
+        check(r"\ra", EscapeError::MoreThanOneChar);
+        check(r"\ta", EscapeError::MoreThanOneChar);
+        check(r"\\a", EscapeError::MoreThanOneChar);
+        check(r"\'a", EscapeError::MoreThanOneChar);
+        check(r"\0a", EscapeError::MoreThanOneChar);
+
+        check(r"\v", EscapeError::InvalidEscape);
+        check(r"\💩", EscapeError::InvalidEscape);
+        check(r"\●", EscapeError::InvalidEscape);
+
+        check(r"\x", EscapeError::TooShortHexEscape);
+        check(r"\x0", EscapeError::TooShortHexEscape);
+        check(r"\xa", EscapeError::TooShortHexEscape);
+        check(r"\xf", EscapeError::TooShortHexEscape);
+        check(r"\xx", EscapeError::InvalidCharInHexEscape);
+        check(r"\xы", EscapeError::InvalidCharInHexEscape);
+        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
+        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
+
+        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
+        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
+        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
+        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
+        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
+        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
+        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
+        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
+
+        check("ы", EscapeError::NonAsciiCharInByte);
+        check("🦀", EscapeError::NonAsciiCharInByte);
+
+        check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); // reported before MoreThanOneChar
+        check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); // reported before range checks
+        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); // repeats the previous case
+        check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
+        check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
+    }
+
+    #[test]
+    fn test_unescape_byte_good() {
+        fn check(literal_text: &str, expected_byte: u8) { // expects a successful unescape
+            let actual_result = unescape_byte(literal_text);
+            assert_eq!(actual_result, Ok(expected_byte));
+        }
+
+        check("a", b'a');
+
+        check(r#"\""#, b'"');
+        check(r"\n", b'\n');
+        check(r"\r", b'\r');
+        check(r"\t", b'\t');
+        check(r"\\", b'\\');
+        check(r"\'", b'\'');
+        check(r"\0", b'\0');
+
+        check(r"\x00", b'\0');
+        check(r"\x5a", b'Z');
+        check(r"\x5A", b'Z');
+        check(r"\x7f", 127);
+        check(r"\x80", 128); // values above 0x7F are accepted in byte mode
+        check(r"\xff", 255);
+        check(r"\xFF", 255);
+    }
+
+    #[test]
+    fn test_unescape_byte_str_good() {
+        fn check(literal_text: &str, expected: &[u8]) { // collects the callback output
+            let mut buf = Ok(Vec::with_capacity(literal_text.len()));
+            unescape_byte_str(literal_text, &mut |range, c| {
+                if let Ok(b) = &mut buf {
+                    match c {
+                        Ok(c) => b.push(c),
+                        Err(e) => buf = Err((range, e)), // only the first error is kept
+                    }
+                }
+            });
+            let buf = buf.as_ref().map(|it| it.as_ref());
+            assert_eq!(buf, Ok(expected))
+        }
+
+        check("foo", b"foo");
+        check("", b"");
+        check(" \t\n\r\n", b" \t\n\n"); // raw tab and LF are kept, CRLF becomes LF
+
+        check("hello \\\n     world", b"hello world"); // `\` + line break eats the whitespace
+        check("hello \\\r\n     world", b"hello world");
+        check("thread's", b"thread's")
+    }
+}
--- a/src/libsyntax/parse/unescape_error_reporting.rs
+++ b/src/libsyntax/parse/unescape_error_reporting.rs
@ -0,0 +1,200 @@
+//! Utilities for rendering escape sequence errors as diagnostics.
+
+use std::ops::Range;
+use std::iter::once;
+
+use syntax_pos::{Span, BytePos};
+
+use crate::errors::{Handler, Applicability};
+
+use super::unescape::{EscapeError, Mode};
+
+pub(crate) fn emit_unescape_error(
+    handler: &Handler,
+    // interior part of the literal, without quotes
+    lit: &str,
+    // full span of the literal, including quotes
+    span_with_quotes: Span,
+    mode: Mode,
+    // range of the error inside `lit`
+    range: Range<usize>,
+    error: EscapeError,
+) {
+    log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
+                lit, span_with_quotes, mode, range, error);
+    let span = {
+        let Range { start, end } = range;
+        let (start, end) = (start as u32, end as u32);
+        let lo = span_with_quotes.lo() + BytePos(start + 1);
+        let hi = lo + BytePos(end - start);
+            span_with_quotes
+            .with_lo(lo)
+            .with_hi(hi)
+    };
+    let last_char = || {
+        let c = lit[range.clone()].chars().rev().next().unwrap();
+        let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
+        (c, span)
+    };
+    match error {
+        EscapeError::LoneSurrogateUnicodeEscape => {
+            handler.struct_span_err(span, "invalid unicode character escape")
+                .help("unicode escape must not be a surrogate")
+                .emit();
+        }
+        EscapeError::OutOfRangeUnicodeEscape => {
+            handler.struct_span_err(span, "invalid unicode character escape")
+                .help("unicode escape must be at most 10FFFF")
+                .emit();
+        }
+        EscapeError::MoreThanOneChar => {
+            handler
+                .struct_span_err(
+                    span_with_quotes,
+                    "character literal may only contain one codepoint",
+                )
+                .span_suggestion(
+                    span_with_quotes,
+                    "if you meant to write a `str` literal, use double quotes",
+                    format!("\"{}\"", lit),
+                    Applicability::MachineApplicable,
+                ).emit()
+        }
+        EscapeError::EscapeOnlyChar => {
+            let (c, _span) = last_char();
+
+            let mut msg = if mode.is_bytes() {
+                "byte constant must be escaped: "
+            } else {
+                "character constant must be escaped: "
+            }.to_string();
+            push_escaped_char(&mut msg, c);
+
+            handler.span_err(span, msg.as_str())
+        }
+        EscapeError::BareCarriageReturn => {
+            let msg = if mode.in_double_quotes() {
+                "bare CR not allowed in string, use \\r instead"
+            } else {
+                "character constant must be escaped: \\r"
+            };
+            handler.span_err(span, msg);
+        }
+        EscapeError::InvalidEscape => {
+            let (c, span) = last_char();
+
+            let label = if mode.is_bytes() {
+                "unknown byte escape"
+            } else {
+                "unknown character escape"
+            };
+            let mut msg = label.to_string();
+            msg.push_str(": ");
+            push_escaped_char(&mut msg, c);
+
+            let mut diag = handler.struct_span_err(span, msg.as_str());
+            diag.span_label(span, label);
+            if c == '{' || c == '}' && !mode.is_bytes() {
+                diag.help("if used in a formatting string, \
+                           curly braces are escaped with `{{` and `}}`");
+            } else if c == '\r' {
+                diag.help("this is an isolated carriage return; \
+                           consider checking your editor and version control settings");
+            }
+            diag.emit();
+        }
+        EscapeError::TooShortHexEscape => {
+            handler.span_err(span, "numeric character escape is too short")
+        }
+        EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
+            let (c, span) = last_char();
+
+            let mut msg = if error == EscapeError::InvalidCharInHexEscape {
+                "invalid character in numeric character escape: "
+            } else {
+                "invalid character in unicode escape: "
+            }.to_string();
+            push_escaped_char(&mut msg, c);
+
+            handler.span_err(span, msg.as_str())
+        }
+        EscapeError::NonAsciiCharInByte => {
+            assert!(mode.is_bytes());
+            let (_c, span) = last_char();
+            handler.span_err(span, "byte constant must be ASCII. \
+                                    Use a \\xHH escape for a non-ASCII byte")
+        }
+        EscapeError::OutOfRangeHexEscape => {
+            handler.span_err(span, "this form of character escape may only be used \
+                                    with characters in the range [\\x00-\\x7f]")
+        }
+        EscapeError::LeadingUnderscoreUnicodeEscape => {
+            let (_c, span) = last_char();
+            handler.span_err(span, "invalid start of unicode escape")
+        }
+        EscapeError::OverlongUnicodeEscape => {
+            handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
+        }
+        EscapeError::UnclosedUnicodeEscape => {
+            handler.span_err(span, "unterminated unicode escape (needed a `}`)")
+        }
+        EscapeError::NoBraceInUnicodeEscape => {
+            let msg = "incorrect unicode escape sequence";
+            let mut diag = handler.struct_span_err(span, msg);
+
+            let mut suggestion = "\\u{".to_owned();
+            let mut suggestion_len = 0;
+            let (c, char_span) = last_char();
+            let chars = once(c).chain(lit[range.end..].chars());
+            for c in chars.take(6).take_while(|c| c.is_digit(16)) {
+                suggestion.push(c);
+                suggestion_len += c.len_utf8();
+            }
+
+            if suggestion_len > 0 {
+                suggestion.push('}');
+                let lo = char_span.lo();
+                let hi = lo + BytePos(suggestion_len as u32);
+                diag.span_suggestion(
+                    span.with_lo(lo).with_hi(hi),
+                    "format of unicode escape sequences uses braces",
+                    suggestion,
+                    Applicability::MaybeIncorrect,
+                );
+            } else {
+                diag.span_label(span, msg);
+                diag.help(
+                    "format of unicode escape sequences is `\\u{...}`",
+                );
+            }
+
+            diag.emit();
+        }
+        EscapeError::UnicodeEscapeInByte => {
+            handler.span_err(span, "unicode escape sequences cannot be used \
+                                    as a byte or in a byte string")
+        }
+        EscapeError::EmptyUnicodeEscape => {
+            handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
+        }
+        EscapeError::ZeroChars => {
+            handler.span_err(span, "empty character literal")
+        }
+        EscapeError::LoneSlash => {
+            panic!("lexer accepted unterminated literal with trailing slash")
+        }
+    }
+}
+
+/// Pushes a character to a message string for error reporting
+pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
+    match c {
+        '\u{20}'..='\u{7e}' => {
+            // Don't escape \, ' or " for user-facing messages
+            msg.push(c);
+        }
+        _ => {
+            msg.extend(c.escape_default());
+        }
+    }
+}
--- a/src/test/ui/fmt/format-string-error-2.rs
+++ b/src/test/ui/fmt/format-string-error-2.rs
@ -1,3 +1,4 @@
+// compile-flags: -Z continue-parse-after-error
 // ignore-tidy-tab

 fn main() {
@ -76,7 +77,7 @@ raw  { \n

    println!("\x7B}\u8 {", 1);
    //~^ ERROR incorrect unicode escape sequence
-    //~| ERROR argument never used
+    //~| ERROR invalid format string: expected `'}'` but string was terminated

    // note: raw strings don't escape `\xFF` and `\u{FF}` sequences
    println!(r#"\x7B}\u{8} {"#, 1);
--- a/src/test/ui/fmt/format-string-error-2.stderr
+++ b/src/test/ui/fmt/format-string-error-2.stderr
@ -1,13 +1,13 @@
 error: incorrect unicode escape sequence
-  --> $DIR/format-string-error-2.rs:77:20
+  --> $DIR/format-string-error-2.rs:78:20
   |
 LL |     println!("\x7B}\u8 {", 1);
   |                    ^^-
-   |                    |
-   |                    help: format of unicode escape sequences uses braces: `\u{8}`
+   |                      |
+   |                      help: format of unicode escape sequences uses braces: `\u{8}`

 error: invalid format string: expected `'}'`, found `'a'`
-  --> $DIR/format-string-error-2.rs:5:5
+  --> $DIR/format-string-error-2.rs:6:5
   |
 LL |     format!("{
   |              - because of this opening brace
@ -17,7 +17,7 @@ LL |     a");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'b'`
-  --> $DIR/format-string-error-2.rs:9:5
+  --> $DIR/format-string-error-2.rs:10:5
   |
 LL |     format!("{ \
   |              - because of this opening brace
@ -28,7 +28,7 @@ LL |     b");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'\'`
-  --> $DIR/format-string-error-2.rs:11:18
+  --> $DIR/format-string-error-2.rs:12:18
   |
 LL |     format!(r#"{ \
   |                - ^ expected `}` in format string
@ -38,7 +38,7 @@ LL |     format!(r#"{ \
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'\'`
-  --> $DIR/format-string-error-2.rs:15:18
+  --> $DIR/format-string-error-2.rs:16:18
   |
 LL |     format!(r#"{ \n
   |                - ^ expected `}` in format string
@ -48,7 +48,7 @@ LL |     format!(r#"{ \n
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'e'`
-  --> $DIR/format-string-error-2.rs:21:5
+  --> $DIR/format-string-error-2.rs:22:5
   |
 LL |     format!("{ \n
   |              - because of this opening brace
@ -59,7 +59,7 @@ LL |     e");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'a'`
-  --> $DIR/format-string-error-2.rs:25:5
+  --> $DIR/format-string-error-2.rs:26:5
   |
 LL |     {
   |     - because of this opening brace
@ -69,7 +69,7 @@ LL |     a");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'a'`
-  --> $DIR/format-string-error-2.rs:29:5
+  --> $DIR/format-string-error-2.rs:30:5
   |
 LL |     {
   |     - because of this opening brace
@ -79,7 +79,7 @@ LL |     a
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'b'`
-  --> $DIR/format-string-error-2.rs:35:5
+  --> $DIR/format-string-error-2.rs:36:5
   |
 LL |     { \
   |     - because of this opening brace
@ -90,7 +90,7 @@ LL |     b");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'b'`
-  --> $DIR/format-string-error-2.rs:40:5
+  --> $DIR/format-string-error-2.rs:41:5
   |
 LL |     { \
   |     - because of this opening brace
@ -101,7 +101,7 @@ LL |     b \
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'\'`
-  --> $DIR/format-string-error-2.rs:45:8
+  --> $DIR/format-string-error-2.rs:46:8
   |
 LL | raw  { \
   |      - ^ expected `}` in format string
@ -111,7 +111,7 @@ LL | raw  { \
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'\'`
-  --> $DIR/format-string-error-2.rs:50:8
+  --> $DIR/format-string-error-2.rs:51:8
   |
 LL | raw  { \n
   |      - ^ expected `}` in format string
@ -121,7 +121,7 @@ LL | raw  { \n
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'e'`
-  --> $DIR/format-string-error-2.rs:57:5
+  --> $DIR/format-string-error-2.rs:58:5
   |
 LL |   { \n
   |   - because of this opening brace
@ -132,7 +132,7 @@ LL |     e");
   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: expected `'}'`, found `'a'`
-  --> $DIR/format-string-error-2.rs:67:5
+  --> $DIR/format-string-error-2.rs:68:5
   |
 LL |     {
   |     - because of this opening brace
@ -142,13 +142,13 @@ LL |     asdf}
   = note: if you intended to print `{`, you can escape it using `{{`

 error: 1 positional argument in format string, but no arguments were given
-  --> $DIR/format-string-error-2.rs:70:17
+  --> $DIR/format-string-error-2.rs:71:17
   |
 LL |     println!("\t{}");
   |                 ^^

 error: invalid format string: expected `'}'` but string was terminated
-  --> $DIR/format-string-error-2.rs:74:27
+  --> $DIR/format-string-error-2.rs:75:27
   |
 LL |     println!("\x7B}\u{8} {", 1);
   |                          -^ expected `'}'` in format string
@ -157,16 +157,18 @@ LL |     println!("\x7B}\u{8} {", 1);
   |
   = note: if you intended to print `{`, you can escape it using `{{`

-error: argument never used
-  --> $DIR/format-string-error-2.rs:77:28
+error: invalid format string: expected `'}'` but string was terminated
+  --> $DIR/format-string-error-2.rs:78:27
   |
 LL |     println!("\x7B}\u8 {", 1);
-   |              ------------  ^ argument never used
-   |              |
-   |              formatting specifier missing
+   |                          -^ expected `'}'` in format string
+   |                          |
+   |                          because of this opening brace
+   |
+   = note: if you intended to print `{`, you can escape it using `{{`

 error: invalid format string: unmatched `}` found
-  --> $DIR/format-string-error-2.rs:82:21
+  --> $DIR/format-string-error-2.rs:83:21
   |
 LL |     println!(r#"\x7B}\u{8} {"#, 1);
   |                     ^ unmatched `}` in format string
@ -174,7 +176,7 @@ LL |     println!(r#"\x7B}\u{8} {"#, 1);
   = note: if you intended to print `}`, you can escape it using `}}`

 error: invalid format string: unmatched `}` found
-  --> $DIR/format-string-error-2.rs:85:21
+  --> $DIR/format-string-error-2.rs:86:21
   |
 LL |     println!(r#"\x7B}\u8 {"#, 1);
   |                     ^ unmatched `}` in format string
--- a/src/test/ui/parser/ascii-only-character-escape.stderr
+++ b/src/test/ui/parser/ascii-only-character-escape.stderr
@ -1,20 +1,20 @@
 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:4:16
+  --> $DIR/ascii-only-character-escape.rs:4:14
   |
 LL |     let x = "\x80";
-   |                ^^
+   |              ^^^^

 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:5:16
+  --> $DIR/ascii-only-character-escape.rs:5:14
   |
 LL |     let y = "\xff";
-   |                ^^
+   |              ^^^^

 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:6:16
+  --> $DIR/ascii-only-character-escape.rs:6:14
   |
 LL |     let z = "\xe2";
-   |                ^^
+   |              ^^^^

 error: aborting due to 3 previous errors

--- a/src/test/ui/parser/byte-literals.stderr
+++ b/src/test/ui/parser/byte-literals.stderr
@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
 LL |     b'é';
   |       ^

-error: unterminated byte constant: b'a
-  --> $DIR/byte-literals.rs:14:5
+error: unterminated byte constant
+  --> $DIR/byte-literals.rs:14:6
   |
 LL |     b'a
-   |     ^^^
+   |      ^^^^

 error: aborting due to 7 previous errors

--- a/src/test/ui/parser/byte-string-literals.stderr
+++ b/src/test/ui/parser/byte-string-literals.stderr
@ -23,10 +23,10 @@ LL |     b"é";
   |       ^

 error: unterminated double quote byte string
-  --> $DIR/byte-string-literals.rs:9:7
+  --> $DIR/byte-string-literals.rs:9:6
   |
 LL |       b"a
-   |  _______^
+   |  ______^
 LL | | }
   | |__^

--- a/src/test/ui/parser/issue-23620-invalid-escapes.rs
+++ b/src/test/ui/parser/issue-23620-invalid-escapes.rs
@ -9,32 +9,27 @@ fn main() {

    let _ = b'\u';
    //~^ ERROR incorrect unicode escape sequence
-    //~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

    let _ = b'\x5';
    //~^ ERROR numeric character escape is too short

    let _ = b'\xxy';
    //~^ ERROR invalid character in numeric character escape: x
-    //~^^ ERROR invalid character in numeric character escape: y

    let _ = '\x5';
    //~^ ERROR numeric character escape is too short

    let _ = '\xxy';
    //~^ ERROR invalid character in numeric character escape: x
-    //~^^ ERROR invalid character in numeric character escape: y

    let _ = b"\u{a4a4} \xf \u";
    //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
    //~^^ ERROR invalid character in numeric character escape:
    //~^^^ ERROR incorrect unicode escape sequence
-    //~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

    let _ = "\xf \u";
    //~^ ERROR invalid character in numeric character escape:
-    //~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
-    //~^^^ ERROR incorrect unicode escape sequence
+    //~^^ ERROR incorrect unicode escape sequence

    let _ = "\u8f";
    //~^ ERROR incorrect unicode escape sequence
--- a/src/test/ui/parser/issue-23620-invalid-escapes.stderr
+++ b/src/test/ui/parser/issue-23620-invalid-escapes.stderr
@ -18,88 +18,58 @@ LL |     let _ = b'\u';
   |
   = help: format of unicode escape sequences is `\u{...}`

-error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:10:15
-   |
-LL |     let _ = b'\u';
-   |               ^^
-
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:14:17
+  --> $DIR/issue-23620-invalid-escapes.rs:13:15
   |
 LL |     let _ = b'\x5';
-   |                 ^
+   |               ^^^

 error: invalid character in numeric character escape: x
-  --> $DIR/issue-23620-invalid-escapes.rs:17:17
+  --> $DIR/issue-23620-invalid-escapes.rs:16:17
   |
 LL |     let _ = b'\xxy';
   |                 ^

-error: invalid character in numeric character escape: y
-  --> $DIR/issue-23620-invalid-escapes.rs:17:18
-   |
-LL |     let _ = b'\xxy';
-   |                  ^
-
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:21:16
+  --> $DIR/issue-23620-invalid-escapes.rs:19:14
   |
 LL |     let _ = '\x5';
-   |                ^
+   |              ^^^

 error: invalid character in numeric character escape: x
-  --> $DIR/issue-23620-invalid-escapes.rs:24:16
+  --> $DIR/issue-23620-invalid-escapes.rs:22:16
   |
 LL |     let _ = '\xxy';
   |                ^

-error: invalid character in numeric character escape: y
-  --> $DIR/issue-23620-invalid-escapes.rs:24:17
-   |
-LL |     let _ = '\xxy';
-   |                 ^
-
 error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:28:15
+  --> $DIR/issue-23620-invalid-escapes.rs:25:15
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |               ^^^^^^^^

 error: invalid character in numeric character escape:  
-  --> $DIR/issue-23620-invalid-escapes.rs:28:27
+  --> $DIR/issue-23620-invalid-escapes.rs:25:27
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |                           ^

 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:28:28
+  --> $DIR/issue-23620-invalid-escapes.rs:25:28
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |                            ^^ incorrect unicode escape sequence
   |
   = help: format of unicode escape sequences is `\u{...}`

-error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:28:28
-   |
-LL |     let _ = b"\u{a4a4} \xf \u";
-   |                            ^^
-
 error: invalid character in numeric character escape:  
-  --> $DIR/issue-23620-invalid-escapes.rs:34:17
+  --> $DIR/issue-23620-invalid-escapes.rs:30:17
   |
 LL |     let _ = "\xf \u";
   |                 ^

-error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/issue-23620-invalid-escapes.rs:34:16
-   |
-LL |     let _ = "\xf \u";
-   |                ^^
-
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:34:18
+  --> $DIR/issue-23620-invalid-escapes.rs:30:18
   |
 LL |     let _ = "\xf \u";
   |                  ^^ incorrect unicode escape sequence
@ -107,12 +77,12 @@ LL |     let _ = "\xf \u";
   = help: format of unicode escape sequences is `\u{...}`

 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:39:14
+  --> $DIR/issue-23620-invalid-escapes.rs:34:14
   |
 LL |     let _ = "\u8f";
   |              ^^--
-   |              |
-   |              help: format of unicode escape sequences uses braces: `\u{8f}`
+   |                |
+   |                help: format of unicode escape sequences uses braces: `\u{8f}`

-error: aborting due to 18 previous errors
+error: aborting due to 13 previous errors

--- a/src/test/ui/parser/lex-bad-char-literals-1.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-1.stderr
@ -1,14 +1,14 @@
 error: numeric character escape is too short
-  --> $DIR/lex-bad-char-literals-1.rs:3:8
+  --> $DIR/lex-bad-char-literals-1.rs:3:6
   |
 LL |     '\x1'
-   |        ^
+   |      ^^^

 error: numeric character escape is too short
-  --> $DIR/lex-bad-char-literals-1.rs:7:8
+  --> $DIR/lex-bad-char-literals-1.rs:7:6
   |
 LL |     "\x1"
-   |        ^
+   |      ^^^

 error: unknown character escape: \u{25cf}
  --> $DIR/lex-bad-char-literals-1.rs:11:7
--- a/src/test/ui/parser/lex-bad-char-literals-2.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-2.stderr
@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
   |
 LL |     'nope'
   |     ^^^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     "nope"
+   |     ^^^^^^

 error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
   |
--- a/src/test/ui/parser/lex-bad-char-literals-4.rs
+++ b/src/test/ui/parser/lex-bad-char-literals-4.rs
@ -1,5 +1,5 @@
 //
 // This test needs to the last one appearing in this file as it kills the parser
 static c: char =
-    '●  //~ ERROR: character literal may only contain one codepoint
+    '●  //~ ERROR: unterminated character literal
 ;
--- a/src/test/ui/parser/lex-bad-char-literals-4.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-4.stderr
@ -1,8 +1,8 @@
-error: character literal may only contain one codepoint: '●
+error: unterminated character literal
  --> $DIR/lex-bad-char-literals-4.rs:4:5
   |
 LL |     '●
-   |     ^^
+   |     ^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/lex-bad-char-literals-6.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-6.stderr
@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
   |
 LL |     let x: &str = 'ab';
   |                   ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let x: &str = "ab";
+   |                   ^^^^

 error: character literal may only contain one codepoint
  --> $DIR/lex-bad-char-literals-6.rs:4:19
   |
 LL |     let y: char = 'cd';
   |                   ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let y: char = "cd";
+   |                   ^^^^

 error: character literal may only contain one codepoint
  --> $DIR/lex-bad-char-literals-6.rs:6:13
   |
 LL |     let z = 'ef';
   |             ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let z = "ef";
+   |             ^^^^

 error[E0277]: can't compare `&str` with `char`
  --> $DIR/lex-bad-char-literals-6.rs:9:10
--- a/src/test/ui/parser/lex-bad-char-literals-7.rs
+++ b/src/test/ui/parser/lex-bad-char-literals-7.rs
@ -0,0 +1,14 @@
+// compile-flags: -Z continue-parse-after-error
+fn main() {
+    let _: char = '';
+    //~^ ERROR: empty character literal
+    let _: char = '\u{}';
+    //~^ ERROR: empty unicode escape (must have at least 1 hex digit)
+
+    // Next two are OK, but may befool error recovery
+    let _ = '/';
+    let _ = b'/';
+
+    let _ = ' hello // here's a comment
+    //~^ ERROR: unterminated character literal
+}
--- a/src/test/ui/parser/lex-bad-char-literals-7.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-7.stderr
@ -0,0 +1,20 @@
+error: empty character literal
+  --> $DIR/lex-bad-char-literals-7.rs:3:20
+   |
+LL |     let _: char = '';
+   |                    ^
+
+error: empty unicode escape (must have at least 1 hex digit)
+  --> $DIR/lex-bad-char-literals-7.rs:5:20
+   |
+LL |     let _: char = '\u{}';
+   |                    ^^^^
+
+error: unterminated character literal
+  --> $DIR/lex-bad-char-literals-7.rs:12:13
+   |
+LL |     let _ = ' hello // here's a comment
+   |             ^^^^^^^^
+
+error: aborting due to 3 previous errors
+
--- a/src/test/ui/parser/macro/literals-are-validated-before-expansion.rs
+++ b/src/test/ui/parser/macro/literals-are-validated-before-expansion.rs
@ -0,0 +1,10 @@
+macro_rules! black_hole {
+    ($($tt:tt)*) => {}
+}
+
+fn main() {
+    black_hole! { '\u{FFFFFF}' }
+    //~^ ERROR: invalid unicode character escape
+    black_hole! { "this is surrogate: \u{DAAA}" }
+    //~^ ERROR: invalid unicode character escape
+}
--- a/src/test/ui/parser/macro/literals-are-validated-before-expansion.stderr
+++ b/src/test/ui/parser/macro/literals-are-validated-before-expansion.stderr
@ -0,0 +1,18 @@
+error: invalid unicode character escape
+  --> $DIR/literals-are-validated-before-expansion.rs:6:20
+   |
+LL |     black_hole! { '\u{FFFFFF}' }
+   |                    ^^^^^^^^^^
+   |
+   = help: unicode escape must be at most 10FFFF
+
+error: invalid unicode character escape
+  --> $DIR/literals-are-validated-before-expansion.rs:8:39
+   |
+LL |     black_hole! { "this is surrogate: \u{DAAA}" }
+   |                                       ^^^^^^^^
+   |
+   = help: unicode escape must not be a surrogate
+
+error: aborting due to 2 previous errors
+
--- a/src/test/ui/parser/new-unicode-escapes-1.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-1.stderr
@ -1,8 +1,8 @@
 error: unterminated unicode escape (needed a `}`)
-  --> $DIR/new-unicode-escapes-1.rs:2:21
+  --> $DIR/new-unicode-escapes-1.rs:2:14
   |
 LL |     let s = "\u{2603";
-   |                     ^
+   |              ^^^^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/new-unicode-escapes-2.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-2.stderr
@ -1,8 +1,8 @@
 error: overlong unicode escape (must have at most 6 hex digits)
-  --> $DIR/new-unicode-escapes-2.rs:2:17
+  --> $DIR/new-unicode-escapes-2.rs:2:14
   |
 LL |     let s = "\u{260311111111}";
-   |                 ^^^^^^^^^^^^
+   |              ^^^^^^^^^^^^^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/new-unicode-escapes-3.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-3.stderr
@ -1,16 +1,16 @@
 error: invalid unicode character escape
-  --> $DIR/new-unicode-escapes-3.rs:2:14
+  --> $DIR/new-unicode-escapes-3.rs:2:15
   |
 LL |     let s1 = "\u{d805}";
-   |              ^^^^^^^^^^
+   |               ^^^^^^^^
   |
   = help: unicode escape must not be a surrogate

 error: invalid unicode character escape
-  --> $DIR/new-unicode-escapes-3.rs:3:14
+  --> $DIR/new-unicode-escapes-3.rs:3:15
   |
 LL |     let s2 = "\u{ffffff}";
-   |              ^^^^^^^^^^^^
+   |               ^^^^^^^^^^
   |
   = help: unicode escape must be at most 10FFFF