From 101917795898b602340d96a6c0f7815d108af1af Mon Sep 17 00:00:00 2001 From: Benjamin Herr Date: Thu, 19 Sep 2013 16:11:23 +0200 Subject: [PATCH] lexer: show correct span on lexical errors Previously, the lexer calling `rdr.fatal(...)` would report the span of the last complete token, instead of a span within the erroneous token (besides one span fixed in 1ac90bb). This commit adds a wrapper around `rdr.fatal(...)` that sets the span explicitly, so that all fatal errors in `libsyntax/parse/lexer.rs` now report the offending code more precisely. A number of tests try to verify that, though the `compile-fail` testing setup can only check that the spans are on the right lines, and the "unterminated string/block comment" errors can't have the line marked at all, so that's incomplete. Closes #9149. --- src/libsyntax/parse/lexer.rs | 78 ++++++++++++++----- src/test/compile-fail/lex-bad-fp-lit.rs | 13 ++++ src/test/compile-fail/lex-hex-float-lit.rs | 13 ++++ .../lex-illegal-num-char-escape-2.rs | 13 ++++ .../lex-illegal-num-char-escape.rs | 13 ++++ .../compile-fail/lex-int-lit-too-large-2.rs | 13 ++++ .../compile-fail/lex-int-lit-too-large.rs | 13 ++++ .../compile-fail/lex-no-valid-digits-2.rs | 13 ++++ src/test/compile-fail/lex-no-valid-digits.rs | 13 ++++ .../compile-fail/lex-unknown-char-escape.rs | 13 ++++ .../compile-fail/lex-unknown-start-tok.rs | 13 ++++ .../compile-fail/lex-unknown-str-escape.rs | 13 ++++ .../lex-unterminated-char-const.rs | 13 ++++ 13 files changed, 215 insertions(+), 19 deletions(-) create mode 100644 src/test/compile-fail/lex-bad-fp-lit.rs create mode 100644 src/test/compile-fail/lex-hex-float-lit.rs create mode 100644 src/test/compile-fail/lex-illegal-num-char-escape-2.rs create mode 100644 src/test/compile-fail/lex-illegal-num-char-escape.rs create mode 100644 src/test/compile-fail/lex-int-lit-too-large-2.rs create mode 100644 src/test/compile-fail/lex-int-lit-too-large.rs create mode 100644 src/test/compile-fail/lex-no-valid-digits-2.rs 
create mode 100644 src/test/compile-fail/lex-no-valid-digits.rs create mode 100644 src/test/compile-fail/lex-unknown-char-escape.rs create mode 100644 src/test/compile-fail/lex-unknown-start-tok.rs create mode 100644 src/test/compile-fail/lex-unknown-str-escape.rs create mode 100644 src/test/compile-fail/lex-unterminated-char-const.rs diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 0bc9e619274..3d686167ce1 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -149,6 +149,16 @@ impl reader for TtReader { fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader } } +// report a lexical error spanning [`from_pos`, `to_pos`) +fn fatal_span(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str) + -> ! { + rdr.peek_span = codemap::mk_sp(from_pos, to_pos); + rdr.fatal(m); +} + // EFFECT: advance peek_tok and peek_span to refer to the next token. // EFFECT: update the interner, maybe. fn string_advance_token(r: @mut StringReader) { @@ -327,7 +337,8 @@ fn consume_block_comment(rdr: @mut StringReader) bump(rdr); } if is_eof(rdr) { - rdr.fatal(~"unterminated block doc-comment"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block doc-comment"); } else { bump(rdr); bump(rdr); @@ -344,8 +355,12 @@ fn consume_block_comment(rdr: @mut StringReader) } } } else { + let start_bpos = rdr.last_pos - BytePos(2u); loop { - if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); } + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block comment"); + } if rdr.curr == '*' && nextch(rdr) == '/' { bump(rdr); bump(rdr); @@ -362,6 +377,7 @@ fn consume_block_comment(rdr: @mut StringReader) } fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { + let start_bpos = rdr.last_pos; let mut c = rdr.curr; let mut rslt = ~""; if c == 'e' || c == 'E' { @@ -375,7 +391,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { let exponent = scan_digits(rdr, 
10u); if exponent.len() > 0u { return Some(rslt + exponent); - } else { rdr.fatal(~"scan_exponent: bad fp literal"); } + } else { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"scan_exponent: bad fp literal"); + } } else { return None::<~str>; } } @@ -399,6 +418,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { let mut base = 10u; let mut c = c; let mut n = nextch(rdr); + let start_bpos = rdr.last_pos; if c == '0' && n == 'x' { bump(rdr); bump(rdr); @@ -442,11 +462,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { else { either::Right(ast::ty_u64) }; } if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::(num_str, base as uint) { Some(p) => p, - None => rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; match tp { @@ -464,8 +486,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { } if is_float { match base { - 16u => rdr.fatal(~"hexadecimal float literal is not supported"), - 2u => rdr.fatal(~"binary float literal is not supported"), + 16u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"hexadecimal float literal is not supported"), + 2u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"binary float literal is not supported"), _ => () } } @@ -507,11 +531,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str)); } else { if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::(num_str, base as uint) { Some(p) => p, - None => rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; debug!("lexing %s as an unsuffixed 
integer literal", @@ -523,19 +549,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char { let mut accum_int = 0; let mut i = n_hex_digits; + let start_bpos = rdr.last_pos; while i != 0u { let n = rdr.curr; - bump(rdr); if !is_hex_digit(n) { - rdr.fatal(fmt!("illegal numeric character escape: %d", n as int)); + fatal_span(rdr, rdr.last_pos, rdr.pos, + fmt!("illegal numeric character escape: %d", + n as int)); } + bump(rdr); accum_int *= 16; accum_int += hex_digit_val(n); i -= 1u; } match char::from_u32(accum_int as u32) { Some(x) => x, - None => rdr.fatal(fmt!("illegal numeric character escape")) + None => fatal_span(rdr, start_bpos, rdr.last_pos, + fmt!("illegal numeric character escape")) } } @@ -691,6 +721,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { if c2 == '\\' { // '\X' for some X must be a character constant: let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => { c2 = '\n'; } @@ -704,12 +735,18 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { 'u' => { c2 = scan_numeric_escape(rdr, 4u); } 'U' => { c2 = scan_numeric_escape(rdr, 8u); } c2 => { - rdr.fatal(fmt!("unknown character escape: %d", c2 as int)); + fatal_span(rdr, escaped_pos, rdr.last_pos, + fmt!("unknown character escape: %d", c2 as int)); } } } if rdr.curr != '\'' { - rdr.fatal(~"unterminated character constant"); + fatal_span(rdr, + // Byte offsetting here is okay because the character + // before position `start` is an ascii single quote. 
+ start - BytePos(1u), + rdr.last_pos, + ~"unterminated character constant"); } bump(rdr); // advance curr past token return token::LIT_CHAR(c2 as u32); @@ -721,7 +758,9 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { while rdr.curr != '"' { if is_eof(rdr) { do with_str_from(rdr, n) |s| { - rdr.fatal(fmt!("unterminated double quote string: %s", s)); + fatal_span(rdr, n, rdr.last_pos, + fmt!("unterminated double quote string: %s", + s)); } } @@ -730,6 +769,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { match ch { '\\' => { let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => accum_str.push_char('\n'), @@ -750,7 +790,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { accum_str.push_char(scan_numeric_escape(rdr, 8u)); } c2 => { - rdr.fatal(fmt!("unknown string escape: %d", c2 as int)); + fatal_span(rdr, escaped_pos, rdr.last_pos, + fmt!("unknown string escape: %d", c2 as int)); } } } @@ -786,11 +827,10 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { '^' => { return binop(rdr, token::CARET); } '%' => { return binop(rdr, token::PERCENT); } c => { - // So the error span points to the unrecognized character - rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos); let mut cs = ~""; char::escape_default(c, |c| cs.push_char(c)); - rdr.fatal(fmt!("unknown start of token: %s", cs)); + fatal_span(rdr, rdr.last_pos, rdr.pos, + fmt!("unknown start of token: %s", cs)); } } } diff --git a/src/test/compile-fail/lex-bad-fp-lit.rs b/src/test/compile-fail/lex-bad-fp-lit.rs new file mode 100644 index 00000000000..5a5e9d7d8f2 --- /dev/null +++ b/src/test/compile-fail/lex-bad-fp-lit.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +static f: float = + 1e+ //~ ERROR: scan_exponent: bad fp literal +; diff --git a/src/test/compile-fail/lex-hex-float-lit.rs b/src/test/compile-fail/lex-hex-float-lit.rs new file mode 100644 index 00000000000..457c6126c44 --- /dev/null +++ b/src/test/compile-fail/lex-hex-float-lit.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static f: float = + 0x539.0 //~ ERROR: hexadecimal float literal is not supported +; diff --git a/src/test/compile-fail/lex-illegal-num-char-escape-2.rs b/src/test/compile-fail/lex-illegal-num-char-escape-2.rs new file mode 100644 index 00000000000..fe46cec776d --- /dev/null +++ b/src/test/compile-fail/lex-illegal-num-char-escape-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\Uffffffff' //~ ERROR: illegal numeric character escape +; diff --git a/src/test/compile-fail/lex-illegal-num-char-escape.rs b/src/test/compile-fail/lex-illegal-num-char-escape.rs new file mode 100644 index 00000000000..2760371b8e2 --- /dev/null +++ b/src/test/compile-fail/lex-illegal-num-char-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\u539_' //~ ERROR: illegal numeric character escape +; diff --git a/src/test/compile-fail/lex-int-lit-too-large-2.rs b/src/test/compile-fail/lex-int-lit-too-large-2.rs new file mode 100644 index 00000000000..39d1cba64b0 --- /dev/null +++ b/src/test/compile-fail/lex-int-lit-too-large-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 99999999999999999999999999999999u32 //~ ERROR: int literal is too large +; diff --git a/src/test/compile-fail/lex-int-lit-too-large.rs b/src/test/compile-fail/lex-int-lit-too-large.rs new file mode 100644 index 00000000000..6343be651fa --- /dev/null +++ b/src/test/compile-fail/lex-int-lit-too-large.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 99999999999999999999999999999999 //~ ERROR: int literal is too large +; diff --git a/src/test/compile-fail/lex-no-valid-digits-2.rs b/src/test/compile-fail/lex-no-valid-digits-2.rs new file mode 100644 index 00000000000..549dbf5bc8c --- /dev/null +++ b/src/test/compile-fail/lex-no-valid-digits-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 0xu32 //~ ERROR: no valid digits +; diff --git a/src/test/compile-fail/lex-no-valid-digits.rs b/src/test/compile-fail/lex-no-valid-digits.rs new file mode 100644 index 00000000000..6a5b8e93f01 --- /dev/null +++ b/src/test/compile-fail/lex-no-valid-digits.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 0x //~ ERROR: no valid digits +; diff --git a/src/test/compile-fail/lex-unknown-char-escape.rs b/src/test/compile-fail/lex-unknown-char-escape.rs new file mode 100644 index 00000000000..f2445c2b60e --- /dev/null +++ b/src/test/compile-fail/lex-unknown-char-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\●' //~ ERROR: unknown character escape +; diff --git a/src/test/compile-fail/lex-unknown-start-tok.rs b/src/test/compile-fail/lex-unknown-start-tok.rs new file mode 100644 index 00000000000..1bb68230345 --- /dev/null +++ b/src/test/compile-fail/lex-unknown-start-tok.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + ● //~ ERROR: unknown start of token +} diff --git a/src/test/compile-fail/lex-unknown-str-escape.rs b/src/test/compile-fail/lex-unknown-str-escape.rs new file mode 100644 index 00000000000..f7809b02b0b --- /dev/null +++ b/src/test/compile-fail/lex-unknown-str-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static s: &'static str = + "\●" //~ ERROR: unknown string escape +; diff --git a/src/test/compile-fail/lex-unterminated-char-const.rs b/src/test/compile-fail/lex-unterminated-char-const.rs new file mode 100644 index 00000000000..551360ff9e0 --- /dev/null +++ b/src/test/compile-fail/lex-unterminated-char-const.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '● //~ ERROR: unterminated character constant +;