diff --git a/src/libproc_macro/lib.rs b/src/libproc_macro/lib.rs index 798cc52cea0..472307682d8 100644 --- a/src/libproc_macro/lib.rs +++ b/src/libproc_macro/lib.rs @@ -55,9 +55,9 @@ use std::str::FromStr; use syntax::ast; use syntax::errors::DiagnosticBuilder; use syntax::parse::{self, token}; -use syntax::symbol::Symbol; +use syntax::symbol::{keywords, Symbol}; use syntax::tokenstream; -use syntax::parse::lexer::comments; +use syntax::parse::lexer::{self, comments}; use syntax_pos::{FileMap, Pos, SyntaxContext, FileName}; use syntax_pos::hygiene::Mark; @@ -700,16 +700,13 @@ impl fmt::Display for Group { /// Multicharacter operators like `+=` are represented as two instances of `Punct` with different /// forms of `Spacing` returned. /// -/// REVIEW We should guarantee that `Punct` contains a valid punctuation character permitted by -/// REVIEW the language and not a random unicode code point. The check is already performed in -/// REVIEW `TokenTree::to_internal`, but we should do it on construction. -/// REVIEW `Punct` can also avoid using `char` internally and keep an u8-like enum. -/// /// REVIEW ATTENTION: `Copy` impl on a struct with private fields. /// REVIEW Do we want to guarantee `Punct` to be `Copy`? #[unstable(feature = "proc_macro", issue = "38356")] #[derive(Copy, Clone, Debug)] pub struct Punct { + // REVIEW(INTERNAL) `Punct` can avoid using `char` internally and + // REVIEW(INTERNAL) can keep u8 or an u8-like enum. ch: char, spacing: Spacing, span: Span, @@ -733,17 +730,21 @@ pub enum Spacing { impl Punct { /// Creates a new `Punct` from the given character and spacing. + /// The `ch` argument must be a valid punctuation character permitted by the language, + /// otherwise the function will panic. /// /// The returned `Punct` will have the default span of `Span::call_site()` /// which can be further configured with the `set_span` method below. /// /// REVIEW Why we even use `char` here? There's no reason to use unicode here. /// REVIEW I guess because it's more convenient to write `new('+')` than `new(b'+')`, that's ok. - /// - /// REVIEW TO_DO Do input validation on construction, the argument should be a valid punctuation - /// REVIEW character permitted by the language. #[unstable(feature = "proc_macro", issue = "38356")] pub fn new(ch: char, spacing: Spacing) -> Punct { + const LEGAL_CHARS: &[char] = &['=', '<', '>', '!', '~', '+', '-', '*', '/', '%', + '^', '&', '|', '@', '.', ',', ';', ':', '#', '$', '?']; + if !LEGAL_CHARS.contains(&ch) { + panic!("unsupported character `{:?}`", ch) + } Punct { ch: ch, spacing: spacing, @@ -794,9 +795,6 @@ impl fmt::Display for Punct { /// An identifier (`ident`) or lifetime identifier (`'ident`). /// -/// REVIEW We should guarantee that `Ident` contains a valid identifier permitted by -/// REVIEW the language and not a random unicode string, at least for a start. -/// /// REVIEW ATTENTION: `Copy` impl on a struct with private fields. /// REVIEW Do we want to guarantee `Ident` to be `Copy`? #[derive(Copy, Clone, Debug)] @@ -816,6 +814,8 @@ impl !Sync for Ident {} impl Ident { /// Creates a new `Ident` with the given `string` as well as the specified /// `span`. + /// The `string` argument must be a valid identifier or lifetime identifier permitted by the + /// language, otherwise the function will panic. /// /// Note that `span`, currently in rustc, configures the hygiene information /// for this identifier. @@ -831,11 +831,11 @@ impl Ident { /// /// Due to the current importance of hygiene this constructor, unlike other /// tokens, requires a `Span` to be specified at construction. - /// - /// REVIEW TO_DO Do input validation, the argument should be a valid identifier or - /// REVIEW lifetime identifier. #[unstable(feature = "proc_macro", issue = "38356")] pub fn new(string: &str, span: Span) -> Ident { + if !lexer::is_valid_ident(string) { + panic!("`{:?}` is not a valid identifier", string) + } Ident { sym: Symbol::intern(string), span, @@ -847,6 +847,11 @@ impl Ident { #[unstable(feature = "proc_macro", issue = "38356")] pub fn new_raw(string: &str, span: Span) -> Ident { let mut ident = Ident::new(string, span); + if ident.sym == keywords::Underscore.name() || + token::is_path_segment_keyword(ast::Ident::with_empty_ctxt(ident.sym)) || + ident.sym.as_str().starts_with("\'") { + panic!("`{:?}` is not a valid raw identifier", string) + } ident.is_raw = true; ident } @@ -1365,7 +1370,7 @@ impl TokenTree { #[unstable(feature = "proc_macro_internals", issue = "27812")] #[doc(hidden)] pub mod __internal { - pub use quote::{LiteralKind, Quoter, unquote}; + pub use quote::{LiteralKind, SpannedSymbol, Quoter, unquote}; use std::cell::Cell; diff --git a/src/libproc_macro/quote.rs b/src/libproc_macro/quote.rs index 8fbd9b640c5..e77e1d01a01 100644 --- a/src/libproc_macro/quote.rs +++ b/src/libproc_macro/quote.rs @@ -18,6 +18,7 @@ use {Delimiter, Literal, Spacing, Span, Ident, Punct, Group, TokenStream, TokenT use syntax::ext::base::{ExtCtxt, ProcMacro}; use syntax::parse::token; +use syntax::symbol::Symbol; use syntax::tokenstream; pub struct Quoter; @@ -195,14 +196,32 @@ impl Quote for Span { macro_rules! literals { ($($i:ident),*; $($raw:ident),*) => { + pub struct SpannedSymbol { + sym: Symbol, + span: Span, + } + + impl SpannedSymbol { + pub fn new(string: &str, span: Span) -> SpannedSymbol { + SpannedSymbol { sym: Symbol::intern(string), span } + } + } + + impl Quote for SpannedSymbol { + fn quote(self) -> TokenStream { + quote!(::__internal::SpannedSymbol::new((quote self.sym.as_str()), + (quote self.span))) + } + } + pub enum LiteralKind { $($i,)* $($raw(u16),)* } impl LiteralKind { - pub fn with_contents_and_suffix(self, contents: Ident, suffix: Option) - -> Literal { + pub fn with_contents_and_suffix(self, contents: SpannedSymbol, + suffix: Option) -> Literal { let sym = contents.sym; let suffix = suffix.map(|t| t.sym); match self { @@ -225,13 +244,14 @@ macro_rules! literals { } impl Literal { - fn kind_contents_and_suffix(self) -> (LiteralKind, Ident, Option) { + fn kind_contents_and_suffix(self) -> (LiteralKind, SpannedSymbol, Option) + { let (kind, contents) = match self.lit { $(token::Lit::$i(contents) => (LiteralKind::$i, contents),)* $(token::Lit::$raw(contents, n) => (LiteralKind::$raw(n), contents),)* }; - let suffix = self.suffix.map(|sym| Ident::new(&sym.as_str(), self.span())); - (kind, Ident::new(&contents.as_str(), self.span()), suffix) + let suffix = self.suffix.map(|sym| SpannedSymbol::new(&sym.as_str(), self.span())); + (kind, SpannedSymbol::new(&contents.as_str(), self.span()), suffix) } } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 22a0261d8c6..4da7b8e93d9 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1770,6 +1770,15 @@ fn ident_continue(c: Option) -> bool { (c > '\x7f' && c.is_xid_continue()) } +// The string is a valid identifier or a lifetime identifier. +pub fn is_valid_ident(s: &str) -> bool { + let mut chars = s.chars(); + match chars.next() { + Some('\'') => ident_start(chars.next()) && chars.all(|ch| ident_continue(Some(ch))), + ch => ident_start(ch) && chars.all(|ch| ident_continue(Some(ch))) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/test/ui-fulldeps/auxiliary/invalid-punct-ident.rs b/src/test/ui-fulldeps/auxiliary/invalid-punct-ident.rs new file mode 100644 index 00000000000..6bdfe5f86aa --- /dev/null +++ b/src/test/ui-fulldeps/auxiliary/invalid-punct-ident.rs @@ -0,0 +1,38 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// force-host +// no-prefer-dynamic + +#![feature(proc_macro)] +#![crate_type = "proc-macro"] + +extern crate proc_macro; +use proc_macro::*; + +#[proc_macro] +pub fn invalid_punct(_: TokenStream) -> TokenStream { + TokenTree::from(Punct::new('`', Spacing::Alone)).into() +} + +#[proc_macro] +pub fn invalid_ident(_: TokenStream) -> TokenStream { + TokenTree::from(Ident::new("*", Span::call_site())).into() +} + +#[proc_macro] +pub fn invalid_raw_ident(_: TokenStream) -> TokenStream { + TokenTree::from(Ident::new_raw("self", Span::call_site())).into() +} + +#[proc_macro] +pub fn lexer_failure(_: TokenStream) -> TokenStream { + "a b ) c".parse().expect("parsing failed without panic") +} diff --git a/src/test/ui-fulldeps/invalid-punct-ident-1.rs b/src/test/ui-fulldeps/invalid-punct-ident-1.rs new file mode 100644 index 00000000000..95397f490c2 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-1.rs @@ -0,0 +1,16 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// aux-build:invalid-punct-ident.rs +#![feature(proc_macro)] +#[macro_use] +extern crate invalid_punct_ident; + +invalid_punct!(); //~ ERROR proc macro panicked diff --git a/src/test/ui-fulldeps/invalid-punct-ident-1.stderr b/src/test/ui-fulldeps/invalid-punct-ident-1.stderr new file mode 100644 index 00000000000..3b3619e2637 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-1.stderr @@ -0,0 +1,10 @@ +error: proc macro panicked + --> $DIR/invalid-punct-ident-1.rs:16:1 + | +LL | invalid_punct!(); //~ ERROR proc macro panicked + | ^^^^^^^^^^^^^^^^^ + | + = help: message: unsupported character `'`'` + +error: aborting due to previous error + diff --git a/src/test/ui-fulldeps/invalid-punct-ident-2.rs b/src/test/ui-fulldeps/invalid-punct-ident-2.rs new file mode 100644 index 00000000000..2d9aa69f711 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-2.rs @@ -0,0 +1,16 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// aux-build:invalid-punct-ident.rs +#![feature(proc_macro)] +#[macro_use] +extern crate invalid_punct_ident; + +invalid_ident!(); //~ ERROR proc macro panicked diff --git a/src/test/ui-fulldeps/invalid-punct-ident-2.stderr b/src/test/ui-fulldeps/invalid-punct-ident-2.stderr new file mode 100644 index 00000000000..869c0908bb5 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-2.stderr @@ -0,0 +1,10 @@ +error: proc macro panicked + --> $DIR/invalid-punct-ident-2.rs:16:1 + | +LL | invalid_ident!(); //~ ERROR proc macro panicked + | ^^^^^^^^^^^^^^^^^ + | + = help: message: `"*"` is not a valid identifier + +error: aborting due to previous error + diff --git a/src/test/ui-fulldeps/invalid-punct-ident-3.rs b/src/test/ui-fulldeps/invalid-punct-ident-3.rs new file mode 100644 index 00000000000..3f8b7a32c80 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-3.rs @@ -0,0 +1,16 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// aux-build:invalid-punct-ident.rs +#![feature(proc_macro)] +#[macro_use] +extern crate invalid_punct_ident; + +invalid_raw_ident!(); //~ ERROR proc macro panicked diff --git a/src/test/ui-fulldeps/invalid-punct-ident-3.stderr b/src/test/ui-fulldeps/invalid-punct-ident-3.stderr new file mode 100644 index 00000000000..716f6ffa098 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-3.stderr @@ -0,0 +1,10 @@ +error: proc macro panicked + --> $DIR/invalid-punct-ident-3.rs:16:1 + | +LL | invalid_raw_ident!(); //~ ERROR proc macro panicked + | ^^^^^^^^^^^^^^^^^^^^^ + | + = help: message: `"self"` is not a valid raw identifier + +error: aborting due to previous error + diff --git a/src/test/ui-fulldeps/invalid-punct-ident-4.rs b/src/test/ui-fulldeps/invalid-punct-ident-4.rs new file mode 100644 index 00000000000..14b8f658336 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-4.rs @@ -0,0 +1,17 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// aux-build:invalid-punct-ident.rs +#![feature(proc_macro)] +#[macro_use] +extern crate invalid_punct_ident; + +lexer_failure!(); //~ ERROR proc macro panicked + //~| ERROR unexpected close delimiter: `)` diff --git a/src/test/ui-fulldeps/invalid-punct-ident-4.stderr b/src/test/ui-fulldeps/invalid-punct-ident-4.stderr new file mode 100644 index 00000000000..4493e37eeb2 --- /dev/null +++ b/src/test/ui-fulldeps/invalid-punct-ident-4.stderr @@ -0,0 +1,14 @@ +error: unexpected close delimiter: `)` + --> $DIR/invalid-punct-ident-4.rs:16:1 + | +LL | lexer_failure!(); //~ ERROR proc macro panicked + | ^^^^^^^^^^^^^^^^^ + +error: proc macro panicked + --> $DIR/invalid-punct-ident-4.rs:16:1 + | +LL | lexer_failure!(); //~ ERROR proc macro panicked + | ^^^^^^^^^^^^^^^^^ + +error: aborting due to 2 previous errors +