From be437132b8c53520598131bf542020966099352b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 14 Jan 2015 23:51:51 +0100 Subject: [PATCH 1/3] Add proper XID_Start and XID_Continue rules and use CharPos for span comparison, closes #15679 --- src/grammar/RustLexer.g4 | 8 +- src/grammar/verify.rs | 11 +- src/grammar/xidcontinue.g4 | 375 +++++++++++++++++++++++++++++++++++++ src/grammar/xidstart.g4 | 289 ++++++++++++++++++++++++++++ 4 files changed, 676 insertions(+), 7 deletions(-) create mode 100644 src/grammar/xidcontinue.g4 create mode 100644 src/grammar/xidstart.g4 diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 7d071d5e724..6578f79f92b 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -11,11 +11,7 @@ tokens { COMMENT } -/* Note: due to antlr limitations, we can't represent XID_start and - * XID_continue properly. ASCII-only substitute. */ - -fragment XID_start : [_a-zA-Z] ; -fragment XID_continue : [_a-zA-Z0-9] ; +import xidstart , xidcontinue; /* Expression-operator symbols */ @@ -197,7 +193,7 @@ LIT_STR_RAW QUESTION : '?'; -IDENT : XID_start XID_continue* ; +IDENT : XID_Start XID_Continue* ; fragment QUESTION_IDENTIFIER : QUESTION? 
IDENT; diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 75b56f54ccc..cf408c91609 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -25,6 +25,7 @@ use rustc::session::{self, config}; use syntax::ast; use syntax::ast::Name; +use syntax::codemap::Pos; use syntax::parse::token; use syntax::parse::lexer::TokenAndSpan; @@ -234,6 +235,13 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool { } } +fn span_cmp(rust_sp: syntax::codemap::Span, antlr_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool { + println!("{} {}", cm.bytepos_to_file_charpos(rust_sp.lo).to_uint(), cm.bytepos_to_file_charpos(rust_sp.hi).to_uint()); + antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() && + antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() && + antlr_sp.expn_id == rust_sp.expn_id +} + fn main() { fn next(r: &mut lexer::StringReader) -> TokenAndSpan { use syntax::parse::lexer::Reader; @@ -259,6 +267,7 @@ fn main() { code, String::from_str("")); let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap); + let ref cm = lexer.span_diagnostic.cm; for antlr_tok in antlr_tokens { let rustc_tok = next(&mut lexer); @@ -266,7 +275,7 @@ fn main() { continue } - assert!(rustc_tok.sp == antlr_tok.sp, "{:?} and {:?} have different spans", rustc_tok, + assert!(span_cmp(rustc_tok.sp, antlr_tok.sp, cm), "{:?} and {:?} have different spans", rustc_tok, antlr_tok); macro_rules! matches { diff --git a/src/grammar/xidcontinue.g4 b/src/grammar/xidcontinue.g4 new file mode 100644 index 00000000000..6000648f5fb --- /dev/null +++ b/src/grammar/xidcontinue.g4 @@ -0,0 +1,375 @@ +lexer grammar Xidcontinue; + +fragment XID_Continue: + '\u0030' .. '\u0039' + | '\u0041' .. '\u005a' + | '\u005f' + | '\u0061' .. '\u007a' + | '\u00aa' + | '\u00b5' + | '\u00b7' + | '\u00ba' + | '\u00c0' .. '\u00d6' + | '\u00d8' .. '\u00f6' + | '\u00f8' .. '\u0236' + | '\u0250' .. '\u02c1' + | '\u02c6' .. '\u02d1' + | '\u02e0' .. 
'\u02e4' + | '\u02ee' + | '\u0300' .. '\u0357' + | '\u035d' .. '\u036f' + | '\u0386' + | '\u0388' .. '\u038a' + | '\u038c' + | '\u038e' .. '\u03a1' + | '\u03a3' .. '\u03ce' + | '\u03d0' .. '\u03f5' + | '\u03f7' .. '\u03fb' + | '\u0400' .. '\u0481' + | '\u0483' .. '\u0486' + | '\u048a' .. '\u04ce' + | '\u04d0' .. '\u04f5' + | '\u04f8' .. '\u04f9' + | '\u0500' .. '\u050f' + | '\u0531' .. '\u0556' + | '\u0559' + | '\u0561' .. '\u0587' + | '\u0591' .. '\u05a1' + | '\u05a3' .. '\u05b9' + | '\u05bb' .. '\u05bd' + | '\u05bf' + | '\u05c1' .. '\u05c2' + | '\u05c4' + | '\u05d0' .. '\u05ea' + | '\u05f0' .. '\u05f2' + | '\u0610' .. '\u0615' + | '\u0621' .. '\u063a' + | '\u0640' .. '\u0658' + | '\u0660' .. '\u0669' + | '\u066e' .. '\u06d3' + | '\u06d5' .. '\u06dc' + | '\u06df' .. '\u06e8' + | '\u06ea' .. '\u06fc' + | '\u06ff' + | '\u0710' .. '\u074a' + | '\u074d' .. '\u074f' + | '\u0780' .. '\u07b1' + | '\u0901' .. '\u0939' + | '\u093c' .. '\u094d' + | '\u0950' .. '\u0954' + | '\u0958' .. '\u0963' + | '\u0966' .. '\u096f' + | '\u0981' .. '\u0983' + | '\u0985' .. '\u098c' + | '\u098f' .. '\u0990' + | '\u0993' .. '\u09a8' + | '\u09aa' .. '\u09b0' + | '\u09b2' + | '\u09b6' .. '\u09b9' + | '\u09bc' .. '\u09c4' + | '\u09c7' .. '\u09c8' + | '\u09cb' .. '\u09cd' + | '\u09d7' + | '\u09dc' .. '\u09dd' + | '\u09df' .. '\u09e3' + | '\u09e6' .. '\u09f1' + | '\u0a01' .. '\u0a03' + | '\u0a05' .. '\u0a0a' + | '\u0a0f' .. '\u0a10' + | '\u0a13' .. '\u0a28' + | '\u0a2a' .. '\u0a30' + | '\u0a32' .. '\u0a33' + | '\u0a35' .. '\u0a36' + | '\u0a38' .. '\u0a39' + | '\u0a3c' + | '\u0a3e' .. '\u0a42' + | '\u0a47' .. '\u0a48' + | '\u0a4b' .. '\u0a4d' + | '\u0a59' .. '\u0a5c' + | '\u0a5e' + | '\u0a66' .. '\u0a74' + | '\u0a81' .. '\u0a83' + | '\u0a85' .. '\u0a8d' + | '\u0a8f' .. '\u0a91' + | '\u0a93' .. '\u0aa8' + | '\u0aaa' .. '\u0ab0' + | '\u0ab2' .. '\u0ab3' + | '\u0ab5' .. '\u0ab9' + | '\u0abc' .. '\u0ac5' + | '\u0ac7' .. '\u0ac9' + | '\u0acb' .. '\u0acd' + | '\u0ad0' + | '\u0ae0' .. 
'\u0ae3' + | '\u0ae6' .. '\u0aef' + | '\u0b01' .. '\u0b03' + | '\u0b05' .. '\u0b0c' + | '\u0b0f' .. '\u0b10' + | '\u0b13' .. '\u0b28' + | '\u0b2a' .. '\u0b30' + | '\u0b32' .. '\u0b33' + | '\u0b35' .. '\u0b39' + | '\u0b3c' .. '\u0b43' + | '\u0b47' .. '\u0b48' + | '\u0b4b' .. '\u0b4d' + | '\u0b56' .. '\u0b57' + | '\u0b5c' .. '\u0b5d' + | '\u0b5f' .. '\u0b61' + | '\u0b66' .. '\u0b6f' + | '\u0b71' + | '\u0b82' .. '\u0b83' + | '\u0b85' .. '\u0b8a' + | '\u0b8e' .. '\u0b90' + | '\u0b92' .. '\u0b95' + | '\u0b99' .. '\u0b9a' + | '\u0b9c' + | '\u0b9e' .. '\u0b9f' + | '\u0ba3' .. '\u0ba4' + | '\u0ba8' .. '\u0baa' + | '\u0bae' .. '\u0bb5' + | '\u0bb7' .. '\u0bb9' + | '\u0bbe' .. '\u0bc2' + | '\u0bc6' .. '\u0bc8' + | '\u0bca' .. '\u0bcd' + | '\u0bd7' + | '\u0be7' .. '\u0bef' + | '\u0c01' .. '\u0c03' + | '\u0c05' .. '\u0c0c' + | '\u0c0e' .. '\u0c10' + | '\u0c12' .. '\u0c28' + | '\u0c2a' .. '\u0c33' + | '\u0c35' .. '\u0c39' + | '\u0c3e' .. '\u0c44' + | '\u0c46' .. '\u0c48' + | '\u0c4a' .. '\u0c4d' + | '\u0c55' .. '\u0c56' + | '\u0c60' .. '\u0c61' + | '\u0c66' .. '\u0c6f' + | '\u0c82' .. '\u0c83' + | '\u0c85' .. '\u0c8c' + | '\u0c8e' .. '\u0c90' + | '\u0c92' .. '\u0ca8' + | '\u0caa' .. '\u0cb3' + | '\u0cb5' .. '\u0cb9' + | '\u0cbc' .. '\u0cc4' + | '\u0cc6' .. '\u0cc8' + | '\u0cca' .. '\u0ccd' + | '\u0cd5' .. '\u0cd6' + | '\u0cde' + | '\u0ce0' .. '\u0ce1' + | '\u0ce6' .. '\u0cef' + | '\u0d02' .. '\u0d03' + | '\u0d05' .. '\u0d0c' + | '\u0d0e' .. '\u0d10' + | '\u0d12' .. '\u0d28' + | '\u0d2a' .. '\u0d39' + | '\u0d3e' .. '\u0d43' + | '\u0d46' .. '\u0d48' + | '\u0d4a' .. '\u0d4d' + | '\u0d57' + | '\u0d60' .. '\u0d61' + | '\u0d66' .. '\u0d6f' + | '\u0d82' .. '\u0d83' + | '\u0d85' .. '\u0d96' + | '\u0d9a' .. '\u0db1' + | '\u0db3' .. '\u0dbb' + | '\u0dbd' + | '\u0dc0' .. '\u0dc6' + | '\u0dca' + | '\u0dcf' .. '\u0dd4' + | '\u0dd6' + | '\u0dd8' .. '\u0ddf' + | '\u0df2' .. '\u0df3' + | '\u0e01' .. '\u0e3a' + | '\u0e40' .. '\u0e4e' + | '\u0e50' .. '\u0e59' + | '\u0e81' .. 
'\u0e82' + | '\u0e84' + | '\u0e87' .. '\u0e88' + | '\u0e8a' + | '\u0e8d' + | '\u0e94' .. '\u0e97' + | '\u0e99' .. '\u0e9f' + | '\u0ea1' .. '\u0ea3' + | '\u0ea5' + | '\u0ea7' + | '\u0eaa' .. '\u0eab' + | '\u0ead' .. '\u0eb9' + | '\u0ebb' .. '\u0ebd' + | '\u0ec0' .. '\u0ec4' + | '\u0ec6' + | '\u0ec8' .. '\u0ecd' + | '\u0ed0' .. '\u0ed9' + | '\u0edc' .. '\u0edd' + | '\u0f00' + | '\u0f18' .. '\u0f19' + | '\u0f20' .. '\u0f29' + | '\u0f35' + | '\u0f37' + | '\u0f39' + | '\u0f3e' .. '\u0f47' + | '\u0f49' .. '\u0f6a' + | '\u0f71' .. '\u0f84' + | '\u0f86' .. '\u0f8b' + | '\u0f90' .. '\u0f97' + | '\u0f99' .. '\u0fbc' + | '\u0fc6' + | '\u1000' .. '\u1021' + | '\u1023' .. '\u1027' + | '\u1029' .. '\u102a' + | '\u102c' .. '\u1032' + | '\u1036' .. '\u1039' + | '\u1040' .. '\u1049' + | '\u1050' .. '\u1059' + | '\u10a0' .. '\u10c5' + | '\u10d0' .. '\u10f8' + | '\u1100' .. '\u1159' + | '\u115f' .. '\u11a2' + | '\u11a8' .. '\u11f9' + | '\u1200' .. '\u1206' + | '\u1208' .. '\u1246' + | '\u1248' + | '\u124a' .. '\u124d' + | '\u1250' .. '\u1256' + | '\u1258' + | '\u125a' .. '\u125d' + | '\u1260' .. '\u1286' + | '\u1288' + | '\u128a' .. '\u128d' + | '\u1290' .. '\u12ae' + | '\u12b0' + | '\u12b2' .. '\u12b5' + | '\u12b8' .. '\u12be' + | '\u12c0' + | '\u12c2' .. '\u12c5' + | '\u12c8' .. '\u12ce' + | '\u12d0' .. '\u12d6' + | '\u12d8' .. '\u12ee' + | '\u12f0' .. '\u130e' + | '\u1310' + | '\u1312' .. '\u1315' + | '\u1318' .. '\u131e' + | '\u1320' .. '\u1346' + | '\u1348' .. '\u135a' + | '\u1369' .. '\u1371' + | '\u13a0' .. '\u13f4' + | '\u1401' .. '\u166c' + | '\u166f' .. '\u1676' + | '\u1681' .. '\u169a' + | '\u16a0' .. '\u16ea' + | '\u16ee' .. '\u16f0' + | '\u1700' .. '\u170c' + | '\u170e' .. '\u1714' + | '\u1720' .. '\u1734' + | '\u1740' .. '\u1753' + | '\u1760' .. '\u176c' + | '\u176e' .. '\u1770' + | '\u1772' .. '\u1773' + | '\u1780' .. '\u17b3' + | '\u17b6' .. '\u17d3' + | '\u17d7' + | '\u17dc' .. '\u17dd' + | '\u17e0' .. '\u17e9' + | '\u180b' .. '\u180d' + | '\u1810' .. 
'\u1819' + | '\u1820' .. '\u1877' + | '\u1880' .. '\u18a9' + | '\u1900' .. '\u191c' + | '\u1920' .. '\u192b' + | '\u1930' .. '\u193b' + | '\u1946' .. '\u196d' + | '\u1970' .. '\u1974' + | '\u1d00' .. '\u1d6b' + | '\u1e00' .. '\u1e9b' + | '\u1ea0' .. '\u1ef9' + | '\u1f00' .. '\u1f15' + | '\u1f18' .. '\u1f1d' + | '\u1f20' .. '\u1f45' + | '\u1f48' .. '\u1f4d' + | '\u1f50' .. '\u1f57' + | '\u1f59' + | '\u1f5b' + | '\u1f5d' + | '\u1f5f' .. '\u1f7d' + | '\u1f80' .. '\u1fb4' + | '\u1fb6' .. '\u1fbc' + | '\u1fbe' + | '\u1fc2' .. '\u1fc4' + | '\u1fc6' .. '\u1fcc' + | '\u1fd0' .. '\u1fd3' + | '\u1fd6' .. '\u1fdb' + | '\u1fe0' .. '\u1fec' + | '\u1ff2' .. '\u1ff4' + | '\u1ff6' .. '\u1ffc' + | '\u203f' .. '\u2040' + | '\u2054' + | '\u2071' + | '\u207f' + | '\u20d0' .. '\u20dc' + | '\u20e1' + | '\u20e5' .. '\u20ea' + | '\u2102' + | '\u2107' + | '\u210a' .. '\u2113' + | '\u2115' + | '\u2118' .. '\u211d' + | '\u2124' + | '\u2126' + | '\u2128' + | '\u212a' .. '\u2131' + | '\u2133' .. '\u2139' + | '\u213d' .. '\u213f' + | '\u2145' .. '\u2149' + | '\u2160' .. '\u2183' + | '\u3005' .. '\u3007' + | '\u3021' .. '\u302f' + | '\u3031' .. '\u3035' + | '\u3038' .. '\u303c' + | '\u3041' .. '\u3096' + | '\u3099' .. '\u309a' + | '\u309d' .. '\u309f' + | '\u30a1' .. '\u30ff' + | '\u3105' .. '\u312c' + | '\u3131' .. '\u318e' + | '\u31a0' .. '\u31b7' + | '\u31f0' .. '\u31ff' + | '\u3400' .. '\u4db5' + | '\u4e00' .. '\u9fa5' + | '\ua000' .. '\ua48c' + | '\uac00' .. '\ud7a3' + | '\uf900' .. '\ufa2d' + | '\ufa30' .. '\ufa6a' + | '\ufb00' .. '\ufb06' + | '\ufb13' .. '\ufb17' + | '\ufb1d' .. '\ufb28' + | '\ufb2a' .. '\ufb36' + | '\ufb38' .. '\ufb3c' + | '\ufb3e' + | '\ufb40' .. '\ufb41' + | '\ufb43' .. '\ufb44' + | '\ufb46' .. '\ufbb1' + | '\ufbd3' .. '\ufc5d' + | '\ufc64' .. '\ufd3d' + | '\ufd50' .. '\ufd8f' + | '\ufd92' .. '\ufdc7' + | '\ufdf0' .. '\ufdf9' + | '\ufe00' .. '\ufe0f' + | '\ufe20' .. '\ufe23' + | '\ufe33' .. '\ufe34' + | '\ufe4d' .. 
'\ufe4f' + | '\ufe71' + | '\ufe73' + | '\ufe77' + | '\ufe79' + | '\ufe7b' + | '\ufe7d' + | '\ufe7f' .. '\ufefc' + | '\uff10' .. '\uff19' + | '\uff21' .. '\uff3a' + | '\uff3f' + | '\uff41' .. '\uff5a' + | '\uff65' .. '\uffbe' + | '\uffc2' .. '\uffc7' + | '\uffca' .. '\uffcf' + | '\uffd2' .. '\uffd7' + | '\uffda' .. '\uffdc' + ; diff --git a/src/grammar/xidstart.g4 b/src/grammar/xidstart.g4 new file mode 100644 index 00000000000..d02774c6135 --- /dev/null +++ b/src/grammar/xidstart.g4 @@ -0,0 +1,289 @@ +lexer grammar Xidstart; + +fragment XID_Start : + '\u0041' .. '\u005a' + | '_' + | '\u0061' .. '\u007a' + | '\u00aa' + | '\u00b5' + | '\u00ba' + | '\u00c0' .. '\u00d6' + | '\u00d8' .. '\u00f6' + | '\u00f8' .. '\u0236' + | '\u0250' .. '\u02c1' + | '\u02c6' .. '\u02d1' + | '\u02e0' .. '\u02e4' + | '\u02ee' + | '\u0386' + | '\u0388' .. '\u038a' + | '\u038c' + | '\u038e' .. '\u03a1' + | '\u03a3' .. '\u03ce' + | '\u03d0' .. '\u03f5' + | '\u03f7' .. '\u03fb' + | '\u0400' .. '\u0481' + | '\u048a' .. '\u04ce' + | '\u04d0' .. '\u04f5' + | '\u04f8' .. '\u04f9' + | '\u0500' .. '\u050f' + | '\u0531' .. '\u0556' + | '\u0559' + | '\u0561' .. '\u0587' + | '\u05d0' .. '\u05ea' + | '\u05f0' .. '\u05f2' + | '\u0621' .. '\u063a' + | '\u0640' .. '\u064a' + | '\u066e' .. '\u066f' + | '\u0671' .. '\u06d3' + | '\u06d5' + | '\u06e5' .. '\u06e6' + | '\u06ee' .. '\u06ef' + | '\u06fa' .. '\u06fc' + | '\u06ff' + | '\u0710' + | '\u0712' .. '\u072f' + | '\u074d' .. '\u074f' + | '\u0780' .. '\u07a5' + | '\u07b1' + | '\u0904' .. '\u0939' + | '\u093d' + | '\u0950' + | '\u0958' .. '\u0961' + | '\u0985' .. '\u098c' + | '\u098f' .. '\u0990' + | '\u0993' .. '\u09a8' + | '\u09aa' .. '\u09b0' + | '\u09b2' + | '\u09b6' .. '\u09b9' + | '\u09bd' + | '\u09dc' .. '\u09dd' + | '\u09df' .. '\u09e1' + | '\u09f0' .. '\u09f1' + | '\u0a05' .. '\u0a0a' + | '\u0a0f' .. '\u0a10' + | '\u0a13' .. '\u0a28' + | '\u0a2a' .. '\u0a30' + | '\u0a32' .. '\u0a33' + | '\u0a35' .. '\u0a36' + | '\u0a38' .. '\u0a39' + | '\u0a59' .. 
'\u0a5c' + | '\u0a5e' + | '\u0a72' .. '\u0a74' + | '\u0a85' .. '\u0a8d' + | '\u0a8f' .. '\u0a91' + | '\u0a93' .. '\u0aa8' + | '\u0aaa' .. '\u0ab0' + | '\u0ab2' .. '\u0ab3' + | '\u0ab5' .. '\u0ab9' + | '\u0abd' + | '\u0ad0' + | '\u0ae0' .. '\u0ae1' + | '\u0b05' .. '\u0b0c' + | '\u0b0f' .. '\u0b10' + | '\u0b13' .. '\u0b28' + | '\u0b2a' .. '\u0b30' + | '\u0b32' .. '\u0b33' + | '\u0b35' .. '\u0b39' + | '\u0b3d' + | '\u0b5c' .. '\u0b5d' + | '\u0b5f' .. '\u0b61' + | '\u0b71' + | '\u0b83' + | '\u0b85' .. '\u0b8a' + | '\u0b8e' .. '\u0b90' + | '\u0b92' .. '\u0b95' + | '\u0b99' .. '\u0b9a' + | '\u0b9c' + | '\u0b9e' .. '\u0b9f' + | '\u0ba3' .. '\u0ba4' + | '\u0ba8' .. '\u0baa' + | '\u0bae' .. '\u0bb5' + | '\u0bb7' .. '\u0bb9' + | '\u0c05' .. '\u0c0c' + | '\u0c0e' .. '\u0c10' + | '\u0c12' .. '\u0c28' + | '\u0c2a' .. '\u0c33' + | '\u0c35' .. '\u0c39' + | '\u0c60' .. '\u0c61' + | '\u0c85' .. '\u0c8c' + | '\u0c8e' .. '\u0c90' + | '\u0c92' .. '\u0ca8' + | '\u0caa' .. '\u0cb3' + | '\u0cb5' .. '\u0cb9' + | '\u0cbd' + | '\u0cde' + | '\u0ce0' .. '\u0ce1' + | '\u0d05' .. '\u0d0c' + | '\u0d0e' .. '\u0d10' + | '\u0d12' .. '\u0d28' + | '\u0d2a' .. '\u0d39' + | '\u0d60' .. '\u0d61' + | '\u0d85' .. '\u0d96' + | '\u0d9a' .. '\u0db1' + | '\u0db3' .. '\u0dbb' + | '\u0dbd' + | '\u0dc0' .. '\u0dc6' + | '\u0e01' .. '\u0e30' + | '\u0e32' + | '\u0e40' .. '\u0e46' + | '\u0e81' .. '\u0e82' + | '\u0e84' + | '\u0e87' .. '\u0e88' + | '\u0e8a' + | '\u0e8d' + | '\u0e94' .. '\u0e97' + | '\u0e99' .. '\u0e9f' + | '\u0ea1' .. '\u0ea3' + | '\u0ea5' + | '\u0ea7' + | '\u0eaa' .. '\u0eab' + | '\u0ead' .. '\u0eb0' + | '\u0eb2' + | '\u0ebd' + | '\u0ec0' .. '\u0ec4' + | '\u0ec6' + | '\u0edc' .. '\u0edd' + | '\u0f00' + | '\u0f40' .. '\u0f47' + | '\u0f49' .. '\u0f6a' + | '\u0f88' .. '\u0f8b' + | '\u1000' .. '\u1021' + | '\u1023' .. '\u1027' + | '\u1029' .. '\u102a' + | '\u1050' .. '\u1055' + | '\u10a0' .. '\u10c5' + | '\u10d0' .. '\u10f8' + | '\u1100' .. '\u1159' + | '\u115f' .. '\u11a2' + | '\u11a8' .. 
'\u11f9' + | '\u1200' .. '\u1206' + | '\u1208' .. '\u1246' + | '\u1248' + | '\u124a' .. '\u124d' + | '\u1250' .. '\u1256' + | '\u1258' + | '\u125a' .. '\u125d' + | '\u1260' .. '\u1286' + | '\u1288' + | '\u128a' .. '\u128d' + | '\u1290' .. '\u12ae' + | '\u12b0' + | '\u12b2' .. '\u12b5' + | '\u12b8' .. '\u12be' + | '\u12c0' + | '\u12c2' .. '\u12c5' + | '\u12c8' .. '\u12ce' + | '\u12d0' .. '\u12d6' + | '\u12d8' .. '\u12ee' + | '\u12f0' .. '\u130e' + | '\u1310' + | '\u1312' .. '\u1315' + | '\u1318' .. '\u131e' + | '\u1320' .. '\u1346' + | '\u1348' .. '\u135a' + | '\u13a0' .. '\u13f4' + | '\u1401' .. '\u166c' + | '\u166f' .. '\u1676' + | '\u1681' .. '\u169a' + | '\u16a0' .. '\u16ea' + | '\u16ee' .. '\u16f0' + | '\u1700' .. '\u170c' + | '\u170e' .. '\u1711' + | '\u1720' .. '\u1731' + | '\u1740' .. '\u1751' + | '\u1760' .. '\u176c' + | '\u176e' .. '\u1770' + | '\u1780' .. '\u17b3' + | '\u17d7' + | '\u17dc' + | '\u1820' .. '\u1877' + | '\u1880' .. '\u18a8' + | '\u1900' .. '\u191c' + | '\u1950' .. '\u196d' + | '\u1970' .. '\u1974' + | '\u1d00' .. '\u1d6b' + | '\u1e00' .. '\u1e9b' + | '\u1ea0' .. '\u1ef9' + | '\u1f00' .. '\u1f15' + | '\u1f18' .. '\u1f1d' + | '\u1f20' .. '\u1f45' + | '\u1f48' .. '\u1f4d' + | '\u1f50' .. '\u1f57' + | '\u1f59' + | '\u1f5b' + | '\u1f5d' + | '\u1f5f' .. '\u1f7d' + | '\u1f80' .. '\u1fb4' + | '\u1fb6' .. '\u1fbc' + | '\u1fbe' + | '\u1fc2' .. '\u1fc4' + | '\u1fc6' .. '\u1fcc' + | '\u1fd0' .. '\u1fd3' + | '\u1fd6' .. '\u1fdb' + | '\u1fe0' .. '\u1fec' + | '\u1ff2' .. '\u1ff4' + | '\u1ff6' .. '\u1ffc' + | '\u2071' + | '\u207f' + | '\u2102' + | '\u2107' + | '\u210a' .. '\u2113' + | '\u2115' + | '\u2118' .. '\u211d' + | '\u2124' + | '\u2126' + | '\u2128' + | '\u212a' .. '\u2131' + | '\u2133' .. '\u2139' + | '\u213d' .. '\u213f' + | '\u2145' .. '\u2149' + | '\u2160' .. '\u2183' + | '\u3005' .. '\u3007' + | '\u3021' .. '\u3029' + | '\u3031' .. '\u3035' + | '\u3038' .. '\u303c' + | '\u3041' .. '\u3096' + | '\u309d' .. '\u309f' + | '\u30a1' .. 
'\u30fa' + | '\u30fc' .. '\u30ff' + | '\u3105' .. '\u312c' + | '\u3131' .. '\u318e' + | '\u31a0' .. '\u31b7' + | '\u31f0' .. '\u31ff' + | '\u3400' .. '\u4db5' + | '\u4e00' .. '\u9fa5' + | '\ua000' .. '\ua48c' + | '\uac00' .. '\ud7a3' + | '\uf900' .. '\ufa2d' + | '\ufa30' .. '\ufa6a' + | '\ufb00' .. '\ufb06' + | '\ufb13' .. '\ufb17' + | '\ufb1d' + | '\ufb1f' .. '\ufb28' + | '\ufb2a' .. '\ufb36' + | '\ufb38' .. '\ufb3c' + | '\ufb3e' + | '\ufb40' .. '\ufb41' + | '\ufb43' .. '\ufb44' + | '\ufb46' .. '\ufbb1' + | '\ufbd3' .. '\ufc5d' + | '\ufc64' .. '\ufd3d' + | '\ufd50' .. '\ufd8f' + | '\ufd92' .. '\ufdc7' + | '\ufdf0' .. '\ufdf9' + | '\ufe71' + | '\ufe73' + | '\ufe77' + | '\ufe79' + | '\ufe7b' + | '\ufe7d' + | '\ufe7f' .. '\ufefc' + | '\uff21' .. '\uff3a' + | '\uff41' .. '\uff5a' + | '\uff66' .. '\uff9d' + | '\uffa0' .. '\uffbe' + | '\uffc2' .. '\uffc7' + | '\uffca' .. '\uffcf' + | '\uffd2' .. '\uffd7' + | '\uffda' .. '\uffdc' + ; From e5e343aeb78a8fe7fed897eae1e79019383691e8 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sat, 17 Jan 2015 21:59:44 +0100 Subject: [PATCH 2/3] Finished unicode support in the model lexer. Completed XID_Start and XID_Continue rules --- src/grammar/RustLexer.g4 | 2 +- src/grammar/verify.rs | 59 ++++++++++++++++------- src/grammar/xidcontinue.g4 | 98 ++++++++++++++++++++++++++++++++++++++ src/grammar/xidstart.g4 | 90 ++++++++++++++++++++++++++++++++++ 4 files changed, 230 insertions(+), 19 deletions(-) diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 6578f79f92b..8739d135b4f 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -93,7 +93,7 @@ fragment SUFFIX ; LIT_CHAR - : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' SUFFIX? + : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] | '\ud800' .. '\udbff' '\udc00' .. '\udfff' ) '\'' SUFFIX? 
; LIT_BYTE diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index cf408c91609..8bf501c7f3f 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -10,6 +10,8 @@ #![feature(plugin)] +#![allow(unstable)] + extern crate syntax; extern crate rustc; @@ -164,7 +166,8 @@ fn count(lit: &str) -> usize { lit.chars().take_while(|c| *c == '#').count() } -fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { +fn parse_antlr_token(s: &str, tokens: &HashMap, surrogate_pairs_pos: &[usize]) + -> TokenAndSpan { // old regex: // \[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+] let start = s.find_str("[@").unwrap(); @@ -213,9 +216,16 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAn 0 }; + let mut lo = start.parse::().unwrap() - offset; + let mut hi = end.parse::().unwrap() + 1; + + // Adjust the span: For each surrogate pair already encountered, subtract one position. + lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32; + hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32; + let sp = syntax::codemap::Span { - lo: syntax::codemap::BytePos(start.parse::().unwrap() - offset), - hi: syntax::codemap::BytePos(end.parse::().unwrap() + 1), + lo: syntax::codemap::BytePos(lo), + hi: syntax::codemap::BytePos(hi), expn_id: syntax::codemap::NO_EXPANSION }; @@ -235,11 +245,10 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool { } } -fn span_cmp(rust_sp: syntax::codemap::Span, antlr_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool { - println!("{} {}", cm.bytepos_to_file_charpos(rust_sp.lo).to_uint(), cm.bytepos_to_file_charpos(rust_sp.hi).to_uint()); - antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() && - antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() && - antlr_sp.expn_id == rust_sp.expn_id +fn span_cmp(antlr_sp: syntax::codemap::Span, rust_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> 
bool { + antlr_sp.expn_id == rust_sp.expn_id && + antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() && + antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() } fn main() { @@ -250,16 +259,18 @@ fn main() { let args = std::os::args(); - let mut token_file = File::open(&Path::new(args[2])); - let token_map = parse_token_list(token_file.read_to_string().unwrap()); - - let mut stdin = std::io::stdin(); - let mut lock = stdin.lock(); - let lines = lock.lines(); - let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(), - &token_map)); - + // Rust's lexer let code = File::open(&Path::new(args[1])).unwrap().read_to_string().unwrap(); + + let surrogate_pairs_pos: Vec = code.chars().enumerate() + .filter(|&(_, c)| c as usize > 0xFFFF) + .map(|(n, _)| n) + .enumerate() + .map(|(x, n)| x + n) + .collect(); + + debug!("Pairs: {:?}", surrogate_pairs_pos); + let options = config::basic_options(); let session = session::build_session(options, None, syntax::diagnostics::registry::Registry::new(&[])); @@ -269,13 +280,25 @@ fn main() { let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap); let ref cm = lexer.span_diagnostic.cm; + // ANTLR + let mut token_file = File::open(&Path::new(args[2])); + let token_map = parse_token_list(token_file.read_to_string().unwrap()); + + let mut stdin = std::io::stdin(); + let mut lock = stdin.lock(); + let lines = lock.lines(); + let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(), + &token_map, + &surrogate_pairs_pos[])); + for antlr_tok in antlr_tokens { let rustc_tok = next(&mut lexer); if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof { continue } - assert!(span_cmp(rustc_tok.sp, antlr_tok.sp, cm), "{:?} and {:?} have different spans", rustc_tok, + assert!(span_cmp(antlr_tok.sp, rustc_tok.sp, cm), "{:?} and {:?} have different spans", + rustc_tok, antlr_tok); macro_rules! 
matches { diff --git a/src/grammar/xidcontinue.g4 b/src/grammar/xidcontinue.g4 index 6000648f5fb..f3a1a3b40f9 100644 --- a/src/grammar/xidcontinue.g4 +++ b/src/grammar/xidcontinue.g4 @@ -372,4 +372,102 @@ fragment XID_Continue: | '\uffca' .. '\uffcf' | '\uffd2' .. '\uffd7' | '\uffda' .. '\uffdc' + | '\ud800' '\udc00' .. '\udc0a' + | '\ud800' '\udc0d' .. '\udc25' + | '\ud800' '\udc28' .. '\udc39' + | '\ud800' '\udc3c' .. '\udc3c' + | '\ud800' '\udc3f' .. '\udc4c' + | '\ud800' '\udc50' .. '\udc5c' + | '\ud800' '\udc80' .. '\udcf9' + | '\ud800' '\udf00' .. '\udf1d' + | '\ud800' '\udf30' .. '\udf49' + | '\ud800' '\udf80' .. '\udf9c' + | '\ud801' '\ue000' .. '\ue09c' + | '\ud801' '\ue0a0' .. '\ue0a8' + | '\ud802' '\ue400' .. '\ue404' + | '\ud802' '\u0808' + | '\ud802' '\ue40a' .. '\ue434' + | '\ud802' '\ue437' .. '\ue437' + | '\ud802' '\u083c' + | '\ud802' '\u083f' + | '\ud834' '\uad65' .. '\uad68' + | '\ud834' '\uad6d' .. '\uad71' + | '\ud834' '\uad7b' .. '\uad81' + | '\ud834' '\uad85' .. '\uad8a' + | '\ud834' '\uadaa' .. '\uadac' + | '\ud835' '\ub000' .. '\ub053' + | '\ud835' '\ub056' .. '\ub09b' + | '\ud835' '\ub09e' .. '\ub09e' + | '\ud835' '\ud4a2' + | '\ud835' '\ub0a5' .. '\ub0a5' + | '\ud835' '\ub0a9' .. '\ub0ab' + | '\ud835' '\ub0ae' .. '\ub0b8' + | '\ud835' '\ud4bb' + | '\ud835' '\ub0bd' .. '\ub0c2' + | '\ud835' '\ub0c5' .. '\ub104' + | '\ud835' '\ub107' .. '\ub109' + | '\ud835' '\ub10d' .. '\ub113' + | '\ud835' '\ub116' .. '\ub11b' + | '\ud835' '\ub11e' .. '\ub138' + | '\ud835' '\ub13b' .. '\ub13d' + | '\ud835' '\ub140' .. '\ub143' + | '\ud835' '\ud546' + | '\ud835' '\ub14a' .. '\ub14f' + | '\ud835' '\ub152' .. '\ub2a2' + | '\ud835' '\ub2a8' .. '\ub2bf' + | '\ud835' '\ub2c2' .. '\ub2d9' + | '\ud835' '\ub2dc' .. '\ub2f9' + | '\ud835' '\ub2fc' .. '\ub313' + | '\ud835' '\ub316' .. '\ub333' + | '\ud835' '\ub336' .. '\ub34d' + | '\ud835' '\ub350' .. '\ub36d' + | '\ud835' '\ub370' .. '\ub387' + | '\ud835' '\ub38a' .. '\ub3a7' + | '\ud835' '\ub3aa' .. 
'\ub3c1' + | '\ud835' '\ub3c4' .. '\ub3c8' + | '\ud835' '\ub3ce' .. '\ub3fe' + | '\ud840' '\udc00' .. '\udffe' + | '\ud841' '\ue000' .. '\ue3fe' + | '\ud842' '\ue400' .. '\ue7fe' + | '\ud843' '\ue800' .. '\uebfe' + | '\ud844' '\uec00' .. '\ueffe' + | '\ud845' '\uf000' .. '\uf3fe' + | '\ud846' '\uf400' .. '\uf7fe' + | '\ud847' '\uf800' .. '\ufbfe' + | '\ud848' '\ufc00' .. '\ufffe' + | '\ud849' '\u0000' .. '\u03fe' + | '\ud84a' '\u0400' .. '\u07fe' + | '\ud84b' '\u0800' .. '\u0bfe' + | '\ud84c' '\u0c00' .. '\u0ffe' + | '\ud84d' '\u1000' .. '\u13fe' + | '\ud84e' '\u1400' .. '\u17fe' + | '\ud84f' '\u1800' .. '\u1bfe' + | '\ud850' '\u1c00' .. '\u1ffe' + | '\ud851' '\u2000' .. '\u23fe' + | '\ud852' '\u2400' .. '\u27fe' + | '\ud853' '\u2800' .. '\u2bfe' + | '\ud854' '\u2c00' .. '\u2ffe' + | '\ud855' '\u3000' .. '\u33fe' + | '\ud856' '\u3400' .. '\u37fe' + | '\ud857' '\u3800' .. '\u3bfe' + | '\ud858' '\u3c00' .. '\u3ffe' + | '\ud859' '\u4000' .. '\u43fe' + | '\ud85a' '\u4400' .. '\u47fe' + | '\ud85b' '\u4800' .. '\u4bfe' + | '\ud85c' '\u4c00' .. '\u4ffe' + | '\ud85d' '\u5000' .. '\u53fe' + | '\ud85e' '\u5400' .. '\u57fe' + | '\ud85f' '\u5800' .. '\u5bfe' + | '\ud860' '\u5c00' .. '\u5ffe' + | '\ud861' '\u6000' .. '\u63fe' + | '\ud862' '\u6400' .. '\u67fe' + | '\ud863' '\u6800' .. '\u6bfe' + | '\ud864' '\u6c00' .. '\u6ffe' + | '\ud865' '\u7000' .. '\u73fe' + | '\ud866' '\u7400' .. '\u77fe' + | '\ud867' '\u7800' .. '\u7bfe' + | '\ud868' '\u7c00' .. '\u7ffe' + | '\ud869' '\u8000' .. '\u82d5' + | '\ud87e' '\ud400' .. '\ud61c' + | '\udb40' '\udd00' .. '\uddee' ; diff --git a/src/grammar/xidstart.g4 b/src/grammar/xidstart.g4 index d02774c6135..53fb50f4584 100644 --- a/src/grammar/xidstart.g4 +++ b/src/grammar/xidstart.g4 @@ -286,4 +286,94 @@ fragment XID_Start : | '\uffca' .. '\uffcf' | '\uffd2' .. '\uffd7' | '\uffda' .. '\uffdc' + | '\ud800' '\udc00' .. '\udc0a' + | '\ud800' '\udc0d' .. '\udc25' + | '\ud800' '\udc28' .. '\udc39' + | '\ud800' '\udc3c' .. 
'\udc3c' + | '\ud800' '\udc3f' .. '\udc4c' + | '\ud800' '\udc50' .. '\udc5c' + | '\ud800' '\udc80' .. '\udcf9' + | '\ud800' '\udf00' .. '\udf1d' + | '\ud800' '\udf30' .. '\udf49' + | '\ud800' '\udf80' .. '\udf9c' + | '\ud801' '\ue000' .. '\ue09c' + | '\ud802' '\ue400' .. '\ue404' + | '\ud802' '\u0808' + | '\ud802' '\ue40a' .. '\ue434' + | '\ud802' '\ue437' .. '\ue437' + | '\ud802' '\u083c' + | '\ud802' '\u083f' + | '\ud835' '\ub000' .. '\ub053' + | '\ud835' '\ub056' .. '\ub09b' + | '\ud835' '\ub09e' .. '\ub09e' + | '\ud835' '\ud4a2' + | '\ud835' '\ub0a5' .. '\ub0a5' + | '\ud835' '\ub0a9' .. '\ub0ab' + | '\ud835' '\ub0ae' .. '\ub0b8' + | '\ud835' '\ud4bb' + | '\ud835' '\ub0bd' .. '\ub0c2' + | '\ud835' '\ub0c5' .. '\ub104' + | '\ud835' '\ub107' .. '\ub109' + | '\ud835' '\ub10d' .. '\ub113' + | '\ud835' '\ub116' .. '\ub11b' + | '\ud835' '\ub11e' .. '\ub138' + | '\ud835' '\ub13b' .. '\ub13d' + | '\ud835' '\ub140' .. '\ub143' + | '\ud835' '\ud546' + | '\ud835' '\ub14a' .. '\ub14f' + | '\ud835' '\ub152' .. '\ub2a2' + | '\ud835' '\ub2a8' .. '\ub2bf' + | '\ud835' '\ub2c2' .. '\ub2d9' + | '\ud835' '\ub2dc' .. '\ub2f9' + | '\ud835' '\ub2fc' .. '\ub313' + | '\ud835' '\ub316' .. '\ub333' + | '\ud835' '\ub336' .. '\ub34d' + | '\ud835' '\ub350' .. '\ub36d' + | '\ud835' '\ub370' .. '\ub387' + | '\ud835' '\ub38a' .. '\ub3a7' + | '\ud835' '\ub3aa' .. '\ub3c1' + | '\ud835' '\ub3c4' .. '\ub3c8' + | '\ud840' '\udc00' .. '\udffe' + | '\ud841' '\ue000' .. '\ue3fe' + | '\ud842' '\ue400' .. '\ue7fe' + | '\ud843' '\ue800' .. '\uebfe' + | '\ud844' '\uec00' .. '\ueffe' + | '\ud845' '\uf000' .. '\uf3fe' + | '\ud846' '\uf400' .. '\uf7fe' + | '\ud847' '\uf800' .. '\ufbfe' + | '\ud848' '\ufc00' .. '\ufffe' + | '\ud849' '\u0000' .. '\u03fe' + | '\ud84a' '\u0400' .. '\u07fe' + | '\ud84b' '\u0800' .. '\u0bfe' + | '\ud84c' '\u0c00' .. '\u0ffe' + | '\ud84d' '\u1000' .. '\u13fe' + | '\ud84e' '\u1400' .. '\u17fe' + | '\ud84f' '\u1800' .. '\u1bfe' + | '\ud850' '\u1c00' .. 
'\u1ffe' + | '\ud851' '\u2000' .. '\u23fe' + | '\ud852' '\u2400' .. '\u27fe' + | '\ud853' '\u2800' .. '\u2bfe' + | '\ud854' '\u2c00' .. '\u2ffe' + | '\ud855' '\u3000' .. '\u33fe' + | '\ud856' '\u3400' .. '\u37fe' + | '\ud857' '\u3800' .. '\u3bfe' + | '\ud858' '\u3c00' .. '\u3ffe' + | '\ud859' '\u4000' .. '\u43fe' + | '\ud85a' '\u4400' .. '\u47fe' + | '\ud85b' '\u4800' .. '\u4bfe' + | '\ud85c' '\u4c00' .. '\u4ffe' + | '\ud85d' '\u5000' .. '\u53fe' + | '\ud85e' '\u5400' .. '\u57fe' + | '\ud85f' '\u5800' .. '\u5bfe' + | '\ud860' '\u5c00' .. '\u5ffe' + | '\ud861' '\u6000' .. '\u63fe' + | '\ud862' '\u6400' .. '\u67fe' + | '\ud863' '\u6800' .. '\u6bfe' + | '\ud864' '\u6c00' .. '\u6ffe' + | '\ud865' '\u7000' .. '\u73fe' + | '\ud866' '\u7400' .. '\u77fe' + | '\ud867' '\u7800' .. '\u7bfe' + | '\ud868' '\u7c00' .. '\u7ffe' + | '\ud869' '\u8000' .. '\u82d5' + | '\ud87e' '\ud400' .. '\ud61c' ; From 13bc8afa4b56e8d7b012f35a35a11087739abf15 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Tue, 21 Apr 2015 12:02:12 +0200 Subject: [PATCH 3/3] Model lexer: Fix remaining issues --- src/grammar/README.md | 2 +- src/grammar/RustLexer.g4 | 112 ++++++++---------- src/grammar/check.sh | 8 +- src/grammar/verify.rs | 84 +++++++------ src/libcollections/fmt.rs | 2 - src/libcollections/str.rs | 2 - src/libcollections/string.rs | 2 - src/libcore/hash/sip.rs | 2 - src/libcore/num/mod.rs | 2 - src/libcore/str/mod.rs | 2 - src/libcoretest/char.rs | 2 - src/libgetopts/lib.rs | 2 - src/librand/distributions/gamma.rs | 2 - src/librustc_unicode/u_str.rs | 2 - src/libserialize/hex.rs | 2 - src/libstd/ascii.rs | 2 - src/libstd/collections/hash/map.rs | 2 - src/libstd/collections/hash/set.rs | 2 - src/libstd/collections/hash/table.rs | 2 - src/libstd/io/buffered.rs | 2 - src/libstd/num/strconv.rs | 2 - src/libstd/rt/util.rs | 2 - src/libsyntax/codemap.rs | 2 - src/libsyntax/ext/tt/macro_parser.rs | 2 - src/test/bench/core-std.rs | 1 - src/test/bench/msgsend-ring-mutex-arcs.rs | 1 - 
src/test/bench/noise.rs | 1 - src/test/compile-fail/utf8_idents.rs | 1 - src/test/pretty/block-comment-wchar.pp | 1 - src/test/pretty/block-comment-wchar.rs | 1 - src/test/run-pass/byte-literals.rs | 1 - .../default-method-supertrait-vtable.rs | 1 - src/test/run-pass/ifmt.rs | 1 - src/test/run-pass/issue-12582.rs | 1 - src/test/run-pass/issue-13027.rs | 1 - src/test/run-pass/issue-2718.rs | 1 - src/test/run-pass/issue-3683.rs | 1 - src/test/run-pass/issue-4759-1.rs | 2 - src/test/run-pass/issue-5280.rs | 1 - .../issue-5321-immediates-with-bare-self.rs | 1 - ...line-endings-string-literal-doc-comment.rs | 1 - ...ase-types-non-uppercase-statics-unicode.rs | 1 - src/test/run-pass/match-range.rs | 1 - src/test/run-pass/multibyte.rs | 1 - src/test/run-pass/raw-str.rs | Bin 1341 -> 1307 bytes src/test/run-pass/shebang.rs | 1 - src/test/run-pass/struct-return.rs | 1 - src/test/run-pass/trait-to-str.rs | 1 - .../run-pass/trait-with-bounds-default.rs | 1 - .../run-pass/traits-default-method-self.rs | 1 - .../run-pass/traits-default-method-trivial.rs | 1 - src/test/run-pass/unsized.rs | 2 - src/test/run-pass/unsized2.rs | 2 - src/test/run-pass/utf8-bom.rs | 1 - src/test/run-pass/utf8.rs | 1 - src/test/run-pass/utf8_chars.rs | 1 - src/test/run-pass/utf8_idents.rs | 1 - 57 files changed, 102 insertions(+), 179 deletions(-) diff --git a/src/grammar/README.md b/src/grammar/README.md index 1f7923e1caf..6e0cf17a880 100644 --- a/src/grammar/README.md +++ b/src/grammar/README.md @@ -12,7 +12,7 @@ javac *.java rustc -O verify.rs for file in ../*/**.rs; do echo $file; - grun RustLexer tokens -tokens < $file | ./verify $file RustLexer.tokens || break + grun RustLexer tokens -tokens < "$file" | ./verify "$file" RustLexer.tokens || break done ``` diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 8739d135b4f..3d8f3aeb28f 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -1,5 +1,12 @@ lexer grammar RustLexer; +@lexer::members { + public boolean 
is_at(int pos) { + return _input.index() == pos; + } +} + + tokens { EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT, MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP, @@ -8,7 +15,7 @@ tokens { LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY, LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT, - COMMENT + COMMENT, SHEBANG } import xidstart , xidcontinue; @@ -86,85 +93,54 @@ fragment CHAR_ESCAPE | [xX] HEXIT HEXIT | 'u' HEXIT HEXIT HEXIT HEXIT | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT + | 'u{' HEXIT '}' + | 'u{' HEXIT HEXIT '}' + | 'u{' HEXIT HEXIT HEXIT '}' + | 'u{' HEXIT HEXIT HEXIT HEXIT '}' + | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}' + | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}' ; fragment SUFFIX : IDENT ; +fragment INTEGER_SUFFIX + : { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX + ; + LIT_CHAR - : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] | '\ud800' .. '\udbff' '\udc00' .. '\udfff' ) '\'' SUFFIX? + : '\'' ( '\\' CHAR_ESCAPE + | ~[\\'\n\t\r] + | '\ud800' .. '\udbff' '\udc00' .. '\udfff' + ) + '\'' SUFFIX? ; LIT_BYTE - : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX? + : 'b\'' ( '\\' ( [xX] HEXIT HEXIT + | [nrt\\'"0] ) + | ~[\\'\n\t\r] '\udc00'..'\udfff'? + ) + '\'' SUFFIX? ; LIT_INTEGER - : [0-9][0-9_]* SUFFIX? - | '0b' [01][01_]* SUFFIX? - | '0o' [0-7][0-7_]* SUFFIX? - | '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX? + + : [0-9][0-9_]* INTEGER_SUFFIX? + | '0b' [01_]+ INTEGER_SUFFIX? + | '0o' [0-7_]+ INTEGER_SUFFIX? + | '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX? ; LIT_FLOAT : [0-9][0-9_]* ('.' { - /* dot followed by another dot is a range, no float */ + /* dot followed by another dot is a range, not a float */ _input.LA(1) != '.' 
&& - /* dot followed by an identifier is an integer with a function call, no float */ + /* dot followed by an identifier is an integer with a function call, not a float */ _input.LA(1) != '_' && - _input.LA(1) != 'a' && - _input.LA(1) != 'b' && - _input.LA(1) != 'c' && - _input.LA(1) != 'd' && - _input.LA(1) != 'e' && - _input.LA(1) != 'f' && - _input.LA(1) != 'g' && - _input.LA(1) != 'h' && - _input.LA(1) != 'i' && - _input.LA(1) != 'j' && - _input.LA(1) != 'k' && - _input.LA(1) != 'l' && - _input.LA(1) != 'm' && - _input.LA(1) != 'n' && - _input.LA(1) != 'o' && - _input.LA(1) != 'p' && - _input.LA(1) != 'q' && - _input.LA(1) != 'r' && - _input.LA(1) != 's' && - _input.LA(1) != 't' && - _input.LA(1) != 'u' && - _input.LA(1) != 'v' && - _input.LA(1) != 'w' && - _input.LA(1) != 'x' && - _input.LA(1) != 'y' && - _input.LA(1) != 'z' && - _input.LA(1) != 'A' && - _input.LA(1) != 'B' && - _input.LA(1) != 'C' && - _input.LA(1) != 'D' && - _input.LA(1) != 'E' && - _input.LA(1) != 'F' && - _input.LA(1) != 'G' && - _input.LA(1) != 'H' && - _input.LA(1) != 'I' && - _input.LA(1) != 'J' && - _input.LA(1) != 'K' && - _input.LA(1) != 'L' && - _input.LA(1) != 'M' && - _input.LA(1) != 'N' && - _input.LA(1) != 'O' && - _input.LA(1) != 'P' && - _input.LA(1) != 'Q' && - _input.LA(1) != 'R' && - _input.LA(1) != 'S' && - _input.LA(1) != 'T' && - _input.LA(1) != 'U' && - _input.LA(1) != 'V' && - _input.LA(1) != 'W' && - _input.LA(1) != 'X' && - _input.LA(1) != 'Y' && - _input.LA(1) != 'Z' + !(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') && + !(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z') }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?) ; @@ -172,8 +148,8 @@ LIT_STR : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX? 
; -LIT_BINARY : 'b' LIT_STR SUFFIX?; -LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX?; +LIT_BINARY : 'b' LIT_STR ; +LIT_BINARY_RAW : 'b' LIT_STR_RAW ; /* this is a bit messy */ @@ -201,13 +177,19 @@ LIFETIME : '\'' IDENT ; WHITESPACE : [ \r\n\t]+ ; -UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ; +UNDOC_COMMENT : '////' ~[\n]* -> type(COMMENT) ; YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ; OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ; -LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ; +LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ; DOC_BLOCK_COMMENT : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) ; BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ; + +/* these appear at the beginning of a file */ + +SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ; + +UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ; diff --git a/src/grammar/check.sh b/src/grammar/check.sh index b0628303b66..560b6b72471 100755 --- a/src/grammar/check.sh +++ b/src/grammar/check.sh @@ -18,13 +18,13 @@ failed=0 skipped=0 check() { - grep --silent "// ignore-lexer-test" $1; + grep --silent "// ignore-lexer-test" "$1"; # if it's *not* found... if [ $? -eq 1 ]; then cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't - # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't - # seem to have anny effect. + # figure out how to wrangle the CLASSPATH, just adding build/grammar + # didn't seem to have any effect. if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then echo "pass: $1" passed=`expr $passed + 1` @@ -39,7 +39,7 @@ check() { } for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do - check $file $2 $3 $4 $5 + check "$file" $2 $3 $4 $5 done printf "\ntest result: " diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 8bf501c7f3f..dec797747c2 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -8,9 +8,7 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -#![feature(plugin)] - -#![allow(unstable)] +#![feature(plugin, rustc_private, str_char, collections)] extern crate syntax; extern crate rustc; @@ -19,7 +17,10 @@ extern crate rustc; extern crate log; use std::collections::HashMap; -use std::io::File; +use std::env; +use std::fs::File; +use std::io::{BufRead, Read}; +use std::path::Path; use syntax::parse; use syntax::parse::lexer; @@ -27,6 +28,7 @@ use rustc::session::{self, config}; use syntax::ast; use syntax::ast::Name; +use syntax::codemap; use syntax::codemap::Pos; use syntax::parse::token; use syntax::parse::lexer::TokenAndSpan; @@ -108,6 +110,7 @@ fn parse_token_list(file: &str) -> HashMap { "LIT_BINARY" => token::Literal(token::Binary(Name(0)), None), "LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0), None), "QUESTION" => token::Question, + "SHEBANG" => token::Shebang(Name(0)), _ => continue, }; @@ -166,24 +169,26 @@ fn count(lit: &str) -> usize { lit.chars().take_while(|c| *c == '#').count() } -fn parse_antlr_token(s: &str, tokens: &HashMap, surrogate_pairs_pos: &[usize]) +fn parse_antlr_token(s: &str, tokens: &HashMap, surrogate_pairs_pos: &[usize], + has_bom: bool) -> TokenAndSpan { // old regex: // \[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+] - let start = s.find_str("[@").unwrap(); - let comma = start + s[start..].find_str(",").unwrap(); - let colon = comma + s[comma..].find_str(":").unwrap(); - let content_start = colon + s[colon..].find_str("='").unwrap(); - let content_end = content_start + s[content_start..].find_str("',<").unwrap(); - let toknum_end = content_end + s[content_end..].find_str(">,").unwrap(); + let start = s.find("[@").unwrap(); + let comma = start + s[start..].find(",").unwrap(); + let colon = comma + s[comma..].find(":").unwrap(); + let content_start = colon + s[colon..].find("='").unwrap(); + // Use rfind instead of find, because we don't want to stop at the 
content + let content_end = content_start + s[content_start..].rfind("',<").unwrap(); + let toknum_end = content_end + s[content_end..].find(">,").unwrap(); let start = &s[comma + 1 .. colon]; let end = &s[colon + 1 .. content_start]; let content = &s[content_start + 2 .. content_end]; let toknum = &s[content_end + 3 .. toknum_end]; - let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map", - toknum)); + let not_found = format!("didn't find token {:?} in the map", toknum); + let proto_tok = tokens.get(toknum).expect(&not_found[..]); let nm = parse::token::intern(content); @@ -209,24 +214,25 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_ ref t => t.clone() }; - let offset = if real_tok == token::Eof - { + let start_offset = if real_tok == token::Eof { 1 } else { 0 }; - let mut lo = start.parse::<u32>().unwrap() - offset; - let mut hi = end.parse::<u32>().unwrap() + 1; + let offset = if has_bom { 1 } else { 0 }; + + let mut lo = start.parse::<u32>().unwrap() - start_offset - offset; + let mut hi = end.parse::<u32>().unwrap() + 1 - offset; // Adjust the span: For each surrogate pair already encountered, subtract one position.
lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32; hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32; - let sp = syntax::codemap::Span { - lo: syntax::codemap::BytePos(lo), - hi: syntax::codemap::BytePos(hi), - expn_id: syntax::codemap::NO_EXPANSION + let sp = codemap::Span { + lo: codemap::BytePos(lo), + hi: codemap::BytePos(hi), + expn_id: codemap::NO_EXPANSION }; TokenAndSpan { @@ -245,10 +251,10 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool { } } -fn span_cmp(antlr_sp: syntax::codemap::Span, rust_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool { +fn span_cmp(antlr_sp: codemap::Span, rust_sp: codemap::Span, cm: &codemap::CodeMap) -> bool { antlr_sp.expn_id == rust_sp.expn_id && - antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() && - antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() + antlr_sp.lo.to_usize() == cm.bytepos_to_file_charpos(rust_sp.lo).to_usize() && + antlr_sp.hi.to_usize() == cm.bytepos_to_file_charpos(rust_sp.hi).to_usize() } fn main() { @@ -257,10 +263,15 @@ fn main() { r.next_token() } - let args = std::os::args(); + let mut args = env::args().skip(1); + let filename = args.next().unwrap(); + if filename.find("parse-fail").is_some() { + return; + } // Rust's lexer - let code = File::open(&Path::new(args[1])).unwrap().read_to_string().unwrap(); + let mut code = String::new(); + File::open(&Path::new(&filename)).unwrap().read_to_string(&mut code).unwrap(); let surrogate_pairs_pos: Vec<usize> = code.chars().enumerate() .filter(|&(_, c)| c as usize > 0xFFFF) @@ -269,6 +280,8 @@ fn main() { .map(|(x, n)| x + n) .collect(); + let has_bom = code.starts_with("\u{feff}"); + debug!("Pairs: {:?}", surrogate_pairs_pos); let options = config::basic_options(); @@ -281,15 +294,18 @@ fn main() { let ref cm = lexer.span_diagnostic.cm; // ANTLR - let mut token_file = File::open(&Path::new(args[2])); - let token_map =
parse_token_list(token_file.read_to_string().unwrap()); + let mut token_file = File::open(&Path::new(&args.next().unwrap())).unwrap(); + let mut token_list = String::new(); + token_file.read_to_string(&mut token_list).unwrap(); + let token_map = parse_token_list(&token_list[..]); - let mut stdin = std::io::stdin(); - let mut lock = stdin.lock(); + let stdin = std::io::stdin(); + let lock = stdin.lock(); let lines = lock.lines(); - let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(), - &token_map, - &surrogate_pairs_pos[])); + let antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(), + &token_map, + &surrogate_pairs_pos[..], + has_bom)); for antlr_tok in antlr_tokens { let rustc_tok = next(&mut lexer); @@ -314,7 +330,7 @@ fn main() { } _ => panic!("{:?} is not {:?}", antlr_tok, rustc_tok) },)* - ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", rustc_tok, antlr_tok) + ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", antlr_tok, rustc_tok) } ) } diff --git a/src/libcollections/fmt.rs b/src/libcollections/fmt.rs index 5f0d9012d1a..40b64b5c3b4 100644 --- a/src/libcollections/fmt.rs +++ b/src/libcollections/fmt.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! Utilities for formatting and printing strings //! diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 0f902e258b9..266cda9a237 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! Unicode string manipulation (the `str` type). //! 
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 74af5783fa8..3422bfe5423 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! An owned, growable string that enforces that its contents are valid UTF-8. diff --git a/src/libcore/hash/sip.rs b/src/libcore/hash/sip.rs index 6820a7025fc..65f790d5d43 100644 --- a/src/libcore/hash/sip.rs +++ b/src/libcore/hash/sip.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15883 //! An implementation of SipHash 2-4. diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index a056e585fee..bcfdcfcd5e6 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! Numeric traits and functions for the built-in numeric types. diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 2d6ef39361e..34810b4864e 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! String manipulation //! diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 4939277aa59..b73807aa317 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// -// ignore-lexer-test FIXME #15679 #[test] fn test_is_lowercase() { diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs index 02c4a233996..197199e743f 100644 --- a/src/libgetopts/lib.rs +++ b/src/libgetopts/lib.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15677 //! Simple getopt alternative. //! diff --git a/src/librand/distributions/gamma.rs b/src/librand/distributions/gamma.rs index 1125d096536..f37093c6db8 100644 --- a/src/librand/distributions/gamma.rs +++ b/src/librand/distributions/gamma.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! The Gamma and derived distributions. diff --git a/src/librustc_unicode/u_str.rs b/src/librustc_unicode/u_str.rs index 09a5feb5fef..c63c586b6a9 100644 --- a/src/librustc_unicode/u_str.rs +++ b/src/librustc_unicode/u_str.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! Unicode-intensive string manipulations. //! diff --git a/src/libserialize/hex.rs b/src/libserialize/hex.rs index 0676edf8169..87f1dca2cae 100644 --- a/src/libserialize/hex.rs +++ b/src/libserialize/hex.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! Hex binary-to-text encoding diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index a2ba8c4c1ba..ccc56960b02 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! 
Operations on ASCII strings and characters diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 4ac15b7991b..a5bbbee790a 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15883 use self::Entry::*; use self::SearchResult::*; diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs index 62c03389b24..82109900bf2 100644 --- a/src/libstd/collections/hash/set.rs +++ b/src/libstd/collections/hash/set.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15883 use borrow::Borrow; use clone::Clone; diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index dec6d1e2209..65ebf8515e6 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15883 use self::BucketState::*; diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index bd44a9547b4..67cac42c35e 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15883 //! Buffering wrappers for I/O traits diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 8ab66f2328f..ce1da4742d1 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// -// ignore-lexer-test FIXME #15679 #![allow(missing_docs)] #![allow(deprecated)] diff --git a/src/libstd/rt/util.rs b/src/libstd/rt/util.rs index 9919238c208..31e970a9550 100644 --- a/src/libstd/rt/util.rs +++ b/src/libstd/rt/util.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15677 use io::prelude::*; diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index a0bde8f6c52..dfdaa47d8b9 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! The CodeMap tracks all the source code used within a single crate, mapping //! from integer byte positions to the original source code location. Each bit diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 9c3a556b210..58df4038403 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -7,8 +7,6 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. -// -// ignore-lexer-test FIXME #15679 //! This is an Earley-like parser, without support for in-grammar nonterminals, //! only by calling out to the main rust parser for named nonterminals (which it diff --git a/src/test/bench/core-std.rs b/src/test/bench/core-std.rs index 46caed6f9f5..19f83c7817c 100644 --- a/src/test/bench/core-std.rs +++ b/src/test/bench/core-std.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// ignore-lexer-test FIXME #15679 // Microbenchmarks for various functions in std and extra #![feature(rand, collections, std_misc)] diff --git a/src/test/bench/msgsend-ring-mutex-arcs.rs b/src/test/bench/msgsend-ring-mutex-arcs.rs index c87cdb617a4..8048f3dde96 100644 --- a/src/test/bench/msgsend-ring-mutex-arcs.rs +++ b/src/test/bench/msgsend-ring-mutex-arcs.rs @@ -16,7 +16,6 @@ // This also serves as a pipes test, because Arcs are implemented with pipes. // no-pretty-expanded FIXME #15189 -// ignore-lexer-test FIXME #15679 #![feature(std_misc)] diff --git a/src/test/bench/noise.rs b/src/test/bench/noise.rs index c21470d4bb3..530c499f5fd 100644 --- a/src/test/bench/noise.rs +++ b/src/test/bench/noise.rs @@ -10,7 +10,6 @@ // Multi-language Perlin noise benchmark. // See https://github.com/nsf/pnoise for timings and alternative implementations. -// ignore-lexer-test FIXME #15679 #![feature(rand, core)] diff --git a/src/test/compile-fail/utf8_idents.rs b/src/test/compile-fail/utf8_idents.rs index a5471e87f22..8594c35f8dd 100644 --- a/src/test/compile-fail/utf8_idents.rs +++ b/src/test/compile-fail/utf8_idents.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
// -// ignore-lexer-test FIXME #15679 fn foo< 'β, //~ ERROR non-ascii idents are not fully supported diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp index a5d82277d2f..2dc7e8f9525 100644 --- a/src/test/pretty/block-comment-wchar.pp +++ b/src/test/pretty/block-comment-wchar.pp @@ -14,7 +14,6 @@ // ignore-tidy-cr // ignore-tidy-tab // pp-exact:block-comment-wchar.pp -// ignore-lexer-test FIXME #15679 fn f() { fn nested() { /* diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index eb6d2a4a0a1..6f4a95e7c9b 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -14,7 +14,6 @@ // ignore-tidy-cr // ignore-tidy-tab // pp-exact:block-comment-wchar.pp -// ignore-lexer-test FIXME #15679 fn f() { fn nested() { /* diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs index fbe2a65bc89..9f7b98a57fc 100644 --- a/src/test/run-pass/byte-literals.rs +++ b/src/test/run-pass/byte-literals.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15883 static FOO: u8 = b'\xF0'; diff --git a/src/test/run-pass/default-method-supertrait-vtable.rs b/src/test/run-pass/default-method-supertrait-vtable.rs index 3b1e04be78d..0d45a5d5212 100644 --- a/src/test/run-pass/default-method-supertrait-vtable.rs +++ b/src/test/run-pass/default-method-supertrait-vtable.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 // Tests that we can call a function bounded over a supertrait from diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index ea9db9b1e1f..7ae1347f2c7 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -9,7 +9,6 @@ // except according to those terms. 
// no-pretty-expanded unnecessary unsafe block generated -// ignore-lexer-test FIXME #15679 #![deny(warnings)] #![allow(unused_must_use)] diff --git a/src/test/run-pass/issue-12582.rs b/src/test/run-pass/issue-12582.rs index 4009d17139d..7bab2ddfed0 100644 --- a/src/test/run-pass/issue-12582.rs +++ b/src/test/run-pass/issue-12582.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 pub fn main() { let x = 1; diff --git a/src/test/run-pass/issue-13027.rs b/src/test/run-pass/issue-13027.rs index dadd480dc6a..14987484711 100644 --- a/src/test/run-pass/issue-13027.rs +++ b/src/test/run-pass/issue-13027.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 // Tests that match expression handles overlapped literal and range // properly in the presence of guard function. diff --git a/src/test/run-pass/issue-2718.rs b/src/test/run-pass/issue-2718.rs index 71d1439dd2b..0df89c72424 100644 --- a/src/test/run-pass/issue-2718.rs +++ b/src/test/run-pass/issue-2718.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15883 #![feature(unsafe_destructor, std_misc)] diff --git a/src/test/run-pass/issue-3683.rs b/src/test/run-pass/issue-3683.rs index 096eec803ff..ed9b8066104 100644 --- a/src/test/run-pass/issue-3683.rs +++ b/src/test/run-pass/issue-3683.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 trait Foo { diff --git a/src/test/run-pass/issue-4759-1.rs b/src/test/run-pass/issue-4759-1.rs index 3532a395b7a..a565460c42e 100644 --- a/src/test/run-pass/issue-4759-1.rs +++ b/src/test/run-pass/issue-4759-1.rs @@ -8,8 +8,6 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -// pretty-expanded FIXME #23616 - trait U { fn f(self); } impl U for isize { fn f(self) {} } pub fn main() { 4.f(); } diff --git a/src/test/run-pass/issue-5280.rs b/src/test/run-pass/issue-5280.rs index bd892465054..5e2e4df95b3 100644 --- a/src/test/run-pass/issue-5280.rs +++ b/src/test/run-pass/issue-5280.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 type FontTableTag = u32; diff --git a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs index d0bc396c368..dd00fab5020 100644 --- a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs +++ b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 trait Fooable { fn yes(self); diff --git a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs index 421ae8e9497..5c8db524cc2 100644 --- a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs +++ b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs @@ -16,7 +16,6 @@ // this directory should enforce it. // ignore-pretty -// ignore-lexer-test FIXME #15882 /// Doc comment that ends in CRLF pub fn foo() {} diff --git a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs index 6ddaee9c8bd..6e65cb2afd4 100644 --- a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs +++ b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs @@ -8,7 +8,6 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15679 #![forbid(non_camel_case_types)] diff --git a/src/test/run-pass/match-range.rs b/src/test/run-pass/match-range.rs index 68719090cff..0b2e19d6c79 100644 --- a/src/test/run-pass/match-range.rs +++ b/src/test/run-pass/match-range.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 pub fn main() { match 5_usize { diff --git a/src/test/run-pass/multibyte.rs b/src/test/run-pass/multibyte.rs index 77084836408..0475dd10fde 100644 --- a/src/test/run-pass/multibyte.rs +++ b/src/test/run-pass/multibyte.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15679 // Test that multibyte characters don't crash the compiler pub fn main() { diff --git a/src/test/run-pass/raw-str.rs b/src/test/run-pass/raw-str.rs index 298ac8f77eb11dd6f5a4a63ea68920e34a5e7d61..9ee824d4185851b4477dadb969fae7a857ff2bb8 100644 GIT binary patch delta 12 TcmdnXHJfY0BgV}vOqR?5AAAG# delta 36 rcmbQuwU=wdBS!I@)QZ$1-ICPe5(PKU2wztPWkXXlbIZ-w8C{qG@9qoU diff --git a/src/test/run-pass/shebang.rs b/src/test/run-pass/shebang.rs index 87da814771b..15ab21bbc8d 100644 --- a/src/test/run-pass/shebang.rs +++ b/src/test/run-pass/shebang.rs @@ -11,6 +11,5 @@ // ignore-pretty: `expand` adds some preludes before shebang // -// ignore-lexer-test FIXME #15878 pub fn main() { println!("Hello World"); } diff --git a/src/test/run-pass/struct-return.rs b/src/test/run-pass/struct-return.rs index 1ff13d4eaea..109287a83b1 100644 --- a/src/test/run-pass/struct-return.rs +++ b/src/test/run-pass/struct-return.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
// -// ignore-lexer-test FIXME #15883 #[derive(Copy, Clone)] pub struct Quad { a: u64, b: u64, c: u64, d: u64 } diff --git a/src/test/run-pass/trait-to-str.rs b/src/test/run-pass/trait-to-str.rs index 3d84092c062..a29e0e932c0 100644 --- a/src/test/run-pass/trait-to-str.rs +++ b/src/test/run-pass/trait-to-str.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15883 trait to_str { diff --git a/src/test/run-pass/trait-with-bounds-default.rs b/src/test/run-pass/trait-with-bounds-default.rs index 34a79c4cf31..cfd81240094 100644 --- a/src/test/run-pass/trait-with-bounds-default.rs +++ b/src/test/run-pass/trait-with-bounds-default.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 pub trait Clone2 { /// Returns a copy of the value. The contents of owned pointers diff --git a/src/test/run-pass/traits-default-method-self.rs b/src/test/run-pass/traits-default-method-self.rs index d9536108f4d..36b0eb527b6 100644 --- a/src/test/run-pass/traits-default-method-self.rs +++ b/src/test/run-pass/traits-default-method-self.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15877 trait Cat { diff --git a/src/test/run-pass/traits-default-method-trivial.rs b/src/test/run-pass/traits-default-method-trivial.rs index 0e71fcab9d1..a2e7f54bba6 100644 --- a/src/test/run-pass/traits-default-method-trivial.rs +++ b/src/test/run-pass/traits-default-method-trivial.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
// -// ignore-lexer-test FIXME #15877 trait Cat { diff --git a/src/test/run-pass/unsized.rs b/src/test/run-pass/unsized.rs index 449d6b37e9f..26f7b767988 100644 --- a/src/test/run-pass/unsized.rs +++ b/src/test/run-pass/unsized.rs @@ -10,8 +10,6 @@ // Test syntax checks for `?Sized` syntax. -// pretty-expanded FIXME #23616 - use std::marker::PhantomData; trait T1 { } diff --git a/src/test/run-pass/unsized2.rs b/src/test/run-pass/unsized2.rs index 965ce6bad16..1cce98ae6b7 100644 --- a/src/test/run-pass/unsized2.rs +++ b/src/test/run-pass/unsized2.rs @@ -8,8 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// pretty-expanded FIXME #23616 - #![allow(unknown_features)] #![feature(box_syntax)] diff --git a/src/test/run-pass/utf8-bom.rs b/src/test/run-pass/utf8-bom.rs index baa4e941ff0..c3052a928d6 100644 --- a/src/test/run-pass/utf8-bom.rs +++ b/src/test/run-pass/utf8-bom.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15679 // This file has utf-8 BOM, it should be compiled normally without error. diff --git a/src/test/run-pass/utf8.rs b/src/test/run-pass/utf8.rs index 07fd7b297b4..4782edf4e12 100644 --- a/src/test/run-pass/utf8.rs +++ b/src/test/run-pass/utf8.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15679 // no-pretty-expanded FIXME #15189 pub fn main() { diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 45a3f2327aa..36b64551ef2 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
// -// ignore-lexer-test FIXME #15679 #![feature(collections, core, str_char)] diff --git a/src/test/run-pass/utf8_idents.rs b/src/test/run-pass/utf8_idents.rs index b11b7e83eb6..559afcd1641 100644 --- a/src/test/run-pass/utf8_idents.rs +++ b/src/test/run-pass/utf8_idents.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. // -// ignore-lexer-test FIXME #15679 #![feature(non_ascii_idents)]