From be437132b8c53520598131bf542020966099352b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Wed, 14 Jan 2015 23:51:51 +0100
Subject: [PATCH 1/3] Add proper XID_Start and XID_Continue rules and use
 CharPos for span comparison, closes #15679

---
 src/grammar/RustLexer.g4   |   8 +-
 src/grammar/verify.rs      |  11 +-
 src/grammar/xidcontinue.g4 | 375 +++++++++++++++++++++++++++++++++++++
 src/grammar/xidstart.g4    | 289 ++++++++++++++++++++++++++++
 4 files changed, 676 insertions(+), 7 deletions(-)
 create mode 100644 src/grammar/xidcontinue.g4
 create mode 100644 src/grammar/xidstart.g4

diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index 7d071d5e724..6578f79f92b 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -11,11 +11,7 @@ tokens {
     COMMENT
 }
 
-/* Note: due to antlr limitations, we can't represent XID_start and
- * XID_continue properly. ASCII-only substitute. */
-
-fragment XID_start : [_a-zA-Z] ;
-fragment XID_continue : [_a-zA-Z0-9] ;
+import xidstart , xidcontinue;
 
 
 /* Expression-operator symbols */
@@ -197,7 +193,7 @@ LIT_STR_RAW
 
 QUESTION : '?';
 
-IDENT : XID_start XID_continue* ;
+IDENT : XID_Start XID_Continue* ;
 
 fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
 
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 75b56f54ccc..cf408c91609 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -25,6 +25,7 @@ use rustc::session::{self, config};
 
 use syntax::ast;
 use syntax::ast::Name;
+use syntax::codemap::Pos;
 use syntax::parse::token;
 use syntax::parse::lexer::TokenAndSpan;
 
@@ -234,6 +235,13 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
     }
 }
 
+fn span_cmp(rust_sp: syntax::codemap::Span, antlr_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool {
+    println!("{} {}", cm.bytepos_to_file_charpos(rust_sp.lo).to_uint(), cm.bytepos_to_file_charpos(rust_sp.hi).to_uint());
+    antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() &&
+    antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() &&
+    antlr_sp.expn_id == rust_sp.expn_id
+}
+
 fn main() {
     fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
         use syntax::parse::lexer::Reader;
@@ -259,6 +267,7 @@ fn main() {
                                            code,
                                            String::from_str("<n/a>"));
     let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
+    let ref cm = lexer.span_diagnostic.cm;
 
     for antlr_tok in antlr_tokens {
         let rustc_tok = next(&mut lexer);
@@ -266,7 +275,7 @@ fn main() {
             continue
         }
 
-        assert!(rustc_tok.sp == antlr_tok.sp, "{:?} and {:?} have different spans", rustc_tok,
+        assert!(span_cmp(rustc_tok.sp, antlr_tok.sp, cm), "{:?} and {:?} have different spans", rustc_tok,
                 antlr_tok);
 
         macro_rules! matches {
diff --git a/src/grammar/xidcontinue.g4 b/src/grammar/xidcontinue.g4
new file mode 100644
index 00000000000..6000648f5fb
--- /dev/null
+++ b/src/grammar/xidcontinue.g4
@@ -0,0 +1,375 @@
+lexer grammar Xidcontinue;
+
+fragment XID_Continue:
+      '\u0030' .. '\u0039'
+    | '\u0041' .. '\u005a'
+    | '\u005f'
+    | '\u0061' .. '\u007a'
+    | '\u00aa'
+    | '\u00b5'
+    | '\u00b7'
+    | '\u00ba'
+    | '\u00c0' .. '\u00d6'
+    | '\u00d8' .. '\u00f6'
+    | '\u00f8' .. '\u0236'
+    | '\u0250' .. '\u02c1'
+    | '\u02c6' .. '\u02d1'
+    | '\u02e0' .. '\u02e4'
+    | '\u02ee'
+    | '\u0300' .. '\u0357'
+    | '\u035d' .. '\u036f'
+    | '\u0386'
+    | '\u0388' .. '\u038a'
+    | '\u038c'
+    | '\u038e' .. '\u03a1'
+    | '\u03a3' .. '\u03ce'
+    | '\u03d0' .. '\u03f5'
+    | '\u03f7' .. '\u03fb'
+    | '\u0400' .. '\u0481'
+    | '\u0483' .. '\u0486'
+    | '\u048a' .. '\u04ce'
+    | '\u04d0' .. '\u04f5'
+    | '\u04f8' .. '\u04f9'
+    | '\u0500' .. '\u050f'
+    | '\u0531' .. '\u0556'
+    | '\u0559'
+    | '\u0561' .. '\u0587'
+    | '\u0591' .. '\u05a1'
+    | '\u05a3' .. '\u05b9'
+    | '\u05bb' .. '\u05bd'
+    | '\u05bf'
+    | '\u05c1' .. '\u05c2'
+    | '\u05c4'
+    | '\u05d0' .. '\u05ea'
+    | '\u05f0' .. '\u05f2'
+    | '\u0610' .. '\u0615'
+    | '\u0621' .. '\u063a'
+    | '\u0640' .. '\u0658'
+    | '\u0660' .. '\u0669'
+    | '\u066e' .. '\u06d3'
+    | '\u06d5' .. '\u06dc'
+    | '\u06df' .. '\u06e8'
+    | '\u06ea' .. '\u06fc'
+    | '\u06ff'
+    | '\u0710' .. '\u074a'
+    | '\u074d' .. '\u074f'
+    | '\u0780' .. '\u07b1'
+    | '\u0901' .. '\u0939'
+    | '\u093c' .. '\u094d'
+    | '\u0950' .. '\u0954'
+    | '\u0958' .. '\u0963'
+    | '\u0966' .. '\u096f'
+    | '\u0981' .. '\u0983'
+    | '\u0985' .. '\u098c'
+    | '\u098f' .. '\u0990'
+    | '\u0993' .. '\u09a8'
+    | '\u09aa' .. '\u09b0'
+    | '\u09b2'
+    | '\u09b6' .. '\u09b9'
+    | '\u09bc' .. '\u09c4'
+    | '\u09c7' .. '\u09c8'
+    | '\u09cb' .. '\u09cd'
+    | '\u09d7'
+    | '\u09dc' .. '\u09dd'
+    | '\u09df' .. '\u09e3'
+    | '\u09e6' .. '\u09f1'
+    | '\u0a01' .. '\u0a03'
+    | '\u0a05' .. '\u0a0a'
+    | '\u0a0f' .. '\u0a10'
+    | '\u0a13' .. '\u0a28'
+    | '\u0a2a' .. '\u0a30'
+    | '\u0a32' .. '\u0a33'
+    | '\u0a35' .. '\u0a36'
+    | '\u0a38' .. '\u0a39'
+    | '\u0a3c'
+    | '\u0a3e' .. '\u0a42'
+    | '\u0a47' .. '\u0a48'
+    | '\u0a4b' .. '\u0a4d'
+    | '\u0a59' .. '\u0a5c'
+    | '\u0a5e'
+    | '\u0a66' .. '\u0a74'
+    | '\u0a81' .. '\u0a83'
+    | '\u0a85' .. '\u0a8d'
+    | '\u0a8f' .. '\u0a91'
+    | '\u0a93' .. '\u0aa8'
+    | '\u0aaa' .. '\u0ab0'
+    | '\u0ab2' .. '\u0ab3'
+    | '\u0ab5' .. '\u0ab9'
+    | '\u0abc' .. '\u0ac5'
+    | '\u0ac7' .. '\u0ac9'
+    | '\u0acb' .. '\u0acd'
+    | '\u0ad0'
+    | '\u0ae0' .. '\u0ae3'
+    | '\u0ae6' .. '\u0aef'
+    | '\u0b01' .. '\u0b03'
+    | '\u0b05' .. '\u0b0c'
+    | '\u0b0f' .. '\u0b10'
+    | '\u0b13' .. '\u0b28'
+    | '\u0b2a' .. '\u0b30'
+    | '\u0b32' .. '\u0b33'
+    | '\u0b35' .. '\u0b39'
+    | '\u0b3c' .. '\u0b43'
+    | '\u0b47' .. '\u0b48'
+    | '\u0b4b' .. '\u0b4d'
+    | '\u0b56' .. '\u0b57'
+    | '\u0b5c' .. '\u0b5d'
+    | '\u0b5f' .. '\u0b61'
+    | '\u0b66' .. '\u0b6f'
+    | '\u0b71'
+    | '\u0b82' .. '\u0b83'
+    | '\u0b85' .. '\u0b8a'
+    | '\u0b8e' .. '\u0b90'
+    | '\u0b92' .. '\u0b95'
+    | '\u0b99' .. '\u0b9a'
+    | '\u0b9c'
+    | '\u0b9e' .. '\u0b9f'
+    | '\u0ba3' .. '\u0ba4'
+    | '\u0ba8' .. '\u0baa'
+    | '\u0bae' .. '\u0bb5'
+    | '\u0bb7' .. '\u0bb9'
+    | '\u0bbe' .. '\u0bc2'
+    | '\u0bc6' .. '\u0bc8'
+    | '\u0bca' .. '\u0bcd'
+    | '\u0bd7'
+    | '\u0be7' .. '\u0bef'
+    | '\u0c01' .. '\u0c03'
+    | '\u0c05' .. '\u0c0c'
+    | '\u0c0e' .. '\u0c10'
+    | '\u0c12' .. '\u0c28'
+    | '\u0c2a' .. '\u0c33'
+    | '\u0c35' .. '\u0c39'
+    | '\u0c3e' .. '\u0c44'
+    | '\u0c46' .. '\u0c48'
+    | '\u0c4a' .. '\u0c4d'
+    | '\u0c55' .. '\u0c56'
+    | '\u0c60' .. '\u0c61'
+    | '\u0c66' .. '\u0c6f'
+    | '\u0c82' .. '\u0c83'
+    | '\u0c85' .. '\u0c8c'
+    | '\u0c8e' .. '\u0c90'
+    | '\u0c92' .. '\u0ca8'
+    | '\u0caa' .. '\u0cb3'
+    | '\u0cb5' .. '\u0cb9'
+    | '\u0cbc' .. '\u0cc4'
+    | '\u0cc6' .. '\u0cc8'
+    | '\u0cca' .. '\u0ccd'
+    | '\u0cd5' .. '\u0cd6'
+    | '\u0cde'
+    | '\u0ce0' .. '\u0ce1'
+    | '\u0ce6' .. '\u0cef'
+    | '\u0d02' .. '\u0d03'
+    | '\u0d05' .. '\u0d0c'
+    | '\u0d0e' .. '\u0d10'
+    | '\u0d12' .. '\u0d28'
+    | '\u0d2a' .. '\u0d39'
+    | '\u0d3e' .. '\u0d43'
+    | '\u0d46' .. '\u0d48'
+    | '\u0d4a' .. '\u0d4d'
+    | '\u0d57'
+    | '\u0d60' .. '\u0d61'
+    | '\u0d66' .. '\u0d6f'
+    | '\u0d82' .. '\u0d83'
+    | '\u0d85' .. '\u0d96'
+    | '\u0d9a' .. '\u0db1'
+    | '\u0db3' .. '\u0dbb'
+    | '\u0dbd'
+    | '\u0dc0' .. '\u0dc6'
+    | '\u0dca'
+    | '\u0dcf' .. '\u0dd4'
+    | '\u0dd6'
+    | '\u0dd8' .. '\u0ddf'
+    | '\u0df2' .. '\u0df3'
+    | '\u0e01' .. '\u0e3a'
+    | '\u0e40' .. '\u0e4e'
+    | '\u0e50' .. '\u0e59'
+    | '\u0e81' .. '\u0e82'
+    | '\u0e84'
+    | '\u0e87' .. '\u0e88'
+    | '\u0e8a'
+    | '\u0e8d'
+    | '\u0e94' .. '\u0e97'
+    | '\u0e99' .. '\u0e9f'
+    | '\u0ea1' .. '\u0ea3'
+    | '\u0ea5'
+    | '\u0ea7'
+    | '\u0eaa' .. '\u0eab'
+    | '\u0ead' .. '\u0eb9'
+    | '\u0ebb' .. '\u0ebd'
+    | '\u0ec0' .. '\u0ec4'
+    | '\u0ec6'
+    | '\u0ec8' .. '\u0ecd'
+    | '\u0ed0' .. '\u0ed9'
+    | '\u0edc' .. '\u0edd'
+    | '\u0f00'
+    | '\u0f18' .. '\u0f19'
+    | '\u0f20' .. '\u0f29'
+    | '\u0f35'
+    | '\u0f37'
+    | '\u0f39'
+    | '\u0f3e' .. '\u0f47'
+    | '\u0f49' .. '\u0f6a'
+    | '\u0f71' .. '\u0f84'
+    | '\u0f86' .. '\u0f8b'
+    | '\u0f90' .. '\u0f97'
+    | '\u0f99' .. '\u0fbc'
+    | '\u0fc6'
+    | '\u1000' .. '\u1021'
+    | '\u1023' .. '\u1027'
+    | '\u1029' .. '\u102a'
+    | '\u102c' .. '\u1032'
+    | '\u1036' .. '\u1039'
+    | '\u1040' .. '\u1049'
+    | '\u1050' .. '\u1059'
+    | '\u10a0' .. '\u10c5'
+    | '\u10d0' .. '\u10f8'
+    | '\u1100' .. '\u1159'
+    | '\u115f' .. '\u11a2'
+    | '\u11a8' .. '\u11f9'
+    | '\u1200' .. '\u1206'
+    | '\u1208' .. '\u1246'
+    | '\u1248'
+    | '\u124a' .. '\u124d'
+    | '\u1250' .. '\u1256'
+    | '\u1258'
+    | '\u125a' .. '\u125d'
+    | '\u1260' .. '\u1286'
+    | '\u1288'
+    | '\u128a' .. '\u128d'
+    | '\u1290' .. '\u12ae'
+    | '\u12b0'
+    | '\u12b2' .. '\u12b5'
+    | '\u12b8' .. '\u12be'
+    | '\u12c0'
+    | '\u12c2' .. '\u12c5'
+    | '\u12c8' .. '\u12ce'
+    | '\u12d0' .. '\u12d6'
+    | '\u12d8' .. '\u12ee'
+    | '\u12f0' .. '\u130e'
+    | '\u1310'
+    | '\u1312' .. '\u1315'
+    | '\u1318' .. '\u131e'
+    | '\u1320' .. '\u1346'
+    | '\u1348' .. '\u135a'
+    | '\u1369' .. '\u1371'
+    | '\u13a0' .. '\u13f4'
+    | '\u1401' .. '\u166c'
+    | '\u166f' .. '\u1676'
+    | '\u1681' .. '\u169a'
+    | '\u16a0' .. '\u16ea'
+    | '\u16ee' .. '\u16f0'
+    | '\u1700' .. '\u170c'
+    | '\u170e' .. '\u1714'
+    | '\u1720' .. '\u1734'
+    | '\u1740' .. '\u1753'
+    | '\u1760' .. '\u176c'
+    | '\u176e' .. '\u1770'
+    | '\u1772' .. '\u1773'
+    | '\u1780' .. '\u17b3'
+    | '\u17b6' .. '\u17d3'
+    | '\u17d7'
+    | '\u17dc' .. '\u17dd'
+    | '\u17e0' .. '\u17e9'
+    | '\u180b' .. '\u180d'
+    | '\u1810' .. '\u1819'
+    | '\u1820' .. '\u1877'
+    | '\u1880' .. '\u18a9'
+    | '\u1900' .. '\u191c'
+    | '\u1920' .. '\u192b'
+    | '\u1930' .. '\u193b'
+    | '\u1946' .. '\u196d'
+    | '\u1970' .. '\u1974'
+    | '\u1d00' .. '\u1d6b'
+    | '\u1e00' .. '\u1e9b'
+    | '\u1ea0' .. '\u1ef9'
+    | '\u1f00' .. '\u1f15'
+    | '\u1f18' .. '\u1f1d'
+    | '\u1f20' .. '\u1f45'
+    | '\u1f48' .. '\u1f4d'
+    | '\u1f50' .. '\u1f57'
+    | '\u1f59'
+    | '\u1f5b'
+    | '\u1f5d'
+    | '\u1f5f' .. '\u1f7d'
+    | '\u1f80' .. '\u1fb4'
+    | '\u1fb6' .. '\u1fbc'
+    | '\u1fbe'
+    | '\u1fc2' .. '\u1fc4'
+    | '\u1fc6' .. '\u1fcc'
+    | '\u1fd0' .. '\u1fd3'
+    | '\u1fd6' .. '\u1fdb'
+    | '\u1fe0' .. '\u1fec'
+    | '\u1ff2' .. '\u1ff4'
+    | '\u1ff6' .. '\u1ffc'
+    | '\u203f' .. '\u2040'
+    | '\u2054'
+    | '\u2071'
+    | '\u207f'
+    | '\u20d0' .. '\u20dc'
+    | '\u20e1'
+    | '\u20e5' .. '\u20ea'
+    | '\u2102'
+    | '\u2107'
+    | '\u210a' .. '\u2113'
+    | '\u2115'
+    | '\u2118' .. '\u211d'
+    | '\u2124'
+    | '\u2126'
+    | '\u2128'
+    | '\u212a' .. '\u2131'
+    | '\u2133' .. '\u2139'
+    | '\u213d' .. '\u213f'
+    | '\u2145' .. '\u2149'
+    | '\u2160' .. '\u2183'
+    | '\u3005' .. '\u3007'
+    | '\u3021' .. '\u302f'
+    | '\u3031' .. '\u3035'
+    | '\u3038' .. '\u303c'
+    | '\u3041' .. '\u3096'
+    | '\u3099' .. '\u309a'
+    | '\u309d' .. '\u309f'
+    | '\u30a1' .. '\u30ff'
+    | '\u3105' .. '\u312c'
+    | '\u3131' .. '\u318e'
+    | '\u31a0' .. '\u31b7'
+    | '\u31f0' .. '\u31ff'
+    | '\u3400' .. '\u4db5'
+    | '\u4e00' .. '\u9fa5'
+    | '\ua000' .. '\ua48c'
+    | '\uac00' .. '\ud7a3'
+    | '\uf900' .. '\ufa2d'
+    | '\ufa30' .. '\ufa6a'
+    | '\ufb00' .. '\ufb06'
+    | '\ufb13' .. '\ufb17'
+    | '\ufb1d' .. '\ufb28'
+    | '\ufb2a' .. '\ufb36'
+    | '\ufb38' .. '\ufb3c'
+    | '\ufb3e'
+    | '\ufb40' .. '\ufb41'
+    | '\ufb43' .. '\ufb44'
+    | '\ufb46' .. '\ufbb1'
+    | '\ufbd3' .. '\ufc5d'
+    | '\ufc64' .. '\ufd3d'
+    | '\ufd50' .. '\ufd8f'
+    | '\ufd92' .. '\ufdc7'
+    | '\ufdf0' .. '\ufdf9'
+    | '\ufe00' .. '\ufe0f'
+    | '\ufe20' .. '\ufe23'
+    | '\ufe33' .. '\ufe34'
+    | '\ufe4d' .. '\ufe4f'
+    | '\ufe71'
+    | '\ufe73'
+    | '\ufe77'
+    | '\ufe79'
+    | '\ufe7b'
+    | '\ufe7d'
+    | '\ufe7f' .. '\ufefc'
+    | '\uff10' .. '\uff19'
+    | '\uff21' .. '\uff3a'
+    | '\uff3f'
+    | '\uff41' .. '\uff5a'
+    | '\uff65' .. '\uffbe'
+    | '\uffc2' .. '\uffc7'
+    | '\uffca' .. '\uffcf'
+    | '\uffd2' .. '\uffd7'
+    | '\uffda' .. '\uffdc'
+    ;
diff --git a/src/grammar/xidstart.g4 b/src/grammar/xidstart.g4
new file mode 100644
index 00000000000..d02774c6135
--- /dev/null
+++ b/src/grammar/xidstart.g4
@@ -0,0 +1,289 @@
+lexer grammar Xidstart;
+
+fragment XID_Start :
+      '\u0041' .. '\u005a'
+    | '_'
+    | '\u0061' .. '\u007a'
+    | '\u00aa'
+    | '\u00b5'
+    | '\u00ba'
+    | '\u00c0' .. '\u00d6'
+    | '\u00d8' .. '\u00f6'
+    | '\u00f8' .. '\u0236'
+    | '\u0250' .. '\u02c1'
+    | '\u02c6' .. '\u02d1'
+    | '\u02e0' .. '\u02e4'
+    | '\u02ee'
+    | '\u0386'
+    | '\u0388' .. '\u038a'
+    | '\u038c'
+    | '\u038e' .. '\u03a1'
+    | '\u03a3' .. '\u03ce'
+    | '\u03d0' .. '\u03f5'
+    | '\u03f7' .. '\u03fb'
+    | '\u0400' .. '\u0481'
+    | '\u048a' .. '\u04ce'
+    | '\u04d0' .. '\u04f5'
+    | '\u04f8' .. '\u04f9'
+    | '\u0500' .. '\u050f'
+    | '\u0531' .. '\u0556'
+    | '\u0559'
+    | '\u0561' .. '\u0587'
+    | '\u05d0' .. '\u05ea'
+    | '\u05f0' .. '\u05f2'
+    | '\u0621' .. '\u063a'
+    | '\u0640' .. '\u064a'
+    | '\u066e' .. '\u066f'
+    | '\u0671' .. '\u06d3'
+    | '\u06d5'
+    | '\u06e5' .. '\u06e6'
+    | '\u06ee' .. '\u06ef'
+    | '\u06fa' .. '\u06fc'
+    | '\u06ff'
+    | '\u0710'
+    | '\u0712' .. '\u072f'
+    | '\u074d' .. '\u074f'
+    | '\u0780' .. '\u07a5'
+    | '\u07b1'
+    | '\u0904' .. '\u0939'
+    | '\u093d'
+    | '\u0950'
+    | '\u0958' .. '\u0961'
+    | '\u0985' .. '\u098c'
+    | '\u098f' .. '\u0990'
+    | '\u0993' .. '\u09a8'
+    | '\u09aa' .. '\u09b0'
+    | '\u09b2'
+    | '\u09b6' .. '\u09b9'
+    | '\u09bd'
+    | '\u09dc' .. '\u09dd'
+    | '\u09df' .. '\u09e1'
+    | '\u09f0' .. '\u09f1'
+    | '\u0a05' .. '\u0a0a'
+    | '\u0a0f' .. '\u0a10'
+    | '\u0a13' .. '\u0a28'
+    | '\u0a2a' .. '\u0a30'
+    | '\u0a32' .. '\u0a33'
+    | '\u0a35' .. '\u0a36'
+    | '\u0a38' .. '\u0a39'
+    | '\u0a59' .. '\u0a5c'
+    | '\u0a5e'
+    | '\u0a72' .. '\u0a74'
+    | '\u0a85' .. '\u0a8d'
+    | '\u0a8f' .. '\u0a91'
+    | '\u0a93' .. '\u0aa8'
+    | '\u0aaa' .. '\u0ab0'
+    | '\u0ab2' .. '\u0ab3'
+    | '\u0ab5' .. '\u0ab9'
+    | '\u0abd'
+    | '\u0ad0'
+    | '\u0ae0' .. '\u0ae1'
+    | '\u0b05' .. '\u0b0c'
+    | '\u0b0f' .. '\u0b10'
+    | '\u0b13' .. '\u0b28'
+    | '\u0b2a' .. '\u0b30'
+    | '\u0b32' .. '\u0b33'
+    | '\u0b35' .. '\u0b39'
+    | '\u0b3d'
+    | '\u0b5c' .. '\u0b5d'
+    | '\u0b5f' .. '\u0b61'
+    | '\u0b71'
+    | '\u0b83'
+    | '\u0b85' .. '\u0b8a'
+    | '\u0b8e' .. '\u0b90'
+    | '\u0b92' .. '\u0b95'
+    | '\u0b99' .. '\u0b9a'
+    | '\u0b9c'
+    | '\u0b9e' .. '\u0b9f'
+    | '\u0ba3' .. '\u0ba4'
+    | '\u0ba8' .. '\u0baa'
+    | '\u0bae' .. '\u0bb5'
+    | '\u0bb7' .. '\u0bb9'
+    | '\u0c05' .. '\u0c0c'
+    | '\u0c0e' .. '\u0c10'
+    | '\u0c12' .. '\u0c28'
+    | '\u0c2a' .. '\u0c33'
+    | '\u0c35' .. '\u0c39'
+    | '\u0c60' .. '\u0c61'
+    | '\u0c85' .. '\u0c8c'
+    | '\u0c8e' .. '\u0c90'
+    | '\u0c92' .. '\u0ca8'
+    | '\u0caa' .. '\u0cb3'
+    | '\u0cb5' .. '\u0cb9'
+    | '\u0cbd'
+    | '\u0cde'
+    | '\u0ce0' .. '\u0ce1'
+    | '\u0d05' .. '\u0d0c'
+    | '\u0d0e' .. '\u0d10'
+    | '\u0d12' .. '\u0d28'
+    | '\u0d2a' .. '\u0d39'
+    | '\u0d60' .. '\u0d61'
+    | '\u0d85' .. '\u0d96'
+    | '\u0d9a' .. '\u0db1'
+    | '\u0db3' .. '\u0dbb'
+    | '\u0dbd'
+    | '\u0dc0' .. '\u0dc6'
+    | '\u0e01' .. '\u0e30'
+    | '\u0e32'
+    | '\u0e40' .. '\u0e46'
+    | '\u0e81' .. '\u0e82'
+    | '\u0e84'
+    | '\u0e87' .. '\u0e88'
+    | '\u0e8a'
+    | '\u0e8d'
+    | '\u0e94' .. '\u0e97'
+    | '\u0e99' .. '\u0e9f'
+    | '\u0ea1' .. '\u0ea3'
+    | '\u0ea5'
+    | '\u0ea7'
+    | '\u0eaa' .. '\u0eab'
+    | '\u0ead' .. '\u0eb0'
+    | '\u0eb2'
+    | '\u0ebd'
+    | '\u0ec0' .. '\u0ec4'
+    | '\u0ec6'
+    | '\u0edc' .. '\u0edd'
+    | '\u0f00'
+    | '\u0f40' .. '\u0f47'
+    | '\u0f49' .. '\u0f6a'
+    | '\u0f88' .. '\u0f8b'
+    | '\u1000' .. '\u1021'
+    | '\u1023' .. '\u1027'
+    | '\u1029' .. '\u102a'
+    | '\u1050' .. '\u1055'
+    | '\u10a0' .. '\u10c5'
+    | '\u10d0' .. '\u10f8'
+    | '\u1100' .. '\u1159'
+    | '\u115f' .. '\u11a2'
+    | '\u11a8' .. '\u11f9'
+    | '\u1200' .. '\u1206'
+    | '\u1208' .. '\u1246'
+    | '\u1248'
+    | '\u124a' .. '\u124d'
+    | '\u1250' .. '\u1256'
+    | '\u1258'
+    | '\u125a' .. '\u125d'
+    | '\u1260' .. '\u1286'
+    | '\u1288'
+    | '\u128a' .. '\u128d'
+    | '\u1290' .. '\u12ae'
+    | '\u12b0'
+    | '\u12b2' .. '\u12b5'
+    | '\u12b8' .. '\u12be'
+    | '\u12c0'
+    | '\u12c2' .. '\u12c5'
+    | '\u12c8' .. '\u12ce'
+    | '\u12d0' .. '\u12d6'
+    | '\u12d8' .. '\u12ee'
+    | '\u12f0' .. '\u130e'
+    | '\u1310'
+    | '\u1312' .. '\u1315'
+    | '\u1318' .. '\u131e'
+    | '\u1320' .. '\u1346'
+    | '\u1348' .. '\u135a'
+    | '\u13a0' .. '\u13f4'
+    | '\u1401' .. '\u166c'
+    | '\u166f' .. '\u1676'
+    | '\u1681' .. '\u169a'
+    | '\u16a0' .. '\u16ea'
+    | '\u16ee' .. '\u16f0'
+    | '\u1700' .. '\u170c'
+    | '\u170e' .. '\u1711'
+    | '\u1720' .. '\u1731'
+    | '\u1740' .. '\u1751'
+    | '\u1760' .. '\u176c'
+    | '\u176e' .. '\u1770'
+    | '\u1780' .. '\u17b3'
+    | '\u17d7'
+    | '\u17dc'
+    | '\u1820' .. '\u1877'
+    | '\u1880' .. '\u18a8'
+    | '\u1900' .. '\u191c'
+    | '\u1950' .. '\u196d'
+    | '\u1970' .. '\u1974'
+    | '\u1d00' .. '\u1d6b'
+    | '\u1e00' .. '\u1e9b'
+    | '\u1ea0' .. '\u1ef9'
+    | '\u1f00' .. '\u1f15'
+    | '\u1f18' .. '\u1f1d'
+    | '\u1f20' .. '\u1f45'
+    | '\u1f48' .. '\u1f4d'
+    | '\u1f50' .. '\u1f57'
+    | '\u1f59'
+    | '\u1f5b'
+    | '\u1f5d'
+    | '\u1f5f' .. '\u1f7d'
+    | '\u1f80' .. '\u1fb4'
+    | '\u1fb6' .. '\u1fbc'
+    | '\u1fbe'
+    | '\u1fc2' .. '\u1fc4'
+    | '\u1fc6' .. '\u1fcc'
+    | '\u1fd0' .. '\u1fd3'
+    | '\u1fd6' .. '\u1fdb'
+    | '\u1fe0' .. '\u1fec'
+    | '\u1ff2' .. '\u1ff4'
+    | '\u1ff6' .. '\u1ffc'
+    | '\u2071'
+    | '\u207f'
+    | '\u2102'
+    | '\u2107'
+    | '\u210a' .. '\u2113'
+    | '\u2115'
+    | '\u2118' .. '\u211d'
+    | '\u2124'
+    | '\u2126'
+    | '\u2128'
+    | '\u212a' .. '\u2131'
+    | '\u2133' .. '\u2139'
+    | '\u213d' .. '\u213f'
+    | '\u2145' .. '\u2149'
+    | '\u2160' .. '\u2183'
+    | '\u3005' .. '\u3007'
+    | '\u3021' .. '\u3029'
+    | '\u3031' .. '\u3035'
+    | '\u3038' .. '\u303c'
+    | '\u3041' .. '\u3096'
+    | '\u309d' .. '\u309f'
+    | '\u30a1' .. '\u30fa'
+    | '\u30fc' .. '\u30ff'
+    | '\u3105' .. '\u312c'
+    | '\u3131' .. '\u318e'
+    | '\u31a0' .. '\u31b7'
+    | '\u31f0' .. '\u31ff'
+    | '\u3400' .. '\u4db5'
+    | '\u4e00' .. '\u9fa5'
+    | '\ua000' .. '\ua48c'
+    | '\uac00' .. '\ud7a3'
+    | '\uf900' .. '\ufa2d'
+    | '\ufa30' .. '\ufa6a'
+    | '\ufb00' .. '\ufb06'
+    | '\ufb13' .. '\ufb17'
+    | '\ufb1d'
+    | '\ufb1f' .. '\ufb28'
+    | '\ufb2a' .. '\ufb36'
+    | '\ufb38' .. '\ufb3c'
+    | '\ufb3e'
+    | '\ufb40' .. '\ufb41'
+    | '\ufb43' .. '\ufb44'
+    | '\ufb46' .. '\ufbb1'
+    | '\ufbd3' .. '\ufc5d'
+    | '\ufc64' .. '\ufd3d'
+    | '\ufd50' .. '\ufd8f'
+    | '\ufd92' .. '\ufdc7'
+    | '\ufdf0' .. '\ufdf9'
+    | '\ufe71'
+    | '\ufe73'
+    | '\ufe77'
+    | '\ufe79'
+    | '\ufe7b'
+    | '\ufe7d'
+    | '\ufe7f' .. '\ufefc'
+    | '\uff21' .. '\uff3a'
+    | '\uff41' .. '\uff5a'
+    | '\uff66' .. '\uff9d'
+    | '\uffa0' .. '\uffbe'
+    | '\uffc2' .. '\uffc7'
+    | '\uffca' .. '\uffcf'
+    | '\uffd2' .. '\uffd7'
+    | '\uffda' .. '\uffdc'
+    ;

From e5e343aeb78a8fe7fed897eae1e79019383691e8 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki <pioczarn@gmail.com>
Date: Sat, 17 Jan 2015 21:59:44 +0100
Subject: [PATCH 2/3] Finished Unicode support in the model lexer.

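Completed the XID_Start and XID_Continue rules: characters outside the BMP are
matched as UTF-16 surrogate pairs, and verify.rs adjusts the ANTLR token
positions to account for each surrogate pair.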
---
 src/grammar/RustLexer.g4   |  2 +-
 src/grammar/verify.rs      | 59 ++++++++++++++++-------
 src/grammar/xidcontinue.g4 | 98 ++++++++++++++++++++++++++++++++++++++
 src/grammar/xidstart.g4    | 90 ++++++++++++++++++++++++++++++++++
 4 files changed, 230 insertions(+), 19 deletions(-)

diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index 6578f79f92b..8739d135b4f 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -93,7 +93,7 @@ fragment SUFFIX
   ;
 
 LIT_CHAR
-  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' SUFFIX?
+  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] | '\ud800' .. '\udbff' '\udc00' .. '\udfff' ) '\'' SUFFIX?
   ;
 
 LIT_BYTE
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index cf408c91609..8bf501c7f3f 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -10,6 +10,8 @@
 
 #![feature(plugin)]
 
+#![allow(unstable)]
+
 extern crate syntax;
 extern crate rustc;
 
@@ -164,7 +166,8 @@ fn count(lit: &str) -> usize {
     lit.chars().take_while(|c| *c == '#').count()
 }
 
-fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
+fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_pairs_pos: &[usize])
+                     -> TokenAndSpan {
     // old regex:
     // \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
     let start = s.find_str("[@").unwrap();
@@ -213,9 +216,16 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAn
         0
     };
 
+    let mut lo = start.parse::<u32>().unwrap() - offset;
+    let mut hi = end.parse::<u32>().unwrap() + 1;
+
+    // Adjust the span: For each surrogate pair already encountered, subtract one position.
+    lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32;
+    hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32;
+
     let sp = syntax::codemap::Span {
-        lo: syntax::codemap::BytePos(start.parse::<u32>().unwrap() - offset),
-        hi: syntax::codemap::BytePos(end.parse::<u32>().unwrap() + 1),
+        lo: syntax::codemap::BytePos(lo),
+        hi: syntax::codemap::BytePos(hi),
         expn_id: syntax::codemap::NO_EXPANSION
     };
 
@@ -235,11 +245,10 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
     }
 }
 
-fn span_cmp(rust_sp: syntax::codemap::Span, antlr_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool {
-    println!("{} {}", cm.bytepos_to_file_charpos(rust_sp.lo).to_uint(), cm.bytepos_to_file_charpos(rust_sp.hi).to_uint());
-    antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() &&
-    antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint() &&
-    antlr_sp.expn_id == rust_sp.expn_id
+fn span_cmp(antlr_sp: syntax::codemap::Span, rust_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool {
+    antlr_sp.expn_id == rust_sp.expn_id &&
+        antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() &&
+        antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint()
 }
 
 fn main() {
@@ -250,16 +259,18 @@ fn main() {
 
     let args = std::os::args();
 
-    let mut token_file = File::open(&Path::new(args[2]));
-    let token_map = parse_token_list(token_file.read_to_string().unwrap());
-
-    let mut stdin = std::io::stdin();
-    let mut lock = stdin.lock();
-    let lines = lock.lines();
-    let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
-                                                                   &token_map));
-
+    // Rust's lexer
     let code = File::open(&Path::new(args[1])).unwrap().read_to_string().unwrap();
+
+    let surrogate_pairs_pos: Vec<usize> = code.chars().enumerate()
+                                                     .filter(|&(_, c)| c as usize > 0xFFFF)
+                                                     .map(|(n, _)| n)
+                                                     .enumerate()
+                                                     .map(|(x, n)| x + n)
+                                                     .collect();
+
+    debug!("Pairs: {:?}", surrogate_pairs_pos);
+
     let options = config::basic_options();
     let session = session::build_session(options, None,
                                          syntax::diagnostics::registry::Registry::new(&[]));
@@ -269,13 +280,25 @@ fn main() {
     let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
     let ref cm = lexer.span_diagnostic.cm;
 
+    // ANTLR
+    let mut token_file = File::open(&Path::new(args[2]));
+    let token_map = parse_token_list(token_file.read_to_string().unwrap());
+
+    let mut stdin = std::io::stdin();
+    let mut lock = stdin.lock();
+    let lines = lock.lines();
+    let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
+                                                           &token_map,
+                                                           &surrogate_pairs_pos[]));
+
     for antlr_tok in antlr_tokens {
         let rustc_tok = next(&mut lexer);
         if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof {
             continue
         }
 
-        assert!(span_cmp(rustc_tok.sp, antlr_tok.sp, cm), "{:?} and {:?} have different spans", rustc_tok,
+        assert!(span_cmp(antlr_tok.sp, rustc_tok.sp, cm), "{:?} and {:?} have different spans",
+                rustc_tok,
                 antlr_tok);
 
         macro_rules! matches {
diff --git a/src/grammar/xidcontinue.g4 b/src/grammar/xidcontinue.g4
index 6000648f5fb..f3a1a3b40f9 100644
--- a/src/grammar/xidcontinue.g4
+++ b/src/grammar/xidcontinue.g4
@@ -372,4 +372,102 @@ fragment XID_Continue:
     | '\uffca' .. '\uffcf'
     | '\uffd2' .. '\uffd7'
     | '\uffda' .. '\uffdc'
+    | '\ud800' '\udc00' .. '\udc0b'  // U+10000..U+1000B (astral alternatives: high surrogate, then low-surrogate range)
+    | '\ud800' '\udc0d' .. '\udc26'  // U+1000D..U+10026
+    | '\ud800' '\udc28' .. '\udc3a'  // U+10028..U+1003A
+    | '\ud800' '\udc3c' .. '\udc3d'  // U+1003C..U+1003D
+    | '\ud800' '\udc3f' .. '\udc4d'  // U+1003F..U+1004D
+    | '\ud800' '\udc50' .. '\udc5d'  // U+10050..U+1005D
+    | '\ud800' '\udc80' .. '\udcfa'  // U+10080..U+100FA
+    | '\ud800' '\udf00' .. '\udf1e'  // U+10300..U+1031E
+    | '\ud800' '\udf30' .. '\udf4a'  // U+10330..U+1034A
+    | '\ud800' '\udf80' .. '\udf9d'  // U+10380..U+1039D
+    | '\ud801' '\udc00' .. '\udc9d'  // U+10400..U+1049D
+    | '\ud801' '\udca0' .. '\udca9'  // U+104A0..U+104A9
+    | '\ud802' '\udc00' .. '\udc05'  // U+10800..U+10805
+    | '\ud802' '\udc08'              // U+10808
+    | '\ud802' '\udc0a' .. '\udc35'  // U+1080A..U+10835
+    | '\ud802' '\udc37' .. '\udc38'  // U+10837..U+10838
+    | '\ud802' '\udc3c'              // U+1083C
+    | '\ud802' '\udc3f'              // U+1083F
+    | '\ud834' '\udd65' .. '\udd69'  // U+1D165..U+1D169
+    | '\ud834' '\udd6d' .. '\udd72'  // U+1D16D..U+1D172
+    | '\ud834' '\udd7b' .. '\udd82'  // U+1D17B..U+1D182
+    | '\ud834' '\udd85' .. '\udd8b'  // U+1D185..U+1D18B
+    | '\ud834' '\uddaa' .. '\uddad'  // U+1D1AA..U+1D1AD
+    | '\ud835' '\udc00' .. '\udc54'  // U+1D400..U+1D454
+    | '\ud835' '\udc56' .. '\udc9c'  // U+1D456..U+1D49C
+    | '\ud835' '\udc9e' .. '\udc9f'  // U+1D49E..U+1D49F
+    | '\ud835' '\udca2'              // U+1D4A2
+    | '\ud835' '\udca5' .. '\udca6'  // U+1D4A5..U+1D4A6
+    | '\ud835' '\udca9' .. '\udcac'  // U+1D4A9..U+1D4AC
+    | '\ud835' '\udcae' .. '\udcb9'  // U+1D4AE..U+1D4B9
+    | '\ud835' '\udcbb'              // U+1D4BB
+    | '\ud835' '\udcbd' .. '\udcc3'  // U+1D4BD..U+1D4C3
+    | '\ud835' '\udcc5' .. '\udd05'  // U+1D4C5..U+1D505
+    | '\ud835' '\udd07' .. '\udd0a'  // U+1D507..U+1D50A
+    | '\ud835' '\udd0d' .. '\udd14'  // U+1D50D..U+1D514
+    | '\ud835' '\udd16' .. '\udd1c'  // U+1D516..U+1D51C
+    | '\ud835' '\udd1e' .. '\udd39'  // U+1D51E..U+1D539
+    | '\ud835' '\udd3b' .. '\udd3e'  // U+1D53B..U+1D53E
+    | '\ud835' '\udd40' .. '\udd44'  // U+1D540..U+1D544
+    | '\ud835' '\udd46'              // U+1D546
+    | '\ud835' '\udd4a' .. '\udd50'  // U+1D54A..U+1D550
+    | '\ud835' '\udd52' .. '\udea3'  // U+1D552..U+1D6A3
+    | '\ud835' '\udea8' .. '\udec0'  // U+1D6A8..U+1D6C0
+    | '\ud835' '\udec2' .. '\udeda'  // U+1D6C2..U+1D6DA
+    | '\ud835' '\udedc' .. '\udefa'  // U+1D6DC..U+1D6FA
+    | '\ud835' '\udefc' .. '\udf14'  // U+1D6FC..U+1D714
+    | '\ud835' '\udf16' .. '\udf34'  // U+1D716..U+1D734
+    | '\ud835' '\udf36' .. '\udf4e'  // U+1D736..U+1D74E
+    | '\ud835' '\udf50' .. '\udf6e'  // U+1D750..U+1D76E
+    | '\ud835' '\udf70' .. '\udf88'  // U+1D770..U+1D788
+    | '\ud835' '\udf8a' .. '\udfa8'  // U+1D78A..U+1D7A8
+    | '\ud835' '\udfaa' .. '\udfc2'  // U+1D7AA..U+1D7C2
+    | '\ud835' '\udfc4' .. '\udfc9'  // U+1D7C4..U+1D7C9
+    | '\ud835' '\udfce' .. '\udfff'  // U+1D7CE..U+1D7FF
+    | '\ud840' '\udc00' .. '\udfff'  // U+20000..U+203FF (first 1024-code-point chunk of U+20000..U+2A6D6)
+    | '\ud841' '\udc00' .. '\udfff'  // U+20400..U+207FF
+    | '\ud842' '\udc00' .. '\udfff'  // U+20800..U+20BFF
+    | '\ud843' '\udc00' .. '\udfff'  // U+20C00..U+20FFF
+    | '\ud844' '\udc00' .. '\udfff'  // U+21000..U+213FF
+    | '\ud845' '\udc00' .. '\udfff'  // U+21400..U+217FF
+    | '\ud846' '\udc00' .. '\udfff'  // U+21800..U+21BFF
+    | '\ud847' '\udc00' .. '\udfff'  // U+21C00..U+21FFF
+    | '\ud848' '\udc00' .. '\udfff'  // U+22000..U+223FF
+    | '\ud849' '\udc00' .. '\udfff'  // U+22400..U+227FF
+    | '\ud84a' '\udc00' .. '\udfff'  // U+22800..U+22BFF
+    | '\ud84b' '\udc00' .. '\udfff'  // U+22C00..U+22FFF
+    | '\ud84c' '\udc00' .. '\udfff'  // U+23000..U+233FF
+    | '\ud84d' '\udc00' .. '\udfff'  // U+23400..U+237FF
+    | '\ud84e' '\udc00' .. '\udfff'  // U+23800..U+23BFF
+    | '\ud84f' '\udc00' .. '\udfff'  // U+23C00..U+23FFF
+    | '\ud850' '\udc00' .. '\udfff'  // U+24000..U+243FF
+    | '\ud851' '\udc00' .. '\udfff'  // U+24400..U+247FF
+    | '\ud852' '\udc00' .. '\udfff'  // U+24800..U+24BFF
+    | '\ud853' '\udc00' .. '\udfff'  // U+24C00..U+24FFF
+    | '\ud854' '\udc00' .. '\udfff'  // U+25000..U+253FF
+    | '\ud855' '\udc00' .. '\udfff'  // U+25400..U+257FF
+    | '\ud856' '\udc00' .. '\udfff'  // U+25800..U+25BFF
+    | '\ud857' '\udc00' .. '\udfff'  // U+25C00..U+25FFF
+    | '\ud858' '\udc00' .. '\udfff'  // U+26000..U+263FF
+    | '\ud859' '\udc00' .. '\udfff'  // U+26400..U+267FF
+    | '\ud85a' '\udc00' .. '\udfff'  // U+26800..U+26BFF
+    | '\ud85b' '\udc00' .. '\udfff'  // U+26C00..U+26FFF
+    | '\ud85c' '\udc00' .. '\udfff'  // U+27000..U+273FF
+    | '\ud85d' '\udc00' .. '\udfff'  // U+27400..U+277FF
+    | '\ud85e' '\udc00' .. '\udfff'  // U+27800..U+27BFF
+    | '\ud85f' '\udc00' .. '\udfff'  // U+27C00..U+27FFF
+    | '\ud860' '\udc00' .. '\udfff'  // U+28000..U+283FF
+    | '\ud861' '\udc00' .. '\udfff'  // U+28400..U+287FF
+    | '\ud862' '\udc00' .. '\udfff'  // U+28800..U+28BFF
+    | '\ud863' '\udc00' .. '\udfff'  // U+28C00..U+28FFF
+    | '\ud864' '\udc00' .. '\udfff'  // U+29000..U+293FF
+    | '\ud865' '\udc00' .. '\udfff'  // U+29400..U+297FF
+    | '\ud866' '\udc00' .. '\udfff'  // U+29800..U+29BFF
+    | '\ud867' '\udc00' .. '\udfff'  // U+29C00..U+29FFF
+    | '\ud868' '\udc00' .. '\udfff'  // U+2A000..U+2A3FF
+    | '\ud869' '\udc00' .. '\uded6'  // U+2A400..U+2A6D6
+    | '\ud87e' '\udc00' .. '\ude1d'  // U+2F800..U+2FA1D
+    | '\udb40' '\udd00' .. '\uddef'  // U+E0100..U+E01EF
     ;
diff --git a/src/grammar/xidstart.g4 b/src/grammar/xidstart.g4
index d02774c6135..53fb50f4584 100644
--- a/src/grammar/xidstart.g4
+++ b/src/grammar/xidstart.g4
@@ -286,4 +286,94 @@ fragment XID_Start :
     | '\uffca' .. '\uffcf'
     | '\uffd2' .. '\uffd7'
     | '\uffda' .. '\uffdc'
+    | '\ud800' '\udc00' .. '\udc0a'
+    | '\ud800' '\udc0d' .. '\udc25'
+    | '\ud800' '\udc28' .. '\udc39'
+    | '\ud800' '\udc3c' .. '\udc3c'
+    | '\ud800' '\udc3f' .. '\udc4c'
+    | '\ud800' '\udc50' .. '\udc5c'
+    | '\ud800' '\udc80' .. '\udcf9'
+    | '\ud800' '\udf00' .. '\udf1d'
+    | '\ud800' '\udf30' .. '\udf49'
+    | '\ud800' '\udf80' .. '\udf9c'
+    | '\ud801' '\ue000' .. '\ue09c'
+    | '\ud802' '\ue400' .. '\ue404'
+    | '\ud802' '\u0808'
+    | '\ud802' '\ue40a' .. '\ue434'
+    | '\ud802' '\ue437' .. '\ue437'
+    | '\ud802' '\u083c'
+    | '\ud802' '\u083f'
+    | '\ud835' '\ub000' .. '\ub053'
+    | '\ud835' '\ub056' .. '\ub09b'
+    | '\ud835' '\ub09e' .. '\ub09e'
+    | '\ud835' '\ud4a2'
+    | '\ud835' '\ub0a5' .. '\ub0a5'
+    | '\ud835' '\ub0a9' .. '\ub0ab'
+    | '\ud835' '\ub0ae' .. '\ub0b8'
+    | '\ud835' '\ud4bb'
+    | '\ud835' '\ub0bd' .. '\ub0c2'
+    | '\ud835' '\ub0c5' .. '\ub104'
+    | '\ud835' '\ub107' .. '\ub109'
+    | '\ud835' '\ub10d' .. '\ub113'
+    | '\ud835' '\ub116' .. '\ub11b'
+    | '\ud835' '\ub11e' .. '\ub138'
+    | '\ud835' '\ub13b' .. '\ub13d'
+    | '\ud835' '\ub140' .. '\ub143'
+    | '\ud835' '\ud546'
+    | '\ud835' '\ub14a' .. '\ub14f'
+    | '\ud835' '\ub152' .. '\ub2a2'
+    | '\ud835' '\ub2a8' .. '\ub2bf'
+    | '\ud835' '\ub2c2' .. '\ub2d9'
+    | '\ud835' '\ub2dc' .. '\ub2f9'
+    | '\ud835' '\ub2fc' .. '\ub313'
+    | '\ud835' '\ub316' .. '\ub333'
+    | '\ud835' '\ub336' .. '\ub34d'
+    | '\ud835' '\ub350' .. '\ub36d'
+    | '\ud835' '\ub370' .. '\ub387'
+    | '\ud835' '\ub38a' .. '\ub3a7'
+    | '\ud835' '\ub3aa' .. '\ub3c1'
+    | '\ud835' '\ub3c4' .. '\ub3c8'
+    | '\ud840' '\udc00' .. '\udffe'
+    | '\ud841' '\ue000' .. '\ue3fe'
+    | '\ud842' '\ue400' .. '\ue7fe'
+    | '\ud843' '\ue800' .. '\uebfe'
+    | '\ud844' '\uec00' .. '\ueffe'
+    | '\ud845' '\uf000' .. '\uf3fe'
+    | '\ud846' '\uf400' .. '\uf7fe'
+    | '\ud847' '\uf800' .. '\ufbfe'
+    | '\ud848' '\ufc00' .. '\ufffe'
+    | '\ud849' '\u0000' .. '\u03fe'
+    | '\ud84a' '\u0400' .. '\u07fe'
+    | '\ud84b' '\u0800' .. '\u0bfe'
+    | '\ud84c' '\u0c00' .. '\u0ffe'
+    | '\ud84d' '\u1000' .. '\u13fe'
+    | '\ud84e' '\u1400' .. '\u17fe'
+    | '\ud84f' '\u1800' .. '\u1bfe'
+    | '\ud850' '\u1c00' .. '\u1ffe'
+    | '\ud851' '\u2000' .. '\u23fe'
+    | '\ud852' '\u2400' .. '\u27fe'
+    | '\ud853' '\u2800' .. '\u2bfe'
+    | '\ud854' '\u2c00' .. '\u2ffe'
+    | '\ud855' '\u3000' .. '\u33fe'
+    | '\ud856' '\u3400' .. '\u37fe'
+    | '\ud857' '\u3800' .. '\u3bfe'
+    | '\ud858' '\u3c00' .. '\u3ffe'
+    | '\ud859' '\u4000' .. '\u43fe'
+    | '\ud85a' '\u4400' .. '\u47fe'
+    | '\ud85b' '\u4800' .. '\u4bfe'
+    | '\ud85c' '\u4c00' .. '\u4ffe'
+    | '\ud85d' '\u5000' .. '\u53fe'
+    | '\ud85e' '\u5400' .. '\u57fe'
+    | '\ud85f' '\u5800' .. '\u5bfe'
+    | '\ud860' '\u5c00' .. '\u5ffe'
+    | '\ud861' '\u6000' .. '\u63fe'
+    | '\ud862' '\u6400' .. '\u67fe'
+    | '\ud863' '\u6800' .. '\u6bfe'
+    | '\ud864' '\u6c00' .. '\u6ffe'
+    | '\ud865' '\u7000' .. '\u73fe'
+    | '\ud866' '\u7400' .. '\u77fe'
+    | '\ud867' '\u7800' .. '\u7bfe'
+    | '\ud868' '\u7c00' .. '\u7ffe'
+    | '\ud869' '\u8000' .. '\u82d5'
+    | '\ud87e' '\ud400' .. '\ud61c'
     ;

From 13bc8afa4b56e8d7b012f35a35a11087739abf15 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki <pioczarn@gmail.com>
Date: Tue, 21 Apr 2015 12:02:12 +0200
Subject: [PATCH 3/3] Model lexer: Fix remaining issues

---
 src/grammar/README.md                         |   2 +-
 src/grammar/RustLexer.g4                      | 112 ++++++++----------
 src/grammar/check.sh                          |   8 +-
 src/grammar/verify.rs                         |  84 +++++++------
 src/libcollections/fmt.rs                     |   2 -
 src/libcollections/str.rs                     |   2 -
 src/libcollections/string.rs                  |   2 -
 src/libcore/hash/sip.rs                       |   2 -
 src/libcore/num/mod.rs                        |   2 -
 src/libcore/str/mod.rs                        |   2 -
 src/libcoretest/char.rs                       |   2 -
 src/libgetopts/lib.rs                         |   2 -
 src/librand/distributions/gamma.rs            |   2 -
 src/librustc_unicode/u_str.rs                 |   2 -
 src/libserialize/hex.rs                       |   2 -
 src/libstd/ascii.rs                           |   2 -
 src/libstd/collections/hash/map.rs            |   2 -
 src/libstd/collections/hash/set.rs            |   2 -
 src/libstd/collections/hash/table.rs          |   2 -
 src/libstd/io/buffered.rs                     |   2 -
 src/libstd/num/strconv.rs                     |   2 -
 src/libstd/rt/util.rs                         |   2 -
 src/libsyntax/codemap.rs                      |   2 -
 src/libsyntax/ext/tt/macro_parser.rs          |   2 -
 src/test/bench/core-std.rs                    |   1 -
 src/test/bench/msgsend-ring-mutex-arcs.rs     |   1 -
 src/test/bench/noise.rs                       |   1 -
 src/test/compile-fail/utf8_idents.rs          |   1 -
 src/test/pretty/block-comment-wchar.pp        |   1 -
 src/test/pretty/block-comment-wchar.rs        |   1 -
 src/test/run-pass/byte-literals.rs            |   1 -
 .../default-method-supertrait-vtable.rs       |   1 -
 src/test/run-pass/ifmt.rs                     |   1 -
 src/test/run-pass/issue-12582.rs              |   1 -
 src/test/run-pass/issue-13027.rs              |   1 -
 src/test/run-pass/issue-2718.rs               |   1 -
 src/test/run-pass/issue-3683.rs               |   1 -
 src/test/run-pass/issue-4759-1.rs             |   2 -
 src/test/run-pass/issue-5280.rs               |   1 -
 .../issue-5321-immediates-with-bare-self.rs   |   1 -
 ...line-endings-string-literal-doc-comment.rs |   1 -
 ...ase-types-non-uppercase-statics-unicode.rs |   1 -
 src/test/run-pass/match-range.rs              |   1 -
 src/test/run-pass/multibyte.rs                |   1 -
 src/test/run-pass/raw-str.rs                  | Bin 1341 -> 1307 bytes
 src/test/run-pass/shebang.rs                  |   1 -
 src/test/run-pass/struct-return.rs            |   1 -
 src/test/run-pass/trait-to-str.rs             |   1 -
 .../run-pass/trait-with-bounds-default.rs     |   1 -
 .../run-pass/traits-default-method-self.rs    |   1 -
 .../run-pass/traits-default-method-trivial.rs |   1 -
 src/test/run-pass/unsized.rs                  |   2 -
 src/test/run-pass/unsized2.rs                 |   2 -
 src/test/run-pass/utf8-bom.rs                 |   1 -
 src/test/run-pass/utf8.rs                     |   1 -
 src/test/run-pass/utf8_chars.rs               |   1 -
 src/test/run-pass/utf8_idents.rs              |   1 -
 57 files changed, 102 insertions(+), 179 deletions(-)

diff --git a/src/grammar/README.md b/src/grammar/README.md
index 1f7923e1caf..6e0cf17a880 100644
--- a/src/grammar/README.md
+++ b/src/grammar/README.md
@@ -12,7 +12,7 @@ javac *.java
 rustc -O verify.rs
 for file in ../*/**.rs; do
     echo $file;
-    grun RustLexer tokens -tokens < $file | ./verify $file RustLexer.tokens || break
+    grun RustLexer tokens -tokens < "$file" | ./verify "$file" RustLexer.tokens || break
 done
 ```
 
diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index 8739d135b4f..3d8f3aeb28f 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -1,5 +1,12 @@
 lexer grammar RustLexer;
 
+@lexer::members {
+  public boolean is_at(int pos) { /* true iff the input stream index equals pos */
+    return _input.index() == pos;
+  }
+}
+
+
 tokens {
     EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT,
     MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
@@ -8,7 +15,7 @@ tokens {
     LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
     LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
     LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
-    COMMENT
+    COMMENT, SHEBANG
 }
 
 import xidstart , xidcontinue;
@@ -86,85 +93,54 @@ fragment CHAR_ESCAPE
   | [xX] HEXIT HEXIT
   | 'u' HEXIT HEXIT HEXIT HEXIT
   | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
+  | 'u{' HEXIT '}'
+  | 'u{' HEXIT HEXIT '}'
+  | 'u{' HEXIT HEXIT HEXIT '}'
+  | 'u{' HEXIT HEXIT HEXIT HEXIT '}'
+  | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
+  | 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
   ;
 
 fragment SUFFIX
   : IDENT
   ;
 
+fragment INTEGER_SUFFIX /* a SUFFIX whose first character is neither e nor E */
+  : { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX // presumably so 1e5 is a LIT_FLOAT
+  ;
+
 LIT_CHAR
-  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] | '\ud800' .. '\udbff' '\udc00' .. '\udfff' ) '\'' SUFFIX?
+  : '\'' ( '\\' CHAR_ESCAPE
+         | ~[\\'\n\t\r]
+         | '\ud800' .. '\udbff' '\udc00' .. '\udfff'
+         )
+    '\'' SUFFIX?
   ;
 
 LIT_BYTE
-  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX?
+  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT
+                 | [nrt\\'"0] )
+          | ~[\\'\n\t\r] '\udc00'..'\udfff'?
+          )
+    '\'' SUFFIX?
   ;
 
 LIT_INTEGER
-  : [0-9][0-9_]* SUFFIX?
-  | '0b' [01][01_]* SUFFIX?
-  | '0o' [0-7][0-7_]* SUFFIX?
-  | '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX?
+
+  : [0-9][0-9_]* INTEGER_SUFFIX?
+  | '0b' [01_]+ INTEGER_SUFFIX?
+  | '0o' [0-7_]+ INTEGER_SUFFIX?
+  | '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
   ;
 
 LIT_FLOAT
   : [0-9][0-9_]* ('.' {
-        /* dot followed by another dot is a range, no float */
+        /* dot followed by another dot is a range, not a float */
         _input.LA(1) != '.' &&
-        /* dot followed by an identifier is an integer with a function call, no float */
+        /* dot followed by an identifier is an integer with a function call, not a float */
         _input.LA(1) != '_' &&
-        _input.LA(1) != 'a' &&
-        _input.LA(1) != 'b' &&
-        _input.LA(1) != 'c' &&
-        _input.LA(1) != 'd' &&
-        _input.LA(1) != 'e' &&
-        _input.LA(1) != 'f' &&
-        _input.LA(1) != 'g' &&
-        _input.LA(1) != 'h' &&
-        _input.LA(1) != 'i' &&
-        _input.LA(1) != 'j' &&
-        _input.LA(1) != 'k' &&
-        _input.LA(1) != 'l' &&
-        _input.LA(1) != 'm' &&
-        _input.LA(1) != 'n' &&
-        _input.LA(1) != 'o' &&
-        _input.LA(1) != 'p' &&
-        _input.LA(1) != 'q' &&
-        _input.LA(1) != 'r' &&
-        _input.LA(1) != 's' &&
-        _input.LA(1) != 't' &&
-        _input.LA(1) != 'u' &&
-        _input.LA(1) != 'v' &&
-        _input.LA(1) != 'w' &&
-        _input.LA(1) != 'x' &&
-        _input.LA(1) != 'y' &&
-        _input.LA(1) != 'z' &&
-        _input.LA(1) != 'A' &&
-        _input.LA(1) != 'B' &&
-        _input.LA(1) != 'C' &&
-        _input.LA(1) != 'D' &&
-        _input.LA(1) != 'E' &&
-        _input.LA(1) != 'F' &&
-        _input.LA(1) != 'G' &&
-        _input.LA(1) != 'H' &&
-        _input.LA(1) != 'I' &&
-        _input.LA(1) != 'J' &&
-        _input.LA(1) != 'K' &&
-        _input.LA(1) != 'L' &&
-        _input.LA(1) != 'M' &&
-        _input.LA(1) != 'N' &&
-        _input.LA(1) != 'O' &&
-        _input.LA(1) != 'P' &&
-        _input.LA(1) != 'Q' &&
-        _input.LA(1) != 'R' &&
-        _input.LA(1) != 'S' &&
-        _input.LA(1) != 'T' &&
-        _input.LA(1) != 'U' &&
-        _input.LA(1) != 'V' &&
-        _input.LA(1) != 'W' &&
-        _input.LA(1) != 'X' &&
-        _input.LA(1) != 'Y' &&
-        _input.LA(1) != 'Z'
+        !(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
+        !(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
   }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
   ;
 
@@ -172,8 +148,8 @@ LIT_STR
   : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
   ;
 
-LIT_BINARY : 'b' LIT_STR SUFFIX?;
-LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX?;
+LIT_BINARY : 'b' LIT_STR ;
+LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
 
 /* this is a bit messy */
 
@@ -201,13 +177,19 @@ LIFETIME : '\'' IDENT ;
 
 WHITESPACE : [ \r\n\t]+ ;
 
-UNDOC_COMMENT     : '////' ~[\r\n]* -> type(COMMENT) ;
+UNDOC_COMMENT     : '////' ~[\n]* -> type(COMMENT) ;
 YESDOC_COMMENT    : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
 OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
-LINE_COMMENT      : '//' ~[\r\n]* -> type(COMMENT) ;
+LINE_COMMENT      : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
 
 DOC_BLOCK_COMMENT
   : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
   ;
 
 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
+
+/* these appear at the beginning of a file */
+
+SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
+
+UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;
diff --git a/src/grammar/check.sh b/src/grammar/check.sh
index b0628303b66..560b6b72471 100755
--- a/src/grammar/check.sh
+++ b/src/grammar/check.sh
@@ -18,13 +18,13 @@ failed=0
 skipped=0
 
 check() {
-    grep --silent "// ignore-lexer-test" $1;
+    grep --silent "// ignore-lexer-test" "$1";
 
     # if it's *not* found...
     if [ $? -eq 1 ]; then
         cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't
-        # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't
-        # seem to have anny effect.
+        # figure out how to wrangle the CLASSPATH, just adding build/grammar
+        # didn't seem to have any effect.
         if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then
             echo "pass: $1"
             passed=`expr $passed + 1`
@@ -39,7 +39,7 @@ check() {
 }
 
 for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do
-    check $file $2 $3 $4 $5
+    check "$file" $2 $3 $4 $5
 done
 
 printf "\ntest result: "
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 8bf501c7f3f..dec797747c2 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -8,9 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-#![feature(plugin)]
-
-#![allow(unstable)]
+#![feature(plugin, rustc_private, str_char, collections)]
 
 extern crate syntax;
 extern crate rustc;
@@ -19,7 +17,10 @@ extern crate rustc;
 extern crate log;
 
 use std::collections::HashMap;
-use std::io::File;
+use std::env;
+use std::fs::File;
+use std::io::{BufRead, Read};
+use std::path::Path;
 
 use syntax::parse;
 use syntax::parse::lexer;
@@ -27,6 +28,7 @@ use rustc::session::{self, config};
 
 use syntax::ast;
 use syntax::ast::Name;
+use syntax::codemap;
 use syntax::codemap::Pos;
 use syntax::parse::token;
 use syntax::parse::lexer::TokenAndSpan;
@@ -108,6 +110,7 @@ fn parse_token_list(file: &str) -> HashMap<String, token::Token> {
             "LIT_BINARY"        => token::Literal(token::Binary(Name(0)), None),
             "LIT_BINARY_RAW"    => token::Literal(token::BinaryRaw(Name(0), 0), None),
             "QUESTION"          => token::Question,
+            "SHEBANG"           => token::Shebang(Name(0)),
             _                   => continue,
         };
 
@@ -166,24 +169,26 @@ fn count(lit: &str) -> usize {
     lit.chars().take_while(|c| *c == '#').count()
 }
 
-fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_pairs_pos: &[usize])
+fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_pairs_pos: &[usize],
+                     has_bom: bool)
                      -> TokenAndSpan {
     // old regex:
     // \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
-    let start = s.find_str("[@").unwrap();
-    let comma = start + s[start..].find_str(",").unwrap();
-    let colon = comma + s[comma..].find_str(":").unwrap();
-    let content_start = colon + s[colon..].find_str("='").unwrap();
-    let content_end = content_start + s[content_start..].find_str("',<").unwrap();
-    let toknum_end = content_end + s[content_end..].find_str(">,").unwrap();
+    let start = s.find("[@").unwrap();
+    let comma = start + s[start..].find(",").unwrap();
+    let colon = comma + s[comma..].find(":").unwrap();
+    let content_start = colon + s[colon..].find("='").unwrap();
+    // Use rfind instead of find: the token's content may itself contain `',<`.
+    let content_end = content_start + s[content_start..].rfind("',<").unwrap();
+    let toknum_end = content_end + s[content_end..].find(">,").unwrap();
 
     let start = &s[comma + 1 .. colon];
     let end = &s[colon + 1 .. content_start];
     let content = &s[content_start + 2 .. content_end];
     let toknum = &s[content_end + 3 .. toknum_end];
 
-    let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
-                                                              toknum));
+    let not_found = format!("didn't find token {:?} in the map", toknum);
+    let proto_tok = tokens.get(toknum).expect(&not_found[..]);
 
     let nm = parse::token::intern(content);
 
@@ -209,24 +214,25 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_
         ref t => t.clone()
     };
 
-    let offset = if real_tok == token::Eof
- {
+    let start_offset = if real_tok == token::Eof {
         1
     } else {
         0
     };
 
-    let mut lo = start.parse::<u32>().unwrap() - offset;
-    let mut hi = end.parse::<u32>().unwrap() + 1;
+    let offset = if has_bom { 1 } else { 0 };
+
+    let mut lo = start.parse::<u32>().unwrap() - start_offset - offset;
+    let mut hi = end.parse::<u32>().unwrap() + 1 - offset;
 
     // Adjust the span: For each surrogate pair already encountered, subtract one position.
     lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32;
     hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32;
 
-    let sp = syntax::codemap::Span {
-        lo: syntax::codemap::BytePos(lo),
-        hi: syntax::codemap::BytePos(hi),
-        expn_id: syntax::codemap::NO_EXPANSION
+    let sp = codemap::Span {
+        lo: codemap::BytePos(lo),
+        hi: codemap::BytePos(hi),
+        expn_id: codemap::NO_EXPANSION
     };
 
     TokenAndSpan {
@@ -245,10 +251,10 @@ fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
     }
 }
 
-fn span_cmp(antlr_sp: syntax::codemap::Span, rust_sp: syntax::codemap::Span, cm: &syntax::codemap::CodeMap) -> bool {
+fn span_cmp(antlr_sp: codemap::Span, rust_sp: codemap::Span, cm: &codemap::CodeMap) -> bool {
     antlr_sp.expn_id == rust_sp.expn_id &&
-        antlr_sp.lo.to_uint() == cm.bytepos_to_file_charpos(rust_sp.lo).to_uint() &&
-        antlr_sp.hi.to_uint() == cm.bytepos_to_file_charpos(rust_sp.hi).to_uint()
+        antlr_sp.lo.to_usize() == cm.bytepos_to_file_charpos(rust_sp.lo).to_usize() &&
+        antlr_sp.hi.to_usize() == cm.bytepos_to_file_charpos(rust_sp.hi).to_usize()
 }
 
 fn main() {
@@ -257,10 +263,15 @@ fn main() {
         r.next_token()
     }
 
-    let args = std::os::args();
+    let mut args = env::args().skip(1);
+    let filename = args.next().unwrap();
+    if filename.find("parse-fail").is_some() {
+        return;
+    }
 
     // Rust's lexer
-    let code = File::open(&Path::new(args[1])).unwrap().read_to_string().unwrap();
+    let mut code = String::new();
+    File::open(&Path::new(&filename)).unwrap().read_to_string(&mut code).unwrap();
 
     let surrogate_pairs_pos: Vec<usize> = code.chars().enumerate()
                                                      .filter(|&(_, c)| c as usize > 0xFFFF)
@@ -269,6 +280,8 @@ fn main() {
                                                      .map(|(x, n)| x + n)
                                                      .collect();
 
+    let has_bom = code.starts_with("\u{feff}");
+
     debug!("Pairs: {:?}", surrogate_pairs_pos);
 
     let options = config::basic_options();
@@ -281,15 +294,18 @@ fn main() {
     let ref cm = lexer.span_diagnostic.cm;
 
     // ANTLR
-    let mut token_file = File::open(&Path::new(args[2]));
-    let token_map = parse_token_list(token_file.read_to_string().unwrap());
+    let mut token_file = File::open(&Path::new(&args.next().unwrap())).unwrap();
+    let mut token_list = String::new();
+    token_file.read_to_string(&mut token_list).unwrap();
+    let token_map = parse_token_list(&token_list[..]);
 
-    let mut stdin = std::io::stdin();
-    let mut lock = stdin.lock();
+    let stdin = std::io::stdin();
+    let lock = stdin.lock();
     let lines = lock.lines();
-    let mut antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
-                                                           &token_map,
-                                                           &surrogate_pairs_pos[]));
+    let antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
+                                                       &token_map,
+                                                       &surrogate_pairs_pos[..],
+                                                       has_bom));
 
     for antlr_tok in antlr_tokens {
         let rustc_tok = next(&mut lexer);
@@ -314,7 +330,7 @@ fn main() {
                         }
                         _ => panic!("{:?} is not {:?}", antlr_tok, rustc_tok)
                     },)*
-                    ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", rustc_tok, antlr_tok)
+                    ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", antlr_tok, rustc_tok)
                 }
             )
         }
diff --git a/src/libcollections/fmt.rs b/src/libcollections/fmt.rs
index 5f0d9012d1a..40b64b5c3b4 100644
--- a/src/libcollections/fmt.rs
+++ b/src/libcollections/fmt.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Utilities for formatting and printing strings
 //!
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index 0f902e258b9..266cda9a237 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Unicode string manipulation (the `str` type).
 //!
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 74af5783fa8..3422bfe5423 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! An owned, growable string that enforces that its contents are valid UTF-8.
 
diff --git a/src/libcore/hash/sip.rs b/src/libcore/hash/sip.rs
index 6820a7025fc..65f790d5d43 100644
--- a/src/libcore/hash/sip.rs
+++ b/src/libcore/hash/sip.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15883
 
 //! An implementation of SipHash 2-4.
 
diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs
index a056e585fee..bcfdcfcd5e6 100644
--- a/src/libcore/num/mod.rs
+++ b/src/libcore/num/mod.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Numeric traits and functions for the built-in numeric types.
 
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 2d6ef39361e..34810b4864e 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! String manipulation
 //!
diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs
index 4939277aa59..b73807aa317 100644
--- a/src/libcoretest/char.rs
+++ b/src/libcoretest/char.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 #[test]
 fn test_is_lowercase() {
diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs
index 02c4a233996..197199e743f 100644
--- a/src/libgetopts/lib.rs
+++ b/src/libgetopts/lib.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15677
 
 //! Simple getopt alternative.
 //!
diff --git a/src/librand/distributions/gamma.rs b/src/librand/distributions/gamma.rs
index 1125d096536..f37093c6db8 100644
--- a/src/librand/distributions/gamma.rs
+++ b/src/librand/distributions/gamma.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! The Gamma and derived distributions.
 
diff --git a/src/librustc_unicode/u_str.rs b/src/librustc_unicode/u_str.rs
index 09a5feb5fef..c63c586b6a9 100644
--- a/src/librustc_unicode/u_str.rs
+++ b/src/librustc_unicode/u_str.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Unicode-intensive string manipulations.
 //!
diff --git a/src/libserialize/hex.rs b/src/libserialize/hex.rs
index 0676edf8169..87f1dca2cae 100644
--- a/src/libserialize/hex.rs
+++ b/src/libserialize/hex.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Hex binary-to-text encoding
 
diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs
index a2ba8c4c1ba..ccc56960b02 100644
--- a/src/libstd/ascii.rs
+++ b/src/libstd/ascii.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! Operations on ASCII strings and characters
 
diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs
index 4ac15b7991b..a5bbbee790a 100644
--- a/src/libstd/collections/hash/map.rs
+++ b/src/libstd/collections/hash/map.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15883
 
 use self::Entry::*;
 use self::SearchResult::*;
diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs
index 62c03389b24..82109900bf2 100644
--- a/src/libstd/collections/hash/set.rs
+++ b/src/libstd/collections/hash/set.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15883
 
 use borrow::Borrow;
 use clone::Clone;
diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs
index dec6d1e2209..65ebf8515e6 100644
--- a/src/libstd/collections/hash/table.rs
+++ b/src/libstd/collections/hash/table.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15883
 
 use self::BucketState::*;
 
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs
index bd44a9547b4..67cac42c35e 100644
--- a/src/libstd/io/buffered.rs
+++ b/src/libstd/io/buffered.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15883
 
 //! Buffering wrappers for I/O traits
 
diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs
index 8ab66f2328f..ce1da4742d1 100644
--- a/src/libstd/num/strconv.rs
+++ b/src/libstd/num/strconv.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 #![allow(missing_docs)]
 #![allow(deprecated)]
diff --git a/src/libstd/rt/util.rs b/src/libstd/rt/util.rs
index 9919238c208..31e970a9550 100644
--- a/src/libstd/rt/util.rs
+++ b/src/libstd/rt/util.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15677
 
 use io::prelude::*;
 
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index a0bde8f6c52..dfdaa47d8b9 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! The CodeMap tracks all the source code used within a single crate, mapping
 //! from integer byte positions to the original source code location. Each bit
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 9c3a556b210..58df4038403 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -7,8 +7,6 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
 
 //! This is an Earley-like parser, without support for in-grammar nonterminals,
 //! only by calling out to the main rust parser for named nonterminals (which it
diff --git a/src/test/bench/core-std.rs b/src/test/bench/core-std.rs
index 46caed6f9f5..19f83c7817c 100644
--- a/src/test/bench/core-std.rs
+++ b/src/test/bench/core-std.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// ignore-lexer-test FIXME #15679
 // Microbenchmarks for various functions in std and extra
 
 #![feature(rand, collections, std_misc)]
diff --git a/src/test/bench/msgsend-ring-mutex-arcs.rs b/src/test/bench/msgsend-ring-mutex-arcs.rs
index c87cdb617a4..8048f3dde96 100644
--- a/src/test/bench/msgsend-ring-mutex-arcs.rs
+++ b/src/test/bench/msgsend-ring-mutex-arcs.rs
@@ -16,7 +16,6 @@
 // This also serves as a pipes test, because Arcs are implemented with pipes.
 
 // no-pretty-expanded FIXME #15189
-// ignore-lexer-test FIXME #15679
 
 #![feature(std_misc)]
 
diff --git a/src/test/bench/noise.rs b/src/test/bench/noise.rs
index c21470d4bb3..530c499f5fd 100644
--- a/src/test/bench/noise.rs
+++ b/src/test/bench/noise.rs
@@ -10,7 +10,6 @@
 
 // Multi-language Perlin noise benchmark.
 // See https://github.com/nsf/pnoise for timings and alternative implementations.
-// ignore-lexer-test FIXME #15679
 
 #![feature(rand, core)]
 
diff --git a/src/test/compile-fail/utf8_idents.rs b/src/test/compile-fail/utf8_idents.rs
index a5471e87f22..8594c35f8dd 100644
--- a/src/test/compile-fail/utf8_idents.rs
+++ b/src/test/compile-fail/utf8_idents.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 fn foo<
     'β, //~ ERROR non-ascii idents are not fully supported
diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
index a5d82277d2f..2dc7e8f9525 100644
--- a/src/test/pretty/block-comment-wchar.pp
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -14,7 +14,6 @@
 // ignore-tidy-cr
 // ignore-tidy-tab
 // pp-exact:block-comment-wchar.pp
-// ignore-lexer-test FIXME #15679
 fn f() {
     fn nested() {
         /*
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
index eb6d2a4a0a1..6f4a95e7c9b 100644
--- a/src/test/pretty/block-comment-wchar.rs
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -14,7 +14,6 @@
 // ignore-tidy-cr
 // ignore-tidy-tab
 // pp-exact:block-comment-wchar.pp
-// ignore-lexer-test FIXME #15679
 fn f() {
     fn nested() {
         /*
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
index fbe2a65bc89..9f7b98a57fc 100644
--- a/src/test/run-pass/byte-literals.rs
+++ b/src/test/run-pass/byte-literals.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15883
 
 
 static FOO: u8 = b'\xF0';
diff --git a/src/test/run-pass/default-method-supertrait-vtable.rs b/src/test/run-pass/default-method-supertrait-vtable.rs
index 3b1e04be78d..0d45a5d5212 100644
--- a/src/test/run-pass/default-method-supertrait-vtable.rs
+++ b/src/test/run-pass/default-method-supertrait-vtable.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 
 // Tests that we can call a function bounded over a supertrait from
diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs
index ea9db9b1e1f..7ae1347f2c7 100644
--- a/src/test/run-pass/ifmt.rs
+++ b/src/test/run-pass/ifmt.rs
@@ -9,7 +9,6 @@
 // except according to those terms.
 
 // no-pretty-expanded unnecessary unsafe block generated
-// ignore-lexer-test FIXME #15679
 
 #![deny(warnings)]
 #![allow(unused_must_use)]
diff --git a/src/test/run-pass/issue-12582.rs b/src/test/run-pass/issue-12582.rs
index 4009d17139d..7bab2ddfed0 100644
--- a/src/test/run-pass/issue-12582.rs
+++ b/src/test/run-pass/issue-12582.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 pub fn main() {
     let x = 1;
diff --git a/src/test/run-pass/issue-13027.rs b/src/test/run-pass/issue-13027.rs
index dadd480dc6a..14987484711 100644
--- a/src/test/run-pass/issue-13027.rs
+++ b/src/test/run-pass/issue-13027.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 // Tests that match expression handles overlapped literal and range
 // properly in the presence of guard function.
diff --git a/src/test/run-pass/issue-2718.rs b/src/test/run-pass/issue-2718.rs
index 71d1439dd2b..0df89c72424 100644
--- a/src/test/run-pass/issue-2718.rs
+++ b/src/test/run-pass/issue-2718.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15883
 
 #![feature(unsafe_destructor, std_misc)]
 
diff --git a/src/test/run-pass/issue-3683.rs b/src/test/run-pass/issue-3683.rs
index 096eec803ff..ed9b8066104 100644
--- a/src/test/run-pass/issue-3683.rs
+++ b/src/test/run-pass/issue-3683.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 
 trait Foo {
diff --git a/src/test/run-pass/issue-4759-1.rs b/src/test/run-pass/issue-4759-1.rs
index 3532a395b7a..a565460c42e 100644
--- a/src/test/run-pass/issue-4759-1.rs
+++ b/src/test/run-pass/issue-4759-1.rs
@@ -8,8 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// pretty-expanded FIXME #23616
-
 trait U { fn f(self); }
 impl U for isize { fn f(self) {} }
 pub fn main() { 4.f(); }
diff --git a/src/test/run-pass/issue-5280.rs b/src/test/run-pass/issue-5280.rs
index bd892465054..5e2e4df95b3 100644
--- a/src/test/run-pass/issue-5280.rs
+++ b/src/test/run-pass/issue-5280.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 type FontTableTag = u32;
 
diff --git a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
index d0bc396c368..dd00fab5020 100644
--- a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
+++ b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 trait Fooable {
     fn yes(self);
diff --git a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
index 421ae8e9497..5c8db524cc2 100644
--- a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
+++ b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -16,7 +16,6 @@
 // this directory should enforce it.
 
 // ignore-pretty
-// ignore-lexer-test FIXME #15882
 
 /// Doc comment that ends in CRLF
 pub fn foo() {}
diff --git a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
index 6ddaee9c8bd..6e65cb2afd4 100644
--- a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
+++ b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 
 #![forbid(non_camel_case_types)]
diff --git a/src/test/run-pass/match-range.rs b/src/test/run-pass/match-range.rs
index 68719090cff..0b2e19d6c79 100644
--- a/src/test/run-pass/match-range.rs
+++ b/src/test/run-pass/match-range.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 pub fn main() {
     match 5_usize {
diff --git a/src/test/run-pass/multibyte.rs b/src/test/run-pass/multibyte.rs
index 77084836408..0475dd10fde 100644
--- a/src/test/run-pass/multibyte.rs
+++ b/src/test/run-pass/multibyte.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 // Test that multibyte characters don't crash the compiler
 pub fn main() {
diff --git a/src/test/run-pass/raw-str.rs b/src/test/run-pass/raw-str.rs
index 298ac8f77eb11dd6f5a4a63ea68920e34a5e7d61..9ee824d4185851b4477dadb969fae7a857ff2bb8 100644
GIT binary patch
delta 12
TcmdnXHJfY0BgV}vOqR?5AAAG#

delta 36
rcmbQuwU=wdBS!I@)QZ$1-ICPe5(PKU2wztPWkXXlbIZ-w8C{qG@9qoU

diff --git a/src/test/run-pass/shebang.rs b/src/test/run-pass/shebang.rs
index 87da814771b..15ab21bbc8d 100644
--- a/src/test/run-pass/shebang.rs
+++ b/src/test/run-pass/shebang.rs
@@ -11,6 +11,5 @@
 
 // ignore-pretty: `expand` adds some preludes before shebang
 //
-// ignore-lexer-test FIXME #15878
 
 pub fn main() { println!("Hello World"); }
diff --git a/src/test/run-pass/struct-return.rs b/src/test/run-pass/struct-return.rs
index 1ff13d4eaea..109287a83b1 100644
--- a/src/test/run-pass/struct-return.rs
+++ b/src/test/run-pass/struct-return.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15883
 
 #[derive(Copy, Clone)]
 pub struct Quad { a: u64, b: u64, c: u64, d: u64 }
diff --git a/src/test/run-pass/trait-to-str.rs b/src/test/run-pass/trait-to-str.rs
index 3d84092c062..a29e0e932c0 100644
--- a/src/test/run-pass/trait-to-str.rs
+++ b/src/test/run-pass/trait-to-str.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15883
 
 
 trait to_str {
diff --git a/src/test/run-pass/trait-with-bounds-default.rs b/src/test/run-pass/trait-with-bounds-default.rs
index 34a79c4cf31..cfd81240094 100644
--- a/src/test/run-pass/trait-with-bounds-default.rs
+++ b/src/test/run-pass/trait-with-bounds-default.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 pub trait Clone2 {
     /// Returns a copy of the value. The contents of owned pointers
diff --git a/src/test/run-pass/traits-default-method-self.rs b/src/test/run-pass/traits-default-method-self.rs
index d9536108f4d..36b0eb527b6 100644
--- a/src/test/run-pass/traits-default-method-self.rs
+++ b/src/test/run-pass/traits-default-method-self.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 
 trait Cat {
diff --git a/src/test/run-pass/traits-default-method-trivial.rs b/src/test/run-pass/traits-default-method-trivial.rs
index 0e71fcab9d1..a2e7f54bba6 100644
--- a/src/test/run-pass/traits-default-method-trivial.rs
+++ b/src/test/run-pass/traits-default-method-trivial.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15877
 
 
 trait Cat {
diff --git a/src/test/run-pass/unsized.rs b/src/test/run-pass/unsized.rs
index 449d6b37e9f..26f7b767988 100644
--- a/src/test/run-pass/unsized.rs
+++ b/src/test/run-pass/unsized.rs
@@ -10,8 +10,6 @@
 
 // Test syntax checks for `?Sized` syntax.
 
-// pretty-expanded FIXME #23616
-
 use std::marker::PhantomData;
 
 trait T1  { }
diff --git a/src/test/run-pass/unsized2.rs b/src/test/run-pass/unsized2.rs
index 965ce6bad16..1cce98ae6b7 100644
--- a/src/test/run-pass/unsized2.rs
+++ b/src/test/run-pass/unsized2.rs
@@ -8,8 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// pretty-expanded FIXME #23616
-
 #![allow(unknown_features)]
 #![feature(box_syntax)]
 
diff --git a/src/test/run-pass/utf8-bom.rs b/src/test/run-pass/utf8-bom.rs
index baa4e941ff0..c3052a928d6 100644
--- a/src/test/run-pass/utf8-bom.rs
+++ b/src/test/run-pass/utf8-bom.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 // This file has utf-8 BOM, it should be compiled normally without error.
 
diff --git a/src/test/run-pass/utf8.rs b/src/test/run-pass/utf8.rs
index 07fd7b297b4..4782edf4e12 100644
--- a/src/test/run-pass/utf8.rs
+++ b/src/test/run-pass/utf8.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 // no-pretty-expanded FIXME #15189
 
 pub fn main() {
diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs
index 45a3f2327aa..36b64551ef2 100644
--- a/src/test/run-pass/utf8_chars.rs
+++ b/src/test/run-pass/utf8_chars.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 #![feature(collections, core, str_char)]
 
diff --git a/src/test/run-pass/utf8_idents.rs b/src/test/run-pass/utf8_idents.rs
index b11b7e83eb6..559afcd1641 100644
--- a/src/test/run-pass/utf8_idents.rs
+++ b/src/test/run-pass/utf8_idents.rs
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 //
-// ignore-lexer-test FIXME #15679
 
 #![feature(non_ascii_idents)]