Auto merge of #33128 - xen0n:more-confusing-unicode-chars, r=nagisa

Add more aliases for Unicode confusable chars

Building upon #29837, this PR:

* added aliases for space characters,
* distinguished square brackets from parentheses, and
* added common CJK punctuation characters as aliases.

This will especially help CJK users who may have forgotten to switch off their IME when coding.
2016-05-05 08:50:23 -07:00 · 2016-05-05 08:50:23 -07:00 · 413bafdabf
parent 3f65afa694 496081c5c7
commit 413bafdabf
1 changed file with 53 additions and 6 deletions
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@ -16,6 +16,22 @@ use errors::DiagnosticBuilder;
 use super::StringReader;

 const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
+    (' ', "No-Break Space", ' '),
+    (' ', "Ogham Space Mark", ' '),
+    (' ', "En Quad", ' '),
+    (' ', "Em Quad", ' '),
+    (' ', "En Space", ' '),
+    (' ', "Em Space", ' '),
+    (' ', "Three-Per-Em Space", ' '),
+    (' ', "Four-Per-Em Space", ' '),
+    (' ', "Six-Per-Em Space", ' '),
+    (' ', "Figure Space", ' '),
+    (' ', "Punctuation Space", ' '),
+    (' ', "Thin Space", ' '),
+    (' ', "Hair Space", ' '),
+    (' ', "Narrow No-Break Space", ' '),
+    (' ', "Medium Mathematical Space", ' '),
+    ('　', "Ideographic Space", ' '),
    ('ߺ', "Nko Lajanyalan", '_'),
    ('﹍', "Dashed Low Line", '_'),
    ('﹎', "Centreline Low Line", '_'),
@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('‑', "Non-Breaking Hyphen", '-'),
    ('‒', "Figure Dash", '-'),
    ('–', "En Dash", '-'),
+    ('—', "Em Dash", '-'),
    ('﹘', "Small Em Dash", '-'),
    ('⁃', "Hyphen Bullet", '-'),
    ('˗', "Modifier Letter Minus Sign", '-'),
    ('−', "Minus Sign", '-'),
+    ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
    ('٫', "Arabic Decimal Separator", ','),
    ('‚', "Single Low-9 Quotation Mark", ','),
    ('ꓹ', "Lisu Letter Tone Na Po", ','),
+    ('，', "Fullwidth Comma", ','),
    (';', "Greek Question Mark", ';'),
+    ('；', "Fullwidth Semicolon", ';'),
    ('ः', "Devanagari Sign Visarga", ':'),
    ('ઃ', "Gujarati Sign Visarga", ':'),
    ('：', "Fullwidth Colon", ':'),
@ -53,6 +73,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('ʔ', "Latin Letter Glottal Stop", '?'),
    ('ॽ', "Devanagari Letter Glottal Stop", '?'),
    ('Ꭾ', "Cherokee Letter He", '?'),
+    ('？', "Fullwidth Question Mark", '?'),
    ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
    ('․', "One Dot Leader", '.'),
    ('۔', "Arabic Full Stop", '.'),
@ -60,9 +81,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('܂', "Syriac Sublinear Full Stop", '.'),
    ('꘎', "Vai Full Stop", '.'),
    ('𐩐', "Kharoshthi Punctuation Dot", '.'),
+    ('·', "Middle Dot", '.'),
    ('٠', "Arabic-Indic Digit Zero", '.'),
    ('۰', "Extended Arabic-Indic Digit Zero", '.'),
    ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
+    ('。', "Ideographic Full Stop", '.'),
+    ('・', "Katakana Middle Dot", '.'),
    ('՝', "Armenian Comma", '\''),
    ('＇', "Fullwidth Apostrophe", '\''),
    ('‘', "Left Single Quotation Mark", '\''),
@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
    ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
    ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
-    ('［', "Fullwidth Left Square Bracket", '('),
    ('❨', "Medium Left Parenthesis Ornament", '('),
-    ('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
-    ('〔', "Left Tortoise Shell Bracket", '('),
    ('﴾', "Ornate Left Parenthesis", '('),
-    ('］', "Fullwidth Right Square Bracket", ')'),
+    ('（', "Fullwidth Left Parenthesis", '('),
    ('❩', "Medium Right Parenthesis Ornament", ')'),
-    ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
-    ('〕', "Right Tortoise Shell Bracket", ')'),
    ('﴿', "Ornate Right Parenthesis", ')'),
+    ('）', "Fullwidth Right Parenthesis", ')'),
+    ('［', "Fullwidth Left Square Bracket", '['),
+    ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
+    ('「', "Left Corner Bracket", '['),
+    ('『', "Left White Corner Bracket", '['),
+    ('【', "Left Black Lenticular Bracket", '['),
+    ('〔', "Left Tortoise Shell Bracket", '['),
+    ('〖', "Left White Lenticular Bracket", '['),
+    ('〘', "Left White Tortoise Shell Bracket", '['),
+    ('〚', "Left White Square Bracket", '['),
+    ('］', "Fullwidth Right Square Bracket", ']'),
+    ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
+    ('」', "Right Corner Bracket", ']'),
+    ('』', "Right White Corner Bracket", ']'),
+    ('】', "Right Black Lenticular Bracket", ']'),
+    ('〕', "Right Tortoise Shell Bracket", ']'),
+    ('〗', "Right White Lenticular Bracket", ']'),
+    ('〙', "Right White Tortoise Shell Bracket", ']'),
+    ('〛', "Right White Square Bracket", ']'),
    ('❴', "Medium Left Curly Bracket Ornament", '{'),
    ('❵', "Medium Right Curly Bracket Ornament", '}'),
    ('⁎', "Low Asterisk", '*'),
@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('⟍', "Mathematical Falling Diagonal", '\\'),
    ('⧵', "Reverse Solidus Operator", '\\'),
    ('⧹', "Big Reverse Solidus", '\\'),
+    ('、', "Ideographic Comma", '\\'),
+    ('ヽ', "Katakana Iteration Mark", '\\'),
    ('㇔', "Cjk Stroke D", '\\'),
    ('丶', "Cjk Unified Ideograph-4E36", '\\'),
    ('⼂', "Kangxi Radical Dot", '\\'),
@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
    ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
    ('˂', "Modifier Letter Left Arrowhead", '<'),
+    ('〈', "Left Angle Bracket", '<'),
+    ('《', "Left Double Angle Bracket", '<'),
    ('꓿', "Lisu Punctuation Full Stop", '='),
    ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
    ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
    ('˃', "Modifier Letter Right Arrowhead", '>'),
+    ('〉', "Right Angle Bracket", '>'),
+    ('》', "Right Double Angle Bracket", '>'),
    ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
    ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
    ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];

 const ASCII_ARRAY: &'static [(char, &'static str)] = &[
+    (' ', "Space"),
    ('_', "Underscore"),
    ('-', "Minus/Hyphen"),
    (',', "Comma"),
@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[
    ('"', "Quotation Mark"),
    ('(', "Left Parenthesis"),
    (')', "Right Parenthesis"),
+    ('[', "Left Square Bracket"),
+    (']', "Right Square Bracket"),
    ('{', "Left Curly Brace"),
    ('}', "Right Curly Brace"),
    ('*', "Asterisk"),