Auto merge of #43698 - MaloJaffre:confusables, r=eddyb

Update the list of confusable characters

Also reorder and space the list to make it clearer for future updates
and to come closer to the original list.

This was tedious but somewhat rewarding!

Thanks @est31 for the instructions.

Fixes #43629.
r? @est31
This commit is contained in:
bors 2017-08-08 14:39:27 +00:00
commit ddc02deb07

View File

@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -9,15 +9,16 @@
// except according to those terms.
// Characters and their corresponding confusables were collected from
// http://www.unicode.org/Public/security/revision-06/confusables.txt
// http://www.unicode.org/Public/security/10.0.0/confusables.txt
use syntax_pos::{Span, NO_EXPANSION};
use errors::DiagnosticBuilder;
use super::StringReader;
const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
(' ', "No-Break Space", ' '),
('', "Ogham Space Mark", ' '),
('', "Line Separator", ' '),
('', "Paragraph Separator", ' '),
('', "Ogham Space mark", ' '),
(' ', "En Quad", ' '),
('', "Em Quad", ' '),
('', "En Space", ' '),
@ -25,39 +26,63 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('', "Three-Per-Em Space", ' '),
('', "Four-Per-Em Space", ' '),
('', "Six-Per-Em Space", ' '),
('', "Figure Space", ' '),
('', "Punctuation Space", ' '),
('', "Thin Space", ' '),
('', "Hair Space", ' '),
('', "Narrow No-Break Space", ' '),
('', "Medium Mathematical Space", ' '),
(' ', "No-Break Space", ' '),
('', "Figure Space", ' '),
('', "Narrow No-Break Space", ' '),
(' ', "Ideographic Space", ' '),
('ߺ', "Nko Lajanyalan", '_'),
('', "Dashed Low Line", '_'),
('', "Centreline Low Line", '_'),
('', "Wavy Low Line", '_'),
('_', "Fullwidth Low Line", '_'),
('', "Hyphen", '-'),
('', "Non-Breaking Hyphen", '-'),
('', "Figure Dash", '-'),
('', "En Dash", '-'),
('—', "Em Dash", '-'),
('', "Small Em Dash", '-'),
('۔', "Arabic Full Stop", '-'),
('', "Hyphen Bullet", '-'),
('˗', "Modifier Letter Minus Sign", '-'),
('', "Minus Sign", '-'),
('', "Heavy Minus Sign", '-'),
('', "Coptic Letter Dialect-P Ni", '-'),
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
('', "Fullwidth Hyphen-Minus", '-'),
('―', "Horizontal Bar", '-'),
('─', "Box Drawings Light Horizontal", '-'),
('━', "Box Drawings Heavy Horizontal", '-'),
('㇐', "CJK Stroke H", '-'),
('ꟷ', "Latin Epigraphic Letter Dideways", '-'),
('ᅳ', "Hangul Jungseong Eu", '-'),
('ㅡ', "Hangul Letter Eu", '-'),
('一', "CJK Unified Ideograph-4E00", '-'),
('⼀', "Kangxi Radical One", '-'),
('؍', "Arabic Date Separator", ','),
('٫', "Arabic Decimal Separator", ','),
('', "Single Low-9 Quotation Mark", ','),
('¸', "Cedilla", ','),
('', "Lisu Letter Tone Na Po", ','),
('', "Fullwidth Comma", ','),
(';', "Greek Question Mark", ';'),
('', "Fullwidth Semicolon", ';'),
('︔', "Presentation Form For Vertical Semicolon", ';'),
('', "Devanagari Sign Visarga", ':'),
('', "Gujarati Sign Visarga", ':'),
('', "Fullwidth Colon", ':'),
('։', "Armenian Full Stop", ':'),
('܃', "Syriac Supralinear Colon", ':'),
('܄', "Syriac Sublinear Colon", ':'),
('', "Runic Multiple Ponctuation", ':'),
('', "Presentation Form For Vertical Two Dot Leader", ':'),
('', "Mongolian Full Stop", ':'),
('', "Mongolian Manchu Full Stop", ':'),
@ -68,25 +93,48 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('', "Ratio", ':'),
('ː', "Modifier Letter Triangular Colon", ':'),
('', "Lisu Letter Tone Mya Jeu", ':'),
('︓', "Presentation Form For Vertical Colon", ':'),
('', "Fullwidth Exclamation Mark", '!'),
('ǃ', "Latin Letter Retroflex Click", '!'),
('', "Tifinagh Letter Tuareg Yang", '!'),
('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
('ʔ', "Latin Letter Glottal Stop", '?'),
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
('', "Devanagari Letter Glottal Stop", '?'),
('', "Cherokee Letter He", '?'),
('', "Bamum Letter Ntuu", '?'),
('', "Fullwidth Question Mark", '?'),
('︖', "Presentation Form For Vertical Question Mark", '?'),
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
('', "One Dot Leader", '.'),
('۔', "Arabic Full Stop", '.'),
('܁', "Syriac Supralinear Full Stop", '.'),
('܂', "Syriac Sublinear Full Stop", '.'),
('', "Vai Full Stop", '.'),
('𐩐', "Kharoshthi Punctuation Dot", '.'),
('·', "Middle Dot", '.'),
('٠', "Arabic-Indic Digit Zero", '.'),
('۰', "Extended Arabic-Indic Digit Zero", '.'),
('', "Lisu Letter Tone Mya Ti", '.'),
('。', "Ideographic Full Stop", '.'),
('·', "Middle Dot", '.'),
('・', "Katakana Middle Dot", '.'),
('・', "Halfwidth Katakana Middle Dot", '.'),
('᛫', "Runic Single Punctuation", '.'),
('·', "Greek Ano Teleia", '.'),
('⸱', "Word Separator Middle Dot", '.'),
('𐄁', "Aegean Word Separator Dot", '.'),
('•', "Bullet", '.'),
('‧', "Hyphenation Point", '.'),
('∙', "Bullet Operator", '.'),
('⋅', "Dot Operator", '.'),
('ꞏ', "Latin Letter Sinological Dot", '.'),
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
('', "Fullwidth Full Stop", '.'),
('。', "Ideographic Full Stop", '.'),
('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
('՝', "Armenian Comma", '\''),
('', "Fullwidth Apostrophe", '\''),
('', "Left Single Quotation Mark", '\''),
@ -96,8 +144,10 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('', "Reversed Prime", '\''),
('՚', "Armenian Apostrophe", '\''),
('׳', "Hebrew Punctuation Geresh", '\''),
('`', "Greek Accent", '\''),
('', "Greek Varia", '\''),
('', "Fullwidth Grave Accent", '\''),
('´', "Acute Accent", '\''),
('΄', "Greek Tonos", '\''),
('', "Greek Oxia", '\''),
('', "Greek Koronis", '\''),
@ -105,6 +155,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('', "Greek Dasia", '\''),
('ʹ', "Modifier Letter Prime", '\''),
('ʹ', "Greek Numeral Sign", '\''),
('ˈ', "Modifier Letter Vertical Line", '\''),
('ˊ', "Modifier Letter Acute Accent", '\''),
('ˋ', "Modifier Letter Grave Accent", '\''),
('˴', "Modifier Letter Middle Grave Accent", '\''),
@ -116,6 +167,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('י', "Hebrew Letter Yod", '\''),
('ߴ', "Nko High Tone Apostrophe", '\''),
('ߵ', "Nko Low Tone Apostrophe", '\''),
('', "Canadian Syllabics West-Cree P", '\''),
('', "Runic Letter Short-Twig-Sol S", '\''),
('𖽑', "Miao Sign Aspiration", '\''),
('𖽒', "Miao Sign Reformed Voicing", '\''),
('᳓', "Vedic Sign Nihshvasa", '"'),
('', "Fullwidth Quotation Mark", '"'),
('“', "Left Double Quotation Mark", '"'),
('”', "Right Double Quotation Mark", '"'),
@ -132,12 +189,15 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
('', "Fullwidth Left Parenthesis", '('),
('', "Medium Left Parenthesis Ornament", '('),
('', "Ornate Left Parenthesis", '('),
('', "Fullwidth Left Parenthesis", '('),
('', "Fullwidth Right Parenthesis", ')'),
('', "Medium Right Parenthesis Ornament", ')'),
('﴿', "Ornate Right Parenthesis", ')'),
('', "Fullwidth Right Parenthesis", ')'),
('', "Fullwidth Left Square Bracket", '['),
('', "Light Left Tortoise Shell Bracket Ornament", '['),
('「', "Left Corner Bracket", '['),
@ -147,6 +207,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('〖', "Left White Lenticular Bracket", '['),
('〘', "Left White Tortoise Shell Bracket", '['),
('〚', "Left White Square Bracket", '['),
('', "Fullwidth Right Square Bracket", ']'),
('', "Light Right Tortoise Shell Bracket Ornament", ']'),
('」', "Right Corner Bracket", ']'),
@ -156,11 +217,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('〗', "Right White Lenticular Bracket", ']'),
('〙', "Right White Tortoise Shell Bracket", ']'),
('〛', "Right White Square Bracket", ']'),
('', "Medium Left Curly Bracket Ornament", '{'),
('𝄔', "Musical Symbol Brace", '{'),
('', "Fullwidth Left Curly Bracket", '{'),
('', "Medium Right Curly Bracket Ornament", '}'),
('', "Fullwidth Right Curly Bracket", '}'),
('', "Low Asterisk", '*'),
('٭', "Arabic Five Pointed Star", '*'),
('', "Asterisk Operator", '*'),
('𐌟', "Old Italic Letter Ess", '*'),
('', "Fullwidth Asterisk", '*'),
('', "Philippine Single Punctuation", '/'),
('', "Caret Insertion Point", '/'),
('', "Division Slash", '/'),
@ -168,37 +238,73 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
('', "Mathematical Rising Diagonal", '/'),
('', "Big Solidus", '/'),
('', "Cjk Stroke Sp", '/'),
('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
('', "CJK Stroke Sp", '/'),
('', "Vertical Kana Repeat Mark Upper Half", '/'),
('丿', "Cjk Unified Ideograph-4E3F", '/'),
('', "Coptic Capital Letter Old Coptic Esh", '/'),
('', "Katakana Letter No", '/'),
('丿', "CJK Unified Ideograph-4E3F", '/'),
('', "Kangxi Radical Slash", '/'),
('', "Fullwidth Solidus", '/'),
('', "Fullwidth Reverse Solidus", '\\'),
('', "Small Reverse Solidus", '\\'),
('', "Set Minus", '\\'),
('', "Mathematical Falling Diagonal", '\\'),
('', "Reverse Solidus Operator", '\\'),
('', "Big Reverse Solidus", '\\'),
('', "Greek Vocal Notation Symbol-16", '\\'),
('', "Greek Instrumental Symbol-48", '\\'),
('', "CJK Stroke D", '\\'),
('', "CJK Unified Ideograph-4E36", '\\'),
('', "Kangxi Radical Dot", '\\'),
('、', "Ideographic Comma", '\\'),
('ヽ', "Katakana Iteration Mark", '\\'),
('', "Cjk Stroke D", '\\'),
('', "Cjk Unified Ideograph-4E36", '\\'),
('', "Kangxi Radical Dot", '\\'),
('', "Latin Small Letter Um", '&'),
('', "Fullwidth Ampersand", '&'),
('', "Runic Cros Punctuation", '+'),
('', "Heavy Plus Sign", '+'),
('𐊛', "Lycian Letter H", '+'),
('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
('', "Fullwidth Plus Sign", '+'),
('', "Single Left-Pointing Angle Quotation Mark", '<'),
('', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
('˂', "Modifier Letter Left Arrowhead", '<'),
('𝈶', "Greek Instrumental Symbol-40", '<'),
('', "Canadian Syllabics Pa", '<'),
('', "Runic Letter Kauna", '<'),
('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
('⟨', "Mathematical Left Angle Bracket", '<'),
('〈', "Left-Pointing Angle Bracket", '<'),
('〈', "Left Angle Bracket", '<'),
('㇛', "CJK Stroke Pd", '<'),
('く', "Hiragana Letter Ku", '<'),
('𡿨', "CJK Unified Ideograph-21FE8", '<'),
('《', "Left Double Angle Bracket", '<'),
('', "Fullwidth Less-Than Sign", '<'),
('', "Canadian Syllabics Hyphen", '='),
('', "Double Hyphen", '='),
('', "Katakana-Hiragana Double Hyphen", '='),
('', "Lisu Punctuation Full Stop", '='),
('', "Fullwidth Equals Sign", '='),
('', "Single Right-Pointing Angle Quotation Mark", '>'),
('', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
('˃', "Modifier Letter Right Arrowhead", '>'),
('𝈷', "Greek Instrumental Symbol-42", '>'),
('', "Canadian Syllabics Po", '>'),
('𖼿', "Miao Letter Archaic Zza", '>'),
('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
('⟩', "Mathematical Right Angle Bracket", '>'),
('〉', "Right-Pointing Angle Bracket", '>'),
('〉', "Right Angle Bracket", '>'),
('》', "Right Double Angle Bracket", '>'),
('', "Coptic Capital Letter Dialect-P Ni", '-'),
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
('', "Coptic Capital Letter Old Coptic Esh", '/'), ];
('', "Fullwidth Greater-Than Sign", '>'), ];
const ASCII_ARRAY: &'static [(char, &'static str)] = &[
(' ', "Space"),