1- // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
1+ // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
22// file at the top-level directory of this distribution and at
33// http://rust-lang.org/COPYRIGHT.
44//
99// except according to those terms.
1010
1111// Characters and their corresponding confusables were collected from
12- // http://www.unicode.org/Public/security/revision-06/confusables.txt
12+ // http://www.unicode.org/Public/security/10.0.0/confusables.txt
1313
1414use syntax_pos:: { Span , NO_EXPANSION } ;
1515use errors:: DiagnosticBuilder ;
1616use super :: StringReader ;
1717
1818const UNICODE_ARRAY : & ' static [ ( char , & ' static str , char ) ] = & [
19- ( ' ' , "No-Break Space" , ' ' ) ,
20- ( ' ' , "Ogham Space Mark" , ' ' ) ,
19+ ( '\u{2028}' , "Line Separator" , ' ' ) ,
20+ ( '\u{2029}' , "Paragraph Separator" , ' ' ) ,
21+ ( ' ' , "Ogham Space Mark" , ' ' ) ,
2122 ( ' ' , "En Quad" , ' ' ) ,
2223 ( ' ' , "Em Quad" , ' ' ) ,
2324 ( ' ' , "En Space" , ' ' ) ,
2425 ( ' ' , "Em Space" , ' ' ) ,
2526 ( ' ' , "Three-Per-Em Space" , ' ' ) ,
2627 ( ' ' , "Four-Per-Em Space" , ' ' ) ,
2728 ( ' ' , "Six-Per-Em Space" , ' ' ) ,
28- ( ' ' , "Figure Space" , ' ' ) ,
2929 ( ' ' , "Punctuation Space" , ' ' ) ,
3030 ( ' ' , "Thin Space" , ' ' ) ,
3131 ( ' ' , "Hair Space" , ' ' ) ,
32- ( ' ' , "Narrow No-Break Space" , ' ' ) ,
3332 ( ' ' , "Medium Mathematical Space" , ' ' ) ,
33+ ( ' ' , "No-Break Space" , ' ' ) ,
34+ ( ' ' , "Figure Space" , ' ' ) ,
35+ ( ' ' , "Narrow No-Break Space" , ' ' ) ,
3436 ( ' ' , "Ideographic Space" , ' ' ) ,
37+
3538 ( 'ߺ' , "Nko Lajanyalan" , '_' ) ,
3639 ( '﹍' , "Dashed Low Line" , '_' ) ,
3740 ( '﹎' , "Centreline Low Line" , '_' ) ,
3841 ( '﹏' , "Wavy Low Line" , '_' ) ,
42+ ( '_' , "Fullwidth Low Line" , '_' ) ,
43+
3944 ( '‐' , "Hyphen" , '-' ) ,
4045 ( '‑' , "Non-Breaking Hyphen" , '-' ) ,
4146 ( '‒' , "Figure Dash" , '-' ) ,
4247 ( '–' , "En Dash" , '-' ) ,
4348 ( '—' , "Em Dash" , '-' ) ,
4449 ( '﹘' , "Small Em Dash" , '-' ) ,
50+ ( '۔' , "Arabic Full Stop" , '-' ) ,
4551 ( '⁃' , "Hyphen Bullet" , '-' ) ,
4652 ( '˗' , "Modifier Letter Minus Sign" , '-' ) ,
4753 ( '−' , "Minus Sign" , '-' ) ,
54+ ( '➖' , "Heavy Minus Sign" , '-' ) ,
55+ ( 'Ⲻ' , "Coptic Letter Dialect-P Ni" , '-' ) ,
4856 ( 'ー' , "Katakana-Hiragana Prolonged Sound Mark" , '-' ) ,
57+ ( '-' , "Fullwidth Hyphen-Minus" , '-' ) ,
58+ ( '―' , "Horizontal Bar" , '-' ) ,
59+ ( '─' , "Box Drawings Light Horizontal" , '-' ) ,
60+ ( '━' , "Box Drawings Heavy Horizontal" , '-' ) ,
61+ ( '㇐' , "CJK Stroke H" , '-' ) ,
62+ ( 'ꟷ' , "Latin Epigraphic Letter Sideways I" , '-' ) ,
63+ ( 'ᅳ' , "Hangul Jungseong Eu" , '-' ) ,
64+ ( 'ㅡ' , "Hangul Letter Eu" , '-' ) ,
65+ ( '一' , "CJK Unified Ideograph-4E00" , '-' ) ,
66+ ( '⼀' , "Kangxi Radical One" , '-' ) ,
67+
68+ ( '؍' , "Arabic Date Separator" , ',' ) ,
4969 ( '٫' , "Arabic Decimal Separator" , ',' ) ,
5070 ( '‚' , "Single Low-9 Quotation Mark" , ',' ) ,
71+ ( '¸' , "Cedilla" , ',' ) ,
5172 ( 'ꓹ' , "Lisu Letter Tone Na Po" , ',' ) ,
5273 ( ',' , "Fullwidth Comma" , ',' ) ,
74+
5375 ( ';' , "Greek Question Mark" , ';' ) ,
5476 ( ';' , "Fullwidth Semicolon" , ';' ) ,
77+ ( '︔' , "Presentation Form For Vertical Semicolon" , ';' ) ,
78+
5579 ( 'ः' , "Devanagari Sign Visarga" , ':' ) ,
5680 ( 'ઃ' , "Gujarati Sign Visarga" , ':' ) ,
5781 ( ':' , "Fullwidth Colon" , ':' ) ,
5882 ( '։' , "Armenian Full Stop" , ':' ) ,
5983 ( '܃' , "Syriac Supralinear Colon" , ':' ) ,
6084 ( '܄' , "Syriac Sublinear Colon" , ':' ) ,
85+ ( '᛬' , "Runic Multiple Punctuation" , ':' ) ,
6186 ( '︰' , "Presentation Form For Vertical Two Dot Leader" , ':' ) ,
6287 ( '᠃' , "Mongolian Full Stop" , ':' ) ,
6388 ( '᠉' , "Mongolian Manchu Full Stop" , ':' ) ,
@@ -68,25 +93,48 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
6893 ( '∶' , "Ratio" , ':' ) ,
6994 ( 'ː' , "Modifier Letter Triangular Colon" , ':' ) ,
7095 ( 'ꓽ' , "Lisu Letter Tone Mya Jeu" , ':' ) ,
96+ ( '︓' , "Presentation Form For Vertical Colon" , ':' ) ,
97+
7198 ( '!' , "Fullwidth Exclamation Mark" , '!' ) ,
7299 ( 'ǃ' , "Latin Letter Retroflex Click" , '!' ) ,
100+ ( 'ⵑ' , "Tifinagh Letter Tuareg Yang" , '!' ) ,
101+ ( '︕' , "Presentation Form For Vertical Exclamation Mark" , '!' ) ,
102+
73103 ( 'ʔ' , "Latin Letter Glottal Stop" , '?' ) ,
104+ ( 'Ɂ' , "Latin Capital Letter Glottal Stop" , '?' ) ,
74105 ( 'ॽ' , "Devanagari Letter Glottal Stop" , '?' ) ,
75106 ( 'Ꭾ' , "Cherokee Letter He" , '?' ) ,
107+ ( 'ꛫ' , "Bamum Letter Ntuu" , '?' ) ,
76108 ( '?' , "Fullwidth Question Mark" , '?' ) ,
109+ ( '︖' , "Presentation Form For Vertical Question Mark" , '?' ) ,
110+
77111 ( '𝅭' , "Musical Symbol Combining Augmentation Dot" , '.' ) ,
78112 ( '․' , "One Dot Leader" , '.' ) ,
79- ( '۔' , "Arabic Full Stop" , '.' ) ,
80113 ( '܁' , "Syriac Supralinear Full Stop" , '.' ) ,
81114 ( '܂' , "Syriac Sublinear Full Stop" , '.' ) ,
82115 ( '꘎' , "Vai Full Stop" , '.' ) ,
83116 ( '𐩐' , "Kharoshthi Punctuation Dot" , '.' ) ,
84- ( '·' , "Middle Dot" , '.' ) ,
85117 ( '٠' , "Arabic-Indic Digit Zero" , '.' ) ,
86118 ( '۰' , "Extended Arabic-Indic Digit Zero" , '.' ) ,
87119 ( 'ꓸ' , "Lisu Letter Tone Mya Ti" , '.' ) ,
88- ( '。' , "Ideographic Full Stop" , '.' ) ,
120+ ( '·' , "Middle Dot" , '.' ) ,
89121 ( '・' , "Katakana Middle Dot" , '.' ) ,
122+ ( '・' , "Halfwidth Katakana Middle Dot" , '.' ) ,
123+ ( '᛫' , "Runic Single Punctuation" , '.' ) ,
124+ ( '·' , "Greek Ano Teleia" , '.' ) ,
125+ ( '⸱' , "Word Separator Middle Dot" , '.' ) ,
126+ ( '𐄁' , "Aegean Word Separator Dot" , '.' ) ,
127+ ( '•' , "Bullet" , '.' ) ,
128+ ( '‧' , "Hyphenation Point" , '.' ) ,
129+ ( '∙' , "Bullet Operator" , '.' ) ,
130+ ( '⋅' , "Dot Operator" , '.' ) ,
131+ ( 'ꞏ' , "Latin Letter Sinological Dot" , '.' ) ,
132+ ( 'ᐧ' , "Canadian Syllabics Final Middle Dot" , '.' ) ,
133+ ( 'ᐧ' , "Canadian Syllabics Final Middle Dot" , '.' ) ,
134+ ( '.' , "Fullwidth Full Stop" , '.' ) ,
135+ ( '。' , "Ideographic Full Stop" , '.' ) ,
136+ ( '︒' , "Presentation Form For Vertical Ideographic Full Stop" , '.' ) ,
137+
90138 ( '՝' , "Armenian Comma" , '\'' ) ,
91139 ( '\u{FF07}' , "Fullwidth Apostrophe" , '\'' ) ,
92140 ( '‘' , "Left Single Quotation Mark" , '\'' ) ,
@@ -96,15 +144,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
96144 ( '‵' , "Reversed Prime" , '\'' ) ,
97145 ( '՚' , "Armenian Apostrophe" , '\'' ) ,
98146 ( '׳' , "Hebrew Punctuation Geresh" , '\'' ) ,
147+ ( '`' , "Greek Accent" , '\'' ) ,
99148 ( '`' , "Greek Varia" , '\'' ) ,
100149 ( '`' , "Fullwidth Grave Accent" , '\'' ) ,
150+ ( '´' , "Acute Accent" , '\'' ) ,
101151 ( '΄' , "Greek Tonos" , '\'' ) ,
102152 ( '´' , "Greek Oxia" , '\'' ) ,
103153 ( '᾽' , "Greek Koronis" , '\'' ) ,
104154 ( '᾿' , "Greek Psili" , '\'' ) ,
105155 ( '῾' , "Greek Dasia" , '\'' ) ,
106156 ( 'ʹ' , "Modifier Letter Prime" , '\'' ) ,
107157 ( 'ʹ' , "Greek Numeral Sign" , '\'' ) ,
158+ ( 'ˈ' , "Modifier Letter Vertical Line" , '\'' ) ,
108159 ( 'ˊ' , "Modifier Letter Acute Accent" , '\'' ) ,
109160 ( 'ˋ' , "Modifier Letter Grave Accent" , '\'' ) ,
110161 ( '˴' , "Modifier Letter Middle Grave Accent" , '\'' ) ,
@@ -116,6 +167,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
116167 ( 'י' , "Hebrew Letter Yod" , '\'' ) ,
117168 ( 'ߴ' , "Nko High Tone Apostrophe" , '\'' ) ,
118169 ( 'ߵ' , "Nko Low Tone Apostrophe" , '\'' ) ,
170+ ( 'ᑊ' , "Canadian Syllabics West-Cree P" , '\'' ) ,
171+ ( 'ᛌ' , "Runic Letter Short-Twig-Sol S" , '\'' ) ,
172+ ( '𖽑' , "Miao Sign Aspiration" , '\'' ) ,
173+ ( '𖽒' , "Miao Sign Reformed Voicing" , '\'' ) ,
174+
175+ ( '᳓' , "Vedic Sign Nihshvasa" , '"' ) ,
119176 ( '"' , "Fullwidth Quotation Mark" , '"' ) ,
120177 ( '“' , "Left Double Quotation Mark" , '"' ) ,
121178 ( '”' , "Right Double Quotation Mark" , '"' ) ,
@@ -132,12 +189,15 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
132189 ( 'ײ' , "Hebrew Ligature Yiddish Double Yod" , '"' ) ,
133190 ( '❞' , "Heavy Double Comma Quotation Mark Ornament" , '"' ) ,
134191 ( '❝' , "Heavy Double Turned Comma Quotation Mark Ornament" , '"' ) ,
192+
193+ ( '(' , "Fullwidth Left Parenthesis" , '(' ) ,
135194 ( '❨' , "Medium Left Parenthesis Ornament" , '(' ) ,
136195 ( '﴾' , "Ornate Left Parenthesis" , '(' ) ,
137- ( '(' , "Fullwidth Left Parenthesis" , '(' ) ,
196+
197+ ( ')' , "Fullwidth Right Parenthesis" , ')' ) ,
138198 ( '❩' , "Medium Right Parenthesis Ornament" , ')' ) ,
139199 ( '﴿' , "Ornate Right Parenthesis" , ')' ) ,
140- ( ')' , "Fullwidth Right Parenthesis" , ')' ) ,
200+
141201 ( '[' , "Fullwidth Left Square Bracket" , '[' ) ,
142202 ( '❲' , "Light Left Tortoise Shell Bracket Ornament" , '[' ) ,
143203 ( '「' , "Left Corner Bracket" , '[' ) ,
@@ -147,6 +207,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
147207 ( '〖' , "Left White Lenticular Bracket" , '[' ) ,
148208 ( '〘' , "Left White Tortoise Shell Bracket" , '[' ) ,
149209 ( '〚' , "Left White Square Bracket" , '[' ) ,
210+
150211 ( ']' , "Fullwidth Right Square Bracket" , ']' ) ,
151212 ( '❳' , "Light Right Tortoise Shell Bracket Ornament" , ']' ) ,
152213 ( '」' , "Right Corner Bracket" , ']' ) ,
@@ -156,49 +217,94 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
156217 ( '〗' , "Right White Lenticular Bracket" , ']' ) ,
157218 ( '〙' , "Right White Tortoise Shell Bracket" , ']' ) ,
158219 ( '〛' , "Right White Square Bracket" , ']' ) ,
220+
159221 ( '❴' , "Medium Left Curly Bracket Ornament" , '{' ) ,
222+ ( '𝄔' , "Musical Symbol Brace" , '{' ) ,
223+ ( '{' , "Fullwidth Left Curly Bracket" , '{' ) ,
224+
160225 ( '❵' , "Medium Right Curly Bracket Ornament" , '}' ) ,
226+ ( '}' , "Fullwidth Right Curly Bracket" , '}' ) ,
227+
161228 ( '⁎' , "Low Asterisk" , '*' ) ,
162229 ( '٭' , "Arabic Five Pointed Star" , '*' ) ,
163230 ( '∗' , "Asterisk Operator" , '*' ) ,
231+ ( '𐌟' , "Old Italic Letter Ess" , '*' ) ,
232+ ( '*' , "Fullwidth Asterisk" , '*' ) ,
233+
164234 ( '᜵' , "Philippine Single Punctuation" , '/' ) ,
165235 ( '⁁' , "Caret Insertion Point" , '/' ) ,
166236 ( '∕' , "Division Slash" , '/' ) ,
167237 ( '⁄' , "Fraction Slash" , '/' ) ,
168238 ( '╱' , "Box Drawings Light Diagonal Upper Right To Lower Left" , '/' ) ,
169239 ( '⟋' , "Mathematical Rising Diagonal" , '/' ) ,
170240 ( '⧸' , "Big Solidus" , '/' ) ,
171- ( '㇓' , "Cjk Stroke Sp" , '/' ) ,
241+ ( '𝈺' , "Greek Instrumental Notation Symbol-47" , '/' ) ,
242+ ( '㇓' , "CJK Stroke Sp" , '/' ) ,
172243 ( '〳' , "Vertical Kana Repeat Mark Upper Half" , '/' ) ,
173- ( '丿' , "Cjk Unified Ideograph-4E3F" , '/' ) ,
244+ ( 'Ⳇ' , "Coptic Capital Letter Old Coptic Esh" , '/' ) ,
245+ ( 'ノ' , "Katakana Letter No" , '/' ) ,
246+ ( '丿' , "CJK Unified Ideograph-4E3F" , '/' ) ,
174247 ( '⼃' , "Kangxi Radical Slash" , '/' ) ,
248+ ( '/' , "Fullwidth Solidus" , '/' ) ,
249+
175250 ( '\u{FF3C}' , "Fullwidth Reverse Solidus" , '\\' ) ,
176251 ( '﹨' , "Small Reverse Solidus" , '\\' ) ,
177252 ( '∖' , "Set Minus" , '\\' ) ,
178253 ( '⟍' , "Mathematical Falling Diagonal" , '\\' ) ,
179254 ( '⧵' , "Reverse Solidus Operator" , '\\' ) ,
180255 ( '⧹' , "Big Reverse Solidus" , '\\' ) ,
256+ ( '\u{1D20F}' , "Greek Vocal Notation Symbol-16" , '\\' ) ,
257+ ( '\u{1D23B}' , "Greek Instrumental Symbol-48" , '\\' ) ,
258+ ( '㇔' , "CJK Stroke D" , '\\' ) ,
259+ ( '丶' , "CJK Unified Ideograph-4E36" , '\\' ) ,
260+ ( '⼂' , "Kangxi Radical Dot" , '\\' ) ,
181261 ( '、' , "Ideographic Comma" , '\\' ) ,
182262 ( 'ヽ' , "Katakana Iteration Mark" , '\\' ) ,
183- ( '㇔' , "Cjk Stroke D" , '\\' ) ,
184- ( '丶' , "Cjk Unified Ideograph-4E36" , '\\' ) ,
185- ( '⼂' , "Kangxi Radical Dot" , '\\' ) ,
263+
186264 ( 'ꝸ' , "Latin Small Letter Um" , '&' ) ,
265+ ( '&' , "Fullwidth Ampersand" , '&' ) ,
266+
267+ ( '᛭' , "Runic Cross Punctuation" , '+' ) ,
268+ ( '➕' , "Heavy Plus Sign" , '+' ) ,
269+ ( '𐊛' , "Lycian Letter H" , '+' ) ,
187270 ( '﬩' , "Hebrew Letter Alternative Plus Sign" , '+' ) ,
271+ ( '+' , "Fullwidth Plus Sign" , '+' ) ,
272+
188273 ( '‹' , "Single Left-Pointing Angle Quotation Mark" , '<' ) ,
189274 ( '❮' , "Heavy Left-Pointing Angle Quotation Mark Ornament" , '<' ) ,
190275 ( '˂' , "Modifier Letter Left Arrowhead" , '<' ) ,
276+ ( '𝈶' , "Greek Instrumental Symbol-40" , '<' ) ,
277+ ( 'ᐸ' , "Canadian Syllabics Pa" , '<' ) ,
278+ ( 'ᚲ' , "Runic Letter Kauna" , '<' ) ,
279+ ( '❬' , "Medium Left-Pointing Angle Bracket Ornament" , '<' ) ,
280+ ( '⟨' , "Mathematical Left Angle Bracket" , '<' ) ,
281+ ( '〈' , "Left-Pointing Angle Bracket" , '<' ) ,
191282 ( '〈' , "Left Angle Bracket" , '<' ) ,
283+ ( '㇛' , "CJK Stroke Pd" , '<' ) ,
284+ ( 'く' , "Hiragana Letter Ku" , '<' ) ,
285+ ( '𡿨' , "CJK Unified Ideograph-21FE8" , '<' ) ,
192286 ( '《' , "Left Double Angle Bracket" , '<' ) ,
287+ ( '<' , "Fullwidth Less-Than Sign" , '<' ) ,
288+
289+ ( '᐀' , "Canadian Syllabics Hyphen" , '=' ) ,
290+ ( '⹀' , "Double Hyphen" , '=' ) ,
291+ ( '゠' , "Katakana-Hiragana Double Hyphen" , '=' ) ,
193292 ( '꓿' , "Lisu Punctuation Full Stop" , '=' ) ,
293+ ( '=' , "Fullwidth Equals Sign" , '=' ) ,
294+
194295 ( '›' , "Single Right-Pointing Angle Quotation Mark" , '>' ) ,
195296 ( '❯' , "Heavy Right-Pointing Angle Quotation Mark Ornament" , '>' ) ,
196297 ( '˃' , "Modifier Letter Right Arrowhead" , '>' ) ,
298+ ( '𝈷' , "Greek Instrumental Symbol-42" , '>' ) ,
299+ ( 'ᐳ' , "Canadian Syllabics Po" , '>' ) ,
300+ ( '𖼿' , "Miao Letter Archaic Zza" , '>' ) ,
301+ ( '❭' , "Medium Right-Pointing Angle Bracket Ornament" , '>' ) ,
302+ ( '⟩' , "Mathematical Right Angle Bracket" , '>' ) ,
303+ ( '〉' , "Right-Pointing Angle Bracket" , '>' ) ,
197304 ( '〉' , "Right Angle Bracket" , '>' ) ,
198305 ( '》' , "Right Double Angle Bracket" , '>' ) ,
199- ( 'Ⲻ' , "Coptic Capital Letter Dialect-P Ni" , '-' ) ,
200- ( 'Ɂ' , "Latin Capital Letter Glottal Stop" , '?' ) ,
201- ( 'Ⳇ' , "Coptic Capital Letter Old Coptic Esh" , '/' ) , ] ;
306+ ( '>' , "Fullwidth Greater-Than Sign" , '>' ) , ] ;
307+
202308
203309const ASCII_ARRAY : & ' static [ ( char , & ' static str ) ] = & [
204310 ( ' ' , "Space" ) ,
0 commit comments